• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1r"""HTTP/1.1 client library
2
3<intro stuff goes here>
4<other stuff, too>
5
6HTTPConnection goes through a number of "states", which define when a client
7may legally make another request or fetch the response for a particular
8request. This diagram details these state transitions:
9
10    (null)
11      |
12      | HTTPConnection()
13      v
14    Idle
15      |
16      | putrequest()
17      v
18    Request-started
19      |
20      | ( putheader() )*  endheaders()
21      v
22    Request-sent
23      |\_____________________________
24      |                              | getresponse() raises
25      | response = getresponse()     | ConnectionError
26      v                              v
27    Unread-response                Idle
28    [Response-headers-read]
29      |\____________________
30      |                     |
31      | response.read()     | putrequest()
32      v                     v
33    Idle                  Req-started-unread-response
34                     ______/|
35                   /        |
36   response.read() |        | ( putheader() )*  endheaders()
37                   v        v
38       Request-started    Req-sent-unread-response
39                            |
40                            | response.read()
41                            v
42                          Request-sent
43
44This diagram presents the following rules:
45  -- a second request may not be started until {response-headers-read}
46  -- a response [object] cannot be retrieved until {request-sent}
47  -- there is no differentiation between an unread response body and a
48     partially read response body
49
50Note: this enforcement is applied by the HTTPConnection class. The
51      HTTPResponse class does not enforce this state machine, which
52      implies sophisticated clients may accelerate the request/response
53      pipeline. Caution should be taken, though: accelerating the states
54      beyond the above pattern may imply knowledge of the server's
55      connection-close behavior for certain requests. For example, it
56      is impossible to tell whether the server will close the connection
57      UNTIL the response headers have been read; this means that further
58      requests cannot be placed into the pipeline until it is known that
59      the server will NOT be closing the connection.
60
61Logical State                  __state            __response
62-------------                  -------            ----------
63Idle                           _CS_IDLE           None
64Request-started                _CS_REQ_STARTED    None
65Request-sent                   _CS_REQ_SENT       None
66Unread-response                _CS_IDLE           <response_class>
67Req-started-unread-response    _CS_REQ_STARTED    <response_class>
68Req-sent-unread-response       _CS_REQ_SENT       <response_class>
69"""
70
71import email.parser
72import email.message
73import http
74import io
75import re
76import socket
77import collections.abc
78from urllib.parse import urlsplit
79
# HTTPMessage, parse_headers(), and the HTTP status code constants are
# intentionally omitted for simplicity
__all__ = ["HTTPResponse", "HTTPConnection",
           "HTTPException", "NotConnected", "UnknownProtocol",
           "UnknownTransferEncoding", "UnimplementedFileMode",
           "IncompleteRead", "InvalidURL", "ImproperConnectionState",
           "CannotSendRequest", "CannotSendHeader", "ResponseNotReady",
           "BadStatusLine", "LineTooLong", "RemoteDisconnected", "error",
           "responses"]

# HTTP_PORT is the default_port of HTTPConnection
HTTP_PORT = 80
HTTPS_PORT = 443

# placeholder stored in HTTPResponse attributes (version, status, reason,
# chunked, chunk_left, length, will_close) until begin() has parsed them
_UNKNOWN = 'UNKNOWN'

# connection states
_CS_IDLE = 'Idle'
_CS_REQ_STARTED = 'Request-started'
_CS_REQ_SENT = 'Request-sent'


# hack to maintain backwards compatibility
# (injects every http.HTTPStatus member name, e.g. CONTINUE or NO_CONTENT,
# into this module's namespace)
globals().update(http.HTTPStatus.__members__)

# another hack to maintain backwards compatibility
# Mapping status codes to official W3C names
responses = {v: v.phrase for v in http.HTTPStatus.__members__.values()}

# maximal line length when calling readline().
_MAXLINE = 65536
# maximal number of header lines parse_headers() accepts (the terminating
# blank line is counted as well)
_MAXHEADERS = 100

# Header name/value ABNF (http://tools.ietf.org/html/rfc7230#section-3.2)
#
# VCHAR          = %x21-7E
# obs-text       = %x80-FF
# header-field   = field-name ":" OWS field-value OWS
# field-name     = token
# field-value    = *( field-content / obs-fold )
# field-content  = field-vchar [ 1*( SP / HTAB ) field-vchar ]
# field-vchar    = VCHAR / obs-text
#
# obs-fold       = CRLF 1*( SP / HTAB )
#                ; obsolete line folding
#                ; see Section 3.2.4

# token          = 1*tchar
#
# tchar          = "!" / "#" / "$" / "%" / "&" / "'" / "*"
#                / "+" / "-" / "." / "^" / "_" / "`" / "|" / "~"
#                / DIGIT / ALPHA
#                ; any VCHAR, except delimiters
#
# VCHAR defined in http://tools.ietf.org/html/rfc5234#appendix-B.1

# the patterns for both name and value are more lenient than RFC
# definitions to allow for backwards compatibility
# (name: must match as a whole; value: a bare CR or LF that does not start
# a folded continuation line is what makes a value illegal)
_is_legal_header_name = re.compile(rb'[^:\s][^:\r\n]*').fullmatch
_is_illegal_header_value = re.compile(rb'\n(?![ \t])|\r(?![ \t\n])').search

# These characters are not allowed within HTTP URL paths.
#  See https://tools.ietf.org/html/rfc3986#section-3.3 and the
#  https://tools.ietf.org/html/rfc3986#appendix-A pchar definition.
# Prevents CVE-2019-9740.  Includes control characters such as \r\n.
# We don't restrict chars above \x7f as putrequest() limits us to ASCII.
_contains_disallowed_url_pchar_re = re.compile('[\x00-\x20\x7f]')
# Arguably only these _should_ be allowed:
#  _is_allowed_url_pchars_re = re.compile(r"^[/!$&'()*+,;=:@%a-zA-Z0-9._~-]+$")
# We are more lenient for assumed real world compatibility purposes.

# These characters are not allowed within HTTP method names
# to prevent http header injection.
_contains_disallowed_method_pchar_re = re.compile('[\x00-\x1f]')

# We always set the Content-Length header for these methods because some
# servers will otherwise respond with a 411
_METHODS_EXPECTING_BODY = {'PATCH', 'POST', 'PUT'}
157
158
159def _encode(data, name='data'):
160    """Call data.encode("latin-1") but show a better error message."""
161    try:
162        return data.encode("latin-1")
163    except UnicodeEncodeError as err:
164        raise UnicodeEncodeError(
165            err.encoding,
166            err.object,
167            err.start,
168            err.end,
169            "%s (%.20r) is not valid Latin-1. Use %s.encode('utf-8') "
170            "if you want to send it encoded in UTF-8." %
171            (name.title(), data[err.start:err.end], name)) from None
172
173
class HTTPMessage(email.message.Message):
    """email.message.Message subclass holding parsed HTTP headers."""
    # XXX The only usage of this method is in
    # http.server.CGIHTTPRequestHandler.  Maybe move the code there so
    # that it doesn't need to be part of the public API.  The API has
    # never been defined so this could cause backwards compatibility
    # issues.

    def getallmatchingheaders(self, name):
        """Find all header lines matching a given header name.

        Look through the list of headers and find all lines matching a given
        header name (and their continuation lines).  A list of the lines is
        returned, without line terminators.  If the header does not occur,
        an empty list is returned.  If the header occurs multiple times, all
        occurrences are returned.  Case is not important in the header name.

        Each matching header is rendered as "Name: value"; a folded value
        contributes one extra list entry per continuation line.
        """
        # The previous implementation iterated over self.keys() and compared
        # against "name:"; keys never contain the colon, so nothing could
        # ever match.  Work from the (name, value) pairs instead.
        wanted = name.lower()
        lines = []
        for key, value in self.items():
            if key.lower() != wanted:
                continue
            text = '%s: %s' % (key, value)
            # Folded values keep their embedded CRLF/LF; split on those only
            # (str.splitlines() would also split on characters such as \x85).
            text = text.replace('\r\n', '\n').replace('\r', '\n')
            lines.extend(text.split('\n'))
        return lines
203
def parse_headers(fp, _class=HTTPMessage):
    """Read a header block from *fp* and return it as a *_class* message.

    The email Parser works on str, but wrapping the stream in a
    TextIOWrapper would buffer bytes that later have to be read as bytes.
    So the header lines are collected here as bytes, up to and including the
    blank line (or EOF) that ends the block, decoded as iso-8859-1 and only
    then handed to the parser.

    Raises LineTooLong for a line longer than _MAXLINE and HTTPException when
    more than _MAXHEADERS lines are read.
    """
    end_of_headers = (b'\r\n', b'\n', b'')
    raw_lines = []
    while True:
        current = fp.readline(_MAXLINE + 1)
        if len(current) > _MAXLINE:
            raise LineTooLong("header line")
        raw_lines.append(current)
        if len(raw_lines) > _MAXHEADERS:
            raise HTTPException("got more than %d headers" % _MAXHEADERS)
        if current in end_of_headers:
            break
    text = b''.join(raw_lines).decode('iso-8859-1')
    parser = email.parser.Parser(_class=_class)
    return parser.parsestr(text)
226
227
228class HTTPResponse(io.BufferedIOBase):
229
230    # See RFC 2616 sec 19.6 and RFC 1945 sec 6 for details.
231
232    # The bytes from the socket object are iso-8859-1 strings.
233    # See RFC 2616 sec 2.2 which notes an exception for MIME-encoded
234    # text following RFC 2047.  The basic status line parsing only
235    # accepts iso-8859-1.
236
237    def __init__(self, sock, debuglevel=0, method=None, url=None):
238        # If the response includes a content-length header, we need to
239        # make sure that the client doesn't read more than the
240        # specified number of bytes.  If it does, it will block until
241        # the server times out and closes the connection.  This will
242        # happen if a self.fp.read() is done (without a size) whether
243        # self.fp is buffered or not.  So, no self.fp.read() by
244        # clients unless they know what they are doing.
245        self.fp = sock.makefile("rb")
246        self.debuglevel = debuglevel
247        self._method = method
248
249        # The HTTPResponse object is returned via urllib.  The clients
250        # of http and urllib expect different attributes for the
251        # headers.  headers is used here and supports urllib.  msg is
252        # provided as a backwards compatibility layer for http
253        # clients.
254
255        self.headers = self.msg = None
256
257        # from the Status-Line of the response
258        self.version = _UNKNOWN # HTTP-Version
259        self.status = _UNKNOWN  # Status-Code
260        self.reason = _UNKNOWN  # Reason-Phrase
261
262        self.chunked = _UNKNOWN         # is "chunked" being used?
263        self.chunk_left = _UNKNOWN      # bytes left to read in current chunk
264        self.length = _UNKNOWN          # number of bytes left in response
265        self.will_close = _UNKNOWN      # conn will close at end of response
266
267    def _read_status(self):
268        line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
269        if len(line) > _MAXLINE:
270            raise LineTooLong("status line")
271        if self.debuglevel > 0:
272            print("reply:", repr(line))
273        if not line:
274            # Presumably, the server closed the connection before
275            # sending a valid response.
276            raise RemoteDisconnected("Remote end closed connection without"
277                                     " response")
278        try:
279            version, status, reason = line.split(None, 2)
280        except ValueError:
281            try:
282                version, status = line.split(None, 1)
283                reason = ""
284            except ValueError:
285                # empty version will cause next test to fail.
286                version = ""
287        if not version.startswith("HTTP/"):
288            self._close_conn()
289            raise BadStatusLine(line)
290
291        # The status code is a three-digit number
292        try:
293            status = int(status)
294            if status < 100 or status > 999:
295                raise BadStatusLine(line)
296        except ValueError:
297            raise BadStatusLine(line)
298        return version, status, reason
299
300    def begin(self):
301        if self.headers is not None:
302            # we've already started reading the response
303            return
304
305        # read until we get a non-100 response
306        while True:
307            version, status, reason = self._read_status()
308            if status != CONTINUE:
309                break
310            # skip the header from the 100 response
311            while True:
312                skip = self.fp.readline(_MAXLINE + 1)
313                if len(skip) > _MAXLINE:
314                    raise LineTooLong("header line")
315                skip = skip.strip()
316                if not skip:
317                    break
318                if self.debuglevel > 0:
319                    print("header:", skip)
320
321        self.code = self.status = status
322        self.reason = reason.strip()
323        if version in ("HTTP/1.0", "HTTP/0.9"):
324            # Some servers might still return "0.9", treat it as 1.0 anyway
325            self.version = 10
326        elif version.startswith("HTTP/1."):
327            self.version = 11   # use HTTP/1.1 code for HTTP/1.x where x>=1
328        else:
329            raise UnknownProtocol(version)
330
331        self.headers = self.msg = parse_headers(self.fp)
332
333        if self.debuglevel > 0:
334            for hdr, val in self.headers.items():
335                print("header:", hdr + ":", val)
336
337        # are we using the chunked-style of transfer encoding?
338        tr_enc = self.headers.get("transfer-encoding")
339        if tr_enc and tr_enc.lower() == "chunked":
340            self.chunked = True
341            self.chunk_left = None
342        else:
343            self.chunked = False
344
345        # will the connection close at the end of the response?
346        self.will_close = self._check_close()
347
348        # do we have a Content-Length?
349        # NOTE: RFC 2616, S4.4, #3 says we ignore this if tr_enc is "chunked"
350        self.length = None
351        length = self.headers.get("content-length")
352
353         # are we using the chunked-style of transfer encoding?
354        tr_enc = self.headers.get("transfer-encoding")
355        if length and not self.chunked:
356            try:
357                self.length = int(length)
358            except ValueError:
359                self.length = None
360            else:
361                if self.length < 0:  # ignore nonsensical negative lengths
362                    self.length = None
363        else:
364            self.length = None
365
366        # does the body have a fixed length? (of zero)
367        if (status == NO_CONTENT or status == NOT_MODIFIED or
368            100 <= status < 200 or      # 1xx codes
369            self._method == "HEAD"):
370            self.length = 0
371
372        # if the connection remains open, and we aren't using chunked, and
373        # a content-length was not provided, then assume that the connection
374        # WILL close.
375        if (not self.will_close and
376            not self.chunked and
377            self.length is None):
378            self.will_close = True
379
380    def _check_close(self):
381        conn = self.headers.get("connection")
382        if self.version == 11:
383            # An HTTP/1.1 proxy is assumed to stay open unless
384            # explicitly closed.
385            if conn and "close" in conn.lower():
386                return True
387            return False
388
389        # Some HTTP/1.0 implementations have support for persistent
390        # connections, using rules different than HTTP/1.1.
391
392        # For older HTTP, Keep-Alive indicates persistent connection.
393        if self.headers.get("keep-alive"):
394            return False
395
396        # At least Akamai returns a "Connection: Keep-Alive" header,
397        # which was supposed to be sent by the client.
398        if conn and "keep-alive" in conn.lower():
399            return False
400
401        # Proxy-Connection is a netscape hack.
402        pconn = self.headers.get("proxy-connection")
403        if pconn and "keep-alive" in pconn.lower():
404            return False
405
406        # otherwise, assume it will close
407        return True
408
409    def _close_conn(self):
410        fp = self.fp
411        self.fp = None
412        fp.close()
413
414    def close(self):
415        try:
416            super().close() # set "closed" flag
417        finally:
418            if self.fp:
419                self._close_conn()
420
421    # These implementations are for the benefit of io.BufferedReader.
422
423    # XXX This class should probably be revised to act more like
424    # the "raw stream" that BufferedReader expects.
425
426    def flush(self):
427        super().flush()
428        if self.fp:
429            self.fp.flush()
430
431    def readable(self):
432        """Always returns True"""
433        return True
434
435    # End of "raw stream" methods
436
437    def isclosed(self):
438        """True if the connection is closed."""
439        # NOTE: it is possible that we will not ever call self.close(). This
440        #       case occurs when will_close is TRUE, length is None, and we
441        #       read up to the last byte, but NOT past it.
442        #
443        # IMPLIES: if will_close is FALSE, then self.close() will ALWAYS be
444        #          called, meaning self.isclosed() is meaningful.
445        return self.fp is None
446
447    def read(self, amt=None):
448        if self.fp is None:
449            return b""
450
451        if self._method == "HEAD":
452            self._close_conn()
453            return b""
454
455        if amt is not None:
456            # Amount is given, implement using readinto
457            b = bytearray(amt)
458            n = self.readinto(b)
459            return memoryview(b)[:n].tobytes()
460        else:
461            # Amount is not given (unbounded read) so we must check self.length
462            # and self.chunked
463
464            if self.chunked:
465                return self._readall_chunked()
466
467            if self.length is None:
468                s = self.fp.read()
469            else:
470                try:
471                    s = self._safe_read(self.length)
472                except IncompleteRead:
473                    self._close_conn()
474                    raise
475                self.length = 0
476            self._close_conn()        # we read everything
477            return s
478
479    def readinto(self, b):
480        """Read up to len(b) bytes into bytearray b and return the number
481        of bytes read.
482        """
483
484        if self.fp is None:
485            return 0
486
487        if self._method == "HEAD":
488            self._close_conn()
489            return 0
490
491        if self.chunked:
492            return self._readinto_chunked(b)
493
494        if self.length is not None:
495            if len(b) > self.length:
496                # clip the read to the "end of response"
497                b = memoryview(b)[0:self.length]
498
499        # we do not use _safe_read() here because this may be a .will_close
500        # connection, and the user is reading more bytes than will be provided
501        # (for example, reading in 1k chunks)
502        n = self.fp.readinto(b)
503        if not n and b:
504            # Ideally, we would raise IncompleteRead if the content-length
505            # wasn't satisfied, but it might break compatibility.
506            self._close_conn()
507        elif self.length is not None:
508            self.length -= n
509            if not self.length:
510                self._close_conn()
511        return n
512
513    def _read_next_chunk_size(self):
514        # Read the next chunk size from the file
515        line = self.fp.readline(_MAXLINE + 1)
516        if len(line) > _MAXLINE:
517            raise LineTooLong("chunk size")
518        i = line.find(b";")
519        if i >= 0:
520            line = line[:i] # strip chunk-extensions
521        try:
522            return int(line, 16)
523        except ValueError:
524            # close the connection as protocol synchronisation is
525            # probably lost
526            self._close_conn()
527            raise
528
529    def _read_and_discard_trailer(self):
530        # read and discard trailer up to the CRLF terminator
531        ### note: we shouldn't have any trailers!
532        while True:
533            line = self.fp.readline(_MAXLINE + 1)
534            if len(line) > _MAXLINE:
535                raise LineTooLong("trailer line")
536            if not line:
537                # a vanishingly small number of sites EOF without
538                # sending the trailer
539                break
540            if line in (b'\r\n', b'\n', b''):
541                break
542
543    def _get_chunk_left(self):
544        # return self.chunk_left, reading a new chunk if necessary.
545        # chunk_left == 0: at the end of the current chunk, need to close it
546        # chunk_left == None: No current chunk, should read next.
547        # This function returns non-zero or None if the last chunk has
548        # been read.
549        chunk_left = self.chunk_left
550        if not chunk_left: # Can be 0 or None
551            if chunk_left is not None:
552                # We are at the end of chunk, discard chunk end
553                self._safe_read(2)  # toss the CRLF at the end of the chunk
554            try:
555                chunk_left = self._read_next_chunk_size()
556            except ValueError:
557                raise IncompleteRead(b'')
558            if chunk_left == 0:
559                # last chunk: 1*("0") [ chunk-extension ] CRLF
560                self._read_and_discard_trailer()
561                # we read everything; close the "file"
562                self._close_conn()
563                chunk_left = None
564            self.chunk_left = chunk_left
565        return chunk_left
566
567    def _readall_chunked(self):
568        assert self.chunked != _UNKNOWN
569        value = []
570        try:
571            while True:
572                chunk_left = self._get_chunk_left()
573                if chunk_left is None:
574                    break
575                value.append(self._safe_read(chunk_left))
576                self.chunk_left = 0
577            return b''.join(value)
578        except IncompleteRead:
579            raise IncompleteRead(b''.join(value))
580
581    def _readinto_chunked(self, b):
582        assert self.chunked != _UNKNOWN
583        total_bytes = 0
584        mvb = memoryview(b)
585        try:
586            while True:
587                chunk_left = self._get_chunk_left()
588                if chunk_left is None:
589                    return total_bytes
590
591                if len(mvb) <= chunk_left:
592                    n = self._safe_readinto(mvb)
593                    self.chunk_left = chunk_left - n
594                    return total_bytes + n
595
596                temp_mvb = mvb[:chunk_left]
597                n = self._safe_readinto(temp_mvb)
598                mvb = mvb[n:]
599                total_bytes += n
600                self.chunk_left = 0
601
602        except IncompleteRead:
603            raise IncompleteRead(bytes(b[0:total_bytes]))
604
605    def _safe_read(self, amt):
606        """Read the number of bytes requested.
607
608        This function should be used when <amt> bytes "should" be present for
609        reading. If the bytes are truly not available (due to EOF), then the
610        IncompleteRead exception can be used to detect the problem.
611        """
612        data = self.fp.read(amt)
613        if len(data) < amt:
614            raise IncompleteRead(data, amt-len(data))
615        return data
616
617    def _safe_readinto(self, b):
618        """Same as _safe_read, but for reading into a buffer."""
619        amt = len(b)
620        n = self.fp.readinto(b)
621        if n < amt:
622            raise IncompleteRead(bytes(b[:n]), amt-n)
623        return n
624
625    def read1(self, n=-1):
626        """Read with at most one underlying system call.  If at least one
627        byte is buffered, return that instead.
628        """
629        if self.fp is None or self._method == "HEAD":
630            return b""
631        if self.chunked:
632            return self._read1_chunked(n)
633        if self.length is not None and (n < 0 or n > self.length):
634            n = self.length
635        result = self.fp.read1(n)
636        if not result and n:
637            self._close_conn()
638        elif self.length is not None:
639            self.length -= len(result)
640        return result
641
642    def peek(self, n=-1):
643        # Having this enables IOBase.readline() to read more than one
644        # byte at a time
645        if self.fp is None or self._method == "HEAD":
646            return b""
647        if self.chunked:
648            return self._peek_chunked(n)
649        return self.fp.peek(n)
650
651    def readline(self, limit=-1):
652        if self.fp is None or self._method == "HEAD":
653            return b""
654        if self.chunked:
655            # Fallback to IOBase readline which uses peek() and read()
656            return super().readline(limit)
657        if self.length is not None and (limit < 0 or limit > self.length):
658            limit = self.length
659        result = self.fp.readline(limit)
660        if not result and limit:
661            self._close_conn()
662        elif self.length is not None:
663            self.length -= len(result)
664        return result
665
666    def _read1_chunked(self, n):
667        # Strictly speaking, _get_chunk_left() may cause more than one read,
668        # but that is ok, since that is to satisfy the chunked protocol.
669        chunk_left = self._get_chunk_left()
670        if chunk_left is None or n == 0:
671            return b''
672        if not (0 <= n <= chunk_left):
673            n = chunk_left # if n is negative or larger than chunk_left
674        read = self.fp.read1(n)
675        self.chunk_left -= len(read)
676        if not read:
677            raise IncompleteRead(b"")
678        return read
679
680    def _peek_chunked(self, n):
681        # Strictly speaking, _get_chunk_left() may cause more than one read,
682        # but that is ok, since that is to satisfy the chunked protocol.
683        try:
684            chunk_left = self._get_chunk_left()
685        except IncompleteRead:
686            return b'' # peek doesn't worry about protocol
687        if chunk_left is None:
688            return b'' # eof
689        # peek is allowed to return more than requested.  Just request the
690        # entire chunk, and truncate what we get.
691        return self.fp.peek(chunk_left)[:chunk_left]
692
693    def fileno(self):
694        return self.fp.fileno()
695
696    def getheader(self, name, default=None):
697        '''Returns the value of the header matching *name*.
698
699        If there are multiple matching headers, the values are
700        combined into a single string separated by commas and spaces.
701
702        If no matching header is found, returns *default* or None if
703        the *default* is not specified.
704
705        If the headers are unknown, raises http.client.ResponseNotReady.
706
707        '''
708        if self.headers is None:
709            raise ResponseNotReady()
710        headers = self.headers.get_all(name) or default
711        if isinstance(headers, str) or not hasattr(headers, '__iter__'):
712            return headers
713        else:
714            return ', '.join(headers)
715
716    def getheaders(self):
717        """Return list of (header, value) tuples."""
718        if self.headers is None:
719            raise ResponseNotReady()
720        return list(self.headers.items())
721
722    # We override IOBase.__iter__ so that it doesn't check for closed-ness
723
724    def __iter__(self):
725        return self
726
727    # For compatibility with old-style urllib responses.
728
    def info(self):
        '''Return the headers of this response.

        This is the object stored in self.headers: the message built by
        parse_headers() when begin() runs, or None if begin() has not read
        the headers yet.

        Kept for compatibility with old-style urllib responses.

        '''
        return self.headers
751
    def geturl(self):
        '''Return the real URL of the page.

        In some cases, the HTTP server redirects a client to another
        URL. The urlopen() function handles this transparently, but in
        some cases the caller needs to know which URL the client was
        redirected to. The geturl() method can be used to get at this
        redirected URL.

        Note: this class never assigns self.url itself (the url argument
        of __init__ is not stored).  Presumably the caller, e.g. urllib,
        sets the attribute on the response; if nothing did, this raises
        AttributeError.

        '''
        return self.url
763
764    def getcode(self):
765        '''Return the HTTP status code that was sent with the response,
766        or None if the URL is not an HTTP URL.
767
768        '''
769        return self.status
770
771class HTTPConnection:
772
773    _http_vsn = 11
774    _http_vsn_str = 'HTTP/1.1'
775
776    response_class = HTTPResponse
777    default_port = HTTP_PORT
778    auto_open = 1
779    debuglevel = 0
780
781    @staticmethod
782    def _is_textIO(stream):
783        """Test whether a file-like object is a text or a binary stream.
784        """
785        return isinstance(stream, io.TextIOBase)
786
787    @staticmethod
788    def _get_content_length(body, method):
789        """Get the content-length based on the body.
790
791        If the body is None, we set Content-Length: 0 for methods that expect
792        a body (RFC 7230, Section 3.3.2). We also set the Content-Length for
793        any method if the body is a str or bytes-like object and not a file.
794        """
795        if body is None:
796            # do an explicit check for not None here to distinguish
797            # between unset and set but empty
798            if method.upper() in _METHODS_EXPECTING_BODY:
799                return 0
800            else:
801                return None
802
803        if hasattr(body, 'read'):
804            # file-like object.
805            return None
806
807        try:
808            # does it implement the buffer protocol (bytes, bytearray, array)?
809            mv = memoryview(body)
810            return mv.nbytes
811        except TypeError:
812            pass
813
814        if isinstance(body, str):
815            return len(body)
816
817        return None
818
819    def __init__(self, host, port=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
820                 source_address=None, blocksize=8192):
821        self.timeout = timeout
822        self.source_address = source_address
823        self.blocksize = blocksize
824        self.sock = None
825        self._buffer = []
826        self.__response = None
827        self.__state = _CS_IDLE
828        self._method = None
829        self._tunnel_host = None
830        self._tunnel_port = None
831        self._tunnel_headers = {}
832
833        (self.host, self.port) = self._get_hostport(host, port)
834
835        self._validate_host(self.host)
836
837        # This is stored as an instance variable to allow unit
838        # tests to replace it with a suitable mockup
839        self._create_connection = socket.create_connection
840
841    def set_tunnel(self, host, port=None, headers=None):
842        """Set up host and port for HTTP CONNECT tunnelling.
843
844        In a connection that uses HTTP CONNECT tunneling, the host passed to the
845        constructor is used as a proxy server that relays all communication to
846        the endpoint passed to `set_tunnel`. This done by sending an HTTP
847        CONNECT request to the proxy server when the connection is established.
848
849        This method must be called before the HTML connection has been
850        established.
851
852        The headers argument should be a mapping of extra HTTP headers to send
853        with the CONNECT request.
854        """
855
856        if self.sock:
857            raise RuntimeError("Can't set up tunnel for established connection")
858
859        self._tunnel_host, self._tunnel_port = self._get_hostport(host, port)
860        if headers:
861            self._tunnel_headers = headers
862        else:
863            self._tunnel_headers.clear()
864
865    def _get_hostport(self, host, port):
866        if port is None:
867            i = host.rfind(':')
868            j = host.rfind(']')         # ipv6 addresses have [...]
869            if i > j:
870                try:
871                    port = int(host[i+1:])
872                except ValueError:
873                    if host[i+1:] == "": # http://foo.com:/ == http://foo.com/
874                        port = self.default_port
875                    else:
876                        raise InvalidURL("nonnumeric port: '%s'" % host[i+1:])
877                host = host[:i]
878            else:
879                port = self.default_port
880            if host and host[0] == '[' and host[-1] == ']':
881                host = host[1:-1]
882
883        return (host, port)
884
885    def set_debuglevel(self, level):
886        self.debuglevel = level
887
888    def _tunnel(self):
889        connect_str = "CONNECT %s:%d HTTP/1.0\r\n" % (self._tunnel_host,
890            self._tunnel_port)
891        connect_bytes = connect_str.encode("ascii")
892        self.send(connect_bytes)
893        for header, value in self._tunnel_headers.items():
894            header_str = "%s: %s\r\n" % (header, value)
895            header_bytes = header_str.encode("latin-1")
896            self.send(header_bytes)
897        self.send(b'\r\n')
898
899        response = self.response_class(self.sock, method=self._method)
900        (version, code, message) = response._read_status()
901
902        if code != http.HTTPStatus.OK:
903            self.close()
904            raise OSError("Tunnel connection failed: %d %s" % (code,
905                                                               message.strip()))
906        while True:
907            line = response.fp.readline(_MAXLINE + 1)
908            if len(line) > _MAXLINE:
909                raise LineTooLong("header line")
910            if not line:
911                # for sites which EOF without sending a trailer
912                break
913            if line in (b'\r\n', b'\n', b''):
914                break
915
916            if self.debuglevel > 0:
917                print('header:', line.decode())
918
919    def connect(self):
920        """Connect to the host and port specified in __init__."""
921        self.sock = self._create_connection(
922            (self.host,self.port), self.timeout, self.source_address)
923        self.sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
924
925        if self._tunnel_host:
926            self._tunnel()
927
928    def close(self):
929        """Close the connection to the HTTP server."""
930        self.__state = _CS_IDLE
931        try:
932            sock = self.sock
933            if sock:
934                self.sock = None
935                sock.close()   # close it manually... there may be other refs
936        finally:
937            response = self.__response
938            if response:
939                self.__response = None
940                response.close()
941
942    def send(self, data):
943        """Send `data' to the server.
944        ``data`` can be a string object, a bytes object, an array object, a
945        file-like object that supports a .read() method, or an iterable object.
946        """
947
948        if self.sock is None:
949            if self.auto_open:
950                self.connect()
951            else:
952                raise NotConnected()
953
954        if self.debuglevel > 0:
955            print("send:", repr(data))
956        if hasattr(data, "read") :
957            if self.debuglevel > 0:
958                print("sendIng a read()able")
959            encode = self._is_textIO(data)
960            if encode and self.debuglevel > 0:
961                print("encoding file using iso-8859-1")
962            while 1:
963                datablock = data.read(self.blocksize)
964                if not datablock:
965                    break
966                if encode:
967                    datablock = datablock.encode("iso-8859-1")
968                self.sock.sendall(datablock)
969            return
970        try:
971            self.sock.sendall(data)
972        except TypeError:
973            if isinstance(data, collections.abc.Iterable):
974                for d in data:
975                    self.sock.sendall(d)
976            else:
977                raise TypeError("data should be a bytes-like object "
978                                "or an iterable, got %r" % type(data))
979
980    def _output(self, s):
981        """Add a line of output to the current request buffer.
982
983        Assumes that the line does *not* end with \\r\\n.
984        """
985        self._buffer.append(s)
986
987    def _read_readable(self, readable):
988        if self.debuglevel > 0:
989            print("sendIng a read()able")
990        encode = self._is_textIO(readable)
991        if encode and self.debuglevel > 0:
992            print("encoding file using iso-8859-1")
993        while True:
994            datablock = readable.read(self.blocksize)
995            if not datablock:
996                break
997            if encode:
998                datablock = datablock.encode("iso-8859-1")
999            yield datablock
1000
1001    def _send_output(self, message_body=None, encode_chunked=False):
1002        """Send the currently buffered request and clear the buffer.
1003
1004        Appends an extra \\r\\n to the buffer.
1005        A message_body may be specified, to be appended to the request.
1006        """
1007        self._buffer.extend((b"", b""))
1008        msg = b"\r\n".join(self._buffer)
1009        del self._buffer[:]
1010        self.send(msg)
1011
1012        if message_body is not None:
1013
1014            # create a consistent interface to message_body
1015            if hasattr(message_body, 'read'):
1016                # Let file-like take precedence over byte-like.  This
1017                # is needed to allow the current position of mmap'ed
1018                # files to be taken into account.
1019                chunks = self._read_readable(message_body)
1020            else:
1021                try:
1022                    # this is solely to check to see if message_body
1023                    # implements the buffer API.  it /would/ be easier
1024                    # to capture if PyObject_CheckBuffer was exposed
1025                    # to Python.
1026                    memoryview(message_body)
1027                except TypeError:
1028                    try:
1029                        chunks = iter(message_body)
1030                    except TypeError:
1031                        raise TypeError("message_body should be a bytes-like "
1032                                        "object or an iterable, got %r"
1033                                        % type(message_body))
1034                else:
1035                    # the object implements the buffer interface and
1036                    # can be passed directly into socket methods
1037                    chunks = (message_body,)
1038
1039            for chunk in chunks:
1040                if not chunk:
1041                    if self.debuglevel > 0:
1042                        print('Zero length chunk ignored')
1043                    continue
1044
1045                if encode_chunked and self._http_vsn == 11:
1046                    # chunked encoding
1047                    chunk = f'{len(chunk):X}\r\n'.encode('ascii') + chunk \
1048                        + b'\r\n'
1049                self.send(chunk)
1050
1051            if encode_chunked and self._http_vsn == 11:
1052                # end chunked transfer
1053                self.send(b'0\r\n\r\n')
1054
1055    def putrequest(self, method, url, skip_host=False,
1056                   skip_accept_encoding=False):
1057        """Send a request to the server.
1058
1059        `method' specifies an HTTP request method, e.g. 'GET'.
1060        `url' specifies the object being requested, e.g. '/index.html'.
1061        `skip_host' if True does not add automatically a 'Host:' header
1062        `skip_accept_encoding' if True does not add automatically an
1063           'Accept-Encoding:' header
1064        """
1065
1066        # if a prior response has been completed, then forget about it.
1067        if self.__response and self.__response.isclosed():
1068            self.__response = None
1069
1070
1071        # in certain cases, we cannot issue another request on this connection.
1072        # this occurs when:
1073        #   1) we are in the process of sending a request.   (_CS_REQ_STARTED)
1074        #   2) a response to a previous request has signalled that it is going
1075        #      to close the connection upon completion.
1076        #   3) the headers for the previous response have not been read, thus
1077        #      we cannot determine whether point (2) is true.   (_CS_REQ_SENT)
1078        #
1079        # if there is no prior response, then we can request at will.
1080        #
1081        # if point (2) is true, then we will have passed the socket to the
1082        # response (effectively meaning, "there is no prior response"), and
1083        # will open a new one when a new request is made.
1084        #
1085        # Note: if a prior response exists, then we *can* start a new request.
1086        #       We are not allowed to begin fetching the response to this new
1087        #       request, however, until that prior response is complete.
1088        #
1089        if self.__state == _CS_IDLE:
1090            self.__state = _CS_REQ_STARTED
1091        else:
1092            raise CannotSendRequest(self.__state)
1093
1094        self._validate_method(method)
1095
1096        # Save the method for use later in the response phase
1097        self._method = method
1098
1099        url = url or '/'
1100        self._validate_path(url)
1101
1102        request = '%s %s %s' % (method, url, self._http_vsn_str)
1103
1104        self._output(self._encode_request(request))
1105
1106        if self._http_vsn == 11:
1107            # Issue some standard headers for better HTTP/1.1 compliance
1108
1109            if not skip_host:
1110                # this header is issued *only* for HTTP/1.1
1111                # connections. more specifically, this means it is
1112                # only issued when the client uses the new
1113                # HTTPConnection() class. backwards-compat clients
1114                # will be using HTTP/1.0 and those clients may be
1115                # issuing this header themselves. we should NOT issue
1116                # it twice; some web servers (such as Apache) barf
1117                # when they see two Host: headers
1118
1119                # If we need a non-standard port,include it in the
1120                # header.  If the request is going through a proxy,
1121                # but the host of the actual URL, not the host of the
1122                # proxy.
1123
1124                netloc = ''
1125                if url.startswith('http'):
1126                    nil, netloc, nil, nil, nil = urlsplit(url)
1127
1128                if netloc:
1129                    try:
1130                        netloc_enc = netloc.encode("ascii")
1131                    except UnicodeEncodeError:
1132                        netloc_enc = netloc.encode("idna")
1133                    self.putheader('Host', netloc_enc)
1134                else:
1135                    if self._tunnel_host:
1136                        host = self._tunnel_host
1137                        port = self._tunnel_port
1138                    else:
1139                        host = self.host
1140                        port = self.port
1141
1142                    try:
1143                        host_enc = host.encode("ascii")
1144                    except UnicodeEncodeError:
1145                        host_enc = host.encode("idna")
1146
1147                    # As per RFC 273, IPv6 address should be wrapped with []
1148                    # when used as Host header
1149
1150                    if host.find(':') >= 0:
1151                        host_enc = b'[' + host_enc + b']'
1152
1153                    if port == self.default_port:
1154                        self.putheader('Host', host_enc)
1155                    else:
1156                        host_enc = host_enc.decode("ascii")
1157                        self.putheader('Host', "%s:%s" % (host_enc, port))
1158
1159            # note: we are assuming that clients will not attempt to set these
1160            #       headers since *this* library must deal with the
1161            #       consequences. this also means that when the supporting
1162            #       libraries are updated to recognize other forms, then this
1163            #       code should be changed (removed or updated).
1164
1165            # we only want a Content-Encoding of "identity" since we don't
1166            # support encodings such as x-gzip or x-deflate.
1167            if not skip_accept_encoding:
1168                self.putheader('Accept-Encoding', 'identity')
1169
1170            # we can accept "chunked" Transfer-Encodings, but no others
1171            # NOTE: no TE header implies *only* "chunked"
1172            #self.putheader('TE', 'chunked')
1173
1174            # if TE is supplied in the header, then it must appear in a
1175            # Connection header.
1176            #self.putheader('Connection', 'TE')
1177
1178        else:
1179            # For HTTP/1.0, the server will assume "not chunked"
1180            pass
1181
1182    def _encode_request(self, request):
1183        # ASCII also helps prevent CVE-2019-9740.
1184        return request.encode('ascii')
1185
1186    def _validate_method(self, method):
1187        """Validate a method name for putrequest."""
1188        # prevent http header injection
1189        match = _contains_disallowed_method_pchar_re.search(method)
1190        if match:
1191            raise ValueError(
1192                    f"method can't contain control characters. {method!r} "
1193                    f"(found at least {match.group()!r})")
1194
1195    def _validate_path(self, url):
1196        """Validate a url for putrequest."""
1197        # Prevent CVE-2019-9740.
1198        match = _contains_disallowed_url_pchar_re.search(url)
1199        if match:
1200            raise InvalidURL(f"URL can't contain control characters. {url!r} "
1201                             f"(found at least {match.group()!r})")
1202
1203    def _validate_host(self, host):
1204        """Validate a host so it doesn't contain control characters."""
1205        # Prevent CVE-2019-18348.
1206        match = _contains_disallowed_url_pchar_re.search(host)
1207        if match:
1208            raise InvalidURL(f"URL can't contain control characters. {host!r} "
1209                             f"(found at least {match.group()!r})")
1210
1211    def putheader(self, header, *values):
1212        """Send a request header line to the server.
1213
1214        For example: h.putheader('Accept', 'text/html')
1215        """
1216        if self.__state != _CS_REQ_STARTED:
1217            raise CannotSendHeader()
1218
1219        if hasattr(header, 'encode'):
1220            header = header.encode('ascii')
1221
1222        if not _is_legal_header_name(header):
1223            raise ValueError('Invalid header name %r' % (header,))
1224
1225        values = list(values)
1226        for i, one_value in enumerate(values):
1227            if hasattr(one_value, 'encode'):
1228                values[i] = one_value.encode('latin-1')
1229            elif isinstance(one_value, int):
1230                values[i] = str(one_value).encode('ascii')
1231
1232            if _is_illegal_header_value(values[i]):
1233                raise ValueError('Invalid header value %r' % (values[i],))
1234
1235        value = b'\r\n\t'.join(values)
1236        header = header + b': ' + value
1237        self._output(header)
1238
1239    def endheaders(self, message_body=None, *, encode_chunked=False):
1240        """Indicate that the last header line has been sent to the server.
1241
1242        This method sends the request to the server.  The optional message_body
1243        argument can be used to pass a message body associated with the
1244        request.
1245        """
1246        if self.__state == _CS_REQ_STARTED:
1247            self.__state = _CS_REQ_SENT
1248        else:
1249            raise CannotSendHeader()
1250        self._send_output(message_body, encode_chunked=encode_chunked)
1251
1252    def request(self, method, url, body=None, headers={}, *,
1253                encode_chunked=False):
1254        """Send a complete request to the server."""
1255        self._send_request(method, url, body, headers, encode_chunked)
1256
1257    def _send_request(self, method, url, body, headers, encode_chunked):
1258        # Honor explicitly requested Host: and Accept-Encoding: headers.
1259        header_names = frozenset(k.lower() for k in headers)
1260        skips = {}
1261        if 'host' in header_names:
1262            skips['skip_host'] = 1
1263        if 'accept-encoding' in header_names:
1264            skips['skip_accept_encoding'] = 1
1265
1266        self.putrequest(method, url, **skips)
1267
1268        # chunked encoding will happen if HTTP/1.1 is used and either
1269        # the caller passes encode_chunked=True or the following
1270        # conditions hold:
1271        # 1. content-length has not been explicitly set
1272        # 2. the body is a file or iterable, but not a str or bytes-like
1273        # 3. Transfer-Encoding has NOT been explicitly set by the caller
1274
1275        if 'content-length' not in header_names:
1276            # only chunk body if not explicitly set for backwards
1277            # compatibility, assuming the client code is already handling the
1278            # chunking
1279            if 'transfer-encoding' not in header_names:
1280                # if content-length cannot be automatically determined, fall
1281                # back to chunked encoding
1282                encode_chunked = False
1283                content_length = self._get_content_length(body, method)
1284                if content_length is None:
1285                    if body is not None:
1286                        if self.debuglevel > 0:
1287                            print('Unable to determine size of %r' % body)
1288                        encode_chunked = True
1289                        self.putheader('Transfer-Encoding', 'chunked')
1290                else:
1291                    self.putheader('Content-Length', str(content_length))
1292        else:
1293            encode_chunked = False
1294
1295        for hdr, value in headers.items():
1296            self.putheader(hdr, value)
1297        if isinstance(body, str):
1298            # RFC 2616 Section 3.7.1 says that text default has a
1299            # default charset of iso-8859-1.
1300            body = _encode(body, 'body')
1301        self.endheaders(body, encode_chunked=encode_chunked)
1302
    def getresponse(self):
        """Get the response from the server.

        If the HTTPConnection is in the correct state, returns an
        instance of HTTPResponse or of whatever object is returned by
        the response_class variable.

        If a request has not been sent or if a previous response has
        not been handled, ResponseNotReady is raised.  If the HTTP
        response indicates that the connection should be closed, then
        it will be closed before the response is returned.  When the
        connection is closed, the underlying socket is closed.

        If reading the response head raises ConnectionError, the
        connection is closed and the error is re-raised.  On any
        exception the partially built response object is closed as well.
        """

        # if a prior response has been completed, then forget about it.
        if self.__response and self.__response.isclosed():
            self.__response = None

        # if a prior response exists, then it must be completed (otherwise, we
        # cannot read this response's header to determine the connection-close
        # behavior)
        #
        # note: if a prior response existed, but was connection-close, then the
        # socket and response were made independent of this HTTPConnection
        # object since a new request requires that we open a whole new
        # connection
        #
        # this means the prior response had one of two states:
        #   1) will_close: this connection was reset and the prior socket and
        #                  response operate independently
        #   2) persistent: the response was retained and we await its
        #                  isclosed() status to become true.
        #
        if self.__state != _CS_REQ_SENT or self.__response:
            raise ResponseNotReady(self.__state)

        # The debug level is only passed on when debugging is enabled.
        if self.debuglevel > 0:
            response = self.response_class(self.sock, self.debuglevel,
                                           method=self._method)
        else:
            response = self.response_class(self.sock, method=self._method)

        try:
            try:
                # Read and parse the response head.
                response.begin()
            except ConnectionError:
                # The connection is unusable: reset it before re-raising.
                self.close()
                raise
            assert response.will_close != _UNKNOWN
            # The request/response cycle is complete as far as this
            # connection's state is concerned.
            self.__state = _CS_IDLE

            if response.will_close:
                # this effectively passes the connection to the response
                self.close()
            else:
                # remember this, so we can tell when it is complete
                self.__response = response

            return response
        except:
            # Deliberately bare: release the response on any failure,
            # then let the original exception propagate unchanged.
            response.close()
            raise
1365
try:
    import ssl
except ImportError:
    # Without the ssl module HTTPSConnection is simply not defined.
    pass
else:
    class HTTPSConnection(HTTPConnection):
        """HTTPConnection variant whose socket is wrapped by an SSL context."""

        default_port = HTTPS_PORT

        # XXX Should key_file and cert_file be deprecated in favour of context?

        def __init__(self, host, port=None, key_file=None, cert_file=None,
                     timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
                     source_address=None, *, context=None,
                     check_hostname=None, blocksize=8192):
            """Prepare an HTTPS connection object without opening a socket.

            host, port, timeout, source_address and blocksize are handled
            as in HTTPConnection.  context is the SSL context used to wrap
            the socket; a default HTTPS context is created when it is None.
            key_file, cert_file and check_hostname are deprecated and emit
            a DeprecationWarning when given.

            Raises ValueError if hostname checking is requested while the
            context's verify_mode is CERT_NONE.
            """
            super().__init__(host, port, timeout, source_address,
                             blocksize=blocksize)
            legacy_args = (key_file, cert_file, check_hostname)
            if any(arg is not None for arg in legacy_args):
                import warnings
                warnings.warn("key_file, cert_file and check_hostname are "
                              "deprecated, use a custom context instead.",
                              DeprecationWarning, 2)
            self.key_file = key_file
            self.cert_file = cert_file
            if context is None:
                context = ssl._create_default_https_context()
                # enable PHA for TLS 1.3 connections if available
                if context.post_handshake_auth is not None:
                    context.post_handshake_auth = True
            will_verify = context.verify_mode != ssl.CERT_NONE
            if check_hostname is None:
                # Not specified by the caller: follow the context.
                check_hostname = context.check_hostname
            if check_hostname and not will_verify:
                raise ValueError("check_hostname needs a SSL context with "
                                 "either CERT_OPTIONAL or CERT_REQUIRED")
            if key_file or cert_file:
                context.load_cert_chain(cert_file, key_file)
                # cert and key file means the user wants to authenticate.
                # enable TLS 1.3 PHA implicitly even for custom contexts.
                if context.post_handshake_auth is not None:
                    context.post_handshake_auth = True
            self._context = context
            if check_hostname is not None:
                self._context.check_hostname = check_hostname

        def connect(self):
            """Connect to a host on a given (SSL) port.

            Opens the plain connection (including any CONNECT tunnel)
            first, then wraps the socket with the stored SSL context.  The
            tunnel target, when one is set, is used as the server hostname
            in place of the host given to the constructor.
            """
            super().connect()

            server_hostname = self._tunnel_host or self.host
            self.sock = self._context.wrap_socket(
                self.sock, server_hostname=server_hostname)

    __all__.append("HTTPSConnection")
1428
class HTTPException(Exception):
    """Common base class for the HTTP-specific exceptions of this module.

    Subclasses that define an __init__ must call Exception.__init__
    or define self.args.  Otherwise, str() will fail.
    """
1433
class NotConnected(HTTPException):
    """Raised by HTTPConnection.send() when no socket is open and
    auto_open is false."""
1436
class InvalidURL(HTTPException):
    """Raised for a host or URL that is rejected: a non-numeric port or
    disallowed characters."""
1439
class UnknownProtocol(HTTPException):
    """HTTPException carrying a protocol version value.

    The value is available as the ``version`` attribute and as the only
    element of ``args``.
    """
    # NOTE(review): presumably raised for an unsupported HTTP version in
    # a status line; the raise site is not nearby -- confirm.

    def __init__(self, version):
        self.version = version
        self.args = (version,)
1444
class UnknownTransferEncoding(HTTPException):
    """A subclass of HTTPException with no behaviour of its own."""
    # NOTE(review): presumably signals an unsupported Transfer-Encoding;
    # no raise site nearby -- confirm.
1447
class UnimplementedFileMode(HTTPException):
    """A subclass of HTTPException with no behaviour of its own."""
    # NOTE(review): presumably kept for callers that still reference it;
    # no raise site nearby -- confirm.
1450
class IncompleteRead(HTTPException):
    """HTTPException describing a read that yielded less data than wanted.

    ``partial`` holds the data that was read and ``expected`` the number
    of bytes still missing, or None when that is not known.
    """

    def __init__(self, partial, expected=None):
        self.args = (partial,)
        self.partial = partial
        self.expected = expected

    def __repr__(self):
        if self.expected is None:
            remaining = ''
        else:
            remaining = ', %i more expected' % self.expected
        return '%s(%i bytes read%s)' % (self.__class__.__name__,
                                        len(self.partial), remaining)

    # str() uses object.__str__, which falls back to the repr above.
    __str__ = object.__str__
1464
class ImproperConnectionState(HTTPException):
    """Base class for errors raised when a call does not fit the current
    connection state."""
1467
class CannotSendRequest(ImproperConnectionState):
    """Raised by HTTPConnection.putrequest() when the connection is not
    idle."""
1470
class CannotSendHeader(ImproperConnectionState):
    """Raised by putheader() and endheaders() when no request is currently
    being built."""
1473
class ResponseNotReady(ImproperConnectionState):
    """Raised by getresponse() when no request has been sent or a previous
    response is still pending."""
1476
class BadStatusLine(HTTPException):
    """HTTPException carrying a status line.

    The line is stored in ``line`` and ``args``; an empty line is stored
    as its repr() so that the message is never blank.
    """

    def __init__(self, line):
        shown = line if line else repr(line)
        self.args = (shown,)
        self.line = shown
1483
class LineTooLong(HTTPException):
    """Raised when a line read from the peer is longer than _MAXLINE.

    line_type names the kind of line (for example "header line") and is
    included in the message.
    """

    def __init__(self, line_type):
        message = ("got more than %d bytes when reading %s"
                   % (_MAXLINE, line_type))
        HTTPException.__init__(self, message)
1488
class RemoteDisconnected(ConnectionResetError, BadStatusLine):
    """Exception that is both a ConnectionResetError and a BadStatusLine.

    Handlers written for either base class catch it.
    """
    def __init__(self, *pos, **kw):
        # BadStatusLine.__init__ is given an empty line, which it stores
        # as repr("") in self.line.  The caller's arguments are passed to
        # ConnectionResetError.__init__ afterwards; keep this order.
        BadStatusLine.__init__(self, "")
        ConnectionResetError.__init__(self, *pos, **kw)
1493
# for backwards compatibility: ``error`` is an alias of HTTPException, so
# code that catches ``error`` catches every HTTPException subclass.
error = HTTPException
1496