• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1r"""HTTP/1.1 client library
2
3<intro stuff goes here>
4<other stuff, too>
5
6HTTPConnection goes through a number of "states", which define when a client
7may legally make another request or fetch the response for a particular
8request. This diagram details these state transitions:
9
10    (null)
11      |
12      | HTTPConnection()
13      v
14    Idle
15      |
16      | putrequest()
17      v
18    Request-started
19      |
20      | ( putheader() )*  endheaders()
21      v
22    Request-sent
23      |
24      | response = getresponse()
25      v
26    Unread-response   [Response-headers-read]
27      |\____________________
28      |                     |
29      | response.read()     | putrequest()
30      v                     v
31    Idle                  Req-started-unread-response
32                     ______/|
33                   /        |
34   response.read() |        | ( putheader() )*  endheaders()
35                   v        v
36       Request-started    Req-sent-unread-response
37                            |
38                            | response.read()
39                            v
40                          Request-sent
41
42This diagram presents the following rules:
43  -- a second request may not be started until {response-headers-read}
44  -- a response [object] cannot be retrieved until {request-sent}
45  -- there is no differentiation between an unread response body and a
46     partially read response body
47
48Note: this enforcement is applied by the HTTPConnection class. The
49      HTTPResponse class does not enforce this state machine, which
50      implies sophisticated clients may accelerate the request/response
51      pipeline. Caution should be taken, though: accelerating the states
52      beyond the above pattern may imply knowledge of the server's
53      connection-close behavior for certain requests. For example, it
54      is impossible to tell whether the server will close the connection
55      UNTIL the response headers have been read; this means that further
56      requests cannot be placed into the pipeline until it is known that
57      the server will NOT be closing the connection.
58
59Logical State                  __state            __response
60-------------                  -------            ----------
61Idle                           _CS_IDLE           None
62Request-started                _CS_REQ_STARTED    None
63Request-sent                   _CS_REQ_SENT       None
64Unread-response                _CS_IDLE           <response_class>
65Req-started-unread-response    _CS_REQ_STARTED    <response_class>
66Req-sent-unread-response       _CS_REQ_SENT       <response_class>
67"""
68
69from array import array
70import os
71import re
72import socket
73from sys import py3kwarning
74from urlparse import urlsplit
75import warnings
76with warnings.catch_warnings():
77    if py3kwarning:
78        warnings.filterwarnings("ignore", ".*mimetools has been removed",
79                                DeprecationWarning)
80    import mimetools
81
82try:
83    from cStringIO import StringIO
84except ImportError:
85    from StringIO import StringIO
86
# Names exported by a star-import of this module.
__all__ = ["HTTP", "HTTPResponse", "HTTPConnection",
           "HTTPException", "NotConnected", "UnknownProtocol",
           "UnknownTransferEncoding", "UnimplementedFileMode",
           "IncompleteRead", "InvalidURL", "ImproperConnectionState",
           "CannotSendRequest", "CannotSendHeader", "ResponseNotReady",
           "BadStatusLine", "error", "responses"]

# Default TCP ports.  HTTP_PORT is used as HTTPConnection.default_port.
HTTP_PORT = 80
HTTPS_PORT = 443

# Placeholder stored in HTTPResponse attributes (version, status, reason,
# chunked, length, ...) by HTTPResponse.__init__ until they are parsed.
_UNKNOWN = 'UNKNOWN'

# connection states (the "__state" column of the table in the module
# docstring)
_CS_IDLE = 'Idle'
_CS_REQ_STARTED = 'Request-started'
_CS_REQ_SENT = 'Request-sent'
103
# status codes: symbolic integer names for HTTP status codes, grouped by
# class (1xx informational through 5xx server error)
# informational
CONTINUE = 100
SWITCHING_PROTOCOLS = 101
PROCESSING = 102

# successful
OK = 200
CREATED = 201
ACCEPTED = 202
NON_AUTHORITATIVE_INFORMATION = 203
NO_CONTENT = 204
RESET_CONTENT = 205
PARTIAL_CONTENT = 206
MULTI_STATUS = 207
IM_USED = 226

# redirection
MULTIPLE_CHOICES = 300
MOVED_PERMANENTLY = 301
FOUND = 302
SEE_OTHER = 303
NOT_MODIFIED = 304
USE_PROXY = 305
TEMPORARY_REDIRECT = 307

# client error
BAD_REQUEST = 400
UNAUTHORIZED = 401
PAYMENT_REQUIRED = 402
FORBIDDEN = 403
NOT_FOUND = 404
METHOD_NOT_ALLOWED = 405
NOT_ACCEPTABLE = 406
PROXY_AUTHENTICATION_REQUIRED = 407
REQUEST_TIMEOUT = 408
CONFLICT = 409
GONE = 410
LENGTH_REQUIRED = 411
PRECONDITION_FAILED = 412
REQUEST_ENTITY_TOO_LARGE = 413
REQUEST_URI_TOO_LONG = 414
UNSUPPORTED_MEDIA_TYPE = 415
REQUESTED_RANGE_NOT_SATISFIABLE = 416
EXPECTATION_FAILED = 417
UNPROCESSABLE_ENTITY = 422
LOCKED = 423
FAILED_DEPENDENCY = 424
UPGRADE_REQUIRED = 426

# server error
INTERNAL_SERVER_ERROR = 500
NOT_IMPLEMENTED = 501
BAD_GATEWAY = 502
SERVICE_UNAVAILABLE = 503
GATEWAY_TIMEOUT = 504
HTTP_VERSION_NOT_SUPPORTED = 505
INSUFFICIENT_STORAGE = 507
NOT_EXTENDED = 510
163
164# Mapping status codes to official W3C names
responses = {
    # Maps an integer status code to its standard reason phrase.  Every
    # status-code constant defined by this module has an entry, so
    # responses[SOME_CONSTANT] never raises KeyError.

    # informational
    100: 'Continue',
    101: 'Switching Protocols',
    102: 'Processing',

    # successful
    200: 'OK',
    201: 'Created',
    202: 'Accepted',
    203: 'Non-Authoritative Information',
    204: 'No Content',
    205: 'Reset Content',
    206: 'Partial Content',
    207: 'Multi-Status',
    226: 'IM Used',

    # redirection
    300: 'Multiple Choices',
    301: 'Moved Permanently',
    302: 'Found',
    303: 'See Other',
    304: 'Not Modified',
    305: 'Use Proxy',
    306: '(Unused)',
    307: 'Temporary Redirect',

    # client error
    400: 'Bad Request',
    401: 'Unauthorized',
    402: 'Payment Required',
    403: 'Forbidden',
    404: 'Not Found',
    405: 'Method Not Allowed',
    406: 'Not Acceptable',
    407: 'Proxy Authentication Required',
    408: 'Request Timeout',
    409: 'Conflict',
    410: 'Gone',
    411: 'Length Required',
    412: 'Precondition Failed',
    413: 'Request Entity Too Large',
    414: 'Request-URI Too Long',
    415: 'Unsupported Media Type',
    416: 'Requested Range Not Satisfiable',
    417: 'Expectation Failed',
    422: 'Unprocessable Entity',
    423: 'Locked',
    424: 'Failed Dependency',
    426: 'Upgrade Required',

    # server error
    500: 'Internal Server Error',
    501: 'Not Implemented',
    502: 'Bad Gateway',
    503: 'Service Unavailable',
    504: 'Gateway Timeout',
    505: 'HTTP Version Not Supported',
    507: 'Insufficient Storage',
    510: 'Not Extended',
}
212
# maximum number of bytes requested from the file object by a single
# read() call inside _safe_read
MAXAMOUNT = 1048576

# maximum line length accepted when calling readline(); a longer line
# raises LineTooLong
_MAXLINE = 65536

# maximum number of header lines accepted by HTTPMessage.readheaders()
# before it raises HTTPException
_MAXHEADERS = 100
221
222# Header name/value ABNF (http://tools.ietf.org/html/rfc7230#section-3.2)
223#
224# VCHAR          = %x21-7E
225# obs-text       = %x80-FF
226# header-field   = field-name ":" OWS field-value OWS
227# field-name     = token
228# field-value    = *( field-content / obs-fold )
229# field-content  = field-vchar [ 1*( SP / HTAB ) field-vchar ]
230# field-vchar    = VCHAR / obs-text
231#
232# obs-fold       = CRLF 1*( SP / HTAB )
233#                ; obsolete line folding
234#                ; see Section 3.2.4
235
236# token          = 1*tchar
237#
238# tchar          = "!" / "#" / "$" / "%" / "&" / "'" / "*"
239#                / "+" / "-" / "." / "^" / "_" / "`" / "|" / "~"
240#                / DIGIT / ALPHA
241#                ; any VCHAR, except delimiters
242#
243# VCHAR defined in http://tools.ietf.org/html/rfc5234#appendix-B.1
244
# the patterns for both name and value are more lenient than RFC
# definitions to allow for backwards compatibility
#
# A legal name starts with a character that is neither ':' nor whitespace
# and contains no ':', CR or LF afterwards.
_is_legal_header_name = re.compile(r'\A[^:\s][^:\r\n]*\Z').match
# An illegal value contains an LF that is not followed by a space or tab,
# or a CR that is not followed by a space, tab or LF.
_is_illegal_header_value = re.compile(r'\n(?![ \t])|\r(?![ \t\n])').search

# We always set the Content-Length header for these methods because some
# servers will otherwise respond with a 411
_METHODS_EXPECTING_BODY = {'PATCH', 'POST', 'PUT'}
253
254
255class HTTPMessage(mimetools.Message):
256
257    def addheader(self, key, value):
258        """Add header for field key handling repeats."""
259        prev = self.dict.get(key)
260        if prev is None:
261            self.dict[key] = value
262        else:
263            combined = ", ".join((prev, value))
264            self.dict[key] = combined
265
266    def addcontinue(self, key, more):
267        """Add more field data from a continuation line."""
268        prev = self.dict[key]
269        self.dict[key] = prev + "\n " + more
270
271    def readheaders(self):
272        """Read header lines.
273
274        Read header lines up to the entirely blank line that terminates them.
275        The (normally blank) line that ends the headers is skipped, but not
276        included in the returned list.  If an invalid line is found in the
277        header section, it is skipped, and further lines are processed.
278
279        The variable self.status is set to the empty string if all went well,
280        otherwise it is an error message.  The variable self.headers is a
281        completely uninterpreted list of lines contained in the header (so
282        printing them will reproduce the header exactly as it appears in the
283        file).
284
285        If multiple header fields with the same name occur, they are combined
286        according to the rules in RFC 2616 sec 4.2:
287
288        Appending each subsequent field-value to the first, each separated
289        by a comma. The order in which header fields with the same field-name
290        are received is significant to the interpretation of the combined
291        field value.
292        """
293        # XXX The implementation overrides the readheaders() method of
294        # rfc822.Message.  The base class design isn't amenable to
295        # customized behavior here so the method here is a copy of the
296        # base class code with a few small changes.
297
298        self.dict = {}
299        self.unixfrom = ''
300        self.headers = hlist = []
301        self.status = ''
302        headerseen = ""
303        firstline = 1
304        tell = None
305        if not hasattr(self.fp, 'unread') and self.seekable:
306            tell = self.fp.tell
307        while True:
308            if len(hlist) > _MAXHEADERS:
309                raise HTTPException("got more than %d headers" % _MAXHEADERS)
310            if tell:
311                try:
312                    tell()
313                except IOError:
314                    tell = None
315                    self.seekable = 0
316            line = self.fp.readline(_MAXLINE + 1)
317            if len(line) > _MAXLINE:
318                raise LineTooLong("header line")
319            if not line:
320                self.status = 'EOF in headers'
321                break
322            # Skip unix From name time lines
323            if firstline and line.startswith('From '):
324                self.unixfrom = self.unixfrom + line
325                continue
326            firstline = 0
327            if headerseen and line[0] in ' \t':
328                # XXX Not sure if continuation lines are handled properly
329                # for http and/or for repeating headers
330                # It's a continuation line.
331                hlist.append(line)
332                self.addcontinue(headerseen, line.strip())
333                continue
334            elif self.iscomment(line):
335                # It's a comment.  Ignore it.
336                continue
337            elif self.islast(line):
338                # Note! No pushback here!  The delimiter line gets eaten.
339                break
340            headerseen = self.isheader(line)
341            if headerseen:
342                # It's a legal header line, save it.
343                hlist.append(line)
344                self.addheader(headerseen, line[len(headerseen)+1:].strip())
345            elif headerseen is not None:
346                # An empty header name. These aren't allowed in HTTP, but it's
347                # probably a benign mistake. Don't add the header, just keep
348                # going.
349                pass
350            else:
351                # It's not a header line; skip it and try the next line.
352                self.status = 'Non-header line where header expected'
353
354class HTTPResponse:
355
356    # strict: If true, raise BadStatusLine if the status line can't be
357    # parsed as a valid HTTP/1.0 or 1.1 status line.  By default it is
358    # false because it prevents clients from talking to HTTP/0.9
359    # servers.  Note that a response with a sufficiently corrupted
360    # status line will look like an HTTP/0.9 response.
361
362    # See RFC 2616 sec 19.6 and RFC 1945 sec 6 for details.
363
364    def __init__(self, sock, debuglevel=0, strict=0, method=None, buffering=False):
365        if buffering:
366            # The caller won't be using any sock.recv() calls, so buffering
367            # is fine and recommended for performance.
368            self.fp = sock.makefile('rb')
369        else:
370            # The buffer size is specified as zero, because the headers of
371            # the response are read with readline().  If the reads were
372            # buffered the readline() calls could consume some of the
373            # response, which make be read via a recv() on the underlying
374            # socket.
375            self.fp = sock.makefile('rb', 0)
376        self.debuglevel = debuglevel
377        self.strict = strict
378        self._method = method
379
380        self.msg = None
381
382        # from the Status-Line of the response
383        self.version = _UNKNOWN # HTTP-Version
384        self.status = _UNKNOWN  # Status-Code
385        self.reason = _UNKNOWN  # Reason-Phrase
386
387        self.chunked = _UNKNOWN         # is "chunked" being used?
388        self.chunk_left = _UNKNOWN      # bytes left to read in current chunk
389        self.length = _UNKNOWN          # number of bytes left in response
390        self.will_close = _UNKNOWN      # conn will close at end of response
391
392    def _read_status(self):
393        # Initialize with Simple-Response defaults
394        line = self.fp.readline(_MAXLINE + 1)
395        if len(line) > _MAXLINE:
396            raise LineTooLong("header line")
397        if self.debuglevel > 0:
398            print "reply:", repr(line)
399        if not line:
400            # Presumably, the server closed the connection before
401            # sending a valid response.
402            raise BadStatusLine(line)
403        try:
404            [version, status, reason] = line.split(None, 2)
405        except ValueError:
406            try:
407                [version, status] = line.split(None, 1)
408                reason = ""
409            except ValueError:
410                # empty version will cause next test to fail and status
411                # will be treated as 0.9 response.
412                version = ""
413        if not version.startswith('HTTP/'):
414            if self.strict:
415                self.close()
416                raise BadStatusLine(line)
417            else:
418                # assume it's a Simple-Response from an 0.9 server
419                self.fp = LineAndFileWrapper(line, self.fp)
420                return "HTTP/0.9", 200, ""
421
422        # The status code is a three-digit number
423        try:
424            status = int(status)
425            if status < 100 or status > 999:
426                raise BadStatusLine(line)
427        except ValueError:
428            raise BadStatusLine(line)
429        return version, status, reason
430
431    def begin(self):
432        if self.msg is not None:
433            # we've already started reading the response
434            return
435
436        # read until we get a non-100 response
437        while True:
438            version, status, reason = self._read_status()
439            if status != CONTINUE:
440                break
441            # skip the header from the 100 response
442            while True:
443                skip = self.fp.readline(_MAXLINE + 1)
444                if len(skip) > _MAXLINE:
445                    raise LineTooLong("header line")
446                skip = skip.strip()
447                if not skip:
448                    break
449                if self.debuglevel > 0:
450                    print "header:", skip
451
452        self.status = status
453        self.reason = reason.strip()
454        if version == 'HTTP/1.0':
455            self.version = 10
456        elif version.startswith('HTTP/1.'):
457            self.version = 11   # use HTTP/1.1 code for HTTP/1.x where x>=1
458        elif version == 'HTTP/0.9':
459            self.version = 9
460        else:
461            raise UnknownProtocol(version)
462
463        if self.version == 9:
464            self.length = None
465            self.chunked = 0
466            self.will_close = 1
467            self.msg = HTTPMessage(StringIO())
468            return
469
470        self.msg = HTTPMessage(self.fp, 0)
471        if self.debuglevel > 0:
472            for hdr in self.msg.headers:
473                print "header:", hdr,
474
475        # don't let the msg keep an fp
476        self.msg.fp = None
477
478        # are we using the chunked-style of transfer encoding?
479        tr_enc = self.msg.getheader('transfer-encoding')
480        if tr_enc and tr_enc.lower() == "chunked":
481            self.chunked = 1
482            self.chunk_left = None
483        else:
484            self.chunked = 0
485
486        # will the connection close at the end of the response?
487        self.will_close = self._check_close()
488
489        # do we have a Content-Length?
490        # NOTE: RFC 2616, S4.4, #3 says we ignore this if tr_enc is "chunked"
491        length = self.msg.getheader('content-length')
492        if length and not self.chunked:
493            try:
494                self.length = int(length)
495            except ValueError:
496                self.length = None
497            else:
498                if self.length < 0:  # ignore nonsensical negative lengths
499                    self.length = None
500        else:
501            self.length = None
502
503        # does the body have a fixed length? (of zero)
504        if (status == NO_CONTENT or status == NOT_MODIFIED or
505            100 <= status < 200 or      # 1xx codes
506            self._method == 'HEAD'):
507            self.length = 0
508
509        # if the connection remains open, and we aren't using chunked, and
510        # a content-length was not provided, then assume that the connection
511        # WILL close.
512        if not self.will_close and \
513           not self.chunked and \
514           self.length is None:
515            self.will_close = 1
516
517    def _check_close(self):
518        conn = self.msg.getheader('connection')
519        if self.version == 11:
520            # An HTTP/1.1 proxy is assumed to stay open unless
521            # explicitly closed.
522            conn = self.msg.getheader('connection')
523            if conn and "close" in conn.lower():
524                return True
525            return False
526
527        # Some HTTP/1.0 implementations have support for persistent
528        # connections, using rules different than HTTP/1.1.
529
530        # For older HTTP, Keep-Alive indicates persistent connection.
531        if self.msg.getheader('keep-alive'):
532            return False
533
534        # At least Akamai returns a "Connection: Keep-Alive" header,
535        # which was supposed to be sent by the client.
536        if conn and "keep-alive" in conn.lower():
537            return False
538
539        # Proxy-Connection is a netscape hack.
540        pconn = self.msg.getheader('proxy-connection')
541        if pconn and "keep-alive" in pconn.lower():
542            return False
543
544        # otherwise, assume it will close
545        return True
546
547    def close(self):
548        fp = self.fp
549        if fp:
550            self.fp = None
551            fp.close()
552
553    def isclosed(self):
554        # NOTE: it is possible that we will not ever call self.close(). This
555        #       case occurs when will_close is TRUE, length is None, and we
556        #       read up to the last byte, but NOT past it.
557        #
558        # IMPLIES: if will_close is FALSE, then self.close() will ALWAYS be
559        #          called, meaning self.isclosed() is meaningful.
560        return self.fp is None
561
562    # XXX It would be nice to have readline and __iter__ for this, too.
563
564    def read(self, amt=None):
565        if self.fp is None:
566            return ''
567
568        if self._method == 'HEAD':
569            self.close()
570            return ''
571
572        if self.chunked:
573            return self._read_chunked(amt)
574
575        if amt is None:
576            # unbounded read
577            if self.length is None:
578                s = self.fp.read()
579            else:
580                try:
581                    s = self._safe_read(self.length)
582                except IncompleteRead:
583                    self.close()
584                    raise
585                self.length = 0
586            self.close()        # we read everything
587            return s
588
589        if self.length is not None:
590            if amt > self.length:
591                # clip the read to the "end of response"
592                amt = self.length
593
594        # we do not use _safe_read() here because this may be a .will_close
595        # connection, and the user is reading more bytes than will be provided
596        # (for example, reading in 1k chunks)
597        s = self.fp.read(amt)
598        if not s and amt:
599            # Ideally, we would raise IncompleteRead if the content-length
600            # wasn't satisfied, but it might break compatibility.
601            self.close()
602        if self.length is not None:
603            self.length -= len(s)
604            if not self.length:
605                self.close()
606
607        return s
608
609    def _read_chunked(self, amt):
610        assert self.chunked != _UNKNOWN
611        chunk_left = self.chunk_left
612        value = []
613        while True:
614            if chunk_left is None:
615                line = self.fp.readline(_MAXLINE + 1)
616                if len(line) > _MAXLINE:
617                    raise LineTooLong("chunk size")
618                i = line.find(';')
619                if i >= 0:
620                    line = line[:i] # strip chunk-extensions
621                try:
622                    chunk_left = int(line, 16)
623                except ValueError:
624                    # close the connection as protocol synchronisation is
625                    # probably lost
626                    self.close()
627                    raise IncompleteRead(''.join(value))
628                if chunk_left == 0:
629                    break
630            if amt is None:
631                value.append(self._safe_read(chunk_left))
632            elif amt < chunk_left:
633                value.append(self._safe_read(amt))
634                self.chunk_left = chunk_left - amt
635                return ''.join(value)
636            elif amt == chunk_left:
637                value.append(self._safe_read(amt))
638                self._safe_read(2)  # toss the CRLF at the end of the chunk
639                self.chunk_left = None
640                return ''.join(value)
641            else:
642                value.append(self._safe_read(chunk_left))
643                amt -= chunk_left
644
645            # we read the whole chunk, get another
646            self._safe_read(2)      # toss the CRLF at the end of the chunk
647            chunk_left = None
648
649        # read and discard trailer up to the CRLF terminator
650        ### note: we shouldn't have any trailers!
651        while True:
652            line = self.fp.readline(_MAXLINE + 1)
653            if len(line) > _MAXLINE:
654                raise LineTooLong("trailer line")
655            if not line:
656                # a vanishingly small number of sites EOF without
657                # sending the trailer
658                break
659            if line == '\r\n':
660                break
661
662        # we read everything; close the "file"
663        self.close()
664
665        return ''.join(value)
666
667    def _safe_read(self, amt):
668        """Read the number of bytes requested, compensating for partial reads.
669
670        Normally, we have a blocking socket, but a read() can be interrupted
671        by a signal (resulting in a partial read).
672
673        Note that we cannot distinguish between EOF and an interrupt when zero
674        bytes have been read. IncompleteRead() will be raised in this
675        situation.
676
677        This function should be used when <amt> bytes "should" be present for
678        reading. If the bytes are truly not available (due to EOF), then the
679        IncompleteRead exception can be used to detect the problem.
680        """
681        # NOTE(gps): As of svn r74426 socket._fileobject.read(x) will never
682        # return less than x bytes unless EOF is encountered.  It now handles
683        # signal interruptions (socket.error EINTR) internally.  This code
684        # never caught that exception anyways.  It seems largely pointless.
685        # self.fp.read(amt) will work fine.
686        s = []
687        while amt > 0:
688            chunk = self.fp.read(min(amt, MAXAMOUNT))
689            if not chunk:
690                raise IncompleteRead(''.join(s), amt)
691            s.append(chunk)
692            amt -= len(chunk)
693        return ''.join(s)
694
695    def fileno(self):
696        return self.fp.fileno()
697
698    def getheader(self, name, default=None):
699        if self.msg is None:
700            raise ResponseNotReady()
701        return self.msg.getheader(name, default)
702
703    def getheaders(self):
704        """Return list of (header, value) tuples."""
705        if self.msg is None:
706            raise ResponseNotReady()
707        return self.msg.items()
708
709
710class HTTPConnection:
711
712    _http_vsn = 11
713    _http_vsn_str = 'HTTP/1.1'
714
715    response_class = HTTPResponse
716    default_port = HTTP_PORT
717    auto_open = 1
718    debuglevel = 0
719    strict = 0
720
721    def __init__(self, host, port=None, strict=None,
722                 timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
723        self.timeout = timeout
724        self.source_address = source_address
725        self.sock = None
726        self._buffer = []
727        self.__response = None
728        self.__state = _CS_IDLE
729        self._method = None
730        self._tunnel_host = None
731        self._tunnel_port = None
732        self._tunnel_headers = {}
733        if strict is not None:
734            self.strict = strict
735
736        (self.host, self.port) = self._get_hostport(host, port)
737
738        # This is stored as an instance variable to allow unittests
739        # to replace with a suitable mock
740        self._create_connection = socket.create_connection
741
742    def set_tunnel(self, host, port=None, headers=None):
743        """ Set up host and port for HTTP CONNECT tunnelling.
744
745        In a connection that uses HTTP Connect tunneling, the host passed to the
746        constructor is used as proxy server that relays all communication to the
747        endpoint passed to set_tunnel. This is done by sending a HTTP CONNECT
748        request to the proxy server when the connection is established.
749
750        This method must be called before the HTTP connection has been
751        established.
752
753        The headers argument should be a mapping of extra HTTP headers
754        to send with the CONNECT request.
755        """
756        # Verify if this is required.
757        if self.sock:
758            raise RuntimeError("Can't setup tunnel for established connection.")
759
760        self._tunnel_host, self._tunnel_port = self._get_hostport(host, port)
761        if headers:
762            self._tunnel_headers = headers
763        else:
764            self._tunnel_headers.clear()
765
766    def _get_hostport(self, host, port):
767        if port is None:
768            i = host.rfind(':')
769            j = host.rfind(']')         # ipv6 addresses have [...]
770            if i > j:
771                try:
772                    port = int(host[i+1:])
773                except ValueError:
774                    if host[i+1:] == "":  # http://foo.com:/ == http://foo.com/
775                        port = self.default_port
776                    else:
777                        raise InvalidURL("nonnumeric port: '%s'" % host[i+1:])
778                host = host[:i]
779            else:
780                port = self.default_port
781            if host and host[0] == '[' and host[-1] == ']':
782                host = host[1:-1]
783        return (host, port)
784
    def set_debuglevel(self, level):
        """Set the debugging level.

        With a level greater than zero, send() prints the data it
        transmits.
        """
        self.debuglevel = level
787
788    def _tunnel(self):
789        self.send("CONNECT %s:%d HTTP/1.0\r\n" % (self._tunnel_host,
790            self._tunnel_port))
791        for header, value in self._tunnel_headers.iteritems():
792            self.send("%s: %s\r\n" % (header, value))
793        self.send("\r\n")
794        response = self.response_class(self.sock, strict = self.strict,
795                                       method = self._method)
796        (version, code, message) = response._read_status()
797
798        if version == "HTTP/0.9":
799            # HTTP/0.9 doesn't support the CONNECT verb, so if httplib has
800            # concluded HTTP/0.9 is being used something has gone wrong.
801            self.close()
802            raise socket.error("Invalid response from tunnel request")
803        if code != 200:
804            self.close()
805            raise socket.error("Tunnel connection failed: %d %s" % (code,
806                                                                    message.strip()))
807        while True:
808            line = response.fp.readline(_MAXLINE + 1)
809            if len(line) > _MAXLINE:
810                raise LineTooLong("header line")
811            if not line:
812                # for sites which EOF without sending trailer
813                break
814            if line == '\r\n':
815                break
816
817
818    def connect(self):
819        """Connect to the host and port specified in __init__."""
820        self.sock = self._create_connection((self.host,self.port),
821                                           self.timeout, self.source_address)
822
823        if self._tunnel_host:
824            self._tunnel()
825
    def close(self):
        """Close the connection to the HTTP server.

        Resets the connection state to idle, closes the socket (if any) and
        closes the pending response (if any).
        """
        self.__state = _CS_IDLE
        try:
            sock = self.sock
            if sock:
                # Drop our reference before closing so the attribute is
                # already cleared if sock.close() raises.
                self.sock = None
                sock.close()   # close it manually... there may be other refs
        finally:
            # The response is released even when closing the socket failed.
            response = self.__response
            if response:
                self.__response = None
                response.close()
839
840    def send(self, data):
841        """Send `data' to the server."""
842        if self.sock is None:
843            if self.auto_open:
844                self.connect()
845            else:
846                raise NotConnected()
847
848        if self.debuglevel > 0:
849            print "send:", repr(data)
850        blocksize = 8192
851        if hasattr(data,'read') and not isinstance(data, array):
852            if self.debuglevel > 0: print "sendIng a read()able"
853            datablock = data.read(blocksize)
854            while datablock:
855                self.sock.sendall(datablock)
856                datablock = data.read(blocksize)
857        else:
858            self.sock.sendall(data)
859
860    def _output(self, s):
861        """Add a line of output to the current request buffer.
862
863        Assumes that the line does *not* end with \\r\\n.
864        """
865        self._buffer.append(s)
866
867    def _send_output(self, message_body=None):
868        """Send the currently buffered request and clear the buffer.
869
870        Appends an extra \\r\\n to the buffer.
871        A message_body may be specified, to be appended to the request.
872        """
873        self._buffer.extend(("", ""))
874        msg = "\r\n".join(self._buffer)
875        del self._buffer[:]
876        # If msg and message_body are sent in a single send() call,
877        # it will avoid performance problems caused by the interaction
878        # between delayed ack and the Nagle algorithm.
879        if isinstance(message_body, str):
880            msg += message_body
881            message_body = None
882        self.send(msg)
883        if message_body is not None:
884            #message_body was not a string (i.e. it is a file) and
885            #we must run the risk of Nagle
886            self.send(message_body)
887
888    def putrequest(self, method, url, skip_host=0, skip_accept_encoding=0):
889        """Send a request to the server.
890
891        `method' specifies an HTTP request method, e.g. 'GET'.
892        `url' specifies the object being requested, e.g. '/index.html'.
893        `skip_host' if True does not add automatically a 'Host:' header
894        `skip_accept_encoding' if True does not add automatically an
895           'Accept-Encoding:' header
896        """
897
898        # if a prior response has been completed, then forget about it.
899        if self.__response and self.__response.isclosed():
900            self.__response = None
901
902
903        # in certain cases, we cannot issue another request on this connection.
904        # this occurs when:
905        #   1) we are in the process of sending a request.   (_CS_REQ_STARTED)
906        #   2) a response to a previous request has signalled that it is going
907        #      to close the connection upon completion.
908        #   3) the headers for the previous response have not been read, thus
909        #      we cannot determine whether point (2) is true.   (_CS_REQ_SENT)
910        #
911        # if there is no prior response, then we can request at will.
912        #
913        # if point (2) is true, then we will have passed the socket to the
914        # response (effectively meaning, "there is no prior response"), and
915        # will open a new one when a new request is made.
916        #
917        # Note: if a prior response exists, then we *can* start a new request.
918        #       We are not allowed to begin fetching the response to this new
919        #       request, however, until that prior response is complete.
920        #
921        if self.__state == _CS_IDLE:
922            self.__state = _CS_REQ_STARTED
923        else:
924            raise CannotSendRequest()
925
926        # Save the method we use, we need it later in the response phase
927        self._method = method
928        if not url:
929            url = '/'
930        hdr = '%s %s %s' % (method, url, self._http_vsn_str)
931
932        self._output(hdr)
933
934        if self._http_vsn == 11:
935            # Issue some standard headers for better HTTP/1.1 compliance
936
937            if not skip_host:
938                # this header is issued *only* for HTTP/1.1
939                # connections. more specifically, this means it is
940                # only issued when the client uses the new
941                # HTTPConnection() class. backwards-compat clients
942                # will be using HTTP/1.0 and those clients may be
943                # issuing this header themselves. we should NOT issue
944                # it twice; some web servers (such as Apache) barf
945                # when they see two Host: headers
946
947                # If we need a non-standard port,include it in the
948                # header.  If the request is going through a proxy,
949                # but the host of the actual URL, not the host of the
950                # proxy.
951
952                netloc = ''
953                if url.startswith('http'):
954                    nil, netloc, nil, nil, nil = urlsplit(url)
955
956                if netloc:
957                    try:
958                        netloc_enc = netloc.encode("ascii")
959                    except UnicodeEncodeError:
960                        netloc_enc = netloc.encode("idna")
961                    self.putheader('Host', netloc_enc)
962                else:
963                    if self._tunnel_host:
964                        host = self._tunnel_host
965                        port = self._tunnel_port
966                    else:
967                        host = self.host
968                        port = self.port
969
970                    try:
971                        host_enc = host.encode("ascii")
972                    except UnicodeEncodeError:
973                        host_enc = host.encode("idna")
974                    # Wrap the IPv6 Host Header with [] (RFC 2732)
975                    if host_enc.find(':') >= 0:
976                        host_enc = "[" + host_enc + "]"
977                    if port == self.default_port:
978                        self.putheader('Host', host_enc)
979                    else:
980                        self.putheader('Host', "%s:%s" % (host_enc, port))
981
982            # note: we are assuming that clients will not attempt to set these
983            #       headers since *this* library must deal with the
984            #       consequences. this also means that when the supporting
985            #       libraries are updated to recognize other forms, then this
986            #       code should be changed (removed or updated).
987
988            # we only want a Content-Encoding of "identity" since we don't
989            # support encodings such as x-gzip or x-deflate.
990            if not skip_accept_encoding:
991                self.putheader('Accept-Encoding', 'identity')
992
993            # we can accept "chunked" Transfer-Encodings, but no others
994            # NOTE: no TE header implies *only* "chunked"
995            #self.putheader('TE', 'chunked')
996
997            # if TE is supplied in the header, then it must appear in a
998            # Connection header.
999            #self.putheader('Connection', 'TE')
1000
1001        else:
1002            # For HTTP/1.0, the server will assume "not chunked"
1003            pass
1004
1005    def putheader(self, header, *values):
1006        """Send a request header line to the server.
1007
1008        For example: h.putheader('Accept', 'text/html')
1009        """
1010        if self.__state != _CS_REQ_STARTED:
1011            raise CannotSendHeader()
1012
1013        header = '%s' % header
1014        if not _is_legal_header_name(header):
1015            raise ValueError('Invalid header name %r' % (header,))
1016
1017        values = [str(v) for v in values]
1018        for one_value in values:
1019            if _is_illegal_header_value(one_value):
1020                raise ValueError('Invalid header value %r' % (one_value,))
1021
1022        hdr = '%s: %s' % (header, '\r\n\t'.join(values))
1023        self._output(hdr)
1024
1025    def endheaders(self, message_body=None):
1026        """Indicate that the last header line has been sent to the server.
1027
1028        This method sends the request to the server.  The optional
1029        message_body argument can be used to pass a message body
1030        associated with the request.  The message body will be sent in
1031        the same packet as the message headers if it is string, otherwise it is
1032        sent as a separate packet.
1033        """
1034        if self.__state == _CS_REQ_STARTED:
1035            self.__state = _CS_REQ_SENT
1036        else:
1037            raise CannotSendHeader()
1038        self._send_output(message_body)
1039
1040    def request(self, method, url, body=None, headers={}):
1041        """Send a complete request to the server."""
1042        self._send_request(method, url, body, headers)
1043
1044    def _set_content_length(self, body, method):
1045        # Set the content-length based on the body. If the body is "empty", we
1046        # set Content-Length: 0 for methods that expect a body (RFC 7230,
1047        # Section 3.3.2). If the body is set for other methods, we set the
1048        # header provided we can figure out what the length is.
1049        thelen = None
1050        if body is None and method.upper() in _METHODS_EXPECTING_BODY:
1051            thelen = '0'
1052        elif body is not None:
1053            try:
1054                thelen = str(len(body))
1055            except (TypeError, AttributeError):
1056                # If this is a file-like object, try to
1057                # fstat its file descriptor
1058                try:
1059                    thelen = str(os.fstat(body.fileno()).st_size)
1060                except (AttributeError, OSError):
1061                    # Don't send a length if this failed
1062                    if self.debuglevel > 0: print "Cannot stat!!"
1063
1064        if thelen is not None:
1065            self.putheader('Content-Length', thelen)
1066
1067    def _send_request(self, method, url, body, headers):
1068        # Honor explicitly requested Host: and Accept-Encoding: headers.
1069        header_names = dict.fromkeys([k.lower() for k in headers])
1070        skips = {}
1071        if 'host' in header_names:
1072            skips['skip_host'] = 1
1073        if 'accept-encoding' in header_names:
1074            skips['skip_accept_encoding'] = 1
1075
1076        self.putrequest(method, url, **skips)
1077
1078        if 'content-length' not in header_names:
1079            self._set_content_length(body, method)
1080        for hdr, value in headers.iteritems():
1081            self.putheader(hdr, value)
1082        self.endheaders(body)
1083
1084    def getresponse(self, buffering=False):
1085        "Get the response from the server."
1086
1087        # if a prior response has been completed, then forget about it.
1088        if self.__response and self.__response.isclosed():
1089            self.__response = None
1090
1091        #
1092        # if a prior response exists, then it must be completed (otherwise, we
1093        # cannot read this response's header to determine the connection-close
1094        # behavior)
1095        #
1096        # note: if a prior response existed, but was connection-close, then the
1097        # socket and response were made independent of this HTTPConnection
1098        # object since a new request requires that we open a whole new
1099        # connection
1100        #
1101        # this means the prior response had one of two states:
1102        #   1) will_close: this connection was reset and the prior socket and
1103        #                  response operate independently
1104        #   2) persistent: the response was retained and we await its
1105        #                  isclosed() status to become true.
1106        #
1107        if self.__state != _CS_REQ_SENT or self.__response:
1108            raise ResponseNotReady()
1109
1110        args = (self.sock,)
1111        kwds = {"strict":self.strict, "method":self._method}
1112        if self.debuglevel > 0:
1113            args += (self.debuglevel,)
1114        if buffering:
1115            #only add this keyword if non-default, for compatibility with
1116            #other response_classes.
1117            kwds["buffering"] = True;
1118        response = self.response_class(*args, **kwds)
1119
1120        try:
1121            response.begin()
1122            assert response.will_close != _UNKNOWN
1123            self.__state = _CS_IDLE
1124
1125            if response.will_close:
1126                # this effectively passes the connection to the response
1127                self.close()
1128            else:
1129                # remember this, so we can tell when it is complete
1130                self.__response = response
1131
1132            return response
1133        except:
1134            response.close()
1135            raise
1136
1137
class HTTP:
    """Compatibility class with httplib.py from 1.5.

    Wraps an instance of `_connection_class' (stored as self._conn),
    forces it to speak HTTP/1.0 and re-exports a handful of its methods
    under the old interface.
    """

    _http_vsn = 10
    _http_vsn_str = 'HTTP/1.0'

    debuglevel = 0

    _connection_class = HTTPConnection

    def __init__(self, host='', port=None, strict=None):
        "Provide a default host, since the superclass requires one."

        # some joker passed 0 explicitly, meaning default port
        if port == 0:
            port = None

        # Note that we may pass an empty string as the host; this will raise
        # an error when we attempt to connect. Presumably, the client code
        # will call connect before then, with a proper host.
        wrapped = self._connection_class(host, port, strict)
        self._setup(wrapped)

    def _setup(self, conn):
        "Adopt `conn' as the wrapped connection and wire up the delegation."
        self._conn = conn

        # set up delegation to flesh out interface
        self.send = conn.send
        self.putrequest = conn.putrequest
        self.putheader = conn.putheader
        self.endheaders = conn.endheaders
        self.set_debuglevel = conn.set_debuglevel

        # make the wrapped connection use this class's protocol version
        conn._http_vsn = self._http_vsn
        conn._http_vsn_str = self._http_vsn_str

        self.file = None

    def connect(self, host=None, port=None):
        "Accept arguments to set the host/port, since the superclass doesn't."

        conn = self._conn
        if host is not None:
            conn.host, conn.port = conn._get_hostport(host, port)
        conn.connect()

    def getfile(self):
        "Provide a getfile, since the superclass' does not use this concept."
        return self.file

    def getreply(self, buffering=False):
        """Compat definition since superclass does not define it.

        Returns a tuple consisting of:
        - server status code (e.g. '200' if all goes well)
        - server "reason" corresponding to status code
        - any RFC822 headers in the response from the server

        When the status line is bad, the tuple is (-1, <offending line>,
        None) instead, self.file is a file made from the connection's
        socket and the connection is closed.
        """
        conn = self._conn
        try:
            if buffering:
                # only add this keyword if non-default for compatibility
                # with other connection classes
                response = conn.getresponse(buffering)
            else:
                response = conn.getresponse()
        except BadStatusLine as e:
            ### hmm. if getresponse() ever closes the socket on a bad request,
            ### then we are going to have problems with self.sock

            ### should we keep this behavior? do people use it?
            # keep the socket open (as a file), and return it
            self.file = self._conn.sock.makefile('rb', 0)

            # close our socket -- we want to restart after any protocol error
            self.close()

            self.headers = None
            return -1, e.line, None

        self.headers = response.msg
        self.file = response.fp
        return response.status, response.reason, response.msg

    def close(self):
        "Close the wrapped connection and drop the reference to the file."
        self._conn.close()

        # note that self.file == response.fp, which gets closed by the
        # superclass. just clear the object ref here.
        ### hmm. messy. if status==-1, then self.file is owned by us.
        ### well... we aren't explicitly closing, but losing this ref will
        ### do it
        self.file = None
1228
1229try:
1230    import ssl
1231except ImportError:
1232    pass
1233else:
1234    class HTTPSConnection(HTTPConnection):
1235        "This class allows communication via SSL."
1236
1237        default_port = HTTPS_PORT
1238
1239        def __init__(self, host, port=None, key_file=None, cert_file=None,
1240                     strict=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
1241                     source_address=None, context=None):
1242            HTTPConnection.__init__(self, host, port, strict, timeout,
1243                                    source_address)
1244            self.key_file = key_file
1245            self.cert_file = cert_file
1246            if context is None:
1247                context = ssl._create_default_https_context()
1248            if key_file or cert_file:
1249                context.load_cert_chain(cert_file, key_file)
1250            self._context = context
1251
1252        def connect(self):
1253            "Connect to a host on a given (SSL) port."
1254
1255            HTTPConnection.connect(self)
1256
1257            if self._tunnel_host:
1258                server_hostname = self._tunnel_host
1259            else:
1260                server_hostname = self.host
1261
1262            self.sock = self._context.wrap_socket(self.sock,
1263                                                  server_hostname=server_hostname)
1264
1265    __all__.append("HTTPSConnection")
1266
1267    class HTTPS(HTTP):
1268        """Compatibility with 1.5 httplib interface
1269
1270        Python 1.5.2 did not have an HTTPS class, but it defined an
1271        interface for sending http requests that is also useful for
1272        https.
1273        """
1274
1275        _connection_class = HTTPSConnection
1276
1277        def __init__(self, host='', port=None, key_file=None, cert_file=None,
1278                     strict=None, context=None):
1279            # provide a default host, pass the X509 cert info
1280
1281            # urf. compensate for bad input.
1282            if port == 0:
1283                port = None
1284            self._setup(self._connection_class(host, port, key_file,
1285                                               cert_file, strict,
1286                                               context=context))
1287
1288            # we never actually use these for anything, but we keep them
1289            # here for compatibility with post-1.5.2 CVS.
1290            self.key_file = key_file
1291            self.cert_file = cert_file
1292
1293
1294    def FakeSocket (sock, sslobj):
1295        warnings.warn("FakeSocket is deprecated, and won't be in 3.x.  " +
1296                      "Use the result of ssl.wrap_socket() directly instead.",
1297                      DeprecationWarning, stacklevel=2)
1298        return sslobj
1299
1300
class HTTPException(Exception):
    """Common base class of the exceptions defined in this module.

    Subclasses that define an __init__ must call Exception.__init__
    or define self.args.  Otherwise, str() will fail.
    """
1305
class NotConnected(HTTPException):
    """Raised by send() when no socket is open and auto_open is off."""
1308
class InvalidURL(HTTPException):
    """Error for a URL that cannot be used.

    NOTE(review): no raise site is visible in this part of the module;
    presumably raised while parsing host/port -- confirm.
    """
1311
class UnknownProtocol(HTTPException):
    """Error carrying an unrecognised protocol version in `version'."""

    def __init__(self, version):
        # args is set by hand (instead of calling Exception.__init__) so
        # that str() of the exception works.
        self.args = (version,)
        self.version = version
1316
class UnknownTransferEncoding(HTTPException):
    """Error for a transfer encoding this module does not handle.

    NOTE(review): no raise site is visible in this part of the module --
    confirm where (and whether) it is used.
    """
1319
class UnimplementedFileMode(HTTPException):
    """Error for a file mode this module does not implement.

    NOTE(review): no raise site is visible in this part of the module --
    confirm where (and whether) it is used.
    """
1322
class IncompleteRead(HTTPException):
    """Error carrying the data read so far (`partial') and, optionally,
    how many more bytes were `expected'."""

    def __init__(self, partial, expected=None):
        self.args = (partial,)
        self.partial = partial
        self.expected = expected

    def __repr__(self):
        # The "more expected" part is only shown when a count was given.
        if self.expected is None:
            suffix = ''
        else:
            suffix = ', %i more expected' % self.expected
        return 'IncompleteRead(%i bytes read%s)' % (len(self.partial), suffix)

    def __str__(self):
        # str() and repr() give the same summary text.
        return repr(self)
1336
class ImproperConnectionState(HTTPException):
    """Base class for errors raised when a call does not fit the
    connection's current request/response state."""
1339
class CannotSendRequest(ImproperConnectionState):
    """Raised by putrequest() when the connection is not idle."""
1342
class CannotSendHeader(ImproperConnectionState):
    """Raised by putheader() and endheaders() when no request has been
    started."""
1345
class ResponseNotReady(ImproperConnectionState):
    """Raised by getresponse() when no request has been fully sent or an
    earlier response is still tracked."""
1348
class BadStatusLine(HTTPException):
    """Error carrying an unusable status line in `line'.

    A false `line' (e.g. an empty string) is stored as its repr() so the
    message is never blank.
    """

    def __init__(self, line):
        shown = line or repr(line)
        self.args = (shown,)
        self.line = shown
1355
class LineTooLong(HTTPException):
    """Error for a line longer than _MAXLINE bytes; `line_type' names the
    kind of line (e.g. "header line") in the message."""

    def __init__(self, line_type):
        message = ("got more than %d bytes when reading %s"
                   % (_MAXLINE, line_type))
        HTTPException.__init__(self, message)
1360
# Backwards-compatible alias: `error' is the same class as HTTPException.
error = HTTPException
1363
class LineAndFileWrapper:
    """A limited file-like object for HTTP/0.9 responses.

    The status-line parsing code calls readline(), which normally
    gets the HTTP status line.  For a 0.9 response, however, this is
    actually the first line of the body!  Clients need to get a
    readable file object that contains that line.

    The wrapper serves `line' first and then the contents of `file'.
    Once the last character of `line' has been handed out, read(),
    readline() and readlines() are rebound to the file's own methods.
    Any other attribute is looked up on the file.  `line' is expected to
    be non-empty (the read methods assert this).
    """

    def __init__(self, line, file):
        self._line = line
        self._file = file
        self._line_consumed = 0
        self._line_offset = 0           # index of the next unread character
        self._line_left = len(line)     # characters of `line' still unread

    def __getattr__(self, attr):
        # Only called for names not found on the wrapper itself.
        return getattr(self._file, attr)

    def _done(self):
        # called when the last byte is read from the line.  After the
        # call, all read methods are delegated to the underlying file
        # object.
        self._line_consumed = 1
        self.read = self._file.read
        self.readline = self._file.readline
        self.readlines = self._file.readlines

    def read(self, amt=None):
        """Return up to `amt' characters; None or a negative `amt' means
        everything that is left (rest of the line plus the whole file)."""
        if amt is not None and amt < 0:
            # Negative sizes follow the file convention of "read it all".
            # Slicing with them would drop the tail of the line and
            # corrupt the offset bookkeeping.
            amt = None
        if self._line_consumed:
            # Only reachable through a bound method captured before
            # _done() rebound self.read.
            if amt is None:
                return self._file.read()
            return self._file.read(amt)
        assert self._line_left
        if amt is None or amt > self._line_left:
            s = self._line[self._line_offset:]
            self._done()
            if amt is None:
                return s + self._file.read()
            else:
                return s + self._file.read(amt - len(s))
        else:
            assert amt <= self._line_left
            i = self._line_offset
            j = i + amt
            s = self._line[i:j]
            self._line_offset = j
            self._line_left -= amt
            if self._line_left == 0:
                self._done()
            return s

    def readline(self):
        """Return the unread remainder of the line, then lines of the file."""
        if self._line_consumed:
            return self._file.readline()
        assert self._line_left
        s = self._line[self._line_offset:]
        self._done()
        return s

    def readlines(self, size=None):
        """Return the unread remainder of the line followed by the file's
        readlines() result (`size' is passed through when given)."""
        if self._line_consumed:
            return self._file.readlines(size)
        assert self._line_left
        L = [self._line[self._line_offset:]]
        self._done()
        if size is None:
            return L + self._file.readlines()
        else:
            return L + self._file.readlines(size)
1431