• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1"""HTTP server base class.
2
3Note: the class in this module doesn't implement any HTTP request; see
4SimpleHTTPServer for simple implementations of GET, HEAD and POST
5(including CGI scripts).  It does, however, optionally implement HTTP/1.1
6persistent connections, as of version 0.3.
7
8Contents:
9
10- BaseHTTPRequestHandler: HTTP request handler base class
11- test: test function
12
13XXX To do:
14
15- log requests even later (to capture byte count)
16- log user-agent header and other interesting goodies
17- send error log to separate file
18"""
19
20
21# See also:
22#
23# HTTP Working Group                                        T. Berners-Lee
24# INTERNET-DRAFT                                            R. T. Fielding
25# <draft-ietf-http-v10-spec-00.txt>                     H. Frystyk Nielsen
26# Expires September 8, 1995                                  March 8, 1995
27#
28# URL: http://www.ics.uci.edu/pub/ietf/http/draft-ietf-http-v10-spec-00.txt
29#
30# and
31#
32# Network Working Group                                      R. Fielding
33# Request for Comments: 2616                                       et al
34# Obsoletes: 2068                                              June 1999
35# Category: Standards Track
36#
37# URL: http://www.faqs.org/rfcs/rfc2616.html
38
39# Log files
40# ---------
41#
42# Here's a quote from the NCSA httpd docs about log file format.
43#
44# | The logfile format is as follows. Each line consists of:
45# |
46# | host rfc931 authuser [DD/Mon/YYYY:hh:mm:ss] "request" ddd bbbb
47# |
48# |        host: Either the DNS name or the IP number of the remote client
49# |        rfc931: Any information returned by identd for this person,
50# |                - otherwise.
51# |        authuser: If user sent a userid for authentication, the user name,
52# |                  - otherwise.
53# |        DD: Day
54# |        Mon: Month (calendar name)
55# |        YYYY: Year
56# |        hh: hour (24-hour format, the machine's timezone)
57# |        mm: minutes
58# |        ss: seconds
59# |        request: The first line of the HTTP request as sent by the client.
60# |        ddd: the status code returned by the server, - if not available.
61# |        bbbb: the total number of bytes sent,
62# |              *not including the HTTP/1.0 header*, - if not available
63# |
64# | You can determine the name of the file accessed through request.
65#
66# (Actually, the latter is only true if you know the server configuration
67# at the time the request was made!)
68
# Version of this module; BaseHTTPRequestHandler.server_version embeds it.
__version__ = "0.3"

# Names exported by a star-import of this module.
__all__ = ["HTTPServer", "BaseHTTPRequestHandler"]
72
73import sys
74import time
75import socket # For gethostbyaddr()
76from warnings import filterwarnings, catch_warnings
77with catch_warnings():
78    if sys.py3kwarning:
79        filterwarnings("ignore", ".*mimetools has been removed",
80                        DeprecationWarning)
81    import mimetools
82import SocketServer
83
# Default error message template.  It is the default value of
# BaseHTTPRequestHandler.error_message_format, which send_error() fills in
# with a mapping providing three keys: 'code' (the numeric status code),
# 'message' (the HTML-escaped detail message) and 'explain' (the long
# description of the status code).
DEFAULT_ERROR_MESSAGE = """\
<head>
<title>Error response</title>
</head>
<body>
<h1>Error response</h1>
<p>Error code %(code)d.
<p>Message: %(message)s.
<p>Error code explanation: %(code)s = %(explain)s.
</body>
"""

# Default value of BaseHTTPRequestHandler.error_content_type, i.e. the
# Content-Type header that send_error() sends along with the error page.
DEFAULT_ERROR_CONTENT_TYPE = "text/html"
98
99def _quote_html(html):
100    return html.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")
101
class HTTPServer(SocketServer.TCPServer):
    """TCP server that also records the name and port it is bound to."""

    # Seems to make sense in testing environment
    allow_reuse_address = 1

    def server_bind(self):
        """Bind the socket, then store server_name and server_port.

        server_name is the fully qualified name that socket.getfqdn()
        reports for the bound host; server_port is the bound port.
        """
        SocketServer.TCPServer.server_bind(self)
        bound_address = self.socket.getsockname()
        bound_host, bound_port = bound_address[:2]
        self.server_name = socket.getfqdn(bound_host)
        self.server_port = bound_port
112
113
114class BaseHTTPRequestHandler(SocketServer.StreamRequestHandler):
115
116    """HTTP request handler base class.
117
118    The following explanation of HTTP serves to guide you through the
119    code as well as to expose any misunderstandings I may have about
120    HTTP (so you don't need to read the code to figure out I'm wrong
121    :-).
122
123    HTTP (HyperText Transfer Protocol) is an extensible protocol on
124    top of a reliable stream transport (e.g. TCP/IP).  The protocol
125    recognizes three parts to a request:
126
127    1. One line identifying the request type and path
128    2. An optional set of RFC-822-style headers
129    3. An optional data part
130
131    The headers and data are separated by a blank line.
132
133    The first line of the request has the form
134
135    <command> <path> <version>
136
137    where <command> is a (case-sensitive) keyword such as GET or POST,
138    <path> is a string containing path information for the request,
139    and <version> should be the string "HTTP/1.0" or "HTTP/1.1".
140    <path> is encoded using the URL encoding scheme (using %xx to signify
141    the ASCII character with hex code xx).
142
143    The specification specifies that lines are separated by CRLF but
144    for compatibility with the widest range of clients recommends
145    servers also handle LF.  Similarly, whitespace in the request line
146    is treated sensibly (allowing multiple spaces between components
147    and allowing trailing whitespace).
148
149    Similarly, for output, lines ought to be separated by CRLF pairs
150    but most clients grok LF characters just fine.
151
152    If the first line of the request has the form
153
154    <command> <path>
155
156    (i.e. <version> is left out) then this is assumed to be an HTTP
157    0.9 request; this form has no optional headers and data part and
158    the reply consists of just the data.
159
160    The reply form of the HTTP 1.x protocol again has three parts:
161
162    1. One line giving the response code
163    2. An optional set of RFC-822-style headers
164    3. The data
165
166    Again, the headers and data are separated by a blank line.
167
168    The response code line has the form
169
170    <version> <responsecode> <responsestring>
171
172    where <version> is the protocol version ("HTTP/1.0" or "HTTP/1.1"),
173    <responsecode> is a 3-digit response code indicating success or
174    failure of the request, and <responsestring> is an optional
175    human-readable string explaining what the response code means.
176
177    This server parses the request and the headers, and then calls a
178    function specific to the request type (<command>).  Specifically,
179    a request SPAM will be handled by a method do_SPAM().  If no
180    such method exists the server sends an error response to the
181    client.  If it exists, it is called with no arguments:
182
183    do_SPAM()
184
185    Note that the request name is case sensitive (i.e. SPAM and spam
186    are different requests).
187
188    The various request details are stored in instance variables:
189
190    - client_address is the client IP address in the form (host,
191    port);
192
    - command, path and request_version are the broken-down request line;
194
195    - headers is an instance of mimetools.Message (or a derived
196    class) containing the header information;
197
198    - rfile is a file object open for reading positioned at the
199    start of the optional input data part;
200
201    - wfile is a file object open for writing.
202
203    IT IS IMPORTANT TO ADHERE TO THE PROTOCOL FOR WRITING!
204
205    The first thing to be written must be the response line.  Then
206    follow 0 or more header lines, then a blank line, and then the
207    actual data (if any).  The meaning of the header lines depends on
208    the command executed by the server; in most cases, when data is
209    returned, there should be at least one header line of the form
210
211    Content-type: <type>/<subtype>
212
213    where <type> and <subtype> should be registered MIME types,
214    e.g. "text/html" or "text/plain".
215
216    """
217
218    # The Python system version, truncated to its first component.
219    sys_version = "Python/" + sys.version.split()[0]
220
221    # The server software version.  You may want to override this.
222    # The format is multiple whitespace-separated strings,
223    # where each string is of the form name[/version].
224    server_version = "BaseHTTP/" + __version__
225
226    # The default request version.  This only affects responses up until
227    # the point where the request line is parsed, so it mainly decides what
228    # the client gets back when sending a malformed request line.
229    # Most web servers default to HTTP 0.9, i.e. don't send a status line.
230    default_request_version = "HTTP/0.9"
231
232    def parse_request(self):
233        """Parse a request (internal).
234
235        The request should be stored in self.raw_requestline; the results
236        are in self.command, self.path, self.request_version and
237        self.headers.
238
239        Return True for success, False for failure; on failure, an
240        error is sent back.
241
242        """
243        self.command = None  # set in case of error on the first line
244        self.request_version = version = self.default_request_version
245        self.close_connection = 1
246        requestline = self.raw_requestline
247        requestline = requestline.rstrip('\r\n')
248        self.requestline = requestline
249        words = requestline.split()
250        if len(words) == 3:
251            command, path, version = words
252            if version[:5] != 'HTTP/':
253                self.send_error(400, "Bad request version (%r)" % version)
254                return False
255            try:
256                base_version_number = version.split('/', 1)[1]
257                version_number = base_version_number.split(".")
258                # RFC 2145 section 3.1 says there can be only one "." and
259                #   - major and minor numbers MUST be treated as
260                #      separate integers;
261                #   - HTTP/2.4 is a lower version than HTTP/2.13, which in
262                #      turn is lower than HTTP/12.3;
263                #   - Leading zeros MUST be ignored by recipients.
264                if len(version_number) != 2:
265                    raise ValueError
266                version_number = int(version_number[0]), int(version_number[1])
267            except (ValueError, IndexError):
268                self.send_error(400, "Bad request version (%r)" % version)
269                return False
270            if version_number >= (1, 1) and self.protocol_version >= "HTTP/1.1":
271                self.close_connection = 0
272            if version_number >= (2, 0):
273                self.send_error(505,
274                          "Invalid HTTP Version (%s)" % base_version_number)
275                return False
276        elif len(words) == 2:
277            command, path = words
278            self.close_connection = 1
279            if command != 'GET':
280                self.send_error(400,
281                                "Bad HTTP/0.9 request type (%r)" % command)
282                return False
283        elif not words:
284            return False
285        else:
286            self.send_error(400, "Bad request syntax (%r)" % requestline)
287            return False
288        self.command, self.path, self.request_version = command, path, version
289
290        # Examine the headers and look for a Connection directive
291        self.headers = self.MessageClass(self.rfile, 0)
292
293        conntype = self.headers.get('Connection', "")
294        if conntype.lower() == 'close':
295            self.close_connection = 1
296        elif (conntype.lower() == 'keep-alive' and
297              self.protocol_version >= "HTTP/1.1"):
298            self.close_connection = 0
299        return True
300
301    def handle_one_request(self):
302        """Handle a single HTTP request.
303
304        You normally don't need to override this method; see the class
305        __doc__ string for information on how to handle specific HTTP
306        commands such as GET and POST.
307
308        """
309        try:
310            self.raw_requestline = self.rfile.readline(65537)
311            if len(self.raw_requestline) > 65536:
312                self.requestline = ''
313                self.request_version = ''
314                self.command = ''
315                self.send_error(414)
316                return
317            if not self.raw_requestline:
318                self.close_connection = 1
319                return
320            if not self.parse_request():
321                # An error code has been sent, just exit
322                return
323            mname = 'do_' + self.command
324            if not hasattr(self, mname):
325                self.send_error(501, "Unsupported method (%r)" % self.command)
326                return
327            method = getattr(self, mname)
328            method()
329            self.wfile.flush() #actually send the response if not already done.
330        except socket.timeout, e:
331            #a read or a write timed out.  Discard this connection
332            self.log_error("Request timed out: %r", e)
333            self.close_connection = 1
334            return
335
336    def handle(self):
337        """Handle multiple requests if necessary."""
338        self.close_connection = 1
339
340        self.handle_one_request()
341        while not self.close_connection:
342            self.handle_one_request()
343
344    def send_error(self, code, message=None):
345        """Send and log an error reply.
346
347        Arguments are the error code, and a detailed message.
348        The detailed message defaults to the short entry matching the
349        response code.
350
351        This sends an error response (so it must be called before any
352        output has been generated), logs the error, and finally sends
353        a piece of HTML explaining the error to the user.
354
355        """
356
357        try:
358            short, long = self.responses[code]
359        except KeyError:
360            short, long = '???', '???'
361        if message is None:
362            message = short
363        explain = long
364        self.log_error("code %d, message %s", code, message)
365        self.send_response(code, message)
366        self.send_header('Connection', 'close')
367
368        # Message body is omitted for cases described in:
369        #  - RFC7230: 3.3. 1xx, 204(No Content), 304(Not Modified)
370        #  - RFC7231: 6.3.6. 205(Reset Content)
371        content = None
372        if code >= 200 and code not in (204, 205, 304):
373            # HTML encode to prevent Cross Site Scripting attacks
374            # (see bug #1100201)
375            content = (self.error_message_format % {
376                'code': code,
377                'message': _quote_html(message),
378                'explain': explain
379            })
380            self.send_header("Content-Type", self.error_content_type)
381        self.end_headers()
382
383        if self.command != 'HEAD' and content:
384            self.wfile.write(content)
385
    # Template and Content-Type value that send_error() uses for the error
    # page.  Both are looked up through self, so a subclass can replace them.
    error_message_format = DEFAULT_ERROR_MESSAGE
    error_content_type = DEFAULT_ERROR_CONTENT_TYPE
388
389    def send_response(self, code, message=None):
390        """Send the response header and log the response code.
391
392        Also send two standard headers with the server software
393        version and the current date.
394
395        """
396        self.log_request(code)
397        if message is None:
398            if code in self.responses:
399                message = self.responses[code][0]
400            else:
401                message = ''
402        if self.request_version != 'HTTP/0.9':
403            self.wfile.write("%s %d %s\r\n" %
404                             (self.protocol_version, code, message))
405            # print (self.protocol_version, code, message)
406        self.send_header('Server', self.version_string())
407        self.send_header('Date', self.date_time_string())
408
409    def send_header(self, keyword, value):
410        """Send a MIME header."""
411        if self.request_version != 'HTTP/0.9':
412            self.wfile.write("%s: %s\r\n" % (keyword, value))
413
414        if keyword.lower() == 'connection':
415            if value.lower() == 'close':
416                self.close_connection = 1
417            elif value.lower() == 'keep-alive':
418                self.close_connection = 0
419
420    def end_headers(self):
421        """Send the blank line ending the MIME headers."""
422        if self.request_version != 'HTTP/0.9':
423            self.wfile.write("\r\n")
424
425    def log_request(self, code='-', size='-'):
426        """Log an accepted request.
427
428        This is called by send_response().
429
430        """
431
432        self.log_message('"%s" %s %s',
433                         self.requestline, str(code), str(size))
434
435    def log_error(self, format, *args):
436        """Log an error.
437
438        This is called when a request cannot be fulfilled.  By
439        default it passes the message on to log_message().
440
441        Arguments are the same as for log_message().
442
443        XXX This should go to the separate error log.
444
445        """
446
447        self.log_message(format, *args)
448
449    def log_message(self, format, *args):
450        """Log an arbitrary message.
451
452        This is used by all other logging functions.  Override
453        it if you have specific logging wishes.
454
455        The first argument, FORMAT, is a format string for the
456        message to be logged.  If the format string contains
457        any % escapes requiring parameters, they should be
458        specified as subsequent arguments (it's just like
459        printf!).
460
461        The client ip address and current date/time are prefixed to every
462        message.
463
464        """
465
466        sys.stderr.write("%s - - [%s] %s\n" %
467                         (self.client_address[0],
468                          self.log_date_time_string(),
469                          format%args))
470
471    def version_string(self):
472        """Return the server software version string."""
473        return self.server_version + ' ' + self.sys_version
474
475    def date_time_string(self, timestamp=None):
476        """Return the current date and time formatted for a message header."""
477        if timestamp is None:
478            timestamp = time.time()
479        year, month, day, hh, mm, ss, wd, y, z = time.gmtime(timestamp)
480        s = "%s, %02d %3s %4d %02d:%02d:%02d GMT" % (
481                self.weekdayname[wd],
482                day, self.monthname[month], year,
483                hh, mm, ss)
484        return s
485
486    def log_date_time_string(self):
487        """Return the current time formatted for logging."""
488        now = time.time()
489        year, month, day, hh, mm, ss, x, y, z = time.localtime(now)
490        s = "%02d/%3s/%04d %02d:%02d:%02d" % (
491                day, self.monthname[month], year, hh, mm, ss)
492        return s
493
    # Abbreviated day names, indexed by the weekday field of a time tuple
    # (0 is Monday); used by date_time_string().
    weekdayname = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']

    # Abbreviated month names, indexed directly by month number (1-12);
    # the None at index 0 is only a placeholder.
    monthname = [None,
                 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
499
500    def address_string(self):
501        """Return the client address formatted for logging.
502
503        This version looks up the full hostname using gethostbyaddr(),
504        and tries to find a name that contains at least one dot.
505
506        """
507
508        host, port = self.client_address[:2]
509        return socket.getfqdn(host)
510
511    # Essentially static class variables
512
513    # The version of the HTTP protocol we support.
514    # Set this to HTTP/1.1 to enable automatic keepalive
515    protocol_version = "HTTP/1.0"
516
517    # The Message-like class used to parse headers
518    MessageClass = mimetools.Message
519
520    # Table mapping response codes to messages; entries have the
521    # form {code: (shortmessage, longmessage)}.
522    # See RFC 2616.
523    responses = {
524        100: ('Continue', 'Request received, please continue'),
525        101: ('Switching Protocols',
526              'Switching to new protocol; obey Upgrade header'),
527
528        200: ('OK', 'Request fulfilled, document follows'),
529        201: ('Created', 'Document created, URL follows'),
530        202: ('Accepted',
531              'Request accepted, processing continues off-line'),
532        203: ('Non-Authoritative Information', 'Request fulfilled from cache'),
533        204: ('No Content', 'Request fulfilled, nothing follows'),
534        205: ('Reset Content', 'Clear input form for further input.'),
535        206: ('Partial Content', 'Partial content follows.'),
536
537        300: ('Multiple Choices',
538              'Object has several resources -- see URI list'),
539        301: ('Moved Permanently', 'Object moved permanently -- see URI list'),
540        302: ('Found', 'Object moved temporarily -- see URI list'),
541        303: ('See Other', 'Object moved -- see Method and URL list'),
542        304: ('Not Modified',
543              'Document has not changed since given time'),
544        305: ('Use Proxy',
545              'You must use proxy specified in Location to access this '
546              'resource.'),
547        307: ('Temporary Redirect',
548              'Object moved temporarily -- see URI list'),
549
550        400: ('Bad Request',
551              'Bad request syntax or unsupported method'),
552        401: ('Unauthorized',
553              'No permission -- see authorization schemes'),
554        402: ('Payment Required',
555              'No payment -- see charging schemes'),
556        403: ('Forbidden',
557              'Request forbidden -- authorization will not help'),
558        404: ('Not Found', 'Nothing matches the given URI'),
559        405: ('Method Not Allowed',
560              'Specified method is invalid for this resource.'),
561        406: ('Not Acceptable', 'URI not available in preferred format.'),
562        407: ('Proxy Authentication Required', 'You must authenticate with '
563              'this proxy before proceeding.'),
564        408: ('Request Timeout', 'Request timed out; try again later.'),
565        409: ('Conflict', 'Request conflict.'),
566        410: ('Gone',
567              'URI no longer exists and has been permanently removed.'),
568        411: ('Length Required', 'Client must specify Content-Length.'),
569        412: ('Precondition Failed', 'Precondition in headers is false.'),
570        413: ('Request Entity Too Large', 'Entity is too large.'),
571        414: ('Request-URI Too Long', 'URI is too long.'),
572        415: ('Unsupported Media Type', 'Entity body in unsupported format.'),
573        416: ('Requested Range Not Satisfiable',
574              'Cannot satisfy request range.'),
575        417: ('Expectation Failed',
576              'Expect condition could not be satisfied.'),
577
578        500: ('Internal Server Error', 'Server got itself in trouble'),
579        501: ('Not Implemented',
580              'Server does not support this operation'),
581        502: ('Bad Gateway', 'Invalid responses from another server/proxy.'),
582        503: ('Service Unavailable',
583              'The server cannot process the request due to a high load'),
584        504: ('Gateway Timeout',
585              'The gateway server did not receive a timely response'),
586        505: ('HTTP Version Not Supported', 'Cannot fulfill request.'),
587        }
588
589
def test(HandlerClass = BaseHTTPRequestHandler,
         ServerClass = HTTPServer, protocol="HTTP/1.0"):
    """Test the HTTP request handler class.

    This runs an HTTP server on port 8000 (or the first command line
    argument) and serves requests until serve_forever() returns.

    HandlerClass.protocol_version is set to *protocol* before the server
    is created; note that this modifies the handler class itself.

    """

    if sys.argv[1:]:
        port = int(sys.argv[1])
    else:
        port = 8000
    server_address = ('', port)

    HandlerClass.protocol_version = protocol
    httpd = ServerClass(server_address, HandlerClass)

    sa = httpd.socket.getsockname()
    # A single pre-formatted argument prints the same text whether print
    # is a statement (Python 2) or a function (Python 3).
    print("Serving HTTP on %s port %s ..." % (sa[0], sa[1]))
    httpd.serve_forever()
611
612
613if __name__ == '__main__':
614    test()
615