• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1"""HTTP server classes.
2
3Note: BaseHTTPRequestHandler doesn't implement any HTTP request; see
4SimpleHTTPRequestHandler for simple implementations of GET, HEAD and POST,
5and CGIHTTPRequestHandler for CGI scripts.
6
7It does, however, optionally implement HTTP/1.1 persistent connections,
8as of version 0.3.
9
10Notes on CGIHTTPRequestHandler
11------------------------------
12
13This class implements GET and POST requests to cgi-bin scripts.
14
15If the os.fork() function is not present (e.g. on Windows),
16subprocess.Popen() is used as a fallback, with slightly altered semantics.
17
18In all cases, the implementation is intentionally naive -- all
19requests are executed synchronously.
20
21SECURITY WARNING: DON'T USE THIS CODE UNLESS YOU ARE INSIDE A FIREWALL
22-- it may execute arbitrary Python code or external programs.
23
24Note that status code 200 is sent prior to execution of a CGI script, so
25scripts cannot send other status codes such as 302 (redirect).
26
27XXX To do:
28
29- log requests even later (to capture byte count)
30- log user-agent header and other interesting goodies
31- send error log to separate file
32"""
33
34
35# See also:
36#
37# HTTP Working Group                                        T. Berners-Lee
38# INTERNET-DRAFT                                            R. T. Fielding
39# <draft-ietf-http-v10-spec-00.txt>                     H. Frystyk Nielsen
40# Expires September 8, 1995                                  March 8, 1995
41#
42# URL: http://www.ics.uci.edu/pub/ietf/http/draft-ietf-http-v10-spec-00.txt
43#
44# and
45#
46# Network Working Group                                      R. Fielding
47# Request for Comments: 2616                                       et al
48# Obsoletes: 2068                                              June 1999
49# Category: Standards Track
50#
51# URL: http://www.faqs.org/rfcs/rfc2616.html
52
53# Log files
54# ---------
55#
56# Here's a quote from the NCSA httpd docs about log file format.
57#
58# | The logfile format is as follows. Each line consists of:
59# |
60# | host rfc931 authuser [DD/Mon/YYYY:hh:mm:ss] "request" ddd bbbb
61# |
62# |        host: Either the DNS name or the IP number of the remote client
63# |        rfc931: Any information returned by identd for this person,
64# |                - otherwise.
65# |        authuser: If user sent a userid for authentication, the user name,
66# |                  - otherwise.
67# |        DD: Day
68# |        Mon: Month (calendar name)
69# |        YYYY: Year
70# |        hh: hour (24-hour format, the machine's timezone)
71# |        mm: minutes
72# |        ss: seconds
73# |        request: The first line of the HTTP request as sent by the client.
74# |        ddd: the status code returned by the server, - if not available.
75# |        bbbb: the total number of bytes sent,
76# |              *not including the HTTP/1.0 header*, - if not available
77# |
78# | You can determine the name of the file accessed through request.
79#
80# (Actually, the latter is only true if you know the server configuration
81# at the time the request was made!)
82
83__version__ = "0.6"
84
85__all__ = [
86    "HTTPServer", "BaseHTTPRequestHandler",
87    "SimpleHTTPRequestHandler", "CGIHTTPRequestHandler",
88]
89
90import email.utils
91import html
92import http.client
93import io
94import mimetypes
95import os
96import posixpath
97import select
98import shutil
99import socket # For gethostbyaddr()
100import socketserver
101import sys
102import time
103import urllib.parse
104import copy
105import argparse
106
107from http import HTTPStatus
108
109
110# Default error message template
111DEFAULT_ERROR_MESSAGE = """\
112<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
113        "http://www.w3.org/TR/html4/strict.dtd">
114<html>
115    <head>
116        <meta http-equiv="Content-Type" content="text/html;charset=utf-8">
117        <title>Error response</title>
118    </head>
119    <body>
120        <h1>Error response</h1>
121        <p>Error code: %(code)d</p>
122        <p>Message: %(message)s.</p>
123        <p>Error code explanation: %(code)s - %(explain)s.</p>
124    </body>
125</html>
126"""
127
128DEFAULT_ERROR_CONTENT_TYPE = "text/html;charset=utf-8"
129
class HTTPServer(socketserver.TCPServer):
    """TCP server that records its own host name and port once bound."""

    # Let the listening address be reused right away; this seems to make
    # sense in a testing environment.
    allow_reuse_address = 1

    def server_bind(self):
        """Bind the socket, then remember the server name and port.

        After the base class has bound the socket, the first two items of
        ``self.server_address`` are stored as ``self.server_name`` (passed
        through ``socket.getfqdn``) and ``self.server_port``.
        """
        socketserver.TCPServer.server_bind(self)
        bound_host, bound_port = self.server_address[:2]
        self.server_name = socket.getfqdn(bound_host)
        self.server_port = bound_port
140
141
142class BaseHTTPRequestHandler(socketserver.StreamRequestHandler):
143
144    """HTTP request handler base class.
145
146    The following explanation of HTTP serves to guide you through the
147    code as well as to expose any misunderstandings I may have about
148    HTTP (so you don't need to read the code to figure out I'm wrong
149    :-).
150
151    HTTP (HyperText Transfer Protocol) is an extensible protocol on
152    top of a reliable stream transport (e.g. TCP/IP).  The protocol
153    recognizes three parts to a request:
154
155    1. One line identifying the request type and path
156    2. An optional set of RFC-822-style headers
157    3. An optional data part
158
159    The headers and data are separated by a blank line.
160
161    The first line of the request has the form
162
163    <command> <path> <version>
164
165    where <command> is a (case-sensitive) keyword such as GET or POST,
166    <path> is a string containing path information for the request,
167    and <version> should be the string "HTTP/1.0" or "HTTP/1.1".
168    <path> is encoded using the URL encoding scheme (using %xx to signify
169    the ASCII character with hex code xx).
170
171    The specification specifies that lines are separated by CRLF but
172    for compatibility with the widest range of clients recommends
173    servers also handle LF.  Similarly, whitespace in the request line
174    is treated sensibly (allowing multiple spaces between components
175    and allowing trailing whitespace).
176
177    Similarly, for output, lines ought to be separated by CRLF pairs
178    but most clients grok LF characters just fine.
179
180    If the first line of the request has the form
181
182    <command> <path>
183
184    (i.e. <version> is left out) then this is assumed to be an HTTP
185    0.9 request; this form has no optional headers and data part and
186    the reply consists of just the data.
187
188    The reply form of the HTTP 1.x protocol again has three parts:
189
190    1. One line giving the response code
191    2. An optional set of RFC-822-style headers
192    3. The data
193
194    Again, the headers and data are separated by a blank line.
195
196    The response code line has the form
197
198    <version> <responsecode> <responsestring>
199
200    where <version> is the protocol version ("HTTP/1.0" or "HTTP/1.1"),
201    <responsecode> is a 3-digit response code indicating success or
202    failure of the request, and <responsestring> is an optional
203    human-readable string explaining what the response code means.
204
205    This server parses the request and the headers, and then calls a
206    function specific to the request type (<command>).  Specifically,
207    a request SPAM will be handled by a method do_SPAM().  If no
208    such method exists the server sends an error response to the
209    client.  If it exists, it is called with no arguments:
210
211    do_SPAM()
212
213    Note that the request name is case sensitive (i.e. SPAM and spam
214    are different requests).
215
216    The various request details are stored in instance variables:
217
218    - client_address is the client IP address in the form (host,
219    port);
220
221    - command, path and version are the broken-down request line;
222
223    - headers is an instance of email.message.Message (or a derived
224    class) containing the header information;
225
226    - rfile is a file object open for reading positioned at the
227    start of the optional input data part;
228
229    - wfile is a file object open for writing.
230
231    IT IS IMPORTANT TO ADHERE TO THE PROTOCOL FOR WRITING!
232
233    The first thing to be written must be the response line.  Then
234    follow 0 or more header lines, then a blank line, and then the
235    actual data (if any).  The meaning of the header lines depends on
236    the command executed by the server; in most cases, when data is
237    returned, there should be at least one header line of the form
238
239    Content-type: <type>/<subtype>
240
241    where <type> and <subtype> should be registered MIME types,
242    e.g. "text/html" or "text/plain".
243
244    """
245
246    # The Python system version, truncated to its first component.
247    sys_version = "Python/" + sys.version.split()[0]
248
249    # The server software version.  You may want to override this.
250    # The format is multiple whitespace-separated strings,
251    # where each string is of the form name[/version].
252    server_version = "BaseHTTP/" + __version__
253
254    error_message_format = DEFAULT_ERROR_MESSAGE
255    error_content_type = DEFAULT_ERROR_CONTENT_TYPE
256
257    # The default request version.  This only affects responses up until
258    # the point where the request line is parsed, so it mainly decides what
259    # the client gets back when sending a malformed request line.
260    # Most web servers default to HTTP 0.9, i.e. don't send a status line.
261    default_request_version = "HTTP/0.9"
262
263    def parse_request(self):
264        """Parse a request (internal).
265
266        The request should be stored in self.raw_requestline; the results
267        are in self.command, self.path, self.request_version and
268        self.headers.
269
270        Return True for success, False for failure; on failure, an
271        error is sent back.
272
273        """
274        self.command = None  # set in case of error on the first line
275        self.request_version = version = self.default_request_version
276        self.close_connection = True
277        requestline = str(self.raw_requestline, 'iso-8859-1')
278        requestline = requestline.rstrip('\r\n')
279        self.requestline = requestline
280        words = requestline.split()
281        if len(words) == 3:
282            command, path, version = words
283            try:
284                if version[:5] != 'HTTP/':
285                    raise ValueError
286                base_version_number = version.split('/', 1)[1]
287                version_number = base_version_number.split(".")
288                # RFC 2145 section 3.1 says there can be only one "." and
289                #   - major and minor numbers MUST be treated as
290                #      separate integers;
291                #   - HTTP/2.4 is a lower version than HTTP/2.13, which in
292                #      turn is lower than HTTP/12.3;
293                #   - Leading zeros MUST be ignored by recipients.
294                if len(version_number) != 2:
295                    raise ValueError
296                version_number = int(version_number[0]), int(version_number[1])
297            except (ValueError, IndexError):
298                self.send_error(
299                    HTTPStatus.BAD_REQUEST,
300                    "Bad request version (%r)" % version)
301                return False
302            if version_number >= (1, 1) and self.protocol_version >= "HTTP/1.1":
303                self.close_connection = False
304            if version_number >= (2, 0):
305                self.send_error(
306                    HTTPStatus.HTTP_VERSION_NOT_SUPPORTED,
307                    "Invalid HTTP version (%s)" % base_version_number)
308                return False
309        elif len(words) == 2:
310            command, path = words
311            self.close_connection = True
312            if command != 'GET':
313                self.send_error(
314                    HTTPStatus.BAD_REQUEST,
315                    "Bad HTTP/0.9 request type (%r)" % command)
316                return False
317        elif not words:
318            return False
319        else:
320            self.send_error(
321                HTTPStatus.BAD_REQUEST,
322                "Bad request syntax (%r)" % requestline)
323            return False
324        self.command, self.path, self.request_version = command, path, version
325
326        # Examine the headers and look for a Connection directive.
327        try:
328            self.headers = http.client.parse_headers(self.rfile,
329                                                     _class=self.MessageClass)
330        except http.client.LineTooLong as err:
331            self.send_error(
332                HTTPStatus.REQUEST_HEADER_FIELDS_TOO_LARGE,
333                "Line too long",
334                str(err))
335            return False
336        except http.client.HTTPException as err:
337            self.send_error(
338                HTTPStatus.REQUEST_HEADER_FIELDS_TOO_LARGE,
339                "Too many headers",
340                str(err)
341            )
342            return False
343
344        conntype = self.headers.get('Connection', "")
345        if conntype.lower() == 'close':
346            self.close_connection = True
347        elif (conntype.lower() == 'keep-alive' and
348              self.protocol_version >= "HTTP/1.1"):
349            self.close_connection = False
350        # Examine the headers and look for an Expect directive
351        expect = self.headers.get('Expect', "")
352        if (expect.lower() == "100-continue" and
353                self.protocol_version >= "HTTP/1.1" and
354                self.request_version >= "HTTP/1.1"):
355            if not self.handle_expect_100():
356                return False
357        return True
358
359    def handle_expect_100(self):
360        """Decide what to do with an "Expect: 100-continue" header.
361
362        If the client is expecting a 100 Continue response, we must
363        respond with either a 100 Continue or a final response before
364        waiting for the request body. The default is to always respond
365        with a 100 Continue. You can behave differently (for example,
366        reject unauthorized requests) by overriding this method.
367
368        This method should either return True (possibly after sending
369        a 100 Continue response) or send an error response and return
370        False.
371
372        """
373        self.send_response_only(HTTPStatus.CONTINUE)
374        self.end_headers()
375        return True
376
377    def handle_one_request(self):
378        """Handle a single HTTP request.
379
380        You normally don't need to override this method; see the class
381        __doc__ string for information on how to handle specific HTTP
382        commands such as GET and POST.
383
384        """
385        try:
386            self.raw_requestline = self.rfile.readline(65537)
387            if len(self.raw_requestline) > 65536:
388                self.requestline = ''
389                self.request_version = ''
390                self.command = ''
391                self.send_error(HTTPStatus.REQUEST_URI_TOO_LONG)
392                return
393            if not self.raw_requestline:
394                self.close_connection = True
395                return
396            if not self.parse_request():
397                # An error code has been sent, just exit
398                return
399            mname = 'do_' + self.command
400            if not hasattr(self, mname):
401                self.send_error(
402                    HTTPStatus.NOT_IMPLEMENTED,
403                    "Unsupported method (%r)" % self.command)
404                return
405            method = getattr(self, mname)
406            method()
407            self.wfile.flush() #actually send the response if not already done.
408        except socket.timeout as e:
409            #a read or a write timed out.  Discard this connection
410            self.log_error("Request timed out: %r", e)
411            self.close_connection = True
412            return
413
414    def handle(self):
415        """Handle multiple requests if necessary."""
416        self.close_connection = True
417
418        self.handle_one_request()
419        while not self.close_connection:
420            self.handle_one_request()
421
422    def send_error(self, code, message=None, explain=None):
423        """Send and log an error reply.
424
425        Arguments are
426        * code:    an HTTP error code
427                   3 digits
428        * message: a simple optional 1 line reason phrase.
429                   *( HTAB / SP / VCHAR / %x80-FF )
430                   defaults to short entry matching the response code
431        * explain: a detailed message defaults to the long entry
432                   matching the response code.
433
434        This sends an error response (so it must be called before any
435        output has been generated), logs the error, and finally sends
436        a piece of HTML explaining the error to the user.
437
438        """
439
440        try:
441            shortmsg, longmsg = self.responses[code]
442        except KeyError:
443            shortmsg, longmsg = '???', '???'
444        if message is None:
445            message = shortmsg
446        if explain is None:
447            explain = longmsg
448        self.log_error("code %d, message %s", code, message)
449        self.send_response(code, message)
450        self.send_header('Connection', 'close')
451
452        # Message body is omitted for cases described in:
453        #  - RFC7230: 3.3. 1xx, 204(No Content), 304(Not Modified)
454        #  - RFC7231: 6.3.6. 205(Reset Content)
455        body = None
456        if (code >= 200 and
457            code not in (HTTPStatus.NO_CONTENT,
458                         HTTPStatus.RESET_CONTENT,
459                         HTTPStatus.NOT_MODIFIED)):
460            # HTML encode to prevent Cross Site Scripting attacks
461            # (see bug #1100201)
462            content = (self.error_message_format % {
463                'code': code,
464                'message': html.escape(message, quote=False),
465                'explain': html.escape(explain, quote=False)
466            })
467            body = content.encode('UTF-8', 'replace')
468            self.send_header("Content-Type", self.error_content_type)
469            self.send_header('Content-Length', int(len(body)))
470        self.end_headers()
471
472        if self.command != 'HEAD' and body:
473            self.wfile.write(body)
474
475    def send_response(self, code, message=None):
476        """Add the response header to the headers buffer and log the
477        response code.
478
479        Also send two standard headers with the server software
480        version and the current date.
481
482        """
483        self.log_request(code)
484        self.send_response_only(code, message)
485        self.send_header('Server', self.version_string())
486        self.send_header('Date', self.date_time_string())
487
488    def send_response_only(self, code, message=None):
489        """Send the response header only."""
490        if self.request_version != 'HTTP/0.9':
491            if message is None:
492                if code in self.responses:
493                    message = self.responses[code][0]
494                else:
495                    message = ''
496            if not hasattr(self, '_headers_buffer'):
497                self._headers_buffer = []
498            self._headers_buffer.append(("%s %d %s\r\n" %
499                    (self.protocol_version, code, message)).encode(
500                        'latin-1', 'strict'))
501
502    def send_header(self, keyword, value):
503        """Send a MIME header to the headers buffer."""
504        if self.request_version != 'HTTP/0.9':
505            if not hasattr(self, '_headers_buffer'):
506                self._headers_buffer = []
507            self._headers_buffer.append(
508                ("%s: %s\r\n" % (keyword, value)).encode('latin-1', 'strict'))
509
510        if keyword.lower() == 'connection':
511            if value.lower() == 'close':
512                self.close_connection = True
513            elif value.lower() == 'keep-alive':
514                self.close_connection = False
515
516    def end_headers(self):
517        """Send the blank line ending the MIME headers."""
518        if self.request_version != 'HTTP/0.9':
519            self._headers_buffer.append(b"\r\n")
520            self.flush_headers()
521
522    def flush_headers(self):
523        if hasattr(self, '_headers_buffer'):
524            self.wfile.write(b"".join(self._headers_buffer))
525            self._headers_buffer = []
526
527    def log_request(self, code='-', size='-'):
528        """Log an accepted request.
529
530        This is called by send_response().
531
532        """
533        if isinstance(code, HTTPStatus):
534            code = code.value
535        self.log_message('"%s" %s %s',
536                         self.requestline, str(code), str(size))
537
538    def log_error(self, format, *args):
539        """Log an error.
540
541        This is called when a request cannot be fulfilled.  By
542        default it passes the message on to log_message().
543
544        Arguments are the same as for log_message().
545
546        XXX This should go to the separate error log.
547
548        """
549
550        self.log_message(format, *args)
551
552    def log_message(self, format, *args):
553        """Log an arbitrary message.
554
555        This is used by all other logging functions.  Override
556        it if you have specific logging wishes.
557
558        The first argument, FORMAT, is a format string for the
559        message to be logged.  If the format string contains
560        any % escapes requiring parameters, they should be
561        specified as subsequent arguments (it's just like
562        printf!).
563
564        The client ip and current date/time are prefixed to
565        every message.
566
567        """
568
569        sys.stderr.write("%s - - [%s] %s\n" %
570                         (self.address_string(),
571                          self.log_date_time_string(),
572                          format%args))
573
574    def version_string(self):
575        """Return the server software version string."""
576        return self.server_version + ' ' + self.sys_version
577
578    def date_time_string(self, timestamp=None):
579        """Return the current date and time formatted for a message header."""
580        if timestamp is None:
581            timestamp = time.time()
582        return email.utils.formatdate(timestamp, usegmt=True)
583
584    def log_date_time_string(self):
585        """Return the current time formatted for logging."""
586        now = time.time()
587        year, month, day, hh, mm, ss, x, y, z = time.localtime(now)
588        s = "%02d/%3s/%04d %02d:%02d:%02d" % (
589                day, self.monthname[month], year, hh, mm, ss)
590        return s
591
592    weekdayname = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
593
594    monthname = [None,
595                 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
596                 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
597
598    def address_string(self):
599        """Return the client address."""
600
601        return self.client_address[0]
602
603    # Essentially static class variables
604
605    # The version of the HTTP protocol we support.
606    # Set this to HTTP/1.1 to enable automatic keepalive
607    protocol_version = "HTTP/1.0"
608
609    # MessageClass used to parse headers
610    MessageClass = http.client.HTTPMessage
611
612    # hack to maintain backwards compatibility
613    responses = {
614        v: (v.phrase, v.description)
615        for v in HTTPStatus.__members__.values()
616    }
617
618
619class SimpleHTTPRequestHandler(BaseHTTPRequestHandler):
620
621    """Simple HTTP request handler with GET and HEAD commands.
622
623    This serves files from the current directory and any of its
624    subdirectories.  The MIME type for files is determined by
625    calling the .guess_type() method.
626
627    The GET and HEAD requests are identical except that the HEAD
628    request omits the actual contents of the file.
629
630    """
631
632    server_version = "SimpleHTTP/" + __version__
633
634    def do_GET(self):
635        """Serve a GET request."""
636        f = self.send_head()
637        if f:
638            try:
639                self.copyfile(f, self.wfile)
640            finally:
641                f.close()
642
643    def do_HEAD(self):
644        """Serve a HEAD request."""
645        f = self.send_head()
646        if f:
647            f.close()
648
649    def send_head(self):
650        """Common code for GET and HEAD commands.
651
652        This sends the response code and MIME headers.
653
654        Return value is either a file object (which has to be copied
655        to the outputfile by the caller unless the command was HEAD,
656        and must be closed by the caller under all circumstances), or
657        None, in which case the caller has nothing further to do.
658
659        """
660        path = self.translate_path(self.path)
661        f = None
662        if os.path.isdir(path):
663            parts = urllib.parse.urlsplit(self.path)
664            if not parts.path.endswith('/'):
665                # redirect browser - doing basically what apache does
666                self.send_response(HTTPStatus.MOVED_PERMANENTLY)
667                new_parts = (parts[0], parts[1], parts[2] + '/',
668                             parts[3], parts[4])
669                new_url = urllib.parse.urlunsplit(new_parts)
670                self.send_header("Location", new_url)
671                self.end_headers()
672                return None
673            for index in "index.html", "index.htm":
674                index = os.path.join(path, index)
675                if os.path.exists(index):
676                    path = index
677                    break
678            else:
679                return self.list_directory(path)
680        ctype = self.guess_type(path)
681        try:
682            f = open(path, 'rb')
683        except OSError:
684            self.send_error(HTTPStatus.NOT_FOUND, "File not found")
685            return None
686        try:
687            self.send_response(HTTPStatus.OK)
688            self.send_header("Content-type", ctype)
689            fs = os.fstat(f.fileno())
690            self.send_header("Content-Length", str(fs[6]))
691            self.send_header("Last-Modified", self.date_time_string(fs.st_mtime))
692            self.end_headers()
693            return f
694        except:
695            f.close()
696            raise
697
698    def list_directory(self, path):
699        """Helper to produce a directory listing (absent index.html).
700
701        Return value is either a file object, or None (indicating an
702        error).  In either case, the headers are sent, making the
703        interface the same as for send_head().
704
705        """
706        try:
707            list = os.listdir(path)
708        except OSError:
709            self.send_error(
710                HTTPStatus.NOT_FOUND,
711                "No permission to list directory")
712            return None
713        list.sort(key=lambda a: a.lower())
714        r = []
715        try:
716            displaypath = urllib.parse.unquote(self.path,
717                                               errors='surrogatepass')
718        except UnicodeDecodeError:
719            displaypath = urllib.parse.unquote(path)
720        displaypath = html.escape(displaypath, quote=False)
721        enc = sys.getfilesystemencoding()
722        title = 'Directory listing for %s' % displaypath
723        r.append('<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" '
724                 '"http://www.w3.org/TR/html4/strict.dtd">')
725        r.append('<html>\n<head>')
726        r.append('<meta http-equiv="Content-Type" '
727                 'content="text/html; charset=%s">' % enc)
728        r.append('<title>%s</title>\n</head>' % title)
729        r.append('<body>\n<h1>%s</h1>' % title)
730        r.append('<hr>\n<ul>')
731        for name in list:
732            fullname = os.path.join(path, name)
733            displayname = linkname = name
734            # Append / for directories or @ for symbolic links
735            if os.path.isdir(fullname):
736                displayname = name + "/"
737                linkname = name + "/"
738            if os.path.islink(fullname):
739                displayname = name + "@"
740                # Note: a link to a directory displays with @ and links with /
741            r.append('<li><a href="%s">%s</a></li>'
742                    % (urllib.parse.quote(linkname,
743                                          errors='surrogatepass'),
744                       html.escape(displayname, quote=False)))
745        r.append('</ul>\n<hr>\n</body>\n</html>\n')
746        encoded = '\n'.join(r).encode(enc, 'surrogateescape')
747        f = io.BytesIO()
748        f.write(encoded)
749        f.seek(0)
750        self.send_response(HTTPStatus.OK)
751        self.send_header("Content-type", "text/html; charset=%s" % enc)
752        self.send_header("Content-Length", str(len(encoded)))
753        self.end_headers()
754        return f
755
756    def translate_path(self, path):
757        """Translate a /-separated PATH to the local filename syntax.
758
759        Components that mean special things to the local file system
760        (e.g. drive or directory names) are ignored.  (XXX They should
761        probably be diagnosed.)
762
763        """
764        # abandon query parameters
765        path = path.split('?',1)[0]
766        path = path.split('#',1)[0]
767        # Don't forget explicit trailing slash when normalizing. Issue17324
768        trailing_slash = path.rstrip().endswith('/')
769        try:
770            path = urllib.parse.unquote(path, errors='surrogatepass')
771        except UnicodeDecodeError:
772            path = urllib.parse.unquote(path)
773        path = posixpath.normpath(path)
774        words = path.split('/')
775        words = filter(None, words)
776        path = os.getcwd()
777        for word in words:
778            if os.path.dirname(word) or word in (os.curdir, os.pardir):
779                # Ignore components that are not a simple file/directory name
780                continue
781            path = os.path.join(path, word)
782        if trailing_slash:
783            path += '/'
784        return path
785
786    def copyfile(self, source, outputfile):
787        """Copy all data between two file objects.
788
789        The SOURCE argument is a file object open for reading
790        (or anything with a read() method) and the DESTINATION
791        argument is a file object open for writing (or
792        anything with a write() method).
793
794        The only reason for overriding this would be to change
795        the block size or perhaps to replace newlines by CRLF
796        -- note however that this the default server uses this
797        to copy binary data as well.
798
799        """
800        shutil.copyfileobj(source, outputfile)
801
802    def guess_type(self, path):
803        """Guess the type of a file.
804
805        Argument is a PATH (a filename).
806
807        Return value is a string of the form type/subtype,
808        usable for a MIME Content-type header.
809
810        The default implementation looks the file's extension
811        up in the table self.extensions_map, using application/octet-stream
812        as a default; however it would be permissible (if
813        slow) to look inside the data to make a better guess.
814
815        """
816
817        base, ext = posixpath.splitext(path)
818        if ext in self.extensions_map:
819            return self.extensions_map[ext]
820        ext = ext.lower()
821        if ext in self.extensions_map:
822            return self.extensions_map[ext]
823        else:
824            return self.extensions_map['']
825
826    if not mimetypes.inited:
827        mimetypes.init() # try to read system mime.types
828    extensions_map = mimetypes.types_map.copy()
829    extensions_map.update({
830        '': 'application/octet-stream', # Default
831        '.py': 'text/plain',
832        '.c': 'text/plain',
833        '.h': 'text/plain',
834        })
835
836
837# Utilities for CGIHTTPRequestHandler
838
839def _url_collapse_path(path):
840    """
841    Given a URL path, remove extra '/'s and '.' path elements and collapse
842    any '..' references and returns a collapsed path.
843
844    Implements something akin to RFC-2396 5.2 step 6 to parse relative paths.
845    The utility of this function is limited to is_cgi method and helps
846    preventing some security attacks.
847
848    Returns: The reconstituted URL, which will always start with a '/'.
849
850    Raises: IndexError if too many '..' occur within the path.
851
852    """
853    # Query component should not be involved.
854    path, _, query = path.partition('?')
855    path = urllib.parse.unquote(path)
856
857    # Similar to os.path.split(os.path.normpath(path)) but specific to URL
858    # path semantics rather than local operating system semantics.
859    path_parts = path.split('/')
860    head_parts = []
861    for part in path_parts[:-1]:
862        if part == '..':
863            head_parts.pop() # IndexError if more '..' than prior parts
864        elif part and part != '.':
865            head_parts.append( part )
866    if path_parts:
867        tail_part = path_parts.pop()
868        if tail_part:
869            if tail_part == '..':
870                head_parts.pop()
871                tail_part = ''
872            elif tail_part == '.':
873                tail_part = ''
874    else:
875        tail_part = ''
876
877    if query:
878        tail_part = '?'.join((tail_part, query))
879
880    splitpath = ('/' + '/'.join(head_parts), tail_part)
881    collapsed_path = "/".join(splitpath)
882
883    return collapsed_path
884
885
886
# Cached result of nobody_uid(); None until the first successful lookup.
nobody = None

def nobody_uid():
    """Internal routine to get nobody's uid.

    Returns -1 when the pwd module cannot be imported.  Otherwise
    returns the uid of the 'nobody' account or, if there is no such
    account, one more than the largest uid in the password database.
    The value is remembered in the module-level name `nobody`.
    """
    global nobody
    if not nobody:
        try:
            import pwd
        except ImportError:
            return -1
        try:
            nobody = pwd.getpwnam('nobody').pw_uid
        except KeyError:
            nobody = max(entry.pw_uid for entry in pwd.getpwall()) + 1
    return nobody
903
904
def executable(path):
    """Report whether os.access() grants execute permission on path."""
    may_execute = os.access(path, mode=os.X_OK)
    return may_execute
908
909
class CGIHTTPRequestHandler(SimpleHTTPRequestHandler):

    """Complete HTTP server with GET, HEAD and POST commands.

    GET and HEAD also support running CGI scripts.

    The POST command is *only* implemented for CGI scripts.

    """

    # Determine platform specifics
    have_fork = hasattr(os, 'fork')

    # Make rfile unbuffered -- we need to read one line and then pass
    # the rest to a subprocess, so we can't use buffered input.
    rbufsize = 0

    def do_POST(self):
        """Serve a POST request.

        This is only implemented for CGI scripts; any other target is
        answered with 501 (Not Implemented).

        """

        if self.is_cgi():
            self.run_cgi()
        else:
            self.send_error(
                HTTPStatus.NOT_IMPLEMENTED,
                "Can only POST to CGI scripts")

    def send_head(self):
        """Version of send_head that supports CGI scripts.

        Requests recognized by is_cgi() are handed to run_cgi(); all
        others are delegated to SimpleHTTPRequestHandler.send_head().

        """
        if self.is_cgi():
            return self.run_cgi()
        else:
            return SimpleHTTPRequestHandler.send_head(self)

    def is_cgi(self):
        """Test whether self.path corresponds to a CGI script.

        Returns True and updates the cgi_info attribute to the tuple
        (dir, rest) if self.path requires running a CGI script.
        Returns False otherwise.

        If any exception is raised, the caller should assume that
        self.path was rejected as invalid and act accordingly.

        The default implementation tests whether the normalized url
        path begins with one of the strings in self.cgi_directories
        (and the next character is a '/' or the end of the string).

        """
        collapsed_path = _url_collapse_path(self.path)
        # Split at the first '/' after the leading one: head is the
        # top-level directory, tail is everything below it.
        dir_sep = collapsed_path.find('/', 1)
        head, tail = collapsed_path[:dir_sep], collapsed_path[dir_sep+1:]
        if head in self.cgi_directories:
            self.cgi_info = head, tail
            return True
        return False


    # URL directories whose contents is_cgi() treats as CGI scripts.
    cgi_directories = ['/cgi-bin', '/htbin']

    def is_executable(self, path):
        """Test whether argument path is an executable file."""
        return executable(path)

    def is_python(self, path):
        """Test whether argument path is a Python script.

        The decision is made purely on the (case-insensitive) file
        extension: '.py' or '.pyw'.
        """
        head, tail = os.path.splitext(path)
        return tail.lower() in (".py", ".pyw")

    def _cgi_environ(self, scriptname, rest, query):
        """Build the environment handed to a CGI script.

        scriptname is the URL path of the script (becomes SCRIPT_NAME),
        rest is the still-quoted extra path that follows the script
        name (becomes PATH_INFO / PATH_TRANSLATED after unquoting; may
        be empty), and query is the raw query string (may be empty).

        Returns a deep copy of os.environ extended with the CGI
        variables derived from the current request.

        """
        # Reference: http://hoohoo.ncsa.uiuc.edu/cgi/env.html
        # XXX Much of the following could be prepared ahead of time!
        env = copy.deepcopy(os.environ)
        env['SERVER_SOFTWARE'] = self.version_string()
        env['SERVER_NAME'] = self.server.server_name
        env['GATEWAY_INTERFACE'] = 'CGI/1.1'
        env['SERVER_PROTOCOL'] = self.protocol_version
        env['SERVER_PORT'] = str(self.server.server_port)
        env['REQUEST_METHOD'] = self.command
        uqrest = urllib.parse.unquote(rest)
        env['PATH_INFO'] = uqrest
        env['PATH_TRANSLATED'] = self.translate_path(uqrest)
        env['SCRIPT_NAME'] = scriptname
        if query:
            env['QUERY_STRING'] = query
        env['REMOTE_ADDR'] = self.client_address[0]
        authorization = self.headers.get("authorization")
        if authorization:
            authorization = authorization.split()
            if len(authorization) == 2:
                import base64, binascii
                env['AUTH_TYPE'] = authorization[0]
                if authorization[0].lower() == "basic":
                    try:
                        authorization = authorization[1].encode('ascii')
                        authorization = base64.decodebytes(authorization).\
                                        decode('ascii')
                    except (binascii.Error, UnicodeError):
                        # Undecodable credentials: leave REMOTE_USER unset.
                        pass
                    else:
                        authorization = authorization.split(':')
                        if len(authorization) == 2:
                            env['REMOTE_USER'] = authorization[0]
        # XXX REMOTE_IDENT
        if self.headers.get('content-type') is None:
            env['CONTENT_TYPE'] = self.headers.get_content_type()
        else:
            env['CONTENT_TYPE'] = self.headers['content-type']
        length = self.headers.get('content-length')
        if length:
            env['CONTENT_LENGTH'] = length
        referer = self.headers.get('referer')
        if referer:
            env['HTTP_REFERER'] = referer
        # get_all() returns the value of every Accept header; the older
        # getallmatchingheaders() call never matched anything on these
        # message objects, which left HTTP_ACCEPT permanently empty.
        accept = self.headers.get_all('accept', ())
        env['HTTP_ACCEPT'] = ','.join(accept)
        ua = self.headers.get('user-agent')
        if ua:
            env['HTTP_USER_AGENT'] = ua
        co = filter(None, self.headers.get_all('cookie', []))
        cookie_str = ', '.join(co)
        if cookie_str:
            env['HTTP_COOKIE'] = cookie_str
        # XXX Other HTTP_* headers
        # Since we're setting the env in the parent, provide empty
        # values to override previously set values
        for k in ('QUERY_STRING', 'REMOTE_HOST', 'CONTENT_LENGTH',
                  'HTTP_USER_AGENT', 'HTTP_COOKIE', 'HTTP_REFERER'):
            env.setdefault(k, "")
        return env

    def run_cgi(self):
        """Execute a CGI script.

        Uses self.cgi_info (set by is_cgi()) to locate the script,
        answers 404/403 if it is missing, not a plain file or not
        executable, and otherwise sends a 200 status line and runs the
        script -- via fork/execve where os.fork exists, via
        subprocess.Popen elsewhere.  Always returns None.

        """
        cgi_dir, rest = self.cgi_info
        path = cgi_dir + '/' + rest
        # Descend into real sub-directories of the CGI directory so that
        # scripts may live below it; stop at the first non-directory.
        i = path.find('/', len(cgi_dir)+1)
        while i >= 0:
            nextdir = path[:i]
            nextrest = path[i+1:]

            scriptdir = self.translate_path(nextdir)
            if os.path.isdir(scriptdir):
                cgi_dir, rest = nextdir, nextrest
                i = path.find('/', len(cgi_dir)+1)
            else:
                break

        # find an explicit query string, if present.
        rest, _, query = rest.partition('?')

        # dissect the part after the directory name into a script name &
        # a possible additional path, to be stored in PATH_INFO.
        i = rest.find('/')
        if i >= 0:
            script, rest = rest[:i], rest[i:]
        else:
            script, rest = rest, ''

        scriptname = cgi_dir + '/' + script
        scriptfile = self.translate_path(scriptname)
        if not os.path.exists(scriptfile):
            self.send_error(
                HTTPStatus.NOT_FOUND,
                "No such CGI script (%r)" % scriptname)
            return
        if not os.path.isfile(scriptfile):
            self.send_error(
                HTTPStatus.FORBIDDEN,
                "CGI script is not a plain file (%r)" % scriptname)
            return
        ispy = self.is_python(scriptname)
        # Without fork, Python scripts are run through the interpreter
        # and therefore need no execute permission of their own.
        if self.have_fork or not ispy:
            if not self.is_executable(scriptfile):
                self.send_error(
                    HTTPStatus.FORBIDDEN,
                    "CGI script is not executable (%r)" % scriptname)
                return

        env = self._cgi_environ(scriptname, rest, query)

        # The status line goes out before the script runs, so the script
        # itself cannot choose a different status code.
        self.send_response(HTTPStatus.OK, "Script output follows")
        self.flush_headers()

        decoded_query = query.replace('+', ' ')

        if self.have_fork:
            # Unix -- fork as we should
            args = [script]
            if '=' not in decoded_query:
                args.append(decoded_query)
            nobody = nobody_uid()
            self.wfile.flush() # Always flush before forking
            pid = os.fork()
            if pid != 0:
                # Parent
                pid, sts = os.waitpid(pid, 0)
                # throw away additional data [see bug #427345]
                while select.select([self.rfile], [], [], 0)[0]:
                    if not self.rfile.read(1):
                        break
                if sts:
                    self.log_error("CGI script exit status %#x", sts)
                return
            # Child
            try:
                try:
                    os.setuid(nobody)
                except OSError:
                    # Best effort only: keep the current uid if the
                    # switch is refused.
                    pass
                # Wire the connection to the script's stdin/stdout.
                os.dup2(self.rfile.fileno(), 0)
                os.dup2(self.wfile.fileno(), 1)
                os.execve(scriptfile, args, env)
            except:
                # Deliberately catch everything: the child reports the
                # failure and exits instead of unwinding any further.
                self.server.handle_error(self.request, self.client_address)
                os._exit(127)

        else:
            # Non-Unix -- use subprocess
            import subprocess
            cmdline = [scriptfile]
            if self.is_python(scriptfile):
                interp = sys.executable
                if interp.lower().endswith("w.exe"):
                    # On Windows, use python.exe, not pythonw.exe
                    interp = interp[:-5] + interp[-4:]
                cmdline = [interp, '-u'] + cmdline
            if '=' not in query:
                cmdline.append(query)
            self.log_message("command: %s", subprocess.list2cmdline(cmdline))
            length = self.headers.get('content-length')
            try:
                nbytes = int(length)
            except (TypeError, ValueError):
                # Missing or malformed Content-Length: forward no body.
                nbytes = 0
            p = subprocess.Popen(cmdline,
                                 stdin=subprocess.PIPE,
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE,
                                 env = env
                                 )
            if self.command.lower() == "post" and nbytes > 0:
                data = self.rfile.read(nbytes)
            else:
                data = None
            # throw away additional data [see bug #427345]
            while select.select([self.rfile._sock], [], [], 0)[0]:
                if not self.rfile._sock.recv(1):
                    break
            stdout, stderr = p.communicate(data)
            self.wfile.write(stdout)
            if stderr:
                self.log_error('%s', stderr)
            p.stderr.close()
            p.stdout.close()
            status = p.returncode
            if status:
                self.log_error("CGI script exit status %#x", status)
            else:
                self.log_message("CGI script exited OK")
1173
1174
def test(HandlerClass=BaseHTTPRequestHandler,
         ServerClass=HTTPServer, protocol="HTTP/1.0", port=8000, bind=""):
    """Run an HTTP server for trying out a request handler class.

    HandlerClass.protocol_version is set to protocol, then a
    ServerClass instance bound to (bind, port) -- port 8000 unless
    another port is given -- serves requests until interrupted from
    the keyboard, at which point the process exits with status 0.

    """
    HandlerClass.protocol_version = protocol
    address = (bind, port)

    with ServerClass(address, HandlerClass) as httpd:
        # Report the address actually bound rather than the one requested.
        bound = httpd.socket.getsockname()
        banner = "Serving HTTP on {host} port {port} (http://{host}:{port}/) ..."
        print(banner.format(host=bound[0], port=bound[1]))
        try:
            httpd.serve_forever()
        except KeyboardInterrupt:
            print("\nKeyboard interrupt received, exiting.")
            sys.exit(0)
1194
if __name__ == '__main__':
    # Command-line entry point: parse the options, pick the handler
    # class and hand over to test().
    parser = argparse.ArgumentParser()
    parser.add_argument('--cgi', action='store_true',
                        help='Run as CGI Server')
    parser.add_argument('--bind', '-b', metavar='ADDRESS', default='',
                        help='Specify alternate bind address '
                             '[default: all interfaces]')
    parser.add_argument('port', action='store', nargs='?',
                        type=int, default=8000,
                        help='Specify alternate port [default: 8000]')
    args = parser.parse_args()
    handler_class = (CGIHTTPRequestHandler if args.cgi
                     else SimpleHTTPRequestHandler)
    test(HandlerClass=handler_class, port=args.port, bind=args.bind)
1212