• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1"""HTTP server classes.
2
3Note: BaseHTTPRequestHandler doesn't implement any HTTP request; see
4SimpleHTTPRequestHandler for simple implementations of GET, HEAD and POST,
5and CGIHTTPRequestHandler for CGI scripts.
6
7It does, however, optionally implement HTTP/1.1 persistent connections,
8as of version 0.3.
9
10Notes on CGIHTTPRequestHandler
11------------------------------
12
13This class implements GET and POST requests to cgi-bin scripts.
14
15If the os.fork() function is not present (e.g. on Windows),
16subprocess.Popen() is used as a fallback, with slightly altered semantics.
17
18In all cases, the implementation is intentionally naive -- all
19requests are executed synchronously.
20
21SECURITY WARNING: DON'T USE THIS CODE UNLESS YOU ARE INSIDE A FIREWALL
22-- it may execute arbitrary Python code or external programs.
23
24Note that status code 200 is sent prior to execution of a CGI script, so
25scripts cannot send other status codes such as 302 (redirect).
26
27XXX To do:
28
29- log requests even later (to capture byte count)
30- log user-agent header and other interesting goodies
31- send error log to separate file
32"""
33
34
35# See also:
36#
37# HTTP Working Group                                        T. Berners-Lee
38# INTERNET-DRAFT                                            R. T. Fielding
39# <draft-ietf-http-v10-spec-00.txt>                     H. Frystyk Nielsen
40# Expires September 8, 1995                                  March 8, 1995
41#
42# URL: http://www.ics.uci.edu/pub/ietf/http/draft-ietf-http-v10-spec-00.txt
43#
44# and
45#
46# Network Working Group                                      R. Fielding
47# Request for Comments: 2616                                       et al
48# Obsoletes: 2068                                              June 1999
49# Category: Standards Track
50#
51# URL: http://www.faqs.org/rfcs/rfc2616.html
52
53# Log files
54# ---------
55#
56# Here's a quote from the NCSA httpd docs about log file format.
57#
58# | The logfile format is as follows. Each line consists of:
59# |
60# | host rfc931 authuser [DD/Mon/YYYY:hh:mm:ss] "request" ddd bbbb
61# |
62# |        host: Either the DNS name or the IP number of the remote client
63# |        rfc931: Any information returned by identd for this person,
64# |                - otherwise.
65# |        authuser: If user sent a userid for authentication, the user name,
66# |                  - otherwise.
67# |        DD: Day
68# |        Mon: Month (calendar name)
69# |        YYYY: Year
70# |        hh: hour (24-hour format, the machine's timezone)
71# |        mm: minutes
72# |        ss: seconds
73# |        request: The first line of the HTTP request as sent by the client.
74# |        ddd: the status code returned by the server, - if not available.
75# |        bbbb: the total number of bytes sent,
76# |              *not including the HTTP/1.0 header*, - if not available
77# |
78# | You can determine the name of the file accessed through request.
79#
80# (Actually, the latter is only true if you know the server configuration
81# at the time the request was made!)
82
83__version__ = "0.6"
84
85__all__ = [
86    "HTTPServer", "ThreadingHTTPServer", "BaseHTTPRequestHandler",
87    "SimpleHTTPRequestHandler", "CGIHTTPRequestHandler",
88]
89
90import copy
91import datetime
92import email.utils
93import html
94import http.client
95import io
96import mimetypes
97import os
98import posixpath
99import select
100import shutil
101import socket # For gethostbyaddr()
102import socketserver
103import sys
104import time
105import urllib.parse
106from functools import partial
107
108from http import HTTPStatus
109
110
111# Default error message template
112DEFAULT_ERROR_MESSAGE = """\
113<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
114        "http://www.w3.org/TR/html4/strict.dtd">
115<html>
116    <head>
117        <meta http-equiv="Content-Type" content="text/html;charset=utf-8">
118        <title>Error response</title>
119    </head>
120    <body>
121        <h1>Error response</h1>
122        <p>Error code: %(code)d</p>
123        <p>Message: %(message)s.</p>
124        <p>Error code explanation: %(code)s - %(explain)s.</p>
125    </body>
126</html>
127"""
128
129DEFAULT_ERROR_CONTENT_TYPE = "text/html;charset=utf-8"
130
class HTTPServer(socketserver.TCPServer):
    """TCP server that remembers its own host name and port once bound."""

    # Seems to make sense in testing environment
    allow_reuse_address = 1

    def server_bind(self):
        """Bind the socket, then record the server name and port.

        After the base class has bound the socket, the first two items of
        ``self.server_address`` are used to set ``self.server_name`` (the
        fully qualified name of the bound host) and ``self.server_port``.
        """
        super().server_bind()
        bound_host, bound_port = self.server_address[:2]
        self.server_name = socket.getfqdn(bound_host)
        self.server_port = bound_port
141
142
class ThreadingHTTPServer(socketserver.ThreadingMixIn, HTTPServer):
    """HTTPServer combined with socketserver.ThreadingMixIn."""

    # Flag consumed by socketserver.ThreadingMixIn; see the socketserver
    # documentation for its effect on the per-request threads.
    daemon_threads = True
145
146
147class BaseHTTPRequestHandler(socketserver.StreamRequestHandler):
148
149    """HTTP request handler base class.
150
151    The following explanation of HTTP serves to guide you through the
152    code as well as to expose any misunderstandings I may have about
153    HTTP (so you don't need to read the code to figure out I'm wrong
154    :-).
155
156    HTTP (HyperText Transfer Protocol) is an extensible protocol on
157    top of a reliable stream transport (e.g. TCP/IP).  The protocol
158    recognizes three parts to a request:
159
160    1. One line identifying the request type and path
161    2. An optional set of RFC-822-style headers
162    3. An optional data part
163
164    The headers and data are separated by a blank line.
165
166    The first line of the request has the form
167
168    <command> <path> <version>
169
170    where <command> is a (case-sensitive) keyword such as GET or POST,
171    <path> is a string containing path information for the request,
172    and <version> should be the string "HTTP/1.0" or "HTTP/1.1".
173    <path> is encoded using the URL encoding scheme (using %xx to signify
174    the ASCII character with hex code xx).
175
176    The specification specifies that lines are separated by CRLF but
177    for compatibility with the widest range of clients recommends
178    servers also handle LF.  Similarly, whitespace in the request line
179    is treated sensibly (allowing multiple spaces between components
180    and allowing trailing whitespace).
181
182    Similarly, for output, lines ought to be separated by CRLF pairs
183    but most clients grok LF characters just fine.
184
185    If the first line of the request has the form
186
187    <command> <path>
188
189    (i.e. <version> is left out) then this is assumed to be an HTTP
190    0.9 request; this form has no optional headers and data part and
191    the reply consists of just the data.
192
193    The reply form of the HTTP 1.x protocol again has three parts:
194
195    1. One line giving the response code
196    2. An optional set of RFC-822-style headers
197    3. The data
198
199    Again, the headers and data are separated by a blank line.
200
201    The response code line has the form
202
203    <version> <responsecode> <responsestring>
204
205    where <version> is the protocol version ("HTTP/1.0" or "HTTP/1.1"),
206    <responsecode> is a 3-digit response code indicating success or
207    failure of the request, and <responsestring> is an optional
208    human-readable string explaining what the response code means.
209
210    This server parses the request and the headers, and then calls a
211    function specific to the request type (<command>).  Specifically,
212    a request SPAM will be handled by a method do_SPAM().  If no
213    such method exists the server sends an error response to the
214    client.  If it exists, it is called with no arguments:
215
216    do_SPAM()
217
218    Note that the request name is case sensitive (i.e. SPAM and spam
219    are different requests).
220
221    The various request details are stored in instance variables:
222
223    - client_address is the client IP address in the form (host,
224    port);
225
226    - command, path and version are the broken-down request line;
227
228    - headers is an instance of email.message.Message (or a derived
229    class) containing the header information;
230
231    - rfile is a file object open for reading positioned at the
232    start of the optional input data part;
233
234    - wfile is a file object open for writing.
235
236    IT IS IMPORTANT TO ADHERE TO THE PROTOCOL FOR WRITING!
237
238    The first thing to be written must be the response line.  Then
239    follow 0 or more header lines, then a blank line, and then the
240    actual data (if any).  The meaning of the header lines depends on
241    the command executed by the server; in most cases, when data is
242    returned, there should be at least one header line of the form
243
244    Content-type: <type>/<subtype>
245
246    where <type> and <subtype> should be registered MIME types,
247    e.g. "text/html" or "text/plain".
248
249    """
250
251    # The Python system version, truncated to its first component.
252    sys_version = "Python/" + sys.version.split()[0]
253
254    # The server software version.  You may want to override this.
255    # The format is multiple whitespace-separated strings,
256    # where each string is of the form name[/version].
257    server_version = "BaseHTTP/" + __version__
258
259    error_message_format = DEFAULT_ERROR_MESSAGE
260    error_content_type = DEFAULT_ERROR_CONTENT_TYPE
261
262    # The default request version.  This only affects responses up until
263    # the point where the request line is parsed, so it mainly decides what
264    # the client gets back when sending a malformed request line.
265    # Most web servers default to HTTP 0.9, i.e. don't send a status line.
266    default_request_version = "HTTP/0.9"
267
268    def parse_request(self):
269        """Parse a request (internal).
270
271        The request should be stored in self.raw_requestline; the results
272        are in self.command, self.path, self.request_version and
273        self.headers.
274
275        Return True for success, False for failure; on failure, any relevant
276        error response has already been sent back.
277
278        """
279        self.command = None  # set in case of error on the first line
280        self.request_version = version = self.default_request_version
281        self.close_connection = True
282        requestline = str(self.raw_requestline, 'iso-8859-1')
283        requestline = requestline.rstrip('\r\n')
284        self.requestline = requestline
285        words = requestline.split()
286        if len(words) == 0:
287            return False
288
289        if len(words) >= 3:  # Enough to determine protocol version
290            version = words[-1]
291            try:
292                if not version.startswith('HTTP/'):
293                    raise ValueError
294                base_version_number = version.split('/', 1)[1]
295                version_number = base_version_number.split(".")
296                # RFC 2145 section 3.1 says there can be only one "." and
297                #   - major and minor numbers MUST be treated as
298                #      separate integers;
299                #   - HTTP/2.4 is a lower version than HTTP/2.13, which in
300                #      turn is lower than HTTP/12.3;
301                #   - Leading zeros MUST be ignored by recipients.
302                if len(version_number) != 2:
303                    raise ValueError
304                version_number = int(version_number[0]), int(version_number[1])
305            except (ValueError, IndexError):
306                self.send_error(
307                    HTTPStatus.BAD_REQUEST,
308                    "Bad request version (%r)" % version)
309                return False
310            if version_number >= (1, 1) and self.protocol_version >= "HTTP/1.1":
311                self.close_connection = False
312            if version_number >= (2, 0):
313                self.send_error(
314                    HTTPStatus.HTTP_VERSION_NOT_SUPPORTED,
315                    "Invalid HTTP version (%s)" % base_version_number)
316                return False
317            self.request_version = version
318
319        if not 2 <= len(words) <= 3:
320            self.send_error(
321                HTTPStatus.BAD_REQUEST,
322                "Bad request syntax (%r)" % requestline)
323            return False
324        command, path = words[:2]
325        if len(words) == 2:
326            self.close_connection = True
327            if command != 'GET':
328                self.send_error(
329                    HTTPStatus.BAD_REQUEST,
330                    "Bad HTTP/0.9 request type (%r)" % command)
331                return False
332        self.command, self.path = command, path
333
334        # Examine the headers and look for a Connection directive.
335        try:
336            self.headers = http.client.parse_headers(self.rfile,
337                                                     _class=self.MessageClass)
338        except http.client.LineTooLong as err:
339            self.send_error(
340                HTTPStatus.REQUEST_HEADER_FIELDS_TOO_LARGE,
341                "Line too long",
342                str(err))
343            return False
344        except http.client.HTTPException as err:
345            self.send_error(
346                HTTPStatus.REQUEST_HEADER_FIELDS_TOO_LARGE,
347                "Too many headers",
348                str(err)
349            )
350            return False
351
352        conntype = self.headers.get('Connection', "")
353        if conntype.lower() == 'close':
354            self.close_connection = True
355        elif (conntype.lower() == 'keep-alive' and
356              self.protocol_version >= "HTTP/1.1"):
357            self.close_connection = False
358        # Examine the headers and look for an Expect directive
359        expect = self.headers.get('Expect', "")
360        if (expect.lower() == "100-continue" and
361                self.protocol_version >= "HTTP/1.1" and
362                self.request_version >= "HTTP/1.1"):
363            if not self.handle_expect_100():
364                return False
365        return True
366
367    def handle_expect_100(self):
368        """Decide what to do with an "Expect: 100-continue" header.
369
370        If the client is expecting a 100 Continue response, we must
371        respond with either a 100 Continue or a final response before
372        waiting for the request body. The default is to always respond
373        with a 100 Continue. You can behave differently (for example,
374        reject unauthorized requests) by overriding this method.
375
376        This method should either return True (possibly after sending
377        a 100 Continue response) or send an error response and return
378        False.
379
380        """
381        self.send_response_only(HTTPStatus.CONTINUE)
382        self.end_headers()
383        return True
384
385    def handle_one_request(self):
386        """Handle a single HTTP request.
387
388        You normally don't need to override this method; see the class
389        __doc__ string for information on how to handle specific HTTP
390        commands such as GET and POST.
391
392        """
393        try:
394            self.raw_requestline = self.rfile.readline(65537)
395            if len(self.raw_requestline) > 65536:
396                self.requestline = ''
397                self.request_version = ''
398                self.command = ''
399                self.send_error(HTTPStatus.REQUEST_URI_TOO_LONG)
400                return
401            if not self.raw_requestline:
402                self.close_connection = True
403                return
404            if not self.parse_request():
405                # An error code has been sent, just exit
406                return
407            mname = 'do_' + self.command
408            if not hasattr(self, mname):
409                self.send_error(
410                    HTTPStatus.NOT_IMPLEMENTED,
411                    "Unsupported method (%r)" % self.command)
412                return
413            method = getattr(self, mname)
414            method()
415            self.wfile.flush() #actually send the response if not already done.
416        except socket.timeout as e:
417            #a read or a write timed out.  Discard this connection
418            self.log_error("Request timed out: %r", e)
419            self.close_connection = True
420            return
421
422    def handle(self):
423        """Handle multiple requests if necessary."""
424        self.close_connection = True
425
426        self.handle_one_request()
427        while not self.close_connection:
428            self.handle_one_request()
429
430    def send_error(self, code, message=None, explain=None):
431        """Send and log an error reply.
432
433        Arguments are
434        * code:    an HTTP error code
435                   3 digits
436        * message: a simple optional 1 line reason phrase.
437                   *( HTAB / SP / VCHAR / %x80-FF )
438                   defaults to short entry matching the response code
439        * explain: a detailed message defaults to the long entry
440                   matching the response code.
441
442        This sends an error response (so it must be called before any
443        output has been generated), logs the error, and finally sends
444        a piece of HTML explaining the error to the user.
445
446        """
447
448        try:
449            shortmsg, longmsg = self.responses[code]
450        except KeyError:
451            shortmsg, longmsg = '???', '???'
452        if message is None:
453            message = shortmsg
454        if explain is None:
455            explain = longmsg
456        self.log_error("code %d, message %s", code, message)
457        self.send_response(code, message)
458        self.send_header('Connection', 'close')
459
460        # Message body is omitted for cases described in:
461        #  - RFC7230: 3.3. 1xx, 204(No Content), 304(Not Modified)
462        #  - RFC7231: 6.3.6. 205(Reset Content)
463        body = None
464        if (code >= 200 and
465            code not in (HTTPStatus.NO_CONTENT,
466                         HTTPStatus.RESET_CONTENT,
467                         HTTPStatus.NOT_MODIFIED)):
468            # HTML encode to prevent Cross Site Scripting attacks
469            # (see bug #1100201)
470            content = (self.error_message_format % {
471                'code': code,
472                'message': html.escape(message, quote=False),
473                'explain': html.escape(explain, quote=False)
474            })
475            body = content.encode('UTF-8', 'replace')
476            self.send_header("Content-Type", self.error_content_type)
477            self.send_header('Content-Length', str(len(body)))
478        self.end_headers()
479
480        if self.command != 'HEAD' and body:
481            self.wfile.write(body)
482
483    def send_response(self, code, message=None):
484        """Add the response header to the headers buffer and log the
485        response code.
486
487        Also send two standard headers with the server software
488        version and the current date.
489
490        """
491        self.log_request(code)
492        self.send_response_only(code, message)
493        self.send_header('Server', self.version_string())
494        self.send_header('Date', self.date_time_string())
495
496    def send_response_only(self, code, message=None):
497        """Send the response header only."""
498        if self.request_version != 'HTTP/0.9':
499            if message is None:
500                if code in self.responses:
501                    message = self.responses[code][0]
502                else:
503                    message = ''
504            if not hasattr(self, '_headers_buffer'):
505                self._headers_buffer = []
506            self._headers_buffer.append(("%s %d %s\r\n" %
507                    (self.protocol_version, code, message)).encode(
508                        'latin-1', 'strict'))
509
510    def send_header(self, keyword, value):
511        """Send a MIME header to the headers buffer."""
512        if self.request_version != 'HTTP/0.9':
513            if not hasattr(self, '_headers_buffer'):
514                self._headers_buffer = []
515            self._headers_buffer.append(
516                ("%s: %s\r\n" % (keyword, value)).encode('latin-1', 'strict'))
517
518        if keyword.lower() == 'connection':
519            if value.lower() == 'close':
520                self.close_connection = True
521            elif value.lower() == 'keep-alive':
522                self.close_connection = False
523
524    def end_headers(self):
525        """Send the blank line ending the MIME headers."""
526        if self.request_version != 'HTTP/0.9':
527            self._headers_buffer.append(b"\r\n")
528            self.flush_headers()
529
530    def flush_headers(self):
531        if hasattr(self, '_headers_buffer'):
532            self.wfile.write(b"".join(self._headers_buffer))
533            self._headers_buffer = []
534
535    def log_request(self, code='-', size='-'):
536        """Log an accepted request.
537
538        This is called by send_response().
539
540        """
541        if isinstance(code, HTTPStatus):
542            code = code.value
543        self.log_message('"%s" %s %s',
544                         self.requestline, str(code), str(size))
545
546    def log_error(self, format, *args):
547        """Log an error.
548
549        This is called when a request cannot be fulfilled.  By
550        default it passes the message on to log_message().
551
552        Arguments are the same as for log_message().
553
554        XXX This should go to the separate error log.
555
556        """
557
558        self.log_message(format, *args)
559
560    def log_message(self, format, *args):
561        """Log an arbitrary message.
562
563        This is used by all other logging functions.  Override
564        it if you have specific logging wishes.
565
566        The first argument, FORMAT, is a format string for the
567        message to be logged.  If the format string contains
568        any % escapes requiring parameters, they should be
569        specified as subsequent arguments (it's just like
570        printf!).
571
572        The client ip and current date/time are prefixed to
573        every message.
574
575        """
576
577        sys.stderr.write("%s - - [%s] %s\n" %
578                         (self.address_string(),
579                          self.log_date_time_string(),
580                          format%args))
581
582    def version_string(self):
583        """Return the server software version string."""
584        return self.server_version + ' ' + self.sys_version
585
586    def date_time_string(self, timestamp=None):
587        """Return the current date and time formatted for a message header."""
588        if timestamp is None:
589            timestamp = time.time()
590        return email.utils.formatdate(timestamp, usegmt=True)
591
592    def log_date_time_string(self):
593        """Return the current time formatted for logging."""
594        now = time.time()
595        year, month, day, hh, mm, ss, x, y, z = time.localtime(now)
596        s = "%02d/%3s/%04d %02d:%02d:%02d" % (
597                day, self.monthname[month], year, hh, mm, ss)
598        return s
599
    # Abbreviated English weekday names, Monday first.
    weekdayname = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']

    # Abbreviated English month names indexed by month number (1-12);
    # index 0 is a None placeholder.  Read by log_date_time_string().
    monthname = [None,
                 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
605
606    def address_string(self):
607        """Return the client address."""
608
609        return self.client_address[0]
610
611    # Essentially static class variables
612
613    # The version of the HTTP protocol we support.
614    # Set this to HTTP/1.1 to enable automatic keepalive
615    protocol_version = "HTTP/1.0"
616
617    # MessageClass used to parse headers
618    MessageClass = http.client.HTTPMessage
619
    # hack to maintain backwards compatibility
    # Maps every HTTPStatus member to a (phrase, description) pair.
    # send_error() and send_response_only() look status codes up here to
    # obtain their default short and long messages.
    responses = {
        v: (v.phrase, v.description)
        for v in HTTPStatus.__members__.values()
    }
625
626
627class SimpleHTTPRequestHandler(BaseHTTPRequestHandler):
628
629    """Simple HTTP request handler with GET and HEAD commands.
630
631    This serves files from the current directory and any of its
632    subdirectories.  The MIME type for files is determined by
633    calling the .guess_type() method.
634
635    The GET and HEAD requests are identical except that the HEAD
636    request omits the actual contents of the file.
637
638    """
639
640    server_version = "SimpleHTTP/" + __version__
641
642    def __init__(self, *args, directory=None, **kwargs):
643        if directory is None:
644            directory = os.getcwd()
645        self.directory = directory
646        super().__init__(*args, **kwargs)
647
648    def do_GET(self):
649        """Serve a GET request."""
650        f = self.send_head()
651        if f:
652            try:
653                self.copyfile(f, self.wfile)
654            finally:
655                f.close()
656
657    def do_HEAD(self):
658        """Serve a HEAD request."""
659        f = self.send_head()
660        if f:
661            f.close()
662
663    def send_head(self):
664        """Common code for GET and HEAD commands.
665
666        This sends the response code and MIME headers.
667
668        Return value is either a file object (which has to be copied
669        to the outputfile by the caller unless the command was HEAD,
670        and must be closed by the caller under all circumstances), or
671        None, in which case the caller has nothing further to do.
672
673        """
674        path = self.translate_path(self.path)
675        f = None
676        if os.path.isdir(path):
677            parts = urllib.parse.urlsplit(self.path)
678            if not parts.path.endswith('/'):
679                # redirect browser - doing basically what apache does
680                self.send_response(HTTPStatus.MOVED_PERMANENTLY)
681                new_parts = (parts[0], parts[1], parts[2] + '/',
682                             parts[3], parts[4])
683                new_url = urllib.parse.urlunsplit(new_parts)
684                self.send_header("Location", new_url)
685                self.end_headers()
686                return None
687            for index in "index.html", "index.htm":
688                index = os.path.join(path, index)
689                if os.path.exists(index):
690                    path = index
691                    break
692            else:
693                return self.list_directory(path)
694        ctype = self.guess_type(path)
695        try:
696            f = open(path, 'rb')
697        except OSError:
698            self.send_error(HTTPStatus.NOT_FOUND, "File not found")
699            return None
700
701        try:
702            fs = os.fstat(f.fileno())
703            # Use browser cache if possible
704            if ("If-Modified-Since" in self.headers
705                    and "If-None-Match" not in self.headers):
706                # compare If-Modified-Since and time of last file modification
707                try:
708                    ims = email.utils.parsedate_to_datetime(
709                        self.headers["If-Modified-Since"])
710                except (TypeError, IndexError, OverflowError, ValueError):
711                    # ignore ill-formed values
712                    pass
713                else:
714                    if ims.tzinfo is None:
715                        # obsolete format with no timezone, cf.
716                        # https://tools.ietf.org/html/rfc7231#section-7.1.1.1
717                        ims = ims.replace(tzinfo=datetime.timezone.utc)
718                    if ims.tzinfo is datetime.timezone.utc:
719                        # compare to UTC datetime of last modification
720                        last_modif = datetime.datetime.fromtimestamp(
721                            fs.st_mtime, datetime.timezone.utc)
722                        # remove microseconds, like in If-Modified-Since
723                        last_modif = last_modif.replace(microsecond=0)
724
725                        if last_modif <= ims:
726                            self.send_response(HTTPStatus.NOT_MODIFIED)
727                            self.end_headers()
728                            f.close()
729                            return None
730
731            self.send_response(HTTPStatus.OK)
732            self.send_header("Content-type", ctype)
733            self.send_header("Content-Length", str(fs[6]))
734            self.send_header("Last-Modified",
735                self.date_time_string(fs.st_mtime))
736            self.end_headers()
737            return f
738        except:
739            f.close()
740            raise
741
742    def list_directory(self, path):
743        """Helper to produce a directory listing (absent index.html).
744
745        Return value is either a file object, or None (indicating an
746        error).  In either case, the headers are sent, making the
747        interface the same as for send_head().
748
749        """
750        try:
751            list = os.listdir(path)
752        except OSError:
753            self.send_error(
754                HTTPStatus.NOT_FOUND,
755                "No permission to list directory")
756            return None
757        list.sort(key=lambda a: a.lower())
758        r = []
759        try:
760            displaypath = urllib.parse.unquote(self.path,
761                                               errors='surrogatepass')
762        except UnicodeDecodeError:
763            displaypath = urllib.parse.unquote(path)
764        displaypath = html.escape(displaypath, quote=False)
765        enc = sys.getfilesystemencoding()
766        title = 'Directory listing for %s' % displaypath
767        r.append('<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" '
768                 '"http://www.w3.org/TR/html4/strict.dtd">')
769        r.append('<html>\n<head>')
770        r.append('<meta http-equiv="Content-Type" '
771                 'content="text/html; charset=%s">' % enc)
772        r.append('<title>%s</title>\n</head>' % title)
773        r.append('<body>\n<h1>%s</h1>' % title)
774        r.append('<hr>\n<ul>')
775        for name in list:
776            fullname = os.path.join(path, name)
777            displayname = linkname = name
778            # Append / for directories or @ for symbolic links
779            if os.path.isdir(fullname):
780                displayname = name + "/"
781                linkname = name + "/"
782            if os.path.islink(fullname):
783                displayname = name + "@"
784                # Note: a link to a directory displays with @ and links with /
785            r.append('<li><a href="%s">%s</a></li>'
786                    % (urllib.parse.quote(linkname,
787                                          errors='surrogatepass'),
788                       html.escape(displayname, quote=False)))
789        r.append('</ul>\n<hr>\n</body>\n</html>\n')
790        encoded = '\n'.join(r).encode(enc, 'surrogateescape')
791        f = io.BytesIO()
792        f.write(encoded)
793        f.seek(0)
794        self.send_response(HTTPStatus.OK)
795        self.send_header("Content-type", "text/html; charset=%s" % enc)
796        self.send_header("Content-Length", str(len(encoded)))
797        self.end_headers()
798        return f
799
800    def translate_path(self, path):
801        """Translate a /-separated PATH to the local filename syntax.
802
803        Components that mean special things to the local file system
804        (e.g. drive or directory names) are ignored.  (XXX They should
805        probably be diagnosed.)
806
807        """
808        # abandon query parameters
809        path = path.split('?',1)[0]
810        path = path.split('#',1)[0]
811        # Don't forget explicit trailing slash when normalizing. Issue17324
812        trailing_slash = path.rstrip().endswith('/')
813        try:
814            path = urllib.parse.unquote(path, errors='surrogatepass')
815        except UnicodeDecodeError:
816            path = urllib.parse.unquote(path)
817        path = posixpath.normpath(path)
818        words = path.split('/')
819        words = filter(None, words)
820        path = self.directory
821        for word in words:
822            if os.path.dirname(word) or word in (os.curdir, os.pardir):
823                # Ignore components that are not a simple file/directory name
824                continue
825            path = os.path.join(path, word)
826        if trailing_slash:
827            path += '/'
828        return path
829
830    def copyfile(self, source, outputfile):
831        """Copy all data between two file objects.
832
833        The SOURCE argument is a file object open for reading
834        (or anything with a read() method) and the DESTINATION
835        argument is a file object open for writing (or
836        anything with a write() method).
837
838        The only reason for overriding this would be to change
839        the block size or perhaps to replace newlines by CRLF
840        -- note however that this the default server uses this
841        to copy binary data as well.
842
843        """
844        shutil.copyfileobj(source, outputfile)
845
846    def guess_type(self, path):
847        """Guess the type of a file.
848
849        Argument is a PATH (a filename).
850
851        Return value is a string of the form type/subtype,
852        usable for a MIME Content-type header.
853
854        The default implementation looks the file's extension
855        up in the table self.extensions_map, using application/octet-stream
856        as a default; however it would be permissible (if
857        slow) to look inside the data to make a better guess.
858
859        """
860
861        base, ext = posixpath.splitext(path)
862        if ext in self.extensions_map:
863            return self.extensions_map[ext]
864        ext = ext.lower()
865        if ext in self.extensions_map:
866            return self.extensions_map[ext]
867        else:
868            return self.extensions_map['']
869
870    if not mimetypes.inited:
871        mimetypes.init() # try to read system mime.types
872    extensions_map = mimetypes.types_map.copy()
873    extensions_map.update({
874        '': 'application/octet-stream', # Default
875        '.py': 'text/plain',
876        '.c': 'text/plain',
877        '.h': 'text/plain',
878        })
879
880
881# Utilities for CGIHTTPRequestHandler
882
883def _url_collapse_path(path):
884    """
885    Given a URL path, remove extra '/'s and '.' path elements and collapse
886    any '..' references and returns a collapsed path.
887
888    Implements something akin to RFC-2396 5.2 step 6 to parse relative paths.
889    The utility of this function is limited to is_cgi method and helps
890    preventing some security attacks.
891
892    Returns: The reconstituted URL, which will always start with a '/'.
893
894    Raises: IndexError if too many '..' occur within the path.
895
896    """
897    # Query component should not be involved.
898    path, _, query = path.partition('?')
899    path = urllib.parse.unquote(path)
900
901    # Similar to os.path.split(os.path.normpath(path)) but specific to URL
902    # path semantics rather than local operating system semantics.
903    path_parts = path.split('/')
904    head_parts = []
905    for part in path_parts[:-1]:
906        if part == '..':
907            head_parts.pop() # IndexError if more '..' than prior parts
908        elif part and part != '.':
909            head_parts.append( part )
910    if path_parts:
911        tail_part = path_parts.pop()
912        if tail_part:
913            if tail_part == '..':
914                head_parts.pop()
915                tail_part = ''
916            elif tail_part == '.':
917                tail_part = ''
918    else:
919        tail_part = ''
920
921    if query:
922        tail_part = '?'.join((tail_part, query))
923
924    splitpath = ('/' + '/'.join(head_parts), tail_part)
925    collapsed_path = "/".join(splitpath)
926
927    return collapsed_path
928
929
930
# Cached result of nobody_uid(); None until the first successful lookup.
nobody = None

def nobody_uid():
    """Internal routine returning the uid to use for user 'nobody'.

    The value is cached in the module-level ``nobody`` variable.  When
    the pwd module cannot be imported, -1 is returned (and not cached).
    When the password database has no 'nobody' entry, one more than the
    largest uid found in it is used instead.
    """
    global nobody
    if not nobody:
        try:
            import pwd
        except ImportError:
            return -1
        try:
            nobody = pwd.getpwnam('nobody').pw_uid
        except KeyError:
            nobody = 1 + max(entry.pw_uid for entry in pwd.getpwall())
    return nobody
947
948
def executable(path):
    """Return whether os.access() reports PATH as executable (os.X_OK)."""
    may_execute = os.access(path, os.X_OK)
    return may_execute
952
953
class CGIHTTPRequestHandler(SimpleHTTPRequestHandler):

    """Complete HTTP server with GET, HEAD and POST commands.

    GET and HEAD also support running CGI scripts.

    The POST command is *only* implemented for CGI scripts.

    """

    # Determine platform specifics
    have_fork = hasattr(os, 'fork')

    # Make rfile unbuffered -- we need to read one line and then pass
    # the rest to a subprocess, so we can't use buffered input.
    rbufsize = 0

    def do_POST(self):
        """Serve a POST request.

        This is only implemented for CGI scripts; any other target is
        answered with a NOT_IMPLEMENTED error.

        """

        if self.is_cgi():
            self.run_cgi()
        else:
            self.send_error(
                HTTPStatus.NOT_IMPLEMENTED,
                "Can only POST to CGI scripts")

    def send_head(self):
        """Version of send_head that supports CGI scripts.

        CGI requests are handed to run_cgi(); everything else is
        delegated to SimpleHTTPRequestHandler.send_head().
        """
        if self.is_cgi():
            return self.run_cgi()
        else:
            return SimpleHTTPRequestHandler.send_head(self)

    def is_cgi(self):
        """Test whether self.path corresponds to a CGI script.

        Returns True and updates the cgi_info attribute to the tuple
        (dir, rest) if self.path requires running a CGI script.
        Returns False otherwise.

        If any exception is raised, the caller should assume that
        self.path was rejected as invalid and act accordingly.

        The default implementation tests whether the normalized url
        path begins with one of the strings in self.cgi_directories
        (and the next character is a '/' or the end of the string).

        """
        collapsed_path = _url_collapse_path(self.path)
        # Split at the first '/' after the leading one: everything before
        # it is the candidate CGI directory, everything after is the rest.
        dir_sep = collapsed_path.find('/', 1)
        head, tail = collapsed_path[:dir_sep], collapsed_path[dir_sep+1:]
        if head in self.cgi_directories:
            self.cgi_info = head, tail
            return True
        return False


    # URL path prefixes that is_cgi() treats as CGI directories.
    cgi_directories = ['/cgi-bin', '/htbin']

    def is_executable(self, path):
        """Test whether argument path is an executable file."""
        return executable(path)

    def is_python(self, path):
        """Test whether argument path is a Python script."""
        _, tail = os.path.splitext(path)
        return tail.lower() in (".py", ".pyw")

    def run_cgi(self):
        """Execute a CGI script.

        Uses the (dir, rest) tuple stored in self.cgi_info by is_cgi().
        The script is located, checked (exists, plain file, executable),
        the CGI environment is assembled from the request, a 200 status
        line is sent, and the script is then run either in a forked
        child (when os.fork is available) or through subprocess.Popen.
        Validation failures are reported with send_error() and the
        method returns without running anything.
        """
        dir, rest = self.cgi_info
        path = dir + '/' + rest
        # Extend `dir` through as many leading path components as map to
        # real directories, so scripts in sub-directories can be found.
        i = path.find('/', len(dir)+1)
        while i >= 0:
            nextdir = path[:i]
            nextrest = path[i+1:]

            scriptdir = self.translate_path(nextdir)
            if os.path.isdir(scriptdir):
                dir, rest = nextdir, nextrest
                i = path.find('/', len(dir)+1)
            else:
                break

        # find an explicit query string, if present.
        rest, _, query = rest.partition('?')

        # dissect the part after the directory name into a script name &
        # a possible additional path, to be stored in PATH_INFO.
        i = rest.find('/')
        if i >= 0:
            script, rest = rest[:i], rest[i:]
        else:
            script, rest = rest, ''

        scriptname = dir + '/' + script
        scriptfile = self.translate_path(scriptname)
        if not os.path.exists(scriptfile):
            self.send_error(
                HTTPStatus.NOT_FOUND,
                "No such CGI script (%r)" % scriptname)
            return
        if not os.path.isfile(scriptfile):
            self.send_error(
                HTTPStatus.FORBIDDEN,
                "CGI script is not a plain file (%r)" % scriptname)
            return
        ispy = self.is_python(scriptname)
        # Without fork, Python scripts are run through the interpreter
        # (see the subprocess branch below), so only then is the
        # executable check skipped.
        if self.have_fork or not ispy:
            if not self.is_executable(scriptfile):
                self.send_error(
                    HTTPStatus.FORBIDDEN,
                    "CGI script is not executable (%r)" % scriptname)
                return

        # Reference: http://hoohoo.ncsa.uiuc.edu/cgi/env.html
        # XXX Much of the following could be prepared ahead of time!
        env = copy.deepcopy(os.environ)
        env['SERVER_SOFTWARE'] = self.version_string()
        env['SERVER_NAME'] = self.server.server_name
        env['GATEWAY_INTERFACE'] = 'CGI/1.1'
        env['SERVER_PROTOCOL'] = self.protocol_version
        env['SERVER_PORT'] = str(self.server.server_port)
        env['REQUEST_METHOD'] = self.command
        uqrest = urllib.parse.unquote(rest)
        env['PATH_INFO'] = uqrest
        env['PATH_TRANSLATED'] = self.translate_path(uqrest)
        env['SCRIPT_NAME'] = scriptname
        if query:
            env['QUERY_STRING'] = query
        env['REMOTE_ADDR'] = self.client_address[0]
        authorization = self.headers.get("authorization")
        if authorization:
            authorization = authorization.split()
            if len(authorization) == 2:
                import base64, binascii
                env['AUTH_TYPE'] = authorization[0]
                if authorization[0].lower() == "basic":
                    try:
                        authorization = authorization[1].encode('ascii')
                        authorization = base64.decodebytes(authorization).\
                                        decode('ascii')
                    except (binascii.Error, UnicodeError):
                        # Undecodable credentials: leave REMOTE_USER unset.
                        pass
                    else:
                        authorization = authorization.split(':')
                        if len(authorization) == 2:
                            env['REMOTE_USER'] = authorization[0]
        # XXX REMOTE_IDENT
        if self.headers.get('content-type') is None:
            env['CONTENT_TYPE'] = self.headers.get_content_type()
        else:
            env['CONTENT_TYPE'] = self.headers['content-type']
        length = self.headers.get('content-length')
        if length:
            env['CONTENT_LENGTH'] = length
        referer = self.headers.get('referer')
        if referer:
            env['HTTP_REFERER'] = referer
        # Use get_all() rather than getallmatchingheaders(): the latter
        # never matches on Python 3 (bpo-5054), which left HTTP_ACCEPT
        # permanently empty.
        accept = self.headers.get_all('accept', ())
        env['HTTP_ACCEPT'] = ','.join(accept)
        ua = self.headers.get('user-agent')
        if ua:
            env['HTTP_USER_AGENT'] = ua
        co = filter(None, self.headers.get_all('cookie', []))
        cookie_str = ', '.join(co)
        if cookie_str:
            env['HTTP_COOKIE'] = cookie_str
        # XXX Other HTTP_* headers
        # Since we're setting the env in the parent, provide empty
        # values to override previously set values
        for k in ('QUERY_STRING', 'REMOTE_HOST', 'CONTENT_LENGTH',
                  'HTTP_USER_AGENT', 'HTTP_COOKIE', 'HTTP_REFERER'):
            env.setdefault(k, "")

        # The status line goes out before the script runs, so the script
        # itself cannot choose a different status code.
        self.send_response(HTTPStatus.OK, "Script output follows")
        self.flush_headers()

        decoded_query = query.replace('+', ' ')

        if self.have_fork:
            # Unix -- fork as we should
            args = [script]
            # A query without '=' is passed as a command-line argument.
            if '=' not in decoded_query:
                args.append(decoded_query)
            nobody = nobody_uid()
            self.wfile.flush() # Always flush before forking
            pid = os.fork()
            if pid != 0:
                # Parent
                pid, sts = os.waitpid(pid, 0)
                # throw away additional data [see bug #427345]
                while select.select([self.rfile], [], [], 0)[0]:
                    if not self.rfile.read(1):
                        break
                if sts:
                    self.log_error("CGI script exit status %#x", sts)
                return
            # Child
            try:
                try:
                    os.setuid(nobody)
                except OSError:
                    # Best effort: keep running under the current uid.
                    pass
                # Wire the connection to the script's stdin/stdout.
                os.dup2(self.rfile.fileno(), 0)
                os.dup2(self.wfile.fileno(), 1)
                os.execve(scriptfile, args, env)
            except:
                # Deliberately broad: whatever went wrong, the child must
                # report it and exit instead of returning into the server.
                self.server.handle_error(self.request, self.client_address)
                os._exit(127)

        else:
            # Non-Unix -- use subprocess
            import subprocess
            cmdline = [scriptfile]
            if self.is_python(scriptfile):
                interp = sys.executable
                if interp.lower().endswith("w.exe"):
                    # On Windows, use python.exe, not pythonw.exe
                    interp = interp[:-5] + interp[-4:]
                cmdline = [interp, '-u'] + cmdline
            if '=' not in query:
                cmdline.append(query)
            self.log_message("command: %s", subprocess.list2cmdline(cmdline))
            try:
                nbytes = int(length)
            except (TypeError, ValueError):
                # Missing or malformed Content-Length: read no body.
                nbytes = 0
            p = subprocess.Popen(cmdline,
                                 stdin=subprocess.PIPE,
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE,
                                 env = env
                                 )
            if self.command.lower() == "post" and nbytes > 0:
                data = self.rfile.read(nbytes)
            else:
                data = None
            # throw away additional data [see bug #427345]
            while select.select([self.rfile._sock], [], [], 0)[0]:
                if not self.rfile._sock.recv(1):
                    break
            stdout, stderr = p.communicate(data)
            self.wfile.write(stdout)
            if stderr:
                self.log_error('%s', stderr)
            p.stderr.close()
            p.stdout.close()
            status = p.returncode
            if status:
                self.log_error("CGI script exit status %#x", status)
            else:
                self.log_message("CGI script exited OK")
1217
1218
def test(HandlerClass=BaseHTTPRequestHandler,
         ServerClass=ThreadingHTTPServer,
         protocol="HTTP/1.0", port=8000, bind=""):
    """Run a server built from ServerClass and HandlerClass until interrupted.

    The server is bound to (bind, port) -- port 8000 unless the port
    argument says otherwise -- and HandlerClass.protocol_version is set
    to PROTOCOL before the server is created.  A KeyboardInterrupt ends
    the loop, prints a message and exits the process with status 0.

    """
    HandlerClass.protocol_version = protocol
    with ServerClass((bind, port), HandlerClass) as httpd:
        # Report the address actually bound, as given by the socket.
        bound = httpd.socket.getsockname()
        print("Serving HTTP on {host} port {port} (http://{host}:{port}/) ..."
              .format(host=bound[0], port=bound[1]))
        try:
            httpd.serve_forever()
        except KeyboardInterrupt:
            print("\nKeyboard interrupt received, exiting.")
            sys.exit(0)
1239
if __name__ == '__main__':
    import argparse

    # Command-line interface: optional flags plus an optional port.
    cli = argparse.ArgumentParser()
    cli.add_argument('--cgi', action='store_true',
                     help='Run as CGI Server')
    cli.add_argument('--bind', '-b', default='', metavar='ADDRESS',
                     help='Specify alternate bind address '
                          '[default: all interfaces]')
    cli.add_argument('--directory', '-d', default=os.getcwd(),
                     help='Specify alternative directory '
                          '[default:current directory]')
    cli.add_argument('port', action='store',
                     default=8000, type=int,
                     nargs='?',
                     help='Specify alternate port [default: 8000]')
    options = cli.parse_args()

    # --directory is only applied to the plain file-serving handler.
    handler_class = (
        CGIHTTPRequestHandler
        if options.cgi
        else partial(SimpleHTTPRequestHandler, directory=options.directory)
    )
    test(HandlerClass=handler_class, port=options.port, bind=options.bind)
1263