• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1"""HTTP server classes.
2
3Note: BaseHTTPRequestHandler doesn't implement any HTTP request; see
4SimpleHTTPRequestHandler for simple implementations of GET, HEAD and POST,
5and CGIHTTPRequestHandler for CGI scripts.
6
7It does, however, optionally implement HTTP/1.1 persistent connections,
8as of version 0.3.
9
10Notes on CGIHTTPRequestHandler
11------------------------------
12
13This class implements GET and POST requests to cgi-bin scripts.
14
15If the os.fork() function is not present (e.g. on Windows),
16subprocess.Popen() is used as a fallback, with slightly altered semantics.
17
18In all cases, the implementation is intentionally naive -- all
19requests are executed synchronously.
20
21SECURITY WARNING: DON'T USE THIS CODE UNLESS YOU ARE INSIDE A FIREWALL
22-- it may execute arbitrary Python code or external programs.
23
24Note that status code 200 is sent prior to execution of a CGI script, so
25scripts cannot send other status codes such as 302 (redirect).
26
27XXX To do:
28
29- log requests even later (to capture byte count)
30- log user-agent header and other interesting goodies
31- send error log to separate file
32"""
33
34
35# See also:
36#
37# HTTP Working Group                                        T. Berners-Lee
38# INTERNET-DRAFT                                            R. T. Fielding
39# <draft-ietf-http-v10-spec-00.txt>                     H. Frystyk Nielsen
40# Expires September 8, 1995                                  March 8, 1995
41#
42# URL: http://www.ics.uci.edu/pub/ietf/http/draft-ietf-http-v10-spec-00.txt
43#
44# and
45#
46# Network Working Group                                      R. Fielding
47# Request for Comments: 2616                                       et al
48# Obsoletes: 2068                                              June 1999
49# Category: Standards Track
50#
51# URL: http://www.faqs.org/rfcs/rfc2616.html
52
53# Log files
54# ---------
55#
56# Here's a quote from the NCSA httpd docs about log file format.
57#
58# | The logfile format is as follows. Each line consists of:
59# |
60# | host rfc931 authuser [DD/Mon/YYYY:hh:mm:ss] "request" ddd bbbb
61# |
62# |        host: Either the DNS name or the IP number of the remote client
63# |        rfc931: Any information returned by identd for this person,
64# |                - otherwise.
65# |        authuser: If user sent a userid for authentication, the user name,
66# |                  - otherwise.
67# |        DD: Day
68# |        Mon: Month (calendar name)
69# |        YYYY: Year
70# |        hh: hour (24-hour format, the machine's timezone)
71# |        mm: minutes
72# |        ss: seconds
73# |        request: The first line of the HTTP request as sent by the client.
74# |        ddd: the status code returned by the server, - if not available.
75# |        bbbb: the total number of bytes sent,
76# |              *not including the HTTP/1.0 header*, - if not available
77# |
78# | You can determine the name of the file accessed through request.
79#
80# (Actually, the latter is only true if you know the server configuration
81# at the time the request was made!)
82
83__version__ = "0.6"
84
85__all__ = [
86    "HTTPServer", "ThreadingHTTPServer", "BaseHTTPRequestHandler",
87    "SimpleHTTPRequestHandler", "CGIHTTPRequestHandler",
88]
89
90import copy
91import datetime
92import email.utils
93import html
94import http.client
95import io
96import mimetypes
97import os
98import posixpath
99import select
100import shutil
101import socket # For gethostbyaddr()
102import socketserver
103import sys
104import time
105import urllib.parse
106import contextlib
107from functools import partial
108
109from http import HTTPStatus
110
111
# Default error message template.
# It is %-formatted with a mapping that supplies the keys 'code', 'message'
# and 'explain' (see BaseHTTPRequestHandler.send_error).
DEFAULT_ERROR_MESSAGE = """\
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
        "http://www.w3.org/TR/html4/strict.dtd">
<html>
    <head>
        <meta http-equiv="Content-Type" content="text/html;charset=utf-8">
        <title>Error response</title>
    </head>
    <body>
        <h1>Error response</h1>
        <p>Error code: %(code)d</p>
        <p>Message: %(message)s.</p>
        <p>Error code explanation: %(code)s - %(explain)s.</p>
    </body>
</html>
"""

# Content-Type header value sent together with the error page above.
DEFAULT_ERROR_CONTENT_TYPE = "text/html;charset=utf-8"
131
class HTTPServer(socketserver.TCPServer):
    """TCP server that records its own host name and port once bound."""

    # Allow the listening address to be reused; this seems to make sense
    # in a testing environment.
    allow_reuse_address = 1

    def server_bind(self):
        """Bind the socket, then store the server name and port.

        After the base-class bind, ``server_name`` is set to the fully
        qualified domain name for the bound host and ``server_port`` to the
        bound port.
        """
        socketserver.TCPServer.server_bind(self)
        bound_host, bound_port = self.server_address[:2]
        self.server_name = socket.getfqdn(bound_host)
        self.server_port = bound_port
142
143
class ThreadingHTTPServer(socketserver.ThreadingMixIn, HTTPServer):
    """HTTPServer combined with socketserver.ThreadingMixIn."""

    # Flag consumed by socketserver.ThreadingMixIn.
    daemon_threads = True
146
147
148class BaseHTTPRequestHandler(socketserver.StreamRequestHandler):
149
150    """HTTP request handler base class.
151
152    The following explanation of HTTP serves to guide you through the
153    code as well as to expose any misunderstandings I may have about
154    HTTP (so you don't need to read the code to figure out I'm wrong
155    :-).
156
157    HTTP (HyperText Transfer Protocol) is an extensible protocol on
158    top of a reliable stream transport (e.g. TCP/IP).  The protocol
159    recognizes three parts to a request:
160
161    1. One line identifying the request type and path
162    2. An optional set of RFC-822-style headers
163    3. An optional data part
164
165    The headers and data are separated by a blank line.
166
167    The first line of the request has the form
168
169    <command> <path> <version>
170
171    where <command> is a (case-sensitive) keyword such as GET or POST,
172    <path> is a string containing path information for the request,
173    and <version> should be the string "HTTP/1.0" or "HTTP/1.1".
174    <path> is encoded using the URL encoding scheme (using %xx to signify
175    the ASCII character with hex code xx).
176
177    The specification specifies that lines are separated by CRLF but
178    for compatibility with the widest range of clients recommends
179    servers also handle LF.  Similarly, whitespace in the request line
180    is treated sensibly (allowing multiple spaces between components
181    and allowing trailing whitespace).
182
183    Similarly, for output, lines ought to be separated by CRLF pairs
184    but most clients grok LF characters just fine.
185
186    If the first line of the request has the form
187
188    <command> <path>
189
190    (i.e. <version> is left out) then this is assumed to be an HTTP
191    0.9 request; this form has no optional headers and data part and
192    the reply consists of just the data.
193
194    The reply form of the HTTP 1.x protocol again has three parts:
195
196    1. One line giving the response code
197    2. An optional set of RFC-822-style headers
198    3. The data
199
200    Again, the headers and data are separated by a blank line.
201
202    The response code line has the form
203
204    <version> <responsecode> <responsestring>
205
206    where <version> is the protocol version ("HTTP/1.0" or "HTTP/1.1"),
207    <responsecode> is a 3-digit response code indicating success or
208    failure of the request, and <responsestring> is an optional
209    human-readable string explaining what the response code means.
210
211    This server parses the request and the headers, and then calls a
212    function specific to the request type (<command>).  Specifically,
213    a request SPAM will be handled by a method do_SPAM().  If no
214    such method exists the server sends an error response to the
215    client.  If it exists, it is called with no arguments:
216
217    do_SPAM()
218
219    Note that the request name is case sensitive (i.e. SPAM and spam
220    are different requests).
221
222    The various request details are stored in instance variables:
223
224    - client_address is the client IP address in the form (host,
225    port);
226
227    - command, path and version are the broken-down request line;
228
229    - headers is an instance of email.message.Message (or a derived
230    class) containing the header information;
231
232    - rfile is a file object open for reading positioned at the
233    start of the optional input data part;
234
235    - wfile is a file object open for writing.
236
237    IT IS IMPORTANT TO ADHERE TO THE PROTOCOL FOR WRITING!
238
239    The first thing to be written must be the response line.  Then
240    follow 0 or more header lines, then a blank line, and then the
241    actual data (if any).  The meaning of the header lines depends on
242    the command executed by the server; in most cases, when data is
243    returned, there should be at least one header line of the form
244
245    Content-type: <type>/<subtype>
246
247    where <type> and <subtype> should be registered MIME types,
248    e.g. "text/html" or "text/plain".
249
250    """
251
    # The Python system version, truncated to its first component.
    # Combined with server_version by version_string().
    sys_version = "Python/" + sys.version.split()[0]

    # The server software version.  You may want to override this.
    # The format is multiple whitespace-separated strings,
    # where each string is of the form name[/version].
    server_version = "BaseHTTP/" + __version__

    # Template and Content-Type used by send_error() for the error body.
    error_message_format = DEFAULT_ERROR_MESSAGE
    error_content_type = DEFAULT_ERROR_CONTENT_TYPE

    # The default request version.  This only affects responses up until
    # the point where the request line is parsed, so it mainly decides what
    # the client gets back when sending a malformed request line.
    # Most web servers default to HTTP 0.9, i.e. don't send a status line.
    default_request_version = "HTTP/0.9"
268
269    def parse_request(self):
270        """Parse a request (internal).
271
272        The request should be stored in self.raw_requestline; the results
273        are in self.command, self.path, self.request_version and
274        self.headers.
275
276        Return True for success, False for failure; on failure, any relevant
277        error response has already been sent back.
278
279        """
280        self.command = None  # set in case of error on the first line
281        self.request_version = version = self.default_request_version
282        self.close_connection = True
283        requestline = str(self.raw_requestline, 'iso-8859-1')
284        requestline = requestline.rstrip('\r\n')
285        self.requestline = requestline
286        words = requestline.split()
287        if len(words) == 0:
288            return False
289
290        if len(words) >= 3:  # Enough to determine protocol version
291            version = words[-1]
292            try:
293                if not version.startswith('HTTP/'):
294                    raise ValueError
295                base_version_number = version.split('/', 1)[1]
296                version_number = base_version_number.split(".")
297                # RFC 2145 section 3.1 says there can be only one "." and
298                #   - major and minor numbers MUST be treated as
299                #      separate integers;
300                #   - HTTP/2.4 is a lower version than HTTP/2.13, which in
301                #      turn is lower than HTTP/12.3;
302                #   - Leading zeros MUST be ignored by recipients.
303                if len(version_number) != 2:
304                    raise ValueError
305                version_number = int(version_number[0]), int(version_number[1])
306            except (ValueError, IndexError):
307                self.send_error(
308                    HTTPStatus.BAD_REQUEST,
309                    "Bad request version (%r)" % version)
310                return False
311            if version_number >= (1, 1) and self.protocol_version >= "HTTP/1.1":
312                self.close_connection = False
313            if version_number >= (2, 0):
314                self.send_error(
315                    HTTPStatus.HTTP_VERSION_NOT_SUPPORTED,
316                    "Invalid HTTP version (%s)" % base_version_number)
317                return False
318            self.request_version = version
319
320        if not 2 <= len(words) <= 3:
321            self.send_error(
322                HTTPStatus.BAD_REQUEST,
323                "Bad request syntax (%r)" % requestline)
324            return False
325        command, path = words[:2]
326        if len(words) == 2:
327            self.close_connection = True
328            if command != 'GET':
329                self.send_error(
330                    HTTPStatus.BAD_REQUEST,
331                    "Bad HTTP/0.9 request type (%r)" % command)
332                return False
333        self.command, self.path = command, path
334
335        # gh-87389: The purpose of replacing '//' with '/' is to protect
336        # against open redirect attacks possibly triggered if the path starts
337        # with '//' because http clients treat //path as an absolute URI
338        # without scheme (similar to http://path) rather than a path.
339        if self.path.startswith('//'):
340            self.path = '/' + self.path.lstrip('/')  # Reduce to a single /
341
342        # Examine the headers and look for a Connection directive.
343        try:
344            self.headers = http.client.parse_headers(self.rfile,
345                                                     _class=self.MessageClass)
346        except http.client.LineTooLong as err:
347            self.send_error(
348                HTTPStatus.REQUEST_HEADER_FIELDS_TOO_LARGE,
349                "Line too long",
350                str(err))
351            return False
352        except http.client.HTTPException as err:
353            self.send_error(
354                HTTPStatus.REQUEST_HEADER_FIELDS_TOO_LARGE,
355                "Too many headers",
356                str(err)
357            )
358            return False
359
360        conntype = self.headers.get('Connection', "")
361        if conntype.lower() == 'close':
362            self.close_connection = True
363        elif (conntype.lower() == 'keep-alive' and
364              self.protocol_version >= "HTTP/1.1"):
365            self.close_connection = False
366        # Examine the headers and look for an Expect directive
367        expect = self.headers.get('Expect', "")
368        if (expect.lower() == "100-continue" and
369                self.protocol_version >= "HTTP/1.1" and
370                self.request_version >= "HTTP/1.1"):
371            if not self.handle_expect_100():
372                return False
373        return True
374
375    def handle_expect_100(self):
376        """Decide what to do with an "Expect: 100-continue" header.
377
378        If the client is expecting a 100 Continue response, we must
379        respond with either a 100 Continue or a final response before
380        waiting for the request body. The default is to always respond
381        with a 100 Continue. You can behave differently (for example,
382        reject unauthorized requests) by overriding this method.
383
384        This method should either return True (possibly after sending
385        a 100 Continue response) or send an error response and return
386        False.
387
388        """
389        self.send_response_only(HTTPStatus.CONTINUE)
390        self.end_headers()
391        return True
392
393    def handle_one_request(self):
394        """Handle a single HTTP request.
395
396        You normally don't need to override this method; see the class
397        __doc__ string for information on how to handle specific HTTP
398        commands such as GET and POST.
399
400        """
401        try:
402            self.raw_requestline = self.rfile.readline(65537)
403            if len(self.raw_requestline) > 65536:
404                self.requestline = ''
405                self.request_version = ''
406                self.command = ''
407                self.send_error(HTTPStatus.REQUEST_URI_TOO_LONG)
408                return
409            if not self.raw_requestline:
410                self.close_connection = True
411                return
412            if not self.parse_request():
413                # An error code has been sent, just exit
414                return
415            mname = 'do_' + self.command
416            if not hasattr(self, mname):
417                self.send_error(
418                    HTTPStatus.NOT_IMPLEMENTED,
419                    "Unsupported method (%r)" % self.command)
420                return
421            method = getattr(self, mname)
422            method()
423            self.wfile.flush() #actually send the response if not already done.
424        except socket.timeout as e:
425            #a read or a write timed out.  Discard this connection
426            self.log_error("Request timed out: %r", e)
427            self.close_connection = True
428            return
429
430    def handle(self):
431        """Handle multiple requests if necessary."""
432        self.close_connection = True
433
434        self.handle_one_request()
435        while not self.close_connection:
436            self.handle_one_request()
437
438    def send_error(self, code, message=None, explain=None):
439        """Send and log an error reply.
440
441        Arguments are
442        * code:    an HTTP error code
443                   3 digits
444        * message: a simple optional 1 line reason phrase.
445                   *( HTAB / SP / VCHAR / %x80-FF )
446                   defaults to short entry matching the response code
447        * explain: a detailed message defaults to the long entry
448                   matching the response code.
449
450        This sends an error response (so it must be called before any
451        output has been generated), logs the error, and finally sends
452        a piece of HTML explaining the error to the user.
453
454        """
455
456        try:
457            shortmsg, longmsg = self.responses[code]
458        except KeyError:
459            shortmsg, longmsg = '???', '???'
460        if message is None:
461            message = shortmsg
462        if explain is None:
463            explain = longmsg
464        self.log_error("code %d, message %s", code, message)
465        self.send_response(code, message)
466        self.send_header('Connection', 'close')
467
468        # Message body is omitted for cases described in:
469        #  - RFC7230: 3.3. 1xx, 204(No Content), 304(Not Modified)
470        #  - RFC7231: 6.3.6. 205(Reset Content)
471        body = None
472        if (code >= 200 and
473            code not in (HTTPStatus.NO_CONTENT,
474                         HTTPStatus.RESET_CONTENT,
475                         HTTPStatus.NOT_MODIFIED)):
476            # HTML encode to prevent Cross Site Scripting attacks
477            # (see bug #1100201)
478            content = (self.error_message_format % {
479                'code': code,
480                'message': html.escape(message, quote=False),
481                'explain': html.escape(explain, quote=False)
482            })
483            body = content.encode('UTF-8', 'replace')
484            self.send_header("Content-Type", self.error_content_type)
485            self.send_header('Content-Length', str(len(body)))
486        self.end_headers()
487
488        if self.command != 'HEAD' and body:
489            self.wfile.write(body)
490
491    def send_response(self, code, message=None):
492        """Add the response header to the headers buffer and log the
493        response code.
494
495        Also send two standard headers with the server software
496        version and the current date.
497
498        """
499        self.log_request(code)
500        self.send_response_only(code, message)
501        self.send_header('Server', self.version_string())
502        self.send_header('Date', self.date_time_string())
503
504    def send_response_only(self, code, message=None):
505        """Send the response header only."""
506        if self.request_version != 'HTTP/0.9':
507            if message is None:
508                if code in self.responses:
509                    message = self.responses[code][0]
510                else:
511                    message = ''
512            if not hasattr(self, '_headers_buffer'):
513                self._headers_buffer = []
514            self._headers_buffer.append(("%s %d %s\r\n" %
515                    (self.protocol_version, code, message)).encode(
516                        'latin-1', 'strict'))
517
518    def send_header(self, keyword, value):
519        """Send a MIME header to the headers buffer."""
520        if self.request_version != 'HTTP/0.9':
521            if not hasattr(self, '_headers_buffer'):
522                self._headers_buffer = []
523            self._headers_buffer.append(
524                ("%s: %s\r\n" % (keyword, value)).encode('latin-1', 'strict'))
525
526        if keyword.lower() == 'connection':
527            if value.lower() == 'close':
528                self.close_connection = True
529            elif value.lower() == 'keep-alive':
530                self.close_connection = False
531
532    def end_headers(self):
533        """Send the blank line ending the MIME headers."""
534        if self.request_version != 'HTTP/0.9':
535            self._headers_buffer.append(b"\r\n")
536            self.flush_headers()
537
538    def flush_headers(self):
539        if hasattr(self, '_headers_buffer'):
540            self.wfile.write(b"".join(self._headers_buffer))
541            self._headers_buffer = []
542
543    def log_request(self, code='-', size='-'):
544        """Log an accepted request.
545
546        This is called by send_response().
547
548        """
549        if isinstance(code, HTTPStatus):
550            code = code.value
551        self.log_message('"%s" %s %s',
552                         self.requestline, str(code), str(size))
553
554    def log_error(self, format, *args):
555        """Log an error.
556
557        This is called when a request cannot be fulfilled.  By
558        default it passes the message on to log_message().
559
560        Arguments are the same as for log_message().
561
562        XXX This should go to the separate error log.
563
564        """
565
566        self.log_message(format, *args)
567
568    def log_message(self, format, *args):
569        """Log an arbitrary message.
570
571        This is used by all other logging functions.  Override
572        it if you have specific logging wishes.
573
574        The first argument, FORMAT, is a format string for the
575        message to be logged.  If the format string contains
576        any % escapes requiring parameters, they should be
577        specified as subsequent arguments (it's just like
578        printf!).
579
580        The client ip and current date/time are prefixed to
581        every message.
582
583        """
584
585        sys.stderr.write("%s - - [%s] %s\n" %
586                         (self.address_string(),
587                          self.log_date_time_string(),
588                          format%args))
589
590    def version_string(self):
591        """Return the server software version string."""
592        return self.server_version + ' ' + self.sys_version
593
594    def date_time_string(self, timestamp=None):
595        """Return the current date and time formatted for a message header."""
596        if timestamp is None:
597            timestamp = time.time()
598        return email.utils.formatdate(timestamp, usegmt=True)
599
600    def log_date_time_string(self):
601        """Return the current time formatted for logging."""
602        now = time.time()
603        year, month, day, hh, mm, ss, x, y, z = time.localtime(now)
604        s = "%02d/%3s/%04d %02d:%02d:%02d" % (
605                day, self.monthname[month], year, hh, mm, ss)
606        return s
607
    # Abbreviated day names, Monday first.
    weekdayname = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']

    # Abbreviated month names.  Index 0 is a None placeholder so that a
    # month number 1..12 indexes the list directly (see
    # log_date_time_string()).
    monthname = [None,
                 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
613
614    def address_string(self):
615        """Return the client address."""
616
617        return self.client_address[0]
618
    # Essentially static class variables

    # The version of the HTTP protocol we support.
    # Set this to HTTP/1.1 to enable automatic keepalive
    # (parse_request() only keeps a connection open when this compares
    # >= "HTTP/1.1").
    protocol_version = "HTTP/1.0"

    # MessageClass used to parse headers
    MessageClass = http.client.HTTPMessage

    # hack to maintain backwards compatibility:
    # maps every HTTPStatus member to a (phrase, description) tuple; it is
    # consulted by send_error() and send_response_only().
    responses = {
        v: (v.phrase, v.description)
        for v in HTTPStatus.__members__.values()
    }
633
634
635class SimpleHTTPRequestHandler(BaseHTTPRequestHandler):
636
637    """Simple HTTP request handler with GET and HEAD commands.
638
639    This serves files from the current directory and any of its
640    subdirectories.  The MIME type for files is determined by
641    calling the .guess_type() method.
642
643    The GET and HEAD requests are identical except that the HEAD
644    request omits the actual contents of the file.
645
646    """
647
648    server_version = "SimpleHTTP/" + __version__
649
650    def __init__(self, *args, directory=None, **kwargs):
651        if directory is None:
652            directory = os.getcwd()
653        self.directory = directory
654        super().__init__(*args, **kwargs)
655
656    def do_GET(self):
657        """Serve a GET request."""
658        f = self.send_head()
659        if f:
660            try:
661                self.copyfile(f, self.wfile)
662            finally:
663                f.close()
664
665    def do_HEAD(self):
666        """Serve a HEAD request."""
667        f = self.send_head()
668        if f:
669            f.close()
670
671    def send_head(self):
672        """Common code for GET and HEAD commands.
673
674        This sends the response code and MIME headers.
675
676        Return value is either a file object (which has to be copied
677        to the outputfile by the caller unless the command was HEAD,
678        and must be closed by the caller under all circumstances), or
679        None, in which case the caller has nothing further to do.
680
681        """
682        path = self.translate_path(self.path)
683        f = None
684        if os.path.isdir(path):
685            parts = urllib.parse.urlsplit(self.path)
686            if not parts.path.endswith('/'):
687                # redirect browser - doing basically what apache does
688                self.send_response(HTTPStatus.MOVED_PERMANENTLY)
689                new_parts = (parts[0], parts[1], parts[2] + '/',
690                             parts[3], parts[4])
691                new_url = urllib.parse.urlunsplit(new_parts)
692                self.send_header("Location", new_url)
693                self.end_headers()
694                return None
695            for index in "index.html", "index.htm":
696                index = os.path.join(path, index)
697                if os.path.exists(index):
698                    path = index
699                    break
700            else:
701                return self.list_directory(path)
702        ctype = self.guess_type(path)
703        # check for trailing "/" which should return 404. See Issue17324
704        # The test for this was added in test_httpserver.py
705        # However, some OS platforms accept a trailingSlash as a filename
706        # See discussion on python-dev and Issue34711 regarding
707        # parseing and rejection of filenames with a trailing slash
708        if path.endswith("/"):
709            self.send_error(HTTPStatus.NOT_FOUND, "File not found")
710            return None
711        try:
712            f = open(path, 'rb')
713        except OSError:
714            self.send_error(HTTPStatus.NOT_FOUND, "File not found")
715            return None
716
717        try:
718            fs = os.fstat(f.fileno())
719            # Use browser cache if possible
720            if ("If-Modified-Since" in self.headers
721                    and "If-None-Match" not in self.headers):
722                # compare If-Modified-Since and time of last file modification
723                try:
724                    ims = email.utils.parsedate_to_datetime(
725                        self.headers["If-Modified-Since"])
726                except (TypeError, IndexError, OverflowError, ValueError):
727                    # ignore ill-formed values
728                    pass
729                else:
730                    if ims.tzinfo is None:
731                        # obsolete format with no timezone, cf.
732                        # https://tools.ietf.org/html/rfc7231#section-7.1.1.1
733                        ims = ims.replace(tzinfo=datetime.timezone.utc)
734                    if ims.tzinfo is datetime.timezone.utc:
735                        # compare to UTC datetime of last modification
736                        last_modif = datetime.datetime.fromtimestamp(
737                            fs.st_mtime, datetime.timezone.utc)
738                        # remove microseconds, like in If-Modified-Since
739                        last_modif = last_modif.replace(microsecond=0)
740
741                        if last_modif <= ims:
742                            self.send_response(HTTPStatus.NOT_MODIFIED)
743                            self.end_headers()
744                            f.close()
745                            return None
746
747            self.send_response(HTTPStatus.OK)
748            self.send_header("Content-type", ctype)
749            self.send_header("Content-Length", str(fs[6]))
750            self.send_header("Last-Modified",
751                self.date_time_string(fs.st_mtime))
752            self.end_headers()
753            return f
754        except:
755            f.close()
756            raise
757
758    def list_directory(self, path):
759        """Helper to produce a directory listing (absent index.html).
760
761        Return value is either a file object, or None (indicating an
762        error).  In either case, the headers are sent, making the
763        interface the same as for send_head().
764
765        """
766        try:
767            list = os.listdir(path)
768        except OSError:
769            self.send_error(
770                HTTPStatus.NOT_FOUND,
771                "No permission to list directory")
772            return None
773        list.sort(key=lambda a: a.lower())
774        r = []
775        try:
776            displaypath = urllib.parse.unquote(self.path,
777                                               errors='surrogatepass')
778        except UnicodeDecodeError:
779            displaypath = urllib.parse.unquote(path)
780        displaypath = html.escape(displaypath, quote=False)
781        enc = sys.getfilesystemencoding()
782        title = 'Directory listing for %s' % displaypath
783        r.append('<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" '
784                 '"http://www.w3.org/TR/html4/strict.dtd">')
785        r.append('<html>\n<head>')
786        r.append('<meta http-equiv="Content-Type" '
787                 'content="text/html; charset=%s">' % enc)
788        r.append('<title>%s</title>\n</head>' % title)
789        r.append('<body>\n<h1>%s</h1>' % title)
790        r.append('<hr>\n<ul>')
791        for name in list:
792            fullname = os.path.join(path, name)
793            displayname = linkname = name
794            # Append / for directories or @ for symbolic links
795            if os.path.isdir(fullname):
796                displayname = name + "/"
797                linkname = name + "/"
798            if os.path.islink(fullname):
799                displayname = name + "@"
800                # Note: a link to a directory displays with @ and links with /
801            r.append('<li><a href="%s">%s</a></li>'
802                    % (urllib.parse.quote(linkname,
803                                          errors='surrogatepass'),
804                       html.escape(displayname, quote=False)))
805        r.append('</ul>\n<hr>\n</body>\n</html>\n')
806        encoded = '\n'.join(r).encode(enc, 'surrogateescape')
807        f = io.BytesIO()
808        f.write(encoded)
809        f.seek(0)
810        self.send_response(HTTPStatus.OK)
811        self.send_header("Content-type", "text/html; charset=%s" % enc)
812        self.send_header("Content-Length", str(len(encoded)))
813        self.end_headers()
814        return f
815
816    def translate_path(self, path):
817        """Translate a /-separated PATH to the local filename syntax.
818
819        Components that mean special things to the local file system
820        (e.g. drive or directory names) are ignored.  (XXX They should
821        probably be diagnosed.)
822
823        """
824        # abandon query parameters
825        path = path.split('?',1)[0]
826        path = path.split('#',1)[0]
827        # Don't forget explicit trailing slash when normalizing. Issue17324
828        trailing_slash = path.rstrip().endswith('/')
829        try:
830            path = urllib.parse.unquote(path, errors='surrogatepass')
831        except UnicodeDecodeError:
832            path = urllib.parse.unquote(path)
833        path = posixpath.normpath(path)
834        words = path.split('/')
835        words = filter(None, words)
836        path = self.directory
837        for word in words:
838            if os.path.dirname(word) or word in (os.curdir, os.pardir):
839                # Ignore components that are not a simple file/directory name
840                continue
841            path = os.path.join(path, word)
842        if trailing_slash:
843            path += '/'
844        return path
845
846    def copyfile(self, source, outputfile):
847        """Copy all data between two file objects.
848
849        The SOURCE argument is a file object open for reading
850        (or anything with a read() method) and the DESTINATION
851        argument is a file object open for writing (or
852        anything with a write() method).
853
854        The only reason for overriding this would be to change
855        the block size or perhaps to replace newlines by CRLF
856        -- note however that this the default server uses this
857        to copy binary data as well.
858
859        """
860        shutil.copyfileobj(source, outputfile)
861
862    def guess_type(self, path):
863        """Guess the type of a file.
864
865        Argument is a PATH (a filename).
866
867        Return value is a string of the form type/subtype,
868        usable for a MIME Content-type header.
869
870        The default implementation looks the file's extension
871        up in the table self.extensions_map, using application/octet-stream
872        as a default; however it would be permissible (if
873        slow) to look inside the data to make a better guess.
874
875        """
876
877        base, ext = posixpath.splitext(path)
878        if ext in self.extensions_map:
879            return self.extensions_map[ext]
880        ext = ext.lower()
881        if ext in self.extensions_map:
882            return self.extensions_map[ext]
883        else:
884            return self.extensions_map['']
885
886    if not mimetypes.inited:
887        mimetypes.init() # try to read system mime.types
888    extensions_map = mimetypes.types_map.copy()
889    extensions_map.update({
890        '': 'application/octet-stream', # Default
891        '.py': 'text/plain',
892        '.c': 'text/plain',
893        '.h': 'text/plain',
894        })
895
896
897# Utilities for CGIHTTPRequestHandler
898
def _url_collapse_path(path):
    """
    Collapse a URL path: drop empty and '.' elements and resolve '..'.

    Implements something akin to RFC-2396 5.2 step 6 to parse relative paths.
    The utility of this function is limited to is_cgi method and helps
    preventing some security attacks.

    The query component, if any, is left untouched and re-attached to the
    last path element.

    Returns: The reconstituted URL, which will always start with a '/'.

    Raises: IndexError if too many '..' occur within the path.

    """
    # Query component should not be involved.
    path, _, query = path.partition('?')
    path = urllib.parse.unquote(path)

    # Similar to os.path.split(os.path.normpath(path)) but specific to URL
    # path semantics rather than local operating system semantics.
    # str.split() always yields at least one element, so a tail exists.
    *leading, tail_part = path.split('/')
    head_parts = []
    for segment in leading:
        if segment == '..':
            head_parts.pop()  # IndexError if more '..' than prior parts
        elif segment and segment != '.':
            head_parts.append(segment)

    if tail_part == '..':
        head_parts.pop()
        tail_part = ''
    elif tail_part == '.':
        tail_part = ''

    if query:
        tail_part = tail_part + '?' + query

    return '/' + '/'.join(head_parts) + '/' + tail_part
944
945
946
# Cached uid of the 'nobody' user; filled in lazily by nobody_uid().
nobody = None


def nobody_uid():
    """Internal routine to get nobody's uid.

    Returns -1 when the pwd module is unavailable.  Otherwise the uid of
    the 'nobody' account is returned, or, if no such account exists, one
    more than the highest uid in the password database.  The result is
    stored in the module-level ``nobody`` variable for later calls.
    """
    global nobody
    if not nobody:
        try:
            import pwd
        except ImportError:
            return -1
        try:
            nobody = pwd.getpwnam('nobody').pw_uid
        except KeyError:
            nobody = 1 + max(entry.pw_uid for entry in pwd.getpwall())
    return nobody
963
964
def executable(path):
    """Test for executable file: does os.access() grant X_OK on PATH?"""
    return os.access(path, mode=os.X_OK)
968
969
class CGIHTTPRequestHandler(SimpleHTTPRequestHandler):

    """Complete HTTP server with GET, HEAD and POST commands.

    GET and HEAD also support running CGI scripts.

    The POST command is *only* implemented for CGI scripts.

    """

    # Determine platform specifics
    have_fork = hasattr(os, 'fork')

    # Make rfile unbuffered -- we need to read one line and then pass
    # the rest to a subprocess, so we can't use buffered input.
    rbufsize = 0

    # Collapsed URL prefixes whose contents are run as CGI scripts.
    cgi_directories = ['/cgi-bin', '/htbin']

    def do_POST(self):
        """Serve a POST request.

        This is only implemented for CGI scripts; any other target is
        answered with 501 Not Implemented.

        """

        if self.is_cgi():
            self.run_cgi()
        else:
            self.send_error(
                HTTPStatus.NOT_IMPLEMENTED,
                "Can only POST to CGI scripts")

    def send_head(self):
        """Version of send_head that support CGI scripts"""
        if self.is_cgi():
            return self.run_cgi()
        else:
            return SimpleHTTPRequestHandler.send_head(self)

    def is_cgi(self):
        """Test whether self.path corresponds to a CGI script.

        Returns True and updates the cgi_info attribute to the tuple
        (dir, rest) if self.path requires running a CGI script.
        Returns False otherwise.

        If any exception is raised, the caller should assume that
        self.path was rejected as invalid and act accordingly.

        The default implementation tests whether the normalized url
        path begins with one of the strings in self.cgi_directories
        (and the next character is a '/' or the end of the string).

        """
        collapsed_path = _url_collapse_path(self.path)
        dir_sep = collapsed_path.find('/', 1)
        head, tail = collapsed_path[:dir_sep], collapsed_path[dir_sep+1:]
        if head in self.cgi_directories:
            self.cgi_info = head, tail
            return True
        return False

    def is_executable(self, path):
        """Test whether argument path is an executable file."""
        return executable(path)

    def is_python(self, path):
        """Test whether argument path is a Python script."""
        head, tail = os.path.splitext(path)
        return tail.lower() in (".py", ".pyw")

    def _cgi_environ(self, scriptname, path_info, query):
        """Build the environment mapping handed to a CGI script.

        SCRIPTNAME is the URL path of the script, PATH_INFO the (still
        percent-encoded) extra path following it and QUERY the raw query
        string (possibly empty).  Returns a plain dict based on a copy of
        the server's own environment.

        """
        # Reference: http://hoohoo.ncsa.uiuc.edu/cgi/env.html
        # XXX Much of the following could be prepared ahead of time!
        # os.environ.copy() returns a plain dict.  A deep copy would still
        # be an os.environ-style object whose item assignment calls
        # putenv(), writing request data into the server process itself.
        env = os.environ.copy()
        env['SERVER_SOFTWARE'] = self.version_string()
        env['SERVER_NAME'] = self.server.server_name
        env['GATEWAY_INTERFACE'] = 'CGI/1.1'
        env['SERVER_PROTOCOL'] = self.protocol_version
        env['SERVER_PORT'] = str(self.server.server_port)
        env['REQUEST_METHOD'] = self.command
        uqrest = urllib.parse.unquote(path_info)
        env['PATH_INFO'] = uqrest
        env['PATH_TRANSLATED'] = self.translate_path(uqrest)
        env['SCRIPT_NAME'] = scriptname
        if query:
            env['QUERY_STRING'] = query
        env['REMOTE_ADDR'] = self.client_address[0]
        authorization = self.headers.get("authorization")
        if authorization:
            authorization = authorization.split()
            if len(authorization) == 2:
                import base64
                import binascii
                env['AUTH_TYPE'] = authorization[0]
                if authorization[0].lower() == "basic":
                    try:
                        token = authorization[1].encode('ascii')
                        credentials = base64.decodebytes(token).\
                                      decode('ascii')
                    except (binascii.Error, UnicodeError):
                        pass
                    else:
                        # Only the first ':' separates user from password;
                        # the password itself may contain further colons.
                        user, sep, _password = credentials.partition(':')
                        if sep:
                            env['REMOTE_USER'] = user
        # XXX REMOTE_IDENT
        if self.headers.get('content-type') is None:
            env['CONTENT_TYPE'] = self.headers.get_content_type()
        else:
            env['CONTENT_TYPE'] = self.headers['content-type']
        length = self.headers.get('content-length')
        if length:
            env['CONTENT_LENGTH'] = length
        referer = self.headers.get('referer')
        if referer:
            env['HTTP_REFERER'] = referer
        # get_all() returns the value of every Accept header; the older
        # getallmatchingheaders() call never matched any header line.
        env['HTTP_ACCEPT'] = ','.join(self.headers.get_all('accept', ()))
        ua = self.headers.get('user-agent')
        if ua:
            env['HTTP_USER_AGENT'] = ua
        co = filter(None, self.headers.get_all('cookie', []))
        cookie_str = ', '.join(co)
        if cookie_str:
            env['HTTP_COOKIE'] = cookie_str
        # XXX Other HTTP_* headers
        # Make sure these variables are always defined; setdefault() leaves
        # a value already present in the copied environment untouched.
        for k in ('QUERY_STRING', 'REMOTE_HOST', 'CONTENT_LENGTH',
                  'HTTP_USER_AGENT', 'HTTP_COOKIE', 'HTTP_REFERER'):
            env.setdefault(k, "")
        return env

    def run_cgi(self):
        """Execute a CGI script.

        Uses self.cgi_info (set by is_cgi()) to locate the script, sends
        an error response if it is missing, not a plain file or not
        executable, and otherwise sends a 200 status line and runs the
        script: via fork/execve when os.fork exists, via subprocess
        otherwise.  Returns None.

        """
        cgi_dir, rest = self.cgi_info
        path = cgi_dir + '/' + rest
        # Descend into sub-directories below the CGI directory for as long
        # as the next URL component names an existing local directory.
        i = path.find('/', len(cgi_dir)+1)
        while i >= 0:
            nextdir = path[:i]
            nextrest = path[i+1:]

            scriptdir = self.translate_path(nextdir)
            if os.path.isdir(scriptdir):
                cgi_dir, rest = nextdir, nextrest
                i = path.find('/', len(cgi_dir)+1)
            else:
                break

        # find an explicit query string, if present.
        rest, _, query = rest.partition('?')

        # dissect the part after the directory name into a script name &
        # a possible additional path, to be stored in PATH_INFO.
        i = rest.find('/')
        if i >= 0:
            script, rest = rest[:i], rest[i:]
        else:
            script, rest = rest, ''

        scriptname = cgi_dir + '/' + script
        scriptfile = self.translate_path(scriptname)
        if not os.path.exists(scriptfile):
            self.send_error(
                HTTPStatus.NOT_FOUND,
                "No such CGI script (%r)" % scriptname)
            return
        if not os.path.isfile(scriptfile):
            self.send_error(
                HTTPStatus.FORBIDDEN,
                "CGI script is not a plain file (%r)" % scriptname)
            return
        ispy = self.is_python(scriptname)
        if self.have_fork or not ispy:
            if not self.is_executable(scriptfile):
                self.send_error(
                    HTTPStatus.FORBIDDEN,
                    "CGI script is not executable (%r)" % scriptname)
                return

        env = self._cgi_environ(scriptname, rest, query)
        # Needed again below to size the POST body on the subprocess path.
        length = self.headers.get('content-length')

        self.send_response(HTTPStatus.OK, "Script output follows")
        self.flush_headers()

        decoded_query = query.replace('+', ' ')

        if self.have_fork:
            # Unix -- fork as we should
            args = [script]
            if '=' not in decoded_query:
                args.append(decoded_query)
            nobody = nobody_uid()
            self.wfile.flush() # Always flush before forking
            pid = os.fork()
            if pid != 0:
                # Parent
                pid, sts = os.waitpid(pid, 0)
                # throw away additional data [see bug #427345]
                while select.select([self.rfile], [], [], 0)[0]:
                    if not self.rfile.read(1):
                        break
                if sts:
                    self.log_error("CGI script exit status %#x", sts)
                return
            # Child
            try:
                try:
                    os.setuid(nobody)
                except OSError:
                    pass
                os.dup2(self.rfile.fileno(), 0)
                os.dup2(self.wfile.fileno(), 1)
                os.execve(scriptfile, args, env)
            except BaseException:
                # Nothing may propagate out of the forked child: report
                # the failure and terminate without running cleanup.
                self.server.handle_error(self.request, self.client_address)
                os._exit(127)

        else:
            # Non-Unix -- use subprocess
            import subprocess
            cmdline = [scriptfile]
            if self.is_python(scriptfile):
                interp = sys.executable
                if interp.lower().endswith("w.exe"):
                    # On Windows, use python.exe, not pythonw.exe
                    interp = interp[:-5] + interp[-4:]
                cmdline = [interp, '-u'] + cmdline
            if '=' not in query:
                cmdline.append(query)
            self.log_message("command: %s", subprocess.list2cmdline(cmdline))
            try:
                nbytes = int(length)
            except (TypeError, ValueError):
                nbytes = 0
            p = subprocess.Popen(cmdline,
                                 stdin=subprocess.PIPE,
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE,
                                 env = env
                                 )
            if self.command.lower() == "post" and nbytes > 0:
                data = self.rfile.read(nbytes)
            else:
                data = None
            # throw away additional data [see bug #427345]
            while select.select([self.rfile._sock], [], [], 0)[0]:
                if not self.rfile._sock.recv(1):
                    break
            stdout, stderr = p.communicate(data)
            self.wfile.write(stdout)
            if stderr:
                self.log_error('%s', stderr)
            p.stderr.close()
            p.stdout.close()
            status = p.returncode
            if status:
                self.log_error("CGI script exit status %#x", status)
            else:
                self.log_message("CGI script exited OK")
1233
1234
def _get_best_family(*address):
    """Resolve ADDRESS (host, port) for a passive stream socket.

    Returns the (family, sockaddr) pair of the first result reported by
    socket.getaddrinfo().
    """
    candidates = socket.getaddrinfo(
        *address,
        type=socket.SOCK_STREAM,
        flags=socket.AI_PASSIVE,
    )
    # Each entry is (family, type, proto, canonname, sockaddr).
    family, _socktype, _proto, _canonname, sockaddr = next(iter(candidates))
    return family, sockaddr
1243
1244
def test(HandlerClass=BaseHTTPRequestHandler,
         ServerClass=ThreadingHTTPServer,
         protocol="HTTP/1.0", port=8000, bind=None):
    """Test the HTTP request handler class.

    This runs an HTTP server on port 8000 (or the port argument), bound to
    the address resolved from BIND.  The resolved address family is stored
    on ServerClass and PROTOCOL on HandlerClass before the server starts.
    A keyboard interrupt stops the server and exits with status 0.

    """
    family, addr = _get_best_family(bind, port)
    ServerClass.address_family = family
    HandlerClass.protocol_version = protocol

    with ServerClass(addr, HandlerClass) as httpd:
        host, port = httpd.socket.getsockname()[:2]
        # A host containing ':' is bracketed when shown inside a URL.
        if ':' in host:
            url_host = f'[{host}]'
        else:
            url_host = host
        banner = (
            f"Serving HTTP on {host} port {port} "
            f"(http://{url_host}:{port}/) ..."
        )
        print(banner)
        try:
            httpd.serve_forever()
        except KeyboardInterrupt:
            print("\nKeyboard interrupt received, exiting.")
            sys.exit(0)
1268
# Command-line entry point: serve files (or CGI scripts with --cgi) from
# the chosen directory on the chosen address and port.
if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--cgi', action='store_true',
                        help='Run as CGI Server')
    parser.add_argument('--bind', '-b', metavar='ADDRESS',
                        help='Specify alternate bind address '
                             '[default: all interfaces]')
    parser.add_argument('--directory', '-d', default=os.getcwd(),
                        help='Specify alternative directory '
                        '[default:current directory]')
    parser.add_argument('port', action='store',
                        default=8000, type=int,
                        nargs='?',
                        help='Specify alternate port [default: 8000]')
    args = parser.parse_args()
    # CGIHTTPRequestHandler inherits its constructor from
    # SimpleHTTPRequestHandler, so --directory is honoured in CGI mode as
    # well instead of being silently ignored.
    if args.cgi:
        base_handler = CGIHTTPRequestHandler
    else:
        base_handler = SimpleHTTPRequestHandler
    handler_class = partial(base_handler, directory=args.directory)

    # ensure dual-stack is not disabled; ref #38907
    class DualStackServer(ThreadingHTTPServer):
        def server_bind(self):
            # suppress exception when protocol is IPv4
            with contextlib.suppress(Exception):
                self.socket.setsockopt(
                    socket.IPPROTO_IPV6, socket.IPV6_V6ONLY, 0)
            return super().server_bind()

    test(
        HandlerClass=handler_class,
        ServerClass=DualStackServer,
        port=args.port,
        bind=args.bind,
    )
1307