• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1"""HTTP server classes.
2
3Note: BaseHTTPRequestHandler doesn't implement any HTTP request; see
4SimpleHTTPRequestHandler for simple implementations of GET, HEAD and POST,
5and CGIHTTPRequestHandler for CGI scripts.
6
7It does, however, optionally implement HTTP/1.1 persistent connections,
8as of version 0.3.
9
10Notes on CGIHTTPRequestHandler
11------------------------------
12
13This class implements GET and POST requests to cgi-bin scripts.
14
15If the os.fork() function is not present (e.g. on Windows),
16subprocess.Popen() is used as a fallback, with slightly altered semantics.
17
18In all cases, the implementation is intentionally naive -- all
19requests are executed synchronously.
20
21SECURITY WARNING: DON'T USE THIS CODE UNLESS YOU ARE INSIDE A FIREWALL
22-- it may execute arbitrary Python code or external programs.
23
24Note that status code 200 is sent prior to execution of a CGI script, so
25scripts cannot send other status codes such as 302 (redirect).
26
27XXX To do:
28
29- log requests even later (to capture byte count)
30- log user-agent header and other interesting goodies
31- send error log to separate file
32"""
33
34
35# See also:
36#
37# HTTP Working Group                                        T. Berners-Lee
38# INTERNET-DRAFT                                            R. T. Fielding
39# <draft-ietf-http-v10-spec-00.txt>                     H. Frystyk Nielsen
40# Expires September 8, 1995                                  March 8, 1995
41#
42# URL: http://www.ics.uci.edu/pub/ietf/http/draft-ietf-http-v10-spec-00.txt
43#
44# and
45#
46# Network Working Group                                      R. Fielding
47# Request for Comments: 2616                                       et al
48# Obsoletes: 2068                                              June 1999
49# Category: Standards Track
50#
51# URL: http://www.faqs.org/rfcs/rfc2616.html
52
53# Log files
54# ---------
55#
56# Here's a quote from the NCSA httpd docs about log file format.
57#
58# | The logfile format is as follows. Each line consists of:
59# |
60# | host rfc931 authuser [DD/Mon/YYYY:hh:mm:ss] "request" ddd bbbb
61# |
62# |        host: Either the DNS name or the IP number of the remote client
63# |        rfc931: Any information returned by identd for this person,
64# |                - otherwise.
65# |        authuser: If user sent a userid for authentication, the user name,
66# |                  - otherwise.
67# |        DD: Day
68# |        Mon: Month (calendar name)
69# |        YYYY: Year
70# |        hh: hour (24-hour format, the machine's timezone)
71# |        mm: minutes
72# |        ss: seconds
73# |        request: The first line of the HTTP request as sent by the client.
74# |        ddd: the status code returned by the server, - if not available.
75# |        bbbb: the total number of bytes sent,
76# |              *not including the HTTP/1.0 header*, - if not available
77# |
78# | You can determine the name of the file accessed through request.
79#
80# (Actually, the latter is only true if you know the server configuration
81# at the time the request was made!)
82
# Version of this HTTP server implementation.  The request handler classes
# embed it in their server_version strings, which end up in the "Server"
# response header.
__version__ = "0.6"

# Names exported by "from <this module> import *".
__all__ = [
    "HTTPServer", "ThreadingHTTPServer", "BaseHTTPRequestHandler",
    "SimpleHTTPRequestHandler", "CGIHTTPRequestHandler",
]
89
90import copy
91import datetime
92import email.utils
93import html
94import http.client
95import io
96import mimetypes
97import os
98import posixpath
99import select
100import shutil
101import socket # For gethostbyaddr()
102import socketserver
103import sys
104import time
105import urllib.parse
106import contextlib
107from functools import partial
108
109from http import HTTPStatus
110
111
# Default error message template.
# BaseHTTPRequestHandler.send_error() %-formats it with a mapping that has
# the keys 'code', 'message' and 'explain'; the message and explain texts
# are HTML-escaped before they are substituted.
DEFAULT_ERROR_MESSAGE = """\
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
        "http://www.w3.org/TR/html4/strict.dtd">
<html>
    <head>
        <meta http-equiv="Content-Type" content="text/html;charset=utf-8">
        <title>Error response</title>
    </head>
    <body>
        <h1>Error response</h1>
        <p>Error code: %(code)d</p>
        <p>Message: %(message)s.</p>
        <p>Error code explanation: %(code)s - %(explain)s.</p>
    </body>
</html>
"""

# Value of the Content-Type header sent along with the error page.
DEFAULT_ERROR_CONTENT_TYPE = "text/html;charset=utf-8"
131
class HTTPServer(socketserver.TCPServer):
    """TCP server that records the host name and port it is bound to."""

    # Address reuse is switched on; this seems to make sense in a
    # testing environment.
    allow_reuse_address = 1

    def server_bind(self):
        """Bind the socket, then store server_name and server_port.

        server_name is the fully qualified domain name looked up for the
        bound host; server_port is the bound port.
        """
        socketserver.TCPServer.server_bind(self)
        # Only the first two items matter; longer address tuples carry
        # extra fields that are ignored here.
        bound_host, bound_port, *_ = self.server_address
        self.server_name = socket.getfqdn(bound_host)
        self.server_port = bound_port
142
143
class ThreadingHTTPServer(socketserver.ThreadingMixIn, HTTPServer):
    """HTTPServer combined with socketserver.ThreadingMixIn."""

    # Setting consumed by ThreadingMixIn.
    # NOTE(review): presumably makes the per-request threads daemon
    # threads -- confirm against the socketserver documentation.
    daemon_threads = True
146
147
148class BaseHTTPRequestHandler(socketserver.StreamRequestHandler):
149
150    """HTTP request handler base class.
151
152    The following explanation of HTTP serves to guide you through the
153    code as well as to expose any misunderstandings I may have about
154    HTTP (so you don't need to read the code to figure out I'm wrong
155    :-).
156
157    HTTP (HyperText Transfer Protocol) is an extensible protocol on
158    top of a reliable stream transport (e.g. TCP/IP).  The protocol
159    recognizes three parts to a request:
160
161    1. One line identifying the request type and path
162    2. An optional set of RFC-822-style headers
163    3. An optional data part
164
165    The headers and data are separated by a blank line.
166
167    The first line of the request has the form
168
169    <command> <path> <version>
170
171    where <command> is a (case-sensitive) keyword such as GET or POST,
172    <path> is a string containing path information for the request,
173    and <version> should be the string "HTTP/1.0" or "HTTP/1.1".
174    <path> is encoded using the URL encoding scheme (using %xx to signify
175    the ASCII character with hex code xx).
176
177    The specification specifies that lines are separated by CRLF but
178    for compatibility with the widest range of clients recommends
179    servers also handle LF.  Similarly, whitespace in the request line
180    is treated sensibly (allowing multiple spaces between components
181    and allowing trailing whitespace).
182
183    Similarly, for output, lines ought to be separated by CRLF pairs
184    but most clients grok LF characters just fine.
185
186    If the first line of the request has the form
187
188    <command> <path>
189
190    (i.e. <version> is left out) then this is assumed to be an HTTP
191    0.9 request; this form has no optional headers and data part and
192    the reply consists of just the data.
193
194    The reply form of the HTTP 1.x protocol again has three parts:
195
196    1. One line giving the response code
197    2. An optional set of RFC-822-style headers
198    3. The data
199
200    Again, the headers and data are separated by a blank line.
201
202    The response code line has the form
203
204    <version> <responsecode> <responsestring>
205
206    where <version> is the protocol version ("HTTP/1.0" or "HTTP/1.1"),
207    <responsecode> is a 3-digit response code indicating success or
208    failure of the request, and <responsestring> is an optional
209    human-readable string explaining what the response code means.
210
211    This server parses the request and the headers, and then calls a
212    function specific to the request type (<command>).  Specifically,
213    a request SPAM will be handled by a method do_SPAM().  If no
214    such method exists the server sends an error response to the
215    client.  If it exists, it is called with no arguments:
216
217    do_SPAM()
218
219    Note that the request name is case sensitive (i.e. SPAM and spam
220    are different requests).
221
222    The various request details are stored in instance variables:
223
224    - client_address is the client IP address in the form (host,
225    port);
226
227    - command, path and version are the broken-down request line;
228
229    - headers is an instance of email.message.Message (or a derived
230    class) containing the header information;
231
232    - rfile is a file object open for reading positioned at the
233    start of the optional input data part;
234
235    - wfile is a file object open for writing.
236
237    IT IS IMPORTANT TO ADHERE TO THE PROTOCOL FOR WRITING!
238
239    The first thing to be written must be the response line.  Then
240    follow 0 or more header lines, then a blank line, and then the
241    actual data (if any).  The meaning of the header lines depends on
242    the command executed by the server; in most cases, when data is
243    returned, there should be at least one header line of the form
244
245    Content-type: <type>/<subtype>
246
247    where <type> and <subtype> should be registered MIME types,
248    e.g. "text/html" or "text/plain".
249
250    """
251
    # The Python system version, truncated to its first component.
    # version_string() appends it to server_version.
    sys_version = "Python/" + sys.version.split()[0]

    # The server software version.  You may want to override this.
    # The format is multiple whitespace-separated strings,
    # where each string is of the form name[/version].
    server_version = "BaseHTTP/" + __version__

    # Template and Content-Type that send_error() uses to build the HTML
    # error page; both default to the module-level constants.
    error_message_format = DEFAULT_ERROR_MESSAGE
    error_content_type = DEFAULT_ERROR_CONTENT_TYPE

    # The default request version.  This only affects responses up until
    # the point where the request line is parsed, so it mainly decides what
    # the client gets back when sending a malformed request line.
    # Most web servers default to HTTP 0.9, i.e. don't send a status line.
    default_request_version = "HTTP/0.9"
268
269    def parse_request(self):
270        """Parse a request (internal).
271
272        The request should be stored in self.raw_requestline; the results
273        are in self.command, self.path, self.request_version and
274        self.headers.
275
276        Return True for success, False for failure; on failure, any relevant
277        error response has already been sent back.
278
279        """
280        self.command = None  # set in case of error on the first line
281        self.request_version = version = self.default_request_version
282        self.close_connection = True
283        requestline = str(self.raw_requestline, 'iso-8859-1')
284        requestline = requestline.rstrip('\r\n')
285        self.requestline = requestline
286        words = requestline.split()
287        if len(words) == 0:
288            return False
289
290        if len(words) >= 3:  # Enough to determine protocol version
291            version = words[-1]
292            try:
293                if not version.startswith('HTTP/'):
294                    raise ValueError
295                base_version_number = version.split('/', 1)[1]
296                version_number = base_version_number.split(".")
297                # RFC 2145 section 3.1 says there can be only one "." and
298                #   - major and minor numbers MUST be treated as
299                #      separate integers;
300                #   - HTTP/2.4 is a lower version than HTTP/2.13, which in
301                #      turn is lower than HTTP/12.3;
302                #   - Leading zeros MUST be ignored by recipients.
303                if len(version_number) != 2:
304                    raise ValueError
305                version_number = int(version_number[0]), int(version_number[1])
306            except (ValueError, IndexError):
307                self.send_error(
308                    HTTPStatus.BAD_REQUEST,
309                    "Bad request version (%r)" % version)
310                return False
311            if version_number >= (1, 1) and self.protocol_version >= "HTTP/1.1":
312                self.close_connection = False
313            if version_number >= (2, 0):
314                self.send_error(
315                    HTTPStatus.HTTP_VERSION_NOT_SUPPORTED,
316                    "Invalid HTTP version (%s)" % base_version_number)
317                return False
318            self.request_version = version
319
320        if not 2 <= len(words) <= 3:
321            self.send_error(
322                HTTPStatus.BAD_REQUEST,
323                "Bad request syntax (%r)" % requestline)
324            return False
325        command, path = words[:2]
326        if len(words) == 2:
327            self.close_connection = True
328            if command != 'GET':
329                self.send_error(
330                    HTTPStatus.BAD_REQUEST,
331                    "Bad HTTP/0.9 request type (%r)" % command)
332                return False
333        self.command, self.path = command, path
334
335        # gh-87389: The purpose of replacing '//' with '/' is to protect
336        # against open redirect attacks possibly triggered if the path starts
337        # with '//' because http clients treat //path as an absolute URI
338        # without scheme (similar to http://path) rather than a path.
339        if self.path.startswith('//'):
340            self.path = '/' + self.path.lstrip('/')  # Reduce to a single /
341
342        # Examine the headers and look for a Connection directive.
343        try:
344            self.headers = http.client.parse_headers(self.rfile,
345                                                     _class=self.MessageClass)
346        except http.client.LineTooLong as err:
347            self.send_error(
348                HTTPStatus.REQUEST_HEADER_FIELDS_TOO_LARGE,
349                "Line too long",
350                str(err))
351            return False
352        except http.client.HTTPException as err:
353            self.send_error(
354                HTTPStatus.REQUEST_HEADER_FIELDS_TOO_LARGE,
355                "Too many headers",
356                str(err)
357            )
358            return False
359
360        conntype = self.headers.get('Connection', "")
361        if conntype.lower() == 'close':
362            self.close_connection = True
363        elif (conntype.lower() == 'keep-alive' and
364              self.protocol_version >= "HTTP/1.1"):
365            self.close_connection = False
366        # Examine the headers and look for an Expect directive
367        expect = self.headers.get('Expect', "")
368        if (expect.lower() == "100-continue" and
369                self.protocol_version >= "HTTP/1.1" and
370                self.request_version >= "HTTP/1.1"):
371            if not self.handle_expect_100():
372                return False
373        return True
374
375    def handle_expect_100(self):
376        """Decide what to do with an "Expect: 100-continue" header.
377
378        If the client is expecting a 100 Continue response, we must
379        respond with either a 100 Continue or a final response before
380        waiting for the request body. The default is to always respond
381        with a 100 Continue. You can behave differently (for example,
382        reject unauthorized requests) by overriding this method.
383
384        This method should either return True (possibly after sending
385        a 100 Continue response) or send an error response and return
386        False.
387
388        """
389        self.send_response_only(HTTPStatus.CONTINUE)
390        self.end_headers()
391        return True
392
393    def handle_one_request(self):
394        """Handle a single HTTP request.
395
396        You normally don't need to override this method; see the class
397        __doc__ string for information on how to handle specific HTTP
398        commands such as GET and POST.
399
400        """
401        try:
402            self.raw_requestline = self.rfile.readline(65537)
403            if len(self.raw_requestline) > 65536:
404                self.requestline = ''
405                self.request_version = ''
406                self.command = ''
407                self.send_error(HTTPStatus.REQUEST_URI_TOO_LONG)
408                return
409            if not self.raw_requestline:
410                self.close_connection = True
411                return
412            if not self.parse_request():
413                # An error code has been sent, just exit
414                return
415            mname = 'do_' + self.command
416            if not hasattr(self, mname):
417                self.send_error(
418                    HTTPStatus.NOT_IMPLEMENTED,
419                    "Unsupported method (%r)" % self.command)
420                return
421            method = getattr(self, mname)
422            method()
423            self.wfile.flush() #actually send the response if not already done.
424        except TimeoutError as e:
425            #a read or a write timed out.  Discard this connection
426            self.log_error("Request timed out: %r", e)
427            self.close_connection = True
428            return
429
430    def handle(self):
431        """Handle multiple requests if necessary."""
432        self.close_connection = True
433
434        self.handle_one_request()
435        while not self.close_connection:
436            self.handle_one_request()
437
438    def send_error(self, code, message=None, explain=None):
439        """Send and log an error reply.
440
441        Arguments are
442        * code:    an HTTP error code
443                   3 digits
444        * message: a simple optional 1 line reason phrase.
445                   *( HTAB / SP / VCHAR / %x80-FF )
446                   defaults to short entry matching the response code
447        * explain: a detailed message defaults to the long entry
448                   matching the response code.
449
450        This sends an error response (so it must be called before any
451        output has been generated), logs the error, and finally sends
452        a piece of HTML explaining the error to the user.
453
454        """
455
456        try:
457            shortmsg, longmsg = self.responses[code]
458        except KeyError:
459            shortmsg, longmsg = '???', '???'
460        if message is None:
461            message = shortmsg
462        if explain is None:
463            explain = longmsg
464        self.log_error("code %d, message %s", code, message)
465        self.send_response(code, message)
466        self.send_header('Connection', 'close')
467
468        # Message body is omitted for cases described in:
469        #  - RFC7230: 3.3. 1xx, 204(No Content), 304(Not Modified)
470        #  - RFC7231: 6.3.6. 205(Reset Content)
471        body = None
472        if (code >= 200 and
473            code not in (HTTPStatus.NO_CONTENT,
474                         HTTPStatus.RESET_CONTENT,
475                         HTTPStatus.NOT_MODIFIED)):
476            # HTML encode to prevent Cross Site Scripting attacks
477            # (see bug #1100201)
478            content = (self.error_message_format % {
479                'code': code,
480                'message': html.escape(message, quote=False),
481                'explain': html.escape(explain, quote=False)
482            })
483            body = content.encode('UTF-8', 'replace')
484            self.send_header("Content-Type", self.error_content_type)
485            self.send_header('Content-Length', str(len(body)))
486        self.end_headers()
487
488        if self.command != 'HEAD' and body:
489            self.wfile.write(body)
490
491    def send_response(self, code, message=None):
492        """Add the response header to the headers buffer and log the
493        response code.
494
495        Also send two standard headers with the server software
496        version and the current date.
497
498        """
499        self.log_request(code)
500        self.send_response_only(code, message)
501        self.send_header('Server', self.version_string())
502        self.send_header('Date', self.date_time_string())
503
504    def send_response_only(self, code, message=None):
505        """Send the response header only."""
506        if self.request_version != 'HTTP/0.9':
507            if message is None:
508                if code in self.responses:
509                    message = self.responses[code][0]
510                else:
511                    message = ''
512            if not hasattr(self, '_headers_buffer'):
513                self._headers_buffer = []
514            self._headers_buffer.append(("%s %d %s\r\n" %
515                    (self.protocol_version, code, message)).encode(
516                        'latin-1', 'strict'))
517
518    def send_header(self, keyword, value):
519        """Send a MIME header to the headers buffer."""
520        if self.request_version != 'HTTP/0.9':
521            if not hasattr(self, '_headers_buffer'):
522                self._headers_buffer = []
523            self._headers_buffer.append(
524                ("%s: %s\r\n" % (keyword, value)).encode('latin-1', 'strict'))
525
526        if keyword.lower() == 'connection':
527            if value.lower() == 'close':
528                self.close_connection = True
529            elif value.lower() == 'keep-alive':
530                self.close_connection = False
531
532    def end_headers(self):
533        """Send the blank line ending the MIME headers."""
534        if self.request_version != 'HTTP/0.9':
535            self._headers_buffer.append(b"\r\n")
536            self.flush_headers()
537
538    def flush_headers(self):
539        if hasattr(self, '_headers_buffer'):
540            self.wfile.write(b"".join(self._headers_buffer))
541            self._headers_buffer = []
542
543    def log_request(self, code='-', size='-'):
544        """Log an accepted request.
545
546        This is called by send_response().
547
548        """
549        if isinstance(code, HTTPStatus):
550            code = code.value
551        self.log_message('"%s" %s %s',
552                         self.requestline, str(code), str(size))
553
    def log_error(self, format, *args):
        """Log an error.

        This is called when a request cannot be fulfilled: send_error()
        uses it for every error response, and handle_one_request() uses
        it when a request times out.  By default it passes the message
        on to log_message() unchanged.

        Arguments are the same as for log_message().

        XXX This should go to the separate error log.

        """

        self.log_message(format, *args)
567
568    def log_message(self, format, *args):
569        """Log an arbitrary message.
570
571        This is used by all other logging functions.  Override
572        it if you have specific logging wishes.
573
574        The first argument, FORMAT, is a format string for the
575        message to be logged.  If the format string contains
576        any % escapes requiring parameters, they should be
577        specified as subsequent arguments (it's just like
578        printf!).
579
580        The client ip and current date/time are prefixed to
581        every message.
582
583        """
584
585        sys.stderr.write("%s - - [%s] %s\n" %
586                         (self.address_string(),
587                          self.log_date_time_string(),
588                          format%args))
589
590    def version_string(self):
591        """Return the server software version string."""
592        return self.server_version + ' ' + self.sys_version
593
594    def date_time_string(self, timestamp=None):
595        """Return the current date and time formatted for a message header."""
596        if timestamp is None:
597            timestamp = time.time()
598        return email.utils.formatdate(timestamp, usegmt=True)
599
600    def log_date_time_string(self):
601        """Return the current time formatted for logging."""
602        now = time.time()
603        year, month, day, hh, mm, ss, x, y, z = time.localtime(now)
604        s = "%02d/%3s/%04d %02d:%02d:%02d" % (
605                day, self.monthname[month], year, hh, mm, ss)
606        return s
607
    # Abbreviated English weekday names, Monday first.
    weekdayname = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']

    # Abbreviated English month names.  log_date_time_string() indexes
    # this list with the month number (1-12); index 0 is a placeholder.
    monthname = [None,
                 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
613
    def address_string(self):
        """Return the client address.

        This is the first item of self.client_address; log_message()
        puts it at the start of every log line.
        """

        return self.client_address[0]
618
    # Essentially static class variables

    # The version of the HTTP protocol we support.
    # Set this to HTTP/1.1 to enable automatic keepalive
    # (parse_request() only keeps a connection open when this compares
    # greater than or equal to "HTTP/1.1").
    protocol_version = "HTTP/1.0"

    # MessageClass used to parse headers
    MessageClass = http.client.HTTPMessage

    # hack to maintain backwards compatibility
    # Maps every HTTPStatus member to a (phrase, description) pair.
    # send_error() and send_response_only() take their default texts
    # from this table.
    responses = {
        v: (v.phrase, v.description)
        for v in HTTPStatus.__members__.values()
    }
633
634
635class SimpleHTTPRequestHandler(BaseHTTPRequestHandler):
636
637    """Simple HTTP request handler with GET and HEAD commands.
638
639    This serves files from the current directory and any of its
640    subdirectories.  The MIME type for files is determined by
641    calling the .guess_type() method.
642
643    The GET and HEAD requests are identical except that the HEAD
644    request omits the actual contents of the file.
645
646    """
647
    # Server software version of this handler class; it replaces the
    # "BaseHTTP/..." value of the base class.
    server_version = "SimpleHTTP/" + __version__
    # Extension -> MIME type table.  The very same dict object is also
    # bound to _encodings_map_default.
    # NOTE(review): presumably consulted by guess_type(), which is not
    # part of this excerpt -- confirm.
    extensions_map = _encodings_map_default = {
        '.gz': 'application/gzip',
        '.Z': 'application/octet-stream',
        '.bz2': 'application/x-bzip2',
        '.xz': 'application/x-xz',
    }
655
656    def __init__(self, *args, directory=None, **kwargs):
657        if directory is None:
658            directory = os.getcwd()
659        self.directory = os.fspath(directory)
660        super().__init__(*args, **kwargs)
661
662    def do_GET(self):
663        """Serve a GET request."""
664        f = self.send_head()
665        if f:
666            try:
667                self.copyfile(f, self.wfile)
668            finally:
669                f.close()
670
671    def do_HEAD(self):
672        """Serve a HEAD request."""
673        f = self.send_head()
674        if f:
675            f.close()
676
677    def send_head(self):
678        """Common code for GET and HEAD commands.
679
680        This sends the response code and MIME headers.
681
682        Return value is either a file object (which has to be copied
683        to the outputfile by the caller unless the command was HEAD,
684        and must be closed by the caller under all circumstances), or
685        None, in which case the caller has nothing further to do.
686
687        """
688        path = self.translate_path(self.path)
689        f = None
690        if os.path.isdir(path):
691            parts = urllib.parse.urlsplit(self.path)
692            if not parts.path.endswith('/'):
693                # redirect browser - doing basically what apache does
694                self.send_response(HTTPStatus.MOVED_PERMANENTLY)
695                new_parts = (parts[0], parts[1], parts[2] + '/',
696                             parts[3], parts[4])
697                new_url = urllib.parse.urlunsplit(new_parts)
698                self.send_header("Location", new_url)
699                self.send_header("Content-Length", "0")
700                self.end_headers()
701                return None
702            for index in "index.html", "index.htm":
703                index = os.path.join(path, index)
704                if os.path.exists(index):
705                    path = index
706                    break
707            else:
708                return self.list_directory(path)
709        ctype = self.guess_type(path)
710        # check for trailing "/" which should return 404. See Issue17324
711        # The test for this was added in test_httpserver.py
712        # However, some OS platforms accept a trailingSlash as a filename
713        # See discussion on python-dev and Issue34711 regarding
714        # parseing and rejection of filenames with a trailing slash
715        if path.endswith("/"):
716            self.send_error(HTTPStatus.NOT_FOUND, "File not found")
717            return None
718        try:
719            f = open(path, 'rb')
720        except OSError:
721            self.send_error(HTTPStatus.NOT_FOUND, "File not found")
722            return None
723
724        try:
725            fs = os.fstat(f.fileno())
726            # Use browser cache if possible
727            if ("If-Modified-Since" in self.headers
728                    and "If-None-Match" not in self.headers):
729                # compare If-Modified-Since and time of last file modification
730                try:
731                    ims = email.utils.parsedate_to_datetime(
732                        self.headers["If-Modified-Since"])
733                except (TypeError, IndexError, OverflowError, ValueError):
734                    # ignore ill-formed values
735                    pass
736                else:
737                    if ims.tzinfo is None:
738                        # obsolete format with no timezone, cf.
739                        # https://tools.ietf.org/html/rfc7231#section-7.1.1.1
740                        ims = ims.replace(tzinfo=datetime.timezone.utc)
741                    if ims.tzinfo is datetime.timezone.utc:
742                        # compare to UTC datetime of last modification
743                        last_modif = datetime.datetime.fromtimestamp(
744                            fs.st_mtime, datetime.timezone.utc)
745                        # remove microseconds, like in If-Modified-Since
746                        last_modif = last_modif.replace(microsecond=0)
747
748                        if last_modif <= ims:
749                            self.send_response(HTTPStatus.NOT_MODIFIED)
750                            self.end_headers()
751                            f.close()
752                            return None
753
754            self.send_response(HTTPStatus.OK)
755            self.send_header("Content-type", ctype)
756            self.send_header("Content-Length", str(fs[6]))
757            self.send_header("Last-Modified",
758                self.date_time_string(fs.st_mtime))
759            self.end_headers()
760            return f
761        except:
762            f.close()
763            raise
764
765    def list_directory(self, path):
766        """Helper to produce a directory listing (absent index.html).
767
768        Return value is either a file object, or None (indicating an
769        error).  In either case, the headers are sent, making the
770        interface the same as for send_head().
771
772        """
773        try:
774            list = os.listdir(path)
775        except OSError:
776            self.send_error(
777                HTTPStatus.NOT_FOUND,
778                "No permission to list directory")
779            return None
780        list.sort(key=lambda a: a.lower())
781        r = []
782        try:
783            displaypath = urllib.parse.unquote(self.path,
784                                               errors='surrogatepass')
785        except UnicodeDecodeError:
786            displaypath = urllib.parse.unquote(path)
787        displaypath = html.escape(displaypath, quote=False)
788        enc = sys.getfilesystemencoding()
789        title = 'Directory listing for %s' % displaypath
790        r.append('<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" '
791                 '"http://www.w3.org/TR/html4/strict.dtd">')
792        r.append('<html>\n<head>')
793        r.append('<meta http-equiv="Content-Type" '
794                 'content="text/html; charset=%s">' % enc)
795        r.append('<title>%s</title>\n</head>' % title)
796        r.append('<body>\n<h1>%s</h1>' % title)
797        r.append('<hr>\n<ul>')
798        for name in list:
799            fullname = os.path.join(path, name)
800            displayname = linkname = name
801            # Append / for directories or @ for symbolic links
802            if os.path.isdir(fullname):
803                displayname = name + "/"
804                linkname = name + "/"
805            if os.path.islink(fullname):
806                displayname = name + "@"
807                # Note: a link to a directory displays with @ and links with /
808            r.append('<li><a href="%s">%s</a></li>'
809                    % (urllib.parse.quote(linkname,
810                                          errors='surrogatepass'),
811                       html.escape(displayname, quote=False)))
812        r.append('</ul>\n<hr>\n</body>\n</html>\n')
813        encoded = '\n'.join(r).encode(enc, 'surrogateescape')
814        f = io.BytesIO()
815        f.write(encoded)
816        f.seek(0)
817        self.send_response(HTTPStatus.OK)
818        self.send_header("Content-type", "text/html; charset=%s" % enc)
819        self.send_header("Content-Length", str(len(encoded)))
820        self.end_headers()
821        return f
822
823    def translate_path(self, path):
824        """Translate a /-separated PATH to the local filename syntax.
825
826        Components that mean special things to the local file system
827        (e.g. drive or directory names) are ignored.  (XXX They should
828        probably be diagnosed.)
829
830        """
831        # abandon query parameters
832        path = path.split('?',1)[0]
833        path = path.split('#',1)[0]
834        # Don't forget explicit trailing slash when normalizing. Issue17324
835        trailing_slash = path.rstrip().endswith('/')
836        try:
837            path = urllib.parse.unquote(path, errors='surrogatepass')
838        except UnicodeDecodeError:
839            path = urllib.parse.unquote(path)
840        path = posixpath.normpath(path)
841        words = path.split('/')
842        words = filter(None, words)
843        path = self.directory
844        for word in words:
845            if os.path.dirname(word) or word in (os.curdir, os.pardir):
846                # Ignore components that are not a simple file/directory name
847                continue
848            path = os.path.join(path, word)
849        if trailing_slash:
850            path += '/'
851        return path
852
853    def copyfile(self, source, outputfile):
854        """Copy all data between two file objects.
855
856        The SOURCE argument is a file object open for reading
857        (or anything with a read() method) and the DESTINATION
858        argument is a file object open for writing (or
859        anything with a write() method).
860
861        The only reason for overriding this would be to change
862        the block size or perhaps to replace newlines by CRLF
863        -- note however that this the default server uses this
864        to copy binary data as well.
865
866        """
867        shutil.copyfileobj(source, outputfile)
868
869    def guess_type(self, path):
870        """Guess the type of a file.
871
872        Argument is a PATH (a filename).
873
874        Return value is a string of the form type/subtype,
875        usable for a MIME Content-type header.
876
877        The default implementation looks the file's extension
878        up in the table self.extensions_map, using application/octet-stream
879        as a default; however it would be permissible (if
880        slow) to look inside the data to make a better guess.
881
882        """
883        base, ext = posixpath.splitext(path)
884        if ext in self.extensions_map:
885            return self.extensions_map[ext]
886        ext = ext.lower()
887        if ext in self.extensions_map:
888            return self.extensions_map[ext]
889        guess, _ = mimetypes.guess_type(path)
890        if guess:
891            return guess
892        return 'application/octet-stream'
893
894
895# Utilities for CGIHTTPRequestHandler
896
897def _url_collapse_path(path):
898    """
899    Given a URL path, remove extra '/'s and '.' path elements and collapse
900    any '..' references and returns a collapsed path.
901
902    Implements something akin to RFC-2396 5.2 step 6 to parse relative paths.
903    The utility of this function is limited to is_cgi method and helps
904    preventing some security attacks.
905
906    Returns: The reconstituted URL, which will always start with a '/'.
907
908    Raises: IndexError if too many '..' occur within the path.
909
910    """
911    # Query component should not be involved.
912    path, _, query = path.partition('?')
913    path = urllib.parse.unquote(path)
914
915    # Similar to os.path.split(os.path.normpath(path)) but specific to URL
916    # path semantics rather than local operating system semantics.
917    path_parts = path.split('/')
918    head_parts = []
919    for part in path_parts[:-1]:
920        if part == '..':
921            head_parts.pop() # IndexError if more '..' than prior parts
922        elif part and part != '.':
923            head_parts.append( part )
924    if path_parts:
925        tail_part = path_parts.pop()
926        if tail_part:
927            if tail_part == '..':
928                head_parts.pop()
929                tail_part = ''
930            elif tail_part == '.':
931                tail_part = ''
932    else:
933        tail_part = ''
934
935    if query:
936        tail_part = '?'.join((tail_part, query))
937
938    splitpath = ('/' + '/'.join(head_parts), tail_part)
939    collapsed_path = "/".join(splitpath)
940
941    return collapsed_path
942
943
944
# Cached uid for the 'nobody' user; filled in lazily by nobody_uid().
nobody = None

def nobody_uid():
    """Internal routine to get nobody's uid.

    Returns -1 when the pwd module cannot be imported.  Otherwise returns
    the uid of the 'nobody' account, or one more than the highest uid in
    the password database if there is no such account.  The result is
    remembered in the module-level ``nobody`` variable.
    """
    global nobody
    if not nobody:
        try:
            import pwd
        except ImportError:
            return -1
        try:
            nobody = pwd.getpwnam('nobody').pw_uid
        except KeyError:
            nobody = 1 + max(entry.pw_uid for entry in pwd.getpwall())
    return nobody
961
962
def executable(path):
    """Return whether os.access() grants execute permission on *path*."""
    may_execute = os.access(path, os.X_OK)
    return may_execute
966
967
class CGIHTTPRequestHandler(SimpleHTTPRequestHandler):

    """Complete HTTP server with GET, HEAD and POST commands.

    GET and HEAD also support running CGI scripts.

    The POST command is *only* implemented for CGI scripts.

    """

    # Determine platform specifics
    have_fork = hasattr(os, 'fork')

    # Make rfile unbuffered -- we need to read one line and then pass
    # the rest to a subprocess, so we can't use buffered input.
    rbufsize = 0

    def do_POST(self):
        """Serve a POST request.

        This is only implemented for CGI scripts; any other target gets
        a 501 (Not Implemented) error response.

        """

        if self.is_cgi():
            self.run_cgi()
        else:
            self.send_error(
                HTTPStatus.NOT_IMPLEMENTED,
                "Can only POST to CGI scripts")

    def send_head(self):
        """Version of send_head that supports CGI scripts.

        CGI paths are handed to run_cgi(); everything else is delegated
        to SimpleHTTPRequestHandler.send_head().
        """
        if self.is_cgi():
            return self.run_cgi()
        else:
            return SimpleHTTPRequestHandler.send_head(self)

    def is_cgi(self):
        """Test whether self.path corresponds to a CGI script.

        Returns True and updates the cgi_info attribute to the tuple
        (dir, rest) if self.path requires running a CGI script.
        Returns False otherwise.

        If any exception is raised, the caller should assume that
        self.path was rejected as invalid and act accordingly.

        The default implementation tests whether the normalized url
        path begins with one of the strings in self.cgi_directories
        (and the next character is a '/' or the end of the string).

        """
        collapsed_path = _url_collapse_path(self.path)
        # Try each successively longer '/'-terminated prefix of the path
        # until one of them names a CGI directory.
        dir_sep = collapsed_path.find('/', 1)
        while (dir_sep > 0
               and collapsed_path[:dir_sep] not in self.cgi_directories):
            dir_sep = collapsed_path.find('/', dir_sep+1)
        if dir_sep > 0:
            head, tail = collapsed_path[:dir_sep], collapsed_path[dir_sep+1:]
            self.cgi_info = head, tail
            return True
        return False


    # URL path prefixes that is_cgi() treats as CGI directories.
    cgi_directories = ['/cgi-bin', '/htbin']

    def is_executable(self, path):
        """Test whether argument path is an executable file."""
        return executable(path)

    def is_python(self, path):
        """Test whether argument path is a Python script."""
        head, tail = os.path.splitext(path)
        return tail.lower() in (".py", ".pyw")

    def run_cgi(self):
        """Execute a CGI script.

        Uses the (dir, rest) tuple stored in self.cgi_info by is_cgi().
        Sends an error response and returns if the script does not exist,
        is not a plain file, or is not executable.  Otherwise builds the
        CGI environment, sends a 200 status line and runs the script --
        via fork/exec when os.fork is available, via subprocess otherwise.
        """
        dir, rest = self.cgi_info
        path = dir + '/' + rest
        # Extend `dir` for as long as the next path component is itself a
        # directory on disk, so scripts in nested directories are found.
        i = path.find('/', len(dir)+1)
        while i >= 0:
            nextdir = path[:i]
            nextrest = path[i+1:]

            scriptdir = self.translate_path(nextdir)
            if os.path.isdir(scriptdir):
                dir, rest = nextdir, nextrest
                i = path.find('/', len(dir)+1)
            else:
                break

        # find an explicit query string, if present.
        rest, _, query = rest.partition('?')

        # dissect the part after the directory name into a script name &
        # a possible additional path, to be stored in PATH_INFO.
        i = rest.find('/')
        if i >= 0:
            script, rest = rest[:i], rest[i:]
        else:
            script, rest = rest, ''

        scriptname = dir + '/' + script
        scriptfile = self.translate_path(scriptname)
        if not os.path.exists(scriptfile):
            self.send_error(
                HTTPStatus.NOT_FOUND,
                "No such CGI script (%r)" % scriptname)
            return
        if not os.path.isfile(scriptfile):
            self.send_error(
                HTTPStatus.FORBIDDEN,
                "CGI script is not a plain file (%r)" % scriptname)
            return
        ispy = self.is_python(scriptname)
        if self.have_fork or not ispy:
            if not self.is_executable(scriptfile):
                self.send_error(
                    HTTPStatus.FORBIDDEN,
                    "CGI script is not executable (%r)" % scriptname)
                return

        # Reference: http://hoohoo.ncsa.uiuc.edu/cgi/env.html
        # XXX Much of the following could be prepared ahead of time!
        # Work on a plain dict snapshot.  A deep copy of os.environ is
        # still an os.environ-style mapping whose item assignment calls
        # putenv(), which would leak per-request values into the server
        # process's own environment.
        env = os.environ.copy()
        env['SERVER_SOFTWARE'] = self.version_string()
        env['SERVER_NAME'] = self.server.server_name
        env['GATEWAY_INTERFACE'] = 'CGI/1.1'
        env['SERVER_PROTOCOL'] = self.protocol_version
        env['SERVER_PORT'] = str(self.server.server_port)
        env['REQUEST_METHOD'] = self.command
        uqrest = urllib.parse.unquote(rest)
        env['PATH_INFO'] = uqrest
        env['PATH_TRANSLATED'] = self.translate_path(uqrest)
        env['SCRIPT_NAME'] = scriptname
        env['QUERY_STRING'] = query
        env['REMOTE_ADDR'] = self.client_address[0]
        authorization = self.headers.get("authorization")
        if authorization:
            authorization = authorization.split()
            if len(authorization) == 2:
                import base64, binascii
                env['AUTH_TYPE'] = authorization[0]
                if authorization[0].lower() == "basic":
                    try:
                        authorization = authorization[1].encode('ascii')
                        authorization = base64.decodebytes(authorization).\
                                        decode('ascii')
                    except (binascii.Error, UnicodeError):
                        pass
                    else:
                        # Credentials are "user-id:password".  Only the
                        # first colon separates the two; the password may
                        # itself contain colons.
                        user, colon, _ = authorization.partition(':')
                        if colon:
                            env['REMOTE_USER'] = user
        # XXX REMOTE_IDENT
        if self.headers.get('content-type') is None:
            env['CONTENT_TYPE'] = self.headers.get_content_type()
        else:
            env['CONTENT_TYPE'] = self.headers['content-type']
        length = self.headers.get('content-length')
        if length:
            env['CONTENT_LENGTH'] = length
        referer = self.headers.get('referer')
        if referer:
            env['HTTP_REFERER'] = referer
        accept = self.headers.get_all('accept', ())
        env['HTTP_ACCEPT'] = ','.join(accept)
        ua = self.headers.get('user-agent')
        if ua:
            env['HTTP_USER_AGENT'] = ua
        co = filter(None, self.headers.get_all('cookie', []))
        cookie_str = ', '.join(co)
        if cookie_str:
            env['HTTP_COOKIE'] = cookie_str
        # XXX Other HTTP_* headers
        # Make sure these variables are always defined for the script,
        # falling back to an empty value when nothing set them.
        for k in ('QUERY_STRING', 'REMOTE_HOST', 'CONTENT_LENGTH',
                  'HTTP_USER_AGENT', 'HTTP_COOKIE', 'HTTP_REFERER'):
            env.setdefault(k, "")

        self.send_response(HTTPStatus.OK, "Script output follows")
        self.flush_headers()

        decoded_query = query.replace('+', ' ')

        if self.have_fork:
            # Unix -- fork as we should
            args = [script]
            # A query without '=' is passed to the script as an argument
            if '=' not in decoded_query:
                args.append(decoded_query)
            nobody = nobody_uid()
            self.wfile.flush() # Always flush before forking
            pid = os.fork()
            if pid != 0:
                # Parent
                pid, sts = os.waitpid(pid, 0)
                # throw away additional data [see bug #427345]
                while select.select([self.rfile], [], [], 0)[0]:
                    if not self.rfile.read(1):
                        break
                exitcode = os.waitstatus_to_exitcode(sts)
                if exitcode:
                    self.log_error(f"CGI script exit code {exitcode}")
                return
            # Child
            try:
                try:
                    os.setuid(nobody)
                except OSError:
                    # Best effort: keep the current uid if it can't change
                    pass
                # Wire the connection to the script's stdin/stdout
                os.dup2(self.rfile.fileno(), 0)
                os.dup2(self.wfile.fileno(), 1)
                os.execve(scriptfile, args, env)
            except BaseException:
                # Deliberately catch everything: the child must never
                # fall back into the server's request loop.
                self.server.handle_error(self.request, self.client_address)
                os._exit(127)

        else:
            # Non-Unix -- use subprocess
            import subprocess
            cmdline = [scriptfile]
            if self.is_python(scriptfile):
                interp = sys.executable
                if interp.lower().endswith("w.exe"):
                    # On Windows, use python.exe, not pythonw.exe
                    interp = interp[:-5] + interp[-4:]
                cmdline = [interp, '-u'] + cmdline
            # NOTE(review): unlike the fork branch, the raw query (without
            # '+' replaced by spaces) is passed here -- confirm intent.
            if '=' not in query:
                cmdline.append(query)
            self.log_message("command: %s", subprocess.list2cmdline(cmdline))
            try:
                nbytes = int(length)
            except (TypeError, ValueError):
                # Missing or malformed Content-Length: no request body
                nbytes = 0
            p = subprocess.Popen(cmdline,
                                 stdin=subprocess.PIPE,
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE,
                                 env = env
                                 )
            if self.command.lower() == "post" and nbytes > 0:
                data = self.rfile.read(nbytes)
            else:
                data = None
            # throw away additional data [see bug #427345]
            while select.select([self.rfile._sock], [], [], 0)[0]:
                if not self.rfile._sock.recv(1):
                    break
            stdout, stderr = p.communicate(data)
            self.wfile.write(stdout)
            if stderr:
                self.log_error('%s', stderr)
            p.stderr.close()
            p.stdout.close()
            status = p.returncode
            if status:
                self.log_error("CGI script exit status %#x", status)
            else:
                self.log_message("CGI script exited OK")
1228
1229
1230def _get_best_family(*address):
1231    infos = socket.getaddrinfo(
1232        *address,
1233        type=socket.SOCK_STREAM,
1234        flags=socket.AI_PASSIVE,
1235    )
1236    family, type, proto, canonname, sockaddr = next(iter(infos))
1237    return family, sockaddr
1238
1239
def test(HandlerClass=BaseHTTPRequestHandler,
         ServerClass=ThreadingHTTPServer,
         protocol="HTTP/1.0", port=8000, bind=None):
    """Test the HTTP request handler class.

    Runs an HTTP server on port 8000 (or the port argument), bound to
    *bind*, and serves until interrupted from the keyboard.  Note that
    this sets ServerClass.address_family and HandlerClass.protocol_version
    on the objects passed in.

    """
    family, server_address = _get_best_family(bind, port)
    ServerClass.address_family = family
    HandlerClass.protocol_version = protocol

    with ServerClass(server_address, HandlerClass) as httpd:
        host, port = httpd.socket.getsockname()[:2]
        # Hosts containing ':' (IPv6 literals) need brackets inside a URL
        if ':' in host:
            url_host = f'[{host}]'
        else:
            url_host = host
        print(
            f"Serving HTTP on {host} port {port} "
            f"(http://{url_host}:{port}/) ..."
        )
        try:
            httpd.serve_forever()
        except KeyboardInterrupt:
            print("\nKeyboard interrupt received, exiting.")
            sys.exit(0)
1263
if __name__ == '__main__':
    # Command-line entry point: parse the options and start a server.
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--cgi', action='store_true',
                        help='Run as CGI Server')
    parser.add_argument('--bind', '-b', metavar='ADDRESS',
                        help='Specify alternate bind address '
                             '[default: all interfaces]')
    parser.add_argument('--directory', '-d', default=os.getcwd(),
                        help='Specify alternative directory '
                        '[default:current directory]')
    parser.add_argument('port', action='store',
                        default=8000, type=int,
                        nargs='?',
                        help='Specify alternate port [default: 8000]')
    args = parser.parse_args()
    # Bind --directory for both handler kinds.  CGIHTTPRequestHandler
    # inherits its constructor from SimpleHTTPRequestHandler, so it takes
    # the same ``directory`` keyword; previously --directory was silently
    # ignored when --cgi was given.
    if args.cgi:
        base_handler = CGIHTTPRequestHandler
    else:
        base_handler = SimpleHTTPRequestHandler
    handler_class = partial(base_handler, directory=args.directory)

    # ensure dual-stack is not disabled; ref #38907
    class DualStackServer(ThreadingHTTPServer):
        def server_bind(self):
            # suppress exception when protocol is IPv4
            with contextlib.suppress(Exception):
                self.socket.setsockopt(
                    socket.IPPROTO_IPV6, socket.IPV6_V6ONLY, 0)
            return super().server_bind()

    test(
        HandlerClass=handler_class,
        ServerClass=DualStackServer,
        port=args.port,
        bind=args.bind,
    )
1302