• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1"""HTTP server classes.
2
3Note: BaseHTTPRequestHandler doesn't implement any HTTP request; see
4SimpleHTTPRequestHandler for simple implementations of GET, HEAD and POST,
5and (deprecated) CGIHTTPRequestHandler for CGI scripts.
6
7It does, however, optionally implement HTTP/1.1 persistent connections.
8
9Notes on CGIHTTPRequestHandler
10------------------------------
11
12This class is deprecated. It implements GET and POST requests to cgi-bin scripts.
13
14If the os.fork() function is not present (Windows), subprocess.Popen() is used,
15with slightly altered but never documented semantics.  Use from a threaded
16process is likely to trigger a warning at os.fork() time.
17
18In all cases, the implementation is intentionally naive -- all
19requests are executed synchronously.
20
21SECURITY WARNING: DON'T USE THIS CODE UNLESS YOU ARE INSIDE A FIREWALL
22-- it may execute arbitrary Python code or external programs.
23
24Note that status code 200 is sent prior to execution of a CGI script, so
25scripts cannot send other status codes such as 302 (redirect).
26
27XXX To do:
28
29- log requests even later (to capture byte count)
30- log user-agent header and other interesting goodies
31- send error log to separate file
32"""
33
34
35# See also:
36#
37# HTTP Working Group                                        T. Berners-Lee
38# INTERNET-DRAFT                                            R. T. Fielding
39# <draft-ietf-http-v10-spec-00.txt>                     H. Frystyk Nielsen
40# Expires September 8, 1995                                  March 8, 1995
41#
42# URL: http://www.ics.uci.edu/pub/ietf/http/draft-ietf-http-v10-spec-00.txt
43#
44# and
45#
46# Network Working Group                                      R. Fielding
47# Request for Comments: 2616                                       et al
48# Obsoletes: 2068                                              June 1999
49# Category: Standards Track
50#
51# URL: http://www.faqs.org/rfcs/rfc2616.html
52
53# Log files
54# ---------
55#
56# Here's a quote from the NCSA httpd docs about log file format.
57#
58# | The logfile format is as follows. Each line consists of:
59# |
60# | host rfc931 authuser [DD/Mon/YYYY:hh:mm:ss] "request" ddd bbbb
61# |
62# |        host: Either the DNS name or the IP number of the remote client
63# |        rfc931: Any information returned by identd for this person,
64# |                - otherwise.
65# |        authuser: If user sent a userid for authentication, the user name,
66# |                  - otherwise.
67# |        DD: Day
68# |        Mon: Month (calendar name)
69# |        YYYY: Year
70# |        hh: hour (24-hour format, the machine's timezone)
71# |        mm: minutes
72# |        ss: seconds
73# |        request: The first line of the HTTP request as sent by the client.
74# |        ddd: the status code returned by the server, - if not available.
75# |        bbbb: the total number of bytes sent,
76# |              *not including the HTTP/1.0 header*, - if not available
77# |
78# | You can determine the name of the file accessed through request.
79#
80# (Actually, the latter is only true if you know the server configuration
81# at the time the request was made!)
82
# Module version; appended to the "BaseHTTP/" and "SimpleHTTP/"
# server_version strings of the handler classes below.
__version__ = "0.6"

# Names exported by "from <this module> import *".
__all__ = [
    "HTTPServer", "ThreadingHTTPServer", "BaseHTTPRequestHandler",
    "SimpleHTTPRequestHandler", "CGIHTTPRequestHandler",
]
89
90import copy
91import datetime
92import email.utils
93import html
94import http.client
95import io
96import itertools
97import mimetypes
98import os
99import posixpath
100import select
101import shutil
102import socket # For gethostbyaddr()
103import socketserver
104import sys
105import time
106import urllib.parse
107
108from http import HTTPStatus
109
110
# Default error message template.
# It is a %-format string filled from a mapping with three keys:
#   code    - the status code (rendered both as %d and as %s),
#   message - the short reason text,
#   explain - the longer explanation.
# BaseHTTPRequestHandler.send_error() HTML-escapes message and explain
# before substituting them.
DEFAULT_ERROR_MESSAGE = """\
<!DOCTYPE HTML>
<html lang="en">
    <head>
        <meta charset="utf-8">
        <title>Error response</title>
    </head>
    <body>
        <h1>Error response</h1>
        <p>Error code: %(code)d</p>
        <p>Message: %(message)s.</p>
        <p>Error code explanation: %(code)s - %(explain)s.</p>
    </body>
</html>
"""

# Content-Type header value sent together with the error page above.
DEFAULT_ERROR_CONTENT_TYPE = "text/html;charset=utf-8"
129
class HTTPServer(socketserver.TCPServer):
    """TCP server that remembers its own host name and port once bound."""

    # Truthy flag inherited semantics from socketserver.TCPServer;
    # seems to make sense in a testing environment.
    allow_reuse_address = 1

    def server_bind(self):
        """Bind the socket, then store the server name and port.

        After the base-class bind, the first two items of
        self.server_address are taken as (host, port); the host is
        resolved with socket.getfqdn() and kept as self.server_name,
        the port is kept as self.server_port.
        """
        socketserver.TCPServer.server_bind(self)
        bound_host, bound_port = self.server_address[:2]
        self.server_name = socket.getfqdn(bound_host)
        self.server_port = bound_port
140
141
class ThreadingHTTPServer(socketserver.ThreadingMixIn, HTTPServer):
    """HTTPServer combined with socketserver.ThreadingMixIn."""

    # Overrides the mixin's daemon_threads setting for this server class.
    daemon_threads = True
144
145
146class BaseHTTPRequestHandler(socketserver.StreamRequestHandler):
147
148    """HTTP request handler base class.
149
150    The following explanation of HTTP serves to guide you through the
151    code as well as to expose any misunderstandings I may have about
152    HTTP (so you don't need to read the code to figure out I'm wrong
153    :-).
154
155    HTTP (HyperText Transfer Protocol) is an extensible protocol on
156    top of a reliable stream transport (e.g. TCP/IP).  The protocol
157    recognizes three parts to a request:
158
159    1. One line identifying the request type and path
160    2. An optional set of RFC-822-style headers
161    3. An optional data part
162
163    The headers and data are separated by a blank line.
164
165    The first line of the request has the form
166
167    <command> <path> <version>
168
169    where <command> is a (case-sensitive) keyword such as GET or POST,
170    <path> is a string containing path information for the request,
171    and <version> should be the string "HTTP/1.0" or "HTTP/1.1".
172    <path> is encoded using the URL encoding scheme (using %xx to signify
173    the ASCII character with hex code xx).
174
175    The specification specifies that lines are separated by CRLF but
176    for compatibility with the widest range of clients recommends
177    servers also handle LF.  Similarly, whitespace in the request line
178    is treated sensibly (allowing multiple spaces between components
179    and allowing trailing whitespace).
180
181    Similarly, for output, lines ought to be separated by CRLF pairs
182    but most clients grok LF characters just fine.
183
184    If the first line of the request has the form
185
186    <command> <path>
187
188    (i.e. <version> is left out) then this is assumed to be an HTTP
189    0.9 request; this form has no optional headers and data part and
190    the reply consists of just the data.
191
192    The reply form of the HTTP 1.x protocol again has three parts:
193
194    1. One line giving the response code
195    2. An optional set of RFC-822-style headers
196    3. The data
197
198    Again, the headers and data are separated by a blank line.
199
200    The response code line has the form
201
202    <version> <responsecode> <responsestring>
203
204    where <version> is the protocol version ("HTTP/1.0" or "HTTP/1.1"),
205    <responsecode> is a 3-digit response code indicating success or
206    failure of the request, and <responsestring> is an optional
207    human-readable string explaining what the response code means.
208
209    This server parses the request and the headers, and then calls a
210    function specific to the request type (<command>).  Specifically,
211    a request SPAM will be handled by a method do_SPAM().  If no
212    such method exists the server sends an error response to the
213    client.  If it exists, it is called with no arguments:
214
215    do_SPAM()
216
217    Note that the request name is case sensitive (i.e. SPAM and spam
218    are different requests).
219
220    The various request details are stored in instance variables:
221
222    - client_address is the client IP address in the form (host,
223    port);
224
225    - command, path and version are the broken-down request line;
226
227    - headers is an instance of email.message.Message (or a derived
228    class) containing the header information;
229
230    - rfile is a file object open for reading positioned at the
231    start of the optional input data part;
232
233    - wfile is a file object open for writing.
234
235    IT IS IMPORTANT TO ADHERE TO THE PROTOCOL FOR WRITING!
236
237    The first thing to be written must be the response line.  Then
238    follow 0 or more header lines, then a blank line, and then the
239    actual data (if any).  The meaning of the header lines depends on
240    the command executed by the server; in most cases, when data is
241    returned, there should be at least one header line of the form
242
243    Content-type: <type>/<subtype>
244
245    where <type> and <subtype> should be registered MIME types,
246    e.g. "text/html" or "text/plain".
247
248    """
249
    # The Python system version, truncated to its first component.
    # Combined with server_version by version_string().
    sys_version = "Python/" + sys.version.split()[0]

    # The server software version.  You may want to override this.
    # The format is multiple whitespace-separated strings,
    # where each string is of the form name[/version].
    server_version = "BaseHTTP/" + __version__

    # Template and Content-Type used by send_error() for the error page.
    # The template is %-formatted with the keys 'code', 'message' and
    # 'explain'.
    error_message_format = DEFAULT_ERROR_MESSAGE
    error_content_type = DEFAULT_ERROR_CONTENT_TYPE

    # The default request version.  This only affects responses up until
    # the point where the request line is parsed, so it mainly decides what
    # the client gets back when sending a malformed request line.
    # Most web servers default to HTTP 0.9, i.e. don't send a status line.
    default_request_version = "HTTP/0.9"
266
267    def parse_request(self):
268        """Parse a request (internal).
269
270        The request should be stored in self.raw_requestline; the results
271        are in self.command, self.path, self.request_version and
272        self.headers.
273
274        Return True for success, False for failure; on failure, any relevant
275        error response has already been sent back.
276
277        """
278        self.command = None  # set in case of error on the first line
279        self.request_version = version = self.default_request_version
280        self.close_connection = True
281        requestline = str(self.raw_requestline, 'iso-8859-1')
282        requestline = requestline.rstrip('\r\n')
283        self.requestline = requestline
284        words = requestline.split()
285        if len(words) == 0:
286            return False
287
288        if len(words) >= 3:  # Enough to determine protocol version
289            version = words[-1]
290            try:
291                if not version.startswith('HTTP/'):
292                    raise ValueError
293                base_version_number = version.split('/', 1)[1]
294                version_number = base_version_number.split(".")
295                # RFC 2145 section 3.1 says there can be only one "." and
296                #   - major and minor numbers MUST be treated as
297                #      separate integers;
298                #   - HTTP/2.4 is a lower version than HTTP/2.13, which in
299                #      turn is lower than HTTP/12.3;
300                #   - Leading zeros MUST be ignored by recipients.
301                if len(version_number) != 2:
302                    raise ValueError
303                if any(not component.isdigit() for component in version_number):
304                    raise ValueError("non digit in http version")
305                if any(len(component) > 10 for component in version_number):
306                    raise ValueError("unreasonable length http version")
307                version_number = int(version_number[0]), int(version_number[1])
308            except (ValueError, IndexError):
309                self.send_error(
310                    HTTPStatus.BAD_REQUEST,
311                    "Bad request version (%r)" % version)
312                return False
313            if version_number >= (1, 1) and self.protocol_version >= "HTTP/1.1":
314                self.close_connection = False
315            if version_number >= (2, 0):
316                self.send_error(
317                    HTTPStatus.HTTP_VERSION_NOT_SUPPORTED,
318                    "Invalid HTTP version (%s)" % base_version_number)
319                return False
320            self.request_version = version
321
322        if not 2 <= len(words) <= 3:
323            self.send_error(
324                HTTPStatus.BAD_REQUEST,
325                "Bad request syntax (%r)" % requestline)
326            return False
327        command, path = words[:2]
328        if len(words) == 2:
329            self.close_connection = True
330            if command != 'GET':
331                self.send_error(
332                    HTTPStatus.BAD_REQUEST,
333                    "Bad HTTP/0.9 request type (%r)" % command)
334                return False
335        self.command, self.path = command, path
336
337        # gh-87389: The purpose of replacing '//' with '/' is to protect
338        # against open redirect attacks possibly triggered if the path starts
339        # with '//' because http clients treat //path as an absolute URI
340        # without scheme (similar to http://path) rather than a path.
341        if self.path.startswith('//'):
342            self.path = '/' + self.path.lstrip('/')  # Reduce to a single /
343
344        # Examine the headers and look for a Connection directive.
345        try:
346            self.headers = http.client.parse_headers(self.rfile,
347                                                     _class=self.MessageClass)
348        except http.client.LineTooLong as err:
349            self.send_error(
350                HTTPStatus.REQUEST_HEADER_FIELDS_TOO_LARGE,
351                "Line too long",
352                str(err))
353            return False
354        except http.client.HTTPException as err:
355            self.send_error(
356                HTTPStatus.REQUEST_HEADER_FIELDS_TOO_LARGE,
357                "Too many headers",
358                str(err)
359            )
360            return False
361
362        conntype = self.headers.get('Connection', "")
363        if conntype.lower() == 'close':
364            self.close_connection = True
365        elif (conntype.lower() == 'keep-alive' and
366              self.protocol_version >= "HTTP/1.1"):
367            self.close_connection = False
368        # Examine the headers and look for an Expect directive
369        expect = self.headers.get('Expect', "")
370        if (expect.lower() == "100-continue" and
371                self.protocol_version >= "HTTP/1.1" and
372                self.request_version >= "HTTP/1.1"):
373            if not self.handle_expect_100():
374                return False
375        return True
376
    def handle_expect_100(self):
        """Decide what to do with an "Expect: 100-continue" header.

        If the client is expecting a 100 Continue response, we must
        respond with either a 100 Continue or a final response before
        waiting for the request body. The default is to always respond
        with a 100 Continue. You can behave differently (for example,
        reject unauthorized requests) by overriding this method.

        This method should either return True (possibly after sending
        a 100 Continue response) or send an error response and return
        False.

        parse_request() calls this method and reports failure itself
        when the return value is false.

        """
        # Status line only: unlike send_response(), this neither logs the
        # request nor adds Server and Date headers.
        self.send_response_only(HTTPStatus.CONTINUE)
        # Terminate the (otherwise empty) header block and flush it.
        self.end_headers()
        return True
394
395    def handle_one_request(self):
396        """Handle a single HTTP request.
397
398        You normally don't need to override this method; see the class
399        __doc__ string for information on how to handle specific HTTP
400        commands such as GET and POST.
401
402        """
403        try:
404            self.raw_requestline = self.rfile.readline(65537)
405            if len(self.raw_requestline) > 65536:
406                self.requestline = ''
407                self.request_version = ''
408                self.command = ''
409                self.send_error(HTTPStatus.REQUEST_URI_TOO_LONG)
410                return
411            if not self.raw_requestline:
412                self.close_connection = True
413                return
414            if not self.parse_request():
415                # An error code has been sent, just exit
416                return
417            mname = 'do_' + self.command
418            if not hasattr(self, mname):
419                self.send_error(
420                    HTTPStatus.NOT_IMPLEMENTED,
421                    "Unsupported method (%r)" % self.command)
422                return
423            method = getattr(self, mname)
424            method()
425            self.wfile.flush() #actually send the response if not already done.
426        except TimeoutError as e:
427            #a read or a write timed out.  Discard this connection
428            self.log_error("Request timed out: %r", e)
429            self.close_connection = True
430            return
431
432    def handle(self):
433        """Handle multiple requests if necessary."""
434        self.close_connection = True
435
436        self.handle_one_request()
437        while not self.close_connection:
438            self.handle_one_request()
439
440    def send_error(self, code, message=None, explain=None):
441        """Send and log an error reply.
442
443        Arguments are
444        * code:    an HTTP error code
445                   3 digits
446        * message: a simple optional 1 line reason phrase.
447                   *( HTAB / SP / VCHAR / %x80-FF )
448                   defaults to short entry matching the response code
449        * explain: a detailed message defaults to the long entry
450                   matching the response code.
451
452        This sends an error response (so it must be called before any
453        output has been generated), logs the error, and finally sends
454        a piece of HTML explaining the error to the user.
455
456        """
457
458        try:
459            shortmsg, longmsg = self.responses[code]
460        except KeyError:
461            shortmsg, longmsg = '???', '???'
462        if message is None:
463            message = shortmsg
464        if explain is None:
465            explain = longmsg
466        self.log_error("code %d, message %s", code, message)
467        self.send_response(code, message)
468        self.send_header('Connection', 'close')
469
470        # Message body is omitted for cases described in:
471        #  - RFC7230: 3.3. 1xx, 204(No Content), 304(Not Modified)
472        #  - RFC7231: 6.3.6. 205(Reset Content)
473        body = None
474        if (code >= 200 and
475            code not in (HTTPStatus.NO_CONTENT,
476                         HTTPStatus.RESET_CONTENT,
477                         HTTPStatus.NOT_MODIFIED)):
478            # HTML encode to prevent Cross Site Scripting attacks
479            # (see bug #1100201)
480            content = (self.error_message_format % {
481                'code': code,
482                'message': html.escape(message, quote=False),
483                'explain': html.escape(explain, quote=False)
484            })
485            body = content.encode('UTF-8', 'replace')
486            self.send_header("Content-Type", self.error_content_type)
487            self.send_header('Content-Length', str(len(body)))
488        self.end_headers()
489
490        if self.command != 'HEAD' and body:
491            self.wfile.write(body)
492
493    def send_response(self, code, message=None):
494        """Add the response header to the headers buffer and log the
495        response code.
496
497        Also send two standard headers with the server software
498        version and the current date.
499
500        """
501        self.log_request(code)
502        self.send_response_only(code, message)
503        self.send_header('Server', self.version_string())
504        self.send_header('Date', self.date_time_string())
505
506    def send_response_only(self, code, message=None):
507        """Send the response header only."""
508        if self.request_version != 'HTTP/0.9':
509            if message is None:
510                if code in self.responses:
511                    message = self.responses[code][0]
512                else:
513                    message = ''
514            if not hasattr(self, '_headers_buffer'):
515                self._headers_buffer = []
516            self._headers_buffer.append(("%s %d %s\r\n" %
517                    (self.protocol_version, code, message)).encode(
518                        'latin-1', 'strict'))
519
520    def send_header(self, keyword, value):
521        """Send a MIME header to the headers buffer."""
522        if self.request_version != 'HTTP/0.9':
523            if not hasattr(self, '_headers_buffer'):
524                self._headers_buffer = []
525            self._headers_buffer.append(
526                ("%s: %s\r\n" % (keyword, value)).encode('latin-1', 'strict'))
527
528        if keyword.lower() == 'connection':
529            if value.lower() == 'close':
530                self.close_connection = True
531            elif value.lower() == 'keep-alive':
532                self.close_connection = False
533
534    def end_headers(self):
535        """Send the blank line ending the MIME headers."""
536        if self.request_version != 'HTTP/0.9':
537            self._headers_buffer.append(b"\r\n")
538            self.flush_headers()
539
540    def flush_headers(self):
541        if hasattr(self, '_headers_buffer'):
542            self.wfile.write(b"".join(self._headers_buffer))
543            self._headers_buffer = []
544
545    def log_request(self, code='-', size='-'):
546        """Log an accepted request.
547
548        This is called by send_response().
549
550        """
551        if isinstance(code, HTTPStatus):
552            code = code.value
553        self.log_message('"%s" %s %s',
554                         self.requestline, str(code), str(size))
555
    def log_error(self, format, *args):
        """Log an error.

        This is called when a request cannot be fulfilled.  By
        default it passes the message on to log_message().

        Arguments are the same as for log_message(): a %-style format
        string followed by the values it consumes.

        XXX This should go to the separate error log.

        """

        # Plain delegation; override this method to route errors elsewhere.
        self.log_message(format, *args)
569
    # https://en.wikipedia.org/wiki/List_of_Unicode_characters#Control_codes
    # Translation table applied by log_message() to every logged message:
    # code points 0x00-0x1f and 0x7f-0x9f become a literal "\xNN" escape.
    _control_char_table = str.maketrans(
            {c: fr'\x{c:02x}' for c in itertools.chain(range(0x20), range(0x7f,0xa0))})
    # A backslash itself is doubled (presumably so that escapes produced
    # above cannot be confused with backslashes in the original text).
    _control_char_table[ord('\\')] = r'\\'
574
575    def log_message(self, format, *args):
576        """Log an arbitrary message.
577
578        This is used by all other logging functions.  Override
579        it if you have specific logging wishes.
580
581        The first argument, FORMAT, is a format string for the
582        message to be logged.  If the format string contains
583        any % escapes requiring parameters, they should be
584        specified as subsequent arguments (it's just like
585        printf!).
586
587        The client ip and current date/time are prefixed to
588        every message.
589
590        Unicode control characters are replaced with escaped hex
591        before writing the output to stderr.
592
593        """
594
595        message = format % args
596        sys.stderr.write("%s - - [%s] %s\n" %
597                         (self.address_string(),
598                          self.log_date_time_string(),
599                          message.translate(self._control_char_table)))
600
601    def version_string(self):
602        """Return the server software version string."""
603        return self.server_version + ' ' + self.sys_version
604
605    def date_time_string(self, timestamp=None):
606        """Return the current date and time formatted for a message header."""
607        if timestamp is None:
608            timestamp = time.time()
609        return email.utils.formatdate(timestamp, usegmt=True)
610
611    def log_date_time_string(self):
612        """Return the current time formatted for logging."""
613        now = time.time()
614        year, month, day, hh, mm, ss, x, y, z = time.localtime(now)
615        s = "%02d/%3s/%04d %02d:%02d:%02d" % (
616                day, self.monthname[month], year, hh, mm, ss)
617        return s
618
    # English weekday abbreviations, Monday first.  Not referenced by the
    # code visible in this part of the file.
    weekdayname = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']

    # English month abbreviations.  Index 0 is a None placeholder so the
    # list can be indexed with a 1-based month number, as
    # log_date_time_string() does.
    monthname = [None,
                 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
624
625    def address_string(self):
626        """Return the client address."""
627
628        return self.client_address[0]
629
    # Essentially static class variables

    # The version of the HTTP protocol we support.
    # Set this to HTTP/1.1 to enable automatic keepalive
    # This string is written at the start of every status line and is
    # compared (as a string) against "HTTP/1.1" in parse_request().
    protocol_version = "HTTP/1.0"

    # MessageClass used to parse headers
    # (passed to http.client.parse_headers() as _class).
    MessageClass = http.client.HTTPMessage

    # hack to maintain backwards compatibility
    # Maps every HTTPStatus member to a (short phrase, long description)
    # pair; send_error() and send_response_only() look up default texts
    # here.
    responses = {
        v: (v.phrase, v.description)
        for v in HTTPStatus.__members__.values()
    }
644
645
646class SimpleHTTPRequestHandler(BaseHTTPRequestHandler):
647
648    """Simple HTTP request handler with GET and HEAD commands.
649
650    This serves files from the current directory and any of its
651    subdirectories.  The MIME type for files is determined by
652    calling the .guess_type() method.
653
654    The GET and HEAD requests are identical except that the HEAD
655    request omits the actual contents of the file.
656
657    """
658
    # Overrides the base handler's software name.
    server_version = "SimpleHTTP/" + __version__
    # File names tried, in this order, by send_head() when the request
    # path is a directory; the first one that exists is served.
    index_pages = ("index.html", "index.htm")
    # Suffix -> MIME type table; the same dict object is also bound to
    # _encodings_map_default.
    # NOTE(review): presumably consulted by guess_type(), which is
    # outside the part of the file visible here -- confirm.
    extensions_map = _encodings_map_default = {
        '.gz': 'application/gzip',
        '.Z': 'application/octet-stream',
        '.bz2': 'application/x-bzip2',
        '.xz': 'application/x-xz',
    }
667
668    def __init__(self, *args, directory=None, **kwargs):
669        if directory is None:
670            directory = os.getcwd()
671        self.directory = os.fspath(directory)
672        super().__init__(*args, **kwargs)
673
674    def do_GET(self):
675        """Serve a GET request."""
676        f = self.send_head()
677        if f:
678            try:
679                self.copyfile(f, self.wfile)
680            finally:
681                f.close()
682
683    def do_HEAD(self):
684        """Serve a HEAD request."""
685        f = self.send_head()
686        if f:
687            f.close()
688
689    def send_head(self):
690        """Common code for GET and HEAD commands.
691
692        This sends the response code and MIME headers.
693
694        Return value is either a file object (which has to be copied
695        to the outputfile by the caller unless the command was HEAD,
696        and must be closed by the caller under all circumstances), or
697        None, in which case the caller has nothing further to do.
698
699        """
700        path = self.translate_path(self.path)
701        f = None
702        if os.path.isdir(path):
703            parts = urllib.parse.urlsplit(self.path)
704            if not parts.path.endswith('/'):
705                # redirect browser - doing basically what apache does
706                self.send_response(HTTPStatus.MOVED_PERMANENTLY)
707                new_parts = (parts[0], parts[1], parts[2] + '/',
708                             parts[3], parts[4])
709                new_url = urllib.parse.urlunsplit(new_parts)
710                self.send_header("Location", new_url)
711                self.send_header("Content-Length", "0")
712                self.end_headers()
713                return None
714            for index in self.index_pages:
715                index = os.path.join(path, index)
716                if os.path.isfile(index):
717                    path = index
718                    break
719            else:
720                return self.list_directory(path)
721        ctype = self.guess_type(path)
722        # check for trailing "/" which should return 404. See Issue17324
723        # The test for this was added in test_httpserver.py
724        # However, some OS platforms accept a trailingSlash as a filename
725        # See discussion on python-dev and Issue34711 regarding
726        # parsing and rejection of filenames with a trailing slash
727        if path.endswith("/"):
728            self.send_error(HTTPStatus.NOT_FOUND, "File not found")
729            return None
730        try:
731            f = open(path, 'rb')
732        except OSError:
733            self.send_error(HTTPStatus.NOT_FOUND, "File not found")
734            return None
735
736        try:
737            fs = os.fstat(f.fileno())
738            # Use browser cache if possible
739            if ("If-Modified-Since" in self.headers
740                    and "If-None-Match" not in self.headers):
741                # compare If-Modified-Since and time of last file modification
742                try:
743                    ims = email.utils.parsedate_to_datetime(
744                        self.headers["If-Modified-Since"])
745                except (TypeError, IndexError, OverflowError, ValueError):
746                    # ignore ill-formed values
747                    pass
748                else:
749                    if ims.tzinfo is None:
750                        # obsolete format with no timezone, cf.
751                        # https://tools.ietf.org/html/rfc7231#section-7.1.1.1
752                        ims = ims.replace(tzinfo=datetime.timezone.utc)
753                    if ims.tzinfo is datetime.timezone.utc:
754                        # compare to UTC datetime of last modification
755                        last_modif = datetime.datetime.fromtimestamp(
756                            fs.st_mtime, datetime.timezone.utc)
757                        # remove microseconds, like in If-Modified-Since
758                        last_modif = last_modif.replace(microsecond=0)
759
760                        if last_modif <= ims:
761                            self.send_response(HTTPStatus.NOT_MODIFIED)
762                            self.end_headers()
763                            f.close()
764                            return None
765
766            self.send_response(HTTPStatus.OK)
767            self.send_header("Content-type", ctype)
768            self.send_header("Content-Length", str(fs[6]))
769            self.send_header("Last-Modified",
770                self.date_time_string(fs.st_mtime))
771            self.end_headers()
772            return f
773        except:
774            f.close()
775            raise
776
777    def list_directory(self, path):
778        """Helper to produce a directory listing (absent index.html).
779
780        Return value is either a file object, or None (indicating an
781        error).  In either case, the headers are sent, making the
782        interface the same as for send_head().
783
784        """
785        try:
786            list = os.listdir(path)
787        except OSError:
788            self.send_error(
789                HTTPStatus.NOT_FOUND,
790                "No permission to list directory")
791            return None
792        list.sort(key=lambda a: a.lower())
793        r = []
794        try:
795            displaypath = urllib.parse.unquote(self.path,
796                                               errors='surrogatepass')
797        except UnicodeDecodeError:
798            displaypath = urllib.parse.unquote(self.path)
799        displaypath = html.escape(displaypath, quote=False)
800        enc = sys.getfilesystemencoding()
801        title = f'Directory listing for {displaypath}'
802        r.append('<!DOCTYPE HTML>')
803        r.append('<html lang="en">')
804        r.append('<head>')
805        r.append(f'<meta charset="{enc}">')
806        r.append(f'<title>{title}</title>\n</head>')
807        r.append(f'<body>\n<h1>{title}</h1>')
808        r.append('<hr>\n<ul>')
809        for name in list:
810            fullname = os.path.join(path, name)
811            displayname = linkname = name
812            # Append / for directories or @ for symbolic links
813            if os.path.isdir(fullname):
814                displayname = name + "/"
815                linkname = name + "/"
816            if os.path.islink(fullname):
817                displayname = name + "@"
818                # Note: a link to a directory displays with @ and links with /
819            r.append('<li><a href="%s">%s</a></li>'
820                    % (urllib.parse.quote(linkname,
821                                          errors='surrogatepass'),
822                       html.escape(displayname, quote=False)))
823        r.append('</ul>\n<hr>\n</body>\n</html>\n')
824        encoded = '\n'.join(r).encode(enc, 'surrogateescape')
825        f = io.BytesIO()
826        f.write(encoded)
827        f.seek(0)
828        self.send_response(HTTPStatus.OK)
829        self.send_header("Content-type", "text/html; charset=%s" % enc)
830        self.send_header("Content-Length", str(len(encoded)))
831        self.end_headers()
832        return f
833
834    def translate_path(self, path):
835        """Translate a /-separated PATH to the local filename syntax.
836
837        Components that mean special things to the local file system
838        (e.g. drive or directory names) are ignored.  (XXX They should
839        probably be diagnosed.)
840
841        """
842        # abandon query parameters
843        path = path.split('?',1)[0]
844        path = path.split('#',1)[0]
845        # Don't forget explicit trailing slash when normalizing. Issue17324
846        trailing_slash = path.rstrip().endswith('/')
847        try:
848            path = urllib.parse.unquote(path, errors='surrogatepass')
849        except UnicodeDecodeError:
850            path = urllib.parse.unquote(path)
851        path = posixpath.normpath(path)
852        words = path.split('/')
853        words = filter(None, words)
854        path = self.directory
855        for word in words:
856            if os.path.dirname(word) or word in (os.curdir, os.pardir):
857                # Ignore components that are not a simple file/directory name
858                continue
859            path = os.path.join(path, word)
860        if trailing_slash:
861            path += '/'
862        return path
863
864    def copyfile(self, source, outputfile):
865        """Copy all data between two file objects.
866
867        The SOURCE argument is a file object open for reading
868        (or anything with a read() method) and the DESTINATION
869        argument is a file object open for writing (or
870        anything with a write() method).
871
872        The only reason for overriding this would be to change
873        the block size or perhaps to replace newlines by CRLF
874        -- note however that this the default server uses this
875        to copy binary data as well.
876
877        """
878        shutil.copyfileobj(source, outputfile)
879
880    def guess_type(self, path):
881        """Guess the type of a file.
882
883        Argument is a PATH (a filename).
884
885        Return value is a string of the form type/subtype,
886        usable for a MIME Content-type header.
887
888        The default implementation looks the file's extension
889        up in the table self.extensions_map, using application/octet-stream
890        as a default; however it would be permissible (if
891        slow) to look inside the data to make a better guess.
892
893        """
894        base, ext = posixpath.splitext(path)
895        if ext in self.extensions_map:
896            return self.extensions_map[ext]
897        ext = ext.lower()
898        if ext in self.extensions_map:
899            return self.extensions_map[ext]
900        guess, _ = mimetypes.guess_file_type(path)
901        if guess:
902            return guess
903        return 'application/octet-stream'
904
905
906# Utilities for CGIHTTPRequestHandler
907
def _url_collapse_path(path):
    """
    Given a URL path, remove extra '/'s and '.' path elements, collapse
    any '..' references and return the collapsed path.

    Implements something akin to RFC-2396 5.2 step 6 to parse relative paths.
    The utility of this function is limited to the is_cgi method and helps
    prevent some security attacks.

    The path is percent-decoded before it is collapsed.  A query string
    (everything after the first '?') is not collapsed; when non-empty it
    is re-attached to the result unchanged.

    Returns: The reconstituted URL, which will always start with a '/'.
    The result has the form '/<head>/<tail>', so a path with no directory
    part collapses to '//<tail>'.

    Raises: IndexError if too many '..' occur within the path.

    """
    # Query component should not be involved.
    path, _, query = path.partition('?')
    path = urllib.parse.unquote(path)

    # Similar to os.path.split(os.path.normpath(path)) but specific to URL
    # path semantics rather than local operating system semantics.
    # str.split() always returns at least one element, so there is always
    # a (possibly empty) tail component.
    *leading_parts, tail_part = path.split('/')
    head_parts = []
    for part in leading_parts:
        if part == '..':
            head_parts.pop()  # IndexError if more '..' than prior parts
        elif part and part != '.':
            head_parts.append(part)

    if tail_part == '..':
        head_parts.pop()  # IndexError if there is nothing left to go up from
        tail_part = ''
    elif tail_part == '.':
        tail_part = ''

    if query:
        tail_part = '?'.join((tail_part, query))

    return '/' + '/'.join(head_parts) + '/' + tail_part
953
954
955
# Cached uid returned by nobody_uid(); None until it has been computed.
nobody = None

def nobody_uid():
    """Internal routine to get nobody's uid.

    Returns the uid of the 'nobody' account.  If there is no such account,
    one more than the largest uid in the password database is used
    instead.  The value is computed once and cached in the module-level
    ``nobody`` variable.

    Returns -1 (without caching) when the pwd module is not available.
    """
    global nobody
    # Compare against None rather than testing truthiness so that a cached
    # uid of 0 is still recognized as a cached value.
    if nobody is not None:
        return nobody
    try:
        import pwd
    except ImportError:
        return -1
    try:
        nobody = pwd.getpwnam('nobody')[2]
    except KeyError:
        nobody = 1 + max(x[2] for x in pwd.getpwall())
    return nobody
972
973
def executable(path):
    """Return whether os.access() reports execute permission for path."""
    may_execute = os.access(path, os.X_OK)
    return may_execute
977
978
class CGIHTTPRequestHandler(SimpleHTTPRequestHandler):

    """Complete HTTP server with GET, HEAD and POST commands.

    GET and HEAD also support running CGI scripts.

    The POST command is *only* implemented for CGI scripts.

    """

    def __init__(self, *args, **kwargs):
        # Instantiating the handler emits the deprecation warning.
        import warnings
        warnings._deprecated("http.server.CGIHTTPRequestHandler",
                             remove=(3, 15))
        super().__init__(*args, **kwargs)

    # Determine platform specifics
    have_fork = hasattr(os, 'fork')

    # Make rfile unbuffered -- we need to read one line and then pass
    # the rest to a subprocess, so we can't use buffered input.
    rbufsize = 0

    def do_POST(self):
        """Serve a POST request.

        This is only implemented for CGI scripts; any other target gets
        a 501 (Not Implemented) error response.

        """

        if self.is_cgi():
            self.run_cgi()
        else:
            self.send_error(
                HTTPStatus.NOT_IMPLEMENTED,
                "Can only POST to CGI scripts")

    def send_head(self):
        """Version of send_head that supports CGI scripts.

        CGI paths are handed to run_cgi(); everything else is served by
        SimpleHTTPRequestHandler.send_head().
        """
        if self.is_cgi():
            return self.run_cgi()
        else:
            return SimpleHTTPRequestHandler.send_head(self)

    def is_cgi(self):
        """Test whether self.path corresponds to a CGI script.

        Returns True and updates the cgi_info attribute to the tuple
        (dir, rest) if self.path requires running a CGI script.
        Returns False otherwise.

        If any exception is raised, the caller should assume that
        self.path was rejected as invalid and act accordingly.

        The default implementation tests whether the normalized url
        path begins with one of the strings in self.cgi_directories
        (and the next character is a '/' or the end of the string).

        """
        collapsed_path = _url_collapse_path(self.path)
        # Try every prefix that ends just before a '/' (after the leading
        # one) until one of them is a configured CGI directory.
        dir_sep = collapsed_path.find('/', 1)
        while (dir_sep > 0
               and collapsed_path[:dir_sep] not in self.cgi_directories):
            dir_sep = collapsed_path.find('/', dir_sep+1)
        if dir_sep > 0:
            head, tail = collapsed_path[:dir_sep], collapsed_path[dir_sep+1:]
            self.cgi_info = head, tail
            return True
        return False


    # URL path prefixes that is_cgi() treats as CGI directories.
    cgi_directories = ['/cgi-bin', '/htbin']

    def is_executable(self, path):
        """Test whether argument path is an executable file."""
        return executable(path)

    def is_python(self, path):
        """Test whether argument path is a Python script."""
        head, tail = os.path.splitext(path)
        return tail.lower() in (".py", ".pyw")

    @staticmethod
    def _basic_auth_user(credentials):
        """Return the user name from Basic auth credentials, or None.

        CREDENTIALS is the base64 text that follows the "Basic" scheme in
        an Authorization header.  The decoded value is split on the first
        ':' only, so a password that itself contains ':' does not prevent
        the user name from being recognized.  None is returned when the
        value cannot be decoded or contains no ':' at all.
        """
        import base64
        import binascii
        try:
            decoded = base64.decodebytes(
                credentials.encode('ascii')).decode('ascii')
        except (binascii.Error, UnicodeError):
            return None
        user, colon, _password = decoded.partition(':')
        return user if colon else None

    def run_cgi(self):
        """Execute a CGI script.

        Uses self.cgi_info (set by is_cgi()) to locate the script.  If
        the script does not exist, is not a plain file, or is not
        executable, an error response is sent and nothing is run.

        Otherwise the CGI environment is built from the request, a 200
        status line is sent, and the script is run: via os.fork() and
        os.execve() where fork is available, and via subprocess.Popen()
        elsewhere.  Always returns None.
        """
        cgi_dir, rest = self.cgi_info
        path = cgi_dir + '/' + rest
        # Extend the directory part for as long as the next path component
        # names an existing local directory, so scripts may live in
        # subdirectories of a CGI directory.
        i = path.find('/', len(cgi_dir)+1)
        while i >= 0:
            nextdir = path[:i]
            nextrest = path[i+1:]

            scriptdir = self.translate_path(nextdir)
            if os.path.isdir(scriptdir):
                cgi_dir, rest = nextdir, nextrest
                i = path.find('/', len(cgi_dir)+1)
            else:
                break

        # find an explicit query string, if present.
        rest, _, query = rest.partition('?')

        # dissect the part after the directory name into a script name &
        # a possible additional path, to be stored in PATH_INFO.
        i = rest.find('/')
        if i >= 0:
            script, rest = rest[:i], rest[i:]
        else:
            script, rest = rest, ''

        scriptname = cgi_dir + '/' + script
        scriptfile = self.translate_path(scriptname)
        if not os.path.exists(scriptfile):
            self.send_error(
                HTTPStatus.NOT_FOUND,
                "No such CGI script (%r)" % scriptname)
            return
        if not os.path.isfile(scriptfile):
            self.send_error(
                HTTPStatus.FORBIDDEN,
                "CGI script is not a plain file (%r)" % scriptname)
            return
        ispy = self.is_python(scriptname)
        # Without fork, Python scripts are run through the interpreter and
        # so are not required to be executable themselves.
        if self.have_fork or not ispy:
            if not self.is_executable(scriptfile):
                self.send_error(
                    HTTPStatus.FORBIDDEN,
                    "CGI script is not executable (%r)" % scriptname)
                return

        # Reference: http://hoohoo.ncsa.uiuc.edu/cgi/env.html
        # XXX Much of the following could be prepared ahead of time!
        env = copy.deepcopy(os.environ)
        env['SERVER_SOFTWARE'] = self.version_string()
        env['SERVER_NAME'] = self.server.server_name
        env['GATEWAY_INTERFACE'] = 'CGI/1.1'
        env['SERVER_PROTOCOL'] = self.protocol_version
        env['SERVER_PORT'] = str(self.server.server_port)
        env['REQUEST_METHOD'] = self.command
        uqrest = urllib.parse.unquote(rest)
        env['PATH_INFO'] = uqrest
        env['PATH_TRANSLATED'] = self.translate_path(uqrest)
        env['SCRIPT_NAME'] = scriptname
        env['QUERY_STRING'] = query
        env['REMOTE_ADDR'] = self.client_address[0]
        authorization = self.headers.get("authorization")
        if authorization:
            auth_parts = authorization.split()
            if len(auth_parts) == 2:
                scheme, credentials = auth_parts
                env['AUTH_TYPE'] = scheme
                if scheme.lower() == "basic":
                    user = self._basic_auth_user(credentials)
                    if user is not None:
                        env['REMOTE_USER'] = user
        # XXX REMOTE_IDENT
        if self.headers.get('content-type') is None:
            env['CONTENT_TYPE'] = self.headers.get_content_type()
        else:
            env['CONTENT_TYPE'] = self.headers['content-type']
        length = self.headers.get('content-length')
        if length:
            env['CONTENT_LENGTH'] = length
        referer = self.headers.get('referer')
        if referer:
            env['HTTP_REFERER'] = referer
        accept = self.headers.get_all('accept', ())
        env['HTTP_ACCEPT'] = ','.join(accept)
        ua = self.headers.get('user-agent')
        if ua:
            env['HTTP_USER_AGENT'] = ua
        co = filter(None, self.headers.get_all('cookie', []))
        cookie_str = ', '.join(co)
        if cookie_str:
            env['HTTP_COOKIE'] = cookie_str
        # XXX Other HTTP_* headers
        # Since we're setting the env in the parent, provide empty
        # values to override previously set values
        for k in ('QUERY_STRING', 'REMOTE_HOST', 'CONTENT_LENGTH',
                  'HTTP_USER_AGENT', 'HTTP_COOKIE', 'HTTP_REFERER'):
            env.setdefault(k, "")

        # The status line goes out before the script runs, so the script
        # cannot choose a different status code.
        self.send_response(HTTPStatus.OK, "Script output follows")
        self.flush_headers()

        decoded_query = query.replace('+', ' ')

        if self.have_fork:
            # Unix -- fork as we should
            args = [script]
            # A query without '=' is passed to the script as an argument.
            if '=' not in decoded_query:
                args.append(decoded_query)
            nobody = nobody_uid()
            self.wfile.flush() # Always flush before forking
            pid = os.fork()
            if pid != 0:
                # Parent
                pid, sts = os.waitpid(pid, 0)
                # throw away additional data [see bug #427345]
                while select.select([self.rfile], [], [], 0)[0]:
                    if not self.rfile.read(1):
                        break
                exitcode = os.waitstatus_to_exitcode(sts)
                if exitcode:
                    self.log_error(f"CGI script exit code {exitcode}")
                return
            # Child
            try:
                try:
                    os.setuid(nobody)
                except OSError:
                    # Best effort: keep the current uid if it can't change.
                    pass
                # Connect the script's stdin/stdout to the client socket.
                os.dup2(self.rfile.fileno(), 0)
                os.dup2(self.wfile.fileno(), 1)
                os.execve(scriptfile, args, env)
            except:
                # Catch everything: the child must never return into the
                # server's request loop, so report and exit immediately.
                self.server.handle_error(self.request, self.client_address)
                os._exit(127)

        else:
            # Non-Unix -- use subprocess
            import subprocess
            cmdline = [scriptfile]
            if self.is_python(scriptfile):
                interp = sys.executable
                if interp.lower().endswith("w.exe"):
                    # On Windows, use python.exe, not pythonw.exe
                    interp = interp[:-5] + interp[-4:]
                cmdline = [interp, '-u'] + cmdline
            if '=' not in query:
                cmdline.append(query)
            self.log_message("command: %s", subprocess.list2cmdline(cmdline))
            try:
                nbytes = int(length)
            except (TypeError, ValueError):
                # Missing or malformed Content-Length: send no request body.
                nbytes = 0
            p = subprocess.Popen(cmdline,
                                 stdin=subprocess.PIPE,
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE,
                                 env = env
                                 )
            if self.command.lower() == "post" and nbytes > 0:
                data = self.rfile.read(nbytes)
            else:
                data = None
            # throw away additional data [see bug #427345]
            while select.select([self.rfile._sock], [], [], 0)[0]:
                if not self.rfile._sock.recv(1):
                    break
            stdout, stderr = p.communicate(data)
            self.wfile.write(stdout)
            if stderr:
                self.log_error('%s', stderr)
            p.stderr.close()
            p.stdout.close()
            status = p.returncode
            if status:
                self.log_error("CGI script exit status %#x", status)
            else:
                self.log_message("CGI script exited OK")
1245
1246
def _get_best_family(*address):
    """Resolve address and return (family, sockaddr) of the first result.

    The positional arguments are passed straight to socket.getaddrinfo(),
    which is asked for passive stream sockets.
    """
    candidates = socket.getaddrinfo(
        *address,
        type=socket.SOCK_STREAM,
        flags=socket.AI_PASSIVE,
    )
    first = next(iter(candidates))
    family, _socktype, _proto, _canonname, sockaddr = first
    return family, sockaddr
1255
1256
def test(HandlerClass=BaseHTTPRequestHandler,
         ServerClass=ThreadingHTTPServer,
         protocol="HTTP/1.0", port=8000, bind=None):
    """Test the HTTP request handler class.

    This runs an HTTP server on port 8000 (or the port argument), bound
    to the bind address, and serves until interrupted from the keyboard.

    Note that ServerClass.address_family and
    HandlerClass.protocol_version are set on the classes themselves.

    """
    family, addr = _get_best_family(bind, port)
    ServerClass.address_family = family
    HandlerClass.protocol_version = protocol
    with ServerClass(addr, HandlerClass) as httpd:
        # Report the address actually bound.
        host, port = httpd.socket.getsockname()[:2]
        if ':' in host:
            # IPv6 literals need brackets inside a URL.
            url_host = f'[{host}]'
        else:
            url_host = host
        banner = (
            f"Serving HTTP on {host} port {port} "
            f"(http://{url_host}:{port}/) ..."
        )
        print(banner)
        try:
            httpd.serve_forever()
        except KeyboardInterrupt:
            print("\nKeyboard interrupt received, exiting.")
            sys.exit(0)
1279
if __name__ == '__main__':
    import argparse
    import contextlib

    # Command-line interface: optional flags plus an optional port.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--cgi', action='store_true',
        help='run as CGI server')
    parser.add_argument(
        '-b', '--bind', metavar='ADDRESS',
        help='bind to this address (default: all interfaces)')
    parser.add_argument(
        '-d', '--directory', default=os.getcwd(),
        help='serve this directory (default: current directory)')
    parser.add_argument(
        '-p', '--protocol', metavar='VERSION', default='HTTP/1.0',
        help='conform to this HTTP version (default: %(default)s)')
    parser.add_argument(
        'port', default=8000, type=int, nargs='?',
        help='bind to this port (default: %(default)s)')
    args = parser.parse_args()

    handler_class = (CGIHTTPRequestHandler if args.cgi
                     else SimpleHTTPRequestHandler)

    # ensure dual-stack is not disabled; ref #38907
    class DualStackServer(ThreadingHTTPServer):

        def server_bind(self):
            # suppress exception when protocol is IPv4
            with contextlib.suppress(Exception):
                self.socket.setsockopt(
                    socket.IPPROTO_IPV6, socket.IPV6_V6ONLY, 0)
            return super().server_bind()

        def finish_request(self, request, client_address):
            # Hand the directory chosen on the command line to the handler.
            self.RequestHandlerClass(request, client_address, self,
                                     directory=args.directory)

    test(
        HandlerClass=handler_class,
        ServerClass=DualStackServer,
        port=args.port,
        bind=args.bind,
        protocol=args.protocol,
    )
1327