• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1"""HTTP server classes.
2
3Note: BaseHTTPRequestHandler doesn't implement any HTTP request; see
4SimpleHTTPRequestHandler for simple implementations of GET, HEAD and POST,
5and CGIHTTPRequestHandler for CGI scripts.
6
7It does, however, optionally implement HTTP/1.1 persistent connections,
8as of version 0.3.
9
10Notes on CGIHTTPRequestHandler
11------------------------------
12
13This class implements GET and POST requests to cgi-bin scripts.
14
15If the os.fork() function is not present (e.g. on Windows),
16subprocess.Popen() is used as a fallback, with slightly altered semantics.
17
18In all cases, the implementation is intentionally naive -- all
19requests are executed synchronously.
20
21SECURITY WARNING: DON'T USE THIS CODE UNLESS YOU ARE INSIDE A FIREWALL
22-- it may execute arbitrary Python code or external programs.
23
24Note that status code 200 is sent prior to execution of a CGI script, so
25scripts cannot send other status codes such as 302 (redirect).
26
27XXX To do:
28
29- log requests even later (to capture byte count)
30- log user-agent header and other interesting goodies
31- send error log to separate file
32"""
33
34
35# See also:
36#
37# HTTP Working Group                                        T. Berners-Lee
38# INTERNET-DRAFT                                            R. T. Fielding
39# <draft-ietf-http-v10-spec-00.txt>                     H. Frystyk Nielsen
40# Expires September 8, 1995                                  March 8, 1995
41#
42# URL: http://www.ics.uci.edu/pub/ietf/http/draft-ietf-http-v10-spec-00.txt
43#
44# and
45#
46# Network Working Group                                      R. Fielding
47# Request for Comments: 2616                                       et al
48# Obsoletes: 2068                                              June 1999
49# Category: Standards Track
50#
51# URL: http://www.faqs.org/rfcs/rfc2616.html
52
53# Log files
54# ---------
55#
56# Here's a quote from the NCSA httpd docs about log file format.
57#
58# | The logfile format is as follows. Each line consists of:
59# |
60# | host rfc931 authuser [DD/Mon/YYYY:hh:mm:ss] "request" ddd bbbb
61# |
62# |        host: Either the DNS name or the IP number of the remote client
63# |        rfc931: Any information returned by identd for this person,
64# |                - otherwise.
65# |        authuser: If user sent a userid for authentication, the user name,
66# |                  - otherwise.
67# |        DD: Day
68# |        Mon: Month (calendar name)
69# |        YYYY: Year
70# |        hh: hour (24-hour format, the machine's timezone)
71# |        mm: minutes
72# |        ss: seconds
73# |        request: The first line of the HTTP request as sent by the client.
74# |        ddd: the status code returned by the server, - if not available.
75# |        bbbb: the total number of bytes sent,
76# |              *not including the HTTP/1.0 header*, - if not available
77# |
78# | You can determine the name of the file accessed through request.
79#
80# (Actually, the latter is only true if you know the server configuration
81# at the time the request was made!)
82
83__version__ = "0.6"
84
85__all__ = [
86    "HTTPServer", "ThreadingHTTPServer", "BaseHTTPRequestHandler",
87    "SimpleHTTPRequestHandler", "CGIHTTPRequestHandler",
88]
89
90import copy
91import datetime
92import email.utils
93import html
94import http.client
95import io
96import mimetypes
97import os
98import posixpath
99import select
100import shutil
101import socket # For gethostbyaddr()
102import socketserver
103import sys
104import time
105import urllib.parse
106import contextlib
107from functools import partial
108
109from http import HTTPStatus
110
111
112# Default error message template
113DEFAULT_ERROR_MESSAGE = """\
114<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
115        "http://www.w3.org/TR/html4/strict.dtd">
116<html>
117    <head>
118        <meta http-equiv="Content-Type" content="text/html;charset=utf-8">
119        <title>Error response</title>
120    </head>
121    <body>
122        <h1>Error response</h1>
123        <p>Error code: %(code)d</p>
124        <p>Message: %(message)s.</p>
125        <p>Error code explanation: %(code)s - %(explain)s.</p>
126    </body>
127</html>
128"""
129
130DEFAULT_ERROR_CONTENT_TYPE = "text/html;charset=utf-8"
131
class HTTPServer(socketserver.TCPServer):
    """TCP server that records the host name and port it is bound to."""

    # Ask socketserver to enable address reuse on the listening socket.
    allow_reuse_address = 1

    def server_bind(self):
        """Bind the socket, then store server_name and server_port."""
        socketserver.TCPServer.server_bind(self)
        bound_host, bound_port = self.server_address[:2]
        # server_name is the fully qualified form of the bound host.
        self.server_name = socket.getfqdn(bound_host)
        self.server_port = bound_port
142
143
class ThreadingHTTPServer(socketserver.ThreadingMixIn, HTTPServer):
    """HTTPServer combined with socketserver.ThreadingMixIn."""

    # Flag read by ThreadingMixIn: request threads are created as daemons.
    daemon_threads = True
146
147
148class BaseHTTPRequestHandler(socketserver.StreamRequestHandler):
149
150    """HTTP request handler base class.
151
152    The following explanation of HTTP serves to guide you through the
153    code as well as to expose any misunderstandings I may have about
154    HTTP (so you don't need to read the code to figure out I'm wrong
155    :-).
156
157    HTTP (HyperText Transfer Protocol) is an extensible protocol on
158    top of a reliable stream transport (e.g. TCP/IP).  The protocol
159    recognizes three parts to a request:
160
161    1. One line identifying the request type and path
162    2. An optional set of RFC-822-style headers
163    3. An optional data part
164
165    The headers and data are separated by a blank line.
166
167    The first line of the request has the form
168
169    <command> <path> <version>
170
171    where <command> is a (case-sensitive) keyword such as GET or POST,
172    <path> is a string containing path information for the request,
173    and <version> should be the string "HTTP/1.0" or "HTTP/1.1".
174    <path> is encoded using the URL encoding scheme (using %xx to signify
175    the ASCII character with hex code xx).
176
177    The specification specifies that lines are separated by CRLF but
178    for compatibility with the widest range of clients recommends
179    servers also handle LF.  Similarly, whitespace in the request line
180    is treated sensibly (allowing multiple spaces between components
181    and allowing trailing whitespace).
182
183    Similarly, for output, lines ought to be separated by CRLF pairs
184    but most clients grok LF characters just fine.
185
186    If the first line of the request has the form
187
188    <command> <path>
189
190    (i.e. <version> is left out) then this is assumed to be an HTTP
191    0.9 request; this form has no optional headers and data part and
192    the reply consists of just the data.
193
194    The reply form of the HTTP 1.x protocol again has three parts:
195
196    1. One line giving the response code
197    2. An optional set of RFC-822-style headers
198    3. The data
199
200    Again, the headers and data are separated by a blank line.
201
202    The response code line has the form
203
204    <version> <responsecode> <responsestring>
205
206    where <version> is the protocol version ("HTTP/1.0" or "HTTP/1.1"),
207    <responsecode> is a 3-digit response code indicating success or
208    failure of the request, and <responsestring> is an optional
209    human-readable string explaining what the response code means.
210
211    This server parses the request and the headers, and then calls a
212    function specific to the request type (<command>).  Specifically,
213    a request SPAM will be handled by a method do_SPAM().  If no
214    such method exists the server sends an error response to the
215    client.  If it exists, it is called with no arguments:
216
217    do_SPAM()
218
219    Note that the request name is case sensitive (i.e. SPAM and spam
220    are different requests).
221
222    The various request details are stored in instance variables:
223
224    - client_address is the client IP address in the form (host,
225    port);
226
227    - command, path and version are the broken-down request line;
228
229    - headers is an instance of email.message.Message (or a derived
230    class) containing the header information;
231
232    - rfile is a file object open for reading positioned at the
233    start of the optional input data part;
234
235    - wfile is a file object open for writing.
236
237    IT IS IMPORTANT TO ADHERE TO THE PROTOCOL FOR WRITING!
238
239    The first thing to be written must be the response line.  Then
240    follow 0 or more header lines, then a blank line, and then the
241    actual data (if any).  The meaning of the header lines depends on
242    the command executed by the server; in most cases, when data is
243    returned, there should be at least one header line of the form
244
245    Content-type: <type>/<subtype>
246
247    where <type> and <subtype> should be registered MIME types,
248    e.g. "text/html" or "text/plain".
249
250    """
251
252    # The Python system version, truncated to its first component.
253    sys_version = "Python/" + sys.version.split()[0]
254
255    # The server software version.  You may want to override this.
256    # The format is multiple whitespace-separated strings,
257    # where each string is of the form name[/version].
258    server_version = "BaseHTTP/" + __version__
259
260    error_message_format = DEFAULT_ERROR_MESSAGE
261    error_content_type = DEFAULT_ERROR_CONTENT_TYPE
262
263    # The default request version.  This only affects responses up until
264    # the point where the request line is parsed, so it mainly decides what
265    # the client gets back when sending a malformed request line.
266    # Most web servers default to HTTP 0.9, i.e. don't send a status line.
267    default_request_version = "HTTP/0.9"
268
269    def parse_request(self):
270        """Parse a request (internal).
271
272        The request should be stored in self.raw_requestline; the results
273        are in self.command, self.path, self.request_version and
274        self.headers.
275
276        Return True for success, False for failure; on failure, any relevant
277        error response has already been sent back.
278
279        """
280        self.command = None  # set in case of error on the first line
281        self.request_version = version = self.default_request_version
282        self.close_connection = True
283        requestline = str(self.raw_requestline, 'iso-8859-1')
284        requestline = requestline.rstrip('\r\n')
285        self.requestline = requestline
286        words = requestline.split()
287        if len(words) == 0:
288            return False
289
290        if len(words) >= 3:  # Enough to determine protocol version
291            version = words[-1]
292            try:
293                if not version.startswith('HTTP/'):
294                    raise ValueError
295                base_version_number = version.split('/', 1)[1]
296                version_number = base_version_number.split(".")
297                # RFC 2145 section 3.1 says there can be only one "." and
298                #   - major and minor numbers MUST be treated as
299                #      separate integers;
300                #   - HTTP/2.4 is a lower version than HTTP/2.13, which in
301                #      turn is lower than HTTP/12.3;
302                #   - Leading zeros MUST be ignored by recipients.
303                if len(version_number) != 2:
304                    raise ValueError
305                version_number = int(version_number[0]), int(version_number[1])
306            except (ValueError, IndexError):
307                self.send_error(
308                    HTTPStatus.BAD_REQUEST,
309                    "Bad request version (%r)" % version)
310                return False
311            if version_number >= (1, 1) and self.protocol_version >= "HTTP/1.1":
312                self.close_connection = False
313            if version_number >= (2, 0):
314                self.send_error(
315                    HTTPStatus.HTTP_VERSION_NOT_SUPPORTED,
316                    "Invalid HTTP version (%s)" % base_version_number)
317                return False
318            self.request_version = version
319
320        if not 2 <= len(words) <= 3:
321            self.send_error(
322                HTTPStatus.BAD_REQUEST,
323                "Bad request syntax (%r)" % requestline)
324            return False
325        command, path = words[:2]
326        if len(words) == 2:
327            self.close_connection = True
328            if command != 'GET':
329                self.send_error(
330                    HTTPStatus.BAD_REQUEST,
331                    "Bad HTTP/0.9 request type (%r)" % command)
332                return False
333        self.command, self.path = command, path
334
335        # Examine the headers and look for a Connection directive.
336        try:
337            self.headers = http.client.parse_headers(self.rfile,
338                                                     _class=self.MessageClass)
339        except http.client.LineTooLong as err:
340            self.send_error(
341                HTTPStatus.REQUEST_HEADER_FIELDS_TOO_LARGE,
342                "Line too long",
343                str(err))
344            return False
345        except http.client.HTTPException as err:
346            self.send_error(
347                HTTPStatus.REQUEST_HEADER_FIELDS_TOO_LARGE,
348                "Too many headers",
349                str(err)
350            )
351            return False
352
353        conntype = self.headers.get('Connection', "")
354        if conntype.lower() == 'close':
355            self.close_connection = True
356        elif (conntype.lower() == 'keep-alive' and
357              self.protocol_version >= "HTTP/1.1"):
358            self.close_connection = False
359        # Examine the headers and look for an Expect directive
360        expect = self.headers.get('Expect', "")
361        if (expect.lower() == "100-continue" and
362                self.protocol_version >= "HTTP/1.1" and
363                self.request_version >= "HTTP/1.1"):
364            if not self.handle_expect_100():
365                return False
366        return True
367
368    def handle_expect_100(self):
369        """Decide what to do with an "Expect: 100-continue" header.
370
371        If the client is expecting a 100 Continue response, we must
372        respond with either a 100 Continue or a final response before
373        waiting for the request body. The default is to always respond
374        with a 100 Continue. You can behave differently (for example,
375        reject unauthorized requests) by overriding this method.
376
377        This method should either return True (possibly after sending
378        a 100 Continue response) or send an error response and return
379        False.
380
381        """
382        self.send_response_only(HTTPStatus.CONTINUE)
383        self.end_headers()
384        return True
385
386    def handle_one_request(self):
387        """Handle a single HTTP request.
388
389        You normally don't need to override this method; see the class
390        __doc__ string for information on how to handle specific HTTP
391        commands such as GET and POST.
392
393        """
394        try:
395            self.raw_requestline = self.rfile.readline(65537)
396            if len(self.raw_requestline) > 65536:
397                self.requestline = ''
398                self.request_version = ''
399                self.command = ''
400                self.send_error(HTTPStatus.REQUEST_URI_TOO_LONG)
401                return
402            if not self.raw_requestline:
403                self.close_connection = True
404                return
405            if not self.parse_request():
406                # An error code has been sent, just exit
407                return
408            mname = 'do_' + self.command
409            if not hasattr(self, mname):
410                self.send_error(
411                    HTTPStatus.NOT_IMPLEMENTED,
412                    "Unsupported method (%r)" % self.command)
413                return
414            method = getattr(self, mname)
415            method()
416            self.wfile.flush() #actually send the response if not already done.
417        except TimeoutError as e:
418            #a read or a write timed out.  Discard this connection
419            self.log_error("Request timed out: %r", e)
420            self.close_connection = True
421            return
422
423    def handle(self):
424        """Handle multiple requests if necessary."""
425        self.close_connection = True
426
427        self.handle_one_request()
428        while not self.close_connection:
429            self.handle_one_request()
430
431    def send_error(self, code, message=None, explain=None):
432        """Send and log an error reply.
433
434        Arguments are
435        * code:    an HTTP error code
436                   3 digits
437        * message: a simple optional 1 line reason phrase.
438                   *( HTAB / SP / VCHAR / %x80-FF )
439                   defaults to short entry matching the response code
440        * explain: a detailed message defaults to the long entry
441                   matching the response code.
442
443        This sends an error response (so it must be called before any
444        output has been generated), logs the error, and finally sends
445        a piece of HTML explaining the error to the user.
446
447        """
448
449        try:
450            shortmsg, longmsg = self.responses[code]
451        except KeyError:
452            shortmsg, longmsg = '???', '???'
453        if message is None:
454            message = shortmsg
455        if explain is None:
456            explain = longmsg
457        self.log_error("code %d, message %s", code, message)
458        self.send_response(code, message)
459        self.send_header('Connection', 'close')
460
461        # Message body is omitted for cases described in:
462        #  - RFC7230: 3.3. 1xx, 204(No Content), 304(Not Modified)
463        #  - RFC7231: 6.3.6. 205(Reset Content)
464        body = None
465        if (code >= 200 and
466            code not in (HTTPStatus.NO_CONTENT,
467                         HTTPStatus.RESET_CONTENT,
468                         HTTPStatus.NOT_MODIFIED)):
469            # HTML encode to prevent Cross Site Scripting attacks
470            # (see bug #1100201)
471            content = (self.error_message_format % {
472                'code': code,
473                'message': html.escape(message, quote=False),
474                'explain': html.escape(explain, quote=False)
475            })
476            body = content.encode('UTF-8', 'replace')
477            self.send_header("Content-Type", self.error_content_type)
478            self.send_header('Content-Length', str(len(body)))
479        self.end_headers()
480
481        if self.command != 'HEAD' and body:
482            self.wfile.write(body)
483
484    def send_response(self, code, message=None):
485        """Add the response header to the headers buffer and log the
486        response code.
487
488        Also send two standard headers with the server software
489        version and the current date.
490
491        """
492        self.log_request(code)
493        self.send_response_only(code, message)
494        self.send_header('Server', self.version_string())
495        self.send_header('Date', self.date_time_string())
496
497    def send_response_only(self, code, message=None):
498        """Send the response header only."""
499        if self.request_version != 'HTTP/0.9':
500            if message is None:
501                if code in self.responses:
502                    message = self.responses[code][0]
503                else:
504                    message = ''
505            if not hasattr(self, '_headers_buffer'):
506                self._headers_buffer = []
507            self._headers_buffer.append(("%s %d %s\r\n" %
508                    (self.protocol_version, code, message)).encode(
509                        'latin-1', 'strict'))
510
511    def send_header(self, keyword, value):
512        """Send a MIME header to the headers buffer."""
513        if self.request_version != 'HTTP/0.9':
514            if not hasattr(self, '_headers_buffer'):
515                self._headers_buffer = []
516            self._headers_buffer.append(
517                ("%s: %s\r\n" % (keyword, value)).encode('latin-1', 'strict'))
518
519        if keyword.lower() == 'connection':
520            if value.lower() == 'close':
521                self.close_connection = True
522            elif value.lower() == 'keep-alive':
523                self.close_connection = False
524
525    def end_headers(self):
526        """Send the blank line ending the MIME headers."""
527        if self.request_version != 'HTTP/0.9':
528            self._headers_buffer.append(b"\r\n")
529            self.flush_headers()
530
531    def flush_headers(self):
532        if hasattr(self, '_headers_buffer'):
533            self.wfile.write(b"".join(self._headers_buffer))
534            self._headers_buffer = []
535
536    def log_request(self, code='-', size='-'):
537        """Log an accepted request.
538
539        This is called by send_response().
540
541        """
542        if isinstance(code, HTTPStatus):
543            code = code.value
544        self.log_message('"%s" %s %s',
545                         self.requestline, str(code), str(size))
546
547    def log_error(self, format, *args):
548        """Log an error.
549
550        This is called when a request cannot be fulfilled.  By
551        default it passes the message on to log_message().
552
553        Arguments are the same as for log_message().
554
555        XXX This should go to the separate error log.
556
557        """
558
559        self.log_message(format, *args)
560
561    def log_message(self, format, *args):
562        """Log an arbitrary message.
563
564        This is used by all other logging functions.  Override
565        it if you have specific logging wishes.
566
567        The first argument, FORMAT, is a format string for the
568        message to be logged.  If the format string contains
569        any % escapes requiring parameters, they should be
570        specified as subsequent arguments (it's just like
571        printf!).
572
573        The client ip and current date/time are prefixed to
574        every message.
575
576        """
577
578        sys.stderr.write("%s - - [%s] %s\n" %
579                         (self.address_string(),
580                          self.log_date_time_string(),
581                          format%args))
582
583    def version_string(self):
584        """Return the server software version string."""
585        return self.server_version + ' ' + self.sys_version
586
587    def date_time_string(self, timestamp=None):
588        """Return the current date and time formatted for a message header."""
589        if timestamp is None:
590            timestamp = time.time()
591        return email.utils.formatdate(timestamp, usegmt=True)
592
593    def log_date_time_string(self):
594        """Return the current time formatted for logging."""
595        now = time.time()
596        year, month, day, hh, mm, ss, x, y, z = time.localtime(now)
597        s = "%02d/%3s/%04d %02d:%02d:%02d" % (
598                day, self.monthname[month], year, hh, mm, ss)
599        return s
600
    # Abbreviated English weekday names, Monday first.
    weekdayname = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']

    # Abbreviated English month names indexed by month number (1-12);
    # index 0 is a None placeholder.  Used by log_date_time_string().
    monthname = [None,
                 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
606
607    def address_string(self):
608        """Return the client address."""
609
610        return self.client_address[0]
611
    # Essentially static class variables

    # The version of the HTTP protocol we support.
    # Set this to HTTP/1.1 to enable automatic keepalive
    # (parse_request() only keeps a connection open when this compares
    # >= "HTTP/1.1").  It is also the version written in status lines.
    protocol_version = "HTTP/1.0"

    # MessageClass used to parse headers
    # (passed as _class to http.client.parse_headers()).
    MessageClass = http.client.HTTPMessage

    # hack to maintain backwards compatibility
    # Maps every HTTPStatus member to a (short phrase, long description)
    # pair; send_error() and send_response_only() look codes up here.
    responses = {
        v: (v.phrase, v.description)
        for v in HTTPStatus.__members__.values()
    }
626
627
628class SimpleHTTPRequestHandler(BaseHTTPRequestHandler):
629
630    """Simple HTTP request handler with GET and HEAD commands.
631
632    This serves files from the current directory and any of its
633    subdirectories.  The MIME type for files is determined by
634    calling the .guess_type() method.
635
636    The GET and HEAD requests are identical except that the HEAD
637    request omits the actual contents of the file.
638
639    """
640
    # Server software name/version for this handler.
    server_version = "SimpleHTTP/" + __version__
    # Filename suffix -> MIME type table; the same dict object is also
    # bound to _encodings_map_default.
    # NOTE(review): presumably consulted by guess_type(), which is not
    # visible in this part of the file -- confirm.
    extensions_map = _encodings_map_default = {
        '.gz': 'application/gzip',
        '.Z': 'application/octet-stream',
        '.bz2': 'application/x-bzip2',
        '.xz': 'application/x-xz',
    }
648
649    def __init__(self, *args, directory=None, **kwargs):
650        if directory is None:
651            directory = os.getcwd()
652        self.directory = os.fspath(directory)
653        super().__init__(*args, **kwargs)
654
655    def do_GET(self):
656        """Serve a GET request."""
657        f = self.send_head()
658        if f:
659            try:
660                self.copyfile(f, self.wfile)
661            finally:
662                f.close()
663
664    def do_HEAD(self):
665        """Serve a HEAD request."""
666        f = self.send_head()
667        if f:
668            f.close()
669
670    def send_head(self):
671        """Common code for GET and HEAD commands.
672
673        This sends the response code and MIME headers.
674
675        Return value is either a file object (which has to be copied
676        to the outputfile by the caller unless the command was HEAD,
677        and must be closed by the caller under all circumstances), or
678        None, in which case the caller has nothing further to do.
679
680        """
681        path = self.translate_path(self.path)
682        f = None
683        if os.path.isdir(path):
684            parts = urllib.parse.urlsplit(self.path)
685            if not parts.path.endswith('/'):
686                # redirect browser - doing basically what apache does
687                self.send_response(HTTPStatus.MOVED_PERMANENTLY)
688                new_parts = (parts[0], parts[1], parts[2] + '/',
689                             parts[3], parts[4])
690                new_url = urllib.parse.urlunsplit(new_parts)
691                self.send_header("Location", new_url)
692                self.send_header("Content-Length", "0")
693                self.end_headers()
694                return None
695            for index in "index.html", "index.htm":
696                index = os.path.join(path, index)
697                if os.path.exists(index):
698                    path = index
699                    break
700            else:
701                return self.list_directory(path)
702        ctype = self.guess_type(path)
703        # check for trailing "/" which should return 404. See Issue17324
704        # The test for this was added in test_httpserver.py
705        # However, some OS platforms accept a trailingSlash as a filename
706        # See discussion on python-dev and Issue34711 regarding
707        # parseing and rejection of filenames with a trailing slash
708        if path.endswith("/"):
709            self.send_error(HTTPStatus.NOT_FOUND, "File not found")
710            return None
711        try:
712            f = open(path, 'rb')
713        except OSError:
714            self.send_error(HTTPStatus.NOT_FOUND, "File not found")
715            return None
716
717        try:
718            fs = os.fstat(f.fileno())
719            # Use browser cache if possible
720            if ("If-Modified-Since" in self.headers
721                    and "If-None-Match" not in self.headers):
722                # compare If-Modified-Since and time of last file modification
723                try:
724                    ims = email.utils.parsedate_to_datetime(
725                        self.headers["If-Modified-Since"])
726                except (TypeError, IndexError, OverflowError, ValueError):
727                    # ignore ill-formed values
728                    pass
729                else:
730                    if ims.tzinfo is None:
731                        # obsolete format with no timezone, cf.
732                        # https://tools.ietf.org/html/rfc7231#section-7.1.1.1
733                        ims = ims.replace(tzinfo=datetime.timezone.utc)
734                    if ims.tzinfo is datetime.timezone.utc:
735                        # compare to UTC datetime of last modification
736                        last_modif = datetime.datetime.fromtimestamp(
737                            fs.st_mtime, datetime.timezone.utc)
738                        # remove microseconds, like in If-Modified-Since
739                        last_modif = last_modif.replace(microsecond=0)
740
741                        if last_modif <= ims:
742                            self.send_response(HTTPStatus.NOT_MODIFIED)
743                            self.end_headers()
744                            f.close()
745                            return None
746
747            self.send_response(HTTPStatus.OK)
748            self.send_header("Content-type", ctype)
749            self.send_header("Content-Length", str(fs[6]))
750            self.send_header("Last-Modified",
751                self.date_time_string(fs.st_mtime))
752            self.end_headers()
753            return f
754        except:
755            f.close()
756            raise
757
758    def list_directory(self, path):
759        """Helper to produce a directory listing (absent index.html).
760
761        Return value is either a file object, or None (indicating an
762        error).  In either case, the headers are sent, making the
763        interface the same as for send_head().
764
765        """
766        try:
767            list = os.listdir(path)
768        except OSError:
769            self.send_error(
770                HTTPStatus.NOT_FOUND,
771                "No permission to list directory")
772            return None
773        list.sort(key=lambda a: a.lower())
774        r = []
775        try:
776            displaypath = urllib.parse.unquote(self.path,
777                                               errors='surrogatepass')
778        except UnicodeDecodeError:
779            displaypath = urllib.parse.unquote(path)
780        displaypath = html.escape(displaypath, quote=False)
781        enc = sys.getfilesystemencoding()
782        title = 'Directory listing for %s' % displaypath
783        r.append('<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" '
784                 '"http://www.w3.org/TR/html4/strict.dtd">')
785        r.append('<html>\n<head>')
786        r.append('<meta http-equiv="Content-Type" '
787                 'content="text/html; charset=%s">' % enc)
788        r.append('<title>%s</title>\n</head>' % title)
789        r.append('<body>\n<h1>%s</h1>' % title)
790        r.append('<hr>\n<ul>')
791        for name in list:
792            fullname = os.path.join(path, name)
793            displayname = linkname = name
794            # Append / for directories or @ for symbolic links
795            if os.path.isdir(fullname):
796                displayname = name + "/"
797                linkname = name + "/"
798            if os.path.islink(fullname):
799                displayname = name + "@"
800                # Note: a link to a directory displays with @ and links with /
801            r.append('<li><a href="%s">%s</a></li>'
802                    % (urllib.parse.quote(linkname,
803                                          errors='surrogatepass'),
804                       html.escape(displayname, quote=False)))
805        r.append('</ul>\n<hr>\n</body>\n</html>\n')
806        encoded = '\n'.join(r).encode(enc, 'surrogateescape')
807        f = io.BytesIO()
808        f.write(encoded)
809        f.seek(0)
810        self.send_response(HTTPStatus.OK)
811        self.send_header("Content-type", "text/html; charset=%s" % enc)
812        self.send_header("Content-Length", str(len(encoded)))
813        self.end_headers()
814        return f
815
816    def translate_path(self, path):
817        """Translate a /-separated PATH to the local filename syntax.
818
819        Components that mean special things to the local file system
820        (e.g. drive or directory names) are ignored.  (XXX They should
821        probably be diagnosed.)
822
823        """
824        # abandon query parameters
825        path = path.split('?',1)[0]
826        path = path.split('#',1)[0]
827        # Don't forget explicit trailing slash when normalizing. Issue17324
828        trailing_slash = path.rstrip().endswith('/')
829        try:
830            path = urllib.parse.unquote(path, errors='surrogatepass')
831        except UnicodeDecodeError:
832            path = urllib.parse.unquote(path)
833        path = posixpath.normpath(path)
834        words = path.split('/')
835        words = filter(None, words)
836        path = self.directory
837        for word in words:
838            if os.path.dirname(word) or word in (os.curdir, os.pardir):
839                # Ignore components that are not a simple file/directory name
840                continue
841            path = os.path.join(path, word)
842        if trailing_slash:
843            path += '/'
844        return path
845
846    def copyfile(self, source, outputfile):
847        """Copy all data between two file objects.
848
849        The SOURCE argument is a file object open for reading
850        (or anything with a read() method) and the DESTINATION
851        argument is a file object open for writing (or
852        anything with a write() method).
853
854        The only reason for overriding this would be to change
855        the block size or perhaps to replace newlines by CRLF
856        -- note however that this the default server uses this
857        to copy binary data as well.
858
859        """
860        shutil.copyfileobj(source, outputfile)
861
862    def guess_type(self, path):
863        """Guess the type of a file.
864
865        Argument is a PATH (a filename).
866
867        Return value is a string of the form type/subtype,
868        usable for a MIME Content-type header.
869
870        The default implementation looks the file's extension
871        up in the table self.extensions_map, using application/octet-stream
872        as a default; however it would be permissible (if
873        slow) to look inside the data to make a better guess.
874
875        """
876        base, ext = posixpath.splitext(path)
877        if ext in self.extensions_map:
878            return self.extensions_map[ext]
879        ext = ext.lower()
880        if ext in self.extensions_map:
881            return self.extensions_map[ext]
882        guess, _ = mimetypes.guess_type(path)
883        if guess:
884            return guess
885        return 'application/octet-stream'
886
887
888# Utilities for CGIHTTPRequestHandler
889
890def _url_collapse_path(path):
891    """
892    Given a URL path, remove extra '/'s and '.' path elements and collapse
893    any '..' references and returns a collapsed path.
894
895    Implements something akin to RFC-2396 5.2 step 6 to parse relative paths.
896    The utility of this function is limited to is_cgi method and helps
897    preventing some security attacks.
898
899    Returns: The reconstituted URL, which will always start with a '/'.
900
901    Raises: IndexError if too many '..' occur within the path.
902
903    """
904    # Query component should not be involved.
905    path, _, query = path.partition('?')
906    path = urllib.parse.unquote(path)
907
908    # Similar to os.path.split(os.path.normpath(path)) but specific to URL
909    # path semantics rather than local operating system semantics.
910    path_parts = path.split('/')
911    head_parts = []
912    for part in path_parts[:-1]:
913        if part == '..':
914            head_parts.pop() # IndexError if more '..' than prior parts
915        elif part and part != '.':
916            head_parts.append( part )
917    if path_parts:
918        tail_part = path_parts.pop()
919        if tail_part:
920            if tail_part == '..':
921                head_parts.pop()
922                tail_part = ''
923            elif tail_part == '.':
924                tail_part = ''
925    else:
926        tail_part = ''
927
928    if query:
929        tail_part = '?'.join((tail_part, query))
930
931    splitpath = ('/' + '/'.join(head_parts), tail_part)
932    collapsed_path = "/".join(splitpath)
933
934    return collapsed_path
935
936
937
# Cached result of nobody_uid(); None until it has been computed.
nobody = None

def nobody_uid():
    """Internal routine to get nobody's uid.

    The answer is cached in the module-level name `nobody`.  Returns -1
    (uncached) when the pwd module cannot be imported.  When there is no
    'nobody' entry, one more than the largest uid in the password
    database is used instead.
    """
    global nobody
    if not nobody:
        try:
            import pwd
        except ImportError:
            return -1
        try:
            nobody = pwd.getpwnam('nobody').pw_uid
        except KeyError:
            nobody = max(entry.pw_uid for entry in pwd.getpwall()) + 1
    return nobody
954
955
def executable(path):
    """Return whether os.access() grants execute (X_OK) access to path."""
    may_execute = os.access(path, os.X_OK)
    return may_execute
959
960
class CGIHTTPRequestHandler(SimpleHTTPRequestHandler):

    """Complete HTTP server with GET, HEAD and POST commands.

    GET and HEAD also support running CGI scripts.

    The POST command is *only* implemented for CGI scripts.

    """

    # Determine platform specifics
    have_fork = hasattr(os, 'fork')

    # Make rfile unbuffered -- we need to read one line and then pass
    # the rest to a subprocess, so we can't use buffered input.
    rbufsize = 0

    def do_POST(self):
        """Serve a POST request.

        This is only implemented for CGI scripts.

        """

        if self.is_cgi():
            self.run_cgi()
        else:
            self.send_error(
                HTTPStatus.NOT_IMPLEMENTED,
                "Can only POST to CGI scripts")

    def send_head(self):
        """Version of send_head that support CGI scripts"""
        if self.is_cgi():
            return self.run_cgi()
        else:
            return SimpleHTTPRequestHandler.send_head(self)

    def is_cgi(self):
        """Test whether self.path corresponds to a CGI script.

        Returns True and updates the cgi_info attribute to the tuple
        (dir, rest) if self.path requires running a CGI script.
        Returns False otherwise.

        If any exception is raised, the caller should assume that
        self.path was rejected as invalid and act accordingly.

        The default implementation tests whether the normalized url
        path begins with one of the strings in self.cgi_directories
        followed by a '/'.

        """
        collapsed_path = _url_collapse_path(self.path)
        # Try every '/'-terminated prefix, shortest first, until one of
        # them is a configured CGI directory.
        dir_sep = collapsed_path.find('/', 1)
        while (dir_sep > 0
               and collapsed_path[:dir_sep] not in self.cgi_directories):
            dir_sep = collapsed_path.find('/', dir_sep+1)
        if dir_sep > 0:
            head, tail = collapsed_path[:dir_sep], collapsed_path[dir_sep+1:]
            self.cgi_info = head, tail
            return True
        return False

    # URL path prefixes whose contents are treated as CGI scripts.
    cgi_directories = ['/cgi-bin', '/htbin']

    def is_executable(self, path):
        """Test whether argument path is an executable file."""
        return executable(path)

    def is_python(self, path):
        """Test whether argument path is a Python script."""
        head, tail = os.path.splitext(path)
        return tail.lower() in (".py", ".pyw")

    @staticmethod
    def _basic_auth_user(credentials):
        """Return the user-id from Basic auth credentials, or None.

        CREDENTIALS is the base64 text following "Basic" in the
        Authorization header.  None is returned when it cannot be
        decoded as base64/ASCII or when the decoded text has no ':'.

        The text is split at the FIRST colon only, so a password that
        itself contains ':' no longer prevents the user from being
        recognized.
        """
        import base64, binascii
        try:
            decoded = base64.decodebytes(
                credentials.encode('ascii')).decode('ascii')
        except (binascii.Error, UnicodeError):
            return None
        user, colon, _password = decoded.partition(':')
        return user if colon else None

    def run_cgi(self):
        """Execute a CGI script.

        The script is located from self.cgi_info (set by is_cgi()).  An
        error response is sent, and nothing is run, when the script does
        not exist (404), is not a plain file (403) or fails the
        executable check (403).  Otherwise the CGI environment is built,
        a 200 status line is sent, and the script is run: through
        fork()/execve() where os.fork exists, through subprocess.Popen
        elsewhere.

        """
        dir, rest = self.cgi_info
        path = dir + '/' + rest
        # Extend `dir` over as many leading components of `rest` as are
        # existing directories, so scripts in sub-directories are found.
        i = path.find('/', len(dir)+1)
        while i >= 0:
            nextdir = path[:i]
            nextrest = path[i+1:]

            scriptdir = self.translate_path(nextdir)
            if os.path.isdir(scriptdir):
                dir, rest = nextdir, nextrest
                i = path.find('/', len(dir)+1)
            else:
                break

        # find an explicit query string, if present.
        rest, _, query = rest.partition('?')

        # dissect the part after the directory name into a script name &
        # a possible additional path, to be stored in PATH_INFO.
        i = rest.find('/')
        if i >= 0:
            script, rest = rest[:i], rest[i:]
        else:
            script, rest = rest, ''

        scriptname = dir + '/' + script
        scriptfile = self.translate_path(scriptname)
        if not os.path.exists(scriptfile):
            self.send_error(
                HTTPStatus.NOT_FOUND,
                "No such CGI script (%r)" % scriptname)
            return
        if not os.path.isfile(scriptfile):
            self.send_error(
                HTTPStatus.FORBIDDEN,
                "CGI script is not a plain file (%r)" % scriptname)
            return
        ispy = self.is_python(scriptname)
        # Without fork, Python scripts are handed to the interpreter
        # explicitly (see below), so they skip the executable check.
        if self.have_fork or not ispy:
            if not self.is_executable(scriptfile):
                self.send_error(
                    HTTPStatus.FORBIDDEN,
                    "CGI script is not executable (%r)" % scriptname)
                return

        # Reference: http://hoohoo.ncsa.uiuc.edu/cgi/env.html
        # XXX Much of the following could be prepared ahead of time!
        env = copy.deepcopy(os.environ)
        env['SERVER_SOFTWARE'] = self.version_string()
        env['SERVER_NAME'] = self.server.server_name
        env['GATEWAY_INTERFACE'] = 'CGI/1.1'
        env['SERVER_PROTOCOL'] = self.protocol_version
        env['SERVER_PORT'] = str(self.server.server_port)
        env['REQUEST_METHOD'] = self.command
        uqrest = urllib.parse.unquote(rest)
        env['PATH_INFO'] = uqrest
        env['PATH_TRANSLATED'] = self.translate_path(uqrest)
        env['SCRIPT_NAME'] = scriptname
        env['QUERY_STRING'] = query
        env['REMOTE_ADDR'] = self.client_address[0]
        authorization = self.headers.get("authorization")
        if authorization:
            authorization = authorization.split()
            # Only the "<scheme> <credentials>" form is recognized.
            if len(authorization) == 2:
                env['AUTH_TYPE'] = authorization[0]
                if authorization[0].lower() == "basic":
                    user = self._basic_auth_user(authorization[1])
                    if user is not None:
                        env['REMOTE_USER'] = user
        # XXX REMOTE_IDENT
        if self.headers.get('content-type') is None:
            env['CONTENT_TYPE'] = self.headers.get_content_type()
        else:
            env['CONTENT_TYPE'] = self.headers['content-type']
        length = self.headers.get('content-length')
        if length:
            env['CONTENT_LENGTH'] = length
        referer = self.headers.get('referer')
        if referer:
            env['HTTP_REFERER'] = referer
        accept = self.headers.get_all('accept', ())
        env['HTTP_ACCEPT'] = ','.join(accept)
        ua = self.headers.get('user-agent')
        if ua:
            env['HTTP_USER_AGENT'] = ua
        co = filter(None, self.headers.get_all('cookie', []))
        cookie_str = ', '.join(co)
        if cookie_str:
            env['HTTP_COOKIE'] = cookie_str
        # XXX Other HTTP_* headers
        # Since we're setting the env in the parent, provide empty
        # values to override previously set values
        for k in ('QUERY_STRING', 'REMOTE_HOST', 'CONTENT_LENGTH',
                  'HTTP_USER_AGENT', 'HTTP_COOKIE', 'HTTP_REFERER'):
            env.setdefault(k, "")

        # The status line goes out before the script runs.
        self.send_response(HTTPStatus.OK, "Script output follows")
        self.flush_headers()

        decoded_query = query.replace('+', ' ')

        if self.have_fork:
            # Unix -- fork as we should
            args = [script]
            # A query without '=' is passed as a command-line argument.
            if '=' not in decoded_query:
                args.append(decoded_query)
            nobody = nobody_uid()
            self.wfile.flush() # Always flush before forking
            pid = os.fork()
            if pid != 0:
                # Parent
                pid, sts = os.waitpid(pid, 0)
                # throw away additional data [see bug #427345]
                while select.select([self.rfile], [], [], 0)[0]:
                    if not self.rfile.read(1):
                        break
                exitcode = os.waitstatus_to_exitcode(sts)
                if exitcode:
                    self.log_error(f"CGI script exit code {exitcode}")
                return
            # Child
            try:
                try:
                    os.setuid(nobody)
                except OSError:
                    # Best effort: keep the current uid if it cannot
                    # be changed.
                    pass
                # Wire the connection to the script's stdin/stdout.
                os.dup2(self.rfile.fileno(), 0)
                os.dup2(self.wfile.fileno(), 1)
                os.execve(scriptfile, args, env)
            except:
                # Deliberately catches everything: whatever went wrong,
                # the child reports it and exits instead of returning
                # into the server code.
                self.server.handle_error(self.request, self.client_address)
                os._exit(127)

        else:
            # Non-Unix -- use subprocess
            import subprocess
            cmdline = [scriptfile]
            if self.is_python(scriptfile):
                interp = sys.executable
                if interp.lower().endswith("w.exe"):
                    # On Windows, use python.exe, not pythonw.exe
                    interp = interp[:-5] + interp[-4:]
                cmdline = [interp, '-u'] + cmdline
            if '=' not in query:
                cmdline.append(query)
            self.log_message("command: %s", subprocess.list2cmdline(cmdline))
            try:
                nbytes = int(length)
            except (TypeError, ValueError):
                # Missing or malformed Content-Length: no body is read.
                nbytes = 0
            p = subprocess.Popen(cmdline,
                                 stdin=subprocess.PIPE,
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE,
                                 env=env
                                 )
            if self.command.lower() == "post" and nbytes > 0:
                data = self.rfile.read(nbytes)
            else:
                data = None
            # throw away additional data [see bug #427345]
            while select.select([self.rfile._sock], [], [], 0)[0]:
                if not self.rfile._sock.recv(1):
                    break
            stdout, stderr = p.communicate(data)
            self.wfile.write(stdout)
            if stderr:
                self.log_error('%s', stderr)
            p.stderr.close()
            p.stdout.close()
            status = p.returncode
            if status:
                self.log_error("CGI script exit status %#x", status)
            else:
                self.log_message("CGI script exited OK")
1221
1222
1223def _get_best_family(*address):
1224    infos = socket.getaddrinfo(
1225        *address,
1226        type=socket.SOCK_STREAM,
1227        flags=socket.AI_PASSIVE,
1228    )
1229    family, type, proto, canonname, sockaddr = next(iter(infos))
1230    return family, sockaddr
1231
1232
def test(HandlerClass=BaseHTTPRequestHandler,
         ServerClass=ThreadingHTTPServer,
         protocol="HTTP/1.0", port=8000, bind=None):
    """Test the HTTP request handler class.

    This runs an HTTP server on port 8000 (or the port argument),
    bound to the BIND address, and serves until interrupted from the
    keyboard, at which point the process exits with status 0.

    Note that ServerClass.address_family and
    HandlerClass.protocol_version are assigned as a side effect.

    """
    family, addr = _get_best_family(bind, port)
    ServerClass.address_family = family
    HandlerClass.protocol_version = protocol

    with ServerClass(addr, HandlerClass) as httpd:
        bound_host, bound_port = httpd.socket.getsockname()[:2]
        # An IPv6 literal must be bracketed inside a URL.
        if ':' in bound_host:
            url_host = f'[{bound_host}]'
        else:
            url_host = bound_host
        print(
            f"Serving HTTP on {bound_host} port {bound_port} "
            f"(http://{url_host}:{bound_port}/) ..."
        )
        try:
            httpd.serve_forever()
        except KeyboardInterrupt:
            print("\nKeyboard interrupt received, exiting.")
            sys.exit(0)
1256
if __name__ == '__main__':
    # Command-line entry point: parse the options and serve until
    # interrupted.
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--cgi', action='store_true',
                       help='Run as CGI Server')
    parser.add_argument('--bind', '-b', metavar='ADDRESS',
                        help='Specify alternate bind address '
                             '[default: all interfaces]')
    parser.add_argument('--directory', '-d', default=os.getcwd(),
                        help='Specify alternative directory '
                        '[default:current directory]')
    parser.add_argument('port', action='store',
                        default=8000, type=int,
                        nargs='?',
                        help='Specify alternate port [default: 8000]')
    args = parser.parse_args()
    # Bind --directory for both handler classes.  Previously it was
    # silently ignored whenever --cgi was given.  CGIHTTPRequestHandler
    # defines no __init__ of its own, so it accepts the same `directory`
    # keyword as SimpleHTTPRequestHandler.
    if args.cgi:
        base_handler = CGIHTTPRequestHandler
    else:
        base_handler = SimpleHTTPRequestHandler
    handler_class = partial(base_handler, directory=args.directory)

    # ensure dual-stack is not disabled; ref #38907
    class DualStackServer(ThreadingHTTPServer):
        def server_bind(self):
            # suppress exception when protocol is IPv4
            with contextlib.suppress(Exception):
                self.socket.setsockopt(
                    socket.IPPROTO_IPV6, socket.IPV6_V6ONLY, 0)
            return super().server_bind()

    test(
        HandlerClass=handler_class,
        ServerClass=DualStackServer,
        port=args.port,
        bind=args.bind,
    )
1295