• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1"""HTTP server classes.
2
3Note: BaseHTTPRequestHandler doesn't implement any HTTP request; see
4SimpleHTTPRequestHandler for simple implementations of GET, HEAD and POST,
5and CGIHTTPRequestHandler for CGI scripts.
6
7It does, however, optionally implement HTTP/1.1 persistent connections,
8as of version 0.3.
9
10Notes on CGIHTTPRequestHandler
11------------------------------
12
13This class implements GET and POST requests to cgi-bin scripts.
14
15If the os.fork() function is not present (e.g. on Windows),
16subprocess.Popen() is used as a fallback, with slightly altered semantics.
17
18In all cases, the implementation is intentionally naive -- all
19requests are executed synchronously.
20
21SECURITY WARNING: DON'T USE THIS CODE UNLESS YOU ARE INSIDE A FIREWALL
22-- it may execute arbitrary Python code or external programs.
23
24Note that status code 200 is sent prior to execution of a CGI script, so
25scripts cannot send other status codes such as 302 (redirect).
26
27XXX To do:
28
29- log requests even later (to capture byte count)
30- log user-agent header and other interesting goodies
31- send error log to separate file
32"""
33
34
35# See also:
36#
37# HTTP Working Group                                        T. Berners-Lee
38# INTERNET-DRAFT                                            R. T. Fielding
39# <draft-ietf-http-v10-spec-00.txt>                     H. Frystyk Nielsen
40# Expires September 8, 1995                                  March 8, 1995
41#
42# URL: http://www.ics.uci.edu/pub/ietf/http/draft-ietf-http-v10-spec-00.txt
43#
44# and
45#
46# Network Working Group                                      R. Fielding
47# Request for Comments: 2616                                       et al
48# Obsoletes: 2068                                              June 1999
49# Category: Standards Track
50#
51# URL: http://www.faqs.org/rfcs/rfc2616.html
52
53# Log files
54# ---------
55#
56# Here's a quote from the NCSA httpd docs about log file format.
57#
58# | The logfile format is as follows. Each line consists of:
59# |
60# | host rfc931 authuser [DD/Mon/YYYY:hh:mm:ss] "request" ddd bbbb
61# |
62# |        host: Either the DNS name or the IP number of the remote client
63# |        rfc931: Any information returned by identd for this person,
64# |                - otherwise.
65# |        authuser: If user sent a userid for authentication, the user name,
66# |                  - otherwise.
67# |        DD: Day
68# |        Mon: Month (calendar name)
69# |        YYYY: Year
70# |        hh: hour (24-hour format, the machine's timezone)
71# |        mm: minutes
72# |        ss: seconds
73# |        request: The first line of the HTTP request as sent by the client.
74# |        ddd: the status code returned by the server, - if not available.
75# |        bbbb: the total number of bytes sent,
76# |              *not including the HTTP/1.0 header*, - if not available
77# |
78# | You can determine the name of the file accessed through request.
79#
80# (Actually, the latter is only true if you know the server configuration
81# at the time the request was made!)
82
# Version of this module; it is appended to the "BaseHTTP/" and
# "SimpleHTTP/" server_version strings of the handler classes below.
__version__ = "0.6"

# Public names exported by ``from <this module> import *``.
__all__ = [
    "HTTPServer", "ThreadingHTTPServer", "BaseHTTPRequestHandler",
    "SimpleHTTPRequestHandler", "CGIHTTPRequestHandler",
]
89
90import copy
91import datetime
92import email.utils
93import html
94import http.client
95import io
96import mimetypes
97import os
98import posixpath
99import select
100import shutil
101import socket # For gethostbyaddr()
102import socketserver
103import sys
104import time
105import urllib.parse
106import contextlib
107from functools import partial
108
109from http import HTTPStatus
110
111
# Default error message template.
# BaseHTTPRequestHandler.send_error() fills it in with %-formatting from a
# mapping that supplies the keys 'code', 'message' and 'explain'; the
# message and explanation are HTML-escaped before substitution.
DEFAULT_ERROR_MESSAGE = """\
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
        "http://www.w3.org/TR/html4/strict.dtd">
<html>
    <head>
        <meta http-equiv="Content-Type" content="text/html;charset=utf-8">
        <title>Error response</title>
    </head>
    <body>
        <h1>Error response</h1>
        <p>Error code: %(code)d</p>
        <p>Message: %(message)s.</p>
        <p>Error code explanation: %(code)s - %(explain)s.</p>
    </body>
</html>
"""

# Value of the Content-Type header sent together with the error page above.
DEFAULT_ERROR_CONTENT_TYPE = "text/html;charset=utf-8"
131
class HTTPServer(socketserver.TCPServer):
    """TCP server that records its own name and port after binding."""

    # Seems to make sense in testing environment
    allow_reuse_address = 1

    def server_bind(self):
        """Bind the socket, then store the server name and port.

        The first two items of server_address are taken as (host, port);
        the host is expanded with socket.getfqdn() into server_name and the
        port is kept as server_port.
        """
        socketserver.TCPServer.server_bind(self)
        bound_host, bound_port = self.server_address[:2]
        self.server_name = socket.getfqdn(bound_host)
        self.server_port = bound_port
142
143
class ThreadingHTTPServer(socketserver.ThreadingMixIn, HTTPServer):
    """HTTPServer combined with socketserver.ThreadingMixIn."""

    # Attribute consulted by socketserver.ThreadingMixIn; see the
    # socketserver documentation for its exact effect on worker threads.
    daemon_threads = True
146
147
class BaseHTTPRequestHandler(socketserver.StreamRequestHandler):

    """HTTP request handler base class.

    The following explanation of HTTP serves to guide you through the
    code as well as to expose any misunderstandings I may have about
    HTTP (so you don't need to read the code to figure out I'm wrong
    :-).

    HTTP (HyperText Transfer Protocol) is an extensible protocol on
    top of a reliable stream transport (e.g. TCP/IP).  The protocol
    recognizes three parts to a request:

    1. One line identifying the request type and path
    2. An optional set of RFC-822-style headers
    3. An optional data part

    The headers and data are separated by a blank line.

    The first line of the request has the form

    <command> <path> <version>

    where <command> is a (case-sensitive) keyword such as GET or POST,
    <path> is a string containing path information for the request,
    and <version> should be the string "HTTP/1.0" or "HTTP/1.1".
    <path> is encoded using the URL encoding scheme (using %xx to signify
    the ASCII character with hex code xx).

    The specification specifies that lines are separated by CRLF but
    for compatibility with the widest range of clients recommends
    servers also handle LF.  Similarly, whitespace in the request line
    is treated sensibly (allowing multiple spaces between components
    and allowing trailing whitespace).

    Similarly, for output, lines ought to be separated by CRLF pairs
    but most clients grok LF characters just fine.

    If the first line of the request has the form

    <command> <path>

    (i.e. <version> is left out) then this is assumed to be an HTTP
    0.9 request; this form has no optional headers and data part and
    the reply consists of just the data.

    The reply form of the HTTP 1.x protocol again has three parts:

    1. One line giving the response code
    2. An optional set of RFC-822-style headers
    3. The data

    Again, the headers and data are separated by a blank line.

    The response code line has the form

    <version> <responsecode> <responsestring>

    where <version> is the protocol version ("HTTP/1.0" or "HTTP/1.1"),
    <responsecode> is a 3-digit response code indicating success or
    failure of the request, and <responsestring> is an optional
    human-readable string explaining what the response code means.

    This server parses the request and the headers, and then calls a
    function specific to the request type (<command>).  Specifically,
    a request SPAM will be handled by a method do_SPAM().  If no
    such method exists the server sends an error response to the
    client.  If it exists, it is called with no arguments:

    do_SPAM()

    Note that the request name is case sensitive (i.e. SPAM and spam
    are different requests).

    The various request details are stored in instance variables:

    - client_address is the client IP address in the form (host,
    port);

    - command, path and version are the broken-down request line;

    - headers is an instance of email.message.Message (or a derived
    class) containing the header information;

    - rfile is a file object open for reading positioned at the
    start of the optional input data part;

    - wfile is a file object open for writing.

    IT IS IMPORTANT TO ADHERE TO THE PROTOCOL FOR WRITING!

    The first thing to be written must be the response line.  Then
    follow 0 or more header lines, then a blank line, and then the
    actual data (if any).  The meaning of the header lines depends on
    the command executed by the server; in most cases, when data is
    returned, there should be at least one header line of the form

    Content-type: <type>/<subtype>

    where <type> and <subtype> should be registered MIME types,
    e.g. "text/html" or "text/plain".

    """

    # The Python system version, truncated to its first component.
    sys_version = "Python/" + sys.version.split()[0]

    # The server software version.  You may want to override this.
    # The format is multiple whitespace-separated strings,
    # where each string is of the form name[/version].
    server_version = "BaseHTTP/" + __version__

    error_message_format = DEFAULT_ERROR_MESSAGE
    error_content_type = DEFAULT_ERROR_CONTENT_TYPE

    # The default request version.  This only affects responses up until
    # the point where the request line is parsed, so it mainly decides what
    # the client gets back when sending a malformed request line.
    # Most web servers default to HTTP 0.9, i.e. don't send a status line.
    default_request_version = "HTTP/0.9"

    def parse_request(self):
        """Parse a request (internal).

        The request should be stored in self.raw_requestline; the results
        are in self.command, self.path, self.request_version and
        self.headers.

        Return True for success, False for failure; on failure, any relevant
        error response has already been sent back.

        """
        self.command = None  # set in case of error on the first line
        self.request_version = version = self.default_request_version
        self.close_connection = True
        requestline = str(self.raw_requestline, 'iso-8859-1')
        requestline = requestline.rstrip('\r\n')
        self.requestline = requestline
        words = requestline.split()
        if not words:
            return False

        if len(words) >= 3:  # Enough to determine protocol version
            version = words[-1]
            try:
                if not version.startswith('HTTP/'):
                    raise ValueError
                base_version_number = version.split('/', 1)[1]
                version_number = base_version_number.split(".")
                # RFC 2145 section 3.1 says there can be only one "." and
                #   - major and minor numbers MUST be treated as
                #      separate integers;
                #   - HTTP/2.4 is a lower version than HTTP/2.13, which in
                #      turn is lower than HTTP/12.3;
                #   - Leading zeros MUST be ignored by recipients.
                if len(version_number) != 2:
                    raise ValueError
                # int() alone is too lenient: it accepts signs, underscores
                # and surrounding whitespace, none of which are valid in an
                # HTTP version.  Require plain digits of a sane length.
                if any(not component.isdigit()
                       for component in version_number):
                    raise ValueError("non digit in http version")
                if any(len(component) > 10 for component in version_number):
                    raise ValueError("unreasonable length http version")
                version_number = int(version_number[0]), int(version_number[1])
            except (ValueError, IndexError):
                self.send_error(
                    HTTPStatus.BAD_REQUEST,
                    "Bad request version (%r)" % version)
                return False
            if version_number >= (1, 1) and self.protocol_version >= "HTTP/1.1":
                self.close_connection = False
            if version_number >= (2, 0):
                self.send_error(
                    HTTPStatus.HTTP_VERSION_NOT_SUPPORTED,
                    "Invalid HTTP version (%s)" % base_version_number)
                return False
            self.request_version = version

        if not 2 <= len(words) <= 3:
            self.send_error(
                HTTPStatus.BAD_REQUEST,
                "Bad request syntax (%r)" % requestline)
            return False
        command, path = words[:2]
        if len(words) == 2:
            # No version given: treat as HTTP/0.9, which only knows GET.
            self.close_connection = True
            if command != 'GET':
                self.send_error(
                    HTTPStatus.BAD_REQUEST,
                    "Bad HTTP/0.9 request type (%r)" % command)
                return False
        self.command, self.path = command, path

        # gh-87389: The purpose of replacing '//' with '/' is to protect
        # against open redirect attacks possibly triggered if the path starts
        # with '//' because http clients treat //path as an absolute URI
        # without scheme (similar to http://path) rather than a path.
        if self.path.startswith('//'):
            self.path = '/' + self.path.lstrip('/')  # Reduce to a single /

        # Examine the headers and look for a Connection directive.
        try:
            self.headers = http.client.parse_headers(self.rfile,
                                                     _class=self.MessageClass)
        except http.client.LineTooLong as err:
            self.send_error(
                HTTPStatus.REQUEST_HEADER_FIELDS_TOO_LARGE,
                "Line too long",
                str(err))
            return False
        except http.client.HTTPException as err:
            self.send_error(
                HTTPStatus.REQUEST_HEADER_FIELDS_TOO_LARGE,
                "Too many headers",
                str(err)
            )
            return False

        conntype = self.headers.get('Connection', "")
        if conntype.lower() == 'close':
            self.close_connection = True
        elif (conntype.lower() == 'keep-alive' and
              self.protocol_version >= "HTTP/1.1"):
            self.close_connection = False
        # Examine the headers and look for an Expect directive
        expect = self.headers.get('Expect', "")
        if (expect.lower() == "100-continue" and
                self.protocol_version >= "HTTP/1.1" and
                self.request_version >= "HTTP/1.1"):
            if not self.handle_expect_100():
                return False
        return True

    def handle_expect_100(self):
        """Decide what to do with an "Expect: 100-continue" header.

        If the client is expecting a 100 Continue response, we must
        respond with either a 100 Continue or a final response before
        waiting for the request body. The default is to always respond
        with a 100 Continue. You can behave differently (for example,
        reject unauthorized requests) by overriding this method.

        This method should either return True (possibly after sending
        a 100 Continue response) or send an error response and return
        False.

        """
        self.send_response_only(HTTPStatus.CONTINUE)
        self.end_headers()
        return True

    def handle_one_request(self):
        """Handle a single HTTP request.

        You normally don't need to override this method; see the class
        __doc__ string for information on how to handle specific HTTP
        commands such as GET and POST.

        """
        try:
            # Read one byte more than the limit so an over-long request
            # line can be told apart from one of exactly the maximum size.
            self.raw_requestline = self.rfile.readline(65537)
            if len(self.raw_requestline) > 65536:
                self.requestline = ''
                self.request_version = ''
                self.command = ''
                self.send_error(HTTPStatus.REQUEST_URI_TOO_LONG)
                return
            if not self.raw_requestline:
                # Nothing was read: stop serving this connection.
                self.close_connection = True
                return
            if not self.parse_request():
                # An error code has been sent, just exit
                return
            mname = 'do_' + self.command
            if not hasattr(self, mname):
                self.send_error(
                    HTTPStatus.NOT_IMPLEMENTED,
                    "Unsupported method (%r)" % self.command)
                return
            method = getattr(self, mname)
            method()
            self.wfile.flush() #actually send the response if not already done.
        except socket.timeout as e:
            #a read or a write timed out.  Discard this connection
            self.log_error("Request timed out: %r", e)
            self.close_connection = True
            return

    def handle(self):
        """Handle multiple requests if necessary.

        One request is always handled; further requests are handled for as
        long as close_connection stays false.
        """
        self.close_connection = True

        self.handle_one_request()
        while not self.close_connection:
            self.handle_one_request()

    def send_error(self, code, message=None, explain=None):
        """Send and log an error reply.

        Arguments are
        * code:    an HTTP error code
                   3 digits
        * message: a simple optional 1 line reason phrase.
                   *( HTAB / SP / VCHAR / %x80-FF )
                   defaults to short entry matching the response code
        * explain: a detailed message defaults to the long entry
                   matching the response code.

        This sends an error response (so it must be called before any
        output has been generated), logs the error, and finally sends
        a piece of HTML explaining the error to the user.

        """

        try:
            shortmsg, longmsg = self.responses[code]
        except KeyError:
            shortmsg, longmsg = '???', '???'
        if message is None:
            message = shortmsg
        if explain is None:
            explain = longmsg
        self.log_error("code %d, message %s", code, message)
        self.send_response(code, message)
        self.send_header('Connection', 'close')

        # Message body is omitted for cases described in:
        #  - RFC7230: 3.3. 1xx, 204(No Content), 304(Not Modified)
        #  - RFC7231: 6.3.6. 205(Reset Content)
        body = None
        if (code >= 200 and
            code not in (HTTPStatus.NO_CONTENT,
                         HTTPStatus.RESET_CONTENT,
                         HTTPStatus.NOT_MODIFIED)):
            # HTML encode to prevent Cross Site Scripting attacks
            # (see bug #1100201)
            content = (self.error_message_format % {
                'code': code,
                'message': html.escape(message, quote=False),
                'explain': html.escape(explain, quote=False)
            })
            body = content.encode('UTF-8', 'replace')
            self.send_header("Content-Type", self.error_content_type)
            self.send_header('Content-Length', str(len(body)))
        self.end_headers()

        # A HEAD request gets the headers only, never the body.
        if self.command != 'HEAD' and body:
            self.wfile.write(body)

    def send_response(self, code, message=None):
        """Add the response header to the headers buffer and log the
        response code.

        Also send two standard headers with the server software
        version and the current date.

        """
        self.log_request(code)
        self.send_response_only(code, message)
        self.send_header('Server', self.version_string())
        self.send_header('Date', self.date_time_string())

    def send_response_only(self, code, message=None):
        """Send the response header only.

        Nothing is buffered for HTTP/0.9 requests.  When message is None
        the short phrase registered for code in self.responses is used,
        or an empty string if the code is unknown.
        """
        if self.request_version != 'HTTP/0.9':
            if message is None:
                if code in self.responses:
                    message = self.responses[code][0]
                else:
                    message = ''
            if not hasattr(self, '_headers_buffer'):
                self._headers_buffer = []
            self._headers_buffer.append(("%s %d %s\r\n" %
                    (self.protocol_version, code, message)).encode(
                        'latin-1', 'strict'))

    def send_header(self, keyword, value):
        """Send a MIME header to the headers buffer.

        A 'Connection' header additionally updates close_connection
        ('close' sets it, 'keep-alive' clears it), even for HTTP/0.9
        requests where no header bytes are buffered.
        """
        if self.request_version != 'HTTP/0.9':
            if not hasattr(self, '_headers_buffer'):
                self._headers_buffer = []
            self._headers_buffer.append(
                ("%s: %s\r\n" % (keyword, value)).encode('latin-1', 'strict'))

        if keyword.lower() == 'connection':
            if value.lower() == 'close':
                self.close_connection = True
            elif value.lower() == 'keep-alive':
                self.close_connection = False

    def end_headers(self):
        """Send the blank line ending the MIME headers."""
        if self.request_version != 'HTTP/0.9':
            # Create the buffer on demand, exactly as send_response_only()
            # and send_header() do, so that ending an empty header section
            # does not fail with AttributeError.
            if not hasattr(self, '_headers_buffer'):
                self._headers_buffer = []
            self._headers_buffer.append(b"\r\n")
            self.flush_headers()

    def flush_headers(self):
        """Write all buffered header lines to wfile and empty the buffer."""
        if hasattr(self, '_headers_buffer'):
            self.wfile.write(b"".join(self._headers_buffer))
            self._headers_buffer = []

    def log_request(self, code='-', size='-'):
        """Log an accepted request.

        This is called by send_response().

        """
        if isinstance(code, HTTPStatus):
            code = code.value
        self.log_message('"%s" %s %s',
                         self.requestline, str(code), str(size))

    def log_error(self, format, *args):
        """Log an error.

        This is called when a request cannot be fulfilled.  By
        default it passes the message on to log_message().

        Arguments are the same as for log_message().

        XXX This should go to the separate error log.

        """

        self.log_message(format, *args)

    # Translation table applied by log_message(): C0 control characters,
    # DEL and the C1 range are rendered as a visible \xNN escape, and a
    # literal backslash is doubled so the escaped output stays unambiguous.
    _control_char_table = str.maketrans(
        {c: '\\x%02x' % c
         for c in list(range(0x20)) + list(range(0x7f, 0xa0))})
    _control_char_table[ord('\\')] = '\\\\'

    def log_message(self, format, *args):
        """Log an arbitrary message.

        This is used by all other logging functions.  Override
        it if you have specific logging wishes.

        The first argument, FORMAT, is a format string for the
        message to be logged.  If the format string contains
        any % escapes requiring parameters, they should be
        specified as subsequent arguments (it's just like
        printf!).

        The client ip and current date/time are prefixed to
        every message.

        Control characters in the formatted message are replaced by
        hex escapes before it is written to sys.stderr, because the
        message usually contains text supplied by the client (such as
        the request line) that must not be able to forge log lines or
        drive a terminal.

        """

        message = format % args
        sys.stderr.write("%s - - [%s] %s\n" %
                         (self.address_string(),
                          self.log_date_time_string(),
                          message.translate(self._control_char_table)))

    def version_string(self):
        """Return the server software version string."""
        return self.server_version + ' ' + self.sys_version

    def date_time_string(self, timestamp=None):
        """Return the current date and time formatted for a message header.

        timestamp is seconds since the epoch; the current time is used
        when it is None.
        """
        if timestamp is None:
            timestamp = time.time()
        return email.utils.formatdate(timestamp, usegmt=True)

    def log_date_time_string(self):
        """Return the current time formatted for logging."""
        now = time.time()
        year, month, day, hh, mm, ss, x, y, z = time.localtime(now)
        s = "%02d/%3s/%04d %02d:%02d:%02d" % (
                day, self.monthname[month], year, hh, mm, ss)
        return s

    weekdayname = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']

    # Index 0 is a placeholder so that month numbers 1-12 index directly.
    monthname = [None,
                 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

    def address_string(self):
        """Return the client address."""

        return self.client_address[0]

    # Essentially static class variables

    # The version of the HTTP protocol we support.
    # Set this to HTTP/1.1 to enable automatic keepalive
    protocol_version = "HTTP/1.0"

    # MessageClass used to parse headers
    MessageClass = http.client.HTTPMessage

    # hack to maintain backwards compatibility
    responses = {
        v: (v.phrase, v.description)
        for v in HTTPStatus.__members__.values()
    }
633
634
635class SimpleHTTPRequestHandler(BaseHTTPRequestHandler):
636
637    """Simple HTTP request handler with GET and HEAD commands.
638
639    This serves files from the current directory and any of its
640    subdirectories.  The MIME type for files is determined by
641    calling the .guess_type() method.
642
643    The GET and HEAD requests are identical except that the HEAD
644    request omits the actual contents of the file.
645
646    """
647
    # Overrides the base handler's server_version string.
    server_version = "SimpleHTTP/" + __version__
    # Suffix -> MIME type table for compressed files.  Both names are bound
    # to the very same dict object, so mutating one mutates the other.
    # NOTE(review): presumably consulted by guess_type(), which is outside
    # this view -- confirm before relying on it.
    extensions_map = _encodings_map_default = {
        '.gz': 'application/gzip',
        '.Z': 'application/octet-stream',
        '.bz2': 'application/x-bzip2',
        '.xz': 'application/x-xz',
    }
655
656    def __init__(self, *args, directory=None, **kwargs):
657        if directory is None:
658            directory = os.getcwd()
659        self.directory = os.fspath(directory)
660        super().__init__(*args, **kwargs)
661
662    def do_GET(self):
663        """Serve a GET request."""
664        f = self.send_head()
665        if f:
666            try:
667                self.copyfile(f, self.wfile)
668            finally:
669                f.close()
670
671    def do_HEAD(self):
672        """Serve a HEAD request."""
673        f = self.send_head()
674        if f:
675            f.close()
676
677    def send_head(self):
678        """Common code for GET and HEAD commands.
679
680        This sends the response code and MIME headers.
681
682        Return value is either a file object (which has to be copied
683        to the outputfile by the caller unless the command was HEAD,
684        and must be closed by the caller under all circumstances), or
685        None, in which case the caller has nothing further to do.
686
687        """
688        path = self.translate_path(self.path)
689        f = None
690        if os.path.isdir(path):
691            parts = urllib.parse.urlsplit(self.path)
692            if not parts.path.endswith('/'):
693                # redirect browser - doing basically what apache does
694                self.send_response(HTTPStatus.MOVED_PERMANENTLY)
695                new_parts = (parts[0], parts[1], parts[2] + '/',
696                             parts[3], parts[4])
697                new_url = urllib.parse.urlunsplit(new_parts)
698                self.send_header("Location", new_url)
699                self.end_headers()
700                return None
701            for index in "index.html", "index.htm":
702                index = os.path.join(path, index)
703                if os.path.exists(index):
704                    path = index
705                    break
706            else:
707                return self.list_directory(path)
708        ctype = self.guess_type(path)
709        # check for trailing "/" which should return 404. See Issue17324
710        # The test for this was added in test_httpserver.py
711        # However, some OS platforms accept a trailingSlash as a filename
712        # See discussion on python-dev and Issue34711 regarding
713        # parseing and rejection of filenames with a trailing slash
714        if path.endswith("/"):
715            self.send_error(HTTPStatus.NOT_FOUND, "File not found")
716            return None
717        try:
718            f = open(path, 'rb')
719        except OSError:
720            self.send_error(HTTPStatus.NOT_FOUND, "File not found")
721            return None
722
723        try:
724            fs = os.fstat(f.fileno())
725            # Use browser cache if possible
726            if ("If-Modified-Since" in self.headers
727                    and "If-None-Match" not in self.headers):
728                # compare If-Modified-Since and time of last file modification
729                try:
730                    ims = email.utils.parsedate_to_datetime(
731                        self.headers["If-Modified-Since"])
732                except (TypeError, IndexError, OverflowError, ValueError):
733                    # ignore ill-formed values
734                    pass
735                else:
736                    if ims.tzinfo is None:
737                        # obsolete format with no timezone, cf.
738                        # https://tools.ietf.org/html/rfc7231#section-7.1.1.1
739                        ims = ims.replace(tzinfo=datetime.timezone.utc)
740                    if ims.tzinfo is datetime.timezone.utc:
741                        # compare to UTC datetime of last modification
742                        last_modif = datetime.datetime.fromtimestamp(
743                            fs.st_mtime, datetime.timezone.utc)
744                        # remove microseconds, like in If-Modified-Since
745                        last_modif = last_modif.replace(microsecond=0)
746
747                        if last_modif <= ims:
748                            self.send_response(HTTPStatus.NOT_MODIFIED)
749                            self.end_headers()
750                            f.close()
751                            return None
752
753            self.send_response(HTTPStatus.OK)
754            self.send_header("Content-type", ctype)
755            self.send_header("Content-Length", str(fs[6]))
756            self.send_header("Last-Modified",
757                self.date_time_string(fs.st_mtime))
758            self.end_headers()
759            return f
760        except:
761            f.close()
762            raise
763
764    def list_directory(self, path):
765        """Helper to produce a directory listing (absent index.html).
766
767        Return value is either a file object, or None (indicating an
768        error).  In either case, the headers are sent, making the
769        interface the same as for send_head().
770
771        """
772        try:
773            list = os.listdir(path)
774        except OSError:
775            self.send_error(
776                HTTPStatus.NOT_FOUND,
777                "No permission to list directory")
778            return None
779        list.sort(key=lambda a: a.lower())
780        r = []
781        try:
782            displaypath = urllib.parse.unquote(self.path,
783                                               errors='surrogatepass')
784        except UnicodeDecodeError:
785            displaypath = urllib.parse.unquote(path)
786        displaypath = html.escape(displaypath, quote=False)
787        enc = sys.getfilesystemencoding()
788        title = 'Directory listing for %s' % displaypath
789        r.append('<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" '
790                 '"http://www.w3.org/TR/html4/strict.dtd">')
791        r.append('<html>\n<head>')
792        r.append('<meta http-equiv="Content-Type" '
793                 'content="text/html; charset=%s">' % enc)
794        r.append('<title>%s</title>\n</head>' % title)
795        r.append('<body>\n<h1>%s</h1>' % title)
796        r.append('<hr>\n<ul>')
797        for name in list:
798            fullname = os.path.join(path, name)
799            displayname = linkname = name
800            # Append / for directories or @ for symbolic links
801            if os.path.isdir(fullname):
802                displayname = name + "/"
803                linkname = name + "/"
804            if os.path.islink(fullname):
805                displayname = name + "@"
806                # Note: a link to a directory displays with @ and links with /
807            r.append('<li><a href="%s">%s</a></li>'
808                    % (urllib.parse.quote(linkname,
809                                          errors='surrogatepass'),
810                       html.escape(displayname, quote=False)))
811        r.append('</ul>\n<hr>\n</body>\n</html>\n')
812        encoded = '\n'.join(r).encode(enc, 'surrogateescape')
813        f = io.BytesIO()
814        f.write(encoded)
815        f.seek(0)
816        self.send_response(HTTPStatus.OK)
817        self.send_header("Content-type", "text/html; charset=%s" % enc)
818        self.send_header("Content-Length", str(len(encoded)))
819        self.end_headers()
820        return f
821
822    def translate_path(self, path):
823        """Translate a /-separated PATH to the local filename syntax.
824
825        Components that mean special things to the local file system
826        (e.g. drive or directory names) are ignored.  (XXX They should
827        probably be diagnosed.)
828
829        """
830        # abandon query parameters
831        path = path.split('?',1)[0]
832        path = path.split('#',1)[0]
833        # Don't forget explicit trailing slash when normalizing. Issue17324
834        trailing_slash = path.rstrip().endswith('/')
835        try:
836            path = urllib.parse.unquote(path, errors='surrogatepass')
837        except UnicodeDecodeError:
838            path = urllib.parse.unquote(path)
839        path = posixpath.normpath(path)
840        words = path.split('/')
841        words = filter(None, words)
842        path = self.directory
843        for word in words:
844            if os.path.dirname(word) or word in (os.curdir, os.pardir):
845                # Ignore components that are not a simple file/directory name
846                continue
847            path = os.path.join(path, word)
848        if trailing_slash:
849            path += '/'
850        return path
851
852    def copyfile(self, source, outputfile):
853        """Copy all data between two file objects.
854
855        The SOURCE argument is a file object open for reading
856        (or anything with a read() method) and the DESTINATION
857        argument is a file object open for writing (or
858        anything with a write() method).
859
860        The only reason for overriding this would be to change
861        the block size or perhaps to replace newlines by CRLF
862        -- note however that this the default server uses this
863        to copy binary data as well.
864
865        """
866        shutil.copyfileobj(source, outputfile)
867
868    def guess_type(self, path):
869        """Guess the type of a file.
870
871        Argument is a PATH (a filename).
872
873        Return value is a string of the form type/subtype,
874        usable for a MIME Content-type header.
875
876        The default implementation looks the file's extension
877        up in the table self.extensions_map, using application/octet-stream
878        as a default; however it would be permissible (if
879        slow) to look inside the data to make a better guess.
880
881        """
882        base, ext = posixpath.splitext(path)
883        if ext in self.extensions_map:
884            return self.extensions_map[ext]
885        ext = ext.lower()
886        if ext in self.extensions_map:
887            return self.extensions_map[ext]
888        guess, _ = mimetypes.guess_type(path)
889        if guess:
890            return guess
891        return 'application/octet-stream'
892
893
894# Utilities for CGIHTTPRequestHandler
895
def _url_collapse_path(path):
    """
    Collapse a URL path: drop empty and '.' elements and resolve '..'
    references against the elements that precede them.

    Implements something akin to RFC-2396 5.2 step 6 to parse relative paths.
    The utility of this function is limited to is_cgi method and helps
    preventing some security attacks.

    Only the part before the first '?' is unquoted and collapsed; a
    non-empty query string is re-attached to the result verbatim.

    Returns: The reconstituted URL, which will always start with a '/'.

    Raises: IndexError if too many '..' occur within the path.

    """
    # Query component should not be involved.
    raw_path, _, query = path.partition('?')

    # Similar to os.path.split(os.path.normpath(path)) but specific to URL
    # path semantics rather than local operating system semantics.
    # str.split() always yields at least one element, so there is always
    # a final segment to treat as the tail.
    *leading, last = urllib.parse.unquote(raw_path).split('/')

    kept = []
    for segment in leading:
        if segment == '..':
            kept.pop()  # IndexError if more '..' than prior parts
        elif segment not in ('', '.'):
            kept.append(segment)

    if last == '..':
        kept.pop()
        last = ''
    elif last == '.':
        last = ''

    if query:
        last = last + '?' + query

    return '/' + '/'.join(kept) + '/' + last
941
942
943
# Cached result of nobody_uid(); filled in on the first successful lookup.
nobody = None

def nobody_uid():
    """Internal routine to get nobody's uid.

    Returns the cached value when one has been stored.  Returns -1
    (without caching) when the pwd module cannot be imported.  Otherwise
    the uid of the 'nobody' account is used, or, when no such account
    exists, one more than the largest uid in the password database.
    """
    global nobody
    if not nobody:
        try:
            import pwd
        except ImportError:
            return -1
        try:
            nobody = pwd.getpwnam('nobody').pw_uid
        except KeyError:
            nobody = 1 + max(entry.pw_uid for entry in pwd.getpwall())
    return nobody
960
961
def executable(path):
    """Return True if os.access() reports *path* as executable (X_OK)."""
    may_execute = os.access(path, os.X_OK)
    return may_execute
965
966
class CGIHTTPRequestHandler(SimpleHTTPRequestHandler):

    """Complete HTTP server with GET, HEAD and POST commands.

    GET and HEAD also support running CGI scripts.

    The POST command is *only* implemented for CGI scripts.

    """

    # Determine platform specifics
    have_fork = hasattr(os, 'fork')

    # Make rfile unbuffered -- we need to read one line and then pass
    # the rest to a subprocess, so we can't use buffered input.
    rbufsize = 0

    def do_POST(self):
        """Serve a POST request.

        This is only implemented for CGI scripts; any other target is
        answered with a NOT_IMPLEMENTED error.

        """

        if self.is_cgi():
            self.run_cgi()
        else:
            self.send_error(
                HTTPStatus.NOT_IMPLEMENTED,
                "Can only POST to CGI scripts")

    def send_head(self):
        """Version of send_head that supports CGI scripts.

        CGI requests are handed to run_cgi(); everything else is
        delegated to SimpleHTTPRequestHandler.send_head().
        """
        if self.is_cgi():
            return self.run_cgi()
        else:
            return SimpleHTTPRequestHandler.send_head(self)

    def is_cgi(self):
        """Test whether self.path corresponds to a CGI script.

        Returns True and updates the cgi_info attribute to the tuple
        (dir, rest) if self.path requires running a CGI script.
        Returns False otherwise.

        If any exception is raised, the caller should assume that
        self.path was rejected as invalid and act accordingly.

        The default implementation tests whether the normalized url
        path begins with one of the strings in self.cgi_directories
        (and the next character is a '/' or the end of the string).

        """
        collapsed_path = _url_collapse_path(self.path)
        # Try successively longer '/'-terminated prefixes until one of
        # them is a configured CGI directory.
        dir_sep = collapsed_path.find('/', 1)
        while (dir_sep > 0
               and collapsed_path[:dir_sep] not in self.cgi_directories):
            dir_sep = collapsed_path.find('/', dir_sep+1)
        if dir_sep > 0:
            head, tail = collapsed_path[:dir_sep], collapsed_path[dir_sep+1:]
            self.cgi_info = head, tail
            return True
        return False


    # URL path prefixes that is_cgi() treats as CGI directories.
    cgi_directories = ['/cgi-bin', '/htbin']

    def is_executable(self, path):
        """Test whether argument path is an executable file."""
        return executable(path)

    def is_python(self, path):
        """Test whether argument path is a Python script."""
        head, tail = os.path.splitext(path)
        return tail.lower() in (".py", ".pyw")

    def run_cgi(self):
        """Execute a CGI script.

        Uses the (dir, rest) tuple that is_cgi() stored in self.cgi_info
        to locate the script, builds the CGI environment from the
        request, sends a 200 status line and then runs the script:
        via fork()/execve() when os.fork exists, otherwise through
        subprocess.Popen().

        An error response is sent, and nothing is run, when the script
        does not exist, is not a plain file, or is not executable.
        Always returns None.
        """
        cgi_dir, rest = self.cgi_info
        path = cgi_dir + '/' + rest
        # Extend the directory part for as long as the next path
        # component is itself an existing directory on disk.
        i = path.find('/', len(cgi_dir)+1)
        while i >= 0:
            nextdir = path[:i]
            nextrest = path[i+1:]

            scriptdir = self.translate_path(nextdir)
            if os.path.isdir(scriptdir):
                cgi_dir, rest = nextdir, nextrest
                i = path.find('/', len(cgi_dir)+1)
            else:
                break

        # find an explicit query string, if present.
        rest, _, query = rest.partition('?')

        # dissect the part after the directory name into a script name &
        # a possible additional path, to be stored in PATH_INFO.
        i = rest.find('/')
        if i >= 0:
            script, rest = rest[:i], rest[i:]
        else:
            script, rest = rest, ''

        scriptname = cgi_dir + '/' + script
        scriptfile = self.translate_path(scriptname)
        if not os.path.exists(scriptfile):
            self.send_error(
                HTTPStatus.NOT_FOUND,
                "No such CGI script (%r)" % scriptname)
            return
        if not os.path.isfile(scriptfile):
            self.send_error(
                HTTPStatus.FORBIDDEN,
                "CGI script is not a plain file (%r)" % scriptname)
            return
        ispy = self.is_python(scriptname)
        # Without fork, Python scripts are started through the
        # interpreter (see below), so only other scripts need the check.
        if self.have_fork or not ispy:
            if not self.is_executable(scriptfile):
                self.send_error(
                    HTTPStatus.FORBIDDEN,
                    "CGI script is not executable (%r)" % scriptname)
                return

        # Reference: http://hoohoo.ncsa.uiuc.edu/cgi/env.html
        # XXX Much of the following could be prepared ahead of time!
        env = copy.deepcopy(os.environ)
        env['SERVER_SOFTWARE'] = self.version_string()
        env['SERVER_NAME'] = self.server.server_name
        env['GATEWAY_INTERFACE'] = 'CGI/1.1'
        env['SERVER_PROTOCOL'] = self.protocol_version
        env['SERVER_PORT'] = str(self.server.server_port)
        env['REQUEST_METHOD'] = self.command
        uqrest = urllib.parse.unquote(rest)
        env['PATH_INFO'] = uqrest
        env['PATH_TRANSLATED'] = self.translate_path(uqrest)
        env['SCRIPT_NAME'] = scriptname
        if query:
            env['QUERY_STRING'] = query
        env['REMOTE_ADDR'] = self.client_address[0]
        authorization = self.headers.get("authorization")
        if authorization:
            authorization = authorization.split()
            if len(authorization) == 2:
                import base64, binascii
                env['AUTH_TYPE'] = authorization[0]
                if authorization[0].lower() == "basic":
                    try:
                        authorization = authorization[1].encode('ascii')
                        authorization = base64.decodebytes(authorization).\
                                        decode('ascii')
                    except (binascii.Error, UnicodeError):
                        pass
                    else:
                        # Split on the first colon only: the password
                        # part of Basic credentials may itself contain
                        # ':' (RFC 7617), and must not hide the user.
                        user, colon, _ = authorization.partition(':')
                        if colon:
                            env['REMOTE_USER'] = user
        # XXX REMOTE_IDENT
        if self.headers.get('content-type') is None:
            env['CONTENT_TYPE'] = self.headers.get_content_type()
        else:
            env['CONTENT_TYPE'] = self.headers['content-type']
        length = self.headers.get('content-length')
        if length:
            env['CONTENT_LENGTH'] = length
        referer = self.headers.get('referer')
        if referer:
            env['HTTP_REFERER'] = referer
        accept = self.headers.get_all('accept', ())
        env['HTTP_ACCEPT'] = ','.join(accept)
        ua = self.headers.get('user-agent')
        if ua:
            env['HTTP_USER_AGENT'] = ua
        co = filter(None, self.headers.get_all('cookie', []))
        cookie_str = ', '.join(co)
        if cookie_str:
            env['HTTP_COOKIE'] = cookie_str
        # XXX Other HTTP_* headers
        # Since we're setting the env in the parent, provide empty
        # values to override previously set values
        for k in ('QUERY_STRING', 'REMOTE_HOST', 'CONTENT_LENGTH',
                  'HTTP_USER_AGENT', 'HTTP_COOKIE', 'HTTP_REFERER'):
            env.setdefault(k, "")

        # The status line goes out before the script starts, so the
        # script itself cannot choose a different status code.
        self.send_response(HTTPStatus.OK, "Script output follows")
        self.flush_headers()

        decoded_query = query.replace('+', ' ')

        if self.have_fork:
            # Unix -- fork as we should
            args = [script]
            # A query without '=' is passed to the script as an argument.
            if '=' not in decoded_query:
                args.append(decoded_query)
            uid = nobody_uid()
            self.wfile.flush() # Always flush before forking
            pid = os.fork()
            if pid != 0:
                # Parent
                pid, sts = os.waitpid(pid, 0)
                # throw away additional data [see bug #427345]
                while select.select([self.rfile], [], [], 0)[0]:
                    if not self.rfile.read(1):
                        break
                exitcode = os.waitstatus_to_exitcode(sts)
                if exitcode:
                    self.log_error(f"CGI script exit code {exitcode}")
                return
            # Child
            try:
                # Best effort: try to switch to the 'nobody' uid and
                # carry on regardless if that is refused.
                try:
                    os.setuid(uid)
                except OSError:
                    pass
                # Connect the script's stdin/stdout to the request streams.
                os.dup2(self.rfile.fileno(), 0)
                os.dup2(self.wfile.fileno(), 1)
                os.execve(scriptfile, args, env)
            except:
                # Deliberately catch everything: the child must never
                # return into the server's request loop.
                self.server.handle_error(self.request, self.client_address)
                os._exit(127)

        else:
            # Non-Unix -- use subprocess
            import subprocess
            cmdline = [scriptfile]
            if self.is_python(scriptfile):
                interp = sys.executable
                if interp.lower().endswith("w.exe"):
                    # On Windows, use python.exe, not pythonw.exe
                    interp = interp[:-5] + interp[-4:]
                cmdline = [interp, '-u'] + cmdline
            if '=' not in query:
                cmdline.append(query)
            self.log_message("command: %s", subprocess.list2cmdline(cmdline))
            try:
                nbytes = int(length)
            except (TypeError, ValueError):
                nbytes = 0
            p = subprocess.Popen(cmdline,
                                 stdin=subprocess.PIPE,
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE,
                                 env = env
                                 )
            # Only a POST body of the declared length is fed to the script.
            if self.command.lower() == "post" and nbytes > 0:
                data = self.rfile.read(nbytes)
            else:
                data = None
            # throw away additional data [see bug #427345]
            while select.select([self.rfile._sock], [], [], 0)[0]:
                if not self.rfile._sock.recv(1):
                    break
            stdout, stderr = p.communicate(data)
            self.wfile.write(stdout)
            if stderr:
                self.log_error('%s', stderr)
            p.stderr.close()
            p.stdout.close()
            status = p.returncode
            if status:
                self.log_error("CGI script exit status %#x", status)
            else:
                self.log_message("CGI script exited OK")
1228
1229
def _get_best_family(*address):
    """Resolve *address* and return (family, sockaddr) of the first match.

    The positional arguments are forwarded to socket.getaddrinfo(),
    which is asked for passive stream sockets.
    """
    candidates = socket.getaddrinfo(
        *address,
        type=socket.SOCK_STREAM,
        flags=socket.AI_PASSIVE,
    )
    family, _socktype, _proto, _canonname, sockaddr = next(iter(candidates))
    return family, sockaddr
1238
1239
def test(HandlerClass=BaseHTTPRequestHandler,
         ServerClass=ThreadingHTTPServer,
         protocol="HTTP/1.0", port=8000, bind=None):
    """Test the HTTP request handler class.

    This runs an HTTP server on port 8000 (or the port argument),
    bound to *bind*, and serves until interrupted from the keyboard,
    at which point the process exits with status 0.

    Note that this assigns address_family on ServerClass and
    protocol_version on HandlerClass.

    """
    family, addr = _get_best_family(bind, port)
    ServerClass.address_family = family
    HandlerClass.protocol_version = protocol

    with ServerClass(addr, HandlerClass) as httpd:
        # Report the address actually bound, which may differ from the
        # one requested.
        host, port = httpd.socket.getsockname()[:2]
        if ':' in host:
            # Hosts containing ':' are bracketed inside the URL.
            url_host = f'[{host}]'
        else:
            url_host = host
        banner = (
            f"Serving HTTP on {host} port {port} "
            f"(http://{url_host}:{port}/) ..."
        )
        print(banner)
        try:
            httpd.serve_forever()
        except KeyboardInterrupt:
            print("\nKeyboard interrupt received, exiting.")
            sys.exit(0)
1263
if __name__ == '__main__':
    # Command-line entry point: serve files (or CGI scripts with --cgi)
    # from --directory on the given port and bind address.
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--cgi', action='store_true',
                        help='Run as CGI Server')
    parser.add_argument('--bind', '-b', metavar='ADDRESS',
                        help='Specify alternate bind address '
                             '[default: all interfaces]')
    parser.add_argument('--directory', '-d', default=os.getcwd(),
                        help='Specify alternative directory '
                        '[default:current directory]')
    parser.add_argument('port', action='store',
                        default=8000, type=int,
                        nargs='?',
                        help='Specify alternate port [default: 8000]')
    args = parser.parse_args()
    # Hand test() the real handler class rather than a functools.partial
    # wrapper: test() assigns protocol_version on whatever it is given,
    # and an attribute set on a partial never reaches the handler class.
    if args.cgi:
        handler_class = CGIHTTPRequestHandler
    else:
        handler_class = SimpleHTTPRequestHandler

    # ensure dual-stack is not disabled; ref #38907
    class DualStackServer(ThreadingHTTPServer):
        def server_bind(self):
            # suppress exception when protocol is IPv4
            with contextlib.suppress(Exception):
                self.socket.setsockopt(
                    socket.IPPROTO_IPV6, socket.IPV6_V6ONLY, 0)
            return super().server_bind()

        def finish_request(self, request, client_address):
            # Supply the served directory for every request, so that
            # --directory is honoured with --cgi as well.
            self.RequestHandlerClass(request, client_address, self,
                                     directory=args.directory)

    test(
        HandlerClass=handler_class,
        ServerClass=DualStackServer,
        port=args.port,
        bind=args.bind,
    )
1302