• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1"""HTTP server classes.
2
Note: BaseHTTPRequestHandler doesn't implement any HTTP request; see
SimpleHTTPRequestHandler for simple implementations of GET and HEAD,
and CGIHTTPRequestHandler for CGI scripts (GET and POST).
6
7It does, however, optionally implement HTTP/1.1 persistent connections,
8as of version 0.3.
9
10Notes on CGIHTTPRequestHandler
11------------------------------
12
13This class implements GET and POST requests to cgi-bin scripts.
14
15If the os.fork() function is not present (e.g. on Windows),
16subprocess.Popen() is used as a fallback, with slightly altered semantics.
17
18In all cases, the implementation is intentionally naive -- all
19requests are executed synchronously.
20
21SECURITY WARNING: DON'T USE THIS CODE UNLESS YOU ARE INSIDE A FIREWALL
22-- it may execute arbitrary Python code or external programs.
23
24Note that status code 200 is sent prior to execution of a CGI script, so
25scripts cannot send other status codes such as 302 (redirect).
26
27XXX To do:
28
29- log requests even later (to capture byte count)
30- log user-agent header and other interesting goodies
31- send error log to separate file
32"""
33
34
35# See also:
36#
37# HTTP Working Group                                        T. Berners-Lee
38# INTERNET-DRAFT                                            R. T. Fielding
39# <draft-ietf-http-v10-spec-00.txt>                     H. Frystyk Nielsen
40# Expires September 8, 1995                                  March 8, 1995
41#
42# URL: http://www.ics.uci.edu/pub/ietf/http/draft-ietf-http-v10-spec-00.txt
43#
44# and
45#
46# Network Working Group                                      R. Fielding
47# Request for Comments: 2616                                       et al
48# Obsoletes: 2068                                              June 1999
49# Category: Standards Track
50#
51# URL: http://www.faqs.org/rfcs/rfc2616.html
52
53# Log files
54# ---------
55#
56# Here's a quote from the NCSA httpd docs about log file format.
57#
58# | The logfile format is as follows. Each line consists of:
59# |
60# | host rfc931 authuser [DD/Mon/YYYY:hh:mm:ss] "request" ddd bbbb
61# |
62# |        host: Either the DNS name or the IP number of the remote client
63# |        rfc931: Any information returned by identd for this person,
64# |                - otherwise.
65# |        authuser: If user sent a userid for authentication, the user name,
66# |                  - otherwise.
67# |        DD: Day
68# |        Mon: Month (calendar name)
69# |        YYYY: Year
70# |        hh: hour (24-hour format, the machine's timezone)
71# |        mm: minutes
72# |        ss: seconds
73# |        request: The first line of the HTTP request as sent by the client.
74# |        ddd: the status code returned by the server, - if not available.
75# |        bbbb: the total number of bytes sent,
76# |              *not including the HTTP/1.0 header*, - if not available
77# |
78# | You can determine the name of the file accessed through request.
79#
80# (Actually, the latter is only true if you know the server configuration
81# at the time the request was made!)
82
# Module version; the request handlers embed it in their server_version
# strings (e.g. "BaseHTTP/0.6"), which end up in the "Server" header.
__version__ = "0.6"

# Names exported by a star-import of this module.
__all__ = [
    "HTTPServer", "ThreadingHTTPServer", "BaseHTTPRequestHandler",
    "SimpleHTTPRequestHandler", "CGIHTTPRequestHandler",
]
89
90import copy
91import datetime
92import email.utils
93import html
94import http.client
95import io
96import mimetypes
97import os
98import posixpath
99import select
100import shutil
101import socket # For gethostbyaddr()
102import socketserver
103import sys
104import time
105import urllib.parse
106from functools import partial
107
108from http import HTTPStatus
109
110
# Default error message template.
# It is %-formatted by BaseHTTPRequestHandler.send_error() with a mapping
# holding three keys: 'code' (the status code), 'message' (the short reason)
# and 'explain' (the long explanation); the last two are HTML-escaped first.
DEFAULT_ERROR_MESSAGE = """\
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
        "http://www.w3.org/TR/html4/strict.dtd">
<html>
    <head>
        <meta http-equiv="Content-Type" content="text/html;charset=utf-8">
        <title>Error response</title>
    </head>
    <body>
        <h1>Error response</h1>
        <p>Error code: %(code)d</p>
        <p>Message: %(message)s.</p>
        <p>Error code explanation: %(code)s - %(explain)s.</p>
    </body>
</html>
"""

# Value of the Content-Type header sent together with the error page above.
DEFAULT_ERROR_CONTENT_TYPE = "text/html;charset=utf-8"
130
class HTTPServer(socketserver.TCPServer):
    """TCP server that remembers the host name and port it is bound to."""

    # Seems to make sense in testing environment
    allow_reuse_address = 1

    def server_bind(self):
        """Bind the socket, then record ``server_name`` and ``server_port``.

        ``server_name`` is the fully qualified name that socket.getfqdn()
        reports for the bound host; ``server_port`` is the bound port.
        """
        socketserver.TCPServer.server_bind(self)
        bound_host, bound_port = self.server_address[:2]
        self.server_name = socket.getfqdn(bound_host)
        self.server_port = bound_port
141
142
class ThreadingHTTPServer(socketserver.ThreadingMixIn, HTTPServer):
    """HTTPServer combined with socketserver.ThreadingMixIn."""

    # Setting consumed by socketserver.ThreadingMixIn.
    daemon_threads = True
145
146
147class BaseHTTPRequestHandler(socketserver.StreamRequestHandler):
148
149    """HTTP request handler base class.
150
151    The following explanation of HTTP serves to guide you through the
152    code as well as to expose any misunderstandings I may have about
153    HTTP (so you don't need to read the code to figure out I'm wrong
154    :-).
155
156    HTTP (HyperText Transfer Protocol) is an extensible protocol on
157    top of a reliable stream transport (e.g. TCP/IP).  The protocol
158    recognizes three parts to a request:
159
160    1. One line identifying the request type and path
161    2. An optional set of RFC-822-style headers
162    3. An optional data part
163
164    The headers and data are separated by a blank line.
165
166    The first line of the request has the form
167
168    <command> <path> <version>
169
170    where <command> is a (case-sensitive) keyword such as GET or POST,
171    <path> is a string containing path information for the request,
172    and <version> should be the string "HTTP/1.0" or "HTTP/1.1".
173    <path> is encoded using the URL encoding scheme (using %xx to signify
174    the ASCII character with hex code xx).
175
176    The specification specifies that lines are separated by CRLF but
177    for compatibility with the widest range of clients recommends
178    servers also handle LF.  Similarly, whitespace in the request line
179    is treated sensibly (allowing multiple spaces between components
180    and allowing trailing whitespace).
181
182    Similarly, for output, lines ought to be separated by CRLF pairs
183    but most clients grok LF characters just fine.
184
185    If the first line of the request has the form
186
187    <command> <path>
188
189    (i.e. <version> is left out) then this is assumed to be an HTTP
190    0.9 request; this form has no optional headers and data part and
191    the reply consists of just the data.
192
193    The reply form of the HTTP 1.x protocol again has three parts:
194
195    1. One line giving the response code
196    2. An optional set of RFC-822-style headers
197    3. The data
198
199    Again, the headers and data are separated by a blank line.
200
201    The response code line has the form
202
203    <version> <responsecode> <responsestring>
204
205    where <version> is the protocol version ("HTTP/1.0" or "HTTP/1.1"),
206    <responsecode> is a 3-digit response code indicating success or
207    failure of the request, and <responsestring> is an optional
208    human-readable string explaining what the response code means.
209
210    This server parses the request and the headers, and then calls a
211    function specific to the request type (<command>).  Specifically,
212    a request SPAM will be handled by a method do_SPAM().  If no
213    such method exists the server sends an error response to the
214    client.  If it exists, it is called with no arguments:
215
216    do_SPAM()
217
218    Note that the request name is case sensitive (i.e. SPAM and spam
219    are different requests).
220
221    The various request details are stored in instance variables:
222
223    - client_address is the client IP address in the form (host,
224    port);
225
226    - command, path and version are the broken-down request line;
227
228    - headers is an instance of email.message.Message (or a derived
229    class) containing the header information;
230
231    - rfile is a file object open for reading positioned at the
232    start of the optional input data part;
233
234    - wfile is a file object open for writing.
235
236    IT IS IMPORTANT TO ADHERE TO THE PROTOCOL FOR WRITING!
237
238    The first thing to be written must be the response line.  Then
239    follow 0 or more header lines, then a blank line, and then the
240    actual data (if any).  The meaning of the header lines depends on
241    the command executed by the server; in most cases, when data is
242    returned, there should be at least one header line of the form
243
244    Content-type: <type>/<subtype>
245
246    where <type> and <subtype> should be registered MIME types,
247    e.g. "text/html" or "text/plain".
248
249    """
250
    # The Python system version, truncated to its first component
    # (the first whitespace-separated word of sys.version).
    sys_version = "Python/" + sys.version.split()[0]

    # The server software version.  You may want to override this.
    # The format is multiple whitespace-separated strings,
    # where each string is of the form name[/version].
    server_version = "BaseHTTP/" + __version__

    # Template and Content-Type used by send_error() for the error page.
    error_message_format = DEFAULT_ERROR_MESSAGE
    error_content_type = DEFAULT_ERROR_CONTENT_TYPE

    # The default request version.  This only affects responses up until
    # the point where the request line is parsed, so it mainly decides what
    # the client gets back when sending a malformed request line.
    # Most web servers default to HTTP 0.9, i.e. don't send a status line.
    default_request_version = "HTTP/0.9"
267
268    def parse_request(self):
269        """Parse a request (internal).
270
271        The request should be stored in self.raw_requestline; the results
272        are in self.command, self.path, self.request_version and
273        self.headers.
274
275        Return True for success, False for failure; on failure, any relevant
276        error response has already been sent back.
277
278        """
279        self.command = None  # set in case of error on the first line
280        self.request_version = version = self.default_request_version
281        self.close_connection = True
282        requestline = str(self.raw_requestline, 'iso-8859-1')
283        requestline = requestline.rstrip('\r\n')
284        self.requestline = requestline
285        words = requestline.split()
286        if len(words) == 0:
287            return False
288
289        if len(words) >= 3:  # Enough to determine protocol version
290            version = words[-1]
291            try:
292                if not version.startswith('HTTP/'):
293                    raise ValueError
294                base_version_number = version.split('/', 1)[1]
295                version_number = base_version_number.split(".")
296                # RFC 2145 section 3.1 says there can be only one "." and
297                #   - major and minor numbers MUST be treated as
298                #      separate integers;
299                #   - HTTP/2.4 is a lower version than HTTP/2.13, which in
300                #      turn is lower than HTTP/12.3;
301                #   - Leading zeros MUST be ignored by recipients.
302                if len(version_number) != 2:
303                    raise ValueError
304                version_number = int(version_number[0]), int(version_number[1])
305            except (ValueError, IndexError):
306                self.send_error(
307                    HTTPStatus.BAD_REQUEST,
308                    "Bad request version (%r)" % version)
309                return False
310            if version_number >= (1, 1) and self.protocol_version >= "HTTP/1.1":
311                self.close_connection = False
312            if version_number >= (2, 0):
313                self.send_error(
314                    HTTPStatus.HTTP_VERSION_NOT_SUPPORTED,
315                    "Invalid HTTP version (%s)" % base_version_number)
316                return False
317            self.request_version = version
318
319        if not 2 <= len(words) <= 3:
320            self.send_error(
321                HTTPStatus.BAD_REQUEST,
322                "Bad request syntax (%r)" % requestline)
323            return False
324        command, path = words[:2]
325        if len(words) == 2:
326            self.close_connection = True
327            if command != 'GET':
328                self.send_error(
329                    HTTPStatus.BAD_REQUEST,
330                    "Bad HTTP/0.9 request type (%r)" % command)
331                return False
332        self.command, self.path = command, path
333
334        # Examine the headers and look for a Connection directive.
335        try:
336            self.headers = http.client.parse_headers(self.rfile,
337                                                     _class=self.MessageClass)
338        except http.client.LineTooLong as err:
339            self.send_error(
340                HTTPStatus.REQUEST_HEADER_FIELDS_TOO_LARGE,
341                "Line too long",
342                str(err))
343            return False
344        except http.client.HTTPException as err:
345            self.send_error(
346                HTTPStatus.REQUEST_HEADER_FIELDS_TOO_LARGE,
347                "Too many headers",
348                str(err)
349            )
350            return False
351
352        conntype = self.headers.get('Connection', "")
353        if conntype.lower() == 'close':
354            self.close_connection = True
355        elif (conntype.lower() == 'keep-alive' and
356              self.protocol_version >= "HTTP/1.1"):
357            self.close_connection = False
358        # Examine the headers and look for an Expect directive
359        expect = self.headers.get('Expect', "")
360        if (expect.lower() == "100-continue" and
361                self.protocol_version >= "HTTP/1.1" and
362                self.request_version >= "HTTP/1.1"):
363            if not self.handle_expect_100():
364                return False
365        return True
366
    def handle_expect_100(self):
        """Decide what to do with an "Expect: 100-continue" header.

        If the client is expecting a 100 Continue response, we must
        respond with either a 100 Continue or a final response before
        waiting for the request body. The default is to always respond
        with a 100 Continue. You can behave differently (for example,
        reject unauthorized requests) by overriding this method.

        This method should either return True (possibly after sending
        a 100 Continue response) or send an error response and return
        False.  parse_request() reports failure when False is returned.

        """
        # Only the bare status line is sent: no Server/Date headers.
        self.send_response_only(HTTPStatus.CONTINUE)
        self.end_headers()
        return True
384
385    def handle_one_request(self):
386        """Handle a single HTTP request.
387
388        You normally don't need to override this method; see the class
389        __doc__ string for information on how to handle specific HTTP
390        commands such as GET and POST.
391
392        """
393        try:
394            self.raw_requestline = self.rfile.readline(65537)
395            if len(self.raw_requestline) > 65536:
396                self.requestline = ''
397                self.request_version = ''
398                self.command = ''
399                self.send_error(HTTPStatus.REQUEST_URI_TOO_LONG)
400                return
401            if not self.raw_requestline:
402                self.close_connection = True
403                return
404            if not self.parse_request():
405                # An error code has been sent, just exit
406                return
407            mname = 'do_' + self.command
408            if not hasattr(self, mname):
409                self.send_error(
410                    HTTPStatus.NOT_IMPLEMENTED,
411                    "Unsupported method (%r)" % self.command)
412                return
413            method = getattr(self, mname)
414            method()
415            self.wfile.flush() #actually send the response if not already done.
416        except socket.timeout as e:
417            #a read or a write timed out.  Discard this connection
418            self.log_error("Request timed out: %r", e)
419            self.close_connection = True
420            return
421
422    def handle(self):
423        """Handle multiple requests if necessary."""
424        self.close_connection = True
425
426        self.handle_one_request()
427        while not self.close_connection:
428            self.handle_one_request()
429
430    def send_error(self, code, message=None, explain=None):
431        """Send and log an error reply.
432
433        Arguments are
434        * code:    an HTTP error code
435                   3 digits
436        * message: a simple optional 1 line reason phrase.
437                   *( HTAB / SP / VCHAR / %x80-FF )
438                   defaults to short entry matching the response code
439        * explain: a detailed message defaults to the long entry
440                   matching the response code.
441
442        This sends an error response (so it must be called before any
443        output has been generated), logs the error, and finally sends
444        a piece of HTML explaining the error to the user.
445
446        """
447
448        try:
449            shortmsg, longmsg = self.responses[code]
450        except KeyError:
451            shortmsg, longmsg = '???', '???'
452        if message is None:
453            message = shortmsg
454        if explain is None:
455            explain = longmsg
456        self.log_error("code %d, message %s", code, message)
457        self.send_response(code, message)
458        self.send_header('Connection', 'close')
459
460        # Message body is omitted for cases described in:
461        #  - RFC7230: 3.3. 1xx, 204(No Content), 304(Not Modified)
462        #  - RFC7231: 6.3.6. 205(Reset Content)
463        body = None
464        if (code >= 200 and
465            code not in (HTTPStatus.NO_CONTENT,
466                         HTTPStatus.RESET_CONTENT,
467                         HTTPStatus.NOT_MODIFIED)):
468            # HTML encode to prevent Cross Site Scripting attacks
469            # (see bug #1100201)
470            content = (self.error_message_format % {
471                'code': code,
472                'message': html.escape(message, quote=False),
473                'explain': html.escape(explain, quote=False)
474            })
475            body = content.encode('UTF-8', 'replace')
476            self.send_header("Content-Type", self.error_content_type)
477            self.send_header('Content-Length', str(len(body)))
478        self.end_headers()
479
480        if self.command != 'HEAD' and body:
481            self.wfile.write(body)
482
    def send_response(self, code, message=None):
        """Add the response header to the headers buffer and log the
        response code.

        Also send two standard headers with the server software
        version and the current date.

        Arguments are the status code and an optional reason phrase;
        both are passed on to send_response_only().

        """
        # Log first, then buffer the status line and the two headers.
        self.log_request(code)
        self.send_response_only(code, message)
        self.send_header('Server', self.version_string())
        self.send_header('Date', self.date_time_string())
495
496    def send_response_only(self, code, message=None):
497        """Send the response header only."""
498        if self.request_version != 'HTTP/0.9':
499            if message is None:
500                if code in self.responses:
501                    message = self.responses[code][0]
502                else:
503                    message = ''
504            if not hasattr(self, '_headers_buffer'):
505                self._headers_buffer = []
506            self._headers_buffer.append(("%s %d %s\r\n" %
507                    (self.protocol_version, code, message)).encode(
508                        'latin-1', 'strict'))
509
510    def send_header(self, keyword, value):
511        """Send a MIME header to the headers buffer."""
512        if self.request_version != 'HTTP/0.9':
513            if not hasattr(self, '_headers_buffer'):
514                self._headers_buffer = []
515            self._headers_buffer.append(
516                ("%s: %s\r\n" % (keyword, value)).encode('latin-1', 'strict'))
517
518        if keyword.lower() == 'connection':
519            if value.lower() == 'close':
520                self.close_connection = True
521            elif value.lower() == 'keep-alive':
522                self.close_connection = False
523
524    def end_headers(self):
525        """Send the blank line ending the MIME headers."""
526        if self.request_version != 'HTTP/0.9':
527            self._headers_buffer.append(b"\r\n")
528            self.flush_headers()
529
530    def flush_headers(self):
531        if hasattr(self, '_headers_buffer'):
532            self.wfile.write(b"".join(self._headers_buffer))
533            self._headers_buffer = []
534
535    def log_request(self, code='-', size='-'):
536        """Log an accepted request.
537
538        This is called by send_response().
539
540        """
541        if isinstance(code, HTTPStatus):
542            code = code.value
543        self.log_message('"%s" %s %s',
544                         self.requestline, str(code), str(size))
545
    def log_error(self, format, *args):
        """Log an error.

        This is called when a request cannot be fulfilled.  By
        default it passes the message on to log_message().

        Arguments are the same as for log_message(): a %-style format
        string followed by the values it needs.

        XXX This should go to the separate error log.

        """

        # No separate error channel yet: reuse the general log.
        self.log_message(format, *args)
559
560    def log_message(self, format, *args):
561        """Log an arbitrary message.
562
563        This is used by all other logging functions.  Override
564        it if you have specific logging wishes.
565
566        The first argument, FORMAT, is a format string for the
567        message to be logged.  If the format string contains
568        any % escapes requiring parameters, they should be
569        specified as subsequent arguments (it's just like
570        printf!).
571
572        The client ip and current date/time are prefixed to
573        every message.
574
575        """
576
577        sys.stderr.write("%s - - [%s] %s\n" %
578                         (self.address_string(),
579                          self.log_date_time_string(),
580                          format%args))
581
582    def version_string(self):
583        """Return the server software version string."""
584        return self.server_version + ' ' + self.sys_version
585
586    def date_time_string(self, timestamp=None):
587        """Return the current date and time formatted for a message header."""
588        if timestamp is None:
589            timestamp = time.time()
590        return email.utils.formatdate(timestamp, usegmt=True)
591
592    def log_date_time_string(self):
593        """Return the current time formatted for logging."""
594        now = time.time()
595        year, month, day, hh, mm, ss, x, y, z = time.localtime(now)
596        s = "%02d/%3s/%04d %02d:%02d:%02d" % (
597                day, self.monthname[month], year, hh, mm, ss)
598        return s
599
    # Abbreviated weekday names, Monday first.
    weekdayname = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']

    # Abbreviated month names, indexed by month number 1-12; index 0 is a
    # placeholder so that time.localtime() months can be used directly.
    monthname = [None,
                 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
605
    def address_string(self):
        """Return the client address.

        This is the first item of self.client_address, i.e. the host
        part of the (host, port) pair; log_message() uses it as the
        line prefix.
        """

        return self.client_address[0]
610
    # Essentially static class variables

    # The version of the HTTP protocol we support.
    # Set this to HTTP/1.1 to enable automatic keepalive
    protocol_version = "HTTP/1.0"

    # MessageClass used to parse headers
    MessageClass = http.client.HTTPMessage

    # hack to maintain backwards compatibility
    # Maps every HTTPStatus member to a (short phrase, long description)
    # pair; send_error() and send_response_only() look messages up here.
    responses = {
        v: (v.phrase, v.description)
        for v in HTTPStatus.__members__.values()
    }
625
626
627class SimpleHTTPRequestHandler(BaseHTTPRequestHandler):
628
629    """Simple HTTP request handler with GET and HEAD commands.
630
631    This serves files from the current directory and any of its
632    subdirectories.  The MIME type for files is determined by
633    calling the .guess_type() method.
634
635    The GET and HEAD requests are identical except that the HEAD
636    request omits the actual contents of the file.
637
638    """
639
640    server_version = "SimpleHTTP/" + __version__
641
642    def __init__(self, *args, directory=None, **kwargs):
643        if directory is None:
644            directory = os.getcwd()
645        self.directory = directory
646        super().__init__(*args, **kwargs)
647
648    def do_GET(self):
649        """Serve a GET request."""
650        f = self.send_head()
651        if f:
652            try:
653                self.copyfile(f, self.wfile)
654            finally:
655                f.close()
656
657    def do_HEAD(self):
658        """Serve a HEAD request."""
659        f = self.send_head()
660        if f:
661            f.close()
662
663    def send_head(self):
664        """Common code for GET and HEAD commands.
665
666        This sends the response code and MIME headers.
667
668        Return value is either a file object (which has to be copied
669        to the outputfile by the caller unless the command was HEAD,
670        and must be closed by the caller under all circumstances), or
671        None, in which case the caller has nothing further to do.
672
673        """
674        path = self.translate_path(self.path)
675        f = None
676        if os.path.isdir(path):
677            parts = urllib.parse.urlsplit(self.path)
678            if not parts.path.endswith('/'):
679                # redirect browser - doing basically what apache does
680                self.send_response(HTTPStatus.MOVED_PERMANENTLY)
681                new_parts = (parts[0], parts[1], parts[2] + '/',
682                             parts[3], parts[4])
683                new_url = urllib.parse.urlunsplit(new_parts)
684                self.send_header("Location", new_url)
685                self.end_headers()
686                return None
687            for index in "index.html", "index.htm":
688                index = os.path.join(path, index)
689                if os.path.exists(index):
690                    path = index
691                    break
692            else:
693                return self.list_directory(path)
694        ctype = self.guess_type(path)
695        # check for trailing "/" which should return 404. See Issue17324
696        # The test for this was added in test_httpserver.py
697        # However, some OS platforms accept a trailingSlash as a filename
698        # See discussion on python-dev and Issue34711 regarding
699        # parseing and rejection of filenames with a trailing slash
700        if path.endswith("/"):
701            self.send_error(HTTPStatus.NOT_FOUND, "File not found")
702            return None
703        try:
704            f = open(path, 'rb')
705        except OSError:
706            self.send_error(HTTPStatus.NOT_FOUND, "File not found")
707            return None
708
709        try:
710            fs = os.fstat(f.fileno())
711            # Use browser cache if possible
712            if ("If-Modified-Since" in self.headers
713                    and "If-None-Match" not in self.headers):
714                # compare If-Modified-Since and time of last file modification
715                try:
716                    ims = email.utils.parsedate_to_datetime(
717                        self.headers["If-Modified-Since"])
718                except (TypeError, IndexError, OverflowError, ValueError):
719                    # ignore ill-formed values
720                    pass
721                else:
722                    if ims.tzinfo is None:
723                        # obsolete format with no timezone, cf.
724                        # https://tools.ietf.org/html/rfc7231#section-7.1.1.1
725                        ims = ims.replace(tzinfo=datetime.timezone.utc)
726                    if ims.tzinfo is datetime.timezone.utc:
727                        # compare to UTC datetime of last modification
728                        last_modif = datetime.datetime.fromtimestamp(
729                            fs.st_mtime, datetime.timezone.utc)
730                        # remove microseconds, like in If-Modified-Since
731                        last_modif = last_modif.replace(microsecond=0)
732
733                        if last_modif <= ims:
734                            self.send_response(HTTPStatus.NOT_MODIFIED)
735                            self.end_headers()
736                            f.close()
737                            return None
738
739            self.send_response(HTTPStatus.OK)
740            self.send_header("Content-type", ctype)
741            self.send_header("Content-Length", str(fs[6]))
742            self.send_header("Last-Modified",
743                self.date_time_string(fs.st_mtime))
744            self.end_headers()
745            return f
746        except:
747            f.close()
748            raise
749
750    def list_directory(self, path):
751        """Helper to produce a directory listing (absent index.html).
752
753        Return value is either a file object, or None (indicating an
754        error).  In either case, the headers are sent, making the
755        interface the same as for send_head().
756
757        """
758        try:
759            list = os.listdir(path)
760        except OSError:
761            self.send_error(
762                HTTPStatus.NOT_FOUND,
763                "No permission to list directory")
764            return None
765        list.sort(key=lambda a: a.lower())
766        r = []
767        try:
768            displaypath = urllib.parse.unquote(self.path,
769                                               errors='surrogatepass')
770        except UnicodeDecodeError:
771            displaypath = urllib.parse.unquote(path)
772        displaypath = html.escape(displaypath, quote=False)
773        enc = sys.getfilesystemencoding()
774        title = 'Directory listing for %s' % displaypath
775        r.append('<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" '
776                 '"http://www.w3.org/TR/html4/strict.dtd">')
777        r.append('<html>\n<head>')
778        r.append('<meta http-equiv="Content-Type" '
779                 'content="text/html; charset=%s">' % enc)
780        r.append('<title>%s</title>\n</head>' % title)
781        r.append('<body>\n<h1>%s</h1>' % title)
782        r.append('<hr>\n<ul>')
783        for name in list:
784            fullname = os.path.join(path, name)
785            displayname = linkname = name
786            # Append / for directories or @ for symbolic links
787            if os.path.isdir(fullname):
788                displayname = name + "/"
789                linkname = name + "/"
790            if os.path.islink(fullname):
791                displayname = name + "@"
792                # Note: a link to a directory displays with @ and links with /
793            r.append('<li><a href="%s">%s</a></li>'
794                    % (urllib.parse.quote(linkname,
795                                          errors='surrogatepass'),
796                       html.escape(displayname, quote=False)))
797        r.append('</ul>\n<hr>\n</body>\n</html>\n')
798        encoded = '\n'.join(r).encode(enc, 'surrogateescape')
799        f = io.BytesIO()
800        f.write(encoded)
801        f.seek(0)
802        self.send_response(HTTPStatus.OK)
803        self.send_header("Content-type", "text/html; charset=%s" % enc)
804        self.send_header("Content-Length", str(len(encoded)))
805        self.end_headers()
806        return f
807
808    def translate_path(self, path):
809        """Translate a /-separated PATH to the local filename syntax.
810
811        Components that mean special things to the local file system
812        (e.g. drive or directory names) are ignored.  (XXX They should
813        probably be diagnosed.)
814
815        """
816        # abandon query parameters
817        path = path.split('?',1)[0]
818        path = path.split('#',1)[0]
819        # Don't forget explicit trailing slash when normalizing. Issue17324
820        trailing_slash = path.rstrip().endswith('/')
821        try:
822            path = urllib.parse.unquote(path, errors='surrogatepass')
823        except UnicodeDecodeError:
824            path = urllib.parse.unquote(path)
825        path = posixpath.normpath(path)
826        words = path.split('/')
827        words = filter(None, words)
828        path = self.directory
829        for word in words:
830            if os.path.dirname(word) or word in (os.curdir, os.pardir):
831                # Ignore components that are not a simple file/directory name
832                continue
833            path = os.path.join(path, word)
834        if trailing_slash:
835            path += '/'
836        return path
837
838    def copyfile(self, source, outputfile):
839        """Copy all data between two file objects.
840
841        The SOURCE argument is a file object open for reading
842        (or anything with a read() method) and the DESTINATION
843        argument is a file object open for writing (or
844        anything with a write() method).
845
846        The only reason for overriding this would be to change
847        the block size or perhaps to replace newlines by CRLF
848        -- note however that this the default server uses this
849        to copy binary data as well.
850
851        """
852        shutil.copyfileobj(source, outputfile)
853
854    def guess_type(self, path):
855        """Guess the type of a file.
856
857        Argument is a PATH (a filename).
858
859        Return value is a string of the form type/subtype,
860        usable for a MIME Content-type header.
861
862        The default implementation looks the file's extension
863        up in the table self.extensions_map, using application/octet-stream
864        as a default; however it would be permissible (if
865        slow) to look inside the data to make a better guess.
866
867        """
868
869        base, ext = posixpath.splitext(path)
870        if ext in self.extensions_map:
871            return self.extensions_map[ext]
872        ext = ext.lower()
873        if ext in self.extensions_map:
874            return self.extensions_map[ext]
875        else:
876            return self.extensions_map['']
877
878    if not mimetypes.inited:
879        mimetypes.init() # try to read system mime.types
880    extensions_map = mimetypes.types_map.copy()
881    extensions_map.update({
882        '': 'application/octet-stream', # Default
883        '.py': 'text/plain',
884        '.c': 'text/plain',
885        '.h': 'text/plain',
886        })
887
888
889# Utilities for CGIHTTPRequestHandler
890
891def _url_collapse_path(path):
892    """
893    Given a URL path, remove extra '/'s and '.' path elements and collapse
894    any '..' references and returns a collapsed path.
895
896    Implements something akin to RFC-2396 5.2 step 6 to parse relative paths.
897    The utility of this function is limited to is_cgi method and helps
898    preventing some security attacks.
899
900    Returns: The reconstituted URL, which will always start with a '/'.
901
902    Raises: IndexError if too many '..' occur within the path.
903
904    """
905    # Query component should not be involved.
906    path, _, query = path.partition('?')
907    path = urllib.parse.unquote(path)
908
909    # Similar to os.path.split(os.path.normpath(path)) but specific to URL
910    # path semantics rather than local operating system semantics.
911    path_parts = path.split('/')
912    head_parts = []
913    for part in path_parts[:-1]:
914        if part == '..':
915            head_parts.pop() # IndexError if more '..' than prior parts
916        elif part and part != '.':
917            head_parts.append( part )
918    if path_parts:
919        tail_part = path_parts.pop()
920        if tail_part:
921            if tail_part == '..':
922                head_parts.pop()
923                tail_part = ''
924            elif tail_part == '.':
925                tail_part = ''
926    else:
927        tail_part = ''
928
929    if query:
930        tail_part = '?'.join((tail_part, query))
931
932    splitpath = ('/' + '/'.join(head_parts), tail_part)
933    collapsed_path = "/".join(splitpath)
934
935    return collapsed_path
936
937
938
# Cached result of nobody_uid(); stays None until a lookup succeeds.
nobody = None

def nobody_uid():
    """Internal routine to get nobody's uid.

    Returns the uid of the 'nobody' account, or one more than the
    highest uid in the password database when no such account
    exists.  The answer is cached in the module-level `nobody`.
    Returns -1 (uncached) when the pwd module cannot be imported.
    """
    global nobody
    if not nobody:
        try:
            import pwd
        except ImportError:
            return -1
        try:
            nobody = pwd.getpwnam('nobody')[2]
        except KeyError:
            # No 'nobody' account: pick a uid above every known one.
            nobody = 1 + max(entry[2] for entry in pwd.getpwall())
    return nobody
955
956
def executable(path):
    """Test whether os.access() reports execute permission on path."""
    may_execute = os.access(path, os.X_OK)
    return may_execute
960
961
class CGIHTTPRequestHandler(SimpleHTTPRequestHandler):

    """Complete HTTP server with GET, HEAD and POST commands.

    GET and HEAD also support running CGI scripts.

    The POST command is *only* implemented for CGI scripts.

    """

    # Determine platform specifics
    have_fork = hasattr(os, 'fork')

    # Make rfile unbuffered -- we need to read one line and then pass
    # the rest to a subprocess, so we can't use buffered input.
    rbufsize = 0

    def do_POST(self):
        """Serve a POST request.

        This is only implemented for CGI scripts; any other target is
        answered with a NOT_IMPLEMENTED error.

        """

        if self.is_cgi():
            self.run_cgi()
        else:
            self.send_error(
                HTTPStatus.NOT_IMPLEMENTED,
                "Can only POST to CGI scripts")

    def send_head(self):
        """Version of send_head that supports CGI scripts.

        Runs the script when self.path names one; otherwise defers to
        SimpleHTTPRequestHandler.send_head().
        """
        if self.is_cgi():
            return self.run_cgi()
        else:
            return SimpleHTTPRequestHandler.send_head(self)

    def is_cgi(self):
        """Test whether self.path corresponds to a CGI script.

        Returns True and updates the cgi_info attribute to the tuple
        (dir, rest) if self.path requires running a CGI script.
        Returns False otherwise.

        If any exception is raised, the caller should assume that
        self.path was rejected as invalid and act accordingly.

        The default implementation tests whether the first component
        of the normalized url path is one of the strings in
        self.cgi_directories.

        """
        collapsed_path = _url_collapse_path(self.path)
        # Split at the first '/' after the leading one: head is the
        # top-level directory, tail is everything after it.
        dir_sep = collapsed_path.find('/', 1)
        head, tail = collapsed_path[:dir_sep], collapsed_path[dir_sep+1:]
        if head in self.cgi_directories:
            self.cgi_info = head, tail
            return True
        return False


    # URL directories whose contents are treated as CGI scripts.
    cgi_directories = ['/cgi-bin', '/htbin']

    def is_executable(self, path):
        """Test whether argument path is an executable file."""
        return executable(path)

    def is_python(self, path):
        """Test whether argument path is a Python script."""
        head, tail = os.path.splitext(path)
        return tail.lower() in (".py", ".pyw")

    def run_cgi(self):
        """Execute a CGI script.

        Uses self.cgi_info (set by is_cgi()) to locate the script,
        builds the CGI environment from the request, sends the 200
        status line, and then runs the script: through fork()/execve()
        where os.fork exists, through subprocess.Popen otherwise.

        Sends an error response and returns early when the script is
        missing, is not a plain file, or is not executable.
        """
        dir, rest = self.cgi_info
        path = dir + '/' + rest
        # Walk down the path while each prefix is a real directory, so
        # that scripts in subdirectories of a CGI directory are found.
        i = path.find('/', len(dir)+1)
        while i >= 0:
            nextdir = path[:i]
            nextrest = path[i+1:]

            scriptdir = self.translate_path(nextdir)
            if os.path.isdir(scriptdir):
                dir, rest = nextdir, nextrest
                i = path.find('/', len(dir)+1)
            else:
                break

        # find an explicit query string, if present.
        rest, _, query = rest.partition('?')

        # dissect the part after the directory name into a script name &
        # a possible additional path, to be stored in PATH_INFO.
        i = rest.find('/')
        if i >= 0:
            script, rest = rest[:i], rest[i:]
        else:
            script, rest = rest, ''

        scriptname = dir + '/' + script
        scriptfile = self.translate_path(scriptname)
        if not os.path.exists(scriptfile):
            self.send_error(
                HTTPStatus.NOT_FOUND,
                "No such CGI script (%r)" % scriptname)
            return
        if not os.path.isfile(scriptfile):
            self.send_error(
                HTTPStatus.FORBIDDEN,
                "CGI script is not a plain file (%r)" % scriptname)
            return
        ispy = self.is_python(scriptname)
        # Without fork, Python scripts are handed to the interpreter
        # explicitly, so they do not need the executable bit.
        if self.have_fork or not ispy:
            if not self.is_executable(scriptfile):
                self.send_error(
                    HTTPStatus.FORBIDDEN,
                    "CGI script is not executable (%r)" % scriptname)
                return

        # Reference: http://hoohoo.ncsa.uiuc.edu/cgi/env.html
        # XXX Much of the following could be prepared ahead of time!
        env = copy.deepcopy(os.environ)
        env['SERVER_SOFTWARE'] = self.version_string()
        env['SERVER_NAME'] = self.server.server_name
        env['GATEWAY_INTERFACE'] = 'CGI/1.1'
        env['SERVER_PROTOCOL'] = self.protocol_version
        env['SERVER_PORT'] = str(self.server.server_port)
        env['REQUEST_METHOD'] = self.command
        uqrest = urllib.parse.unquote(rest)
        env['PATH_INFO'] = uqrest
        env['PATH_TRANSLATED'] = self.translate_path(uqrest)
        env['SCRIPT_NAME'] = scriptname
        if query:
            env['QUERY_STRING'] = query
        env['REMOTE_ADDR'] = self.client_address[0]
        authorization = self.headers.get("authorization")
        if authorization:
            authorization = authorization.split()
            if len(authorization) == 2:
                import base64, binascii
                env['AUTH_TYPE'] = authorization[0]
                if authorization[0].lower() == "basic":
                    try:
                        authorization = authorization[1].encode('ascii')
                        authorization = base64.decodebytes(authorization).\
                                        decode('ascii')
                    except (binascii.Error, UnicodeError):
                        # Undecodable credentials: leave REMOTE_USER unset.
                        pass
                    else:
                        authorization = authorization.split(':')
                        if len(authorization) == 2:
                            env['REMOTE_USER'] = authorization[0]
        # XXX REMOTE_IDENT
        if self.headers.get('content-type') is None:
            env['CONTENT_TYPE'] = self.headers.get_content_type()
        else:
            env['CONTENT_TYPE'] = self.headers['content-type']
        length = self.headers.get('content-length')
        if length:
            env['CONTENT_LENGTH'] = length
        referer = self.headers.get('referer')
        if referer:
            env['HTTP_REFERER'] = referer
        # Join the values of every Accept header.  get_all() (as used
        # for cookies below) yields the header values directly, so no
        # raw header line has to be re-parsed by slicing off a
        # fixed-width "Accept:" prefix.
        accept = self.headers.get_all('accept', ())
        env['HTTP_ACCEPT'] = ','.join(accept)
        ua = self.headers.get('user-agent')
        if ua:
            env['HTTP_USER_AGENT'] = ua
        co = filter(None, self.headers.get_all('cookie', []))
        cookie_str = ', '.join(co)
        if cookie_str:
            env['HTTP_COOKIE'] = cookie_str
        # XXX Other HTTP_* headers
        # Since we're setting the env in the parent, provide empty
        # values to override previously set values
        for k in ('QUERY_STRING', 'REMOTE_HOST', 'CONTENT_LENGTH',
                  'HTTP_USER_AGENT', 'HTTP_COOKIE', 'HTTP_REFERER'):
            env.setdefault(k, "")

        # The status line goes out before the script runs, so the
        # script itself cannot choose a different status code.
        self.send_response(HTTPStatus.OK, "Script output follows")
        self.flush_headers()

        decoded_query = query.replace('+', ' ')

        if self.have_fork:
            # Unix -- fork as we should
            args = [script]
            # A query without '=' is passed as a command-line argument.
            if '=' not in decoded_query:
                args.append(decoded_query)
            nobody = nobody_uid()
            self.wfile.flush() # Always flush before forking
            pid = os.fork()
            if pid != 0:
                # Parent
                pid, sts = os.waitpid(pid, 0)
                # throw away additional data [see bug #427345]
                while select.select([self.rfile], [], [], 0)[0]:
                    if not self.rfile.read(1):
                        break
                if sts:
                    self.log_error("CGI script exit status %#x", sts)
                return
            # Child
            try:
                try:
                    os.setuid(nobody)
                except OSError:
                    # Best effort: keep the current uid if it cannot
                    # be changed.
                    pass
                # Wire the connection to the script's stdin/stdout.
                os.dup2(self.rfile.fileno(), 0)
                os.dup2(self.wfile.fileno(), 1)
                os.execve(scriptfile, args, env)
            except:
                # Deliberately bare: whatever went wrong, the child must
                # report it and exit rather than return into the
                # server's request loop.
                self.server.handle_error(self.request, self.client_address)
                os._exit(127)

        else:
            # Non-Unix -- use subprocess
            import subprocess
            cmdline = [scriptfile]
            if self.is_python(scriptfile):
                interp = sys.executable
                if interp.lower().endswith("w.exe"):
                    # On Windows, use python.exe, not pythonw.exe
                    interp = interp[:-5] + interp[-4:]
                cmdline = [interp, '-u'] + cmdline
            if '=' not in query:
                cmdline.append(query)
            self.log_message("command: %s", subprocess.list2cmdline(cmdline))
            try:
                nbytes = int(length)
            except (TypeError, ValueError):
                # Missing or malformed Content-Length: read no body.
                nbytes = 0
            p = subprocess.Popen(cmdline,
                                 stdin=subprocess.PIPE,
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE,
                                 env = env
                                 )
            if self.command.lower() == "post" and nbytes > 0:
                data = self.rfile.read(nbytes)
            else:
                data = None
            # throw away additional data [see bug #427345]
            while select.select([self.rfile._sock], [], [], 0)[0]:
                if not self.rfile._sock.recv(1):
                    break
            stdout, stderr = p.communicate(data)
            self.wfile.write(stdout)
            if stderr:
                self.log_error('%s', stderr)
            p.stderr.close()
            p.stdout.close()
            status = p.returncode
            if status:
                self.log_error("CGI script exit status %#x", status)
            else:
                self.log_message("CGI script exited OK")
1225
1226
1227def _get_best_family(*address):
1228    infos = socket.getaddrinfo(
1229        *address,
1230        type=socket.SOCK_STREAM,
1231        flags=socket.AI_PASSIVE,
1232    )
1233    family, type, proto, canonname, sockaddr = next(iter(infos))
1234    return family, sockaddr
1235
1236
def test(HandlerClass=BaseHTTPRequestHandler,
         ServerClass=ThreadingHTTPServer,
         protocol="HTTP/1.0", port=8000, bind=None):
    """Test the HTTP request handler class.

    This runs an HTTP server on port 8000 (or the port argument),
    bound to BIND, and serves until interrupted from the keyboard,
    at which point the process exits with status 0.

    Note that this sets ServerClass.address_family and
    HandlerClass.protocol_version on the objects passed in.

    """
    family, addr = _get_best_family(bind, port)
    ServerClass.address_family = family
    HandlerClass.protocol_version = protocol

    with ServerClass(addr, HandlerClass) as httpd:
        # Report the address actually bound, not the one requested.
        host, port = httpd.socket.getsockname()[:2]
        if ':' in host:
            # IPv6 literals need brackets inside a URL.
            url_host = f'[{host}]'
        else:
            url_host = host
        print(
            f"Serving HTTP on {host} port {port} "
            f"(http://{url_host}:{port}/) ..."
        )
        try:
            httpd.serve_forever()
        except KeyboardInterrupt:
            print("\nKeyboard interrupt received, exiting.")
            sys.exit(0)
1260
if __name__ == '__main__':
    # Command-line entry point: parse the options and run the test
    # server with the selected request handler.
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--cgi', action='store_true',
                       help='Run as CGI Server')
    parser.add_argument('--bind', '-b', metavar='ADDRESS',
                        help='Specify alternate bind address '
                             '[default: all interfaces]')
    parser.add_argument('--directory', '-d', default=os.getcwd(),
                        help='Specify alternative directory '
                        '[default:current directory]')
    parser.add_argument('port', action='store',
                        default=8000, type=int,
                        nargs='?',
                        help='Specify alternate port [default: 8000]')
    args = parser.parse_args()
    if args.cgi:
        base_handler = CGIHTTPRequestHandler
    else:
        base_handler = SimpleHTTPRequestHandler
    # CGIHTTPRequestHandler defines no constructor of its own and so
    # accepts the same `directory` keyword as SimpleHTTPRequestHandler;
    # bind it in both modes so --directory is not silently ignored
    # when --cgi is given.
    handler_class = partial(base_handler, directory=args.directory)
    test(HandlerClass=handler_class, port=args.port, bind=args.bind)
1284