• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1"""HTTP server classes.
2
Note: BaseHTTPRequestHandler doesn't implement any HTTP request; see
SimpleHTTPRequestHandler for simple implementations of GET and HEAD,
and CGIHTTPRequestHandler for CGI scripts (GET and POST).
6
7It does, however, optionally implement HTTP/1.1 persistent connections,
8as of version 0.3.
9
10Notes on CGIHTTPRequestHandler
11------------------------------
12
13This class implements GET and POST requests to cgi-bin scripts.
14
15If the os.fork() function is not present (e.g. on Windows),
16subprocess.Popen() is used as a fallback, with slightly altered semantics.
17
18In all cases, the implementation is intentionally naive -- all
19requests are executed synchronously.
20
21SECURITY WARNING: DON'T USE THIS CODE UNLESS YOU ARE INSIDE A FIREWALL
22-- it may execute arbitrary Python code or external programs.
23
24Note that status code 200 is sent prior to execution of a CGI script, so
25scripts cannot send other status codes such as 302 (redirect).
26
27XXX To do:
28
29- log requests even later (to capture byte count)
30- log user-agent header and other interesting goodies
31- send error log to separate file
32"""
33
34
35# See also:
36#
37# HTTP Working Group                                        T. Berners-Lee
38# INTERNET-DRAFT                                            R. T. Fielding
39# <draft-ietf-http-v10-spec-00.txt>                     H. Frystyk Nielsen
40# Expires September 8, 1995                                  March 8, 1995
41#
42# URL: http://www.ics.uci.edu/pub/ietf/http/draft-ietf-http-v10-spec-00.txt
43#
44# and
45#
46# Network Working Group                                      R. Fielding
47# Request for Comments: 2616                                       et al
48# Obsoletes: 2068                                              June 1999
49# Category: Standards Track
50#
51# URL: http://www.faqs.org/rfcs/rfc2616.html
52
53# Log files
54# ---------
55#
56# Here's a quote from the NCSA httpd docs about log file format.
57#
58# | The logfile format is as follows. Each line consists of:
59# |
60# | host rfc931 authuser [DD/Mon/YYYY:hh:mm:ss] "request" ddd bbbb
61# |
62# |        host: Either the DNS name or the IP number of the remote client
63# |        rfc931: Any information returned by identd for this person,
64# |                - otherwise.
65# |        authuser: If user sent a userid for authentication, the user name,
66# |                  - otherwise.
67# |        DD: Day
68# |        Mon: Month (calendar name)
69# |        YYYY: Year
70# |        hh: hour (24-hour format, the machine's timezone)
71# |        mm: minutes
72# |        ss: seconds
73# |        request: The first line of the HTTP request as sent by the client.
74# |        ddd: the status code returned by the server, - if not available.
75# |        bbbb: the total number of bytes sent,
76# |              *not including the HTTP/1.0 header*, - if not available
77# |
78# | You can determine the name of the file accessed through request.
79#
80# (Actually, the latter is only true if you know the server configuration
81# at the time the request was made!)
82
# Version of this module; it is appended to the handlers' server_version
# strings (e.g. "BaseHTTP/" + __version__).
__version__ = "0.6"

# Public names exported by this module.
__all__ = [
    "HTTPServer", "ThreadingHTTPServer", "BaseHTTPRequestHandler",
    "SimpleHTTPRequestHandler", "CGIHTTPRequestHandler",
]
89
90import copy
91import datetime
92import email.utils
93import html
94import http.client
95import io
96import mimetypes
97import os
98import posixpath
99import select
100import shutil
101import socket # For gethostbyaddr()
102import socketserver
103import sys
104import time
105import urllib.parse
106
107from http import HTTPStatus
108
109
# Default error message template.
# It is the default value of BaseHTTPRequestHandler.error_message_format and
# is filled in by send_error() with %-formatting using the keys 'code',
# 'message' and 'explain'; 'message' and 'explain' are HTML-escaped before
# being substituted.
DEFAULT_ERROR_MESSAGE = """\
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
        "http://www.w3.org/TR/html4/strict.dtd">
<html>
    <head>
        <meta http-equiv="Content-Type" content="text/html;charset=utf-8">
        <title>Error response</title>
    </head>
    <body>
        <h1>Error response</h1>
        <p>Error code: %(code)d</p>
        <p>Message: %(message)s.</p>
        <p>Error code explanation: %(code)s - %(explain)s.</p>
    </body>
</html>
"""

# Content-Type header value sent with the error page (the default value of
# BaseHTTPRequestHandler.error_content_type).
DEFAULT_ERROR_CONTENT_TYPE = "text/html;charset=utf-8"
129
class HTTPServer(socketserver.TCPServer):
    """TCP server that records its own host name and port once bound."""

    # Allow the listening address to be reused right away; convenient when
    # a server is restarted repeatedly during testing.
    allow_reuse_address = 1

    def server_bind(self):
        """Bind the socket, then store server_name and server_port."""
        socketserver.TCPServer.server_bind(self)
        bound_host, bound_port = self.server_address[:2]
        self.server_name = socket.getfqdn(bound_host)
        self.server_port = bound_port
140
141
class ThreadingHTTPServer(socketserver.ThreadingMixIn, HTTPServer):
    """HTTPServer combined with socketserver.ThreadingMixIn."""

    # Request-handling threads are created as daemon threads.
    daemon_threads = True
144
145
class BaseHTTPRequestHandler(socketserver.StreamRequestHandler):

    """HTTP request handler base class.

    The following explanation of HTTP serves to guide you through the
    code as well as to expose any misunderstandings I may have about
    HTTP (so you don't need to read the code to figure out I'm wrong
    :-).

    HTTP (HyperText Transfer Protocol) is an extensible protocol on
    top of a reliable stream transport (e.g. TCP/IP).  The protocol
    recognizes three parts to a request:

    1. One line identifying the request type and path
    2. An optional set of RFC-822-style headers
    3. An optional data part

    The headers and data are separated by a blank line.

    The first line of the request has the form

    <command> <path> <version>

    where <command> is a (case-sensitive) keyword such as GET or POST,
    <path> is a string containing path information for the request,
    and <version> should be the string "HTTP/1.0" or "HTTP/1.1".
    <path> is encoded using the URL encoding scheme (using %xx to signify
    the ASCII character with hex code xx).

    The specification specifies that lines are separated by CRLF but
    for compatibility with the widest range of clients recommends
    servers also handle LF.  Similarly, whitespace in the request line
    is treated sensibly (allowing multiple spaces between components
    and allowing trailing whitespace).

    Similarly, for output, lines ought to be separated by CRLF pairs
    but most clients grok LF characters just fine.

    If the first line of the request has the form

    <command> <path>

    (i.e. <version> is left out) then this is assumed to be an HTTP
    0.9 request; this form has no optional headers and data part and
    the reply consists of just the data.

    The reply form of the HTTP 1.x protocol again has three parts:

    1. One line giving the response code
    2. An optional set of RFC-822-style headers
    3. The data

    Again, the headers and data are separated by a blank line.

    The response code line has the form

    <version> <responsecode> <responsestring>

    where <version> is the protocol version ("HTTP/1.0" or "HTTP/1.1"),
    <responsecode> is a 3-digit response code indicating success or
    failure of the request, and <responsestring> is an optional
    human-readable string explaining what the response code means.

    This server parses the request and the headers, and then calls a
    function specific to the request type (<command>).  Specifically,
    a request SPAM will be handled by a method do_SPAM().  If no
    such method exists the server sends an error response to the
    client.  If it exists, it is called with no arguments:

    do_SPAM()

    Note that the request name is case sensitive (i.e. SPAM and spam
    are different requests).

    The various request details are stored in instance variables:

    - client_address is the client IP address in the form (host,
    port);

    - command, path and version are the broken-down request line;

    - headers is an instance of email.message.Message (or a derived
    class) containing the header information;

    - rfile is a file object open for reading positioned at the
    start of the optional input data part;

    - wfile is a file object open for writing.

    IT IS IMPORTANT TO ADHERE TO THE PROTOCOL FOR WRITING!

    The first thing to be written must be the response line.  Then
    follow 0 or more header lines, then a blank line, and then the
    actual data (if any).  The meaning of the header lines depends on
    the command executed by the server; in most cases, when data is
    returned, there should be at least one header line of the form

    Content-type: <type>/<subtype>

    where <type> and <subtype> should be registered MIME types,
    e.g. "text/html" or "text/plain".

    """

    # The Python system version, truncated to its first component.
    sys_version = "Python/" + sys.version.split()[0]

    # The server software version.  You may want to override this.
    # The format is multiple whitespace-separated strings,
    # where each string is of the form name[/version].
    server_version = "BaseHTTP/" + __version__

    error_message_format = DEFAULT_ERROR_MESSAGE
    error_content_type = DEFAULT_ERROR_CONTENT_TYPE

    # The default request version.  This only affects responses up until
    # the point where the request line is parsed, so it mainly decides what
    # the client gets back when sending a malformed request line.
    # Most web servers default to HTTP 0.9, i.e. don't send a status line.
    default_request_version = "HTTP/0.9"

    def parse_request(self):
        """Parse a request (internal).

        The request should be stored in self.raw_requestline; the results
        are in self.command, self.path, self.request_version and
        self.headers.

        Return True for success, False for failure; on failure, any relevant
        error response has already been sent back.

        """
        self.command = None  # set in case of error on the first line
        self.request_version = version = self.default_request_version
        self.close_connection = True
        requestline = str(self.raw_requestline, 'iso-8859-1')
        requestline = requestline.rstrip('\r\n')
        self.requestline = requestline
        words = requestline.split()
        if not words:
            return False

        if len(words) >= 3:  # Enough to determine protocol version
            version = words[-1]
            try:
                if not version.startswith('HTTP/'):
                    raise ValueError
                base_version_number = version.split('/', 1)[1]
                version_number = base_version_number.split(".")
                # RFC 2145 section 3.1 says there can be only one "." and
                #   - major and minor numbers MUST be treated as
                #      separate integers;
                #   - HTTP/2.4 is a lower version than HTTP/2.13, which in
                #      turn is lower than HTTP/12.3;
                #   - Leading zeros MUST be ignored by recipients.
                if len(version_number) != 2:
                    raise ValueError
                # int() alone is too lenient: it accepts signs, underscores
                # and surrounding whitespace, and will happily convert an
                # arbitrarily long digit string.  Require plain ASCII digits
                # of a sane length before converting.
                if any(not (component.isascii() and component.isdigit())
                       for component in version_number):
                    raise ValueError("non digit in http version")
                if any(len(component) > 10 for component in version_number):
                    raise ValueError("unreasonable length http version")
                version_number = int(version_number[0]), int(version_number[1])
            except (ValueError, IndexError):
                self.send_error(
                    HTTPStatus.BAD_REQUEST,
                    "Bad request version (%r)" % version)
                return False
            if version_number >= (1, 1) and self.protocol_version >= "HTTP/1.1":
                self.close_connection = False
            if version_number >= (2, 0):
                self.send_error(
                    HTTPStatus.HTTP_VERSION_NOT_SUPPORTED,
                    "Invalid HTTP version (%s)" % base_version_number)
                return False
            self.request_version = version

        if not 2 <= len(words) <= 3:
            self.send_error(
                HTTPStatus.BAD_REQUEST,
                "Bad request syntax (%r)" % requestline)
            return False
        command, path = words[:2]
        if len(words) == 2:
            # Two words means an HTTP/0.9 request, which only knows GET and
            # never keeps the connection open.
            self.close_connection = True
            if command != 'GET':
                self.send_error(
                    HTTPStatus.BAD_REQUEST,
                    "Bad HTTP/0.9 request type (%r)" % command)
                return False
        self.command, self.path = command, path

        # Collapse a leading run of slashes to a single one.  HTTP clients
        # treat "//host/path" as a scheme-relative absolute URI, so echoing
        # such a path back in a Location header would allow open redirects.
        if self.path.startswith('//'):
            self.path = '/' + self.path.lstrip('/')

        # Examine the headers and look for a Connection directive.
        try:
            self.headers = http.client.parse_headers(self.rfile,
                                                     _class=self.MessageClass)
        except http.client.LineTooLong as err:
            self.send_error(
                HTTPStatus.REQUEST_HEADER_FIELDS_TOO_LARGE,
                "Line too long",
                str(err))
            return False
        except http.client.HTTPException as err:
            self.send_error(
                HTTPStatus.REQUEST_HEADER_FIELDS_TOO_LARGE,
                "Too many headers",
                str(err)
            )
            return False

        conntype = self.headers.get('Connection', "")
        if conntype.lower() == 'close':
            self.close_connection = True
        elif (conntype.lower() == 'keep-alive' and
              self.protocol_version >= "HTTP/1.1"):
            self.close_connection = False
        # Examine the headers and look for an Expect directive
        expect = self.headers.get('Expect', "")
        if (expect.lower() == "100-continue" and
                self.protocol_version >= "HTTP/1.1" and
                self.request_version >= "HTTP/1.1"):
            if not self.handle_expect_100():
                return False
        return True

    def handle_expect_100(self):
        """Decide what to do with an "Expect: 100-continue" header.

        If the client is expecting a 100 Continue response, we must
        respond with either a 100 Continue or a final response before
        waiting for the request body. The default is to always respond
        with a 100 Continue. You can behave differently (for example,
        reject unauthorized requests) by overriding this method.

        This method should either return True (possibly after sending
        a 100 Continue response) or send an error response and return
        False.

        """
        self.send_response_only(HTTPStatus.CONTINUE)
        self.end_headers()
        return True

    def handle_one_request(self):
        """Handle a single HTTP request.

        You normally don't need to override this method; see the class
        __doc__ string for information on how to handle specific HTTP
        commands such as GET and POST.

        """
        try:
            # Read one byte more than the limit so an over-long request
            # line can be told apart from one that is exactly at the limit.
            self.raw_requestline = self.rfile.readline(65537)
            if len(self.raw_requestline) > 65536:
                self.requestline = ''
                self.request_version = ''
                self.command = ''
                self.send_error(HTTPStatus.REQUEST_URI_TOO_LONG)
                return
            if not self.raw_requestline:
                self.close_connection = True
                return
            if not self.parse_request():
                # An error code has been sent, just exit
                return
            mname = 'do_' + self.command
            if not hasattr(self, mname):
                self.send_error(
                    HTTPStatus.NOT_IMPLEMENTED,
                    "Unsupported method (%r)" % self.command)
                return
            method = getattr(self, mname)
            method()
            # Actually send the response if not already done.
            self.wfile.flush()
        except TimeoutError as e:
            # A read or a write timed out.  Discard this connection.
            self.log_error("Request timed out: %r", e)
            self.close_connection = True
            return

    def handle(self):
        """Handle multiple requests if necessary."""
        self.close_connection = True

        self.handle_one_request()
        while not self.close_connection:
            self.handle_one_request()

    def send_error(self, code, message=None, explain=None):
        """Send and log an error reply.

        Arguments are
        * code:    an HTTP error code
                   3 digits
        * message: a simple optional 1 line reason phrase.
                   *( HTAB / SP / VCHAR / %x80-FF )
                   defaults to short entry matching the response code
        * explain: a detailed message defaults to the long entry
                   matching the response code.

        This sends an error response (so it must be called before any
        output has been generated), logs the error, and finally sends
        a piece of HTML explaining the error to the user.

        """

        try:
            shortmsg, longmsg = self.responses[code]
        except KeyError:
            shortmsg, longmsg = '???', '???'
        if message is None:
            message = shortmsg
        if explain is None:
            explain = longmsg
        self.log_error("code %d, message %s", code, message)
        self.send_response(code, message)
        self.send_header('Connection', 'close')

        # Message body is omitted for cases described in:
        #  - RFC7230: 3.3. 1xx, 204(No Content), 304(Not Modified)
        #  - RFC7231: 6.3.6. 205(Reset Content)
        body = None
        if (code >= 200 and
            code not in (HTTPStatus.NO_CONTENT,
                         HTTPStatus.RESET_CONTENT,
                         HTTPStatus.NOT_MODIFIED)):
            # HTML encode to prevent Cross Site Scripting attacks
            # (see bug #1100201)
            content = (self.error_message_format % {
                'code': code,
                'message': html.escape(message, quote=False),
                'explain': html.escape(explain, quote=False)
            })
            body = content.encode('UTF-8', 'replace')
            self.send_header("Content-Type", self.error_content_type)
            self.send_header('Content-Length', str(len(body)))
        self.end_headers()

        # A HEAD response carries the headers only.
        if self.command != 'HEAD' and body:
            self.wfile.write(body)

    def send_response(self, code, message=None):
        """Add the response header to the headers buffer and log the
        response code.

        Also send two standard headers with the server software
        version and the current date.

        """
        self.log_request(code)
        self.send_response_only(code, message)
        self.send_header('Server', self.version_string())
        self.send_header('Date', self.date_time_string())

    def send_response_only(self, code, message=None):
        """Send the response header only."""
        if self.request_version != 'HTTP/0.9':
            if message is None:
                if code in self.responses:
                    message = self.responses[code][0]
                else:
                    message = ''
            if not hasattr(self, '_headers_buffer'):
                self._headers_buffer = []
            self._headers_buffer.append(("%s %d %s\r\n" %
                    (self.protocol_version, code, message)).encode(
                        'latin-1', 'strict'))

    def send_header(self, keyword, value):
        """Send a MIME header to the headers buffer."""
        if self.request_version != 'HTTP/0.9':
            if not hasattr(self, '_headers_buffer'):
                self._headers_buffer = []
            self._headers_buffer.append(
                ("%s: %s\r\n" % (keyword, value)).encode('latin-1', 'strict'))

        # A Connection header sent by the handler also decides whether the
        # connection is kept open after this response.
        if keyword.lower() == 'connection':
            if value.lower() == 'close':
                self.close_connection = True
            elif value.lower() == 'keep-alive':
                self.close_connection = False

    def end_headers(self):
        """Send the blank line ending the MIME headers."""
        if self.request_version != 'HTTP/0.9':
            # Create the buffer on demand, exactly as send_header() and
            # send_response_only() do, so calling this first cannot fail
            # with AttributeError.
            if not hasattr(self, '_headers_buffer'):
                self._headers_buffer = []
            self._headers_buffer.append(b"\r\n")
            self.flush_headers()

    def flush_headers(self):
        """Write any buffered header lines to wfile and empty the buffer."""
        if hasattr(self, '_headers_buffer'):
            self.wfile.write(b"".join(self._headers_buffer))
            self._headers_buffer = []

    def log_request(self, code='-', size='-'):
        """Log an accepted request.

        This is called by send_response().

        """
        if isinstance(code, HTTPStatus):
            code = code.value
        self.log_message('"%s" %s %s',
                         self.requestline, str(code), str(size))

    def log_error(self, format, *args):
        """Log an error.

        This is called when a request cannot be fulfilled.  By
        default it passes the message on to log_message().

        Arguments are the same as for log_message().

        XXX This should go to the separate error log.

        """

        self.log_message(format, *args)

    # Translation table that turns C0/C1 control characters into printable
    # \xNN escapes (and doubles backslashes so the escapes stay unambiguous).
    # Logged text includes client-supplied data such as the request line.
    _control_char_table = str.maketrans(
        {c: r'\x%02x' % c
         for c in [*range(0x20), *range(0x7f, 0xa0)]})
    _control_char_table[ord('\\')] = r'\\'

    def log_message(self, format, *args):
        """Log an arbitrary message.

        This is used by all other logging functions.  Override
        it if you have specific logging wishes.

        The first argument, FORMAT, is a format string for the
        message to be logged.  If the format string contains
        any % escapes requiring parameters, they should be
        specified as subsequent arguments (it's just like
        printf!).

        The client ip and current date/time are prefixed to
        every message.

        Control characters in the formatted message are written as
        \\xNN escapes so that data received from the client cannot
        inject raw control sequences into the log.

        """

        message = format % args
        sys.stderr.write("%s - - [%s] %s\n" %
                         (self.address_string(),
                          self.log_date_time_string(),
                          message.translate(self._control_char_table)))

    def version_string(self):
        """Return the server software version string."""
        return self.server_version + ' ' + self.sys_version

    def date_time_string(self, timestamp=None):
        """Return the current date and time formatted for a message header."""
        if timestamp is None:
            timestamp = time.time()
        return email.utils.formatdate(timestamp, usegmt=True)

    def log_date_time_string(self):
        """Return the current time formatted for logging."""
        now = time.time()
        year, month, day, hh, mm, ss, x, y, z = time.localtime(now)
        s = "%02d/%3s/%04d %02d:%02d:%02d" % (
                day, self.monthname[month], year, hh, mm, ss)
        return s

    weekdayname = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']

    # Index 0 is a placeholder so that month numbers 1-12 index directly.
    monthname = [None,
                 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

    def address_string(self):
        """Return the client address."""

        return self.client_address[0]

    # Essentially static class variables

    # The version of the HTTP protocol we support.
    # Set this to HTTP/1.1 to enable automatic keepalive
    protocol_version = "HTTP/1.0"

    # MessageClass used to parse headers
    MessageClass = http.client.HTTPMessage

    # hack to maintain backwards compatibility
    responses = {
        v: (v.phrase, v.description)
        for v in HTTPStatus.__members__.values()
    }
624
625
626class SimpleHTTPRequestHandler(BaseHTTPRequestHandler):
627
628    """Simple HTTP request handler with GET and HEAD commands.
629
630    This serves files from the current directory and any of its
631    subdirectories.  The MIME type for files is determined by
632    calling the .guess_type() method.
633
634    The GET and HEAD requests are identical except that the HEAD
635    request omits the actual contents of the file.
636
637    """
638
639    server_version = "SimpleHTTP/" + __version__
640    extensions_map = _encodings_map_default = {
641        '.gz': 'application/gzip',
642        '.Z': 'application/octet-stream',
643        '.bz2': 'application/x-bzip2',
644        '.xz': 'application/x-xz',
645    }
646
647    def __init__(self, *args, directory=None, **kwargs):
648        if directory is None:
649            directory = os.getcwd()
650        self.directory = os.fspath(directory)
651        super().__init__(*args, **kwargs)
652
653    def do_GET(self):
654        """Serve a GET request."""
655        f = self.send_head()
656        if f:
657            try:
658                self.copyfile(f, self.wfile)
659            finally:
660                f.close()
661
662    def do_HEAD(self):
663        """Serve a HEAD request."""
664        f = self.send_head()
665        if f:
666            f.close()
667
668    def send_head(self):
669        """Common code for GET and HEAD commands.
670
671        This sends the response code and MIME headers.
672
673        Return value is either a file object (which has to be copied
674        to the outputfile by the caller unless the command was HEAD,
675        and must be closed by the caller under all circumstances), or
676        None, in which case the caller has nothing further to do.
677
678        """
679        path = self.translate_path(self.path)
680        f = None
681        if os.path.isdir(path):
682            parts = urllib.parse.urlsplit(self.path)
683            if not parts.path.endswith('/'):
684                # redirect browser - doing basically what apache does
685                self.send_response(HTTPStatus.MOVED_PERMANENTLY)
686                new_parts = (parts[0], parts[1], parts[2] + '/',
687                             parts[3], parts[4])
688                new_url = urllib.parse.urlunsplit(new_parts)
689                self.send_header("Location", new_url)
690                self.send_header("Content-Length", "0")
691                self.end_headers()
692                return None
693            for index in "index.html", "index.htm":
694                index = os.path.join(path, index)
695                if os.path.exists(index):
696                    path = index
697                    break
698            else:
699                return self.list_directory(path)
700        ctype = self.guess_type(path)
701        # check for trailing "/" which should return 404. See Issue17324
702        # The test for this was added in test_httpserver.py
703        # However, some OS platforms accept a trailingSlash as a filename
704        # See discussion on python-dev and Issue34711 regarding
705        # parseing and rejection of filenames with a trailing slash
706        if path.endswith("/"):
707            self.send_error(HTTPStatus.NOT_FOUND, "File not found")
708            return None
709        try:
710            f = open(path, 'rb')
711        except OSError:
712            self.send_error(HTTPStatus.NOT_FOUND, "File not found")
713            return None
714
715        try:
716            fs = os.fstat(f.fileno())
717            # Use browser cache if possible
718            if ("If-Modified-Since" in self.headers
719                    and "If-None-Match" not in self.headers):
720                # compare If-Modified-Since and time of last file modification
721                try:
722                    ims = email.utils.parsedate_to_datetime(
723                        self.headers["If-Modified-Since"])
724                except (TypeError, IndexError, OverflowError, ValueError):
725                    # ignore ill-formed values
726                    pass
727                else:
728                    if ims.tzinfo is None:
729                        # obsolete format with no timezone, cf.
730                        # https://tools.ietf.org/html/rfc7231#section-7.1.1.1
731                        ims = ims.replace(tzinfo=datetime.timezone.utc)
732                    if ims.tzinfo is datetime.timezone.utc:
733                        # compare to UTC datetime of last modification
734                        last_modif = datetime.datetime.fromtimestamp(
735                            fs.st_mtime, datetime.timezone.utc)
736                        # remove microseconds, like in If-Modified-Since
737                        last_modif = last_modif.replace(microsecond=0)
738
739                        if last_modif <= ims:
740                            self.send_response(HTTPStatus.NOT_MODIFIED)
741                            self.end_headers()
742                            f.close()
743                            return None
744
745            self.send_response(HTTPStatus.OK)
746            self.send_header("Content-type", ctype)
747            self.send_header("Content-Length", str(fs[6]))
748            self.send_header("Last-Modified",
749                self.date_time_string(fs.st_mtime))
750            self.end_headers()
751            return f
752        except:
753            f.close()
754            raise
755
756    def list_directory(self, path):
757        """Helper to produce a directory listing (absent index.html).
758
759        Return value is either a file object, or None (indicating an
760        error).  In either case, the headers are sent, making the
761        interface the same as for send_head().
762
763        """
764        try:
765            list = os.listdir(path)
766        except OSError:
767            self.send_error(
768                HTTPStatus.NOT_FOUND,
769                "No permission to list directory")
770            return None
771        list.sort(key=lambda a: a.lower())
772        r = []
773        try:
774            displaypath = urllib.parse.unquote(self.path,
775                                               errors='surrogatepass')
776        except UnicodeDecodeError:
777            displaypath = urllib.parse.unquote(path)
778        displaypath = html.escape(displaypath, quote=False)
779        enc = sys.getfilesystemencoding()
780        title = 'Directory listing for %s' % displaypath
781        r.append('<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" '
782                 '"http://www.w3.org/TR/html4/strict.dtd">')
783        r.append('<html>\n<head>')
784        r.append('<meta http-equiv="Content-Type" '
785                 'content="text/html; charset=%s">' % enc)
786        r.append('<title>%s</title>\n</head>' % title)
787        r.append('<body>\n<h1>%s</h1>' % title)
788        r.append('<hr>\n<ul>')
789        for name in list:
790            fullname = os.path.join(path, name)
791            displayname = linkname = name
792            # Append / for directories or @ for symbolic links
793            if os.path.isdir(fullname):
794                displayname = name + "/"
795                linkname = name + "/"
796            if os.path.islink(fullname):
797                displayname = name + "@"
798                # Note: a link to a directory displays with @ and links with /
799            r.append('<li><a href="%s">%s</a></li>'
800                    % (urllib.parse.quote(linkname,
801                                          errors='surrogatepass'),
802                       html.escape(displayname, quote=False)))
803        r.append('</ul>\n<hr>\n</body>\n</html>\n')
804        encoded = '\n'.join(r).encode(enc, 'surrogateescape')
805        f = io.BytesIO()
806        f.write(encoded)
807        f.seek(0)
808        self.send_response(HTTPStatus.OK)
809        self.send_header("Content-type", "text/html; charset=%s" % enc)
810        self.send_header("Content-Length", str(len(encoded)))
811        self.end_headers()
812        return f
813
814    def translate_path(self, path):
815        """Translate a /-separated PATH to the local filename syntax.
816
817        Components that mean special things to the local file system
818        (e.g. drive or directory names) are ignored.  (XXX They should
819        probably be diagnosed.)
820
821        """
822        # abandon query parameters
823        path = path.split('?',1)[0]
824        path = path.split('#',1)[0]
825        # Don't forget explicit trailing slash when normalizing. Issue17324
826        trailing_slash = path.rstrip().endswith('/')
827        try:
828            path = urllib.parse.unquote(path, errors='surrogatepass')
829        except UnicodeDecodeError:
830            path = urllib.parse.unquote(path)
831        path = posixpath.normpath(path)
832        words = path.split('/')
833        words = filter(None, words)
834        path = self.directory
835        for word in words:
836            if os.path.dirname(word) or word in (os.curdir, os.pardir):
837                # Ignore components that are not a simple file/directory name
838                continue
839            path = os.path.join(path, word)
840        if trailing_slash:
841            path += '/'
842        return path
843
844    def copyfile(self, source, outputfile):
845        """Copy all data between two file objects.
846
847        The SOURCE argument is a file object open for reading
848        (or anything with a read() method) and the DESTINATION
849        argument is a file object open for writing (or
850        anything with a write() method).
851
852        The only reason for overriding this would be to change
853        the block size or perhaps to replace newlines by CRLF
854        -- note however that this the default server uses this
855        to copy binary data as well.
856
857        """
858        shutil.copyfileobj(source, outputfile)
859
860    def guess_type(self, path):
861        """Guess the type of a file.
862
863        Argument is a PATH (a filename).
864
865        Return value is a string of the form type/subtype,
866        usable for a MIME Content-type header.
867
868        The default implementation looks the file's extension
869        up in the table self.extensions_map, using application/octet-stream
870        as a default; however it would be permissible (if
871        slow) to look inside the data to make a better guess.
872
873        """
874        base, ext = posixpath.splitext(path)
875        if ext in self.extensions_map:
876            return self.extensions_map[ext]
877        ext = ext.lower()
878        if ext in self.extensions_map:
879            return self.extensions_map[ext]
880        guess, _ = mimetypes.guess_type(path)
881        if guess:
882            return guess
883        return 'application/octet-stream'
884
885
886# Utilities for CGIHTTPRequestHandler
887
def _url_collapse_path(path):
    """
    Given a URL path, remove extra '/'s and '.' path elements and collapse
    any '..' references and returns a collapsed path.

    Implements something akin to RFC-2396 5.2 step 6 to parse relative paths.
    The utility of this function is limited to is_cgi method and helps
    preventing some security attacks.

    The query string (everything after the first '?') is not collapsed or
    unquoted; it is re-attached verbatim to the last path element.

    Returns: The reconstituted URL, which will always start with a '/'.
    The directory part and the last element are joined with an extra '/',
    so a path with no directory part comes back with two leading slashes
    (e.g. '/a' -> '//a').

    Raises: IndexError if too many '..' occur within the path.

    """
    # Query component should not be involved.
    path, _, query = path.partition('?')
    path = urllib.parse.unquote(path)

    # Similar to os.path.split(os.path.normpath(path)) but specific to URL
    # path semantics rather than local operating system semantics.
    # str.split() always yields at least one element, so a tail always exists.
    *leading_parts, tail_part = path.split('/')
    head_parts = []
    for part in leading_parts:
        if part == '..':
            head_parts.pop()  # IndexError if more '..' than prior parts
        elif part and part != '.':
            head_parts.append(part)

    # A trailing '..' or '.' names a directory, so the tail becomes empty.
    if tail_part == '..':
        head_parts.pop()  # IndexError if more '..' than prior parts
        tail_part = ''
    elif tail_part == '.':
        tail_part = ''

    if query:
        tail_part = '?'.join((tail_part, query))

    return '/'.join(('/' + '/'.join(head_parts), tail_part))
933
934
935
# Cached uid of the 'nobody' user; None until nobody_uid() has computed it.
nobody = None

def nobody_uid():
    """Internal routine to get nobody's uid.

    The value is computed once and cached in the module-level ``nobody``.

    Returns the uid of the 'nobody' account when the password database
    has one.  Otherwise returns one more than the highest uid found in
    the database.  Returns -1 when the pwd module cannot be imported or
    when the database lists no accounts at all.
    """
    global nobody
    # Compare against None so that any cached integer (even 0) is reused.
    if nobody is not None:
        return nobody
    try:
        import pwd
    except ImportError:
        # No password database on this platform; the result is not cached.
        return -1
    try:
        nobody = pwd.getpwnam('nobody')[2]
    except KeyError:
        # No 'nobody' account: pick a uid just above every known one.
        known_uids = [entry[2] for entry in pwd.getpwall()]
        # An empty database would make max() raise; use the same -1
        # sentinel as the no-pwd case instead.
        nobody = 1 + max(known_uids) if known_uids else -1
    return nobody
952
953
def executable(path):
    """Return whether os.access() reports execute permission for path."""
    may_execute = os.access(path, os.X_OK)
    return may_execute
957
958
class CGIHTTPRequestHandler(SimpleHTTPRequestHandler):

    """Complete HTTP server with GET, HEAD and POST commands.

    GET and HEAD also support running CGI scripts.

    The POST command is *only* implemented for CGI scripts.

    """

    # Determine platform specifics
    have_fork = hasattr(os, 'fork')

    # Make rfile unbuffered -- we need to read one line and then pass
    # the rest to a subprocess, so we can't use buffered input.
    rbufsize = 0

    # URL path prefixes under which requests are dispatched to CGI scripts.
    cgi_directories = ['/cgi-bin', '/htbin']

    def do_POST(self):
        """Serve a POST request.

        This is only implemented for CGI scripts; any other target gets
        a NOT_IMPLEMENTED error response.

        """

        if self.is_cgi():
            self.run_cgi()
        else:
            self.send_error(
                HTTPStatus.NOT_IMPLEMENTED,
                "Can only POST to CGI scripts")

    def send_head(self):
        """Version of send_head that supports CGI scripts."""
        if self.is_cgi():
            return self.run_cgi()
        else:
            return SimpleHTTPRequestHandler.send_head(self)

    def is_cgi(self):
        """Test whether self.path corresponds to a CGI script.

        Returns True and updates the cgi_info attribute to the tuple
        (dir, rest) if self.path requires running a CGI script.
        Returns False otherwise.

        If any exception is raised, the caller should assume that
        self.path was rejected as invalid and act accordingly.

        The default implementation tests whether the normalized url
        path begins with one of the strings in self.cgi_directories
        (and the next character is a '/' or the end of the string).

        """
        collapsed_path = _url_collapse_path(self.path)
        # Try each successively longer '/'-terminated prefix until one of
        # them is a registered CGI directory.
        dir_sep = collapsed_path.find('/', 1)
        while (dir_sep > 0
               and collapsed_path[:dir_sep] not in self.cgi_directories):
            dir_sep = collapsed_path.find('/', dir_sep+1)
        if dir_sep > 0:
            head, tail = collapsed_path[:dir_sep], collapsed_path[dir_sep+1:]
            self.cgi_info = head, tail
            return True
        return False

    def is_executable(self, path):
        """Test whether argument path is an executable file."""
        return executable(path)

    def is_python(self, path):
        """Test whether argument path is a Python script."""
        head, tail = os.path.splitext(path)
        return tail.lower() in (".py", ".pyw")

    @staticmethod
    def _basic_auth_user(credentials):
        """Return the user-id from HTTP Basic credentials, or None.

        CREDENTIALS is the base64 text that follows the "Basic" scheme
        in an Authorization header.  None is returned when it cannot be
        decoded as ASCII base64 or when the decoded text has no ':'.

        Only the first ':' separates user-id from password, so a
        password that itself contains ':' is handled.
        """
        import base64
        import binascii
        try:
            decoded = base64.decodebytes(
                credentials.encode('ascii')).decode('ascii')
        except (binascii.Error, UnicodeError):
            return None
        user, colon, _password = decoded.partition(':')
        return user if colon else None

    def run_cgi(self):
        """Execute a CGI script.

        Uses self.cgi_info, as set by is_cgi(), to locate the script.
        An error response is sent (and None returned) if the script does
        not exist, is not a plain file, or is not executable.  Otherwise
        the CGI environment is built, a 200 status line is sent, and the
        script is run: via fork/execve where os.fork exists, via
        subprocess elsewhere.  Always returns None.

        """
        cgi_dir, rest = self.cgi_info
        path = cgi_dir + '/' + rest
        # Walk down through any real sub-directories below the CGI
        # directory so that cgi_dir ends up naming the script's directory.
        i = path.find('/', len(cgi_dir)+1)
        while i >= 0:
            nextdir = path[:i]
            nextrest = path[i+1:]

            scriptdir = self.translate_path(nextdir)
            if os.path.isdir(scriptdir):
                cgi_dir, rest = nextdir, nextrest
                i = path.find('/', len(cgi_dir)+1)
            else:
                break

        # find an explicit query string, if present.
        rest, _, query = rest.partition('?')

        # dissect the part after the directory name into a script name &
        # a possible additional path, to be stored in PATH_INFO.
        i = rest.find('/')
        if i >= 0:
            script, rest = rest[:i], rest[i:]
        else:
            script, rest = rest, ''

        scriptname = cgi_dir + '/' + script
        scriptfile = self.translate_path(scriptname)
        if not os.path.exists(scriptfile):
            self.send_error(
                HTTPStatus.NOT_FOUND,
                "No such CGI script (%r)" % scriptname)
            return
        if not os.path.isfile(scriptfile):
            self.send_error(
                HTTPStatus.FORBIDDEN,
                "CGI script is not a plain file (%r)" % scriptname)
            return
        ispy = self.is_python(scriptname)
        # Without fork, Python scripts are handed to the interpreter, so
        # only then may the executable check be skipped.
        if self.have_fork or not ispy:
            if not self.is_executable(scriptfile):
                self.send_error(
                    HTTPStatus.FORBIDDEN,
                    "CGI script is not executable (%r)" % scriptname)
                return

        # Reference: http://hoohoo.ncsa.uiuc.edu/cgi/env.html
        # XXX Much of the following could be prepared ahead of time!
        env = copy.deepcopy(os.environ)
        env['SERVER_SOFTWARE'] = self.version_string()
        env['SERVER_NAME'] = self.server.server_name
        env['GATEWAY_INTERFACE'] = 'CGI/1.1'
        env['SERVER_PROTOCOL'] = self.protocol_version
        env['SERVER_PORT'] = str(self.server.server_port)
        env['REQUEST_METHOD'] = self.command
        uqrest = urllib.parse.unquote(rest)
        env['PATH_INFO'] = uqrest
        env['PATH_TRANSLATED'] = self.translate_path(uqrest)
        env['SCRIPT_NAME'] = scriptname
        env['QUERY_STRING'] = query
        env['REMOTE_ADDR'] = self.client_address[0]
        authorization = self.headers.get("authorization")
        if authorization:
            authorization = authorization.split()
            if len(authorization) == 2:
                env['AUTH_TYPE'] = authorization[0]
                if authorization[0].lower() == "basic":
                    remote_user = self._basic_auth_user(authorization[1])
                    if remote_user is not None:
                        env['REMOTE_USER'] = remote_user
        # XXX REMOTE_IDENT
        if self.headers.get('content-type') is None:
            env['CONTENT_TYPE'] = self.headers.get_content_type()
        else:
            env['CONTENT_TYPE'] = self.headers['content-type']
        length = self.headers.get('content-length')
        if length:
            env['CONTENT_LENGTH'] = length
        referer = self.headers.get('referer')
        if referer:
            env['HTTP_REFERER'] = referer
        accept = self.headers.get_all('accept', ())
        env['HTTP_ACCEPT'] = ','.join(accept)
        ua = self.headers.get('user-agent')
        if ua:
            env['HTTP_USER_AGENT'] = ua
        co = filter(None, self.headers.get_all('cookie', []))
        cookie_str = ', '.join(co)
        if cookie_str:
            env['HTTP_COOKIE'] = cookie_str
        # XXX Other HTTP_* headers
        # Since we're setting the env in the parent, provide empty
        # values to override previously set values
        for k in ('QUERY_STRING', 'REMOTE_HOST', 'CONTENT_LENGTH',
                  'HTTP_USER_AGENT', 'HTTP_COOKIE', 'HTTP_REFERER'):
            env.setdefault(k, "")

        # The status line goes out before the script runs, so the script
        # itself cannot choose a different status code.
        self.send_response(HTTPStatus.OK, "Script output follows")
        self.flush_headers()

        decoded_query = query.replace('+', ' ')

        if self.have_fork:
            # Unix -- fork as we should
            args = [script]
            # A query without '=' is passed as a command-line argument.
            if '=' not in decoded_query:
                args.append(decoded_query)
            uid = nobody_uid()
            self.wfile.flush() # Always flush before forking
            pid = os.fork()
            if pid != 0:
                # Parent
                pid, sts = os.waitpid(pid, 0)
                # throw away additional data [see bug #427345]
                while select.select([self.rfile], [], [], 0)[0]:
                    if not self.rfile.read(1):
                        break
                exitcode = os.waitstatus_to_exitcode(sts)
                if exitcode:
                    self.log_error(f"CGI script exit code {exitcode}")
                return
            # Child
            try:
                try:
                    os.setuid(uid)
                except OSError:
                    # Best effort only: keep running as the current user.
                    pass
                os.dup2(self.rfile.fileno(), 0)
                os.dup2(self.wfile.fileno(), 1)
                os.execve(scriptfile, args, env)
            except:
                # Deliberately catch everything: the child must never
                # return into the server's request loop.
                self.server.handle_error(self.request, self.client_address)
                os._exit(127)

        else:
            # Non-Unix -- use subprocess
            import subprocess
            cmdline = [scriptfile]
            if self.is_python(scriptfile):
                interp = sys.executable
                if interp.lower().endswith("w.exe"):
                    # On Windows, use python.exe, not pythonw.exe
                    interp = interp[:-5] + interp[-4:]
                cmdline = [interp, '-u'] + cmdline
            # NOTE(review): unlike the fork branch above, this passes the
            # query without translating '+' to ' ' -- confirm whether the
            # difference is intended before changing it.
            if '=' not in query:
                cmdline.append(query)
            self.log_message("command: %s", subprocess.list2cmdline(cmdline))
            try:
                nbytes = int(length)
            except (TypeError, ValueError):
                # Missing or malformed Content-Length: send no body.
                nbytes = 0
            p = subprocess.Popen(cmdline,
                                 stdin=subprocess.PIPE,
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE,
                                 env = env
                                 )
            if self.command.lower() == "post" and nbytes > 0:
                data = self.rfile.read(nbytes)
            else:
                data = None
            # throw away additional data [see bug #427345]
            while select.select([self.rfile._sock], [], [], 0)[0]:
                if not self.rfile._sock.recv(1):
                    break
            stdout, stderr = p.communicate(data)
            self.wfile.write(stdout)
            if stderr:
                self.log_error('%s', stderr)
            p.stderr.close()
            p.stdout.close()
            status = p.returncode
            if status:
                self.log_error("CGI script exit status %#x", status)
            else:
                self.log_message("CGI script exited OK")
1219
1220
def _get_best_family(*address):
    """Return (family, sockaddr) of the first getaddrinfo() result.

    The positional arguments are passed straight to
    socket.getaddrinfo(), which is asked for passive stream sockets.
    """
    candidates = socket.getaddrinfo(
        *address, type=socket.SOCK_STREAM, flags=socket.AI_PASSIVE)
    family, _socktype, _proto, _canonname, sockaddr = next(iter(candidates))
    return family, sockaddr
1229
1230
def test(HandlerClass=BaseHTTPRequestHandler,
         ServerClass=ThreadingHTTPServer,
         protocol="HTTP/1.0", port=8000, bind=None):
    """Run an HTTP server with the given handler class until interrupted.

    The server listens on port 8000 (or the port argument) at the
    address chosen by _get_best_family(bind, port).  Note that
    ServerClass.address_family and HandlerClass.protocol_version are
    set on the classes themselves.  A KeyboardInterrupt ends the
    process via sys.exit(0).

    """
    family, addr = _get_best_family(bind, port)
    ServerClass.address_family = family
    HandlerClass.protocol_version = protocol
    with ServerClass(addr, HandlerClass) as httpd:
        # Report the address actually bound, which may differ from the
        # one requested.
        host, port = httpd.socket.getsockname()[:2]
        if ':' in host:
            # IPv6 literals need brackets inside a URL.
            url_host = f'[{host}]'
        else:
            url_host = host
        print(
            f"Serving HTTP on {host} port {port} "
            f"(http://{url_host}:{port}/) ..."
        )
        try:
            httpd.serve_forever()
        except KeyboardInterrupt:
            print("\nKeyboard interrupt received, exiting.")
            sys.exit(0)
1253
if __name__ == '__main__':
    import argparse
    import contextlib

    # Command line: [--cgi] [--bind ADDRESS] [--directory DIR] [port]
    cli = argparse.ArgumentParser()
    cli.add_argument('--cgi', action='store_true',
                     help='run as CGI server')
    cli.add_argument('--bind', '-b', metavar='ADDRESS',
                     help='specify alternate bind address '
                          '(default: all interfaces)')
    cli.add_argument('--directory', '-d', default=os.getcwd(),
                     help='specify alternate directory '
                          '(default: current directory)')
    cli.add_argument('port', action='store', default=8000, type=int,
                     nargs='?',
                     help='specify alternate port (default: 8000)')
    args = cli.parse_args()

    handler_class = (CGIHTTPRequestHandler if args.cgi
                     else SimpleHTTPRequestHandler)

    # ensure dual-stack is not disabled; ref #38907
    class DualStackServer(ThreadingHTTPServer):

        def server_bind(self):
            # suppress exception when protocol is IPv4
            with contextlib.suppress(Exception):
                self.socket.setsockopt(
                    socket.IPPROTO_IPV6, socket.IPV6_V6ONLY, 0)
            return super().server_bind()

        def finish_request(self, request, client_address):
            # Hand the --directory option to every handler instance.
            self.RequestHandlerClass(request, client_address, self,
                                     directory=args.directory)

    test(HandlerClass=handler_class, ServerClass=DualStackServer,
         port=args.port, bind=args.bind)
1296