1"""HTTP server classes. 2 3Note: BaseHTTPRequestHandler doesn't implement any HTTP request; see 4SimpleHTTPRequestHandler for simple implementations of GET, HEAD and POST, 5and CGIHTTPRequestHandler for CGI scripts. 6 7It does, however, optionally implement HTTP/1.1 persistent connections, 8as of version 0.3. 9 10Notes on CGIHTTPRequestHandler 11------------------------------ 12 13This class implements GET and POST requests to cgi-bin scripts. 14 15If the os.fork() function is not present (e.g. on Windows), 16subprocess.Popen() is used as a fallback, with slightly altered semantics. 17 18In all cases, the implementation is intentionally naive -- all 19requests are executed synchronously. 20 21SECURITY WARNING: DON'T USE THIS CODE UNLESS YOU ARE INSIDE A FIREWALL 22-- it may execute arbitrary Python code or external programs. 23 24Note that status code 200 is sent prior to execution of a CGI script, so 25scripts cannot send other status codes such as 302 (redirect). 26 27XXX To do: 28 29- log requests even later (to capture byte count) 30- log user-agent header and other interesting goodies 31- send error log to separate file 32""" 33 34 35# See also: 36# 37# HTTP Working Group T. Berners-Lee 38# INTERNET-DRAFT R. T. Fielding 39# <draft-ietf-http-v10-spec-00.txt> H. Frystyk Nielsen 40# Expires September 8, 1995 March 8, 1995 41# 42# URL: http://www.ics.uci.edu/pub/ietf/http/draft-ietf-http-v10-spec-00.txt 43# 44# and 45# 46# Network Working Group R. Fielding 47# Request for Comments: 2616 et al 48# Obsoletes: 2068 June 1999 49# Category: Standards Track 50# 51# URL: http://www.faqs.org/rfcs/rfc2616.html 52 53# Log files 54# --------- 55# 56# Here's a quote from the NCSA httpd docs about log file format. 57# 58# | The logfile format is as follows. 
Each line consists of: 59# | 60# | host rfc931 authuser [DD/Mon/YYYY:hh:mm:ss] "request" ddd bbbb 61# | 62# | host: Either the DNS name or the IP number of the remote client 63# | rfc931: Any information returned by identd for this person, 64# | - otherwise. 65# | authuser: If user sent a userid for authentication, the user name, 66# | - otherwise. 67# | DD: Day 68# | Mon: Month (calendar name) 69# | YYYY: Year 70# | hh: hour (24-hour format, the machine's timezone) 71# | mm: minutes 72# | ss: seconds 73# | request: The first line of the HTTP request as sent by the client. 74# | ddd: the status code returned by the server, - if not available. 75# | bbbb: the total number of bytes sent, 76# | *not including the HTTP/1.0 header*, - if not available 77# | 78# | You can determine the name of the file accessed through request. 79# 80# (Actually, the latter is only true if you know the server configuration 81# at the time the request was made!) 82 83__version__ = "0.6" 84 85__all__ = [ 86 "HTTPServer", "ThreadingHTTPServer", "BaseHTTPRequestHandler", 87 "SimpleHTTPRequestHandler", "CGIHTTPRequestHandler", 88] 89 90import copy 91import datetime 92import email.utils 93import html 94import http.client 95import io 96import mimetypes 97import os 98import posixpath 99import select 100import shutil 101import socket # For gethostbyaddr() 102import socketserver 103import sys 104import time 105import urllib.parse 106import contextlib 107from functools import partial 108 109from http import HTTPStatus 110 111 112# Default error message template 113DEFAULT_ERROR_MESSAGE = """\ 114<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" 115 "http://www.w3.org/TR/html4/strict.dtd"> 116<html> 117 <head> 118 <meta http-equiv="Content-Type" content="text/html;charset=utf-8"> 119 <title>Error response</title> 120 </head> 121 <body> 122 <h1>Error response</h1> 123 <p>Error code: %(code)d</p> 124 <p>Message: %(message)s.</p> 125 <p>Error code explanation: %(code)s - %(explain)s.</p> 126 </body> 
class HTTPServer(socketserver.TCPServer):
    """TCP server that remembers its own host name and port once bound."""

    # Seems to make sense in a testing environment.
    allow_reuse_address = 1

    def server_bind(self):
        """Bind the socket, then record the server name and port.

        The actual binding is delegated to socketserver.TCPServer.  The
        first two items of the resulting server_address are then used to
        fill in self.server_name (the fully qualified name for the host)
        and self.server_port.
        """
        socketserver.TCPServer.server_bind(self)
        bound_host, bound_port = self.server_address[:2]
        self.server_name = socket.getfqdn(bound_host)
        self.server_port = bound_port
Similarly, whitespace in the request line 180 is treated sensibly (allowing multiple spaces between components 181 and allowing trailing whitespace). 182 183 Similarly, for output, lines ought to be separated by CRLF pairs 184 but most clients grok LF characters just fine. 185 186 If the first line of the request has the form 187 188 <command> <path> 189 190 (i.e. <version> is left out) then this is assumed to be an HTTP 191 0.9 request; this form has no optional headers and data part and 192 the reply consists of just the data. 193 194 The reply form of the HTTP 1.x protocol again has three parts: 195 196 1. One line giving the response code 197 2. An optional set of RFC-822-style headers 198 3. The data 199 200 Again, the headers and data are separated by a blank line. 201 202 The response code line has the form 203 204 <version> <responsecode> <responsestring> 205 206 where <version> is the protocol version ("HTTP/1.0" or "HTTP/1.1"), 207 <responsecode> is a 3-digit response code indicating success or 208 failure of the request, and <responsestring> is an optional 209 human-readable string explaining what the response code means. 210 211 This server parses the request and the headers, and then calls a 212 function specific to the request type (<command>). Specifically, 213 a request SPAM will be handled by a method do_SPAM(). If no 214 such method exists the server sends an error response to the 215 client. If it exists, it is called with no arguments: 216 217 do_SPAM() 218 219 Note that the request name is case sensitive (i.e. SPAM and spam 220 are different requests). 
def parse_request(self):
    """Parse a request (internal).

    The request should be stored in self.raw_requestline; the results
    are in self.command, self.path, self.request_version and
    self.headers.  self.requestline and self.close_connection are
    updated as well.

    Return True for success, False for failure; on failure, any relevant
    error response has already been sent back through self.send_error()
    (except for an entirely blank request line, which is silently
    rejected).

    """
    self.command = None  # set in case of error on the first line
    self.request_version = version = self.default_request_version
    self.close_connection = True
    requestline = str(self.raw_requestline, 'iso-8859-1')
    requestline = requestline.rstrip('\r\n')
    self.requestline = requestline
    words = requestline.split()
    if not words:
        return False

    if len(words) >= 3:  # Enough to determine protocol version
        version = words[-1]
        try:
            if not version.startswith('HTTP/'):
                raise ValueError
            base_version_number = version.split('/', 1)[1]
            version_number = base_version_number.split(".")
            # RFC 2145 section 3.1 says there can be only one "." and
            #   - major and minor numbers MUST be treated as
            #      separate integers;
            #   - HTTP/2.4 is a lower version than HTTP/2.13, which in
            #      turn is lower than HTTP/12.3;
            #   - Leading zeros MUST be ignored by recipients.
            if len(version_number) != 2:
                raise ValueError
            # int() on its own is too forgiving: it accepts a leading
            # sign and underscores ("+1", "-1", "1_0"), none of which
            # may appear in an HTTP version.  Insist on plain ASCII
            # digits before converting.
            if any(not (part.isascii() and part.isdigit())
                   for part in version_number):
                raise ValueError("non digit in http version")
            # Keep absurdly long numbers away from int().
            if any(len(part) > 10 for part in version_number):
                raise ValueError("unreasonable length http version")
            version_number = int(version_number[0]), int(version_number[1])
        except (ValueError, IndexError):
            self.send_error(
                HTTPStatus.BAD_REQUEST,
                "Bad request version (%r)" % version)
            return False
        if version_number >= (1, 1) and self.protocol_version >= "HTTP/1.1":
            self.close_connection = False
        if version_number >= (2, 0):
            self.send_error(
                HTTPStatus.HTTP_VERSION_NOT_SUPPORTED,
                "Invalid HTTP version (%s)" % base_version_number)
            return False
        self.request_version = version

    if not 2 <= len(words) <= 3:
        self.send_error(
            HTTPStatus.BAD_REQUEST,
            "Bad request syntax (%r)" % requestline)
        return False
    command, path = words[:2]
    if len(words) == 2:
        # No version on the request line: treated as HTTP/0.9, which
        # only knows GET and never keeps the connection open.
        self.close_connection = True
        if command != 'GET':
            self.send_error(
                HTTPStatus.BAD_REQUEST,
                "Bad HTTP/0.9 request type (%r)" % command)
            return False
    self.command, self.path = command, path

    # gh-87389: The purpose of replacing '//' with '/' is to protect
    # against open redirect attacks possibly triggered if the path starts
    # with '//' because http clients treat //path as an absolute URI
    # without scheme (similar to http://path) rather than a path.
    if self.path.startswith('//'):
        self.path = '/' + self.path.lstrip('/')  # Reduce to a single /

    # Examine the headers and look for a Connection directive.
    try:
        self.headers = http.client.parse_headers(self.rfile,
                                                 _class=self.MessageClass)
    except http.client.LineTooLong as err:
        self.send_error(
            HTTPStatus.REQUEST_HEADER_FIELDS_TOO_LARGE,
            "Line too long",
            str(err))
        return False
    except http.client.HTTPException as err:
        self.send_error(
            HTTPStatus.REQUEST_HEADER_FIELDS_TOO_LARGE,
            "Too many headers",
            str(err)
        )
        return False

    conntype = self.headers.get('Connection', "").lower()
    if conntype == 'close':
        self.close_connection = True
    elif conntype == 'keep-alive' and self.protocol_version >= "HTTP/1.1":
        self.close_connection = False
    # Examine the headers and look for an Expect directive
    expect = self.headers.get('Expect', "")
    if (expect.lower() == "100-continue" and
            self.protocol_version >= "HTTP/1.1" and
            self.request_version >= "HTTP/1.1"):
        if not self.handle_expect_100():
            return False
    return True
def send_error(self, code, message=None, explain=None):
    """Send and log an error reply.

    Arguments are
    * code:    an HTTP error code
               3 digits
    * message: a simple optional 1 line reason phrase.
               *( HTAB / SP / VCHAR / %x80-FF )
               defaults to short entry matching the response code
    * explain: a detailed message defaults to the long entry
               matching the response code.

    The error is logged, the status line and headers (including
    "Connection: close") are sent, and, unless the status code or a
    HEAD request forbids a body, an HTML page built from
    self.error_message_format follows.  Call this before any other
    output has been generated.

    """
    try:
        phrase, description = self.responses[code]
    except KeyError:
        phrase = description = '???'
    message = phrase if message is None else message
    explain = description if explain is None else explain

    self.log_error("code %d, message %s", code, message)
    self.send_response(code, message)
    self.send_header('Connection', 'close')

    # Message body is omitted for cases described in:
    #  - RFC7230: 3.3. 1xx, 204(No Content), 304(Not Modified)
    #  - RFC7231: 6.3.6. 205(Reset Content)
    bodiless_codes = (HTTPStatus.NO_CONTENT,
                      HTTPStatus.RESET_CONTENT,
                      HTTPStatus.NOT_MODIFIED)
    payload = None
    if code >= 200 and code not in bodiless_codes:
        # HTML encode to prevent Cross Site Scripting attacks
        # (see bug #1100201)
        fields = {
            'code': code,
            'message': html.escape(message, quote=False),
            'explain': html.escape(explain, quote=False),
        }
        page = self.error_message_format % fields
        payload = page.encode('UTF-8', 'replace')
        self.send_header("Content-Type", self.error_content_type)
        self.send_header('Content-Length', str(len(payload)))
    self.end_headers()

    if self.command != 'HEAD' and payload:
        self.wfile.write(payload)
def send_header(self, keyword, value):
    """Queue one MIME header line in the headers buffer.

    Nothing is buffered for HTTP/0.9 requests.  Regardless of the
    request version, a Connection header of "close" or "keep-alive"
    (compared case-insensitively) updates self.close_connection.
    """
    if self.request_version != 'HTTP/0.9':
        if not hasattr(self, '_headers_buffer'):
            self._headers_buffer = []
        header_line = "%s: %s\r\n" % (keyword, value)
        self._headers_buffer.append(header_line.encode('latin-1', 'strict'))

    if keyword.lower() != 'connection':
        return
    directive = value.lower()
    if directive == 'close':
        self.close_connection = True
    elif directive == 'keep-alive':
        self.close_connection = False
def log_message(self, format, *args):
    """Log an arbitrary message.

    This is used by all other logging functions.  Override
    it if you have specific logging wishes.

    The first argument, FORMAT, is a format string for the
    message to be logged.  If the format string contains
    any % escapes requiring parameters, they should be
    specified as subsequent arguments (it's just like
    printf!).

    The client ip and current date/time are prefixed to
    every message, and the result is written to sys.stderr.

    Control characters in the formatted message are written as \\xNN
    escapes and backslashes are doubled.  Messages routinely embed
    text chosen by the client (for example the request line), and
    writing it verbatim would let a client forge extra log lines or
    send escape sequences to a terminal displaying the log.

    """
    # C0 controls, DEL and C1 controls; see
    # https://en.wikipedia.org/wiki/List_of_Unicode_characters#Control_codes
    control_char_table = {
        code: '\\x%02x' % code
        for code in (*range(0x20), *range(0x7f, 0xa0))
    }
    # Escape the escape character itself so the output stays unambiguous.
    control_char_table[ord('\\')] = '\\\\'

    message = format % args
    sys.stderr.write("%s - - [%s] %s\n" %
                     (self.address_string(),
                      self.log_date_time_string(),
                      message.translate(control_char_table)))
def do_GET(self):
    """Serve a GET request.

    The headers are produced by send_head(); when it hands back a file
    object, its contents are copied to self.wfile and the object is
    closed afterwards, even if the copy fails.
    """
    source = self.send_head()
    if not source:
        return
    try:
        self.copyfile(source, self.wfile)
    finally:
        source.close()
686 687 """ 688 path = self.translate_path(self.path) 689 f = None 690 if os.path.isdir(path): 691 parts = urllib.parse.urlsplit(self.path) 692 if not parts.path.endswith('/'): 693 # redirect browser - doing basically what apache does 694 self.send_response(HTTPStatus.MOVED_PERMANENTLY) 695 new_parts = (parts[0], parts[1], parts[2] + '/', 696 parts[3], parts[4]) 697 new_url = urllib.parse.urlunsplit(new_parts) 698 self.send_header("Location", new_url) 699 self.end_headers() 700 return None 701 for index in "index.html", "index.htm": 702 index = os.path.join(path, index) 703 if os.path.exists(index): 704 path = index 705 break 706 else: 707 return self.list_directory(path) 708 ctype = self.guess_type(path) 709 # check for trailing "/" which should return 404. See Issue17324 710 # The test for this was added in test_httpserver.py 711 # However, some OS platforms accept a trailingSlash as a filename 712 # See discussion on python-dev and Issue34711 regarding 713 # parseing and rejection of filenames with a trailing slash 714 if path.endswith("/"): 715 self.send_error(HTTPStatus.NOT_FOUND, "File not found") 716 return None 717 try: 718 f = open(path, 'rb') 719 except OSError: 720 self.send_error(HTTPStatus.NOT_FOUND, "File not found") 721 return None 722 723 try: 724 fs = os.fstat(f.fileno()) 725 # Use browser cache if possible 726 if ("If-Modified-Since" in self.headers 727 and "If-None-Match" not in self.headers): 728 # compare If-Modified-Since and time of last file modification 729 try: 730 ims = email.utils.parsedate_to_datetime( 731 self.headers["If-Modified-Since"]) 732 except (TypeError, IndexError, OverflowError, ValueError): 733 # ignore ill-formed values 734 pass 735 else: 736 if ims.tzinfo is None: 737 # obsolete format with no timezone, cf. 
def list_directory(self, path):
    """Helper to produce a directory listing (absent index.html).

    PATH is the local directory to list.  The page title is derived
    from the request path (self.path), never from PATH, so the
    server's filesystem layout is not revealed to the client.

    Return value is either a file object, or None (indicating an
    error).  In either case, the headers are sent, making the
    interface the same as for send_head().

    """
    try:
        entries = os.listdir(path)
    except OSError:
        self.send_error(
            HTTPStatus.NOT_FOUND,
            "No permission to list directory")
        return None
    entries.sort(key=lambda a: a.lower())
    r = []
    try:
        displaypath = urllib.parse.unquote(self.path,
                                           errors='surrogatepass')
    except UnicodeDecodeError:
        # Undecodable escapes: fall back to the default (replacing)
        # error handler, still on the request path.
        displaypath = urllib.parse.unquote(self.path)
    displaypath = html.escape(displaypath, quote=False)
    enc = sys.getfilesystemencoding()
    title = 'Directory listing for %s' % displaypath
    r.append('<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" '
             '"http://www.w3.org/TR/html4/strict.dtd">')
    r.append('<html>\n<head>')
    r.append('<meta http-equiv="Content-Type" '
             'content="text/html; charset=%s">' % enc)
    r.append('<title>%s</title>\n</head>' % title)
    r.append('<body>\n<h1>%s</h1>' % title)
    r.append('<hr>\n<ul>')
    for name in entries:
        fullname = os.path.join(path, name)
        displayname = linkname = name
        # Append / for directories or @ for symbolic links
        if os.path.isdir(fullname):
            displayname = name + "/"
            linkname = name + "/"
        if os.path.islink(fullname):
            displayname = name + "@"
            # Note: a link to a directory displays with @ and links with /
        r.append('<li><a href="%s">%s</a></li>'
                 % (urllib.parse.quote(linkname,
                                       errors='surrogatepass'),
                    html.escape(displayname, quote=False)))
    r.append('</ul>\n<hr>\n</body>\n</html>\n')
    encoded = '\n'.join(r).encode(enc, 'surrogateescape')
    f = io.BytesIO(encoded)
    self.send_response(HTTPStatus.OK)
    self.send_header("Content-type", "text/html; charset=%s" % enc)
    self.send_header("Content-Length", str(len(encoded)))
    self.end_headers()
    return f
def guess_type(self, path):
    """Guess the type of a file.

    Argument is a PATH (a filename).

    Return value is a string of the form type/subtype,
    usable for a MIME Content-type header.

    The extension is looked up in self.extensions_map, first as
    written and then lower-cased.  If neither is present the
    mimetypes module is consulted, and application/octet-stream is
    the final fallback.

    """
    suffix = posixpath.splitext(path)[1]
    for candidate in (suffix, suffix.lower()):
        if candidate in self.extensions_map:
            return self.extensions_map[candidate]
    guessed = mimetypes.guess_type(path)[0]
    return guessed or 'application/octet-stream'
def do_POST(self):
    """Serve a POST request.

    POST is only implemented for CGI scripts: when is_cgi() accepts
    the request the script is run, otherwise a 501 error is sent.

    """
    if not self.is_cgi():
        self.send_error(
            HTTPStatus.NOT_IMPLEMENTED,
            "Can only POST to CGI scripts")
        return
    self.run_cgi()
1007 1008 Returns True and updates the cgi_info attribute to the tuple 1009 (dir, rest) if self.path requires running a CGI script. 1010 Returns False otherwise. 1011 1012 If any exception is raised, the caller should assume that 1013 self.path was rejected as invalid and act accordingly. 1014 1015 The default implementation tests whether the normalized url 1016 path begins with one of the strings in self.cgi_directories 1017 (and the next character is a '/' or the end of the string). 1018 1019 """ 1020 collapsed_path = _url_collapse_path(self.path) 1021 dir_sep = collapsed_path.find('/', 1) 1022 while dir_sep > 0 and not collapsed_path[:dir_sep] in self.cgi_directories: 1023 dir_sep = collapsed_path.find('/', dir_sep+1) 1024 if dir_sep > 0: 1025 head, tail = collapsed_path[:dir_sep], collapsed_path[dir_sep+1:] 1026 self.cgi_info = head, tail 1027 return True 1028 return False 1029 1030 1031 cgi_directories = ['/cgi-bin', '/htbin'] 1032 1033 def is_executable(self, path): 1034 """Test whether argument path is an executable file.""" 1035 return executable(path) 1036 1037 def is_python(self, path): 1038 """Test whether argument path is a Python script.""" 1039 head, tail = os.path.splitext(path) 1040 return tail.lower() in (".py", ".pyw") 1041 1042 def run_cgi(self): 1043 """Execute a CGI script.""" 1044 dir, rest = self.cgi_info 1045 path = dir + '/' + rest 1046 i = path.find('/', len(dir)+1) 1047 while i >= 0: 1048 nextdir = path[:i] 1049 nextrest = path[i+1:] 1050 1051 scriptdir = self.translate_path(nextdir) 1052 if os.path.isdir(scriptdir): 1053 dir, rest = nextdir, nextrest 1054 i = path.find('/', len(dir)+1) 1055 else: 1056 break 1057 1058 # find an explicit query string, if present. 1059 rest, _, query = rest.partition('?') 1060 1061 # dissect the part after the directory name into a script name & 1062 # a possible additional path, to be stored in PATH_INFO. 
1063 i = rest.find('/') 1064 if i >= 0: 1065 script, rest = rest[:i], rest[i:] 1066 else: 1067 script, rest = rest, '' 1068 1069 scriptname = dir + '/' + script 1070 scriptfile = self.translate_path(scriptname) 1071 if not os.path.exists(scriptfile): 1072 self.send_error( 1073 HTTPStatus.NOT_FOUND, 1074 "No such CGI script (%r)" % scriptname) 1075 return 1076 if not os.path.isfile(scriptfile): 1077 self.send_error( 1078 HTTPStatus.FORBIDDEN, 1079 "CGI script is not a plain file (%r)" % scriptname) 1080 return 1081 ispy = self.is_python(scriptname) 1082 if self.have_fork or not ispy: 1083 if not self.is_executable(scriptfile): 1084 self.send_error( 1085 HTTPStatus.FORBIDDEN, 1086 "CGI script is not executable (%r)" % scriptname) 1087 return 1088 1089 # Reference: http://hoohoo.ncsa.uiuc.edu/cgi/env.html 1090 # XXX Much of the following could be prepared ahead of time! 1091 env = copy.deepcopy(os.environ) 1092 env['SERVER_SOFTWARE'] = self.version_string() 1093 env['SERVER_NAME'] = self.server.server_name 1094 env['GATEWAY_INTERFACE'] = 'CGI/1.1' 1095 env['SERVER_PROTOCOL'] = self.protocol_version 1096 env['SERVER_PORT'] = str(self.server.server_port) 1097 env['REQUEST_METHOD'] = self.command 1098 uqrest = urllib.parse.unquote(rest) 1099 env['PATH_INFO'] = uqrest 1100 env['PATH_TRANSLATED'] = self.translate_path(uqrest) 1101 env['SCRIPT_NAME'] = scriptname 1102 if query: 1103 env['QUERY_STRING'] = query 1104 env['REMOTE_ADDR'] = self.client_address[0] 1105 authorization = self.headers.get("authorization") 1106 if authorization: 1107 authorization = authorization.split() 1108 if len(authorization) == 2: 1109 import base64, binascii 1110 env['AUTH_TYPE'] = authorization[0] 1111 if authorization[0].lower() == "basic": 1112 try: 1113 authorization = authorization[1].encode('ascii') 1114 authorization = base64.decodebytes(authorization).\ 1115 decode('ascii') 1116 except (binascii.Error, UnicodeError): 1117 pass 1118 else: 1119 authorization = authorization.split(':') 
1120 if len(authorization) == 2: 1121 env['REMOTE_USER'] = authorization[0] 1122 # XXX REMOTE_IDENT 1123 if self.headers.get('content-type') is None: 1124 env['CONTENT_TYPE'] = self.headers.get_content_type() 1125 else: 1126 env['CONTENT_TYPE'] = self.headers['content-type'] 1127 length = self.headers.get('content-length') 1128 if length: 1129 env['CONTENT_LENGTH'] = length 1130 referer = self.headers.get('referer') 1131 if referer: 1132 env['HTTP_REFERER'] = referer 1133 accept = self.headers.get_all('accept', ()) 1134 env['HTTP_ACCEPT'] = ','.join(accept) 1135 ua = self.headers.get('user-agent') 1136 if ua: 1137 env['HTTP_USER_AGENT'] = ua 1138 co = filter(None, self.headers.get_all('cookie', [])) 1139 cookie_str = ', '.join(co) 1140 if cookie_str: 1141 env['HTTP_COOKIE'] = cookie_str 1142 # XXX Other HTTP_* headers 1143 # Since we're setting the env in the parent, provide empty 1144 # values to override previously set values 1145 for k in ('QUERY_STRING', 'REMOTE_HOST', 'CONTENT_LENGTH', 1146 'HTTP_USER_AGENT', 'HTTP_COOKIE', 'HTTP_REFERER'): 1147 env.setdefault(k, "") 1148 1149 self.send_response(HTTPStatus.OK, "Script output follows") 1150 self.flush_headers() 1151 1152 decoded_query = query.replace('+', ' ') 1153 1154 if self.have_fork: 1155 # Unix -- fork as we should 1156 args = [script] 1157 if '=' not in decoded_query: 1158 args.append(decoded_query) 1159 nobody = nobody_uid() 1160 self.wfile.flush() # Always flush before forking 1161 pid = os.fork() 1162 if pid != 0: 1163 # Parent 1164 pid, sts = os.waitpid(pid, 0) 1165 # throw away additional data [see bug #427345] 1166 while select.select([self.rfile], [], [], 0)[0]: 1167 if not self.rfile.read(1): 1168 break 1169 exitcode = os.waitstatus_to_exitcode(sts) 1170 if exitcode: 1171 self.log_error(f"CGI script exit code {exitcode}") 1172 return 1173 # Child 1174 try: 1175 try: 1176 os.setuid(nobody) 1177 except OSError: 1178 pass 1179 os.dup2(self.rfile.fileno(), 0) 1180 os.dup2(self.wfile.fileno(), 1) 1181 
os.execve(scriptfile, args, env) 1182 except: 1183 self.server.handle_error(self.request, self.client_address) 1184 os._exit(127) 1185 1186 else: 1187 # Non-Unix -- use subprocess 1188 import subprocess 1189 cmdline = [scriptfile] 1190 if self.is_python(scriptfile): 1191 interp = sys.executable 1192 if interp.lower().endswith("w.exe"): 1193 # On Windows, use python.exe, not pythonw.exe 1194 interp = interp[:-5] + interp[-4:] 1195 cmdline = [interp, '-u'] + cmdline 1196 if '=' not in query: 1197 cmdline.append(query) 1198 self.log_message("command: %s", subprocess.list2cmdline(cmdline)) 1199 try: 1200 nbytes = int(length) 1201 except (TypeError, ValueError): 1202 nbytes = 0 1203 p = subprocess.Popen(cmdline, 1204 stdin=subprocess.PIPE, 1205 stdout=subprocess.PIPE, 1206 stderr=subprocess.PIPE, 1207 env = env 1208 ) 1209 if self.command.lower() == "post" and nbytes > 0: 1210 data = self.rfile.read(nbytes) 1211 else: 1212 data = None 1213 # throw away additional data [see bug #427345] 1214 while select.select([self.rfile._sock], [], [], 0)[0]: 1215 if not self.rfile._sock.recv(1): 1216 break 1217 stdout, stderr = p.communicate(data) 1218 self.wfile.write(stdout) 1219 if stderr: 1220 self.log_error('%s', stderr) 1221 p.stderr.close() 1222 p.stdout.close() 1223 status = p.returncode 1224 if status: 1225 self.log_error("CGI script exit status %#x", status) 1226 else: 1227 self.log_message("CGI script exited OK") 1228 1229 1230def _get_best_family(*address): 1231 infos = socket.getaddrinfo( 1232 *address, 1233 type=socket.SOCK_STREAM, 1234 flags=socket.AI_PASSIVE, 1235 ) 1236 family, type, proto, canonname, sockaddr = next(iter(infos)) 1237 return family, sockaddr 1238 1239 1240def test(HandlerClass=BaseHTTPRequestHandler, 1241 ServerClass=ThreadingHTTPServer, 1242 protocol="HTTP/1.0", port=8000, bind=None): 1243 """Test the HTTP request handler class. 1244 1245 This runs an HTTP server on port 8000 (or the port argument). 
1246 1247 """ 1248 ServerClass.address_family, addr = _get_best_family(bind, port) 1249 1250 HandlerClass.protocol_version = protocol 1251 with ServerClass(addr, HandlerClass) as httpd: 1252 host, port = httpd.socket.getsockname()[:2] 1253 url_host = f'[{host}]' if ':' in host else host 1254 print( 1255 f"Serving HTTP on {host} port {port} " 1256 f"(http://{url_host}:{port}/) ..." 1257 ) 1258 try: 1259 httpd.serve_forever() 1260 except KeyboardInterrupt: 1261 print("\nKeyboard interrupt received, exiting.") 1262 sys.exit(0) 1263 1264if __name__ == '__main__': 1265 import argparse 1266 1267 parser = argparse.ArgumentParser() 1268 parser.add_argument('--cgi', action='store_true', 1269 help='Run as CGI Server') 1270 parser.add_argument('--bind', '-b', metavar='ADDRESS', 1271 help='Specify alternate bind address ' 1272 '[default: all interfaces]') 1273 parser.add_argument('--directory', '-d', default=os.getcwd(), 1274 help='Specify alternative directory ' 1275 '[default:current directory]') 1276 parser.add_argument('port', action='store', 1277 default=8000, type=int, 1278 nargs='?', 1279 help='Specify alternate port [default: 8000]') 1280 args = parser.parse_args() 1281 if args.cgi: 1282 handler_class = CGIHTTPRequestHandler 1283 else: 1284 handler_class = partial(SimpleHTTPRequestHandler, 1285 directory=args.directory) 1286 1287 # ensure dual-stack is not disabled; ref #38907 1288 class DualStackServer(ThreadingHTTPServer): 1289 def server_bind(self): 1290 # suppress exception when protocol is IPv4 1291 with contextlib.suppress(Exception): 1292 self.socket.setsockopt( 1293 socket.IPPROTO_IPV6, socket.IPV6_V6ONLY, 0) 1294 return super().server_bind() 1295 1296 test( 1297 HandlerClass=handler_class, 1298 ServerClass=DualStackServer, 1299 port=args.port, 1300 bind=args.bind, 1301 ) 1302