"""HTTP server classes.

Note: BaseHTTPRequestHandler doesn't implement any HTTP request; see
SimpleHTTPRequestHandler for simple implementations of GET, HEAD and POST,
and CGIHTTPRequestHandler for CGI scripts.

It does, however, optionally implement HTTP/1.1 persistent connections,
as of version 0.3.

Notes on CGIHTTPRequestHandler
------------------------------

This class implements GET and POST requests to cgi-bin scripts.

If the os.fork() function is not present (e.g. on Windows),
subprocess.Popen() is used as a fallback, with slightly altered semantics.

In all cases, the implementation is intentionally naive -- all
requests are executed synchronously.

SECURITY WARNING: DON'T USE THIS CODE UNLESS YOU ARE INSIDE A FIREWALL
-- it may execute arbitrary Python code or external programs.

Note that status code 200 is sent prior to execution of a CGI script, so
scripts cannot send other status codes such as 302 (redirect).

XXX To do:

- log requests even later (to capture byte count)
- log user-agent header and other interesting goodies
- send error log to separate file
"""


# See also:
#
# HTTP Working Group                                        T. Berners-Lee
# INTERNET-DRAFT                                            R. T. Fielding
# <draft-ietf-http-v10-spec-00.txt>                     H. Frystyk Nielsen
# Expires September 8, 1995                                  March 8, 1995
#
# URL: http://www.ics.uci.edu/pub/ietf/http/draft-ietf-http-v10-spec-00.txt
#
# and
#
# Network Working Group                                      R. Fielding
# Request for Comments: 2616                                       et al
# Obsoletes: 2068                                              June 1999
# Category: Standards Track
#
# URL: http://www.faqs.org/rfcs/rfc2616.html

# Log files
# ---------
#
# Here's a quote from the NCSA httpd docs about log file format.
#
# | The logfile format is as follows. Each line consists of:
# |
# | host rfc931 authuser [DD/Mon/YYYY:hh:mm:ss] "request" ddd bbbb
# |
# |        host: Either the DNS name or the IP number of the remote client
# |        rfc931: Any information returned by identd for this person,
# |                - otherwise.
# |        authuser: If user sent a userid for authentication, the user name,
# |                  - otherwise.
# |        DD: Day
# |        Mon: Month (calendar name)
# |        YYYY: Year
# |        hh: hour (24-hour format, the machine's timezone)
# |        mm: minutes
# |        ss: seconds
# |        request: The first line of the HTTP request as sent by the client.
# |        ddd: the status code returned by the server, - if not available.
# |        bbbb: the total number of bytes sent,
# |              *not including the HTTP/1.0 header*, - if not available
# |
# | You can determine the name of the file accessed through request.
#
# (Actually, the latter is only true if you know the server configuration
# at the time the request was made!)

__version__ = "0.6"

__all__ = [
    "HTTPServer", "ThreadingHTTPServer", "BaseHTTPRequestHandler",
    "SimpleHTTPRequestHandler", "CGIHTTPRequestHandler",
]

import copy
import datetime
import email.utils
import html
import http.client
import io
import mimetypes
import os
import posixpath
import select
import shutil
import socket # For gethostbyaddr()
import socketserver
import sys
import time
import urllib.parse
import contextlib
from functools import partial

from http import HTTPStatus


# Default error message template
DEFAULT_ERROR_MESSAGE = """\
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
        "http://www.w3.org/TR/html4/strict.dtd">
<html>
    <head>
        <meta http-equiv="Content-Type" content="text/html;charset=utf-8">
        <title>Error response</title>
    </head>
    <body>
        <h1>Error response</h1>
        <p>Error code: %(code)d</p>
        <p>Message: %(message)s.</p>
        <p>Error code explanation: %(code)s - %(explain)s.</p>
    </body>
</html>
"""

DEFAULT_ERROR_CONTENT_TYPE = "text/html;charset=utf-8"

class HTTPServer(socketserver.TCPServer):
    """TCP server that records its own host name and port after binding."""

    allow_reuse_address = 1    # Seems to make sense in testing environment

    def server_bind(self):
        """Override server_bind to store the server name."""
        socketserver.TCPServer.server_bind(self)
        host, port = self.server_address[:2]
        self.server_name = socket.getfqdn(host)
        self.server_port = port


class ThreadingHTTPServer(socketserver.ThreadingMixIn, HTTPServer):
    """HTTPServer that handles each request in its own daemon thread."""

    daemon_threads = True


class BaseHTTPRequestHandler(socketserver.StreamRequestHandler):

    """HTTP request handler base class.

    The following explanation of HTTP serves to guide you through the
    code as well as to expose any misunderstandings I may have about
    HTTP (so you don't need to read the code to figure out I'm wrong
    :-).

    HTTP (HyperText Transfer Protocol) is an extensible protocol on
    top of a reliable stream transport (e.g. TCP/IP).  The protocol
    recognizes three parts to a request:

    1. One line identifying the request type and path
    2. An optional set of RFC-822-style headers
    3. An optional data part

    The headers and data are separated by a blank line.

    The first line of the request has the form

    <command> <path> <version>

    where <command> is a (case-sensitive) keyword such as GET or POST,
    <path> is a string containing path information for the request,
    and <version> should be the string "HTTP/1.0" or "HTTP/1.1".
    <path> is encoded using the URL encoding scheme (using %xx to signify
    the ASCII character with hex code xx).

    The specification specifies that lines are separated by CRLF but
    for compatibility with the widest range of clients recommends
    servers also handle LF.  Similarly, whitespace in the request line
    is treated sensibly (allowing multiple spaces between components
    and allowing trailing whitespace).

    Similarly, for output, lines ought to be separated by CRLF pairs
    but most clients grok LF characters just fine.

    If the first line of the request has the form

    <command> <path>

    (i.e. <version> is left out) then this is assumed to be an HTTP
    0.9 request; this form has no optional headers and data part and
    the reply consists of just the data.

    The reply form of the HTTP 1.x protocol again has three parts:

    1. One line giving the response code
    2. An optional set of RFC-822-style headers
    3. The data

    Again, the headers and data are separated by a blank line.

    The response code line has the form

    <version> <responsecode> <responsestring>

    where <version> is the protocol version ("HTTP/1.0" or "HTTP/1.1"),
    <responsecode> is a 3-digit response code indicating success or
    failure of the request, and <responsestring> is an optional
    human-readable string explaining what the response code means.

    This server parses the request and the headers, and then calls a
    function specific to the request type (<command>).  Specifically,
    a request SPAM will be handled by a method do_SPAM().  If no
    such method exists the server sends an error response to the
    client.  If it exists, it is called with no arguments:

    do_SPAM()

    Note that the request name is case sensitive (i.e. SPAM and spam
    are different requests).

    The various request details are stored in instance variables:

    - client_address is the client IP address in the form (host,
    port);

    - command, path and version are the broken-down request line;

    - headers is an instance of email.message.Message (or a derived
    class) containing the header information;

    - rfile is a file object open for reading positioned at the
    start of the optional input data part;

    - wfile is a file object open for writing.

    IT IS IMPORTANT TO ADHERE TO THE PROTOCOL FOR WRITING!

    The first thing to be written must be the response line.  Then
    follow 0 or more header lines, then a blank line, and then the
    actual data (if any).  The meaning of the header lines depends on
    the command executed by the server; in most cases, when data is
    returned, there should be at least one header line of the form

    Content-type: <type>/<subtype>

    where <type> and <subtype> should be registered MIME types,
    e.g. "text/html" or "text/plain".

    """

    # The Python system version, truncated to its first component.
    sys_version = "Python/" + sys.version.split()[0]

    # The server software version.  You may want to override this.
    # The format is multiple whitespace-separated strings,
    # where each string is of the form name[/version].
    server_version = "BaseHTTP/" + __version__

    error_message_format = DEFAULT_ERROR_MESSAGE
    error_content_type = DEFAULT_ERROR_CONTENT_TYPE

    # The default request version.  This only affects responses up until
    # the point where the request line is parsed, so it mainly decides what
    # the client gets back when sending a malformed request line.
    # Most web servers default to HTTP 0.9, i.e. don't send a status line.
    default_request_version = "HTTP/0.9"

    def parse_request(self):
        """Parse a request (internal).

        The request should be stored in self.raw_requestline; the results
        are in self.command, self.path, self.request_version and
        self.headers.

        Return True for success, False for failure; on failure, any relevant
        error response has already been sent back.

        """
        self.command = None  # set in case of error on the first line
        self.request_version = version = self.default_request_version
        self.close_connection = True
        requestline = str(self.raw_requestline, 'iso-8859-1')
        requestline = requestline.rstrip('\r\n')
        self.requestline = requestline
        words = requestline.split()
        if len(words) == 0:
            return False

        if len(words) >= 3:  # Enough to determine protocol version
            version = words[-1]
            try:
                if not version.startswith('HTTP/'):
                    raise ValueError
                base_version_number = version.split('/', 1)[1]
                version_number = base_version_number.split(".")
                # RFC 2145 section 3.1 says there can be only one "." and
                #   - major and minor numbers MUST be treated as
                #      separate integers;
                #   - HTTP/2.4 is a lower version than HTTP/2.13, which in
                #      turn is lower than HTTP/12.3;
                #   - Leading zeros MUST be ignored by recipients.
                if len(version_number) != 2:
                    raise ValueError
                # int() alone is too lenient: it accepts signs, underscores
                # and surrounding whitespace ("+1", "1_0"), and converting
                # an arbitrarily long digit string is wasted work on
                # attacker-controlled input.
                if any(not component.isdigit()
                       for component in version_number):
                    raise ValueError("non digit in http version")
                if any(len(component) > 10 for component in version_number):
                    raise ValueError("unreasonable length http version")
                version_number = int(version_number[0]), int(version_number[1])
            except (ValueError, IndexError):
                self.send_error(
                    HTTPStatus.BAD_REQUEST,
                    "Bad request version (%r)" % version)
                return False
            if version_number >= (1, 1) and self.protocol_version >= "HTTP/1.1":
                self.close_connection = False
            if version_number >= (2, 0):
                self.send_error(
                    HTTPStatus.HTTP_VERSION_NOT_SUPPORTED,
                    "Invalid HTTP version (%s)" % base_version_number)
                return False
            self.request_version = version

        if not 2 <= len(words) <= 3:
            self.send_error(
                HTTPStatus.BAD_REQUEST,
                "Bad request syntax (%r)" % requestline)
            return False
        command, path = words[:2]
        if len(words) == 2:
            # Two words means an HTTP/0.9 request, which only knows GET.
            self.close_connection = True
            if command != 'GET':
                self.send_error(
                    HTTPStatus.BAD_REQUEST,
                    "Bad HTTP/0.9 request type (%r)" % command)
                return False
        self.command, self.path = command, path

        # gh-87389: The purpose of replacing '//' with '/' is to protect
        # against open redirect attacks possibly triggered if the path starts
        # with '//' because http clients treat //path as an absolute URI
        # without scheme (similar to http://path) rather than a path.
        if self.path.startswith('//'):
            self.path = '/' + self.path.lstrip('/')  # Reduce to a single /

        # Examine the headers and look for a Connection directive.
        try:
            self.headers = http.client.parse_headers(self.rfile,
                                                     _class=self.MessageClass)
        except http.client.LineTooLong as err:
            self.send_error(
                HTTPStatus.REQUEST_HEADER_FIELDS_TOO_LARGE,
                "Line too long",
                str(err))
            return False
        except http.client.HTTPException as err:
            self.send_error(
                HTTPStatus.REQUEST_HEADER_FIELDS_TOO_LARGE,
                "Too many headers",
                str(err)
            )
            return False

        conntype = self.headers.get('Connection', "")
        if conntype.lower() == 'close':
            self.close_connection = True
        elif (conntype.lower() == 'keep-alive' and
              self.protocol_version >= "HTTP/1.1"):
            self.close_connection = False
        # Examine the headers and look for an Expect directive
        expect = self.headers.get('Expect', "")
        if (expect.lower() == "100-continue" and
                self.protocol_version >= "HTTP/1.1" and
                self.request_version >= "HTTP/1.1"):
            if not self.handle_expect_100():
                return False
        return True

    def handle_expect_100(self):
        """Decide what to do with an "Expect: 100-continue" header.

        If the client is expecting a 100 Continue response, we must
        respond with either a 100 Continue or a final response before
        waiting for the request body. The default is to always respond
        with a 100 Continue. You can behave differently (for example,
        reject unauthorized requests) by overriding this method.

        This method should either return True (possibly after sending
        a 100 Continue response) or send an error response and return
        False.

        """
        self.send_response_only(HTTPStatus.CONTINUE)
        self.end_headers()
        return True

    def handle_one_request(self):
        """Handle a single HTTP request.

        You normally don't need to override this method; see the class
        __doc__ string for information on how to handle specific HTTP
        commands such as GET and POST.

        """
        try:
            # Read one byte more than the limit so an over-long request
            # line can be told apart from one of exactly the limit.
            self.raw_requestline = self.rfile.readline(65537)
            if len(self.raw_requestline) > 65536:
                self.requestline = ''
                self.request_version = ''
                self.command = ''
                self.send_error(HTTPStatus.REQUEST_URI_TOO_LONG)
                return
            if not self.raw_requestline:
                self.close_connection = True
                return
            if not self.parse_request():
                # An error code has been sent, just exit
                return
            mname = 'do_' + self.command
            if not hasattr(self, mname):
                self.send_error(
                    HTTPStatus.NOT_IMPLEMENTED,
                    "Unsupported method (%r)" % self.command)
                return
            method = getattr(self, mname)
            method()
            self.wfile.flush() #actually send the response if not already done.
        except socket.timeout as e:
            #a read or a write timed out.  Discard this connection
            self.log_error("Request timed out: %r", e)
            self.close_connection = True
            return

    def handle(self):
        """Handle multiple requests if necessary."""
        self.close_connection = True

        self.handle_one_request()
        while not self.close_connection:
            self.handle_one_request()

    def send_error(self, code, message=None, explain=None):
        """Send and log an error reply.

        Arguments are
        * code:    an HTTP error code
                   3 digits
        * message: a simple optional 1 line reason phrase.
                   *( HTAB / SP / VCHAR / %x80-FF )
                   defaults to short entry matching the response code
        * explain: a detailed message defaults to the long entry
                   matching the response code.

        This sends an error response (so it must be called before any
        output has been generated), logs the error, and finally sends
        a piece of HTML explaining the error to the user.

        """

        try:
            shortmsg, longmsg = self.responses[code]
        except KeyError:
            shortmsg, longmsg = '???', '???'
        if message is None:
            message = shortmsg
        if explain is None:
            explain = longmsg
        self.log_error("code %d, message %s", code, message)
        self.send_response(code, message)
        self.send_header('Connection', 'close')

        # Message body is omitted for cases described in:
        #  - RFC7230: 3.3. 1xx, 204(No Content), 304(Not Modified)
        #  - RFC7231: 6.3.6. 205(Reset Content)
        body = None
        if (code >= 200 and
            code not in (HTTPStatus.NO_CONTENT,
                         HTTPStatus.RESET_CONTENT,
                         HTTPStatus.NOT_MODIFIED)):
            # HTML encode to prevent Cross Site Scripting attacks
            # (see bug #1100201)
            content = (self.error_message_format % {
                'code': code,
                'message': html.escape(message, quote=False),
                'explain': html.escape(explain, quote=False)
            })
            body = content.encode('UTF-8', 'replace')
            self.send_header("Content-Type", self.error_content_type)
            self.send_header('Content-Length', str(len(body)))
        self.end_headers()

        if self.command != 'HEAD' and body:
            self.wfile.write(body)

    def send_response(self, code, message=None):
        """Add the response header to the headers buffer and log the
        response code.

        Also send two standard headers with the server software
        version and the current date.

        """
        self.log_request(code)
        self.send_response_only(code, message)
        self.send_header('Server', self.version_string())
        self.send_header('Date', self.date_time_string())

    def send_response_only(self, code, message=None):
        """Send the response header only."""
        if self.request_version != 'HTTP/0.9':
            if message is None:
                if code in self.responses:
                    message = self.responses[code][0]
                else:
                    message = ''
            if not hasattr(self, '_headers_buffer'):
                self._headers_buffer = []
            self._headers_buffer.append(("%s %d %s\r\n" %
                    (self.protocol_version, code, message)).encode(
                        'latin-1', 'strict'))

    def send_header(self, keyword, value):
        """Send a MIME header to the headers buffer."""
        if self.request_version != 'HTTP/0.9':
            if not hasattr(self, '_headers_buffer'):
                self._headers_buffer = []
            self._headers_buffer.append(
                ("%s: %s\r\n" % (keyword, value)).encode('latin-1', 'strict'))

        # A Connection header we send ourselves also decides whether the
        # connection is kept open after this response.
        if keyword.lower() == 'connection':
            if value.lower() == 'close':
                self.close_connection = True
            elif value.lower() == 'keep-alive':
                self.close_connection = False

    def end_headers(self):
        """Send the blank line ending the MIME headers."""
        if self.request_version != 'HTTP/0.9':
            # The buffer is created lazily by the send_* methods; create it
            # here too so calling end_headers() first does not raise
            # AttributeError.
            if not hasattr(self, '_headers_buffer'):
                self._headers_buffer = []
            self._headers_buffer.append(b"\r\n")
            self.flush_headers()

    def flush_headers(self):
        """Write any buffered header lines to wfile and empty the buffer."""
        if hasattr(self, '_headers_buffer'):
            self.wfile.write(b"".join(self._headers_buffer))
            self._headers_buffer = []

    def log_request(self, code='-', size='-'):
        """Log an accepted request.

        This is called by send_response().

        """
        if isinstance(code, HTTPStatus):
            code = code.value
        self.log_message('"%s" %s %s',
                         self.requestline, str(code), str(size))

    def log_error(self, format, *args):
        """Log an error.

        This is called when a request cannot be fulfilled.  By
        default it passes the message on to log_message().

        Arguments are the same as for log_message().

        XXX This should go to the separate error log.

        """

        self.log_message(format, *args)

    # Translation table used by log_message(): C0/C1 control characters
    # become \xHH escapes and backslash is doubled, so text taken from the
    # request cannot forge log lines or drive the terminal.
    _control_char_table = str.maketrans(
        {c: '\\x%02x' % c for c in [*range(0x20), *range(0x7f, 0xa0)]})
    _control_char_table[ord('\\')] = '\\\\'

    def log_message(self, format, *args):
        """Log an arbitrary message.

        This is used by all other logging functions.  Override
        it if you have specific logging wishes.

        The first argument, FORMAT, is a format string for the
        message to be logged.  If the format string contains
        any % escapes requiring parameters, they should be
        specified as subsequent arguments (it's just like
        printf!).

        The client ip and current date/time are prefixed to
        every message.

        Unicode control characters are replaced with escaped hex
        before writing the output to stderr.

        """

        message = format % args
        sys.stderr.write("%s - - [%s] %s\n" %
                         (self.address_string(),
                          self.log_date_time_string(),
                          message.translate(self._control_char_table)))

    def version_string(self):
        """Return the server software version string."""
        return self.server_version + ' ' + self.sys_version

    def date_time_string(self, timestamp=None):
        """Return the current date and time formatted for a message header."""
        if timestamp is None:
            timestamp = time.time()
        return email.utils.formatdate(timestamp, usegmt=True)

    def log_date_time_string(self):
        """Return the current time formatted for logging."""
        now = time.time()
        year, month, day, hh, mm, ss, x, y, z = time.localtime(now)
        s = "%02d/%3s/%04d %02d:%02d:%02d" % (
                day, self.monthname[month], year, hh, mm, ss)
        return s

    weekdayname = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']

    # Index 0 is unused so that month numbers 1-12 index directly.
    monthname = [None,
                 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

    def address_string(self):
        """Return the client address."""

        return self.client_address[0]

    # Essentially static class variables

    # The version of the HTTP protocol we support.
    # Set this to HTTP/1.1 to enable automatic keepalive
    protocol_version = "HTTP/1.0"

    # MessageClass used to parse headers
    MessageClass = http.client.HTTPMessage

    # hack to maintain backwards compatibility
    responses = {
        v: (v.phrase, v.description)
        for v in HTTPStatus.__members__.values()
    }


class SimpleHTTPRequestHandler(BaseHTTPRequestHandler):

    """Simple HTTP request handler with GET and HEAD commands.

    This serves files from the current directory and any of its
    subdirectories.  The MIME type for files is determined by
    calling the .guess_type() method.

    The GET and HEAD requests are identical except that the HEAD
    request omits the actual contents of the file.

    """

    server_version = "SimpleHTTP/" + __version__

    def __init__(self, *args, directory=None, **kwargs):
        """Remember the directory to serve, defaulting to the current one."""
        if directory is None:
            directory = os.getcwd()
        # Must be set before the base __init__ runs, because that is where
        # the request is actually handled.
        self.directory = directory
        super().__init__(*args, **kwargs)

    def do_GET(self):
        """Serve a GET request."""
        f = self.send_head()
        if f:
            try:
                self.copyfile(f, self.wfile)
            finally:
                f.close()

    def do_HEAD(self):
        """Serve a HEAD request."""
        f = self.send_head()
        if f:
            f.close()

    def send_head(self):
        """Common code for GET and HEAD commands.

        This sends the response code and MIME headers.

        Return value is either a file object (which has to be copied
        to the outputfile by the caller unless the command was HEAD,
        and must be closed by the caller under all circumstances), or
        None, in which case the caller has nothing further to do.

        """
        path = self.translate_path(self.path)
        f = None
        if os.path.isdir(path):
            parts = urllib.parse.urlsplit(self.path)
            if not parts.path.endswith('/'):
                # redirect browser - doing basically what apache does
                self.send_response(HTTPStatus.MOVED_PERMANENTLY)
                new_parts = (parts[0], parts[1], parts[2] + '/',
                             parts[3], parts[4])
                new_url = urllib.parse.urlunsplit(new_parts)
                self.send_header("Location", new_url)
                self.end_headers()
                return None
            for index in "index.html", "index.htm":
                index = os.path.join(path, index)
                if os.path.exists(index):
                    path = index
                    break
            else:
                return self.list_directory(path)
        ctype = self.guess_type(path)
        # check for trailing "/" which should return 404. See Issue17324
        # The test for this was added in test_httpserver.py
        # However, some OS platforms accept a trailingSlash as a filename
        # See discussion on python-dev and Issue34711 regarding
        # parseing and rejection of filenames with a trailing slash
        if path.endswith("/"):
            self.send_error(HTTPStatus.NOT_FOUND, "File not found")
            return None
        try:
            f = open(path, 'rb')
        except OSError:
            self.send_error(HTTPStatus.NOT_FOUND, "File not found")
            return None

        try:
            fs = os.fstat(f.fileno())
            # Use browser cache if possible
            if ("If-Modified-Since" in self.headers
                    and "If-None-Match" not in self.headers):
                # compare If-Modified-Since and time of last file modification
                try:
                    ims = email.utils.parsedate_to_datetime(
                        self.headers["If-Modified-Since"])
                except (TypeError, IndexError, OverflowError, ValueError):
                    # ignore ill-formed values
                    pass
                else:
                    if ims.tzinfo is None:
                        # obsolete format with no timezone, cf.
                        # https://tools.ietf.org/html/rfc7231#section-7.1.1.1
                        ims = ims.replace(tzinfo=datetime.timezone.utc)
                    if ims.tzinfo is datetime.timezone.utc:
                        # compare to UTC datetime of last modification
                        last_modif = datetime.datetime.fromtimestamp(
                            fs.st_mtime, datetime.timezone.utc)
                        # remove microseconds, like in If-Modified-Since
                        last_modif = last_modif.replace(microsecond=0)

                        if last_modif <= ims:
                            self.send_response(HTTPStatus.NOT_MODIFIED)
                            self.end_headers()
                            f.close()
                            return None

            self.send_response(HTTPStatus.OK)
            self.send_header("Content-type", ctype)
            # fs[6] is st_size
            self.send_header("Content-Length", str(fs[6]))
            self.send_header("Last-Modified",
                self.date_time_string(fs.st_mtime))
            self.end_headers()
            return f
        except:
            # Close the file on any failure, then let the error propagate.
            f.close()
            raise

    def list_directory(self, path):
        """Helper to produce a directory listing (absent index.html).

        Return value is either a file object, or None (indicating an
        error).  In either case, the headers are sent, making the
        interface the same as for send_head().

        """
        try:
            entries = os.listdir(path)
        except OSError:
            self.send_error(
                HTTPStatus.NOT_FOUND,
                "No permission to list directory")
            return None
        entries.sort(key=lambda a: a.lower())
        r = []
        try:
            displaypath = urllib.parse.unquote(self.path,
                                               errors='surrogatepass')
        except UnicodeDecodeError:
            # Fall back to the default error handling, still on the request
            # path: the local filesystem path must not be shown to clients.
            displaypath = urllib.parse.unquote(self.path)
        displaypath = html.escape(displaypath, quote=False)
        enc = sys.getfilesystemencoding()
        title = 'Directory listing for %s' % displaypath
        r.append('<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" '
                 '"http://www.w3.org/TR/html4/strict.dtd">')
        r.append('<html>\n<head>')
        r.append('<meta http-equiv="Content-Type" '
                 'content="text/html; charset=%s">' % enc)
        r.append('<title>%s</title>\n</head>' % title)
        r.append('<body>\n<h1>%s</h1>' % title)
        r.append('<hr>\n<ul>')
        for name in entries:
            fullname = os.path.join(path, name)
            displayname = linkname = name
            # Append / for directories or @ for symbolic links
            if os.path.isdir(fullname):
                displayname = name + "/"
                linkname = name + "/"
            if os.path.islink(fullname):
                displayname = name + "@"
                # Note: a link to a directory displays with @ and links with /
            r.append('<li><a href="%s">%s</a></li>'
                    % (urllib.parse.quote(linkname,
                                          errors='surrogatepass'),
                       html.escape(displayname, quote=False)))
        r.append('</ul>\n<hr>\n</body>\n</html>\n')
        encoded = '\n'.join(r).encode(enc, 'surrogateescape')
        f = io.BytesIO()
        f.write(encoded)
        f.seek(0)
        self.send_response(HTTPStatus.OK)
        self.send_header("Content-type", "text/html; charset=%s" % enc)
        self.send_header("Content-Length", str(len(encoded)))
        self.end_headers()
        return f

    def translate_path(self, path):
        """Translate a /-separated PATH to the local filename syntax.

        Components that mean special things to the local file system
        (e.g. drive or directory names) are ignored.  (XXX They should
        probably be diagnosed.)

        """
        # abandon query parameters
        path = path.split('?',1)[0]
        path = path.split('#',1)[0]
        # Don't forget explicit trailing slash when normalizing. Issue17324
        trailing_slash = path.rstrip().endswith('/')
        try:
            path = urllib.parse.unquote(path, errors='surrogatepass')
        except UnicodeDecodeError:
            path = urllib.parse.unquote(path)
        path = posixpath.normpath(path)
        words = path.split('/')
        words = filter(None, words)
        path = self.directory
        for word in words:
            if os.path.dirname(word) or word in (os.curdir, os.pardir):
                # Ignore components that are not a simple file/directory name
                continue
            path = os.path.join(path, word)
        if trailing_slash:
            path += '/'
        return path

    def copyfile(self, source, outputfile):
        """Copy all data between two file objects.

        The SOURCE argument is a file object open for reading
        (or anything with a read() method) and the DESTINATION
        argument is a file object open for writing (or
        anything with a write() method).

        The only reason for overriding this would be to change
        the block size or perhaps to replace newlines by CRLF
        -- note however that this the default server uses this
        to copy binary data as well.

        """
        shutil.copyfileobj(source, outputfile)

    def guess_type(self, path):
        """Guess the type of a file.

        Argument is a PATH (a filename).

        Return value is a string of the form type/subtype,
        usable for a MIME Content-type header.

        The default implementation looks the file's extension
        up in the table self.extensions_map, using application/octet-stream
        as a default; however it would be permissible (if
        slow) to look inside the data to make a better guess.

        """

        base, ext = posixpath.splitext(path)
        # Try the extension as written first, then case-insensitively.
        if ext in self.extensions_map:
            return self.extensions_map[ext]
        ext = ext.lower()
        if ext in self.extensions_map:
            return self.extensions_map[ext]
        else:
            return self.extensions_map['']

    if not mimetypes.inited:
        mimetypes.init() # try to read system mime.types
    extensions_map = mimetypes.types_map.copy()
    extensions_map.update({
        '': 'application/octet-stream', # Default
        '.py': 'text/plain',
        '.c': 'text/plain',
        '.h': 'text/plain',
        })


# Utilities for CGIHTTPRequestHandler

def _url_collapse_path(path):
    """
    Given a URL path, remove extra '/'s and '.' path elements and collapse
    any '..' references and returns a collapsed path.

    Implements something akin to RFC-2396 5.2 step 6 to parse relative paths.
    The utility of this function is limited to is_cgi method and helps
    preventing some security attacks.

    Returns: The reconstituted URL, which will always start with a '/'.

    Raises: IndexError if too many '..' occur within the path.

    """
    # Query component should not be involved.
    path, _, query = path.partition('?')
    path = urllib.parse.unquote(path)

    # Similar to os.path.split(os.path.normpath(path)) but specific to URL
    # path semantics rather than local operating system semantics.
    path_parts = path.split('/')
    head_parts = []
    for part in path_parts[:-1]:
        if part == '..':
            head_parts.pop() # IndexError if more '..' than prior parts
        elif part and part != '.':
            head_parts.append( part )
    if path_parts:
        tail_part = path_parts.pop()
        if tail_part:
            if tail_part == '..':
                head_parts.pop()
                tail_part = ''
            elif tail_part == '.':
                tail_part = ''
    else:
        tail_part = ''

    if query:
        tail_part = '?'.join((tail_part, query))

    splitpath = ('/' + '/'.join(head_parts), tail_part)
    collapsed_path = "/".join(splitpath)

    return collapsed_path



nobody = None

def nobody_uid():
    """Internal routine to get nobody's uid"""
    global nobody
    if nobody:
        return nobody
    try:
        import pwd
    except ImportError:
        return -1
    try:
        nobody = pwd.getpwnam('nobody')[2]
    except KeyError:
        # No 'nobody' account: use one past the highest uid in use.
        nobody = 1 + max(x[2] for x in pwd.getpwall())
    return nobody


def executable(path):
    """Test for executable file."""
    return os.access(path, os.X_OK)


class CGIHTTPRequestHandler(SimpleHTTPRequestHandler):

    """Complete HTTP server with GET, HEAD and POST commands.

    GET and HEAD also support running CGI scripts.

    The POST command is *only* implemented for CGI scripts.

    """

    # Determine platform specifics
    have_fork = hasattr(os, 'fork')

    # Make rfile unbuffered -- we need to read one line and then pass
    # the rest to a subprocess, so we can't use buffered input.
    rbufsize = 0

    def do_POST(self):
        """Serve a POST request.

        This is only implemented for CGI scripts.

        """

        if self.is_cgi():
            self.run_cgi()
        else:
            self.send_error(
                HTTPStatus.NOT_IMPLEMENTED,
                "Can only POST to CGI scripts")

    def send_head(self):
        """Version of send_head that support CGI scripts"""
        if self.is_cgi():
            return self.run_cgi()
        else:
            return SimpleHTTPRequestHandler.send_head(self)

    def is_cgi(self):
        """Test whether self.path corresponds to a CGI script.

        Returns True and updates the cgi_info attribute to the tuple
        (dir, rest) if self.path requires running a CGI script.
        Returns False otherwise.

        If any exception is raised, the caller should assume that
        self.path was rejected as invalid and act accordingly.

        The default implementation tests whether the normalized url
        path begins with one of the strings in self.cgi_directories
        (and the next character is a '/' or the end of the string).

        """
        collapsed_path = _url_collapse_path(self.path)
        dir_sep = collapsed_path.find('/', 1)
        head, tail = collapsed_path[:dir_sep], collapsed_path[dir_sep+1:]
        if head in self.cgi_directories:
            self.cgi_info = head, tail
            return True
        return False


    cgi_directories = ['/cgi-bin', '/htbin']

    def is_executable(self, path):
        """Test whether argument path is an executable file."""
        return executable(path)

    def is_python(self, path):
        """Test whether argument path is a Python script."""
        head, tail = os.path.splitext(path)
        return tail.lower() in (".py", ".pyw")

    def run_cgi(self):
        """Execute a CGI script."""
        dir, rest = self.cgi_info
        path = dir + '/' + rest
        i = path.find('/', len(dir)+1)
        while i >= 0:
            nextdir = path[:i]
            nextrest = path[i+1:]

            scriptdir = self.translate_path(nextdir)
            if os.path.isdir(scriptdir):
                dir, rest = nextdir, nextrest
                i = path.find('/', len(dir)+1)
            else:
                break

        # find an explicit query string, if present.
        rest, _, query = rest.partition('?')

        # dissect the part after the directory name into a script name &
        # a possible additional path, to be stored in PATH_INFO.
        i = rest.find('/')
        if i >= 0:
            script, rest = rest[:i], rest[i:]
        else:
            script, rest = rest, ''

        scriptname = dir + '/' + script
        scriptfile = self.translate_path(scriptname)
        if not os.path.exists(scriptfile):
            self.send_error(
                HTTPStatus.NOT_FOUND,
                "No such CGI script (%r)" % scriptname)
            return
        if not os.path.isfile(scriptfile):
            self.send_error(
                HTTPStatus.FORBIDDEN,
                "CGI script is not a plain file (%r)" % scriptname)
            return
        ispy = self.is_python(scriptname)
        if self.have_fork or not ispy:
            if not self.is_executable(scriptfile):
                self.send_error(
                    HTTPStatus.FORBIDDEN,
                    "CGI script is not executable (%r)" % scriptname)
                return

        # Reference: http://hoohoo.ncsa.uiuc.edu/cgi/env.html
        # XXX Much of the following could be prepared ahead of time!
        env = copy.deepcopy(os.environ)
        env['SERVER_SOFTWARE'] = self.version_string()
        env['SERVER_NAME'] = self.server.server_name
        env['GATEWAY_INTERFACE'] = 'CGI/1.1'
        env['SERVER_PROTOCOL'] = self.protocol_version
        env['SERVER_PORT'] = str(self.server.server_port)
        env['REQUEST_METHOD'] = self.command
        uqrest = urllib.parse.unquote(rest)
        env['PATH_INFO'] = uqrest
        env['PATH_TRANSLATED'] = self.translate_path(uqrest)
        env['SCRIPT_NAME'] = scriptname
        if query:
            env['QUERY_STRING'] = query
        env['REMOTE_ADDR'] = self.client_address[0]
        authorization = self.headers.get("authorization")
        if authorization:
            authorization = authorization.split()
            if len(authorization) == 2:
                import base64, binascii
                env['AUTH_TYPE'] = authorization[0]
                if authorization[0].lower() == "basic":
                    try:
                        authorization = authorization[1].encode('ascii')
                        authorization = base64.decodebytes(authorization).\
                                        decode('ascii')
                    except (binascii.Error, UnicodeError):
                        pass
                    else:
                        authorization = authorization.split(':')
1121 if len(authorization) == 2: 1122 env['REMOTE_USER'] = authorization[0] 1123 # XXX REMOTE_IDENT 1124 if self.headers.get('content-type') is None: 1125 env['CONTENT_TYPE'] = self.headers.get_content_type() 1126 else: 1127 env['CONTENT_TYPE'] = self.headers['content-type'] 1128 length = self.headers.get('content-length') 1129 if length: 1130 env['CONTENT_LENGTH'] = length 1131 referer = self.headers.get('referer') 1132 if referer: 1133 env['HTTP_REFERER'] = referer 1134 accept = [] 1135 for line in self.headers.getallmatchingheaders('accept'): 1136 if line[:1] in "\t\n\r ": 1137 accept.append(line.strip()) 1138 else: 1139 accept = accept + line[7:].split(',') 1140 env['HTTP_ACCEPT'] = ','.join(accept) 1141 ua = self.headers.get('user-agent') 1142 if ua: 1143 env['HTTP_USER_AGENT'] = ua 1144 co = filter(None, self.headers.get_all('cookie', [])) 1145 cookie_str = ', '.join(co) 1146 if cookie_str: 1147 env['HTTP_COOKIE'] = cookie_str 1148 # XXX Other HTTP_* headers 1149 # Since we're setting the env in the parent, provide empty 1150 # values to override previously set values 1151 for k in ('QUERY_STRING', 'REMOTE_HOST', 'CONTENT_LENGTH', 1152 'HTTP_USER_AGENT', 'HTTP_COOKIE', 'HTTP_REFERER'): 1153 env.setdefault(k, "") 1154 1155 self.send_response(HTTPStatus.OK, "Script output follows") 1156 self.flush_headers() 1157 1158 decoded_query = query.replace('+', ' ') 1159 1160 if self.have_fork: 1161 # Unix -- fork as we should 1162 args = [script] 1163 if '=' not in decoded_query: 1164 args.append(decoded_query) 1165 nobody = nobody_uid() 1166 self.wfile.flush() # Always flush before forking 1167 pid = os.fork() 1168 if pid != 0: 1169 # Parent 1170 pid, sts = os.waitpid(pid, 0) 1171 # throw away additional data [see bug #427345] 1172 while select.select([self.rfile], [], [], 0)[0]: 1173 if not self.rfile.read(1): 1174 break 1175 if sts: 1176 self.log_error("CGI script exit status %#x", sts) 1177 return 1178 # Child 1179 try: 1180 try: 1181 os.setuid(nobody) 1182 except 
OSError: 1183 pass 1184 os.dup2(self.rfile.fileno(), 0) 1185 os.dup2(self.wfile.fileno(), 1) 1186 os.execve(scriptfile, args, env) 1187 except: 1188 self.server.handle_error(self.request, self.client_address) 1189 os._exit(127) 1190 1191 else: 1192 # Non-Unix -- use subprocess 1193 import subprocess 1194 cmdline = [scriptfile] 1195 if self.is_python(scriptfile): 1196 interp = sys.executable 1197 if interp.lower().endswith("w.exe"): 1198 # On Windows, use python.exe, not pythonw.exe 1199 interp = interp[:-5] + interp[-4:] 1200 cmdline = [interp, '-u'] + cmdline 1201 if '=' not in query: 1202 cmdline.append(query) 1203 self.log_message("command: %s", subprocess.list2cmdline(cmdline)) 1204 try: 1205 nbytes = int(length) 1206 except (TypeError, ValueError): 1207 nbytes = 0 1208 p = subprocess.Popen(cmdline, 1209 stdin=subprocess.PIPE, 1210 stdout=subprocess.PIPE, 1211 stderr=subprocess.PIPE, 1212 env = env 1213 ) 1214 if self.command.lower() == "post" and nbytes > 0: 1215 data = self.rfile.read(nbytes) 1216 else: 1217 data = None 1218 # throw away additional data [see bug #427345] 1219 while select.select([self.rfile._sock], [], [], 0)[0]: 1220 if not self.rfile._sock.recv(1): 1221 break 1222 stdout, stderr = p.communicate(data) 1223 self.wfile.write(stdout) 1224 if stderr: 1225 self.log_error('%s', stderr) 1226 p.stderr.close() 1227 p.stdout.close() 1228 status = p.returncode 1229 if status: 1230 self.log_error("CGI script exit status %#x", status) 1231 else: 1232 self.log_message("CGI script exited OK") 1233 1234 1235def _get_best_family(*address): 1236 infos = socket.getaddrinfo( 1237 *address, 1238 type=socket.SOCK_STREAM, 1239 flags=socket.AI_PASSIVE, 1240 ) 1241 family, type, proto, canonname, sockaddr = next(iter(infos)) 1242 return family, sockaddr 1243 1244 1245def test(HandlerClass=BaseHTTPRequestHandler, 1246 ServerClass=ThreadingHTTPServer, 1247 protocol="HTTP/1.0", port=8000, bind=None): 1248 """Test the HTTP request handler class. 
1249 1250 This runs an HTTP server on port 8000 (or the port argument). 1251 1252 """ 1253 ServerClass.address_family, addr = _get_best_family(bind, port) 1254 1255 HandlerClass.protocol_version = protocol 1256 with ServerClass(addr, HandlerClass) as httpd: 1257 host, port = httpd.socket.getsockname()[:2] 1258 url_host = f'[{host}]' if ':' in host else host 1259 print( 1260 f"Serving HTTP on {host} port {port} " 1261 f"(http://{url_host}:{port}/) ..." 1262 ) 1263 try: 1264 httpd.serve_forever() 1265 except KeyboardInterrupt: 1266 print("\nKeyboard interrupt received, exiting.") 1267 sys.exit(0) 1268 1269if __name__ == '__main__': 1270 import argparse 1271 1272 parser = argparse.ArgumentParser() 1273 parser.add_argument('--cgi', action='store_true', 1274 help='Run as CGI Server') 1275 parser.add_argument('--bind', '-b', metavar='ADDRESS', 1276 help='Specify alternate bind address ' 1277 '[default: all interfaces]') 1278 parser.add_argument('--directory', '-d', default=os.getcwd(), 1279 help='Specify alternative directory ' 1280 '[default:current directory]') 1281 parser.add_argument('port', action='store', 1282 default=8000, type=int, 1283 nargs='?', 1284 help='Specify alternate port [default: 8000]') 1285 args = parser.parse_args() 1286 if args.cgi: 1287 handler_class = CGIHTTPRequestHandler 1288 else: 1289 handler_class = partial(SimpleHTTPRequestHandler, 1290 directory=args.directory) 1291 1292 # ensure dual-stack is not disabled; ref #38907 1293 class DualStackServer(ThreadingHTTPServer): 1294 def server_bind(self): 1295 # suppress exception when protocol is IPv4 1296 with contextlib.suppress(Exception): 1297 self.socket.setsockopt( 1298 socket.IPPROTO_IPV6, socket.IPV6_V6ONLY, 0) 1299 return super().server_bind() 1300 1301 test( 1302 HandlerClass=handler_class, 1303 ServerClass=DualStackServer, 1304 port=args.port, 1305 bind=args.bind, 1306 ) 1307