"""HTTP server classes.

Note: BaseHTTPRequestHandler doesn't implement any HTTP request; see
SimpleHTTPRequestHandler for simple implementations of GET, HEAD and POST,
and CGIHTTPRequestHandler for CGI scripts.

It does, however, optionally implement HTTP/1.1 persistent connections,
as of version 0.3.

Notes on CGIHTTPRequestHandler
------------------------------

This class implements GET and POST requests to cgi-bin scripts.

If the os.fork() function is not present (e.g. on Windows),
subprocess.Popen() is used as a fallback, with slightly altered semantics.

In all cases, the implementation is intentionally naive -- all
requests are executed synchronously.

SECURITY WARNING: DON'T USE THIS CODE UNLESS YOU ARE INSIDE A FIREWALL
-- it may execute arbitrary Python code or external programs.

Note that status code 200 is sent prior to execution of a CGI script, so
scripts cannot send other status codes such as 302 (redirect).

XXX To do:

- log requests even later (to capture byte count)
- log user-agent header and other interesting goodies
- send error log to separate file
"""


# See also:
#
# HTTP Working Group                                        T. Berners-Lee
# INTERNET-DRAFT                                            R. T. Fielding
# <draft-ietf-http-v10-spec-00.txt>                     H. Frystyk Nielsen
# Expires September 8, 1995                                  March 8, 1995
#
# URL: http://www.ics.uci.edu/pub/ietf/http/draft-ietf-http-v10-spec-00.txt
#
# and
#
# Network Working Group                                      R. Fielding
# Request for Comments: 2616                                       et al
# Obsoletes: 2068                                              June 1999
# Category: Standards Track
#
# URL: http://www.faqs.org/rfcs/rfc2616.html

# Log files
# ---------
#
# Here's a quote from the NCSA httpd docs about log file format.
#
# | The logfile format is as follows. Each line consists of:
# |
# | host rfc931 authuser [DD/Mon/YYYY:hh:mm:ss] "request" ddd bbbb
# |
# |        host: Either the DNS name or the IP number of the remote client
# |        rfc931: Any information returned by identd for this person,
# |                - otherwise.
# |        authuser: If user sent a userid for authentication, the user name,
# |                  - otherwise.
# |        DD: Day
# |        Mon: Month (calendar name)
# |        YYYY: Year
# |        hh: hour (24-hour format, the machine's timezone)
# |        mm: minutes
# |        ss: seconds
# |        request: The first line of the HTTP request as sent by the client.
# |        ddd: the status code returned by the server, - if not available.
# |        bbbb: the total number of bytes sent,
# |              *not including the HTTP/1.0 header*, - if not available
# |
# | You can determine the name of the file accessed through request.
#
# (Actually, the latter is only true if you know the server configuration
# at the time the request was made!)

__version__ = "0.6"

__all__ = [
    "HTTPServer", "ThreadingHTTPServer", "BaseHTTPRequestHandler",
    "SimpleHTTPRequestHandler", "CGIHTTPRequestHandler",
]

import copy
import datetime
import email.utils
import html
import http.client
import io
import itertools
import mimetypes
import os
import posixpath
import select
import shutil
import socket # For gethostbyaddr()
import socketserver
import sys
import time
import urllib.parse

from http import HTTPStatus


# Default error message template
DEFAULT_ERROR_MESSAGE = """\
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
        "http://www.w3.org/TR/html4/strict.dtd">
<html>
    <head>
        <meta http-equiv="Content-Type" content="text/html;charset=utf-8">
        <title>Error response</title>
    </head>
    <body>
        <h1>Error response</h1>
        <p>Error code: %(code)d</p>
        <p>Message: %(message)s.</p>
        <p>Error code explanation: %(code)s - %(explain)s.</p>
    </body>
</html>
"""

DEFAULT_ERROR_CONTENT_TYPE = "text/html;charset=utf-8"

class HTTPServer(socketserver.TCPServer):
    """TCP server that records its own host name and port after binding."""

    allow_reuse_address = 1    # Seems to make sense in testing environment

    def server_bind(self):
        """Override server_bind to store the server name."""
        socketserver.TCPServer.server_bind(self)
        host, port = self.server_address[:2]
        self.server_name = socket.getfqdn(host)
        self.server_port = port


class ThreadingHTTPServer(socketserver.ThreadingMixIn, HTTPServer):
    """HTTPServer variant that mixes in socketserver.ThreadingMixIn."""

    daemon_threads = True


class BaseHTTPRequestHandler(socketserver.StreamRequestHandler):

    """HTTP request handler base class.

    The following explanation of HTTP serves to guide you through the
    code as well as to expose any misunderstandings I may have about
    HTTP (so you don't need to read the code to figure out I'm wrong
    :-).

    HTTP (HyperText Transfer Protocol) is an extensible protocol on
    top of a reliable stream transport (e.g. TCP/IP).  The protocol
    recognizes three parts to a request:

    1. One line identifying the request type and path
    2. An optional set of RFC-822-style headers
    3. An optional data part

    The headers and data are separated by a blank line.

    The first line of the request has the form

    <command> <path> <version>

    where <command> is a (case-sensitive) keyword such as GET or POST,
    <path> is a string containing path information for the request,
    and <version> should be the string "HTTP/1.0" or "HTTP/1.1".
    <path> is encoded using the URL encoding scheme (using %xx to signify
    the ASCII character with hex code xx).

    The specification specifies that lines are separated by CRLF but
    for compatibility with the widest range of clients recommends
    servers also handle LF.  Similarly, whitespace in the request line
    is treated sensibly (allowing multiple spaces between components
    and allowing trailing whitespace).

    Similarly, for output, lines ought to be separated by CRLF pairs
    but most clients grok LF characters just fine.

    If the first line of the request has the form

    <command> <path>

    (i.e. <version> is left out) then this is assumed to be an HTTP
    0.9 request; this form has no optional headers and data part and
    the reply consists of just the data.

    The reply form of the HTTP 1.x protocol again has three parts:

    1. One line giving the response code
    2. An optional set of RFC-822-style headers
    3. The data

    Again, the headers and data are separated by a blank line.

    The response code line has the form

    <version> <responsecode> <responsestring>

    where <version> is the protocol version ("HTTP/1.0" or "HTTP/1.1"),
    <responsecode> is a 3-digit response code indicating success or
    failure of the request, and <responsestring> is an optional
    human-readable string explaining what the response code means.

    This server parses the request and the headers, and then calls a
    function specific to the request type (<command>).  Specifically,
    a request SPAM will be handled by a method do_SPAM().  If no
    such method exists the server sends an error response to the
    client.  If it exists, it is called with no arguments:

    do_SPAM()

    Note that the request name is case sensitive (i.e. SPAM and spam
    are different requests).

    The various request details are stored in instance variables:

    - client_address is the client IP address in the form (host,
    port);

    - command, path and version are the broken-down request line;

    - headers is an instance of email.message.Message (or a derived
    class) containing the header information;

    - rfile is a file object open for reading positioned at the
    start of the optional input data part;

    - wfile is a file object open for writing.

    IT IS IMPORTANT TO ADHERE TO THE PROTOCOL FOR WRITING!

    The first thing to be written must be the response line.  Then
    follow 0 or more header lines, then a blank line, and then the
    actual data (if any).  The meaning of the header lines depends on
    the command executed by the server; in most cases, when data is
    returned, there should be at least one header line of the form

    Content-type: <type>/<subtype>

    where <type> and <subtype> should be registered MIME types,
    e.g. "text/html" or "text/plain".

    """

    # The Python system version, truncated to its first component.
    sys_version = "Python/" + sys.version.split()[0]

    # The server software version.  You may want to override this.
    # The format is multiple whitespace-separated strings,
    # where each string is of the form name[/version].
    server_version = "BaseHTTP/" + __version__

    error_message_format = DEFAULT_ERROR_MESSAGE
    error_content_type = DEFAULT_ERROR_CONTENT_TYPE

    # The default request version.  This only affects responses up until
    # the point where the request line is parsed, so it mainly decides what
    # the client gets back when sending a malformed request line.
    # Most web servers default to HTTP 0.9, i.e. don't send a status line.
    default_request_version = "HTTP/0.9"

    def parse_request(self):
        """Parse a request (internal).

        The request should be stored in self.raw_requestline; the results
        are in self.command, self.path, self.request_version and
        self.headers.

        Return True for success, False for failure; on failure, any relevant
        error response has already been sent back.

        """
        self.command = None  # set in case of error on the first line
        self.request_version = version = self.default_request_version
        self.close_connection = True
        requestline = str(self.raw_requestline, 'iso-8859-1')
        requestline = requestline.rstrip('\r\n')
        self.requestline = requestline
        words = requestline.split()
        if not words:
            return False

        if len(words) >= 3:  # Enough to determine protocol version
            version = words[-1]
            try:
                if not version.startswith('HTTP/'):
                    raise ValueError
                base_version_number = version.split('/', 1)[1]
                version_number = base_version_number.split(".")
                # RFC 2145 section 3.1 says there can be only one "." and
                #   - major and minor numbers MUST be treated as
                #      separate integers;
                #   - HTTP/2.4 is a lower version than HTTP/2.13, which in
                #      turn is lower than HTTP/12.3;
                #   - Leading zeros MUST be ignored by recipients.
                if len(version_number) != 2:
                    raise ValueError
                # int() alone is too lenient: it accepts signs and
                # underscores ("+1", "-0", "1_1"), none of which are valid
                # in an HTTP version.  Require plain digits only.
                if any(not component.isdigit()
                       for component in version_number):
                    raise ValueError("non digit in http version")
                # Bound the length so int() is never fed an absurdly long
                # digit string taken from the request line.
                if any(len(component) > 10 for component in version_number):
                    raise ValueError("unreasonable length http version")
                version_number = int(version_number[0]), int(version_number[1])
            except (ValueError, IndexError):
                self.send_error(
                    HTTPStatus.BAD_REQUEST,
                    "Bad request version (%r)" % version)
                return False
            if version_number >= (1, 1) and self.protocol_version >= "HTTP/1.1":
                self.close_connection = False
            if version_number >= (2, 0):
                self.send_error(
                    HTTPStatus.HTTP_VERSION_NOT_SUPPORTED,
                    "Invalid HTTP version (%s)" % base_version_number)
                return False
            self.request_version = version

        if not 2 <= len(words) <= 3:
            self.send_error(
                HTTPStatus.BAD_REQUEST,
                "Bad request syntax (%r)" % requestline)
            return False
        command, path = words[:2]
        if len(words) == 2:
            # Two words means an HTTP/0.9 request, which only knows GET.
            self.close_connection = True
            if command != 'GET':
                self.send_error(
                    HTTPStatus.BAD_REQUEST,
                    "Bad HTTP/0.9 request type (%r)" % command)
                return False
        self.command, self.path = command, path

        # A path starting with '//' is read by HTTP clients as a
        # scheme-relative absolute URI (like http://path) rather than a
        # path.  Echoing it back in a Location header would allow an open
        # redirect, so collapse the leading slashes to a single one.
        if self.path.startswith('//'):
            self.path = '/' + self.path.lstrip('/')  # Reduce to a single /

        # Examine the headers and look for a Connection directive.
        try:
            self.headers = http.client.parse_headers(self.rfile,
                                                     _class=self.MessageClass)
        except http.client.LineTooLong as err:
            self.send_error(
                HTTPStatus.REQUEST_HEADER_FIELDS_TOO_LARGE,
                "Line too long",
                str(err))
            return False
        except http.client.HTTPException as err:
            self.send_error(
                HTTPStatus.REQUEST_HEADER_FIELDS_TOO_LARGE,
                "Too many headers",
                str(err)
            )
            return False

        conntype = self.headers.get('Connection', "")
        if conntype.lower() == 'close':
            self.close_connection = True
        elif (conntype.lower() == 'keep-alive' and
              self.protocol_version >= "HTTP/1.1"):
            self.close_connection = False
        # Examine the headers and look for an Expect directive
        expect = self.headers.get('Expect', "")
        if (expect.lower() == "100-continue" and
                self.protocol_version >= "HTTP/1.1" and
                self.request_version >= "HTTP/1.1"):
            if not self.handle_expect_100():
                return False
        return True

    def handle_expect_100(self):
        """Decide what to do with an "Expect: 100-continue" header.

        If the client is expecting a 100 Continue response, we must
        respond with either a 100 Continue or a final response before
        waiting for the request body. The default is to always respond
        with a 100 Continue. You can behave differently (for example,
        reject unauthorized requests) by overriding this method.

        This method should either return True (possibly after sending
        a 100 Continue response) or send an error response and return
        False.

        """
        self.send_response_only(HTTPStatus.CONTINUE)
        self.end_headers()
        return True

    def handle_one_request(self):
        """Handle a single HTTP request.

        You normally don't need to override this method; see the class
        __doc__ string for information on how to handle specific HTTP
        commands such as GET and POST.

        """
        try:
            # Read one byte more than the limit so an over-long request
            # line can be told apart from one that is exactly at the limit.
            self.raw_requestline = self.rfile.readline(65537)
            if len(self.raw_requestline) > 65536:
                self.requestline = ''
                self.request_version = ''
                self.command = ''
                self.send_error(HTTPStatus.REQUEST_URI_TOO_LONG)
                return
            if not self.raw_requestline:
                self.close_connection = True
                return
            if not self.parse_request():
                # An error code has been sent, just exit
                return
            mname = 'do_' + self.command
            if not hasattr(self, mname):
                self.send_error(
                    HTTPStatus.NOT_IMPLEMENTED,
                    "Unsupported method (%r)" % self.command)
                return
            method = getattr(self, mname)
            method()
            self.wfile.flush() #actually send the response if not already done.
        except TimeoutError as e:
            #a read or a write timed out.  Discard this connection
            self.log_error("Request timed out: %r", e)
            self.close_connection = True
            return

    def handle(self):
        """Handle multiple requests if necessary."""
        self.close_connection = True

        self.handle_one_request()
        while not self.close_connection:
            self.handle_one_request()

    def send_error(self, code, message=None, explain=None):
        """Send and log an error reply.

        Arguments are
        * code:    an HTTP error code
                   3 digits
        * message: a simple optional 1 line reason phrase.
                   *( HTAB / SP / VCHAR / %x80-FF )
                   defaults to short entry matching the response code
        * explain: a detailed message defaults to the long entry
                   matching the response code.

        This sends an error response (so it must be called before any
        output has been generated), logs the error, and finally sends
        a piece of HTML explaining the error to the user.

        """

        try:
            shortmsg, longmsg = self.responses[code]
        except KeyError:
            shortmsg, longmsg = '???', '???'
        if message is None:
            message = shortmsg
        if explain is None:
            explain = longmsg
        self.log_error("code %d, message %s", code, message)
        self.send_response(code, message)
        self.send_header('Connection', 'close')

        # Message body is omitted for cases described in:
        #  - RFC7230: 3.3. 1xx, 204(No Content), 304(Not Modified)
        #  - RFC7231: 6.3.6. 205(Reset Content)
        body = None
        if (code >= 200 and
            code not in (HTTPStatus.NO_CONTENT,
                         HTTPStatus.RESET_CONTENT,
                         HTTPStatus.NOT_MODIFIED)):
            # HTML encode to prevent Cross Site Scripting attacks
            # (see bug #1100201)
            content = (self.error_message_format % {
                'code': code,
                'message': html.escape(message, quote=False),
                'explain': html.escape(explain, quote=False)
            })
            body = content.encode('UTF-8', 'replace')
            self.send_header("Content-Type", self.error_content_type)
            self.send_header('Content-Length', str(len(body)))
        self.end_headers()

        if self.command != 'HEAD' and body:
            self.wfile.write(body)

    def send_response(self, code, message=None):
        """Add the response header to the headers buffer and log the
        response code.

        Also send two standard headers with the server software
        version and the current date.

        """
        self.log_request(code)
        self.send_response_only(code, message)
        self.send_header('Server', self.version_string())
        self.send_header('Date', self.date_time_string())

    def send_response_only(self, code, message=None):
        """Send the response header only."""
        if self.request_version != 'HTTP/0.9':
            if message is None:
                if code in self.responses:
                    message = self.responses[code][0]
                else:
                    message = ''
            if not hasattr(self, '_headers_buffer'):
                self._headers_buffer = []
            self._headers_buffer.append(("%s %d %s\r\n" %
                    (self.protocol_version, code, message)).encode(
                        'latin-1', 'strict'))

    def send_header(self, keyword, value):
        """Send a MIME header to the headers buffer."""
        if self.request_version != 'HTTP/0.9':
            if not hasattr(self, '_headers_buffer'):
                self._headers_buffer = []
            self._headers_buffer.append(
                ("%s: %s\r\n" % (keyword, value)).encode('latin-1', 'strict'))

        # A Connection header also decides whether handle() loops again.
        if keyword.lower() == 'connection':
            if value.lower() == 'close':
                self.close_connection = True
            elif value.lower() == 'keep-alive':
                self.close_connection = False

    def end_headers(self):
        """Send the blank line ending the MIME headers."""
        if self.request_version != 'HTTP/0.9':
            self._headers_buffer.append(b"\r\n")
            self.flush_headers()

    def flush_headers(self):
        """Write any buffered header bytes to wfile and empty the buffer."""
        if hasattr(self, '_headers_buffer'):
            self.wfile.write(b"".join(self._headers_buffer))
            self._headers_buffer = []

    def log_request(self, code='-', size='-'):
        """Log an accepted request.

        This is called by send_response().

        """
        if isinstance(code, HTTPStatus):
            code = code.value
        self.log_message('"%s" %s %s',
                         self.requestline, str(code), str(size))

    def log_error(self, format, *args):
        """Log an error.

        This is called when a request cannot be fulfilled.  By
        default it passes the message on to log_message().

        Arguments are the same as for log_message().

        XXX This should go to the separate error log.

        """

        self.log_message(format, *args)

    # Translation table used by log_message(): C0 controls, DEL and C1
    # controls become a visible \xNN escape, and backslash is doubled so the
    # escaped output stays unambiguous.
    # https://en.wikipedia.org/wiki/List_of_Unicode_characters#Control_codes
    _control_char_table = str.maketrans(
        {c: '\\x%02x' % c
         for c in itertools.chain(range(0x20), range(0x7f, 0xa0))})
    _control_char_table[ord('\\')] = r'\\'

    def log_message(self, format, *args):
        """Log an arbitrary message.

        This is used by all other logging functions.  Override
        it if you have specific logging wishes.

        The first argument, FORMAT, is a format string for the
        message to be logged.  If the format string contains
        any % escapes requiring parameters, they should be
        specified as subsequent arguments (it's just like
        printf!).

        The client ip and current date/time are prefixed to
        every message.

        Control characters in the formatted message are escaped
        before it is written to stderr, because the message can
        contain text taken directly from the client's request.

        """

        message = format % args
        sys.stderr.write("%s - - [%s] %s\n" %
                         (self.address_string(),
                          self.log_date_time_string(),
                          message.translate(self._control_char_table)))

    def version_string(self):
        """Return the server software version string."""
        return self.server_version + ' ' + self.sys_version

    def date_time_string(self, timestamp=None):
        """Return the current date and time formatted for a message header."""
        if timestamp is None:
            timestamp = time.time()
        return email.utils.formatdate(timestamp, usegmt=True)

    def log_date_time_string(self):
        """Return the current time formatted for logging."""
        now = time.time()
        year, month, day, hh, mm, ss, x, y, z = time.localtime(now)
        s = "%02d/%3s/%04d %02d:%02d:%02d" % (
                day, self.monthname[month], year, hh, mm, ss)
        return s

    weekdayname = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']

    # Index 0 is a placeholder so that month numbers 1..12 index directly.
    monthname = [None,
                 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

    def address_string(self):
        """Return the client address."""

        return self.client_address[0]

    # Essentially static class variables

    # The version of the HTTP protocol we support.
    # Set this to HTTP/1.1 to enable automatic keepalive
    protocol_version = "HTTP/1.0"

    # MessageClass used to parse headers
    MessageClass = http.client.HTTPMessage

    # hack to maintain backwards compatibility
    responses = {
        v: (v.phrase, v.description)
        for v in HTTPStatus.__members__.values()
    }


class SimpleHTTPRequestHandler(BaseHTTPRequestHandler):

    """Simple HTTP request handler with GET and HEAD commands.

    This serves files from the current directory and any of its
    subdirectories.  The MIME type for files is determined by
    calling the .guess_type() method.

    The GET and HEAD requests are identical except that the HEAD
    request omits the actual contents of the file.

    """

    server_version = "SimpleHTTP/" + __version__
    extensions_map = _encodings_map_default = {
        '.gz': 'application/gzip',
        '.Z': 'application/octet-stream',
        '.bz2': 'application/x-bzip2',
        '.xz': 'application/x-xz',
    }

    def __init__(self, *args, directory=None, **kwargs):
        """Store the directory to serve, defaulting to the current one.

        self.directory is assigned before delegating to the base class
        constructor with the remaining arguments.
        """
        if directory is None:
            directory = os.getcwd()
        self.directory = os.fspath(directory)
        super().__init__(*args, **kwargs)

    def do_GET(self):
        """Serve a GET request."""
        f = self.send_head()
        if f:
            try:
                self.copyfile(f, self.wfile)
            finally:
                f.close()

    def do_HEAD(self):
        """Serve a HEAD request."""
        f = self.send_head()
        if f:
            f.close()

    def send_head(self):
        """Common code for GET and HEAD commands.

        This sends the response code and MIME headers.

        Return value is either a file object (which has to be copied
        to the outputfile by the caller unless the command was HEAD,
        and must be closed by the caller under all circumstances), or
        None, in which case the caller has nothing further to do.

        """
        path = self.translate_path(self.path)
        f = None
        if os.path.isdir(path):
            parts = urllib.parse.urlsplit(self.path)
            if not parts.path.endswith('/'):
                # redirect browser - doing basically what apache does
                self.send_response(HTTPStatus.MOVED_PERMANENTLY)
                new_parts = (parts[0], parts[1], parts[2] + '/',
                             parts[3], parts[4])
                new_url = urllib.parse.urlunsplit(new_parts)
                self.send_header("Location", new_url)
                self.send_header("Content-Length", "0")
                self.end_headers()
                return None
            for index in "index.html", "index.htm":
                index = os.path.join(path, index)
                # isfile, not exists: a *directory* named index.html must
                # not be picked as the index document.
                if os.path.isfile(index):
                    path = index
                    break
            else:
                return self.list_directory(path)
        ctype = self.guess_type(path)
        # check for trailing "/" which should return 404. See Issue17324
        # The test for this was added in test_httpserver.py
        # However, some OS platforms accept a trailingSlash as a filename
        # See discussion on python-dev and Issue34711 regarding
        # parsing and rejection of filenames with a trailing slash
        if path.endswith("/"):
            self.send_error(HTTPStatus.NOT_FOUND, "File not found")
            return None
        try:
            f = open(path, 'rb')
        except OSError:
            self.send_error(HTTPStatus.NOT_FOUND, "File not found")
            return None

        try:
            fs = os.fstat(f.fileno())
            # Use browser cache if possible
            if ("If-Modified-Since" in self.headers
                    and "If-None-Match" not in self.headers):
                # compare If-Modified-Since and time of last file modification
                try:
                    ims = email.utils.parsedate_to_datetime(
                        self.headers["If-Modified-Since"])
                except (TypeError, IndexError, OverflowError, ValueError):
                    # ignore ill-formed values
                    pass
                else:
                    if ims.tzinfo is None:
                        # obsolete format with no timezone, cf.
                        # https://tools.ietf.org/html/rfc7231#section-7.1.1.1
                        ims = ims.replace(tzinfo=datetime.timezone.utc)
                    if ims.tzinfo is datetime.timezone.utc:
                        # compare to UTC datetime of last modification
                        last_modif = datetime.datetime.fromtimestamp(
                            fs.st_mtime, datetime.timezone.utc)
                        # remove microseconds, like in If-Modified-Since
                        last_modif = last_modif.replace(microsecond=0)

                        if last_modif <= ims:
                            self.send_response(HTTPStatus.NOT_MODIFIED)
                            self.end_headers()
                            f.close()
                            return None

            self.send_response(HTTPStatus.OK)
            self.send_header("Content-type", ctype)
            self.send_header("Content-Length", str(fs[6]))
            self.send_header("Last-Modified",
                self.date_time_string(fs.st_mtime))
            self.end_headers()
            return f
        except:
            # Deliberately bare: the file must be closed on *any* failure
            # before the exception is re-raised unchanged.
            f.close()
            raise

    def list_directory(self, path):
        """Helper to produce a directory listing (absent index.html).

        Return value is either a file object, or None (indicating an
        error).  In either case, the headers are sent, making the
        interface the same as for send_head().

        """
        try:
            entries = os.listdir(path)
        except OSError:
            self.send_error(
                HTTPStatus.NOT_FOUND,
                "No permission to list directory")
            return None
        entries.sort(key=str.lower)
        r = []
        try:
            displaypath = urllib.parse.unquote(self.path,
                                               errors='surrogatepass')
        except UnicodeDecodeError:
            # Fall back to lenient decoding of the *request* path.  The
            # translated local filesystem path must not end up in the page.
            displaypath = urllib.parse.unquote(self.path)
        displaypath = html.escape(displaypath, quote=False)
        enc = sys.getfilesystemencoding()
        title = 'Directory listing for %s' % displaypath
        r.append('<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" '
                 '"http://www.w3.org/TR/html4/strict.dtd">')
        r.append('<html>\n<head>')
        r.append('<meta http-equiv="Content-Type" '
                 'content="text/html; charset=%s">' % enc)
        r.append('<title>%s</title>\n</head>' % title)
        r.append('<body>\n<h1>%s</h1>' % title)
        r.append('<hr>\n<ul>')
        for name in entries:
            fullname = os.path.join(path, name)
            displayname = linkname = name
            # Append / for directories or @ for symbolic links
            if os.path.isdir(fullname):
                displayname = name + "/"
                linkname = name + "/"
            if os.path.islink(fullname):
                displayname = name + "@"
                # Note: a link to a directory displays with @ and links with /
            r.append('<li><a href="%s">%s</a></li>'
                    % (urllib.parse.quote(linkname,
                                          errors='surrogatepass'),
                       html.escape(displayname, quote=False)))
        r.append('</ul>\n<hr>\n</body>\n</html>\n')
        encoded = '\n'.join(r).encode(enc, 'surrogateescape')
        f = io.BytesIO()
        f.write(encoded)
        f.seek(0)
        self.send_response(HTTPStatus.OK)
        self.send_header("Content-type", "text/html; charset=%s" % enc)
        self.send_header("Content-Length", str(len(encoded)))
        self.end_headers()
        return f

    def translate_path(self, path):
        """Translate a /-separated PATH to the local filename syntax.

        Components that mean special things to the local file system
        (e.g. drive or directory names) are ignored.  (XXX They should
        probably be diagnosed.)

        """
        # abandon query parameters
        path = path.split('?',1)[0]
        path = path.split('#',1)[0]
        # Don't forget explicit trailing slash when normalizing. Issue17324
        trailing_slash = path.rstrip().endswith('/')
        try:
            path = urllib.parse.unquote(path, errors='surrogatepass')
        except UnicodeDecodeError:
            path = urllib.parse.unquote(path)
        path = posixpath.normpath(path)
        words = path.split('/')
        words = filter(None, words)
        path = self.directory
        for word in words:
            if os.path.dirname(word) or word in (os.curdir, os.pardir):
                # Ignore components that are not a simple file/directory name
                continue
            path = os.path.join(path, word)
        if trailing_slash:
            path += '/'
        return path

    def copyfile(self, source, outputfile):
        """Copy all data between two file objects.

        The SOURCE argument is a file object open for reading
        (or anything with a read() method) and the DESTINATION
        argument is a file object open for writing (or
        anything with a write() method).

        The only reason for overriding this would be to change
        the block size or perhaps to replace newlines by CRLF
        -- note however that the default server uses this
        to copy binary data as well.

        """
        shutil.copyfileobj(source, outputfile)

    def guess_type(self, path):
        """Guess the type of a file.

        Argument is a PATH (a filename).

        Return value is a string of the form type/subtype,
        usable for a MIME Content-type header.

        The default implementation looks the file's extension
        up in the table self.extensions_map, using application/octet-stream
        as a default; however it would be permissible (if
        slow) to look inside the data to make a better guess.

        """
        base, ext = posixpath.splitext(path)
        # Exact-case match first ('.Z' differs from '.z'), then lower-case.
        if ext in self.extensions_map:
            return self.extensions_map[ext]
        ext = ext.lower()
        if ext in self.extensions_map:
            return self.extensions_map[ext]
        guess, _ = mimetypes.guess_type(path)
        if guess:
            return guess
        return 'application/octet-stream'


# Utilities for CGIHTTPRequestHandler

def _url_collapse_path(path):
    """
    Given a URL path, remove extra '/'s and '.' path elements and collapse
    any '..' references and returns a collapsed path.

    Implements something akin to RFC-2396 5.2 step 6 to parse relative paths.
    The utility of this function is limited to is_cgi method and helps
    preventing some security attacks.

    Returns: The reconstituted URL, which will always start with a '/'.

    Raises: IndexError if too many '..' occur within the path.

    """
    # Query component should not be involved.
    path, _, query = path.partition('?')
    path = urllib.parse.unquote(path)

    # Similar to os.path.split(os.path.normpath(path)) but specific to URL
    # path semantics rather than local operating system semantics.
    path_parts = path.split('/')
    head_parts = []
    for part in path_parts[:-1]:
        if part == '..':
            head_parts.pop() # IndexError if more '..' than prior parts
        elif part and part != '.':
            head_parts.append( part )
    if path_parts:
        tail_part = path_parts.pop()
        if tail_part:
            if tail_part == '..':
                head_parts.pop()
                tail_part = ''
            elif tail_part == '.':
                tail_part = ''
    else:
        tail_part = ''

    if query:
        tail_part = '?'.join((tail_part, query))

    splitpath = ('/' + '/'.join(head_parts), tail_part)
    collapsed_path = "/".join(splitpath)

    return collapsed_path



nobody = None

def nobody_uid():
    """Internal routine to get nobody's uid"""
    global nobody
    if nobody:
        return nobody
    try:
        import pwd
    except ImportError:
        return -1
    try:
        nobody = pwd.getpwnam('nobody')[2]
    except KeyError:
        # No 'nobody' account: use a uid one above the highest known uid.
        nobody = 1 + max(x[2] for x in pwd.getpwall())
    return nobody


def executable(path):
    """Test for executable file."""
    return os.access(path, os.X_OK)


class CGIHTTPRequestHandler(SimpleHTTPRequestHandler):

    """Complete HTTP server with GET, HEAD and POST commands.

    GET and HEAD also support running CGI scripts.

    The POST command is *only* implemented for CGI scripts.

    """

    # Determine platform specifics
    have_fork = hasattr(os, 'fork')

    # Make rfile unbuffered -- we need to read one line and then pass
    # the rest to a subprocess, so we can't use buffered input.
    rbufsize = 0

    def do_POST(self):
        """Serve a POST request.

        This is only implemented for CGI scripts.

        """

        if self.is_cgi():
            self.run_cgi()
        else:
            self.send_error(
                HTTPStatus.NOT_IMPLEMENTED,
                "Can only POST to CGI scripts")

    def send_head(self):
        """Version of send_head that support CGI scripts"""
        if self.is_cgi():
            return self.run_cgi()
        else:
            return SimpleHTTPRequestHandler.send_head(self)

    def is_cgi(self):
        """Test whether self.path corresponds to a CGI script.

        Returns True and updates the cgi_info attribute to the tuple
        (dir, rest) if self.path requires running a CGI script.
        Returns False otherwise.

        If any exception is raised, the caller should assume that
        self.path was rejected as invalid and act accordingly.

        The default implementation tests whether the normalized url
        path begins with one of the strings in self.cgi_directories
        (and the next character is a '/' or the end of the string).

        """
        collapsed_path = _url_collapse_path(self.path)
        dir_sep = collapsed_path.find('/', 1)
        # Try each successively longer '/'-terminated prefix until one of
        # them is a configured CGI directory.
        while dir_sep > 0 and collapsed_path[:dir_sep] not in self.cgi_directories:
            dir_sep = collapsed_path.find('/', dir_sep+1)
        if dir_sep > 0:
            head, tail = collapsed_path[:dir_sep], collapsed_path[dir_sep+1:]
            self.cgi_info = head, tail
            return True
        return False


    cgi_directories = ['/cgi-bin', '/htbin']

    def is_executable(self, path):
        """Test whether argument path is an executable file."""
        return executable(path)

    def is_python(self, path):
        """Test whether argument path is a Python script."""
        head, tail = os.path.splitext(path)
        return tail.lower() in (".py", ".pyw")

    def run_cgi(self):
        """Execute a CGI script."""
        dir, rest = self.cgi_info
        path = dir + '/' + rest
        i = path.find('/', len(dir)+1)
        # Extend dir one path component at a time for as long as the longer
        # prefix still translates to an existing directory.
        while i >= 0:
            nextdir = path[:i]
            nextrest = path[i+1:]

            scriptdir = self.translate_path(nextdir)
            if os.path.isdir(scriptdir):
                dir, rest = nextdir, nextrest
                i = path.find('/', len(dir)+1)
            else:
                break

        # find an explicit query string, if present.
        rest, _, query = rest.partition('?')

        # dissect the part after the directory name into a script name &
        # a possible additional path, to be stored in PATH_INFO.
1055 i = rest.find('/') 1056 if i >= 0: 1057 script, rest = rest[:i], rest[i:] 1058 else: 1059 script, rest = rest, '' 1060 1061 scriptname = dir + '/' + script 1062 scriptfile = self.translate_path(scriptname) 1063 if not os.path.exists(scriptfile): 1064 self.send_error( 1065 HTTPStatus.NOT_FOUND, 1066 "No such CGI script (%r)" % scriptname) 1067 return 1068 if not os.path.isfile(scriptfile): 1069 self.send_error( 1070 HTTPStatus.FORBIDDEN, 1071 "CGI script is not a plain file (%r)" % scriptname) 1072 return 1073 ispy = self.is_python(scriptname) 1074 if self.have_fork or not ispy: 1075 if not self.is_executable(scriptfile): 1076 self.send_error( 1077 HTTPStatus.FORBIDDEN, 1078 "CGI script is not executable (%r)" % scriptname) 1079 return 1080 1081 # Reference: http://hoohoo.ncsa.uiuc.edu/cgi/env.html 1082 # XXX Much of the following could be prepared ahead of time! 1083 env = copy.deepcopy(os.environ) 1084 env['SERVER_SOFTWARE'] = self.version_string() 1085 env['SERVER_NAME'] = self.server.server_name 1086 env['GATEWAY_INTERFACE'] = 'CGI/1.1' 1087 env['SERVER_PROTOCOL'] = self.protocol_version 1088 env['SERVER_PORT'] = str(self.server.server_port) 1089 env['REQUEST_METHOD'] = self.command 1090 uqrest = urllib.parse.unquote(rest) 1091 env['PATH_INFO'] = uqrest 1092 env['PATH_TRANSLATED'] = self.translate_path(uqrest) 1093 env['SCRIPT_NAME'] = scriptname 1094 env['QUERY_STRING'] = query 1095 env['REMOTE_ADDR'] = self.client_address[0] 1096 authorization = self.headers.get("authorization") 1097 if authorization: 1098 authorization = authorization.split() 1099 if len(authorization) == 2: 1100 import base64, binascii 1101 env['AUTH_TYPE'] = authorization[0] 1102 if authorization[0].lower() == "basic": 1103 try: 1104 authorization = authorization[1].encode('ascii') 1105 authorization = base64.decodebytes(authorization).\ 1106 decode('ascii') 1107 except (binascii.Error, UnicodeError): 1108 pass 1109 else: 1110 authorization = authorization.split(':') 1111 if 
len(authorization) == 2: 1112 env['REMOTE_USER'] = authorization[0] 1113 # XXX REMOTE_IDENT 1114 if self.headers.get('content-type') is None: 1115 env['CONTENT_TYPE'] = self.headers.get_content_type() 1116 else: 1117 env['CONTENT_TYPE'] = self.headers['content-type'] 1118 length = self.headers.get('content-length') 1119 if length: 1120 env['CONTENT_LENGTH'] = length 1121 referer = self.headers.get('referer') 1122 if referer: 1123 env['HTTP_REFERER'] = referer 1124 accept = self.headers.get_all('accept', ()) 1125 env['HTTP_ACCEPT'] = ','.join(accept) 1126 ua = self.headers.get('user-agent') 1127 if ua: 1128 env['HTTP_USER_AGENT'] = ua 1129 co = filter(None, self.headers.get_all('cookie', [])) 1130 cookie_str = ', '.join(co) 1131 if cookie_str: 1132 env['HTTP_COOKIE'] = cookie_str 1133 # XXX Other HTTP_* headers 1134 # Since we're setting the env in the parent, provide empty 1135 # values to override previously set values 1136 for k in ('QUERY_STRING', 'REMOTE_HOST', 'CONTENT_LENGTH', 1137 'HTTP_USER_AGENT', 'HTTP_COOKIE', 'HTTP_REFERER'): 1138 env.setdefault(k, "") 1139 1140 self.send_response(HTTPStatus.OK, "Script output follows") 1141 self.flush_headers() 1142 1143 decoded_query = query.replace('+', ' ') 1144 1145 if self.have_fork: 1146 # Unix -- fork as we should 1147 args = [script] 1148 if '=' not in decoded_query: 1149 args.append(decoded_query) 1150 nobody = nobody_uid() 1151 self.wfile.flush() # Always flush before forking 1152 pid = os.fork() 1153 if pid != 0: 1154 # Parent 1155 pid, sts = os.waitpid(pid, 0) 1156 # throw away additional data [see bug #427345] 1157 while select.select([self.rfile], [], [], 0)[0]: 1158 if not self.rfile.read(1): 1159 break 1160 exitcode = os.waitstatus_to_exitcode(sts) 1161 if exitcode: 1162 self.log_error(f"CGI script exit code {exitcode}") 1163 return 1164 # Child 1165 try: 1166 try: 1167 os.setuid(nobody) 1168 except OSError: 1169 pass 1170 os.dup2(self.rfile.fileno(), 0) 1171 os.dup2(self.wfile.fileno(), 1) 1172 
os.execve(scriptfile, args, env) 1173 except: 1174 self.server.handle_error(self.request, self.client_address) 1175 os._exit(127) 1176 1177 else: 1178 # Non-Unix -- use subprocess 1179 import subprocess 1180 cmdline = [scriptfile] 1181 if self.is_python(scriptfile): 1182 interp = sys.executable 1183 if interp.lower().endswith("w.exe"): 1184 # On Windows, use python.exe, not pythonw.exe 1185 interp = interp[:-5] + interp[-4:] 1186 cmdline = [interp, '-u'] + cmdline 1187 if '=' not in query: 1188 cmdline.append(query) 1189 self.log_message("command: %s", subprocess.list2cmdline(cmdline)) 1190 try: 1191 nbytes = int(length) 1192 except (TypeError, ValueError): 1193 nbytes = 0 1194 p = subprocess.Popen(cmdline, 1195 stdin=subprocess.PIPE, 1196 stdout=subprocess.PIPE, 1197 stderr=subprocess.PIPE, 1198 env = env 1199 ) 1200 if self.command.lower() == "post" and nbytes > 0: 1201 data = self.rfile.read(nbytes) 1202 else: 1203 data = None 1204 # throw away additional data [see bug #427345] 1205 while select.select([self.rfile._sock], [], [], 0)[0]: 1206 if not self.rfile._sock.recv(1): 1207 break 1208 stdout, stderr = p.communicate(data) 1209 self.wfile.write(stdout) 1210 if stderr: 1211 self.log_error('%s', stderr) 1212 p.stderr.close() 1213 p.stdout.close() 1214 status = p.returncode 1215 if status: 1216 self.log_error("CGI script exit status %#x", status) 1217 else: 1218 self.log_message("CGI script exited OK") 1219 1220 1221def _get_best_family(*address): 1222 infos = socket.getaddrinfo( 1223 *address, 1224 type=socket.SOCK_STREAM, 1225 flags=socket.AI_PASSIVE, 1226 ) 1227 family, type, proto, canonname, sockaddr = next(iter(infos)) 1228 return family, sockaddr 1229 1230 1231def test(HandlerClass=BaseHTTPRequestHandler, 1232 ServerClass=ThreadingHTTPServer, 1233 protocol="HTTP/1.0", port=8000, bind=None): 1234 """Test the HTTP request handler class. 1235 1236 This runs an HTTP server on port 8000 (or the port argument). 
1237 1238 """ 1239 ServerClass.address_family, addr = _get_best_family(bind, port) 1240 HandlerClass.protocol_version = protocol 1241 with ServerClass(addr, HandlerClass) as httpd: 1242 host, port = httpd.socket.getsockname()[:2] 1243 url_host = f'[{host}]' if ':' in host else host 1244 print( 1245 f"Serving HTTP on {host} port {port} " 1246 f"(http://{url_host}:{port}/) ..." 1247 ) 1248 try: 1249 httpd.serve_forever() 1250 except KeyboardInterrupt: 1251 print("\nKeyboard interrupt received, exiting.") 1252 sys.exit(0) 1253 1254if __name__ == '__main__': 1255 import argparse 1256 import contextlib 1257 1258 parser = argparse.ArgumentParser() 1259 parser.add_argument('--cgi', action='store_true', 1260 help='run as CGI server') 1261 parser.add_argument('--bind', '-b', metavar='ADDRESS', 1262 help='specify alternate bind address ' 1263 '(default: all interfaces)') 1264 parser.add_argument('--directory', '-d', default=os.getcwd(), 1265 help='specify alternate directory ' 1266 '(default: current directory)') 1267 parser.add_argument('port', action='store', default=8000, type=int, 1268 nargs='?', 1269 help='specify alternate port (default: 8000)') 1270 args = parser.parse_args() 1271 if args.cgi: 1272 handler_class = CGIHTTPRequestHandler 1273 else: 1274 handler_class = SimpleHTTPRequestHandler 1275 1276 # ensure dual-stack is not disabled; ref #38907 1277 class DualStackServer(ThreadingHTTPServer): 1278 1279 def server_bind(self): 1280 # suppress exception when protocol is IPv4 1281 with contextlib.suppress(Exception): 1282 self.socket.setsockopt( 1283 socket.IPPROTO_IPV6, socket.IPV6_V6ONLY, 0) 1284 return super().server_bind() 1285 1286 def finish_request(self, request, client_address): 1287 self.RequestHandlerClass(request, client_address, self, 1288 directory=args.directory) 1289 1290 test( 1291 HandlerClass=handler_class, 1292 ServerClass=DualStackServer, 1293 port=args.port, 1294 bind=args.bind, 1295 ) 1296