1"""HTTP server classes. 2 3Note: BaseHTTPRequestHandler doesn't implement any HTTP request; see 4SimpleHTTPRequestHandler for simple implementations of GET, HEAD and POST, 5and CGIHTTPRequestHandler for CGI scripts. 6 7It does, however, optionally implement HTTP/1.1 persistent connections, 8as of version 0.3. 9 10Notes on CGIHTTPRequestHandler 11------------------------------ 12 13This class implements GET and POST requests to cgi-bin scripts. 14 15If the os.fork() function is not present (e.g. on Windows), 16subprocess.Popen() is used as a fallback, with slightly altered semantics. 17 18In all cases, the implementation is intentionally naive -- all 19requests are executed synchronously. 20 21SECURITY WARNING: DON'T USE THIS CODE UNLESS YOU ARE INSIDE A FIREWALL 22-- it may execute arbitrary Python code or external programs. 23 24Note that status code 200 is sent prior to execution of a CGI script, so 25scripts cannot send other status codes such as 302 (redirect). 26 27XXX To do: 28 29- log requests even later (to capture byte count) 30- log user-agent header and other interesting goodies 31- send error log to separate file 32""" 33 34 35# See also: 36# 37# HTTP Working Group T. Berners-Lee 38# INTERNET-DRAFT R. T. Fielding 39# <draft-ietf-http-v10-spec-00.txt> H. Frystyk Nielsen 40# Expires September 8, 1995 March 8, 1995 41# 42# URL: http://www.ics.uci.edu/pub/ietf/http/draft-ietf-http-v10-spec-00.txt 43# 44# and 45# 46# Network Working Group R. Fielding 47# Request for Comments: 2616 et al 48# Obsoletes: 2068 June 1999 49# Category: Standards Track 50# 51# URL: http://www.faqs.org/rfcs/rfc2616.html 52 53# Log files 54# --------- 55# 56# Here's a quote from the NCSA httpd docs about log file format. 57# 58# | The logfile format is as follows. Each line consists of: 59# | 60# | host rfc931 authuser [DD/Mon/YYYY:hh:mm:ss] "request" ddd bbbb 61# | 62# | host: Either the DNS name or the IP number of the remote client 63# | rfc931: Any information returned by identd for this person, 64# | - otherwise. 65# | authuser: If user sent a userid for authentication, the user name, 66# | - otherwise. 67# | DD: Day 68# | Mon: Month (calendar name) 69# | YYYY: Year 70# | hh: hour (24-hour format, the machine's timezone) 71# | mm: minutes 72# | ss: seconds 73# | request: The first line of the HTTP request as sent by the client. 74# | ddd: the status code returned by the server, - if not available. 75# | bbbb: the total number of bytes sent, 76# | *not including the HTTP/1.0 header*, - if not available 77# | 78# | You can determine the name of the file accessed through request. 79# 80# (Actually, the latter is only true if you know the server configuration 81# at the time the request was made!) 82 83__version__ = "0.6" 84 85__all__ = [ 86 "HTTPServer", "ThreadingHTTPServer", "BaseHTTPRequestHandler", 87 "SimpleHTTPRequestHandler", "CGIHTTPRequestHandler", 88] 89 90import copy 91import datetime 92import email.utils 93import html 94import http.client 95import io 96import mimetypes 97import os 98import posixpath 99import select 100import shutil 101import socket # For gethostbyaddr() 102import socketserver 103import sys 104import time 105import urllib.parse 106import contextlib 107from functools import partial 108 109from http import HTTPStatus 110 111 112# Default error message template 113DEFAULT_ERROR_MESSAGE = """\ 114<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" 115 "http://www.w3.org/TR/html4/strict.dtd"> 116<html> 117 <head> 118 <meta http-equiv="Content-Type" content="text/html;charset=utf-8"> 119 <title>Error response</title> 120 </head> 121 <body> 122 <h1>Error response</h1> 123 <p>Error code: %(code)d</p> 124 <p>Message: %(message)s.</p> 125 <p>Error code explanation: %(code)s - %(explain)s.</p> 126 </body> 127</html> 128""" 129 130DEFAULT_ERROR_CONTENT_TYPE = "text/html;charset=utf-8" 131 132class HTTPServer(socketserver.TCPServer): 133 134 allow_reuse_address = 1 # Seems to make sense in testing environment 135 136 def server_bind(self): 137 """Override server_bind to store the server name.""" 138 socketserver.TCPServer.server_bind(self) 139 host, port = self.server_address[:2] 140 self.server_name = socket.getfqdn(host) 141 self.server_port = port 142 143 144class ThreadingHTTPServer(socketserver.ThreadingMixIn, HTTPServer): 145 daemon_threads = True 146 147 148class BaseHTTPRequestHandler(socketserver.StreamRequestHandler): 149 150 """HTTP request handler base class. 151 152 The following explanation of HTTP serves to guide you through the 153 code as well as to expose any misunderstandings I may have about 154 HTTP (so you don't need to read the code to figure out I'm wrong 155 :-). 156 157 HTTP (HyperText Transfer Protocol) is an extensible protocol on 158 top of a reliable stream transport (e.g. TCP/IP). The protocol 159 recognizes three parts to a request: 160 161 1. One line identifying the request type and path 162 2. An optional set of RFC-822-style headers 163 3. An optional data part 164 165 The headers and data are separated by a blank line. 166 167 The first line of the request has the form 168 169 <command> <path> <version> 170 171 where <command> is a (case-sensitive) keyword such as GET or POST, 172 <path> is a string containing path information for the request, 173 and <version> should be the string "HTTP/1.0" or "HTTP/1.1". 174 <path> is encoded using the URL encoding scheme (using %xx to signify 175 the ASCII character with hex code xx). 176 177 The specification specifies that lines are separated by CRLF but 178 for compatibility with the widest range of clients recommends 179 servers also handle LF. Similarly, whitespace in the request line 180 is treated sensibly (allowing multiple spaces between components 181 and allowing trailing whitespace). 182 183 Similarly, for output, lines ought to be separated by CRLF pairs 184 but most clients grok LF characters just fine. 185 186 If the first line of the request has the form 187 188 <command> <path> 189 190 (i.e. <version> is left out) then this is assumed to be an HTTP 191 0.9 request; this form has no optional headers and data part and 192 the reply consists of just the data. 193 194 The reply form of the HTTP 1.x protocol again has three parts: 195 196 1. One line giving the response code 197 2. An optional set of RFC-822-style headers 198 3. The data 199 200 Again, the headers and data are separated by a blank line. 201 202 The response code line has the form 203 204 <version> <responsecode> <responsestring> 205 206 where <version> is the protocol version ("HTTP/1.0" or "HTTP/1.1"), 207 <responsecode> is a 3-digit response code indicating success or 208 failure of the request, and <responsestring> is an optional 209 human-readable string explaining what the response code means. 210 211 This server parses the request and the headers, and then calls a 212 function specific to the request type (<command>). Specifically, 213 a request SPAM will be handled by a method do_SPAM(). If no 214 such method exists the server sends an error response to the 215 client. If it exists, it is called with no arguments: 216 217 do_SPAM() 218 219 Note that the request name is case sensitive (i.e. SPAM and spam 220 are different requests). 221 222 The various request details are stored in instance variables: 223 224 - client_address is the client IP address in the form (host, 225 port); 226 227 - command, path and version are the broken-down request line; 228 229 - headers is an instance of email.message.Message (or a derived 230 class) containing the header information; 231 232 - rfile is a file object open for reading positioned at the 233 start of the optional input data part; 234 235 - wfile is a file object open for writing. 236 237 IT IS IMPORTANT TO ADHERE TO THE PROTOCOL FOR WRITING! 238 239 The first thing to be written must be the response line. Then 240 follow 0 or more header lines, then a blank line, and then the 241 actual data (if any). The meaning of the header lines depends on 242 the command executed by the server; in most cases, when data is 243 returned, there should be at least one header line of the form 244 245 Content-type: <type>/<subtype> 246 247 where <type> and <subtype> should be registered MIME types, 248 e.g. "text/html" or "text/plain". 249 250 """ 251 252 # The Python system version, truncated to its first component. 253 sys_version = "Python/" + sys.version.split()[0] 254 255 # The server software version. You may want to override this. 256 # The format is multiple whitespace-separated strings, 257 # where each string is of the form name[/version]. 258 server_version = "BaseHTTP/" + __version__ 259 260 error_message_format = DEFAULT_ERROR_MESSAGE 261 error_content_type = DEFAULT_ERROR_CONTENT_TYPE 262 263 # The default request version. This only affects responses up until 264 # the point where the request line is parsed, so it mainly decides what 265 # the client gets back when sending a malformed request line. 266 # Most web servers default to HTTP 0.9, i.e. don't send a status line. 267 default_request_version = "HTTP/0.9" 268 269 def parse_request(self): 270 """Parse a request (internal). 271 272 The request should be stored in self.raw_requestline; the results 273 are in self.command, self.path, self.request_version and 274 self.headers. 275 276 Return True for success, False for failure; on failure, any relevant 277 error response has already been sent back. 278 279 """ 280 self.command = None # set in case of error on the first line 281 self.request_version = version = self.default_request_version 282 self.close_connection = True 283 requestline = str(self.raw_requestline, 'iso-8859-1') 284 requestline = requestline.rstrip('\r\n') 285 self.requestline = requestline 286 words = requestline.split() 287 if len(words) == 0: 288 return False 289 290 if len(words) >= 3: # Enough to determine protocol version 291 version = words[-1] 292 try: 293 if not version.startswith('HTTP/'): 294 raise ValueError 295 base_version_number = version.split('/', 1)[1] 296 version_number = base_version_number.split(".") 297 # RFC 2145 section 3.1 says there can be only one "." and 298 # - major and minor numbers MUST be treated as 299 # separate integers; 300 # - HTTP/2.4 is a lower version than HTTP/2.13, which in 301 # turn is lower than HTTP/12.3; 302 # - Leading zeros MUST be ignored by recipients. 303 if len(version_number) != 2: 304 raise ValueError 305 version_number = int(version_number[0]), int(version_number[1]) 306 except (ValueError, IndexError): 307 self.send_error( 308 HTTPStatus.BAD_REQUEST, 309 "Bad request version (%r)" % version) 310 return False 311 if version_number >= (1, 1) and self.protocol_version >= "HTTP/1.1": 312 self.close_connection = False 313 if version_number >= (2, 0): 314 self.send_error( 315 HTTPStatus.HTTP_VERSION_NOT_SUPPORTED, 316 "Invalid HTTP version (%s)" % base_version_number) 317 return False 318 self.request_version = version 319 320 if not 2 <= len(words) <= 3: 321 self.send_error( 322 HTTPStatus.BAD_REQUEST, 323 "Bad request syntax (%r)" % requestline) 324 return False 325 command, path = words[:2] 326 if len(words) == 2: 327 self.close_connection = True 328 if command != 'GET': 329 self.send_error( 330 HTTPStatus.BAD_REQUEST, 331 "Bad HTTP/0.9 request type (%r)" % command) 332 return False 333 self.command, self.path = command, path 334 335 # gh-87389: The purpose of replacing '//' with '/' is to protect 336 # against open redirect attacks possibly triggered if the path starts 337 # with '//' because http clients treat //path as an absolute URI 338 # without scheme (similar to http://path) rather than a path. 339 if self.path.startswith('//'): 340 self.path = '/' + self.path.lstrip('/') # Reduce to a single / 341 342 # Examine the headers and look for a Connection directive. 343 try: 344 self.headers = http.client.parse_headers(self.rfile, 345 _class=self.MessageClass) 346 except http.client.LineTooLong as err: 347 self.send_error( 348 HTTPStatus.REQUEST_HEADER_FIELDS_TOO_LARGE, 349 "Line too long", 350 str(err)) 351 return False 352 except http.client.HTTPException as err: 353 self.send_error( 354 HTTPStatus.REQUEST_HEADER_FIELDS_TOO_LARGE, 355 "Too many headers", 356 str(err) 357 ) 358 return False 359 360 conntype = self.headers.get('Connection', "") 361 if conntype.lower() == 'close': 362 self.close_connection = True 363 elif (conntype.lower() == 'keep-alive' and 364 self.protocol_version >= "HTTP/1.1"): 365 self.close_connection = False 366 # Examine the headers and look for an Expect directive 367 expect = self.headers.get('Expect', "") 368 if (expect.lower() == "100-continue" and 369 self.protocol_version >= "HTTP/1.1" and 370 self.request_version >= "HTTP/1.1"): 371 if not self.handle_expect_100(): 372 return False 373 return True 374 375 def handle_expect_100(self): 376 """Decide what to do with an "Expect: 100-continue" header. 377 378 If the client is expecting a 100 Continue response, we must 379 respond with either a 100 Continue or a final response before 380 waiting for the request body. The default is to always respond 381 with a 100 Continue. You can behave differently (for example, 382 reject unauthorized requests) by overriding this method. 383 384 This method should either return True (possibly after sending 385 a 100 Continue response) or send an error response and return 386 False. 387 388 """ 389 self.send_response_only(HTTPStatus.CONTINUE) 390 self.end_headers() 391 return True 392 393 def handle_one_request(self): 394 """Handle a single HTTP request. 395 396 You normally don't need to override this method; see the class 397 __doc__ string for information on how to handle specific HTTP 398 commands such as GET and POST. 399 400 """ 401 try: 402 self.raw_requestline = self.rfile.readline(65537) 403 if len(self.raw_requestline) > 65536: 404 self.requestline = '' 405 self.request_version = '' 406 self.command = '' 407 self.send_error(HTTPStatus.REQUEST_URI_TOO_LONG) 408 return 409 if not self.raw_requestline: 410 self.close_connection = True 411 return 412 if not self.parse_request(): 413 # An error code has been sent, just exit 414 return 415 mname = 'do_' + self.command 416 if not hasattr(self, mname): 417 self.send_error( 418 HTTPStatus.NOT_IMPLEMENTED, 419 "Unsupported method (%r)" % self.command) 420 return 421 method = getattr(self, mname) 422 method() 423 self.wfile.flush() #actually send the response if not already done. 424 except TimeoutError as e: 425 #a read or a write timed out. Discard this connection 426 self.log_error("Request timed out: %r", e) 427 self.close_connection = True 428 return 429 430 def handle(self): 431 """Handle multiple requests if necessary.""" 432 self.close_connection = True 433 434 self.handle_one_request() 435 while not self.close_connection: 436 self.handle_one_request() 437 438 def send_error(self, code, message=None, explain=None): 439 """Send and log an error reply. 440 441 Arguments are 442 * code: an HTTP error code 443 3 digits 444 * message: a simple optional 1 line reason phrase. 445 *( HTAB / SP / VCHAR / %x80-FF ) 446 defaults to short entry matching the response code 447 * explain: a detailed message defaults to the long entry 448 matching the response code. 449 450 This sends an error response (so it must be called before any 451 output has been generated), logs the error, and finally sends 452 a piece of HTML explaining the error to the user. 453 454 """ 455 456 try: 457 shortmsg, longmsg = self.responses[code] 458 except KeyError: 459 shortmsg, longmsg = '???', '???' 460 if message is None: 461 message = shortmsg 462 if explain is None: 463 explain = longmsg 464 self.log_error("code %d, message %s", code, message) 465 self.send_response(code, message) 466 self.send_header('Connection', 'close') 467 468 # Message body is omitted for cases described in: 469 # - RFC7230: 3.3. 1xx, 204(No Content), 304(Not Modified) 470 # - RFC7231: 6.3.6. 205(Reset Content) 471 body = None 472 if (code >= 200 and 473 code not in (HTTPStatus.NO_CONTENT, 474 HTTPStatus.RESET_CONTENT, 475 HTTPStatus.NOT_MODIFIED)): 476 # HTML encode to prevent Cross Site Scripting attacks 477 # (see bug #1100201) 478 content = (self.error_message_format % { 479 'code': code, 480 'message': html.escape(message, quote=False), 481 'explain': html.escape(explain, quote=False) 482 }) 483 body = content.encode('UTF-8', 'replace') 484 self.send_header("Content-Type", self.error_content_type) 485 self.send_header('Content-Length', str(len(body))) 486 self.end_headers() 487 488 if self.command != 'HEAD' and body: 489 self.wfile.write(body) 490 491 def send_response(self, code, message=None): 492 """Add the response header to the headers buffer and log the 493 response code. 494 495 Also send two standard headers with the server software 496 version and the current date. 497 498 """ 499 self.log_request(code) 500 self.send_response_only(code, message) 501 self.send_header('Server', self.version_string()) 502 self.send_header('Date', self.date_time_string()) 503 504 def send_response_only(self, code, message=None): 505 """Send the response header only.""" 506 if self.request_version != 'HTTP/0.9': 507 if message is None: 508 if code in self.responses: 509 message = self.responses[code][0] 510 else: 511 message = '' 512 if not hasattr(self, '_headers_buffer'): 513 self._headers_buffer = [] 514 self._headers_buffer.append(("%s %d %s\r\n" % 515 (self.protocol_version, code, message)).encode( 516 'latin-1', 'strict')) 517 518 def send_header(self, keyword, value): 519 """Send a MIME header to the headers buffer.""" 520 if self.request_version != 'HTTP/0.9': 521 if not hasattr(self, '_headers_buffer'): 522 self._headers_buffer = [] 523 self._headers_buffer.append( 524 ("%s: %s\r\n" % (keyword, value)).encode('latin-1', 'strict')) 525 526 if keyword.lower() == 'connection': 527 if value.lower() == 'close': 528 self.close_connection = True 529 elif value.lower() == 'keep-alive': 530 self.close_connection = False 531 532 def end_headers(self): 533 """Send the blank line ending the MIME headers.""" 534 if self.request_version != 'HTTP/0.9': 535 self._headers_buffer.append(b"\r\n") 536 self.flush_headers() 537 538 def flush_headers(self): 539 if hasattr(self, '_headers_buffer'): 540 self.wfile.write(b"".join(self._headers_buffer)) 541 self._headers_buffer = [] 542 543 def log_request(self, code='-', size='-'): 544 """Log an accepted request. 545 546 This is called by send_response(). 547 548 """ 549 if isinstance(code, HTTPStatus): 550 code = code.value 551 self.log_message('"%s" %s %s', 552 self.requestline, str(code), str(size)) 553 554 def log_error(self, format, *args): 555 """Log an error. 556 557 This is called when a request cannot be fulfilled. By 558 default it passes the message on to log_message(). 559 560 Arguments are the same as for log_message(). 561 562 XXX This should go to the separate error log. 563 564 """ 565 566 self.log_message(format, *args) 567 568 def log_message(self, format, *args): 569 """Log an arbitrary message. 570 571 This is used by all other logging functions. Override 572 it if you have specific logging wishes. 573 574 The first argument, FORMAT, is a format string for the 575 message to be logged. If the format string contains 576 any % escapes requiring parameters, they should be 577 specified as subsequent arguments (it's just like 578 printf!). 579 580 The client ip and current date/time are prefixed to 581 every message. 582 583 """ 584 585 sys.stderr.write("%s - - [%s] %s\n" % 586 (self.address_string(), 587 self.log_date_time_string(), 588 format%args)) 589 590 def version_string(self): 591 """Return the server software version string.""" 592 return self.server_version + ' ' + self.sys_version 593 594 def date_time_string(self, timestamp=None): 595 """Return the current date and time formatted for a message header.""" 596 if timestamp is None: 597 timestamp = time.time() 598 return email.utils.formatdate(timestamp, usegmt=True) 599 600 def log_date_time_string(self): 601 """Return the current time formatted for logging.""" 602 now = time.time() 603 year, month, day, hh, mm, ss, x, y, z = time.localtime(now) 604 s = "%02d/%3s/%04d %02d:%02d:%02d" % ( 605 day, self.monthname[month], year, hh, mm, ss) 606 return s 607 608 weekdayname = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'] 609 610 monthname = [None, 611 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 612 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'] 613 614 def address_string(self): 615 """Return the client address.""" 616 617 return self.client_address[0] 618 619 # Essentially static class variables 620 621 # The version of the HTTP protocol we support. 622 # Set this to HTTP/1.1 to enable automatic keepalive 623 protocol_version = "HTTP/1.0" 624 625 # MessageClass used to parse headers 626 MessageClass = http.client.HTTPMessage 627 628 # hack to maintain backwards compatibility 629 responses = { 630 v: (v.phrase, v.description) 631 for v in HTTPStatus.__members__.values() 632 } 633 634 635class SimpleHTTPRequestHandler(BaseHTTPRequestHandler): 636 637 """Simple HTTP request handler with GET and HEAD commands. 638 639 This serves files from the current directory and any of its 640 subdirectories. The MIME type for files is determined by 641 calling the .guess_type() method. 642 643 The GET and HEAD requests are identical except that the HEAD 644 request omits the actual contents of the file. 645 646 """ 647 648 server_version = "SimpleHTTP/" + __version__ 649 extensions_map = _encodings_map_default = { 650 '.gz': 'application/gzip', 651 '.Z': 'application/octet-stream', 652 '.bz2': 'application/x-bzip2', 653 '.xz': 'application/x-xz', 654 } 655 656 def __init__(self, *args, directory=None, **kwargs): 657 if directory is None: 658 directory = os.getcwd() 659 self.directory = os.fspath(directory) 660 super().__init__(*args, **kwargs) 661 662 def do_GET(self): 663 """Serve a GET request.""" 664 f = self.send_head() 665 if f: 666 try: 667 self.copyfile(f, self.wfile) 668 finally: 669 f.close() 670 671 def do_HEAD(self): 672 """Serve a HEAD request.""" 673 f = self.send_head() 674 if f: 675 f.close() 676 677 def send_head(self): 678 """Common code for GET and HEAD commands. 679 680 This sends the response code and MIME headers. 681 682 Return value is either a file object (which has to be copied 683 to the outputfile by the caller unless the command was HEAD, 684 and must be closed by the caller under all circumstances), or 685 None, in which case the caller has nothing further to do. 686 687 """ 688 path = self.translate_path(self.path) 689 f = None 690 if os.path.isdir(path): 691 parts = urllib.parse.urlsplit(self.path) 692 if not parts.path.endswith('/'): 693 # redirect browser - doing basically what apache does 694 self.send_response(HTTPStatus.MOVED_PERMANENTLY) 695 new_parts = (parts[0], parts[1], parts[2] + '/', 696 parts[3], parts[4]) 697 new_url = urllib.parse.urlunsplit(new_parts) 698 self.send_header("Location", new_url) 699 self.send_header("Content-Length", "0") 700 self.end_headers() 701 return None 702 for index in "index.html", "index.htm": 703 index = os.path.join(path, index) 704 if os.path.exists(index): 705 path = index 706 break 707 else: 708 return self.list_directory(path) 709 ctype = self.guess_type(path) 710 # check for trailing "/" which should return 404. See Issue17324 711 # The test for this was added in test_httpserver.py 712 # However, some OS platforms accept a trailingSlash as a filename 713 # See discussion on python-dev and Issue34711 regarding 714 # parseing and rejection of filenames with a trailing slash 715 if path.endswith("/"): 716 self.send_error(HTTPStatus.NOT_FOUND, "File not found") 717 return None 718 try: 719 f = open(path, 'rb') 720 except OSError: 721 self.send_error(HTTPStatus.NOT_FOUND, "File not found") 722 return None 723 724 try: 725 fs = os.fstat(f.fileno()) 726 # Use browser cache if possible 727 if ("If-Modified-Since" in self.headers 728 and "If-None-Match" not in self.headers): 729 # compare If-Modified-Since and time of last file modification 730 try: 731 ims = email.utils.parsedate_to_datetime( 732 self.headers["If-Modified-Since"]) 733 except (TypeError, IndexError, OverflowError, ValueError): 734 # ignore ill-formed values 735 pass 736 else: 737 if ims.tzinfo is None: 738 # obsolete format with no timezone, cf. 739 # https://tools.ietf.org/html/rfc7231#section-7.1.1.1 740 ims = ims.replace(tzinfo=datetime.timezone.utc) 741 if ims.tzinfo is datetime.timezone.utc: 742 # compare to UTC datetime of last modification 743 last_modif = datetime.datetime.fromtimestamp( 744 fs.st_mtime, datetime.timezone.utc) 745 # remove microseconds, like in If-Modified-Since 746 last_modif = last_modif.replace(microsecond=0) 747 748 if last_modif <= ims: 749 self.send_response(HTTPStatus.NOT_MODIFIED) 750 self.end_headers() 751 f.close() 752 return None 753 754 self.send_response(HTTPStatus.OK) 755 self.send_header("Content-type", ctype) 756 self.send_header("Content-Length", str(fs[6])) 757 self.send_header("Last-Modified", 758 self.date_time_string(fs.st_mtime)) 759 self.end_headers() 760 return f 761 except: 762 f.close() 763 raise 764 765 def list_directory(self, path): 766 """Helper to produce a directory listing (absent index.html). 767 768 Return value is either a file object, or None (indicating an 769 error). In either case, the headers are sent, making the 770 interface the same as for send_head(). 771 772 """ 773 try: 774 list = os.listdir(path) 775 except OSError: 776 self.send_error( 777 HTTPStatus.NOT_FOUND, 778 "No permission to list directory") 779 return None 780 list.sort(key=lambda a: a.lower()) 781 r = [] 782 try: 783 displaypath = urllib.parse.unquote(self.path, 784 errors='surrogatepass') 785 except UnicodeDecodeError: 786 displaypath = urllib.parse.unquote(path) 787 displaypath = html.escape(displaypath, quote=False) 788 enc = sys.getfilesystemencoding() 789 title = 'Directory listing for %s' % displaypath 790 r.append('<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" ' 791 '"http://www.w3.org/TR/html4/strict.dtd">') 792 r.append('<html>\n<head>') 793 r.append('<meta http-equiv="Content-Type" ' 794 'content="text/html; charset=%s">' % enc) 795 r.append('<title>%s</title>\n</head>' % title) 796 r.append('<body>\n<h1>%s</h1>' % title) 797 r.append('<hr>\n<ul>') 798 for name in list: 799 fullname = os.path.join(path, name) 800 displayname = linkname = name 801 # Append / for directories or @ for symbolic links 802 if os.path.isdir(fullname): 803 displayname = name + "/" 804 linkname = name + "/" 805 if os.path.islink(fullname): 806 displayname = name + "@" 807 # Note: a link to a directory displays with @ and links with / 808 r.append('<li><a href="%s">%s</a></li>' 809 % (urllib.parse.quote(linkname, 810 errors='surrogatepass'), 811 html.escape(displayname, quote=False))) 812 r.append('</ul>\n<hr>\n</body>\n</html>\n') 813 encoded = '\n'.join(r).encode(enc, 'surrogateescape') 814 f = io.BytesIO() 815 f.write(encoded) 816 f.seek(0) 817 self.send_response(HTTPStatus.OK) 818 self.send_header("Content-type", "text/html; charset=%s" % enc) 819 self.send_header("Content-Length", str(len(encoded))) 820 self.end_headers() 821 return f 822 823 def translate_path(self, path): 824 """Translate a /-separated PATH to the local filename syntax. 825 826 Components that mean special things to the local file system 827 (e.g. drive or directory names) are ignored. (XXX They should 828 probably be diagnosed.) 829 830 """ 831 # abandon query parameters 832 path = path.split('?',1)[0] 833 path = path.split('#',1)[0] 834 # Don't forget explicit trailing slash when normalizing. Issue17324 835 trailing_slash = path.rstrip().endswith('/') 836 try: 837 path = urllib.parse.unquote(path, errors='surrogatepass') 838 except UnicodeDecodeError: 839 path = urllib.parse.unquote(path) 840 path = posixpath.normpath(path) 841 words = path.split('/') 842 words = filter(None, words) 843 path = self.directory 844 for word in words: 845 if os.path.dirname(word) or word in (os.curdir, os.pardir): 846 # Ignore components that are not a simple file/directory name 847 continue 848 path = os.path.join(path, word) 849 if trailing_slash: 850 path += '/' 851 return path 852 853 def copyfile(self, source, outputfile): 854 """Copy all data between two file objects. 855 856 The SOURCE argument is a file object open for reading 857 (or anything with a read() method) and the DESTINATION 858 argument is a file object open for writing (or 859 anything with a write() method). 860 861 The only reason for overriding this would be to change 862 the block size or perhaps to replace newlines by CRLF 863 -- note however that this the default server uses this 864 to copy binary data as well. 865 866 """ 867 shutil.copyfileobj(source, outputfile) 868 869 def guess_type(self, path): 870 """Guess the type of a file. 871 872 Argument is a PATH (a filename). 873 874 Return value is a string of the form type/subtype, 875 usable for a MIME Content-type header. 876 877 The default implementation looks the file's extension 878 up in the table self.extensions_map, using application/octet-stream 879 as a default; however it would be permissible (if 880 slow) to look inside the data to make a better guess. 881 882 """ 883 base, ext = posixpath.splitext(path) 884 if ext in self.extensions_map: 885 return self.extensions_map[ext] 886 ext = ext.lower() 887 if ext in self.extensions_map: 888 return self.extensions_map[ext] 889 guess, _ = mimetypes.guess_type(path) 890 if guess: 891 return guess 892 return 'application/octet-stream' 893 894 895# Utilities for CGIHTTPRequestHandler 896 897def _url_collapse_path(path): 898 """ 899 Given a URL path, remove extra '/'s and '.' path elements and collapse 900 any '..' references and returns a collapsed path. 901 902 Implements something akin to RFC-2396 5.2 step 6 to parse relative paths. 903 The utility of this function is limited to is_cgi method and helps 904 preventing some security attacks. 905 906 Returns: The reconstituted URL, which will always start with a '/'. 907 908 Raises: IndexError if too many '..' occur within the path. 909 910 """ 911 # Query component should not be involved. 912 path, _, query = path.partition('?') 913 path = urllib.parse.unquote(path) 914 915 # Similar to os.path.split(os.path.normpath(path)) but specific to URL 916 # path semantics rather than local operating system semantics. 917 path_parts = path.split('/') 918 head_parts = [] 919 for part in path_parts[:-1]: 920 if part == '..': 921 head_parts.pop() # IndexError if more '..' than prior parts 922 elif part and part != '.': 923 head_parts.append( part ) 924 if path_parts: 925 tail_part = path_parts.pop() 926 if tail_part: 927 if tail_part == '..': 928 head_parts.pop() 929 tail_part = '' 930 elif tail_part == '.': 931 tail_part = '' 932 else: 933 tail_part = '' 934 935 if query: 936 tail_part = '?'.join((tail_part, query)) 937 938 splitpath = ('/' + '/'.join(head_parts), tail_part) 939 collapsed_path = "/".join(splitpath) 940 941 return collapsed_path 942 943 944 945nobody = None 946 947def nobody_uid(): 948 """Internal routine to get nobody's uid""" 949 global nobody 950 if nobody: 951 return nobody 952 try: 953 import pwd 954 except ImportError: 955 return -1 956 try: 957 nobody = pwd.getpwnam('nobody')[2] 958 except KeyError: 959 nobody = 1 + max(x[2] for x in pwd.getpwall()) 960 return nobody 961 962 963def executable(path): 964 """Test for executable file.""" 965 return os.access(path, os.X_OK) 966 967 968class CGIHTTPRequestHandler(SimpleHTTPRequestHandler): 969 970 """Complete HTTP server with GET, HEAD and POST commands. 971 972 GET and HEAD also support running CGI scripts. 973 974 The POST command is *only* implemented for CGI scripts. 975 976 """ 977 978 # Determine platform specifics 979 have_fork = hasattr(os, 'fork') 980 981 # Make rfile unbuffered -- we need to read one line and then pass 982 # the rest to a subprocess, so we can't use buffered input. 983 rbufsize = 0 984 985 def do_POST(self): 986 """Serve a POST request. 987 988 This is only implemented for CGI scripts. 989 990 """ 991 992 if self.is_cgi(): 993 self.run_cgi() 994 else: 995 self.send_error( 996 HTTPStatus.NOT_IMPLEMENTED, 997 "Can only POST to CGI scripts") 998 999 def send_head(self): 1000 """Version of send_head that support CGI scripts""" 1001 if self.is_cgi(): 1002 return self.run_cgi() 1003 else: 1004 return SimpleHTTPRequestHandler.send_head(self) 1005 1006 def is_cgi(self): 1007 """Test whether self.path corresponds to a CGI script. 1008 1009 Returns True and updates the cgi_info attribute to the tuple 1010 (dir, rest) if self.path requires running a CGI script. 1011 Returns False otherwise. 1012 1013 If any exception is raised, the caller should assume that 1014 self.path was rejected as invalid and act accordingly. 1015 1016 The default implementation tests whether the normalized url 1017 path begins with one of the strings in self.cgi_directories 1018 (and the next character is a '/' or the end of the string). 1019 1020 """ 1021 collapsed_path = _url_collapse_path(self.path) 1022 dir_sep = collapsed_path.find('/', 1) 1023 while dir_sep > 0 and not collapsed_path[:dir_sep] in self.cgi_directories: 1024 dir_sep = collapsed_path.find('/', dir_sep+1) 1025 if dir_sep > 0: 1026 head, tail = collapsed_path[:dir_sep], collapsed_path[dir_sep+1:] 1027 self.cgi_info = head, tail 1028 return True 1029 return False 1030 1031 1032 cgi_directories = ['/cgi-bin', '/htbin'] 1033 1034 def is_executable(self, path): 1035 """Test whether argument path is an executable file.""" 1036 return executable(path) 1037 1038 def is_python(self, path): 1039 """Test whether argument path is a Python script.""" 1040 head, tail = os.path.splitext(path) 1041 return tail.lower() in (".py", ".pyw") 1042 1043 def run_cgi(self): 1044 """Execute a CGI script.""" 1045 dir, rest = self.cgi_info 1046 path = dir + '/' + rest 1047 i = path.find('/', len(dir)+1) 1048 while i >= 0: 1049 nextdir = path[:i] 1050 nextrest = path[i+1:] 1051 1052 scriptdir = self.translate_path(nextdir) 1053 if os.path.isdir(scriptdir): 1054 dir, rest = nextdir, nextrest 1055 i = path.find('/', len(dir)+1) 1056 else: 1057 break 1058 1059 # find an explicit query string, if present. 1060 rest, _, query = rest.partition('?') 1061 1062 # dissect the part after the directory name into a script name & 1063 # a possible additional path, to be stored in PATH_INFO. 1064 i = rest.find('/') 1065 if i >= 0: 1066 script, rest = rest[:i], rest[i:] 1067 else: 1068 script, rest = rest, '' 1069 1070 scriptname = dir + '/' + script 1071 scriptfile = self.translate_path(scriptname) 1072 if not os.path.exists(scriptfile): 1073 self.send_error( 1074 HTTPStatus.NOT_FOUND, 1075 "No such CGI script (%r)" % scriptname) 1076 return 1077 if not os.path.isfile(scriptfile): 1078 self.send_error( 1079 HTTPStatus.FORBIDDEN, 1080 "CGI script is not a plain file (%r)" % scriptname) 1081 return 1082 ispy = self.is_python(scriptname) 1083 if self.have_fork or not ispy: 1084 if not self.is_executable(scriptfile): 1085 self.send_error( 1086 HTTPStatus.FORBIDDEN, 1087 "CGI script is not executable (%r)" % scriptname) 1088 return 1089 1090 # Reference: http://hoohoo.ncsa.uiuc.edu/cgi/env.html 1091 # XXX Much of the following could be prepared ahead of time! 1092 env = copy.deepcopy(os.environ) 1093 env['SERVER_SOFTWARE'] = self.version_string() 1094 env['SERVER_NAME'] = self.server.server_name 1095 env['GATEWAY_INTERFACE'] = 'CGI/1.1' 1096 env['SERVER_PROTOCOL'] = self.protocol_version 1097 env['SERVER_PORT'] = str(self.server.server_port) 1098 env['REQUEST_METHOD'] = self.command 1099 uqrest = urllib.parse.unquote(rest) 1100 env['PATH_INFO'] = uqrest 1101 env['PATH_TRANSLATED'] = self.translate_path(uqrest) 1102 env['SCRIPT_NAME'] = scriptname 1103 env['QUERY_STRING'] = query 1104 env['REMOTE_ADDR'] = self.client_address[0] 1105 authorization = self.headers.get("authorization") 1106 if authorization: 1107 authorization = authorization.split() 1108 if len(authorization) == 2: 1109 import base64, binascii 1110 env['AUTH_TYPE'] = authorization[0] 1111 if authorization[0].lower() == "basic": 1112 try: 1113 authorization = authorization[1].encode('ascii') 1114 authorization = base64.decodebytes(authorization).\ 1115 decode('ascii') 1116 except (binascii.Error, UnicodeError): 1117 pass 1118 else: 1119 authorization = authorization.split(':') 1120 if len(authorization) == 2: 1121 env['REMOTE_USER'] = authorization[0] 1122 # XXX REMOTE_IDENT 1123 if self.headers.get('content-type') is None: 1124 env['CONTENT_TYPE'] = self.headers.get_content_type() 1125 else: 1126 env['CONTENT_TYPE'] = self.headers['content-type'] 1127 length = self.headers.get('content-length') 1128 if length: 1129 env['CONTENT_LENGTH'] = length 1130 referer = self.headers.get('referer') 1131 if referer: 1132 env['HTTP_REFERER'] = referer 1133 accept = self.headers.get_all('accept', ()) 1134 env['HTTP_ACCEPT'] = ','.join(accept) 1135 ua = self.headers.get('user-agent') 1136 if ua: 1137 env['HTTP_USER_AGENT'] = ua 1138 co = filter(None, self.headers.get_all('cookie', [])) 1139 cookie_str = ', '.join(co) 1140 if cookie_str: 1141 env['HTTP_COOKIE'] = cookie_str 1142 # XXX Other HTTP_* headers 1143 # Since we're setting the env in the parent, provide empty 1144 # values to override previously set values 1145 for k in ('QUERY_STRING', 'REMOTE_HOST', 'CONTENT_LENGTH', 1146 'HTTP_USER_AGENT', 'HTTP_COOKIE', 'HTTP_REFERER'): 1147 env.setdefault(k, "") 1148 1149 self.send_response(HTTPStatus.OK, "Script output follows") 1150 self.flush_headers() 1151 1152 decoded_query = query.replace('+', ' ') 1153 1154 if self.have_fork: 1155 # Unix -- fork as we should 1156 args = [script] 1157 if '=' not in decoded_query: 1158 args.append(decoded_query) 1159 nobody = nobody_uid() 1160 self.wfile.flush() # Always flush before forking 1161 pid = os.fork() 1162 if pid != 0: 1163 # Parent 1164 pid, sts = os.waitpid(pid, 0) 1165 # throw away additional data [see bug #427345] 1166 while select.select([self.rfile], [], [], 0)[0]: 1167 if not self.rfile.read(1): 1168 break 1169 exitcode = os.waitstatus_to_exitcode(sts) 1170 if exitcode: 1171 self.log_error(f"CGI script exit code {exitcode}") 1172 return 1173 # Child 1174 try: 1175 try: 1176 os.setuid(nobody) 1177 except OSError: 1178 pass 1179 os.dup2(self.rfile.fileno(), 0) 1180 os.dup2(self.wfile.fileno(), 1) 1181 os.execve(scriptfile, args, env) 1182 except: 1183 self.server.handle_error(self.request, self.client_address) 1184 os._exit(127) 1185 1186 else: 1187 # Non-Unix -- use subprocess 1188 import subprocess 1189 cmdline = [scriptfile] 1190 if self.is_python(scriptfile): 1191 interp = sys.executable 1192 if interp.lower().endswith("w.exe"): 1193 # On Windows, use python.exe, not pythonw.exe 1194 interp = interp[:-5] + interp[-4:] 1195 cmdline = [interp, '-u'] + cmdline 1196 if '=' not in query: 1197 cmdline.append(query) 1198 self.log_message("command: %s", subprocess.list2cmdline(cmdline)) 1199 try: 1200 nbytes = int(length) 1201 except (TypeError, ValueError): 1202 nbytes = 0 1203 p = subprocess.Popen(cmdline, 1204 stdin=subprocess.PIPE, 1205 stdout=subprocess.PIPE, 1206 stderr=subprocess.PIPE, 1207 env = env 1208 ) 1209 if self.command.lower() == "post" and nbytes > 0: 1210 data = self.rfile.read(nbytes) 1211 else: 1212 data = None 1213 # throw away additional data [see bug #427345] 1214 while select.select([self.rfile._sock], [], [], 0)[0]: 1215 if not self.rfile._sock.recv(1): 1216 break 1217 stdout, stderr = p.communicate(data) 1218 self.wfile.write(stdout) 1219 if stderr: 1220 self.log_error('%s', stderr) 1221 p.stderr.close() 1222 p.stdout.close() 1223 status = p.returncode 1224 if status: 1225 self.log_error("CGI script exit status %#x", status) 1226 else: 1227 self.log_message("CGI script exited OK") 1228 1229 1230def _get_best_family(*address): 1231 infos = socket.getaddrinfo( 1232 *address, 1233 type=socket.SOCK_STREAM, 1234 flags=socket.AI_PASSIVE, 1235 ) 1236 family, type, proto, canonname, sockaddr = next(iter(infos)) 1237 return family, sockaddr 1238 1239 1240def test(HandlerClass=BaseHTTPRequestHandler, 1241 ServerClass=ThreadingHTTPServer, 1242 protocol="HTTP/1.0", port=8000, bind=None): 1243 """Test the HTTP request handler class. 1244 1245 This runs an HTTP server on port 8000 (or the port argument). 1246 1247 """ 1248 ServerClass.address_family, addr = _get_best_family(bind, port) 1249 1250 HandlerClass.protocol_version = protocol 1251 with ServerClass(addr, HandlerClass) as httpd: 1252 host, port = httpd.socket.getsockname()[:2] 1253 url_host = f'[{host}]' if ':' in host else host 1254 print( 1255 f"Serving HTTP on {host} port {port} " 1256 f"(http://{url_host}:{port}/) ..." 1257 ) 1258 try: 1259 httpd.serve_forever() 1260 except KeyboardInterrupt: 1261 print("\nKeyboard interrupt received, exiting.") 1262 sys.exit(0) 1263 1264if __name__ == '__main__': 1265 import argparse 1266 1267 parser = argparse.ArgumentParser() 1268 parser.add_argument('--cgi', action='store_true', 1269 help='Run as CGI Server') 1270 parser.add_argument('--bind', '-b', metavar='ADDRESS', 1271 help='Specify alternate bind address ' 1272 '[default: all interfaces]') 1273 parser.add_argument('--directory', '-d', default=os.getcwd(), 1274 help='Specify alternative directory ' 1275 '[default:current directory]') 1276 parser.add_argument('port', action='store', 1277 default=8000, type=int, 1278 nargs='?', 1279 help='Specify alternate port [default: 8000]') 1280 args = parser.parse_args() 1281 if args.cgi: 1282 handler_class = CGIHTTPRequestHandler 1283 else: 1284 handler_class = partial(SimpleHTTPRequestHandler, 1285 directory=args.directory) 1286 1287 # ensure dual-stack is not disabled; ref #38907 1288 class DualStackServer(ThreadingHTTPServer): 1289 def server_bind(self): 1290 # suppress exception when protocol is IPv4 1291 with contextlib.suppress(Exception): 1292 self.socket.setsockopt( 1293 socket.IPPROTO_IPV6, socket.IPV6_V6ONLY, 0) 1294 return super().server_bind() 1295 1296 test( 1297 HandlerClass=handler_class, 1298 ServerClass=DualStackServer, 1299 port=args.port, 1300 bind=args.bind, 1301 ) 1302