1"""HTTP server classes. 2 3Note: BaseHTTPRequestHandler doesn't implement any HTTP request; see 4SimpleHTTPRequestHandler for simple implementations of GET, HEAD and POST, 5and (deprecated) CGIHTTPRequestHandler for CGI scripts. 6 7It does, however, optionally implement HTTP/1.1 persistent connections. 8 9Notes on CGIHTTPRequestHandler 10------------------------------ 11 12This class is deprecated. It implements GET and POST requests to cgi-bin scripts. 13 14If the os.fork() function is not present (Windows), subprocess.Popen() is used, 15with slightly altered but never documented semantics. Use from a threaded 16process is likely to trigger a warning at os.fork() time. 17 18In all cases, the implementation is intentionally naive -- all 19requests are executed synchronously. 20 21SECURITY WARNING: DON'T USE THIS CODE UNLESS YOU ARE INSIDE A FIREWALL 22-- it may execute arbitrary Python code or external programs. 23 24Note that status code 200 is sent prior to execution of a CGI script, so 25scripts cannot send other status codes such as 302 (redirect). 26 27XXX To do: 28 29- log requests even later (to capture byte count) 30- log user-agent header and other interesting goodies 31- send error log to separate file 32""" 33 34 35# See also: 36# 37# HTTP Working Group T. Berners-Lee 38# INTERNET-DRAFT R. T. Fielding 39# <draft-ietf-http-v10-spec-00.txt> H. Frystyk Nielsen 40# Expires September 8, 1995 March 8, 1995 41# 42# URL: http://www.ics.uci.edu/pub/ietf/http/draft-ietf-http-v10-spec-00.txt 43# 44# and 45# 46# Network Working Group R. Fielding 47# Request for Comments: 2616 et al 48# Obsoletes: 2068 June 1999 49# Category: Standards Track 50# 51# URL: http://www.faqs.org/rfcs/rfc2616.html 52 53# Log files 54# --------- 55# 56# Here's a quote from the NCSA httpd docs about log file format. 57# 58# | The logfile format is as follows. Each line consists of: 59# | 60# | host rfc931 authuser [DD/Mon/YYYY:hh:mm:ss] "request" ddd bbbb 61# | 62# | host: Either the DNS name or the IP number of the remote client 63# | rfc931: Any information returned by identd for this person, 64# | - otherwise. 65# | authuser: If user sent a userid for authentication, the user name, 66# | - otherwise. 67# | DD: Day 68# | Mon: Month (calendar name) 69# | YYYY: Year 70# | hh: hour (24-hour format, the machine's timezone) 71# | mm: minutes 72# | ss: seconds 73# | request: The first line of the HTTP request as sent by the client. 74# | ddd: the status code returned by the server, - if not available. 75# | bbbb: the total number of bytes sent, 76# | *not including the HTTP/1.0 header*, - if not available 77# | 78# | You can determine the name of the file accessed through request. 79# 80# (Actually, the latter is only true if you know the server configuration 81# at the time the request was made!) 82 83__version__ = "0.6" 84 85__all__ = [ 86 "HTTPServer", "ThreadingHTTPServer", "BaseHTTPRequestHandler", 87 "SimpleHTTPRequestHandler", "CGIHTTPRequestHandler", 88] 89 90import copy 91import datetime 92import email.utils 93import html 94import http.client 95import io 96import itertools 97import mimetypes 98import os 99import posixpath 100import select 101import shutil 102import socket # For gethostbyaddr() 103import socketserver 104import sys 105import time 106import urllib.parse 107 108from http import HTTPStatus 109 110 111# Default error message template 112DEFAULT_ERROR_MESSAGE = """\ 113<!DOCTYPE HTML> 114<html lang="en"> 115 <head> 116 <meta charset="utf-8"> 117 <title>Error response</title> 118 </head> 119 <body> 120 <h1>Error response</h1> 121 <p>Error code: %(code)d</p> 122 <p>Message: %(message)s.</p> 123 <p>Error code explanation: %(code)s - %(explain)s.</p> 124 </body> 125</html> 126""" 127 128DEFAULT_ERROR_CONTENT_TYPE = "text/html;charset=utf-8" 129 130class HTTPServer(socketserver.TCPServer): 131 132 allow_reuse_address = 1 # Seems to make sense in testing environment 133 134 def server_bind(self): 135 """Override server_bind to store the server name.""" 136 socketserver.TCPServer.server_bind(self) 137 host, port = self.server_address[:2] 138 self.server_name = socket.getfqdn(host) 139 self.server_port = port 140 141 142class ThreadingHTTPServer(socketserver.ThreadingMixIn, HTTPServer): 143 daemon_threads = True 144 145 146class BaseHTTPRequestHandler(socketserver.StreamRequestHandler): 147 148 """HTTP request handler base class. 149 150 The following explanation of HTTP serves to guide you through the 151 code as well as to expose any misunderstandings I may have about 152 HTTP (so you don't need to read the code to figure out I'm wrong 153 :-). 154 155 HTTP (HyperText Transfer Protocol) is an extensible protocol on 156 top of a reliable stream transport (e.g. TCP/IP). The protocol 157 recognizes three parts to a request: 158 159 1. One line identifying the request type and path 160 2. An optional set of RFC-822-style headers 161 3. An optional data part 162 163 The headers and data are separated by a blank line. 164 165 The first line of the request has the form 166 167 <command> <path> <version> 168 169 where <command> is a (case-sensitive) keyword such as GET or POST, 170 <path> is a string containing path information for the request, 171 and <version> should be the string "HTTP/1.0" or "HTTP/1.1". 172 <path> is encoded using the URL encoding scheme (using %xx to signify 173 the ASCII character with hex code xx). 174 175 The specification specifies that lines are separated by CRLF but 176 for compatibility with the widest range of clients recommends 177 servers also handle LF. Similarly, whitespace in the request line 178 is treated sensibly (allowing multiple spaces between components 179 and allowing trailing whitespace). 180 181 Similarly, for output, lines ought to be separated by CRLF pairs 182 but most clients grok LF characters just fine. 183 184 If the first line of the request has the form 185 186 <command> <path> 187 188 (i.e. <version> is left out) then this is assumed to be an HTTP 189 0.9 request; this form has no optional headers and data part and 190 the reply consists of just the data. 191 192 The reply form of the HTTP 1.x protocol again has three parts: 193 194 1. One line giving the response code 195 2. An optional set of RFC-822-style headers 196 3. The data 197 198 Again, the headers and data are separated by a blank line. 199 200 The response code line has the form 201 202 <version> <responsecode> <responsestring> 203 204 where <version> is the protocol version ("HTTP/1.0" or "HTTP/1.1"), 205 <responsecode> is a 3-digit response code indicating success or 206 failure of the request, and <responsestring> is an optional 207 human-readable string explaining what the response code means. 208 209 This server parses the request and the headers, and then calls a 210 function specific to the request type (<command>). Specifically, 211 a request SPAM will be handled by a method do_SPAM(). If no 212 such method exists the server sends an error response to the 213 client. If it exists, it is called with no arguments: 214 215 do_SPAM() 216 217 Note that the request name is case sensitive (i.e. SPAM and spam 218 are different requests). 219 220 The various request details are stored in instance variables: 221 222 - client_address is the client IP address in the form (host, 223 port); 224 225 - command, path and version are the broken-down request line; 226 227 - headers is an instance of email.message.Message (or a derived 228 class) containing the header information; 229 230 - rfile is a file object open for reading positioned at the 231 start of the optional input data part; 232 233 - wfile is a file object open for writing. 234 235 IT IS IMPORTANT TO ADHERE TO THE PROTOCOL FOR WRITING! 236 237 The first thing to be written must be the response line. Then 238 follow 0 or more header lines, then a blank line, and then the 239 actual data (if any). The meaning of the header lines depends on 240 the command executed by the server; in most cases, when data is 241 returned, there should be at least one header line of the form 242 243 Content-type: <type>/<subtype> 244 245 where <type> and <subtype> should be registered MIME types, 246 e.g. "text/html" or "text/plain". 247 248 """ 249 250 # The Python system version, truncated to its first component. 251 sys_version = "Python/" + sys.version.split()[0] 252 253 # The server software version. You may want to override this. 254 # The format is multiple whitespace-separated strings, 255 # where each string is of the form name[/version]. 256 server_version = "BaseHTTP/" + __version__ 257 258 error_message_format = DEFAULT_ERROR_MESSAGE 259 error_content_type = DEFAULT_ERROR_CONTENT_TYPE 260 261 # The default request version. This only affects responses up until 262 # the point where the request line is parsed, so it mainly decides what 263 # the client gets back when sending a malformed request line. 264 # Most web servers default to HTTP 0.9, i.e. don't send a status line. 265 default_request_version = "HTTP/0.9" 266 267 def parse_request(self): 268 """Parse a request (internal). 269 270 The request should be stored in self.raw_requestline; the results 271 are in self.command, self.path, self.request_version and 272 self.headers. 273 274 Return True for success, False for failure; on failure, any relevant 275 error response has already been sent back. 276 277 """ 278 self.command = None # set in case of error on the first line 279 self.request_version = version = self.default_request_version 280 self.close_connection = True 281 requestline = str(self.raw_requestline, 'iso-8859-1') 282 requestline = requestline.rstrip('\r\n') 283 self.requestline = requestline 284 words = requestline.split() 285 if len(words) == 0: 286 return False 287 288 if len(words) >= 3: # Enough to determine protocol version 289 version = words[-1] 290 try: 291 if not version.startswith('HTTP/'): 292 raise ValueError 293 base_version_number = version.split('/', 1)[1] 294 version_number = base_version_number.split(".") 295 # RFC 2145 section 3.1 says there can be only one "." and 296 # - major and minor numbers MUST be treated as 297 # separate integers; 298 # - HTTP/2.4 is a lower version than HTTP/2.13, which in 299 # turn is lower than HTTP/12.3; 300 # - Leading zeros MUST be ignored by recipients. 301 if len(version_number) != 2: 302 raise ValueError 303 if any(not component.isdigit() for component in version_number): 304 raise ValueError("non digit in http version") 305 if any(len(component) > 10 for component in version_number): 306 raise ValueError("unreasonable length http version") 307 version_number = int(version_number[0]), int(version_number[1]) 308 except (ValueError, IndexError): 309 self.send_error( 310 HTTPStatus.BAD_REQUEST, 311 "Bad request version (%r)" % version) 312 return False 313 if version_number >= (1, 1) and self.protocol_version >= "HTTP/1.1": 314 self.close_connection = False 315 if version_number >= (2, 0): 316 self.send_error( 317 HTTPStatus.HTTP_VERSION_NOT_SUPPORTED, 318 "Invalid HTTP version (%s)" % base_version_number) 319 return False 320 self.request_version = version 321 322 if not 2 <= len(words) <= 3: 323 self.send_error( 324 HTTPStatus.BAD_REQUEST, 325 "Bad request syntax (%r)" % requestline) 326 return False 327 command, path = words[:2] 328 if len(words) == 2: 329 self.close_connection = True 330 if command != 'GET': 331 self.send_error( 332 HTTPStatus.BAD_REQUEST, 333 "Bad HTTP/0.9 request type (%r)" % command) 334 return False 335 self.command, self.path = command, path 336 337 # gh-87389: The purpose of replacing '//' with '/' is to protect 338 # against open redirect attacks possibly triggered if the path starts 339 # with '//' because http clients treat //path as an absolute URI 340 # without scheme (similar to http://path) rather than a path. 341 if self.path.startswith('//'): 342 self.path = '/' + self.path.lstrip('/') # Reduce to a single / 343 344 # Examine the headers and look for a Connection directive. 345 try: 346 self.headers = http.client.parse_headers(self.rfile, 347 _class=self.MessageClass) 348 except http.client.LineTooLong as err: 349 self.send_error( 350 HTTPStatus.REQUEST_HEADER_FIELDS_TOO_LARGE, 351 "Line too long", 352 str(err)) 353 return False 354 except http.client.HTTPException as err: 355 self.send_error( 356 HTTPStatus.REQUEST_HEADER_FIELDS_TOO_LARGE, 357 "Too many headers", 358 str(err) 359 ) 360 return False 361 362 conntype = self.headers.get('Connection', "") 363 if conntype.lower() == 'close': 364 self.close_connection = True 365 elif (conntype.lower() == 'keep-alive' and 366 self.protocol_version >= "HTTP/1.1"): 367 self.close_connection = False 368 # Examine the headers and look for an Expect directive 369 expect = self.headers.get('Expect', "") 370 if (expect.lower() == "100-continue" and 371 self.protocol_version >= "HTTP/1.1" and 372 self.request_version >= "HTTP/1.1"): 373 if not self.handle_expect_100(): 374 return False 375 return True 376 377 def handle_expect_100(self): 378 """Decide what to do with an "Expect: 100-continue" header. 379 380 If the client is expecting a 100 Continue response, we must 381 respond with either a 100 Continue or a final response before 382 waiting for the request body. The default is to always respond 383 with a 100 Continue. You can behave differently (for example, 384 reject unauthorized requests) by overriding this method. 385 386 This method should either return True (possibly after sending 387 a 100 Continue response) or send an error response and return 388 False. 389 390 """ 391 self.send_response_only(HTTPStatus.CONTINUE) 392 self.end_headers() 393 return True 394 395 def handle_one_request(self): 396 """Handle a single HTTP request. 397 398 You normally don't need to override this method; see the class 399 __doc__ string for information on how to handle specific HTTP 400 commands such as GET and POST. 401 402 """ 403 try: 404 self.raw_requestline = self.rfile.readline(65537) 405 if len(self.raw_requestline) > 65536: 406 self.requestline = '' 407 self.request_version = '' 408 self.command = '' 409 self.send_error(HTTPStatus.REQUEST_URI_TOO_LONG) 410 return 411 if not self.raw_requestline: 412 self.close_connection = True 413 return 414 if not self.parse_request(): 415 # An error code has been sent, just exit 416 return 417 mname = 'do_' + self.command 418 if not hasattr(self, mname): 419 self.send_error( 420 HTTPStatus.NOT_IMPLEMENTED, 421 "Unsupported method (%r)" % self.command) 422 return 423 method = getattr(self, mname) 424 method() 425 self.wfile.flush() #actually send the response if not already done. 426 except TimeoutError as e: 427 #a read or a write timed out. Discard this connection 428 self.log_error("Request timed out: %r", e) 429 self.close_connection = True 430 return 431 432 def handle(self): 433 """Handle multiple requests if necessary.""" 434 self.close_connection = True 435 436 self.handle_one_request() 437 while not self.close_connection: 438 self.handle_one_request() 439 440 def send_error(self, code, message=None, explain=None): 441 """Send and log an error reply. 442 443 Arguments are 444 * code: an HTTP error code 445 3 digits 446 * message: a simple optional 1 line reason phrase. 447 *( HTAB / SP / VCHAR / %x80-FF ) 448 defaults to short entry matching the response code 449 * explain: a detailed message defaults to the long entry 450 matching the response code. 451 452 This sends an error response (so it must be called before any 453 output has been generated), logs the error, and finally sends 454 a piece of HTML explaining the error to the user. 455 456 """ 457 458 try: 459 shortmsg, longmsg = self.responses[code] 460 except KeyError: 461 shortmsg, longmsg = '???', '???' 462 if message is None: 463 message = shortmsg 464 if explain is None: 465 explain = longmsg 466 self.log_error("code %d, message %s", code, message) 467 self.send_response(code, message) 468 self.send_header('Connection', 'close') 469 470 # Message body is omitted for cases described in: 471 # - RFC7230: 3.3. 1xx, 204(No Content), 304(Not Modified) 472 # - RFC7231: 6.3.6. 205(Reset Content) 473 body = None 474 if (code >= 200 and 475 code not in (HTTPStatus.NO_CONTENT, 476 HTTPStatus.RESET_CONTENT, 477 HTTPStatus.NOT_MODIFIED)): 478 # HTML encode to prevent Cross Site Scripting attacks 479 # (see bug #1100201) 480 content = (self.error_message_format % { 481 'code': code, 482 'message': html.escape(message, quote=False), 483 'explain': html.escape(explain, quote=False) 484 }) 485 body = content.encode('UTF-8', 'replace') 486 self.send_header("Content-Type", self.error_content_type) 487 self.send_header('Content-Length', str(len(body))) 488 self.end_headers() 489 490 if self.command != 'HEAD' and body: 491 self.wfile.write(body) 492 493 def send_response(self, code, message=None): 494 """Add the response header to the headers buffer and log the 495 response code. 496 497 Also send two standard headers with the server software 498 version and the current date. 499 500 """ 501 self.log_request(code) 502 self.send_response_only(code, message) 503 self.send_header('Server', self.version_string()) 504 self.send_header('Date', self.date_time_string()) 505 506 def send_response_only(self, code, message=None): 507 """Send the response header only.""" 508 if self.request_version != 'HTTP/0.9': 509 if message is None: 510 if code in self.responses: 511 message = self.responses[code][0] 512 else: 513 message = '' 514 if not hasattr(self, '_headers_buffer'): 515 self._headers_buffer = [] 516 self._headers_buffer.append(("%s %d %s\r\n" % 517 (self.protocol_version, code, message)).encode( 518 'latin-1', 'strict')) 519 520 def send_header(self, keyword, value): 521 """Send a MIME header to the headers buffer.""" 522 if self.request_version != 'HTTP/0.9': 523 if not hasattr(self, '_headers_buffer'): 524 self._headers_buffer = [] 525 self._headers_buffer.append( 526 ("%s: %s\r\n" % (keyword, value)).encode('latin-1', 'strict')) 527 528 if keyword.lower() == 'connection': 529 if value.lower() == 'close': 530 self.close_connection = True 531 elif value.lower() == 'keep-alive': 532 self.close_connection = False 533 534 def end_headers(self): 535 """Send the blank line ending the MIME headers.""" 536 if self.request_version != 'HTTP/0.9': 537 self._headers_buffer.append(b"\r\n") 538 self.flush_headers() 539 540 def flush_headers(self): 541 if hasattr(self, '_headers_buffer'): 542 self.wfile.write(b"".join(self._headers_buffer)) 543 self._headers_buffer = [] 544 545 def log_request(self, code='-', size='-'): 546 """Log an accepted request. 547 548 This is called by send_response(). 549 550 """ 551 if isinstance(code, HTTPStatus): 552 code = code.value 553 self.log_message('"%s" %s %s', 554 self.requestline, str(code), str(size)) 555 556 def log_error(self, format, *args): 557 """Log an error. 558 559 This is called when a request cannot be fulfilled. By 560 default it passes the message on to log_message(). 561 562 Arguments are the same as for log_message(). 563 564 XXX This should go to the separate error log. 565 566 """ 567 568 self.log_message(format, *args) 569 570 # https://en.wikipedia.org/wiki/List_of_Unicode_characters#Control_codes 571 _control_char_table = str.maketrans( 572 {c: fr'\x{c:02x}' for c in itertools.chain(range(0x20), range(0x7f,0xa0))}) 573 _control_char_table[ord('\\')] = r'\\' 574 575 def log_message(self, format, *args): 576 """Log an arbitrary message. 577 578 This is used by all other logging functions. Override 579 it if you have specific logging wishes. 580 581 The first argument, FORMAT, is a format string for the 582 message to be logged. If the format string contains 583 any % escapes requiring parameters, they should be 584 specified as subsequent arguments (it's just like 585 printf!). 586 587 The client ip and current date/time are prefixed to 588 every message. 589 590 Unicode control characters are replaced with escaped hex 591 before writing the output to stderr. 592 593 """ 594 595 message = format % args 596 sys.stderr.write("%s - - [%s] %s\n" % 597 (self.address_string(), 598 self.log_date_time_string(), 599 message.translate(self._control_char_table))) 600 601 def version_string(self): 602 """Return the server software version string.""" 603 return self.server_version + ' ' + self.sys_version 604 605 def date_time_string(self, timestamp=None): 606 """Return the current date and time formatted for a message header.""" 607 if timestamp is None: 608 timestamp = time.time() 609 return email.utils.formatdate(timestamp, usegmt=True) 610 611 def log_date_time_string(self): 612 """Return the current time formatted for logging.""" 613 now = time.time() 614 year, month, day, hh, mm, ss, x, y, z = time.localtime(now) 615 s = "%02d/%3s/%04d %02d:%02d:%02d" % ( 616 day, self.monthname[month], year, hh, mm, ss) 617 return s 618 619 weekdayname = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'] 620 621 monthname = [None, 622 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 623 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'] 624 625 def address_string(self): 626 """Return the client address.""" 627 628 return self.client_address[0] 629 630 # Essentially static class variables 631 632 # The version of the HTTP protocol we support. 633 # Set this to HTTP/1.1 to enable automatic keepalive 634 protocol_version = "HTTP/1.0" 635 636 # MessageClass used to parse headers 637 MessageClass = http.client.HTTPMessage 638 639 # hack to maintain backwards compatibility 640 responses = { 641 v: (v.phrase, v.description) 642 for v in HTTPStatus.__members__.values() 643 } 644 645 646class SimpleHTTPRequestHandler(BaseHTTPRequestHandler): 647 648 """Simple HTTP request handler with GET and HEAD commands. 649 650 This serves files from the current directory and any of its 651 subdirectories. The MIME type for files is determined by 652 calling the .guess_type() method. 653 654 The GET and HEAD requests are identical except that the HEAD 655 request omits the actual contents of the file. 656 657 """ 658 659 server_version = "SimpleHTTP/" + __version__ 660 index_pages = ("index.html", "index.htm") 661 extensions_map = _encodings_map_default = { 662 '.gz': 'application/gzip', 663 '.Z': 'application/octet-stream', 664 '.bz2': 'application/x-bzip2', 665 '.xz': 'application/x-xz', 666 } 667 668 def __init__(self, *args, directory=None, **kwargs): 669 if directory is None: 670 directory = os.getcwd() 671 self.directory = os.fspath(directory) 672 super().__init__(*args, **kwargs) 673 674 def do_GET(self): 675 """Serve a GET request.""" 676 f = self.send_head() 677 if f: 678 try: 679 self.copyfile(f, self.wfile) 680 finally: 681 f.close() 682 683 def do_HEAD(self): 684 """Serve a HEAD request.""" 685 f = self.send_head() 686 if f: 687 f.close() 688 689 def send_head(self): 690 """Common code for GET and HEAD commands. 691 692 This sends the response code and MIME headers. 693 694 Return value is either a file object (which has to be copied 695 to the outputfile by the caller unless the command was HEAD, 696 and must be closed by the caller under all circumstances), or 697 None, in which case the caller has nothing further to do. 698 699 """ 700 path = self.translate_path(self.path) 701 f = None 702 if os.path.isdir(path): 703 parts = urllib.parse.urlsplit(self.path) 704 if not parts.path.endswith('/'): 705 # redirect browser - doing basically what apache does 706 self.send_response(HTTPStatus.MOVED_PERMANENTLY) 707 new_parts = (parts[0], parts[1], parts[2] + '/', 708 parts[3], parts[4]) 709 new_url = urllib.parse.urlunsplit(new_parts) 710 self.send_header("Location", new_url) 711 self.send_header("Content-Length", "0") 712 self.end_headers() 713 return None 714 for index in self.index_pages: 715 index = os.path.join(path, index) 716 if os.path.isfile(index): 717 path = index 718 break 719 else: 720 return self.list_directory(path) 721 ctype = self.guess_type(path) 722 # check for trailing "/" which should return 404. See Issue17324 723 # The test for this was added in test_httpserver.py 724 # However, some OS platforms accept a trailingSlash as a filename 725 # See discussion on python-dev and Issue34711 regarding 726 # parsing and rejection of filenames with a trailing slash 727 if path.endswith("/"): 728 self.send_error(HTTPStatus.NOT_FOUND, "File not found") 729 return None 730 try: 731 f = open(path, 'rb') 732 except OSError: 733 self.send_error(HTTPStatus.NOT_FOUND, "File not found") 734 return None 735 736 try: 737 fs = os.fstat(f.fileno()) 738 # Use browser cache if possible 739 if ("If-Modified-Since" in self.headers 740 and "If-None-Match" not in self.headers): 741 # compare If-Modified-Since and time of last file modification 742 try: 743 ims = email.utils.parsedate_to_datetime( 744 self.headers["If-Modified-Since"]) 745 except (TypeError, IndexError, OverflowError, ValueError): 746 # ignore ill-formed values 747 pass 748 else: 749 if ims.tzinfo is None: 750 # obsolete format with no timezone, cf. 751 # https://tools.ietf.org/html/rfc7231#section-7.1.1.1 752 ims = ims.replace(tzinfo=datetime.timezone.utc) 753 if ims.tzinfo is datetime.timezone.utc: 754 # compare to UTC datetime of last modification 755 last_modif = datetime.datetime.fromtimestamp( 756 fs.st_mtime, datetime.timezone.utc) 757 # remove microseconds, like in If-Modified-Since 758 last_modif = last_modif.replace(microsecond=0) 759 760 if last_modif <= ims: 761 self.send_response(HTTPStatus.NOT_MODIFIED) 762 self.end_headers() 763 f.close() 764 return None 765 766 self.send_response(HTTPStatus.OK) 767 self.send_header("Content-type", ctype) 768 self.send_header("Content-Length", str(fs[6])) 769 self.send_header("Last-Modified", 770 self.date_time_string(fs.st_mtime)) 771 self.end_headers() 772 return f 773 except: 774 f.close() 775 raise 776 777 def list_directory(self, path): 778 """Helper to produce a directory listing (absent index.html). 779 780 Return value is either a file object, or None (indicating an 781 error). In either case, the headers are sent, making the 782 interface the same as for send_head(). 783 784 """ 785 try: 786 list = os.listdir(path) 787 except OSError: 788 self.send_error( 789 HTTPStatus.NOT_FOUND, 790 "No permission to list directory") 791 return None 792 list.sort(key=lambda a: a.lower()) 793 r = [] 794 try: 795 displaypath = urllib.parse.unquote(self.path, 796 errors='surrogatepass') 797 except UnicodeDecodeError: 798 displaypath = urllib.parse.unquote(self.path) 799 displaypath = html.escape(displaypath, quote=False) 800 enc = sys.getfilesystemencoding() 801 title = f'Directory listing for {displaypath}' 802 r.append('<!DOCTYPE HTML>') 803 r.append('<html lang="en">') 804 r.append('<head>') 805 r.append(f'<meta charset="{enc}">') 806 r.append(f'<title>{title}</title>\n</head>') 807 r.append(f'<body>\n<h1>{title}</h1>') 808 r.append('<hr>\n<ul>') 809 for name in list: 810 fullname = os.path.join(path, name) 811 displayname = linkname = name 812 # Append / for directories or @ for symbolic links 813 if os.path.isdir(fullname): 814 displayname = name + "/" 815 linkname = name + "/" 816 if os.path.islink(fullname): 817 displayname = name + "@" 818 # Note: a link to a directory displays with @ and links with / 819 r.append('<li><a href="%s">%s</a></li>' 820 % (urllib.parse.quote(linkname, 821 errors='surrogatepass'), 822 html.escape(displayname, quote=False))) 823 r.append('</ul>\n<hr>\n</body>\n</html>\n') 824 encoded = '\n'.join(r).encode(enc, 'surrogateescape') 825 f = io.BytesIO() 826 f.write(encoded) 827 f.seek(0) 828 self.send_response(HTTPStatus.OK) 829 self.send_header("Content-type", "text/html; charset=%s" % enc) 830 self.send_header("Content-Length", str(len(encoded))) 831 self.end_headers() 832 return f 833 834 def translate_path(self, path): 835 """Translate a /-separated PATH to the local filename syntax. 836 837 Components that mean special things to the local file system 838 (e.g. drive or directory names) are ignored. (XXX They should 839 probably be diagnosed.) 840 841 """ 842 # abandon query parameters 843 path = path.split('?',1)[0] 844 path = path.split('#',1)[0] 845 # Don't forget explicit trailing slash when normalizing. Issue17324 846 trailing_slash = path.rstrip().endswith('/') 847 try: 848 path = urllib.parse.unquote(path, errors='surrogatepass') 849 except UnicodeDecodeError: 850 path = urllib.parse.unquote(path) 851 path = posixpath.normpath(path) 852 words = path.split('/') 853 words = filter(None, words) 854 path = self.directory 855 for word in words: 856 if os.path.dirname(word) or word in (os.curdir, os.pardir): 857 # Ignore components that are not a simple file/directory name 858 continue 859 path = os.path.join(path, word) 860 if trailing_slash: 861 path += '/' 862 return path 863 864 def copyfile(self, source, outputfile): 865 """Copy all data between two file objects. 866 867 The SOURCE argument is a file object open for reading 868 (or anything with a read() method) and the DESTINATION 869 argument is a file object open for writing (or 870 anything with a write() method). 871 872 The only reason for overriding this would be to change 873 the block size or perhaps to replace newlines by CRLF 874 -- note however that this the default server uses this 875 to copy binary data as well. 876 877 """ 878 shutil.copyfileobj(source, outputfile) 879 880 def guess_type(self, path): 881 """Guess the type of a file. 882 883 Argument is a PATH (a filename). 884 885 Return value is a string of the form type/subtype, 886 usable for a MIME Content-type header. 887 888 The default implementation looks the file's extension 889 up in the table self.extensions_map, using application/octet-stream 890 as a default; however it would be permissible (if 891 slow) to look inside the data to make a better guess. 892 893 """ 894 base, ext = posixpath.splitext(path) 895 if ext in self.extensions_map: 896 return self.extensions_map[ext] 897 ext = ext.lower() 898 if ext in self.extensions_map: 899 return self.extensions_map[ext] 900 guess, _ = mimetypes.guess_file_type(path) 901 if guess: 902 return guess 903 return 'application/octet-stream' 904 905 906# Utilities for CGIHTTPRequestHandler 907 908def _url_collapse_path(path): 909 """ 910 Given a URL path, remove extra '/'s and '.' path elements and collapse 911 any '..' references and returns a collapsed path. 912 913 Implements something akin to RFC-2396 5.2 step 6 to parse relative paths. 914 The utility of this function is limited to is_cgi method and helps 915 preventing some security attacks. 916 917 Returns: The reconstituted URL, which will always start with a '/'. 918 919 Raises: IndexError if too many '..' occur within the path. 920 921 """ 922 # Query component should not be involved. 923 path, _, query = path.partition('?') 924 path = urllib.parse.unquote(path) 925 926 # Similar to os.path.split(os.path.normpath(path)) but specific to URL 927 # path semantics rather than local operating system semantics. 928 path_parts = path.split('/') 929 head_parts = [] 930 for part in path_parts[:-1]: 931 if part == '..': 932 head_parts.pop() # IndexError if more '..' than prior parts 933 elif part and part != '.': 934 head_parts.append( part ) 935 if path_parts: 936 tail_part = path_parts.pop() 937 if tail_part: 938 if tail_part == '..': 939 head_parts.pop() 940 tail_part = '' 941 elif tail_part == '.': 942 tail_part = '' 943 else: 944 tail_part = '' 945 946 if query: 947 tail_part = '?'.join((tail_part, query)) 948 949 splitpath = ('/' + '/'.join(head_parts), tail_part) 950 collapsed_path = "/".join(splitpath) 951 952 return collapsed_path 953 954 955 956nobody = None 957 958def nobody_uid(): 959 """Internal routine to get nobody's uid""" 960 global nobody 961 if nobody: 962 return nobody 963 try: 964 import pwd 965 except ImportError: 966 return -1 967 try: 968 nobody = pwd.getpwnam('nobody')[2] 969 except KeyError: 970 nobody = 1 + max(x[2] for x in pwd.getpwall()) 971 return nobody 972 973 974def executable(path): 975 """Test for executable file.""" 976 return os.access(path, os.X_OK) 977 978 979class CGIHTTPRequestHandler(SimpleHTTPRequestHandler): 980 981 """Complete HTTP server with GET, HEAD and POST commands. 982 983 GET and HEAD also support running CGI scripts. 984 985 The POST command is *only* implemented for CGI scripts. 986 987 """ 988 989 def __init__(self, *args, **kwargs): 990 import warnings 991 warnings._deprecated("http.server.CGIHTTPRequestHandler", 992 remove=(3, 15)) 993 super().__init__(*args, **kwargs) 994 995 # Determine platform specifics 996 have_fork = hasattr(os, 'fork') 997 998 # Make rfile unbuffered -- we need to read one line and then pass 999 # the rest to a subprocess, so we can't use buffered input. 1000 rbufsize = 0 1001 1002 def do_POST(self): 1003 """Serve a POST request. 1004 1005 This is only implemented for CGI scripts. 1006 1007 """ 1008 1009 if self.is_cgi(): 1010 self.run_cgi() 1011 else: 1012 self.send_error( 1013 HTTPStatus.NOT_IMPLEMENTED, 1014 "Can only POST to CGI scripts") 1015 1016 def send_head(self): 1017 """Version of send_head that support CGI scripts""" 1018 if self.is_cgi(): 1019 return self.run_cgi() 1020 else: 1021 return SimpleHTTPRequestHandler.send_head(self) 1022 1023 def is_cgi(self): 1024 """Test whether self.path corresponds to a CGI script. 1025 1026 Returns True and updates the cgi_info attribute to the tuple 1027 (dir, rest) if self.path requires running a CGI script. 1028 Returns False otherwise. 1029 1030 If any exception is raised, the caller should assume that 1031 self.path was rejected as invalid and act accordingly. 1032 1033 The default implementation tests whether the normalized url 1034 path begins with one of the strings in self.cgi_directories 1035 (and the next character is a '/' or the end of the string). 1036 1037 """ 1038 collapsed_path = _url_collapse_path(self.path) 1039 dir_sep = collapsed_path.find('/', 1) 1040 while dir_sep > 0 and not collapsed_path[:dir_sep] in self.cgi_directories: 1041 dir_sep = collapsed_path.find('/', dir_sep+1) 1042 if dir_sep > 0: 1043 head, tail = collapsed_path[:dir_sep], collapsed_path[dir_sep+1:] 1044 self.cgi_info = head, tail 1045 return True 1046 return False 1047 1048 1049 cgi_directories = ['/cgi-bin', '/htbin'] 1050 1051 def is_executable(self, path): 1052 """Test whether argument path is an executable file.""" 1053 return executable(path) 1054 1055 def is_python(self, path): 1056 """Test whether argument path is a Python script.""" 1057 head, tail = os.path.splitext(path) 1058 return tail.lower() in (".py", ".pyw") 1059 1060 def run_cgi(self): 1061 """Execute a CGI script.""" 1062 dir, rest = self.cgi_info 1063 path = dir + '/' + rest 1064 i = path.find('/', len(dir)+1) 1065 while i >= 0: 1066 nextdir = path[:i] 1067 nextrest = path[i+1:] 1068 1069 scriptdir = self.translate_path(nextdir) 1070 if os.path.isdir(scriptdir): 1071 dir, rest = nextdir, nextrest 1072 i = path.find('/', len(dir)+1) 1073 else: 1074 break 1075 1076 # find an explicit query string, if present. 1077 rest, _, query = rest.partition('?') 1078 1079 # dissect the part after the directory name into a script name & 1080 # a possible additional path, to be stored in PATH_INFO. 1081 i = rest.find('/') 1082 if i >= 0: 1083 script, rest = rest[:i], rest[i:] 1084 else: 1085 script, rest = rest, '' 1086 1087 scriptname = dir + '/' + script 1088 scriptfile = self.translate_path(scriptname) 1089 if not os.path.exists(scriptfile): 1090 self.send_error( 1091 HTTPStatus.NOT_FOUND, 1092 "No such CGI script (%r)" % scriptname) 1093 return 1094 if not os.path.isfile(scriptfile): 1095 self.send_error( 1096 HTTPStatus.FORBIDDEN, 1097 "CGI script is not a plain file (%r)" % scriptname) 1098 return 1099 ispy = self.is_python(scriptname) 1100 if self.have_fork or not ispy: 1101 if not self.is_executable(scriptfile): 1102 self.send_error( 1103 HTTPStatus.FORBIDDEN, 1104 "CGI script is not executable (%r)" % scriptname) 1105 return 1106 1107 # Reference: http://hoohoo.ncsa.uiuc.edu/cgi/env.html 1108 # XXX Much of the following could be prepared ahead of time! 1109 env = copy.deepcopy(os.environ) 1110 env['SERVER_SOFTWARE'] = self.version_string() 1111 env['SERVER_NAME'] = self.server.server_name 1112 env['GATEWAY_INTERFACE'] = 'CGI/1.1' 1113 env['SERVER_PROTOCOL'] = self.protocol_version 1114 env['SERVER_PORT'] = str(self.server.server_port) 1115 env['REQUEST_METHOD'] = self.command 1116 uqrest = urllib.parse.unquote(rest) 1117 env['PATH_INFO'] = uqrest 1118 env['PATH_TRANSLATED'] = self.translate_path(uqrest) 1119 env['SCRIPT_NAME'] = scriptname 1120 env['QUERY_STRING'] = query 1121 env['REMOTE_ADDR'] = self.client_address[0] 1122 authorization = self.headers.get("authorization") 1123 if authorization: 1124 authorization = authorization.split() 1125 if len(authorization) == 2: 1126 import base64, binascii 1127 env['AUTH_TYPE'] = authorization[0] 1128 if authorization[0].lower() == "basic": 1129 try: 1130 authorization = authorization[1].encode('ascii') 1131 authorization = base64.decodebytes(authorization).\ 1132 decode('ascii') 1133 except (binascii.Error, UnicodeError): 1134 pass 1135 else: 1136 authorization = authorization.split(':') 1137 if len(authorization) == 2: 1138 env['REMOTE_USER'] = authorization[0] 1139 # XXX REMOTE_IDENT 1140 if self.headers.get('content-type') is None: 1141 env['CONTENT_TYPE'] = self.headers.get_content_type() 1142 else: 1143 env['CONTENT_TYPE'] = self.headers['content-type'] 1144 length = self.headers.get('content-length') 1145 if length: 1146 env['CONTENT_LENGTH'] = length 1147 referer = self.headers.get('referer') 1148 if referer: 1149 env['HTTP_REFERER'] = referer 1150 accept = self.headers.get_all('accept', ()) 1151 env['HTTP_ACCEPT'] = ','.join(accept) 1152 ua = self.headers.get('user-agent') 1153 if ua: 1154 env['HTTP_USER_AGENT'] = ua 1155 co = filter(None, self.headers.get_all('cookie', [])) 1156 cookie_str = ', '.join(co) 1157 if cookie_str: 1158 env['HTTP_COOKIE'] = cookie_str 1159 # XXX Other HTTP_* headers 1160 # Since we're setting the env in the parent, provide empty 1161 # values to override previously set values 1162 for k in ('QUERY_STRING', 'REMOTE_HOST', 'CONTENT_LENGTH', 1163 'HTTP_USER_AGENT', 'HTTP_COOKIE', 'HTTP_REFERER'): 1164 env.setdefault(k, "") 1165 1166 self.send_response(HTTPStatus.OK, "Script output follows") 1167 self.flush_headers() 1168 1169 decoded_query = query.replace('+', ' ') 1170 1171 if self.have_fork: 1172 # Unix -- fork as we should 1173 args = [script] 1174 if '=' not in decoded_query: 1175 args.append(decoded_query) 1176 nobody = nobody_uid() 1177 self.wfile.flush() # Always flush before forking 1178 pid = os.fork() 1179 if pid != 0: 1180 # Parent 1181 pid, sts = os.waitpid(pid, 0) 1182 # throw away additional data [see bug #427345] 1183 while select.select([self.rfile], [], [], 0)[0]: 1184 if not self.rfile.read(1): 1185 break 1186 exitcode = os.waitstatus_to_exitcode(sts) 1187 if exitcode: 1188 self.log_error(f"CGI script exit code {exitcode}") 1189 return 1190 # Child 1191 try: 1192 try: 1193 os.setuid(nobody) 1194 except OSError: 1195 pass 1196 os.dup2(self.rfile.fileno(), 0) 1197 os.dup2(self.wfile.fileno(), 1) 1198 os.execve(scriptfile, args, env) 1199 except: 1200 self.server.handle_error(self.request, self.client_address) 1201 os._exit(127) 1202 1203 else: 1204 # Non-Unix -- use subprocess 1205 import subprocess 1206 cmdline = [scriptfile] 1207 if self.is_python(scriptfile): 1208 interp = sys.executable 1209 if interp.lower().endswith("w.exe"): 1210 # On Windows, use python.exe, not pythonw.exe 1211 interp = interp[:-5] + interp[-4:] 1212 cmdline = [interp, '-u'] + cmdline 1213 if '=' not in query: 1214 cmdline.append(query) 1215 self.log_message("command: %s", subprocess.list2cmdline(cmdline)) 1216 try: 1217 nbytes = int(length) 1218 except (TypeError, ValueError): 1219 nbytes = 0 1220 p = subprocess.Popen(cmdline, 1221 stdin=subprocess.PIPE, 1222 stdout=subprocess.PIPE, 1223 stderr=subprocess.PIPE, 1224 env = env 1225 ) 1226 if self.command.lower() == "post" and nbytes > 0: 1227 data = self.rfile.read(nbytes) 1228 else: 1229 data = None 1230 # throw away additional data [see bug #427345] 1231 while select.select([self.rfile._sock], [], [], 0)[0]: 1232 if not self.rfile._sock.recv(1): 1233 break 1234 stdout, stderr = p.communicate(data) 1235 self.wfile.write(stdout) 1236 if stderr: 1237 self.log_error('%s', stderr) 1238 p.stderr.close() 1239 p.stdout.close() 1240 status = p.returncode 1241 if status: 1242 self.log_error("CGI script exit status %#x", status) 1243 else: 1244 self.log_message("CGI script exited OK") 1245 1246 1247def _get_best_family(*address): 1248 infos = socket.getaddrinfo( 1249 *address, 1250 type=socket.SOCK_STREAM, 1251 flags=socket.AI_PASSIVE, 1252 ) 1253 family, type, proto, canonname, sockaddr = next(iter(infos)) 1254 return family, sockaddr 1255 1256 1257def test(HandlerClass=BaseHTTPRequestHandler, 1258 ServerClass=ThreadingHTTPServer, 1259 protocol="HTTP/1.0", port=8000, bind=None): 1260 """Test the HTTP request handler class. 1261 1262 This runs an HTTP server on port 8000 (or the port argument). 1263 1264 """ 1265 ServerClass.address_family, addr = _get_best_family(bind, port) 1266 HandlerClass.protocol_version = protocol 1267 with ServerClass(addr, HandlerClass) as httpd: 1268 host, port = httpd.socket.getsockname()[:2] 1269 url_host = f'[{host}]' if ':' in host else host 1270 print( 1271 f"Serving HTTP on {host} port {port} " 1272 f"(http://{url_host}:{port}/) ..." 1273 ) 1274 try: 1275 httpd.serve_forever() 1276 except KeyboardInterrupt: 1277 print("\nKeyboard interrupt received, exiting.") 1278 sys.exit(0) 1279 1280if __name__ == '__main__': 1281 import argparse 1282 import contextlib 1283 1284 parser = argparse.ArgumentParser() 1285 parser.add_argument('--cgi', action='store_true', 1286 help='run as CGI server') 1287 parser.add_argument('-b', '--bind', metavar='ADDRESS', 1288 help='bind to this address ' 1289 '(default: all interfaces)') 1290 parser.add_argument('-d', '--directory', default=os.getcwd(), 1291 help='serve this directory ' 1292 '(default: current directory)') 1293 parser.add_argument('-p', '--protocol', metavar='VERSION', 1294 default='HTTP/1.0', 1295 help='conform to this HTTP version ' 1296 '(default: %(default)s)') 1297 parser.add_argument('port', default=8000, type=int, nargs='?', 1298 help='bind to this port ' 1299 '(default: %(default)s)') 1300 args = parser.parse_args() 1301 if args.cgi: 1302 handler_class = CGIHTTPRequestHandler 1303 else: 1304 handler_class = SimpleHTTPRequestHandler 1305 1306 # ensure dual-stack is not disabled; ref #38907 1307 class DualStackServer(ThreadingHTTPServer): 1308 1309 def server_bind(self): 1310 # suppress exception when protocol is IPv4 1311 with contextlib.suppress(Exception): 1312 self.socket.setsockopt( 1313 socket.IPPROTO_IPV6, socket.IPV6_V6ONLY, 0) 1314 return super().server_bind() 1315 1316 def finish_request(self, request, client_address): 1317 self.RequestHandlerClass(request, client_address, self, 1318 directory=args.directory) 1319 1320 test( 1321 HandlerClass=handler_class, 1322 ServerClass=DualStackServer, 1323 port=args.port, 1324 bind=args.bind, 1325 protocol=args.protocol, 1326 ) 1327