1"""HTTP server classes. 2 3Note: BaseHTTPRequestHandler doesn't implement any HTTP request; see 4SimpleHTTPRequestHandler for simple implementations of GET, HEAD and POST, 5and CGIHTTPRequestHandler for CGI scripts. 6 7It does, however, optionally implement HTTP/1.1 persistent connections, 8as of version 0.3. 9 10Notes on CGIHTTPRequestHandler 11------------------------------ 12 13This class implements GET and POST requests to cgi-bin scripts. 14 15If the os.fork() function is not present (e.g. on Windows), 16subprocess.Popen() is used as a fallback, with slightly altered semantics. 17 18In all cases, the implementation is intentionally naive -- all 19requests are executed synchronously. 20 21SECURITY WARNING: DON'T USE THIS CODE UNLESS YOU ARE INSIDE A FIREWALL 22-- it may execute arbitrary Python code or external programs. 23 24Note that status code 200 is sent prior to execution of a CGI script, so 25scripts cannot send other status codes such as 302 (redirect). 26 27XXX To do: 28 29- log requests even later (to capture byte count) 30- log user-agent header and other interesting goodies 31- send error log to separate file 32""" 33 34 35# See also: 36# 37# HTTP Working Group T. Berners-Lee 38# INTERNET-DRAFT R. T. Fielding 39# <draft-ietf-http-v10-spec-00.txt> H. Frystyk Nielsen 40# Expires September 8, 1995 March 8, 1995 41# 42# URL: http://www.ics.uci.edu/pub/ietf/http/draft-ietf-http-v10-spec-00.txt 43# 44# and 45# 46# Network Working Group R. Fielding 47# Request for Comments: 2616 et al 48# Obsoletes: 2068 June 1999 49# Category: Standards Track 50# 51# URL: http://www.faqs.org/rfcs/rfc2616.html 52 53# Log files 54# --------- 55# 56# Here's a quote from the NCSA httpd docs about log file format. 57# 58# | The logfile format is as follows. Each line consists of: 59# | 60# | host rfc931 authuser [DD/Mon/YYYY:hh:mm:ss] "request" ddd bbbb 61# | 62# | host: Either the DNS name or the IP number of the remote client 63# | rfc931: Any information returned by identd for this person, 64# | - otherwise. 65# | authuser: If user sent a userid for authentication, the user name, 66# | - otherwise. 67# | DD: Day 68# | Mon: Month (calendar name) 69# | YYYY: Year 70# | hh: hour (24-hour format, the machine's timezone) 71# | mm: minutes 72# | ss: seconds 73# | request: The first line of the HTTP request as sent by the client. 74# | ddd: the status code returned by the server, - if not available. 75# | bbbb: the total number of bytes sent, 76# | *not including the HTTP/1.0 header*, - if not available 77# | 78# | You can determine the name of the file accessed through request. 79# 80# (Actually, the latter is only true if you know the server configuration 81# at the time the request was made!) 82 83__version__ = "0.6" 84 85__all__ = [ 86 "HTTPServer", "ThreadingHTTPServer", "BaseHTTPRequestHandler", 87 "SimpleHTTPRequestHandler", "CGIHTTPRequestHandler", 88] 89 90import copy 91import datetime 92import email.utils 93import html 94import http.client 95import io 96import mimetypes 97import os 98import posixpath 99import select 100import shutil 101import socket # For gethostbyaddr() 102import socketserver 103import sys 104import time 105import urllib.parse 106from functools import partial 107 108from http import HTTPStatus 109 110 111# Default error message template 112DEFAULT_ERROR_MESSAGE = """\ 113<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" 114 "http://www.w3.org/TR/html4/strict.dtd"> 115<html> 116 <head> 117 <meta http-equiv="Content-Type" content="text/html;charset=utf-8"> 118 <title>Error response</title> 119 </head> 120 <body> 121 <h1>Error response</h1> 122 <p>Error code: %(code)d</p> 123 <p>Message: %(message)s.</p> 124 <p>Error code explanation: %(code)s - %(explain)s.</p> 125 </body> 126</html> 127""" 128 129DEFAULT_ERROR_CONTENT_TYPE = "text/html;charset=utf-8" 130 131class HTTPServer(socketserver.TCPServer): 132 133 allow_reuse_address = 1 # Seems to make sense in testing environment 134 135 def server_bind(self): 136 """Override server_bind to store the server name.""" 137 socketserver.TCPServer.server_bind(self) 138 host, port = self.server_address[:2] 139 self.server_name = socket.getfqdn(host) 140 self.server_port = port 141 142 143class ThreadingHTTPServer(socketserver.ThreadingMixIn, HTTPServer): 144 daemon_threads = True 145 146 147class BaseHTTPRequestHandler(socketserver.StreamRequestHandler): 148 149 """HTTP request handler base class. 150 151 The following explanation of HTTP serves to guide you through the 152 code as well as to expose any misunderstandings I may have about 153 HTTP (so you don't need to read the code to figure out I'm wrong 154 :-). 155 156 HTTP (HyperText Transfer Protocol) is an extensible protocol on 157 top of a reliable stream transport (e.g. TCP/IP). The protocol 158 recognizes three parts to a request: 159 160 1. One line identifying the request type and path 161 2. An optional set of RFC-822-style headers 162 3. An optional data part 163 164 The headers and data are separated by a blank line. 165 166 The first line of the request has the form 167 168 <command> <path> <version> 169 170 where <command> is a (case-sensitive) keyword such as GET or POST, 171 <path> is a string containing path information for the request, 172 and <version> should be the string "HTTP/1.0" or "HTTP/1.1". 173 <path> is encoded using the URL encoding scheme (using %xx to signify 174 the ASCII character with hex code xx). 175 176 The specification specifies that lines are separated by CRLF but 177 for compatibility with the widest range of clients recommends 178 servers also handle LF. Similarly, whitespace in the request line 179 is treated sensibly (allowing multiple spaces between components 180 and allowing trailing whitespace). 181 182 Similarly, for output, lines ought to be separated by CRLF pairs 183 but most clients grok LF characters just fine. 184 185 If the first line of the request has the form 186 187 <command> <path> 188 189 (i.e. <version> is left out) then this is assumed to be an HTTP 190 0.9 request; this form has no optional headers and data part and 191 the reply consists of just the data. 192 193 The reply form of the HTTP 1.x protocol again has three parts: 194 195 1. One line giving the response code 196 2. An optional set of RFC-822-style headers 197 3. The data 198 199 Again, the headers and data are separated by a blank line. 200 201 The response code line has the form 202 203 <version> <responsecode> <responsestring> 204 205 where <version> is the protocol version ("HTTP/1.0" or "HTTP/1.1"), 206 <responsecode> is a 3-digit response code indicating success or 207 failure of the request, and <responsestring> is an optional 208 human-readable string explaining what the response code means. 209 210 This server parses the request and the headers, and then calls a 211 function specific to the request type (<command>). Specifically, 212 a request SPAM will be handled by a method do_SPAM(). If no 213 such method exists the server sends an error response to the 214 client. If it exists, it is called with no arguments: 215 216 do_SPAM() 217 218 Note that the request name is case sensitive (i.e. SPAM and spam 219 are different requests). 220 221 The various request details are stored in instance variables: 222 223 - client_address is the client IP address in the form (host, 224 port); 225 226 - command, path and version are the broken-down request line; 227 228 - headers is an instance of email.message.Message (or a derived 229 class) containing the header information; 230 231 - rfile is a file object open for reading positioned at the 232 start of the optional input data part; 233 234 - wfile is a file object open for writing. 235 236 IT IS IMPORTANT TO ADHERE TO THE PROTOCOL FOR WRITING! 237 238 The first thing to be written must be the response line. Then 239 follow 0 or more header lines, then a blank line, and then the 240 actual data (if any). The meaning of the header lines depends on 241 the command executed by the server; in most cases, when data is 242 returned, there should be at least one header line of the form 243 244 Content-type: <type>/<subtype> 245 246 where <type> and <subtype> should be registered MIME types, 247 e.g. "text/html" or "text/plain". 248 249 """ 250 251 # The Python system version, truncated to its first component. 252 sys_version = "Python/" + sys.version.split()[0] 253 254 # The server software version. You may want to override this. 255 # The format is multiple whitespace-separated strings, 256 # where each string is of the form name[/version]. 257 server_version = "BaseHTTP/" + __version__ 258 259 error_message_format = DEFAULT_ERROR_MESSAGE 260 error_content_type = DEFAULT_ERROR_CONTENT_TYPE 261 262 # The default request version. This only affects responses up until 263 # the point where the request line is parsed, so it mainly decides what 264 # the client gets back when sending a malformed request line. 265 # Most web servers default to HTTP 0.9, i.e. don't send a status line. 266 default_request_version = "HTTP/0.9" 267 268 def parse_request(self): 269 """Parse a request (internal). 270 271 The request should be stored in self.raw_requestline; the results 272 are in self.command, self.path, self.request_version and 273 self.headers. 274 275 Return True for success, False for failure; on failure, any relevant 276 error response has already been sent back. 277 278 """ 279 self.command = None # set in case of error on the first line 280 self.request_version = version = self.default_request_version 281 self.close_connection = True 282 requestline = str(self.raw_requestline, 'iso-8859-1') 283 requestline = requestline.rstrip('\r\n') 284 self.requestline = requestline 285 words = requestline.split() 286 if len(words) == 0: 287 return False 288 289 if len(words) >= 3: # Enough to determine protocol version 290 version = words[-1] 291 try: 292 if not version.startswith('HTTP/'): 293 raise ValueError 294 base_version_number = version.split('/', 1)[1] 295 version_number = base_version_number.split(".") 296 # RFC 2145 section 3.1 says there can be only one "." and 297 # - major and minor numbers MUST be treated as 298 # separate integers; 299 # - HTTP/2.4 is a lower version than HTTP/2.13, which in 300 # turn is lower than HTTP/12.3; 301 # - Leading zeros MUST be ignored by recipients. 302 if len(version_number) != 2: 303 raise ValueError 304 version_number = int(version_number[0]), int(version_number[1]) 305 except (ValueError, IndexError): 306 self.send_error( 307 HTTPStatus.BAD_REQUEST, 308 "Bad request version (%r)" % version) 309 return False 310 if version_number >= (1, 1) and self.protocol_version >= "HTTP/1.1": 311 self.close_connection = False 312 if version_number >= (2, 0): 313 self.send_error( 314 HTTPStatus.HTTP_VERSION_NOT_SUPPORTED, 315 "Invalid HTTP version (%s)" % base_version_number) 316 return False 317 self.request_version = version 318 319 if not 2 <= len(words) <= 3: 320 self.send_error( 321 HTTPStatus.BAD_REQUEST, 322 "Bad request syntax (%r)" % requestline) 323 return False 324 command, path = words[:2] 325 if len(words) == 2: 326 self.close_connection = True 327 if command != 'GET': 328 self.send_error( 329 HTTPStatus.BAD_REQUEST, 330 "Bad HTTP/0.9 request type (%r)" % command) 331 return False 332 self.command, self.path = command, path 333 334 # Examine the headers and look for a Connection directive. 335 try: 336 self.headers = http.client.parse_headers(self.rfile, 337 _class=self.MessageClass) 338 except http.client.LineTooLong as err: 339 self.send_error( 340 HTTPStatus.REQUEST_HEADER_FIELDS_TOO_LARGE, 341 "Line too long", 342 str(err)) 343 return False 344 except http.client.HTTPException as err: 345 self.send_error( 346 HTTPStatus.REQUEST_HEADER_FIELDS_TOO_LARGE, 347 "Too many headers", 348 str(err) 349 ) 350 return False 351 352 conntype = self.headers.get('Connection', "") 353 if conntype.lower() == 'close': 354 self.close_connection = True 355 elif (conntype.lower() == 'keep-alive' and 356 self.protocol_version >= "HTTP/1.1"): 357 self.close_connection = False 358 # Examine the headers and look for an Expect directive 359 expect = self.headers.get('Expect', "") 360 if (expect.lower() == "100-continue" and 361 self.protocol_version >= "HTTP/1.1" and 362 self.request_version >= "HTTP/1.1"): 363 if not self.handle_expect_100(): 364 return False 365 return True 366 367 def handle_expect_100(self): 368 """Decide what to do with an "Expect: 100-continue" header. 369 370 If the client is expecting a 100 Continue response, we must 371 respond with either a 100 Continue or a final response before 372 waiting for the request body. The default is to always respond 373 with a 100 Continue. You can behave differently (for example, 374 reject unauthorized requests) by overriding this method. 375 376 This method should either return True (possibly after sending 377 a 100 Continue response) or send an error response and return 378 False. 379 380 """ 381 self.send_response_only(HTTPStatus.CONTINUE) 382 self.end_headers() 383 return True 384 385 def handle_one_request(self): 386 """Handle a single HTTP request. 387 388 You normally don't need to override this method; see the class 389 __doc__ string for information on how to handle specific HTTP 390 commands such as GET and POST. 391 392 """ 393 try: 394 self.raw_requestline = self.rfile.readline(65537) 395 if len(self.raw_requestline) > 65536: 396 self.requestline = '' 397 self.request_version = '' 398 self.command = '' 399 self.send_error(HTTPStatus.REQUEST_URI_TOO_LONG) 400 return 401 if not self.raw_requestline: 402 self.close_connection = True 403 return 404 if not self.parse_request(): 405 # An error code has been sent, just exit 406 return 407 mname = 'do_' + self.command 408 if not hasattr(self, mname): 409 self.send_error( 410 HTTPStatus.NOT_IMPLEMENTED, 411 "Unsupported method (%r)" % self.command) 412 return 413 method = getattr(self, mname) 414 method() 415 self.wfile.flush() #actually send the response if not already done. 416 except socket.timeout as e: 417 #a read or a write timed out. Discard this connection 418 self.log_error("Request timed out: %r", e) 419 self.close_connection = True 420 return 421 422 def handle(self): 423 """Handle multiple requests if necessary.""" 424 self.close_connection = True 425 426 self.handle_one_request() 427 while not self.close_connection: 428 self.handle_one_request() 429 430 def send_error(self, code, message=None, explain=None): 431 """Send and log an error reply. 432 433 Arguments are 434 * code: an HTTP error code 435 3 digits 436 * message: a simple optional 1 line reason phrase. 437 *( HTAB / SP / VCHAR / %x80-FF ) 438 defaults to short entry matching the response code 439 * explain: a detailed message defaults to the long entry 440 matching the response code. 441 442 This sends an error response (so it must be called before any 443 output has been generated), logs the error, and finally sends 444 a piece of HTML explaining the error to the user. 445 446 """ 447 448 try: 449 shortmsg, longmsg = self.responses[code] 450 except KeyError: 451 shortmsg, longmsg = '???', '???' 452 if message is None: 453 message = shortmsg 454 if explain is None: 455 explain = longmsg 456 self.log_error("code %d, message %s", code, message) 457 self.send_response(code, message) 458 self.send_header('Connection', 'close') 459 460 # Message body is omitted for cases described in: 461 # - RFC7230: 3.3. 1xx, 204(No Content), 304(Not Modified) 462 # - RFC7231: 6.3.6. 205(Reset Content) 463 body = None 464 if (code >= 200 and 465 code not in (HTTPStatus.NO_CONTENT, 466 HTTPStatus.RESET_CONTENT, 467 HTTPStatus.NOT_MODIFIED)): 468 # HTML encode to prevent Cross Site Scripting attacks 469 # (see bug #1100201) 470 content = (self.error_message_format % { 471 'code': code, 472 'message': html.escape(message, quote=False), 473 'explain': html.escape(explain, quote=False) 474 }) 475 body = content.encode('UTF-8', 'replace') 476 self.send_header("Content-Type", self.error_content_type) 477 self.send_header('Content-Length', str(len(body))) 478 self.end_headers() 479 480 if self.command != 'HEAD' and body: 481 self.wfile.write(body) 482 483 def send_response(self, code, message=None): 484 """Add the response header to the headers buffer and log the 485 response code. 486 487 Also send two standard headers with the server software 488 version and the current date. 489 490 """ 491 self.log_request(code) 492 self.send_response_only(code, message) 493 self.send_header('Server', self.version_string()) 494 self.send_header('Date', self.date_time_string()) 495 496 def send_response_only(self, code, message=None): 497 """Send the response header only.""" 498 if self.request_version != 'HTTP/0.9': 499 if message is None: 500 if code in self.responses: 501 message = self.responses[code][0] 502 else: 503 message = '' 504 if not hasattr(self, '_headers_buffer'): 505 self._headers_buffer = [] 506 self._headers_buffer.append(("%s %d %s\r\n" % 507 (self.protocol_version, code, message)).encode( 508 'latin-1', 'strict')) 509 510 def send_header(self, keyword, value): 511 """Send a MIME header to the headers buffer.""" 512 if self.request_version != 'HTTP/0.9': 513 if not hasattr(self, '_headers_buffer'): 514 self._headers_buffer = [] 515 self._headers_buffer.append( 516 ("%s: %s\r\n" % (keyword, value)).encode('latin-1', 'strict')) 517 518 if keyword.lower() == 'connection': 519 if value.lower() == 'close': 520 self.close_connection = True 521 elif value.lower() == 'keep-alive': 522 self.close_connection = False 523 524 def end_headers(self): 525 """Send the blank line ending the MIME headers.""" 526 if self.request_version != 'HTTP/0.9': 527 self._headers_buffer.append(b"\r\n") 528 self.flush_headers() 529 530 def flush_headers(self): 531 if hasattr(self, '_headers_buffer'): 532 self.wfile.write(b"".join(self._headers_buffer)) 533 self._headers_buffer = [] 534 535 def log_request(self, code='-', size='-'): 536 """Log an accepted request. 537 538 This is called by send_response(). 539 540 """ 541 if isinstance(code, HTTPStatus): 542 code = code.value 543 self.log_message('"%s" %s %s', 544 self.requestline, str(code), str(size)) 545 546 def log_error(self, format, *args): 547 """Log an error. 548 549 This is called when a request cannot be fulfilled. By 550 default it passes the message on to log_message(). 551 552 Arguments are the same as for log_message(). 553 554 XXX This should go to the separate error log. 555 556 """ 557 558 self.log_message(format, *args) 559 560 def log_message(self, format, *args): 561 """Log an arbitrary message. 562 563 This is used by all other logging functions. Override 564 it if you have specific logging wishes. 565 566 The first argument, FORMAT, is a format string for the 567 message to be logged. If the format string contains 568 any % escapes requiring parameters, they should be 569 specified as subsequent arguments (it's just like 570 printf!). 571 572 The client ip and current date/time are prefixed to 573 every message. 574 575 """ 576 577 sys.stderr.write("%s - - [%s] %s\n" % 578 (self.address_string(), 579 self.log_date_time_string(), 580 format%args)) 581 582 def version_string(self): 583 """Return the server software version string.""" 584 return self.server_version + ' ' + self.sys_version 585 586 def date_time_string(self, timestamp=None): 587 """Return the current date and time formatted for a message header.""" 588 if timestamp is None: 589 timestamp = time.time() 590 return email.utils.formatdate(timestamp, usegmt=True) 591 592 def log_date_time_string(self): 593 """Return the current time formatted for logging.""" 594 now = time.time() 595 year, month, day, hh, mm, ss, x, y, z = time.localtime(now) 596 s = "%02d/%3s/%04d %02d:%02d:%02d" % ( 597 day, self.monthname[month], year, hh, mm, ss) 598 return s 599 600 weekdayname = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'] 601 602 monthname = [None, 603 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 604 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'] 605 606 def address_string(self): 607 """Return the client address.""" 608 609 return self.client_address[0] 610 611 # Essentially static class variables 612 613 # The version of the HTTP protocol we support. 614 # Set this to HTTP/1.1 to enable automatic keepalive 615 protocol_version = "HTTP/1.0" 616 617 # MessageClass used to parse headers 618 MessageClass = http.client.HTTPMessage 619 620 # hack to maintain backwards compatibility 621 responses = { 622 v: (v.phrase, v.description) 623 for v in HTTPStatus.__members__.values() 624 } 625 626 627class SimpleHTTPRequestHandler(BaseHTTPRequestHandler): 628 629 """Simple HTTP request handler with GET and HEAD commands. 630 631 This serves files from the current directory and any of its 632 subdirectories. The MIME type for files is determined by 633 calling the .guess_type() method. 634 635 The GET and HEAD requests are identical except that the HEAD 636 request omits the actual contents of the file. 637 638 """ 639 640 server_version = "SimpleHTTP/" + __version__ 641 642 def __init__(self, *args, directory=None, **kwargs): 643 if directory is None: 644 directory = os.getcwd() 645 self.directory = directory 646 super().__init__(*args, **kwargs) 647 648 def do_GET(self): 649 """Serve a GET request.""" 650 f = self.send_head() 651 if f: 652 try: 653 self.copyfile(f, self.wfile) 654 finally: 655 f.close() 656 657 def do_HEAD(self): 658 """Serve a HEAD request.""" 659 f = self.send_head() 660 if f: 661 f.close() 662 663 def send_head(self): 664 """Common code for GET and HEAD commands. 665 666 This sends the response code and MIME headers. 667 668 Return value is either a file object (which has to be copied 669 to the outputfile by the caller unless the command was HEAD, 670 and must be closed by the caller under all circumstances), or 671 None, in which case the caller has nothing further to do. 672 673 """ 674 path = self.translate_path(self.path) 675 f = None 676 if os.path.isdir(path): 677 parts = urllib.parse.urlsplit(self.path) 678 if not parts.path.endswith('/'): 679 # redirect browser - doing basically what apache does 680 self.send_response(HTTPStatus.MOVED_PERMANENTLY) 681 new_parts = (parts[0], parts[1], parts[2] + '/', 682 parts[3], parts[4]) 683 new_url = urllib.parse.urlunsplit(new_parts) 684 self.send_header("Location", new_url) 685 self.end_headers() 686 return None 687 for index in "index.html", "index.htm": 688 index = os.path.join(path, index) 689 if os.path.exists(index): 690 path = index 691 break 692 else: 693 return self.list_directory(path) 694 ctype = self.guess_type(path) 695 # check for trailing "/" which should return 404. See Issue17324 696 # The test for this was added in test_httpserver.py 697 # However, some OS platforms accept a trailingSlash as a filename 698 # See discussion on python-dev and Issue34711 regarding 699 # parseing and rejection of filenames with a trailing slash 700 if path.endswith("/"): 701 self.send_error(HTTPStatus.NOT_FOUND, "File not found") 702 return None 703 try: 704 f = open(path, 'rb') 705 except OSError: 706 self.send_error(HTTPStatus.NOT_FOUND, "File not found") 707 return None 708 709 try: 710 fs = os.fstat(f.fileno()) 711 # Use browser cache if possible 712 if ("If-Modified-Since" in self.headers 713 and "If-None-Match" not in self.headers): 714 # compare If-Modified-Since and time of last file modification 715 try: 716 ims = email.utils.parsedate_to_datetime( 717 self.headers["If-Modified-Since"]) 718 except (TypeError, IndexError, OverflowError, ValueError): 719 # ignore ill-formed values 720 pass 721 else: 722 if ims.tzinfo is None: 723 # obsolete format with no timezone, cf. 724 # https://tools.ietf.org/html/rfc7231#section-7.1.1.1 725 ims = ims.replace(tzinfo=datetime.timezone.utc) 726 if ims.tzinfo is datetime.timezone.utc: 727 # compare to UTC datetime of last modification 728 last_modif = datetime.datetime.fromtimestamp( 729 fs.st_mtime, datetime.timezone.utc) 730 # remove microseconds, like in If-Modified-Since 731 last_modif = last_modif.replace(microsecond=0) 732 733 if last_modif <= ims: 734 self.send_response(HTTPStatus.NOT_MODIFIED) 735 self.end_headers() 736 f.close() 737 return None 738 739 self.send_response(HTTPStatus.OK) 740 self.send_header("Content-type", ctype) 741 self.send_header("Content-Length", str(fs[6])) 742 self.send_header("Last-Modified", 743 self.date_time_string(fs.st_mtime)) 744 self.end_headers() 745 return f 746 except: 747 f.close() 748 raise 749 750 def list_directory(self, path): 751 """Helper to produce a directory listing (absent index.html). 752 753 Return value is either a file object, or None (indicating an 754 error). In either case, the headers are sent, making the 755 interface the same as for send_head(). 756 757 """ 758 try: 759 list = os.listdir(path) 760 except OSError: 761 self.send_error( 762 HTTPStatus.NOT_FOUND, 763 "No permission to list directory") 764 return None 765 list.sort(key=lambda a: a.lower()) 766 r = [] 767 try: 768 displaypath = urllib.parse.unquote(self.path, 769 errors='surrogatepass') 770 except UnicodeDecodeError: 771 displaypath = urllib.parse.unquote(path) 772 displaypath = html.escape(displaypath, quote=False) 773 enc = sys.getfilesystemencoding() 774 title = 'Directory listing for %s' % displaypath 775 r.append('<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" ' 776 '"http://www.w3.org/TR/html4/strict.dtd">') 777 r.append('<html>\n<head>') 778 r.append('<meta http-equiv="Content-Type" ' 779 'content="text/html; charset=%s">' % enc) 780 r.append('<title>%s</title>\n</head>' % title) 781 r.append('<body>\n<h1>%s</h1>' % title) 782 r.append('<hr>\n<ul>') 783 for name in list: 784 fullname = os.path.join(path, name) 785 displayname = linkname = name 786 # Append / for directories or @ for symbolic links 787 if os.path.isdir(fullname): 788 displayname = name + "/" 789 linkname = name + "/" 790 if os.path.islink(fullname): 791 displayname = name + "@" 792 # Note: a link to a directory displays with @ and links with / 793 r.append('<li><a href="%s">%s</a></li>' 794 % (urllib.parse.quote(linkname, 795 errors='surrogatepass'), 796 html.escape(displayname, quote=False))) 797 r.append('</ul>\n<hr>\n</body>\n</html>\n') 798 encoded = '\n'.join(r).encode(enc, 'surrogateescape') 799 f = io.BytesIO() 800 f.write(encoded) 801 f.seek(0) 802 self.send_response(HTTPStatus.OK) 803 self.send_header("Content-type", "text/html; charset=%s" % enc) 804 self.send_header("Content-Length", str(len(encoded))) 805 self.end_headers() 806 return f 807 808 def translate_path(self, path): 809 """Translate a /-separated PATH to the local filename syntax. 810 811 Components that mean special things to the local file system 812 (e.g. drive or directory names) are ignored. (XXX They should 813 probably be diagnosed.) 814 815 """ 816 # abandon query parameters 817 path = path.split('?',1)[0] 818 path = path.split('#',1)[0] 819 # Don't forget explicit trailing slash when normalizing. Issue17324 820 trailing_slash = path.rstrip().endswith('/') 821 try: 822 path = urllib.parse.unquote(path, errors='surrogatepass') 823 except UnicodeDecodeError: 824 path = urllib.parse.unquote(path) 825 path = posixpath.normpath(path) 826 words = path.split('/') 827 words = filter(None, words) 828 path = self.directory 829 for word in words: 830 if os.path.dirname(word) or word in (os.curdir, os.pardir): 831 # Ignore components that are not a simple file/directory name 832 continue 833 path = os.path.join(path, word) 834 if trailing_slash: 835 path += '/' 836 return path 837 838 def copyfile(self, source, outputfile): 839 """Copy all data between two file objects. 840 841 The SOURCE argument is a file object open for reading 842 (or anything with a read() method) and the DESTINATION 843 argument is a file object open for writing (or 844 anything with a write() method). 845 846 The only reason for overriding this would be to change 847 the block size or perhaps to replace newlines by CRLF 848 -- note however that this the default server uses this 849 to copy binary data as well. 850 851 """ 852 shutil.copyfileobj(source, outputfile) 853 854 def guess_type(self, path): 855 """Guess the type of a file. 856 857 Argument is a PATH (a filename). 858 859 Return value is a string of the form type/subtype, 860 usable for a MIME Content-type header. 861 862 The default implementation looks the file's extension 863 up in the table self.extensions_map, using application/octet-stream 864 as a default; however it would be permissible (if 865 slow) to look inside the data to make a better guess. 866 867 """ 868 869 base, ext = posixpath.splitext(path) 870 if ext in self.extensions_map: 871 return self.extensions_map[ext] 872 ext = ext.lower() 873 if ext in self.extensions_map: 874 return self.extensions_map[ext] 875 else: 876 return self.extensions_map[''] 877 878 if not mimetypes.inited: 879 mimetypes.init() # try to read system mime.types 880 extensions_map = mimetypes.types_map.copy() 881 extensions_map.update({ 882 '': 'application/octet-stream', # Default 883 '.py': 'text/plain', 884 '.c': 'text/plain', 885 '.h': 'text/plain', 886 }) 887 888 889# Utilities for CGIHTTPRequestHandler 890 891def _url_collapse_path(path): 892 """ 893 Given a URL path, remove extra '/'s and '.' path elements and collapse 894 any '..' references and returns a collapsed path. 895 896 Implements something akin to RFC-2396 5.2 step 6 to parse relative paths. 897 The utility of this function is limited to is_cgi method and helps 898 preventing some security attacks. 899 900 Returns: The reconstituted URL, which will always start with a '/'. 901 902 Raises: IndexError if too many '..' occur within the path. 903 904 """ 905 # Query component should not be involved. 906 path, _, query = path.partition('?') 907 path = urllib.parse.unquote(path) 908 909 # Similar to os.path.split(os.path.normpath(path)) but specific to URL 910 # path semantics rather than local operating system semantics. 911 path_parts = path.split('/') 912 head_parts = [] 913 for part in path_parts[:-1]: 914 if part == '..': 915 head_parts.pop() # IndexError if more '..' than prior parts 916 elif part and part != '.': 917 head_parts.append( part ) 918 if path_parts: 919 tail_part = path_parts.pop() 920 if tail_part: 921 if tail_part == '..': 922 head_parts.pop() 923 tail_part = '' 924 elif tail_part == '.': 925 tail_part = '' 926 else: 927 tail_part = '' 928 929 if query: 930 tail_part = '?'.join((tail_part, query)) 931 932 splitpath = ('/' + '/'.join(head_parts), tail_part) 933 collapsed_path = "/".join(splitpath) 934 935 return collapsed_path 936 937 938 939nobody = None 940 941def nobody_uid(): 942 """Internal routine to get nobody's uid""" 943 global nobody 944 if nobody: 945 return nobody 946 try: 947 import pwd 948 except ImportError: 949 return -1 950 try: 951 nobody = pwd.getpwnam('nobody')[2] 952 except KeyError: 953 nobody = 1 + max(x[2] for x in pwd.getpwall()) 954 return nobody 955 956 957def executable(path): 958 """Test for executable file.""" 959 return os.access(path, os.X_OK) 960 961 962class CGIHTTPRequestHandler(SimpleHTTPRequestHandler): 963 964 """Complete HTTP server with GET, HEAD and POST commands. 965 966 GET and HEAD also support running CGI scripts. 967 968 The POST command is *only* implemented for CGI scripts. 969 970 """ 971 972 # Determine platform specifics 973 have_fork = hasattr(os, 'fork') 974 975 # Make rfile unbuffered -- we need to read one line and then pass 976 # the rest to a subprocess, so we can't use buffered input. 977 rbufsize = 0 978 979 def do_POST(self): 980 """Serve a POST request. 981 982 This is only implemented for CGI scripts. 983 984 """ 985 986 if self.is_cgi(): 987 self.run_cgi() 988 else: 989 self.send_error( 990 HTTPStatus.NOT_IMPLEMENTED, 991 "Can only POST to CGI scripts") 992 993 def send_head(self): 994 """Version of send_head that support CGI scripts""" 995 if self.is_cgi(): 996 return self.run_cgi() 997 else: 998 return SimpleHTTPRequestHandler.send_head(self) 999 1000 def is_cgi(self): 1001 """Test whether self.path corresponds to a CGI script. 1002 1003 Returns True and updates the cgi_info attribute to the tuple 1004 (dir, rest) if self.path requires running a CGI script. 1005 Returns False otherwise. 1006 1007 If any exception is raised, the caller should assume that 1008 self.path was rejected as invalid and act accordingly. 1009 1010 The default implementation tests whether the normalized url 1011 path begins with one of the strings in self.cgi_directories 1012 (and the next character is a '/' or the end of the string). 1013 1014 """ 1015 collapsed_path = _url_collapse_path(self.path) 1016 dir_sep = collapsed_path.find('/', 1) 1017 head, tail = collapsed_path[:dir_sep], collapsed_path[dir_sep+1:] 1018 if head in self.cgi_directories: 1019 self.cgi_info = head, tail 1020 return True 1021 return False 1022 1023 1024 cgi_directories = ['/cgi-bin', '/htbin'] 1025 1026 def is_executable(self, path): 1027 """Test whether argument path is an executable file.""" 1028 return executable(path) 1029 1030 def is_python(self, path): 1031 """Test whether argument path is a Python script.""" 1032 head, tail = os.path.splitext(path) 1033 return tail.lower() in (".py", ".pyw") 1034 1035 def run_cgi(self): 1036 """Execute a CGI script.""" 1037 dir, rest = self.cgi_info 1038 path = dir + '/' + rest 1039 i = path.find('/', len(dir)+1) 1040 while i >= 0: 1041 nextdir = path[:i] 1042 nextrest = path[i+1:] 1043 1044 scriptdir = self.translate_path(nextdir) 1045 if os.path.isdir(scriptdir): 1046 dir, rest = nextdir, nextrest 1047 i = path.find('/', len(dir)+1) 1048 else: 1049 break 1050 1051 # find an explicit query string, if present. 1052 rest, _, query = rest.partition('?') 1053 1054 # dissect the part after the directory name into a script name & 1055 # a possible additional path, to be stored in PATH_INFO. 1056 i = rest.find('/') 1057 if i >= 0: 1058 script, rest = rest[:i], rest[i:] 1059 else: 1060 script, rest = rest, '' 1061 1062 scriptname = dir + '/' + script 1063 scriptfile = self.translate_path(scriptname) 1064 if not os.path.exists(scriptfile): 1065 self.send_error( 1066 HTTPStatus.NOT_FOUND, 1067 "No such CGI script (%r)" % scriptname) 1068 return 1069 if not os.path.isfile(scriptfile): 1070 self.send_error( 1071 HTTPStatus.FORBIDDEN, 1072 "CGI script is not a plain file (%r)" % scriptname) 1073 return 1074 ispy = self.is_python(scriptname) 1075 if self.have_fork or not ispy: 1076 if not self.is_executable(scriptfile): 1077 self.send_error( 1078 HTTPStatus.FORBIDDEN, 1079 "CGI script is not executable (%r)" % scriptname) 1080 return 1081 1082 # Reference: http://hoohoo.ncsa.uiuc.edu/cgi/env.html 1083 # XXX Much of the following could be prepared ahead of time! 1084 env = copy.deepcopy(os.environ) 1085 env['SERVER_SOFTWARE'] = self.version_string() 1086 env['SERVER_NAME'] = self.server.server_name 1087 env['GATEWAY_INTERFACE'] = 'CGI/1.1' 1088 env['SERVER_PROTOCOL'] = self.protocol_version 1089 env['SERVER_PORT'] = str(self.server.server_port) 1090 env['REQUEST_METHOD'] = self.command 1091 uqrest = urllib.parse.unquote(rest) 1092 env['PATH_INFO'] = uqrest 1093 env['PATH_TRANSLATED'] = self.translate_path(uqrest) 1094 env['SCRIPT_NAME'] = scriptname 1095 if query: 1096 env['QUERY_STRING'] = query 1097 env['REMOTE_ADDR'] = self.client_address[0] 1098 authorization = self.headers.get("authorization") 1099 if authorization: 1100 authorization = authorization.split() 1101 if len(authorization) == 2: 1102 import base64, binascii 1103 env['AUTH_TYPE'] = authorization[0] 1104 if authorization[0].lower() == "basic": 1105 try: 1106 authorization = authorization[1].encode('ascii') 1107 authorization = base64.decodebytes(authorization).\ 1108 decode('ascii') 1109 except (binascii.Error, UnicodeError): 1110 pass 1111 else: 1112 authorization = authorization.split(':') 1113 if len(authorization) == 2: 1114 env['REMOTE_USER'] = authorization[0] 1115 # XXX REMOTE_IDENT 1116 if self.headers.get('content-type') is None: 1117 env['CONTENT_TYPE'] = self.headers.get_content_type() 1118 else: 1119 env['CONTENT_TYPE'] = self.headers['content-type'] 1120 length = self.headers.get('content-length') 1121 if length: 1122 env['CONTENT_LENGTH'] = length 1123 referer = self.headers.get('referer') 1124 if referer: 1125 env['HTTP_REFERER'] = referer 1126 accept = [] 1127 for line in self.headers.getallmatchingheaders('accept'): 1128 if line[:1] in "\t\n\r ": 1129 accept.append(line.strip()) 1130 else: 1131 accept = accept + line[7:].split(',') 1132 env['HTTP_ACCEPT'] = ','.join(accept) 1133 ua = self.headers.get('user-agent') 1134 if ua: 1135 env['HTTP_USER_AGENT'] = ua 1136 co = filter(None, self.headers.get_all('cookie', [])) 1137 cookie_str = ', '.join(co) 1138 if cookie_str: 1139 env['HTTP_COOKIE'] = cookie_str 1140 # XXX Other HTTP_* headers 1141 # Since we're setting the env in the parent, provide empty 1142 # values to override previously set values 1143 for k in ('QUERY_STRING', 'REMOTE_HOST', 'CONTENT_LENGTH', 1144 'HTTP_USER_AGENT', 'HTTP_COOKIE', 'HTTP_REFERER'): 1145 env.setdefault(k, "") 1146 1147 self.send_response(HTTPStatus.OK, "Script output follows") 1148 self.flush_headers() 1149 1150 decoded_query = query.replace('+', ' ') 1151 1152 if self.have_fork: 1153 # Unix -- fork as we should 1154 args = [script] 1155 if '=' not in decoded_query: 1156 args.append(decoded_query) 1157 nobody = nobody_uid() 1158 self.wfile.flush() # Always flush before forking 1159 pid = os.fork() 1160 if pid != 0: 1161 # Parent 1162 pid, sts = os.waitpid(pid, 0) 1163 # throw away additional data [see bug #427345] 1164 while select.select([self.rfile], [], [], 0)[0]: 1165 if not self.rfile.read(1): 1166 break 1167 if sts: 1168 self.log_error("CGI script exit status %#x", sts) 1169 return 1170 # Child 1171 try: 1172 try: 1173 os.setuid(nobody) 1174 except OSError: 1175 pass 1176 os.dup2(self.rfile.fileno(), 0) 1177 os.dup2(self.wfile.fileno(), 1) 1178 os.execve(scriptfile, args, env) 1179 except: 1180 self.server.handle_error(self.request, self.client_address) 1181 os._exit(127) 1182 1183 else: 1184 # Non-Unix -- use subprocess 1185 import subprocess 1186 cmdline = [scriptfile] 1187 if self.is_python(scriptfile): 1188 interp = sys.executable 1189 if interp.lower().endswith("w.exe"): 1190 # On Windows, use python.exe, not pythonw.exe 1191 interp = interp[:-5] + interp[-4:] 1192 cmdline = [interp, '-u'] + cmdline 1193 if '=' not in query: 1194 cmdline.append(query) 1195 self.log_message("command: %s", subprocess.list2cmdline(cmdline)) 1196 try: 1197 nbytes = int(length) 1198 except (TypeError, ValueError): 1199 nbytes = 0 1200 p = subprocess.Popen(cmdline, 1201 stdin=subprocess.PIPE, 1202 stdout=subprocess.PIPE, 1203 stderr=subprocess.PIPE, 1204 env = env 1205 ) 1206 if self.command.lower() == "post" and nbytes > 0: 1207 data = self.rfile.read(nbytes) 1208 else: 1209 data = None 1210 # throw away additional data [see bug #427345] 1211 while select.select([self.rfile._sock], [], [], 0)[0]: 1212 if not self.rfile._sock.recv(1): 1213 break 1214 stdout, stderr = p.communicate(data) 1215 self.wfile.write(stdout) 1216 if stderr: 1217 self.log_error('%s', stderr) 1218 p.stderr.close() 1219 p.stdout.close() 1220 status = p.returncode 1221 if status: 1222 self.log_error("CGI script exit status %#x", status) 1223 else: 1224 self.log_message("CGI script exited OK") 1225 1226 1227def _get_best_family(*address): 1228 infos = socket.getaddrinfo( 1229 *address, 1230 type=socket.SOCK_STREAM, 1231 flags=socket.AI_PASSIVE, 1232 ) 1233 family, type, proto, canonname, sockaddr = next(iter(infos)) 1234 return family, sockaddr 1235 1236 1237def test(HandlerClass=BaseHTTPRequestHandler, 1238 ServerClass=ThreadingHTTPServer, 1239 protocol="HTTP/1.0", port=8000, bind=None): 1240 """Test the HTTP request handler class. 1241 1242 This runs an HTTP server on port 8000 (or the port argument). 1243 1244 """ 1245 ServerClass.address_family, addr = _get_best_family(bind, port) 1246 1247 HandlerClass.protocol_version = protocol 1248 with ServerClass(addr, HandlerClass) as httpd: 1249 host, port = httpd.socket.getsockname()[:2] 1250 url_host = f'[{host}]' if ':' in host else host 1251 print( 1252 f"Serving HTTP on {host} port {port} " 1253 f"(http://{url_host}:{port}/) ..." 1254 ) 1255 try: 1256 httpd.serve_forever() 1257 except KeyboardInterrupt: 1258 print("\nKeyboard interrupt received, exiting.") 1259 sys.exit(0) 1260 1261if __name__ == '__main__': 1262 import argparse 1263 1264 parser = argparse.ArgumentParser() 1265 parser.add_argument('--cgi', action='store_true', 1266 help='Run as CGI Server') 1267 parser.add_argument('--bind', '-b', metavar='ADDRESS', 1268 help='Specify alternate bind address ' 1269 '[default: all interfaces]') 1270 parser.add_argument('--directory', '-d', default=os.getcwd(), 1271 help='Specify alternative directory ' 1272 '[default:current directory]') 1273 parser.add_argument('port', action='store', 1274 default=8000, type=int, 1275 nargs='?', 1276 help='Specify alternate port [default: 8000]') 1277 args = parser.parse_args() 1278 if args.cgi: 1279 handler_class = CGIHTTPRequestHandler 1280 else: 1281 handler_class = partial(SimpleHTTPRequestHandler, 1282 directory=args.directory) 1283 test(HandlerClass=handler_class, port=args.port, bind=args.bind) 1284