1"""HTTP server classes. 2 3Note: BaseHTTPRequestHandler doesn't implement any HTTP request; see 4SimpleHTTPRequestHandler for simple implementations of GET, HEAD and POST, 5and CGIHTTPRequestHandler for CGI scripts. 6 7It does, however, optionally implement HTTP/1.1 persistent connections, 8as of version 0.3. 9 10Notes on CGIHTTPRequestHandler 11------------------------------ 12 13This class implements GET and POST requests to cgi-bin scripts. 14 15If the os.fork() function is not present (e.g. on Windows), 16subprocess.Popen() is used as a fallback, with slightly altered semantics. 17 18In all cases, the implementation is intentionally naive -- all 19requests are executed synchronously. 20 21SECURITY WARNING: DON'T USE THIS CODE UNLESS YOU ARE INSIDE A FIREWALL 22-- it may execute arbitrary Python code or external programs. 23 24Note that status code 200 is sent prior to execution of a CGI script, so 25scripts cannot send other status codes such as 302 (redirect). 26 27XXX To do: 28 29- log requests even later (to capture byte count) 30- log user-agent header and other interesting goodies 31- send error log to separate file 32""" 33 34 35# See also: 36# 37# HTTP Working Group T. Berners-Lee 38# INTERNET-DRAFT R. T. Fielding 39# <draft-ietf-http-v10-spec-00.txt> H. Frystyk Nielsen 40# Expires September 8, 1995 March 8, 1995 41# 42# URL: http://www.ics.uci.edu/pub/ietf/http/draft-ietf-http-v10-spec-00.txt 43# 44# and 45# 46# Network Working Group R. Fielding 47# Request for Comments: 2616 et al 48# Obsoletes: 2068 June 1999 49# Category: Standards Track 50# 51# URL: http://www.faqs.org/rfcs/rfc2616.html 52 53# Log files 54# --------- 55# 56# Here's a quote from the NCSA httpd docs about log file format. 57# 58# | The logfile format is as follows. Each line consists of: 59# | 60# | host rfc931 authuser [DD/Mon/YYYY:hh:mm:ss] "request" ddd bbbb 61# | 62# | host: Either the DNS name or the IP number of the remote client 63# | rfc931: Any information returned by identd for this person, 64# | - otherwise. 65# | authuser: If user sent a userid for authentication, the user name, 66# | - otherwise. 67# | DD: Day 68# | Mon: Month (calendar name) 69# | YYYY: Year 70# | hh: hour (24-hour format, the machine's timezone) 71# | mm: minutes 72# | ss: seconds 73# | request: The first line of the HTTP request as sent by the client. 74# | ddd: the status code returned by the server, - if not available. 75# | bbbb: the total number of bytes sent, 76# | *not including the HTTP/1.0 header*, - if not available 77# | 78# | You can determine the name of the file accessed through request. 79# 80# (Actually, the latter is only true if you know the server configuration 81# at the time the request was made!) 82 83__version__ = "0.6" 84 85__all__ = [ 86 "HTTPServer", "ThreadingHTTPServer", "BaseHTTPRequestHandler", 87 "SimpleHTTPRequestHandler", "CGIHTTPRequestHandler", 88] 89 90import copy 91import datetime 92import email.utils 93import html 94import http.client 95import io 96import mimetypes 97import os 98import posixpath 99import select 100import shutil 101import socket # For gethostbyaddr() 102import socketserver 103import sys 104import time 105import urllib.parse 106from functools import partial 107 108from http import HTTPStatus 109 110 111# Default error message template 112DEFAULT_ERROR_MESSAGE = """\ 113<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" 114 "http://www.w3.org/TR/html4/strict.dtd"> 115<html> 116 <head> 117 <meta http-equiv="Content-Type" content="text/html;charset=utf-8"> 118 <title>Error response</title> 119 </head> 120 <body> 121 <h1>Error response</h1> 122 <p>Error code: %(code)d</p> 123 <p>Message: %(message)s.</p> 124 <p>Error code explanation: %(code)s - %(explain)s.</p> 125 </body> 126</html> 127""" 128 129DEFAULT_ERROR_CONTENT_TYPE = "text/html;charset=utf-8" 130 131class HTTPServer(socketserver.TCPServer): 132 133 allow_reuse_address = 1 # Seems to make sense in testing environment 134 135 def server_bind(self): 136 """Override server_bind to store the server name.""" 137 socketserver.TCPServer.server_bind(self) 138 host, port = self.server_address[:2] 139 self.server_name = socket.getfqdn(host) 140 self.server_port = port 141 142 143class ThreadingHTTPServer(socketserver.ThreadingMixIn, HTTPServer): 144 daemon_threads = True 145 146 147class BaseHTTPRequestHandler(socketserver.StreamRequestHandler): 148 149 """HTTP request handler base class. 150 151 The following explanation of HTTP serves to guide you through the 152 code as well as to expose any misunderstandings I may have about 153 HTTP (so you don't need to read the code to figure out I'm wrong 154 :-). 155 156 HTTP (HyperText Transfer Protocol) is an extensible protocol on 157 top of a reliable stream transport (e.g. TCP/IP). The protocol 158 recognizes three parts to a request: 159 160 1. One line identifying the request type and path 161 2. An optional set of RFC-822-style headers 162 3. An optional data part 163 164 The headers and data are separated by a blank line. 165 166 The first line of the request has the form 167 168 <command> <path> <version> 169 170 where <command> is a (case-sensitive) keyword such as GET or POST, 171 <path> is a string containing path information for the request, 172 and <version> should be the string "HTTP/1.0" or "HTTP/1.1". 173 <path> is encoded using the URL encoding scheme (using %xx to signify 174 the ASCII character with hex code xx). 175 176 The specification specifies that lines are separated by CRLF but 177 for compatibility with the widest range of clients recommends 178 servers also handle LF. Similarly, whitespace in the request line 179 is treated sensibly (allowing multiple spaces between components 180 and allowing trailing whitespace). 181 182 Similarly, for output, lines ought to be separated by CRLF pairs 183 but most clients grok LF characters just fine. 184 185 If the first line of the request has the form 186 187 <command> <path> 188 189 (i.e. <version> is left out) then this is assumed to be an HTTP 190 0.9 request; this form has no optional headers and data part and 191 the reply consists of just the data. 192 193 The reply form of the HTTP 1.x protocol again has three parts: 194 195 1. One line giving the response code 196 2. An optional set of RFC-822-style headers 197 3. The data 198 199 Again, the headers and data are separated by a blank line. 200 201 The response code line has the form 202 203 <version> <responsecode> <responsestring> 204 205 where <version> is the protocol version ("HTTP/1.0" or "HTTP/1.1"), 206 <responsecode> is a 3-digit response code indicating success or 207 failure of the request, and <responsestring> is an optional 208 human-readable string explaining what the response code means. 209 210 This server parses the request and the headers, and then calls a 211 function specific to the request type (<command>). Specifically, 212 a request SPAM will be handled by a method do_SPAM(). If no 213 such method exists the server sends an error response to the 214 client. If it exists, it is called with no arguments: 215 216 do_SPAM() 217 218 Note that the request name is case sensitive (i.e. SPAM and spam 219 are different requests). 220 221 The various request details are stored in instance variables: 222 223 - client_address is the client IP address in the form (host, 224 port); 225 226 - command, path and version are the broken-down request line; 227 228 - headers is an instance of email.message.Message (or a derived 229 class) containing the header information; 230 231 - rfile is a file object open for reading positioned at the 232 start of the optional input data part; 233 234 - wfile is a file object open for writing. 235 236 IT IS IMPORTANT TO ADHERE TO THE PROTOCOL FOR WRITING! 237 238 The first thing to be written must be the response line. Then 239 follow 0 or more header lines, then a blank line, and then the 240 actual data (if any). The meaning of the header lines depends on 241 the command executed by the server; in most cases, when data is 242 returned, there should be at least one header line of the form 243 244 Content-type: <type>/<subtype> 245 246 where <type> and <subtype> should be registered MIME types, 247 e.g. "text/html" or "text/plain". 248 249 """ 250 251 # The Python system version, truncated to its first component. 252 sys_version = "Python/" + sys.version.split()[0] 253 254 # The server software version. You may want to override this. 255 # The format is multiple whitespace-separated strings, 256 # where each string is of the form name[/version]. 257 server_version = "BaseHTTP/" + __version__ 258 259 error_message_format = DEFAULT_ERROR_MESSAGE 260 error_content_type = DEFAULT_ERROR_CONTENT_TYPE 261 262 # The default request version. This only affects responses up until 263 # the point where the request line is parsed, so it mainly decides what 264 # the client gets back when sending a malformed request line. 265 # Most web servers default to HTTP 0.9, i.e. don't send a status line. 266 default_request_version = "HTTP/0.9" 267 268 def parse_request(self): 269 """Parse a request (internal). 270 271 The request should be stored in self.raw_requestline; the results 272 are in self.command, self.path, self.request_version and 273 self.headers. 274 275 Return True for success, False for failure; on failure, any relevant 276 error response has already been sent back. 277 278 """ 279 self.command = None # set in case of error on the first line 280 self.request_version = version = self.default_request_version 281 self.close_connection = True 282 requestline = str(self.raw_requestline, 'iso-8859-1') 283 requestline = requestline.rstrip('\r\n') 284 self.requestline = requestline 285 words = requestline.split() 286 if len(words) == 0: 287 return False 288 289 if len(words) >= 3: # Enough to determine protocol version 290 version = words[-1] 291 try: 292 if not version.startswith('HTTP/'): 293 raise ValueError 294 base_version_number = version.split('/', 1)[1] 295 version_number = base_version_number.split(".") 296 # RFC 2145 section 3.1 says there can be only one "." and 297 # - major and minor numbers MUST be treated as 298 # separate integers; 299 # - HTTP/2.4 is a lower version than HTTP/2.13, which in 300 # turn is lower than HTTP/12.3; 301 # - Leading zeros MUST be ignored by recipients. 302 if len(version_number) != 2: 303 raise ValueError 304 version_number = int(version_number[0]), int(version_number[1]) 305 except (ValueError, IndexError): 306 self.send_error( 307 HTTPStatus.BAD_REQUEST, 308 "Bad request version (%r)" % version) 309 return False 310 if version_number >= (1, 1) and self.protocol_version >= "HTTP/1.1": 311 self.close_connection = False 312 if version_number >= (2, 0): 313 self.send_error( 314 HTTPStatus.HTTP_VERSION_NOT_SUPPORTED, 315 "Invalid HTTP version (%s)" % base_version_number) 316 return False 317 self.request_version = version 318 319 if not 2 <= len(words) <= 3: 320 self.send_error( 321 HTTPStatus.BAD_REQUEST, 322 "Bad request syntax (%r)" % requestline) 323 return False 324 command, path = words[:2] 325 if len(words) == 2: 326 self.close_connection = True 327 if command != 'GET': 328 self.send_error( 329 HTTPStatus.BAD_REQUEST, 330 "Bad HTTP/0.9 request type (%r)" % command) 331 return False 332 self.command, self.path = command, path 333 334 # Examine the headers and look for a Connection directive. 335 try: 336 self.headers = http.client.parse_headers(self.rfile, 337 _class=self.MessageClass) 338 except http.client.LineTooLong as err: 339 self.send_error( 340 HTTPStatus.REQUEST_HEADER_FIELDS_TOO_LARGE, 341 "Line too long", 342 str(err)) 343 return False 344 except http.client.HTTPException as err: 345 self.send_error( 346 HTTPStatus.REQUEST_HEADER_FIELDS_TOO_LARGE, 347 "Too many headers", 348 str(err) 349 ) 350 return False 351 352 conntype = self.headers.get('Connection', "") 353 if conntype.lower() == 'close': 354 self.close_connection = True 355 elif (conntype.lower() == 'keep-alive' and 356 self.protocol_version >= "HTTP/1.1"): 357 self.close_connection = False 358 # Examine the headers and look for an Expect directive 359 expect = self.headers.get('Expect', "") 360 if (expect.lower() == "100-continue" and 361 self.protocol_version >= "HTTP/1.1" and 362 self.request_version >= "HTTP/1.1"): 363 if not self.handle_expect_100(): 364 return False 365 return True 366 367 def handle_expect_100(self): 368 """Decide what to do with an "Expect: 100-continue" header. 369 370 If the client is expecting a 100 Continue response, we must 371 respond with either a 100 Continue or a final response before 372 waiting for the request body. The default is to always respond 373 with a 100 Continue. You can behave differently (for example, 374 reject unauthorized requests) by overriding this method. 375 376 This method should either return True (possibly after sending 377 a 100 Continue response) or send an error response and return 378 False. 379 380 """ 381 self.send_response_only(HTTPStatus.CONTINUE) 382 self.end_headers() 383 return True 384 385 def handle_one_request(self): 386 """Handle a single HTTP request. 387 388 You normally don't need to override this method; see the class 389 __doc__ string for information on how to handle specific HTTP 390 commands such as GET and POST. 391 392 """ 393 try: 394 self.raw_requestline = self.rfile.readline(65537) 395 if len(self.raw_requestline) > 65536: 396 self.requestline = '' 397 self.request_version = '' 398 self.command = '' 399 self.send_error(HTTPStatus.REQUEST_URI_TOO_LONG) 400 return 401 if not self.raw_requestline: 402 self.close_connection = True 403 return 404 if not self.parse_request(): 405 # An error code has been sent, just exit 406 return 407 mname = 'do_' + self.command 408 if not hasattr(self, mname): 409 self.send_error( 410 HTTPStatus.NOT_IMPLEMENTED, 411 "Unsupported method (%r)" % self.command) 412 return 413 method = getattr(self, mname) 414 method() 415 self.wfile.flush() #actually send the response if not already done. 416 except socket.timeout as e: 417 #a read or a write timed out. Discard this connection 418 self.log_error("Request timed out: %r", e) 419 self.close_connection = True 420 return 421 422 def handle(self): 423 """Handle multiple requests if necessary.""" 424 self.close_connection = True 425 426 self.handle_one_request() 427 while not self.close_connection: 428 self.handle_one_request() 429 430 def send_error(self, code, message=None, explain=None): 431 """Send and log an error reply. 432 433 Arguments are 434 * code: an HTTP error code 435 3 digits 436 * message: a simple optional 1 line reason phrase. 437 *( HTAB / SP / VCHAR / %x80-FF ) 438 defaults to short entry matching the response code 439 * explain: a detailed message defaults to the long entry 440 matching the response code. 441 442 This sends an error response (so it must be called before any 443 output has been generated), logs the error, and finally sends 444 a piece of HTML explaining the error to the user. 445 446 """ 447 448 try: 449 shortmsg, longmsg = self.responses[code] 450 except KeyError: 451 shortmsg, longmsg = '???', '???' 452 if message is None: 453 message = shortmsg 454 if explain is None: 455 explain = longmsg 456 self.log_error("code %d, message %s", code, message) 457 self.send_response(code, message) 458 self.send_header('Connection', 'close') 459 460 # Message body is omitted for cases described in: 461 # - RFC7230: 3.3. 1xx, 204(No Content), 304(Not Modified) 462 # - RFC7231: 6.3.6. 205(Reset Content) 463 body = None 464 if (code >= 200 and 465 code not in (HTTPStatus.NO_CONTENT, 466 HTTPStatus.RESET_CONTENT, 467 HTTPStatus.NOT_MODIFIED)): 468 # HTML encode to prevent Cross Site Scripting attacks 469 # (see bug #1100201) 470 content = (self.error_message_format % { 471 'code': code, 472 'message': html.escape(message, quote=False), 473 'explain': html.escape(explain, quote=False) 474 }) 475 body = content.encode('UTF-8', 'replace') 476 self.send_header("Content-Type", self.error_content_type) 477 self.send_header('Content-Length', str(len(body))) 478 self.end_headers() 479 480 if self.command != 'HEAD' and body: 481 self.wfile.write(body) 482 483 def send_response(self, code, message=None): 484 """Add the response header to the headers buffer and log the 485 response code. 486 487 Also send two standard headers with the server software 488 version and the current date. 489 490 """ 491 self.log_request(code) 492 self.send_response_only(code, message) 493 self.send_header('Server', self.version_string()) 494 self.send_header('Date', self.date_time_string()) 495 496 def send_response_only(self, code, message=None): 497 """Send the response header only.""" 498 if self.request_version != 'HTTP/0.9': 499 if message is None: 500 if code in self.responses: 501 message = self.responses[code][0] 502 else: 503 message = '' 504 if not hasattr(self, '_headers_buffer'): 505 self._headers_buffer = [] 506 self._headers_buffer.append(("%s %d %s\r\n" % 507 (self.protocol_version, code, message)).encode( 508 'latin-1', 'strict')) 509 510 def send_header(self, keyword, value): 511 """Send a MIME header to the headers buffer.""" 512 if self.request_version != 'HTTP/0.9': 513 if not hasattr(self, '_headers_buffer'): 514 self._headers_buffer = [] 515 self._headers_buffer.append( 516 ("%s: %s\r\n" % (keyword, value)).encode('latin-1', 'strict')) 517 518 if keyword.lower() == 'connection': 519 if value.lower() == 'close': 520 self.close_connection = True 521 elif value.lower() == 'keep-alive': 522 self.close_connection = False 523 524 def end_headers(self): 525 """Send the blank line ending the MIME headers.""" 526 if self.request_version != 'HTTP/0.9': 527 self._headers_buffer.append(b"\r\n") 528 self.flush_headers() 529 530 def flush_headers(self): 531 if hasattr(self, '_headers_buffer'): 532 self.wfile.write(b"".join(self._headers_buffer)) 533 self._headers_buffer = [] 534 535 def log_request(self, code='-', size='-'): 536 """Log an accepted request. 537 538 This is called by send_response(). 539 540 """ 541 if isinstance(code, HTTPStatus): 542 code = code.value 543 self.log_message('"%s" %s %s', 544 self.requestline, str(code), str(size)) 545 546 def log_error(self, format, *args): 547 """Log an error. 548 549 This is called when a request cannot be fulfilled. By 550 default it passes the message on to log_message(). 551 552 Arguments are the same as for log_message(). 553 554 XXX This should go to the separate error log. 555 556 """ 557 558 self.log_message(format, *args) 559 560 def log_message(self, format, *args): 561 """Log an arbitrary message. 562 563 This is used by all other logging functions. Override 564 it if you have specific logging wishes. 565 566 The first argument, FORMAT, is a format string for the 567 message to be logged. If the format string contains 568 any % escapes requiring parameters, they should be 569 specified as subsequent arguments (it's just like 570 printf!). 571 572 The client ip and current date/time are prefixed to 573 every message. 574 575 """ 576 577 sys.stderr.write("%s - - [%s] %s\n" % 578 (self.address_string(), 579 self.log_date_time_string(), 580 format%args)) 581 582 def version_string(self): 583 """Return the server software version string.""" 584 return self.server_version + ' ' + self.sys_version 585 586 def date_time_string(self, timestamp=None): 587 """Return the current date and time formatted for a message header.""" 588 if timestamp is None: 589 timestamp = time.time() 590 return email.utils.formatdate(timestamp, usegmt=True) 591 592 def log_date_time_string(self): 593 """Return the current time formatted for logging.""" 594 now = time.time() 595 year, month, day, hh, mm, ss, x, y, z = time.localtime(now) 596 s = "%02d/%3s/%04d %02d:%02d:%02d" % ( 597 day, self.monthname[month], year, hh, mm, ss) 598 return s 599 600 weekdayname = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'] 601 602 monthname = [None, 603 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 604 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'] 605 606 def address_string(self): 607 """Return the client address.""" 608 609 return self.client_address[0] 610 611 # Essentially static class variables 612 613 # The version of the HTTP protocol we support. 614 # Set this to HTTP/1.1 to enable automatic keepalive 615 protocol_version = "HTTP/1.0" 616 617 # MessageClass used to parse headers 618 MessageClass = http.client.HTTPMessage 619 620 # hack to maintain backwards compatibility 621 responses = { 622 v: (v.phrase, v.description) 623 for v in HTTPStatus.__members__.values() 624 } 625 626 627class SimpleHTTPRequestHandler(BaseHTTPRequestHandler): 628 629 """Simple HTTP request handler with GET and HEAD commands. 630 631 This serves files from the current directory and any of its 632 subdirectories. The MIME type for files is determined by 633 calling the .guess_type() method. 634 635 The GET and HEAD requests are identical except that the HEAD 636 request omits the actual contents of the file. 637 638 """ 639 640 server_version = "SimpleHTTP/" + __version__ 641 642 def __init__(self, *args, directory=None, **kwargs): 643 if directory is None: 644 directory = os.getcwd() 645 self.directory = directory 646 super().__init__(*args, **kwargs) 647 648 def do_GET(self): 649 """Serve a GET request.""" 650 f = self.send_head() 651 if f: 652 try: 653 self.copyfile(f, self.wfile) 654 finally: 655 f.close() 656 657 def do_HEAD(self): 658 """Serve a HEAD request.""" 659 f = self.send_head() 660 if f: 661 f.close() 662 663 def send_head(self): 664 """Common code for GET and HEAD commands. 665 666 This sends the response code and MIME headers. 667 668 Return value is either a file object (which has to be copied 669 to the outputfile by the caller unless the command was HEAD, 670 and must be closed by the caller under all circumstances), or 671 None, in which case the caller has nothing further to do. 672 673 """ 674 path = self.translate_path(self.path) 675 f = None 676 if os.path.isdir(path): 677 parts = urllib.parse.urlsplit(self.path) 678 if not parts.path.endswith('/'): 679 # redirect browser - doing basically what apache does 680 self.send_response(HTTPStatus.MOVED_PERMANENTLY) 681 new_parts = (parts[0], parts[1], parts[2] + '/', 682 parts[3], parts[4]) 683 new_url = urllib.parse.urlunsplit(new_parts) 684 self.send_header("Location", new_url) 685 self.end_headers() 686 return None 687 for index in "index.html", "index.htm": 688 index = os.path.join(path, index) 689 if os.path.exists(index): 690 path = index 691 break 692 else: 693 return self.list_directory(path) 694 ctype = self.guess_type(path) 695 try: 696 f = open(path, 'rb') 697 except OSError: 698 self.send_error(HTTPStatus.NOT_FOUND, "File not found") 699 return None 700 701 try: 702 fs = os.fstat(f.fileno()) 703 # Use browser cache if possible 704 if ("If-Modified-Since" in self.headers 705 and "If-None-Match" not in self.headers): 706 # compare If-Modified-Since and time of last file modification 707 try: 708 ims = email.utils.parsedate_to_datetime( 709 self.headers["If-Modified-Since"]) 710 except (TypeError, IndexError, OverflowError, ValueError): 711 # ignore ill-formed values 712 pass 713 else: 714 if ims.tzinfo is None: 715 # obsolete format with no timezone, cf. 716 # https://tools.ietf.org/html/rfc7231#section-7.1.1.1 717 ims = ims.replace(tzinfo=datetime.timezone.utc) 718 if ims.tzinfo is datetime.timezone.utc: 719 # compare to UTC datetime of last modification 720 last_modif = datetime.datetime.fromtimestamp( 721 fs.st_mtime, datetime.timezone.utc) 722 # remove microseconds, like in If-Modified-Since 723 last_modif = last_modif.replace(microsecond=0) 724 725 if last_modif <= ims: 726 self.send_response(HTTPStatus.NOT_MODIFIED) 727 self.end_headers() 728 f.close() 729 return None 730 731 self.send_response(HTTPStatus.OK) 732 self.send_header("Content-type", ctype) 733 self.send_header("Content-Length", str(fs[6])) 734 self.send_header("Last-Modified", 735 self.date_time_string(fs.st_mtime)) 736 self.end_headers() 737 return f 738 except: 739 f.close() 740 raise 741 742 def list_directory(self, path): 743 """Helper to produce a directory listing (absent index.html). 744 745 Return value is either a file object, or None (indicating an 746 error). In either case, the headers are sent, making the 747 interface the same as for send_head(). 748 749 """ 750 try: 751 list = os.listdir(path) 752 except OSError: 753 self.send_error( 754 HTTPStatus.NOT_FOUND, 755 "No permission to list directory") 756 return None 757 list.sort(key=lambda a: a.lower()) 758 r = [] 759 try: 760 displaypath = urllib.parse.unquote(self.path, 761 errors='surrogatepass') 762 except UnicodeDecodeError: 763 displaypath = urllib.parse.unquote(path) 764 displaypath = html.escape(displaypath, quote=False) 765 enc = sys.getfilesystemencoding() 766 title = 'Directory listing for %s' % displaypath 767 r.append('<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" ' 768 '"http://www.w3.org/TR/html4/strict.dtd">') 769 r.append('<html>\n<head>') 770 r.append('<meta http-equiv="Content-Type" ' 771 'content="text/html; charset=%s">' % enc) 772 r.append('<title>%s</title>\n</head>' % title) 773 r.append('<body>\n<h1>%s</h1>' % title) 774 r.append('<hr>\n<ul>') 775 for name in list: 776 fullname = os.path.join(path, name) 777 displayname = linkname = name 778 # Append / for directories or @ for symbolic links 779 if os.path.isdir(fullname): 780 displayname = name + "/" 781 linkname = name + "/" 782 if os.path.islink(fullname): 783 displayname = name + "@" 784 # Note: a link to a directory displays with @ and links with / 785 r.append('<li><a href="%s">%s</a></li>' 786 % (urllib.parse.quote(linkname, 787 errors='surrogatepass'), 788 html.escape(displayname, quote=False))) 789 r.append('</ul>\n<hr>\n</body>\n</html>\n') 790 encoded = '\n'.join(r).encode(enc, 'surrogateescape') 791 f = io.BytesIO() 792 f.write(encoded) 793 f.seek(0) 794 self.send_response(HTTPStatus.OK) 795 self.send_header("Content-type", "text/html; charset=%s" % enc) 796 self.send_header("Content-Length", str(len(encoded))) 797 self.end_headers() 798 return f 799 800 def translate_path(self, path): 801 """Translate a /-separated PATH to the local filename syntax. 802 803 Components that mean special things to the local file system 804 (e.g. drive or directory names) are ignored. (XXX They should 805 probably be diagnosed.) 806 807 """ 808 # abandon query parameters 809 path = path.split('?',1)[0] 810 path = path.split('#',1)[0] 811 # Don't forget explicit trailing slash when normalizing. Issue17324 812 trailing_slash = path.rstrip().endswith('/') 813 try: 814 path = urllib.parse.unquote(path, errors='surrogatepass') 815 except UnicodeDecodeError: 816 path = urllib.parse.unquote(path) 817 path = posixpath.normpath(path) 818 words = path.split('/') 819 words = filter(None, words) 820 path = self.directory 821 for word in words: 822 if os.path.dirname(word) or word in (os.curdir, os.pardir): 823 # Ignore components that are not a simple file/directory name 824 continue 825 path = os.path.join(path, word) 826 if trailing_slash: 827 path += '/' 828 return path 829 830 def copyfile(self, source, outputfile): 831 """Copy all data between two file objects. 832 833 The SOURCE argument is a file object open for reading 834 (or anything with a read() method) and the DESTINATION 835 argument is a file object open for writing (or 836 anything with a write() method). 837 838 The only reason for overriding this would be to change 839 the block size or perhaps to replace newlines by CRLF 840 -- note however that this the default server uses this 841 to copy binary data as well. 842 843 """ 844 shutil.copyfileobj(source, outputfile) 845 846 def guess_type(self, path): 847 """Guess the type of a file. 848 849 Argument is a PATH (a filename). 850 851 Return value is a string of the form type/subtype, 852 usable for a MIME Content-type header. 853 854 The default implementation looks the file's extension 855 up in the table self.extensions_map, using application/octet-stream 856 as a default; however it would be permissible (if 857 slow) to look inside the data to make a better guess. 858 859 """ 860 861 base, ext = posixpath.splitext(path) 862 if ext in self.extensions_map: 863 return self.extensions_map[ext] 864 ext = ext.lower() 865 if ext in self.extensions_map: 866 return self.extensions_map[ext] 867 else: 868 return self.extensions_map[''] 869 870 if not mimetypes.inited: 871 mimetypes.init() # try to read system mime.types 872 extensions_map = mimetypes.types_map.copy() 873 extensions_map.update({ 874 '': 'application/octet-stream', # Default 875 '.py': 'text/plain', 876 '.c': 'text/plain', 877 '.h': 'text/plain', 878 }) 879 880 881# Utilities for CGIHTTPRequestHandler 882 883def _url_collapse_path(path): 884 """ 885 Given a URL path, remove extra '/'s and '.' path elements and collapse 886 any '..' references and returns a collapsed path. 887 888 Implements something akin to RFC-2396 5.2 step 6 to parse relative paths. 889 The utility of this function is limited to is_cgi method and helps 890 preventing some security attacks. 891 892 Returns: The reconstituted URL, which will always start with a '/'. 893 894 Raises: IndexError if too many '..' occur within the path. 895 896 """ 897 # Query component should not be involved. 898 path, _, query = path.partition('?') 899 path = urllib.parse.unquote(path) 900 901 # Similar to os.path.split(os.path.normpath(path)) but specific to URL 902 # path semantics rather than local operating system semantics. 903 path_parts = path.split('/') 904 head_parts = [] 905 for part in path_parts[:-1]: 906 if part == '..': 907 head_parts.pop() # IndexError if more '..' than prior parts 908 elif part and part != '.': 909 head_parts.append( part ) 910 if path_parts: 911 tail_part = path_parts.pop() 912 if tail_part: 913 if tail_part == '..': 914 head_parts.pop() 915 tail_part = '' 916 elif tail_part == '.': 917 tail_part = '' 918 else: 919 tail_part = '' 920 921 if query: 922 tail_part = '?'.join((tail_part, query)) 923 924 splitpath = ('/' + '/'.join(head_parts), tail_part) 925 collapsed_path = "/".join(splitpath) 926 927 return collapsed_path 928 929 930 931nobody = None 932 933def nobody_uid(): 934 """Internal routine to get nobody's uid""" 935 global nobody 936 if nobody: 937 return nobody 938 try: 939 import pwd 940 except ImportError: 941 return -1 942 try: 943 nobody = pwd.getpwnam('nobody')[2] 944 except KeyError: 945 nobody = 1 + max(x[2] for x in pwd.getpwall()) 946 return nobody 947 948 949def executable(path): 950 """Test for executable file.""" 951 return os.access(path, os.X_OK) 952 953 954class CGIHTTPRequestHandler(SimpleHTTPRequestHandler): 955 956 """Complete HTTP server with GET, HEAD and POST commands. 957 958 GET and HEAD also support running CGI scripts. 959 960 The POST command is *only* implemented for CGI scripts. 961 962 """ 963 964 # Determine platform specifics 965 have_fork = hasattr(os, 'fork') 966 967 # Make rfile unbuffered -- we need to read one line and then pass 968 # the rest to a subprocess, so we can't use buffered input. 969 rbufsize = 0 970 971 def do_POST(self): 972 """Serve a POST request. 973 974 This is only implemented for CGI scripts. 975 976 """ 977 978 if self.is_cgi(): 979 self.run_cgi() 980 else: 981 self.send_error( 982 HTTPStatus.NOT_IMPLEMENTED, 983 "Can only POST to CGI scripts") 984 985 def send_head(self): 986 """Version of send_head that support CGI scripts""" 987 if self.is_cgi(): 988 return self.run_cgi() 989 else: 990 return SimpleHTTPRequestHandler.send_head(self) 991 992 def is_cgi(self): 993 """Test whether self.path corresponds to a CGI script. 994 995 Returns True and updates the cgi_info attribute to the tuple 996 (dir, rest) if self.path requires running a CGI script. 997 Returns False otherwise. 998 999 If any exception is raised, the caller should assume that 1000 self.path was rejected as invalid and act accordingly. 1001 1002 The default implementation tests whether the normalized url 1003 path begins with one of the strings in self.cgi_directories 1004 (and the next character is a '/' or the end of the string). 1005 1006 """ 1007 collapsed_path = _url_collapse_path(self.path) 1008 dir_sep = collapsed_path.find('/', 1) 1009 head, tail = collapsed_path[:dir_sep], collapsed_path[dir_sep+1:] 1010 if head in self.cgi_directories: 1011 self.cgi_info = head, tail 1012 return True 1013 return False 1014 1015 1016 cgi_directories = ['/cgi-bin', '/htbin'] 1017 1018 def is_executable(self, path): 1019 """Test whether argument path is an executable file.""" 1020 return executable(path) 1021 1022 def is_python(self, path): 1023 """Test whether argument path is a Python script.""" 1024 head, tail = os.path.splitext(path) 1025 return tail.lower() in (".py", ".pyw") 1026 1027 def run_cgi(self): 1028 """Execute a CGI script.""" 1029 dir, rest = self.cgi_info 1030 path = dir + '/' + rest 1031 i = path.find('/', len(dir)+1) 1032 while i >= 0: 1033 nextdir = path[:i] 1034 nextrest = path[i+1:] 1035 1036 scriptdir = self.translate_path(nextdir) 1037 if os.path.isdir(scriptdir): 1038 dir, rest = nextdir, nextrest 1039 i = path.find('/', len(dir)+1) 1040 else: 1041 break 1042 1043 # find an explicit query string, if present. 1044 rest, _, query = rest.partition('?') 1045 1046 # dissect the part after the directory name into a script name & 1047 # a possible additional path, to be stored in PATH_INFO. 1048 i = rest.find('/') 1049 if i >= 0: 1050 script, rest = rest[:i], rest[i:] 1051 else: 1052 script, rest = rest, '' 1053 1054 scriptname = dir + '/' + script 1055 scriptfile = self.translate_path(scriptname) 1056 if not os.path.exists(scriptfile): 1057 self.send_error( 1058 HTTPStatus.NOT_FOUND, 1059 "No such CGI script (%r)" % scriptname) 1060 return 1061 if not os.path.isfile(scriptfile): 1062 self.send_error( 1063 HTTPStatus.FORBIDDEN, 1064 "CGI script is not a plain file (%r)" % scriptname) 1065 return 1066 ispy = self.is_python(scriptname) 1067 if self.have_fork or not ispy: 1068 if not self.is_executable(scriptfile): 1069 self.send_error( 1070 HTTPStatus.FORBIDDEN, 1071 "CGI script is not executable (%r)" % scriptname) 1072 return 1073 1074 # Reference: http://hoohoo.ncsa.uiuc.edu/cgi/env.html 1075 # XXX Much of the following could be prepared ahead of time! 1076 env = copy.deepcopy(os.environ) 1077 env['SERVER_SOFTWARE'] = self.version_string() 1078 env['SERVER_NAME'] = self.server.server_name 1079 env['GATEWAY_INTERFACE'] = 'CGI/1.1' 1080 env['SERVER_PROTOCOL'] = self.protocol_version 1081 env['SERVER_PORT'] = str(self.server.server_port) 1082 env['REQUEST_METHOD'] = self.command 1083 uqrest = urllib.parse.unquote(rest) 1084 env['PATH_INFO'] = uqrest 1085 env['PATH_TRANSLATED'] = self.translate_path(uqrest) 1086 env['SCRIPT_NAME'] = scriptname 1087 if query: 1088 env['QUERY_STRING'] = query 1089 env['REMOTE_ADDR'] = self.client_address[0] 1090 authorization = self.headers.get("authorization") 1091 if authorization: 1092 authorization = authorization.split() 1093 if len(authorization) == 2: 1094 import base64, binascii 1095 env['AUTH_TYPE'] = authorization[0] 1096 if authorization[0].lower() == "basic": 1097 try: 1098 authorization = authorization[1].encode('ascii') 1099 authorization = base64.decodebytes(authorization).\ 1100 decode('ascii') 1101 except (binascii.Error, UnicodeError): 1102 pass 1103 else: 1104 authorization = authorization.split(':') 1105 if len(authorization) == 2: 1106 env['REMOTE_USER'] = authorization[0] 1107 # XXX REMOTE_IDENT 1108 if self.headers.get('content-type') is None: 1109 env['CONTENT_TYPE'] = self.headers.get_content_type() 1110 else: 1111 env['CONTENT_TYPE'] = self.headers['content-type'] 1112 length = self.headers.get('content-length') 1113 if length: 1114 env['CONTENT_LENGTH'] = length 1115 referer = self.headers.get('referer') 1116 if referer: 1117 env['HTTP_REFERER'] = referer 1118 accept = [] 1119 for line in self.headers.getallmatchingheaders('accept'): 1120 if line[:1] in "\t\n\r ": 1121 accept.append(line.strip()) 1122 else: 1123 accept = accept + line[7:].split(',') 1124 env['HTTP_ACCEPT'] = ','.join(accept) 1125 ua = self.headers.get('user-agent') 1126 if ua: 1127 env['HTTP_USER_AGENT'] = ua 1128 co = filter(None, self.headers.get_all('cookie', [])) 1129 cookie_str = ', '.join(co) 1130 if cookie_str: 1131 env['HTTP_COOKIE'] = cookie_str 1132 # XXX Other HTTP_* headers 1133 # Since we're setting the env in the parent, provide empty 1134 # values to override previously set values 1135 for k in ('QUERY_STRING', 'REMOTE_HOST', 'CONTENT_LENGTH', 1136 'HTTP_USER_AGENT', 'HTTP_COOKIE', 'HTTP_REFERER'): 1137 env.setdefault(k, "") 1138 1139 self.send_response(HTTPStatus.OK, "Script output follows") 1140 self.flush_headers() 1141 1142 decoded_query = query.replace('+', ' ') 1143 1144 if self.have_fork: 1145 # Unix -- fork as we should 1146 args = [script] 1147 if '=' not in decoded_query: 1148 args.append(decoded_query) 1149 nobody = nobody_uid() 1150 self.wfile.flush() # Always flush before forking 1151 pid = os.fork() 1152 if pid != 0: 1153 # Parent 1154 pid, sts = os.waitpid(pid, 0) 1155 # throw away additional data [see bug #427345] 1156 while select.select([self.rfile], [], [], 0)[0]: 1157 if not self.rfile.read(1): 1158 break 1159 if sts: 1160 self.log_error("CGI script exit status %#x", sts) 1161 return 1162 # Child 1163 try: 1164 try: 1165 os.setuid(nobody) 1166 except OSError: 1167 pass 1168 os.dup2(self.rfile.fileno(), 0) 1169 os.dup2(self.wfile.fileno(), 1) 1170 os.execve(scriptfile, args, env) 1171 except: 1172 self.server.handle_error(self.request, self.client_address) 1173 os._exit(127) 1174 1175 else: 1176 # Non-Unix -- use subprocess 1177 import subprocess 1178 cmdline = [scriptfile] 1179 if self.is_python(scriptfile): 1180 interp = sys.executable 1181 if interp.lower().endswith("w.exe"): 1182 # On Windows, use python.exe, not pythonw.exe 1183 interp = interp[:-5] + interp[-4:] 1184 cmdline = [interp, '-u'] + cmdline 1185 if '=' not in query: 1186 cmdline.append(query) 1187 self.log_message("command: %s", subprocess.list2cmdline(cmdline)) 1188 try: 1189 nbytes = int(length) 1190 except (TypeError, ValueError): 1191 nbytes = 0 1192 p = subprocess.Popen(cmdline, 1193 stdin=subprocess.PIPE, 1194 stdout=subprocess.PIPE, 1195 stderr=subprocess.PIPE, 1196 env = env 1197 ) 1198 if self.command.lower() == "post" and nbytes > 0: 1199 data = self.rfile.read(nbytes) 1200 else: 1201 data = None 1202 # throw away additional data [see bug #427345] 1203 while select.select([self.rfile._sock], [], [], 0)[0]: 1204 if not self.rfile._sock.recv(1): 1205 break 1206 stdout, stderr = p.communicate(data) 1207 self.wfile.write(stdout) 1208 if stderr: 1209 self.log_error('%s', stderr) 1210 p.stderr.close() 1211 p.stdout.close() 1212 status = p.returncode 1213 if status: 1214 self.log_error("CGI script exit status %#x", status) 1215 else: 1216 self.log_message("CGI script exited OK") 1217 1218 1219def test(HandlerClass=BaseHTTPRequestHandler, 1220 ServerClass=ThreadingHTTPServer, 1221 protocol="HTTP/1.0", port=8000, bind=""): 1222 """Test the HTTP request handler class. 1223 1224 This runs an HTTP server on port 8000 (or the port argument). 1225 1226 """ 1227 server_address = (bind, port) 1228 1229 HandlerClass.protocol_version = protocol 1230 with ServerClass(server_address, HandlerClass) as httpd: 1231 sa = httpd.socket.getsockname() 1232 serve_message = "Serving HTTP on {host} port {port} (http://{host}:{port}/) ..." 1233 print(serve_message.format(host=sa[0], port=sa[1])) 1234 try: 1235 httpd.serve_forever() 1236 except KeyboardInterrupt: 1237 print("\nKeyboard interrupt received, exiting.") 1238 sys.exit(0) 1239 1240if __name__ == '__main__': 1241 import argparse 1242 1243 parser = argparse.ArgumentParser() 1244 parser.add_argument('--cgi', action='store_true', 1245 help='Run as CGI Server') 1246 parser.add_argument('--bind', '-b', default='', metavar='ADDRESS', 1247 help='Specify alternate bind address ' 1248 '[default: all interfaces]') 1249 parser.add_argument('--directory', '-d', default=os.getcwd(), 1250 help='Specify alternative directory ' 1251 '[default:current directory]') 1252 parser.add_argument('port', action='store', 1253 default=8000, type=int, 1254 nargs='?', 1255 help='Specify alternate port [default: 8000]') 1256 args = parser.parse_args() 1257 if args.cgi: 1258 handler_class = CGIHTTPRequestHandler 1259 else: 1260 handler_class = partial(SimpleHTTPRequestHandler, 1261 directory=args.directory) 1262 test(HandlerClass=handler_class, port=args.port, bind=args.bind) 1263