"""HTTP server classes.

Note: BaseHTTPRequestHandler doesn't implement any HTTP request; see
SimpleHTTPRequestHandler for simple implementations of GET and HEAD,
and CGIHTTPRequestHandler for CGI scripts (GET, HEAD and POST).

It does, however, optionally implement HTTP/1.1 persistent connections,
as of version 0.3.

Notes on CGIHTTPRequestHandler
------------------------------

This class implements GET and POST requests to cgi-bin scripts.

If the os.fork() function is not present (e.g. on Windows),
subprocess.Popen() is used as a fallback, with slightly altered semantics.

In all cases, the implementation is intentionally naive -- all
requests are executed synchronously.

SECURITY WARNING: DON'T USE THIS CODE UNLESS YOU ARE INSIDE A FIREWALL
-- it may execute arbitrary Python code or external programs.

Note that status code 200 is sent prior to execution of a CGI script, so
scripts cannot send other status codes such as 302 (redirect).

XXX To do:

- log requests even later (to capture byte count)
- log user-agent header and other interesting goodies
- send error log to separate file
"""


# See also:
#
# HTTP Working Group                                        T. Berners-Lee
# INTERNET-DRAFT                                            R. T. Fielding
# <draft-ietf-http-v10-spec-00.txt>                     H. Frystyk Nielsen
# Expires September 8, 1995                                  March 8, 1995
#
# URL: http://www.ics.uci.edu/pub/ietf/http/draft-ietf-http-v10-spec-00.txt
#
# and
#
# Network Working Group                                      R. Fielding
# Request for Comments: 2616                                       et al
# Obsoletes: 2068                                              June 1999
# Category: Standards Track
#
# URL: http://www.faqs.org/rfcs/rfc2616.html

# Log files
# ---------
#
# Here's a quote from the NCSA httpd docs about log file format.
#
# | The logfile format is as follows. Each line consists of:
# |
# | host rfc931 authuser [DD/Mon/YYYY:hh:mm:ss] "request" ddd bbbb
# |
# |        host: Either the DNS name or the IP number of the remote client
# |        rfc931: Any information returned by identd for this person,
# |                - otherwise.
# |        authuser: If user sent a userid for authentication, the user name,
# |                  - otherwise.
# |        DD: Day
# |        Mon: Month (calendar name)
# |        YYYY: Year
# |        hh: hour (24-hour format, the machine's timezone)
# |        mm: minutes
# |        ss: seconds
# |        request: The first line of the HTTP request as sent by the client.
# |        ddd: the status code returned by the server, - if not available.
# |        bbbb: the total number of bytes sent,
# |              *not including the HTTP/1.0 header*, - if not available
# |
# | You can determine the name of the file accessed through request.
#
# (Actually, the latter is only true if you know the server configuration
# at the time the request was made!)

__version__ = "0.6"

__all__ = [
    "HTTPServer", "BaseHTTPRequestHandler",
    "SimpleHTTPRequestHandler", "CGIHTTPRequestHandler",
]

import email.utils
import html
import http.client
import io
import mimetypes
import os
import posixpath
import select
import shutil
import socket # For gethostbyaddr()
import socketserver
import sys
import time
import urllib.parse
import copy
import argparse

from http import HTTPStatus


# Default error message template
DEFAULT_ERROR_MESSAGE = """\
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
        "http://www.w3.org/TR/html4/strict.dtd">
<html>
    <head>
        <meta http-equiv="Content-Type" content="text/html;charset=utf-8">
        <title>Error response</title>
    </head>
    <body>
        <h1>Error response</h1>
        <p>Error code: %(code)d</p>
        <p>Message: %(message)s.</p>
        <p>Error code explanation: %(code)s - %(explain)s.</p>
    </body>
</html>
"""

DEFAULT_ERROR_CONTENT_TYPE = "text/html;charset=utf-8"


class HTTPServer(socketserver.TCPServer):
    """TCP server that remembers its own host name and port once bound."""

    allow_reuse_address = 1    # Seems to make sense in testing environment

    def server_bind(self):
        """Override server_bind to store the server name.

        After the socket is bound, the fully qualified name of the bound
        host is stored in self.server_name and the port in self.server_port.
        """
        socketserver.TCPServer.server_bind(self)
        host, port = self.server_address[:2]
        self.server_name = socket.getfqdn(host)
        self.server_port = port


class BaseHTTPRequestHandler(socketserver.StreamRequestHandler):

    """HTTP request handler base class.

    The following explanation of HTTP serves to guide you through the
    code as well as to expose any misunderstandings I may have about
    HTTP (so you don't need to read the code to figure out I'm wrong
    :-).

    HTTP (HyperText Transfer Protocol) is an extensible protocol on
    top of a reliable stream transport (e.g. TCP/IP).  The protocol
    recognizes three parts to a request:

    1. One line identifying the request type and path
    2. An optional set of RFC-822-style headers
    3. An optional data part

    The headers and data are separated by a blank line.

    The first line of the request has the form

    <command> <path> <version>

    where <command> is a (case-sensitive) keyword such as GET or POST,
    <path> is a string containing path information for the request,
    and <version> should be the string "HTTP/1.0" or "HTTP/1.1".
    <path> is encoded using the URL encoding scheme (using %xx to signify
    the ASCII character with hex code xx).

    The specification specifies that lines are separated by CRLF but
    for compatibility with the widest range of clients recommends
    servers also handle LF.  Similarly, whitespace in the request line
    is treated sensibly (allowing multiple spaces between components
    and allowing trailing whitespace).

    Similarly, for output, lines ought to be separated by CRLF pairs
    but most clients grok LF characters just fine.

    If the first line of the request has the form

    <command> <path>

    (i.e. <version> is left out) then this is assumed to be an HTTP
    0.9 request; this form has no optional headers and data part and
    the reply consists of just the data.

    The reply form of the HTTP 1.x protocol again has three parts:

    1. One line giving the response code
    2. An optional set of RFC-822-style headers
    3. The data

    Again, the headers and data are separated by a blank line.

    The response code line has the form

    <version> <responsecode> <responsestring>

    where <version> is the protocol version ("HTTP/1.0" or "HTTP/1.1"),
    <responsecode> is a 3-digit response code indicating success or
    failure of the request, and <responsestring> is an optional
    human-readable string explaining what the response code means.

    This server parses the request and the headers, and then calls a
    function specific to the request type (<command>).  Specifically,
    a request SPAM will be handled by a method do_SPAM().  If no
    such method exists the server sends an error response to the
    client.  If it exists, it is called with no arguments:

    do_SPAM()

    Note that the request name is case sensitive (i.e. SPAM and spam
    are different requests).

    The various request details are stored in instance variables:

    - client_address is the client IP address in the form (host,
    port);

    - command, path and version are the broken-down request line;

    - headers is an instance of email.message.Message (or a derived
    class) containing the header information;

    - rfile is a file object open for reading positioned at the
    start of the optional input data part;

    - wfile is a file object open for writing.

    IT IS IMPORTANT TO ADHERE TO THE PROTOCOL FOR WRITING!

    The first thing to be written must be the response line.  Then
    follow 0 or more header lines, then a blank line, and then the
    actual data (if any).  The meaning of the header lines depends on
    the command executed by the server; in most cases, when data is
    returned, there should be at least one header line of the form

    Content-type: <type>/<subtype>

    where <type> and <subtype> should be registered MIME types,
    e.g. "text/html" or "text/plain".

    """

    # The Python system version, truncated to its first component.
    sys_version = "Python/" + sys.version.split()[0]

    # The server software version.  You may want to override this.
    # The format is multiple whitespace-separated strings,
    # where each string is of the form name[/version].
    server_version = "BaseHTTP/" + __version__

    error_message_format = DEFAULT_ERROR_MESSAGE
    error_content_type = DEFAULT_ERROR_CONTENT_TYPE

    # The default request version.  This only affects responses up until
    # the point where the request line is parsed, so it mainly decides what
    # the client gets back when sending a malformed request line.
    # Most web servers default to HTTP 0.9, i.e. don't send a status line.
    default_request_version = "HTTP/0.9"

    def parse_request(self):
        """Parse a request (internal).

        The request should be stored in self.raw_requestline; the results
        are in self.command, self.path, self.request_version and
        self.headers.

        A path that starts with more than one '/' is reduced to a single
        leading '/' before it is stored in self.path.

        Return True for success, False for failure; on failure, an
        error is sent back (except for a blank request line, which is
        rejected silently).

        """
        self.command = None  # set in case of error on the first line
        self.request_version = version = self.default_request_version
        self.close_connection = True
        requestline = str(self.raw_requestline, 'iso-8859-1')
        requestline = requestline.rstrip('\r\n')
        self.requestline = requestline
        words = requestline.split()
        if len(words) == 3:
            command, path, version = words
            try:
                if version[:5] != 'HTTP/':
                    raise ValueError
                base_version_number = version.split('/', 1)[1]
                version_number = base_version_number.split(".")
                # RFC 2145 section 3.1 says there can be only one "." and
                #   - major and minor numbers MUST be treated as
                #      separate integers;
                #   - HTTP/2.4 is a lower version than HTTP/2.13, which in
                #      turn is lower than HTTP/12.3;
                #   - Leading zeros MUST be ignored by recipients.
                if len(version_number) != 2:
                    raise ValueError
                version_number = int(version_number[0]), int(version_number[1])
            except (ValueError, IndexError):
                self.send_error(
                    HTTPStatus.BAD_REQUEST,
                    "Bad request version (%r)" % version)
                return False
            if version_number >= (1, 1) and self.protocol_version >= "HTTP/1.1":
                self.close_connection = False
            if version_number >= (2, 0):
                self.send_error(
                    HTTPStatus.HTTP_VERSION_NOT_SUPPORTED,
                    "Invalid HTTP version (%s)" % base_version_number)
                return False
        elif len(words) == 2:
            command, path = words
            self.close_connection = True
            if command != 'GET':
                self.send_error(
                    HTTPStatus.BAD_REQUEST,
                    "Bad HTTP/0.9 request type (%r)" % command)
                return False
        elif not words:
            return False
        else:
            self.send_error(
                HTTPStatus.BAD_REQUEST,
                "Bad request syntax (%r)" % requestline)
            return False
        self.command, self.path, self.request_version = command, path, version

        # HTTP clients treat "//host/path" as a scheme-relative absolute
        # URI rather than a path, so a request path starting with '//'
        # could be echoed back in a redirect that leaves this server.
        # Reduce any run of leading slashes to a single one.
        if self.path.startswith('//'):
            self.path = '/' + self.path.lstrip('/')

        # Examine the headers and look for a Connection directive.
        try:
            self.headers = http.client.parse_headers(self.rfile,
                                                     _class=self.MessageClass)
        except http.client.LineTooLong as err:
            self.send_error(
                HTTPStatus.REQUEST_HEADER_FIELDS_TOO_LARGE,
                "Line too long",
                str(err))
            return False
        except http.client.HTTPException as err:
            self.send_error(
                HTTPStatus.REQUEST_HEADER_FIELDS_TOO_LARGE,
                "Too many headers",
                str(err)
            )
            return False

        conntype = self.headers.get('Connection', "")
        if conntype.lower() == 'close':
            self.close_connection = True
        elif (conntype.lower() == 'keep-alive' and
              self.protocol_version >= "HTTP/1.1"):
            self.close_connection = False
        # Examine the headers and look for an Expect directive
        expect = self.headers.get('Expect', "")
        if (expect.lower() == "100-continue" and
                self.protocol_version >= "HTTP/1.1" and
                self.request_version >= "HTTP/1.1"):
            if not self.handle_expect_100():
                return False
        return True

    def handle_expect_100(self):
        """Decide what to do with an "Expect: 100-continue" header.

        If the client is expecting a 100 Continue response, we must
        respond with either a 100 Continue or a final response before
        waiting for the request body. The default is to always respond
        with a 100 Continue. You can behave differently (for example,
        reject unauthorized requests) by overriding this method.

        This method should either return True (possibly after sending
        a 100 Continue response) or send an error response and return
        False.

        """
        self.send_response_only(HTTPStatus.CONTINUE)
        self.end_headers()
        return True

    def handle_one_request(self):
        """Handle a single HTTP request.

        You normally don't need to override this method; see the class
        __doc__ string for information on how to handle specific HTTP
        commands such as GET and POST.

        """
        try:
            # Read one byte more than the limit so an over-long request
            # line can be told apart from one of exactly the limit.
            self.raw_requestline = self.rfile.readline(65537)
            if len(self.raw_requestline) > 65536:
                self.requestline = ''
                self.request_version = ''
                self.command = ''
                self.send_error(HTTPStatus.REQUEST_URI_TOO_LONG)
                return
            if not self.raw_requestline:
                self.close_connection = True
                return
            if not self.parse_request():
                # An error code has been sent, just exit
                return
            mname = 'do_' + self.command
            if not hasattr(self, mname):
                self.send_error(
                    HTTPStatus.NOT_IMPLEMENTED,
                    "Unsupported method (%r)" % self.command)
                return
            method = getattr(self, mname)
            method()
            self.wfile.flush() #actually send the response if not already done.
        except socket.timeout as e:
            #a read or a write timed out.  Discard this connection
            self.log_error("Request timed out: %r", e)
            self.close_connection = True
            return

    def handle(self):
        """Handle multiple requests if necessary."""
        self.close_connection = True

        self.handle_one_request()
        while not self.close_connection:
            self.handle_one_request()

    def send_error(self, code, message=None, explain=None):
        """Send and log an error reply.

        Arguments are
        * code:    an HTTP error code
                   3 digits
        * message: a simple optional 1 line reason phrase.
                   *( HTAB / SP / VCHAR / %x80-FF )
                   defaults to short entry matching the response code
        * explain: a detailed message defaults to the long entry
                   matching the response code.

        This sends an error response (so it must be called before any
        output has been generated), logs the error, and finally sends
        a piece of HTML explaining the error to the user.

        """

        try:
            shortmsg, longmsg = self.responses[code]
        except KeyError:
            shortmsg, longmsg = '???', '???'
        if message is None:
            message = shortmsg
        if explain is None:
            explain = longmsg
        self.log_error("code %d, message %s", code, message)
        self.send_response(code, message)
        self.send_header('Connection', 'close')

        # Message body is omitted for cases described in:
        #  - RFC7230: 3.3. 1xx, 204(No Content), 304(Not Modified)
        #  - RFC7231: 6.3.6. 205(Reset Content)
        body = None
        if (code >= 200 and
            code not in (HTTPStatus.NO_CONTENT,
                         HTTPStatus.RESET_CONTENT,
                         HTTPStatus.NOT_MODIFIED)):
            # HTML encode to prevent Cross Site Scripting attacks
            # (see bug #1100201)
            content = (self.error_message_format % {
                'code': code,
                'message': html.escape(message, quote=False),
                'explain': html.escape(explain, quote=False)
            })
            body = content.encode('UTF-8', 'replace')
            self.send_header("Content-Type", self.error_content_type)
            self.send_header('Content-Length', str(len(body)))
        self.end_headers()

        if self.command != 'HEAD' and body:
            self.wfile.write(body)

    def send_response(self, code, message=None):
        """Add the response header to the headers buffer and log the
        response code.

        Also send two standard headers with the server software
        version and the current date.

        """
        self.log_request(code)
        self.send_response_only(code, message)
        self.send_header('Server', self.version_string())
        self.send_header('Date', self.date_time_string())

    def send_response_only(self, code, message=None):
        """Send the response header only.

        Nothing is buffered for an HTTP/0.9 request, which has no status
        line.  When message is None the short phrase registered for code
        in self.responses is used (empty if the code is unknown).
        """
        if self.request_version != 'HTTP/0.9':
            if message is None:
                if code in self.responses:
                    message = self.responses[code][0]
                else:
                    message = ''
            if not hasattr(self, '_headers_buffer'):
                self._headers_buffer = []
            self._headers_buffer.append(("%s %d %s\r\n" %
                    (self.protocol_version, code, message)).encode(
                        'latin-1', 'strict'))

    def send_header(self, keyword, value):
        """Send a MIME header to the headers buffer.

        A 'Connection' header additionally updates self.close_connection
        ('close' sets it, 'keep-alive' clears it).
        """
        if self.request_version != 'HTTP/0.9':
            if not hasattr(self, '_headers_buffer'):
                self._headers_buffer = []
            self._headers_buffer.append(
                ("%s: %s\r\n" % (keyword, value)).encode('latin-1', 'strict'))

        if keyword.lower() == 'connection':
            if value.lower() == 'close':
                self.close_connection = True
            elif value.lower() == 'keep-alive':
                self.close_connection = False

    def end_headers(self):
        """Send the blank line ending the MIME headers."""
        if self.request_version != 'HTTP/0.9':
            # The buffer is created lazily by the send_* methods; make sure
            # it exists so ending an empty header block does not fail.
            if not hasattr(self, '_headers_buffer'):
                self._headers_buffer = []
            self._headers_buffer.append(b"\r\n")
            self.flush_headers()

    def flush_headers(self):
        """Write any buffered header lines to wfile and empty the buffer."""
        if hasattr(self, '_headers_buffer'):
            self.wfile.write(b"".join(self._headers_buffer))
            self._headers_buffer = []

    def log_request(self, code='-', size='-'):
        """Log an accepted request.

        This is called by send_response().

        """
        if isinstance(code, HTTPStatus):
            code = code.value
        self.log_message('"%s" %s %s',
                         self.requestline, str(code), str(size))

    def log_error(self, format, *args):
        """Log an error.

        This is called when a request cannot be fulfilled.  By
        default it passes the message on to log_message().

        Arguments are the same as for log_message().

        XXX This should go to the separate error log.

        """

        self.log_message(format, *args)

    # Translation table applied to every logged message: control characters
    # (C0, DEL and C1) become \xNN escapes and a literal backslash is
    # doubled, so text taken from the request cannot inject terminal escape
    # sequences or forge additional log lines.
    _control_char_table = str.maketrans(
        {c: '\\x%02x' % c
         for c in list(range(0x20)) + list(range(0x7f, 0xa0))})
    _control_char_table[ord('\\')] = '\\\\'

    def log_message(self, format, *args):
        """Log an arbitrary message.

        This is used by all other logging functions.  Override
        it if you have specific logging wishes.

        The first argument, FORMAT, is a format string for the
        message to be logged.  If the format string contains
        any % escapes requiring parameters, they should be
        specified as subsequent arguments (it's just like
        printf!).

        The client ip and current date/time are prefixed to
        every message.

        Control characters in the formatted message are written as
        \\xNN escapes and backslashes are doubled.

        """

        message = format % args
        sys.stderr.write("%s - - [%s] %s\n" %
                         (self.address_string(),
                          self.log_date_time_string(),
                          message.translate(self._control_char_table)))

    def version_string(self):
        """Return the server software version string."""
        return self.server_version + ' ' + self.sys_version

    def date_time_string(self, timestamp=None):
        """Return the current date and time formatted for a message header."""
        if timestamp is None:
            timestamp = time.time()
        return email.utils.formatdate(timestamp, usegmt=True)

    def log_date_time_string(self):
        """Return the current time formatted for logging."""
        now = time.time()
        year, month, day, hh, mm, ss, x, y, z = time.localtime(now)
        s = "%02d/%3s/%04d %02d:%02d:%02d" % (
                day, self.monthname[month], year, hh, mm, ss)
        return s

    weekdayname = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']

    monthname = [None,
                 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

    def address_string(self):
        """Return the client address."""

        return self.client_address[0]

    # Essentially static class variables

    # The version of the HTTP protocol we support.
    # Set this to HTTP/1.1 to enable automatic keepalive
    protocol_version = "HTTP/1.0"

    # MessageClass used to parse headers
    MessageClass = http.client.HTTPMessage

    # hack to maintain backwards compatibility
    responses = {
        v: (v.phrase, v.description)
        for v in HTTPStatus.__members__.values()
    }


class SimpleHTTPRequestHandler(BaseHTTPRequestHandler):

    """Simple HTTP request handler with GET and HEAD commands.

    This serves files from the current directory and any of its
    subdirectories.  The MIME type for files is determined by
    calling the .guess_type() method.

    The GET and HEAD requests are identical except that the HEAD
    request omits the actual contents of the file.

    """

    server_version = "SimpleHTTP/" + __version__

    def do_GET(self):
        """Serve a GET request."""
        f = self.send_head()
        if f:
            try:
                self.copyfile(f, self.wfile)
            finally:
                f.close()

    def do_HEAD(self):
        """Serve a HEAD request."""
        f = self.send_head()
        if f:
            f.close()

    def send_head(self):
        """Common code for GET and HEAD commands.

        This sends the response code and MIME headers.

        Return value is either a file object (which has to be copied
        to the outputfile by the caller unless the command was HEAD,
        and must be closed by the caller under all circumstances), or
        None, in which case the caller has nothing further to do.

        """
        path = self.translate_path(self.path)
        f = None
        if os.path.isdir(path):
            parts = urllib.parse.urlsplit(self.path)
            if not parts.path.endswith('/'):
                # redirect browser - doing basically what apache does
                self.send_response(HTTPStatus.MOVED_PERMANENTLY)
                new_parts = (parts[0], parts[1], parts[2] + '/',
                             parts[3], parts[4])
                new_url = urllib.parse.urlunsplit(new_parts)
                self.send_header("Location", new_url)
                # The redirect has no body; say so explicitly so that a
                # client on a persistent connection does not wait for one.
                self.send_header("Content-Length", "0")
                self.end_headers()
                return None
            for index in "index.html", "index.htm":
                index = os.path.join(path, index)
                # Only a regular file can serve as the index page; a
                # directory of that name must not shadow the listing.
                if os.path.isfile(index):
                    path = index
                    break
            else:
                return self.list_directory(path)
        ctype = self.guess_type(path)
        try:
            f = open(path, 'rb')
        except OSError:
            self.send_error(HTTPStatus.NOT_FOUND, "File not found")
            return None
        try:
            self.send_response(HTTPStatus.OK)
            self.send_header("Content-type", ctype)
            fs = os.fstat(f.fileno())
            self.send_header("Content-Length", str(fs.st_size))
            self.send_header("Last-Modified", self.date_time_string(fs.st_mtime))
            self.end_headers()
            return f
        except BaseException:
            # The caller never receives the file object, so close it here
            # before letting the error propagate.
            f.close()
            raise

    def list_directory(self, path):
        """Helper to produce a directory listing (absent index.html).

        Return value is either a file object, or None (indicating an
        error).  In either case, the headers are sent, making the
        interface the same as for send_head().

        """
        try:
            entries = os.listdir(path)
        except OSError:
            self.send_error(
                HTTPStatus.NOT_FOUND,
                "No permission to list directory")
            return None
        entries.sort(key=lambda a: a.lower())
        r = []
        # The title shows the requested URL path only: drop the query and
        # fragment, and never fall back to the local filesystem path.
        displaypath = self.path.split('#', 1)[0].split('?', 1)[0]
        try:
            displaypath = urllib.parse.unquote(displaypath,
                                               errors='surrogatepass')
        except UnicodeDecodeError:
            displaypath = urllib.parse.unquote(displaypath)
        displaypath = html.escape(displaypath, quote=False)
        enc = sys.getfilesystemencoding()
        title = 'Directory listing for %s' % displaypath
        r.append('<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" '
                 '"http://www.w3.org/TR/html4/strict.dtd">')
        r.append('<html>\n<head>')
        r.append('<meta http-equiv="Content-Type" '
                 'content="text/html; charset=%s">' % enc)
        r.append('<title>%s</title>\n</head>' % title)
        r.append('<body>\n<h1>%s</h1>' % title)
        r.append('<hr>\n<ul>')
        for name in entries:
            fullname = os.path.join(path, name)
            displayname = linkname = name
            # Append / for directories or @ for symbolic links
            if os.path.isdir(fullname):
                displayname = name + "/"
                linkname = name + "/"
            if os.path.islink(fullname):
                displayname = name + "@"
                # Note: a link to a directory displays with @ and links with /
            r.append('<li><a href="%s">%s</a></li>'
                    % (urllib.parse.quote(linkname,
                                          errors='surrogatepass'),
                       html.escape(displayname, quote=False)))
        r.append('</ul>\n<hr>\n</body>\n</html>\n')
        encoded = '\n'.join(r).encode(enc, 'surrogateescape')
        f = io.BytesIO()
        f.write(encoded)
        f.seek(0)
        self.send_response(HTTPStatus.OK)
        self.send_header("Content-type", "text/html; charset=%s" % enc)
        self.send_header("Content-Length", str(len(encoded)))
        self.end_headers()
        return f

    def translate_path(self, path):
        """Translate a /-separated PATH to the local filename syntax.

        Components that mean special things to the local file system
        (e.g. drive or directory names) are ignored.  (XXX They should
        probably be diagnosed.)

        The result is rooted at the current working directory.

        """
        # abandon query parameters
        path = path.split('?',1)[0]
        path = path.split('#',1)[0]
        # Don't forget explicit trailing slash when normalizing. Issue17324
        trailing_slash = path.rstrip().endswith('/')
        try:
            path = urllib.parse.unquote(path, errors='surrogatepass')
        except UnicodeDecodeError:
            path = urllib.parse.unquote(path)
        path = posixpath.normpath(path)
        words = path.split('/')
        words = filter(None, words)
        path = os.getcwd()
        for word in words:
            if os.path.dirname(word) or word in (os.curdir, os.pardir):
                # Ignore components that are not a simple file/directory name
                continue
            path = os.path.join(path, word)
        if trailing_slash:
            path += '/'
        return path

    def copyfile(self, source, outputfile):
        """Copy all data between two file objects.

        The SOURCE argument is a file object open for reading
        (or anything with a read() method) and the DESTINATION
        argument is a file object open for writing (or
        anything with a write() method).

        The only reason for overriding this would be to change
        the block size or perhaps to replace newlines by CRLF
        -- note however that the default server uses this
        to copy binary data as well.

        """
        shutil.copyfileobj(source, outputfile)

    def guess_type(self, path):
        """Guess the type of a file.

        Argument is a PATH (a filename).

        Return value is a string of the form type/subtype,
        usable for a MIME Content-type header.

        The default implementation looks the file's extension
        up in the table self.extensions_map, using application/octet-stream
        as a default; however it would be permissible (if
        slow) to look inside the data to make a better guess.

        """

        base, ext = posixpath.splitext(path)
        # Try the extension as written first, then case-insensitively.
        if ext in self.extensions_map:
            return self.extensions_map[ext]
        ext = ext.lower()
        if ext in self.extensions_map:
            return self.extensions_map[ext]
        else:
            return self.extensions_map['']

    if not mimetypes.inited:
        mimetypes.init() # try to read system mime.types
    extensions_map = mimetypes.types_map.copy()
    extensions_map.update({
        '': 'application/octet-stream', # Default
        '.py': 'text/plain',
        '.c': 'text/plain',
        '.h': 'text/plain',
        })


# Utilities for CGIHTTPRequestHandler

def _url_collapse_path(path):
    """
    Given a URL path, remove extra '/'s and '.' path elements and collapse
    any '..' references and returns a collapsed path.

    Implements something akin to RFC-2396 5.2 step 6 to parse relative paths.
    The utility of this function is limited to is_cgi method and helps
    preventing some security attacks.

    Returns: The reconstituted URL, which will always start with a '/'.

    Raises: IndexError if too many '..' occur within the path.

    """
    # Query component should not be involved.
    path, _, query = path.partition('?')
    path = urllib.parse.unquote(path)

    # Similar to os.path.split(os.path.normpath(path)) but specific to URL
    # path semantics rather than local operating system semantics.
    path_parts = path.split('/')
    head_parts = []
    for part in path_parts[:-1]:
        if part == '..':
            head_parts.pop() # IndexError if more '..' than prior parts
        elif part and part != '.':
            head_parts.append( part )
    if path_parts:
        tail_part = path_parts.pop()
        if tail_part:
            if tail_part == '..':
                head_parts.pop()
                tail_part = ''
            elif tail_part == '.':
                tail_part = ''
    else:
        tail_part = ''

    if query:
        tail_part = '?'.join((tail_part, query))

    splitpath = ('/' + '/'.join(head_parts), tail_part)
    collapsed_path = "/".join(splitpath)

    return collapsed_path



nobody = None

def nobody_uid():
    """Internal routine to get nobody's uid

    The value is cached in the module-level name `nobody`.  Returns -1
    when the pwd module cannot be imported; if there is no 'nobody'
    account, one more than the highest uid in the password database is
    used instead.
    """
    global nobody
    if nobody:
        return nobody
    try:
        import pwd
    except ImportError:
        return -1
    try:
        nobody = pwd.getpwnam('nobody')[2]
    except KeyError:
        nobody = 1 + max(x[2] for x in pwd.getpwall())
    return nobody


def executable(path):
    """Test for executable file."""
    return os.access(path, os.X_OK)


class CGIHTTPRequestHandler(SimpleHTTPRequestHandler):

    """Complete HTTP server with GET, HEAD and POST commands.

    GET and HEAD also support running CGI scripts.

    The POST command is *only* implemented for CGI scripts.

    """

    # Determine platform specifics
    have_fork = hasattr(os, 'fork')

    # Make rfile unbuffered -- we need to read one line and then pass
    # the rest to a subprocess, so we can't use buffered input.
    rbufsize = 0

    def do_POST(self):
        """Serve a POST request.

        This is only implemented for CGI scripts.

        """

        if self.is_cgi():
            self.run_cgi()
        else:
            self.send_error(
                HTTPStatus.NOT_IMPLEMENTED,
                "Can only POST to CGI scripts")

    def send_head(self):
        """Version of send_head that support CGI scripts"""
        if self.is_cgi():
            return self.run_cgi()
        else:
            return SimpleHTTPRequestHandler.send_head(self)

class CGIHTTPRequestHandler(SimpleHTTPRequestHandler):

    """Complete HTTP server with GET, HEAD and POST commands.

    GET and HEAD also support running CGI scripts.

    The POST command is *only* implemented for CGI scripts.

    A request is handled as CGI when is_cgi() accepts its path, i.e. when
    the collapsed URL path starts with one of cgi_directories.

    """

    # Determine platform specifics
    have_fork = hasattr(os, 'fork')

    # Make rfile unbuffered -- we need to read one line and then pass
    # the rest to a subprocess, so we can't use buffered input.
    rbufsize = 0

    # URL path prefixes whose contents are executed instead of served.
    cgi_directories = ['/cgi-bin', '/htbin']

    def do_POST(self):
        """Serve a POST request.

        This is only implemented for CGI scripts; any other path gets a
        501 (Not Implemented) error response.

        """
        if self.is_cgi():
            self.run_cgi()
        else:
            self.send_error(
                HTTPStatus.NOT_IMPLEMENTED,
                "Can only POST to CGI scripts")

    def send_head(self):
        """Version of send_head that supports CGI scripts.

        CGI paths are executed via run_cgi() (whose return value is
        passed through); everything else is delegated to
        SimpleHTTPRequestHandler.send_head().
        """
        if self.is_cgi():
            return self.run_cgi()
        return SimpleHTTPRequestHandler.send_head(self)

    def is_cgi(self):
        """Test whether self.path corresponds to a CGI script.

        Returns True and updates the cgi_info attribute to the tuple
        (dir, rest) if self.path requires running a CGI script.
        Returns False otherwise.

        If any exception is raised, the caller should assume that
        self.path was rejected as invalid and act accordingly.

        The default implementation tests whether the normalized url
        path begins with one of the strings in self.cgi_directories
        (and the next character is a '/' or the end of the string).

        """
        collapsed_path = _url_collapse_path(self.path)
        # Split at the first '/' after the leading one: everything before
        # it is the candidate CGI directory, everything after is the rest.
        dir_sep = collapsed_path.find('/', 1)
        head, tail = collapsed_path[:dir_sep], collapsed_path[dir_sep+1:]
        if head in self.cgi_directories:
            self.cgi_info = head, tail
            return True
        return False

    def is_executable(self, path):
        """Test whether argument path is an executable file."""
        return executable(path)

    def is_python(self, path):
        """Test whether argument path is a Python script.

        Only the extension is inspected (case-insensitively).
        """
        _root, ext = os.path.splitext(path)
        return ext.lower() in (".py", ".pyw")

    def run_cgi(self):
        """Execute a CGI script.

        Uses self.cgi_info (set by is_cgi()) to locate the script, sends
        an error response and returns if it is missing, not a plain file
        or not executable, then builds the CGI environment, sends the
        "200 Script output follows" status line and runs the script --
        via fork()/execve() when os.fork exists, via subprocess otherwise.
        """
        cgi_dir, rest = self.cgi_info
        path = cgi_dir + '/' + rest
        # Extend the script directory across as many leading path
        # components as translate to real directories on disk.
        i = path.find('/', len(cgi_dir) + 1)
        while i >= 0:
            nextdir = path[:i]
            nextrest = path[i+1:]

            scriptdir = self.translate_path(nextdir)
            if os.path.isdir(scriptdir):
                cgi_dir, rest = nextdir, nextrest
                i = path.find('/', len(cgi_dir) + 1)
            else:
                break

        # find an explicit query string, if present.
        rest, _, query = rest.partition('?')

        # dissect the part after the directory name into a script name &
        # a possible additional path, to be stored in PATH_INFO.
        i = rest.find('/')
        if i >= 0:
            script, rest = rest[:i], rest[i:]
        else:
            script, rest = rest, ''

        scriptname = cgi_dir + '/' + script
        scriptfile = self.translate_path(scriptname)
        if not os.path.exists(scriptfile):
            self.send_error(
                HTTPStatus.NOT_FOUND,
                "No such CGI script (%r)" % scriptname)
            return
        if not os.path.isfile(scriptfile):
            self.send_error(
                HTTPStatus.FORBIDDEN,
                "CGI script is not a plain file (%r)" % scriptname)
            return
        ispy = self.is_python(scriptname)
        # Without fork, Python scripts are run through the interpreter and
        # therefore do not need the executable bit themselves.
        if self.have_fork or not ispy:
            if not self.is_executable(scriptfile):
                self.send_error(
                    HTTPStatus.FORBIDDEN,
                    "CGI script is not executable (%r)" % scriptname)
                return

        # Reference: http://hoohoo.ncsa.uiuc.edu/cgi/env.html
        # XXX Much of the following could be prepared ahead of time!
        # Use a plain dict: a (deep) copy of os.environ is still an
        # os._Environ, whose item assignment calls putenv() and would leak
        # per-request CGI variables into the server's own environment.
        env = dict(os.environ)
        env['SERVER_SOFTWARE'] = self.version_string()
        env['SERVER_NAME'] = self.server.server_name
        env['GATEWAY_INTERFACE'] = 'CGI/1.1'
        env['SERVER_PROTOCOL'] = self.protocol_version
        env['SERVER_PORT'] = str(self.server.server_port)
        env['REQUEST_METHOD'] = self.command
        uqrest = urllib.parse.unquote(rest)
        env['PATH_INFO'] = uqrest
        env['PATH_TRANSLATED'] = self.translate_path(uqrest)
        env['SCRIPT_NAME'] = scriptname
        if query:
            env['QUERY_STRING'] = query
        env['REMOTE_ADDR'] = self.client_address[0]
        authorization = self.headers.get("authorization")
        if authorization:
            credentials = authorization.split()
            if len(credentials) == 2:
                import base64
                import binascii
                scheme, token = credentials
                env['AUTH_TYPE'] = scheme
                if scheme.lower() == "basic":
                    try:
                        decoded = base64.decodebytes(
                            token.encode('ascii')).decode('ascii')
                    except (binascii.Error, UnicodeError):
                        pass
                    else:
                        # Only the first colon separates user-id from
                        # password; the password may itself contain ':'.
                        user, colon, _password = decoded.partition(':')
                        if colon:
                            env['REMOTE_USER'] = user
        # XXX REMOTE_IDENT
        if self.headers.get('content-type') is None:
            env['CONTENT_TYPE'] = self.headers.get_content_type()
        else:
            env['CONTENT_TYPE'] = self.headers['content-type']
        length = self.headers.get('content-length')
        if length:
            env['CONTENT_LENGTH'] = length
        referer = self.headers.get('referer')
        if referer:
            env['HTTP_REFERER'] = referer
        # get_all() returns the value of every Accept header field;
        # getallmatchingheaders() never matches on HTTPMessage objects and
        # left HTTP_ACCEPT permanently empty.
        accept = self.headers.get_all('accept', ())
        env['HTTP_ACCEPT'] = ','.join(accept)
        ua = self.headers.get('user-agent')
        if ua:
            env['HTTP_USER_AGENT'] = ua
        co = filter(None, self.headers.get_all('cookie', []))
        cookie_str = ', '.join(co)
        if cookie_str:
            env['HTTP_COOKIE'] = cookie_str
        # XXX Other HTTP_* headers
        # Guarantee that these variables exist in the script's
        # environment, empty when nothing above (or the inherited
        # environment) supplied a value.
        for k in ('QUERY_STRING', 'REMOTE_HOST', 'CONTENT_LENGTH',
                  'HTTP_USER_AGENT', 'HTTP_COOKIE', 'HTTP_REFERER'):
            env.setdefault(k, "")

        self.send_response(HTTPStatus.OK, "Script output follows")
        self.flush_headers()

        decoded_query = query.replace('+', ' ')

        if self.have_fork:
            # Unix -- fork as we should
            args = [script]
            if '=' not in decoded_query:
                args.append(decoded_query)
            nobody = nobody_uid()
            self.wfile.flush()  # Always flush before forking
            pid = os.fork()
            if pid != 0:
                # Parent
                pid, sts = os.waitpid(pid, 0)
                # throw away additional data [see bug #427345]
                while select.select([self.rfile], [], [], 0)[0]:
                    if not self.rfile.read(1):
                        break
                if sts:
                    self.log_error("CGI script exit status %#x", sts)
                return
            # Child
            try:
                try:
                    os.setuid(nobody)
                except OSError:
                    # Best effort: keep the current uid if it cannot be
                    # changed.
                    pass
                os.dup2(self.rfile.fileno(), 0)
                os.dup2(self.wfile.fileno(), 1)
                os.execve(scriptfile, args, env)
            except BaseException:
                # Deliberately catch everything: the child must never
                # return into the server's request loop.
                self.server.handle_error(self.request, self.client_address)
                os._exit(127)

        else:
            # Non-Unix -- use subprocess
            import subprocess
            cmdline = [scriptfile]
            if self.is_python(scriptfile):
                interp = sys.executable
                if interp.lower().endswith("w.exe"):
                    # On Windows, use python.exe, not pythonw.exe
                    interp = interp[:-5] + interp[-4:]
                cmdline = [interp, '-u'] + cmdline
            # NOTE(review): this branch passes the raw query while the
            # fork branch passes decoded_query -- confirm which is intended.
            if '=' not in query:
                cmdline.append(query)
            self.log_message("command: %s", subprocess.list2cmdline(cmdline))
            try:
                nbytes = int(length)
            except (TypeError, ValueError):
                nbytes = 0
            p = subprocess.Popen(cmdline,
                                 stdin=subprocess.PIPE,
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE,
                                 env=env)
            if self.command.lower() == "post" and nbytes > 0:
                data = self.rfile.read(nbytes)
            else:
                data = None
            # throw away additional data [see bug #427345]
            while select.select([self.rfile._sock], [], [], 0)[0]:
                if not self.rfile._sock.recv(1):
                    break
            stdout, stderr = p.communicate(data)
            self.wfile.write(stdout)
            if stderr:
                self.log_error('%s', stderr)
            p.stderr.close()
            p.stdout.close()
            status = p.returncode
            if status:
                self.log_error("CGI script exit status %#x", status)
            else:
                self.log_message("CGI script exited OK")
def test(HandlerClass=BaseHTTPRequestHandler,
         ServerClass=HTTPServer, protocol="HTTP/1.0", port=8000, bind=""):
    """Test the HTTP request handler class.

    This runs an HTTP server on port 8000 (or the port argument), bound
    to the *bind* address, and serves until interrupted.  *protocol* is
    stored on HandlerClass as its protocol_version.  On KeyboardInterrupt
    a message is printed and the process exits with status 0.

    """
    server_address = (bind, port)

    HandlerClass.protocol_version = protocol
    with ServerClass(server_address, HandlerClass) as httpd:
        sa = httpd.socket.getsockname()
        host, bound_port = sa[0], sa[1]
        # An IPv6 literal must be bracketed inside a URL authority,
        # otherwise its colons are indistinguishable from the port
        # separator (e.g. http://[::1]:8000/).
        url_host = host
        if isinstance(host, str) and ':' in host:
            url_host = '[' + host + ']'
        serve_message = (
            "Serving HTTP on {host} port {port} "
            "(http://{url_host}:{port}/) ...")
        print(serve_message.format(
            host=host, port=bound_port, url_host=url_host))
        try:
            httpd.serve_forever()
        except KeyboardInterrupt:
            print("\nKeyboard interrupt received, exiting.")
            sys.exit(0)


if __name__ == '__main__':
    # Command-line entry point: serve files (or CGI scripts with --cgi).
    parser = argparse.ArgumentParser()
    parser.add_argument('--cgi', action='store_true',
                        help='Run as CGI Server')
    parser.add_argument('--bind', '-b', default='', metavar='ADDRESS',
                        help='Specify alternate bind address '
                             '[default: all interfaces]')
    parser.add_argument('port', action='store',
                        default=8000, type=int,
                        nargs='?',
                        help='Specify alternate port [default: 8000]')
    args = parser.parse_args()
    if args.cgi:
        handler_class = CGIHTTPRequestHandler
    else:
        handler_class = SimpleHTTPRequestHandler
    test(HandlerClass=handler_class, port=args.port, bind=args.bind)