"""Base classes for server/gateway implementations"""

from .util import FileWrapper, guess_scheme, is_hop_by_hop
from .headers import Headers

import sys, os, time

__all__ = [
    'BaseHandler', 'SimpleHandler', 'BaseCGIHandler', 'CGIHandler',
    'IISCGIHandler', 'read_environ'
]

# Weekday and month names for HTTP date/time formatting; always English!
_weekdayname = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]
_monthname = [None, # Dummy so we can use 1-based month numbers
              "Jan", "Feb", "Mar", "Apr", "May", "Jun",
              "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]

def format_date_time(timestamp):
    """Format 'timestamp' (seconds since the epoch) as an HTTP date in GMT

    The result looks like 'Thu, 01 Jan 1970 00:00:00 GMT'.  The weekday and
    month names come from the English tables above, never from the locale.
    """
    year, month, day, hh, mm, ss, wd, y, z = time.gmtime(timestamp)
    return "%s, %02d %3s %4d %02d:%02d:%02d GMT" % (
        _weekdayname[wd], day, _monthname[month], year, hh, mm, ss
    )

# Predicate: is this key one of the CGI variables derived from the request?
_is_request = {
    'SCRIPT_NAME', 'PATH_INFO', 'QUERY_STRING', 'REQUEST_METHOD', 'AUTH_TYPE',
    'CONTENT_TYPE', 'CONTENT_LENGTH', 'HTTPS', 'REMOTE_USER', 'REMOTE_IDENT',
}.__contains__

def _needs_transcode(k):
    """True if environ key 'k' must be re-encoded by 'read_environ()'

    That is the case for the CGI request variables listed in '_is_request',
    for any 'HTTP_*' or 'SSL_*' variable, and for any of those prefixed by
    one or more 'REDIRECT_' (checked recursively).
    """
    return _is_request(k) or k.startswith('HTTP_') or k.startswith('SSL_') \
        or (k.startswith('REDIRECT_') and _needs_transcode(k[9:]))

def read_environ():
    """Read environment, fixing HTTP variables

    Returns a new dict built from 'os.environ' in which every value whose key
    satisfies '_needs_transcode()' has been converted to a "bytes-as-unicode"
    string (i.e. re-encoded and then decoded as ISO-8859-1).  Other values
    are copied unchanged.
    """
    enc = sys.getfilesystemencoding()
    esc = 'surrogateescape'
    try:
        ''.encode('utf-8', esc)
    except LookupError:
        esc = 'replace'
    environ = {}

    # Take the basic environment from native-unicode os.environ. Attempt to
    # fix up the variables that come from the HTTP request to compensate for
    # the bytes->unicode decoding step that will already have taken place.
    for k, v in os.environ.items():
        if _needs_transcode(k):

            # On win32, the os.environ is natively Unicode. Different servers
            # decode the request bytes using different encodings.
            if sys.platform == 'win32':
                software = os.environ.get('SERVER_SOFTWARE', '').lower()

                # On IIS, the HTTP request will be decoded as UTF-8 as long
                # as the input is a valid UTF-8 sequence. Otherwise it is
                # decoded using the system code page (mbcs), with no way to
                # detect this has happened. Because UTF-8 is the more likely
                # encoding, and mbcs is inherently unreliable (an mbcs string
                # that happens to be valid UTF-8 will not be decoded as mbcs)
                # always recreate the original bytes as UTF-8.
                if software.startswith('microsoft-iis/'):
                    v = v.encode('utf-8').decode('iso-8859-1')

                # Apache mod_cgi writes bytes-as-unicode (as if ISO-8859-1) direct
                # to the Unicode environ. No modification needed.
                elif software.startswith('apache/'):
                    pass

                # Python 3's http.server.CGIHTTPRequestHandler decodes
                # using the urllib.unquote default of UTF-8, amongst other
                # issues.
                elif (
                    software.startswith('simplehttp/')
                    and 'python/3' in software
                ):
                    v = v.encode('utf-8').decode('iso-8859-1')

                # For other servers, guess that they have written bytes to
                # the environ using stdio byte-oriented interfaces, ending up
                # with the system code page.
                else:
                    v = v.encode(enc, 'replace').decode('iso-8859-1')

            # Recover bytes from unicode environ, using surrogate escapes
            # where available (Python 3.1+).
            else:
                v = v.encode(enc, esc).decode('iso-8859-1')

        environ[k] = v
    return environ


class BaseHandler:
    """Manage the invocation of a WSGI application"""

    # Configuration parameters; can override per-subclass or per-instance
    wsgi_version = (1,0)
    wsgi_multithread = True
    wsgi_multiprocess = True
    wsgi_run_once = False

    origin_server = True    # We are transmitting direct to client
    http_version  = "1.0"   # Version that should be used for response
    server_software = None  # String name of server software, if any

    # os_environ is used to supply configuration from the OS environment:
    # by default it's a copy of 'os.environ' as of import time, but you can
    # override this in e.g. your __init__ method.
    os_environ= read_environ()

    # Collaborator classes
    wsgi_file_wrapper = FileWrapper     # set to None to disable
    headers_class = Headers             # must be a Headers-like class

    # Error handling (also per-subclass or per-instance)
    traceback_limit = None  # Print entire traceback to self.get_stderr()
    error_status = "500 Internal Server Error"
    error_headers = [('Content-Type','text/plain')]
    error_body = b"A server error occurred. Please contact the administrator."

    # State variables (don't mess with these)
    status = result = None
    headers_sent = False
    headers = None
    bytes_sent = 0

    def run(self, application):
        """Invoke the application"""
        # Note to self: don't move the close()!  Asynchronous servers shouldn't
        # call close() from finish_response(), so if you close() anywhere but
        # the double-error branch here, you'll break asynchronous servers by
        # prematurely closing.  Async servers must return from 'run()' without
        # closing if there might still be output to iterate over.
        try:
            self.setup_environ()
            self.result = application(self.environ, self.start_response)
            self.finish_response()
        except (ConnectionAbortedError, BrokenPipeError, ConnectionResetError):
            # We expect the client to close the connection abruptly from time
            # to time.
            return
        except:
            try:
                self.handle_error()
            except:
                # If we get an error handling an error, just give up already!
                self.close()
                raise   # ...and let the actual server figure it out.


    def setup_environ(self):
        """Set up the environment for one request"""

        env = self.environ = self.os_environ.copy()
        self.add_cgi_vars()

        env['wsgi.input']        = self.get_stdin()
        env['wsgi.errors']       = self.get_stderr()
        env['wsgi.version']      = self.wsgi_version
        env['wsgi.run_once']     = self.wsgi_run_once
        env['wsgi.url_scheme']   = self.get_scheme()
        env['wsgi.multithread']  = self.wsgi_multithread
        env['wsgi.multiprocess'] = self.wsgi_multiprocess

        if self.wsgi_file_wrapper is not None:
            env['wsgi.file_wrapper'] = self.wsgi_file_wrapper

        if self.origin_server and self.server_software:
            env.setdefault('SERVER_SOFTWARE',self.server_software)


    def finish_response(self):
        """Send any iterable data, then close self and the iterable

        Subclasses intended for use in asynchronous servers will
        want to redefine this method, such that it sets up callbacks
        in the event loop to iterate over the data, and to call
        'self.close()' once the response is finished.
        """
        try:
            if not self.result_is_file() or not self.sendfile():
                for data in self.result:
                    self.write(data)
                self.finish_content()
        except:
            # Call close() on the iterable returned by the WSGI application
            # in case of an exception.
            if hasattr(self.result, 'close'):
                self.result.close()
            raise
        else:
            # We only call close() when no exception is raised, because it
            # will set status, result, headers, and environ fields to None.
            # See bpo-29183 for more details.
            self.close()


    def get_scheme(self):
        """Return the URL scheme being used"""
        return guess_scheme(self.environ)


    def set_content_length(self):
        """Compute Content-Length or switch to chunked encoding if possible"""
        try:
            blocks = len(self.result)
        except (TypeError,AttributeError,NotImplementedError):
            pass
        else:
            if blocks==1:
                self.headers['Content-Length'] = str(self.bytes_sent)
                return
        # XXX Try for chunked encoding if origin server and client is 1.1


    def cleanup_headers(self):
        """Make any necessary header changes or defaults

        Subclasses can extend this to add other defaults.
        """
        if 'Content-Length' not in self.headers:
            self.set_content_length()

    def start_response(self, status, headers,exc_info=None):
        """'start_response()' callable as specified by PEP 3333

        Stores 'status' and 'headers' for later transmission and returns the
        'write()' callable.  Raises AssertionError if headers were already
        set and no 'exc_info' is given, or if 'status' or a header is
        malformed.  If 'exc_info' is given after the headers have been sent,
        the exception it describes is re-raised.
        """

        if exc_info:
            try:
                if self.headers_sent:
                    # Headers are already on the wire, so the error cannot be
                    # reported to the client; re-raise the exception described
                    # by 'exc_info' as PEP 3333 specifies.  A bare 'raise'
                    # would fail with RuntimeError whenever start_response()
                    # is called outside of an 'except' block.
                    raise exc_info[1].with_traceback(exc_info[2])
            finally:
                exc_info = None        # avoid dangling circular ref
        elif self.headers is not None:
            raise AssertionError("Headers already set!")

        self.status = status
        self.headers = self.headers_class(headers)
        status = self._convert_string_type(status, "Status")
        self._validate_status(status)

        if __debug__:
            for name, val in headers:
                name = self._convert_string_type(name, "Header name")
                val = self._convert_string_type(val, "Header value")
                assert not is_hop_by_hop(name),\
                       f"Hop-by-hop header, '{name}: {val}', not allowed"

        return self.write

    def _validate_status(self, status):
        """Raise AssertionError unless 'status' looks like '200 OK'

        The string must be at least 4 characters long, start with three
        digits and have a space as its fourth character.
        """
        if len(status) < 4:
            raise AssertionError("Status must be at least 4 characters")
        if not status[:3].isdigit():
            raise AssertionError("Status message must begin w/3-digit code")
        if status[3] != " ":
            raise AssertionError("Status message must have a space after code")

    def _convert_string_type(self, value, title):
        """Convert/check value type."""
        if type(value) is str:
            return value
        raise AssertionError(
            "{0} must be of type str (got {1})".format(title, repr(value))
        )

    def send_preamble(self):
        """Transmit version/status/date/server, via self._write()"""
        if self.origin_server:
            if self.client_is_modern():
                self._write(('HTTP/%s %s\r\n' % (self.http_version,self.status)).encode('iso-8859-1'))
                if 'Date' not in self.headers:
                    self._write(
                        ('Date: %s\r\n' % format_date_time(time.time())).encode('iso-8859-1')
                    )
                if self.server_software and 'Server' not in self.headers:
                    self._write(('Server: %s\r\n' % self.server_software).encode('iso-8859-1'))
        else:
            self._write(('Status: %s\r\n' % self.status).encode('iso-8859-1'))

    def write(self, data):
        """'write()' callable as specified by PEP 3333"""

        assert type(data) is bytes, \
            "write() argument must be a bytes instance"

        if not self.status:
            raise AssertionError("write() before start_response()")

        elif not self.headers_sent:
            # Before the first output, send the stored headers
            self.bytes_sent = len(data)    # make sure we know content-length
            self.send_headers()
        else:
            self.bytes_sent += len(data)

        # XXX check Content-Length and truncate if too many bytes written?
        self._write(data)
        self._flush()


    def sendfile(self):
        """Platform-specific file transmission

        Override this method in subclasses to support platform-specific
        file transmission.  It is only called if the application's
        return iterable ('self.result') is an instance of
        'self.wsgi_file_wrapper'.

        This method should return a true value if it was able to actually
        transmit the wrapped file-like object using a platform-specific
        approach.  It should return a false value if normal iteration
        should be used instead.  An exception can be raised to indicate
        that transmission was attempted, but failed.

        NOTE: this method should call 'self.send_headers()' if
        'self.headers_sent' is false and it is going to attempt direct
        transmission of the file.
        """
        return False   # No platform-specific transmission by default


    def finish_content(self):
        """Ensure headers and content have both been sent"""
        if not self.headers_sent:
            # Only zero Content-Length if not set by the application (so
            # that HEAD requests can be satisfied properly, see #3839)
            self.headers.setdefault('Content-Length', "0")
            self.send_headers()
        else:
            pass # XXX check if content-length was too short?

    def close(self):
        """Close the iterable (if needed) and reset all instance vars

        Subclasses may want to also drop the client connection.
        """
        try:
            if hasattr(self.result,'close'):
                self.result.close()
        finally:
            self.result = self.headers = self.status = self.environ = None
            self.bytes_sent = 0; self.headers_sent = False


    def send_headers(self):
        """Transmit headers to the client, via self._write()"""
        self.cleanup_headers()
        self.headers_sent = True
        if not self.origin_server or self.client_is_modern():
            self.send_preamble()
            self._write(bytes(self.headers))


    def result_is_file(self):
        """True if 'self.result' is an instance of 'self.wsgi_file_wrapper'"""
        wrapper = self.wsgi_file_wrapper
        return wrapper is not None and isinstance(self.result,wrapper)


    def client_is_modern(self):
        """True if client can accept status and headers"""
        return self.environ['SERVER_PROTOCOL'].upper() != 'HTTP/0.9'


    def log_exception(self,exc_info):
        """Log the 'exc_info' tuple in the server log

        Subclasses may override to retarget the output or change its format.
        """
        try:
            from traceback import print_exception
            stderr = self.get_stderr()
            print_exception(
                exc_info[0], exc_info[1], exc_info[2],
                self.traceback_limit, stderr
            )
            stderr.flush()
        finally:
            exc_info = None

    def handle_error(self):
        """Log current error, and send error output to client if possible"""
        self.log_exception(sys.exc_info())
        if not self.headers_sent:
            self.result = self.error_output(self.environ, self.start_response)
            self.finish_response()
        # XXX else: attempt advanced recovery techniques for HTML or text?

    def error_output(self, environ, start_response):
        """WSGI mini-app to create error output

        By default, this just uses the 'error_status', 'error_headers',
        and 'error_body' attributes to generate an output page.  It can
        be overridden in a subclass to dynamically generate diagnostics,
        choose an appropriate message for the user's preferred language, etc.

        Note, however, that it's not recommended from a security perspective to
        spit out diagnostics to any old user; ideally, you should have to do
        something special to enable diagnostic output, which is why we don't
        include any here!
        """
        start_response(self.error_status,self.error_headers[:],sys.exc_info())
        return [self.error_body]


    # Pure abstract methods; *must* be overridden in subclasses

    def _write(self,data):
        """Override in subclass to buffer data for send to client

        It's okay if this method actually transmits the data; BaseHandler
        just separates write and flush operations for greater efficiency
        when the underlying system actually has such a distinction.
        """
        raise NotImplementedError

    def _flush(self):
        """Override in subclass to force sending of recent '_write()' calls

        It's okay if this method is a no-op (i.e., if '_write()' actually
        sends the data).
        """
        raise NotImplementedError

    def get_stdin(self):
        """Override in subclass to return suitable 'wsgi.input'"""
        raise NotImplementedError

    def get_stderr(self):
        """Override in subclass to return suitable 'wsgi.errors'"""
        raise NotImplementedError

    def add_cgi_vars(self):
        """Override in subclass to insert CGI variables in 'self.environ'"""
        raise NotImplementedError


class SimpleHandler(BaseHandler):
    """Handler that's just initialized with streams, environment, etc.

    This handler subclass is intended for synchronous HTTP/1.0 origin servers,
    and handles sending the entire response output, given the correct inputs.

    Usage::

        handler = SimpleHandler(
            inp,out,err,env, multithread=False, multiprocess=True
        )
        handler.run(app)"""

    def __init__(self,stdin,stdout,stderr,environ,
        multithread=True, multiprocess=False
    ):
        self.stdin = stdin
        self.stdout = stdout
        self.stderr = stderr
        self.base_env = environ
        self.wsgi_multithread = multithread
        self.wsgi_multiprocess = multiprocess

    def get_stdin(self):
        """Return the input stream given to the constructor"""
        return self.stdin

    def get_stderr(self):
        """Return the error stream given to the constructor"""
        return self.stderr

    def add_cgi_vars(self):
        """Merge the environ given to the constructor into 'self.environ'"""
        self.environ.update(self.base_env)

    def _write(self,data):
        """Write 'data' to 'self.stdout', retrying after partial writes

        A partial write (a return value that is neither None nor len(data))
        triggers a DeprecationWarning, after which the remainder is written
        in a loop until nothing is left.
        """
        result = self.stdout.write(data)
        if result is None or result == len(data):
            return
        from warnings import warn
        warn("SimpleHandler.stdout.write() should not do partial writes",
            DeprecationWarning)
        while data := data[result:]:
            result = self.stdout.write(data)

    def _flush(self):
        """Flush 'self.stdout'"""
        self.stdout.flush()
        # Rebind on the instance so later calls go straight to stdout.flush
        self._flush = self.stdout.flush


class BaseCGIHandler(SimpleHandler):

    """CGI-like systems using input/output/error streams and environ mapping

    Usage::

        handler = BaseCGIHandler(inp,out,err,env)
        handler.run(app)

    This handler class is useful for gateway protocols like ReadyExec and
    FastCGI, that have usable input/output/error streams and an environment
    mapping.  It's also the base class for CGIHandler, which just uses
    sys.stdin, os.environ, and so on.

    The constructor also takes keyword arguments 'multithread' and
    'multiprocess' (defaulting to 'True' and 'False' respectively) to control
    the configuration sent to the application.  It sets 'origin_server' to
    False (to enable CGI-like output), and assumes that 'wsgi.run_once' is
    False.
    """

    origin_server = False


class CGIHandler(BaseCGIHandler):

    """CGI-based invocation via sys.stdin/stdout/stderr and os.environ

    Usage::

        CGIHandler().run(app)

    The difference between this class and BaseCGIHandler is that it always
    uses 'wsgi.run_once' of 'True', 'wsgi.multithread' of 'False', and
    'wsgi.multiprocess' of 'True'.  It does not take any initialization
    parameters, but always uses 'sys.stdin', 'os.environ', and friends.

    If you need to override any of these parameters, use BaseCGIHandler
    instead.
    """

    wsgi_run_once = True
    # Do not allow os.environ to leak between requests in Google App Engine
    # and other multi-run CGI use cases.  This is not easily testable.
    # See http://bugs.python.org/issue7250
    os_environ = {}

    def __init__(self):
        BaseCGIHandler.__init__(
            self, sys.stdin.buffer, sys.stdout.buffer, sys.stderr,
            read_environ(), multithread=False, multiprocess=True
        )


class IISCGIHandler(BaseCGIHandler):
    """CGI-based invocation with workaround for IIS path bug

    This handler should be used in preference to CGIHandler when deploying on
    Microsoft IIS without having set the config allowPathInfo option (IIS>=7)
    or metabase allowPathInfoForScriptMappings (IIS<7).
    """
    wsgi_run_once = True
    os_environ = {}

    # By default, IIS gives a PATH_INFO that duplicates the SCRIPT_NAME at
    # the front, causing problems for WSGI applications that wish to implement
    # routing. This handler strips any such duplicated path.

    # IIS can be configured to pass the correct PATH_INFO, but this causes
    # another bug where PATH_TRANSLATED is wrong. Luckily this variable is
    # rarely used and is not guaranteed by WSGI. On IIS<7, though, the
    # setting can only be made on a vhost level, affecting all other script
    # mappings, many of which break when exposed to the PATH_TRANSLATED bug.
    # For this reason IIS<7 is almost never deployed with the fix. (Even IIS7
    # rarely uses it because there is still no UI for it.)

    # There is no way for CGI code to tell whether the option was set, so a
    # separate handler class is provided.
    def __init__(self):
        environ= read_environ()
        path = environ.get('PATH_INFO', '')
        script = environ.get('SCRIPT_NAME', '')
        if (path+'/').startswith(script+'/'):
            environ['PATH_INFO'] = path[len(script):]
        BaseCGIHandler.__init__(
            self, sys.stdin.buffer, sys.stdout.buffer, sys.stderr,
            environ, multithread=False, multiprocess=True
        )