• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1"""Base classes for server/gateway implementations"""
2
3from .util import FileWrapper, guess_scheme, is_hop_by_hop
4from .headers import Headers
5
6import sys, os, time
7
# Names exported by ``from <this module> import *``.  Module-level helpers
# not listed here (e.g. format_date_time) stay importable by explicit name.
__all__ = [
    'BaseHandler', 'SimpleHandler', 'BaseCGIHandler', 'CGIHandler',
    'IISCGIHandler', 'read_environ'
]
12
# Weekday and month names for HTTP date/time formatting; always English!
# _weekdayname is indexed by struct_time.tm_wday (Monday == 0).
_weekdayname = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]
_monthname = [None, # Dummy so we can use 1-based month numbers
              "Jan", "Feb", "Mar", "Apr", "May", "Jun",
              "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]

def format_date_time(timestamp):
    """Format *timestamp* as an HTTP date string in GMT.

    *timestamp* is a number of seconds since the epoch, as accepted by
    time.gmtime().  The result looks like
    ``"Sun, 06 Nov 1994 08:49:37 GMT"``; weekday and month names are always
    the English abbreviations from the tables above, independent of locale.

    The year is zero-padded to four digits so that years below 1000 still
    produce a fixed-width, digits-only year field.
    """
    year, month, day, hh, mm, ss, wd, _yday, _isdst = time.gmtime(timestamp)
    return "%s, %02d %3s %04d %02d:%02d:%02d GMT" % (
        _weekdayname[wd], day, _monthname[month], year, hh, mm, ss
    )
24
# Membership test for the CGI variable names listed here; read_environ()
# re-decodes the values of these keys via _needs_transcode().
_is_request = frozenset((
    'SCRIPT_NAME', 'PATH_INFO', 'QUERY_STRING', 'REQUEST_METHOD', 'AUTH_TYPE',
    'CONTENT_TYPE', 'CONTENT_LENGTH', 'HTTPS', 'REMOTE_USER', 'REMOTE_IDENT',
)).__contains__

def _needs_transcode(k):
    """Return True if environ key *k* holds a value that must be transcoded.

    A key qualifies when it is one of the names accepted by _is_request, or
    starts with 'HTTP_' or 'SSL_'.  Any number of leading 'REDIRECT_'
    prefixes is stripped before that check is applied.
    """
    while True:
        if _is_request(k) or k.startswith(('HTTP_', 'SSL_')):
            return True
        if not k.startswith('REDIRECT_'):
            return False
        # Drop one 'REDIRECT_' prefix (9 characters) and test the remainder.
        k = k[9:]
33
def read_environ():
    """Read environment, fixing HTTP variables

    Returns a new dict built from os.environ.  Values whose key satisfies
    _needs_transcode() are converted so that the original request bytes are
    represented as ISO-8859-1 characters; every other value is copied
    unchanged.
    """
    fs_encoding = sys.getfilesystemencoding()

    # Prefer surrogate escapes when that error handler exists; otherwise
    # fall back to 'replace'.
    error_handler = 'surrogateescape'
    try:
        ''.encode('utf-8', error_handler)
    except LookupError:
        error_handler = 'replace'

    on_windows = sys.platform == 'win32'
    result = {}

    # Take the basic environment from native-unicode os.environ. Attempt to
    # fix up the variables that come from the HTTP request to compensate for
    # the bytes->unicode decoding step that will already have taken place.
    for key, value in os.environ.items():
        if not _needs_transcode(key):
            result[key] = value
            continue

        if not on_windows:
            # Recover bytes from unicode environ, using surrogate escapes
            # where available.
            raw = value.encode(fs_encoding, error_handler)
            result[key] = raw.decode('iso-8859-1')
            continue

        # On win32, the os.environ is natively Unicode. Different servers
        # decode the request bytes using different encodings.
        server = os.environ.get('SERVER_SOFTWARE', '').lower()

        if server.startswith('microsoft-iis/'):
            # On IIS, the HTTP request will be decoded as UTF-8 as long as
            # the input is a valid UTF-8 sequence. Otherwise it is decoded
            # using the system code page (mbcs), with no way to detect this
            # has happened. Because UTF-8 is the more likely encoding, and
            # mbcs is inherently unreliable (an mbcs string that happens to
            # be valid UTF-8 will not be decoded as mbcs), always recreate
            # the original bytes as UTF-8.
            raw = value.encode('utf-8')
        elif server.startswith('apache/'):
            # Apache mod_cgi writes bytes-as-unicode (as if ISO-8859-1)
            # direct to the Unicode environ. No modification needed.
            result[key] = value
            continue
        elif server.startswith('simplehttp/') and 'python/3' in server:
            # Python 3's http.server.CGIHTTPRequestHandler decodes using
            # the urllib.unquote default of UTF-8, amongst other issues.
            raw = value.encode('utf-8')
        else:
            # For other servers, guess that they have written bytes to the
            # environ using stdio byte-oriented interfaces, ending up with
            # the system code page.
            raw = value.encode(fs_encoding, 'replace')

        result[key] = raw.decode('iso-8859-1')

    return result
92
93
class BaseHandler:
    """Manage the invocation of a WSGI application"""

    # Configuration parameters; can override per-subclass or per-instance.
    # setup_environ() copies these four into the request environ under the
    # 'wsgi.version', 'wsgi.multithread', 'wsgi.multiprocess' and
    # 'wsgi.run_once' keys.
    wsgi_version = (1,0)
    wsgi_multithread = True
    wsgi_multiprocess = True
    wsgi_run_once = False

    origin_server = True    # We are transmitting direct to client
    http_version  = "1.0"   # Version that should be used for response
    server_software = None  # String name of server software, if any

    # os_environ is used to supply configuration from the OS environment:
    # by default it's a copy of 'os.environ' as of import time, but you can
    # override this in e.g. your __init__ method.
    # setup_environ() copies it again for every request.
    os_environ= read_environ()

    # Collaborator classes
    wsgi_file_wrapper = FileWrapper     # set to None to disable
    headers_class = Headers             # must be a Headers-like class

    # Error handling (also per-subclass or per-instance); error_status,
    # error_headers and error_body are what error_output() sends.
    traceback_limit = None  # Print entire traceback to self.get_stderr()
    error_status = "500 Internal Server Error"
    error_headers = [('Content-Type','text/plain')]
    error_body = b"A server error occurred.  Please contact the administrator."

    # State variables (don't mess with these); close() resets them.
    status = result = None
    headers_sent = False
    headers = None
    bytes_sent = 0
127
    def run(self, application):
        """Invoke the application

        Builds the request environ, calls *application* with it and
        'self.start_response', stores the returned iterable in 'self.result'
        and sends it via finish_response().

        Connection-abort/reset errors are swallowed silently.  Any other
        exception is passed to handle_error(); if that fails as well, the
        handler is closed and the second exception propagates to the caller.
        """
        # Note to self: don't move the close()!  Asynchronous servers shouldn't
        # call close() from finish_response(), so if you close() anywhere but
        # the double-error branch here, you'll break asynchronous servers by
        # prematurely closing.  Async servers must return from 'run()' without
        # closing if there might still be output to iterate over.
        try:
            self.setup_environ()
            self.result = application(self.environ, self.start_response)
            self.finish_response()
        except (ConnectionAbortedError, BrokenPipeError, ConnectionResetError):
            # We expect the client to close the connection abruptly from time
            # to time.
            return
        except:
            # Deliberately bare: every other failure is reported through
            # handle_error() rather than propagated.
            try:
                self.handle_error()
            except:
                # If we get an error handling an error, just give up already!
                self.close()
                raise   # ...and let the actual server figure it out.
150
151
152    def setup_environ(self):
153        """Set up the environment for one request"""
154
155        env = self.environ = self.os_environ.copy()
156        self.add_cgi_vars()
157
158        env['wsgi.input']        = self.get_stdin()
159        env['wsgi.errors']       = self.get_stderr()
160        env['wsgi.version']      = self.wsgi_version
161        env['wsgi.run_once']     = self.wsgi_run_once
162        env['wsgi.url_scheme']   = self.get_scheme()
163        env['wsgi.multithread']  = self.wsgi_multithread
164        env['wsgi.multiprocess'] = self.wsgi_multiprocess
165
166        if self.wsgi_file_wrapper is not None:
167            env['wsgi.file_wrapper'] = self.wsgi_file_wrapper
168
169        if self.origin_server and self.server_software:
170            env.setdefault('SERVER_SOFTWARE',self.server_software)
171
172
    def finish_response(self):
        """Send any iterable data, then close self and the iterable

        Unless the result is a file wrapper that sendfile() manages to
        transmit itself, every item of 'self.result' is passed to write()
        and finish_content() is called afterwards.

        Subclasses intended for use in asynchronous servers will
        want to redefine this method, such that it sets up callbacks
        in the event loop to iterate over the data, and to call
        'self.close()' once the response is finished.
        """
        try:
            # sendfile() is only attempted for file-wrapper results; a false
            # return falls back to plain iteration.
            if not self.result_is_file() or not self.sendfile():
                for data in self.result:
                    self.write(data)
                self.finish_content()
        except:
            # Call close() on the iterable returned by the WSGI application
            # in case of an exception.
            if hasattr(self.result, 'close'):
                self.result.close()
            raise
        else:
            # We only call close() when no exception is raised, because it
            # will set status, result, headers, and environ fields to None.
            # See bpo-29183 for more details.
            self.close()
197
198
    def get_scheme(self):
        """Return the URL scheme being used

        Delegates to guess_scheme() on 'self.environ'; setup_environ() stores
        the result under 'wsgi.url_scheme'.
        """
        return guess_scheme(self.environ)
202
203
204    def set_content_length(self):
205        """Compute Content-Length or switch to chunked encoding if possible"""
206        try:
207            blocks = len(self.result)
208        except (TypeError,AttributeError,NotImplementedError):
209            pass
210        else:
211            if blocks==1:
212                self.headers['Content-Length'] = str(self.bytes_sent)
213                return
214        # XXX Try for chunked encoding if origin server and client is 1.1
215
216
217    def cleanup_headers(self):
218        """Make any necessary header changes or defaults
219
220        Subclasses can extend this to add other defaults.
221        """
222        if 'Content-Length' not in self.headers:
223            self.set_content_length()
224
225    def start_response(self, status, headers,exc_info=None):
226        """'start_response()' callable as specified by PEP 3333"""
227
228        if exc_info:
229            try:
230                if self.headers_sent:
231                    raise
232            finally:
233                exc_info = None        # avoid dangling circular ref
234        elif self.headers is not None:
235            raise AssertionError("Headers already set!")
236
237        self.status = status
238        self.headers = self.headers_class(headers)
239        status = self._convert_string_type(status, "Status")
240        self._validate_status(status)
241
242        if __debug__:
243            for name, val in headers:
244                name = self._convert_string_type(name, "Header name")
245                val = self._convert_string_type(val, "Header value")
246                assert not is_hop_by_hop(name),\
247                       f"Hop-by-hop header, '{name}: {val}', not allowed"
248
249        return self.write
250
251    def _validate_status(self, status):
252        if len(status) < 4:
253            raise AssertionError("Status must be at least 4 characters")
254        if not status[:3].isdigit():
255            raise AssertionError("Status message must begin w/3-digit code")
256        if status[3] != " ":
257            raise AssertionError("Status message must have a space after code")
258
259    def _convert_string_type(self, value, title):
260        """Convert/check value type."""
261        if type(value) is str:
262            return value
263        raise AssertionError(
264            "{0} must be of type str (got {1})".format(title, repr(value))
265        )
266
267    def send_preamble(self):
268        """Transmit version/status/date/server, via self._write()"""
269        if self.origin_server:
270            if self.client_is_modern():
271                self._write(('HTTP/%s %s\r\n' % (self.http_version,self.status)).encode('iso-8859-1'))
272                if 'Date' not in self.headers:
273                    self._write(
274                        ('Date: %s\r\n' % format_date_time(time.time())).encode('iso-8859-1')
275                    )
276                if self.server_software and 'Server' not in self.headers:
277                    self._write(('Server: %s\r\n' % self.server_software).encode('iso-8859-1'))
278        else:
279            self._write(('Status: %s\r\n' % self.status).encode('iso-8859-1'))
280
281    def write(self, data):
282        """'write()' callable as specified by PEP 3333"""
283
284        assert type(data) is bytes, \
285            "write() argument must be a bytes instance"
286
287        if not self.status:
288            raise AssertionError("write() before start_response()")
289
290        elif not self.headers_sent:
291            # Before the first output, send the stored headers
292            self.bytes_sent = len(data)    # make sure we know content-length
293            self.send_headers()
294        else:
295            self.bytes_sent += len(data)
296
297        # XXX check Content-Length and truncate if too many bytes written?
298        self._write(data)
299        self._flush()
300
301
    def sendfile(self):
        """Platform-specific file transmission

        Override this method in subclasses to support platform-specific
        file transmission.  It is only called if the application's
        return iterable ('self.result') is an instance of
        'self.wsgi_file_wrapper'.

        This method should return a true value if it was able to actually
        transmit the wrapped file-like object using a platform-specific
        approach.  It should return a false value if normal iteration
        should be used instead.  An exception can be raised to indicate
        that transmission was attempted, but failed.

        NOTE: this method should call 'self.send_headers()' if
        'self.headers_sent' is false and it is going to attempt direct
        transmission of the file.

        The base implementation always returns False, so finish_response()
        falls back to iterating over the result.
        """
        return False   # No platform-specific transmission by default
321
322
323    def finish_content(self):
324        """Ensure headers and content have both been sent"""
325        if not self.headers_sent:
326            # Only zero Content-Length if not set by the application (so
327            # that HEAD requests can be satisfied properly, see #3839)
328            self.headers.setdefault('Content-Length', "0")
329            self.send_headers()
330        else:
331            pass # XXX check if content-length was too short?
332
333    def close(self):
334        """Close the iterable (if needed) and reset all instance vars
335
336        Subclasses may want to also drop the client connection.
337        """
338        try:
339            if hasattr(self.result,'close'):
340                self.result.close()
341        finally:
342            self.result = self.headers = self.status = self.environ = None
343            self.bytes_sent = 0; self.headers_sent = False
344
345
346    def send_headers(self):
347        """Transmit headers to the client, via self._write()"""
348        self.cleanup_headers()
349        self.headers_sent = True
350        if not self.origin_server or self.client_is_modern():
351            self.send_preamble()
352            self._write(bytes(self.headers))
353
354
355    def result_is_file(self):
356        """True if 'self.result' is an instance of 'self.wsgi_file_wrapper'"""
357        wrapper = self.wsgi_file_wrapper
358        return wrapper is not None and isinstance(self.result,wrapper)
359
360
361    def client_is_modern(self):
362        """True if client can accept status and headers"""
363        return self.environ['SERVER_PROTOCOL'].upper() != 'HTTP/0.9'
364
365
366    def log_exception(self,exc_info):
367        """Log the 'exc_info' tuple in the server log
368
369        Subclasses may override to retarget the output or change its format.
370        """
371        try:
372            from traceback import print_exception
373            stderr = self.get_stderr()
374            print_exception(
375                exc_info[0], exc_info[1], exc_info[2],
376                self.traceback_limit, stderr
377            )
378            stderr.flush()
379        finally:
380            exc_info = None
381
382    def handle_error(self):
383        """Log current error, and send error output to client if possible"""
384        self.log_exception(sys.exc_info())
385        if not self.headers_sent:
386            self.result = self.error_output(self.environ, self.start_response)
387            self.finish_response()
388        # XXX else: attempt advanced recovery techniques for HTML or text?
389
390    def error_output(self, environ, start_response):
391        """WSGI mini-app to create error output
392
393        By default, this just uses the 'error_status', 'error_headers',
394        and 'error_body' attributes to generate an output page.  It can
395        be overridden in a subclass to dynamically generate diagnostics,
396        choose an appropriate message for the user's preferred language, etc.
397
398        Note, however, that it's not recommended from a security perspective to
399        spit out diagnostics to any old user; ideally, you should have to do
400        something special to enable diagnostic output, which is why we don't
401        include any here!
402        """
403        start_response(self.error_status,self.error_headers[:],sys.exc_info())
404        return [self.error_body]
405
406
407    # Pure abstract methods; *must* be overridden in subclasses
408
    def _write(self,data):
        """Override in subclass to buffer data for send to client

        *data* is a bytes object (status line, headers or body data).

        It's okay if this method actually transmits the data; BaseHandler
        just separates write and flush operations for greater efficiency
        when the underlying system actually has such a distinction.
        """
        raise NotImplementedError
417
    def _flush(self):
        """Override in subclass to force sending of recent '_write()' calls

        It's okay if this method is a no-op (i.e., if '_write()' actually
        sends the data).  write() calls it after every '_write()'.
        """
        raise NotImplementedError
425
    def get_stdin(self):
        """Override in subclass to return suitable 'wsgi.input'

        setup_environ() stores the returned object under that environ key.
        """
        raise NotImplementedError
429
    def get_stderr(self):
        """Override in subclass to return suitable 'wsgi.errors'

        setup_environ() stores the returned object under that environ key,
        and log_exception() prints tracebacks to it.
        """
        raise NotImplementedError
433
    def add_cgi_vars(self):
        """Override in subclass to insert CGI variables in 'self.environ'

        Called by setup_environ() right after 'self.environ' has been
        created and before the 'wsgi.*' keys are added.
        """
        raise NotImplementedError
437
438
class SimpleHandler(BaseHandler):
    """Handler that's just initialized with streams, environment, etc.

    This handler subclass is intended for synchronous HTTP/1.0 origin servers,
    and handles sending the entire response output, given the correct inputs.

    Usage::

        handler = SimpleHandler(
            inp,out,err,env, multithread=False, multiprocess=True
        )
        handler.run(app)"""

    def __init__(self, stdin, stdout, stderr, environ,
                 multithread=True, multiprocess=False):
        # Streams and the per-request CGI environment supplied by the caller.
        self.stdin, self.stdout, self.stderr = stdin, stdout, stderr
        self.base_env = environ
        # Per-instance overrides of the class-level WSGI flags.
        self.wsgi_multithread = multithread
        self.wsgi_multiprocess = multiprocess

    def get_stdin(self):
        """Return the input stream given to the constructor."""
        return self.stdin

    def get_stderr(self):
        """Return the error stream given to the constructor."""
        return self.stderr

    def add_cgi_vars(self):
        """Merge the constructor's environ mapping into 'self.environ'."""
        self.environ.update(self.base_env)

    def _write(self, data):
        """Write *data* to stdout, retrying after partial writes.

        A write() result of None or the full length counts as complete.
        Anything else triggers a DeprecationWarning and the remainder is
        written in a loop until nothing is left.
        """
        written = self.stdout.write(data)
        if written is None or written == len(data):
            return

        import warnings
        warnings.warn(
            "SimpleHandler.stdout.write() should not do partial writes",
            DeprecationWarning)

        remaining = data[written:]
        while remaining:
            written = self.stdout.write(remaining)
            remaining = remaining[written:]

    def _flush(self):
        """Flush stdout, then bind its flush() directly on the instance.

        After the first call, 'self._flush' is the stream's own method, so
        later flushes skip this wrapper.
        """
        flush = self.stdout.flush
        flush()
        self._flush = flush
484
485
class BaseCGIHandler(SimpleHandler):

    """CGI-like systems using input/output/error streams and environ mapping

    Usage::

        handler = BaseCGIHandler(inp,out,err,env)
        handler.run(app)

    This handler class is useful for gateway protocols like ReadyExec and
    FastCGI, that have usable input/output/error streams and an environment
    mapping.  It's also the base class for CGIHandler, which just uses
    sys.stdin, os.environ, and so on.

    The constructor also takes keyword arguments 'multithread' and
    'multiprocess' (defaulting to 'True' and 'False' respectively) to control
    the configuration sent to the application.  It sets 'origin_server' to
    False (to enable CGI-like output), and assumes that 'wsgi.run_once' is
    False.
    """

    # Not an origin server: send_preamble() then writes a 'Status:' header
    # line instead of an HTTP status line.
    origin_server = False
508
509
class CGIHandler(BaseCGIHandler):

    """CGI-based invocation via sys.stdin/stdout/stderr and os.environ

    Usage::

        CGIHandler().run(app)

    The difference between this class and BaseCGIHandler is that it always
    uses 'wsgi.run_once' of 'True', 'wsgi.multithread' of 'False', and
    'wsgi.multiprocess' of 'True'.  It does not take any initialization
    parameters, but always uses 'sys.stdin', 'os.environ', and friends.

    If you need to override any of these parameters, use BaseCGIHandler
    instead.
    """

    wsgi_run_once = True
    # Do not allow os.environ to leak between requests in Google App Engine
    # and other multi-run CGI use cases.  This is not easily testable.
    # See http://bugs.python.org/issue7250
    os_environ = {}

    def __init__(self):
        # Binary stdin/stdout, text stderr, and a fresh snapshot of the
        # process environment taken at construction time.
        request_in = sys.stdin.buffer
        response_out = sys.stdout.buffer
        error_log = sys.stderr
        cgi_env = read_environ()
        BaseCGIHandler.__init__(
            self, request_in, response_out, error_log, cgi_env,
            multithread=False, multiprocess=True,
        )
538
539
class IISCGIHandler(BaseCGIHandler):
    """CGI-based invocation with workaround for IIS path bug

    This handler should be used in preference to CGIHandler when deploying on
    Microsoft IIS without having set the config allowPathInfo option (IIS>=7)
    or metabase allowPathInfoForScriptMappings (IIS<7).
    """
    wsgi_run_once = True
    os_environ = {}

    # By default, IIS gives a PATH_INFO that duplicates the SCRIPT_NAME at
    # the front, causing problems for WSGI applications that wish to implement
    # routing. This handler strips any such duplicated path.

    # IIS can be configured to pass the correct PATH_INFO, but this causes
    # another bug where PATH_TRANSLATED is wrong. Luckily this variable is
    # rarely used and is not guaranteed by WSGI. On IIS<7, though, the
    # setting can only be made on a vhost level, affecting all other script
    # mappings, many of which break when exposed to the PATH_TRANSLATED bug.
    # For this reason IIS<7 is almost never deployed with the fix. (Even IIS7
    # rarely uses it because there is still no UI for it.)

    # There is no way for CGI code to tell whether the option was set, so a
    # separate handler class is provided.
    def __init__(self):
        cgi_env = read_environ()
        path_info = cgi_env.get('PATH_INFO', '')
        script_name = cgi_env.get('SCRIPT_NAME', '')

        # Compare with a trailing '/' on both sides so that only a whole
        # leading path segment match counts as a duplicated SCRIPT_NAME.
        duplicated = (path_info + '/').startswith(script_name + '/')
        if duplicated:
            cgi_env['PATH_INFO'] = path_info[len(script_name):]

        BaseCGIHandler.__init__(
            self, sys.stdin.buffer, sys.stdout.buffer, sys.stderr,
            cgi_env, multithread=False, multiprocess=True,
        )
574