• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1"""CGI-savvy HTTP Server.
2
3This module builds on SimpleHTTPServer by implementing GET and POST
4requests to cgi-bin scripts.
5
6If the os.fork() function is not present (e.g. on Windows),
7os.popen2() is used as a fallback, with slightly altered semantics; if
8that function is not present either (e.g. on Macintosh), only Python
9scripts are supported, and they are executed by the current process.
10
In all cases, the implementation is intentionally naive -- all
requests are executed synchronously.
13
14SECURITY WARNING: DON'T USE THIS CODE UNLESS YOU ARE INSIDE A FIREWALL
15-- it may execute arbitrary Python code or external programs.
16
17Note that status code 200 is sent prior to execution of a CGI script, so
18scripts cannot send other status codes such as 302 (redirect).
19"""
20
21
# Version string of this module.
__version__ = "0.4"

# Names exported by "from <module> import *".
__all__ = ["CGIHTTPRequestHandler"]
25
26import os
27import sys
28import urllib
29import BaseHTTPServer
30import SimpleHTTPServer
31import select
32import copy
33
34
class CGIHTTPRequestHandler(SimpleHTTPServer.SimpleHTTPRequestHandler):

    """Complete HTTP server with GET, HEAD and POST commands.

    GET and HEAD also support running CGI scripts.

    The POST command is *only* implemented for CGI scripts.

    """

    # Determine platform specifics
    have_fork = hasattr(os, 'fork')
    have_popen2 = hasattr(os, 'popen2')
    have_popen3 = hasattr(os, 'popen3')

    # Make rfile unbuffered -- we need to read one line and then pass
    # the rest to a subprocess, so we can't use buffered input.
    rbufsize = 0

    # URL path prefixes under which requests are treated as CGI scripts.
    cgi_directories = ['/cgi-bin', '/htbin']

    def do_POST(self):
        """Serve a POST request.

        This is only implemented for CGI scripts; any other target is
        answered with a 501 error.

        """

        if self.is_cgi():
            self.run_cgi()
        else:
            self.send_error(501, "Can only POST to CGI scripts")

    def send_head(self):
        """Version of send_head that supports CGI scripts.

        CGI requests are handed to run_cgi(); everything else is
        delegated to SimpleHTTPRequestHandler.send_head().
        """
        if self.is_cgi():
            return self.run_cgi()
        else:
            return SimpleHTTPServer.SimpleHTTPRequestHandler.send_head(self)

    def is_cgi(self):
        """Test whether self.path corresponds to a CGI script.

        Returns True and updates the cgi_info attribute to the tuple
        (dir, rest) if self.path requires running a CGI script.
        Returns False otherwise.

        dir is the matching entry of self.cgi_directories; rest is the
        remainder of the normalized path after that directory,
        followed by '?' and the query string when the request has one.

        If any exception is raised, the caller should assume that
        self.path was rejected as invalid and act accordingly.

        The default implementation tests whether the normalized url
        path begins with one of the strings in self.cgi_directories
        followed by a '/'.
        """
        path = self.path
        # The query string is not part of the path: keep it away from
        # the normalization so that '/' or '..' inside it cannot alter
        # which script is selected.
        query_start = path.find('?')
        if query_start >= 0:
            path, query = path[:query_start], path[query_start:]
        else:
            query = ''

        head, tail = _url_collapse_path_split(path)
        # Re-join the normalized pieces; head is '/' for a top-level
        # path and never carries a trailing '/' otherwise.
        collapsed = head.rstrip('/') + '/' + tail

        for cgi_dir in self.cgi_directories:
            prefix = cgi_dir.rstrip('/') + '/'
            # Prefix match, so scripts in subdirectories and requests
            # with extra path information are recognized as well.
            if collapsed.startswith(prefix):
                self.cgi_info = cgi_dir, collapsed[len(prefix):] + query
                return True
        return False

    def is_executable(self, path):
        """Test whether argument path is an executable file."""
        return executable(path)

    def is_python(self, path):
        """Test whether argument path is a Python script."""
        head, tail = os.path.splitext(path)
        return tail.lower() in (".py", ".pyw")

    def run_cgi(self):
        """Execute a CGI script.

        Uses the (dir, rest) tuple stored in self.cgi_info by is_cgi().
        Sends a 404 or 403 error if the script is missing, is not a
        plain file or cannot be executed; otherwise sends a 200 status
        line and runs the script with a CGI environment, either in a
        forked child (when os.fork exists) or through subprocess.
        """
        cgidir, rest = self.cgi_info

        # find an explicit query string, if present.  The query starts
        # at the first '?'; it is removed before the directory walk so
        # that slashes inside it are never taken for path separators.
        i = rest.find('?')
        if i >= 0:
            rest, query = rest[:i], rest[i+1:]
        else:
            query = ''

        # Walk the normalized path recorded by is_cgi() (not the raw
        # self.path) and move every leading component that is a real
        # directory from rest into cgidir.
        path = cgidir + '/' + rest
        i = path.find('/', len(cgidir) + 1)
        while i >= 0:
            nextdir = path[:i]
            nextrest = path[i+1:]

            scriptdir = self.translate_path(nextdir)
            if os.path.isdir(scriptdir):
                cgidir, rest = nextdir, nextrest
                i = path.find('/', len(cgidir) + 1)
            else:
                break

        # dissect the part after the directory name into a script name &
        # a possible additional path, to be stored in PATH_INFO.
        i = rest.find('/')
        if i >= 0:
            script, rest = rest[:i], rest[i:]
        else:
            script, rest = rest, ''

        scriptname = cgidir + '/' + script
        scriptfile = self.translate_path(scriptname)
        if not os.path.exists(scriptfile):
            self.send_error(404, "No such CGI script (%r)" % scriptname)
            return
        if not os.path.isfile(scriptfile):
            self.send_error(403, "CGI script is not a plain file (%r)" %
                            scriptname)
            return
        ispy = self.is_python(scriptname)
        if not ispy:
            if not (self.have_fork or self.have_popen2 or self.have_popen3):
                self.send_error(403, "CGI script is not a Python script (%r)" %
                                scriptname)
                return
            if not self.is_executable(scriptfile):
                self.send_error(403, "CGI script is not executable (%r)" %
                                scriptname)
                return

        # Reference: http://hoohoo.ncsa.uiuc.edu/cgi/env.html
        # XXX Much of the following could be prepared ahead of time!
        env = copy.deepcopy(os.environ)
        env['SERVER_SOFTWARE'] = self.version_string()
        env['SERVER_NAME'] = self.server.server_name
        env['GATEWAY_INTERFACE'] = 'CGI/1.1'
        env['SERVER_PROTOCOL'] = self.protocol_version
        env['SERVER_PORT'] = str(self.server.server_port)
        env['REQUEST_METHOD'] = self.command
        uqrest = urllib.unquote(rest)
        env['PATH_INFO'] = uqrest
        env['PATH_TRANSLATED'] = self.translate_path(uqrest)
        env['SCRIPT_NAME'] = scriptname
        if query:
            env['QUERY_STRING'] = query
        host = self.address_string()
        if host != self.client_address[0]:
            env['REMOTE_HOST'] = host
        env['REMOTE_ADDR'] = self.client_address[0]
        authorization = self.headers.getheader("authorization")
        if authorization:
            authorization = authorization.split()
            if len(authorization) == 2:
                import base64, binascii
                env['AUTH_TYPE'] = authorization[0]
                if authorization[0].lower() == "basic":
                    try:
                        authorization = base64.decodestring(authorization[1])
                    except binascii.Error:
                        # Undecodable credentials: leave REMOTE_USER unset.
                        pass
                    else:
                        authorization = authorization.split(':')
                        if len(authorization) == 2:
                            env['REMOTE_USER'] = authorization[0]
        # XXX REMOTE_IDENT
        if self.headers.typeheader is None:
            env['CONTENT_TYPE'] = self.headers.type
        else:
            env['CONTENT_TYPE'] = self.headers.typeheader
        length = self.headers.getheader('content-length')
        if length:
            env['CONTENT_LENGTH'] = length
        referer = self.headers.getheader('referer')
        if referer:
            env['HTTP_REFERER'] = referer
        accept = []
        for line in self.headers.getallmatchingheaders('accept'):
            if line[:1] in "\t\n\r ":
                # Continuation line of a folded header.
                accept.append(line.strip())
            else:
                # Skip the leading "Accept:" (7 characters).
                accept = accept + line[7:].split(',')
        env['HTTP_ACCEPT'] = ','.join(accept)
        ua = self.headers.getheader('user-agent')
        if ua:
            env['HTTP_USER_AGENT'] = ua
        co = filter(None, self.headers.getheaders('cookie'))
        if co:
            env['HTTP_COOKIE'] = ', '.join(co)
        # XXX Other HTTP_* headers
        # Since we're setting the env in the parent, provide empty
        # values to override previously set values
        for k in ('QUERY_STRING', 'REMOTE_HOST', 'CONTENT_LENGTH',
                  'HTTP_USER_AGENT', 'HTTP_COOKIE', 'HTTP_REFERER'):
            env.setdefault(k, "")

        self.send_response(200, "Script output follows")

        decoded_query = query.replace('+', ' ')

        if self.have_fork:
            # Unix -- fork as we should
            args = [script]
            # A query without '=' is passed to the script as an argument.
            if '=' not in decoded_query:
                args.append(decoded_query)
            nobody = nobody_uid()
            self.wfile.flush() # Always flush before forking
            pid = os.fork()
            if pid != 0:
                # Parent
                pid, sts = os.waitpid(pid, 0)
                # throw away additional data [see bug #427345]
                while select.select([self.rfile], [], [], 0)[0]:
                    if not self.rfile.read(1):
                        break
                if sts:
                    self.log_error("CGI script exit status %#x", sts)
                return
            # Child
            try:
                try:
                    os.setuid(nobody)
                except os.error:
                    # Best effort: keep the current uid if we may not
                    # switch to "nobody".
                    pass
                os.dup2(self.rfile.fileno(), 0)
                os.dup2(self.wfile.fileno(), 1)
                os.execve(scriptfile, args, env)
            except:
                # Deliberately catch everything: the child must never
                # return into the server loop, so report and exit.
                self.server.handle_error(self.request, self.client_address)
                os._exit(127)

        else:
            # Non Unix - use subprocess
            import subprocess
            cmdline = [scriptfile]
            if self.is_python(scriptfile):
                interp = sys.executable
                if interp.lower().endswith("w.exe"):
                    # On Windows, use python.exe, not pythonw.exe
                    interp = interp[:-5] + interp[-4:]
                cmdline = [interp, '-u'] + cmdline
            if '=' not in query:
                cmdline.append(query)

            self.log_message("command: %s", subprocess.list2cmdline(cmdline))
            try:
                nbytes = int(length)
            except (TypeError, ValueError):
                nbytes = 0
            p = subprocess.Popen(cmdline,
                                 stdin = subprocess.PIPE,
                                 stdout = subprocess.PIPE,
                                 stderr = subprocess.PIPE,
                                 env = env
                                )
            if self.command.lower() == "post" and nbytes > 0:
                data = self.rfile.read(nbytes)
            else:
                data = None
            # throw away additional data [see bug #427345]
            while select.select([self.rfile._sock], [], [], 0)[0]:
                if not self.rfile._sock.recv(1):
                    break
            stdout, stderr = p.communicate(data)
            self.wfile.write(stdout)
            if stderr:
                self.log_error('%s', stderr)
            p.stderr.close()
            p.stdout.close()
            status = p.returncode
            if status:
                self.log_error("CGI script exit status %#x", status)
            else:
                self.log_message("CGI script exited OK")
299
300
301# TODO(gregory.p.smith): Move this into an appropriate library.
302def _url_collapse_path_split(path):
303    """
304    Given a URL path, remove extra '/'s and '.' path elements and collapse
305    any '..' references.
306
307    Implements something akin to RFC-2396 5.2 step 6 to parse relative paths.
308
309    Returns: A tuple of (head, tail) where tail is everything after the final /
310    and head is everything before it.  Head will always start with a '/' and,
311    if it contains anything else, never have a trailing '/'.
312
313    Raises: IndexError if too many '..' occur within the path.
314    """
315    # Similar to os.path.split(os.path.normpath(path)) but specific to URL
316    # path semantics rather than local operating system semantics.
317    path_parts = []
318    for part in path.split('/'):
319        if part == '.':
320            path_parts.append('')
321        else:
322            path_parts.append(part)
323    # Filter out blank non trailing parts before consuming the '..'.
324    path_parts = [part for part in path_parts[:-1] if part] + path_parts[-1:]
325    if path_parts:
326        tail_part = path_parts.pop()
327    else:
328        tail_part = ''
329    head_parts = []
330    for part in path_parts:
331        if part == '..':
332            head_parts.pop()
333        else:
334            head_parts.append(part)
335    if tail_part and tail_part == '..':
336        head_parts.pop()
337        tail_part = ''
338    return ('/' + '/'.join(head_parts), tail_part)
339
340
# Cached result of nobody_uid(); None until it has been computed.
nobody = None

def nobody_uid():
    """Internal routine to get nobody's uid.

    The value is looked up once and cached in the module-level
    ``nobody`` variable.  Returns -1 (without caching) when the pwd
    module cannot be imported.  If there is no user named 'nobody',
    one more than the largest uid in the password database is used.
    """
    global nobody
    if not nobody:
        try:
            import pwd
        except ImportError:
            return -1
        try:
            nobody = pwd.getpwnam('nobody')[2]
        except KeyError:
            # Index 2 of a password entry is the uid.
            nobody = max([entry[2] for entry in pwd.getpwall()]) + 1
    return nobody
357
358
def executable(path):
    """Test for executable file.

    Returns True if os.stat() succeeds on path and at least one of the
    owner, group or other execute permission bits is set.  Returns
    False otherwise, including when path cannot be stat'ed.
    """
    import stat
    try:
        st = os.stat(path)
    except os.error:
        return False
    # Named constants instead of the octal literal 0111, which is not
    # valid syntax on newer Python versions.
    any_exec = stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH
    return st.st_mode & any_exec != 0
366
367
def test(HandlerClass=CGIHTTPRequestHandler,
         ServerClass=BaseHTTPServer.HTTPServer):
    """Run SimpleHTTPServer.test() with the given classes.

    HandlerClass is the request handler class and ServerClass the
    server class to pass along; they default to CGIHTTPRequestHandler
    and BaseHTTPServer.HTTPServer.
    """
    SimpleHTTPServer.test(HandlerClass, ServerClass)
371
372
# Call test() when this file is executed as a script rather than imported.
if __name__ == '__main__':
    test()
375