1"""CGI-savvy HTTP Server. 2 3This module builds on SimpleHTTPServer by implementing GET and POST 4requests to cgi-bin scripts. 5 6If the os.fork() function is not present (e.g. on Windows), 7os.popen2() is used as a fallback, with slightly altered semantics; if 8that function is not present either (e.g. on Macintosh), only Python 9scripts are supported, and they are executed by the current process. 10 11In all cases, the implementation is intentionally naive -- all 12requests are executed sychronously. 13 14SECURITY WARNING: DON'T USE THIS CODE UNLESS YOU ARE INSIDE A FIREWALL 15-- it may execute arbitrary Python code or external programs. 16 17Note that status code 200 is sent prior to execution of a CGI script, so 18scripts cannot send other status codes such as 302 (redirect). 19""" 20 21 22__version__ = "0.4" 23 24__all__ = ["CGIHTTPRequestHandler"] 25 26import os 27import sys 28import urllib 29import BaseHTTPServer 30import SimpleHTTPServer 31import select 32import copy 33 34 35class CGIHTTPRequestHandler(SimpleHTTPServer.SimpleHTTPRequestHandler): 36 37 """Complete HTTP server with GET, HEAD and POST commands. 38 39 GET and HEAD also support running CGI scripts. 40 41 The POST command is *only* implemented for CGI scripts. 42 43 """ 44 45 # Determine platform specifics 46 have_fork = hasattr(os, 'fork') 47 have_popen2 = hasattr(os, 'popen2') 48 have_popen3 = hasattr(os, 'popen3') 49 50 # Make rfile unbuffered -- we need to read one line and then pass 51 # the rest to a subprocess, so we can't use buffered input. 52 rbufsize = 0 53 54 def do_POST(self): 55 """Serve a POST request. 56 57 This is only implemented for CGI scripts. 58 59 """ 60 61 if self.is_cgi(): 62 self.run_cgi() 63 else: 64 self.send_error(501, "Can only POST to CGI scripts") 65 66 def send_head(self): 67 """Version of send_head that support CGI scripts""" 68 if self.is_cgi(): 69 return self.run_cgi() 70 else: 71 return SimpleHTTPServer.SimpleHTTPRequestHandler.send_head(self) 72 73 def is_cgi(self): 74 """Test whether self.path corresponds to a CGI script. 75 76 Returns True and updates the cgi_info attribute to the tuple 77 (dir, rest) if self.path requires running a CGI script. 78 Returns False otherwise. 79 80 If any exception is raised, the caller should assume that 81 self.path was rejected as invalid and act accordingly. 82 83 The default implementation tests whether the normalized url 84 path begins with one of the strings in self.cgi_directories 85 (and the next character is a '/' or the end of the string). 86 """ 87 splitpath = _url_collapse_path_split(self.path) 88 if splitpath[0] in self.cgi_directories: 89 self.cgi_info = splitpath 90 return True 91 return False 92 93 cgi_directories = ['/cgi-bin', '/htbin'] 94 95 def is_executable(self, path): 96 """Test whether argument path is an executable file.""" 97 return executable(path) 98 99 def is_python(self, path): 100 """Test whether argument path is a Python script.""" 101 head, tail = os.path.splitext(path) 102 return tail.lower() in (".py", ".pyw") 103 104 def run_cgi(self): 105 """Execute a CGI script.""" 106 path = self.path 107 dir, rest = self.cgi_info 108 109 i = path.find('/', len(dir) + 1) 110 while i >= 0: 111 nextdir = path[:i] 112 nextrest = path[i+1:] 113 114 scriptdir = self.translate_path(nextdir) 115 if os.path.isdir(scriptdir): 116 dir, rest = nextdir, nextrest 117 i = path.find('/', len(dir) + 1) 118 else: 119 break 120 121 # find an explicit query string, if present. 122 i = rest.rfind('?') 123 if i >= 0: 124 rest, query = rest[:i], rest[i+1:] 125 else: 126 query = '' 127 128 # dissect the part after the directory name into a script name & 129 # a possible additional path, to be stored in PATH_INFO. 130 i = rest.find('/') 131 if i >= 0: 132 script, rest = rest[:i], rest[i:] 133 else: 134 script, rest = rest, '' 135 136 scriptname = dir + '/' + script 137 scriptfile = self.translate_path(scriptname) 138 if not os.path.exists(scriptfile): 139 self.send_error(404, "No such CGI script (%r)" % scriptname) 140 return 141 if not os.path.isfile(scriptfile): 142 self.send_error(403, "CGI script is not a plain file (%r)" % 143 scriptname) 144 return 145 ispy = self.is_python(scriptname) 146 if not ispy: 147 if not (self.have_fork or self.have_popen2 or self.have_popen3): 148 self.send_error(403, "CGI script is not a Python script (%r)" % 149 scriptname) 150 return 151 if not self.is_executable(scriptfile): 152 self.send_error(403, "CGI script is not executable (%r)" % 153 scriptname) 154 return 155 156 # Reference: http://hoohoo.ncsa.uiuc.edu/cgi/env.html 157 # XXX Much of the following could be prepared ahead of time! 158 env = copy.deepcopy(os.environ) 159 env['SERVER_SOFTWARE'] = self.version_string() 160 env['SERVER_NAME'] = self.server.server_name 161 env['GATEWAY_INTERFACE'] = 'CGI/1.1' 162 env['SERVER_PROTOCOL'] = self.protocol_version 163 env['SERVER_PORT'] = str(self.server.server_port) 164 env['REQUEST_METHOD'] = self.command 165 uqrest = urllib.unquote(rest) 166 env['PATH_INFO'] = uqrest 167 env['PATH_TRANSLATED'] = self.translate_path(uqrest) 168 env['SCRIPT_NAME'] = scriptname 169 if query: 170 env['QUERY_STRING'] = query 171 host = self.address_string() 172 if host != self.client_address[0]: 173 env['REMOTE_HOST'] = host 174 env['REMOTE_ADDR'] = self.client_address[0] 175 authorization = self.headers.getheader("authorization") 176 if authorization: 177 authorization = authorization.split() 178 if len(authorization) == 2: 179 import base64, binascii 180 env['AUTH_TYPE'] = authorization[0] 181 if authorization[0].lower() == "basic": 182 try: 183 authorization = base64.decodestring(authorization[1]) 184 except binascii.Error: 185 pass 186 else: 187 authorization = authorization.split(':') 188 if len(authorization) == 2: 189 env['REMOTE_USER'] = authorization[0] 190 # XXX REMOTE_IDENT 191 if self.headers.typeheader is None: 192 env['CONTENT_TYPE'] = self.headers.type 193 else: 194 env['CONTENT_TYPE'] = self.headers.typeheader 195 length = self.headers.getheader('content-length') 196 if length: 197 env['CONTENT_LENGTH'] = length 198 referer = self.headers.getheader('referer') 199 if referer: 200 env['HTTP_REFERER'] = referer 201 accept = [] 202 for line in self.headers.getallmatchingheaders('accept'): 203 if line[:1] in "\t\n\r ": 204 accept.append(line.strip()) 205 else: 206 accept = accept + line[7:].split(',') 207 env['HTTP_ACCEPT'] = ','.join(accept) 208 ua = self.headers.getheader('user-agent') 209 if ua: 210 env['HTTP_USER_AGENT'] = ua 211 co = filter(None, self.headers.getheaders('cookie')) 212 if co: 213 env['HTTP_COOKIE'] = ', '.join(co) 214 # XXX Other HTTP_* headers 215 # Since we're setting the env in the parent, provide empty 216 # values to override previously set values 217 for k in ('QUERY_STRING', 'REMOTE_HOST', 'CONTENT_LENGTH', 218 'HTTP_USER_AGENT', 'HTTP_COOKIE', 'HTTP_REFERER'): 219 env.setdefault(k, "") 220 221 self.send_response(200, "Script output follows") 222 223 decoded_query = query.replace('+', ' ') 224 225 if self.have_fork: 226 # Unix -- fork as we should 227 args = [script] 228 if '=' not in decoded_query: 229 args.append(decoded_query) 230 nobody = nobody_uid() 231 self.wfile.flush() # Always flush before forking 232 pid = os.fork() 233 if pid != 0: 234 # Parent 235 pid, sts = os.waitpid(pid, 0) 236 # throw away additional data [see bug #427345] 237 while select.select([self.rfile], [], [], 0)[0]: 238 if not self.rfile.read(1): 239 break 240 if sts: 241 self.log_error("CGI script exit status %#x", sts) 242 return 243 # Child 244 try: 245 try: 246 os.setuid(nobody) 247 except os.error: 248 pass 249 os.dup2(self.rfile.fileno(), 0) 250 os.dup2(self.wfile.fileno(), 1) 251 os.execve(scriptfile, args, env) 252 except: 253 self.server.handle_error(self.request, self.client_address) 254 os._exit(127) 255 256 else: 257 # Non Unix - use subprocess 258 import subprocess 259 cmdline = [scriptfile] 260 if self.is_python(scriptfile): 261 interp = sys.executable 262 if interp.lower().endswith("w.exe"): 263 # On Windows, use python.exe, not pythonw.exe 264 interp = interp[:-5] + interp[-4:] 265 cmdline = [interp, '-u'] + cmdline 266 if '=' not in query: 267 cmdline.append(query) 268 269 self.log_message("command: %s", subprocess.list2cmdline(cmdline)) 270 try: 271 nbytes = int(length) 272 except (TypeError, ValueError): 273 nbytes = 0 274 p = subprocess.Popen(cmdline, 275 stdin = subprocess.PIPE, 276 stdout = subprocess.PIPE, 277 stderr = subprocess.PIPE, 278 env = env 279 ) 280 if self.command.lower() == "post" and nbytes > 0: 281 data = self.rfile.read(nbytes) 282 else: 283 data = None 284 # throw away additional data [see bug #427345] 285 while select.select([self.rfile._sock], [], [], 0)[0]: 286 if not self.rfile._sock.recv(1): 287 break 288 stdout, stderr = p.communicate(data) 289 self.wfile.write(stdout) 290 if stderr: 291 self.log_error('%s', stderr) 292 p.stderr.close() 293 p.stdout.close() 294 status = p.returncode 295 if status: 296 self.log_error("CGI script exit status %#x", status) 297 else: 298 self.log_message("CGI script exited OK") 299 300 301# TODO(gregory.p.smith): Move this into an appropriate library. 302def _url_collapse_path_split(path): 303 """ 304 Given a URL path, remove extra '/'s and '.' path elements and collapse 305 any '..' references. 306 307 Implements something akin to RFC-2396 5.2 step 6 to parse relative paths. 308 309 Returns: A tuple of (head, tail) where tail is everything after the final / 310 and head is everything before it. Head will always start with a '/' and, 311 if it contains anything else, never have a trailing '/'. 312 313 Raises: IndexError if too many '..' occur within the path. 314 """ 315 # Similar to os.path.split(os.path.normpath(path)) but specific to URL 316 # path semantics rather than local operating system semantics. 317 path_parts = [] 318 for part in path.split('/'): 319 if part == '.': 320 path_parts.append('') 321 else: 322 path_parts.append(part) 323 # Filter out blank non trailing parts before consuming the '..'. 324 path_parts = [part for part in path_parts[:-1] if part] + path_parts[-1:] 325 if path_parts: 326 tail_part = path_parts.pop() 327 else: 328 tail_part = '' 329 head_parts = [] 330 for part in path_parts: 331 if part == '..': 332 head_parts.pop() 333 else: 334 head_parts.append(part) 335 if tail_part and tail_part == '..': 336 head_parts.pop() 337 tail_part = '' 338 return ('/' + '/'.join(head_parts), tail_part) 339 340 341nobody = None 342 343def nobody_uid(): 344 """Internal routine to get nobody's uid""" 345 global nobody 346 if nobody: 347 return nobody 348 try: 349 import pwd 350 except ImportError: 351 return -1 352 try: 353 nobody = pwd.getpwnam('nobody')[2] 354 except KeyError: 355 nobody = 1 + max(map(lambda x: x[2], pwd.getpwall())) 356 return nobody 357 358 359def executable(path): 360 """Test for executable file.""" 361 try: 362 st = os.stat(path) 363 except os.error: 364 return False 365 return st.st_mode & 0111 != 0 366 367 368def test(HandlerClass = CGIHTTPRequestHandler, 369 ServerClass = BaseHTTPServer.HTTPServer): 370 SimpleHTTPServer.test(HandlerClass, ServerClass) 371 372 373if __name__ == '__main__': 374 test() 375