# Copyright 2012 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import BaseHTTPServer
from collections import namedtuple
import gzip
import mimetypes
import os
import SimpleHTTPServer
import SocketServer
import StringIO
import sys
import urlparse


from telemetry.core import local_server


# Inclusive byte offsets of a (partial) response body.
ByteRange = namedtuple('ByteRange', ['from_byte', 'to_byte'])
# A resource_map entry paired with the ByteRange to serve (or None for all).
ResourceAndRange = namedtuple('ResourceAndRange', ['resource', 'byte_range'])


class MemoryCacheHTTPRequestHandler(SimpleHTTPServer.SimpleHTTPRequestHandler):
  """Serves GET/HEAD requests out of the server's in-memory resource map."""

  protocol_version = 'HTTP/1.1'  # override BaseHTTPServer setting
  wbufsize = -1  # override StreamRequestHandler (a base class) setting

  def do_GET(self):
    """Serve a GET request."""
    resource_range = self.SendHead()

    # SendHead has already sent an error response if nothing was found.
    if not resource_range or not resource_range.resource:
      return
    response = resource_range.resource['response']

    if not resource_range.byte_range:
      self.wfile.write(response)
      return

    # byte_range bounds are inclusive, hence the + 1 on the slice end.
    start_index = resource_range.byte_range.from_byte
    end_index = resource_range.byte_range.to_byte
    self.wfile.write(response[start_index:end_index + 1])

  def do_HEAD(self):
    """Serve a HEAD request."""
    self.SendHead()

  def log_error(self, fmt, *args):
    """Discards error log messages."""
    pass

  def log_request(self, code='-', size='-'):
    """Discards per-request log messages."""
    # Don't spam the console unless it is important.
    pass

  def SendHead(self):
    """Sends the status line and headers for the requested resource.

    Sends 206 plus a Content-Range header when GetByteRange returns a range,
    and 200 otherwise.

    Returns:
      A ResourceAndRange holding the resource_map entry and the byte range to
      serve (byte_range is None when the whole resource should be sent), or
      None if the path is not in the resource map, in which case a 404 error
      has already been sent.
    """
    path = os.path.realpath(self.translate_path(self.path))
    if path not in self.server.resource_map:
      self.send_error(404, 'File not found')
      return None

    resource = self.server.resource_map[path]
    total_num_of_bytes = resource['content-length']
    byte_range = self.GetByteRange(total_num_of_bytes)
    if byte_range:
      # request specified a range, so set response code to 206.
      self.send_response(206)
      self.send_header('Content-Range',
                       'bytes %d-%d/%d' % (byte_range.from_byte,
                                           byte_range.to_byte,
                                           total_num_of_bytes))
      # From here on this is the length of the partial body only.
      total_num_of_bytes = byte_range.to_byte - byte_range.from_byte + 1
    else:
      self.send_response(200)

    self.send_header('Content-Length', str(total_num_of_bytes))
    self.send_header('Content-Type', resource['content-type'])
    self.send_header('Last-Modified',
                     self.date_time_string(resource['last-modified']))
    if resource['zipped']:
      self.send_header('Content-Encoding', 'gzip')
    self.end_headers()
    return ResourceAndRange(resource, byte_range)

  def GetByteRange(self, total_num_of_bytes):
    """Parse the header and get the range values specified.

    Args:
      total_num_of_bytes: Total # of bytes in requested resource,
      used to calculate upper range limit.
    Returns:
      A ByteRange namedtuple object with the requested byte-range values
      (both ends inclusive).
      If no Range is explicitly requested, there is a failure parsing, or the
      range starts at or beyond the end of the resource, return None so that
      the caller serves the whole resource.
      If range specified is in the format "N-", return N-END. Refer to
      http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html for details.
      If range specified is in the format "-N", return the last N bytes
      (the whole resource if N exceeds its size).
      If upper range limit is greater than total # of bytes, or is less than
      the lower limit, return END as the upper index.
    """

    range_header = self.headers.getheader('Range')
    if range_header is None:
      return None
    if not range_header.startswith('bytes='):
      return None

    # The range header is expected to be a string in this format:
    # bytes=0-1
    # Get the upper and lower limits of the specified byte-range.
    # We've already confirmed that range_header starts with 'bytes='.
    byte_range_values = range_header[len('bytes='):].split('-')
    if len(byte_range_values) != 2:
      # Multiple ranges and other unexpected shapes are not supported.
      return None

    first, last = byte_range_values
    try:
      if first:
        from_byte = int(first)
        # If the upper limit is not defined return all bytes from from_byte.
        to_byte = int(last) if last else total_num_of_bytes - 1
      elif last:
        # Suffix range: the value is a byte count measured from the end, not
        # an index, so the range always runs through the last byte.
        from_byte = max(total_num_of_bytes - int(last), 0)
        to_byte = total_num_of_bytes - 1
      else:
        # 'bytes=-' specifies nothing at all.
        return None
    except ValueError:
      # Non-numeric range values count as a parse failure.
      return None

    # Do some validation. A range starting outside the resource cannot be
    # served; ignore it rather than emit a negative Content-Length.
    if from_byte < 0 or from_byte >= total_num_of_bytes:
      return None

    # Make to_byte the end byte by default in edge cases.
    if to_byte < from_byte or to_byte >= total_num_of_bytes:
      to_byte = total_num_of_bytes - 1

    return ByteRange(from_byte, to_byte)


class _MemoryCacheHTTPServerImpl(SocketServer.ThreadingMixIn,
                                 BaseHTTPServer.HTTPServer):
  """Threaded HTTP server that preloads the given paths into memory."""

  # Increase the request queue size. The default value, 5, is set in
  # SocketServer.TCPServer (the parent of BaseHTTPServer.HTTPServer).
  # Since we're intercepting many domains through this single server,
  # it is quite possible to get more than 5 concurrent requests.
  request_queue_size = 128

  # Don't prevent python from exiting when there is thread activity.
  daemon_threads = True

  def __init__(self, host_port, handler, paths):
    """Binds the server and loads every path into the resource map.

    Args:
      host_port: (host, port) tuple passed to BaseHTTPServer.HTTPServer.
      handler: Request handler class passed to BaseHTTPServer.HTTPServer.
      paths: Iterable of file and/or directory paths to load.
    """
    BaseHTTPServer.HTTPServer.__init__(self, host_port, handler)
    # Maps real file paths to dicts describing the cached response.
    self.resource_map = {}
    for path in paths:
      if os.path.isdir(path):
        self.AddDirectoryToResourceMap(path)
      else:
        self.AddFileToResourceMap(path)

  def AddDirectoryToResourceMap(self, directory_path):
    """Loads all files in directory_path into the in-memory resource map."""
    for root, dirs, files in os.walk(directory_path):
      # Skip hidden files and folders (like .svn and .git).
      files = [f for f in files if f[0] != '.']
      # Assign in place so that os.walk does not descend into hidden folders.
      dirs[:] = [d for d in dirs if d[0] != '.']

      for f in files:
        file_path = os.path.join(root, f)
        if not os.path.exists(file_path):  # Allow for '.#' files
          continue
        self.AddFileToResourceMap(file_path)

  def AddFileToResourceMap(self, file_path):
    """Loads file_path into the in-memory resource map.

    HTML, CSS and JavaScript files are stored gzip-compressed; the stored
    'content-length' is the length of the stored (possibly compressed) bytes.
    A file named index.html is additionally registered under its directory.
    """
    file_path = os.path.realpath(file_path)
    if file_path in self.resource_map:
      return

    with open(file_path, 'rb') as fd:
      response = fd.read()
      fs = os.fstat(fd.fileno())
    content_type = mimetypes.guess_type(file_path)[0]
    zipped = False
    if content_type in ['text/html', 'text/css', 'application/javascript']:
      zipped = True
      sio = StringIO.StringIO()
      gzf = gzip.GzipFile(fileobj=sio, compresslevel=9, mode='wb')
      gzf.write(response)
      gzf.close()
      response = sio.getvalue()
      sio.close()
    self.resource_map[file_path] = {
        'content-type': content_type,
        'content-length': len(response),
        'last-modified': fs.st_mtime,
        'response': response,
        'zipped': zipped
        }

    index = 'index.html'
    if os.path.basename(file_path) == index:
      # Requests for the directory itself get the same entry as index.html.
      dir_path = os.path.dirname(file_path)
      self.resource_map[dir_path] = self.resource_map[file_path]


class MemoryCacheHTTPServerBackend(local_server.LocalServerBackend):
  """Backend that creates and runs a _MemoryCacheHTTPServerImpl."""

  def __init__(self):
    super(MemoryCacheHTTPServerBackend, self).__init__()
    self._httpd = None

  def StartAndGetNamedPorts(self, args):
    """Changes into the base directory and starts the HTTP server.

    Exits the process with status 1 if any path lies outside the base
    directory.

    Args:
      args: Dict with the keys 'base_dir', 'paths', 'host' and 'port'.
    Returns:
      A list with a single local_server.NamedPort named 'http' carrying the
      port the server is bound to.
    """
    base_dir = args['base_dir']
    os.chdir(base_dir)

    paths = args['paths']
    cwd = os.path.realpath(os.getcwd())
    # Compare against the cwd with a trailing separator so that a sibling
    # such as '/a/bc' is not mistaken for something under '/a/b'.
    cwd_prefix = os.path.join(cwd, '')
    for path in paths:
      real_path = os.path.realpath(path)
      if real_path != cwd and not real_path.startswith(cwd_prefix):
        print >> sys.stderr, '"%s" is not under the cwd.' % path
        sys.exit(1)

    server_address = (args['host'], args['port'])
    MemoryCacheHTTPRequestHandler.protocol_version = 'HTTP/1.1'
    self._httpd = _MemoryCacheHTTPServerImpl(
        server_address, MemoryCacheHTTPRequestHandler, paths)
    return [local_server.NamedPort('http', self._httpd.server_address[1])]

  def ServeForever(self):
    """Runs the HTTP server's serve_forever loop."""
    return self._httpd.serve_forever()


class MemoryCacheHTTPServer(local_server.LocalServer):
  """Local server that serves a fixed set of paths from memory."""

  def __init__(self, paths):
    """Records the paths to serve and derives their common base directory.

    Args:
      paths: Iterable of existing file and/or directory paths.
    """
    super(MemoryCacheHTTPServer, self).__init__(
        MemoryCacheHTTPServerBackend)
    self._base_dir = None

    for path in paths:
      assert os.path.exists(path), '%s does not exist.' % path

    paths = list(paths)
    self._paths = paths

    self._paths_as_set = set(map(os.path.realpath, paths))

    # commonprefix works character by character, so the result may end in the
    # middle of a path component; fall back to its parent directory then.
    common_prefix = os.path.commonprefix(paths)
    if os.path.isdir(common_prefix):
      self._base_dir = common_prefix
    else:
      self._base_dir = os.path.dirname(common_prefix)

  def GetBackendStartupArgs(self):
    """Returns the args dict consumed by the backend's startup method."""
    # Port 0 is requested; the backend reports the port actually bound.
    return {'base_dir': self._base_dir,
            'paths': self._paths,
            'host': self.host_ip,
            'port': 0}

  @property
  def paths(self):
    """The set of real paths this server was created with."""
    return self._paths_as_set

  @property
  def url(self):
    """The URL reported by this server's forwarder."""
    return self.forwarder.url

  def UrlOf(self, path):
    """Returns the URL for path, taken relative to the base directory."""
    relative_path = os.path.relpath(path, self._base_dir)
    # Preserve trailing slash or backslash.
    # It doesn't matter in a file path, but it does matter in a URL.
    if path.endswith(os.sep) or (os.altsep and path.endswith(os.altsep)):
      relative_path += '/'
    return urlparse.urljoin(self.url, relative_path.replace(os.sep, '/'))