#!/usr/bin/env python
# Copyright 2010 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Replays web pages under simulated network conditions.

Must be run as administrator (sudo).

To record web pages:
  1. Start the program in record mode.
     $ sudo ./replay.py --record archive.wpr
  2. Load the web pages you want to record in a web browser. It is important to
     clear browser caches before this so that all subresources are requested
     from the network.
  3. Kill the process to stop recording.

To replay web pages:
  1. Start the program in replay mode with a previously recorded archive.
     $ sudo ./replay.py archive.wpr
  2. Load recorded pages in a web browser. A 404 will be served for any pages or
     resources not in the recorded archive.

Network simulation examples:
  # 128KByte/s uplink bandwidth, 4Mbps/s downlink bandwidth with 100ms RTT time
  $ sudo ./replay.py --up 128KByte/s --down 4Mbit/s --delay_ms=100 archive.wpr

  # 1% packet loss rate
  $ sudo ./replay.py --packet_loss_rate=0.01 archive.wpr
"""

import argparse
import json
import logging
import os
import socket
import sys
import traceback

import customhandlers
import dnsproxy
import httparchive
import httpclient
import httpproxy
import net_configs
import platformsettings
import rules_parser
import script_injector
import servermanager
import trafficshaper

# Compare the version tuple, not the version string: a lexicographic string
# comparison misorders versions such as '2.10' or '10.0' against '2.6'.
if sys.version_info < (2, 6):
  print('Need Python 2.6 or greater.')
  sys.exit(1)


def configure_logging(log_level_name, log_file_name=None):
  """Configure logging level and format.

  Installs the root logging configuration, optionally adds a file handler,
  and adds the handler returned by
  platformsettings.get_system_logging_handler() when it is truthy.

  Args:
    log_level_name: 'debug', 'info', 'warning', 'error', or 'critical'.
    log_file_name: a file name to log to in addition to the default handler,
        or None for no log file.
  """
  if logging.root.handlers:
    # A handler already exists, which means something logged before this
    # function ran; report it so the early call can be tracked down.
    logging.critical('A logging method (e.g. "logging.warn(...)")'
                     ' was called before logging was configured.')
  log_level = getattr(logging, log_level_name.upper())
  log_format = (
      '(%(levelname)s) %(asctime)s %(module)s.%(funcName)s:%(lineno)d  '
      '%(message)s')

  logging.basicConfig(level=log_level, format=log_format)
  logger = logging.getLogger()
  if log_file_name:
    fh = logging.FileHandler(log_file_name)
    fh.setLevel(log_level)
    fh.setFormatter(logging.Formatter(log_format))
    logger.addHandler(fh)
  system_handler = platformsettings.get_system_logging_handler()
  if system_handler:
    logger.addHandler(system_handler)


def AddDnsForward(server_manager, host):
  """Forward DNS traffic.

  Registers platformsettings.set_temporary_primary_nameserver with
  |server_manager|, passing |host| as its argument.

  Args:
    server_manager: the server manager to append to.
    host: the nameserver address handed to the platform setting.
  """
  server_manager.Append(platformsettings.set_temporary_primary_nameserver, host)


def AddDnsProxy(server_manager, options, host, port, real_dns_lookup,
                http_archive):
  """Register a DNS proxy server, with optional filters, on |server_manager|.

  Args:
    server_manager: the server manager to append the DNS proxy to.
    options: option values; reads dns_private_passthrough, shaping_dns and
        record.
    host: address the DNS proxy is created with; also passed to
        dnsproxy.ReplayDnsLookup.
    port: port the DNS proxy is created with.
    real_dns_lookup: lookup object handed to the private IP filter.
    http_archive: archive handed to the private IP filter.
  """
  dns_filters = []
  if options.dns_private_passthrough:
    private_filter = dnsproxy.PrivateIpFilter(real_dns_lookup, http_archive)
    dns_filters.append(private_filter)
    # The same initializer is run on entering either mode.
    server_manager.AppendRecordCallback(private_filter.InitializeArchiveHosts)
    server_manager.AppendReplayCallback(private_filter.InitializeArchiveHosts)
  if options.shaping_dns:
    delay_filter = dnsproxy.DelayFilter(options.record, **options.shaping_dns)
    dns_filters.append(delay_filter)
    server_manager.AppendRecordCallback(delay_filter.SetRecordMode)
    server_manager.AppendReplayCallback(delay_filter.SetReplayMode)
  server_manager.Append(dnsproxy.DnsProxyServer, host, port,
                        dns_lookup=dnsproxy.ReplayDnsLookup(host, dns_filters))


def AddWebProxy(server_manager, options, host, real_dns_lookup, http_archive):
  """Register the HTTP (and optionally HTTPS) proxy servers.

  Args:
    server_manager: the server manager to append the proxies to.
    options: option values; reads rules_path, allowed_rule_imports,
        inject_scripts, diff_unknown_requests, record, use_closest_match,
        scramble_images, port, use_server_delay, shaping_http, ssl,
        should_generate_certs, https_root_ca_cert_path, ssl_port and
        http_to_https_port.
    host: address every proxy server is created with.
    real_dns_lookup: lookup object handed to the archive fetcher.
    http_archive: archive handed to the archive fetcher and custom handlers.
  """
  if options.rules_path:
    with open(options.rules_path) as file_obj:
      allowed_imports = [
          name.strip() for name in options.allowed_rule_imports.split(',')]
      rules = rules_parser.Rules(file_obj, allowed_imports)
      logging.info('Parsed %s rules:\n%s', options.rules_path, rules)
  else:
    rules = rules_parser.Rules()
  inject_script = script_injector.GetInjectScript(options.inject_scripts)
  custom_handlers = customhandlers.CustomHandlers(options, http_archive)
  custom_handlers.add_server_manager_handler(server_manager)
  archive_fetch = httpclient.ControllableHttpArchiveFetch(
      http_archive, real_dns_lookup,
      inject_script,
      options.diff_unknown_requests, options.record,
      use_closest_match=options.use_closest_match,
      scramble_images=options.scramble_images)
  server_manager.AppendRecordCallback(archive_fetch.SetRecordMode)
  server_manager.AppendReplayCallback(archive_fetch.SetReplayMode)
  # 304 generation is only enabled when not recording.
  allow_generate_304 = not options.record
  server_manager.Append(
      httpproxy.HttpProxyServer,
      archive_fetch, custom_handlers, rules,
      host=host, port=options.port, use_delays=options.use_server_delay,
      allow_generate_304=allow_generate_304,
      **options.shaping_http)
  if options.ssl:
    if options.should_generate_certs:
      server_manager.Append(
          httpproxy.HttpsProxyServer, archive_fetch, custom_handlers, rules,
          options.https_root_ca_cert_path, host=host, port=options.ssl_port,
          allow_generate_304=allow_generate_304,
          use_delays=options.use_server_delay, **options.shaping_http)
    else:
      server_manager.Append(
          httpproxy.SingleCertHttpsProxyServer, archive_fetch,
          custom_handlers, rules, options.https_root_ca_cert_path, host=host,
          port=options.ssl_port, use_delays=options.use_server_delay,
          allow_generate_304=allow_generate_304,
          **options.shaping_http)
  if options.http_to_https_port:
    server_manager.Append(
        httpproxy.HttpToHttpsProxyServer,
        archive_fetch, custom_handlers, rules,
        host=host, port=options.http_to_https_port,
        use_delays=options.use_server_delay,
        allow_generate_304=allow_generate_304,
        **options.shaping_http)


def AddTrafficShaper(server_manager, options, host):
  """Register a traffic shaper when dummynet shaping options are set.

  Args:
    server_manager: the server manager to append the traffic shaper to.
    options: option values; reads shaping_dummynet and server_mode.
    host: address the traffic shaper is created with.
  """
  if options.shaping_dummynet:
    server_manager.AppendTrafficShaper(
        trafficshaper.TrafficShaper, host=host,
        use_loopback=not options.server_mode and host == '127.0.0.1',
        **options.shaping_dummynet)


class OptionsWrapper(object):
  """Add checks, updates, and methods to option values.

  Attribute reads that the wrapper does not define itself are delegated to
  the wrapped options object.

  Example:
    options = arg_parser.parse_args()
    options = OptionsWrapper(options, arg_parser)  # run checks and updates
    if options.record and options.IsRootRequired():
       [...]
  """
  _TRAFFICSHAPING_OPTIONS = {
      'down', 'up', 'delay_ms', 'packet_loss_rate', 'init_cwnd', 'net'}
  # Each entry is (option, options that may not be combined with it).
  _CONFLICTING_OPTIONS = (
      ('record', ('down', 'up', 'delay_ms', 'packet_loss_rate', 'net',
                  'spdy', 'use_server_delay')),
      ('append', ('down', 'up', 'delay_ms', 'packet_loss_rate', 'net',
                  'use_server_delay')),  # same as --record
      ('net', ('down', 'up', 'delay_ms')),
      ('server', ('server_mode',)),
  )

  def __init__(self, options, parser):
    """Wrap |options| and run all checks and derived-value updates.

    Args:
      options: the namespace returned by parser.parse_args().
      parser: the argparse parser that produced |options|; used to discover
          defaults and to report errors.
    """
    self._options = options
    self._parser = parser
    # An option counts as "set" when its value is not the very same object
    # as the parser's default (identity, not equality).
    self._nondefaults = {
        action.dest for action in parser._optionals._actions
        if getattr(options, action.dest, action.default) is not action.default}
    self._CheckConflicts()
    self._CheckValidIp('host')
    self._CheckFeatureSupport()
    self._MassageValues()

  def _CheckConflicts(self):
    """Give an error if mutually exclusive options are used."""
    for option, bad_options in self._CONFLICTING_OPTIONS:
      if option in self._nondefaults:
        for bad_option in bad_options:
          if bad_option in self._nondefaults:
            self._parser.error('Option --%s cannot be used with --%s.' %
                               (bad_option, option))

  def _CheckValidIp(self, name):
    """Give an error if option |name| is not a valid IPv4 address."""
    value = getattr(self._options, name)
    if value:
      try:
        socket.inet_aton(value)
      except Exception:
        self._parser.error('Option --%s must be a valid IPv4 address.' % name)

  def _CheckFeatureSupport(self):
    """Give an error if certificate generation is requested without SNI."""
    if (self._options.should_generate_certs and
        not platformsettings.HasSniSupport()):
      self._parser.error('Option --should_generate_certs requires pyOpenSSL '
                         '0.13 or greater for SNI support.')

  def _ShapingKeywordArgs(self, shaping_key):
    """Return the shaping keyword args for |shaping_key|.

    Args:
      shaping_key: one of 'dummynet', 'dns', 'http'.
    Returns:
      {}  # if shaping_key does not apply, or options have default values.
      {k: v, ...}
    """
    kwargs = {}
    def AddItemIfSet(d, kw_key, opt_key=None):
      # Copy the option into |d| only when it was explicitly set.
      opt_key = opt_key or kw_key
      if opt_key in self._nondefaults:
        d[kw_key] = getattr(self, opt_key)
    if ((self.shaping_type == 'proxy' and shaping_key in ('dns', 'http')) or
        self.shaping_type == shaping_key):
      AddItemIfSet(kwargs, 'delay_ms')
      if shaping_key in ('dummynet', 'http'):
        AddItemIfSet(kwargs, 'down_bandwidth', opt_key='down')
        AddItemIfSet(kwargs, 'up_bandwidth', opt_key='up')
        if shaping_key == 'dummynet':
          AddItemIfSet(kwargs, 'packet_loss_rate')
          AddItemIfSet(kwargs, 'init_cwnd')
        elif self.shaping_type != 'none':
          if 'packet_loss_rate' in self._nondefaults:
            logging.warn('Shaping type, %s, ignores --packet_loss_rate=%s',
                         self.shaping_type, self.packet_loss_rate)
          if 'init_cwnd' in self._nondefaults:
            logging.warn('Shaping type, %s, ignores --init_cwnd=%s',
                         self.shaping_type, self.init_cwnd)
    return kwargs

  def _MassageValues(self):
    """Set options that depend on the values of other options."""
    if self.append and not self.record:
      self._options.record = True
    if self.net:
      self._options.down, self._options.up, self._options.delay_ms = \
          net_configs.GetNetConfig(self.net)
      # Values filled in from --net count as explicitly set from here on.
      self._nondefaults.update(['down', 'up', 'delay_ms'])
    if not self.ssl:
      self._options.https_root_ca_cert_path = None
    self.shaping_dns = self._ShapingKeywordArgs('dns')
    self.shaping_http = self._ShapingKeywordArgs('http')
    self.shaping_dummynet = self._ShapingKeywordArgs('dummynet')

  def __getattr__(self, name):
    """Make the original option values available.

    Raises:
      AttributeError: if |name| is missing from the wrapped options, or if
          the wrapper has no wrapped options yet.
    """
    # __getattr__ only runs when normal lookup fails. If '_options' itself is
    # the missing attribute (e.g. an instance created without __init__),
    # reading self._options below would recurse without end.
    if name == '_options':
      raise AttributeError(name)
    return getattr(self._options, name)

  def __repr__(self):
    """Return a json representation of the original options dictionary."""
    return json.dumps(self._options.__dict__)

  def IsRootRequired(self):
    """Returns True iff the options require whole program root access."""
    if self.server:
      return True

    def IsPrivilegedPort(port):
      return port and port < 1024

    if IsPrivilegedPort(self.port) or (self.ssl and
                                       IsPrivilegedPort(self.ssl_port)):
      return True

    if self.dns_forwarding:
      if IsPrivilegedPort(self.dns_port):
        return True
      if not self.server_mode and self.host == '127.0.0.1':
        return True

    return False


def replay(options, replay_filename):
  """Set up the configured servers, run them, and save any recording.

  Args:
    options: an OptionsWrapper holding the parsed command-line options.
    replay_filename: path of the archive to record to or replay from; not
        used when options.server is set.
  Returns:
    0 on a clean run or keyboard interrupt, 1 for the known server and
    platform errors caught below, 2 for any other exception.
  """
  if options.admin_check and options.IsRootRequired():
    platformsettings.rerun_as_administrator()
  configure_logging(options.log_level, options.log_file)
  server_manager = servermanager.ServerManager(options.record)
  # Stays None on the --server path, where no archive is loaded or created.
  http_archive = None
  if options.server:
    AddDnsForward(server_manager, options.server)
  else:
    real_dns_lookup = dnsproxy.RealDnsLookup(
        name_servers=[platformsettings.get_original_primary_nameserver()])
    if options.record:
      httparchive.HttpArchive.AssertWritable(replay_filename)
      if options.append and os.path.exists(replay_filename):
        http_archive = httparchive.HttpArchive.Load(replay_filename)
        logging.info('Appending to %s (loaded %d existing responses)',
                     replay_filename, len(http_archive))
      else:
        http_archive = httparchive.HttpArchive()
    else:
      http_archive = httparchive.HttpArchive.Load(replay_filename)
      logging.info('Loaded %d responses from %s',
                   len(http_archive), replay_filename)
    server_manager.AppendRecordCallback(real_dns_lookup.ClearCache)
    server_manager.AppendRecordCallback(http_archive.clear)

    ipfw_dns_host = None
    if options.dns_forwarding or options.shaping_dummynet:
      # compute the ip/host used for the DNS server and traffic shaping
      ipfw_dns_host = options.host
      if not ipfw_dns_host:
        ipfw_dns_host = platformsettings.get_server_ip_address(
            options.server_mode)

    if options.dns_forwarding:
      if not options.server_mode and ipfw_dns_host == '127.0.0.1':
        AddDnsForward(server_manager, ipfw_dns_host)
      AddDnsProxy(server_manager, options, ipfw_dns_host, options.dns_port,
                  real_dns_lookup, http_archive)
    if options.ssl and options.https_root_ca_cert_path is None:
      options.https_root_ca_cert_path = os.path.join(os.path.dirname(__file__),
                                                     'wpr_cert.pem')
    http_proxy_address = options.host
    if not http_proxy_address:
      http_proxy_address = platformsettings.get_httpproxy_ip_address(
          options.server_mode)
    AddWebProxy(server_manager, options, http_proxy_address, real_dns_lookup,
                http_archive)
    AddTrafficShaper(server_manager, options, ipfw_dns_host)

  exit_status = 0
  try:
    server_manager.Run()
  except KeyboardInterrupt:
    logging.info('Shutting down.')
  except (dnsproxy.DnsProxyException,
          trafficshaper.TrafficShaperException,
          platformsettings.NotAdministratorError,
          platformsettings.DnsUpdateError) as e:
    logging.critical('%s: %s', e.__class__.__name__, e)
    exit_status = 1
  except Exception:
    logging.critical(traceback.format_exc())
    exit_status = 2

  # With --server there is no archive, so there is nothing to save even if
  # --record was also given; previously this raised NameError.
  if options.record and http_archive is not None:
    http_archive.Persist(replay_filename)
    logging.info('Saved %d responses to %s', len(http_archive), replay_filename)
  return exit_status


def GetParser():
  """Build the command-line argument parser.

  Returns:
    An argparse.ArgumentParser with the replay_filename positional argument
    and the general, network simulation, and replay harness options.
  """
  arg_parser = argparse.ArgumentParser(
      usage='%(prog)s [options] replay_file',
      description=__doc__,
      formatter_class=argparse.RawDescriptionHelpFormatter,
      epilog='http://code.google.com/p/web-page-replay/')

  arg_parser.add_argument('replay_filename', type=str, help='Replay file',
                          nargs='?')

  arg_parser.add_argument('-r', '--record', default=False,
      action='store_true',
      help='Download real responses and record them to replay_file')
  arg_parser.add_argument('--append', default=False,
      action='store_true',
      help='Append responses to replay_file.')
  arg_parser.add_argument('-l', '--log_level', default='debug',
      action='store',
      type=str,
      choices=('debug', 'info', 'warning', 'error', 'critical'),
      help='Minimum verbosity level to log')
  arg_parser.add_argument('-f', '--log_file', default=None,
      action='store',
      type=str,
      help='Log file to use in addition to writing logs to stderr.')

  network_group = arg_parser.add_argument_group(
      title='Network Simulation Options',
      description=('These options configure the network simulation in '
                   'replay mode'))
  network_group.add_argument('-u', '--up', default='0',
      action='store',
      type=str,
      help='Upload Bandwidth in [K|M]{bit/s|Byte/s}. Zero means unlimited.')
  network_group.add_argument('-d', '--down', default='0',
      action='store',
      type=str,
      help='Download Bandwidth in [K|M]{bit/s|Byte/s}. Zero means unlimited.')
  network_group.add_argument('-m', '--delay_ms', default='0',
      action='store',
      type=str,
      help='Propagation delay (latency) in milliseconds. Zero means no delay.')
  network_group.add_argument('-p', '--packet_loss_rate', default='0',
      action='store',
      type=str,
      help='Packet loss rate in range [0..1]. Zero means no loss.')
  network_group.add_argument('-w', '--init_cwnd', default='0',
      action='store',
      type=str,
      help='Set initial cwnd (linux only, requires kernel patch)')
  network_group.add_argument('--net', default=None,
      action='store',
      type=str,
      choices=net_configs.NET_CONFIG_NAMES,
      help='Select a set of network options: %s.' % ', '.join(
          net_configs.NET_CONFIG_NAMES))
  network_group.add_argument('--shaping_type', default='dummynet',
      action='store',
      choices=('dummynet', 'proxy'),
      help='When shaping is configured (i.e. --up, --down, etc.) decides '
           'whether to use |dummynet| (default), or |proxy| servers.')

  harness_group = arg_parser.add_argument_group(
      title='Replay Harness Options',
      description=('These advanced options configure various aspects '
                   'of the replay harness'))
  harness_group.add_argument('-S', '--server', default=None,
      action='store',
      type=str,
      help='IP address of host running "replay.py --server_mode". '
           'This only changes the primary DNS nameserver to use the given IP.')
  harness_group.add_argument('-M', '--server_mode', default=False,
      action='store_true',
      help='Run replay DNS & http proxies, and trafficshaping on --port '
           'without changing the primary DNS nameserver. '
           'Other hosts may connect to this using "replay.py --server" '
           'or by pointing their DNS to this server.')
  harness_group.add_argument('-i', '--inject_scripts',
      default='deterministic.js',
      action='store',
      dest='inject_scripts',
      help='A comma separated list of JavaScript sources to inject in all '
           'pages. By default a script is injected that makes sources '
           'of entropy such as Date() and Math.random() deterministic. '
           'CAUTION: Without deterministic.js, many pages will not replay.')
  harness_group.add_argument('-D', '--no-diff_unknown_requests', default=True,
      action='store_false',
      dest='diff_unknown_requests',
      help='During replay, do not show a diff of unknown requests against '
           'their nearest match in the archive.')
  harness_group.add_argument('-C', '--use_closest_match', default=False,
      action='store_true',
      dest='use_closest_match',
      help='During replay, if a request is not found, serve the closest match '
           'in the archive instead of giving a 404.')
  harness_group.add_argument('-U', '--use_server_delay', default=False,
      action='store_true',
      dest='use_server_delay',
      help='During replay, simulate server delay by delaying response time to '
           'requests.')
  harness_group.add_argument('-I', '--screenshot_dir', default=None,
      action='store',
      type=str,
      help='Save PNG images of the loaded page in the given directory.')
  harness_group.add_argument('-P', '--no-dns_private_passthrough', default=True,
      action='store_false',
      dest='dns_private_passthrough',
      help='Don\'t forward DNS requests that resolve to private network '
           'addresses. CAUTION: With this option important services like '
           'Kerberos will resolve to the HTTP proxy address.')
  harness_group.add_argument('-x', '--no-dns_forwarding', default=True,
      action='store_false',
      dest='dns_forwarding',
      help='Don\'t forward DNS requests to the local replay server. '
           'CAUTION: With this option an external mechanism must be used to '
           'forward traffic to the replay server.')
  harness_group.add_argument('--host', default=None,
      action='store',
      type=str,
      help='The IP address to bind all servers to. Defaults to 0.0.0.0 or '
           '127.0.0.1, depending on --server_mode and platform.')
  harness_group.add_argument('-o', '--port', default=80,
      action='store',
      type=int,
      help='Port number to listen on.')
  harness_group.add_argument('--ssl_port', default=443,
      action='store',
      type=int,
      help='SSL port number to listen on.')
  harness_group.add_argument('--http_to_https_port', default=None,
      action='store',
      type=int,
      help='Port on which WPR will listen for HTTP requests that it will send '
           'along as HTTPS requests.')
  harness_group.add_argument('--dns_port', default=53,
      action='store',
      type=int,
      help='DNS port number to listen on.')
  harness_group.add_argument('-c', '--https_root_ca_cert_path', default=None,
      action='store',
      type=str,
      help='Certificate file to use with SSL (gets auto-generated if needed).')
  harness_group.add_argument('--no-ssl', default=True,
      action='store_false',
      dest='ssl',
      help='Do not setup an SSL proxy.')
  harness_group.add_argument('--should_generate_certs', default=False,
      action='store_true',
      help='Use OpenSSL to generate certificate files for requested hosts.')
  harness_group.add_argument('--no-admin-check', default=True,
      action='store_false',
      dest='admin_check',
      help='Do not check if administrator access is needed.')
  harness_group.add_argument('--scramble_images', default=False,
      action='store_true',
      dest='scramble_images',
      help='Scramble image responses.')
  harness_group.add_argument('--rules_path', default=None,
      action='store',
      help='Path of file containing Python rules.')
  harness_group.add_argument('--allowed_rule_imports', default='rules',
      action='store',
      help='A comma-separated list of allowed rule imports, or \'*\' to allow'
           ' all packages.  Defaults to %(default)s.')
  return arg_parser


def main():
  """Parse the command line, validate the options, and run replay().

  Returns:
    The exit status returned by replay().
  """
  arg_parser = GetParser()
  options = arg_parser.parse_args()
  options = OptionsWrapper(options, arg_parser)

  if options.server:
    # --server only redirects DNS, so any archive argument is ignored.
    options.replay_filename = None
  elif options.replay_filename is None:
    arg_parser.error('Must specify a replay_file')
  return replay(options, options.replay_filename)


if __name__ == '__main__':
  sys.exit(main())