• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/env python
2# Copyright 2010 Google Inc. All Rights Reserved.
3#
4# Licensed under the Apache License, Version 2.0 (the "License");
5# you may not use this file except in compliance with the License.
6# You may obtain a copy of the License at
7#
8#      http://www.apache.org/licenses/LICENSE-2.0
9#
10# Unless required by applicable law or agreed to in writing, software
11# distributed under the License is distributed on an "AS IS" BASIS,
12# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13# See the License for the specific language governing permissions and
14# limitations under the License.
15
16"""Replays web pages under simulated network conditions.
17
18Must be run as administrator (sudo).
19
20To record web pages:
21  1. Start the program in record mode.
22     $ sudo ./replay.py --record archive.wpr
23  2. Load the web pages you want to record in a web browser. It is important to
24     clear browser caches before this so that all subresources are requested
25     from the network.
26  3. Kill the process to stop recording.
27
28To replay web pages:
29  1. Start the program in replay mode with a previously recorded archive.
30     $ sudo ./replay.py archive.wpr
31  2. Load recorded pages in a web browser. A 404 will be served for any pages or
32     resources not in the recorded archive.
33
34Network simulation examples:
35  # 128KByte/s uplink bandwidth, 4Mbit/s downlink bandwidth with 100ms RTT
36  $ sudo ./replay.py --up 128KByte/s --down 4Mbit/s --delay_ms=100 archive.wpr
37
38  # 1% packet loss rate
39  $ sudo ./replay.py --packet_loss_rate=0.01 archive.wpr
40"""
41
42import argparse
43import json
44import logging
45import os
46import socket
47import sys
48import traceback
49
50import customhandlers
51import dnsproxy
52import httparchive
53import httpclient
54import httpproxy
55import net_configs
56import platformsettings
57import rules_parser
58import script_injector
59import servermanager
60import trafficshaper
61
# Compare the numeric version tuple rather than the sys.version string:
# string comparison is lexicographic, so e.g. '2.10' would sort before '2.6'.
if sys.version_info < (2, 6):
  print('Need Python 2.6 or greater.')
  sys.exit(1)
65
66
def configure_logging(log_level_name, log_file_name=None):
  """Set up the root logger's level, format, and extra handlers.

  Args:
    log_level_name: 'debug', 'info', 'warning', 'error', or 'critical'.
    log_file_name: optional path of a file that also receives log records.
  """
  # Handlers already on the root logger mean something logged too early.
  if logging.root.handlers:
    logging.critical('A logging method (e.g. "logging.warn(...)")'
                     ' was called before logging was configured.')

  level = getattr(logging, log_level_name.upper())
  fmt = ('(%(levelname)s) %(asctime)s %(module)s.%(funcName)s:%(lineno)d  '
         '%(message)s')
  logging.basicConfig(level=level, format=fmt)
  root_logger = logging.getLogger()

  if log_file_name:
    file_handler = logging.FileHandler(log_file_name)
    file_handler.setLevel(level)
    file_handler.setFormatter(logging.Formatter(fmt))
    root_logger.addHandler(file_handler)

  # The platform layer may supply an additional handler (or a falsy value).
  platform_handler = platformsettings.get_system_logging_handler()
  if platform_handler:
    root_logger.addHandler(platform_handler)
93
94
def AddDnsForward(server_manager, host):
  """Forward DNS traffic to |host|.

  Queues platformsettings.set_temporary_primary_nameserver, with |host| as
  its argument, on |server_manager|.
  """
  set_nameserver = platformsettings.set_temporary_primary_nameserver
  server_manager.Append(set_nameserver, host)
98
99
def AddDnsProxy(server_manager, options, host, port, real_dns_lookup,
                http_archive):
  """Queue a DNS proxy server, with optional filters, on |server_manager|.

  Args:
    server_manager: object that collects servers and record/replay callbacks.
    options: option values; reads dns_private_passthrough, shaping_dns and
        record.
    host: address handed to the DNS proxy server and to the replay lookup.
    port: port handed to the DNS proxy server.
    real_dns_lookup: lookup object given to the private-IP filter.
    http_archive: archive given to the private-IP filter.
  """
  filters = []

  if options.dns_private_passthrough:
    passthrough = dnsproxy.PrivateIpFilter(real_dns_lookup, http_archive)
    filters.append(passthrough)
    # The same initializer runs on entering either mode.
    server_manager.AppendRecordCallback(passthrough.InitializeArchiveHosts)
    server_manager.AppendReplayCallback(passthrough.InitializeArchiveHosts)

  if options.shaping_dns:
    delayer = dnsproxy.DelayFilter(options.record, **options.shaping_dns)
    filters.append(delayer)
    server_manager.AppendRecordCallback(delayer.SetRecordMode)
    server_manager.AppendReplayCallback(delayer.SetReplayMode)

  replay_lookup = dnsproxy.ReplayDnsLookup(host, filters)
  server_manager.Append(dnsproxy.DnsProxyServer, host, port,
                        dns_lookup=replay_lookup)
115
116
def AddWebProxy(server_manager, options, host, real_dns_lookup, http_archive):
  """Queue the HTTP proxy and any optional HTTPS proxies on |server_manager|.

  Always queues an HTTP proxy on options.port. When options.ssl is set, an
  HTTPS proxy is queued on options.ssl_port; its class depends on
  options.should_generate_certs. When options.http_to_https_port is set, an
  HTTP-to-HTTPS proxy is queued on that port.

  Args:
    server_manager: object that collects servers and record/replay callbacks.
    options: option values for the proxies.
    host: address every proxy is bound to.
    real_dns_lookup: lookup object handed to the archive fetcher.
    http_archive: archive handed to the custom handlers and the fetcher.
  """
  rules_path = options.rules_path
  if not rules_path:
    rules = rules_parser.Rules()
  else:
    with open(rules_path) as rules_file:
      permitted = [
          item.strip() for item in options.allowed_rule_imports.split(',')]
      rules = rules_parser.Rules(rules_file, permitted)
    logging.info('Parsed %s rules:\n%s', options.rules_path, rules)

  inject_script = script_injector.GetInjectScript(options.inject_scripts)
  custom_handlers = customhandlers.CustomHandlers(options, http_archive)
  custom_handlers.add_server_manager_handler(server_manager)

  archive_fetch = httpclient.ControllableHttpArchiveFetch(
      http_archive, real_dns_lookup,
      inject_script,
      options.diff_unknown_requests, options.record,
      use_closest_match=options.use_closest_match,
      scramble_images=options.scramble_images)
  server_manager.AppendRecordCallback(archive_fetch.SetRecordMode)
  server_manager.AppendReplayCallback(archive_fetch.SetReplayMode)

  # Keyword arguments that every proxy server below receives unchanged.
  # 304 generation is only allowed when not recording.
  shared_kwargs = dict(
      host=host,
      use_delays=options.use_server_delay,
      allow_generate_304=not options.record,
      **options.shaping_http)

  server_manager.Append(
      httpproxy.HttpProxyServer,
      archive_fetch, custom_handlers, rules,
      port=options.port, **shared_kwargs)

  if options.ssl:
    https_server = (httpproxy.HttpsProxyServer
                    if options.should_generate_certs
                    else httpproxy.SingleCertHttpsProxyServer)
    server_manager.Append(
        https_server, archive_fetch, custom_handlers, rules,
        options.https_root_ca_cert_path,
        port=options.ssl_port, **shared_kwargs)

  if options.http_to_https_port:
    server_manager.Append(
        httpproxy.HttpToHttpsProxyServer,
        archive_fetch, custom_handlers, rules,
        port=options.http_to_https_port, **shared_kwargs)
166
167
def AddTrafficShaper(server_manager, options, host):
  """Queue a traffic shaper on |server_manager| if dummynet shaping is set.

  Does nothing when options.shaping_dummynet is empty. Loopback shaping is
  requested only when not in server mode and |host| is '127.0.0.1'.
  """
  shaping_kwargs = options.shaping_dummynet
  if not shaping_kwargs:
    return
  loopback = not options.server_mode and host == '127.0.0.1'
  server_manager.AppendTrafficShaper(
      trafficshaper.TrafficShaper, host=host,
      use_loopback=loopback,
      **shaping_kwargs)
174
175
class OptionsWrapper(object):
  """Add checks, updates, and methods to option values.

  Wraps the namespace returned by the argument parser. Attributes that are
  not found on the wrapper itself are looked up on the wrapped namespace.
  Construction runs the validation checks (which call parser.error on
  failure) and computes the derived shaping_* keyword-argument dicts.

  Example:
    options = arg_parser.parse_args()
    options = OptionsWrapper(options, arg_parser)  # run checks and updates
    if options.record and options.shaping_dummynet:
       [...]
  """
  _TRAFFICSHAPING_OPTIONS = {
      'down', 'up', 'delay_ms', 'packet_loss_rate', 'init_cwnd', 'net'}
  # Pairs of (option, options that may not be combined with it).
  _CONFLICTING_OPTIONS = (
      ('record', ('down', 'up', 'delay_ms', 'packet_loss_rate', 'net',
                  'spdy', 'use_server_delay')),
      ('append', ('down', 'up', 'delay_ms', 'packet_loss_rate', 'net',
                  'use_server_delay')),  # same as --record
      ('net', ('down', 'up', 'delay_ms')),
      ('server', ('server_mode',)),
  )

  def __init__(self, options, parser):
    """Validate |options| and compute derived values.

    Args:
      options: the namespace returned by parser.parse_args().
      parser: the argparse parser; used to find defaults and report errors.
    """
    self._options = options
    self._parser = parser
    # Names of options whose parsed value is not the very same object as the
    # action's default, i.e. options that were given on the command line.
    self._nondefaults = set([
        action.dest for action in parser._optionals._actions
        if getattr(options, action.dest, action.default) is not action.default])
    self._CheckConflicts()
    self._CheckValidIp('host')
    self._CheckFeatureSupport()
    self._MassageValues()

  def _CheckConflicts(self):
    """Give an error if mutually exclusive options are used."""
    for option, bad_options in self._CONFLICTING_OPTIONS:
      if option in self._nondefaults:
        for bad_option in bad_options:
          if bad_option in self._nondefaults:
            self._parser.error('Option --%s cannot be used with --%s.' %
                                (bad_option, option))

  def _CheckValidIp(self, name):
    """Give an error if option |name| is not a valid IPv4 address."""
    value = getattr(self._options, name)
    if value:
      try:
        socket.inet_aton(value)
      except (socket.error, TypeError, ValueError):
        self._parser.error('Option --%s must be a valid IPv4 address.' % name)

  def _CheckFeatureSupport(self):
    """Give an error if a requested feature lacks platform support."""
    if (self._options.should_generate_certs and
        not platformsettings.HasSniSupport()):
      self._parser.error('Option --should_generate_certs requires pyOpenSSL '
                         '0.13 or greater for SNI support.')

  def _ShapingKeywordArgs(self, shaping_key):
    """Return the shaping keyword args for |shaping_key|.

    Only options that were explicitly set (see self._nondefaults) are
    included.

    Args:
      shaping_key: one of 'dummynet', 'dns', 'http'.
    Returns:
      {}  # if shaping_key does not apply, or options have default values.
      {k: v, ...}
    """
    kwargs = {}
    def AddItemIfSet(d, kw_key, opt_key=None):
      opt_key = opt_key or kw_key
      if opt_key in self._nondefaults:
        d[kw_key] = getattr(self, opt_key)
    if ((self.shaping_type == 'proxy' and shaping_key in ('dns', 'http')) or
        self.shaping_type == shaping_key):
      AddItemIfSet(kwargs, 'delay_ms')
      if shaping_key in ('dummynet', 'http'):
        AddItemIfSet(kwargs, 'down_bandwidth', opt_key='down')
        AddItemIfSet(kwargs, 'up_bandwidth', opt_key='up')
        if shaping_key == 'dummynet':
          AddItemIfSet(kwargs, 'packet_loss_rate')
          AddItemIfSet(kwargs, 'init_cwnd')
        elif self.shaping_type != 'none':
          # logging.warning is the supported spelling; logging.warn is a
          # deprecated alias.
          if 'packet_loss_rate' in self._nondefaults:
            logging.warning('Shaping type, %s, ignores --packet_loss_rate=%s',
                            self.shaping_type, self.packet_loss_rate)
          if 'init_cwnd' in self._nondefaults:
            logging.warning('Shaping type, %s, ignores --init_cwnd=%s',
                            self.shaping_type, self.init_cwnd)
    return kwargs

  def _MassageValues(self):
    """Set options that depend on the values of other options."""
    # --append implies --record.
    if self.append and not self.record:
      self._options.record = True
    # A named network config supplies down/up/delay_ms, which then count as
    # explicitly set.
    if self.net:
      self._options.down, self._options.up, self._options.delay_ms = \
          net_configs.GetNetConfig(self.net)
      self._nondefaults.update(['down', 'up', 'delay_ms'])
    if not self.ssl:
      self._options.https_root_ca_cert_path = None
    self.shaping_dns = self._ShapingKeywordArgs('dns')
    self.shaping_http = self._ShapingKeywordArgs('http')
    self.shaping_dummynet = self._ShapingKeywordArgs('dummynet')

  def __getattr__(self, name):
    """Make the original option values available.

    Raises:
      AttributeError: if |name| is not an attribute of the wrapped options,
          or if the wrapped options have not been set yet.
    """
    # __getattr__ only runs when normal lookup fails, so a request for
    # '_options' means it is not set yet (bare instance, copy, unpickle).
    # Delegating would re-enter this method without end.
    if name == '_options':
      raise AttributeError(name)
    return getattr(self._options, name)

  def __repr__(self):
    """Return a json representation of the original options dictionary."""
    return json.dumps(self._options.__dict__)

  def IsRootRequired(self):
    """Returns True iff the options require whole program root access."""
    if self.server:
      return True

    def IsPrivilegedPort(port):
      return port and port < 1024

    if IsPrivilegedPort(self.port) or (self.ssl and
                                       IsPrivilegedPort(self.ssl_port)):
      return True

    if self.dns_forwarding:
      if IsPrivilegedPort(self.dns_port):
        return True
      if not self.server_mode and self.host == '127.0.0.1':
        return True

    return False
304
305
def replay(options, replay_filename):
  """Set up the servers selected by |options| and run them until stopped.

  Args:
    options: an OptionsWrapper holding the parsed command-line options.
    replay_filename: path of the archive to load and/or save; unused when
        options.server is set.
  Returns:
    0 after a normal shutdown (including KeyboardInterrupt), 1 if a known
    DNS proxy / traffic shaper / platform error was raised, 2 for any other
    exception.
  """
  if options.admin_check and options.IsRootRequired():
    platformsettings.rerun_as_administrator()
  configure_logging(options.log_level, options.log_file)
  server_manager = servermanager.ServerManager(options.record)
  # Stays None with --server, where only DNS forwarding is set up and no
  # archive is loaded or created.
  http_archive = None
  if options.server:
    AddDnsForward(server_manager, options.server)
  else:
    real_dns_lookup = dnsproxy.RealDnsLookup(
        name_servers=[platformsettings.get_original_primary_nameserver()])
    if options.record:
      httparchive.HttpArchive.AssertWritable(replay_filename)
      if options.append and os.path.exists(replay_filename):
        http_archive = httparchive.HttpArchive.Load(replay_filename)
        logging.info('Appending to %s (loaded %d existing responses)',
                     replay_filename, len(http_archive))
      else:
        http_archive = httparchive.HttpArchive()
    else:
      http_archive = httparchive.HttpArchive.Load(replay_filename)
      logging.info('Loaded %d responses from %s',
                   len(http_archive), replay_filename)
    server_manager.AppendRecordCallback(real_dns_lookup.ClearCache)
    server_manager.AppendRecordCallback(http_archive.clear)

    ipfw_dns_host = None
    if options.dns_forwarding or options.shaping_dummynet:
      # compute the ip/host used for the DNS server and traffic shaping
      ipfw_dns_host = options.host
      if not ipfw_dns_host:
        ipfw_dns_host = platformsettings.get_server_ip_address(
            options.server_mode)

    if options.dns_forwarding:
      if not options.server_mode and ipfw_dns_host == '127.0.0.1':
        AddDnsForward(server_manager, ipfw_dns_host)
      AddDnsProxy(server_manager, options, ipfw_dns_host, options.dns_port,
                  real_dns_lookup, http_archive)
    # Fall back to the certificate file that sits next to this script.
    if options.ssl and options.https_root_ca_cert_path is None:
      options.https_root_ca_cert_path = os.path.join(os.path.dirname(__file__),
                                                     'wpr_cert.pem')
    http_proxy_address = options.host
    if not http_proxy_address:
      http_proxy_address = platformsettings.get_httpproxy_ip_address(
          options.server_mode)
    AddWebProxy(server_manager, options, http_proxy_address, real_dns_lookup,
                http_archive)
    AddTrafficShaper(server_manager, options, ipfw_dns_host)

  exit_status = 0
  try:
    server_manager.Run()
  except KeyboardInterrupt:
    logging.info('Shutting down.')
  except (dnsproxy.DnsProxyException,
          trafficshaper.TrafficShaperException,
          platformsettings.NotAdministratorError,
          platformsettings.DnsUpdateError) as e:
    logging.critical('%s: %s', e.__class__.__name__, e)
    exit_status = 1
  except Exception:
    logging.critical(traceback.format_exc())
    exit_status = 2

  # With --server there is no archive to save, even if --record was given.
  # Compare against None explicitly: an empty archive must still be saved.
  if options.record and http_archive is not None:
    http_archive.Persist(replay_filename)
    logging.info('Saved %d responses to %s', len(http_archive), replay_filename)
  return exit_status
374
375
def GetParser():
  """Build the command-line parser for this script.

  Returns:
    An argparse.ArgumentParser with one optional positional argument
    (replay_filename), the general options, and the 'Network Simulation
    Options' and 'Replay Harness Options' groups.
  """
  arg_parser = argparse.ArgumentParser(
      usage='%(prog)s [options] replay_file',
      description=__doc__,
      formatter_class=argparse.RawDescriptionHelpFormatter,
      epilog='http://code.google.com/p/web-page-replay/')

  # Optional here; main() reports an error when it is required but missing.
  arg_parser.add_argument('replay_filename', type=str, help='Replay file',
                          nargs='?')

  arg_parser.add_argument('-r', '--record', default=False,
      action='store_true',
      help='Download real responses and record them to replay_file')
  arg_parser.add_argument('--append', default=False,
      action='store_true',
      help='Append responses to replay_file.')
  arg_parser.add_argument('-l', '--log_level', default='debug',
      action='store',
      type=str,
      choices=('debug', 'info', 'warning', 'error', 'critical'),
      help='Minimum verbosity level to log')
  arg_parser.add_argument('-f', '--log_file', default=None,
      action='store',
      type=str,
      help='Log file to use in addition to writing logs to stderr.')

  network_group = arg_parser.add_argument_group(
      title='Network Simulation Options',
      description=('These options configure the network simulation in '
                   'replay mode'))
  network_group.add_argument('-u', '--up', default='0',
      action='store',
      type=str,
      help='Upload Bandwidth in [K|M]{bit/s|Byte/s}. Zero means unlimited.')
  network_group.add_argument('-d', '--down', default='0',
      action='store',
      type=str,
      help='Download Bandwidth in [K|M]{bit/s|Byte/s}. Zero means unlimited.')
  network_group.add_argument('-m', '--delay_ms', default='0',
      action='store',
      type=str,
      help='Propagation delay (latency) in milliseconds. Zero means no delay.')
  network_group.add_argument('-p', '--packet_loss_rate', default='0',
      action='store',
      type=str,
      help='Packet loss rate in range [0..1]. Zero means no loss.')
  network_group.add_argument('-w', '--init_cwnd', default='0',
      action='store',
      type=str,
      help='Set initial cwnd (linux only, requires kernel patch)')
  network_group.add_argument('--net', default=None,
      action='store',
      type=str,
      choices=net_configs.NET_CONFIG_NAMES,
      help='Select a set of network options: %s.' % ', '.join(
          net_configs.NET_CONFIG_NAMES))
  network_group.add_argument('--shaping_type', default='dummynet',
      action='store',
      choices=('dummynet', 'proxy'),
      help='When shaping is configured (i.e. --up, --down, etc.) decides '
           'whether to use |dummynet| (default), or |proxy| servers.')

  harness_group = arg_parser.add_argument_group(
      title='Replay Harness Options',
      description=('These advanced options configure various aspects '
                   'of the replay harness'))
  harness_group.add_argument('-S', '--server', default=None,
      action='store',
      type=str,
      help='IP address of host running "replay.py --server_mode". '
           'This only changes the primary DNS nameserver to use the given IP.')
  harness_group.add_argument('-M', '--server_mode', default=False,
      action='store_true',
      help='Run replay DNS & http proxies, and trafficshaping on --port '
           'without changing the primary DNS nameserver. '
           'Other hosts may connect to this using "replay.py --server" '
           'or by pointing their DNS to this server.')
  harness_group.add_argument('-i', '--inject_scripts', default='deterministic.js',
      action='store',
      dest='inject_scripts',
      help='A comma separated list of JavaScript sources to inject in all '
           'pages. By default a script is injected that eliminates sources '
           'of entropy such as Date() and Math.random() deterministic. '
           'CAUTION: Without deterministic.js, many pages will not replay.')
  harness_group.add_argument('-D', '--no-diff_unknown_requests', default=True,
      action='store_false',
      dest='diff_unknown_requests',
      help='During replay, do not show a diff of unknown requests against '
           'their nearest match in the archive.')
  # Adjacent string literals are concatenated verbatim, so each fragment
  # below must end with (or the next must start with) a space.
  harness_group.add_argument('-C', '--use_closest_match', default=False,
      action='store_true',
      dest='use_closest_match',
      help='During replay, if a request is not found, serve the closest match '
           'in the archive instead of giving a 404.')
  harness_group.add_argument('-U', '--use_server_delay', default=False,
      action='store_true',
      dest='use_server_delay',
      help='During replay, simulate server delay by delaying response time to '
           'requests.')
  harness_group.add_argument('-I', '--screenshot_dir', default=None,
      action='store',
      type=str,
      help='Save PNG images of the loaded page in the given directory.')
  harness_group.add_argument('-P', '--no-dns_private_passthrough', default=True,
      action='store_false',
      dest='dns_private_passthrough',
      help='Don\'t forward DNS requests that resolve to private network '
           'addresses. CAUTION: With this option important services like '
           'Kerberos will resolve to the HTTP proxy address.')
  harness_group.add_argument('-x', '--no-dns_forwarding', default=True,
      action='store_false',
      dest='dns_forwarding',
      help='Don\'t forward DNS requests to the local replay server. '
           'CAUTION: With this option an external mechanism must be used to '
           'forward traffic to the replay server.')
  harness_group.add_argument('--host', default=None,
      action='store',
      type=str,
      help='The IP address to bind all servers to. Defaults to 0.0.0.0 or '
           '127.0.0.1, depending on --server_mode and platform.')
  harness_group.add_argument('-o', '--port', default=80,
      action='store',
      type=int,
      help='Port number to listen on.')
  harness_group.add_argument('--ssl_port', default=443,
      action='store',
      type=int,
      help='SSL port number to listen on.')
  harness_group.add_argument('--http_to_https_port', default=None,
      action='store',
      type=int,
      help='Port on which WPR will listen for HTTP requests that it will send '
           'along as HTTPS requests.')
  harness_group.add_argument('--dns_port', default=53,
      action='store',
      type=int,
      help='DNS port number to listen on.')
  harness_group.add_argument('-c', '--https_root_ca_cert_path', default=None,
      action='store',
      type=str,
      help='Certificate file to use with SSL (gets auto-generated if needed).')
  harness_group.add_argument('--no-ssl', default=True,
      action='store_false',
      dest='ssl',
      help='Do not setup an SSL proxy.')
  harness_group.add_argument('--should_generate_certs', default=False,
      action='store_true',
      help='Use OpenSSL to generate certificate files for requested hosts.')
  harness_group.add_argument('--no-admin-check', default=True,
      action='store_false',
      dest='admin_check',
      help='Do not check if administrator access is needed.')
  harness_group.add_argument('--scramble_images', default=False,
      action='store_true',
      dest='scramble_images',
      help='Scramble image responses.')
  harness_group.add_argument('--rules_path', default=None,
      action='store',
      help='Path of file containing Python rules.')
  harness_group.add_argument('--allowed_rule_imports', default='rules',
      action='store',
      help='A comma-separated list of allowed rule imports, or \'*\' to allow'
           ' all packages.  Defaults to %(default)s.')
  return arg_parser
540
541
def main():
  """Parse the command line and run replay().

  Returns:
    The exit status returned by replay().
  """
  parser = GetParser()
  options = OptionsWrapper(parser.parse_args(), parser)

  if options.server:
    # With --server no archive is used, so any file argument is dropped.
    options.replay_filename = None
  elif options.replay_filename is None:
    parser.error('Must specify a replay_file')
  return replay(options, options.replay_filename)
552
553
if __name__ == '__main__':
  # Hand the status returned by main() back to the calling process.
  exit_status = main()
  sys.exit(exit_status)
556