#!/usr/bin/env python
# Copyright 2016 the V8 project authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
'''
Usage: runtime-call-stats.py [-h] <command> ...

Optional arguments:
  -h, --help  show this help message and exit

Commands:
  run         run chrome with --runtime-call-stats and generate logs
  stats       process logs and print statistics
  json        process logs from several versions and generate JSON
  help        help information

For each command, you can try ./runtime-call-stats.py help command.
'''

import argparse
import json
import os
import re
import shutil
import subprocess
import sys
import tempfile
import operator

import numpy
import scipy
import scipy.stats
from math import sqrt


# Run benchmarks.
38def print_command(cmd_args):
39  def fix_for_printing(arg):
40    m = re.match(r'^--([^=]+)=(.*)$', arg)
41    if m and (' ' in m.group(2) or m.group(2).startswith('-')):
42      arg = "--{}='{}'".format(m.group(1), m.group(2))
43    elif ' ' in arg:
44      arg = "'{}'".format(arg)
45    return arg
46  print " ".join(map(fix_for_printing, cmd_args))
47
48
def start_replay_server(args, sites):
  """Start a web-page-replay server for the given sites.

  Writes a temporary JavaScript injection file (see generate_injection),
  launches args.replay_bin with it against the args.replay_wpr archive,
  and returns {'process': Popen, 'injection': path} for stop_replay_server.
  """
  # delete=False: the file must outlive this block; it is removed later by
  # stop_replay_server.
  with tempfile.NamedTemporaryFile(prefix='callstats-inject-', suffix='.js',
                                   mode='wt', delete=False) as f:
    injection = f.name
    generate_injection(f, sites, args.refresh)
  # Base ports 4080/4443 are shifted by --port-offset so several replay
  # servers can coexist on one machine.
  http_port = 4080 + args.port_offset
  https_port = 4443 + args.port_offset
  cmd_args = [
      args.replay_bin,
      "--port=%s" % http_port,
      "--ssl_port=%s" % https_port,
      "--no-dns_forwarding",
      "--use_closest_match",
      "--no-diff_unknown_requests",
      "--inject_scripts=deterministic.js,{}".format(injection),
      args.replay_wpr,
  ]
  print "=" * 80
  print_command(cmd_args)
  # The replay server is chatty; discard its stdout/stderr.
  with open(os.devnull, 'w') as null:
    server = subprocess.Popen(cmd_args, stdout=null, stderr=null)
  print "RUNNING REPLAY SERVER: %s with PID=%s" % (args.replay_bin, server.pid)
  print "=" * 80
  return {'process': server, 'injection': injection}


def stop_replay_server(server):
  """Terminate a replay server started by start_replay_server and delete
  its temporary injection script."""
  process = server['process']
  print("SHUTTING DOWN REPLAY SERVER %s" % process.pid)
  process.terminate()
  os.remove(server['injection'])


def generate_injection(f, sites, refreshes=0):
  """Write the JavaScript injection snippet to file object f.

  The emitted script runs inside every replayed page. It matches the
  current URL against the `sites` list (by regexp or by URL prefix,
  tolerating http->https redirects), schedules a per-site timeout, then
  dumps V8's stats via the %GetAndResetRuntimeCallStats native and
  reloads the page up to `refreshes` times, tracking the remaining count
  in sessionStorage so it survives the reloads.
  """
  # Note: this is a single py2 print with several comma-separated parts,
  # so `refreshes` and the JSON site list are spliced into the JS source.
  print >> f, """\
(function() {
  var s = window.sessionStorage.getItem("refreshCounter");
  var refreshTotal = """, refreshes, """;
  var refreshCounter = s ? parseInt(s) : refreshTotal;
  var refreshId = refreshTotal - refreshCounter;
  if (refreshCounter > 0) {
    window.sessionStorage.setItem("refreshCounter", refreshCounter-1);
  }
  function match(url, item) {
    if ('regexp' in item) { return url.match(item.regexp) !== null };
    var url_wanted = item.url;
    /* Allow automatic redirections from http to https. */
    if (url_wanted.startsWith("http://") && url.startsWith("https://")) {
      url_wanted = "https://" + url_wanted.substr(7);
    }
    return url.startsWith(url_wanted);
  };
  function onLoad(url) {
    for (var item of sites) {
      if (!match(url, item)) continue;
      var timeout = 'timeline' in item ? 2000 * item.timeline
                  : 'timeout'  in item ? 1000 * (item.timeout - 3)
                  : 10000;
      console.log("Setting time out of " + timeout + " for: " + url);
      window.setTimeout(function() {
        console.log("Time is out for: " + url);
        var msg = "STATS: (" + refreshId + ") " + url;
        %GetAndResetRuntimeCallStats(1, msg);
        if (refreshCounter > 0) {
          console.log(
              "Refresh counter is " + refreshCounter + ", refreshing: " + url);
          window.location.reload();
        }
      }, timeout);
      return;
    }
    console.log("Ignoring: " + url);
  };
  var sites =
    """, json.dumps(sites), """;
  onLoad(window.location.href);
})();"""


def run_site(site, domain, args, timeout=None):
  """Run chrome against one site and write its stats to <domain>.txt.

  Retries (up to args.retries, forever when None) until a non-empty stats
  file is produced, and repeats the whole run args.repeat times. When
  repeating, files are named "{domain}#{count}.txt".
  """
  print "="*80
  print "RUNNING DOMAIN %s" % domain
  print "="*80
  result_template = "{domain}#{count}.txt" if args.repeat else "{domain}.txt"
  count = 0
  if timeout is None: timeout = args.timeout
  if args.replay_wpr:
    # When replaying, each refresh needs its own time slice, plus slack.
    timeout *= 1 + args.refresh
    timeout += 1
  retries_since_good_run = 0
  while count == 0 or args.repeat is not None and count < args.repeat:
    count += 1
    result = result_template.format(domain=domain, count=count)
    retries = 0
    while args.retries is None or retries < args.retries:
      retries += 1
      try:
        if args.user_data_dir:
          user_data_dir = args.user_data_dir
        else:
          # Fresh profile per attempt; cleaned up in the finally below.
          user_data_dir = tempfile.mkdtemp(prefix="chr_")
        js_flags = "--runtime-call-stats"
        # --allow-natives-syntax lets the injected script call
        # %GetAndResetRuntimeCallStats (see generate_injection).
        if args.replay_wpr: js_flags += " --allow-natives-syntax"
        if args.js_flags: js_flags += " " + args.js_flags
        chrome_flags = [
            "--no-default-browser-check",
            "--no-sandbox",
            "--disable-translate",
            "--js-flags={}".format(js_flags),
            "--no-first-run",
            "--user-data-dir={}".format(user_data_dir),
        ]
        if args.replay_wpr:
          # Route all HTTP/HTTPS traffic to the local replay server.
          http_port = 4080 + args.port_offset
          https_port = 4443 + args.port_offset
          chrome_flags += [
              "--host-resolver-rules=MAP *:80 localhost:%s, "  \
                                    "MAP *:443 localhost:%s, " \
                                    "EXCLUDE localhost" % (
                                        http_port, https_port),
              "--ignore-certificate-errors",
              "--disable-seccomp-sandbox",
              "--disable-web-security",
              "--reduce-security-for-testing",
              "--allow-insecure-localhost",
          ]
        else:
          chrome_flags += [
              "--single-process",
          ]
        if args.chrome_flags:
          chrome_flags += args.chrome_flags.split()
        # Use the coreutils `timeout` binary to kill chrome after `timeout`
        # seconds; its exit status 124 is checked below.
        cmd_args = [
            "timeout", str(timeout),
            args.with_chrome
        ] + chrome_flags + [ site ]
        print "- " * 40
        print_command(cmd_args)
        print "- " * 40
        with open(result, "wt") as f:
          with open(args.log_stderr or os.devnull, 'at') as err:
            status = subprocess.call(cmd_args, stdout=f, stderr=err)
        # 124 means timeout killed chrome, 0 means the user was bored first!
        # If none of these two happened, then chrome apparently crashed, so
        # it must be called again.
        if status != 124 and status != 0:
          print("CHROME CRASHED, REPEATING RUN");
          continue
        # If the stats file is empty, chrome must be called again.
        if os.path.isfile(result) and os.path.getsize(result) > 0:
          if args.print_url:
            with open(result, "at") as f:
              print >> f
              print >> f, "URL: {}".format(site)
          retries_since_good_run = 0
          break
        # Exponential back-off: grant the site progressively more time,
        # capped after 6 consecutive empty runs.
        if retries_since_good_run < 6:
          timeout += 2 ** retries_since_good_run
          retries_since_good_run += 1
        print("EMPTY RESULT, REPEATING RUN ({})".format(
            retries_since_good_run));
      finally:
        # Only remove directories this attempt created itself.
        if not args.user_data_dir:
          shutil.rmtree(user_data_dir)


def read_sites_file(args):
  """Read the benchmark site list from args.sites_file.

  The file is either a JSON list of site dicts (a missing 'timeout' is
  derived from 'timeline' and capped at args.timeout) or a plain text file
  with one URL per line, where blank lines and '#' comments are skipped.
  Exits via args.error when the file cannot be read.
  """
  sites = []
  try:
    try:
      with open(args.sites_file, "rt") as f:
        for entry in json.load(f):
          if 'timeout' not in entry:
            # This is more-or-less arbitrary.
            entry['timeout'] = int(1.5 * entry['timeline'] + 7)
          entry['timeout'] = min(entry['timeout'], args.timeout)
          sites.append(entry)
    except ValueError:
      # Not valid JSON: fall back to the one-URL-per-line format.
      with open(args.sites_file, "rt") as f:
        for raw in f:
          url = raw.strip()
          if url and not url.startswith('#'):
            sites.append({'url': url, 'timeout': args.timeout})
    return sites
  except IOError as e:
    args.error("Cannot read from {}. {}.".format(args.sites_file, e.strerror))
    sys.exit(1)


def do_run(args):
  """Entry point of the 'run' command: benchmark all requested sites."""
  # Determine the websites to benchmark.
  if args.sites_file:
    sites = read_sites_file(args)
  else:
    sites = [{'url': site, 'timeout': args.timeout} for site in args.sites]
  # Disambiguate domains, if needed.
  L = []
  domains = {}
  for item in sites:
    site = item['url']
    domain = None
    if args.domain:
      domain = args.domain
    elif 'domain' in item:
      domain = item['domain']
    else:
      m = re.match(r'^(https?://)?([^/]+)(/.*)?$', site)
      if not m:
        args.error("Invalid URL {}.".format(site))
        continue
      domain = m.group(2)
    # entry is [site, domain, index-or-None, timeout].
    entry = [site, domain, None, item['timeout']]
    if domain not in domains:
      domains[domain] = entry
    else:
      # Second and later occurrences of a domain get a running index.
      # domains[domain] flips from holding the first entry to an int
      # counter, and the first entry's index (entry[2]) is retroactively
      # set to 1 at that point.
      if not isinstance(domains[domain], int):
        domains[domain][2] = 1
        domains[domain] = 1
      domains[domain] += 1
      entry[2] = domains[domain]
    L.append(entry)
  replay_server = start_replay_server(args, sites) if args.replay_wpr else None
  try:
    # Run them.
    for site, domain, count, timeout in L:
      if count is not None: domain = "{}%{}".format(domain, count)
      print site, domain, timeout
      run_site(site, domain, args, timeout)
  finally:
    if replay_server:
      stop_replay_server(replay_server)


# Calculate statistics.

def statistics(data):
  """Return summary statistics for a list of numbers.

  Returns a dict with the sample count, average, median, stddev, min, max
  and a 95% Student-t confidence interval 'ci' holding the absolute
  half-width ('abs'), the interval endpoints ('low'/'high') and the
  half-width as a percentage of the average ('perc').
  """
  N = len(data)
  average = numpy.average(data)
  median = numpy.median(data)
  low = numpy.min(data)
  high = numpy.max(data)
  if N > 1:
    # evaluate sample variance by setting delta degrees of freedom (ddof) to
    # 1. The degree used in calculations is N - ddof
    stddev = numpy.std(data, ddof=1)
    # Get the endpoints of the range that contains 95% of the distribution
    t_bounds = scipy.stats.t.interval(0.95, N-1)
    # sum mean to the confidence interval
    ci = {
        'abs': t_bounds[1] * stddev / sqrt(N),
        'low': average + t_bounds[0] * stddev / sqrt(N),
        'high': average + t_bounds[1] * stddev / sqrt(N)
    }
  else:
    stddev = 0
    ci = { 'abs': 0, 'low': average, 'high': average }
  # Express the half-width as a percentage of the average, guarding against
  # (near-)zero stddev or average. Bug fix: this used to recompute
  # t_bounds[1] * stddev / sqrt(N) here, referencing t_bounds which is only
  # bound in the N > 1 branch above (a latent NameError); ci['abs'] already
  # holds exactly that value and is defined on every path.
  if abs(stddev) > 0.0001 and abs(average) > 0.0001:
    ci['perc'] = ci['abs'] / average * 100
  else:
    ci['perc'] = 0
  return { 'samples': N, 'average': average, 'median': median,
           'stddev': stddev, 'min': low, 'max': high, 'ci': ci }


def read_stats(path, domain, args):
  """Parse one runtime-call-stats log and append its totals to `domain`.

  `domain` maps a counter name to {'time_list': [...], 'count_list': [...]};
  this run's summed time/count are appended as one sample per list. With
  args.aggregate, Group-* buckets (first matching regexp wins) and a grand
  'Sum' over everything except the "Total" line are maintained as well.
  """
  groups = []
  if args.aggregate:
    groups = [
        ('Group-IC', re.compile(".*IC.*")),
        ('Group-Optimize',
         re.compile("StackGuard|.*Optimize.*|.*Deoptimize.*|Recompile.*")),
        ('Group-Compile', re.compile(".*Compile.*")),
        ('Group-Parse', re.compile(".*Parse.*")),
        ('Group-Callback', re.compile(".*Callback.*")),
        ('Group-API', re.compile(".*API.*")),
        ('Group-GC', re.compile("GC|AllocateInTargetSpace")),
        ('Group-JavaScript', re.compile("JS_Execution")),
        ('Group-Runtime', re.compile(".*"))]
  # Process the whole file and sum repeating entries.
  entries = {'Sum': {'time': 0, 'count': 0}}
  for group_name, _ in groups:
    entries[group_name] = {'time': 0, 'count': 0}
  header_prefixes = ("Runtime Function", "====", "----", "URL:", "STATS:")
  with open(path, "rt") as f:
    for raw in f:
      line = raw.strip()
      # Discard blank lines, headers and footers.
      if not line or line.startswith(header_prefixes):
        continue
      # A regular line: "<name> <time>ms <time%> <count> <count%>".
      fields = line.split()
      key = fields[0]
      time = float(fields[1].replace("ms", ""))
      count = int(fields[3])
      bucket = entries.setdefault(key, {'time': 0, 'count': 0})
      bucket['time'] += time
      bucket['count'] += count
      # Everything except the "total" line feeds the grand sum and the
      # first matching aggregation group.
      if key == "Total":
        continue
      entries['Sum']['time'] += time
      entries['Sum']['count'] += count
      for group_name, regexp in groups:
        if regexp.match(key):
          entries[group_name]['time'] += time
          entries[group_name]['count'] += count
          break
  # Append the sums as single entries to domain.
  for key, value in entries.items():
    lists = domain.setdefault(key, {'time_list': [], 'count_list': []})
    lists['time_list'].append(value['time'])
    lists['count_list'].append(value['count'])


def print_stats(S, args):
  """Pretty-print the statistics dict S (name -> time/count stats).

  Entries are sorted by time/count average per args.sort, optionally
  truncated to args.limit rows, and followed by Partial/Sum/Total rows
  when args.totals is set.
  """
  # Sort by ascending/descending time average, then by ascending/descending
  # count average, then by ascending name.
  def sort_asc_func(item):
    return (item[1]['time_stat']['average'],
            item[1]['count_stat']['average'],
            item[0])
  def sort_desc_func(item):
    return (-item[1]['time_stat']['average'],
            -item[1]['count_stat']['average'],
            item[0])
  # Sorting order is in the command-line arguments.
  sort_func = sort_asc_func if args.sort == "asc" else sort_desc_func
  # Possibly limit how many elements to print.
  L = [item for item in sorted(S.items(), key=sort_func)
       if item[0] not in ["Total", "Sum"]]
  N = len(L)
  if args.limit == 0:
    low, high = 0, N
  elif args.sort == "desc":
    low, high = 0, args.limit
  else:
    # Ascending order puts the largest entries at the end of the list.
    low, high = N-args.limit, N
  # How to print entries.
  def print_entry(key, value):
    def stats(s, units=""):
      conf = "{:0.1f}({:0.2f}%)".format(s['ci']['abs'], s['ci']['perc'])
      return "{:8.1f}{} +/- {:15s}".format(s['average'], units, conf)
    print "{:>50s}  {}  {}".format(
      key,
      stats(value['time_stat'], units="ms"),
      stats(value['count_stat'])
    )
  # Print and calculate partial sums, if necessary.
  for i in range(low, high):
    print_entry(*L[i])
    if args.totals and args.limit != 0 and not args.aggregate:
      # `partial` accumulates element-wise sums over the printed rows only;
      # it is initialized on the first printed row.
      if i == low:
        partial = { 'time_list': [0] * len(L[i][1]['time_list']),
                    'count_list': [0] * len(L[i][1]['count_list']) }
      assert len(partial['time_list']) == len(L[i][1]['time_list'])
      assert len(partial['count_list']) == len(L[i][1]['count_list'])
      for j, v in enumerate(L[i][1]['time_list']):
        partial['time_list'][j] += v
      for j, v in enumerate(L[i][1]['count_list']):
        partial['count_list'][j] += v
  # Print totals, if necessary.
  if args.totals:
    print '-' * 80
    if args.limit != 0 and not args.aggregate:
      partial['time_stat'] = statistics(partial['time_list'])
      partial['count_stat'] = statistics(partial['count_list'])
      print_entry("Partial", partial)
    print_entry("Sum", S["Sum"])
    print_entry("Total", S["Total"])


def do_stats(args):
  """Entry point of the 'stats' command: parse logs, print statistics."""
  domains = {}
  for path in args.logfiles:
    filename = os.path.basename(path)
    # Log files are named "domain#count.txt" (see run_site); group the
    # samples by the domain part before the '#'.
    m = re.match(r'^([^#]+)(#.*)?$', filename)
    domain = m.group(1)
    if domain not in domains: domains[domain] = {}
    read_stats(path, domains[domain], args)
  if args.aggregate:
    create_total_page_stats(domains, args)
  for i, domain in enumerate(sorted(domains)):
    if len(domains) > 1:
      # Print a per-domain header only when there are several domains.
      if i > 0: print
      print "{}:".format(domain)
      print '=' * 80
    domain_stats = domains[domain]
    for key in domain_stats:
      domain_stats[key]['time_stat'] = \
          statistics(domain_stats[key]['time_list'])
      domain_stats[key]['count_stat'] = \
          statistics(domain_stats[key]['count_list'])
    print_stats(domain_stats, args)


# Create a Total page with all entries summed up.
def create_total_page_stats(domains, args):
  """Add a synthetic 'Total' domain summing every metric across domains."""
  total = {}

  def accumulate(sums, samples):
    # Element-wise add `samples` into `sums`, growing `sums` as needed and
    # skipping missing (None) samples.
    for i, sample in enumerate(samples):
      if i >= len(sums):
        sums.extend([0] * (i - len(sums) + 1))
      if sample is not None:
        sums[i] += sample

  # Sum up all the entries/metrics from all domains.
  for domain, entries in domains.items():
    for key, domain_stats in entries.items():
      if key in total:
        accumulate(total[key]['time_list'], domain_stats['time_list'])
        accumulate(total[key]['count_list'], domain_stats['count_list'])
      else:
        # First time we see this key: start from a copy of its lists.
        total[key] = {'time_list': list(domain_stats['time_list']),
                      'count_list': list(domain_stats['count_list'])}
  # Add a new "Total" page containing the summed up metrics.
  domains['Total'] = total


# Generate JSON file.

def do_json(args):
  """Entry point of the 'json' command.

  Walks args.logdirs (each sub-directory names a version), aggregates the
  *.txt logs per domain, and prints one compact JSON object mapping
  version -> domain -> list of
  [name, time avg, time ci abs, time ci %, count avg, count ci abs,
   count ci %] rows.
  """
  versions = {}
  for path in args.logdirs:
    if os.path.isdir(path):
      for root, dirs, files in os.walk(path):
        # The directory name doubles as the version label.
        version = os.path.basename(root)
        if version not in versions: versions[version] = {}
        for filename in files:
          if filename.endswith(".txt"):
            # Strip the "#count" suffix (see run_site) to get the domain.
            m = re.match(r'^([^#]+)(#.*)?\.txt$', filename)
            domain = m.group(1)
            if domain not in versions[version]: versions[version][domain] = {}
            read_stats(os.path.join(root, filename),
                       versions[version][domain], args)
  for version, domains in versions.items():
    if args.aggregate:
      create_total_page_stats(domains, args)
    for domain, entries in domains.items():
      stats = []
      for name, value in entries.items():
        # We don't want the calculated sum in the JSON file.
        if name == "Sum": continue
        entry = [name]
        for x in ['time_list', 'count_list']:
          s = statistics(entries[name][x])
          entry.append(round(s['average'], 1))
          entry.append(round(s['ci']['abs'], 1))
          entry.append(round(s['ci']['perc'], 2))
        stats.append(entry)
      domains[domain] = stats
  print json.dumps(versions, separators=(',', ':'))


# Help.

def do_help(parser, subparsers, args):
  """Print help for args.help_cmd, or the general help if none was given."""
  command = args.help_cmd
  if not command:
    parser.print_help()
    return
  try:
    subparsers[command].print_help()
  except KeyError:
    args.error("Unknown command '{}'".format(command))


# Main program, parse command line and execute.

def coexist(*flags):
  """Return True iff the given values are all truthy or all falsy."""
  truthy = len([flag for flag in flags if flag])
  return truthy in (0, len(flags))

def main():
  """Parse the command line and dispatch to the chosen sub-command.

  Each sub-parser sets args.func (the command's entry point) and
  args.error (its own error reporter) via set_defaults.
  """
  parser = argparse.ArgumentParser()
  subparser_adder = parser.add_subparsers(title="commands", dest="command",
                                          metavar="<command>")
  subparsers = {}
  # Command: run.
  subparsers["run"] = subparser_adder.add_parser(
      "run", help="run --help")
  subparsers["run"].set_defaults(
      func=do_run, error=subparsers["run"].error)
  subparsers["run"].add_argument(
      "--chrome-flags", type=str, default="",
      help="specify additional chrome flags")
  subparsers["run"].add_argument(
      "--js-flags", type=str, default="",
      help="specify additional V8 flags")
  subparsers["run"].add_argument(
      "--domain", type=str, default="",
      help="specify the output file domain name")
  subparsers["run"].add_argument(
      "--no-url", dest="print_url", action="store_false", default=True,
      help="do not include url in statistics file")
  subparsers["run"].add_argument(
      "-n", "--repeat", type=int, metavar="<num>",
      help="specify iterations for each website (default: once)")
  subparsers["run"].add_argument(
      "-k", "--refresh", type=int, metavar="<num>", default=0,
      help="specify refreshes for each iteration (default: 0)")
  subparsers["run"].add_argument(
      "--replay-wpr", type=str, metavar="<path>",
      help="use the specified web page replay (.wpr) archive")
  subparsers["run"].add_argument(
      "--replay-bin", type=str, metavar="<path>",
      help="specify the replay.py script typically located in " \
           "$CHROMIUM/src/third_party/webpagereplay/replay.py")
  subparsers["run"].add_argument(
      "-r", "--retries", type=int, metavar="<num>",
      help="specify retries if website is down (default: forever)")
  subparsers["run"].add_argument(
      "-f", "--sites-file", type=str, metavar="<path>",
      help="specify file containing benchmark websites")
  subparsers["run"].add_argument(
      "-t", "--timeout", type=int, metavar="<seconds>", default=60,
      help="specify seconds before chrome is killed")
  subparsers["run"].add_argument(
      "-p", "--port-offset", type=int, metavar="<offset>", default=0,
      help="specify the offset for the replay server's default ports")
  subparsers["run"].add_argument(
      "-u", "--user-data-dir", type=str, metavar="<path>",
      help="specify user data dir (default is temporary)")
  subparsers["run"].add_argument(
      "-c", "--with-chrome", type=str, metavar="<path>",
      default="/usr/bin/google-chrome",
      help="specify chrome executable to use")
  subparsers["run"].add_argument(
      "-l", "--log-stderr", type=str, metavar="<path>",
      help="specify where chrome's stderr should go (default: /dev/null)")
  subparsers["run"].add_argument(
      "sites", type=str, metavar="<URL>", nargs="*",
      help="specify benchmark website")
  # Command: stats.
  subparsers["stats"] = subparser_adder.add_parser(
      "stats", help="stats --help")
  subparsers["stats"].set_defaults(
      func=do_stats, error=subparsers["stats"].error)
  subparsers["stats"].add_argument(
      "-l", "--limit", type=int, metavar="<num>", default=0,
      help="limit how many items to print (default: none)")
  subparsers["stats"].add_argument(
      "-s", "--sort", choices=["asc", "desc"], default="asc",
      help="specify sorting order (default: ascending)")
  subparsers["stats"].add_argument(
      "-n", "--no-total", dest="totals", action="store_false", default=True,
      help="do not print totals")
  subparsers["stats"].add_argument(
      "logfiles", type=str, metavar="<logfile>", nargs="*",
      help="specify log files to parse")
  subparsers["stats"].add_argument(
      "--aggregate", dest="aggregate", action="store_true", default=False,
      help="Create aggregated entries. Adds Group-* entries at the toplevel. " +
      "Additionally creates a Total page with all entries.")
  # Command: json.
  subparsers["json"] = subparser_adder.add_parser(
      "json", help="json --help")
  subparsers["json"].set_defaults(
      func=do_json, error=subparsers["json"].error)
  subparsers["json"].add_argument(
      "logdirs", type=str, metavar="<logdir>", nargs="*",
      help="specify directories with log files to parse")
  subparsers["json"].add_argument(
      "--aggregate", dest="aggregate", action="store_true", default=False,
      help="Create aggregated entries. Adds Group-* entries at the toplevel. " +
      "Additionally creates a Total page with all entries.")
  # Command: help.
  subparsers["help"] = subparser_adder.add_parser(
      "help", help="help information")
  subparsers["help"].set_defaults(
      func=lambda args: do_help(parser, subparsers, args),
      error=subparsers["help"].error)
  subparsers["help"].add_argument(
      "help_cmd", type=str, metavar="<command>", nargs="?",
      help="command for which to display help")
  # Execute the command.
  args = parser.parse_args()
  setattr(args, 'script_path', os.path.dirname(sys.argv[0]))
  # For 'run': exactly one of --sites-file / positional URLs must be given
  # (coexist is True when both or neither are present).
  if args.command == "run" and coexist(args.sites_file, args.sites):
    args.error("use either option --sites-file or site URLs")
    sys.exit(1)
  elif args.command == "run" and not coexist(args.replay_wpr, args.replay_bin):
    args.error("options --replay-wpr and --replay-bin must be used together")
    sys.exit(1)
  else:
    args.func(args)

if __name__ == "__main__":
  # main() returns None on success, which sys.exit treats as status 0.
  sys.exit(main())