• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/python
2# encoding: utf-8
3
4# Copyright 2017 Google Inc.
5#
6# Use of this source code is governed by a BSD-style license that can be found
7# in the LICENSE file.
8#
9# This is an A/B test utility script used by calmbench.py
10#
11# For each bench, we get a distribution of min_ms measurements from nanobench.
12# From that, we try to recover the 1/3 and 2/3 quantiles of the distribution.
13# If range (1/3 quantile, 2/3 quantile) is completely disjoint between A and B,
14# we report that as a regression.
15#
16# The more measurements we have for a bench, the more accurate our quantiles
17# are. However, taking more measurements is time consuming. Hence we'll prune
18# out benches and only take more measurements for benches whose current quantile
19# ranges are disjoint.
20#
21# P.S. The current script is brute forcely translated from a ruby script. So it
22# may be ugly...
23
24import re
25import os
26import sys
27import time
28import json
29import subprocess
30import shlex
31import multiprocessing
32from argparse import ArgumentParser
33from multiprocessing import Process
34from threading import Thread
35from threading import Lock
36from pdb import set_trace
37
38
39HELP = """
40\033[31mPlease call calmbench.py to drive this script if you're not doing so.
41This script is not supposed to be used by itself. (At least, it's not easy to
42use by itself.)
43\033[0m
44"""
45
46FACTOR  = 3     # lower/upper quantile factor
47DIFF_T  = 0.99  # different enough threshold
48TERM    = 10    # terminate after this no. of iterations without suspect changes
49MAXTRY  = 30    # max number of nanobench tries to narrow down suspects
50
51UNITS   = "ns µs ms s".split()
52
53
54timesLock = Lock()
55timesA  = {}
56timesB  = {}
57
58
59def parse_args():
60  parser = ArgumentParser(description=HELP)
61
62  parser.add_argument('outdir', type=str, help="output directory")
63  parser.add_argument('a', type=str, help="name of A")
64  parser.add_argument('b', type=str, help="name of B")
65  parser.add_argument('nano_a', type=str, help="path to A's nanobench binary")
66  parser.add_argument('nano_b', type=str, help="path to B's nanobench binary")
67  parser.add_argument('arg_a', type=str, help="args for A's nanobench run")
68  parser.add_argument('arg_b', type=str, help="args for B's nanobench run")
69  parser.add_argument('repeat', type=int, help="number of initial runs")
70  parser.add_argument('skip_b', type=str, help=("whether to skip running B"
71                                                " ('true' or 'false')"))
72  parser.add_argument('config', type=str, help="nanobenh config")
73  parser.add_argument('threads', type=int, help="number of threads to run")
74  parser.add_argument('noinit', type=str, help=("whether to skip running B"
75                                                " ('true' or 'false')"))
76
77  parser.add_argument('--concise', dest='concise', action="store_true",
78      help="If set, no verbose thread info will be printed.")
79  parser.set_defaults(concise=False)
80
81  # Additional args for bots
82  BHELP = "bot specific options"
83  parser.add_argument('--githash', type=str, default="", help=BHELP)
84  parser.add_argument('--keys', type=str, default=[], nargs='+', help=BHELP)
85
86  args = parser.parse_args()
87  args.skip_b = args.skip_b == "true"
88  args.noinit = args.noinit == "true"
89
90  if args.threads == -1:
91    args.threads = 1
92    if args.config in ["8888", "565"]: # multi-thread for CPU only
93        args.threads = max(1, multiprocessing.cpu_count() / 2)
94
95  return args
96
97def append_dict_sorted_array(dict_array, key, value):
98  if key not in dict_array:
99    dict_array[key] = []
100  dict_array[key].append(value)
101  dict_array[key].sort()
102
103
104def add_time(args, name, bench, t, unit):
105  normalized_t = t * 1000 ** UNITS.index(unit);
106  if name.startswith(args.a):
107    append_dict_sorted_array(timesA, bench, normalized_t)
108  else:
109    append_dict_sorted_array(timesB, bench, normalized_t)
110
111
112def append_times_from_file(args, name, filename):
113  with open(filename) as f:
114    lines = f.readlines()
115  for line in lines:
116    items = line.split()
117    if len(items) > 10:
118      bench = items[10]
119      matches = re.search("([+-]?\d*.?\d+)(s|ms|µs|ns)", items[3])
120      if (not matches or items[9] != args.config):
121        continue
122      time_num = matches.group(1)
123      time_unit = matches.group(2)
124      add_time(args, name, bench, float(time_num), time_unit)
125
126
127class ThreadRunner:
128  """Simplest and stupidiest threaded executer."""
129  def __init__(self, args):
130    self.concise = args.concise
131    self.threads = []
132
133  def add(self, args, fn):
134    if len(self.threads) >= args.threads:
135      self.wait()
136    t = Thread(target = fn)
137    t.daemon = True
138    self.threads.append(t)
139    t.start()
140
141  def wait(self):
142    def spin():
143      i = 0
144      spinners = [".  ", ".. ", "..."]
145      while len(self.threads) > 0:
146        timesLock.acquire()
147        sys.stderr.write(
148            "\r" + spinners[i % len(spinners)] +
149            " (%d threads running)" % len(self.threads) +
150            "           \r" # spaces for erasing characters
151        )
152        timesLock.release()
153        time.sleep(0.5)
154        i += 1
155
156    if not self.concise:
157      ts = Thread(target = spin);
158      ts.start()
159
160    for t in self.threads:
161        t.join()
162    self.threads = []
163
164    if not self.concise:
165      ts.join()
166
167
168def split_arg(arg):
169  raw = shlex.split(arg)
170  result = []
171  for r in raw:
172    if '~' in r:
173      result.append(os.path.expanduser(r))
174    else:
175      result.append(r)
176  return result
177
178
179def run(args, threadRunner, name, nano, arg, i):
180  def task():
181    file_i = "%s/%s.out%d" % (args.outdir, name, i)
182
183    should_run = not args.noinit and not (name == args.b and args.skip_b)
184    if i <= 0:
185      should_run = True # always run for suspects
186
187    if should_run:
188      if i > 0:
189        timesLock.acquire()
190        print "Init run %d for %s..." % (i, name)
191        timesLock.release()
192      subprocess.check_call(["touch", file_i])
193      with open(file_i, 'w') as f:
194        subprocess.check_call([nano] + split_arg(arg) +
195                              ["--config", args.config], stderr=f, stdout=f)
196
197    timesLock.acquire()
198    append_times_from_file(args, name, file_i)
199    timesLock.release()
200
201  threadRunner.add(args, task)
202
203
204def init_run(args):
205  threadRunner = ThreadRunner(args)
206  for i in range(1, max(args.repeat, args.threads / 2) + 1):
207    run(args, threadRunner, args.a, args.nano_a, args.arg_a, i)
208    run(args, threadRunner, args.b, args.nano_b, args.arg_b, i)
209  threadRunner.wait()
210
211
212def get_lower_upper(values):
213  i = max(0, (len(values) - 1) / FACTOR)
214  return values[i], values[-i - 1]
215
216
217def different_enough(lower1, upper2):
218  return upper2 < DIFF_T * lower1
219
220
221def get_suspects():
222  suspects = []
223  for bench in timesA.keys():
224    if bench not in timesB:
225      continue
226    lowerA, upperA = get_lower_upper(timesA[bench])
227    lowerB, upperB = get_lower_upper(timesB[bench])
228    if different_enough(lowerA, upperB) or different_enough(lowerB, upperA):
229      suspects.append(bench)
230  return suspects
231
232
233def process_bench_pattern(s):
234  if ".skp" in s: # skp bench won't match their exact names...
235    return "^\"" + s[0:(s.index(".skp") + 3)] + "\""
236  else:
237    return "^\"" + s + "\"$"
238
239
240def suspects_arg(suspects):
241  patterns = map(process_bench_pattern, suspects)
242  return " --match " + (" ".join(patterns))
243
244
245def median(array):
246  return array[len(array) / 2]
247
248
249def regression(bench):
250  a = median(timesA[bench])
251  b = median(timesB[bench])
252  if (a == 0): # bad bench, just return no regression
253    return 1
254  return b / a
255
256
257def percentage(x):
258  return (x - 1) * 100
259
260
261def format_r(r):
262  return ('%6.2f' % percentage(r)) + "%"
263
264
265def normalize_r(r):
266  if r > 1.0:
267    return r - 1.0
268  else:
269    return 1.0 - 1/r
270
271
272def test():
273  args = parse_args()
274
275  init_run(args)
276  last_unchanged_iter = 0
277  last_suspect_number = -1
278  tryCnt = 0
279  it = 0
280  while tryCnt < MAXTRY:
281    it += 1
282    suspects = get_suspects()
283    if len(suspects) != last_suspect_number:
284      last_suspect_number = len(suspects)
285      last_unchanged_iter = it
286    if (len(suspects) == 0 or it - last_unchanged_iter >= TERM):
287      break
288
289    print "Number of suspects at iteration %d: %d" % (it, len(suspects))
290    threadRunner = ThreadRunner(args)
291    for j in range(1, max(1, args.threads / 2) + 1):
292      run(args, threadRunner, args.a, args.nano_a,
293          args.arg_a + suspects_arg(suspects), -j)
294      run(args, threadRunner, args.b, args.nano_b,
295          args.arg_b + suspects_arg(suspects), -j)
296      tryCnt += 1
297    threadRunner.wait()
298
299  suspects = get_suspects()
300  if len(suspects) == 0:
301    print ("%s and %s does not seem to have significant " + \
302           "performance differences.") % (args.a, args.b)
303  else:
304    suspects.sort(key = regression)
305    print "%s (compared to %s) is likely" % (args.a, args.b)
306    for suspect in suspects:
307      r = regression(suspect)
308      if r < 1:
309        print "\033[31m  %s slower in %s\033[0m" % \
310                (format_r(1/r), suspect)
311      else:
312        print "\033[32m  %s faster in %s\033[0m" % \
313                (format_r(r), suspect)
314
315  with open("%s/bench_%s_%s.json" % (args.outdir, args.a, args.b), 'w') as f:
316    results = {}
317    for bench in timesA:
318      r = regression(bench) if bench in suspects else 1.0
319      results[bench] = {
320        args.config: {
321          "signed_regression": normalize_r(r),
322          "lower_quantile_ms": get_lower_upper(timesA[bench])[0] * 1e-6,
323          "upper_quantile_ms": get_lower_upper(timesA[bench])[1] * 1e-6
324        }
325      }
326
327    output = {"results": results}
328    if args.githash:
329      output["gitHash"] = args.githash
330    if args.keys:
331      keys = {}
332      for i in range(len(args.keys) / 2):
333        keys[args.keys[i * 2]] = args.keys[i * 2 + 1]
334      output["key"] = keys
335    f.write(json.dumps(output, indent=4))
336    print ("\033[36mJSON results available in %s\033[0m" % f.name)
337
338  with open("%s/bench_%s_%s.csv" % (args.outdir, args.a, args.b), 'w') as out:
339    out.write(("bench, significant?, raw regresion, " +
340                   "%(A)s quantile (ns), %(B)s quantile (ns), " +
341                   "%(A)s (ns), %(B)s (ns)\n") % {'A': args.a, 'B': args.b})
342    for bench in suspects + timesA.keys():
343      if (bench not in timesA or bench not in timesB):
344        continue
345      ta = timesA[bench]
346      tb = timesB[bench]
347      out.write(
348          "%s, %s, %f, " % (bench, bench in suspects, regression(bench)) +
349          ' '.join(map(str, get_lower_upper(ta))) + ", " +
350          ' '.join(map(str, get_lower_upper(tb))) + ", " +
351          ("%s, %s\n" % (' '.join(map(str, ta)), ' '.join(map(str, tb))))
352      )
353    print (("\033[36m" +
354           "Compared %d benches. " +
355           "%d of them seem to be significantly differrent." +
356           "\033[0m") %
357           (len([x for x in timesA if x in timesB]), len(suspects)))
358    print ("\033[36mPlease see detailed bench results in %s\033[0m" %
359            out.name)
360
361
362if __name__ == "__main__":
363  try:
364    test()
365  except Exception as e:
366    print e
367    print HELP
368    raise
369