1#!/usr/bin/python 2# encoding: utf-8 3 4# Copyright 2017 Google Inc. 5# 6# Use of this source code is governed by a BSD-style license that can be found 7# in the LICENSE file. 8# 9# This is an A/B test utility script used by calmbench.py 10# 11# For each bench, we get a distribution of min_ms measurements from nanobench. 12# From that, we try to recover the 1/3 and 2/3 quantiles of the distribution. 13# If range (1/3 quantile, 2/3 quantile) is completely disjoint between A and B, 14# we report that as a regression. 15# 16# The more measurements we have for a bench, the more accurate our quantiles 17# are. However, taking more measurements is time consuming. Hence we'll prune 18# out benches and only take more measurements for benches whose current quantile 19# ranges are disjoint. 20# 21# P.S. The current script is brute forcely translated from a ruby script. So it 22# may be ugly... 23 24import re 25import os 26import sys 27import time 28import json 29import subprocess 30import shlex 31import multiprocessing 32from argparse import ArgumentParser 33from multiprocessing import Process 34from threading import Thread 35from threading import Lock 36from pdb import set_trace 37 38 39HELP = """ 40\033[31mPlease call calmbench.py to drive this script if you're not doing so. 41This script is not supposed to be used by itself. (At least, it's not easy to 42use by itself.) 43\033[0m 44""" 45 46FACTOR = 3 # lower/upper quantile factor 47DIFF_T = 0.99 # different enough threshold 48TERM = 10 # terminate after this no. of iterations without suspect changes 49MAXTRY = 30 # max number of nanobench tries to narrow down suspects 50 51UNITS = "ns µs ms s".split() 52 53 54timesLock = Lock() 55timesA = {} 56timesB = {} 57 58 59def parse_args(): 60 parser = ArgumentParser(description=HELP) 61 62 parser.add_argument('outdir', type=str, help="output directory") 63 parser.add_argument('a', type=str, help="name of A") 64 parser.add_argument('b', type=str, help="name of B") 65 parser.add_argument('nano_a', type=str, help="path to A's nanobench binary") 66 parser.add_argument('nano_b', type=str, help="path to B's nanobench binary") 67 parser.add_argument('arg_a', type=str, help="args for A's nanobench run") 68 parser.add_argument('arg_b', type=str, help="args for B's nanobench run") 69 parser.add_argument('repeat', type=int, help="number of initial runs") 70 parser.add_argument('skip_b', type=str, help=("whether to skip running B" 71 " ('true' or 'false')")) 72 parser.add_argument('config', type=str, help="nanobenh config") 73 parser.add_argument('threads', type=int, help="number of threads to run") 74 parser.add_argument('noinit', type=str, help=("whether to skip running B" 75 " ('true' or 'false')")) 76 77 parser.add_argument('--concise', dest='concise', action="store_true", 78 help="If set, no verbose thread info will be printed.") 79 parser.set_defaults(concise=False) 80 81 # Additional args for bots 82 BHELP = "bot specific options" 83 parser.add_argument('--githash', type=str, default="", help=BHELP) 84 parser.add_argument('--keys', type=str, default=[], nargs='+', help=BHELP) 85 86 args = parser.parse_args() 87 args.skip_b = args.skip_b == "true" 88 args.noinit = args.noinit == "true" 89 90 if args.threads == -1: 91 args.threads = 1 92 if args.config in ["8888", "565"]: # multi-thread for CPU only 93 args.threads = max(1, multiprocessing.cpu_count() / 2) 94 95 return args 96 97def append_dict_sorted_array(dict_array, key, value): 98 if key not in dict_array: 99 dict_array[key] = [] 100 dict_array[key].append(value) 101 dict_array[key].sort() 102 103 104def add_time(args, name, bench, t, unit): 105 normalized_t = t * 1000 ** UNITS.index(unit); 106 if name.startswith(args.a): 107 append_dict_sorted_array(timesA, bench, normalized_t) 108 else: 109 append_dict_sorted_array(timesB, bench, normalized_t) 110 111 112def append_times_from_file(args, name, filename): 113 with open(filename) as f: 114 lines = f.readlines() 115 for line in lines: 116 items = line.split() 117 if len(items) > 10: 118 bench = items[10] 119 matches = re.search("([+-]?\d*.?\d+)(s|ms|µs|ns)", items[3]) 120 if (not matches or items[9] != args.config): 121 continue 122 time_num = matches.group(1) 123 time_unit = matches.group(2) 124 add_time(args, name, bench, float(time_num), time_unit) 125 126 127class ThreadRunner: 128 """Simplest and stupidiest threaded executer.""" 129 def __init__(self, args): 130 self.concise = args.concise 131 self.threads = [] 132 133 def add(self, args, fn): 134 if len(self.threads) >= args.threads: 135 self.wait() 136 t = Thread(target = fn) 137 t.daemon = True 138 self.threads.append(t) 139 t.start() 140 141 def wait(self): 142 def spin(): 143 i = 0 144 spinners = [". ", ".. ", "..."] 145 while len(self.threads) > 0: 146 timesLock.acquire() 147 sys.stderr.write( 148 "\r" + spinners[i % len(spinners)] + 149 " (%d threads running)" % len(self.threads) + 150 " \r" # spaces for erasing characters 151 ) 152 timesLock.release() 153 time.sleep(0.5) 154 i += 1 155 156 if not self.concise: 157 ts = Thread(target = spin); 158 ts.start() 159 160 for t in self.threads: 161 t.join() 162 self.threads = [] 163 164 if not self.concise: 165 ts.join() 166 167 168def split_arg(arg): 169 raw = shlex.split(arg) 170 result = [] 171 for r in raw: 172 if '~' in r: 173 result.append(os.path.expanduser(r)) 174 else: 175 result.append(r) 176 return result 177 178 179def run(args, threadRunner, name, nano, arg, i): 180 def task(): 181 file_i = "%s/%s.out%d" % (args.outdir, name, i) 182 183 should_run = not args.noinit and not (name == args.b and args.skip_b) 184 if i <= 0: 185 should_run = True # always run for suspects 186 187 if should_run: 188 if i > 0: 189 timesLock.acquire() 190 print "Init run %d for %s..." % (i, name) 191 timesLock.release() 192 subprocess.check_call(["touch", file_i]) 193 with open(file_i, 'w') as f: 194 subprocess.check_call([nano] + split_arg(arg) + 195 ["--config", args.config], stderr=f, stdout=f) 196 197 timesLock.acquire() 198 append_times_from_file(args, name, file_i) 199 timesLock.release() 200 201 threadRunner.add(args, task) 202 203 204def init_run(args): 205 threadRunner = ThreadRunner(args) 206 for i in range(1, max(args.repeat, args.threads / 2) + 1): 207 run(args, threadRunner, args.a, args.nano_a, args.arg_a, i) 208 run(args, threadRunner, args.b, args.nano_b, args.arg_b, i) 209 threadRunner.wait() 210 211 212def get_lower_upper(values): 213 i = max(0, (len(values) - 1) / FACTOR) 214 return values[i], values[-i - 1] 215 216 217def different_enough(lower1, upper2): 218 return upper2 < DIFF_T * lower1 219 220 221def get_suspects(): 222 suspects = [] 223 for bench in timesA.keys(): 224 if bench not in timesB: 225 continue 226 lowerA, upperA = get_lower_upper(timesA[bench]) 227 lowerB, upperB = get_lower_upper(timesB[bench]) 228 if different_enough(lowerA, upperB) or different_enough(lowerB, upperA): 229 suspects.append(bench) 230 return suspects 231 232 233def process_bench_pattern(s): 234 if ".skp" in s: # skp bench won't match their exact names... 235 return "^\"" + s[0:(s.index(".skp") + 3)] + "\"" 236 else: 237 return "^\"" + s + "\"$" 238 239 240def suspects_arg(suspects): 241 patterns = map(process_bench_pattern, suspects) 242 return " --match " + (" ".join(patterns)) 243 244 245def median(array): 246 return array[len(array) / 2] 247 248 249def regression(bench): 250 a = median(timesA[bench]) 251 b = median(timesB[bench]) 252 if (a == 0): # bad bench, just return no regression 253 return 1 254 return b / a 255 256 257def percentage(x): 258 return (x - 1) * 100 259 260 261def format_r(r): 262 return ('%6.2f' % percentage(r)) + "%" 263 264 265def normalize_r(r): 266 if r > 1.0: 267 return r - 1.0 268 else: 269 return 1.0 - 1/r 270 271 272def test(): 273 args = parse_args() 274 275 init_run(args) 276 last_unchanged_iter = 0 277 last_suspect_number = -1 278 tryCnt = 0 279 it = 0 280 while tryCnt < MAXTRY: 281 it += 1 282 suspects = get_suspects() 283 if len(suspects) != last_suspect_number: 284 last_suspect_number = len(suspects) 285 last_unchanged_iter = it 286 if (len(suspects) == 0 or it - last_unchanged_iter >= TERM): 287 break 288 289 print "Number of suspects at iteration %d: %d" % (it, len(suspects)) 290 threadRunner = ThreadRunner(args) 291 for j in range(1, max(1, args.threads / 2) + 1): 292 run(args, threadRunner, args.a, args.nano_a, 293 args.arg_a + suspects_arg(suspects), -j) 294 run(args, threadRunner, args.b, args.nano_b, 295 args.arg_b + suspects_arg(suspects), -j) 296 tryCnt += 1 297 threadRunner.wait() 298 299 suspects = get_suspects() 300 if len(suspects) == 0: 301 print ("%s and %s does not seem to have significant " + \ 302 "performance differences.") % (args.a, args.b) 303 else: 304 suspects.sort(key = regression) 305 print "%s (compared to %s) is likely" % (args.a, args.b) 306 for suspect in suspects: 307 r = regression(suspect) 308 if r < 1: 309 print "\033[31m %s slower in %s\033[0m" % \ 310 (format_r(1/r), suspect) 311 else: 312 print "\033[32m %s faster in %s\033[0m" % \ 313 (format_r(r), suspect) 314 315 with open("%s/bench_%s_%s.json" % (args.outdir, args.a, args.b), 'w') as f: 316 results = {} 317 for bench in timesA: 318 r = regression(bench) if bench in suspects else 1.0 319 results[bench] = { 320 args.config: { 321 "signed_regression": normalize_r(r), 322 "lower_quantile_ms": get_lower_upper(timesA[bench])[0] * 1e-6, 323 "upper_quantile_ms": get_lower_upper(timesA[bench])[1] * 1e-6 324 } 325 } 326 327 output = {"results": results} 328 if args.githash: 329 output["gitHash"] = args.githash 330 if args.keys: 331 keys = {} 332 for i in range(len(args.keys) / 2): 333 keys[args.keys[i * 2]] = args.keys[i * 2 + 1] 334 output["key"] = keys 335 f.write(json.dumps(output, indent=4)) 336 print ("\033[36mJSON results available in %s\033[0m" % f.name) 337 338 with open("%s/bench_%s_%s.csv" % (args.outdir, args.a, args.b), 'w') as out: 339 out.write(("bench, significant?, raw regresion, " + 340 "%(A)s quantile (ns), %(B)s quantile (ns), " + 341 "%(A)s (ns), %(B)s (ns)\n") % {'A': args.a, 'B': args.b}) 342 for bench in suspects + timesA.keys(): 343 if (bench not in timesA or bench not in timesB): 344 continue 345 ta = timesA[bench] 346 tb = timesB[bench] 347 out.write( 348 "%s, %s, %f, " % (bench, bench in suspects, regression(bench)) + 349 ' '.join(map(str, get_lower_upper(ta))) + ", " + 350 ' '.join(map(str, get_lower_upper(tb))) + ", " + 351 ("%s, %s\n" % (' '.join(map(str, ta)), ' '.join(map(str, tb)))) 352 ) 353 print (("\033[36m" + 354 "Compared %d benches. " + 355 "%d of them seem to be significantly differrent." + 356 "\033[0m") % 357 (len([x for x in timesA if x in timesB]), len(suspects))) 358 print ("\033[36mPlease see detailed bench results in %s\033[0m" % 359 out.name) 360 361 362if __name__ == "__main__": 363 try: 364 test() 365 except Exception as e: 366 print e 367 print HELP 368 raise 369