#!/usr/bin/env python3
#
# Copyright 2017 gRPC authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Computes the diff between two benchmark runs and outputs significant results."""

import argparse
import collections
import json
import os
import subprocess
import sys

sys.path.append(os.path.join(os.path.dirname(sys.argv[0]), ".."))

import bm_constants
import bm_json
import bm_speedup
import tabulate

verbose = False


def _median(ary):
    assert len(ary)
    ary = sorted(ary)
    n = len(ary)
    if n % 2 == 0:
        return (ary[(n - 1) // 2] + ary[(n - 1) // 2 + 1]) / 2.0
    else:
        return ary[n // 2]


def _args():
    argp = argparse.ArgumentParser(
        description="Perform diff on microbenchmarks"
    )
    argp.add_argument(
        "-t",
        "--track",
        choices=sorted(bm_constants._INTERESTING),
        nargs="+",
        default=sorted(bm_constants._INTERESTING),
        help="Which metrics to track",
    )
    argp.add_argument(
        "-b",
        "--benchmarks",
        nargs="+",
        choices=bm_constants._AVAILABLE_BENCHMARK_TESTS,
        default=bm_constants._AVAILABLE_BENCHMARK_TESTS,
        help="Which benchmarks to run",
    )
    argp.add_argument(
        "-l",
        "--loops",
        type=int,
        default=20,
        help=(
            "Number of times to loop the benchmarks. Must match what was"
            " passed to bm_run.py"
        ),
    )
    argp.add_argument(
        "-r",
        "--regex",
        type=str,
        default="",
        help="Regex to filter benchmarks run",
    )
    argp.add_argument("-n", "--new", type=str, help="New benchmark name")
    argp.add_argument("-o", "--old", type=str, help="Old benchmark name")
    argp.add_argument(
        "-v",
        "--verbose",
        action="store_true",
        help="Print details of before/after",
    )
    args = argp.parse_args()
    global verbose
    if args.verbose:
        verbose = True
    assert args.new
    assert args.old
    return args


def _maybe_print(msg):
    if verbose:
        print(msg)


class Benchmark:
    def __init__(self):
        # Samples are keyed first by "is this the new run?" and then by the
        # tracked field name.
        self.samples = {
            True: collections.defaultdict(list),
            False: collections.defaultdict(list),
        }
        self.final = {}
        # Per-field speedup estimates; named "speedups" so the dict does not
        # shadow the speedup() accessor below.
        self.speedups = {}

    def add_sample(self, track, data, new):
        for f in track:
            if f in data:
                self.samples[new][f].append(float(data[f]))

    def process(self, track, new_name, old_name):
        for f in sorted(track):
            new = self.samples[True][f]
            old = self.samples[False][f]
            if not new or not old:
                continue
            mdn_diff = abs(_median(new) - _median(old))
            _maybe_print(
                "%s: %s=%r %s=%r mdn_diff=%r"
                % (f, new_name, new, old_name, old, mdn_diff)
            )
            s = bm_speedup.speedup(new, old, 1e-5)
            self.speedups[f] = s
            if abs(s) > 3 and mdn_diff > 0.5:
                self.final[f] = "%+d%%" % s
        return self.final.keys()

    def skip(self):
        return not self.final

    def row(self, flds):
        return [self.final.get(f, "") for f in flds]

    def speedup(self, name):
        return self.speedups.get(name)


def _read_json(filename, badjson_files, nonexistent_files):
    stripped = ".".join(filename.split(".")[:-2])
    try:
        with open(filename) as f:
            r = f.read()
        return json.loads(r)
    except IOError:
        if stripped in nonexistent_files:
            nonexistent_files[stripped] += 1
        else:
            nonexistent_files[stripped] = 1
        return None
    except ValueError:
        # Dump the corrupt payload to aid debugging, then record the file.
        print(r)
        if stripped in badjson_files:
            badjson_files[stripped] += 1
        else:
            badjson_files[stripped] = 1
        return None


def fmt_dict(d):
    return "".join([" " + k + ": " + str(d[k]) + "\n" for k in d])


def diff(bms, loops, regex, track, old, new):
    benchmarks = collections.defaultdict(Benchmark)

    badjson_files = {}
    nonexistent_files = {}
    for bm in bms:
        for loop in range(0, loops):
            for line in subprocess.check_output(
                [
                    "bm_diff_%s/opt/%s" % (old, bm),
                    "--benchmark_list_tests",
                    "--benchmark_filter=%s" % regex,
                ]
            ).splitlines():
                line = line.decode("UTF-8")
                stripped_line = (
                    line.strip()
                    .replace("/", "_")
                    .replace("<", "_")
                    .replace(">", "_")
                    .replace(", ", "_")
                )
                js_new_opt = _read_json(
                    "%s.%s.opt.%s.%d.json" % (bm, stripped_line, new, loop),
                    badjson_files,
                    nonexistent_files,
                )
                js_old_opt = _read_json(
                    "%s.%s.opt.%s.%d.json" % (bm, stripped_line, old, loop),
                    badjson_files,
                    nonexistent_files,
                )
                if js_new_opt:
                    for row in bm_json.expand_json(js_new_opt):
                        name = row["cpp_name"]
                        if name.endswith("_mean") or name.endswith("_stddev"):
                            continue
                        benchmarks[name].add_sample(track, row, True)
                if js_old_opt:
                    for row in bm_json.expand_json(js_old_opt):
                        name = row["cpp_name"]
                        if name.endswith("_mean") or name.endswith("_stddev"):
                            continue
                        benchmarks[name].add_sample(track, row, False)

    really_interesting = set()
    for name, bm in benchmarks.items():
        _maybe_print(name)
        really_interesting.update(bm.process(track, new, old))
    fields = [f for f in track if f in really_interesting]

    # Figure out the significance of the changes: take the 95th-percentile
    # per-benchmark cpu_time speedup and apply some hand-chosen thresholds.
    histogram = []
    _NOISY = ["BM_WellFlushed"]
    for name, bm in benchmarks.items():
        if name in _NOISY:
            print(
                "skipping noisy benchmark '%s' for labelling evaluation" % name
            )
            continue
        if bm.skip():
            continue
        d = bm.speedup("cpu_time")
        if d is None:
            continue
        histogram.append(d)
    histogram.sort()
    print("histogram of speedups: ", histogram)
    if len(histogram) == 0:
        significance = 0
    else:
        delta = histogram[int(len(histogram) * 0.95)]
        mul = 1
        if delta < 0:
            delta = -delta
            mul = -1
        if delta < 2:
            significance = 0
        elif delta < 5:
            significance = 1
        elif delta < 10:
            significance = 2
        else:
            significance = 3
        significance *= mul

    headers = ["Benchmark"] + fields
    rows = []
    for name in sorted(benchmarks.keys()):
        if benchmarks[name].skip():
            continue
        rows.append([name] + benchmarks[name].row(fields))
    note = None
    if len(badjson_files):
        note = (
            "Corrupt JSON data (indicates timeout or crash): \n%s"
            % fmt_dict(badjson_files)
        )
    if len(nonexistent_files):
        if note:
            note += (
                "\n\nMissing files (indicates new benchmark): \n%s"
                % fmt_dict(nonexistent_files)
            )
        else:
            note = (
                "Missing files (indicates new benchmark): \n%s"
                % fmt_dict(nonexistent_files)
            )
    if rows:
        return (
            tabulate.tabulate(rows, headers=headers, floatfmt="+.2f"),
            note,
            significance,
        )
    else:
        return None, note, 0


if __name__ == "__main__":
    args = _args()
    diff_table, note, _significance = diff(
        args.benchmarks,
        args.loops,
        args.regex,
        args.track,
        args.old,
        args.new,
    )
    print(
        "%s\n%s"
        % (note, diff_table if diff_table else "No performance differences")
    )
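
# Illustrative usage (a minimal sketch, not part of the script): this assumes
# bm_run.py has already been run for both builds with the same --loops value,
# so the per-loop JSON files exist in the working directory. The ref names
# below are placeholders.
#
#   ./bm_diff.py -o old_ref -n new_ref -l 20
#
# The script then prints a table of per-benchmark percentage changes for the
# tracked metrics, plus a note listing any corrupt or missing JSON files;
# pass -v to also print the raw before/after samples.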