1import unittest 2"""report.py - Utilities for reporting statistics about benchmark results 3""" 4import os 5import re 6import copy 7 8from scipy.stats import mannwhitneyu 9 10 11class BenchmarkColor(object): 12 def __init__(self, name, code): 13 self.name = name 14 self.code = code 15 16 def __repr__(self): 17 return '%s%r' % (self.__class__.__name__, 18 (self.name, self.code)) 19 20 def __format__(self, format): 21 return self.code 22 23 24# Benchmark Colors Enumeration 25BC_NONE = BenchmarkColor('NONE', '') 26BC_MAGENTA = BenchmarkColor('MAGENTA', '\033[95m') 27BC_CYAN = BenchmarkColor('CYAN', '\033[96m') 28BC_OKBLUE = BenchmarkColor('OKBLUE', '\033[94m') 29BC_OKGREEN = BenchmarkColor('OKGREEN', '\033[32m') 30BC_HEADER = BenchmarkColor('HEADER', '\033[92m') 31BC_WARNING = BenchmarkColor('WARNING', '\033[93m') 32BC_WHITE = BenchmarkColor('WHITE', '\033[97m') 33BC_FAIL = BenchmarkColor('FAIL', '\033[91m') 34BC_ENDC = BenchmarkColor('ENDC', '\033[0m') 35BC_BOLD = BenchmarkColor('BOLD', '\033[1m') 36BC_UNDERLINE = BenchmarkColor('UNDERLINE', '\033[4m') 37 38UTEST_MIN_REPETITIONS = 2 39UTEST_OPTIMAL_REPETITIONS = 9 # Lowest reasonable number, More is better. 40UTEST_COL_NAME = "_pvalue" 41 42 43def color_format(use_color, fmt_str, *args, **kwargs): 44 """ 45 Return the result of 'fmt_str.format(*args, **kwargs)' after transforming 46 'args' and 'kwargs' according to the value of 'use_color'. If 'use_color' 47 is False then all color codes in 'args' and 'kwargs' are replaced with 48 the empty string. 49 """ 50 assert use_color is True or use_color is False 51 if not use_color: 52 args = [arg if not isinstance(arg, BenchmarkColor) else BC_NONE 53 for arg in args] 54 kwargs = {key: arg if not isinstance(arg, BenchmarkColor) else BC_NONE 55 for key, arg in kwargs.items()} 56 return fmt_str.format(*args, **kwargs) 57 58 59def find_longest_name(benchmark_list): 60 """ 61 Return the length of the longest benchmark name in a given list of 62 benchmark JSON objects 63 """ 64 longest_name = 1 65 for bc in benchmark_list: 66 if len(bc['name']) > longest_name: 67 longest_name = len(bc['name']) 68 return longest_name 69 70 71def calculate_change(old_val, new_val): 72 """ 73 Return a float representing the decimal change between old_val and new_val. 74 """ 75 if old_val == 0 and new_val == 0: 76 return 0.0 77 if old_val == 0: 78 return float(new_val - old_val) / (float(old_val + new_val) / 2) 79 return float(new_val - old_val) / abs(old_val) 80 81 82def filter_benchmark(json_orig, family, replacement=""): 83 """ 84 Apply a filter to the json, and only leave the 'family' of benchmarks. 85 """ 86 regex = re.compile(family) 87 filtered = {} 88 filtered['benchmarks'] = [] 89 for be in json_orig['benchmarks']: 90 if not regex.search(be['name']): 91 continue 92 filteredbench = copy.deepcopy(be) # Do NOT modify the old name! 93 filteredbench['name'] = regex.sub(replacement, filteredbench['name']) 94 filtered['benchmarks'].append(filteredbench) 95 return filtered 96 97 98def get_unique_benchmark_names(json): 99 """ 100 While *keeping* the order, give all the unique 'names' used for benchmarks. 101 """ 102 seen = set() 103 uniqued = [x['name'] for x in json['benchmarks'] 104 if x['name'] not in seen and 105 (seen.add(x['name']) or True)] 106 return uniqued 107 108 109def intersect(list1, list2): 110 """ 111 Given two lists, get a new list consisting of the elements only contained 112 in *both of the input lists*, while preserving the ordering. 113 """ 114 return [x for x in list1 if x in list2] 115 116 117def partition_benchmarks(json1, json2): 118 """ 119 While preserving the ordering, find benchmarks with the same names in 120 both of the inputs, and group them. 121 (i.e. partition/filter into groups with common name) 122 """ 123 json1_unique_names = get_unique_benchmark_names(json1) 124 json2_unique_names = get_unique_benchmark_names(json2) 125 names = intersect(json1_unique_names, json2_unique_names) 126 partitions = [] 127 for name in names: 128 # Pick the time unit from the first entry of the lhs benchmark. 129 time_unit = (x['time_unit'] 130 for x in json1['benchmarks'] if x['name'] == name).next() 131 # Filter by name and time unit. 132 lhs = [x for x in json1['benchmarks'] if x['name'] == name and 133 x['time_unit'] == time_unit] 134 rhs = [x for x in json2['benchmarks'] if x['name'] == name and 135 x['time_unit'] == time_unit] 136 partitions.append([lhs, rhs]) 137 return partitions 138 139 140def extract_field(partition, field_name): 141 # The count of elements may be different. We want *all* of them. 142 lhs = [x[field_name] for x in partition[0]] 143 rhs = [x[field_name] for x in partition[1]] 144 return [lhs, rhs] 145 146 147def print_utest(partition, utest_alpha, first_col_width, use_color=True): 148 timings_time = extract_field(partition, 'real_time') 149 timings_cpu = extract_field(partition, 'cpu_time') 150 151 min_rep_cnt = min(len(timings_time[0]), 152 len(timings_time[1]), 153 len(timings_cpu[0]), 154 len(timings_cpu[1])) 155 156 # Does *everything* has at least UTEST_MIN_REPETITIONS repetitions? 157 if min_rep_cnt < UTEST_MIN_REPETITIONS: 158 return [] 159 160 def get_utest_color(pval): 161 return BC_FAIL if pval >= utest_alpha else BC_OKGREEN 162 163 time_pvalue = mannwhitneyu( 164 timings_time[0], timings_time[1], alternative='two-sided').pvalue 165 cpu_pvalue = mannwhitneyu( 166 timings_cpu[0], timings_cpu[1], alternative='two-sided').pvalue 167 168 dsc = "U Test, Repetitions: {} vs {}".format( 169 len(timings_cpu[0]), len(timings_cpu[1])) 170 dsc_color = BC_OKGREEN 171 172 if min_rep_cnt < UTEST_OPTIMAL_REPETITIONS: 173 dsc_color = BC_WARNING 174 dsc += ". WARNING: Results unreliable! {}+ repetitions recommended.".format( 175 UTEST_OPTIMAL_REPETITIONS) 176 177 special_str = "{}{:<{}s}{endc}{}{:16.4f}{endc}{}{:16.4f}{endc}{} {}" 178 179 last_name = partition[0][0]['name'] 180 return [color_format(use_color, 181 special_str, 182 BC_HEADER, 183 "{}{}".format(last_name, UTEST_COL_NAME), 184 first_col_width, 185 get_utest_color(time_pvalue), time_pvalue, 186 get_utest_color(cpu_pvalue), cpu_pvalue, 187 dsc_color, dsc, 188 endc=BC_ENDC)] 189 190 191def generate_difference_report( 192 json1, 193 json2, 194 display_aggregates_only=False, 195 utest=False, 196 utest_alpha=0.05, 197 use_color=True): 198 """ 199 Calculate and report the difference between each test of two benchmarks 200 runs specified as 'json1' and 'json2'. 201 """ 202 assert utest is True or utest is False 203 first_col_width = find_longest_name(json1['benchmarks']) 204 205 def find_test(name): 206 for b in json2['benchmarks']: 207 if b['name'] == name: 208 return b 209 return None 210 211 first_col_width = max( 212 first_col_width, 213 len('Benchmark')) 214 first_col_width += len(UTEST_COL_NAME) 215 first_line = "{:<{}s}Time CPU Time Old Time New CPU Old CPU New".format( 216 'Benchmark', 12 + first_col_width) 217 output_strs = [first_line, '-' * len(first_line)] 218 219 partitions = partition_benchmarks(json1, json2) 220 for partition in partitions: 221 # Careful, we may have different repetition count. 222 for i in range(min(len(partition[0]), len(partition[1]))): 223 bn = partition[0][i] 224 other_bench = partition[1][i] 225 226 # *If* we were asked to only display aggregates, 227 # and if it is non-aggregate, then skip it. 228 if display_aggregates_only and 'run_type' in bn and 'run_type' in other_bench: 229 assert bn['run_type'] == other_bench['run_type'] 230 if bn['run_type'] != 'aggregate': 231 continue 232 233 fmt_str = "{}{:<{}s}{endc}{}{:+16.4f}{endc}{}{:+16.4f}{endc}{:14.0f}{:14.0f}{endc}{:14.0f}{:14.0f}" 234 235 def get_color(res): 236 if res > 0.05: 237 return BC_FAIL 238 elif res > -0.07: 239 return BC_WHITE 240 else: 241 return BC_CYAN 242 243 tres = calculate_change(bn['real_time'], other_bench['real_time']) 244 cpures = calculate_change(bn['cpu_time'], other_bench['cpu_time']) 245 output_strs += [color_format(use_color, 246 fmt_str, 247 BC_HEADER, 248 bn['name'], 249 first_col_width, 250 get_color(tres), 251 tres, 252 get_color(cpures), 253 cpures, 254 bn['real_time'], 255 other_bench['real_time'], 256 bn['cpu_time'], 257 other_bench['cpu_time'], 258 endc=BC_ENDC)] 259 260 # After processing the whole partition, if requested, do the U test. 261 if utest: 262 output_strs += print_utest(partition, 263 utest_alpha=utest_alpha, 264 first_col_width=first_col_width, 265 use_color=use_color) 266 267 return output_strs 268 269 270############################################################################### 271# Unit tests 272 273 274class TestGetUniqueBenchmarkNames(unittest.TestCase): 275 def load_results(self): 276 import json 277 testInputs = os.path.join( 278 os.path.dirname( 279 os.path.realpath(__file__)), 280 'Inputs') 281 testOutput = os.path.join(testInputs, 'test3_run0.json') 282 with open(testOutput, 'r') as f: 283 json = json.load(f) 284 return json 285 286 def test_basic(self): 287 expect_lines = [ 288 'BM_One', 289 'BM_Two', 290 'short', # These two are not sorted 291 'medium', # These two are not sorted 292 ] 293 json = self.load_results() 294 output_lines = get_unique_benchmark_names(json) 295 print("\n") 296 print("\n".join(output_lines)) 297 self.assertEqual(len(output_lines), len(expect_lines)) 298 for i in range(0, len(output_lines)): 299 self.assertEqual(expect_lines[i], output_lines[i]) 300 301 302class TestReportDifference(unittest.TestCase): 303 def load_results(self): 304 import json 305 testInputs = os.path.join( 306 os.path.dirname( 307 os.path.realpath(__file__)), 308 'Inputs') 309 testOutput1 = os.path.join(testInputs, 'test1_run1.json') 310 testOutput2 = os.path.join(testInputs, 'test1_run2.json') 311 with open(testOutput1, 'r') as f: 312 json1 = json.load(f) 313 with open(testOutput2, 'r') as f: 314 json2 = json.load(f) 315 return json1, json2 316 317 def test_basic(self): 318 expect_lines = [ 319 ['BM_SameTimes', '+0.0000', '+0.0000', '10', '10', '10', '10'], 320 ['BM_2xFaster', '-0.5000', '-0.5000', '50', '25', '50', '25'], 321 ['BM_2xSlower', '+1.0000', '+1.0000', '50', '100', '50', '100'], 322 ['BM_1PercentFaster', '-0.0100', '-0.0100', '100', '99', '100', '99'], 323 ['BM_1PercentSlower', '+0.0100', '+0.0100', '100', '101', '100', '101'], 324 ['BM_10PercentFaster', '-0.1000', '-0.1000', '100', '90', '100', '90'], 325 ['BM_10PercentSlower', '+0.1000', '+0.1000', '100', '110', '100', '110'], 326 ['BM_100xSlower', '+99.0000', '+99.0000', 327 '100', '10000', '100', '10000'], 328 ['BM_100xFaster', '-0.9900', '-0.9900', 329 '10000', '100', '10000', '100'], 330 ['BM_10PercentCPUToTime', '+0.1000', 331 '-0.1000', '100', '110', '100', '90'], 332 ['BM_ThirdFaster', '-0.3333', '-0.3334', '100', '67', '100', '67'], 333 ['BM_BadTimeUnit', '-0.9000', '+0.2000', '0', '0', '0', '1'], 334 ] 335 json1, json2 = self.load_results() 336 output_lines_with_header = generate_difference_report( 337 json1, json2, use_color=False) 338 output_lines = output_lines_with_header[2:] 339 print("\n") 340 print("\n".join(output_lines_with_header)) 341 self.assertEqual(len(output_lines), len(expect_lines)) 342 for i in range(0, len(output_lines)): 343 parts = [x for x in output_lines[i].split(' ') if x] 344 self.assertEqual(len(parts), 7) 345 self.assertEqual(expect_lines[i], parts) 346 347 348class TestReportDifferenceBetweenFamilies(unittest.TestCase): 349 def load_result(self): 350 import json 351 testInputs = os.path.join( 352 os.path.dirname( 353 os.path.realpath(__file__)), 354 'Inputs') 355 testOutput = os.path.join(testInputs, 'test2_run.json') 356 with open(testOutput, 'r') as f: 357 json = json.load(f) 358 return json 359 360 def test_basic(self): 361 expect_lines = [ 362 ['.', '-0.5000', '-0.5000', '10', '5', '10', '5'], 363 ['./4', '-0.5000', '-0.5000', '40', '20', '40', '20'], 364 ['Prefix/.', '-0.5000', '-0.5000', '20', '10', '20', '10'], 365 ['Prefix/./3', '-0.5000', '-0.5000', '30', '15', '30', '15'], 366 ] 367 json = self.load_result() 368 json1 = filter_benchmark(json, "BM_Z.ro", ".") 369 json2 = filter_benchmark(json, "BM_O.e", ".") 370 output_lines_with_header = generate_difference_report( 371 json1, json2, use_color=False) 372 output_lines = output_lines_with_header[2:] 373 print("\n") 374 print("\n".join(output_lines_with_header)) 375 self.assertEqual(len(output_lines), len(expect_lines)) 376 for i in range(0, len(output_lines)): 377 parts = [x for x in output_lines[i].split(' ') if x] 378 self.assertEqual(len(parts), 7) 379 self.assertEqual(expect_lines[i], parts) 380 381 382class TestReportDifferenceWithUTest(unittest.TestCase): 383 def load_results(self): 384 import json 385 testInputs = os.path.join( 386 os.path.dirname( 387 os.path.realpath(__file__)), 388 'Inputs') 389 testOutput1 = os.path.join(testInputs, 'test3_run0.json') 390 testOutput2 = os.path.join(testInputs, 'test3_run1.json') 391 with open(testOutput1, 'r') as f: 392 json1 = json.load(f) 393 with open(testOutput2, 'r') as f: 394 json2 = json.load(f) 395 return json1, json2 396 397 def test_utest(self): 398 expect_lines = [] 399 expect_lines = [ 400 ['BM_One', '-0.1000', '+0.1000', '10', '9', '100', '110'], 401 ['BM_Two', '+0.1111', '-0.0111', '9', '10', '90', '89'], 402 ['BM_Two', '-0.1250', '-0.1628', '8', '7', '86', '72'], 403 ['BM_Two_pvalue', 404 '0.6985', 405 '0.6985', 406 'U', 407 'Test,', 408 'Repetitions:', 409 '2', 410 'vs', 411 '2.', 412 'WARNING:', 413 'Results', 414 'unreliable!', 415 '9+', 416 'repetitions', 417 'recommended.'], 418 ['short', '-0.1250', '-0.0625', '8', '7', '80', '75'], 419 ['short', '-0.4325', '-0.1351', '8', '5', '77', '67'], 420 ['short_pvalue', 421 '0.7671', 422 '0.1489', 423 'U', 424 'Test,', 425 'Repetitions:', 426 '2', 427 'vs', 428 '3.', 429 'WARNING:', 430 'Results', 431 'unreliable!', 432 '9+', 433 'repetitions', 434 'recommended.'], 435 ['medium', '-0.3750', '-0.3375', '8', '5', '80', '53'], 436 ] 437 json1, json2 = self.load_results() 438 output_lines_with_header = generate_difference_report( 439 json1, json2, utest=True, utest_alpha=0.05, use_color=False) 440 output_lines = output_lines_with_header[2:] 441 print("\n") 442 print("\n".join(output_lines_with_header)) 443 self.assertEqual(len(output_lines), len(expect_lines)) 444 for i in range(0, len(output_lines)): 445 parts = [x for x in output_lines[i].split(' ') if x] 446 self.assertEqual(expect_lines[i], parts) 447 448 449class TestReportDifferenceWithUTestWhileDisplayingAggregatesOnly( 450 unittest.TestCase): 451 def load_results(self): 452 import json 453 testInputs = os.path.join( 454 os.path.dirname( 455 os.path.realpath(__file__)), 456 'Inputs') 457 testOutput1 = os.path.join(testInputs, 'test3_run0.json') 458 testOutput2 = os.path.join(testInputs, 'test3_run1.json') 459 with open(testOutput1, 'r') as f: 460 json1 = json.load(f) 461 with open(testOutput2, 'r') as f: 462 json2 = json.load(f) 463 return json1, json2 464 465 def test_utest(self): 466 expect_lines = [] 467 expect_lines = [ 468 ['BM_One', '-0.1000', '+0.1000', '10', '9', '100', '110'], 469 ['BM_Two', '+0.1111', '-0.0111', '9', '10', '90', '89'], 470 ['BM_Two', '-0.1250', '-0.1628', '8', '7', '86', '72'], 471 ['BM_Two_pvalue', 472 '0.6985', 473 '0.6985', 474 'U', 475 'Test,', 476 'Repetitions:', 477 '2', 478 'vs', 479 '2.', 480 'WARNING:', 481 'Results', 482 'unreliable!', 483 '9+', 484 'repetitions', 485 'recommended.'], 486 ['short', '-0.1250', '-0.0625', '8', '7', '80', '75'], 487 ['short', '-0.4325', '-0.1351', '8', '5', '77', '67'], 488 ['short_pvalue', 489 '0.7671', 490 '0.1489', 491 'U', 492 'Test,', 493 'Repetitions:', 494 '2', 495 'vs', 496 '3.', 497 'WARNING:', 498 'Results', 499 'unreliable!', 500 '9+', 501 'repetitions', 502 'recommended.'], 503 ] 504 json1, json2 = self.load_results() 505 output_lines_with_header = generate_difference_report( 506 json1, json2, display_aggregates_only=True, 507 utest=True, utest_alpha=0.05, use_color=False) 508 output_lines = output_lines_with_header[2:] 509 print("\n") 510 print("\n".join(output_lines_with_header)) 511 self.assertEqual(len(output_lines), len(expect_lines)) 512 for i in range(0, len(output_lines)): 513 parts = [x for x in output_lines[i].split(' ') if x] 514 self.assertEqual(expect_lines[i], parts) 515 516 517if __name__ == '__main__': 518 unittest.main() 519 520# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 521# kate: tab-width: 4; replace-tabs on; indent-width 4; tab-indents: off; 522# kate: indent-mode python; remove-trailing-spaces modified; 523