1"""report.py - Utilities for reporting statistics about benchmark results 2""" 3 4import unittest 5import os 6import re 7import copy 8import random 9 10from scipy.stats import mannwhitneyu, gmean 11from numpy import array 12from pandas import Timedelta 13 14 15class BenchmarkColor(object): 16 def __init__(self, name, code): 17 self.name = name 18 self.code = code 19 20 def __repr__(self): 21 return '%s%r' % (self.__class__.__name__, 22 (self.name, self.code)) 23 24 def __format__(self, format): 25 return self.code 26 27 28# Benchmark Colors Enumeration 29BC_NONE = BenchmarkColor('NONE', '') 30BC_MAGENTA = BenchmarkColor('MAGENTA', '\033[95m') 31BC_CYAN = BenchmarkColor('CYAN', '\033[96m') 32BC_OKBLUE = BenchmarkColor('OKBLUE', '\033[94m') 33BC_OKGREEN = BenchmarkColor('OKGREEN', '\033[32m') 34BC_HEADER = BenchmarkColor('HEADER', '\033[92m') 35BC_WARNING = BenchmarkColor('WARNING', '\033[93m') 36BC_WHITE = BenchmarkColor('WHITE', '\033[97m') 37BC_FAIL = BenchmarkColor('FAIL', '\033[91m') 38BC_ENDC = BenchmarkColor('ENDC', '\033[0m') 39BC_BOLD = BenchmarkColor('BOLD', '\033[1m') 40BC_UNDERLINE = BenchmarkColor('UNDERLINE', '\033[4m') 41 42UTEST_MIN_REPETITIONS = 2 43UTEST_OPTIMAL_REPETITIONS = 9 # Lowest reasonable number, More is better. 44UTEST_COL_NAME = "_pvalue" 45 46 47def color_format(use_color, fmt_str, *args, **kwargs): 48 """ 49 Return the result of 'fmt_str.format(*args, **kwargs)' after transforming 50 'args' and 'kwargs' according to the value of 'use_color'. If 'use_color' 51 is False then all color codes in 'args' and 'kwargs' are replaced with 52 the empty string. 53 """ 54 assert use_color is True or use_color is False 55 if not use_color: 56 args = [arg if not isinstance(arg, BenchmarkColor) else BC_NONE 57 for arg in args] 58 kwargs = {key: arg if not isinstance(arg, BenchmarkColor) else BC_NONE 59 for key, arg in kwargs.items()} 60 return fmt_str.format(*args, **kwargs) 61 62 63def find_longest_name(benchmark_list): 64 """ 65 Return the length of the longest benchmark name in a given list of 66 benchmark JSON objects 67 """ 68 longest_name = 1 69 for bc in benchmark_list: 70 if len(bc['name']) > longest_name: 71 longest_name = len(bc['name']) 72 return longest_name 73 74 75def calculate_change(old_val, new_val): 76 """ 77 Return a float representing the decimal change between old_val and new_val. 78 """ 79 if old_val == 0 and new_val == 0: 80 return 0.0 81 if old_val == 0: 82 return float(new_val - old_val) / (float(old_val + new_val) / 2) 83 return float(new_val - old_val) / abs(old_val) 84 85 86def filter_benchmark(json_orig, family, replacement=""): 87 """ 88 Apply a filter to the json, and only leave the 'family' of benchmarks. 89 """ 90 regex = re.compile(family) 91 filtered = {} 92 filtered['benchmarks'] = [] 93 for be in json_orig['benchmarks']: 94 if not regex.search(be['name']): 95 continue 96 filteredbench = copy.deepcopy(be) # Do NOT modify the old name! 97 filteredbench['name'] = regex.sub(replacement, filteredbench['name']) 98 filtered['benchmarks'].append(filteredbench) 99 return filtered 100 101 102def get_unique_benchmark_names(json): 103 """ 104 While *keeping* the order, give all the unique 'names' used for benchmarks. 
105 """ 106 seen = set() 107 uniqued = [x['name'] for x in json['benchmarks'] 108 if x['name'] not in seen and 109 (seen.add(x['name']) or True)] 110 return uniqued 111 112 113def intersect(list1, list2): 114 """ 115 Given two lists, get a new list consisting of the elements only contained 116 in *both of the input lists*, while preserving the ordering. 117 """ 118 return [x for x in list1 if x in list2] 119 120 121def is_potentially_comparable_benchmark(x): 122 return ('time_unit' in x and 'real_time' in x and 'cpu_time' in x) 123 124 125def partition_benchmarks(json1, json2): 126 """ 127 While preserving the ordering, find benchmarks with the same names in 128 both of the inputs, and group them. 129 (i.e. partition/filter into groups with common name) 130 """ 131 json1_unique_names = get_unique_benchmark_names(json1) 132 json2_unique_names = get_unique_benchmark_names(json2) 133 names = intersect(json1_unique_names, json2_unique_names) 134 partitions = [] 135 for name in names: 136 time_unit = None 137 # Pick the time unit from the first entry of the lhs benchmark. 138 # We should be careful not to crash with unexpected input. 139 for x in json1['benchmarks']: 140 if (x['name'] == name and is_potentially_comparable_benchmark(x)): 141 time_unit = x['time_unit'] 142 break 143 if time_unit is None: 144 continue 145 # Filter by name and time unit. 146 # All the repetitions are assumed to be comparable. 147 lhs = [x for x in json1['benchmarks'] if x['name'] == name and 148 x['time_unit'] == time_unit] 149 rhs = [x for x in json2['benchmarks'] if x['name'] == name and 150 x['time_unit'] == time_unit] 151 partitions.append([lhs, rhs]) 152 return partitions 153 154 155def get_timedelta_field_as_seconds(benchmark, field_name): 156 """ 157 Get value of field_name field of benchmark, which is time with time unit 158 time_unit, as time in seconds. 159 """ 160 time_unit = benchmark['time_unit'] if 'time_unit' in benchmark else 's' 161 dt = Timedelta(benchmark[field_name], time_unit) 162 return dt / Timedelta(1, 's') 163 164 165def calculate_geomean(json): 166 """ 167 Extract all real/cpu times from all the benchmarks as seconds, 168 and calculate their geomean. 169 """ 170 times = [] 171 for benchmark in json['benchmarks']: 172 if 'run_type' in benchmark and benchmark['run_type'] == 'aggregate': 173 continue 174 times.append([get_timedelta_field_as_seconds(benchmark, 'real_time'), 175 get_timedelta_field_as_seconds(benchmark, 'cpu_time')]) 176 return gmean(times) if times else array([]) 177 178 179def extract_field(partition, field_name): 180 # The count of elements may be different. We want *all* of them. 181 lhs = [x[field_name] for x in partition[0]] 182 rhs = [x[field_name] for x in partition[1]] 183 return [lhs, rhs] 184 185 186def calc_utest(timings_cpu, timings_time): 187 min_rep_cnt = min(len(timings_time[0]), 188 len(timings_time[1]), 189 len(timings_cpu[0]), 190 len(timings_cpu[1])) 191 192 # Does *everything* has at least UTEST_MIN_REPETITIONS repetitions? 

def calc_utest(timings_cpu, timings_time):
    min_rep_cnt = min(len(timings_time[0]),
                      len(timings_time[1]),
                      len(timings_cpu[0]),
                      len(timings_cpu[1]))

    # Does *everything* have at least UTEST_MIN_REPETITIONS repetitions?
    if min_rep_cnt < UTEST_MIN_REPETITIONS:
        return False, None, None

    time_pvalue = mannwhitneyu(
        timings_time[0], timings_time[1], alternative='two-sided').pvalue
    cpu_pvalue = mannwhitneyu(
        timings_cpu[0], timings_cpu[1], alternative='two-sided').pvalue

    return (min_rep_cnt >= UTEST_OPTIMAL_REPETITIONS), cpu_pvalue, time_pvalue


def print_utest(bc_name, utest, utest_alpha, first_col_width, use_color=True):
    def get_utest_color(pval):
        return BC_FAIL if pval >= utest_alpha else BC_OKGREEN

    # Check if we failed miserably with minimum required repetitions for utest
    if not utest['have_optimal_repetitions'] and utest['cpu_pvalue'] is None and utest['time_pvalue'] is None:
        return []

    dsc = "U Test, Repetitions: {} vs {}".format(
        utest['nr_of_repetitions'], utest['nr_of_repetitions_other'])
    dsc_color = BC_OKGREEN

    # We still got some results to show but issue a warning about it.
    if not utest['have_optimal_repetitions']:
        dsc_color = BC_WARNING
        dsc += ". WARNING: Results unreliable! {}+ repetitions recommended.".format(
            UTEST_OPTIMAL_REPETITIONS)

    special_str = "{}{:<{}s}{endc}{}{:16.4f}{endc}{}{:16.4f}{endc}{} {}"

    return [color_format(use_color,
                         special_str,
                         BC_HEADER,
                         "{}{}".format(bc_name, UTEST_COL_NAME),
                         first_col_width,
                         get_utest_color(
                             utest['time_pvalue']), utest['time_pvalue'],
                         get_utest_color(
                             utest['cpu_pvalue']), utest['cpu_pvalue'],
                         dsc_color, dsc,
                         endc=BC_ENDC)]
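
# Illustrative only: with two repetitions per side (the bare minimum), the
# Mann-Whitney U test still runs, but the first returned flag stays False
# because fewer than UTEST_OPTIMAL_REPETITIONS repetitions were seen:
#
#   have_opt, cpu_p, time_p = calc_utest(timings_cpu=[[90, 89], [88, 87]],
#                                        timings_time=[[9, 10], [8, 7]])
#   # have_opt is False; cpu_p and time_p are two-sided p-values in (0, 1].
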

def get_difference_report(
        json1,
        json2,
        utest=False):
    """
    Calculate and report the difference between each test of two benchmark
    runs specified as 'json1' and 'json2'. Output is another json containing
    relevant details for each test run.
    """
    assert utest is True or utest is False

    diff_report = []
    partitions = partition_benchmarks(json1, json2)
    for partition in partitions:
        benchmark_name = partition[0][0]['name']
        time_unit = partition[0][0]['time_unit']
        measurements = []
        utest_results = {}
        # Careful, we may have different repetition count.
        for i in range(min(len(partition[0]), len(partition[1]))):
            bn = partition[0][i]
            other_bench = partition[1][i]
            measurements.append({
                'real_time': bn['real_time'],
                'cpu_time': bn['cpu_time'],
                'real_time_other': other_bench['real_time'],
                'cpu_time_other': other_bench['cpu_time'],
                'time': calculate_change(bn['real_time'], other_bench['real_time']),
                'cpu': calculate_change(bn['cpu_time'], other_bench['cpu_time'])
            })

        # After processing the whole partition, if requested, do the U test.
        if utest:
            timings_cpu = extract_field(partition, 'cpu_time')
            timings_time = extract_field(partition, 'real_time')
            have_optimal_repetitions, cpu_pvalue, time_pvalue = calc_utest(
                timings_cpu, timings_time)
            # Check against None, not truthiness: a p-value of exactly 0.0
            # would otherwise be silently dropped.
            if cpu_pvalue is not None and time_pvalue is not None:
                utest_results = {
                    'have_optimal_repetitions': have_optimal_repetitions,
                    'cpu_pvalue': cpu_pvalue,
                    'time_pvalue': time_pvalue,
                    'nr_of_repetitions': len(timings_cpu[0]),
                    'nr_of_repetitions_other': len(timings_cpu[1])
                }

        # Store only if we had any measurements for the given benchmark.
        # E.g. partition_benchmarks will filter out the benchmarks having
        # time units which are not compatible with other time units in the
        # benchmark suite.
        if measurements:
            run_type = partition[0][0]['run_type'] if 'run_type' in partition[0][0] else ''
            aggregate_name = partition[0][0]['aggregate_name'] if run_type == 'aggregate' and 'aggregate_name' in partition[0][0] else ''
            diff_report.append({
                'name': benchmark_name,
                'measurements': measurements,
                'time_unit': time_unit,
                'run_type': run_type,
                'aggregate_name': aggregate_name,
                'utest': utest_results
            })

    lhs_gmean = calculate_geomean(json1)
    rhs_gmean = calculate_geomean(json2)
    if lhs_gmean.any() and rhs_gmean.any():
        diff_report.append({
            'name': 'OVERALL_GEOMEAN',
            'measurements': [{
                'real_time': lhs_gmean[0],
                'cpu_time': lhs_gmean[1],
                'real_time_other': rhs_gmean[0],
                'cpu_time_other': rhs_gmean[1],
                'time': calculate_change(lhs_gmean[0], rhs_gmean[0]),
                'cpu': calculate_change(lhs_gmean[1], rhs_gmean[1])
            }],
            'time_unit': 's',
            'run_type': 'aggregate',
            'aggregate_name': 'geomean',
            'utest': {}
        })

    return diff_report
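
# A minimal end-to-end sketch of producing the diff JSON (the file names are
# hypothetical; both files are Google Benchmark JSON outputs of one suite):
#
#   import json
#   with open('baseline.json') as f:
#       json1 = json.load(f)
#   with open('contender.json') as f:
#       json2 = json.load(f)
#   diff = get_difference_report(json1, json2, utest=True)
#   # Each entry carries: name, measurements, time_unit, run_type,
#   # aggregate_name and utest, plus a trailing 'OVERALL_GEOMEAN' entry.
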

def print_difference_report(
        json_diff_report,
        include_aggregates_only=False,
        utest=False,
        utest_alpha=0.05,
        use_color=True):
    """
    Pretty-print the difference between two benchmark runs, as precomputed
    by get_difference_report and passed in as 'json_diff_report'. Returns
    the report as a list of lines.
    """
    assert utest is True or utest is False

    def get_color(res):
        if res > 0.05:
            return BC_FAIL
        elif res > -0.07:
            return BC_WHITE
        else:
            return BC_CYAN

    first_col_width = find_longest_name(json_diff_report)
    first_col_width = max(
        first_col_width,
        len('Benchmark'))
    first_col_width += len(UTEST_COL_NAME)
    first_line = "{:<{}s}Time             CPU      Time Old      Time New       CPU Old       CPU New".format(
        'Benchmark', 12 + first_col_width)
    output_strs = [first_line, '-' * len(first_line)]

    fmt_str = "{}{:<{}s}{endc}{}{:+16.4f}{endc}{}{:+16.4f}{endc}{:14.0f}{:14.0f}{endc}{:14.0f}{:14.0f}"
    for benchmark in json_diff_report:
        # *If* we were asked to only include aggregates,
        # and if it is non-aggregate, then don't print it.
        if not include_aggregates_only or 'run_type' not in benchmark or benchmark['run_type'] == 'aggregate':
            for measurement in benchmark['measurements']:
                output_strs += [color_format(use_color,
                                             fmt_str,
                                             BC_HEADER,
                                             benchmark['name'],
                                             first_col_width,
                                             get_color(measurement['time']),
                                             measurement['time'],
                                             get_color(measurement['cpu']),
                                             measurement['cpu'],
                                             measurement['real_time'],
                                             measurement['real_time_other'],
                                             measurement['cpu_time'],
                                             measurement['cpu_time_other'],
                                             endc=BC_ENDC)]

        # After processing the measurements, if requested and
        # if applicable (e.g. u-test exists for the given benchmark),
        # print the U test.
        if utest and benchmark['utest']:
            output_strs += print_utest(benchmark['name'],
                                       benchmark['utest'],
                                       utest_alpha=utest_alpha,
                                       first_col_width=first_col_width,
                                       use_color=use_color)

    return output_strs
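
# Usage sketch, continuing the example above (illustrative only):
#
#   lines = print_difference_report(diff, utest=True, utest_alpha=0.05,
#                                   use_color=False)
#   print('\n'.join(lines))
#
# Row coloring (when use_color=True): regressions beyond +5% are BC_FAIL,
# improvements beyond -7% are BC_CYAN, and everything in between BC_WHITE,
# per get_color above.
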

###############################################################################
# Unit tests


class TestGetUniqueBenchmarkNames(unittest.TestCase):
    def load_results(self):
        import json
        testInputs = os.path.join(
            os.path.dirname(
                os.path.realpath(__file__)),
            'Inputs')
        testOutput = os.path.join(testInputs, 'test3_run0.json')
        with open(testOutput, 'r') as f:
            json = json.load(f)
        return json

    def test_basic(self):
        expect_lines = [
            'BM_One',
            'BM_Two',
            'short',   # These two are not sorted
            'medium',  # These two are not sorted
        ]
        json = self.load_results()
        output_lines = get_unique_benchmark_names(json)
        print("\n")
        print("\n".join(output_lines))
        self.assertEqual(len(output_lines), len(expect_lines))
        for i in range(0, len(output_lines)):
            self.assertEqual(expect_lines[i], output_lines[i])


class TestReportDifference(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        def load_results():
            import json
            testInputs = os.path.join(
                os.path.dirname(
                    os.path.realpath(__file__)),
                'Inputs')
            testOutput1 = os.path.join(testInputs, 'test1_run1.json')
            testOutput2 = os.path.join(testInputs, 'test1_run2.json')
            with open(testOutput1, 'r') as f:
                json1 = json.load(f)
            with open(testOutput2, 'r') as f:
                json2 = json.load(f)
            return json1, json2

        json1, json2 = load_results()
        cls.json_diff_report = get_difference_report(json1, json2)

    def test_json_diff_report_pretty_printing(self):
        expect_lines = [
            ['BM_SameTimes', '+0.0000', '+0.0000', '10', '10', '10', '10'],
            ['BM_2xFaster', '-0.5000', '-0.5000', '50', '25', '50', '25'],
            ['BM_2xSlower', '+1.0000', '+1.0000', '50', '100', '50', '100'],
            ['BM_1PercentFaster', '-0.0100', '-0.0100', '100', '99', '100', '99'],
            ['BM_1PercentSlower', '+0.0100', '+0.0100', '100', '101', '100', '101'],
            ['BM_10PercentFaster', '-0.1000', '-0.1000', '100', '90', '100', '90'],
            ['BM_10PercentSlower', '+0.1000', '+0.1000', '100', '110', '100', '110'],
            ['BM_100xSlower', '+99.0000', '+99.0000',
             '100', '10000', '100', '10000'],
            ['BM_100xFaster', '-0.9900', '-0.9900',
             '10000', '100', '10000', '100'],
            ['BM_10PercentCPUToTime', '+0.1000',
             '-0.1000', '100', '110', '100', '90'],
            ['BM_ThirdFaster', '-0.3333', '-0.3334', '100', '67', '100', '67'],
            ['BM_NotBadTimeUnit', '-0.9000', '+0.2000', '0', '0', '0', '1'],
            ['OVERALL_GEOMEAN', '-0.8344', '-0.8026', '0', '0', '0', '0']
        ]
        output_lines_with_header = print_difference_report(
            self.json_diff_report, use_color=False)
        output_lines = output_lines_with_header[2:]
        print("\n")
        print("\n".join(output_lines_with_header))
        self.assertEqual(len(output_lines), len(expect_lines))
        for i in range(0, len(output_lines)):
            parts = [x for x in output_lines[i].split(' ') if x]
            self.assertEqual(len(parts), 7)
            self.assertEqual(expect_lines[i], parts)

    def test_json_diff_report_output(self):
        expected_output = [
            {
                'name': 'BM_SameTimes',
                'measurements': [{'time': 0.0000, 'cpu': 0.0000, 'real_time': 10, 'real_time_other': 10, 'cpu_time': 10, 'cpu_time_other': 10}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': 'BM_2xFaster',
                'measurements': [{'time': -0.5000, 'cpu': -0.5000, 'real_time': 50, 'real_time_other': 25, 'cpu_time': 50, 'cpu_time_other': 25}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': 'BM_2xSlower',
                'measurements': [{'time': 1.0000, 'cpu': 1.0000, 'real_time': 50, 'real_time_other': 100, 'cpu_time': 50, 'cpu_time_other': 100}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': 'BM_1PercentFaster',
                'measurements': [{'time': -0.0100, 'cpu': -0.0100, 'real_time': 100, 'real_time_other': 98.9999999, 'cpu_time': 100, 'cpu_time_other': 98.9999999}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': 'BM_1PercentSlower',
                'measurements': [{'time': 0.0100, 'cpu': 0.0100, 'real_time': 100, 'real_time_other': 101, 'cpu_time': 100, 'cpu_time_other': 101}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': 'BM_10PercentFaster',
                'measurements': [{'time': -0.1000, 'cpu': -0.1000, 'real_time': 100, 'real_time_other': 90, 'cpu_time': 100, 'cpu_time_other': 90}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': 'BM_10PercentSlower',
                'measurements': [{'time': 0.1000, 'cpu': 0.1000, 'real_time': 100, 'real_time_other': 110, 'cpu_time': 100, 'cpu_time_other': 110}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': 'BM_100xSlower',
                'measurements': [{'time': 99.0000, 'cpu': 99.0000, 'real_time': 100, 'real_time_other': 10000, 'cpu_time': 100, 'cpu_time_other': 10000}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': 'BM_100xFaster',
                'measurements': [{'time': -0.9900, 'cpu': -0.9900, 'real_time': 10000, 'real_time_other': 100, 'cpu_time': 10000, 'cpu_time_other': 100}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': 'BM_10PercentCPUToTime',
                'measurements': [{'time': 0.1000, 'cpu': -0.1000, 'real_time': 100, 'real_time_other': 110, 'cpu_time': 100, 'cpu_time_other': 90}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': 'BM_ThirdFaster',
                'measurements': [{'time': -0.3333, 'cpu': -0.3334, 'real_time': 100, 'real_time_other': 67, 'cpu_time': 100, 'cpu_time_other': 67}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': 'BM_NotBadTimeUnit',
                'measurements': [{'time': -0.9000, 'cpu': 0.2000, 'real_time': 0.4, 'real_time_other': 0.04, 'cpu_time': 0.5, 'cpu_time_other': 0.6}],
                'time_unit': 's',
                'utest': {}
            },
            {
                'name': 'OVERALL_GEOMEAN',
                'measurements': [{'real_time': 1.193776641714438e-06, 'cpu_time': 1.2144445585302297e-06,
                                  'real_time_other': 1.9768988699420897e-07, 'cpu_time_other': 2.397447755209533e-07,
                                  'time': -0.834399601997324, 'cpu': -0.8025889499549471}],
                'time_unit': 's',
                'run_type': 'aggregate',
                'aggregate_name': 'geomean',
                'utest': {}
            },
        ]
        self.assertEqual(len(self.json_diff_report), len(expected_output))
        for out, expected in zip(
                self.json_diff_report, expected_output):
            self.assertEqual(out['name'], expected['name'])
            self.assertEqual(out['time_unit'], expected['time_unit'])
            assert_utest(self, out, expected)
            assert_measurements(self, out, expected)

class TestReportDifferenceBetweenFamilies(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        def load_result():
            import json
            testInputs = os.path.join(
                os.path.dirname(
                    os.path.realpath(__file__)),
                'Inputs')
            testOutput = os.path.join(testInputs, 'test2_run.json')
            with open(testOutput, 'r') as f:
                json = json.load(f)
            return json

        json = load_result()
        json1 = filter_benchmark(json, "BM_Z.ro", ".")
        json2 = filter_benchmark(json, "BM_O.e", ".")
        cls.json_diff_report = get_difference_report(json1, json2)

    def test_json_diff_report_pretty_printing(self):
        expect_lines = [
            ['.', '-0.5000', '-0.5000', '10', '5', '10', '5'],
            ['./4', '-0.5000', '-0.5000', '40', '20', '40', '20'],
            ['Prefix/.', '-0.5000', '-0.5000', '20', '10', '20', '10'],
            ['Prefix/./3', '-0.5000', '-0.5000', '30', '15', '30', '15'],
            ['OVERALL_GEOMEAN', '-0.5000', '-0.5000', '0', '0', '0', '0']
        ]
        output_lines_with_header = print_difference_report(
            self.json_diff_report, use_color=False)
        output_lines = output_lines_with_header[2:]
        print("\n")
        print("\n".join(output_lines_with_header))
        self.assertEqual(len(output_lines), len(expect_lines))
        for i in range(0, len(output_lines)):
            parts = [x for x in output_lines[i].split(' ') if x]
            self.assertEqual(len(parts), 7)
            self.assertEqual(expect_lines[i], parts)

    def test_json_diff_report(self):
        expected_output = [
            {
                'name': u'.',
                'measurements': [{'time': -0.5, 'cpu': -0.5, 'real_time': 10, 'real_time_other': 5, 'cpu_time': 10, 'cpu_time_other': 5}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': u'./4',
                'measurements': [{'time': -0.5, 'cpu': -0.5, 'real_time': 40, 'real_time_other': 20, 'cpu_time': 40, 'cpu_time_other': 20}],
                'time_unit': 'ns',
                'utest': {},
            },
            {
                'name': u'Prefix/.',
                'measurements': [{'time': -0.5, 'cpu': -0.5, 'real_time': 20, 'real_time_other': 10, 'cpu_time': 20, 'cpu_time_other': 10}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': u'Prefix/./3',
                'measurements': [{'time': -0.5, 'cpu': -0.5, 'real_time': 30, 'real_time_other': 15, 'cpu_time': 30, 'cpu_time_other': 15}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': 'OVERALL_GEOMEAN',
                'measurements': [{'real_time': 2.213363839400641e-08, 'cpu_time': 2.213363839400641e-08,
                                  'real_time_other': 1.1066819197003185e-08, 'cpu_time_other': 1.1066819197003185e-08,
                                  'time': -0.5000000000000009, 'cpu': -0.5000000000000009}],
                'time_unit': 's',
                'run_type': 'aggregate',
                'aggregate_name': 'geomean',
                'utest': {}
            }
        ]
        self.assertEqual(len(self.json_diff_report), len(expected_output))
        for out, expected in zip(
                self.json_diff_report, expected_output):
            self.assertEqual(out['name'], expected['name'])
            self.assertEqual(out['time_unit'], expected['time_unit'])
            assert_utest(self, out, expected)
            assert_measurements(self, out, expected)

class TestReportDifferenceWithUTest(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        def load_results():
            import json
            testInputs = os.path.join(
                os.path.dirname(
                    os.path.realpath(__file__)),
                'Inputs')
            testOutput1 = os.path.join(testInputs, 'test3_run0.json')
            testOutput2 = os.path.join(testInputs, 'test3_run1.json')
            with open(testOutput1, 'r') as f:
                json1 = json.load(f)
            with open(testOutput2, 'r') as f:
                json2 = json.load(f)
            return json1, json2

        json1, json2 = load_results()
        cls.json_diff_report = get_difference_report(
            json1, json2, utest=True)

    def test_json_diff_report_pretty_printing(self):
        expect_lines = [
            ['BM_One', '-0.1000', '+0.1000', '10', '9', '100', '110'],
            ['BM_Two', '+0.1111', '-0.0111', '9', '10', '90', '89'],
            ['BM_Two', '-0.1250', '-0.1628', '8', '7', '86', '72'],
            ['BM_Two_pvalue',
             '1.0000',
             '0.6667',
             'U',
             'Test,',
             'Repetitions:',
             '2',
             'vs',
             '2.',
             'WARNING:',
             'Results',
             'unreliable!',
             '9+',
             'repetitions',
             'recommended.'],
            ['short', '-0.1250', '-0.0625', '8', '7', '80', '75'],
            ['short', '-0.4325', '-0.1351', '8', '5', '77', '67'],
            ['short_pvalue',
             '0.7671',
             '0.2000',
             'U',
             'Test,',
             'Repetitions:',
             '2',
             'vs',
             '3.',
             'WARNING:',
             'Results',
             'unreliable!',
             '9+',
             'repetitions',
             'recommended.'],
            ['medium', '-0.3750', '-0.3375', '8', '5', '80', '53'],
            ['OVERALL_GEOMEAN', '+1.6405', '-0.6985', '0', '0', '0', '0']
        ]
        output_lines_with_header = print_difference_report(
            self.json_diff_report, utest=True, utest_alpha=0.05, use_color=False)
        output_lines = output_lines_with_header[2:]
        print("\n")
        print("\n".join(output_lines_with_header))
        self.assertEqual(len(output_lines), len(expect_lines))
        for i in range(0, len(output_lines)):
            parts = [x for x in output_lines[i].split(' ') if x]
            self.assertEqual(expect_lines[i], parts)

    def test_json_diff_report_pretty_printing_aggregates_only(self):
        expect_lines = [
            ['BM_One', '-0.1000', '+0.1000', '10', '9', '100', '110'],
            ['BM_Two_pvalue',
             '1.0000',
             '0.6667',
             'U',
             'Test,',
             'Repetitions:',
             '2',
             'vs',
             '2.',
             'WARNING:',
             'Results',
             'unreliable!',
             '9+',
             'repetitions',
             'recommended.'],
            ['short', '-0.1250', '-0.0625', '8', '7', '80', '75'],
            ['short', '-0.4325', '-0.1351', '8', '5', '77', '67'],
            ['short_pvalue',
             '0.7671',
             '0.2000',
             'U',
             'Test,',
             'Repetitions:',
             '2',
             'vs',
             '3.',
             'WARNING:',
             'Results',
             'unreliable!',
             '9+',
             'repetitions',
             'recommended.'],
            ['OVERALL_GEOMEAN', '+1.6405', '-0.6985', '0', '0', '0', '0']
        ]
        output_lines_with_header = print_difference_report(
            self.json_diff_report, include_aggregates_only=True, utest=True, utest_alpha=0.05, use_color=False)
        output_lines = output_lines_with_header[2:]
        print("\n")
        print("\n".join(output_lines_with_header))
        self.assertEqual(len(output_lines), len(expect_lines))
        for i in range(0, len(output_lines)):
            parts = [x for x in output_lines[i].split(' ') if x]
            self.assertEqual(expect_lines[i], parts)

    def test_json_diff_report(self):
        expected_output = [
            {
                'name': u'BM_One',
                'measurements': [
                    {'time': -0.1,
                     'cpu': 0.1,
                     'real_time': 10,
                     'real_time_other': 9,
                     'cpu_time': 100,
                     'cpu_time_other': 110}
                ],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': u'BM_Two',
                'measurements': [
                    {'time': 0.1111111111111111,
                     'cpu': -0.011111111111111112,
                     'real_time': 9,
                     'real_time_other': 10,
                     'cpu_time': 90,
                     'cpu_time_other': 89},
                    {'time': -0.125, 'cpu': -0.16279069767441862, 'real_time': 8,
                     'real_time_other': 7, 'cpu_time': 86, 'cpu_time_other': 72}
                ],
                'time_unit': 'ns',
                'utest': {
                    'have_optimal_repetitions': False, 'cpu_pvalue': 0.6666666666666666, 'time_pvalue': 1.0
                }
            },
            {
                'name': u'short',
                'measurements': [
                    {'time': -0.125,
                     'cpu': -0.0625,
                     'real_time': 8,
                     'real_time_other': 7,
                     'cpu_time': 80,
                     'cpu_time_other': 75},
                    {'time': -0.4325,
                     'cpu': -0.13506493506493514,
                     'real_time': 8,
                     'real_time_other': 4.54,
                     'cpu_time': 77,
                     'cpu_time_other': 66.6}
                ],
                'time_unit': 'ns',
                'utest': {
                    'have_optimal_repetitions': False, 'cpu_pvalue': 0.2, 'time_pvalue': 0.7670968684102772
                }
            },
            {
                'name': u'medium',
                'measurements': [
                    {'time': -0.375,
                     'cpu': -0.3375,
                     'real_time': 8,
                     'real_time_other': 5,
                     'cpu_time': 80,
                     'cpu_time_other': 53}
                ],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': 'OVERALL_GEOMEAN',
                'measurements': [{'real_time': 8.48528137423858e-09, 'cpu_time': 8.441336246629233e-08,
                                  'real_time_other': 2.2405267593145244e-08, 'cpu_time_other': 2.5453661413660466e-08,
                                  'time': 1.6404861082353634, 'cpu': -0.6984640740519662}],
                'time_unit': 's',
                'run_type': 'aggregate',
                'aggregate_name': 'geomean',
                'utest': {}
            }
        ]
        self.assertEqual(len(self.json_diff_report), len(expected_output))
        for out, expected in zip(
                self.json_diff_report, expected_output):
            self.assertEqual(out['name'], expected['name'])
            self.assertEqual(out['time_unit'], expected['time_unit'])
            assert_utest(self, out, expected)
            assert_measurements(self, out, expected)

class TestReportDifferenceWithUTestWhileDisplayingAggregatesOnly(
        unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        def load_results():
            import json
            testInputs = os.path.join(
                os.path.dirname(
                    os.path.realpath(__file__)),
                'Inputs')
            testOutput1 = os.path.join(testInputs, 'test3_run0.json')
            testOutput2 = os.path.join(testInputs, 'test3_run1.json')
            with open(testOutput1, 'r') as f:
                json1 = json.load(f)
            with open(testOutput2, 'r') as f:
                json2 = json.load(f)
            return json1, json2

        json1, json2 = load_results()
        cls.json_diff_report = get_difference_report(
            json1, json2, utest=True)

    def test_json_diff_report_pretty_printing(self):
        expect_lines = [
            ['BM_One', '-0.1000', '+0.1000', '10', '9', '100', '110'],
            ['BM_Two', '+0.1111', '-0.0111', '9', '10', '90', '89'],
            ['BM_Two', '-0.1250', '-0.1628', '8', '7', '86', '72'],
            ['BM_Two_pvalue',
             '1.0000',
             '0.6667',
             'U',
             'Test,',
             'Repetitions:',
             '2',
             'vs',
             '2.',
             'WARNING:',
             'Results',
             'unreliable!',
             '9+',
             'repetitions',
             'recommended.'],
            ['short', '-0.1250', '-0.0625', '8', '7', '80', '75'],
            ['short', '-0.4325', '-0.1351', '8', '5', '77', '67'],
            ['short_pvalue',
             '0.7671',
             '0.2000',
             'U',
             'Test,',
             'Repetitions:',
             '2',
             'vs',
             '3.',
             'WARNING:',
             'Results',
             'unreliable!',
             '9+',
             'repetitions',
             'recommended.'],
            ['medium', '-0.3750', '-0.3375', '8', '5', '80', '53'],
            ['OVERALL_GEOMEAN', '+1.6405', '-0.6985', '0', '0', '0', '0']
        ]
        output_lines_with_header = print_difference_report(
            self.json_diff_report,
            utest=True, utest_alpha=0.05, use_color=False)
        output_lines = output_lines_with_header[2:]
        print("\n")
        print("\n".join(output_lines_with_header))
        self.assertEqual(len(output_lines), len(expect_lines))
        for i in range(0, len(output_lines)):
            parts = [x for x in output_lines[i].split(' ') if x]
            self.assertEqual(expect_lines[i], parts)

    def test_json_diff_report(self):
        expected_output = [
            {
                'name': u'BM_One',
                'measurements': [
                    {'time': -0.1,
                     'cpu': 0.1,
                     'real_time': 10,
                     'real_time_other': 9,
                     'cpu_time': 100,
                     'cpu_time_other': 110}
                ],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': u'BM_Two',
                'measurements': [
                    {'time': 0.1111111111111111,
                     'cpu': -0.011111111111111112,
                     'real_time': 9,
                     'real_time_other': 10,
                     'cpu_time': 90,
                     'cpu_time_other': 89},
                    {'time': -0.125, 'cpu': -0.16279069767441862, 'real_time': 8,
                     'real_time_other': 7, 'cpu_time': 86, 'cpu_time_other': 72}
                ],
                'time_unit': 'ns',
                'utest': {
                    'have_optimal_repetitions': False, 'cpu_pvalue': 0.6666666666666666, 'time_pvalue': 1.0
                }
            },
            {
                'name': u'short',
                'measurements': [
                    {'time': -0.125,
                     'cpu': -0.0625,
                     'real_time': 8,
                     'real_time_other': 7,
                     'cpu_time': 80,
                     'cpu_time_other': 75},
                    {'time': -0.4325,
                     'cpu': -0.13506493506493514,
                     'real_time': 8,
                     'real_time_other': 4.54,
                     'cpu_time': 77,
                     'cpu_time_other': 66.6}
                ],
                'time_unit': 'ns',
                'utest': {
                    'have_optimal_repetitions': False, 'cpu_pvalue': 0.2, 'time_pvalue': 0.7670968684102772
                }
            },
            {
                'name': u'medium',
                'measurements': [
                    {'real_time_other': 5,
                     'cpu_time': 80,
                     'time': -0.375,
                     'real_time': 8,
                     'cpu_time_other': 53,
                     'cpu': -0.3375
                     }
                ],
                'utest': {},
                'time_unit': u'ns',
                'aggregate_name': ''
            },
            {
                'name': 'OVERALL_GEOMEAN',
                'measurements': [{'real_time': 8.48528137423858e-09, 'cpu_time': 8.441336246629233e-08,
                                  'real_time_other': 2.2405267593145244e-08, 'cpu_time_other': 2.5453661413660466e-08,
                                  'time': 1.6404861082353634, 'cpu': -0.6984640740519662}],
                'time_unit': 's',
                'run_type': 'aggregate',
                'aggregate_name': 'geomean',
                'utest': {}
            }
        ]
        self.assertEqual(len(self.json_diff_report), len(expected_output))
        for out, expected in zip(
                self.json_diff_report, expected_output):
            self.assertEqual(out['name'], expected['name'])
            self.assertEqual(out['time_unit'], expected['time_unit'])
            assert_utest(self, out, expected)
            assert_measurements(self, out, expected)

class TestReportDifferenceForPercentageAggregates(
        unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        def load_results():
            import json
            testInputs = os.path.join(
                os.path.dirname(
                    os.path.realpath(__file__)),
                'Inputs')
            testOutput1 = os.path.join(testInputs, 'test4_run0.json')
            testOutput2 = os.path.join(testInputs, 'test4_run1.json')
            with open(testOutput1, 'r') as f:
                json1 = json.load(f)
            with open(testOutput2, 'r') as f:
                json2 = json.load(f)
            return json1, json2

        json1, json2 = load_results()
        cls.json_diff_report = get_difference_report(
            json1, json2, utest=True)

    def test_json_diff_report_pretty_printing(self):
        expect_lines = [
            ['whocares', '-0.5000', '+0.5000', '0', '0', '0', '0']
        ]
        output_lines_with_header = print_difference_report(
            self.json_diff_report,
            utest=True, utest_alpha=0.05, use_color=False)
        output_lines = output_lines_with_header[2:]
        print("\n")
        print("\n".join(output_lines_with_header))
        self.assertEqual(len(output_lines), len(expect_lines))
        for i in range(0, len(output_lines)):
            parts = [x for x in output_lines[i].split(' ') if x]
            self.assertEqual(expect_lines[i], parts)

    def test_json_diff_report(self):
        expected_output = [
            {
                'name': u'whocares',
                'measurements': [
                    {'time': -0.5,
                     'cpu': 0.5,
                     'real_time': 0.01,
                     'real_time_other': 0.005,
                     'cpu_time': 0.10,
                     'cpu_time_other': 0.15}
                ],
                'time_unit': 'ns',
                'utest': {}
            }
        ]
        self.assertEqual(len(self.json_diff_report), len(expected_output))
        for out, expected in zip(
                self.json_diff_report, expected_output):
            self.assertEqual(out['name'], expected['name'])
            self.assertEqual(out['time_unit'], expected['time_unit'])
            assert_utest(self, out, expected)
            assert_measurements(self, out, expected)

class TestReportSorting(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        def load_result():
            import json
            testInputs = os.path.join(
                os.path.dirname(
                    os.path.realpath(__file__)),
                'Inputs')
            testOutput = os.path.join(testInputs, 'test4_run.json')
            with open(testOutput, 'r') as f:
                json = json.load(f)
            return json

        cls.json = load_result()

    def test_json_diff_report_pretty_printing(self):
        import util

        expected_names = [
            "99 family 0 instance 0 repetition 0",
            "98 family 0 instance 0 repetition 1",
            "97 family 0 instance 0 aggregate",
            "96 family 0 instance 1 repetition 0",
            "95 family 0 instance 1 repetition 1",
            "94 family 0 instance 1 aggregate",
            "93 family 1 instance 0 repetition 0",
            "92 family 1 instance 0 repetition 1",
            "91 family 1 instance 0 aggregate",
            "90 family 1 instance 1 repetition 0",
            "89 family 1 instance 1 repetition 1",
            "88 family 1 instance 1 aggregate"
        ]

        for n in range(len(self.json['benchmarks']) ** 2):
            random.shuffle(self.json['benchmarks'])
            sorted_benchmarks = util.sort_benchmark_results(self.json)[
                'benchmarks']
            self.assertEqual(len(expected_names), len(sorted_benchmarks))
            for out, expected in zip(sorted_benchmarks, expected_names):
                self.assertEqual(out['name'], expected)


def assert_utest(unittest_instance, lhs, rhs):
    if lhs['utest']:
        unittest_instance.assertAlmostEqual(
            lhs['utest']['cpu_pvalue'],
            rhs['utest']['cpu_pvalue'])
        unittest_instance.assertAlmostEqual(
            lhs['utest']['time_pvalue'],
            rhs['utest']['time_pvalue'])
        unittest_instance.assertEqual(
            lhs['utest']['have_optimal_repetitions'],
            rhs['utest']['have_optimal_repetitions'])
    else:
        # lhs is empty; fail if rhs is not.
        unittest_instance.assertEqual(lhs['utest'], rhs['utest'])


def assert_measurements(unittest_instance, lhs, rhs):
    for m1, m2 in zip(lhs['measurements'], rhs['measurements']):
        unittest_instance.assertEqual(m1['real_time'], m2['real_time'])
        unittest_instance.assertEqual(m1['cpu_time'], m2['cpu_time'])
        # m1['time'] and m1['cpu'] hold values which are being calculated,
        # and therefore we must use the almost-equal pattern.
        unittest_instance.assertAlmostEqual(m1['time'], m2['time'], places=4)
        unittest_instance.assertAlmostEqual(m1['cpu'], m2['cpu'], places=4)


if __name__ == '__main__':
    unittest.main()

# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
# kate: tab-width: 4; replace-tabs on; indent-width 4; tab-indents: off;
# kate: indent-mode python; remove-trailing-spaces modified;