1import unittest 2"""report.py - Utilities for reporting statistics about benchmark results 3""" 4import os 5import re 6import copy 7 8from scipy.stats import mannwhitneyu 9 10 11class BenchmarkColor(object): 12 def __init__(self, name, code): 13 self.name = name 14 self.code = code 15 16 def __repr__(self): 17 return '%s%r' % (self.__class__.__name__, 18 (self.name, self.code)) 19 20 def __format__(self, format): 21 return self.code 22 23 24# Benchmark Colors Enumeration 25BC_NONE = BenchmarkColor('NONE', '') 26BC_MAGENTA = BenchmarkColor('MAGENTA', '\033[95m') 27BC_CYAN = BenchmarkColor('CYAN', '\033[96m') 28BC_OKBLUE = BenchmarkColor('OKBLUE', '\033[94m') 29BC_OKGREEN = BenchmarkColor('OKGREEN', '\033[32m') 30BC_HEADER = BenchmarkColor('HEADER', '\033[92m') 31BC_WARNING = BenchmarkColor('WARNING', '\033[93m') 32BC_WHITE = BenchmarkColor('WHITE', '\033[97m') 33BC_FAIL = BenchmarkColor('FAIL', '\033[91m') 34BC_ENDC = BenchmarkColor('ENDC', '\033[0m') 35BC_BOLD = BenchmarkColor('BOLD', '\033[1m') 36BC_UNDERLINE = BenchmarkColor('UNDERLINE', '\033[4m') 37 38UTEST_MIN_REPETITIONS = 2 39UTEST_OPTIMAL_REPETITIONS = 9 # Lowest reasonable number, More is better. 40UTEST_COL_NAME = "_pvalue" 41 42 43def color_format(use_color, fmt_str, *args, **kwargs): 44 """ 45 Return the result of 'fmt_str.format(*args, **kwargs)' after transforming 46 'args' and 'kwargs' according to the value of 'use_color'. If 'use_color' 47 is False then all color codes in 'args' and 'kwargs' are replaced with 48 the empty string. 49 """ 50 assert use_color is True or use_color is False 51 if not use_color: 52 args = [arg if not isinstance(arg, BenchmarkColor) else BC_NONE 53 for arg in args] 54 kwargs = {key: arg if not isinstance(arg, BenchmarkColor) else BC_NONE 55 for key, arg in kwargs.items()} 56 return fmt_str.format(*args, **kwargs) 57 58 59def find_longest_name(benchmark_list): 60 """ 61 Return the length of the longest benchmark name in a given list of 62 benchmark JSON objects 63 """ 64 longest_name = 1 65 for bc in benchmark_list: 66 if len(bc['name']) > longest_name: 67 longest_name = len(bc['name']) 68 return longest_name 69 70 71def calculate_change(old_val, new_val): 72 """ 73 Return a float representing the decimal change between old_val and new_val. 74 """ 75 if old_val == 0 and new_val == 0: 76 return 0.0 77 if old_val == 0: 78 return float(new_val - old_val) / (float(old_val + new_val) / 2) 79 return float(new_val - old_val) / abs(old_val) 80 81 82def filter_benchmark(json_orig, family, replacement=""): 83 """ 84 Apply a filter to the json, and only leave the 'family' of benchmarks. 85 """ 86 regex = re.compile(family) 87 filtered = {} 88 filtered['benchmarks'] = [] 89 for be in json_orig['benchmarks']: 90 if not regex.search(be['name']): 91 continue 92 filteredbench = copy.deepcopy(be) # Do NOT modify the old name! 93 filteredbench['name'] = regex.sub(replacement, filteredbench['name']) 94 filtered['benchmarks'].append(filteredbench) 95 return filtered 96 97 98def get_unique_benchmark_names(json): 99 """ 100 While *keeping* the order, give all the unique 'names' used for benchmarks. 101 """ 102 seen = set() 103 uniqued = [x['name'] for x in json['benchmarks'] 104 if x['name'] not in seen and 105 (seen.add(x['name']) or True)] 106 return uniqued 107 108 109def intersect(list1, list2): 110 """ 111 Given two lists, get a new list consisting of the elements only contained 112 in *both of the input lists*, while preserving the ordering. 113 """ 114 return [x for x in list1 if x in list2] 115 116 117def is_potentially_comparable_benchmark(x): 118 return ('time_unit' in x and 'real_time' in x and 'cpu_time' in x) 119 120 121def partition_benchmarks(json1, json2): 122 """ 123 While preserving the ordering, find benchmarks with the same names in 124 both of the inputs, and group them. 125 (i.e. partition/filter into groups with common name) 126 """ 127 json1_unique_names = get_unique_benchmark_names(json1) 128 json2_unique_names = get_unique_benchmark_names(json2) 129 names = intersect(json1_unique_names, json2_unique_names) 130 partitions = [] 131 for name in names: 132 time_unit = None 133 # Pick the time unit from the first entry of the lhs benchmark. 134 # We should be careful not to crash with unexpected input. 135 for x in json1['benchmarks']: 136 if (x['name'] == name and is_potentially_comparable_benchmark(x)): 137 time_unit = x['time_unit'] 138 break 139 if time_unit is None: 140 continue 141 # Filter by name and time unit. 142 # All the repetitions are assumed to be comparable. 143 lhs = [x for x in json1['benchmarks'] if x['name'] == name and 144 x['time_unit'] == time_unit] 145 rhs = [x for x in json2['benchmarks'] if x['name'] == name and 146 x['time_unit'] == time_unit] 147 partitions.append([lhs, rhs]) 148 return partitions 149 150 151def extract_field(partition, field_name): 152 # The count of elements may be different. We want *all* of them. 153 lhs = [x[field_name] for x in partition[0]] 154 rhs = [x[field_name] for x in partition[1]] 155 return [lhs, rhs] 156 157 158def calc_utest(timings_cpu, timings_time): 159 min_rep_cnt = min(len(timings_time[0]), 160 len(timings_time[1]), 161 len(timings_cpu[0]), 162 len(timings_cpu[1])) 163 164 # Does *everything* has at least UTEST_MIN_REPETITIONS repetitions? 165 if min_rep_cnt < UTEST_MIN_REPETITIONS: 166 return False, None, None 167 168 time_pvalue = mannwhitneyu( 169 timings_time[0], timings_time[1], alternative='two-sided').pvalue 170 cpu_pvalue = mannwhitneyu( 171 timings_cpu[0], timings_cpu[1], alternative='two-sided').pvalue 172 173 return (min_rep_cnt >= UTEST_OPTIMAL_REPETITIONS), cpu_pvalue, time_pvalue 174 175def print_utest(bc_name, utest, utest_alpha, first_col_width, use_color=True): 176 def get_utest_color(pval): 177 return BC_FAIL if pval >= utest_alpha else BC_OKGREEN 178 179 # Check if we failed miserably with minimum required repetitions for utest 180 if not utest['have_optimal_repetitions'] and utest['cpu_pvalue'] is None and utest['time_pvalue'] is None: 181 return [] 182 183 dsc = "U Test, Repetitions: {} vs {}".format( 184 utest['nr_of_repetitions'], utest['nr_of_repetitions_other']) 185 dsc_color = BC_OKGREEN 186 187 # We still got some results to show but issue a warning about it. 188 if not utest['have_optimal_repetitions']: 189 dsc_color = BC_WARNING 190 dsc += ". WARNING: Results unreliable! {}+ repetitions recommended.".format( 191 UTEST_OPTIMAL_REPETITIONS) 192 193 special_str = "{}{:<{}s}{endc}{}{:16.4f}{endc}{}{:16.4f}{endc}{} {}" 194 195 return [color_format(use_color, 196 special_str, 197 BC_HEADER, 198 "{}{}".format(bc_name, UTEST_COL_NAME), 199 first_col_width, 200 get_utest_color( 201 utest['time_pvalue']), utest['time_pvalue'], 202 get_utest_color( 203 utest['cpu_pvalue']), utest['cpu_pvalue'], 204 dsc_color, dsc, 205 endc=BC_ENDC)] 206 207 208def get_difference_report( 209 json1, 210 json2, 211 utest=False): 212 """ 213 Calculate and report the difference between each test of two benchmarks 214 runs specified as 'json1' and 'json2'. Output is another json containing 215 relevant details for each test run. 216 """ 217 assert utest is True or utest is False 218 219 diff_report = [] 220 partitions = partition_benchmarks(json1, json2) 221 for partition in partitions: 222 benchmark_name = partition[0][0]['name'] 223 time_unit = partition[0][0]['time_unit'] 224 measurements = [] 225 utest_results = {} 226 # Careful, we may have different repetition count. 227 for i in range(min(len(partition[0]), len(partition[1]))): 228 bn = partition[0][i] 229 other_bench = partition[1][i] 230 measurements.append({ 231 'real_time': bn['real_time'], 232 'cpu_time': bn['cpu_time'], 233 'real_time_other': other_bench['real_time'], 234 'cpu_time_other': other_bench['cpu_time'], 235 'time': calculate_change(bn['real_time'], other_bench['real_time']), 236 'cpu': calculate_change(bn['cpu_time'], other_bench['cpu_time']) 237 }) 238 239 # After processing the whole partition, if requested, do the U test. 240 if utest: 241 timings_cpu = extract_field(partition, 'cpu_time') 242 timings_time = extract_field(partition, 'real_time') 243 have_optimal_repetitions, cpu_pvalue, time_pvalue = calc_utest(timings_cpu, timings_time) 244 if cpu_pvalue and time_pvalue: 245 utest_results = { 246 'have_optimal_repetitions': have_optimal_repetitions, 247 'cpu_pvalue': cpu_pvalue, 248 'time_pvalue': time_pvalue, 249 'nr_of_repetitions': len(timings_cpu[0]), 250 'nr_of_repetitions_other': len(timings_cpu[1]) 251 } 252 253 # Store only if we had any measurements for given benchmark. 254 # E.g. partition_benchmarks will filter out the benchmarks having 255 # time units which are not compatible with other time units in the 256 # benchmark suite. 257 if measurements: 258 run_type = partition[0][0]['run_type'] if 'run_type' in partition[0][0] else '' 259 aggregate_name = partition[0][0]['aggregate_name'] if run_type == 'aggregate' and 'aggregate_name' in partition[0][0] else '' 260 diff_report.append({ 261 'name': benchmark_name, 262 'measurements': measurements, 263 'time_unit': time_unit, 264 'run_type': run_type, 265 'aggregate_name': aggregate_name, 266 'utest': utest_results 267 }) 268 269 return diff_report 270 271 272def print_difference_report( 273 json_diff_report, 274 include_aggregates_only=False, 275 utest=False, 276 utest_alpha=0.05, 277 use_color=True): 278 """ 279 Calculate and report the difference between each test of two benchmarks 280 runs specified as 'json1' and 'json2'. 281 """ 282 assert utest is True or utest is False 283 284 def get_color(res): 285 if res > 0.05: 286 return BC_FAIL 287 elif res > -0.07: 288 return BC_WHITE 289 else: 290 return BC_CYAN 291 292 first_col_width = find_longest_name(json_diff_report) 293 first_col_width = max( 294 first_col_width, 295 len('Benchmark')) 296 first_col_width += len(UTEST_COL_NAME) 297 first_line = "{:<{}s}Time CPU Time Old Time New CPU Old CPU New".format( 298 'Benchmark', 12 + first_col_width) 299 output_strs = [first_line, '-' * len(first_line)] 300 301 fmt_str = "{}{:<{}s}{endc}{}{:+16.4f}{endc}{}{:+16.4f}{endc}{:14.0f}{:14.0f}{endc}{:14.0f}{:14.0f}" 302 for benchmark in json_diff_report: 303 # *If* we were asked to only include aggregates, 304 # and if it is non-aggregate, then skip it. 305 if include_aggregates_only and 'run_type' in benchmark: 306 if benchmark['run_type'] != 'aggregate': 307 continue 308 309 for measurement in benchmark['measurements']: 310 output_strs += [color_format(use_color, 311 fmt_str, 312 BC_HEADER, 313 benchmark['name'], 314 first_col_width, 315 get_color(measurement['time']), 316 measurement['time'], 317 get_color(measurement['cpu']), 318 measurement['cpu'], 319 measurement['real_time'], 320 measurement['real_time_other'], 321 measurement['cpu_time'], 322 measurement['cpu_time_other'], 323 endc=BC_ENDC)] 324 325 # After processing the measurements, if requested and 326 # if applicable (e.g. u-test exists for given benchmark), 327 # print the U test. 328 if utest and benchmark['utest']: 329 output_strs += print_utest(benchmark['name'], 330 benchmark['utest'], 331 utest_alpha=utest_alpha, 332 first_col_width=first_col_width, 333 use_color=use_color) 334 335 return output_strs 336 337 338############################################################################### 339# Unit tests 340 341 342class TestGetUniqueBenchmarkNames(unittest.TestCase): 343 def load_results(self): 344 import json 345 testInputs = os.path.join( 346 os.path.dirname( 347 os.path.realpath(__file__)), 348 'Inputs') 349 testOutput = os.path.join(testInputs, 'test3_run0.json') 350 with open(testOutput, 'r') as f: 351 json = json.load(f) 352 return json 353 354 def test_basic(self): 355 expect_lines = [ 356 'BM_One', 357 'BM_Two', 358 'short', # These two are not sorted 359 'medium', # These two are not sorted 360 ] 361 json = self.load_results() 362 output_lines = get_unique_benchmark_names(json) 363 print("\n") 364 print("\n".join(output_lines)) 365 self.assertEqual(len(output_lines), len(expect_lines)) 366 for i in range(0, len(output_lines)): 367 self.assertEqual(expect_lines[i], output_lines[i]) 368 369 370class TestReportDifference(unittest.TestCase): 371 @classmethod 372 def setUpClass(cls): 373 def load_results(): 374 import json 375 testInputs = os.path.join( 376 os.path.dirname( 377 os.path.realpath(__file__)), 378 'Inputs') 379 testOutput1 = os.path.join(testInputs, 'test1_run1.json') 380 testOutput2 = os.path.join(testInputs, 'test1_run2.json') 381 with open(testOutput1, 'r') as f: 382 json1 = json.load(f) 383 with open(testOutput2, 'r') as f: 384 json2 = json.load(f) 385 return json1, json2 386 387 json1, json2 = load_results() 388 cls.json_diff_report = get_difference_report(json1, json2) 389 390 def test_json_diff_report_pretty_printing(self): 391 expect_lines = [ 392 ['BM_SameTimes', '+0.0000', '+0.0000', '10', '10', '10', '10'], 393 ['BM_2xFaster', '-0.5000', '-0.5000', '50', '25', '50', '25'], 394 ['BM_2xSlower', '+1.0000', '+1.0000', '50', '100', '50', '100'], 395 ['BM_1PercentFaster', '-0.0100', '-0.0100', '100', '99', '100', '99'], 396 ['BM_1PercentSlower', '+0.0100', '+0.0100', '100', '101', '100', '101'], 397 ['BM_10PercentFaster', '-0.1000', '-0.1000', '100', '90', '100', '90'], 398 ['BM_10PercentSlower', '+0.1000', '+0.1000', '100', '110', '100', '110'], 399 ['BM_100xSlower', '+99.0000', '+99.0000', 400 '100', '10000', '100', '10000'], 401 ['BM_100xFaster', '-0.9900', '-0.9900', 402 '10000', '100', '10000', '100'], 403 ['BM_10PercentCPUToTime', '+0.1000', 404 '-0.1000', '100', '110', '100', '90'], 405 ['BM_ThirdFaster', '-0.3333', '-0.3334', '100', '67', '100', '67'], 406 ['BM_NotBadTimeUnit', '-0.9000', '+0.2000', '0', '0', '0', '1'], 407 ] 408 output_lines_with_header = print_difference_report( 409 self.json_diff_report, use_color=False) 410 output_lines = output_lines_with_header[2:] 411 print("\n") 412 print("\n".join(output_lines_with_header)) 413 self.assertEqual(len(output_lines), len(expect_lines)) 414 for i in range(0, len(output_lines)): 415 parts = [x for x in output_lines[i].split(' ') if x] 416 self.assertEqual(len(parts), 7) 417 self.assertEqual(expect_lines[i], parts) 418 419 def test_json_diff_report_output(self): 420 expected_output = [ 421 { 422 'name': 'BM_SameTimes', 423 'measurements': [{'time': 0.0000, 'cpu': 0.0000, 'real_time': 10, 'real_time_other': 10, 'cpu_time': 10, 'cpu_time_other': 10}], 424 'time_unit': 'ns', 425 'utest': {} 426 }, 427 { 428 'name': 'BM_2xFaster', 429 'measurements': [{'time': -0.5000, 'cpu': -0.5000, 'real_time': 50, 'real_time_other': 25, 'cpu_time': 50, 'cpu_time_other': 25}], 430 'time_unit': 'ns', 431 'utest': {} 432 }, 433 { 434 'name': 'BM_2xSlower', 435 'measurements': [{'time': 1.0000, 'cpu': 1.0000, 'real_time': 50, 'real_time_other': 100, 'cpu_time': 50, 'cpu_time_other': 100}], 436 'time_unit': 'ns', 437 'utest': {} 438 }, 439 { 440 'name': 'BM_1PercentFaster', 441 'measurements': [{'time': -0.0100, 'cpu': -0.0100, 'real_time': 100, 'real_time_other': 98.9999999, 'cpu_time': 100, 'cpu_time_other': 98.9999999}], 442 'time_unit': 'ns', 443 'utest': {} 444 }, 445 { 446 'name': 'BM_1PercentSlower', 447 'measurements': [{'time': 0.0100, 'cpu': 0.0100, 'real_time': 100, 'real_time_other': 101, 'cpu_time': 100, 'cpu_time_other': 101}], 448 'time_unit': 'ns', 449 'utest': {} 450 }, 451 { 452 'name': 'BM_10PercentFaster', 453 'measurements': [{'time': -0.1000, 'cpu': -0.1000, 'real_time': 100, 'real_time_other': 90, 'cpu_time': 100, 'cpu_time_other': 90}], 454 'time_unit': 'ns', 455 'utest': {} 456 }, 457 { 458 'name': 'BM_10PercentSlower', 459 'measurements': [{'time': 0.1000, 'cpu': 0.1000, 'real_time': 100, 'real_time_other': 110, 'cpu_time': 100, 'cpu_time_other': 110}], 460 'time_unit': 'ns', 461 'utest': {} 462 }, 463 { 464 'name': 'BM_100xSlower', 465 'measurements': [{'time': 99.0000, 'cpu': 99.0000, 'real_time': 100, 'real_time_other': 10000, 'cpu_time': 100, 'cpu_time_other': 10000}], 466 'time_unit': 'ns', 467 'utest': {} 468 }, 469 { 470 'name': 'BM_100xFaster', 471 'measurements': [{'time': -0.9900, 'cpu': -0.9900, 'real_time': 10000, 'real_time_other': 100, 'cpu_time': 10000, 'cpu_time_other': 100}], 472 'time_unit': 'ns', 473 'utest': {} 474 }, 475 { 476 'name': 'BM_10PercentCPUToTime', 477 'measurements': [{'time': 0.1000, 'cpu': -0.1000, 'real_time': 100, 'real_time_other': 110, 'cpu_time': 100, 'cpu_time_other': 90}], 478 'time_unit': 'ns', 479 'utest': {} 480 }, 481 { 482 'name': 'BM_ThirdFaster', 483 'measurements': [{'time': -0.3333, 'cpu': -0.3334, 'real_time': 100, 'real_time_other': 67, 'cpu_time': 100, 'cpu_time_other': 67}], 484 'time_unit': 'ns', 485 'utest': {} 486 }, 487 { 488 'name': 'BM_NotBadTimeUnit', 489 'measurements': [{'time': -0.9000, 'cpu': 0.2000, 'real_time': 0.4, 'real_time_other': 0.04, 'cpu_time': 0.5, 'cpu_time_other': 0.6}], 490 'time_unit': 's', 491 'utest': {} 492 }, 493 ] 494 self.assertEqual(len(self.json_diff_report), len(expected_output)) 495 for out, expected in zip( 496 self.json_diff_report, expected_output): 497 self.assertEqual(out['name'], expected['name']) 498 self.assertEqual(out['time_unit'], expected['time_unit']) 499 assert_utest(self, out, expected) 500 assert_measurements(self, out, expected) 501 502 503class TestReportDifferenceBetweenFamilies(unittest.TestCase): 504 @classmethod 505 def setUpClass(cls): 506 def load_result(): 507 import json 508 testInputs = os.path.join( 509 os.path.dirname( 510 os.path.realpath(__file__)), 511 'Inputs') 512 testOutput = os.path.join(testInputs, 'test2_run.json') 513 with open(testOutput, 'r') as f: 514 json = json.load(f) 515 return json 516 517 json = load_result() 518 json1 = filter_benchmark(json, "BM_Z.ro", ".") 519 json2 = filter_benchmark(json, "BM_O.e", ".") 520 cls.json_diff_report = get_difference_report(json1, json2) 521 522 def test_json_diff_report_pretty_printing(self): 523 expect_lines = [ 524 ['.', '-0.5000', '-0.5000', '10', '5', '10', '5'], 525 ['./4', '-0.5000', '-0.5000', '40', '20', '40', '20'], 526 ['Prefix/.', '-0.5000', '-0.5000', '20', '10', '20', '10'], 527 ['Prefix/./3', '-0.5000', '-0.5000', '30', '15', '30', '15'], 528 ] 529 output_lines_with_header = print_difference_report( 530 self.json_diff_report, use_color=False) 531 output_lines = output_lines_with_header[2:] 532 print("\n") 533 print("\n".join(output_lines_with_header)) 534 self.assertEqual(len(output_lines), len(expect_lines)) 535 for i in range(0, len(output_lines)): 536 parts = [x for x in output_lines[i].split(' ') if x] 537 self.assertEqual(len(parts), 7) 538 self.assertEqual(expect_lines[i], parts) 539 540 def test_json_diff_report(self): 541 expected_output = [ 542 { 543 'name': u'.', 544 'measurements': [{'time': -0.5, 'cpu': -0.5, 'real_time': 10, 'real_time_other': 5, 'cpu_time': 10, 'cpu_time_other': 5}], 545 'time_unit': 'ns', 546 'utest': {} 547 }, 548 { 549 'name': u'./4', 550 'measurements': [{'time': -0.5, 'cpu': -0.5, 'real_time': 40, 'real_time_other': 20, 'cpu_time': 40, 'cpu_time_other': 20}], 551 'time_unit': 'ns', 552 'utest': {}, 553 }, 554 { 555 'name': u'Prefix/.', 556 'measurements': [{'time': -0.5, 'cpu': -0.5, 'real_time': 20, 'real_time_other': 10, 'cpu_time': 20, 'cpu_time_other': 10}], 557 'time_unit': 'ns', 558 'utest': {} 559 }, 560 { 561 'name': u'Prefix/./3', 562 'measurements': [{'time': -0.5, 'cpu': -0.5, 'real_time': 30, 'real_time_other': 15, 'cpu_time': 30, 'cpu_time_other': 15}], 563 'time_unit': 'ns', 564 'utest': {} 565 } 566 ] 567 self.assertEqual(len(self.json_diff_report), len(expected_output)) 568 for out, expected in zip( 569 self.json_diff_report, expected_output): 570 self.assertEqual(out['name'], expected['name']) 571 self.assertEqual(out['time_unit'], expected['time_unit']) 572 assert_utest(self, out, expected) 573 assert_measurements(self, out, expected) 574 575 576class TestReportDifferenceWithUTest(unittest.TestCase): 577 @classmethod 578 def setUpClass(cls): 579 def load_results(): 580 import json 581 testInputs = os.path.join( 582 os.path.dirname( 583 os.path.realpath(__file__)), 584 'Inputs') 585 testOutput1 = os.path.join(testInputs, 'test3_run0.json') 586 testOutput2 = os.path.join(testInputs, 'test3_run1.json') 587 with open(testOutput1, 'r') as f: 588 json1 = json.load(f) 589 with open(testOutput2, 'r') as f: 590 json2 = json.load(f) 591 return json1, json2 592 593 json1, json2 = load_results() 594 cls.json_diff_report = get_difference_report( 595 json1, json2, utest=True) 596 597 def test_json_diff_report_pretty_printing(self): 598 expect_lines = [ 599 ['BM_One', '-0.1000', '+0.1000', '10', '9', '100', '110'], 600 ['BM_Two', '+0.1111', '-0.0111', '9', '10', '90', '89'], 601 ['BM_Two', '-0.1250', '-0.1628', '8', '7', '86', '72'], 602 ['BM_Two_pvalue', 603 '0.6985', 604 '0.6985', 605 'U', 606 'Test,', 607 'Repetitions:', 608 '2', 609 'vs', 610 '2.', 611 'WARNING:', 612 'Results', 613 'unreliable!', 614 '9+', 615 'repetitions', 616 'recommended.'], 617 ['short', '-0.1250', '-0.0625', '8', '7', '80', '75'], 618 ['short', '-0.4325', '-0.1351', '8', '5', '77', '67'], 619 ['short_pvalue', 620 '0.7671', 621 '0.1489', 622 'U', 623 'Test,', 624 'Repetitions:', 625 '2', 626 'vs', 627 '3.', 628 'WARNING:', 629 'Results', 630 'unreliable!', 631 '9+', 632 'repetitions', 633 'recommended.'], 634 ['medium', '-0.3750', '-0.3375', '8', '5', '80', '53'], 635 ] 636 output_lines_with_header = print_difference_report( 637 self.json_diff_report, utest=True, utest_alpha=0.05, use_color=False) 638 output_lines = output_lines_with_header[2:] 639 print("\n") 640 print("\n".join(output_lines_with_header)) 641 self.assertEqual(len(output_lines), len(expect_lines)) 642 for i in range(0, len(output_lines)): 643 parts = [x for x in output_lines[i].split(' ') if x] 644 self.assertEqual(expect_lines[i], parts) 645 646 def test_json_diff_report(self): 647 expected_output = [ 648 { 649 'name': u'BM_One', 650 'measurements': [ 651 {'time': -0.1, 652 'cpu': 0.1, 653 'real_time': 10, 654 'real_time_other': 9, 655 'cpu_time': 100, 656 'cpu_time_other': 110} 657 ], 658 'time_unit': 'ns', 659 'utest': {} 660 }, 661 { 662 'name': u'BM_Two', 663 'measurements': [ 664 {'time': 0.1111111111111111, 665 'cpu': -0.011111111111111112, 666 'real_time': 9, 667 'real_time_other': 10, 668 'cpu_time': 90, 669 'cpu_time_other': 89}, 670 {'time': -0.125, 'cpu': -0.16279069767441862, 'real_time': 8, 671 'real_time_other': 7, 'cpu_time': 86, 'cpu_time_other': 72} 672 ], 673 'time_unit': 'ns', 674 'utest': { 675 'have_optimal_repetitions': False, 'cpu_pvalue': 0.6985353583033387, 'time_pvalue': 0.6985353583033387 676 } 677 }, 678 { 679 'name': u'short', 680 'measurements': [ 681 {'time': -0.125, 682 'cpu': -0.0625, 683 'real_time': 8, 684 'real_time_other': 7, 685 'cpu_time': 80, 686 'cpu_time_other': 75}, 687 {'time': -0.4325, 688 'cpu': -0.13506493506493514, 689 'real_time': 8, 690 'real_time_other': 4.54, 691 'cpu_time': 77, 692 'cpu_time_other': 66.6} 693 ], 694 'time_unit': 'ns', 695 'utest': { 696 'have_optimal_repetitions': False, 'cpu_pvalue': 0.14891467317876572, 'time_pvalue': 0.7670968684102772 697 } 698 }, 699 { 700 'name': u'medium', 701 'measurements': [ 702 {'time': -0.375, 703 'cpu': -0.3375, 704 'real_time': 8, 705 'real_time_other': 5, 706 'cpu_time': 80, 707 'cpu_time_other': 53} 708 ], 709 'time_unit': 'ns', 710 'utest': {} 711 } 712 ] 713 self.assertEqual(len(self.json_diff_report), len(expected_output)) 714 for out, expected in zip( 715 self.json_diff_report, expected_output): 716 self.assertEqual(out['name'], expected['name']) 717 self.assertEqual(out['time_unit'], expected['time_unit']) 718 assert_utest(self, out, expected) 719 assert_measurements(self, out, expected) 720 721 722class TestReportDifferenceWithUTestWhileDisplayingAggregatesOnly( 723 unittest.TestCase): 724 @classmethod 725 def setUpClass(cls): 726 def load_results(): 727 import json 728 testInputs = os.path.join( 729 os.path.dirname( 730 os.path.realpath(__file__)), 731 'Inputs') 732 testOutput1 = os.path.join(testInputs, 'test3_run0.json') 733 testOutput2 = os.path.join(testInputs, 'test3_run1.json') 734 with open(testOutput1, 'r') as f: 735 json1 = json.load(f) 736 with open(testOutput2, 'r') as f: 737 json2 = json.load(f) 738 return json1, json2 739 740 json1, json2 = load_results() 741 cls.json_diff_report = get_difference_report( 742 json1, json2, utest=True) 743 744 def test_json_diff_report_pretty_printing(self): 745 expect_lines = [ 746 ['BM_One', '-0.1000', '+0.1000', '10', '9', '100', '110'], 747 ['BM_Two', '+0.1111', '-0.0111', '9', '10', '90', '89'], 748 ['BM_Two', '-0.1250', '-0.1628', '8', '7', '86', '72'], 749 ['BM_Two_pvalue', 750 '0.6985', 751 '0.6985', 752 'U', 753 'Test,', 754 'Repetitions:', 755 '2', 756 'vs', 757 '2.', 758 'WARNING:', 759 'Results', 760 'unreliable!', 761 '9+', 762 'repetitions', 763 'recommended.'], 764 ['short', '-0.1250', '-0.0625', '8', '7', '80', '75'], 765 ['short', '-0.4325', '-0.1351', '8', '5', '77', '67'], 766 ['short_pvalue', 767 '0.7671', 768 '0.1489', 769 'U', 770 'Test,', 771 'Repetitions:', 772 '2', 773 'vs', 774 '3.', 775 'WARNING:', 776 'Results', 777 'unreliable!', 778 '9+', 779 'repetitions', 780 'recommended.'], 781 ['medium', '-0.3750', '-0.3375', '8', '5', '80', '53'] 782 ] 783 output_lines_with_header = print_difference_report( 784 self.json_diff_report, 785 utest=True, utest_alpha=0.05, use_color=False) 786 output_lines = output_lines_with_header[2:] 787 print("\n") 788 print("\n".join(output_lines_with_header)) 789 self.assertEqual(len(output_lines), len(expect_lines)) 790 for i in range(0, len(output_lines)): 791 parts = [x for x in output_lines[i].split(' ') if x] 792 self.assertEqual(expect_lines[i], parts) 793 794 def test_json_diff_report(self): 795 expected_output = [ 796 { 797 'name': u'BM_One', 798 'measurements': [ 799 {'time': -0.1, 800 'cpu': 0.1, 801 'real_time': 10, 802 'real_time_other': 9, 803 'cpu_time': 100, 804 'cpu_time_other': 110} 805 ], 806 'time_unit': 'ns', 807 'utest': {} 808 }, 809 { 810 'name': u'BM_Two', 811 'measurements': [ 812 {'time': 0.1111111111111111, 813 'cpu': -0.011111111111111112, 814 'real_time': 9, 815 'real_time_other': 10, 816 'cpu_time': 90, 817 'cpu_time_other': 89}, 818 {'time': -0.125, 'cpu': -0.16279069767441862, 'real_time': 8, 819 'real_time_other': 7, 'cpu_time': 86, 'cpu_time_other': 72} 820 ], 821 'time_unit': 'ns', 822 'utest': { 823 'have_optimal_repetitions': False, 'cpu_pvalue': 0.6985353583033387, 'time_pvalue': 0.6985353583033387 824 } 825 }, 826 { 827 'name': u'short', 828 'measurements': [ 829 {'time': -0.125, 830 'cpu': -0.0625, 831 'real_time': 8, 832 'real_time_other': 7, 833 'cpu_time': 80, 834 'cpu_time_other': 75}, 835 {'time': -0.4325, 836 'cpu': -0.13506493506493514, 837 'real_time': 8, 838 'real_time_other': 4.54, 839 'cpu_time': 77, 840 'cpu_time_other': 66.6} 841 ], 842 'time_unit': 'ns', 843 'utest': { 844 'have_optimal_repetitions': False, 'cpu_pvalue': 0.14891467317876572, 'time_pvalue': 0.7670968684102772 845 } 846 }, 847 { 848 'name': u'medium', 849 'measurements': [ 850 {'real_time_other': 5, 851 'cpu_time': 80, 852 'time': -0.375, 853 'real_time': 8, 854 'cpu_time_other': 53, 855 'cpu': -0.3375 856 } 857 ], 858 'utest': {}, 859 'time_unit': u'ns', 860 'aggregate_name': '' 861 } 862 ] 863 self.assertEqual(len(self.json_diff_report), len(expected_output)) 864 for out, expected in zip( 865 self.json_diff_report, expected_output): 866 self.assertEqual(out['name'], expected['name']) 867 self.assertEqual(out['time_unit'], expected['time_unit']) 868 assert_utest(self, out, expected) 869 assert_measurements(self, out, expected) 870 871 872def assert_utest(unittest_instance, lhs, rhs): 873 if lhs['utest']: 874 unittest_instance.assertAlmostEqual( 875 lhs['utest']['cpu_pvalue'], 876 rhs['utest']['cpu_pvalue']) 877 unittest_instance.assertAlmostEqual( 878 lhs['utest']['time_pvalue'], 879 rhs['utest']['time_pvalue']) 880 unittest_instance.assertEqual( 881 lhs['utest']['have_optimal_repetitions'], 882 rhs['utest']['have_optimal_repetitions']) 883 else: 884 # lhs is empty. assert if rhs is not. 885 unittest_instance.assertEqual(lhs['utest'], rhs['utest']) 886 887 888def assert_measurements(unittest_instance, lhs, rhs): 889 for m1, m2 in zip(lhs['measurements'], rhs['measurements']): 890 unittest_instance.assertEqual(m1['real_time'], m2['real_time']) 891 unittest_instance.assertEqual(m1['cpu_time'], m2['cpu_time']) 892 # m1['time'] and m1['cpu'] hold values which are being calculated, 893 # and therefore we must use almost-equal pattern. 894 unittest_instance.assertAlmostEqual(m1['time'], m2['time'], places=4) 895 unittest_instance.assertAlmostEqual(m1['cpu'], m2['cpu'], places=4) 896 897 898if __name__ == '__main__': 899 unittest.main() 900 901# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 902# kate: tab-width: 4; replace-tabs on; indent-width 4; tab-indents: off; 903# kate: indent-mode python; remove-trailing-spaces modified; 904