1"""report.py - Utilities for reporting statistics about benchmark results
2"""
3
4import unittest
5import os
6import re
7import copy
8import random
9
10from scipy.stats import mannwhitneyu, gmean
11from numpy import array
12from pandas import Timedelta
13
14
15class BenchmarkColor(object):
16    def __init__(self, name, code):
17        self.name = name
18        self.code = code
19
20    def __repr__(self):
21        return '%s%r' % (self.__class__.__name__,
22                         (self.name, self.code))
23
24    def __format__(self, format):
25        return self.code
26
27
28# Benchmark Colors Enumeration
29BC_NONE = BenchmarkColor('NONE', '')
30BC_MAGENTA = BenchmarkColor('MAGENTA', '\033[95m')
31BC_CYAN = BenchmarkColor('CYAN', '\033[96m')
32BC_OKBLUE = BenchmarkColor('OKBLUE', '\033[94m')
33BC_OKGREEN = BenchmarkColor('OKGREEN', '\033[32m')
34BC_HEADER = BenchmarkColor('HEADER', '\033[92m')
35BC_WARNING = BenchmarkColor('WARNING', '\033[93m')
36BC_WHITE = BenchmarkColor('WHITE', '\033[97m')
37BC_FAIL = BenchmarkColor('FAIL', '\033[91m')
38BC_ENDC = BenchmarkColor('ENDC', '\033[0m')
39BC_BOLD = BenchmarkColor('BOLD', '\033[1m')
40BC_UNDERLINE = BenchmarkColor('UNDERLINE', '\033[4m')
41
42UTEST_MIN_REPETITIONS = 2
UTEST_OPTIMAL_REPETITIONS = 9  # The lowest reasonable number; more is better.
UTEST_COL_NAME = "_pvalue"


def color_format(use_color, fmt_str, *args, **kwargs):
    """
    Return the result of 'fmt_str.format(*args, **kwargs)' after transforming
    'args' and 'kwargs' according to the value of 'use_color'. If 'use_color'
    is False then all color codes in 'args' and 'kwargs' are replaced with
    the empty string.
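
    A small illustrative doctest (uses the BC_* colors defined above):
    >>> color_format(False, '{}error{}', BC_FAIL, BC_ENDC)
    'error'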
53    """
54    assert use_color is True or use_color is False
55    if not use_color:
56        args = [arg if not isinstance(arg, BenchmarkColor) else BC_NONE
57                for arg in args]
58        kwargs = {key: arg if not isinstance(arg, BenchmarkColor) else BC_NONE
59                  for key, arg in kwargs.items()}
60    return fmt_str.format(*args, **kwargs)
61
62
63def find_longest_name(benchmark_list):
64    """
65    Return the length of the longest benchmark name in a given list of
66    benchmark JSON objects
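
    A small illustrative doctest (hypothetical names):
    >>> find_longest_name([{'name': 'BM_One'}, {'name': 'BM_VeryLongName'}])
    15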
67    """
68    longest_name = 1
69    for bc in benchmark_list:
70        if len(bc['name']) > longest_name:
71            longest_name = len(bc['name'])
72    return longest_name
73
74
75def calculate_change(old_val, new_val):
76    """
77    Return a float representing the decimal change between old_val and new_val.
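
    Illustrative doctests (hypothetical values); when old_val is zero, the
    change is computed relative to the midpoint of the two values:
    >>> calculate_change(100.0, 110.0)
    0.1
    >>> calculate_change(0.0, 10.0)
    2.0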
78    """
79    if old_val == 0 and new_val == 0:
80        return 0.0
81    if old_val == 0:
82        return float(new_val - old_val) / (float(old_val + new_val) / 2)
83    return float(new_val - old_val) / abs(old_val)
84
85
86def filter_benchmark(json_orig, family, replacement=""):
87    """
88    Apply a filter to the json, and only leave the 'family' of benchmarks.
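
    Illustrative doctest (hypothetical benchmark names); the matched part of
    each surviving name is rewritten with 'replacement':
    >>> filter_benchmark({'benchmarks': [{'name': 'BM_Foo/8'},
    ...                                  {'name': 'BM_Bar/8'}]},
    ...                  'BM_Foo', 'Foo')['benchmarks']
    [{'name': 'Foo/8'}]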
89    """
90    regex = re.compile(family)
91    filtered = {}
92    filtered['benchmarks'] = []
93    for be in json_orig['benchmarks']:
94        if not regex.search(be['name']):
95            continue
96        filteredbench = copy.deepcopy(be)  # Do NOT modify the old name!
97        filteredbench['name'] = regex.sub(replacement, filteredbench['name'])
98        filtered['benchmarks'].append(filteredbench)
99    return filtered
100
101
102def get_unique_benchmark_names(json):
103    """
    While *keeping* the order, return all the unique names used for benchmarks.
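
    A small illustrative doctest:
    >>> get_unique_benchmark_names(
    ...     {'benchmarks': [{'name': 'a'}, {'name': 'b'}, {'name': 'a'}]})
    ['a', 'b']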
105    """
106    seen = set()
107    uniqued = [x['name'] for x in json['benchmarks']
108               if x['name'] not in seen and
109               (seen.add(x['name']) or True)]
110    return uniqued
111
112
113def intersect(list1, list2):
114    """
115    Given two lists, get a new list consisting of the elements only contained
116    in *both of the input lists*, while preserving the ordering.
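
    A small illustrative doctest (the order follows 'list1'):
    >>> intersect(['a', 'b', 'c'], ['c', 'a'])
    ['a', 'c']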
117    """
118    return [x for x in list1 if x in list2]
119
120
121def is_potentially_comparable_benchmark(x):
122    return ('time_unit' in x and 'real_time' in x and 'cpu_time' in x)
123
124
125def partition_benchmarks(json1, json2):
126    """
127    While preserving the ordering, find benchmarks with the same names in
128    both of the inputs, and group them.
129    (i.e. partition/filter into groups with common name)
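
    Illustrative doctest with minimal hypothetical inputs; each partition is
    a [lhs_repetitions, rhs_repetitions] pair:
    >>> j1 = {'benchmarks': [{'name': 'x', 'time_unit': 'ns',
    ...                       'real_time': 1, 'cpu_time': 1}]}
    >>> j2 = {'benchmarks': [{'name': 'x', 'time_unit': 'ns',
    ...                       'real_time': 2, 'cpu_time': 2}]}
    >>> p = partition_benchmarks(j1, j2)
    >>> len(p), len(p[0][0]), len(p[0][1])
    (1, 1, 1)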
130    """
131    json1_unique_names = get_unique_benchmark_names(json1)
132    json2_unique_names = get_unique_benchmark_names(json2)
133    names = intersect(json1_unique_names, json2_unique_names)
134    partitions = []
135    for name in names:
136        time_unit = None
137        # Pick the time unit from the first entry of the lhs benchmark.
138        # We should be careful not to crash with unexpected input.
139        for x in json1['benchmarks']:
140            if (x['name'] == name and is_potentially_comparable_benchmark(x)):
141                time_unit = x['time_unit']
142                break
143        if time_unit is None:
144            continue
145        # Filter by name and time unit.
146        # All the repetitions are assumed to be comparable.
147        lhs = [x for x in json1['benchmarks'] if x['name'] == name and
148               x['time_unit'] == time_unit]
149        rhs = [x for x in json2['benchmarks'] if x['name'] == name and
150               x['time_unit'] == time_unit]
151        partitions.append([lhs, rhs])
152    return partitions
153
154
155def get_timedelta_field_as_seconds(benchmark, field_name):
156    """
    Get the value of the 'field_name' field of 'benchmark', interpreted as a
    time in the benchmark's 'time_unit', converted to seconds.
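
    A small illustrative doctest:
    >>> get_timedelta_field_as_seconds(
    ...     {'real_time': 1500.0, 'time_unit': 'ms'}, 'real_time')
    1.5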
159    """
160    time_unit = benchmark['time_unit'] if 'time_unit' in benchmark else 's'
161    dt = Timedelta(benchmark[field_name], time_unit)
162    return dt / Timedelta(1, 's')
163
164
165def calculate_geomean(json):
166    """
167    Extract all real/cpu times from all the benchmarks as seconds,
168    and calculate their geomean.
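
    Illustrative doctest (hypothetical values); the result is a numpy array
    of [real_time_geomean, cpu_time_geomean] in seconds:
    >>> calculate_geomean({'benchmarks': [
    ...     {'real_time': 1.0, 'cpu_time': 4.0, 'time_unit': 's'},
    ...     {'real_time': 4.0, 'cpu_time': 9.0, 'time_unit': 's'}]})
    array([2., 6.])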
169    """
170    times = []
171    for benchmark in json['benchmarks']:
172        if 'run_type' in benchmark and benchmark['run_type'] == 'aggregate':
173            continue
174        times.append([get_timedelta_field_as_seconds(benchmark, 'real_time'),
175                      get_timedelta_field_as_seconds(benchmark, 'cpu_time')])
176    return gmean(times) if times else array([])
177
178
179def extract_field(partition, field_name):
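    """
    Collect 'field_name' from every repetition on both sides of 'partition'.

    A small illustrative doctest:
    >>> extract_field([[{'v': 1.0}], [{'v': 2.0}, {'v': 3.0}]], 'v')
    [[1.0], [2.0, 3.0]]
    """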
    # The count of elements may be different. We want *all* of them.
    lhs = [x[field_name] for x in partition[0]]
    rhs = [x[field_name] for x in partition[1]]
    return [lhs, rhs]


def calc_utest(timings_cpu, timings_time):
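    """
    Run a two-sided Mann-Whitney U test on the real/cpu timings and return
    a (have_optimal_repetitions, cpu_pvalue, time_pvalue) tuple.

    Illustrative doctest of the guard path: with fewer than
    UTEST_MIN_REPETITIONS repetitions per side, no p-values are computed:
    >>> calc_utest([[1.0], [2.0]], [[1.0], [2.0]])
    (False, None, None)
    """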
    min_rep_cnt = min(len(timings_time[0]),
                      len(timings_time[1]),
                      len(timings_cpu[0]),
                      len(timings_cpu[1]))

    # Does *everything* have at least UTEST_MIN_REPETITIONS repetitions?
    if min_rep_cnt < UTEST_MIN_REPETITIONS:
        return False, None, None

    time_pvalue = mannwhitneyu(
        timings_time[0], timings_time[1], alternative='two-sided').pvalue
    cpu_pvalue = mannwhitneyu(
        timings_cpu[0], timings_cpu[1], alternative='two-sided').pvalue

    return (min_rep_cnt >= UTEST_OPTIMAL_REPETITIONS), cpu_pvalue, time_pvalue


def print_utest(bc_name, utest, utest_alpha, first_col_width, use_color=True):
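    """
    Format the U test results of one benchmark as a single report row.
    Returns a list with one formatted string, or an empty list when the
    minimum required repetitions were not reached.
    """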
    def get_utest_color(pval):
        return BC_FAIL if pval >= utest_alpha else BC_OKGREEN

    # Check if we failed miserably with minimum required repetitions for utest
    if not utest['have_optimal_repetitions'] and \
            utest['cpu_pvalue'] is None and utest['time_pvalue'] is None:
        return []

    dsc = "U Test, Repetitions: {} vs {}".format(
        utest['nr_of_repetitions'], utest['nr_of_repetitions_other'])
    dsc_color = BC_OKGREEN

    # We still got some results to show but issue a warning about it.
    if not utest['have_optimal_repetitions']:
        dsc_color = BC_WARNING
        dsc += ". WARNING: Results unreliable! {}+ repetitions recommended.".format(
            UTEST_OPTIMAL_REPETITIONS)

    special_str = "{}{:<{}s}{endc}{}{:16.4f}{endc}{}{:16.4f}{endc}{}      {}"

    return [color_format(use_color,
                         special_str,
                         BC_HEADER,
                         "{}{}".format(bc_name, UTEST_COL_NAME),
                         first_col_width,
                         get_utest_color(
                             utest['time_pvalue']), utest['time_pvalue'],
                         get_utest_color(
                             utest['cpu_pvalue']), utest['cpu_pvalue'],
                         dsc_color, dsc,
                         endc=BC_ENDC)]


def get_difference_report(
        json1,
        json2,
        utest=False):
    """
    Calculate and report the difference between each test of two benchmark
    runs specified as 'json1' and 'json2'. Output is another json containing
    relevant details for each test run.
    """
    assert utest is True or utest is False

    diff_report = []
    partitions = partition_benchmarks(json1, json2)
    for partition in partitions:
        benchmark_name = partition[0][0]['name']
        time_unit = partition[0][0]['time_unit']
        measurements = []
        utest_results = {}
        # Careful, we may have different repetition count.
        for i in range(min(len(partition[0]), len(partition[1]))):
            bn = partition[0][i]
            other_bench = partition[1][i]
            measurements.append({
                'real_time': bn['real_time'],
                'cpu_time': bn['cpu_time'],
                'real_time_other': other_bench['real_time'],
                'cpu_time_other': other_bench['cpu_time'],
                'time': calculate_change(bn['real_time'], other_bench['real_time']),
                'cpu': calculate_change(bn['cpu_time'], other_bench['cpu_time'])
            })

        # After processing the whole partition, if requested, do the U test.
        if utest:
            timings_cpu = extract_field(partition, 'cpu_time')
            timings_time = extract_field(partition, 'real_time')
            have_optimal_repetitions, cpu_pvalue, time_pvalue = calc_utest(
                timings_cpu, timings_time)
            # Explicitly compare against None: a p-value of exactly 0.0 is
            # falsy but still a valid result.
            if cpu_pvalue is not None and time_pvalue is not None:
                utest_results = {
                    'have_optimal_repetitions': have_optimal_repetitions,
                    'cpu_pvalue': cpu_pvalue,
                    'time_pvalue': time_pvalue,
                    'nr_of_repetitions': len(timings_cpu[0]),
                    'nr_of_repetitions_other': len(timings_cpu[1])
                }

        # Store only if we had any measurements for given benchmark.
        # E.g. partition_benchmarks will filter out the benchmarks having
        # time units which are not compatible with other time units in the
        # benchmark suite.
        if measurements:
            run_type = partition[0][0].get('run_type', '')
            aggregate_name = partition[0][0].get(
                'aggregate_name', '') if run_type == 'aggregate' else ''
            diff_report.append({
                'name': benchmark_name,
                'measurements': measurements,
                'time_unit': time_unit,
                'run_type': run_type,
                'aggregate_name': aggregate_name,
                'utest': utest_results
            })

    lhs_gmean = calculate_geomean(json1)
    rhs_gmean = calculate_geomean(json2)
    if lhs_gmean.any() and rhs_gmean.any():
        diff_report.append({
            'name': 'OVERALL_GEOMEAN',
            'measurements': [{
                'real_time': lhs_gmean[0],
                'cpu_time': lhs_gmean[1],
                'real_time_other': rhs_gmean[0],
                'cpu_time_other': rhs_gmean[1],
                'time': calculate_change(lhs_gmean[0], rhs_gmean[0]),
                'cpu': calculate_change(lhs_gmean[1], rhs_gmean[1])
            }],
            'time_unit': 's',
            'run_type': 'aggregate',
            'aggregate_name': 'geomean',
            'utest': {}
        })

    return diff_report


def print_difference_report(
        json_diff_report,
        include_aggregates_only=False,
        utest=False,
        utest_alpha=0.05,
        use_color=True):
    """
    Pretty-print the diff report produced by 'get_difference_report' and
    return it as a list of output lines.
330    """
331    assert utest is True or utest is False
332
333    def get_color(res):
334        if res > 0.05:
335            return BC_FAIL
336        elif res > -0.07:
337            return BC_WHITE
338        else:
339            return BC_CYAN
340
341    first_col_width = find_longest_name(json_diff_report)
342    first_col_width = max(
343        first_col_width,
344        len('Benchmark'))
345    first_col_width += len(UTEST_COL_NAME)
346    first_line = "{:<{}s}Time             CPU      Time Old      Time New       CPU Old       CPU New".format(
347        'Benchmark', 12 + first_col_width)
348    output_strs = [first_line, '-' * len(first_line)]
349
350    fmt_str = "{}{:<{}s}{endc}{}{:+16.4f}{endc}{}{:+16.4f}{endc}{:14.0f}{:14.0f}{endc}{:14.0f}{:14.0f}"
351    for benchmark in json_diff_report:
352        # *If* we were asked to only include aggregates,
353        # and if it is non-aggregate, then don't print it.
354        if not include_aggregates_only or not 'run_type' in benchmark or benchmark['run_type'] == 'aggregate':
355            for measurement in benchmark['measurements']:
356                output_strs += [color_format(use_color,
357                                             fmt_str,
358                                             BC_HEADER,
359                                             benchmark['name'],
360                                             first_col_width,
361                                             get_color(measurement['time']),
362                                             measurement['time'],
363                                             get_color(measurement['cpu']),
364                                             measurement['cpu'],
365                                             measurement['real_time'],
366                                             measurement['real_time_other'],
367                                             measurement['cpu_time'],
368                                             measurement['cpu_time_other'],
369                                             endc=BC_ENDC)]
370
371        # After processing the measurements, if requested and
372        # if applicable (e.g. u-test exists for given benchmark),
373        # print the U test.
374        if utest and benchmark['utest']:
375            output_strs += print_utest(benchmark['name'],
376                                       benchmark['utest'],
377                                       utest_alpha=utest_alpha,
378                                       first_col_width=first_col_width,
379                                       use_color=use_color)
380
381    return output_strs
382
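# A minimal usage sketch (hypothetical file names; within this repository
# the real driver of these functions is compare.py):
#
#   import json
#   with open('run1.json') as f1, open('run2.json') as f2:
#       report = get_difference_report(json.load(f1), json.load(f2),
#                                      utest=True)
#   print('\n'.join(print_difference_report(report, use_color=False)))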

###############################################################################
# Unit tests


class TestGetUniqueBenchmarkNames(unittest.TestCase):
    def load_results(self):
        import json
        testInputs = os.path.join(
            os.path.dirname(
                os.path.realpath(__file__)),
            'Inputs')
        testOutput = os.path.join(testInputs, 'test3_run0.json')
        with open(testOutput, 'r') as f:
            json = json.load(f)
        return json

    def test_basic(self):
        expect_lines = [
            'BM_One',
            'BM_Two',
            'short',  # These two are not sorted
            'medium',  # These two are not sorted
        ]
        json = self.load_results()
        output_lines = get_unique_benchmark_names(json)
        print("\n")
        print("\n".join(output_lines))
        self.assertEqual(len(output_lines), len(expect_lines))
        for i in range(0, len(output_lines)):
            self.assertEqual(expect_lines[i], output_lines[i])


class TestReportDifference(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        def load_results():
            import json
            testInputs = os.path.join(
                os.path.dirname(
                    os.path.realpath(__file__)),
                'Inputs')
            testOutput1 = os.path.join(testInputs, 'test1_run1.json')
            testOutput2 = os.path.join(testInputs, 'test1_run2.json')
            with open(testOutput1, 'r') as f:
                json1 = json.load(f)
            with open(testOutput2, 'r') as f:
                json2 = json.load(f)
            return json1, json2

        json1, json2 = load_results()
        cls.json_diff_report = get_difference_report(json1, json2)

    def test_json_diff_report_pretty_printing(self):
        expect_lines = [
            ['BM_SameTimes', '+0.0000', '+0.0000', '10', '10', '10', '10'],
            ['BM_2xFaster', '-0.5000', '-0.5000', '50', '25', '50', '25'],
            ['BM_2xSlower', '+1.0000', '+1.0000', '50', '100', '50', '100'],
            ['BM_1PercentFaster', '-0.0100', '-0.0100', '100', '99', '100', '99'],
            ['BM_1PercentSlower', '+0.0100', '+0.0100', '100', '101', '100', '101'],
            ['BM_10PercentFaster', '-0.1000', '-0.1000', '100', '90', '100', '90'],
            ['BM_10PercentSlower', '+0.1000', '+0.1000', '100', '110', '100', '110'],
            ['BM_100xSlower', '+99.0000', '+99.0000',
                '100', '10000', '100', '10000'],
            ['BM_100xFaster', '-0.9900', '-0.9900',
                '10000', '100', '10000', '100'],
            ['BM_10PercentCPUToTime', '+0.1000',
                '-0.1000', '100', '110', '100', '90'],
            ['BM_ThirdFaster', '-0.3333', '-0.3334', '100', '67', '100', '67'],
            ['BM_NotBadTimeUnit', '-0.9000', '+0.2000', '0', '0', '0', '1'],
            ['OVERALL_GEOMEAN', '-0.8344', '-0.8026', '0', '0', '0', '0']
        ]
        output_lines_with_header = print_difference_report(
            self.json_diff_report, use_color=False)
        output_lines = output_lines_with_header[2:]
        print("\n")
        print("\n".join(output_lines_with_header))
        self.assertEqual(len(output_lines), len(expect_lines))
        for i in range(0, len(output_lines)):
            parts = [x for x in output_lines[i].split(' ') if x]
            self.assertEqual(len(parts), 7)
            self.assertEqual(expect_lines[i], parts)

    def test_json_diff_report_output(self):
        expected_output = [
            {
                'name': 'BM_SameTimes',
                'measurements': [{'time': 0.0000, 'cpu': 0.0000, 'real_time': 10, 'real_time_other': 10, 'cpu_time': 10, 'cpu_time_other': 10}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': 'BM_2xFaster',
                'measurements': [{'time': -0.5000, 'cpu': -0.5000, 'real_time': 50, 'real_time_other': 25, 'cpu_time': 50, 'cpu_time_other': 25}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': 'BM_2xSlower',
                'measurements': [{'time': 1.0000, 'cpu': 1.0000, 'real_time': 50, 'real_time_other': 100, 'cpu_time': 50, 'cpu_time_other': 100}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': 'BM_1PercentFaster',
                'measurements': [{'time': -0.0100, 'cpu': -0.0100, 'real_time': 100, 'real_time_other': 98.9999999, 'cpu_time': 100, 'cpu_time_other': 98.9999999}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': 'BM_1PercentSlower',
                'measurements': [{'time': 0.0100, 'cpu': 0.0100, 'real_time': 100, 'real_time_other': 101, 'cpu_time': 100, 'cpu_time_other': 101}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': 'BM_10PercentFaster',
                'measurements': [{'time': -0.1000, 'cpu': -0.1000, 'real_time': 100, 'real_time_other': 90, 'cpu_time': 100, 'cpu_time_other': 90}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': 'BM_10PercentSlower',
                'measurements': [{'time': 0.1000, 'cpu': 0.1000, 'real_time': 100, 'real_time_other': 110, 'cpu_time': 100, 'cpu_time_other': 110}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': 'BM_100xSlower',
                'measurements': [{'time': 99.0000, 'cpu': 99.0000, 'real_time': 100, 'real_time_other': 10000, 'cpu_time': 100, 'cpu_time_other': 10000}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': 'BM_100xFaster',
                'measurements': [{'time': -0.9900, 'cpu': -0.9900, 'real_time': 10000, 'real_time_other': 100, 'cpu_time': 10000, 'cpu_time_other': 100}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': 'BM_10PercentCPUToTime',
                'measurements': [{'time': 0.1000, 'cpu': -0.1000, 'real_time': 100, 'real_time_other': 110, 'cpu_time': 100, 'cpu_time_other': 90}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': 'BM_ThirdFaster',
                'measurements': [{'time': -0.3333, 'cpu': -0.3334, 'real_time': 100, 'real_time_other': 67, 'cpu_time': 100, 'cpu_time_other': 67}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': 'BM_NotBadTimeUnit',
                'measurements': [{'time': -0.9000, 'cpu': 0.2000, 'real_time': 0.4, 'real_time_other': 0.04, 'cpu_time': 0.5, 'cpu_time_other': 0.6}],
                'time_unit': 's',
                'utest': {}
            },
            {
                'name': 'OVERALL_GEOMEAN',
                'measurements': [{'real_time': 1.193776641714438e-06, 'cpu_time': 1.2144445585302297e-06,
                                  'real_time_other': 1.9768988699420897e-07, 'cpu_time_other': 2.397447755209533e-07,
                                  'time': -0.834399601997324, 'cpu': -0.8025889499549471}],
                'time_unit': 's',
                'run_type': 'aggregate',
                'aggregate_name': 'geomean', 'utest': {}
            },
        ]
        self.assertEqual(len(self.json_diff_report), len(expected_output))
        for out, expected in zip(
                self.json_diff_report, expected_output):
            self.assertEqual(out['name'], expected['name'])
            self.assertEqual(out['time_unit'], expected['time_unit'])
            assert_utest(self, out, expected)
            assert_measurements(self, out, expected)


class TestReportDifferenceBetweenFamilies(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        def load_result():
            import json
            testInputs = os.path.join(
                os.path.dirname(
                    os.path.realpath(__file__)),
                'Inputs')
            testOutput = os.path.join(testInputs, 'test2_run.json')
            with open(testOutput, 'r') as f:
                json = json.load(f)
            return json

        json = load_result()
        json1 = filter_benchmark(json, "BM_Z.ro", ".")
        json2 = filter_benchmark(json, "BM_O.e", ".")
        cls.json_diff_report = get_difference_report(json1, json2)

    def test_json_diff_report_pretty_printing(self):
        expect_lines = [
            ['.', '-0.5000', '-0.5000', '10', '5', '10', '5'],
            ['./4', '-0.5000', '-0.5000', '40', '20', '40', '20'],
            ['Prefix/.', '-0.5000', '-0.5000', '20', '10', '20', '10'],
            ['Prefix/./3', '-0.5000', '-0.5000', '30', '15', '30', '15'],
            ['OVERALL_GEOMEAN', '-0.5000', '-0.5000', '0', '0', '0', '0']
        ]
        output_lines_with_header = print_difference_report(
            self.json_diff_report, use_color=False)
        output_lines = output_lines_with_header[2:]
        print("\n")
        print("\n".join(output_lines_with_header))
        self.assertEqual(len(output_lines), len(expect_lines))
        for i in range(0, len(output_lines)):
            parts = [x for x in output_lines[i].split(' ') if x]
            self.assertEqual(len(parts), 7)
            self.assertEqual(expect_lines[i], parts)

    def test_json_diff_report(self):
        expected_output = [
            {
                'name': u'.',
                'measurements': [{'time': -0.5, 'cpu': -0.5, 'real_time': 10, 'real_time_other': 5, 'cpu_time': 10, 'cpu_time_other': 5}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': u'./4',
                'measurements': [{'time': -0.5, 'cpu': -0.5, 'real_time': 40, 'real_time_other': 20, 'cpu_time': 40, 'cpu_time_other': 20}],
                'time_unit': 'ns',
                'utest': {},
            },
            {
                'name': u'Prefix/.',
                'measurements': [{'time': -0.5, 'cpu': -0.5, 'real_time': 20, 'real_time_other': 10, 'cpu_time': 20, 'cpu_time_other': 10}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': u'Prefix/./3',
                'measurements': [{'time': -0.5, 'cpu': -0.5, 'real_time': 30, 'real_time_other': 15, 'cpu_time': 30, 'cpu_time_other': 15}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': 'OVERALL_GEOMEAN',
                'measurements': [{'real_time': 2.213363839400641e-08, 'cpu_time': 2.213363839400641e-08,
                                  'real_time_other': 1.1066819197003185e-08, 'cpu_time_other': 1.1066819197003185e-08,
                                  'time': -0.5000000000000009, 'cpu': -0.5000000000000009}],
                'time_unit': 's',
                'run_type': 'aggregate',
                'aggregate_name': 'geomean',
                'utest': {}
            }
        ]
        self.assertEqual(len(self.json_diff_report), len(expected_output))
        for out, expected in zip(
                self.json_diff_report, expected_output):
            self.assertEqual(out['name'], expected['name'])
            self.assertEqual(out['time_unit'], expected['time_unit'])
            assert_utest(self, out, expected)
            assert_measurements(self, out, expected)


class TestReportDifferenceWithUTest(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        def load_results():
            import json
            testInputs = os.path.join(
                os.path.dirname(
                    os.path.realpath(__file__)),
                'Inputs')
            testOutput1 = os.path.join(testInputs, 'test3_run0.json')
            testOutput2 = os.path.join(testInputs, 'test3_run1.json')
            with open(testOutput1, 'r') as f:
                json1 = json.load(f)
            with open(testOutput2, 'r') as f:
                json2 = json.load(f)
            return json1, json2

        json1, json2 = load_results()
        cls.json_diff_report = get_difference_report(
            json1, json2, utest=True)

    def test_json_diff_report_pretty_printing(self):
        expect_lines = [
            ['BM_One', '-0.1000', '+0.1000', '10', '9', '100', '110'],
            ['BM_Two', '+0.1111', '-0.0111', '9', '10', '90', '89'],
            ['BM_Two', '-0.1250', '-0.1628', '8', '7', '86', '72'],
            ['BM_Two_pvalue',
             '1.0000',
             '0.6667',
             'U',
             'Test,',
             'Repetitions:',
             '2',
             'vs',
             '2.',
             'WARNING:',
             'Results',
             'unreliable!',
             '9+',
             'repetitions',
             'recommended.'],
            ['short', '-0.1250', '-0.0625', '8', '7', '80', '75'],
            ['short', '-0.4325', '-0.1351', '8', '5', '77', '67'],
            ['short_pvalue',
             '0.7671',
             '0.2000',
             'U',
             'Test,',
             'Repetitions:',
             '2',
             'vs',
             '3.',
             'WARNING:',
             'Results',
             'unreliable!',
             '9+',
             'repetitions',
             'recommended.'],
            ['medium', '-0.3750', '-0.3375', '8', '5', '80', '53'],
            ['OVERALL_GEOMEAN', '+1.6405', '-0.6985', '0', '0', '0', '0']
        ]
        output_lines_with_header = print_difference_report(
            self.json_diff_report, utest=True, utest_alpha=0.05, use_color=False)
        output_lines = output_lines_with_header[2:]
        print("\n")
        print("\n".join(output_lines_with_header))
        self.assertEqual(len(output_lines), len(expect_lines))
        for i in range(0, len(output_lines)):
            parts = [x for x in output_lines[i].split(' ') if x]
            self.assertEqual(expect_lines[i], parts)

    def test_json_diff_report_pretty_printing_aggregates_only(self):
        expect_lines = [
            ['BM_One', '-0.1000', '+0.1000', '10', '9', '100', '110'],
            ['BM_Two_pvalue',
             '1.0000',
             '0.6667',
             'U',
             'Test,',
             'Repetitions:',
             '2',
             'vs',
             '2.',
             'WARNING:',
             'Results',
             'unreliable!',
             '9+',
             'repetitions',
             'recommended.'],
            ['short', '-0.1250', '-0.0625', '8', '7', '80', '75'],
            ['short', '-0.4325', '-0.1351', '8', '5', '77', '67'],
            ['short_pvalue',
             '0.7671',
             '0.2000',
             'U',
             'Test,',
             'Repetitions:',
             '2',
             'vs',
             '3.',
             'WARNING:',
             'Results',
             'unreliable!',
             '9+',
             'repetitions',
             'recommended.'],
            ['OVERALL_GEOMEAN', '+1.6405', '-0.6985', '0', '0', '0', '0']
        ]
        output_lines_with_header = print_difference_report(
            self.json_diff_report, include_aggregates_only=True, utest=True, utest_alpha=0.05, use_color=False)
        output_lines = output_lines_with_header[2:]
        print("\n")
        print("\n".join(output_lines_with_header))
        self.assertEqual(len(output_lines), len(expect_lines))
        for i in range(0, len(output_lines)):
            parts = [x for x in output_lines[i].split(' ') if x]
            self.assertEqual(expect_lines[i], parts)

    def test_json_diff_report(self):
        expected_output = [
            {
                'name': u'BM_One',
                'measurements': [
                    {'time': -0.1,
                     'cpu': 0.1,
                     'real_time': 10,
                     'real_time_other': 9,
                     'cpu_time': 100,
                     'cpu_time_other': 110}
                ],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': u'BM_Two',
                'measurements': [
                    {'time': 0.1111111111111111,
                     'cpu': -0.011111111111111112,
                     'real_time': 9,
                     'real_time_other': 10,
                     'cpu_time': 90,
                     'cpu_time_other': 89},
                    {'time': -0.125, 'cpu': -0.16279069767441862, 'real_time': 8,
                        'real_time_other': 7, 'cpu_time': 86, 'cpu_time_other': 72}
                ],
                'time_unit': 'ns',
                'utest': {
                    'have_optimal_repetitions': False, 'cpu_pvalue': 0.6666666666666666, 'time_pvalue': 1.0
                }
            },
            {
                'name': u'short',
                'measurements': [
                    {'time': -0.125,
                     'cpu': -0.0625,
                     'real_time': 8,
                     'real_time_other': 7,
                     'cpu_time': 80,
                     'cpu_time_other': 75},
                    {'time': -0.4325,
                     'cpu': -0.13506493506493514,
                     'real_time': 8,
                     'real_time_other': 4.54,
                     'cpu_time': 77,
                     'cpu_time_other': 66.6}
                ],
                'time_unit': 'ns',
                'utest': {
                    'have_optimal_repetitions': False, 'cpu_pvalue': 0.2, 'time_pvalue': 0.7670968684102772
                }
            },
            {
                'name': u'medium',
                'measurements': [
                    {'time': -0.375,
                     'cpu': -0.3375,
                     'real_time': 8,
                     'real_time_other': 5,
                     'cpu_time': 80,
                     'cpu_time_other': 53}
                ],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': 'OVERALL_GEOMEAN',
                'measurements': [{'real_time': 8.48528137423858e-09, 'cpu_time': 8.441336246629233e-08,
                                  'real_time_other': 2.2405267593145244e-08, 'cpu_time_other': 2.5453661413660466e-08,
                                  'time': 1.6404861082353634, 'cpu': -0.6984640740519662}],
                'time_unit': 's',
                'run_type': 'aggregate',
                'aggregate_name': 'geomean',
                'utest': {}
            }
        ]
        self.assertEqual(len(self.json_diff_report), len(expected_output))
        for out, expected in zip(
                self.json_diff_report, expected_output):
            self.assertEqual(out['name'], expected['name'])
            self.assertEqual(out['time_unit'], expected['time_unit'])
            assert_utest(self, out, expected)
            assert_measurements(self, out, expected)


class TestReportDifferenceWithUTestWhileDisplayingAggregatesOnly(
        unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        def load_results():
            import json
            testInputs = os.path.join(
                os.path.dirname(
                    os.path.realpath(__file__)),
                'Inputs')
            testOutput1 = os.path.join(testInputs, 'test3_run0.json')
            testOutput2 = os.path.join(testInputs, 'test3_run1.json')
            with open(testOutput1, 'r') as f:
                json1 = json.load(f)
            with open(testOutput2, 'r') as f:
                json2 = json.load(f)
            return json1, json2

        json1, json2 = load_results()
        cls.json_diff_report = get_difference_report(
            json1, json2, utest=True)

    def test_json_diff_report_pretty_printing(self):
        expect_lines = [
            ['BM_One', '-0.1000', '+0.1000', '10', '9', '100', '110'],
            ['BM_Two', '+0.1111', '-0.0111', '9', '10', '90', '89'],
            ['BM_Two', '-0.1250', '-0.1628', '8', '7', '86', '72'],
            ['BM_Two_pvalue',
             '1.0000',
             '0.6667',
             'U',
             'Test,',
             'Repetitions:',
             '2',
             'vs',
             '2.',
             'WARNING:',
             'Results',
             'unreliable!',
             '9+',
             'repetitions',
             'recommended.'],
            ['short', '-0.1250', '-0.0625', '8', '7', '80', '75'],
            ['short', '-0.4325', '-0.1351', '8', '5', '77', '67'],
            ['short_pvalue',
             '0.7671',
             '0.2000',
             'U',
             'Test,',
             'Repetitions:',
             '2',
             'vs',
             '3.',
             'WARNING:',
             'Results',
             'unreliable!',
             '9+',
             'repetitions',
             'recommended.'],
            ['medium', '-0.3750', '-0.3375', '8', '5', '80', '53'],
            ['OVERALL_GEOMEAN', '+1.6405', '-0.6985', '0', '0', '0', '0']
        ]
        output_lines_with_header = print_difference_report(
            self.json_diff_report,
            utest=True, utest_alpha=0.05, use_color=False)
        output_lines = output_lines_with_header[2:]
        print("\n")
        print("\n".join(output_lines_with_header))
        self.assertEqual(len(output_lines), len(expect_lines))
        for i in range(0, len(output_lines)):
            parts = [x for x in output_lines[i].split(' ') if x]
            self.assertEqual(expect_lines[i], parts)

    def test_json_diff_report(self):
        expected_output = [
            {
                'name': u'BM_One',
                'measurements': [
                    {'time': -0.1,
                     'cpu': 0.1,
                     'real_time': 10,
                     'real_time_other': 9,
                     'cpu_time': 100,
                     'cpu_time_other': 110}
                ],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': u'BM_Two',
                'measurements': [
                    {'time': 0.1111111111111111,
                     'cpu': -0.011111111111111112,
                     'real_time': 9,
                     'real_time_other': 10,
                     'cpu_time': 90,
                     'cpu_time_other': 89},
                    {'time': -0.125, 'cpu': -0.16279069767441862, 'real_time': 8,
                        'real_time_other': 7, 'cpu_time': 86, 'cpu_time_other': 72}
                ],
                'time_unit': 'ns',
                'utest': {
                    'have_optimal_repetitions': False, 'cpu_pvalue': 0.6666666666666666, 'time_pvalue': 1.0
                }
            },
            {
                'name': u'short',
                'measurements': [
                    {'time': -0.125,
                     'cpu': -0.0625,
                     'real_time': 8,
                     'real_time_other': 7,
                     'cpu_time': 80,
                     'cpu_time_other': 75},
                    {'time': -0.4325,
                     'cpu': -0.13506493506493514,
                     'real_time': 8,
                     'real_time_other': 4.54,
                     'cpu_time': 77,
                     'cpu_time_other': 66.6}
                ],
                'time_unit': 'ns',
                'utest': {
                    'have_optimal_repetitions': False, 'cpu_pvalue': 0.2, 'time_pvalue': 0.7670968684102772
                }
            },
            {
                'name': u'medium',
                'measurements': [
                    {'real_time_other': 5,
                     'cpu_time': 80,
                     'time': -0.375,
                     'real_time': 8,
                     'cpu_time_other': 53,
                     'cpu': -0.3375
                     }
                ],
                'utest': {},
                'time_unit': u'ns',
                'aggregate_name': ''
            },
            {
                'name': 'OVERALL_GEOMEAN',
                'measurements': [{'real_time': 8.48528137423858e-09, 'cpu_time': 8.441336246629233e-08,
                                  'real_time_other': 2.2405267593145244e-08, 'cpu_time_other': 2.5453661413660466e-08,
                                  'time': 1.6404861082353634, 'cpu': -0.6984640740519662}],
                'time_unit': 's',
                'run_type': 'aggregate',
                'aggregate_name': 'geomean',
                'utest': {}
            }
        ]
        self.assertEqual(len(self.json_diff_report), len(expected_output))
        for out, expected in zip(
                self.json_diff_report, expected_output):
            self.assertEqual(out['name'], expected['name'])
            self.assertEqual(out['time_unit'], expected['time_unit'])
            assert_utest(self, out, expected)
            assert_measurements(self, out, expected)


class TestReportDifferenceForPercentageAggregates(
        unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        def load_results():
            import json
            testInputs = os.path.join(
                os.path.dirname(
                    os.path.realpath(__file__)),
                'Inputs')
            testOutput1 = os.path.join(testInputs, 'test4_run0.json')
            testOutput2 = os.path.join(testInputs, 'test4_run1.json')
            with open(testOutput1, 'r') as f:
                json1 = json.load(f)
            with open(testOutput2, 'r') as f:
                json2 = json.load(f)
            return json1, json2

        json1, json2 = load_results()
        cls.json_diff_report = get_difference_report(
            json1, json2, utest=True)

    def test_json_diff_report_pretty_printing(self):
        expect_lines = [
            ['whocares', '-0.5000', '+0.5000', '0', '0', '0', '0']
        ]
        output_lines_with_header = print_difference_report(
            self.json_diff_report,
            utest=True, utest_alpha=0.05, use_color=False)
        output_lines = output_lines_with_header[2:]
        print("\n")
        print("\n".join(output_lines_with_header))
        self.assertEqual(len(output_lines), len(expect_lines))
        for i in range(0, len(output_lines)):
            parts = [x for x in output_lines[i].split(' ') if x]
            self.assertEqual(expect_lines[i], parts)

    def test_json_diff_report(self):
        expected_output = [
            {
                'name': u'whocares',
                'measurements': [
                    {'time': -0.5,
                     'cpu': 0.5,
                     'real_time': 0.01,
                     'real_time_other': 0.005,
                     'cpu_time': 0.10,
                     'cpu_time_other': 0.15}
                ],
                'time_unit': 'ns',
                'utest': {}
            }
        ]
        self.assertEqual(len(self.json_diff_report), len(expected_output))
        for out, expected in zip(
                self.json_diff_report, expected_output):
            self.assertEqual(out['name'], expected['name'])
            self.assertEqual(out['time_unit'], expected['time_unit'])
            assert_utest(self, out, expected)
            assert_measurements(self, out, expected)


class TestReportSorting(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        def load_result():
            import json
            testInputs = os.path.join(
                os.path.dirname(
                    os.path.realpath(__file__)),
                'Inputs')
            testOutput = os.path.join(testInputs, 'test4_run.json')
            with open(testOutput, 'r') as f:
                json = json.load(f)
            return json

        cls.json = load_result()

    def test_json_diff_report_pretty_printing(self):
        import util

        expected_names = [
            "99 family 0 instance 0 repetition 0",
            "98 family 0 instance 0 repetition 1",
            "97 family 0 instance 0 aggregate",
            "96 family 0 instance 1 repetition 0",
            "95 family 0 instance 1 repetition 1",
            "94 family 0 instance 1 aggregate",
            "93 family 1 instance 0 repetition 0",
            "92 family 1 instance 0 repetition 1",
            "91 family 1 instance 0 aggregate",
            "90 family 1 instance 1 repetition 0",
            "89 family 1 instance 1 repetition 1",
            "88 family 1 instance 1 aggregate"
        ]

        for n in range(len(self.json['benchmarks']) ** 2):
            random.shuffle(self.json['benchmarks'])
            sorted_benchmarks = util.sort_benchmark_results(self.json)[
                'benchmarks']
            self.assertEqual(len(expected_names), len(sorted_benchmarks))
            for out, expected in zip(sorted_benchmarks, expected_names):
                self.assertEqual(out['name'], expected)


def assert_utest(unittest_instance, lhs, rhs):
    if lhs['utest']:
        unittest_instance.assertAlmostEqual(
            lhs['utest']['cpu_pvalue'],
            rhs['utest']['cpu_pvalue'])
        unittest_instance.assertAlmostEqual(
            lhs['utest']['time_pvalue'],
            rhs['utest']['time_pvalue'])
        unittest_instance.assertEqual(
            lhs['utest']['have_optimal_repetitions'],
            rhs['utest']['have_optimal_repetitions'])
    else:
        # lhs is empty. assert if rhs is not.
        unittest_instance.assertEqual(lhs['utest'], rhs['utest'])


def assert_measurements(unittest_instance, lhs, rhs):
    for m1, m2 in zip(lhs['measurements'], rhs['measurements']):
        unittest_instance.assertEqual(m1['real_time'], m2['real_time'])
        unittest_instance.assertEqual(m1['cpu_time'], m2['cpu_time'])
        # m1['time'] and m1['cpu'] hold values which are being calculated,
        # and therefore we must use almost-equal pattern.
        unittest_instance.assertAlmostEqual(m1['time'], m2['time'], places=4)
        unittest_instance.assertAlmostEqual(m1['cpu'], m2['cpu'], places=4)


if __name__ == '__main__':
    unittest.main()

# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
# kate: tab-width: 4; replace-tabs on; indent-width 4; tab-indents: off;
# kate: indent-mode python; remove-trailing-spaces modified;