"""report.py - Utilities for reporting statistics about benchmark results
"""
import unittest
import os
import re
import copy

from scipy.stats import mannwhitneyu


class BenchmarkColor(object):
    def __init__(self, name, code):
        self.name = name
        self.code = code

    def __repr__(self):
        return '%s%r' % (self.__class__.__name__,
                         (self.name, self.code))

    def __format__(self, format_spec):
        return self.code


# Benchmark Colors Enumeration
BC_NONE = BenchmarkColor('NONE', '')
BC_MAGENTA = BenchmarkColor('MAGENTA', '\033[95m')
BC_CYAN = BenchmarkColor('CYAN', '\033[96m')
BC_OKBLUE = BenchmarkColor('OKBLUE', '\033[94m')
BC_OKGREEN = BenchmarkColor('OKGREEN', '\033[32m')
BC_HEADER = BenchmarkColor('HEADER', '\033[92m')
BC_WARNING = BenchmarkColor('WARNING', '\033[93m')
BC_WHITE = BenchmarkColor('WHITE', '\033[97m')
BC_FAIL = BenchmarkColor('FAIL', '\033[91m')
BC_ENDC = BenchmarkColor('ENDC', '\033[0m')
BC_BOLD = BenchmarkColor('BOLD', '\033[1m')
BC_UNDERLINE = BenchmarkColor('UNDERLINE', '\033[4m')

UTEST_MIN_REPETITIONS = 2
UTEST_OPTIMAL_REPETITIONS = 9  # Lowest reasonable number; more is better.
UTEST_COL_NAME = "_pvalue"


def color_format(use_color, fmt_str, *args, **kwargs):
    """
    Return the result of 'fmt_str.format(*args, **kwargs)' after transforming
    'args' and 'kwargs' according to the value of 'use_color'. If 'use_color'
    is False then all color codes in 'args' and 'kwargs' are replaced with
    the empty string.
    """
    assert use_color is True or use_color is False
    if not use_color:
        args = [arg if not isinstance(arg, BenchmarkColor) else BC_NONE
                for arg in args]
        kwargs = {key: arg if not isinstance(arg, BenchmarkColor) else BC_NONE
                  for key, arg in kwargs.items()}
    return fmt_str.format(*args, **kwargs)

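# A minimal usage sketch (the format string below is illustrative, not part
# of this module). With use_color=False, every BenchmarkColor argument is
# replaced by BC_NONE, so the escape codes vanish from the output:
#   color_format(True, '{}hi{endc}', BC_OKGREEN, endc=BC_ENDC)
#       -> '\033[32mhi\033[0m'
#   color_format(False, '{}hi{endc}', BC_OKGREEN, endc=BC_ENDC)
#       -> 'hi'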

def find_longest_name(benchmark_list):
    """
    Return the length of the longest benchmark name in a given list of
    benchmark JSON objects
    """
    longest_name = 1
    for bc in benchmark_list:
        if len(bc['name']) > longest_name:
            longest_name = len(bc['name'])
    return longest_name


def calculate_change(old_val, new_val):
    """
    Return a float representing the decimal change between old_val and new_val.
    """
    if old_val == 0 and new_val == 0:
        return 0.0
    if old_val == 0:
        return float(new_val - old_val) / (float(old_val + new_val) / 2)
    return float(new_val - old_val) / abs(old_val)

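# Worked examples (hypothetical values):
#   calculate_change(100.0, 110.0)  # -> +0.10, a 10% regression
#   calculate_change(100.0, 90.0)   # -> -0.10, a 10% improvement
#   calculate_change(0.0, 5.0)      # -> +2.00, via the symmetrized fallback
#                                   #    (new - old) / ((old + new) / 2)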

def filter_benchmark(json_orig, family, replacement=""):
    """
    Apply a filter to the json, keeping only benchmarks whose name matches
    the 'family' regex (and renaming them via regex substitution).
    """
    regex = re.compile(family)
    filtered = {}
    filtered['benchmarks'] = []
    for be in json_orig['benchmarks']:
        if not regex.search(be['name']):
            continue
        filteredbench = copy.deepcopy(be)  # Do NOT modify the old name!
        filteredbench['name'] = regex.sub(replacement, filteredbench['name'])
        filtered['benchmarks'].append(filteredbench)
    return filtered

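# A minimal sketch (benchmark names are hypothetical): given benchmarks named
# 'BM_Foo/1' and 'BM_Bar/1', filter_benchmark(json, 'BM_Foo', 'BM_New') keeps
# only the former, renamed to 'BM_New/1'; the input json is left untouched
# thanks to the deepcopy above.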

def get_unique_benchmark_names(json):
    """
    Return all unique benchmark 'names', preserving the order in which they
    first appear.
    """
    seen = set()
    uniqued = [x['name'] for x in json['benchmarks']
               if x['name'] not in seen and
               (seen.add(x['name']) or True)]
    return uniqued

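# Example (hypothetical input): for benchmarks named ['a', 'b', 'a', 'c'],
# the result is ['a', 'b', 'c'] -- first occurrence wins, order is kept.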

def intersect(list1, list2):
    """
    Return a new list consisting of the elements contained in *both* input
    lists, preserving the ordering of 'list1'.
    """
    return [x for x in list1 if x in list2]

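# Example: intersect(['a', 'b', 'c'], ['c', 'a']) -> ['a', 'c'];
# the ordering of 'list1' is what survives.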

def is_potentially_comparable_benchmark(x):
    return ('time_unit' in x and 'real_time' in x and 'cpu_time' in x)


def partition_benchmarks(json1, json2):
    """
    Find benchmarks with the same name in both inputs and group them,
    preserving the ordering (i.e. partition/filter into groups with a
    common name).
    """
    json1_unique_names = get_unique_benchmark_names(json1)
    json2_unique_names = get_unique_benchmark_names(json2)
    names = intersect(json1_unique_names, json2_unique_names)
    partitions = []
    for name in names:
        time_unit = None
        # Pick the time unit from the first entry of the lhs benchmark.
        # We should be careful not to crash with unexpected input.
        for x in json1['benchmarks']:
            if x['name'] == name and is_potentially_comparable_benchmark(x):
                time_unit = x['time_unit']
                break
        if time_unit is None:
            continue
        # Filter by name and time unit.
        # All the repetitions are assumed to be comparable.
        lhs = [x for x in json1['benchmarks'] if x['name'] == name and
               x['time_unit'] == time_unit]
        rhs = [x for x in json2['benchmarks'] if x['name'] == name and
               x['time_unit'] == time_unit]
        partitions.append([lhs, rhs])
    return partitions

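# Shape of the result (names and repetition counts are illustrative): one
# entry per benchmark name common to both runs, each side holding all of
# its comparable repetitions:
#   [
#       [[lhs_rep0, lhs_rep1, ...], [rhs_rep0, ...]],  # e.g. 'BM_Foo'
#       [[...], [...]],                                # e.g. 'BM_Bar'
#   ]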

def extract_field(partition, field_name):
    # The count of elements may be different. We want *all* of them.
    lhs = [x[field_name] for x in partition[0]]
    rhs = [x[field_name] for x in partition[1]]
    return [lhs, rhs]


def calc_utest(timings_cpu, timings_time):
    min_rep_cnt = min(len(timings_time[0]),
                      len(timings_time[1]),
                      len(timings_cpu[0]),
                      len(timings_cpu[1]))

    # Does *everything* have at least UTEST_MIN_REPETITIONS repetitions?
    if min_rep_cnt < UTEST_MIN_REPETITIONS:
        return False, None, None

    time_pvalue = mannwhitneyu(
        timings_time[0], timings_time[1], alternative='two-sided').pvalue
    cpu_pvalue = mannwhitneyu(
        timings_cpu[0], timings_cpu[1], alternative='two-sided').pvalue

    return (min_rep_cnt >= UTEST_OPTIMAL_REPETITIONS), cpu_pvalue, time_pvalue

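# A minimal sketch (timings are hypothetical): each argument is a pair of
# per-repetition measurement lists, as produced by extract_field above.
#   calc_utest([[90, 89], [86, 72]], [[9, 10], [8, 7]])
#       -> (False, <cpu p-value>, <time p-value>)
# The first element is False because 2 repetitions are fewer than
# UTEST_OPTIMAL_REPETITIONS (9), so the p-values are flagged as unreliable.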

def print_utest(bc_name, utest, utest_alpha, first_col_width, use_color=True):
    def get_utest_color(pval):
        return BC_FAIL if pval >= utest_alpha else BC_OKGREEN

    # Bail out if we do not even have the minimum required repetitions.
    if (not utest['have_optimal_repetitions'] and
            utest['cpu_pvalue'] is None and utest['time_pvalue'] is None):
        return []

    dsc = "U Test, Repetitions: {} vs {}".format(
        utest['nr_of_repetitions'], utest['nr_of_repetitions_other'])
    dsc_color = BC_OKGREEN

    # We still have results to show, but issue a warning about reliability.
    if not utest['have_optimal_repetitions']:
        dsc_color = BC_WARNING
        dsc += ". WARNING: Results unreliable! {}+ repetitions recommended.".format(
            UTEST_OPTIMAL_REPETITIONS)

    special_str = "{}{:<{}s}{endc}{}{:16.4f}{endc}{}{:16.4f}{endc}{}      {}"

    return [color_format(use_color,
                         special_str,
                         BC_HEADER,
                         "{}{}".format(bc_name, UTEST_COL_NAME),
                         first_col_width,
                         get_utest_color(
                             utest['time_pvalue']), utest['time_pvalue'],
                         get_utest_color(
                             utest['cpu_pvalue']), utest['cpu_pvalue'],
                         dsc_color, dsc,
                         endc=BC_ENDC)]


def get_difference_report(
        json1,
        json2,
        utest=False):
    """
    Calculate and report the difference between each test of two benchmark
    runs specified as 'json1' and 'json2'. The output is another json-like
    list containing the relevant details for each test run.
    """
    assert utest is True or utest is False

    diff_report = []
    partitions = partition_benchmarks(json1, json2)
    for partition in partitions:
        benchmark_name = partition[0][0]['name']
        time_unit = partition[0][0]['time_unit']
        measurements = []
        utest_results = {}
        # Careful, the two sides may have different repetition counts.
        for i in range(min(len(partition[0]), len(partition[1]))):
            bn = partition[0][i]
            other_bench = partition[1][i]
            measurements.append({
                'real_time': bn['real_time'],
                'cpu_time': bn['cpu_time'],
                'real_time_other': other_bench['real_time'],
                'cpu_time_other': other_bench['cpu_time'],
                'time': calculate_change(bn['real_time'],
                                         other_bench['real_time']),
                'cpu': calculate_change(bn['cpu_time'],
                                        other_bench['cpu_time'])
            })

        # After processing the whole partition, if requested, do the U test.
        if utest:
            timings_cpu = extract_field(partition, 'cpu_time')
            timings_time = extract_field(partition, 'real_time')
            have_optimal_repetitions, cpu_pvalue, time_pvalue = calc_utest(
                timings_cpu, timings_time)
            # Compare against None explicitly; a p-value of exactly 0.0 is
            # still a valid result and must not be discarded.
            if cpu_pvalue is not None and time_pvalue is not None:
                utest_results = {
                    'have_optimal_repetitions': have_optimal_repetitions,
                    'cpu_pvalue': cpu_pvalue,
                    'time_pvalue': time_pvalue,
                    'nr_of_repetitions': len(timings_cpu[0]),
                    'nr_of_repetitions_other': len(timings_cpu[1])
                }

        # Store only if we had any measurements for the given benchmark.
        # E.g. partition_benchmarks will filter out the benchmarks having
        # time units which are not compatible with other time units in the
        # benchmark suite.
        if measurements:
            run_type = (partition[0][0]['run_type']
                        if 'run_type' in partition[0][0] else '')
            aggregate_name = (partition[0][0]['aggregate_name']
                              if run_type == 'aggregate' and
                              'aggregate_name' in partition[0][0] else '')
            diff_report.append({
                'name': benchmark_name,
                'measurements': measurements,
                'time_unit': time_unit,
                'run_type': run_type,
                'aggregate_name': aggregate_name,
                'utest': utest_results
            })

    return diff_report

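# Shape of one entry in the returned report (values are illustrative):
#   {'name': 'BM_Foo',
#    'measurements': [{'real_time': 8, 'cpu_time': 80,
#                      'real_time_other': 7, 'cpu_time_other': 72,
#                      'time': -0.125, 'cpu': -0.1}],
#    'time_unit': 'ns', 'run_type': '', 'aggregate_name': '', 'utest': {}}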

def print_difference_report(
        json_diff_report,
        include_aggregates_only=False,
        utest=False,
        utest_alpha=0.05,
        use_color=True):
    """
    Pretty-print the difference report previously produced by
    'get_difference_report', returning the report as a list of lines.
    """
    assert utest is True or utest is False

    def get_color(res):
        if res > 0.05:
            return BC_FAIL
        elif res > -0.07:
            return BC_WHITE
        else:
            return BC_CYAN

    first_col_width = find_longest_name(json_diff_report)
    first_col_width = max(
        first_col_width,
        len('Benchmark'))
    first_col_width += len(UTEST_COL_NAME)
    first_line = "{:<{}s}Time             CPU      Time Old      Time New       CPU Old       CPU New".format(
        'Benchmark', 12 + first_col_width)
    output_strs = [first_line, '-' * len(first_line)]

    fmt_str = "{}{:<{}s}{endc}{}{:+16.4f}{endc}{}{:+16.4f}{endc}{:14.0f}{:14.0f}{endc}{:14.0f}{:14.0f}"
    for benchmark in json_diff_report:
        # If we were asked to include only aggregates and this benchmark
        # is not an aggregate, skip it.
        if include_aggregates_only and 'run_type' in benchmark:
            if benchmark['run_type'] != 'aggregate':
                continue

        for measurement in benchmark['measurements']:
            output_strs += [color_format(use_color,
                                         fmt_str,
                                         BC_HEADER,
                                         benchmark['name'],
                                         first_col_width,
                                         get_color(measurement['time']),
                                         measurement['time'],
                                         get_color(measurement['cpu']),
                                         measurement['cpu'],
                                         measurement['real_time'],
                                         measurement['real_time_other'],
                                         measurement['cpu_time'],
                                         measurement['cpu_time_other'],
                                         endc=BC_ENDC)]

        # After processing the measurements, if requested and applicable
        # (i.e. a U test exists for the given benchmark), print the U test.
        if utest and benchmark['utest']:
            output_strs += print_utest(benchmark['name'],
                                       benchmark['utest'],
                                       utest_alpha=utest_alpha,
                                       first_col_width=first_col_width,
                                       use_color=use_color)

    return output_strs

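# A minimal driver sketch ('lhs.json' and 'rhs.json' are hypothetical names
# for two Google Benchmark JSON output files):
#   import json
#   with open('lhs.json') as f1, open('rhs.json') as f2:
#       diff = get_difference_report(json.load(f1), json.load(f2), utest=True)
#   print('\n'.join(print_difference_report(diff, utest=True,
#                                           use_color=False)))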

###############################################################################
# Unit tests


class TestGetUniqueBenchmarkNames(unittest.TestCase):
    def load_results(self):
        import json
        testInputs = os.path.join(
            os.path.dirname(
                os.path.realpath(__file__)),
            'Inputs')
        testOutput = os.path.join(testInputs, 'test3_run0.json')
        with open(testOutput, 'r') as f:
            json = json.load(f)
        return json

    def test_basic(self):
        expect_lines = [
            'BM_One',
            'BM_Two',
            'short',  # These two are not sorted
            'medium',  # These two are not sorted
        ]
        json = self.load_results()
        output_lines = get_unique_benchmark_names(json)
        print("\n")
        print("\n".join(output_lines))
        self.assertEqual(len(output_lines), len(expect_lines))
        for i in range(0, len(output_lines)):
            self.assertEqual(expect_lines[i], output_lines[i])


class TestReportDifference(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        def load_results():
            import json
            testInputs = os.path.join(
                os.path.dirname(
                    os.path.realpath(__file__)),
                'Inputs')
            testOutput1 = os.path.join(testInputs, 'test1_run1.json')
            testOutput2 = os.path.join(testInputs, 'test1_run2.json')
            with open(testOutput1, 'r') as f:
                json1 = json.load(f)
            with open(testOutput2, 'r') as f:
                json2 = json.load(f)
            return json1, json2

        json1, json2 = load_results()
        cls.json_diff_report = get_difference_report(json1, json2)

    def test_json_diff_report_pretty_printing(self):
        expect_lines = [
            ['BM_SameTimes', '+0.0000', '+0.0000', '10', '10', '10', '10'],
            ['BM_2xFaster', '-0.5000', '-0.5000', '50', '25', '50', '25'],
            ['BM_2xSlower', '+1.0000', '+1.0000', '50', '100', '50', '100'],
            ['BM_1PercentFaster', '-0.0100', '-0.0100', '100', '99', '100', '99'],
            ['BM_1PercentSlower', '+0.0100', '+0.0100', '100', '101', '100', '101'],
            ['BM_10PercentFaster', '-0.1000', '-0.1000', '100', '90', '100', '90'],
            ['BM_10PercentSlower', '+0.1000', '+0.1000', '100', '110', '100', '110'],
            ['BM_100xSlower', '+99.0000', '+99.0000',
                '100', '10000', '100', '10000'],
            ['BM_100xFaster', '-0.9900', '-0.9900',
                '10000', '100', '10000', '100'],
            ['BM_10PercentCPUToTime', '+0.1000',
                '-0.1000', '100', '110', '100', '90'],
            ['BM_ThirdFaster', '-0.3333', '-0.3334', '100', '67', '100', '67'],
            ['BM_NotBadTimeUnit', '-0.9000', '+0.2000', '0', '0', '0', '1'],
        ]
        output_lines_with_header = print_difference_report(
            self.json_diff_report, use_color=False)
        output_lines = output_lines_with_header[2:]
        print("\n")
        print("\n".join(output_lines_with_header))
        self.assertEqual(len(output_lines), len(expect_lines))
        for i in range(0, len(output_lines)):
            parts = [x for x in output_lines[i].split(' ') if x]
            self.assertEqual(len(parts), 7)
            self.assertEqual(expect_lines[i], parts)

    def test_json_diff_report_output(self):
        expected_output = [
            {
                'name': 'BM_SameTimes',
                'measurements': [{'time': 0.0000, 'cpu': 0.0000, 'real_time': 10, 'real_time_other': 10, 'cpu_time': 10, 'cpu_time_other': 10}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': 'BM_2xFaster',
                'measurements': [{'time': -0.5000, 'cpu': -0.5000, 'real_time': 50, 'real_time_other': 25, 'cpu_time': 50, 'cpu_time_other': 25}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': 'BM_2xSlower',
                'measurements': [{'time': 1.0000, 'cpu': 1.0000, 'real_time': 50, 'real_time_other': 100, 'cpu_time': 50, 'cpu_time_other': 100}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': 'BM_1PercentFaster',
                'measurements': [{'time': -0.0100, 'cpu': -0.0100, 'real_time': 100, 'real_time_other': 98.9999999, 'cpu_time': 100, 'cpu_time_other': 98.9999999}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': 'BM_1PercentSlower',
                'measurements': [{'time': 0.0100, 'cpu': 0.0100, 'real_time': 100, 'real_time_other': 101, 'cpu_time': 100, 'cpu_time_other': 101}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': 'BM_10PercentFaster',
                'measurements': [{'time': -0.1000, 'cpu': -0.1000, 'real_time': 100, 'real_time_other': 90, 'cpu_time': 100, 'cpu_time_other': 90}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': 'BM_10PercentSlower',
                'measurements': [{'time': 0.1000, 'cpu': 0.1000, 'real_time': 100, 'real_time_other': 110, 'cpu_time': 100, 'cpu_time_other': 110}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': 'BM_100xSlower',
                'measurements': [{'time': 99.0000, 'cpu': 99.0000, 'real_time': 100, 'real_time_other': 10000, 'cpu_time': 100, 'cpu_time_other': 10000}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': 'BM_100xFaster',
                'measurements': [{'time': -0.9900, 'cpu': -0.9900, 'real_time': 10000, 'real_time_other': 100, 'cpu_time': 10000, 'cpu_time_other': 100}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': 'BM_10PercentCPUToTime',
                'measurements': [{'time': 0.1000, 'cpu': -0.1000, 'real_time': 100, 'real_time_other': 110, 'cpu_time': 100, 'cpu_time_other': 90}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': 'BM_ThirdFaster',
                'measurements': [{'time': -0.3333, 'cpu': -0.3334, 'real_time': 100, 'real_time_other': 67, 'cpu_time': 100, 'cpu_time_other': 67}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': 'BM_NotBadTimeUnit',
                'measurements': [{'time': -0.9000, 'cpu': 0.2000, 'real_time': 0.4, 'real_time_other': 0.04, 'cpu_time': 0.5, 'cpu_time_other': 0.6}],
                'time_unit': 's',
                'utest': {}
            },
        ]
        self.assertEqual(len(self.json_diff_report), len(expected_output))
        for out, expected in zip(
                self.json_diff_report, expected_output):
            self.assertEqual(out['name'], expected['name'])
            self.assertEqual(out['time_unit'], expected['time_unit'])
            assert_utest(self, out, expected)
            assert_measurements(self, out, expected)


class TestReportDifferenceBetweenFamilies(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        def load_result():
            import json
            testInputs = os.path.join(
                os.path.dirname(
                    os.path.realpath(__file__)),
                'Inputs')
            testOutput = os.path.join(testInputs, 'test2_run.json')
            with open(testOutput, 'r') as f:
                json = json.load(f)
            return json

        json = load_result()
        json1 = filter_benchmark(json, "BM_Z.ro", ".")
        json2 = filter_benchmark(json, "BM_O.e", ".")
        cls.json_diff_report = get_difference_report(json1, json2)

    def test_json_diff_report_pretty_printing(self):
        expect_lines = [
            ['.', '-0.5000', '-0.5000', '10', '5', '10', '5'],
            ['./4', '-0.5000', '-0.5000', '40', '20', '40', '20'],
            ['Prefix/.', '-0.5000', '-0.5000', '20', '10', '20', '10'],
            ['Prefix/./3', '-0.5000', '-0.5000', '30', '15', '30', '15'],
        ]
        output_lines_with_header = print_difference_report(
            self.json_diff_report, use_color=False)
        output_lines = output_lines_with_header[2:]
        print("\n")
        print("\n".join(output_lines_with_header))
        self.assertEqual(len(output_lines), len(expect_lines))
        for i in range(0, len(output_lines)):
            parts = [x for x in output_lines[i].split(' ') if x]
            self.assertEqual(len(parts), 7)
            self.assertEqual(expect_lines[i], parts)

    def test_json_diff_report(self):
        expected_output = [
            {
                'name': u'.',
                'measurements': [{'time': -0.5, 'cpu': -0.5, 'real_time': 10, 'real_time_other': 5, 'cpu_time': 10, 'cpu_time_other': 5}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': u'./4',
                'measurements': [{'time': -0.5, 'cpu': -0.5, 'real_time': 40, 'real_time_other': 20, 'cpu_time': 40, 'cpu_time_other': 20}],
                'time_unit': 'ns',
                'utest': {},
            },
            {
                'name': u'Prefix/.',
                'measurements': [{'time': -0.5, 'cpu': -0.5, 'real_time': 20, 'real_time_other': 10, 'cpu_time': 20, 'cpu_time_other': 10}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': u'Prefix/./3',
                'measurements': [{'time': -0.5, 'cpu': -0.5, 'real_time': 30, 'real_time_other': 15, 'cpu_time': 30, 'cpu_time_other': 15}],
                'time_unit': 'ns',
                'utest': {}
            }
        ]
        self.assertEqual(len(self.json_diff_report), len(expected_output))
        for out, expected in zip(
                self.json_diff_report, expected_output):
            self.assertEqual(out['name'], expected['name'])
            self.assertEqual(out['time_unit'], expected['time_unit'])
            assert_utest(self, out, expected)
            assert_measurements(self, out, expected)


class TestReportDifferenceWithUTest(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        def load_results():
            import json
            testInputs = os.path.join(
                os.path.dirname(
                    os.path.realpath(__file__)),
                'Inputs')
            testOutput1 = os.path.join(testInputs, 'test3_run0.json')
            testOutput2 = os.path.join(testInputs, 'test3_run1.json')
            with open(testOutput1, 'r') as f:
                json1 = json.load(f)
            with open(testOutput2, 'r') as f:
                json2 = json.load(f)
            return json1, json2

        json1, json2 = load_results()
        cls.json_diff_report = get_difference_report(
            json1, json2, utest=True)

    def test_json_diff_report_pretty_printing(self):
        expect_lines = [
            ['BM_One', '-0.1000', '+0.1000', '10', '9', '100', '110'],
            ['BM_Two', '+0.1111', '-0.0111', '9', '10', '90', '89'],
            ['BM_Two', '-0.1250', '-0.1628', '8', '7', '86', '72'],
            ['BM_Two_pvalue',
             '0.6985',
             '0.6985',
             'U',
             'Test,',
             'Repetitions:',
             '2',
             'vs',
             '2.',
             'WARNING:',
             'Results',
             'unreliable!',
             '9+',
             'repetitions',
             'recommended.'],
            ['short', '-0.1250', '-0.0625', '8', '7', '80', '75'],
            ['short', '-0.4325', '-0.1351', '8', '5', '77', '67'],
            ['short_pvalue',
             '0.7671',
             '0.1489',
             'U',
             'Test,',
             'Repetitions:',
             '2',
             'vs',
             '3.',
             'WARNING:',
             'Results',
             'unreliable!',
             '9+',
             'repetitions',
             'recommended.'],
            ['medium', '-0.3750', '-0.3375', '8', '5', '80', '53'],
        ]
        output_lines_with_header = print_difference_report(
            self.json_diff_report, utest=True, utest_alpha=0.05, use_color=False)
        output_lines = output_lines_with_header[2:]
        print("\n")
        print("\n".join(output_lines_with_header))
        self.assertEqual(len(output_lines), len(expect_lines))
        for i in range(0, len(output_lines)):
            parts = [x for x in output_lines[i].split(' ') if x]
            self.assertEqual(expect_lines[i], parts)

    def test_json_diff_report(self):
        expected_output = [
            {
                'name': u'BM_One',
                'measurements': [
                    {'time': -0.1,
                     'cpu': 0.1,
                     'real_time': 10,
                     'real_time_other': 9,
                     'cpu_time': 100,
                     'cpu_time_other': 110}
                ],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': u'BM_Two',
                'measurements': [
                    {'time': 0.1111111111111111,
                     'cpu': -0.011111111111111112,
                     'real_time': 9,
                     'real_time_other': 10,
                     'cpu_time': 90,
                     'cpu_time_other': 89},
                    {'time': -0.125, 'cpu': -0.16279069767441862, 'real_time': 8,
                        'real_time_other': 7, 'cpu_time': 86, 'cpu_time_other': 72}
                ],
                'time_unit': 'ns',
                'utest': {
                    'have_optimal_repetitions': False, 'cpu_pvalue': 0.6985353583033387, 'time_pvalue': 0.6985353583033387
                }
            },
            {
                'name': u'short',
                'measurements': [
                    {'time': -0.125,
                     'cpu': -0.0625,
                     'real_time': 8,
                     'real_time_other': 7,
                     'cpu_time': 80,
                     'cpu_time_other': 75},
                    {'time': -0.4325,
                     'cpu': -0.13506493506493514,
                     'real_time': 8,
                     'real_time_other': 4.54,
                     'cpu_time': 77,
                     'cpu_time_other': 66.6}
                ],
                'time_unit': 'ns',
                'utest': {
                    'have_optimal_repetitions': False, 'cpu_pvalue': 0.14891467317876572, 'time_pvalue': 0.7670968684102772
                }
            },
            {
                'name': u'medium',
                'measurements': [
                    {'time': -0.375,
                     'cpu': -0.3375,
                     'real_time': 8,
                     'real_time_other': 5,
                     'cpu_time': 80,
                     'cpu_time_other': 53}
                ],
                'time_unit': 'ns',
                'utest': {}
            }
        ]
        self.assertEqual(len(self.json_diff_report), len(expected_output))
        for out, expected in zip(
                self.json_diff_report, expected_output):
            self.assertEqual(out['name'], expected['name'])
            self.assertEqual(out['time_unit'], expected['time_unit'])
            assert_utest(self, out, expected)
            assert_measurements(self, out, expected)


class TestReportDifferenceWithUTestWhileDisplayingAggregatesOnly(
        unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        def load_results():
            import json
            testInputs = os.path.join(
                os.path.dirname(
                    os.path.realpath(__file__)),
                'Inputs')
            testOutput1 = os.path.join(testInputs, 'test3_run0.json')
            testOutput2 = os.path.join(testInputs, 'test3_run1.json')
            with open(testOutput1, 'r') as f:
                json1 = json.load(f)
            with open(testOutput2, 'r') as f:
                json2 = json.load(f)
            return json1, json2

        json1, json2 = load_results()
        cls.json_diff_report = get_difference_report(
            json1, json2, utest=True)

    def test_json_diff_report_pretty_printing(self):
        expect_lines = [
            ['BM_One', '-0.1000', '+0.1000', '10', '9', '100', '110'],
            ['BM_Two', '+0.1111', '-0.0111', '9', '10', '90', '89'],
            ['BM_Two', '-0.1250', '-0.1628', '8', '7', '86', '72'],
            ['BM_Two_pvalue',
             '0.6985',
             '0.6985',
             'U',
             'Test,',
             'Repetitions:',
             '2',
             'vs',
             '2.',
             'WARNING:',
             'Results',
             'unreliable!',
             '9+',
             'repetitions',
             'recommended.'],
            ['short', '-0.1250', '-0.0625', '8', '7', '80', '75'],
            ['short', '-0.4325', '-0.1351', '8', '5', '77', '67'],
            ['short_pvalue',
             '0.7671',
             '0.1489',
             'U',
             'Test,',
             'Repetitions:',
             '2',
             'vs',
             '3.',
             'WARNING:',
             'Results',
             'unreliable!',
             '9+',
             'repetitions',
             'recommended.'],
            ['medium', '-0.3750', '-0.3375', '8', '5', '80', '53'],
        ]
        output_lines_with_header = print_difference_report(
            self.json_diff_report,
            utest=True, utest_alpha=0.05, use_color=False)
        output_lines = output_lines_with_header[2:]
        print("\n")
        print("\n".join(output_lines_with_header))
        self.assertEqual(len(output_lines), len(expect_lines))
        for i in range(0, len(output_lines)):
            parts = [x for x in output_lines[i].split(' ') if x]
            self.assertEqual(expect_lines[i], parts)

    def test_json_diff_report(self):
        expected_output = [
            {
                'name': u'BM_One',
                'measurements': [
                    {'time': -0.1,
                     'cpu': 0.1,
                     'real_time': 10,
                     'real_time_other': 9,
                     'cpu_time': 100,
                     'cpu_time_other': 110}
                ],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': u'BM_Two',
                'measurements': [
                    {'time': 0.1111111111111111,
                     'cpu': -0.011111111111111112,
                     'real_time': 9,
                     'real_time_other': 10,
                     'cpu_time': 90,
                     'cpu_time_other': 89},
                    {'time': -0.125, 'cpu': -0.16279069767441862, 'real_time': 8,
                        'real_time_other': 7, 'cpu_time': 86, 'cpu_time_other': 72}
                ],
                'time_unit': 'ns',
                'utest': {
                    'have_optimal_repetitions': False, 'cpu_pvalue': 0.6985353583033387, 'time_pvalue': 0.6985353583033387
                }
            },
            {
                'name': u'short',
                'measurements': [
                    {'time': -0.125,
                     'cpu': -0.0625,
                     'real_time': 8,
                     'real_time_other': 7,
                     'cpu_time': 80,
                     'cpu_time_other': 75},
                    {'time': -0.4325,
                     'cpu': -0.13506493506493514,
                     'real_time': 8,
                     'real_time_other': 4.54,
                     'cpu_time': 77,
                     'cpu_time_other': 66.6}
                ],
                'time_unit': 'ns',
                'utest': {
                    'have_optimal_repetitions': False, 'cpu_pvalue': 0.14891467317876572, 'time_pvalue': 0.7670968684102772
                }
            },
            {
                'name': u'medium',
                'measurements': [
                    {'real_time_other': 5,
                     'cpu_time': 80,
                     'time': -0.375,
                     'real_time': 8,
                     'cpu_time_other': 53,
                     'cpu': -0.3375
                    }
                ],
                'utest': {},
                'time_unit': u'ns',
                'aggregate_name': ''
            }
        ]
        self.assertEqual(len(self.json_diff_report), len(expected_output))
        for out, expected in zip(
                self.json_diff_report, expected_output):
            self.assertEqual(out['name'], expected['name'])
            self.assertEqual(out['time_unit'], expected['time_unit'])
            assert_utest(self, out, expected)
            assert_measurements(self, out, expected)


def assert_utest(unittest_instance, lhs, rhs):
    if lhs['utest']:
        unittest_instance.assertAlmostEqual(
            lhs['utest']['cpu_pvalue'],
            rhs['utest']['cpu_pvalue'])
        unittest_instance.assertAlmostEqual(
            lhs['utest']['time_pvalue'],
            rhs['utest']['time_pvalue'])
        unittest_instance.assertEqual(
            lhs['utest']['have_optimal_repetitions'],
            rhs['utest']['have_optimal_repetitions'])
    else:
        # lhs is empty; assert that rhs is empty too.
        unittest_instance.assertEqual(lhs['utest'], rhs['utest'])


def assert_measurements(unittest_instance, lhs, rhs):
    for m1, m2 in zip(lhs['measurements'], rhs['measurements']):
        unittest_instance.assertEqual(m1['real_time'], m2['real_time'])
        unittest_instance.assertEqual(m1['cpu_time'], m2['cpu_time'])
        # m1['time'] and m1['cpu'] hold calculated values, so we must
        # compare them with the almost-equal pattern.
        unittest_instance.assertAlmostEqual(m1['time'], m2['time'], places=4)
        unittest_instance.assertAlmostEqual(m1['cpu'], m2['cpu'], places=4)


if __name__ == '__main__':
    unittest.main()

# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
# kate: tab-width: 4; replace-tabs on; indent-width 4; tab-indents: off;
# kate: indent-mode python; remove-trailing-spaces modified;