"""report.py - Utilities for reporting statistics about benchmark results
"""

import unittest
import os
import re
import copy

from scipy.stats import mannwhitneyu


class BenchmarkColor(object):
    def __init__(self, name, code):
        self.name = name
        self.code = code

    def __repr__(self):
        return '%s%r' % (self.__class__.__name__,
                         (self.name, self.code))

    def __format__(self, format):
        return self.code


# Benchmark Colors Enumeration
BC_NONE = BenchmarkColor('NONE', '')
BC_MAGENTA = BenchmarkColor('MAGENTA', '\033[95m')
BC_CYAN = BenchmarkColor('CYAN', '\033[96m')
BC_OKBLUE = BenchmarkColor('OKBLUE', '\033[94m')
BC_OKGREEN = BenchmarkColor('OKGREEN', '\033[32m')
BC_HEADER = BenchmarkColor('HEADER', '\033[92m')
BC_WARNING = BenchmarkColor('WARNING', '\033[93m')
BC_WHITE = BenchmarkColor('WHITE', '\033[97m')
BC_FAIL = BenchmarkColor('FAIL', '\033[91m')
BC_ENDC = BenchmarkColor('ENDC', '\033[0m')
BC_BOLD = BenchmarkColor('BOLD', '\033[1m')
BC_UNDERLINE = BenchmarkColor('UNDERLINE', '\033[4m')

UTEST_MIN_REPETITIONS = 2
UTEST_OPTIMAL_REPETITIONS = 9  # The lowest reasonable number; more is better.
UTEST_COL_NAME = "_pvalue"


def color_format(use_color, fmt_str, *args, **kwargs):
    """
    Return the result of 'fmt_str.format(*args, **kwargs)' after transforming
    'args' and 'kwargs' according to the value of 'use_color'. If 'use_color'
    is False then all color codes in 'args' and 'kwargs' are replaced with
    the empty string.
    """
    assert use_color is True or use_color is False
    if not use_color:
        args = [arg if not isinstance(arg, BenchmarkColor) else BC_NONE
                for arg in args]
        kwargs = {key: arg if not isinstance(arg, BenchmarkColor) else BC_NONE
                  for key, arg in kwargs.items()}
    return fmt_str.format(*args, **kwargs)
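# Illustrative usage (not executed here; output shown for clarity):
#
#   >>> color_format(True, "{0}fast{endc}", BC_OKGREEN, endc=BC_ENDC)
#   '\x1b[32mfast\x1b[0m'
#   >>> color_format(False, "{0}fast{endc}", BC_OKGREEN, endc=BC_ENDC)
#   'fast'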


def find_longest_name(benchmark_list):
    """
    Return the length of the longest benchmark name in a given list of
    benchmark JSON objects
    """
    longest_name = 1
    for bc in benchmark_list:
        if len(bc['name']) > longest_name:
            longest_name = len(bc['name'])
    return longest_name


def calculate_change(old_val, new_val):
    """
    Return a float representing the decimal change between old_val and new_val.
    """
    if old_val == 0 and new_val == 0:
        return 0.0
    if old_val == 0:
        return float(new_val - old_val) / (float(old_val + new_val) / 2)
    return float(new_val - old_val) / abs(old_val)
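# Illustrative values (they follow directly from the definition above):
#
#   >>> calculate_change(100.0, 110.0)
#   0.1      # the new run is 10% slower than the old one
#   >>> calculate_change(50.0, 25.0)
#   -0.5     # the new run takes half the time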


def filter_benchmark(json_orig, family, replacement=""):
    """
    Apply a filter to the JSON, keeping only benchmarks whose names match the
    'family' regex, and substitute the matched portion with 'replacement'.
    """
    regex = re.compile(family)
    filtered = {}
    filtered['benchmarks'] = []
    for be in json_orig['benchmarks']:
        if not regex.search(be['name']):
            continue
        filteredbench = copy.deepcopy(be)  # Do NOT modify the old name!
        filteredbench['name'] = regex.sub(replacement, filteredbench['name'])
        filtered['benchmarks'].append(filteredbench)
    return filtered
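# Illustrative usage (hypothetical benchmark names, not shipped with this
# module): given runs named 'BM_memcpy/8' and 'BM_copy/8',
# filter_benchmark(json, 'BM_memcpy', '.') keeps only the 'BM_memcpy' family
# and renames its entries to './8', so two families can later be compared
# against each other with generate_difference_report().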


def get_unique_benchmark_names(json):
    """
    Return all unique benchmark names, preserving the order in which they
    first appear.
    """
    seen = set()
    uniqued = [x['name'] for x in json['benchmarks']
               if x['name'] not in seen and
               (seen.add(x['name']) or True)]
    return uniqued
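# Illustrative behaviour (made-up names): for entries ordered
# ['BM_A', 'BM_B', 'BM_A'] this returns ['BM_A', 'BM_B'] - duplicates are
# dropped and first-seen order is kept.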


def intersect(list1, list2):
    """
    Return a new list consisting of the elements contained in *both* of the
    input lists, preserving the ordering of the first list.
    """
    return [x for x in list1 if x in list2]


def partition_benchmarks(json1, json2):
    """
    While preserving the ordering, find benchmarks with the same names in
    both of the inputs, and group them.
    (i.e. partition/filter into groups with a common name)
    """
    json1_unique_names = get_unique_benchmark_names(json1)
    json2_unique_names = get_unique_benchmark_names(json2)
    names = intersect(json1_unique_names, json2_unique_names)
    partitions = []
    for name in names:
        # Pick the time unit from the first entry of the lhs benchmark.
        time_unit = next(x['time_unit']
                         for x in json1['benchmarks'] if x['name'] == name)
        # Filter by name and time unit.
        lhs = [x for x in json1['benchmarks'] if x['name'] == name and
               x['time_unit'] == time_unit]
        rhs = [x for x in json2['benchmarks'] if x['name'] == name and
               x['time_unit'] == time_unit]
        partitions.append([lhs, rhs])
    return partitions
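# Illustrative shape of the result (made-up data): for a name 'BM_A' that is
# repeated twice in each run, partitions contains one entry
#   [[<BM_A from json1, rep 0>, <BM_A from json1, rep 1>],
#    [<BM_A from json2, rep 0>, <BM_A from json2, rep 1>]]
# i.e. a [lhs, rhs] pair of repetition lists per benchmark name common to
# both runs.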


def extract_field(partition, field_name):
    # The count of elements may be different. We want *all* of them.
    lhs = [x[field_name] for x in partition[0]]
    rhs = [x[field_name] for x in partition[1]]
    return [lhs, rhs]


def print_utest(partition, utest_alpha, first_col_width, use_color=True):
    timings_time = extract_field(partition, 'real_time')
    timings_cpu = extract_field(partition, 'cpu_time')

    min_rep_cnt = min(len(timings_time[0]),
                      len(timings_time[1]),
                      len(timings_cpu[0]),
                      len(timings_cpu[1]))

    # Does *everything* have at least UTEST_MIN_REPETITIONS repetitions?
    if min_rep_cnt < UTEST_MIN_REPETITIONS:
        return []

    def get_utest_color(pval):
        return BC_FAIL if pval >= utest_alpha else BC_OKGREEN

    time_pvalue = mannwhitneyu(
        timings_time[0], timings_time[1], alternative='two-sided').pvalue
    cpu_pvalue = mannwhitneyu(
        timings_cpu[0], timings_cpu[1], alternative='two-sided').pvalue

    dsc = "U Test, Repetitions: {} vs {}".format(
        len(timings_cpu[0]), len(timings_cpu[1]))
    dsc_color = BC_OKGREEN

    if min_rep_cnt < UTEST_OPTIMAL_REPETITIONS:
        dsc_color = BC_WARNING
        dsc += ". WARNING: Results unreliable! {}+ repetitions recommended.".format(
            UTEST_OPTIMAL_REPETITIONS)

    special_str = "{}{:<{}s}{endc}{}{:16.4f}{endc}{}{:16.4f}{endc}{}      {}"

    last_name = partition[0][0]['name']
    return [color_format(use_color,
                         special_str,
                         BC_HEADER,
                         "{}{}".format(last_name, UTEST_COL_NAME),
                         first_col_width,
                         get_utest_color(time_pvalue), time_pvalue,
                         get_utest_color(cpu_pvalue), cpu_pvalue,
                         dsc_color, dsc,
                         endc=BC_ENDC)]
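# Returns either an empty list (too few repetitions) or a single formatted
# row; with colors disabled it looks roughly like this (values taken from
# the unit tests below):
#   'BM_Two_pvalue    0.6985    0.6985    U Test, Repetitions: 2 vs 2. ...'
# generate_difference_report() appends this row below the per-repetition rows.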


def generate_difference_report(
        json1,
        json2,
        display_aggregates_only=False,
        utest=False,
        utest_alpha=0.05,
        use_color=True):
    """
    Calculate and report the difference between each test of two benchmark
    runs specified as 'json1' and 'json2'.
    """
    assert utest is True or utest is False
    first_col_width = find_longest_name(json1['benchmarks'])

    def find_test(name):
        for b in json2['benchmarks']:
            if b['name'] == name:
                return b
        return None

    first_col_width = max(
        first_col_width,
        len('Benchmark'))
    first_col_width += len(UTEST_COL_NAME)
    first_line = "{:<{}s}Time             CPU      Time Old      Time New       CPU Old       CPU New".format(
        'Benchmark', 12 + first_col_width)
    output_strs = [first_line, '-' * len(first_line)]

    partitions = partition_benchmarks(json1, json2)
    for partition in partitions:
        # Careful, we may have different repetition counts.
        for i in range(min(len(partition[0]), len(partition[1]))):
            bn = partition[0][i]
            other_bench = partition[1][i]

            # *If* we were asked to only display aggregates,
            # and if it is non-aggregate, then skip it.
            if display_aggregates_only and 'run_type' in bn and 'run_type' in other_bench:
                assert bn['run_type'] == other_bench['run_type']
                if bn['run_type'] != 'aggregate':
                    continue

            fmt_str = "{}{:<{}s}{endc}{}{:+16.4f}{endc}{}{:+16.4f}{endc}{:14.0f}{:14.0f}{endc}{:14.0f}{:14.0f}"

            def get_color(res):
                if res > 0.05:
                    return BC_FAIL
                elif res > -0.07:
                    return BC_WHITE
                else:
                    return BC_CYAN

            tres = calculate_change(bn['real_time'], other_bench['real_time'])
            cpures = calculate_change(bn['cpu_time'], other_bench['cpu_time'])
            output_strs += [color_format(use_color,
                                         fmt_str,
                                         BC_HEADER,
                                         bn['name'],
                                         first_col_width,
                                         get_color(tres),
                                         tres,
                                         get_color(cpures),
                                         cpures,
                                         bn['real_time'],
                                         other_bench['real_time'],
                                         bn['cpu_time'],
                                         other_bench['cpu_time'],
                                         endc=BC_ENDC)]

        # After processing the whole partition, if requested, do the U test.
        if utest:
            output_strs += print_utest(partition,
                                       utest_alpha=utest_alpha,
                                       first_col_width=first_col_width,
                                       use_color=use_color)

    return output_strs
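# Illustrative usage (the file names are placeholders, not files shipped with
# this module):
#
#   import json
#   with open('baseline.json') as f:
#       json1 = json.load(f)
#   with open('contender.json') as f:
#       json2 = json.load(f)
#   for line in generate_difference_report(json1, json2, utest=True):
#       print(line)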


###############################################################################
# Unit tests


class TestGetUniqueBenchmarkNames(unittest.TestCase):
    def load_results(self):
        import json
        testInputs = os.path.join(
            os.path.dirname(
                os.path.realpath(__file__)),
            'Inputs')
        testOutput = os.path.join(testInputs, 'test3_run0.json')
        with open(testOutput, 'r') as f:
            json = json.load(f)
        return json

    def test_basic(self):
        expect_lines = [
            'BM_One',
            'BM_Two',
            'short',  # These two are not sorted
            'medium',  # These two are not sorted
        ]
        json = self.load_results()
        output_lines = get_unique_benchmark_names(json)
        print("\n")
        print("\n".join(output_lines))
        self.assertEqual(len(output_lines), len(expect_lines))
        for i in range(0, len(output_lines)):
            self.assertEqual(expect_lines[i], output_lines[i])


class TestReportDifference(unittest.TestCase):
    def load_results(self):
        import json
        testInputs = os.path.join(
            os.path.dirname(
                os.path.realpath(__file__)),
            'Inputs')
        testOutput1 = os.path.join(testInputs, 'test1_run1.json')
        testOutput2 = os.path.join(testInputs, 'test1_run2.json')
        with open(testOutput1, 'r') as f:
            json1 = json.load(f)
        with open(testOutput2, 'r') as f:
            json2 = json.load(f)
        return json1, json2

    def test_basic(self):
        expect_lines = [
            ['BM_SameTimes', '+0.0000', '+0.0000', '10', '10', '10', '10'],
            ['BM_2xFaster', '-0.5000', '-0.5000', '50', '25', '50', '25'],
            ['BM_2xSlower', '+1.0000', '+1.0000', '50', '100', '50', '100'],
            ['BM_1PercentFaster', '-0.0100', '-0.0100', '100', '99', '100', '99'],
            ['BM_1PercentSlower', '+0.0100', '+0.0100', '100', '101', '100', '101'],
            ['BM_10PercentFaster', '-0.1000', '-0.1000', '100', '90', '100', '90'],
            ['BM_10PercentSlower', '+0.1000', '+0.1000', '100', '110', '100', '110'],
            ['BM_100xSlower', '+99.0000', '+99.0000',
                '100', '10000', '100', '10000'],
            ['BM_100xFaster', '-0.9900', '-0.9900',
                '10000', '100', '10000', '100'],
            ['BM_10PercentCPUToTime', '+0.1000',
                '-0.1000', '100', '110', '100', '90'],
            ['BM_ThirdFaster', '-0.3333', '-0.3334', '100', '67', '100', '67'],
            ['BM_BadTimeUnit', '-0.9000', '+0.2000', '0', '0', '0', '1'],
        ]
        json1, json2 = self.load_results()
        output_lines_with_header = generate_difference_report(
            json1, json2, use_color=False)
        output_lines = output_lines_with_header[2:]
        print("\n")
        print("\n".join(output_lines_with_header))
        self.assertEqual(len(output_lines), len(expect_lines))
        for i in range(0, len(output_lines)):
            parts = [x for x in output_lines[i].split(' ') if x]
            self.assertEqual(len(parts), 7)
            self.assertEqual(expect_lines[i], parts)


class TestReportDifferenceBetweenFamilies(unittest.TestCase):
    def load_result(self):
        import json
        testInputs = os.path.join(
            os.path.dirname(
                os.path.realpath(__file__)),
            'Inputs')
        testOutput = os.path.join(testInputs, 'test2_run.json')
        with open(testOutput, 'r') as f:
            json = json.load(f)
        return json

    def test_basic(self):
        expect_lines = [
            ['.', '-0.5000', '-0.5000', '10', '5', '10', '5'],
            ['./4', '-0.5000', '-0.5000', '40', '20', '40', '20'],
            ['Prefix/.', '-0.5000', '-0.5000', '20', '10', '20', '10'],
            ['Prefix/./3', '-0.5000', '-0.5000', '30', '15', '30', '15'],
        ]
        json = self.load_result()
        json1 = filter_benchmark(json, "BM_Z.ro", ".")
        json2 = filter_benchmark(json, "BM_O.e", ".")
        output_lines_with_header = generate_difference_report(
            json1, json2, use_color=False)
        output_lines = output_lines_with_header[2:]
        print("\n")
        print("\n".join(output_lines_with_header))
        self.assertEqual(len(output_lines), len(expect_lines))
        for i in range(0, len(output_lines)):
            parts = [x for x in output_lines[i].split(' ') if x]
            self.assertEqual(len(parts), 7)
            self.assertEqual(expect_lines[i], parts)


class TestReportDifferenceWithUTest(unittest.TestCase):
    def load_results(self):
        import json
        testInputs = os.path.join(
            os.path.dirname(
                os.path.realpath(__file__)),
            'Inputs')
        testOutput1 = os.path.join(testInputs, 'test3_run0.json')
        testOutput2 = os.path.join(testInputs, 'test3_run1.json')
        with open(testOutput1, 'r') as f:
            json1 = json.load(f)
        with open(testOutput2, 'r') as f:
            json2 = json.load(f)
        return json1, json2

    def test_utest(self):
        expect_lines = [
            ['BM_One', '-0.1000', '+0.1000', '10', '9', '100', '110'],
            ['BM_Two', '+0.1111', '-0.0111', '9', '10', '90', '89'],
            ['BM_Two', '-0.1250', '-0.1628', '8', '7', '86', '72'],
            ['BM_Two_pvalue',
             '0.6985',
             '0.6985',
             'U',
             'Test,',
             'Repetitions:',
             '2',
             'vs',
             '2.',
             'WARNING:',
             'Results',
             'unreliable!',
             '9+',
             'repetitions',
             'recommended.'],
            ['short', '-0.1250', '-0.0625', '8', '7', '80', '75'],
            ['short', '-0.4325', '-0.1351', '8', '5', '77', '67'],
            ['short_pvalue',
             '0.7671',
             '0.1489',
             'U',
             'Test,',
             'Repetitions:',
             '2',
             'vs',
             '3.',
             'WARNING:',
             'Results',
             'unreliable!',
             '9+',
             'repetitions',
             'recommended.'],
            ['medium', '-0.3750', '-0.3375', '8', '5', '80', '53'],
        ]
        json1, json2 = self.load_results()
        output_lines_with_header = generate_difference_report(
            json1, json2, utest=True, utest_alpha=0.05, use_color=False)
        output_lines = output_lines_with_header[2:]
        print("\n")
        print("\n".join(output_lines_with_header))
        self.assertEqual(len(output_lines), len(expect_lines))
        for i in range(0, len(output_lines)):
            parts = [x for x in output_lines[i].split(' ') if x]
            self.assertEqual(expect_lines[i], parts)


class TestReportDifferenceWithUTestWhileDisplayingAggregatesOnly(
        unittest.TestCase):
    def load_results(self):
        import json
        testInputs = os.path.join(
            os.path.dirname(
                os.path.realpath(__file__)),
            'Inputs')
        testOutput1 = os.path.join(testInputs, 'test3_run0.json')
        testOutput2 = os.path.join(testInputs, 'test3_run1.json')
        with open(testOutput1, 'r') as f:
            json1 = json.load(f)
        with open(testOutput2, 'r') as f:
            json2 = json.load(f)
        return json1, json2

    def test_utest(self):
        expect_lines = [
            ['BM_One', '-0.1000', '+0.1000', '10', '9', '100', '110'],
            ['BM_Two', '+0.1111', '-0.0111', '9', '10', '90', '89'],
            ['BM_Two', '-0.1250', '-0.1628', '8', '7', '86', '72'],
            ['BM_Two_pvalue',
             '0.6985',
             '0.6985',
             'U',
             'Test,',
             'Repetitions:',
             '2',
             'vs',
             '2.',
             'WARNING:',
             'Results',
             'unreliable!',
             '9+',
             'repetitions',
             'recommended.'],
            ['short', '-0.1250', '-0.0625', '8', '7', '80', '75'],
            ['short', '-0.4325', '-0.1351', '8', '5', '77', '67'],
            ['short_pvalue',
             '0.7671',
             '0.1489',
             'U',
             'Test,',
             'Repetitions:',
             '2',
             'vs',
             '3.',
             'WARNING:',
             'Results',
             'unreliable!',
             '9+',
             'repetitions',
             'recommended.'],
        ]
        json1, json2 = self.load_results()
        output_lines_with_header = generate_difference_report(
            json1, json2, display_aggregates_only=True,
            utest=True, utest_alpha=0.05, use_color=False)
        output_lines = output_lines_with_header[2:]
        print("\n")
        print("\n".join(output_lines_with_header))
        self.assertEqual(len(output_lines), len(expect_lines))
        for i in range(0, len(output_lines)):
            parts = [x for x in output_lines[i].split(' ') if x]
            self.assertEqual(expect_lines[i], parts)
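

# A small additional sanity check, added for illustration (not part of the
# upstream test suite); the expected values follow directly from
# calculate_change()'s definition.
class TestCalculateChange(unittest.TestCase):
    def test_basic(self):
        self.assertAlmostEqual(calculate_change(100.0, 110.0), 0.10)
        self.assertAlmostEqual(calculate_change(50.0, 25.0), -0.50)
        self.assertAlmostEqual(calculate_change(0, 0), 0.0)
        # When the old value is zero, the change is computed relative to the
        # mean of the two values: (100 - 0) / ((0 + 100) / 2) == 2.0.
        self.assertAlmostEqual(calculate_change(0, 100), 2.0)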


if __name__ == '__main__':
    unittest.main()

# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
# kate: tab-width: 4; replace-tabs on; indent-width 4; tab-indents: off;
# kate: indent-mode python; remove-trailing-spaces modified;