#!/usr/bin/env python3
#  Copyright 2016 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS-IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import argparse
import json
from collections import defaultdict
from typing import Tuple, List, Dict, Union, Callable, Any, Sequence, Set

import yaml

def extract_results(bench_results: List[Dict[str, Dict[Any, Any]]],
                    fixed_benchmark_params: Dict[str, Union[str, Tuple[str, ...]]],
                    column_dimension: str,
                    row_dimension: str,
                    result_dimension: str) -> Tuple[Dict[str, Dict[str, Dict[str, Any]]],
                                                    Set[Tuple[List[Tuple[str, str]], ...]],
                                                    Set[Tuple[Tuple[List[Tuple[str, str]], ...],
                                                              str]]]:
    table_data = defaultdict(lambda: dict())  # type: Dict[str, Dict[str, Dict[str, Any]]]
    remaining_dimensions_by_row_column = dict()
    used_bench_results = set()  # type: Set[Tuple[List[Tuple[str, str]], ...]]
    used_bench_result_values = set()  # type: Set[Tuple[Tuple[List[Tuple[str, str]], ...], str]]
    for bench_result in bench_results:
        try:
            params = {dimension_name: make_immutable(dimension_value)
                      for dimension_name, dimension_value in bench_result['benchmark'].items()}
            original_params = dict(params)
            results = bench_result['results']
            matches = True
            if result_dimension not in results:
                # result_dimension not found in this result, skip
                matches = False
            for param_name, param_value in fixed_benchmark_params.items():
                param_matches = ((isinstance(param_value, tuple) and params.get(param_name) in param_value)
                                 or params.get(param_name) == param_value)
                if not param_matches:
                    # fixed_benchmark_params not satisfied by this result, skip
                    matches = False
            if matches:
                # fixed_benchmark_params were satisfied by these params
                assert row_dimension in params.keys(), '%s not in %s' % (row_dimension, params.keys())
                assert column_dimension in params.keys(), '%s not in %s' % (column_dimension, params.keys())
                assert result_dimension in results, '%s not in %s' % (result_dimension, results)
                used_bench_results.add(tuple(sorted(original_params.items())))
                used_bench_result_values.add((tuple(sorted(original_params.items())),
                                              result_dimension))
                row_value = params[row_dimension]
                column_value = params[column_dimension]
                remaining_dimensions = params.copy()
                remaining_dimensions.pop(row_dimension)
                remaining_dimensions.pop(column_dimension)
                if column_value in table_data[row_value]:
                    previous_remaining_dimensions = remaining_dimensions_by_row_column[(row_value, column_value)]
                    raise Exception(
                        'Found multiple benchmark results with the same fixed benchmark params, row param and column param, so the result for this cell can\'t be uniquely determined. '
                        + 'Consider adding additional values in fixed_benchmark_params. Remaining dimensions:\n%s\nvs\n%s' % (
                            remaining_dimensions, previous_remaining_dimensions))
                table_data[row_value][column_value] = results[result_dimension]
                remaining_dimensions_by_row_column[(row_value, column_value)] = remaining_dimensions
        except Exception as e:
            raise Exception('While processing %s' % bench_result) from e
    return table_data, used_bench_results, used_bench_result_values

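# Example of the expected input format (hypothetical data): each element of
# bench_results is a parsed JSON line such as
#   {'benchmark': {'name': 'new_delete_run_time', 'compiler': 'gcc'},
#    'results': {'run_time': ((0.9, 1.1), (0.9, 1.1))}}
# where each result value is a (raw_confidence_interval, rounded_confidence_interval)
# pair, as unpacked in print_confidence_intervals_table below.
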
# Takes a 2-dimensional array (list of lists) and prints a markdown table with that content.
def print_markdown_table(table_data: List[List[str]]) -> None:
    max_content_length_by_column = [max([len(str(row[column_index])) for row in table_data])
                                    for column_index in range(len(table_data[0]))]
    for row_index in range(len(table_data)):
        row = table_data[row_index]
        cell_strings = []
        for column_index in range(len(row)):
            value = str(row[column_index])
            # E.g. if max_content_length_by_column[column_index]=20, table_cell_format='%20s'
            table_cell_format = '%%%ss' % max_content_length_by_column[column_index]
            cell_strings += [table_cell_format % value]
        print('| ' + ' | '.join(cell_strings) + ' |')
        if row_index == 0:
            # Print the separator line, e.g. |---|-----|---|
            print('|-'
                  + '-|-'.join(['-' * max_content_length_by_column[column_index]
                                for column_index in range(len(row))])
                  + '-|')

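# Example (hypothetical data):
#   print_markdown_table([['Name', 'Value'], ['foo', '42']])
# prints:
#   | Name | Value |
#   |------|-------|
#   |  foo |    42 |
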
# A sequence of length 2, with the lower and upper bound of the interval.
# TODO: use a class instead.
Interval = Sequence[float]

def compute_min_max(table_data, row_headers: List[str], column_headers: List[str]) -> Interval:
    values_by_row = {row_header: [table_data[row_header][column_header]
                                  for column_header in column_headers
                                  if column_header in table_data[row_header]]
                     for row_header in row_headers}
    # We compute min and max and pass them to the value pretty-printer, so that it can determine
    # a unit that works well for all values in the table.
    min_in_table = min([min([min(interval[0][0], interval[1][0]) for interval in values_by_row[row_header]])
                        for row_header in row_headers])
    max_in_table = max([max([max(interval[0][1], interval[1][1]) for interval in values_by_row[row_header]])
                        for row_header in row_headers])
    return (min_in_table, max_in_table)

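# Example (hypothetical data): with a single cell holding the value
#   ((0.9, 1.1), (0.9, 1.1))  # (raw interval, rounded interval)
# compute_min_max({'row': {'col': ((0.9, 1.1), (0.9, 1.1))}}, ['row'], ['col'])
# returns (0.9, 1.1).
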
def pretty_print_percentage_difference(baseline_value: Interval, current_value: Interval):
    baseline_min = baseline_value[0]
    baseline_max = baseline_value[1]
    current_min = current_value[0]
    current_max = current_value[1]
    percentage_min = (current_min / baseline_max - 1) * 100
    percentage_max = (current_max / baseline_min - 1) * 100
    percentage_min_s = "%+.1f%%" % percentage_min
    percentage_max_s = "%+.1f%%" % percentage_max
    if percentage_min_s == percentage_max_s:
        return percentage_min_s
    else:
        return "%s - %s" % (percentage_min_s, percentage_max_s)

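# Example (hypothetical data):
#   pretty_print_percentage_difference((2.0, 2.0), (3.0, 3.0)) == '+50.0%'
#   pretty_print_percentage_difference((2.0, 2.5), (3.0, 3.5)) == '+20.0% - +75.0%'
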
DimensionPrettyPrinter = Callable[[Any], str]

IntervalPrettyPrinter = Callable[[Interval, float, float], str]


# Takes a table as a dict of dicts (where each table_data[row_key][column_key] is a confidence interval) and prints it as a markdown table using
# the specified pretty print functions for column keys, row keys and values respectively.
# column_header_pretty_printer and row_header_pretty_printer must be functions taking a single value and returning the pretty-printed version.
# value_pretty_printer must be a function taking (value_confidence_interval, min_in_table, max_in_table).
# baseline_table_data is an optional table (similar to table_data) that contains the "before" state. If present, the values in the two tables will be compared.
def print_confidence_intervals_table(table_name,
                                     table_data,
                                     baseline_table_data,
                                     column_header_pretty_printer: DimensionPrettyPrinter,
                                     row_header_pretty_printer: DimensionPrettyPrinter,
                                     value_pretty_printer: IntervalPrettyPrinter,
                                     row_sort_key: Callable[[Any], Any]):
    if table_data == {}:
        print('%s: (no data)' % table_name)
        return

    row_headers = sorted(list(table_data.keys()), key=row_sort_key)
    # We need to compute the union of the headers of all rows; some rows might be missing values for certain columns.
    column_headers = sorted(set().union(*[list(row_values.keys()) for row_values in table_data.values()]))
    if baseline_table_data:
        baseline_row_headers = sorted(list(baseline_table_data.keys()), key=row_sort_key)
        baseline_column_headers = sorted(set().union(*[list(row_values.keys()) for row_values in baseline_table_data.values()]))
        unmatched_baseline_column_headers = set(baseline_column_headers) - set(column_headers)
        if unmatched_baseline_column_headers:
            print('Found baseline column headers with no match in new results (they will be ignored): ', unmatched_baseline_column_headers)
        unmatched_baseline_row_headers = set(baseline_row_headers) - set(row_headers)
        if unmatched_baseline_row_headers:
            print('Found baseline row headers with no match in new results (they will be ignored): ', unmatched_baseline_row_headers)

    min_in_table, max_in_table = compute_min_max(table_data, row_headers, column_headers)
    if baseline_table_data:
        min_in_baseline_table, max_in_baseline_table = compute_min_max(baseline_table_data, baseline_row_headers, baseline_column_headers)
        min_in_table = min(min_in_table, min_in_baseline_table)
        max_in_table = max(max_in_table, max_in_baseline_table)

    table_content = []
    table_content.append([table_name] + [column_header_pretty_printer(column_header) for column_header in column_headers])
    for row_header in row_headers:
        row_content = [row_header_pretty_printer(row_header)]
        for column_header in column_headers:
            if column_header in table_data[row_header]:
                value = table_data[row_header][column_header]
                raw_confidence_interval, rounded_confidence_interval = value
                pretty_printed_value = value_pretty_printer(rounded_confidence_interval, min_in_table, max_in_table)
                if baseline_table_data and row_header in baseline_table_data and column_header in baseline_table_data[row_header]:
                    baseline_value = baseline_table_data[row_header][column_header]
                    raw_baseline_confidence_interval, rounded_baseline_confidence_interval = baseline_value
                    pretty_printed_baseline_value = value_pretty_printer(rounded_baseline_confidence_interval, min_in_table, max_in_table)
                    pretty_printed_percentage_difference = pretty_print_percentage_difference(raw_baseline_confidence_interval, raw_confidence_interval)
                    row_content.append("%s -> %s (%s)" % (pretty_printed_baseline_value, pretty_printed_value, pretty_printed_percentage_difference))
                else:
                    row_content.append(pretty_printed_value)
            else:
                row_content.append("N/A")
        table_content.append(row_content)
    print_markdown_table(table_content)


def format_string_pretty_printer(format_string: str) -> Callable[[str], str]:
    def pretty_print(s: str):
        return format_string % s

    return pretty_print

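# Example (hypothetical format string):
#   format_string_pretty_printer('%s compile time')('gcc') == 'gcc compile time'
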
def float_to_str(x: float) -> str:
    if x > 100:
        return str(int(x))
    else:
        return '%.2g' % x

def interval_pretty_printer(interval: Interval, unit: str, multiplier: float) -> str:
    interval = list(interval)  # type: List[Any]
    interval[0] *= multiplier
    interval[1] *= multiplier

    # This prevents the format strings below from printing '.0' for numbers that already have 2 digits:
    # 23.0 -> 23
    # 2.0 -> 2 (values under 10 go through float_to_str, which prints 2 significant digits)
    if int(interval[0]) == interval[0] and interval[0] >= 10:
        interval[0] = int(interval[0])
    else:
        interval[0] = float_to_str(interval[0])
    if int(interval[1]) == interval[1] and interval[1] >= 10:
        interval[1] = int(interval[1])
    else:
        interval[1] = float_to_str(interval[1])

    if interval[0] == interval[1]:
        return '%s %s' % (interval[0], unit)
    else:
        return '%s-%s %s' % (interval[0], interval[1], unit)


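# Example (hypothetical values): with multiplier=1000 (seconds to milliseconds),
#   interval_pretty_printer((0.002, 0.003), unit='ms', multiplier=1000) == '2-3 ms'
#   interval_pretty_printer((0.023, 0.023), unit='ms', multiplier=1000) == '23 ms'
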
# Finds the best unit to represent values in the range [min_value, max_value].
# The units must be specified as an ordered list [multiplier1, ..., multiplierN].
def find_best_unit(units: List[float], min_value: float, max_value: float) -> float:
    assert min_value <= max_value
    if max_value <= units[0]:
        return units[0]
    for i in range(len(units) - 1):
        if min_value > units[i] and max_value < units[i + 1]:
            return units[i]
    if min_value > units[-1]:
        return units[-1]
    # There is no unit that works very well for all values, so first let's try relaxing the min constraint.
    for i in range(len(units) - 1):
        if min_value > units[i] * 0.2 and max_value < units[i + 1]:
            return units[i]
    if min_value > units[-1] * 0.2:
        return units[-1]
    # That didn't work either, so just use a unit that works well for the min value.
    for i in reversed(range(len(units))):
        if min_value > units[i]:
            return units[i]
    assert min_value <= min(units)
    # Pick the smallest unit.
    return units[0]


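# Example (hypothetical values): with units [1e-6, 1e-3, 1] (microseconds, milliseconds, seconds),
#   find_best_unit([1e-6, 1e-3, 1], 0.002, 0.05) == 1e-3
# i.e. values in the 2ms-50ms range are all best displayed in milliseconds.
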
def time_interval_pretty_printer(time_interval: Interval, min_in_table: float, max_in_table: float) -> str:
    sec = 1
    milli = 0.001
    micro = milli * milli
    units = [micro, milli, sec]
    unit_name_by_unit = {micro: 'μs', milli: 'ms', sec: 's'}

    unit = find_best_unit(units, min_in_table, max_in_table)
    unit_name = unit_name_by_unit[unit]

    return interval_pretty_printer(time_interval, unit=unit_name, multiplier=1 / unit)


def file_size_interval_pretty_printer(file_size_interval: Interval, min_in_table: float, max_in_table: float) -> str:
    byte = 1
    kb = 1024
    mb = kb * kb
    units = [byte, kb, mb]
    unit_name_by_unit = {byte: 'bytes', kb: 'KB', mb: 'MB'}

    unit = find_best_unit(units, min_in_table, max_in_table)
    unit_name = unit_name_by_unit[unit]

    return interval_pretty_printer(file_size_interval, unit=unit_name, multiplier=1 / unit)


def make_immutable(x):
    if isinstance(x, list):
        return tuple(make_immutable(elem) for elem in x)
    return x


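# Example: make_immutable(['a', ['b', 'c']]) == ('a', ('b', 'c')); non-list values
# are returned unchanged, so results can be used as dict keys / set elements.
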
def dict_pretty_printer(dict_data: Union[Dict[str, str],
                                         List[Dict[str, Union[str, Tuple[str]]]]]) -> Callable[[Union[str, Tuple[str]]], str]:
    if isinstance(dict_data, list):
        dict_data = {make_immutable(mapping['from']): mapping['to'] for mapping in dict_data}

    def pretty_print(s: Union[str, Tuple[str]]) -> str:
        if s in dict_data:
            return dict_data[s]
        else:
            raise Exception('dict_pretty_printer(%s) can\'t handle the value %s' % (dict_data, s))

    return pretty_print


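# Example (hypothetical mapping, as it could appear in a 'fixed_map' entry of the YAML definition):
#   pretty_print = dict_pretty_printer([{'from': 'gcc', 'to': 'GCC'}])
#   pretty_print('gcc') == 'GCC'
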
def determine_column_pretty_printer(pretty_printer_definition: Dict[str, Any]) -> DimensionPrettyPrinter:
    if 'format_string' in pretty_printer_definition:
        return format_string_pretty_printer(pretty_printer_definition['format_string'])

    if 'fixed_map' in pretty_printer_definition:
        return dict_pretty_printer(pretty_printer_definition['fixed_map'])

    raise Exception("Unrecognized pretty printer description: %s" % pretty_printer_definition)

def determine_row_pretty_printer(pretty_printer_definition: Dict[str, Any]) -> DimensionPrettyPrinter:
    return determine_column_pretty_printer(pretty_printer_definition)

def determine_row_sort_key(pretty_printer_definition: Dict[str, Any]) -> Callable[[Any], Any]:
    if 'fixed_map' in pretty_printer_definition:
        indexes = {x: i for i, x in enumerate(pretty_printer_definition['fixed_map'].keys())}
        return lambda s: indexes[s]

    return lambda x: x

def determine_value_pretty_printer(unit: str) -> IntervalPrettyPrinter:
    if unit == "seconds":
        return time_interval_pretty_printer
    if unit == "bytes":
        return file_size_interval_pretty_printer
    raise Exception("Unrecognized unit: %s" % unit)

def main():
    parser = argparse.ArgumentParser(description='Formats benchmark results into the tables shown on the Fruit website.')
    parser.add_argument('--benchmark-results',
                        help='The input file where benchmark results will be read from (1 per line, with each line in JSON format). You can use run_benchmarks.py to run a benchmark and generate results in this format.')
    parser.add_argument('--baseline-benchmark-results',
                        help='Optional. If specified, compares this file (considered the "before" state) with the one specified in --benchmark-results.')
    parser.add_argument('--benchmark-tables-definition', help='The YAML file that defines the benchmark tables (e.g. fruit_wiki_bench_tables.yaml).')
    args = parser.parse_args()

    if args.benchmark_results is None:
        raise Exception("You must specify a benchmark results file using --benchmark-results.")

    if args.benchmark_tables_definition is None:
        raise Exception("You must specify a benchmark tables definition file using --benchmark-tables-definition.")

    with open(args.benchmark_results, 'r') as f:
        bench_results = [json.loads(line) for line in f]

    if args.baseline_benchmark_results:
        with open(args.baseline_benchmark_results, 'r') as f:
            baseline_bench_results = [json.loads(line) for line in f]
    else:
        baseline_bench_results = None

    with open(args.benchmark_tables_definition, 'r') as f:
        used_bench_results = set()
        # Set of (Benchmark definition, Benchmark result name) pairs
        used_bench_result_values = set()
        config = yaml.full_load(f)
        for table_definition in config["tables"]:
            try:
                fixed_benchmark_params = {dimension_name: make_immutable(dimension_value)
                                          for dimension_name, dimension_value in table_definition['benchmark_filter'].items()}
                table_data, last_used_bench_results, last_used_bench_result_values = extract_results(
                    bench_results,
                    fixed_benchmark_params=fixed_benchmark_params,
                    column_dimension=table_definition['columns']['dimension'],
                    row_dimension=table_definition['rows']['dimension'],
                    result_dimension=table_definition['results']['dimension'])
                used_bench_results = used_bench_results.union(last_used_bench_results)
                used_bench_result_values = used_bench_result_values.union(last_used_bench_result_values)
                if baseline_bench_results:
                    baseline_table_data, _, _ = extract_results(
                        baseline_bench_results,
                        fixed_benchmark_params=fixed_benchmark_params,
                        column_dimension=table_definition['columns']['dimension'],
                        row_dimension=table_definition['rows']['dimension'],
                        result_dimension=table_definition['results']['dimension'])
                else:
                    baseline_table_data = None
                rows_pretty_printer_definition = table_definition['rows']['pretty_printer']
                columns_pretty_printer_definition = table_definition['columns']['pretty_printer']
                results_unit = table_definition['results']['unit']
                print_confidence_intervals_table(table_definition['name'],
                                                 table_data,
                                                 baseline_table_data,
                                                 column_header_pretty_printer=determine_column_pretty_printer(columns_pretty_printer_definition),
                                                 row_header_pretty_printer=determine_row_pretty_printer(rows_pretty_printer_definition),
                                                 value_pretty_printer=determine_value_pretty_printer(results_unit),
                                                 row_sort_key=determine_row_sort_key(rows_pretty_printer_definition))
                print()
                print()
            except Exception as e:
                print('While processing table:\n%s' % table_definition)
                print()
                raise e
        allowed_unused_benchmarks = set(config.get('allowed_unused_benchmarks', []))
        allowed_unused_benchmark_results = set(config.get('allowed_unused_benchmark_results', []))
        for bench_result in bench_results:
            params = {dimension_name: make_immutable(dimension_value)
                      for dimension_name, dimension_value in bench_result['benchmark'].items()}
            benchmark_defn = tuple(sorted(params.items()))
            if benchmark_defn not in used_bench_results:
                if params['name'] not in allowed_unused_benchmarks:
                    print('Warning: benchmark result did not match any tables: %s' % params)
            else:
                unused_result_dimensions = {result_dimension
                                            for result_dimension in bench_result['results'].keys()
                                            if (benchmark_defn, result_dimension) not in used_bench_result_values
                                            and result_dimension not in allowed_unused_benchmark_results}
                if unused_result_dimensions:
                    print('Warning: unused result dimensions %s in benchmark result %s' % (unused_result_dimensions, params))


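# Sketch of a minimal tables-definition YAML (hypothetical dimension names; the keys
# match what main() reads above, see fruit_wiki_bench_tables.yaml for the real definitions):
#   tables:
#     - name: "Compile time"
#       benchmark_filter:
#         compiler: "gcc"
#       columns:
#         dimension: "benchmark_name"
#         pretty_printer:
#           format_string: "%s"
#       rows:
#         dimension: "num_classes"
#         pretty_printer:
#           format_string: "%s classes"
#       results:
#         dimension: "compile_time"
#         unit: "seconds"
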
if __name__ == "__main__":
    main()