#!/usr/bin/env python3
# Copyright 2016 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS-IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import argparse
import json
from collections import defaultdict
from typing import Tuple, List, Dict, Union, Callable, Any, Sequence, Set, Iterable

import yaml


def extract_results(bench_results: List[Dict[str, Dict[Any, Any]]],
                    fixed_benchmark_params: Dict[str, Union[str, Tuple[str, ...]]],
                    column_dimension: str,
                    row_dimension: str,
                    result_dimension: str) -> Tuple[Dict[Any, Dict[Any, Any]],
                                                    Set[Tuple[Tuple[str, Any], ...]],
                                                    Set[Tuple[Tuple[Tuple[str, Any], ...], str]]]:
    """Extracts one table's worth of data from a list of benchmark results.

    Args:
        bench_results: parsed benchmark results; each entry is a dict with a
            'benchmark' dict (the benchmark's dimension values) and a 'results'
            dict (the measured values).
        fixed_benchmark_params: dimension values a result must have to be
            included in the table. A tuple value means "any of these values".
        column_dimension: the benchmark dimension whose value selects the column.
        row_dimension: the benchmark dimension whose value selects the row.
        result_dimension: the name of the measured value to put in the cells.

    Returns:
        A (table_data, used_bench_results, used_bench_result_values) tuple:
        table_data[row_value][column_value] is the measured value;
        used_bench_results is the set of matched benchmark definitions, each
        encoded as a sorted tuple of (dimension, value) pairs;
        used_bench_result_values is the set of
        (benchmark definition, result dimension) pairs that were consumed.

    Raises:
        Exception: if two matching results map to the same (row, column) cell,
            or if any result fails to process.
    """
    table_data = defaultdict(dict)  # type: Dict[Any, Dict[Any, Any]]
    remaining_dimensions_by_row_column = dict()
    used_bench_results = set()  # type: Set[Tuple[Tuple[str, Any], ...]]
    used_bench_result_values = set()  # type: Set[Tuple[Tuple[Tuple[str, Any], ...], str]]
    for bench_result in bench_results:
        try:
            params = {dimension_name: make_immutable(dimension_value)
                      for dimension_name, dimension_value in bench_result['benchmark'].items()}
            original_params = dict(params)
            results = bench_result['results']

            def satisfies_fixed_param(param_name, param_value):
                # A tuple in fixed_benchmark_params means "any of these values".
                actual_value = params.get(param_name)
                return ((isinstance(param_value, tuple) and actual_value in param_value)
                        or actual_value == param_value)

            # A result is included only if it has the requested result dimension
            # and satisfies every fixed benchmark param.
            matches = (result_dimension in results
                       and all(satisfies_fixed_param(param_name, param_value)
                               for param_name, param_value in fixed_benchmark_params.items()))
            if matches:
                assert row_dimension in params.keys(), '%s not in %s' % (row_dimension, params.keys())
                assert column_dimension in params.keys(), '%s not in %s' % (column_dimension, params.keys())
                assert result_dimension in results, '%s not in %s' % (result_dimension, results)
                used_bench_results.add(tuple(sorted(original_params.items())))
                used_bench_result_values.add((tuple(sorted(original_params.items())),
                                              result_dimension))
                row_value = params[row_dimension]
                column_value = params[column_dimension]
                # The fixed params are intentionally kept in remaining_dimensions; they are
                # identical for every matching result, so they don't affect the duplicate
                # check below (they only add context to the error message).
                remaining_dimensions = params.copy()
                remaining_dimensions.pop(row_dimension)
                remaining_dimensions.pop(column_dimension)
                if column_value in table_data[row_value]:
                    previous_remaining_dimensions = remaining_dimensions_by_row_column[(row_value, column_value)]
                    raise Exception(
                        'Found multiple benchmark results with the same fixed benchmark params, benchmark param for row and benchmark param for column, so a result can\'t be uniquely determined. '
                        + 'Consider adding additional values in fixed_benchmark_params. Remaining dimensions:\n%s\nvs\n%s' % (
                            remaining_dimensions, previous_remaining_dimensions))
                table_data[row_value][column_value] = results[result_dimension]
                remaining_dimensions_by_row_column[(row_value, column_value)] = remaining_dimensions
        except Exception as e:
            raise Exception('While processing %s' % bench_result) from e
    return table_data, used_bench_results, used_bench_result_values

# Takes a 2-dimensional array (list of lists) and prints a markdown table with that content.
def print_markdown_table(table_data: List[List[str]]) -> None:
    """Prints a 2-dimensional array (list of lists) as a markdown table.

    The first row is the header row: a |---|-----|---| separator line is printed
    after it. Each cell is right-aligned to the widest cell in its column.
    """
    column_widths = [max(len(str(row[column_index])) for row in table_data)
                     for column_index in range(len(table_data[0]))]
    for row_index, row in enumerate(table_data):
        # str.rjust(width) is equivalent to the '%<width>s' right-aligned format.
        cell_strings = [str(value).rjust(width)
                        for value, width in zip(row, column_widths)]
        print('| ' + ' | '.join(cell_strings) + ' |')
        if row_index == 0:
            # Print the separator line, e.g. |---|-----|---|
            print('|-'
                  + '-|-'.join('-' * width for width in column_widths[:len(row)])
                  + '-|')

# A sequence of length 2, with the lower and upper bound of the interval.
# TODO: use a class instead.
Interval = Sequence[float]

def compute_min_max(table_data, row_headers: List[str], column_headers: List[str]) -> Interval:
    """Returns the (min, max) over all values in the given rows/columns of table_data.

    Each table_data[row][column] value is a pair
    (raw_confidence_interval, rounded_confidence_interval), each of which is a
    (lower, upper) interval; the min/max covers both the raw and rounded bounds.
    Missing (row, column) combinations are skipped.
    """
    values_by_row = {row_header: [table_data[row_header][column_header]
                                  for column_header in column_headers
                                  if column_header in table_data[row_header]]
                     for row_header in row_headers}
    # We compute min and max and pass them to the value pretty-printer, so that it can
    # determine a unit that works well for all values in the table.
    min_in_table = min(min(min(interval[0][0], interval[1][0])
                           for interval in values_by_row[row_header])
                       for row_header in row_headers)
    max_in_table = max(max(max(interval[0][1], interval[1][1])
                           for interval in values_by_row[row_header])
                       for row_header in row_headers)
    return (min_in_table, max_in_table)

def pretty_print_percentage_difference(baseline_value: Interval, current_value: Interval):
    """Formats the relative change from baseline_value to current_value as a percentage range.

    The low bound compares the current minimum against the baseline maximum (and vice
    versa), so the printed range is the widest change consistent with the two intervals.
    If both bounds print identically, a single percentage is returned.
    """
    baseline_min, baseline_max = baseline_value[0], baseline_value[1]
    current_min, current_max = current_value[0], current_value[1]
    percentage_min_s = "%+.1f%%" % ((current_min / baseline_max - 1) * 100)
    percentage_max_s = "%+.1f%%" % ((current_max / baseline_min - 1) * 100)
    if percentage_min_s == percentage_max_s:
        return percentage_min_s
    else:
        return "%s - %s" % (percentage_min_s, percentage_max_s)

# Pretty-printer for a row/column header value.
DimensionPrettyPrinter = Callable[[Any], str]

# Pretty-printer for a value interval; called as f(interval, min_in_table, max_in_table).
IntervalPrettyPrinter = Callable[[Interval, float, float], str]


def print_confidence_intervals_table(table_name,
                                     table_data,
                                     baseline_table_data,
                                     column_header_pretty_printer: DimensionPrettyPrinter,
                                     row_header_pretty_printer: DimensionPrettyPrinter,
                                     value_pretty_printer: IntervalPrettyPrinter,
                                     row_sort_key: Callable[[Any], Any]):
    """Prints a table of confidence intervals as a markdown table.

    table_data is a dict of dicts where table_data[row_key][column_key] is a pair
    (raw_confidence_interval, rounded_confidence_interval).
    baseline_table_data is an optional table with the same structure that contains the
    "before" state; if present, each cell shows "baseline -> current (percentage change)".
    column_header_pretty_printer and row_header_pretty_printer take a single key and
    return the pretty-printed version; value_pretty_printer is called as
    f(rounded_confidence_interval, min_in_table, max_in_table).
    """
    if table_data == {}:
        print('%s: (no data)' % table_name)
        return

    row_headers = sorted(list(table_data.keys()), key=row_sort_key)
    # We need to compute the union of the headers of all rows; some rows might be missing
    # values for certain columns.
    column_headers = sorted(set().union(*[list(row_values.keys()) for row_values in table_data.values()]))
    if baseline_table_data:
        baseline_row_headers = sorted(list(baseline_table_data.keys()), key=row_sort_key)
        baseline_column_headers = sorted(set().union(*[list(row_values.keys()) for row_values in baseline_table_data.values()]))
        # Bug fix: this previously compared *row* headers here too, so stale baseline
        # columns were never reported.
        unmatched_baseline_column_headers = set(baseline_column_headers) - set(column_headers)
        if unmatched_baseline_column_headers:
            print('Found baseline column headers with no match in new results (they will be ignored): ', unmatched_baseline_column_headers)
        unmatched_baseline_row_headers = set(baseline_row_headers) - set(row_headers)
        if unmatched_baseline_row_headers:
            print('Found baseline row headers with no match in new results (they will be ignored): ', unmatched_baseline_row_headers)

    min_in_table, max_in_table = compute_min_max(table_data, row_headers, column_headers)
    if baseline_table_data:
        # Bug fix: this previously re-computed the min/max of table_data, so the unit
        # choice below ignored the baseline values entirely.
        min_in_baseline_table, max_in_baseline_table = compute_min_max(baseline_table_data, baseline_row_headers, baseline_column_headers)
        min_in_table = min(min_in_table, min_in_baseline_table)
        max_in_table = max(max_in_table, max_in_baseline_table)

    table_content = []
    table_content.append([table_name] + [column_header_pretty_printer(column_header) for column_header in column_headers])
    for row_header in row_headers:
        row_content = [row_header_pretty_printer(row_header)]
        for column_header in column_headers:
            if column_header in table_data[row_header]:
                value = table_data[row_header][column_header]
                raw_confidence_interval, rounded_confidence_interval = value
                pretty_printed_value = value_pretty_printer(rounded_confidence_interval, min_in_table, max_in_table)
                if baseline_table_data and row_header in baseline_table_data and column_header in baseline_table_data[row_header]:
                    baseline_value = baseline_table_data[row_header][column_header]
                    raw_baseline_confidence_interval, rounded_baseline_confidence_interval = baseline_value
                    pretty_printed_baseline_value = value_pretty_printer(rounded_baseline_confidence_interval, min_in_table, max_in_table)
                    # The percentage change is computed from the raw (unrounded) intervals.
                    pretty_printed_percentage_difference = pretty_print_percentage_difference(raw_baseline_confidence_interval, raw_confidence_interval)
                    row_content.append("%s -> %s (%s)" % (pretty_printed_baseline_value, pretty_printed_value, pretty_printed_percentage_difference))
                else:
                    row_content.append(pretty_printed_value)
            else:
                # This (row, column) combination is missing from the new results.
                row_content.append("N/A")
        table_content.append(row_content)
    print_markdown_table(table_content)


def format_string_pretty_printer(format_string: str) -> Callable[[str], str]:
    """Returns a pretty-printer that interpolates its argument into format_string."""
    def pretty_print(s: str) -> str:
        return format_string % s

    return pretty_print

def float_to_str(x: float) -> str:
    """Formats a float compactly: whole digits above 100, else 2 significant digits.

    NOTE(review): '%.2g' rounds to 2 significant digits and strips trailing zeros,
    so e.g. 2.0 prints as '2' and 99.5 prints as '1e+02' — confirm this is intended.
    """
    if x > 100:
        return str(int(x))
    else:
        return '%.2g' % x

def interval_pretty_printer(interval: Interval, unit: str, multiplier: float) -> str:
    """Formats an interval as e.g. '10-20 ms', or '10 ms' if both bounds print the same.

    The interval bounds are multiplied by multiplier before formatting, so the caller
    picks the unit name together with the matching scale factor.
    """
    interval = list(interval)  # type: List[Any]
    interval[0] *= multiplier
    interval[1] *= multiplier

    # This prevents the format strings below from printing '.0' for numbers that already
    # have 2 digits:
    # 23.0 -> 23
    # Values below 10 keep the float formatting so the printed precision isn't overstated.
    if int(interval[0]) == interval[0] and interval[0] >= 10:
        interval[0] = int(interval[0])
    else:
        interval[0] = float_to_str(interval[0])
    if int(interval[1]) == interval[1] and interval[1] >= 10:
        interval[1] = int(interval[1])
    else:
        interval[1] = float_to_str(interval[1])

    if interval[0] == interval[1]:
        return '%s %s' % (interval[0], unit)
    else:
        return '%s-%s %s' % (interval[0], interval[1], unit)


def find_best_unit(units: List[float], min_value: float, max_value: float) -> float:
    """Finds the best unit to represent values in the range [min_value, max_value].

    The units must be specified as an ordered list [multiplier1, ..., multiplierN].
    Prefers a unit where all values land between it and the next unit; if none fits,
    progressively relaxes the lower-bound constraint (down to 0.2 of a unit), and
    finally falls back to the largest unit below the minimum, or the smallest unit.
    """
    assert min_value <= max_value
    if max_value <= units[0]:
        return units[0]
    for i in range(len(units) - 1):
        if min_value > units[i] and max_value < units[i + 1]:
            return units[i]
    if min_value > units[-1]:
        return units[-1]
    # There is no unit that works very well for all values, first let's try relaxing the
    # min constraint
    for i in range(len(units) - 1):
        if min_value > units[i] * 0.2 and max_value < units[i + 1]:
            return units[i]
    if min_value > units[-1] * 0.2:
        return units[-1]
    # That didn't work either, just use a unit that works well for the min values then
    for i in reversed(range(len(units))):
        if min_value > units[i]:
            return units[i]
    assert min_value <= min(units)
    # Pick the smallest unit
    return units[0]


def time_interval_pretty_printer(time_interval: Interval, min_in_table: float, max_in_table: float) -> str:
    """Pretty-prints a time interval (in seconds), picking s/ms/μs from the table's range."""
    sec = 1
    milli = 0.001
    micro = milli * milli
    units = [micro, milli, sec]
    unit_name_by_unit = {micro: 'μs', milli: 'ms', sec: 's'}

    unit = find_best_unit(units, min_in_table, max_in_table)
    unit_name = unit_name_by_unit[unit]

    return interval_pretty_printer(time_interval, unit=unit_name, multiplier=1 / unit)


def file_size_interval_pretty_printer(file_size_interval: Interval, min_in_table: float, max_in_table: float) -> str:
    """Pretty-prints a file size interval (in bytes), picking bytes/KB/MB from the table's range."""
    byte = 1
    kb = 1024
    mb = kb * kb
    units = [byte, kb, mb]
    unit_name_by_unit = {byte: 'bytes', kb: 'KB', mb: 'MB'}

    unit = find_best_unit(units, min_in_table, max_in_table)
    unit_name = unit_name_by_unit[unit]

    return interval_pretty_printer(file_size_interval, unit=unit_name, multiplier=1 / unit)


def make_immutable(x):
    """Recursively converts lists to tuples, so values can be set elements / dict keys."""
    if isinstance(x, list):
        return tuple(make_immutable(elem) for elem in x)
    return x


def dict_pretty_printer(dict_data: Union[Dict[Any, str], List[Dict[str, Union[str, Tuple[str]]]]]) -> Callable[[Union[str, Tuple[str]]], str]:
    """Returns a pretty-printer that maps values through the given fixed mapping.

    dict_data is either a dict, or (as loaded from YAML) a list of
    {'from': ..., 'to': ...} mappings; 'from' values that are lists are converted to
    tuples so they are hashable.
    """
    if isinstance(dict_data, list):
        dict_data = {make_immutable(mapping['from']): mapping['to'] for mapping in dict_data}

    def pretty_print(s: Union[str, Tuple[str]]) -> str:
        if s in dict_data:
            return dict_data[s]
        else:
            raise Exception('dict_pretty_printer(%s) can\'t handle the value %s' % (dict_data, s))

    return pretty_print


def determine_column_pretty_printer(pretty_printer_definition: Dict[str, Any]) -> DimensionPrettyPrinter:
    """Builds a column-header pretty-printer from its YAML definition."""
    if 'format_string' in pretty_printer_definition:
        return format_string_pretty_printer(pretty_printer_definition['format_string'])

    if 'fixed_map' in pretty_printer_definition:
        return dict_pretty_printer(pretty_printer_definition['fixed_map'])

    raise Exception("Unrecognized pretty printer description: %s" % pretty_printer_definition)

def determine_row_pretty_printer(pretty_printer_definition: Dict[str, Any]) -> DimensionPrettyPrinter:
    """Row headers use the same pretty-printer definitions as column headers."""
    return determine_column_pretty_printer(pretty_printer_definition)

def determine_row_sort_key(pretty_printer_definition: Dict[str, Any]) -> Callable[[Any], Any]:
    """Returns a row sort key: fixed_map declaration order if given, else natural order."""
    if 'fixed_map' in pretty_printer_definition:
        indexes = {x: i for i, x in enumerate(pretty_printer_definition['fixed_map'].keys())}
        return lambda s: indexes[s]

    return lambda x: x

def determine_value_pretty_printer(unit: str) -> IntervalPrettyPrinter:
    """Maps a result unit name from the table definition to the matching interval printer."""
    if unit == "seconds":
        return time_interval_pretty_printer
    if unit == "bytes":
        return file_size_interval_pretty_printer
    raise Exception("Unrecognized unit: %s" % unit)

def main():
    """Reads benchmark results and a tables-definition YAML file, prints markdown tables.

    Also warns about benchmark results (and result dimensions) that no table consumed,
    unless they are listed in the config's allowed_unused_benchmarks /
    allowed_unused_benchmark_results.
    """
    parser = argparse.ArgumentParser(description='Runs all the benchmarks whose results are on the Fruit website.')
    parser.add_argument('--benchmark-results',
                        help='The input file where benchmark results will be read from (1 per line, with each line in JSON format). You can use the run_benchmarks.py to run a benchmark and generate results in this format.')
    parser.add_argument('--baseline-benchmark-results',
                        help='Optional. If specified, compares this file (considered the "before" state) with the one specified in --benchmark-results.')
    parser.add_argument('--benchmark-tables-definition', help='The YAML file that defines the benchmark tables (e.g. fruit_wiki_bench_tables.yaml).')
    args = parser.parse_args()

    if args.benchmark_results is None:
        raise Exception("You must specify a benchmark results file using --benchmark-results.")

    if args.benchmark_tables_definition is None:
        raise Exception("You must specify a benchmark tables definition file using --benchmark-tables-definition.")

    with open(args.benchmark_results, 'r') as f:
        # One JSON document per line.
        bench_results = [json.loads(line) for line in f]

    if args.baseline_benchmark_results:
        with open(args.baseline_benchmark_results, 'r') as f:
            baseline_bench_results = [json.loads(line) for line in f]
    else:
        baseline_bench_results = None

    with open(args.benchmark_tables_definition, 'r') as f:
        used_bench_results = set()
        # Set of (Benchmark definition, Benchmark result name) pairs
        used_bench_result_values = set()
        config = yaml.full_load(f)
        for table_definition in config["tables"]:
            try:
                fixed_benchmark_params = {dimension_name: make_immutable(dimension_value)
                                          for dimension_name, dimension_value in table_definition['benchmark_filter'].items()}
                table_data, last_used_bench_results, last_used_bench_result_values = extract_results(
                    bench_results,
                    fixed_benchmark_params=fixed_benchmark_params,
                    column_dimension=table_definition['columns']['dimension'],
                    row_dimension=table_definition['rows']['dimension'],
                    result_dimension=table_definition['results']['dimension'])
                used_bench_results = used_bench_results.union(last_used_bench_results)
                used_bench_result_values = used_bench_result_values.union(last_used_bench_result_values)
                if baseline_bench_results:
                    baseline_table_data, _, _ = extract_results(
                        baseline_bench_results,
                        fixed_benchmark_params=fixed_benchmark_params,
                        column_dimension=table_definition['columns']['dimension'],
                        row_dimension=table_definition['rows']['dimension'],
                        result_dimension=table_definition['results']['dimension'])
                else:
                    baseline_table_data = None
                rows_pretty_printer_definition = table_definition['rows']['pretty_printer']
                columns_pretty_printer_definition = table_definition['columns']['pretty_printer']
                results_unit = table_definition['results']['unit']
                print_confidence_intervals_table(table_definition['name'],
                                                 table_data,
                                                 baseline_table_data,
                                                 column_header_pretty_printer=determine_column_pretty_printer(columns_pretty_printer_definition),
                                                 row_header_pretty_printer=determine_row_pretty_printer(rows_pretty_printer_definition),
                                                 value_pretty_printer=determine_value_pretty_printer(results_unit),
                                                 row_sort_key=determine_row_sort_key(rows_pretty_printer_definition))
                print()
                print()
            except Exception as e:
                print('While processing table:\n%s' % table_definition)
                print()
                raise e
        allowed_unused_benchmarks = set(config.get('allowed_unused_benchmarks', []))
        allowed_unused_benchmark_results = set(config.get('allowed_unused_benchmark_results', []))
        for bench_result in bench_results:
            params = {dimension_name: make_immutable(dimension_value)
                      for dimension_name, dimension_value in bench_result['benchmark'].items()}
            benchmark_defn = tuple(sorted(params.items()))
            if benchmark_defn not in used_bench_results:
                if params['name'] not in allowed_unused_benchmarks:
                    print('Warning: benchmark result did not match any tables: %s' % params)
            else:
                unused_result_dimensions = {result_dimension
                                            for result_dimension in bench_result['results'].keys()
                                            if (benchmark_defn, result_dimension) not in used_bench_result_values
                                            and result_dimension not in allowed_unused_benchmark_results}
                if unused_result_dimensions:
                    print('Warning: unused result dimensions %s in benchmark result %s' % (unused_result_dimensions, params))


if __name__ == "__main__":
    main()