#!/usr/bin/env python3
# Copyright 2016 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS-IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import argparse
import re
import textwrap
from collections import defaultdict
from timeit import default_timer as timer
import tempfile
import os
import shutil
import itertools
import numpy
import subprocess
import yaml
from numpy import floor, log10
import scipy
import multiprocessing
import sh
import json
import statsmodels.stats.api as stats
from generate_benchmark import generate_benchmark
import git
from functools import lru_cache as memoize

class CommandFailedException(Exception):
    def __init__(self, command, stdout, stderr, error_code):
        self.command = command
        self.stdout = stdout
        self.stderr = stderr
        self.error_code = error_code

    def __str__(self):
        return textwrap.dedent('''\
        Ran command: {command}
        Exit code {error_code}
        Stdout:
        {stdout}

        Stderr:
        {stderr}
        ''').format(command=self.command, error_code=self.error_code, stdout=self.stdout, stderr=self.stderr)

def run_command(executable, args=[], cwd=None, env=None):
    args = [str(arg) for arg in args]
    command = [executable] + args
    try:
        p = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True, cwd=cwd,
                             env=env)
        (stdout, stderr) = p.communicate()
    except Exception as e:
        raise Exception("While executing: %s" % command) from e
    if p.returncode != 0:
        raise CommandFailedException(command, stdout, stderr, p.returncode)
    return (stdout, stderr)

compile_flags = ['-O2', '-DNDEBUG']

make_args = ['-j', multiprocessing.cpu_count() + 1]

def parse_results(result_lines):
    """
    Parses results from the format:
    ['Dimension name1 = 123',
     'Long dimension name2 = 23.45']

    Into a dict {'Dimension name1': 123.0, 'Long dimension name2': 23.45}
    """
    result_dict = dict()
    for line in result_lines:
        line_splits = line.split('=')
        metric = line_splits[0].strip()
        value = float(line_splits[1].strip())
        result_dict[metric] = value
    return result_dict


# We memoize the result since this might be called repeatedly and it's somewhat expensive.
@memoize(maxsize=None)
def determine_compiler_name(compiler_executable_name):
    tmpdir = tempfile.gettempdir() + '/fruit-determine-compiler-version-dir'
    ensure_empty_dir(tmpdir)
    with open(tmpdir + '/CMakeLists.txt', 'w') as file:
        file.write('message("@@@${CMAKE_CXX_COMPILER_ID} ${CMAKE_CXX_COMPILER_VERSION}@@@")\n')
    modified_env = os.environ.copy()
    modified_env['CXX'] = compiler_executable_name
    # run_command reads all of the cmake output before returning, so the command execution is guaranteed to be
    # complete after the following line. Otherwise, subsequent calls to determine_compiler_name might have trouble
    # deleting the temporary directory because the cmake process is still writing files in there.
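    # The CMakeLists.txt above makes cmake print a marker line to stderr, e.g. '@@@GNU 6.3.0@@@' (the version shown
    # here is illustrative); the loop below extracts the text between the markers and reports it as e.g. 'GCC 6.3.0'.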
    _, stderr = run_command('cmake', args=['.'], cwd=tmpdir, env=modified_env)
    cmake_output = stderr.splitlines()
    for line in cmake_output:
        re_result = re.search('@@@(.*)@@@', line)
        if re_result:
            pretty_name = re_result.group(1)
            # CMake calls GCC 'GNU', change it into 'GCC'.
            return pretty_name.replace('GNU ', 'GCC ')
    raise Exception('Unable to determine compiler. CMake output was: \n', cmake_output)


# Returns a pair (sha256_hash, version_name), where version_name will be None if no version tag was found at HEAD.
@memoize(maxsize=None)
def git_repo_info(repo_path):
    repo = git.Repo(repo_path)
    head_tags = [tag.name for tag in repo.tags if tag.commit == repo.head.commit and re.match('v[0-9].*', tag.name)]
    if head_tags == []:
        head_tag = None
    else:
        # There should be only 1 version at any given commit.
        [head_tag] = head_tags
        # Remove the 'v' prefix.
        head_tag = head_tag[1:]
    return (repo.head.commit.hexsha, head_tag)


# Some benchmark parameters, e.g. 'compiler_name' are synthesized automatically from other dimensions (e.g. 'compiler' dimension) or from the environment.
# We put the compiler name/version in the results because the same 'compiler' value might refer to different compiler versions
# (e.g. if GCC 6.0.0 is installed when benchmarks are run, then it's updated to GCC 6.0.1 and finally the results are formatted, we
# want the formatted results to say "GCC 6.0.0" instead of "GCC 6.0.1").
def add_synthetic_benchmark_parameters(original_benchmark_parameters, path_to_code_under_test):
    benchmark_params = original_benchmark_parameters.copy()
    benchmark_params['compiler_name'] = determine_compiler_name(original_benchmark_parameters['compiler'])
    if path_to_code_under_test is not None:
        sha256_hash, version_name = git_repo_info(path_to_code_under_test)
        benchmark_params['di_library_git_commit_hash'] = sha256_hash
        if version_name is not None:
            benchmark_params['di_library_version_name'] = version_name
    return benchmark_params


class SimpleNewDeleteRunTimeBenchmark:
    def __init__(self, benchmark_definition, fruit_benchmark_sources_dir):
        self.benchmark_definition = add_synthetic_benchmark_parameters(benchmark_definition, path_to_code_under_test=None)
        self.fruit_benchmark_sources_dir = fruit_benchmark_sources_dir

    def prepare(self):
        cxx_std = self.benchmark_definition['cxx_std']
        num_classes = self.benchmark_definition['num_classes']
        compiler_executable_name = self.benchmark_definition['compiler']

        self.tmpdir = tempfile.gettempdir() + '/fruit-benchmark-dir'
        ensure_empty_dir(self.tmpdir)
        run_command(compiler_executable_name,
                    args=compile_flags + [
                        '-std=%s' % cxx_std,
                        '-DMULTIPLIER=%s' % num_classes,
                        self.fruit_benchmark_sources_dir + '/extras/benchmark/new_delete_benchmark.cpp',
                        '-o',
                        self.tmpdir + '/main',
                    ])

    def run(self):
        loop_factor = self.benchmark_definition['loop_factor']
        stdout, _ = run_command(self.tmpdir + '/main', args=[int(5000000 * loop_factor)])
        return parse_results(stdout.splitlines())

    def describe(self):
        return self.benchmark_definition


class FruitSingleFileCompileTimeBenchmark:
    def __init__(self, benchmark_definition, fruit_sources_dir, fruit_build_dir, fruit_benchmark_sources_dir):
        self.benchmark_definition = add_synthetic_benchmark_parameters(benchmark_definition, path_to_code_under_test=fruit_sources_dir)
        self.fruit_sources_dir = fruit_sources_dir
        self.fruit_build_dir = fruit_build_dir
        self.fruit_benchmark_sources_dir = fruit_benchmark_sources_dir
        num_bindings = self.benchmark_definition['num_bindings']
        assert (num_bindings % 5) == 0, num_bindings

    def prepare(self):
        pass

    def run(self):
        start = timer()
        cxx_std = self.benchmark_definition['cxx_std']
        num_bindings = self.benchmark_definition['num_bindings']
        compiler_executable_name = self.benchmark_definition['compiler']

        run_command(compiler_executable_name,
                    args=compile_flags + [
                        '-std=%s' % cxx_std,
                        '-DMULTIPLIER=%s' % (num_bindings // 5),
                        '-I', self.fruit_sources_dir + '/include',
                        '-I', self.fruit_build_dir + '/include',
                        '-ftemplate-depth=1000',
                        '-c',
                        self.fruit_benchmark_sources_dir + '/extras/benchmark/compile_time_benchmark.cpp',
                        '-o',
                        '/dev/null',
                    ])
        end = timer()
        return {"compile_time": end - start}

    def describe(self):
        return self.benchmark_definition


def ensure_empty_dir(dirname):
    # We start by creating the directory instead of just calling rmtree with ignore_errors=True because that would
    # ignore all errors, so we might otherwise go ahead even if the directory wasn't properly deleted.
    os.makedirs(dirname, exist_ok=True)
    shutil.rmtree(dirname)
    os.makedirs(dirname)


class GenericGeneratedSourcesBenchmark:
    def __init__(self,
                 di_library,
                 benchmark_definition,
                 path_to_code_under_test=None,
                 **other_args):
        self.di_library = di_library
        self.benchmark_definition = add_synthetic_benchmark_parameters(benchmark_definition, path_to_code_under_test=path_to_code_under_test)
        self.other_args = other_args
        self.arbitrary_files = None

    def prepare_compile_benchmark(self):
        num_classes = self.benchmark_definition['num_classes']
        cxx_std = self.benchmark_definition['cxx_std']
        compiler_executable_name = self.benchmark_definition['compiler']
        benchmark_generation_flags = {flag_name: True for flag_name in self.benchmark_definition['benchmark_generation_flags']}

        self.tmpdir = tempfile.gettempdir() + '/fruit-benchmark-dir'
        ensure_empty_dir(self.tmpdir)
        num_classes_with_no_deps = int(num_classes * 0.1)
        return generate_benchmark(
            compiler=compiler_executable_name,
            num_components_with_no_deps=num_classes_with_no_deps,
            num_components_with_deps=num_classes - num_classes_with_no_deps,
            num_deps=10,
            output_dir=self.tmpdir,
            cxx_std=cxx_std,
            di_library=self.di_library,
            **benchmark_generation_flags,
            **self.other_args)

    def run_make_build(self):
        run_command('make', args=make_args, cwd=self.tmpdir)

    def prepare_incremental_compile_benchmark(self):
        files = self.prepare_compile_benchmark()
        self.run_make_build()
        files = list(sorted(file for file in files if file.endswith('.h')))
        # 5 files, equally spaced (but not at beginning/end) in the sorted sequence.
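        # For example (hypothetical numbers): with 40 generated .h files, len(files) // (5 + 2) == 5, so the files at
        # indices 5, 10, 15, 20 and 25 of the sorted list would be the ones touched by the incremental benchmark.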
        num_files_changed = 5
        self.arbitrary_files = [files[i * (len(files) // (num_files_changed + 2))]
                                for i in range(1, num_files_changed + 1)]

    def prepare_runtime_benchmark(self):
        self.prepare_compile_benchmark()
        self.run_make_build()

    def prepare_startup_benchmark(self):
        self.prepare_compile_benchmark()
        self.run_make_build()
        run_command('strip', args=[self.tmpdir + '/main'])

    def prepare_executable_size_benchmark(self):
        self.prepare_runtime_benchmark()
        run_command('strip', args=[self.tmpdir + '/main'])

    def run_compile_benchmark(self):
        run_command('make',
                    args=make_args + ['clean'],
                    cwd=self.tmpdir)
        start = timer()
        self.run_make_build()
        end = timer()
        result = {'compile_time': end - start}
        return result

    def run_incremental_compile_benchmark(self):
        run_command('touch', args=self.arbitrary_files, cwd=self.tmpdir)
        start = timer()
        self.run_make_build()
        end = timer()
        result = {'incremental_compile_time': end - start}
        return result

    def run_runtime_benchmark(self):
        num_classes = self.benchmark_definition['num_classes']
        loop_factor = self.benchmark_definition['loop_factor']

        results, _ = run_command(self.tmpdir + '/main',
                                 args=[
                                     # 40M loops with 100 classes, 4M loops with 1000 classes.
                                     int(4 * 1000 * 1000 * 1000 * loop_factor / num_classes),
                                 ])
        return parse_results(results.splitlines())

    def run_startup_benchmark(self):
        N = 1000
        start = timer()
        for i in range(0, N):
            run_command(self.tmpdir + '/main', args=[])
        end = timer()
        result = {'startup_time': (end - start) / N}
        return result

    def run_executable_size_benchmark(self):
        wc_result, _ = run_command('wc', args=['-c', self.tmpdir + '/main'])
        num_bytes = wc_result.splitlines()[0].split(' ')[0]
        return {'num_bytes': float(num_bytes)}

    def describe(self):
        return self.benchmark_definition


class CompileTimeBenchmark(GenericGeneratedSourcesBenchmark):
    def __init__(self, **kwargs):
        super().__init__(generate_runtime_bench_code=False,
                         **kwargs)

    def prepare(self):
        self.prepare_compile_benchmark()

    def run(self):
        return self.run_compile_benchmark()

class IncrementalCompileTimeBenchmark(GenericGeneratedSourcesBenchmark):
    def __init__(self, **kwargs):
        super().__init__(generate_runtime_bench_code=False,
                         **kwargs)

    def prepare(self):
        self.prepare_incremental_compile_benchmark()

    def run(self):
        return self.run_incremental_compile_benchmark()

class StartupTimeBenchmark(GenericGeneratedSourcesBenchmark):
    def __init__(self, **kwargs):
        super().__init__(generate_runtime_bench_code=False,
                         **kwargs)

    def prepare(self):
        self.prepare_startup_benchmark()

    def run(self):
        return self.run_startup_benchmark()

class RunTimeBenchmark(GenericGeneratedSourcesBenchmark):
    def __init__(self, **kwargs):
        super().__init__(generate_runtime_bench_code=True,
                         **kwargs)

    def prepare(self):
        self.prepare_runtime_benchmark()

    def run(self):
        return self.run_runtime_benchmark()

# This is not really a 'benchmark', but we consider it as such to reuse the benchmark infrastructure.
class ExecutableSizeBenchmark(GenericGeneratedSourcesBenchmark):
    def __init__(self, **kwargs):
        super().__init__(generate_runtime_bench_code=False,
                         **kwargs)

    def prepare(self):
        self.prepare_executable_size_benchmark()

    def run(self):
        return self.run_executable_size_benchmark()

# This is not really a 'benchmark', but we consider it as such to reuse the benchmark infrastructure.
class ExecutableSizeBenchmarkWithoutExceptionsAndRtti(ExecutableSizeBenchmark):
    def __init__(self, **kwargs):
        super().__init__(use_exceptions=False,
                         use_rtti=False,
                         **kwargs)

class FruitCompileTimeBenchmark(CompileTimeBenchmark):
    def __init__(self, fruit_sources_dir, **kwargs):
        super().__init__(di_library='fruit',
                         path_to_code_under_test=fruit_sources_dir,
                         fruit_sources_dir=fruit_sources_dir,
                         **kwargs)

class FruitIncrementalCompileTimeBenchmark(IncrementalCompileTimeBenchmark):
    def __init__(self, fruit_sources_dir, **kwargs):
        super().__init__(di_library='fruit',
                         path_to_code_under_test=fruit_sources_dir,
                         fruit_sources_dir=fruit_sources_dir,
                         **kwargs)

class FruitRunTimeBenchmark(RunTimeBenchmark):
    def __init__(self, fruit_sources_dir, **kwargs):
        super().__init__(di_library='fruit',
                         path_to_code_under_test=fruit_sources_dir,
                         fruit_sources_dir=fruit_sources_dir,
                         **kwargs)

class FruitStartupTimeBenchmark(StartupTimeBenchmark):
    def __init__(self, fruit_sources_dir, **kwargs):
        super().__init__(di_library='fruit',
                         path_to_code_under_test=fruit_sources_dir,
                         fruit_sources_dir=fruit_sources_dir,
                         **kwargs)

class FruitStartupTimeWithNormalizedComponentBenchmark(FruitStartupTimeBenchmark):
    def __init__(self, **kwargs):
        super().__init__(use_normalized_component=True,
                         **kwargs)

# This is not really a 'benchmark', but we consider it as such to reuse the benchmark infrastructure.
class FruitExecutableSizeBenchmark(ExecutableSizeBenchmark):
    def __init__(self, fruit_sources_dir, **kwargs):
        super().__init__(di_library='fruit',
                         path_to_code_under_test=fruit_sources_dir,
                         fruit_sources_dir=fruit_sources_dir,
                         **kwargs)

# This is not really a 'benchmark', but we consider it as such to reuse the benchmark infrastructure.
class FruitExecutableSizeBenchmarkWithoutExceptionsAndRtti(ExecutableSizeBenchmarkWithoutExceptionsAndRtti):
    def __init__(self, fruit_sources_dir, **kwargs):
        super().__init__(di_library='fruit',
                         path_to_code_under_test=fruit_sources_dir,
                         fruit_sources_dir=fruit_sources_dir,
                         **kwargs)

class BoostDiCompileTimeBenchmark(CompileTimeBenchmark):
    def __init__(self, boost_di_sources_dir, **kwargs):
        super().__init__(di_library='boost_di',
                         path_to_code_under_test=boost_di_sources_dir,
                         boost_di_sources_dir=boost_di_sources_dir,
                         **kwargs)

class BoostDiIncrementalCompileTimeBenchmark(IncrementalCompileTimeBenchmark):
    def __init__(self, boost_di_sources_dir, **kwargs):
        super().__init__(di_library='boost_di',
                         path_to_code_under_test=boost_di_sources_dir,
                         boost_di_sources_dir=boost_di_sources_dir,
                         **kwargs)

class BoostDiRunTimeBenchmark(RunTimeBenchmark):
    def __init__(self, boost_di_sources_dir, **kwargs):
        super().__init__(di_library='boost_di',
                         path_to_code_under_test=boost_di_sources_dir,
                         boost_di_sources_dir=boost_di_sources_dir,
                         **kwargs)

class BoostDiStartupTimeBenchmark(StartupTimeBenchmark):
    def __init__(self, boost_di_sources_dir, **kwargs):
        super().__init__(di_library='boost_di',
                         path_to_code_under_test=boost_di_sources_dir,
                         boost_di_sources_dir=boost_di_sources_dir,
                         **kwargs)

# This is not really a 'benchmark', but we consider it as such to reuse the benchmark infrastructure.
class BoostDiExecutableSizeBenchmark(ExecutableSizeBenchmark):
    def __init__(self, boost_di_sources_dir, **kwargs):
        super().__init__(di_library='boost_di',
                         path_to_code_under_test=boost_di_sources_dir,
                         boost_di_sources_dir=boost_di_sources_dir,
                         **kwargs)

# This is not really a 'benchmark', but we consider it as such to reuse the benchmark infrastructure.
class BoostDiExecutableSizeBenchmarkWithoutExceptionsAndRtti(ExecutableSizeBenchmarkWithoutExceptionsAndRtti):
    def __init__(self, boost_di_sources_dir, **kwargs):
        super().__init__(di_library='boost_di',
                         path_to_code_under_test=boost_di_sources_dir,
                         boost_di_sources_dir=boost_di_sources_dir,
                         **kwargs)

class SimpleDiCompileTimeBenchmark(CompileTimeBenchmark):
    def __init__(self, **kwargs):
        super().__init__(di_library='none',
                         **kwargs)

class SimpleDiIncrementalCompileTimeBenchmark(IncrementalCompileTimeBenchmark):
    def __init__(self, **kwargs):
        super().__init__(di_library='none',
                         **kwargs)

class SimpleDiRunTimeBenchmark(RunTimeBenchmark):
    def __init__(self, **kwargs):
        super().__init__(di_library='none',
                         **kwargs)

class SimpleDiStartupTimeBenchmark(StartupTimeBenchmark):
    def __init__(self, **kwargs):
        super().__init__(di_library='none',
                         **kwargs)

# This is not really a 'benchmark', but we consider it as such to reuse the benchmark infrastructure.
class SimpleDiExecutableSizeBenchmark(ExecutableSizeBenchmark):
    def __init__(self, **kwargs):
        super().__init__(di_library='none',
                         **kwargs)

# This is not really a 'benchmark', but we consider it as such to reuse the benchmark infrastructure.
class SimpleDiExecutableSizeBenchmarkWithoutExceptionsAndRtti(ExecutableSizeBenchmarkWithoutExceptionsAndRtti):
    def __init__(self, **kwargs):
        super().__init__(di_library='none',
                         **kwargs)

class SimpleDiWithInterfacesCompileTimeBenchmark(SimpleDiCompileTimeBenchmark):
    def __init__(self, **kwargs):
        super().__init__(use_interfaces=True, **kwargs)

class SimpleDiWithInterfacesIncrementalCompileTimeBenchmark(SimpleDiIncrementalCompileTimeBenchmark):
    def __init__(self, **kwargs):
        super().__init__(use_interfaces=True, **kwargs)

class SimpleDiWithInterfacesRunTimeBenchmark(SimpleDiRunTimeBenchmark):
    def __init__(self, **kwargs):
        super().__init__(use_interfaces=True, **kwargs)

class SimpleDiWithInterfacesStartupTimeBenchmark(SimpleDiStartupTimeBenchmark):
    def __init__(self, **kwargs):
        super().__init__(use_interfaces=True, **kwargs)

# This is not really a 'benchmark', but we consider it as such to reuse the benchmark infrastructure.
class SimpleDiWithInterfacesExecutableSizeBenchmark(SimpleDiExecutableSizeBenchmark):
    def __init__(self, **kwargs):
        super().__init__(use_interfaces=True, **kwargs)

# This is not really a 'benchmark', but we consider it as such to reuse the benchmark infrastructure.
class SimpleDiWithInterfacesExecutableSizeBenchmarkWithoutExceptionsAndRtti(SimpleDiExecutableSizeBenchmarkWithoutExceptionsAndRtti):
    def __init__(self, **kwargs):
        super().__init__(use_interfaces=True, **kwargs)

class SimpleDiWithInterfacesAndNewDeleteCompileTimeBenchmark(SimpleDiWithInterfacesCompileTimeBenchmark):
    def __init__(self, **kwargs):
        super().__init__(use_new_delete=True, **kwargs)

class SimpleDiWithInterfacesAndNewDeleteIncrementalCompileTimeBenchmark(SimpleDiWithInterfacesIncrementalCompileTimeBenchmark):
    def __init__(self, **kwargs):
        super().__init__(use_new_delete=True, **kwargs)

class SimpleDiWithInterfacesAndNewDeleteRunTimeBenchmark(SimpleDiWithInterfacesRunTimeBenchmark):
    def __init__(self, **kwargs):
        super().__init__(use_new_delete=True, **kwargs)

class SimpleDiWithInterfacesAndNewDeleteStartupTimeBenchmark(SimpleDiWithInterfacesStartupTimeBenchmark):
    def __init__(self, **kwargs):
        super().__init__(use_new_delete=True, **kwargs)

# This is not really a 'benchmark', but we consider it as such to reuse the benchmark infrastructure.
class SimpleDiWithInterfacesAndNewDeleteExecutableSizeBenchmark(SimpleDiWithInterfacesExecutableSizeBenchmark):
    def __init__(self, **kwargs):
        super().__init__(use_new_delete=True, **kwargs)

# This is not really a 'benchmark', but we consider it as such to reuse the benchmark infrastructure.
class SimpleDiWithInterfacesAndNewDeleteExecutableSizeBenchmarkWithoutExceptionsAndRtti(SimpleDiWithInterfacesExecutableSizeBenchmarkWithoutExceptionsAndRtti):
    def __init__(self, **kwargs):
        super().__init__(use_new_delete=True, **kwargs)


def round_to_significant_digits(n, num_significant_digits):
    if n <= 0:
        # We special-case this, otherwise the log10 below will fail.
        return 0
    return round(n, num_significant_digits - int(floor(log10(n))) - 1)
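# For example, round_to_significant_digits(0.123456, 2) == 0.12 and round_to_significant_digits(1234.5, 2) == 1200.0.
# Non-positive values are mapped to 0 by the special case above (only non-negative metrics are expected here).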
def run_benchmark(benchmark, max_runs, timeout_hours, output_file, min_runs=3):
    def run_benchmark_once():
        print('Running benchmark... ', end='', flush=True)
        result = benchmark.run()
        print(result)
        for dimension, value in result.items():
            results_by_dimension[dimension] += [value]

    results_by_dimension = defaultdict(lambda: [])
    print('Preparing for benchmark... ', end='', flush=True)
    benchmark.prepare()
    print('Done.')

    start_time = timer()

    # Run at least min_runs times
    for i in range(min_runs):
        run_benchmark_once()

    # Then consider running a few more times to get the desired precision.
    while True:
        if timer() - start_time > timeout_hours * 3600:
            print("Warning: timed out, couldn't determine a result with the desired precision.")
            break

        for dimension, results in results_by_dimension.items():
            if all(result == results[0] for result in results):
                # If all results are exactly the same the code below misbehaves. We don't need to run again in this case.
                continue
            confidence_interval = stats.DescrStatsW(results).tconfint_mean(0.05)
            confidence_interval_2dig = (round_to_significant_digits(confidence_interval[0], 2),
                                        round_to_significant_digits(confidence_interval[1], 2))
            if abs(confidence_interval_2dig[0] - confidence_interval_2dig[1]) > numpy.finfo(float).eps * 10:
                if len(results) < max_runs:
                    print("Running again to get more precision on the metric %s. Current confidence interval: [%.3g, %.3g]" % (
                        dimension, confidence_interval[0], confidence_interval[1]))
                    break
                else:
                    print("Warning: couldn't determine a precise result for the metric %s. Confidence interval: [%.3g, %.3g]" % (
                        dimension, confidence_interval[0], confidence_interval[1]))
        else:
            # We've reached sufficient precision in all metrics, or we've reached the max number of runs.
            break

        run_benchmark_once()

    # We've reached the desired precision in all dimensions or reached the maximum number of runs. Record the results.
    rounded_confidence_intervals_by_dimension = {}
    confidence_intervals_by_dimension = {}
    for dimension, results in results_by_dimension.items():
        confidence_interval = stats.DescrStatsW(results).tconfint_mean(0.05)
        confidence_interval_2dig = (round_to_significant_digits(confidence_interval[0], 2),
                                    round_to_significant_digits(confidence_interval[1], 2))
        rounded_confidence_intervals_by_dimension[dimension] = confidence_interval_2dig
        confidence_intervals_by_dimension[dimension] = (confidence_interval, confidence_interval_2dig)
    with open(output_file, 'a') as f:
        json.dump({"benchmark": benchmark.describe(), "results": confidence_intervals_by_dimension}, f)
        print(file=f)
    print('Benchmark finished. Result: ', rounded_confidence_intervals_by_dimension)
    print()
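# run_benchmark appends one JSON object per line to output_file, roughly of the form (values are illustrative):
#   {"benchmark": {"name": "fruit_compile_time", ...}, "results": {"compile_time": [[12.31, 12.87], [12.0, 13.0]]}}
# where each metric maps to [exact_confidence_interval, confidence_interval_rounded_to_2_significant_digits].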
def expand_benchmark_definition(benchmark_definition):
    """
    Takes a benchmark definition, e.g.:
    [{name: 'foo', compiler: ['g++-5', 'g++-6']},
     {name: ['bar', 'baz'], compiler: ['g++-5'], cxx_std: 'c++14'}]

    And expands it into the individual benchmarks to run, in the example above:
    [{name: 'foo', compiler: 'g++-5'},
     {name: 'foo', compiler: 'g++-6'},
     {name: 'bar', compiler: 'g++-5', cxx_std: 'c++14'},
     {name: 'baz', compiler: 'g++-5', cxx_std: 'c++14'}]
    """
    dict_keys = sorted(benchmark_definition.keys())
    # Turn non-list values into single-item lists.
    benchmark_definition = {dict_key: value if isinstance(value, list)
                            else [value]
                            for dict_key, value in benchmark_definition.items()}
    # Compute the cartesian product of the value lists.
    value_combinations = itertools.product(*(benchmark_definition[dict_key] for dict_key in dict_keys))
    # Then turn the result back into a dict.
    return [dict(zip(dict_keys, value_combination))
            for value_combination in value_combinations]

def expand_benchmark_definitions(benchmark_definitions):
    return list(itertools.chain(*[expand_benchmark_definition(benchmark_definition) for benchmark_definition in benchmark_definitions]))

def group_by(l, element_to_key):
    """Takes a list and returns a dict of sublists, where the elements are grouped using the provided function"""
    result = defaultdict(list)
    for elem in l:
        result[element_to_key(elem)].append(elem)
    return result.items()

def main():
    # This configures numpy/scipy to raise an exception in case of errors, instead of printing a warning and going ahead.
    numpy.seterr(all='raise')
    scipy.seterr(all='raise')

    parser = argparse.ArgumentParser(description='Runs a set of benchmarks defined in a YAML file.')
    parser.add_argument('--fruit-benchmark-sources-dir', help='Path to the fruit sources (used for benchmarking code only)')
    parser.add_argument('--fruit-sources-dir', help='Path to the fruit sources')
    parser.add_argument('--boost-di-sources-dir', help='Path to the Boost.DI sources')
    parser.add_argument('--output-file',
                        help='The output file where benchmark results will be stored (1 per line, with each line in JSON format). These can then be formatted by e.g. the format_bench_results script.')
    parser.add_argument('--benchmark-definition', help='The YAML file that defines the benchmarks (see fruit_wiki_benchs_fruit.yml for an example).')
    parser.add_argument('--continue-benchmark', help='If this is \'true\', continues a previous benchmark run instead of starting from scratch (taking into account the existing benchmark results in the file specified with --output-file).')
    args = parser.parse_args()

    if args.output_file is None:
        raise Exception('You must specify --output-file')
    if args.continue_benchmark == 'true':
        try:
            with open(args.output_file, 'r') as f:
                previous_run_completed_benchmarks = [json.loads(line)['benchmark'] for line in f.readlines()]
        except FileNotFoundError:
            previous_run_completed_benchmarks = []
    else:
        previous_run_completed_benchmarks = []
        run_command('rm', args=['-f', args.output_file])

    fruit_build_dir = tempfile.gettempdir() + '/fruit-benchmark-build-dir'
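    # A minimal sketch of the expected benchmark definition YAML (values are illustrative only; the exact
    # per-benchmark keys depend on which benchmarks are listed, see fruit_wiki_benchs_fruit.yml for a real example):
    #
    #   global:
    #     max_runs: 10
    #     max_hours_per_combination: 2
    #   benchmarks:
    #     - name: fruit_compile_time
    #       num_classes: [100, 1000]
    #       compiler: g++-6
    #       cxx_std: c++11
    #       additional_cmake_args: [[]]
    #       benchmark_generation_flags: [[]]
    #
    # List-valued entries are expanded into one benchmark per combination (see expand_benchmark_definition above).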
    with open(args.benchmark_definition, 'r') as f:
        yaml_file_content = yaml.load(f)
        global_definitions = yaml_file_content['global']
        benchmark_definitions = expand_benchmark_definitions(yaml_file_content['benchmarks'])

    benchmark_index = 0

    for (compiler_executable_name, additional_cmake_args), benchmark_definitions_with_current_config \
            in group_by(benchmark_definitions,
                        lambda benchmark_definition:
                        (benchmark_definition['compiler'], tuple(benchmark_definition['additional_cmake_args']))):

        print('Preparing for benchmarks with the compiler %s, with additional CMake args %s' % (compiler_executable_name, additional_cmake_args))
        # We compute this here (and memoize the result) so that the benchmark's describe() will retrieve the cached
        # value instantly.
        determine_compiler_name(compiler_executable_name)

        # Build Fruit in fruit_build_dir, so that fruit_build_dir points to a built Fruit (useful for e.g. the config header).
        shutil.rmtree(fruit_build_dir, ignore_errors=True)
        os.makedirs(fruit_build_dir)
        modified_env = os.environ.copy()
        modified_env['CXX'] = compiler_executable_name
        run_command('cmake',
                    args=[
                        args.fruit_sources_dir,
                        '-DCMAKE_BUILD_TYPE=Release',
                        *additional_cmake_args,
                    ],
                    cwd=fruit_build_dir,
                    env=modified_env)
        run_command('make', args=make_args, cwd=fruit_build_dir)

        for benchmark_definition in benchmark_definitions_with_current_config:
            benchmark_index += 1
            print('%s/%s: %s' % (benchmark_index, len(benchmark_definitions), benchmark_definition))
            benchmark_name = benchmark_definition['name']

            if (benchmark_name in {'boost_di_compile_time', 'boost_di_run_time', 'boost_di_executable_size'}
                    and args.boost_di_sources_dir is None):
                raise Exception('Error: you need to specify the --boost-di-sources-dir flag in order to run Boost.DI benchmarks.')

            if benchmark_name == 'new_delete_run_time':
                benchmark = SimpleNewDeleteRunTimeBenchmark(
                    benchmark_definition,
                    fruit_benchmark_sources_dir=args.fruit_benchmark_sources_dir)
            elif benchmark_name == 'fruit_single_file_compile_time':
                benchmark = FruitSingleFileCompileTimeBenchmark(
                    benchmark_definition,
                    fruit_sources_dir=args.fruit_sources_dir,
                    fruit_benchmark_sources_dir=args.fruit_benchmark_sources_dir,
                    fruit_build_dir=fruit_build_dir)
            elif benchmark_name.startswith('fruit_'):
                benchmark_class = {
                    'fruit_compile_time': FruitCompileTimeBenchmark,
                    'fruit_incremental_compile_time': FruitIncrementalCompileTimeBenchmark,
                    'fruit_run_time': FruitRunTimeBenchmark,
                    'fruit_startup_time': FruitStartupTimeBenchmark,
                    'fruit_startup_time_with_normalized_component': FruitStartupTimeWithNormalizedComponentBenchmark,
                    'fruit_executable_size': FruitExecutableSizeBenchmark,
                    'fruit_executable_size_without_exceptions_and_rtti': FruitExecutableSizeBenchmarkWithoutExceptionsAndRtti,
                }[benchmark_name]
                benchmark = benchmark_class(
                    benchmark_definition=benchmark_definition,
                    fruit_sources_dir=args.fruit_sources_dir,
                    fruit_build_dir=fruit_build_dir)
            elif benchmark_name.startswith('boost_di_'):
                benchmark_class = {
                    'boost_di_compile_time': BoostDiCompileTimeBenchmark,
                    'boost_di_incremental_compile_time': BoostDiIncrementalCompileTimeBenchmark,
                    'boost_di_run_time': BoostDiRunTimeBenchmark,
                    'boost_di_startup_time': BoostDiStartupTimeBenchmark,
                    'boost_di_executable_size': BoostDiExecutableSizeBenchmark,
                    'boost_di_executable_size_without_exceptions_and_rtti': BoostDiExecutableSizeBenchmarkWithoutExceptionsAndRtti,
                }[benchmark_name]
                benchmark = benchmark_class(
                    benchmark_definition=benchmark_definition,
                    boost_di_sources_dir=args.boost_di_sources_dir)
            elif benchmark_name.startswith('simple_di_'):
                benchmark_class = {
                    'simple_di_compile_time': SimpleDiCompileTimeBenchmark,
                    'simple_di_incremental_compile_time': SimpleDiIncrementalCompileTimeBenchmark,
                    'simple_di_run_time': SimpleDiRunTimeBenchmark,
                    'simple_di_startup_time': SimpleDiStartupTimeBenchmark,
                    'simple_di_executable_size': SimpleDiExecutableSizeBenchmark,
                    'simple_di_executable_size_without_exceptions_and_rtti': SimpleDiExecutableSizeBenchmarkWithoutExceptionsAndRtti,
                    'simple_di_with_interfaces_compile_time': SimpleDiWithInterfacesCompileTimeBenchmark,
                    'simple_di_with_interfaces_incremental_compile_time': SimpleDiWithInterfacesIncrementalCompileTimeBenchmark,
                    'simple_di_with_interfaces_run_time': SimpleDiWithInterfacesRunTimeBenchmark,
                    'simple_di_with_interfaces_startup_time': SimpleDiWithInterfacesStartupTimeBenchmark,
                    'simple_di_with_interfaces_executable_size': SimpleDiWithInterfacesExecutableSizeBenchmark,
                    'simple_di_with_interfaces_executable_size_without_exceptions_and_rtti': SimpleDiWithInterfacesExecutableSizeBenchmarkWithoutExceptionsAndRtti,
                    'simple_di_with_interfaces_and_new_delete_compile_time': SimpleDiWithInterfacesAndNewDeleteCompileTimeBenchmark,
                    'simple_di_with_interfaces_and_new_delete_incremental_compile_time': SimpleDiWithInterfacesAndNewDeleteIncrementalCompileTimeBenchmark,
                    'simple_di_with_interfaces_and_new_delete_run_time': SimpleDiWithInterfacesAndNewDeleteRunTimeBenchmark,
                    'simple_di_with_interfaces_and_new_delete_startup_time': SimpleDiWithInterfacesAndNewDeleteStartupTimeBenchmark,
                    'simple_di_with_interfaces_and_new_delete_executable_size': SimpleDiWithInterfacesAndNewDeleteExecutableSizeBenchmark,
                    'simple_di_with_interfaces_and_new_delete_executable_size_without_exceptions_and_rtti': SimpleDiWithInterfacesAndNewDeleteExecutableSizeBenchmarkWithoutExceptionsAndRtti,
                }[benchmark_name]
                benchmark = benchmark_class(
                    benchmark_definition=benchmark_definition)
            else:
                raise Exception("Unrecognized benchmark: %s" % benchmark_name)

            if benchmark.describe() in previous_run_completed_benchmarks:
                print("Skipping benchmark that was already run previously (due to --continue-benchmark):", benchmark.describe())
                continue

            run_benchmark(benchmark,
                          output_file=args.output_file,
                          max_runs=global_definitions['max_runs'],
                          timeout_hours=global_definitions['max_hours_per_combination'])


if __name__ == "__main__":
    main()
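# Example invocation (script, file and directory names here are illustrative, not prescribed by this file):
#
#   python3 run_benchmarks.py \
#       --benchmark-definition=fruit_wiki_benchs_fruit.yml \
#       --output-file=/tmp/fruit_benchmark_results.json \
#       --fruit-sources-dir=~/fruit \
#       --fruit-benchmark-sources-dir=~/fruit \
#       --continue-benchmark=true
#
# The resulting output file (one JSON object per line) can then be post-processed, e.g. by the
# format_bench_results script mentioned in the --output-file help text.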