#!/usr/bin/env python3
# Copyright 2016 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS-IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import argparse
import re
import textwrap
from collections import defaultdict
from timeit import default_timer as timer
import tempfile
import os
import shutil
import itertools
import numpy
import subprocess
import yaml
from numpy import floor, log10
import scipy
import multiprocessing
import sh
import json
import statsmodels.stats.api as stats
from generate_benchmark import generate_benchmark
import git
from functools import lru_cache as memoize

class CommandFailedException(Exception):
    def __init__(self, command, stdout, stderr, error_code):
        self.command = command
        self.stdout = stdout
        self.stderr = stderr
        self.error_code = error_code

    def __str__(self):
        return textwrap.dedent('''\
        Ran command: {command}
        Exit code {error_code}
        Stdout:
        {stdout}

        Stderr:
        {stderr}
        ''').format(command=self.command, error_code=self.error_code, stdout=self.stdout, stderr=self.stderr)

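# Example (hypothetical command): run_command('g++', args=['--version']) returns the captured
# (stdout, stderr) pair on success and raises CommandFailedException on a non-zero exit code.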
def run_command(executable, args=[], cwd=None, env=None):
    args = [str(arg) for arg in args]
    command = [executable] + args
    try:
        p = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True, cwd=cwd,
                             env=env)
        (stdout, stderr) = p.communicate()
    except Exception as e:
        raise Exception("While executing: %s" % command) from e
    if p.returncode != 0:
        raise CommandFailedException(command, stdout, stderr, p.returncode)
    return (stdout, stderr)

compile_flags = ['-O2', '-DNDEBUG']

make_args = ['-j', multiprocessing.cpu_count() + 1]
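# Note: run_command converts every argument to a string, so passing the job count as an int here is fine.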

def parse_results(result_lines):
    """
     Parses results from the format:
     ['Dimension name1        = 123',
      'Long dimension name2   = 23.45']

     Into a dict {'Dimension name1': 123.0, 'Long dimension name2': 23.45}
    """
    result_dict = dict()
    for line in result_lines:
        line_splits = line.split('=')
        metric = line_splits[0].strip()
        value = float(line_splits[1].strip())
        result_dict[metric] = value
    return result_dict


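# Determines a human-readable compiler name, e.g. something like 'GCC 6.3.0' or 'Clang 4.0.0',
# by asking CMake which compiler the given executable is.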
# We memoize the result since this might be called repeatedly and it's somewhat expensive.
@memoize(maxsize=None)
def determine_compiler_name(compiler_executable_name):
    tmpdir = tempfile.gettempdir() + '/fruit-determine-compiler-version-dir'
    ensure_empty_dir(tmpdir)
    with open(tmpdir + '/CMakeLists.txt', 'w') as file:
        file.write('message("@@@${CMAKE_CXX_COMPILER_ID} ${CMAKE_CXX_COMPILER_VERSION}@@@")\n')
    modified_env = os.environ.copy()
    modified_env['CXX'] = compiler_executable_name
    # run_command waits for cmake to terminate and reads all of its output, so the command execution is guaranteed
    # to be complete after this line. Otherwise, subsequent calls to determine_compiler_name might have trouble
    # deleting the temporary directory because the cmake process could still be writing files in there.
    _, stderr = run_command('cmake', args=['.'], cwd=tmpdir, env=modified_env)
    cmake_output = stderr.splitlines()
    for line in cmake_output:
        re_result = re.search('@@@(.*)@@@', line)
        if re_result:
            pretty_name = re_result.group(1)
            # CMake calls GCC 'GNU', change it into 'GCC'.
            return pretty_name.replace('GNU ', 'GCC ')
    raise Exception('Unable to determine compiler. CMake output was:\n%s' % cmake_output)


# Returns a pair (sha256_hash, version_name), where version_name will be None if no version tag was found at HEAD.
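# For example (hypothetical values), this might return ('4a5b...', '3.1.1') when HEAD is tagged v3.1.1,
# or ('4a5b...', None) when there is no version tag at HEAD.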
@memoize(maxsize=None)
def git_repo_info(repo_path):
    repo = git.Repo(repo_path)
    head_tags = [tag.name for tag in repo.tags if tag.commit == repo.head.commit and re.match('v[0-9].*', tag.name)]
    if head_tags == []:
        head_tag = None
    else:
        # There should be only 1 version at any given commit.
        [head_tag] = head_tags
        # Remove the 'v' prefix.
        head_tag = head_tag[1:]
    return (repo.head.commit.hexsha, head_tag)


# Some benchmark parameters, e.g. 'compiler_name', are synthesized automatically from other dimensions (e.g. the
# 'compiler' dimension) or from the environment. We put the compiler name/version in the results because the same
# 'compiler' value might refer to different compiler versions (e.g. if GCC 6.0.0 is installed when the benchmarks are
# run and is then updated to GCC 6.0.1 before the results are formatted, we want the formatted results to say
# "GCC 6.0.0", not "GCC 6.0.1").
def add_synthetic_benchmark_parameters(original_benchmark_parameters, path_to_code_under_test):
    benchmark_params = original_benchmark_parameters.copy()
    benchmark_params['compiler_name'] = determine_compiler_name(original_benchmark_parameters['compiler'])
    if path_to_code_under_test is not None:
        sha256_hash, version_name = git_repo_info(path_to_code_under_test)
        benchmark_params['di_library_git_commit_hash'] = sha256_hash
        if version_name is not None:
            benchmark_params['di_library_version_name'] = version_name
    return benchmark_params


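# Each benchmark class below follows the same informal interface: prepare() sets up temporary build artifacts,
# run() executes one measurement and returns a dict of metric name -> value, and describe() returns the
# (expanded) benchmark definition used to identify the benchmark in the results file.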
class SimpleNewDeleteRunTimeBenchmark:
    def __init__(self, benchmark_definition, fruit_benchmark_sources_dir):
        self.benchmark_definition = add_synthetic_benchmark_parameters(benchmark_definition, path_to_code_under_test=None)
        self.fruit_benchmark_sources_dir = fruit_benchmark_sources_dir

    def prepare(self):
        cxx_std = self.benchmark_definition['cxx_std']
        num_classes = self.benchmark_definition['num_classes']
        compiler_executable_name = self.benchmark_definition['compiler']

        self.tmpdir = tempfile.gettempdir() + '/fruit-benchmark-dir'
        ensure_empty_dir(self.tmpdir)
        run_command(compiler_executable_name,
                    args=compile_flags + [
                        '-std=%s' % cxx_std,
                        '-DMULTIPLIER=%s' % num_classes,
                        self.fruit_benchmark_sources_dir + '/extras/benchmark/new_delete_benchmark.cpp',
                        '-o',
                        self.tmpdir + '/main',
                    ])

    def run(self):
        loop_factor = self.benchmark_definition['loop_factor']
        stdout, _ = run_command(self.tmpdir + '/main', args=[int(5000000 * loop_factor)])
        return parse_results(stdout.splitlines())

    def describe(self):
        return self.benchmark_definition


class FruitSingleFileCompileTimeBenchmark:
    def __init__(self, benchmark_definition, fruit_sources_dir, fruit_build_dir, fruit_benchmark_sources_dir):
        self.benchmark_definition = add_synthetic_benchmark_parameters(benchmark_definition, path_to_code_under_test=fruit_sources_dir)
        self.fruit_sources_dir = fruit_sources_dir
        self.fruit_build_dir = fruit_build_dir
        self.fruit_benchmark_sources_dir = fruit_benchmark_sources_dir
        num_bindings = self.benchmark_definition['num_bindings']
        assert (num_bindings % 5) == 0, num_bindings

    def prepare(self):
        pass

    def run(self):
        start = timer()
        cxx_std = self.benchmark_definition['cxx_std']
        num_bindings = self.benchmark_definition['num_bindings']
        compiler_executable_name = self.benchmark_definition['compiler']

        run_command(compiler_executable_name,
                    args=compile_flags + [
                        '-std=%s' % cxx_std,
                        '-DMULTIPLIER=%s' % (num_bindings // 5),
                        '-I', self.fruit_sources_dir + '/include',
                        '-I', self.fruit_build_dir + '/include',
                        '-ftemplate-depth=1000',
                        '-c',
                        self.fruit_benchmark_sources_dir + '/extras/benchmark/compile_time_benchmark.cpp',
                        '-o',
                        '/dev/null',
                    ])
        end = timer()
        return {"compile_time": end - start}

    def describe(self):
        return self.benchmark_definition


def ensure_empty_dir(dirname):
    # We start by creating the directory instead of just calling rmtree with ignore_errors=True because that would ignore
    # all errors, so we might otherwise go ahead even if the directory wasn't properly deleted.
    os.makedirs(dirname, exist_ok=True)
    shutil.rmtree(dirname)
    os.makedirs(dirname)


class GenericGeneratedSourcesBenchmark:
    def __init__(self,
                 di_library,
                 benchmark_definition,
                 path_to_code_under_test=None,
                 **other_args):
        self.di_library = di_library
        self.benchmark_definition = add_synthetic_benchmark_parameters(benchmark_definition, path_to_code_under_test=path_to_code_under_test)
        self.other_args = other_args
        self.arbitrary_files = None

    def prepare_compile_benchmark(self):
        num_classes = self.benchmark_definition['num_classes']
        cxx_std = self.benchmark_definition['cxx_std']
        compiler_executable_name = self.benchmark_definition['compiler']
        benchmark_generation_flags = {flag_name: True for flag_name in self.benchmark_definition['benchmark_generation_flags']}

        self.tmpdir = tempfile.gettempdir() + '/fruit-benchmark-dir'
        ensure_empty_dir(self.tmpdir)
        num_classes_with_no_deps = int(num_classes * 0.1)
        return generate_benchmark(
            compiler=compiler_executable_name,
            num_components_with_no_deps=num_classes_with_no_deps,
            num_components_with_deps=num_classes - num_classes_with_no_deps,
            num_deps=10,
            output_dir=self.tmpdir,
            cxx_std=cxx_std,
            di_library=self.di_library,
            **benchmark_generation_flags,
            **self.other_args)

    def run_make_build(self):
        run_command('make', args=make_args, cwd=self.tmpdir)

    def prepare_incremental_compile_benchmark(self):
        files = self.prepare_compile_benchmark()
        self.run_make_build()
        files = list(sorted(file for file in files if file.endswith('.h')))
        # 5 files, equally spaced (but not at beginning/end) in the sorted sequence.
        num_files_changed = 5
        self.arbitrary_files = [files[i * (len(files) // (num_files_changed + 2))]
                                for i in range(1, num_files_changed + 1)]

    def prepare_runtime_benchmark(self):
        self.prepare_compile_benchmark()
        self.run_make_build()

    def prepare_startup_benchmark(self):
        self.prepare_compile_benchmark()
        self.run_make_build()
        run_command('strip', args=[self.tmpdir + '/main'])

    def prepare_executable_size_benchmark(self):
        self.prepare_runtime_benchmark()
        run_command('strip', args=[self.tmpdir + '/main'])

    def run_compile_benchmark(self):
        run_command('make',
                    args=make_args + ['clean'],
                    cwd=self.tmpdir)
        start = timer()
        self.run_make_build()
        end = timer()
        result = {'compile_time': end - start}
        return result

    def run_incremental_compile_benchmark(self):
        run_command('touch', args=self.arbitrary_files, cwd=self.tmpdir)
        start = timer()
        self.run_make_build()
        end = timer()
        result = {'incremental_compile_time': end - start}
        return result

    def run_runtime_benchmark(self):
        num_classes = self.benchmark_definition['num_classes']
        loop_factor = self.benchmark_definition['loop_factor']

        results, _ = run_command(self.tmpdir + '/main',
                                 args=[
                                     # 40M loops with 100 classes, 4M with 1000 (for loop_factor == 1).
                                     int(4 * 1000 * 1000 * 1000 * loop_factor / num_classes),
                                 ])
        return parse_results(results.splitlines())

    def run_startup_benchmark(self):
        N = 1000
        start = timer()
        for i in range(0, N):
            run_command(self.tmpdir + '/main', args=[])
        end = timer()
        result = {'startup_time': (end - start) / N}
        return result

    def run_executable_size_benchmark(self):
        wc_result, _ = run_command('wc', args=['-c', self.tmpdir + '/main'])
        num_bytes = wc_result.splitlines()[0].split(' ')[0]
        return {'num_bytes': float(num_bytes)}

    def describe(self):
        return self.benchmark_definition


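# The concrete benchmark classes below only wire things together: each one fixes the generated-code flags
# (e.g. generate_runtime_bench_code, use_interfaces, use_new_delete) and the DI library under test, and binds
# prepare()/run() to the appropriate prepare_*/run_* methods of GenericGeneratedSourcesBenchmark.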
class CompileTimeBenchmark(GenericGeneratedSourcesBenchmark):
    def __init__(self, **kwargs):
        super().__init__(generate_runtime_bench_code=False,
                         **kwargs)

    def prepare(self):
        self.prepare_compile_benchmark()

    def run(self):
        return self.run_compile_benchmark()

class IncrementalCompileTimeBenchmark(GenericGeneratedSourcesBenchmark):
    def __init__(self, **kwargs):
        super().__init__(generate_runtime_bench_code=False,
                         **kwargs)

    def prepare(self):
        self.prepare_incremental_compile_benchmark()

    def run(self):
        return self.run_incremental_compile_benchmark()

class StartupTimeBenchmark(GenericGeneratedSourcesBenchmark):
    def __init__(self, **kwargs):
        super().__init__(generate_runtime_bench_code=False,
                         **kwargs)

    def prepare(self):
        self.prepare_startup_benchmark()

    def run(self):
        return self.run_startup_benchmark()

class RunTimeBenchmark(GenericGeneratedSourcesBenchmark):
    def __init__(self, **kwargs):
        super().__init__(generate_runtime_bench_code=True,
                         **kwargs)

    def prepare(self):
        self.prepare_runtime_benchmark()

    def run(self):
        return self.run_runtime_benchmark()

# This is not really a 'benchmark', but we consider it as such to reuse the benchmark infrastructure.
class ExecutableSizeBenchmark(GenericGeneratedSourcesBenchmark):
    def __init__(self, **kwargs):
        super().__init__(generate_runtime_bench_code=False,
                         **kwargs)

    def prepare(self):
        self.prepare_executable_size_benchmark()

    def run(self):
        return self.run_executable_size_benchmark()

# This is not really a 'benchmark', but we consider it as such to reuse the benchmark infrastructure.
class ExecutableSizeBenchmarkWithoutExceptionsAndRtti(ExecutableSizeBenchmark):
    def __init__(self, **kwargs):
        super().__init__(use_exceptions=False,
                         use_rtti=False,
                         **kwargs)

class FruitCompileTimeBenchmark(CompileTimeBenchmark):
    def __init__(self, fruit_sources_dir, **kwargs):
        super().__init__(di_library='fruit',
                         path_to_code_under_test=fruit_sources_dir,
                         fruit_sources_dir=fruit_sources_dir,
                         **kwargs)

class FruitIncrementalCompileTimeBenchmark(IncrementalCompileTimeBenchmark):
    def __init__(self, fruit_sources_dir, **kwargs):
        super().__init__(di_library='fruit',
                         path_to_code_under_test=fruit_sources_dir,
                         fruit_sources_dir=fruit_sources_dir,
                         **kwargs)

class FruitRunTimeBenchmark(RunTimeBenchmark):
    def __init__(self, fruit_sources_dir, **kwargs):
        super().__init__(di_library='fruit',
                         path_to_code_under_test=fruit_sources_dir,
                         fruit_sources_dir=fruit_sources_dir,
                         **kwargs)

class FruitStartupTimeBenchmark(StartupTimeBenchmark):
    def __init__(self, fruit_sources_dir, **kwargs):
        super().__init__(di_library='fruit',
                         path_to_code_under_test=fruit_sources_dir,
                         fruit_sources_dir=fruit_sources_dir,
                         **kwargs)

class FruitStartupTimeWithNormalizedComponentBenchmark(FruitStartupTimeBenchmark):
    def __init__(self, **kwargs):
        super().__init__(use_normalized_component=True,
                         **kwargs)

# This is not really a 'benchmark', but we consider it as such to reuse the benchmark infrastructure.
class FruitExecutableSizeBenchmark(ExecutableSizeBenchmark):
    def __init__(self, fruit_sources_dir, **kwargs):
        super().__init__(di_library='fruit',
                         path_to_code_under_test=fruit_sources_dir,
                         fruit_sources_dir=fruit_sources_dir,
                         **kwargs)

# This is not really a 'benchmark', but we consider it as such to reuse the benchmark infrastructure.
class FruitExecutableSizeBenchmarkWithoutExceptionsAndRtti(ExecutableSizeBenchmarkWithoutExceptionsAndRtti):
    def __init__(self, fruit_sources_dir, **kwargs):
        super().__init__(di_library='fruit',
                         path_to_code_under_test=fruit_sources_dir,
                         fruit_sources_dir=fruit_sources_dir,
                         **kwargs)

class BoostDiCompileTimeBenchmark(CompileTimeBenchmark):
    def __init__(self, boost_di_sources_dir, **kwargs):
        super().__init__(di_library='boost_di',
                         path_to_code_under_test=boost_di_sources_dir,
                         boost_di_sources_dir=boost_di_sources_dir,
                         **kwargs)

class BoostDiIncrementalCompileTimeBenchmark(IncrementalCompileTimeBenchmark):
    def __init__(self, boost_di_sources_dir, **kwargs):
        super().__init__(di_library='boost_di',
                         path_to_code_under_test=boost_di_sources_dir,
                         boost_di_sources_dir=boost_di_sources_dir,
                         **kwargs)

class BoostDiRunTimeBenchmark(RunTimeBenchmark):
    def __init__(self, boost_di_sources_dir, **kwargs):
        super().__init__(di_library='boost_di',
                         path_to_code_under_test=boost_di_sources_dir,
                         boost_di_sources_dir=boost_di_sources_dir,
                         **kwargs)

class BoostDiStartupTimeBenchmark(StartupTimeBenchmark):
    def __init__(self, boost_di_sources_dir, **kwargs):
        super().__init__(di_library='boost_di',
                         path_to_code_under_test=boost_di_sources_dir,
                         boost_di_sources_dir=boost_di_sources_dir,
                         **kwargs)

# This is not really a 'benchmark', but we consider it as such to reuse the benchmark infrastructure.
class BoostDiExecutableSizeBenchmark(ExecutableSizeBenchmark):
    def __init__(self, boost_di_sources_dir, **kwargs):
        super().__init__(di_library='boost_di',
                         path_to_code_under_test=boost_di_sources_dir,
                         boost_di_sources_dir=boost_di_sources_dir,
                         **kwargs)

# This is not really a 'benchmark', but we consider it as such to reuse the benchmark infrastructure.
class BoostDiExecutableSizeBenchmarkWithoutExceptionsAndRtti(ExecutableSizeBenchmarkWithoutExceptionsAndRtti):
    def __init__(self, boost_di_sources_dir, **kwargs):
        super().__init__(di_library='boost_di',
                         path_to_code_under_test=boost_di_sources_dir,
                         boost_di_sources_dir=boost_di_sources_dir,
                         **kwargs)

class SimpleDiCompileTimeBenchmark(CompileTimeBenchmark):
    def __init__(self, **kwargs):
        super().__init__(di_library='none',
                         **kwargs)

class SimpleDiIncrementalCompileTimeBenchmark(IncrementalCompileTimeBenchmark):
    def __init__(self, **kwargs):
        super().__init__(di_library='none',
                         **kwargs)

class SimpleDiRunTimeBenchmark(RunTimeBenchmark):
    def __init__(self, **kwargs):
        super().__init__(di_library='none',
                         **kwargs)

class SimpleDiStartupTimeBenchmark(StartupTimeBenchmark):
    def __init__(self, **kwargs):
        super().__init__(di_library='none',
                         **kwargs)

# This is not really a 'benchmark', but we consider it as such to reuse the benchmark infrastructure.
class SimpleDiExecutableSizeBenchmark(ExecutableSizeBenchmark):
    def __init__(self, **kwargs):
        super().__init__(di_library='none',
                         **kwargs)

# This is not really a 'benchmark', but we consider it as such to reuse the benchmark infrastructure.
class SimpleDiExecutableSizeBenchmarkWithoutExceptionsAndRtti(ExecutableSizeBenchmarkWithoutExceptionsAndRtti):
    def __init__(self, **kwargs):
        super().__init__(di_library='none',
                         **kwargs)

class SimpleDiWithInterfacesCompileTimeBenchmark(SimpleDiCompileTimeBenchmark):
    def __init__(self, **kwargs):
        super().__init__(use_interfaces=True, **kwargs)

class SimpleDiWithInterfacesIncrementalCompileTimeBenchmark(SimpleDiIncrementalCompileTimeBenchmark):
    def __init__(self, **kwargs):
        super().__init__(use_interfaces=True, **kwargs)

class SimpleDiWithInterfacesRunTimeBenchmark(SimpleDiRunTimeBenchmark):
    def __init__(self, **kwargs):
        super().__init__(use_interfaces=True, **kwargs)

class SimpleDiWithInterfacesStartupTimeBenchmark(SimpleDiStartupTimeBenchmark):
    def __init__(self, **kwargs):
        super().__init__(use_interfaces=True, **kwargs)

# This is not really a 'benchmark', but we consider it as such to reuse the benchmark infrastructure.
class SimpleDiWithInterfacesExecutableSizeBenchmark(SimpleDiExecutableSizeBenchmark):
    def __init__(self, **kwargs):
        super().__init__(use_interfaces=True, **kwargs)

# This is not really a 'benchmark', but we consider it as such to reuse the benchmark infrastructure.
class SimpleDiWithInterfacesExecutableSizeBenchmarkWithoutExceptionsAndRtti(SimpleDiExecutableSizeBenchmarkWithoutExceptionsAndRtti):
    def __init__(self, **kwargs):
        super().__init__(use_interfaces=True, **kwargs)

class SimpleDiWithInterfacesAndNewDeleteCompileTimeBenchmark(SimpleDiWithInterfacesCompileTimeBenchmark):
    def __init__(self, **kwargs):
        super().__init__(use_new_delete=True, **kwargs)

class SimpleDiWithInterfacesAndNewDeleteIncrementalCompileTimeBenchmark(SimpleDiWithInterfacesIncrementalCompileTimeBenchmark):
    def __init__(self, **kwargs):
        super().__init__(use_new_delete=True, **kwargs)

class SimpleDiWithInterfacesAndNewDeleteRunTimeBenchmark(SimpleDiWithInterfacesRunTimeBenchmark):
    def __init__(self, **kwargs):
        super().__init__(use_new_delete=True, **kwargs)

class SimpleDiWithInterfacesAndNewDeleteStartupTimeBenchmark(SimpleDiWithInterfacesStartupTimeBenchmark):
    def __init__(self, **kwargs):
        super().__init__(use_new_delete=True, **kwargs)

# This is not really a 'benchmark', but we consider it as such to reuse the benchmark infrastructure.
class SimpleDiWithInterfacesAndNewDeleteExecutableSizeBenchmark(SimpleDiWithInterfacesExecutableSizeBenchmark):
    def __init__(self, **kwargs):
        super().__init__(use_new_delete=True, **kwargs)

# This is not really a 'benchmark', but we consider it as such to reuse the benchmark infrastructure.
class SimpleDiWithInterfacesAndNewDeleteExecutableSizeBenchmarkWithoutExceptionsAndRtti(SimpleDiWithInterfacesExecutableSizeBenchmarkWithoutExceptionsAndRtti):
    def __init__(self, **kwargs):
        super().__init__(use_new_delete=True, **kwargs)


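# Rounds n to the given number of significant digits, e.g. round_to_significant_digits(1234, 2) == 1200
# and round_to_significant_digits(0.0123, 2) == 0.012. Non-positive values are mapped to 0.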
def round_to_significant_digits(n, num_significant_digits):
    if n <= 0:
        # We special-case this, otherwise the log10 below will fail.
        return 0
    return round(n, num_significant_digits - int(floor(log10(n))) - 1)

def run_benchmark(benchmark, max_runs, timeout_hours, output_file, min_runs=3):
    def run_benchmark_once():
        print('Running benchmark... ', end='', flush=True)
        result = benchmark.run()
        print(result)
        for dimension, value in result.items():
            results_by_dimension[dimension] += [value]

    results_by_dimension = defaultdict(lambda: [])
    print('Preparing for benchmark... ', end='', flush=True)
    benchmark.prepare()
    print('Done.')

    start_time = timer()

    # Run at least min_runs times.
    for i in range(min_runs):
        run_benchmark_once()

    # Then consider running a few more times to get the desired precision.
    while True:
        if timer() - start_time > timeout_hours * 3600:
            print("Warning: timed out, couldn't determine a result with the desired precision.")
            break

        for dimension, results in results_by_dimension.items():
            if all(result == results[0] for result in results):
                # If all results are exactly the same the code below misbehaves. We don't need to run again in this case.
                continue
            confidence_interval = stats.DescrStatsW(results).tconfint_mean(0.05)
            confidence_interval_2dig = (round_to_significant_digits(confidence_interval[0], 2),
                                        round_to_significant_digits(confidence_interval[1], 2))
            if abs(confidence_interval_2dig[0] - confidence_interval_2dig[1]) > numpy.finfo(float).eps * 10:
                if len(results) < max_runs:
                    print("Running again to get more precision on the metric %s. Current confidence interval: [%.3g, %.3g]" % (
                        dimension, confidence_interval[0], confidence_interval[1]))
                    break
                else:
                    print("Warning: couldn't determine a precise result for the metric %s. Confidence interval: [%.3g, %.3g]" % (
                        dimension, confidence_interval[0], confidence_interval[1]))
        else:
            # We've reached sufficient precision in all metrics, or we've reached the max number of runs.
            break

        run_benchmark_once()

    # We've reached the desired precision in all dimensions or reached the maximum number of runs. Record the results.
    rounded_confidence_intervals_by_dimension = {}
    confidence_intervals_by_dimension = {}
    for dimension, results in results_by_dimension.items():
        confidence_interval = stats.DescrStatsW(results).tconfint_mean(0.05)
        confidence_interval_2dig = (round_to_significant_digits(confidence_interval[0], 2),
                                    round_to_significant_digits(confidence_interval[1], 2))
        rounded_confidence_intervals_by_dimension[dimension] = confidence_interval_2dig
        confidence_intervals_by_dimension[dimension] = (confidence_interval, confidence_interval_2dig)
    with open(output_file, 'a') as f:
        json.dump({"benchmark": benchmark.describe(), "results": confidence_intervals_by_dimension}, f)
        print(file=f)
    print('Benchmark finished. Result: ', rounded_confidence_intervals_by_dimension)
    print()


def expand_benchmark_definition(benchmark_definition):
    """
    Takes a benchmark definition, e.g.:
    [{name: 'foo', compiler: ['g++-5', 'g++-6']},
     {name: ['bar', 'baz'], compiler: ['g++-5'], cxx_std: 'c++14'}]

    And expands it into the individual benchmarks to run, in the example above:
    [{name: 'foo', compiler: 'g++-5'},
     {name: 'foo', compiler: 'g++-6'},
     {name: 'bar', compiler: 'g++-5', cxx_std: 'c++14'},
     {name: 'baz', compiler: 'g++-5', cxx_std: 'c++14'}]
    """
    dict_keys = sorted(benchmark_definition.keys())
    # Turn non-list values into single-item lists.
    benchmark_definition = {dict_key: value if isinstance(value, list) else [value]
                            for dict_key, value in benchmark_definition.items()}
    # Compute the cartesian product of the value lists.
    value_combinations = itertools.product(*(benchmark_definition[dict_key] for dict_key in dict_keys))
    # Then turn the result back into a dict.
    return [dict(zip(dict_keys, value_combination))
            for value_combination in value_combinations]


def expand_benchmark_definitions(benchmark_definitions):
    return list(itertools.chain(*[expand_benchmark_definition(benchmark_definition) for benchmark_definition in benchmark_definitions]))

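# For example (hypothetical input), group_by([1, 2, 3, 4], lambda x: x % 2) groups the values into
# {1: [1, 3], 0: [2, 4]} and returns that dict's items() view.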
def group_by(l, element_to_key):
    """Takes a list and returns the items() of a dict of sublists, where the elements are grouped by the key computed with the provided function."""
    result = defaultdict(list)
    for elem in l:
        result[element_to_key(elem)].append(elem)
    return result.items()

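# A rough sketch of the benchmark definition YAML that main() expects, inferred from the keys read below
# (see fruit_wiki_benchs_fruit.yml for a real example):
#
#   global:
#     max_runs: 10
#     max_hours_per_combination: 2
#   benchmarks:
#     - name: fruit_compile_time
#       compiler: [g++-6, clang++-4.0]
#       ...
#
# List values are expanded by expand_benchmark_definition into one configuration per element. Depending on the
# benchmark, each definition also needs keys such as 'additional_cmake_args', 'cxx_std', 'num_classes',
# 'num_bindings', 'loop_factor' and 'benchmark_generation_flags'.
#
# Typical invocation (script name and paths are illustrative):
#   python3 run_benchmarks.py --benchmark-definition=fruit_wiki_benchs_fruit.yml \
#       --fruit-sources-dir=~/fruit --fruit-benchmark-sources-dir=~/fruit \
#       --output-file=/tmp/benchmark-results.json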
def main():
    # This configures numpy/scipy to raise an exception in case of errors, instead of printing a warning and going ahead.
    numpy.seterr(all='raise')
    scipy.seterr(all='raise')

    parser = argparse.ArgumentParser(description='Runs a set of benchmarks defined in a YAML file.')
    parser.add_argument('--fruit-benchmark-sources-dir', help='Path to the fruit sources (used for benchmarking code only)')
    parser.add_argument('--fruit-sources-dir', help='Path to the fruit sources')
    parser.add_argument('--boost-di-sources-dir', help='Path to the Boost.DI sources')
    parser.add_argument('--output-file',
                        help='The output file where benchmark results will be stored (1 per line, with each line in JSON format). These can then be formatted by e.g. the format_bench_results script.')
    parser.add_argument('--benchmark-definition', help='The YAML file that defines the benchmarks (see fruit_wiki_benchs_fruit.yml for an example).')
    parser.add_argument('--continue-benchmark', help='If this is \'true\', continues a previous benchmark run instead of starting from scratch (taking into account the existing benchmark results in the file specified with --output-file).')
    args = parser.parse_args()

    if args.output_file is None:
        raise Exception('You must specify --output-file')
    if args.continue_benchmark == 'true':
        try:
            with open(args.output_file, 'r') as f:
                previous_run_completed_benchmarks = [json.loads(line)['benchmark'] for line in f.readlines()]
        except FileNotFoundError:
            previous_run_completed_benchmarks = []
    else:
        previous_run_completed_benchmarks = []
        run_command('rm', args=['-f', args.output_file])

    fruit_build_dir = tempfile.gettempdir() + '/fruit-benchmark-build-dir'

    with open(args.benchmark_definition, 'r') as f:
        yaml_file_content = yaml.safe_load(f)
        global_definitions = yaml_file_content['global']
        benchmark_definitions = expand_benchmark_definitions(yaml_file_content['benchmarks'])

    benchmark_index = 0

    for (compiler_executable_name, additional_cmake_args), benchmark_definitions_with_current_config \
            in group_by(benchmark_definitions,
                        lambda benchmark_definition:
                            (benchmark_definition['compiler'], tuple(benchmark_definition['additional_cmake_args']))):

        print('Preparing for benchmarks with the compiler %s, with additional CMake args %s' % (compiler_executable_name, additional_cmake_args))
        # We compute this here (and memoize the result) so that the benchmark's describe() will retrieve the cached
        # value instantly.
        determine_compiler_name(compiler_executable_name)

        # Build Fruit in fruit_build_dir, so that fruit_build_dir points to a built Fruit (useful for e.g. the config header).
        shutil.rmtree(fruit_build_dir, ignore_errors=True)
        os.makedirs(fruit_build_dir)
        modified_env = os.environ.copy()
        modified_env['CXX'] = compiler_executable_name
        run_command('cmake',
                    args=[
                        args.fruit_sources_dir,
                        '-DCMAKE_BUILD_TYPE=Release',
                        *additional_cmake_args,
                    ],
                    cwd=fruit_build_dir,
                    env=modified_env)
        run_command('make', args=make_args, cwd=fruit_build_dir)

        for benchmark_definition in benchmark_definitions_with_current_config:
            benchmark_index += 1
            print('%s/%s: %s' % (benchmark_index, len(benchmark_definitions), benchmark_definition))
            benchmark_name = benchmark_definition['name']

            if benchmark_name.startswith('boost_di_') and args.boost_di_sources_dir is None:
                raise Exception('Error: you need to specify the --boost-di-sources-dir flag in order to run Boost.DI benchmarks.')

            if benchmark_name == 'new_delete_run_time':
                benchmark = SimpleNewDeleteRunTimeBenchmark(
                    benchmark_definition,
                    fruit_benchmark_sources_dir=args.fruit_benchmark_sources_dir)
            elif benchmark_name == 'fruit_single_file_compile_time':
                benchmark = FruitSingleFileCompileTimeBenchmark(
                    benchmark_definition,
                    fruit_sources_dir=args.fruit_sources_dir,
                    fruit_benchmark_sources_dir=args.fruit_benchmark_sources_dir,
                    fruit_build_dir=fruit_build_dir)
            elif benchmark_name.startswith('fruit_'):
                benchmark_class = {
                    'fruit_compile_time': FruitCompileTimeBenchmark,
                    'fruit_incremental_compile_time': FruitIncrementalCompileTimeBenchmark,
                    'fruit_run_time': FruitRunTimeBenchmark,
                    'fruit_startup_time': FruitStartupTimeBenchmark,
                    'fruit_startup_time_with_normalized_component': FruitStartupTimeWithNormalizedComponentBenchmark,
                    'fruit_executable_size': FruitExecutableSizeBenchmark,
                    'fruit_executable_size_without_exceptions_and_rtti': FruitExecutableSizeBenchmarkWithoutExceptionsAndRtti,
                }[benchmark_name]
                benchmark = benchmark_class(
                    benchmark_definition=benchmark_definition,
                    fruit_sources_dir=args.fruit_sources_dir,
                    fruit_build_dir=fruit_build_dir)
            elif benchmark_name.startswith('boost_di_'):
                benchmark_class = {
                    'boost_di_compile_time': BoostDiCompileTimeBenchmark,
                    'boost_di_incremental_compile_time': BoostDiIncrementalCompileTimeBenchmark,
                    'boost_di_run_time': BoostDiRunTimeBenchmark,
                    'boost_di_startup_time': BoostDiStartupTimeBenchmark,
                    'boost_di_executable_size': BoostDiExecutableSizeBenchmark,
                    'boost_di_executable_size_without_exceptions_and_rtti': BoostDiExecutableSizeBenchmarkWithoutExceptionsAndRtti,
                }[benchmark_name]
                benchmark = benchmark_class(
                    benchmark_definition=benchmark_definition,
                    boost_di_sources_dir=args.boost_di_sources_dir)
            elif benchmark_name.startswith('simple_di_'):
                benchmark_class = {
                    'simple_di_compile_time': SimpleDiCompileTimeBenchmark,
                    'simple_di_incremental_compile_time': SimpleDiIncrementalCompileTimeBenchmark,
                    'simple_di_run_time': SimpleDiRunTimeBenchmark,
                    'simple_di_startup_time': SimpleDiStartupTimeBenchmark,
                    'simple_di_executable_size': SimpleDiExecutableSizeBenchmark,
                    'simple_di_executable_size_without_exceptions_and_rtti': SimpleDiExecutableSizeBenchmarkWithoutExceptionsAndRtti,
                    'simple_di_with_interfaces_compile_time': SimpleDiWithInterfacesCompileTimeBenchmark,
                    'simple_di_with_interfaces_incremental_compile_time': SimpleDiWithInterfacesIncrementalCompileTimeBenchmark,
                    'simple_di_with_interfaces_run_time': SimpleDiWithInterfacesRunTimeBenchmark,
                    'simple_di_with_interfaces_startup_time': SimpleDiWithInterfacesStartupTimeBenchmark,
                    'simple_di_with_interfaces_executable_size': SimpleDiWithInterfacesExecutableSizeBenchmark,
                    'simple_di_with_interfaces_executable_size_without_exceptions_and_rtti': SimpleDiWithInterfacesExecutableSizeBenchmarkWithoutExceptionsAndRtti,
                    'simple_di_with_interfaces_and_new_delete_compile_time': SimpleDiWithInterfacesAndNewDeleteCompileTimeBenchmark,
                    'simple_di_with_interfaces_and_new_delete_incremental_compile_time': SimpleDiWithInterfacesAndNewDeleteIncrementalCompileTimeBenchmark,
                    'simple_di_with_interfaces_and_new_delete_run_time': SimpleDiWithInterfacesAndNewDeleteRunTimeBenchmark,
                    'simple_di_with_interfaces_and_new_delete_startup_time': SimpleDiWithInterfacesAndNewDeleteStartupTimeBenchmark,
                    'simple_di_with_interfaces_and_new_delete_executable_size': SimpleDiWithInterfacesAndNewDeleteExecutableSizeBenchmark,
                    'simple_di_with_interfaces_and_new_delete_executable_size_without_exceptions_and_rtti': SimpleDiWithInterfacesAndNewDeleteExecutableSizeBenchmarkWithoutExceptionsAndRtti,
                }[benchmark_name]
                benchmark = benchmark_class(
                    benchmark_definition=benchmark_definition)
            else:
                raise Exception("Unrecognized benchmark: %s" % benchmark_name)

            if benchmark.describe() in previous_run_completed_benchmarks:
                print("Skipping benchmark that was already run previously (due to --continue-benchmark):", benchmark.describe())
                continue

            run_benchmark(benchmark,
                          output_file=args.output_file,
                          max_runs=global_definitions['max_runs'],
                          timeout_hours=global_definitions['max_hours_per_combination'])


if __name__ == "__main__":
    main()