#!/usr/bin/env python3
#
# Copyright 2017 gRPC authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
""" Computes the diff between two bm runs and outputs significant results """

import argparse
import collections
import json
import os
import subprocess
import sys

sys.path.append(os.path.join(os.path.dirname(sys.argv[0]), ".."))

import bm_constants
import bm_json
import bm_speedup
import tabulate

verbose = False


def _median(ary):
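    """Return the median of a non-empty list (mean of the two middle values
    when the length is even)."""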
    assert len(ary)
    ary = sorted(ary)
    n = len(ary)
    if n % 2 == 0:
        return (ary[(n - 1) // 2] + ary[(n - 1) // 2 + 1]) / 2.0
    else:
        return ary[n // 2]


def _args():
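    """Parse command-line arguments; both --new and --old are required."""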
    argp = argparse.ArgumentParser(
        description="Perform diff on microbenchmarks"
    )
    argp.add_argument(
        "-t",
        "--track",
        choices=sorted(bm_constants._INTERESTING),
        nargs="+",
        default=sorted(bm_constants._INTERESTING),
        help="Which metrics to track",
    )
    argp.add_argument(
        "-b",
        "--benchmarks",
        nargs="+",
        choices=bm_constants._AVAILABLE_BENCHMARK_TESTS,
        default=bm_constants._AVAILABLE_BENCHMARK_TESTS,
        help="Which benchmarks to run",
    )
    argp.add_argument(
        "-l",
        "--loops",
        type=int,
        default=20,
        help=(
            "Number of times to loop the benchmarks. Must match what was"
            " passed to bm_run.py"
        ),
    )
    argp.add_argument(
        "-r",
        "--regex",
        type=str,
        default="",
        help="Regex to filter benchmarks run",
    )
    argp.add_argument("-n", "--new", type=str, help="New benchmark name")
    argp.add_argument("-o", "--old", type=str, help="Old benchmark name")
    argp.add_argument(
        "-v",
        "--verbose",
        action="store_true",
        help="Print details of before/after",
    )
    args = argp.parse_args()
    global verbose
    if args.verbose:
        verbose = True
    assert args.new
    assert args.old
    return args


def _maybe_print(msg):
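    """Print msg only when --verbose was given."""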
    if verbose:
        print(msg)


class Benchmark:
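    """Accumulates per-metric samples for the new (True) and old (False) runs
    and determines which metrics changed significantly between them."""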
    def __init__(self):
        self.samples = {
            True: collections.defaultdict(list),
            False: collections.defaultdict(list),
        }
        self.final = {}
        self.speedup = {}

    def add_sample(self, track, data, new):
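        """Record the tracked fields present in one expanded JSON row; new
        selects the new (True) or old (False) sample bucket."""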
        for f in track:
            if f in data:
                self.samples[new][f].append(float(data[f]))

    def process(self, track, new_name, old_name):
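        """Compute the speedup of new vs. old for each tracked field and
        record, in self.final, the fields whose speedup magnitude exceeds 3
        and whose median difference exceeds 0.5; returns the changed field
        names."""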
        for f in sorted(track):
            new = self.samples[True][f]
            old = self.samples[False][f]
            if not new or not old:
                continue
            mdn_diff = abs(_median(new) - _median(old))
            _maybe_print(
                "%s: %s=%r %s=%r mdn_diff=%r"
                % (f, new_name, new, old_name, old, mdn_diff)
            )
            s = bm_speedup.speedup(new, old, 1e-5)
            self.speedup[f] = s
            if abs(s) > 3:
                if mdn_diff > 0.5:
                    self.final[f] = "%+d%%" % s
        return self.final.keys()

    def skip(self):
        return not self.final

    def row(self, flds):
        return [self.final[f] if f in self.final else "" for f in flds]

    def speedup(self, name):
        if name in self.speedup:
            return self.speedup[name]
        return None


def _read_json(filename, badjson_files, nonexistant_files):
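    """Load one benchmark result file, tallying missing files and corrupt
    JSON (keyed by benchmark name) in the supplied dicts; returns None on
    error."""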
    stripped = ".".join(filename.split(".")[:-2])
    try:
        with open(filename) as f:
            r = f.read()
            return json.loads(r)
    except IOError:
        if stripped in nonexistant_files:
            nonexistant_files[stripped] += 1
        else:
            nonexistant_files[stripped] = 1
        return None
    except ValueError:
        print(r)
        if stripped in badjson_files:
            badjson_files[stripped] += 1
        else:
            badjson_files[stripped] = 1
        return None


def fmt_dict(d):
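    """Format a dict as one indented 'key: value' line per entry."""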
    return "".join(["    " + k + ": " + str(d[k]) + "\n" for k in d])


def diff(bms, loops, regex, track, old, new):
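    """Aggregate the per-loop JSON result files for the old and new builds
    (named '<bm>.<benchmark test>.opt.<old|new>.<loop>.json') and return a
    (table, note, significance) tuple summarizing the significant changes."""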
    benchmarks = collections.defaultdict(Benchmark)

    badjson_files = {}
    nonexistant_files = {}
    for bm in bms:
        for loop in range(0, loops):
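            # Enumerate the individual benchmark tests by asking the old
            # binary to list them, filtered by the user-supplied regex.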
            for line in subprocess.check_output(
                [
                    "bm_diff_%s/opt/%s" % (old, bm),
                    "--benchmark_list_tests",
                    "--benchmark_filter=%s" % regex,
                ]
            ).splitlines():
                line = line.decode("UTF-8")
                stripped_line = (
                    line.strip()
                    .replace("/", "_")
                    .replace("<", "_")
                    .replace(">", "_")
                    .replace(", ", "_")
                )
                js_new_opt = _read_json(
                    "%s.%s.opt.%s.%d.json" % (bm, stripped_line, new, loop),
                    badjson_files,
                    nonexistant_files,
                )
                js_old_opt = _read_json(
                    "%s.%s.opt.%s.%d.json" % (bm, stripped_line, old, loop),
                    badjson_files,
                    nonexistant_files,
                )
                if js_new_opt:
                    for row in bm_json.expand_json(js_new_opt):
                        name = row["cpp_name"]
                        if name.endswith("_mean") or name.endswith("_stddev"):
                            continue
                        benchmarks[name].add_sample(track, row, True)
                if js_old_opt:
                    for row in bm_json.expand_json(js_old_opt):
                        name = row["cpp_name"]
                        if name.endswith("_mean") or name.endswith("_stddev"):
                            continue
                        benchmarks[name].add_sample(track, row, False)

    really_interesting = set()
    for name, bm in benchmarks.items():
        _maybe_print(name)
        really_interesting.update(bm.process(track, new, old))
    fields = [f for f in track if f in really_interesting]

    # figure out the significance of the changes... right now we take the
    # 95%-ile benchmark delta %-age, and then apply some hand-chosen thresholds
    histogram = []
    _NOISY = ["BM_WellFlushed"]
    for name, bm in benchmarks.items():
        if name in _NOISY:
            print(
                "skipping noisy benchmark '%s' for labelling evaluation" % name
            )
            continue
        if bm.skip():
            continue
        d = bm.speedup.get("cpu_time")
        if d is None:
            continue
        histogram.append(d)
    histogram.sort()
    print("histogram of speedups: ", histogram)
    if len(histogram) == 0:
        significance = 0
    else:
        delta = histogram[int(len(histogram) * 0.95)]
        mul = 1
        if delta < 0:
            delta = -delta
            mul = -1
        if delta < 2:
            significance = 0
        elif delta < 5:
            significance = 1
        elif delta < 10:
            significance = 2
        else:
            significance = 3
        significance *= mul

    headers = ["Benchmark"] + fields
    rows = []
    for name in sorted(benchmarks.keys()):
        if benchmarks[name].skip():
            continue
        rows.append([name] + benchmarks[name].row(fields))
    note = None
    if len(badjson_files):
        note = (
            "Corrupt JSON data (indicates timeout or crash): \n%s"
            % fmt_dict(badjson_files)
        )
    if len(nonexistant_files):
        if note:
            note += (
                "\n\nMissing files (indicates new benchmark): \n%s"
                % fmt_dict(nonexistant_files)
            )
        else:
            note = (
                "Missing files (indicates new benchmark): \n%s"
                % fmt_dict(nonexistant_files)
            )
    if rows:
        return (
            tabulate.tabulate(rows, headers=headers, floatfmt="+.2f"),
            note,
            significance,
        )
    else:
        return None, note, 0


if __name__ == "__main__":
    args = _args()
    diff, note, _ = diff(
        args.benchmarks,
        args.loops,
        args.regex,
        args.track,
        args.old,
        args.new,
    )
    print("%s\n%s" % (note, diff if diff else "No performance differences"))