1# mypy: allow-untyped-defs 2"""Example use of Timer and op fuzzers to measure kernel performance. 3 4$ python -m examples.op_benchmark 5""" 6 7import numpy as np 8import torch 9 10from torch.utils.benchmark import Timer 11from torch.utils.benchmark.op_fuzzers.binary import BinaryOpFuzzer 12from torch.utils.benchmark.op_fuzzers.unary import UnaryOpFuzzer 13import operator 14 15 16_MEASURE_TIME = 1.0 17 18 19def assert_dicts_equal(dict_0, dict_1): 20 """Builtin dict comparison will not compare numpy arrays. 21 e.g. 22 x = {"a": np.ones((2, 1))} 23 x == x # Raises ValueError 24 """ 25 assert set(dict_0.keys()) == set(dict_0.keys()) 26 assert all(np.all(v == dict_1[k]) for k, v in dict_0.items() if k != "dtype") 27 28 29def run(n, stmt, fuzzer_cls): 30 float_iter = fuzzer_cls(seed=0, dtype=torch.float32).take(n) 31 int_iter = fuzzer_cls(seed=0, dtype=torch.int32).take(n) 32 raw_results = [] 33 for i, (float_values, int_values) in enumerate(zip(float_iter, int_iter)): 34 float_tensors, float_tensor_params, float_params = float_values 35 int_tensors, int_tensor_params, int_params = int_values 36 37 # This benchmark assumes that the two fuzzers generate identically 38 # sized and strided Tensors, since the same seed is used. 39 assert_dicts_equal(float_params, int_params) 40 assert_dicts_equal(float_tensor_params["x"], int_tensor_params["x"]) 41 42 float_measurement, int_measurement = ( 43 Timer( 44 stmt, 45 globals=tensors, 46 ).blocked_autorange(min_run_time=_MEASURE_TIME) 47 for tensors in (float_tensors, int_tensors) 48 ) 49 50 descriptions = [] 51 for name in float_tensors: 52 shape_str = "(" + ", ".join([ 53 f"2 ** {int(np.log2(i))}" 54 if 2 ** int(np.log2(i)) == i and i > 1 55 else str(i) 56 for i in float_tensors[name].shape 57 ]) + ")" 58 order = float_tensor_params[name]["order"] 59 order_str = ("" if all(order == np.arange(len(order))) else str(tuple(order))) 60 steps = float_tensor_params[name]["steps"] 61 steps_str = str(steps) if sum(steps) > len(steps) else "" 62 descriptions.append((name, shape_str, order_str, steps_str)) 63 raw_results.append((float_measurement, int_measurement, descriptions)) 64 65 print(f"\r{i + 1} / {n}", end="") 66 print() 67 68 parsed_results, name_len, shape_len, order_len, steps_len = [], 0, 0, 0, 0 69 for float_measurement, int_measurement, descriptions in raw_results: 70 t_float = float_measurement.median * 1e6 71 t_int = int_measurement.median * 1e6 72 rel_diff = abs(t_float - t_int) / (t_float + t_int) * 2 73 parsed_results.append((t_float, t_int, rel_diff, descriptions)) 74 for name, shape, order, steps in descriptions: 75 name_len = max(name_len, len(name)) 76 shape_len = max(shape_len, len(shape)) 77 order_len = max(order_len, len(order)) 78 steps_len = max(steps_len, len(steps)) 79 80 parsed_results.sort(key=operator.itemgetter(2)) 81 82 print(f"stmt: {stmt}") 83 print(f" diff faster{'':>17}{' ' * name_len} ", end="") 84 print(f"{'shape'.ljust(shape_len)}{'':>16}{'order'.ljust(order_len)}", end="") 85 print(f" steps\n{'-' * 100}") 86 for results, spacer in [(parsed_results[:10], "..."), (parsed_results[-10:], "")]: 87 for t_float, t_int, rel_diff, descriptions in results: 88 time_str = [f"{rel_diff * 100:>4.1f}% {'int' if t_int < t_float else 'float':<20}"] 89 time_str.extend(["".ljust(len(time_str[0])) for _ in descriptions[:-1]]) 90 for t_str, (name, shape, order, steps) in zip(time_str, descriptions): 91 name = f"{name}:".ljust(name_len + 1) 92 shape = shape.ljust(shape_len + 10) 93 order = order.ljust(order_len) 94 print(f"{t_str} {name} {shape}| {order} | {steps}") 95 print(spacer) 96 97 98def main(): 99 run(n=100, stmt="torch.median(x, dim=0)", fuzzer_cls=UnaryOpFuzzer) 100 run(n=100, stmt="torch.square(x)", fuzzer_cls=UnaryOpFuzzer) 101 run(n=100, stmt="x + y", fuzzer_cls=BinaryOpFuzzer) 102 103 104if __name__ == "__main__": 105 main() 106