"""Microbenchmarks for remainder operators."""

import operator_benchmark as op_bench

import torch


# Operators under test. Both are called as op_func(dividend, divisor).
remainder_ops_list = op_bench.op_list(
    attr_names=["op_name", "op_func"],
    attrs=[
        ["fmod", torch.fmod],
        ["remainder", torch.remainder],
    ],
)

# Small shapes, crossed with every device/dtype combination listed below.
# Both operands are created with the same (M, N, K) shape, so these configs
# measure the plain element-wise path (no broadcasting is exercised).
remainder_short_configs = op_bench.config_list(
    attr_names=["M", "N", "K"],
    attrs=[
        [1, 1, 1],
        [64, 64, 64],
        [64, 64, 128],
    ],
    cross_product_configs={
        "device": ["cpu", "cuda"],
        "dtype": [torch.int32, torch.float, torch.double],
    },
    tags=["short"],
)

# Larger shapes: full cross product of every listed M, N, K, device and dtype.
remainder_long_configs = op_bench.cross_product_configs(
    M=[8, 128],
    N=[32, 64],
    K=[256, 512],
    device=["cpu", "cuda"],
    dtype=[torch.int32, torch.float, torch.double],
    tags=["long"],
)


class RemainderOpBenchmark(op_bench.TorchBenchmarkBase):
    """Benchmark case that applies a remainder-style op to two same-shaped tensors."""

    def init(self, M, N, K, device, dtype, op_func):
        """Build the benchmark inputs for one configuration.

        Args:
            M, N, K: Sizes of the three dimensions of both operands.
            device: Device on which the operands are allocated.
            dtype: Element type the operands are converted to.
            op_func: Callable under test, invoked as op_func(dividend, divisor).
        """
        # torch.rand samples from [0, 1); scale and shift so the dividend
        # covers both negative and positive values before the dtype cast.
        self.dividend = torch.rand(M, N, K, device=device)
        self.dividend = (self.dividend * 1000 - 500).to(dtype=dtype)

        self.divisor = torch.rand(M, N, K, device=device)
        # +1 so we don't divide by zero (also holds after an integer cast,
        # because every pre-cast value is >= 1).
        self.divisor = (self.divisor * 40 + 1).to(dtype=dtype)

        # Keys match forward()'s parameter names.
        # NOTE(review): presumably the op_bench harness feeds these entries
        # to forward() — confirm against TorchBenchmarkBase.
        self.inputs = {"dividend": self.dividend, "divisor": self.divisor}

        self.op_func = op_func

    def forward(self, dividend, divisor):
        """Apply the configured operator and return its result."""
        return self.op_func(dividend, divisor)


# Register one benchmark per (operator, configuration) pair.
op_bench.generate_pt_tests_from_op_list(
    remainder_ops_list,
    remainder_short_configs + remainder_long_configs,
    RemainderOpBenchmark,
)


if __name__ == "__main__":
    op_bench.benchmark_runner.main()