• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1import operator_benchmark as op_bench
2
3import torch
4
5
6"""Microbenchmarks for remainder operators."""
7
8
# Ops under test: each entry pairs a display name with the torch callable.
# Both ops are exercised through the shared RemainderOpBenchmark class.
# (NOTE(review): a previous comment said "with broadcast", but init() builds
# same-shaped M x N x K operands, so no broadcasting actually occurs.)
remainder_ops_list = op_bench.op_list(
    attr_names=["op_name", "op_func"],
    attrs=[
        ["fmod", torch.fmod],
        ["remainder", torch.remainder],
    ],
)
17
# Short ("smoke-test") configs: a few small, explicit M x N x K shapes,
# each crossed with every device/dtype combination below.
remainder_short_configs = op_bench.config_list(
    attr_names=["M", "N", "K"],
    attrs=[
        [1, 1, 1],        # degenerate single-element case
        [64, 64, 64],
        [64, 64, 128],
    ],
    cross_product_configs={
        "device": ["cpu", "cuda"],
        "dtype": [torch.int32, torch.float, torch.double],
    },
    tags=["short"],
)
31
# Long configs: full cross product of larger shapes, devices, and dtypes
# (2*2*2 shapes x 2 devices x 3 dtypes = 24 configurations).
remainder_long_configs = op_bench.cross_product_configs(
    M=[8, 128],
    N=[32, 64],
    K=[256, 512],
    device=["cpu", "cuda"],
    dtype=[torch.int32, torch.float, torch.double],
    tags=["long"],
)
40
41
class RemainderOpBenchmark(op_bench.TorchBenchmarkBase):
    """Benchmark harness for binary remainder-style ops (fmod / remainder)."""

    def init(self, M, N, K, device, dtype, op_func):
        # Dividend: uniform samples scaled into [-500, 500), then cast.
        raw = torch.rand(M, N, K, device=device)
        self.dividend = (raw * 1000 - 500).to(dtype=dtype)

        # Divisor: uniform samples scaled into [1, 41); the +1 offset keeps
        # every value away from zero so the op never divides by zero.
        raw = torch.rand(M, N, K, device=device)
        self.divisor = (raw * 40 + 1).to(dtype=dtype)

        self.inputs = {
            "dividend": self.dividend,
            "divisor": self.divisor,
        }
        self.op_func = op_func

    def forward(self, dividend, divisor):
        # Apply the selected op (torch.fmod or torch.remainder).
        return self.op_func(dividend, divisor)
57
58
# Register one benchmark test per (op, config) pair: 2 ops x (short + long)
# configurations, all driven through RemainderOpBenchmark.
op_bench.generate_pt_tests_from_op_list(
    remainder_ops_list,
    remainder_short_configs + remainder_long_configs,
    RemainderOpBenchmark,
)
64
65
# Standard operator_benchmark entry point: runs all registered benchmarks.
if __name__ == "__main__":
    op_bench.benchmark_runner.main()
68