# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Benchmarks for autotuning performance knobs."""

import numpy as np

from tensorflow.python.data.benchmarks import benchmark_base
from tensorflow.python.data.ops import dataset_ops
from tensorflow.python.data.ops import options as options_lib
from tensorflow.python.ops import math_ops

# Scale factor shared by the pipelines that multiply large vectors.
_MEBI = 1024 * 1024


def _random_matmul_pair(inner_dim):
  """Returns random arrays of shape `(1, inner_dim)` and `(inner_dim, 1)`."""
  return np.random.rand(1, inner_dim), np.random.rand(inner_dim, 1)


def _autotuned_interleave(dataset, cycle_length):
  """Interleaves copies of `dataset` with `num_parallel_calls=AUTOTUNE`.

  Args:
    dataset: The dataset returned for every element of the outer dataset.
    cycle_length: Forwarded to `interleave` as `cycle_length`.

  Returns:
    `Dataset.range(1).repeat()` interleaved over `dataset`.
  """
  return dataset_ops.Dataset.range(1).repeat().interleave(
      lambda _: dataset,
      cycle_length=cycle_length,
      num_parallel_calls=dataset_ops.AUTOTUNE)


class AutotuneBenchmark(benchmark_base.DatasetBenchmarkBase):
  """Benchmarks for autotuning performance knobs.

  Every public `benchmark_*` method builds the same input pipeline twice,
  measures it first with `options.autotune.enabled` off and then on, and
  prints the ratio of the two measurements.
  """

  def _run_benchmark(self, dataset, autotune, benchmark_iters, benchmark_label,
                     benchmark_id):
    """Applies the benchmark options to `dataset`, then runs and reports it.

    Args:
      dataset: The input pipeline to measure.
      autotune: Value assigned to `options.autotune.enabled`; also selects
        whether the reported name carries the autotune suffix.
      benchmark_iters: Forwarded to `run_and_report_benchmark` as
        `num_elements`.
      benchmark_label: Base name of the reported benchmark.
      benchmark_id: Integer embedded in the reported "model_name" extra.

    Returns:
      Whatever `run_and_report_benchmark` returns (used by callers as a wall
      time).
    """
    opts = options_lib.Options()
    opts.experimental_optimization.apply_default_optimizations = False
    opts.autotune.enabled = autotune
    configured = dataset.with_options(opts)

    suffix = "_autotune_parallelism_only" if autotune else ""
    extras = {
        "model_name":
            "autotune.benchmark.%s.%d" % (benchmark_label, benchmark_id),
        "parameters":
            "%s" % autotune,
    }
    return self.run_and_report_benchmark(
        dataset=configured,
        num_elements=benchmark_iters,
        warmup=True,
        iters=1,
        extras=extras,
        name=benchmark_label + suffix)

  def _report_speedup(self, build_and_run):
    """Runs `build_and_run` without, then with, autotuning; prints the ratio.

    Args:
      build_and_run: Callable taking `autotune` and `benchmark_id` keyword
        arguments and returning the measured wall time.
    """
    baseline = build_and_run(autotune=False, benchmark_id=1)
    autotuned = build_and_run(autotune=True, benchmark_id=2)
    print("autotune parallelism vs no autotuning speedup: {}".format(
        baseline / autotuned))

  def benchmark_batch(self):
    """Compares the `batch` pipeline with autotuning off and on."""
    self._report_speedup(self._benchmark_batch)

  def _benchmark_batch(self, autotune, benchmark_id):
    """Measures repeat -> map(matmul) -> batch with autotuned batch calls."""
    operands = _random_matmul_pair(4 * 1024)
    products = dataset_ops.Dataset.from_tensors(operands).repeat().map(
        math_ops.matmul)
    batched = products.batch(
        batch_size=128, num_parallel_calls=dataset_ops.AUTOTUNE)
    return self._run_benchmark(
        dataset=batched,
        autotune=autotune,
        benchmark_iters=10000,
        benchmark_label="batch",
        benchmark_id=benchmark_id)

  def benchmark_map(self):
    """Compares the `map` pipeline with autotuning off and on."""
    self._report_speedup(self._benchmark_map)

  def _benchmark_map(self, autotune, benchmark_id):
    """Measures repeat -> map(matmul) with autotuned map parallelism."""
    operands = _random_matmul_pair(4 * _MEBI)
    source = dataset_ops.Dataset.from_tensors(operands).repeat()
    products = source.map(
        math_ops.matmul, num_parallel_calls=dataset_ops.AUTOTUNE)
    return self._run_benchmark(
        dataset=products,
        autotune=autotune,
        benchmark_iters=10000,
        benchmark_label="map",
        benchmark_id=benchmark_id)

  def benchmark_map_and_batch(self):
    """Compares the `map_and_batch` pipeline with autotuning off and on."""
    self._report_speedup(self._benchmark_map_and_batch)

  def _benchmark_map_and_batch(self, autotune, benchmark_id):
    """Measures repeat -> autotuned map(matmul) -> batch(16)."""
    operands = _random_matmul_pair(4 * _MEBI)
    source = dataset_ops.Dataset.from_tensors(operands).repeat()
    products = source.map(
        math_ops.matmul, num_parallel_calls=dataset_ops.AUTOTUNE)
    batched = products.batch(batch_size=16)
    return self._run_benchmark(
        dataset=batched,
        autotune=autotune,
        benchmark_iters=1000,
        benchmark_label="map_and_batch",
        benchmark_id=benchmark_id)

  def benchmark_interleave(self):
    """Compares the `interleave` pipeline with autotuning off and on."""
    self._report_speedup(self._benchmark_interleave)

  def _benchmark_interleave(self, autotune, benchmark_id):
    """Measures an autotuned interleave (cycle length 10) over map(matmul)."""
    operands = _random_matmul_pair(4 * _MEBI)
    products = dataset_ops.Dataset.from_tensors(operands).repeat().map(
        math_ops.matmul)
    interleaved = _autotuned_interleave(products, cycle_length=10)
    return self._run_benchmark(
        dataset=interleaved,
        autotune=autotune,
        benchmark_iters=10000,
        benchmark_label="interleave",
        benchmark_id=benchmark_id)

  def benchmark_map_and_interleave(self):
    """Compares the `map_and_interleave` pipeline with autotuning off and on."""
    self._report_speedup(self._benchmark_map_and_interleave)

  def _benchmark_map_and_interleave(self, autotune, benchmark_id):
    """Measures alternating autotuned map and interleave stages.

    Three repeated sources are combined: the first is mapped and interleaved,
    zipped with the second, mapped and interleaved again, then zipped with the
    third and mapped once more.
    """
    # Draw the random operands in this fixed order before building datasets.
    operands_8 = _random_matmul_pair(8 * _MEBI)
    operands_4 = _random_matmul_pair(4 * _MEBI)
    operands_2 = _random_matmul_pair(2 * _MEBI)
    source_8 = dataset_ops.Dataset.from_tensors(operands_8).repeat()
    source_4 = dataset_ops.Dataset.from_tensors(operands_4).repeat()
    source_2 = dataset_ops.Dataset.from_tensors(operands_2).repeat()

    def multiply(lhs, rhs):
      return math_ops.matmul(lhs, rhs)

    def multiply_second(first, second):
      # `second` is an operand pair; `first` is passed through untouched.
      lhs, rhs = second
      return first, math_ops.matmul(lhs, rhs)

    pipeline = source_8.map(multiply, num_parallel_calls=dataset_ops.AUTOTUNE)
    pipeline = _autotuned_interleave(pipeline, cycle_length=2)

    pipeline = dataset_ops.Dataset.zip((pipeline, source_4))
    pipeline = pipeline.map(
        multiply_second, num_parallel_calls=dataset_ops.AUTOTUNE)
    pipeline = _autotuned_interleave(pipeline, cycle_length=2)

    pipeline = dataset_ops.Dataset.zip((pipeline, source_2))
    pipeline = pipeline.map(
        multiply_second, num_parallel_calls=dataset_ops.AUTOTUNE)
    return self._run_benchmark(
        dataset=pipeline,
        autotune=autotune,
        benchmark_iters=10000,
        benchmark_label="map_and_interleave",
        benchmark_id=benchmark_id)

  def benchmark_map_batch_and_interleave(self):
    """Compares the map/batch/interleave pipeline with autotuning off and on."""
    self._report_speedup(self._benchmark_map_batch_and_interleave)

  def _benchmark_map_batch_and_interleave(self, autotune, benchmark_id):
    """Measures autotuned map, batch(16) and interleave stages combined.

    The first source is mapped, batched and interleaved, zipped with the
    second (unmapped) source and interleaved again, then zipped with the
    mapped and batched third source.
    """
    batch_size = 16
    # Draw the random operands in this fixed order before building datasets.
    operands_8 = _random_matmul_pair(8 * _MEBI)
    operands_4 = _random_matmul_pair(4 * _MEBI)
    operands_2 = _random_matmul_pair(2 * _MEBI)
    source_8 = dataset_ops.Dataset.from_tensors(operands_8).repeat()
    source_4 = dataset_ops.Dataset.from_tensors(operands_4).repeat()
    source_2 = dataset_ops.Dataset.from_tensors(operands_2).repeat()

    pipeline = source_8.map(
        math_ops.matmul, num_parallel_calls=dataset_ops.AUTOTUNE)
    pipeline = pipeline.batch(batch_size=batch_size)
    pipeline = _autotuned_interleave(pipeline, cycle_length=2)

    pipeline = dataset_ops.Dataset.zip((pipeline, source_4))
    pipeline = _autotuned_interleave(pipeline, cycle_length=2)

    batched_2 = source_2.map(
        math_ops.matmul, num_parallel_calls=dataset_ops.AUTOTUNE)
    batched_2 = batched_2.batch(batch_size=batch_size)
    pipeline = dataset_ops.Dataset.zip((pipeline, batched_2))
    return self._run_benchmark(
        dataset=pipeline,
        autotune=autotune,
        benchmark_iters=1000,
        benchmark_label="map_and_batch_and_interleave",
        benchmark_id=benchmark_id)


if __name__ == "__main__":
  benchmark_base.test.main()