• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7#     http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14# ==============================================================================
15"""Benchmarks for autotuning performance knobs."""
16
17import numpy as np
18
19from tensorflow.python.data.benchmarks import benchmark_base
20from tensorflow.python.data.ops import dataset_ops
21from tensorflow.python.data.ops import options as options_lib
22from tensorflow.python.ops import math_ops
23
24
class AutotuneBenchmark(benchmark_base.DatasetBenchmarkBase):
  """Benchmarks for autotuning performance knobs.

  Each public `benchmark_*` method runs the same pipeline twice — once with
  tf.data autotuning disabled and once enabled — and prints the resulting
  speedup ratio.
  """

  def _compare_autotune_speedup(self, benchmark_fn):
    """Runs `benchmark_fn` without and with autotuning and prints the speedup.

    Args:
      benchmark_fn: A callable accepting `autotune` and `benchmark_id` keyword
        arguments and returning the measured wall time.
    """
    wall_time_off = benchmark_fn(autotune=False, benchmark_id=1)
    wall_time_on = benchmark_fn(autotune=True, benchmark_id=2)
    print("autotune parallelism vs no autotuning speedup: {}".format(
        wall_time_off / wall_time_on))

  def _run_benchmark(self, dataset, autotune, benchmark_iters, benchmark_label,
                     benchmark_id):
    """Benchmarks `dataset` with autotuning enabled or disabled.

    Args:
      dataset: The `tf.data.Dataset` pipeline to benchmark.
      autotune: Whether tf.data autotuning is enabled for this run.
      benchmark_iters: Number of elements to draw from the dataset.
      benchmark_label: Short label identifying the pipeline shape.
      benchmark_id: Integer distinguishing the runs within one comparison.

    Returns:
      The wall time reported by `run_and_report_benchmark`.
    """
    options = options_lib.Options()
    # Disable the default static optimizations so that the autotuning knob is
    # the only difference between the two compared runs.
    options.experimental_optimization.apply_default_optimizations = False
    options.autotune.enabled = autotune
    dataset = dataset.with_options(options)

    autotune_string = "_autotune_parallelism_only"
    wall_time = self.run_and_report_benchmark(
        dataset=dataset,
        num_elements=benchmark_iters,
        warmup=True,
        iters=1,
        extras={
            "model_name":
                "autotune.benchmark.%s.%d" % (benchmark_label, benchmark_id),
            "parameters":
                "%s" % autotune,
        },
        name=benchmark_label + (autotune_string if autotune else ""))
    return wall_time

  def benchmark_batch(self):
    self._compare_autotune_speedup(self._benchmark_batch)

  def _benchmark_batch(self, autotune, benchmark_id):
    """Benchmarks a sequential map followed by an autotuned parallel batch."""
    batch_size = 128
    k = 1024
    dataset = dataset_ops.Dataset.from_tensors(
        (np.random.rand(1, 4 * k), np.random.rand(4 * k, 1))).repeat()
    dataset = dataset.map(math_ops.matmul)
    dataset = dataset.batch(
        batch_size=batch_size, num_parallel_calls=dataset_ops.AUTOTUNE)
    return self._run_benchmark(
        dataset=dataset,
        autotune=autotune,
        benchmark_iters=10000,
        benchmark_label="batch",
        benchmark_id=benchmark_id)

  def benchmark_map(self):
    self._compare_autotune_speedup(self._benchmark_map)

  def _benchmark_map(self, autotune, benchmark_id):
    """Benchmarks a single autotuned parallel map."""
    k = 1024 * 1024
    dataset = dataset_ops.Dataset.from_tensors(
        (np.random.rand(1, 4 * k), np.random.rand(4 * k, 1))).repeat()
    dataset = dataset.map(
        math_ops.matmul, num_parallel_calls=dataset_ops.AUTOTUNE)
    return self._run_benchmark(
        dataset=dataset,
        autotune=autotune,
        benchmark_iters=10000,
        benchmark_label="map",
        benchmark_id=benchmark_id)

  def benchmark_map_and_batch(self):
    self._compare_autotune_speedup(self._benchmark_map_and_batch)

  def _benchmark_map_and_batch(self, autotune, benchmark_id):
    """Benchmarks an autotuned parallel map followed by a sequential batch."""
    batch_size = 16
    k = 1024 * 1024
    dataset = dataset_ops.Dataset.from_tensors(
        (np.random.rand(1, 4 * k), np.random.rand(4 * k, 1))).repeat()
    dataset = dataset.map(
        math_ops.matmul, num_parallel_calls=dataset_ops.AUTOTUNE)
    dataset = dataset.batch(batch_size=batch_size)
    return self._run_benchmark(
        dataset=dataset,
        autotune=autotune,
        benchmark_iters=1000,
        benchmark_label="map_and_batch",
        benchmark_id=benchmark_id)

  def benchmark_interleave(self):
    self._compare_autotune_speedup(self._benchmark_interleave)

  def _benchmark_interleave(self, autotune, benchmark_id):
    """Benchmarks an autotuned parallel interleave over a mapped dataset."""
    k = 1024 * 1024
    dataset = dataset_ops.Dataset.from_tensors(
        (np.random.rand(1, 4 * k), np.random.rand(4 * k, 1))).repeat()
    dataset = dataset.map(math_ops.matmul)
    dataset = dataset_ops.Dataset.range(1).repeat().interleave(
        lambda _: dataset,
        cycle_length=10,
        num_parallel_calls=dataset_ops.AUTOTUNE)
    return self._run_benchmark(
        dataset=dataset,
        autotune=autotune,
        benchmark_iters=10000,
        benchmark_label="interleave",
        benchmark_id=benchmark_id)

  def benchmark_map_and_interleave(self):
    self._compare_autotune_speedup(self._benchmark_map_and_interleave)

  def _benchmark_map_and_interleave(self, autotune, benchmark_id):
    """Benchmarks stacked autotuned maps and interleaves over zipped inputs."""
    k = 1024 * 1024
    # Three matmul workloads of decreasing size so the autotuner has to
    # balance parallelism across heterogeneous stages.
    a = (np.random.rand(1, 8 * k), np.random.rand(8 * k, 1))
    b = (np.random.rand(1, 4 * k), np.random.rand(4 * k, 1))
    c = (np.random.rand(1, 2 * k), np.random.rand(2 * k, 1))
    dataset_a = dataset_ops.Dataset.from_tensors(a).repeat()
    dataset_b = dataset_ops.Dataset.from_tensors(b).repeat()
    dataset_c = dataset_ops.Dataset.from_tensors(c).repeat()

    def f1(x, y):
      # Multiplies the factor pair of a single element.
      return math_ops.matmul(x, y)

    def f2(a, b):
      # Passes the first zip component through and multiplies the second.
      x, y = b
      return a, math_ops.matmul(x, y)

    dataset = dataset_a
    dataset = dataset.map(f1, num_parallel_calls=dataset_ops.AUTOTUNE)
    dataset = dataset_ops.Dataset.range(1).repeat().interleave(
        lambda _: dataset,
        num_parallel_calls=dataset_ops.AUTOTUNE,
        cycle_length=2)

    dataset = dataset_ops.Dataset.zip((dataset, dataset_b))
    dataset = dataset.map(f2, num_parallel_calls=dataset_ops.AUTOTUNE)
    dataset = dataset_ops.Dataset.range(1).repeat().interleave(
        lambda _: dataset,
        num_parallel_calls=dataset_ops.AUTOTUNE,
        cycle_length=2)

    dataset = dataset_ops.Dataset.zip((dataset, dataset_c))
    dataset = dataset.map(f2, num_parallel_calls=dataset_ops.AUTOTUNE)
    return self._run_benchmark(
        dataset=dataset,
        autotune=autotune,
        benchmark_iters=10000,
        benchmark_label="map_and_interleave",
        benchmark_id=benchmark_id)

  def benchmark_map_batch_and_interleave(self):
    self._compare_autotune_speedup(self._benchmark_map_batch_and_interleave)

  def _benchmark_map_batch_and_interleave(self, autotune, benchmark_id):
    """Benchmarks combined map, batch, and interleave stages."""
    batch_size = 16
    k = 1024 * 1024
    a = (np.random.rand(1, 8 * k), np.random.rand(8 * k, 1))
    b = (np.random.rand(1, 4 * k), np.random.rand(4 * k, 1))
    c = (np.random.rand(1, 2 * k), np.random.rand(2 * k, 1))
    dataset_a = dataset_ops.Dataset.from_tensors(a).repeat()
    dataset_b = dataset_ops.Dataset.from_tensors(b).repeat()
    dataset_c = dataset_ops.Dataset.from_tensors(c).repeat()

    dataset = dataset_a
    dataset = dataset.map(
        math_ops.matmul, num_parallel_calls=dataset_ops.AUTOTUNE)
    dataset = dataset.batch(batch_size=batch_size)
    dataset = dataset_ops.Dataset.range(1).repeat().interleave(
        lambda _: dataset,
        num_parallel_calls=dataset_ops.AUTOTUNE,
        cycle_length=2)

    dataset = dataset_ops.Dataset.zip((dataset, dataset_b))
    dataset = dataset_ops.Dataset.range(1).repeat().interleave(
        lambda _: dataset,
        num_parallel_calls=dataset_ops.AUTOTUNE,
        cycle_length=2)

    dataset_c = dataset_c.map(
        math_ops.matmul, num_parallel_calls=dataset_ops.AUTOTUNE)
    dataset_c = dataset_c.batch(batch_size=batch_size)
    dataset = dataset_ops.Dataset.zip((dataset, dataset_c))
    return self._run_benchmark(
        dataset=dataset,
        autotune=autotune,
        benchmark_iters=1000,
        # NOTE: label intentionally kept as in the original, even though it
        # differs from the method name (map_batch_and_interleave).
        benchmark_label="map_and_batch_and_interleave",
        benchmark_id=benchmark_id)
213
214
# Run the benchmarks when invoked as a script.
if __name__ == "__main__":
  benchmark_base.test.main()
217