1 /* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15
16 // Simple benchmarking facility.
17 #ifndef TENSORFLOW_CORE_PLATFORM_DEFAULT_TEST_BENCHMARK_H_
18 #define TENSORFLOW_CORE_PLATFORM_DEFAULT_TEST_BENCHMARK_H_
19
20 #include <utility>
21 #include <vector>
22
23 #if defined(_MSC_VER)
24 #include <intrin.h> // for _ReadWriteBarrier
25 #endif
26
27 #include "tensorflow/core/platform/logging.h"
28 #include "tensorflow/core/platform/macros.h"
29 #include "tensorflow/core/platform/platform.h"
30 #include "tensorflow/core/platform/types.h"
31
// BENCHMARK(n) defines a file-static pointer, marked TF_ATTRIBUTE_UNUSED, whose
// initializer heap-allocates a ::tensorflow::testing::Benchmark constructed
// from the stringified name `#n` and the function `n`. The variable's name is
// built by pasting `__benchmark_`, `n` and `__LINE__` together. The macro
// itself never deletes the allocated object.
#define BENCHMARK(n)                                            \
  static ::tensorflow::testing::Benchmark* TF_BENCHMARK_CONCAT( \
      __benchmark_, n, __LINE__) TF_ATTRIBUTE_UNUSED =          \
      (new ::tensorflow::testing::Benchmark(#n, (n)))
// Two-level concatenation: the outer macro lets arguments such as __LINE__ be
// macro-expanded before the inner macro pastes them with `##`.
#define TF_BENCHMARK_CONCAT(a, b, c) TF_BENCHMARK_CONCAT2(a, b, c)
#define TF_BENCHMARK_CONCAT2(a, b, c) a##b##c
38
// Forward declaration of ::testing::benchmark::State so that declarations in
// namespace tensorflow::testing can name the type before its full definition
// later in this header.
namespace testing {
namespace benchmark {
class State;
}
}  // namespace testing
44
45 namespace tensorflow {
46 namespace testing {
namespace internal {
// Only declared in this header. DoNotOptimize() passes it the address of its
// argument on builds where _MSC_VER is defined.
void UseCharPointer(char const volatile*);
}
50
51 // The DoNotOptimize(...) function can be used to prevent a value or
52 // expression from being optimized away by the compiler. This function is
53 // intended to add little to no overhead.
54 // See: http://stackoverflow.com/questions/28287064
55 //
56 // The specific guarantees of DoNotOptimize(x) are:
57 // 1) x, and any data it transitively points to, will exist (in a register or
58 // in memory) at the current point in the program.
59 // 2) The optimizer will assume that DoNotOptimize(x) could mutate x or
60 // anything it transitively points to (although it actually doesn't).
61 //
62 // To see this in action:
63 //
64 // void BM_multiply(benchmark::State& state) {
65 // int a = 2;
66 // int b = 4;
67 // for (auto _ : state) {
68 // testing::DoNotOptimize(a);
69 // testing::DoNotOptimize(b);
70 // int c = a * b;
71 // testing::DoNotOptimize(c);
72 // }
73 // }
74 // BENCHMARK(BM_multiply);
75 //
76 // Guarantee (2) applied to 'a' and 'b' prevents the compiler lifting the
77 // multiplication outside of the loop. Guarantee (1) applied to 'c' prevents the
78 // compiler from optimizing away 'c' as dead code.
// Makes the optimizer treat `var` as both used and possibly modified at this
// point in the program. No code that touches `var` is actually emitted on the
// inline-asm path.
//
// Args:
//   var: the value to shield from the optimizer; it is never really written.
template <class T>
void DoNotOptimize(const T& var) {
#if defined(_MSC_VER)
  // MSVC path: hand the address of `var` to internal::UseCharPointer and then
  // issue a compiler read/write barrier.
  internal::UseCharPointer(reinterpret_cast<char const volatile*>(&var));
  _ReadWriteBarrier();
#else
  // "+m" marks `var` itself as read and written by the (empty) asm statement.
  // The "memory" clobber additionally tells the compiler that the statement
  // may read or write memory that is not listed as an operand, which is what
  // covers data `var` merely points to. Without it only `var` itself is
  // protected.
  asm volatile("" : "+m"(const_cast<T&>(var)) : : "memory");
#endif
}
88
89 class Benchmark {
90 public:
91 [[deprecated("use `benchmark::State&` instead.")]] Benchmark(const char* name,
92 void (*fn)(int));
93
94 [[deprecated("use `benchmark::State&` instead.")]] Benchmark(const char* name,
95 void (*fn)(int,
96 int));
97
98 [[deprecated("use `benchmark::State&` instead.")]] Benchmark(
99 const char* name, void (*fn)(int, int, int));
100
101 Benchmark(const char* name, void (*fn)(::testing::benchmark::State&));
102
103 Benchmark* Arg(int x);
104 Benchmark* ArgPair(int x, int y);
105 Benchmark* Range(int lo, int hi);
106 Benchmark* RangePair(int lo1, int hi1, int lo2, int hi2);
107
108 Benchmark* UseRealTime();
109
110 static void Run(const char* pattern);
111
112 private:
113 string name_;
114 int num_args_;
115 int instantiated_num_args_ = -1;
116 std::vector<std::pair<int, int> > args_;
117 void (*fn0_)(int) = nullptr;
118 void (*fn1_)(int, int) = nullptr;
119 void (*fn2_)(int, int, int) = nullptr;
120 void (*fn_state_)(::testing::benchmark::State&) = nullptr;
121
122 void Register();
123 void Run(int arg1, int arg2, int* run_count, double* run_seconds);
124
125 void CheckArgCount(int expected);
126 };
127
// Free-function benchmark API in namespace tensorflow::testing. These are only
// declared in this header.
// NOTE(review): the per-function notes below are inferred from the names
// alone -- confirm against the definitions.
void RunBenchmarks();                     // presumably runs registered benchmarks
void SetLabel(const std::string& label);  // presumably labels the current run
void BytesProcessed(int64);               // presumably records a byte count
void ItemsProcessed(int64);               // presumably records an item count
void StartTiming();                       // presumably (re)starts the timer
void StopTiming();                        // presumably stops the timer
void UseRealTime();                       // presumably selects wall-clock time
135
136 } // namespace testing
137 } // namespace tensorflow
138
// Support the `void BM_Func(benchmark::State&)` interface so that it is
// compatible with the internal version.
141 namespace testing {
142 namespace benchmark {
143 // State is passed as an argument to a benchmark function.
144 // Each thread in threaded benchmarks receives own object.
// State is passed as an argument to a benchmark function.
// Each thread in threaded benchmarks receives its own object.
class State {
 public:
  // Incomplete iterator-like type with a dummy value type so that
  // benchmark::State can support iteration with a range-based for loop.
  //
  // The only supported usage:
  //
  //   static void BM_Foo(benchmark::State& state) {
  //     for (auto s : state) {
  //       // perform single iteration
  //     }
  //   }
  //
  // This is meant to replace the deprecated API:
  //
  //   static void BM_Foo(int iters) {
  //     while (iters-- > 0) {
  //       // perform single iteration
  //     }
  //   }
  //
  // See go/benchmark#old-benchmark-interface for more details.
  class Iterator {
   public:
    // Dummy element type produced by operator*(); it carries no data.
    struct Value {
      // Non-trivial destructor to avoid a warning for the unused dummy
      // variable in the range-based for loop.
      ~Value() {}
    };

    // `parent` is the State being iterated. State::end() constructs the
    // iterator with nullptr.
    explicit Iterator(State* parent);

    // Advances the parent's completed-iteration counter by one.
    Iterator& operator++();

    // Loop-continuation test. `other` is expected to be the end() iterator
    // (null parent).
    bool operator!=(const Iterator& other);

    // Returns a fresh dummy Value.
    Value operator*();

   private:
    State* const parent_;
  };

  // begin() resumes timing before returning an iterator bound to this State.
  // end() returns an iterator with a null parent.
  Iterator begin();
  Iterator end();

  // Timer controls; only declared in this header.
  void PauseTiming();
  void ResumeTiming();

  // Set the number of bytes processed by the current benchmark
  // execution.  This routine is typically called once at the end of a
  // throughput oriented benchmark.  If this routine is called with a
  // value > 0, then bytes processed per second is also reported.
  void SetBytesProcessed(::tensorflow::int64 bytes);

  // If this routine is called with items > 0, then an items/s
  // label is printed on the benchmark report line for the currently
  // executing benchmark. It is typically called at the end of a processing
  // benchmark where a processing items/second output is desired.
  void SetItemsProcessed(::tensorflow::int64 items);

  // If this method is called, the specified label is printed at the
  // end of the benchmark report line for the currently executing
  // benchmark.  Example:
  //  static void BM_Compress(benchmark::State& state) {
  //    ...
  //    double compression = input_size / output_size;
  //    state.SetLabel(StringPrintf("compress:%.1f%%", 100.0*compression));
  //  }
  // Produces output that looks like:
  //  BM_Compress   50         50   14115038  compress:27.3%
  //
  // REQUIRES: a benchmark is currently executing
  void SetLabel(absl::string_view label);

  // For parameterized benchmarks, range(i) returns the value of the ith
  // parameter. Simple benchmarks are not parameterized and do not need to call
  // range().
  int range(size_t i) const;

  // Total number of iterations processed so far.
  size_t iterations() const;

  // Upper bound that the iteration loop compares the completed-iteration
  // counter against. Public and const.
  const size_t
      max_iterations;  // NOLINT: for compatibility with OSS benchmark library

  // Disallow copy and assign.
  State(const State&) = delete;
  State& operator=(const State&) = delete;

 protected:
  // tensorflow::testing::Benchmark is a friend, which gives it access to this
  // protected constructor.
  friend class tensorflow::testing::Benchmark;
  State(size_t max_iterations, int formal_arg_count, std::vector<int> args);

 private:
  // Counter advanced by Iterator and reported by iterations().
  size_t completed_iterations_;
  // NOTE(review): presumably the number of arguments the benchmark function
  // was registered with, and the values served by range() -- confirm against
  // the definitions.
  const int formal_arg_count_;
  const std::vector<int> args_;
};
243
Iterator(State * parent)244 inline State::Iterator::Iterator(State* parent) : parent_(parent) {}
245
iterations()246 inline size_t State::iterations() const { return completed_iterations_; }
247
248 inline bool State::Iterator::operator!=(const Iterator& other) {
249 DCHECK_EQ(other.parent_, nullptr);
250 DCHECK_NE(parent_, nullptr);
251
252 if (parent_->completed_iterations_ < parent_->max_iterations) {
253 return true;
254 }
255
256 ++parent_->completed_iterations_;
257 // If this is the last iteration, stop the timer.
258 parent_->PauseTiming();
259 return false;
260 }
261
262 inline State::Iterator& State::Iterator::operator++() {
263 DCHECK_LT(parent_->completed_iterations_, parent_->max_iterations);
264 ++parent_->completed_iterations_;
265 return *this;
266 }
267
268 inline State::Iterator::Value State::Iterator::operator*() { return Value(); }
269
begin()270 inline State::Iterator State::begin() {
271 // Starts the timer here because if the code uses this API, it expects
272 // the timer to starts at the beginning of this loop.
273 ResumeTiming();
274 return Iterator(this);
275 }
276
end()277 inline State::Iterator State::end() { return Iterator(nullptr); }
278
// Only declared in this header.
// NOTE(review): presumably runs the benchmarks selected by the caller's
// configuration -- confirm against the definition.
void RunSpecifiedBenchmarks();
280
281 } // namespace benchmark
282 } // namespace testing
283
284 #endif // TENSORFLOW_CORE_PLATFORM_DEFAULT_TEST_BENCHMARK_H_
285