1 // Copyright 2015 Google Inc. All rights reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 // Support for registering benchmarks for functions.
16
17 /* Example usage:
18 // Define a function that executes the code to be measured a
19 // specified number of times:
20 static void BM_StringCreation(benchmark::State& state) {
21 for (auto _ : state)
22 std::string empty_string;
23 }
24
25 // Register the function as a benchmark
26 BENCHMARK(BM_StringCreation);
27
28 // Define another benchmark
29 static void BM_StringCopy(benchmark::State& state) {
30 std::string x = "hello";
31 for (auto _ : state)
32 std::string copy(x);
33 }
34 BENCHMARK(BM_StringCopy);
35
36 // Augment the main() program to invoke benchmarks if specified
37 // via the --benchmark_filter command line flag. E.g.,
38 // my_unittest --benchmark_filter=all
39 // my_unittest --benchmark_filter=BM_StringCreation
40 // my_unittest --benchmark_filter=String
41 // my_unittest --benchmark_filter='Copy|Creation'
42 int main(int argc, char** argv) {
43 benchmark::Initialize(&argc, argv);
44 benchmark::RunSpecifiedBenchmarks();
45 return 0;
46 }
47
48 // Sometimes a family of microbenchmarks can be implemented with
49 // just one routine that takes an extra argument to specify which
50 // one of the family of benchmarks to run. For example, the following
51 // code defines a family of microbenchmarks for measuring the speed
52 // of memcpy() calls of different lengths:
53
54 static void BM_memcpy(benchmark::State& state) {
55 char* src = new char[state.range(0)]; char* dst = new char[state.range(0)];
56 memset(src, 'x', state.range(0));
57 for (auto _ : state)
58 memcpy(dst, src, state.range(0));
59 state.SetBytesProcessed(int64_t(state.iterations()) *
60 int64_t(state.range(0)));
61 delete[] src; delete[] dst;
62 }
63 BENCHMARK(BM_memcpy)->Arg(8)->Arg(64)->Arg(512)->Arg(1<<10)->Arg(8<<10);
64
65 // The preceding code is quite repetitive, and can be replaced with the
66 // following short-hand. The following invocation will pick a few
67 // appropriate arguments in the specified range and will generate a
68 // microbenchmark for each such argument.
69 BENCHMARK(BM_memcpy)->Range(8, 8<<10);
70
71 // You might have a microbenchmark that depends on two inputs. For
72 // example, the following code defines a family of microbenchmarks for
73 // measuring the speed of set insertion.
74 static void BM_SetInsert(benchmark::State& state) {
75 std::set<int> data;
76 for (auto _ : state) {
77 state.PauseTiming();
78 data = ConstructRandomSet(state.range(0));
79 state.ResumeTiming();
80 for (int j = 0; j < state.range(1); ++j)
81 data.insert(RandomNumber());
82 }
83 }
84 BENCHMARK(BM_SetInsert)
85 ->Args({1<<10, 128})
86 ->Args({2<<10, 128})
87 ->Args({4<<10, 128})
88 ->Args({8<<10, 128})
89 ->Args({1<<10, 512})
90 ->Args({2<<10, 512})
91 ->Args({4<<10, 512})
92 ->Args({8<<10, 512});
93
94 // The preceding code is quite repetitive, and can be replaced with
95 // the following short-hand. The following macro will pick a few
96 // appropriate arguments in the product of the two specified ranges
97 // and will generate a microbenchmark for each such pair.
98 BENCHMARK(BM_SetInsert)->Ranges({{1<<10, 8<<10}, {128, 512}});
99
100 // For more complex patterns of inputs, passing a custom function
101 // to Apply allows programmatic specification of an
102 // arbitrary set of arguments to run the microbenchmark on.
103 // The following example enumerates a dense range on
104 // one parameter, and a sparse range on the second.
105 static void CustomArguments(benchmark::internal::Benchmark* b) {
106 for (int i = 0; i <= 10; ++i)
107 for (int j = 32; j <= 1024*1024; j *= 8)
108 b->Args({i, j});
109 }
110 BENCHMARK(BM_SetInsert)->Apply(CustomArguments);
111
112 // Templated microbenchmarks work the same way:
113 // Produce then consume 'range(0)' messages per iteration;
114 // measures throughput in the absence of multiprogramming.
115 template <class Q> void BM_Sequential(benchmark::State& state) {
116 Q q;
117 typename Q::value_type v;
118 for (auto _ : state) {
119 for (int i = state.range(0); i--; )
120 q.push(v);
121 for (int e = state.range(0); e--; )
122 q.Wait(&v);
123 }
124 // actually messages, not bytes:
125 state.SetBytesProcessed(
126 static_cast<int64_t>(state.iterations())*state.range(0));
127 }
128 BENCHMARK_TEMPLATE(BM_Sequential, WaitQueue<int>)->Range(1<<0, 1<<10);
129
130 Use `Benchmark::MinTime(double t)` to set the minimum time used to run the
131 benchmark. This option overrides the `benchmark_min_time` flag.
132
133 void BM_test(benchmark::State& state) {
134 ... body ...
135 }
136 BENCHMARK(BM_test)->MinTime(2.0); // Run for at least 2 seconds.
137
138 In a multithreaded test, it is guaranteed that none of the threads will start
139 until all have reached the loop start, and all will have finished before any
140 thread exits the loop body. As such, any global setup or teardown you want to
141 do can be wrapped in a check against the thread index:
142
143 static void BM_MultiThreaded(benchmark::State& state) {
144 if (state.thread_index == 0) {
145 // Setup code here.
146 }
147 for (auto _ : state) {
148 // Run the test as normal.
149 }
150 if (state.thread_index == 0) {
151 // Teardown code here.
152 }
153 }
154 BENCHMARK(BM_MultiThreaded)->Threads(4);
155
156
157 If a benchmark takes only a few milliseconds it may be hard to visually compare
158 the measured times, since the output data is reported in nanoseconds by default.
159 To make the output easier to read, specify the time unit explicitly:
160
161 BENCHMARK(BM_test)->Unit(benchmark::kMillisecond);
162 */
163
164 #ifndef BENCHMARK_BENCHMARK_H_
165 #define BENCHMARK_BENCHMARK_H_
166
167 // The _MSVC_LANG check should detect Visual Studio 2015 Update 3 and newer.
168 #if __cplusplus >= 201103L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201103L)
169 #define BENCHMARK_HAS_CXX11
170 #endif
171
172 #include <stdint.h>
173
174 #include <algorithm>
175 #include <cassert>
176 #include <cstddef>
177 #include <iosfwd>
178 #include <map>
179 #include <set>
180 #include <string>
181 #include <vector>
182
183 #if defined(BENCHMARK_HAS_CXX11)
184 #include <initializer_list>
185 #include <type_traits>
186 #include <utility>
187 #endif
188
189 #if defined(_MSC_VER)
190 #include <intrin.h> // for _ReadWriteBarrier
191 #endif
192
193 #ifndef BENCHMARK_HAS_CXX11
194 #define BENCHMARK_DISALLOW_COPY_AND_ASSIGN(TypeName) \
195 TypeName(const TypeName&); \
196 TypeName& operator=(const TypeName&)
197 #else
198 #define BENCHMARK_DISALLOW_COPY_AND_ASSIGN(TypeName) \
199 TypeName(const TypeName&) = delete; \
200 TypeName& operator=(const TypeName&) = delete
201 #endif
202
203 #if defined(__GNUC__)
204 #define BENCHMARK_UNUSED __attribute__((unused))
205 #define BENCHMARK_ALWAYS_INLINE __attribute__((always_inline))
206 #define BENCHMARK_NOEXCEPT noexcept
207 #define BENCHMARK_NOEXCEPT_OP(x) noexcept(x)
208 #elif defined(_MSC_VER) && !defined(__clang__)
209 #define BENCHMARK_UNUSED
210 #define BENCHMARK_ALWAYS_INLINE __forceinline
211 #if _MSC_VER >= 1900
212 #define BENCHMARK_NOEXCEPT noexcept
213 #define BENCHMARK_NOEXCEPT_OP(x) noexcept(x)
214 #else
215 #define BENCHMARK_NOEXCEPT
216 #define BENCHMARK_NOEXCEPT_OP(x)
217 #endif
218 #define __func__ __FUNCTION__
219 #else
220 #define BENCHMARK_UNUSED
221 #define BENCHMARK_ALWAYS_INLINE
222 #define BENCHMARK_NOEXCEPT
223 #define BENCHMARK_NOEXCEPT_OP(x)
224 #endif
225
226 #define BENCHMARK_INTERNAL_TOSTRING2(x) #x
227 #define BENCHMARK_INTERNAL_TOSTRING(x) BENCHMARK_INTERNAL_TOSTRING2(x)
228
229 #if defined(__GNUC__) || defined(__clang__)
230 #define BENCHMARK_BUILTIN_EXPECT(x, y) __builtin_expect(x, y)
231 #define BENCHMARK_DEPRECATED_MSG(msg) __attribute__((deprecated(msg)))
232 #else
233 #define BENCHMARK_BUILTIN_EXPECT(x, y) x
234 #define BENCHMARK_DEPRECATED_MSG(msg)
235 #define BENCHMARK_WARNING_MSG(msg) \
236 __pragma(message(__FILE__ "(" BENCHMARK_INTERNAL_TOSTRING( \
237 __LINE__) ") : warning note: " msg))
238 #endif
239
240 #if defined(__GNUC__) && !defined(__clang__)
241 #define BENCHMARK_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
242 #endif
243
244 #ifndef __has_builtin
245 #define __has_builtin(x) 0
246 #endif
247
248 #if defined(__GNUC__) || __has_builtin(__builtin_unreachable)
249 #define BENCHMARK_UNREACHABLE() __builtin_unreachable()
250 #elif defined(_MSC_VER)
251 #define BENCHMARK_UNREACHABLE() __assume(false)
252 #else
253 #define BENCHMARK_UNREACHABLE() ((void)0)
254 #endif
255
256 namespace benchmark {
257 class BenchmarkReporter;
258 class MemoryManager;
259
260 void Initialize(int* argc, char** argv);
261
262 // Report to stdout all arguments in 'argv' as unrecognized except the first.
263 // Returns true if there is at least one unrecognized argument (i.e. 'argc' > 1).
264 bool ReportUnrecognizedArguments(int argc, char** argv);
265
266 // Generate a list of benchmarks matching the specified --benchmark_filter flag
267 // and if --benchmark_list_tests is specified return after printing the name
268 // of each matching benchmark. Otherwise run each matching benchmark and
269 // report the results.
270 //
271 // The second and third overloads use the specified 'display_reporter' and
272 // 'file_reporter' respectively. 'file_reporter' will write to the file
273 // specified by '--benchmark_out'.
274 // If '--benchmark_out' is not given, the 'file_reporter' is ignored.
276 //
277 // RETURNS: The number of matching benchmarks.
278 size_t RunSpecifiedBenchmarks();
279 size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter);
280 size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter,
281 BenchmarkReporter* file_reporter);
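// For example (a sketch; 'MyReporter' is a user-supplied class deriving from
// BenchmarkReporter, not something provided by this library):
//   int main(int argc, char** argv) {
//     benchmark::Initialize(&argc, argv);
//     MyReporter reporter;
//     benchmark::RunSpecifiedBenchmarks(&reporter);
//     return 0;
//   }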
282
283 // Register a MemoryManager instance that will be used to collect and report
284 // allocation measurements for benchmark runs.
285 void RegisterMemoryManager(MemoryManager* memory_manager);
286
287 namespace internal {
288 class Benchmark;
289 class BenchmarkImp;
290 class BenchmarkFamilies;
291
292 void UseCharPointer(char const volatile*);
293
294 // Take ownership of the pointer and register the benchmark. Return the
295 // registered benchmark.
296 Benchmark* RegisterBenchmarkInternal(Benchmark*);
297
298 // Ensure that the standard streams are properly initialized in every TU.
299 int InitializeStreams();
300 BENCHMARK_UNUSED static int stream_init_anchor = InitializeStreams();
301
302 } // namespace internal
303
304 #if (!defined(__GNUC__) && !defined(__clang__)) || defined(__pnacl__) || \
305 defined(__EMSCRIPTEN__)
306 #define BENCHMARK_HAS_NO_INLINE_ASSEMBLY
307 #endif
308
309 // The DoNotOptimize(...) function can be used to prevent a value or
310 // expression from being optimized away by the compiler. This function is
311 // intended to add little to no overhead.
312 // See: https://youtu.be/nXaxk27zwlk?t=2441
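// For example, the following sketch keeps an otherwise dead allocation and
// store from being optimized away (the vector work stands in for arbitrary
// user code):
//   static void BM_VectorPushBack(benchmark::State& state) {
//     for (auto _ : state) {
//       std::vector<int> v;
//       v.reserve(1);
//       benchmark::DoNotOptimize(v.data());  // make the allocation observable
//       v.push_back(42);
//       benchmark::ClobberMemory();          // force the write to be treated as visible
//     }
//   }
//   BENCHMARK(BM_VectorPushBack);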
313 #ifndef BENCHMARK_HAS_NO_INLINE_ASSEMBLY
314 template <class Tp>
315 inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) {
316 asm volatile("" : : "r,m"(value) : "memory");
317 }
318
319 template <class Tp>
320 inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp& value) {
321 #if defined(__clang__)
322 asm volatile("" : "+r,m"(value) : : "memory");
323 #else
324 asm volatile("" : "+m,r"(value) : : "memory");
325 #endif
326 }
327
328 // Force the compiler to flush pending writes to global memory. Acts as an
329 // effective read/write barrier
330 inline BENCHMARK_ALWAYS_INLINE void ClobberMemory() {
331 asm volatile("" : : : "memory");
332 }
333 #elif defined(_MSC_VER)
334 template <class Tp>
335 inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) {
336 internal::UseCharPointer(&reinterpret_cast<char const volatile&>(value));
337 _ReadWriteBarrier();
338 }
339
340 inline BENCHMARK_ALWAYS_INLINE void ClobberMemory() { _ReadWriteBarrier(); }
341 #else
342 template <class Tp>
343 inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) {
344 internal::UseCharPointer(&reinterpret_cast<char const volatile&>(value));
345 }
346 // FIXME Add ClobberMemory() for non-gnu and non-msvc compilers
347 #endif
348
349 // This class is used for user-defined counters.
350 class Counter {
351 public:
352 enum Flags {
353 kDefaults = 0,
354 // Mark the counter as a rate. It will be presented divided
355 // by the duration of the benchmark.
356 kIsRate = 1U << 0U,
357 // Mark the counter as a thread-average quantity. It will be
358 // presented divided by the number of threads.
359 kAvgThreads = 1U << 1U,
360 // Mark the counter as a thread-average rate. See above.
361 kAvgThreadsRate = kIsRate | kAvgThreads,
362 // Mark the counter as a constant value, valid/same for *every* iteration.
363 // When reporting, it will be *multiplied* by the iteration count.
364 kIsIterationInvariant = 1U << 2U,
365 // Mark the counter as a constant rate.
366 // When reporting, it will be *multiplied* by the iteration count
367 // and then divided by the duration of the benchmark.
368 kIsIterationInvariantRate = kIsRate | kIsIterationInvariant,
369 // Mark the counter as an iteration-average quantity.
370 // It will be presented divided by the number of iterations.
371 kAvgIterations = 1U << 3U,
372 // Mark the counter as an iteration-average rate. See above.
373 kAvgIterationsRate = kIsRate | kAvgIterations
374 };
375
376 enum OneK {
377 // 1'000 items per 1k
378 kIs1000 = 1000,
379 // 1'024 items per 1k
380 kIs1024 = 1024
381 };
382
383 double value;
384 Flags flags;
385 OneK oneK;
386
387 BENCHMARK_ALWAYS_INLINE
388 Counter(double v = 0., Flags f = kDefaults, OneK k = kIs1000)
389 : value(v), flags(f), oneK(k) {}
390
391 BENCHMARK_ALWAYS_INLINE operator double const&() const { return value; }
392 BENCHMARK_ALWAYS_INLINE operator double&() { return value; }
393 };
394
395 // A helper that lets user code combine Counter::Flags values without having
396 // to write the cast manually each time, or provide this operator itself.
397 Counter::Flags inline operator|(const Counter::Flags& LHS,
398 const Counter::Flags& RHS) {
399 return static_cast<Counter::Flags>(static_cast<int>(LHS) |
400 static_cast<int>(RHS));
401 }
402
403 // This is the container for the user-defined counters.
404 typedef std::map<std::string, Counter> UserCounters;
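// For example, a benchmark body might record a user counter as a rate
// (a sketch; 'ProcessBatch()' is a placeholder for user code):
//   static void BM_Process(benchmark::State& state) {
//     int64_t items = 0;
//     for (auto _ : state) items += ProcessBatch();
//     state.counters["items"] =
//         benchmark::Counter(static_cast<double>(items),
//                            benchmark::Counter::kIsRate);
//   }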
405
406 // TimeUnit is passed to a benchmark in order to specify the order of magnitude
407 // for the measured time.
408 enum TimeUnit { kNanosecond, kMicrosecond, kMillisecond };
409
410 // BigO is passed to a benchmark in order to specify the asymptotic
411 // computational complexity for the benchmark. If oAuto is selected,
412 // the complexity will be calculated automatically to the best fit
413 // from the collected measurements.
414 enum BigO { oNone, o1, oN, oNSquared, oNCubed, oLogN, oNLogN, oAuto, oLambda };
415
416 // BigOFunc is passed to a benchmark in order to specify the asymptotic
417 // computational complexity for the benchmark.
418 typedef double(BigOFunc)(int64_t);
419
420 // StatisticsFunc is passed to a benchmark in order to compute some descriptive
421 // statistics over all the measurements of some type
422 typedef double(StatisticsFunc)(const std::vector<double>&);
423
424 struct Statistics {
425 std::string name_;
426 StatisticsFunc* compute_;
427
428 Statistics(const std::string& name, StatisticsFunc* compute)
429 : name_(name), compute_(compute) {}
430 };
431
432 namespace internal {
433 struct BenchmarkInstance;
434 class ThreadTimer;
435 class ThreadManager;
436
437 enum AggregationReportMode
438 #if defined(BENCHMARK_HAS_CXX11)
439 : unsigned
440 #else
441 #endif
442 {
443 // The mode has not been manually specified
444 ARM_Unspecified = 0,
445 // The mode is user-specified.
446 // This may or may not be set when the following bit-flags are set.
447 ARM_Default = 1U << 0U,
448 // File reporter should only output aggregates.
449 ARM_FileReportAggregatesOnly = 1U << 1U,
450 // Display reporter should only output aggregates
451 ARM_DisplayReportAggregatesOnly = 1U << 2U,
452 // Both reporters should only display aggregates.
453 ARM_ReportAggregatesOnly =
454 ARM_FileReportAggregatesOnly | ARM_DisplayReportAggregatesOnly
455 };
456
457 } // namespace internal
458
459 // State is passed to a running Benchmark and contains state for the
460 // benchmark to use.
461 class State {
462 public:
463 struct StateIterator;
464 friend struct StateIterator;
465
466 // Returns iterators used to run each iteration of a benchmark using a
467 // C++11 range-based for loop. These functions should not be called directly.
468 //
469 // REQUIRES: The benchmark has not started running yet. Neither begin nor end
470 // have been called previously.
471 //
472 // NOTE: KeepRunning may not be used after calling either of these functions.
473 BENCHMARK_ALWAYS_INLINE StateIterator begin();
474 BENCHMARK_ALWAYS_INLINE StateIterator end();
475
476 // Returns true if the benchmark should continue through another iteration.
477 // NOTE: A benchmark must not return from the test until KeepRunning() has
478 // returned false.
479 bool KeepRunning();
480
481 // Returns true iff the benchmark should run n more iterations.
482 // REQUIRES: 'n' > 0.
483 // NOTE: A benchmark must not return from the test until KeepRunningBatch()
484 // has returned false.
485 // NOTE: KeepRunningBatch() may overshoot by up to 'n' iterations.
486 //
487 // Intended usage:
488 // while (state.KeepRunningBatch(1000)) {
489 // // process 1000 elements
490 // }
491 bool KeepRunningBatch(size_t n);
492
493 // REQUIRES: timer is running and 'SkipWithError(...)' has not been called
494 // by the current thread.
495 // Stop the benchmark timer. If not called, the timer will be
496 // automatically stopped after the last iteration of the benchmark loop.
497 //
498 // For threaded benchmarks the PauseTiming() function only pauses the timing
499 // for the current thread.
500 //
501 // NOTE: The "real time" measurement is per-thread. If different threads
502 // report different measurements the largest one is reported.
503 //
504 // NOTE: PauseTiming()/ResumeTiming() are relatively
505 // heavyweight, and so their use should generally be avoided
506 // within each benchmark iteration, if possible.
507 void PauseTiming();
508
509 // REQUIRES: timer is not running and 'SkipWithError(...)' has not been called
510 // by the current thread.
511 // Start the benchmark timer. The timer is NOT running on entrance to the
512 // benchmark function. It begins running after control flow enters the
513 // benchmark loop.
514 //
515 // NOTE: PauseTiming()/ResumeTiming() are relatively
516 // heavyweight, and so their use should generally be avoided
517 // within each benchmark iteration, if possible.
518 void ResumeTiming();
519
520 // REQUIRES: 'SkipWithError(...)' has not been called previously by the
521 // current thread.
522 // Report the benchmark as resulting in an error with the specified 'msg'.
523 // After this call the user may explicitly 'return' from the benchmark.
524 //
525 // If the ranged-for style of benchmark loop is used, the user must explicitly
526 // break from the loop, otherwise all future iterations will be run.
527 // If the 'KeepRunning()' loop is used the current thread will automatically
528 // exit the loop at the end of the current iteration.
529 //
530 // For threaded benchmarks only the current thread stops executing and future
531 // calls to `KeepRunning()` will block until all threads have completed
532 // the `KeepRunning()` loop. If multiple threads report an error only the
533 // first error message is used.
534 //
535 // NOTE: Calling 'SkipWithError(...)' does not cause the benchmark to exit
536 // the current scope immediately. If the function is called from within
537 // the 'KeepRunning()' loop the current iteration will finish. It is the user's
538 // responsibility to exit the scope as needed.
539 void SkipWithError(const char* msg);
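// For example, with the ranged-for loop (a sketch; 'ResourceAvailable()' is a
// placeholder for user code):
//   for (auto _ : state) {
//     if (!ResourceAvailable()) {
//       state.SkipWithError("resource not available");
//       break;  // the ranged-for loop must be exited explicitly on error
//     }
//     // ... run the measured code ...
//   }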
540
541 // REQUIRES: called exactly once per iteration of the benchmarking loop.
542 // Set the manually measured time for this benchmark iteration, which
543 // is used instead of automatically measured time if UseManualTime() was
544 // specified.
545 //
546 // For threaded benchmarks the final value will be set to the largest
547 // reported value.
548 void SetIterationTime(double seconds);
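// For example (a sketch using std::chrono as the external clock; the work
// being timed is a placeholder):
//   static void BM_ManualTiming(benchmark::State& state) {
//     for (auto _ : state) {
//       auto start = std::chrono::high_resolution_clock::now();
//       // ... launch and wait for the asynchronous work ...
//       auto end = std::chrono::high_resolution_clock::now();
//       state.SetIterationTime(
//           std::chrono::duration<double>(end - start).count());
//     }
//   }
//   BENCHMARK(BM_ManualTiming)->UseManualTime();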
549
550 // Set the number of bytes processed by the current benchmark
551 // execution. This routine is typically called once at the end of a
552 // throughput oriented benchmark.
553 //
554 // REQUIRES: a benchmark has exited its benchmarking loop.
555 BENCHMARK_ALWAYS_INLINE
556 void SetBytesProcessed(int64_t bytes) {
557 counters["bytes_per_second"] =
558 Counter(static_cast<double>(bytes), Counter::kIsRate, Counter::kIs1024);
559 }
560
561 BENCHMARK_ALWAYS_INLINE
562 int64_t bytes_processed() const {
563 if (counters.find("bytes_per_second") != counters.end())
564 return static_cast<int64_t>(counters.at("bytes_per_second"));
565 return 0;
566 }
567
568 // If this routine is called with complexity_n > 0 and a complexity report is
569 // requested for the benchmark family, then the current benchmark will be part
570 // of the computation and complexity_n will represent the length of N.
571 // Typically this is called with the size parameter of the benchmark, e.g.
572 // state.SetComplexityN(state.range(0)).
573 BENCHMARK_ALWAYS_INLINE
574 void SetComplexityN(int64_t complexity_n) { complexity_n_ = complexity_n; }
575
576 BENCHMARK_ALWAYS_INLINE
577 int64_t complexity_length_n() { return complexity_n_; }
578
579 // If this routine is called with items > 0, then an items/s
580 // label is printed on the benchmark report line for the currently
581 // executing benchmark. It is typically called at the end of a processing
582 // benchmark where a processing items/second output is desired.
583 //
584 // REQUIRES: a benchmark has exited its benchmarking loop.
585 BENCHMARK_ALWAYS_INLINE
586 void SetItemsProcessed(int64_t items) {
587 counters["items_per_second"] =
588 Counter(static_cast<double>(items), benchmark::Counter::kIsRate);
589 }
590
591 BENCHMARK_ALWAYS_INLINE
592 int64_t items_processed() const {
593 if (counters.find("items_per_second") != counters.end())
594 return static_cast<int64_t>(counters.at("items_per_second"));
595 return 0;
596 }
597
598 // If this routine is called, the specified label is printed at the
599 // end of the benchmark report line for the currently executing
600 // benchmark. Example:
601 // static void BM_Compress(benchmark::State& state) {
602 // ...
603 // double compression = input_size / output_size;
604 // state.SetLabel(StrFormat("compress:%.1f%%", 100.0 * compression));
605 // }
606 // Produces output that looks like:
607 // BM_Compress 50 50 14115038 compress:27.3%
608 //
609 // REQUIRES: a benchmark has exited its benchmarking loop.
610 void SetLabel(const char* label);
611
612 void BENCHMARK_ALWAYS_INLINE SetLabel(const std::string& str) {
613 this->SetLabel(str.c_str());
614 }
615
616 // Range arguments for this run. CHECKs if the argument has been set.
617 BENCHMARK_ALWAYS_INLINE
618 int64_t range(std::size_t pos = 0) const {
619 assert(range_.size() > pos);
620 return range_[pos];
621 }
622
623 BENCHMARK_DEPRECATED_MSG("use 'range(0)' instead")
624 int64_t range_x() const { return range(0); }
625
626 BENCHMARK_DEPRECATED_MSG("use 'range(1)' instead")
627 int64_t range_y() const { return range(1); }
628
629 BENCHMARK_ALWAYS_INLINE
630 size_t iterations() const {
631 if (BENCHMARK_BUILTIN_EXPECT(!started_, false)) {
632 return 0;
633 }
634 return max_iterations - total_iterations_ + batch_leftover_;
635 }
636
637 private:
638 // items we expect on the first cache line (i.e. 64 bytes of the struct)
639 // When total_iterations_ is 0, KeepRunning() and friends will return false.
640 // May be larger than max_iterations.
641 size_t total_iterations_;
642
643 // When using KeepRunningBatch(), batch_leftover_ holds the number of
644 // iterations beyond max_iters that were run. Used to track
645 // completed_iterations_ accurately.
646 size_t batch_leftover_;
647
648 public:
649 const size_t max_iterations;
650
651 private:
652 bool started_;
653 bool finished_;
654 bool error_occurred_;
655
656 private: // items we don't need on the first cache line
657 std::vector<int64_t> range_;
658
659 int64_t complexity_n_;
660
661 public:
662 // Container for user-defined counters.
663 UserCounters counters;
664 // Index of the executing thread. Values from [0, threads).
665 const int thread_index;
666 // Number of threads concurrently executing the benchmark.
667 const int threads;
668
669 private:
670 State(size_t max_iters, const std::vector<int64_t>& ranges, int thread_i,
671 int n_threads, internal::ThreadTimer* timer,
672 internal::ThreadManager* manager);
673
674 void StartKeepRunning();
675 // Implementation of KeepRunning() and KeepRunningBatch().
676 // is_batch must be true unless n is 1.
677 bool KeepRunningInternal(size_t n, bool is_batch);
678 void FinishKeepRunning();
679 internal::ThreadTimer* timer_;
680 internal::ThreadManager* manager_;
681
682 friend struct internal::BenchmarkInstance;
683 };
684
685 inline BENCHMARK_ALWAYS_INLINE bool State::KeepRunning() {
686 return KeepRunningInternal(1, /*is_batch=*/false);
687 }
688
689 inline BENCHMARK_ALWAYS_INLINE bool State::KeepRunningBatch(size_t n) {
690 return KeepRunningInternal(n, /*is_batch=*/true);
691 }
692
693 inline BENCHMARK_ALWAYS_INLINE bool State::KeepRunningInternal(size_t n,
694 bool is_batch) {
695 // total_iterations_ is set to 0 by the constructor, and always set to a
696 // nonzero value by StartKeepRunning().
697 assert(n > 0);
698 // n must be 1 unless is_batch is true.
699 assert(is_batch || n == 1);
700 if (BENCHMARK_BUILTIN_EXPECT(total_iterations_ >= n, true)) {
701 total_iterations_ -= n;
702 return true;
703 }
704 if (!started_) {
705 StartKeepRunning();
706 if (!error_occurred_ && total_iterations_ >= n) {
707 total_iterations_ -= n;
708 return true;
709 }
710 }
711 // For non-batch runs, total_iterations_ must be 0 by now.
712 if (is_batch && total_iterations_ != 0) {
713 batch_leftover_ = n - total_iterations_;
714 total_iterations_ = 0;
715 return true;
716 }
717 FinishKeepRunning();
718 return false;
719 }
720
721 struct State::StateIterator {
722 struct BENCHMARK_UNUSED Value {};
723 typedef std::forward_iterator_tag iterator_category;
724 typedef Value value_type;
725 typedef Value reference;
726 typedef Value pointer;
727 typedef std::ptrdiff_t difference_type;
728
729 private:
730 friend class State;
731 BENCHMARK_ALWAYS_INLINE
732 StateIterator() : cached_(0), parent_() {}
733
734 BENCHMARK_ALWAYS_INLINE
735 explicit StateIterator(State* st)
736 : cached_(st->error_occurred_ ? 0 : st->max_iterations), parent_(st) {}
737
738 public:
739 BENCHMARK_ALWAYS_INLINE
740 Value operator*() const { return Value(); }
741
742 BENCHMARK_ALWAYS_INLINE
743 StateIterator& operator++() {
744 assert(cached_ > 0);
745 --cached_;
746 return *this;
747 }
748
749 BENCHMARK_ALWAYS_INLINE
750 bool operator!=(StateIterator const&) const {
751 if (BENCHMARK_BUILTIN_EXPECT(cached_ != 0, true)) return true;
752 parent_->FinishKeepRunning();
753 return false;
754 }
755
756 private:
757 size_t cached_;
758 State* const parent_;
759 };
760
761 inline BENCHMARK_ALWAYS_INLINE State::StateIterator State::begin() {
762 return StateIterator(this);
763 }
764 inline BENCHMARK_ALWAYS_INLINE State::StateIterator State::end() {
765 StartKeepRunning();
766 return StateIterator();
767 }
768
769 namespace internal {
770
771 typedef void(Function)(State&);
772
773 // ------------------------------------------------------
774 // Benchmark registration object. The BENCHMARK() macro expands
775 // into an internal::Benchmark* object. Various methods can
776 // be called on this object to change the properties of the benchmark.
777 // Each method returns "this" so that multiple method calls can
778 // be chained into one expression.
779 class Benchmark {
780 public:
781 virtual ~Benchmark();
782
783 // Note: the following methods all return "this" so that multiple
784 // method calls can be chained together in one expression.
785
786 // Run this benchmark once with "x" as the extra argument passed
787 // to the function.
788 // REQUIRES: The function passed to the constructor must accept an arg1.
789 Benchmark* Arg(int64_t x);
790
791 // Run this benchmark with the given time unit for the generated output report
792 Benchmark* Unit(TimeUnit unit);
793
794 // Run this benchmark once for a number of values picked from the
795 // range [start..limit]. (start and limit are always picked.)
796 // REQUIRES: The function passed to the constructor must accept an arg1.
797 Benchmark* Range(int64_t start, int64_t limit);
798
799 // Run this benchmark once for all values in the range [start..limit] with
800 // the specified step.
801 // REQUIRES: The function passed to the constructor must accept an arg1.
802 Benchmark* DenseRange(int64_t start, int64_t limit, int step = 1);
803
804 // Run this benchmark once with "args" as the extra arguments passed
805 // to the function.
806 // REQUIRES: The function passed to the constructor must accept arg1, arg2 ...
807 Benchmark* Args(const std::vector<int64_t>& args);
808
809 // Equivalent to Args({x, y})
810 // NOTE: This is a legacy C++03 interface provided for compatibility only.
811 // New code should use 'Args'.
812 Benchmark* ArgPair(int64_t x, int64_t y) {
813 std::vector<int64_t> args;
814 args.push_back(x);
815 args.push_back(y);
816 return Args(args);
817 }
818
819 // Run this benchmark once for a number of values picked from the
820 // ranges [start..limit]. (starts and limits are always picked.)
821 // REQUIRES: The function passed to the constructor must accept arg1, arg2 ...
822 Benchmark* Ranges(const std::vector<std::pair<int64_t, int64_t> >& ranges);
823
824 // Equivalent to ArgNames({name})
825 Benchmark* ArgName(const std::string& name);
826
827 // Set the argument names to display in the benchmark name. If not called,
828 // only argument values will be shown.
829 Benchmark* ArgNames(const std::vector<std::string>& names);
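// For example (a sketch), the following names the two arguments of the
// BM_SetInsert family shown earlier, so each run is reported with named
// arguments (e.g. "BM_SetInsert/size:1024/inserts:128" rather than
// "BM_SetInsert/1024/128"):
//   BENCHMARK(BM_SetInsert)
//       ->Ranges({{1 << 10, 8 << 10}, {128, 512}})
//       ->ArgNames({"size", "inserts"});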
830
831 // Equivalent to Ranges({{lo1, hi1}, {lo2, hi2}}).
832 // NOTE: This is a legacy C++03 interface provided for compatibility only.
833 // New code should use 'Ranges'.
834 Benchmark* RangePair(int64_t lo1, int64_t hi1, int64_t lo2, int64_t hi2) {
835 std::vector<std::pair<int64_t, int64_t> > ranges;
836 ranges.push_back(std::make_pair(lo1, hi1));
837 ranges.push_back(std::make_pair(lo2, hi2));
838 return Ranges(ranges);
839 }
840
841 // Pass this benchmark object to *func, which can customize
842 // the benchmark by calling various methods like Arg, Args,
843 // Threads, etc.
844 Benchmark* Apply(void (*func)(Benchmark* benchmark));
845
846 // Set the range multiplier for non-dense range. If not called, the range
847 // multiplier kRangeMultiplier will be used.
848 Benchmark* RangeMultiplier(int multiplier);
849
850 // Set the minimum amount of time to use when running this benchmark. This
851 // option overrides the `benchmark_min_time` flag.
852 // REQUIRES: `t > 0` and `Iterations` has not been called on this benchmark.
853 Benchmark* MinTime(double t);
854
855 // Specify the number of iterations that should be run by this benchmark.
856 // REQUIRES: 'n > 0' and `MinTime` has not been called on this benchmark.
857 //
858 // NOTE: This function should only be used when *exact* iteration control is
859 // needed and never to control or limit how long a benchmark runs, where
860 // `--benchmark_min_time=N` or `MinTime(...)` should be used instead.
861 Benchmark* Iterations(size_t n);
862
863 // Specify the number of times to repeat this benchmark. This option overrides
864 // the `benchmark_repetitions` flag.
865 // REQUIRES: `n > 0`
866 Benchmark* Repetitions(int n);
867
868 // Specify if each repetition of the benchmark should be reported separately
869 // or if only the final statistics should be reported. If the benchmark
870 // is not repeated then the single result is always reported.
871 // Applies to *ALL* reporters (display and file).
872 Benchmark* ReportAggregatesOnly(bool value = true);
873
874 // Same as ReportAggregatesOnly(), but applies to display reporter only.
875 Benchmark* DisplayAggregatesOnly(bool value = true);
876
877 // If a particular benchmark is I/O bound, runs multiple threads internally, or
878 // if for some reason CPU timings are not representative, call this method. If
879 // called, the elapsed (wall-clock) time will be used to control how many
880 // iterations are run, and in the printing of items/second or MB/second values.
881 // If not called, the CPU time used by the benchmark will be used.
882 Benchmark* UseRealTime();
883
884 // If a benchmark must measure time manually (e.g. if GPU execution time is
885 // being measured), call this method. If called, each benchmark iteration
886 // should call SetIterationTime(seconds) to report the measured time, which
887 // will be used to control how many iterations are run, and in the printing
888 // of items/second or MB/second values.
891 Benchmark* UseManualTime();
892
893 // Set the asymptotic computational complexity for the benchmark. If called
894 // the asymptotic computational complexity will be shown on the output.
895 Benchmark* Complexity(BigO complexity = benchmark::oAuto);
896
897 // Set the asymptotic computational complexity for the benchmark. If called
898 // the asymptotic computational complexity will be shown on the output.
899 Benchmark* Complexity(BigOFunc* complexity);
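// For example (a sketch; BM_StringCompare is a user-defined benchmark whose
// body calls state.SetComplexityN(state.range(0))):
//   BENCHMARK(BM_StringCompare)
//       ->RangeMultiplier(2)->Range(1 << 10, 1 << 18)
//       ->Complexity(benchmark::oN);
// or, with a user-supplied fitting curve:
//   BENCHMARK(BM_StringCompare)
//       ->RangeMultiplier(2)->Range(1 << 10, 1 << 18)
//       ->Complexity([](int64_t n) { return static_cast<double>(n); });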
900
901 // Add a statistic to be computed over all measurements of the benchmark run.
902 Benchmark* ComputeStatistics(std::string name, StatisticsFunc* statistics);
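// For example (a sketch), additionally reporting the maximum over 10
// repetitions:
//   BENCHMARK(BM_test)->Repetitions(10)->ComputeStatistics(
//       "max", [](const std::vector<double>& v) -> double {
//         return *(std::max_element(v.begin(), v.end()));
//       });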
903
904 // Support for running multiple copies of the same benchmark concurrently
905 // in multiple threads. This may be useful when measuring the scaling
906 // of some piece of code.
907
908 // Run one instance of this benchmark concurrently in t threads.
909 Benchmark* Threads(int t);
910
911 // Pick a set of values T from [min_threads,max_threads].
912 // min_threads and max_threads are always included in T. Run this
913 // benchmark once for each value in T. The benchmark run for a
914 // particular value t consists of t threads running the benchmark
915 // function concurrently. For example, consider:
916 // BENCHMARK(Foo)->ThreadRange(1,16);
917 // This will run the following benchmarks:
918 // Foo in 1 thread
919 // Foo in 2 threads
920 // Foo in 4 threads
921 // Foo in 8 threads
922 // Foo in 16 threads
923 Benchmark* ThreadRange(int min_threads, int max_threads);
924
925 // For each value n in the range, run this benchmark once using n threads.
926 // min_threads and max_threads are always included in the range.
927 // stride specifies the increment. E.g. DenseThreadRange(1, 8, 3) starts
928 // a benchmark with 1, 4, 7 and 8 threads.
929 Benchmark* DenseThreadRange(int min_threads, int max_threads, int stride = 1);
930
931 // Equivalent to ThreadRange(NumCPUs(), NumCPUs())
932 Benchmark* ThreadPerCpu();
933
934 virtual void Run(State& state) = 0;
935
936 protected:
937 explicit Benchmark(const char* name);
938 Benchmark(Benchmark const&);
939 void SetName(const char* name);
940
941 int ArgsCnt() const;
942
943 private:
944 friend class BenchmarkFamilies;
945
946 std::string name_;
947 AggregationReportMode aggregation_report_mode_;
948 std::vector<std::string> arg_names_; // Args for all benchmark runs
949 std::vector<std::vector<int64_t> > args_; // Args for all benchmark runs
950 TimeUnit time_unit_;
951 int range_multiplier_;
952 double min_time_;
953 size_t iterations_;
954 int repetitions_;
955 bool use_real_time_;
956 bool use_manual_time_;
957 BigO complexity_;
958 BigOFunc* complexity_lambda_;
959 std::vector<Statistics> statistics_;
960 std::vector<int> thread_counts_;
961
962 Benchmark& operator=(Benchmark const&);
963 };
964
965 } // namespace internal
966
967 // Create and register a benchmark with the specified 'name' that invokes
968 // the specified functor 'fn'.
969 //
970 // RETURNS: A pointer to the registered benchmark.
971 internal::Benchmark* RegisterBenchmark(const char* name,
972 internal::Function* fn);
973
974 #if defined(BENCHMARK_HAS_CXX11)
975 template <class Lambda>
976 internal::Benchmark* RegisterBenchmark(const char* name, Lambda&& fn);
977 #endif
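// For example, benchmarks can be registered programmatically at runtime
// (a sketch; 'inputs' and 'Process()' are placeholders for user code):
//   int main(int argc, char** argv) {
//     benchmark::Initialize(&argc, argv);
//     for (const std::string& input : inputs) {
//       benchmark::RegisterBenchmark(input.c_str(),
//                                    [input](benchmark::State& st) {
//                                      for (auto _ : st) Process(input);
//                                    });
//     }
//     benchmark::RunSpecifiedBenchmarks();
//   }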
978
979 // Remove all registered benchmarks. All pointers to previously registered
980 // benchmarks are invalidated.
981 void ClearRegisteredBenchmarks();
982
983 namespace internal {
984 // The class used to hold all Benchmarks created from static functions
985 // (i.e. those created using the BENCHMARK(...) macros).
986 class FunctionBenchmark : public Benchmark {
987 public:
988 FunctionBenchmark(const char* name, Function* func)
989 : Benchmark(name), func_(func) {}
990
991 virtual void Run(State& st);
992
993 private:
994 Function* func_;
995 };
996
997 #ifdef BENCHMARK_HAS_CXX11
998 template <class Lambda>
999 class LambdaBenchmark : public Benchmark {
1000 public:
1001 virtual void Run(State& st) { lambda_(st); }
1002
1003 private:
1004 template <class OLambda>
1005 LambdaBenchmark(const char* name, OLambda&& lam)
1006 : Benchmark(name), lambda_(std::forward<OLambda>(lam)) {}
1007
1008 LambdaBenchmark(LambdaBenchmark const&) = delete;
1009
1010 private:
1011 template <class Lam>
1012 friend Benchmark* ::benchmark::RegisterBenchmark(const char*, Lam&&);
1013
1014 Lambda lambda_;
1015 };
1016 #endif
1017
1018 } // namespace internal
1019
1020 inline internal::Benchmark* RegisterBenchmark(const char* name,
1021 internal::Function* fn) {
1022 return internal::RegisterBenchmarkInternal(
1023 ::new internal::FunctionBenchmark(name, fn));
1024 }
1025
1026 #ifdef BENCHMARK_HAS_CXX11
1027 template <class Lambda>
1028 internal::Benchmark* RegisterBenchmark(const char* name, Lambda&& fn) {
1029 using BenchType =
1030 internal::LambdaBenchmark<typename std::decay<Lambda>::type>;
1031 return internal::RegisterBenchmarkInternal(
1032 ::new BenchType(name, std::forward<Lambda>(fn)));
1033 }
1034 #endif
1035
1036 #if defined(BENCHMARK_HAS_CXX11) && \
1037 (!defined(BENCHMARK_GCC_VERSION) || BENCHMARK_GCC_VERSION >= 409)
1038 template <class Lambda, class... Args>
1039 internal::Benchmark* RegisterBenchmark(const char* name, Lambda&& fn,
1040 Args&&... args) {
1041 return benchmark::RegisterBenchmark(
1042 name, [=](benchmark::State& st) { fn(st, args...); });
1043 }
1044 #else
1045 #define BENCHMARK_HAS_NO_VARIADIC_REGISTER_BENCHMARK
1046 #endif
1047
1048 // The base class for all fixture tests.
1049 class Fixture : public internal::Benchmark {
1050 public:
1051 Fixture() : internal::Benchmark("") {}
1052
1053 virtual void Run(State& st) {
1054 this->SetUp(st);
1055 this->BenchmarkCase(st);
1056 this->TearDown(st);
1057 }
1058
1059 // These will be deprecated ...
1060 virtual void SetUp(const State&) {}
1061 virtual void TearDown(const State&) {}
1062 // ... In favor of these.
1063 virtual void SetUp(State& st) { SetUp(const_cast<const State&>(st)); }
1064 virtual void TearDown(State& st) { TearDown(const_cast<const State&>(st)); }
1065
1066 protected:
1067 virtual void BenchmarkCase(State&) = 0;
1068 };
1069
1070 } // namespace benchmark
1071
1072 // ------------------------------------------------------
1073 // Macro to register benchmarks
1074
1075 // Check that __COUNTER__ is defined and that __COUNTER__ increases by 1
1076 // every time it is expanded. X + 1 == X + 0 is used in case X is defined to be
1077 // empty. If X is empty the expression becomes (+1 == +0).
1078 #if defined(__COUNTER__) && (__COUNTER__ + 1 == __COUNTER__ + 0)
1079 #define BENCHMARK_PRIVATE_UNIQUE_ID __COUNTER__
1080 #else
1081 #define BENCHMARK_PRIVATE_UNIQUE_ID __LINE__
1082 #endif
1083
1084 // Helpers for generating unique variable names
1085 #define BENCHMARK_PRIVATE_NAME(n) \
1086 BENCHMARK_PRIVATE_CONCAT(_benchmark_, BENCHMARK_PRIVATE_UNIQUE_ID, n)
1087 #define BENCHMARK_PRIVATE_CONCAT(a, b, c) BENCHMARK_PRIVATE_CONCAT2(a, b, c)
1088 #define BENCHMARK_PRIVATE_CONCAT2(a, b, c) a##b##c
1089
1090 #define BENCHMARK_PRIVATE_DECLARE(n) \
1091 static ::benchmark::internal::Benchmark* BENCHMARK_PRIVATE_NAME(n) \
1092 BENCHMARK_UNUSED
1093
1094 #define BENCHMARK(n) \
1095 BENCHMARK_PRIVATE_DECLARE(n) = \
1096 (::benchmark::internal::RegisterBenchmarkInternal( \
1097 new ::benchmark::internal::FunctionBenchmark(#n, n)))
1098
1099 // Old-style macros
1100 #define BENCHMARK_WITH_ARG(n, a) BENCHMARK(n)->Arg((a))
1101 #define BENCHMARK_WITH_ARG2(n, a1, a2) BENCHMARK(n)->Args({(a1), (a2)})
1102 #define BENCHMARK_WITH_UNIT(n, t) BENCHMARK(n)->Unit((t))
1103 #define BENCHMARK_RANGE(n, lo, hi) BENCHMARK(n)->Range((lo), (hi))
1104 #define BENCHMARK_RANGE2(n, l1, h1, l2, h2) \
1105 BENCHMARK(n)->RangePair((l1), (h1), (l2), (h2))
1106
1107 #ifdef BENCHMARK_HAS_CXX11
1108
1109 // Register a benchmark which invokes the function specified by `func`
1110 // with the additional arguments specified by `...`.
1111 //
1112 // For example:
1113 //
1114 // template <class ...ExtraArgs>
1115 // void BM_takes_args(benchmark::State& state, ExtraArgs&&... extra_args) {
1116 //   [...]
1117 // }
1118 // /* Registers a benchmark named "BM_takes_args/int_string_test" */
1119 // BENCHMARK_CAPTURE(BM_takes_args, int_string_test, 42, std::string("abc"));
1120 #define BENCHMARK_CAPTURE(func, test_case_name, ...) \
1121 BENCHMARK_PRIVATE_DECLARE(func) = \
1122 (::benchmark::internal::RegisterBenchmarkInternal( \
1123 new ::benchmark::internal::FunctionBenchmark( \
1124 #func "/" #test_case_name, \
1125 [](::benchmark::State& st) { func(st, __VA_ARGS__); })))
1126
1127 #endif // BENCHMARK_HAS_CXX11
1128
1129 // This will register a benchmark for a templatized function. For example:
1130 //
1131 // template<int arg>
1132 // void BM_Foo(benchmark::State& state);
1133 //
1134 // BENCHMARK_TEMPLATE(BM_Foo, 1);
1135 //
1136 // will register BM_Foo<1> as a benchmark.
1137 #define BENCHMARK_TEMPLATE1(n, a) \
1138 BENCHMARK_PRIVATE_DECLARE(n) = \
1139 (::benchmark::internal::RegisterBenchmarkInternal( \
1140 new ::benchmark::internal::FunctionBenchmark(#n "<" #a ">", n<a>)))
1141
1142 #define BENCHMARK_TEMPLATE2(n, a, b) \
1143 BENCHMARK_PRIVATE_DECLARE(n) = \
1144 (::benchmark::internal::RegisterBenchmarkInternal( \
1145 new ::benchmark::internal::FunctionBenchmark(#n "<" #a "," #b ">", \
1146 n<a, b>)))
1147
1148 #ifdef BENCHMARK_HAS_CXX11
1149 #define BENCHMARK_TEMPLATE(n, ...) \
1150 BENCHMARK_PRIVATE_DECLARE(n) = \
1151 (::benchmark::internal::RegisterBenchmarkInternal( \
1152 new ::benchmark::internal::FunctionBenchmark( \
1153 #n "<" #__VA_ARGS__ ">", n<__VA_ARGS__>)))
1154 #else
1155 #define BENCHMARK_TEMPLATE(n, a) BENCHMARK_TEMPLATE1(n, a)
1156 #endif
1157
1158 #define BENCHMARK_PRIVATE_DECLARE_F(BaseClass, Method) \
1159 class BaseClass##_##Method##_Benchmark : public BaseClass { \
1160 public: \
1161 BaseClass##_##Method##_Benchmark() : BaseClass() { \
1162 this->SetName(#BaseClass "/" #Method); \
1163 } \
1164 \
1165 protected: \
1166 virtual void BenchmarkCase(::benchmark::State&); \
1167 };
1168
1169 #define BENCHMARK_TEMPLATE1_PRIVATE_DECLARE_F(BaseClass, Method, a) \
1170 class BaseClass##_##Method##_Benchmark : public BaseClass<a> { \
1171 public: \
1172 BaseClass##_##Method##_Benchmark() : BaseClass<a>() { \
1173 this->SetName(#BaseClass "<" #a ">/" #Method); \
1174 } \
1175 \
1176 protected: \
1177 virtual void BenchmarkCase(::benchmark::State&); \
1178 };
1179
1180 #define BENCHMARK_TEMPLATE2_PRIVATE_DECLARE_F(BaseClass, Method, a, b) \
1181 class BaseClass##_##Method##_Benchmark : public BaseClass<a, b> { \
1182 public: \
1183 BaseClass##_##Method##_Benchmark() : BaseClass<a, b>() { \
1184 this->SetName(#BaseClass "<" #a "," #b ">/" #Method); \
1185 } \
1186 \
1187 protected: \
1188 virtual void BenchmarkCase(::benchmark::State&); \
1189 };
1190
1191 #ifdef BENCHMARK_HAS_CXX11
1192 #define BENCHMARK_TEMPLATE_PRIVATE_DECLARE_F(BaseClass, Method, ...) \
1193 class BaseClass##_##Method##_Benchmark : public BaseClass<__VA_ARGS__> { \
1194 public: \
1195 BaseClass##_##Method##_Benchmark() : BaseClass<__VA_ARGS__>() { \
1196 this->SetName(#BaseClass "<" #__VA_ARGS__ ">/" #Method); \
1197 } \
1198 \
1199 protected: \
1200 virtual void BenchmarkCase(::benchmark::State&); \
1201 };
1202 #else
1203 #define BENCHMARK_TEMPLATE_PRIVATE_DECLARE_F(n, a) \
1204 BENCHMARK_TEMPLATE1_PRIVATE_DECLARE_F(n, a)
1205 #endif
1206
1207 #define BENCHMARK_DEFINE_F(BaseClass, Method) \
1208 BENCHMARK_PRIVATE_DECLARE_F(BaseClass, Method) \
1209 void BaseClass##_##Method##_Benchmark::BenchmarkCase
1210
1211 #define BENCHMARK_TEMPLATE1_DEFINE_F(BaseClass, Method, a) \
1212 BENCHMARK_TEMPLATE1_PRIVATE_DECLARE_F(BaseClass, Method, a) \
1213 void BaseClass##_##Method##_Benchmark::BenchmarkCase
1214
1215 #define BENCHMARK_TEMPLATE2_DEFINE_F(BaseClass, Method, a, b) \
1216 BENCHMARK_TEMPLATE2_PRIVATE_DECLARE_F(BaseClass, Method, a, b) \
1217 void BaseClass##_##Method##_Benchmark::BenchmarkCase
1218
1219 #ifdef BENCHMARK_HAS_CXX11
1220 #define BENCHMARK_TEMPLATE_DEFINE_F(BaseClass, Method, ...) \
1221 BENCHMARK_TEMPLATE_PRIVATE_DECLARE_F(BaseClass, Method, __VA_ARGS__) \
1222 void BaseClass##_##Method##_Benchmark::BenchmarkCase
1223 #else
1224 #define BENCHMARK_TEMPLATE_DEFINE_F(BaseClass, Method, a) \
1225 BENCHMARK_TEMPLATE1_DEFINE_F(BaseClass, Method, a)
1226 #endif
1227
1228 #define BENCHMARK_REGISTER_F(BaseClass, Method) \
1229 BENCHMARK_PRIVATE_REGISTER_F(BaseClass##_##Method##_Benchmark)
1230
1231 #define BENCHMARK_PRIVATE_REGISTER_F(TestName) \
1232 BENCHMARK_PRIVATE_DECLARE(TestName) = \
1233 (::benchmark::internal::RegisterBenchmarkInternal(new TestName()))
1234
1235 // This macro will define and register a benchmark within a fixture class.
1236 #define BENCHMARK_F(BaseClass, Method) \
1237 BENCHMARK_PRIVATE_DECLARE_F(BaseClass, Method) \
1238 BENCHMARK_REGISTER_F(BaseClass, Method); \
1239 void BaseClass##_##Method##_Benchmark::BenchmarkCase
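// For example (a sketch; 'MyFixture' is user code):
//   class MyFixture : public benchmark::Fixture {
//    public:
//     void SetUp(benchmark::State& state) { /* per-benchmark setup */ }
//     void TearDown(benchmark::State& state) { /* per-benchmark teardown */ }
//   };
//   BENCHMARK_F(MyFixture, FooTest)(benchmark::State& st) {
//     for (auto _ : st) {
//       // ... run the measured code ...
//     }
//   }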
1240
1241 #define BENCHMARK_TEMPLATE1_F(BaseClass, Method, a) \
1242 BENCHMARK_TEMPLATE1_PRIVATE_DECLARE_F(BaseClass, Method, a) \
1243 BENCHMARK_REGISTER_F(BaseClass, Method); \
1244 void BaseClass##_##Method##_Benchmark::BenchmarkCase
1245
1246 #define BENCHMARK_TEMPLATE2_F(BaseClass, Method, a, b) \
1247 BENCHMARK_TEMPLATE2_PRIVATE_DECLARE_F(BaseClass, Method, a, b) \
1248 BENCHMARK_REGISTER_F(BaseClass, Method); \
1249 void BaseClass##_##Method##_Benchmark::BenchmarkCase
1250
1251 #ifdef BENCHMARK_HAS_CXX11
1252 #define BENCHMARK_TEMPLATE_F(BaseClass, Method, ...) \
1253 BENCHMARK_TEMPLATE_PRIVATE_DECLARE_F(BaseClass, Method, __VA_ARGS__) \
1254 BENCHMARK_REGISTER_F(BaseClass, Method); \
1255 void BaseClass##_##Method##_Benchmark::BenchmarkCase
1256 #else
1257 #define BENCHMARK_TEMPLATE_F(BaseClass, Method, a) \
1258 BENCHMARK_TEMPLATE1_F(BaseClass, Method, a)
1259 #endif
1260
1261 // Helper macro to create a main routine in a test that runs the benchmarks
1262 #define BENCHMARK_MAIN() \
1263 int main(int argc, char** argv) { \
1264 ::benchmark::Initialize(&argc, argv); \
1265 if (::benchmark::ReportUnrecognizedArguments(argc, argv)) return 1; \
1266 ::benchmark::RunSpecifiedBenchmarks(); \
1267 } \
1268 int main(int, char**)
1269
1270 // ------------------------------------------------------
1271 // Benchmark Reporters
1272
1273 namespace benchmark {
1274
1275 struct CPUInfo {
1276 struct CacheInfo {
1277 std::string type;
1278 int level;
1279 int size;
1280 int num_sharing;
1281 };
1282
1283 int num_cpus;
1284 double cycles_per_second;
1285 std::vector<CacheInfo> caches;
1286 bool scaling_enabled;
1287 std::vector<double> load_avg;
1288
1289 static const CPUInfo& Get();
1290
1291 private:
1292 CPUInfo();
1293 BENCHMARK_DISALLOW_COPY_AND_ASSIGN(CPUInfo);
1294 };
1295
1296 // Information about the host system the benchmarks are run on.
1297 struct SystemInfo {
1298 std::string name;
1299 static const SystemInfo& Get();
1300 private:
1301 SystemInfo();
1302 BENCHMARK_DISALLOW_COPY_AND_ASSIGN(SystemInfo);
1303 };
1304
1305 // Interface for custom benchmark result printers.
1306 // By default, benchmark reports are printed to stdout. However, an application
1307 // can control the destination of the reports by calling
1308 // RunSpecifiedBenchmarks and passing it a custom reporter object.
1309 // The reporter object must implement the following interface.
1310 class BenchmarkReporter {
1311 public:
1312 struct Context {
1313 CPUInfo const& cpu_info;
1314 SystemInfo const& sys_info;
1315 // The number of chars in the longest benchmark name.
1316 size_t name_field_width;
1317 static const char* executable_name;
1318 Context();
1319 };
1320
1321 struct Run {
1322 enum RunType { RT_Iteration, RT_Aggregate };
1323
1324 Run()
1325 : run_type(RT_Iteration),
1326 error_occurred(false),
1327 iterations(1),
1328 time_unit(kNanosecond),
1329 real_accumulated_time(0),
1330 cpu_accumulated_time(0),
1331 max_heapbytes_used(0),
1332 complexity(oNone),
1333 complexity_lambda(),
1334 complexity_n(0),
1335 report_big_o(false),
1336 report_rms(false),
1337 counters(),
1338 has_memory_result(false),
1339 allocs_per_iter(0.0),
1340 max_bytes_used(0) {}
1341
1342 std::string benchmark_name() const;
1343 std::string run_name;
1344 RunType run_type; // is this a measurement, or an aggregate?
1345 std::string aggregate_name;
1346 std::string report_label; // Empty if not set by benchmark.
1347 bool error_occurred;
1348 std::string error_message;
1349
1350 int64_t iterations;
1351 TimeUnit time_unit;
1352 double real_accumulated_time;
1353 double cpu_accumulated_time;
1354
1355 // Return a value representing the real time per iteration in the unit
1356 // specified by 'time_unit'.
1357 // NOTE: If 'iterations' is zero the returned value represents the
1358 // accumulated time.
1359 double GetAdjustedRealTime() const;
1360
1361 // Return a value representing the cpu time per iteration in the unit
1362 // specified by 'time_unit'.
1363 // NOTE: If 'iterations' is zero the returned value represents the
1364 // accumulated time.
1365 double GetAdjustedCPUTime() const;
1366
1367 // This is set to 0.0 if memory tracing is not enabled.
1368 double max_heapbytes_used;
1369
1370 // Keep track of arguments to compute asymptotic complexity
1371 BigO complexity;
1372 BigOFunc* complexity_lambda;
1373 int64_t complexity_n;
1374
1375 // what statistics to compute from the measurements
1376 const std::vector<Statistics>* statistics;
1377
1378 // Inform print function whether the current run is a complexity report
1379 bool report_big_o;
1380 bool report_rms;
1381
1382 UserCounters counters;
1383
1384 // Memory metrics.
1385 bool has_memory_result;
1386 double allocs_per_iter;
1387 int64_t max_bytes_used;
1388 };
1389
1390 // Construct a BenchmarkReporter with the output stream set to 'std::cout'
1391 // and the error stream set to 'std::cerr'
1392 BenchmarkReporter();
1393
1394 // Called once for every suite of benchmarks run.
1395 // The parameter "context" contains information that the
1396 // reporter may wish to use when generating its report, for example the
1397 // platform under which the benchmarks are running. The benchmark run is
1398 // never started if this function returns false, allowing the reporter
1399 // to skip runs based on the context information.
1400 virtual bool ReportContext(const Context& context) = 0;
1401
1402 // Called once for each group of benchmark runs; gives information about
1403 // cpu-time and heap memory usage during the benchmark run. If the group
1404 // of runs contained more than two entries then 'report' contains additional
1405 // elements representing the mean and standard deviation of those runs.
1406 // Additionally, if this group of runs was the last in a family of benchmarks,
1407 // 'report' contains additional entries representing the asymptotic
1408 // complexity and RMS of that benchmark family.
1409 virtual void ReportRuns(const std::vector<Run>& report) = 0;
1410
1411 // Called once and only once after every group of benchmarks is run and
1412 // reported.
1413 virtual void Finalize() {}
1414
1415 // REQUIRES: The object referenced by 'out' is valid for the lifetime
1416 // of the reporter.
1417 void SetOutputStream(std::ostream* out) {
1418 assert(out);
1419 output_stream_ = out;
1420 }
1421
1422 // REQUIRES: The object referenced by 'err' is valid for the lifetime
1423 // of the reporter.
1424 void SetErrorStream(std::ostream* err) {
1425 assert(err);
1426 error_stream_ = err;
1427 }
1428
1429 std::ostream& GetOutputStream() const { return *output_stream_; }
1430
1431 std::ostream& GetErrorStream() const { return *error_stream_; }
1432
1433 virtual ~BenchmarkReporter();
1434
1435 // Write a human readable string to 'out' representing the specified
1436 // 'context'.
1437 // REQUIRES: 'out' is non-null.
1438 static void PrintBasicContext(std::ostream* out, Context const& context);
1439
1440 private:
1441 std::ostream* output_stream_;
1442 std::ostream* error_stream_;
1443 };
1444
1445 // Simple reporter that outputs benchmark data to the console. This is the
1446 // default reporter used by RunSpecifiedBenchmarks().
1447 class ConsoleReporter : public BenchmarkReporter {
1448 public:
1449 enum OutputOptions {
1450 OO_None = 0,
1451 OO_Color = 1,
1452 OO_Tabular = 2,
1453 OO_ColorTabular = OO_Color | OO_Tabular,
1454 OO_Defaults = OO_ColorTabular
1455 };
1456 explicit ConsoleReporter(OutputOptions opts_ = OO_Defaults)
1457 : output_options_(opts_),
1458 name_field_width_(0),
1459 prev_counters_(),
1460 printed_header_(false) {}
1461
1462 virtual bool ReportContext(const Context& context);
1463 virtual void ReportRuns(const std::vector<Run>& reports);
1464
1465 protected:
1466 virtual void PrintRunData(const Run& report);
1467 virtual void PrintHeader(const Run& report);
1468
1469 OutputOptions output_options_;
1470 size_t name_field_width_;
1471 UserCounters prev_counters_;
1472 bool printed_header_;
1473 };
1474
1475 class JSONReporter : public BenchmarkReporter {
1476 public:
1477 JSONReporter() : first_report_(true) {}
1478 virtual bool ReportContext(const Context& context);
1479 virtual void ReportRuns(const std::vector<Run>& reports);
1480 virtual void Finalize();
1481
1482 private:
1483 void PrintRunData(const Run& report);
1484
1485 bool first_report_;
1486 };
1487
1488 class BENCHMARK_DEPRECATED_MSG("The CSV Reporter will be removed in a future release")
1489 CSVReporter : public BenchmarkReporter {
1490 public:
1491 CSVReporter() : printed_header_(false) {}
1492 virtual bool ReportContext(const Context& context);
1493 virtual void ReportRuns(const std::vector<Run>& reports);
1494
1495 private:
1496 void PrintRunData(const Run& report);
1497
1498 bool printed_header_;
1499 std::set<std::string> user_counter_names_;
1500 };
1501
1502 // If a MemoryManager is registered, it can be used to collect and report
1503 // allocation metrics for a run of the benchmark.
1504 class MemoryManager {
1505 public:
1506 struct Result {
1507 Result() : num_allocs(0), max_bytes_used(0) {}
1508
1509 // The number of allocations made in total between Start and Stop.
1510 int64_t num_allocs;
1511
1512 // The peak memory use between Start and Stop.
1513 int64_t max_bytes_used;
1514 };
1515
1516 virtual ~MemoryManager() {}
1517
1518 // Implement this to start recording allocation information.
1519 virtual void Start() = 0;
1520
1521 // Implement this to stop recording and fill out the given Result structure.
1522 virtual void Stop(Result* result) = 0;
1523 };
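// A minimal sketch of a custom manager (the allocator hooks are placeholders;
// real numbers would come from the application's own allocation tracking):
//   class MyMemoryManager : public benchmark::MemoryManager {
//    public:
//     void Start() { /* begin recording allocations */ }
//     void Stop(Result* result) {
//       result->num_allocs = 0;      // fill in from the allocation hooks
//       result->max_bytes_used = 0;  // fill in from the allocation hooks
//     }
//   };
// An instance is registered with benchmark::RegisterMemoryManager(&manager)
// before RunSpecifiedBenchmarks() is called.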
1524
1525 inline const char* GetTimeUnitString(TimeUnit unit) {
1526 switch (unit) {
1527 case kMillisecond:
1528 return "ms";
1529 case kMicrosecond:
1530 return "us";
1531 case kNanosecond:
1532 return "ns";
1533 }
1534 BENCHMARK_UNREACHABLE();
1535 }
1536
1537 inline double GetTimeUnitMultiplier(TimeUnit unit) {
1538 switch (unit) {
1539 case kMillisecond:
1540 return 1e3;
1541 case kMicrosecond:
1542 return 1e6;
1543 case kNanosecond:
1544 return 1e9;
1545 }
1546 BENCHMARK_UNREACHABLE();
1547 }
1548
1549 } // namespace benchmark
1550
1551 #endif // BENCHMARK_BENCHMARK_H_
1552