1 // Copyright 2015 Google Inc. All rights reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "benchmark/benchmark.h"
16 #include "internal_macros.h"
17
18 #ifndef BENCHMARK_OS_WINDOWS
19 #include <sys/time.h>
20 #include <sys/resource.h>
21 #include <unistd.h>
22 #endif
23
24 #include <cstdlib>
25 #include <cstring>
26 #include <cstdio>
27 #include <algorithm>
28 #include <atomic>
29 #include <condition_variable>
30 #include <iostream>
31 #include <memory>
32 #include <thread>
33
34 #include "check.h"
35 #include "commandlineflags.h"
36 #include "log.h"
37 #include "mutex.h"
38 #include "re.h"
39 #include "stat.h"
40 #include "string_util.h"
41 #include "sysinfo.h"
42 #include "walltime.h"
43
44 DEFINE_bool(benchmark_list_tests, false,
45 "Print a list of benchmarks. This option overrides all other "
46 "options.");
47
48 DEFINE_string(benchmark_filter, ".",
49 "A regular expression that specifies the set of benchmarks "
50 "to execute. If this flag is empty, no benchmarks are run. "
51 "If this flag is the string \"all\", all benchmarks linked "
52 "into the process are run.");
53
54 DEFINE_double(benchmark_min_time, 0.5,
55 "Minimum number of seconds we should run benchmark before "
56 "results are considered significant. For cpu-time based "
57 "tests, this is the lower bound on the total cpu time "
58 "used by all threads that make up the test. For real-time "
59 "based tests, this is the lower bound on the elapsed time "
60 "of the benchmark execution, regardless of number of "
61 "threads.");
62
63 DEFINE_int32(benchmark_repetitions, 1,
64 "The number of runs of each benchmark. If greater than 1, the "
65 "mean and standard deviation of the runs will be reported.");
66
67 DEFINE_string(benchmark_format, "tabular",
68 "The format to use for console output. Valid values are "
69 "'tabular', 'json', or 'csv'.");
70
71 DEFINE_bool(color_print, true, "Enables colorized logging.");
72
73 DEFINE_int32(v, 0, "The level of verbose logging to output");
74
75
76 namespace benchmark {
77
78 namespace internal {
79
// Intentionally empty: accepts a pointer to (possibly const and/or volatile)
// char and does nothing with it.
void UseCharPointer(char const volatile* /*unused*/) {
}
81
82 // NOTE: This is a dummy "mutex" type used to denote the actual mutex
83 // returned by GetBenchmarkLock(). This is only used to placate the thread
84 // safety warnings by giving the return of GetBenchmarkLock() a name.
85 struct CAPABILITY("mutex") BenchmarkLockType {};
86 BenchmarkLockType BenchmarkLockVar;
87
88 } // end namespace internal
89
RETURN_CAPABILITY(::benchmark::internal::BenchmarkLockVar)90 inline Mutex& RETURN_CAPABILITY(::benchmark::internal::BenchmarkLockVar)
91 GetBenchmarkLock()
92 {
93 static Mutex lock;
94 return lock;
95 }
96
97 namespace {
98
// Returns true when |n| is strictly smaller than the machine epsilon of
// double, i.e. when n is treated as "zero" by this file.
bool IsZero(double n) {
  const double tolerance = std::numeric_limits<double>::epsilon();
  const double magnitude = std::abs(n);
  return magnitude < tolerance;
}
102
// Multiplier passed to AddRange() by Range()/RangePair(): intermediate
// argument values of a non-dense range are powers of this number.
static const int kRangeMultiplier = 8;

// Upper bound on the iteration count RunBenchmark() will request.
static const size_t kMaxIterations = 1000 * 1000 * 1000;

// Set to true by RunBenchmark() for the duration of a run; State methods
// CHECK it before touching the timer or the report label.
bool running_benchmark = false;
108
109 // Global variable so that a benchmark can cause a little extra printing
GetReportLabel()110 std::string* GetReportLabel() {
111 static std::string label GUARDED_BY(GetBenchmarkLock());
112 return &label;
113 }
114
115 // TODO(ericwf): support MallocCounter.
116 //static benchmark::MallocCounter *benchmark_mc;
117
// Byte and item counters accumulated over the threads of one benchmark run.
// Both counters start at zero.
struct ThreadStats {
  ThreadStats() {}
  int64_t bytes_processed = 0;
  int64_t items_processed = 0;
};
123
124 // Timer management class
125 class TimerManager {
126 public:
TimerManager(int num_threads,Notification * done)127 TimerManager(int num_threads, Notification* done)
128 : num_threads_(num_threads),
129 done_(done),
130 running_(false),
131 real_time_used_(0),
132 cpu_time_used_(0),
133 num_finalized_(0),
134 phase_number_(0),
135 entered_(0) {
136 }
137
138 // Called by each thread
StartTimer()139 void StartTimer() EXCLUDES(lock_) {
140 bool last_thread = false;
141 {
142 MutexLock ml(lock_);
143 last_thread = Barrier(ml);
144 if (last_thread) {
145 CHECK(!running_) << "Called StartTimer when timer is already running";
146 running_ = true;
147 start_real_time_ = walltime::Now();
148 start_cpu_time_ = MyCPUUsage() + ChildrenCPUUsage();
149 }
150 }
151 if (last_thread) {
152 phase_condition_.notify_all();
153 }
154 }
155
156 // Called by each thread
StopTimer()157 void StopTimer() EXCLUDES(lock_) {
158 bool last_thread = false;
159 {
160 MutexLock ml(lock_);
161 last_thread = Barrier(ml);
162 if (last_thread) {
163 CHECK(running_) << "Called StopTimer when timer is already stopped";
164 InternalStop();
165 }
166 }
167 if (last_thread) {
168 phase_condition_.notify_all();
169 }
170 }
171
172 // Called by each thread
Finalize()173 void Finalize() EXCLUDES(lock_) {
174 MutexLock l(lock_);
175 num_finalized_++;
176 if (num_finalized_ == num_threads_) {
177 CHECK(!running_) <<
178 "The timer should be stopped before the timer is finalized";
179 done_->Notify();
180 }
181 }
182
183 // REQUIRES: timer is not running
real_time_used()184 double real_time_used() EXCLUDES(lock_) {
185 MutexLock l(lock_);
186 CHECK(!running_);
187 return real_time_used_;
188 }
189
190 // REQUIRES: timer is not running
cpu_time_used()191 double cpu_time_used() EXCLUDES(lock_) {
192 MutexLock l(lock_);
193 CHECK(!running_);
194 return cpu_time_used_;
195 }
196
197 private:
198 Mutex lock_;
199 Condition phase_condition_;
200 int num_threads_;
201 Notification* done_;
202
203 bool running_; // Is the timer running
204 double start_real_time_; // If running_
205 double start_cpu_time_; // If running_
206
207 // Accumulated time so far (does not contain current slice if running_)
208 double real_time_used_;
209 double cpu_time_used_;
210
211 // How many threads have called Finalize()
212 int num_finalized_;
213
214 // State for barrier management
215 int phase_number_;
216 int entered_; // Number of threads that have entered this barrier
217
InternalStop()218 void InternalStop() REQUIRES(lock_) {
219 CHECK(running_);
220 running_ = false;
221 real_time_used_ += walltime::Now() - start_real_time_;
222 cpu_time_used_ += ((MyCPUUsage() + ChildrenCPUUsage())
223 - start_cpu_time_);
224 }
225
226 // Enter the barrier and wait until all other threads have also
227 // entered the barrier. Returns iff this is the last thread to
228 // enter the barrier.
Barrier(MutexLock & ml)229 bool Barrier(MutexLock& ml) REQUIRES(lock_) {
230 CHECK_LT(entered_, num_threads_);
231 entered_++;
232 if (entered_ < num_threads_) {
233 // Wait for all threads to enter
234 int phase_number_cp = phase_number_;
235 auto cb = [this, phase_number_cp]() {
236 return this->phase_number_ > phase_number_cp;
237 };
238 phase_condition_.wait(ml.native_handle(), cb);
239 return false; // I was not the last one
240 } else {
241 // Last thread has reached the barrier
242 phase_number_++;
243 entered_ = 0;
244 return true;
245 }
246 }
247 };
248
249 // TimerManager for current run.
250 static std::unique_ptr<TimerManager> timer_manager = nullptr;
251
252 } // end namespace
253
254 namespace internal {
255
// Information kept per benchmark we may want to run.
// One Instance is produced by BenchmarkFamilies::FindBenchmarks() for every
// (argument pair, thread count) combination of a benchmark family.
struct Benchmark::Instance {
  std::string    name;          // Family name plus argument/option suffixes
  Benchmark*     benchmark;     // Family this instance belongs to (not owned)
  bool           has_arg1;      // True if arg1 is meaningful
  int            arg1;
  bool           has_arg2;      // True if arg2 is meaningful
  int            arg2;
  bool           use_real_time; // Judge run length by real time, not CPU time
  double         min_time;      // Per-family minimum time; zero means unset
  int            threads;       // Number of concurrent threads to use
  bool           multithreaded; // Is benchmark multi-threaded?
};
269
// Class for managing registered benchmarks.  Note that each registered
// benchmark identifies a family of related benchmarks to run.
// Access goes through the single object returned by GetInstance().
class BenchmarkFamilies {
 public:
  // Returns the process-wide registry.
  static BenchmarkFamilies* GetInstance();

  // Registers a benchmark family and returns the index assigned to it.
  size_t AddBenchmark(std::unique_ptr<Benchmark> family);

  // Extract the list of benchmark instances that match the specified
  // regular expression. Returns false if the expression fails to compile.
  bool FindBenchmarks(const std::string& re,
                      std::vector<Benchmark::Instance>* benchmarks);
 private:
  // Private: instances are only created inside GetInstance().
  BenchmarkFamilies() {}

  std::vector<std::unique_ptr<Benchmark>> families_;  // Owned families
  Mutex mutex_;  // Held while families_ is read or modified
};
289
290
// Implementation object behind Benchmark. It stores the configuration of one
// benchmark family: its name, the argument sets to run with, the optional
// minimum time, the real-time switch and the thread counts.
class BenchmarkImp {
public:
  explicit BenchmarkImp(const char* name);
  ~BenchmarkImp();

  // Single-argument configuration.
  void Arg(int x);
  void Range(int start, int limit);
  void DenseRange(int start, int limit);

  // Two-argument configuration.
  void ArgPair(int x, int y);
  void RangePair(int lo1, int hi1, int lo2, int hi2);

  // Run-control options.
  void MinTime(double t);
  void UseRealTime();
  void Threads(int t);
  void ThreadRange(int min_threads, int max_threads);
  void ThreadPerCpu();
  void SetName(const char* name);

  // Appends to *dst the values lo, the powers of mult strictly between lo
  // and hi, and hi.
  static void AddRange(std::vector<int>* dst, int lo, int hi, int mult);

private:
  friend class BenchmarkFamilies;

  std::string name_;
  // Number of arguments per run: -1 until arguments are configured, then
  // 0, 1 or 2.
  int arg_count_;
  std::vector< std::pair<int, int> > args_;  // Args for all benchmark runs
  double min_time_;
  bool use_real_time_;
  std::vector<int> thread_counts_;

  // Copy assignment is not supported.
  BenchmarkImp& operator=(BenchmarkImp const&) = delete;
};
322
GetInstance()323 BenchmarkFamilies* BenchmarkFamilies::GetInstance() {
324 static BenchmarkFamilies instance;
325 return &instance;
326 }
327
328
AddBenchmark(std::unique_ptr<Benchmark> family)329 size_t BenchmarkFamilies::AddBenchmark(std::unique_ptr<Benchmark> family) {
330 MutexLock l(mutex_);
331 size_t index = families_.size();
332 families_.push_back(std::move(family));
333 return index;
334 }
335
FindBenchmarks(const std::string & spec,std::vector<Benchmark::Instance> * benchmarks)336 bool BenchmarkFamilies::FindBenchmarks(
337 const std::string& spec,
338 std::vector<Benchmark::Instance>* benchmarks) {
339 // Make regular expression out of command-line flag
340 std::string error_msg;
341 Regex re;
342 if (!re.Init(spec, &error_msg)) {
343 std::cerr << "Could not compile benchmark re: " << error_msg << std::endl;
344 return false;
345 }
346
347 // Special list of thread counts to use when none are specified
348 std::vector<int> one_thread;
349 one_thread.push_back(1);
350
351 MutexLock l(mutex_);
352 for (std::unique_ptr<Benchmark>& bench_family : families_) {
353 // Family was deleted or benchmark doesn't match
354 if (!bench_family) continue;
355 BenchmarkImp* family = bench_family->imp_;
356
357 if (family->arg_count_ == -1) {
358 family->arg_count_ = 0;
359 family->args_.emplace_back(-1, -1);
360 }
361 for (auto const& args : family->args_) {
362 const std::vector<int>* thread_counts =
363 (family->thread_counts_.empty()
364 ? &one_thread
365 : &family->thread_counts_);
366 for (int num_threads : *thread_counts) {
367
368 Benchmark::Instance instance;
369 instance.name = family->name_;
370 instance.benchmark = bench_family.get();
371 instance.has_arg1 = family->arg_count_ >= 1;
372 instance.arg1 = args.first;
373 instance.has_arg2 = family->arg_count_ == 2;
374 instance.arg2 = args.second;
375 instance.min_time = family->min_time_;
376 instance.use_real_time = family->use_real_time_;
377 instance.threads = num_threads;
378 instance.multithreaded = !(family->thread_counts_.empty());
379
380 // Add arguments to instance name
381 if (family->arg_count_ >= 1) {
382 AppendHumanReadable(instance.arg1, &instance.name);
383 }
384 if (family->arg_count_ >= 2) {
385 AppendHumanReadable(instance.arg2, &instance.name);
386 }
387 if (!IsZero(family->min_time_)) {
388 instance.name += StringPrintF("/min_time:%0.3f", family->min_time_);
389 }
390 if (family->use_real_time_) {
391 instance.name += "/real_time";
392 }
393
394 // Add the number of threads used to the name
395 if (!family->thread_counts_.empty()) {
396 instance.name += StringPrintF("/threads:%d", instance.threads);
397 }
398
399 if (re.Match(instance.name)) {
400 benchmarks->push_back(instance);
401 }
402 }
403 }
404 }
405 return true;
406 }
407
// Creates the configuration for a family called `name`. No arguments are
// configured yet (arg_count_ is -1), no minimum time is set and CPU time is
// used.
BenchmarkImp::BenchmarkImp(const char* name)
    : name_(name), arg_count_(-1),
      min_time_(0.0), use_real_time_(false) {
}
412
// Nothing to release explicitly; members clean up after themselves.
BenchmarkImp::~BenchmarkImp() {
}
415
Arg(int x)416 void BenchmarkImp::Arg(int x) {
417 CHECK(arg_count_ == -1 || arg_count_ == 1);
418 arg_count_ = 1;
419 args_.emplace_back(x, -1);
420 }
421
Range(int start,int limit)422 void BenchmarkImp::Range(int start, int limit) {
423 CHECK(arg_count_ == -1 || arg_count_ == 1);
424 arg_count_ = 1;
425 std::vector<int> arglist;
426 AddRange(&arglist, start, limit, kRangeMultiplier);
427
428 for (int i : arglist) {
429 args_.emplace_back(i, -1);
430 }
431 }
432
DenseRange(int start,int limit)433 void BenchmarkImp::DenseRange(int start, int limit) {
434 CHECK(arg_count_ == -1 || arg_count_ == 1);
435 arg_count_ = 1;
436 CHECK_GE(start, 0);
437 CHECK_LE(start, limit);
438 for (int arg = start; arg <= limit; arg++) {
439 args_.emplace_back(arg, -1);
440 }
441 }
442
ArgPair(int x,int y)443 void BenchmarkImp::ArgPair(int x, int y) {
444 CHECK(arg_count_ == -1 || arg_count_ == 2);
445 arg_count_ = 2;
446 args_.emplace_back(x, y);
447 }
448
RangePair(int lo1,int hi1,int lo2,int hi2)449 void BenchmarkImp::RangePair(int lo1, int hi1, int lo2, int hi2) {
450 CHECK(arg_count_ == -1 || arg_count_ == 2);
451 arg_count_ = 2;
452 std::vector<int> arglist1, arglist2;
453 AddRange(&arglist1, lo1, hi1, kRangeMultiplier);
454 AddRange(&arglist2, lo2, hi2, kRangeMultiplier);
455
456 for (int i : arglist1) {
457 for (int j : arglist2) {
458 args_.emplace_back(i, j);
459 }
460 }
461 }
462
// Sets the family-specific minimum run time, in the same unit as
// FLAGS_benchmark_min_time. `t` must be strictly positive.
void BenchmarkImp::MinTime(double t) {
  CHECK(t > 0.0);
  min_time_ = t;
}
467
// Makes RunBenchmark() judge run length by real time instead of CPU time.
void BenchmarkImp::UseRealTime() {
  use_real_time_ = true;
}
471
// Adds `t` to the list of thread counts to run with. `t` must be positive.
void BenchmarkImp::Threads(int t) {
  CHECK_GT(t, 0);
  thread_counts_.push_back(t);
}
476
// Adds the thread counts AddRange() produces for [min_threads, max_threads]
// with a step factor of 2. Requires 0 < min_threads <= max_threads.
void BenchmarkImp::ThreadRange(int min_threads, int max_threads) {
  CHECK_GT(min_threads, 0);
  CHECK_GE(max_threads, min_threads);

  AddRange(&thread_counts_, min_threads, max_threads, 2);
}
483
// Adds a thread count equal to NumCPUs(). The CPU count is queried once and
// cached in a function-local static.
void BenchmarkImp::ThreadPerCpu() {
  static int num_cpus = NumCPUs();
  thread_counts_.push_back(num_cpus);
}
488
// Replaces the family name used as the prefix of generated instance names.
void BenchmarkImp::SetName(const char* name) {
  name_ = name;
}
492
AddRange(std::vector<int> * dst,int lo,int hi,int mult)493 void BenchmarkImp::AddRange(std::vector<int>* dst, int lo, int hi, int mult) {
494 CHECK_GE(lo, 0);
495 CHECK_GE(hi, lo);
496
497 // Add "lo"
498 dst->push_back(lo);
499
500 static const int kint32max = std::numeric_limits<int32_t>::max();
501
502 // Now space out the benchmarks in multiples of "mult"
503 for (int32_t i = 1; i < kint32max/mult; i *= mult) {
504 if (i >= hi) break;
505 if (i > lo) {
506 dst->push_back(i);
507 }
508 }
509 // Add "hi" (if different from "lo")
510 if (hi != lo) {
511 dst->push_back(hi);
512 }
513 }
514
// Creates a benchmark family called `name`, allocating its implementation
// object. The object is released in ~Benchmark().
Benchmark::Benchmark(const char* name)
    : imp_(new BenchmarkImp(name))
{
}
519
// Releases the implementation object allocated by the constructors.
Benchmark::~Benchmark()  {
  delete imp_;
}
523
// Copy constructor: gives the new object its own copy of the other
// benchmark's implementation object, so the two do not share state.
Benchmark::Benchmark(Benchmark const& other)
  : imp_(new BenchmarkImp(*other.imp_))
{
}
528
// Forwards to BenchmarkImp::Arg and returns this so calls can be chained.
Benchmark* Benchmark::Arg(int x) {
  imp_->Arg(x);
  return this;
}
533
// Forwards to BenchmarkImp::Range and returns this so calls can be chained.
Benchmark* Benchmark::Range(int start, int limit) {
  imp_->Range(start, limit);
  return this;
}
538
// Forwards to BenchmarkImp::DenseRange and returns this so calls can be
// chained.
Benchmark* Benchmark::DenseRange(int start, int limit) {
  imp_->DenseRange(start, limit);
  return this;
}
543
// Forwards to BenchmarkImp::ArgPair and returns this so calls can be chained.
Benchmark* Benchmark::ArgPair(int x, int y) {
  imp_->ArgPair(x, y);
  return this;
}
548
// Forwards to BenchmarkImp::RangePair and returns this so calls can be
// chained.
Benchmark* Benchmark::RangePair(int lo1, int hi1, int lo2, int hi2) {
  imp_->RangePair(lo1, hi1, lo2, hi2);
  return this;
}
553
// Invokes the caller-supplied function with this benchmark so it can
// configure it, then returns this so calls can be chained.
Benchmark* Benchmark::Apply(void (*custom_arguments)(Benchmark* benchmark)) {
  custom_arguments(this);
  return this;
}
558
// Forwards to BenchmarkImp::MinTime and returns this so calls can be chained.
Benchmark* Benchmark::MinTime(double t) {
  imp_->MinTime(t);
  return this;
}
563
// Forwards to BenchmarkImp::UseRealTime and returns this so calls can be
// chained.
Benchmark* Benchmark::UseRealTime() {
  imp_->UseRealTime();
  return this;
}
568
// Forwards to BenchmarkImp::Threads and returns this so calls can be chained.
Benchmark* Benchmark::Threads(int t) {
  imp_->Threads(t);
  return this;
}
573
// Forwards to BenchmarkImp::ThreadRange and returns this so calls can be
// chained.
Benchmark* Benchmark::ThreadRange(int min_threads, int max_threads) {
  imp_->ThreadRange(min_threads, max_threads);
  return this;
}
578
// Forwards to BenchmarkImp::ThreadPerCpu and returns this so calls can be
// chained.
Benchmark* Benchmark::ThreadPerCpu() {
  imp_->ThreadPerCpu();
  return this;
}
583
// Forwards to BenchmarkImp::SetName.
void Benchmark::SetName(const char* name) {
  imp_->SetName(name);
}
587
// Runs the wrapped benchmark function with the given state.
void FunctionBenchmark::Run(State& st) {
  func_(st);
}
591
592 } // end namespace internal
593
594 namespace {
595
596
// Execute one thread of benchmark b for the specified number of iterations.
// Adds the stats collected for the thread into *total.
//
// b:         instance to run; supplies the arguments and thread count.
// iters:     iteration count handed to the State.
// thread_id: index of this thread, passed on to the State.
// total:     shared accumulator, updated under GetBenchmarkLock().
//
// Finishes by calling Finalize() on the current timer_manager.
void RunInThread(const benchmark::internal::Benchmark::Instance* b,
                 size_t iters, int thread_id,
                 ThreadStats* total) EXCLUDES(GetBenchmarkLock()) {
  State st(iters, b->has_arg1, b->arg1, b->has_arg2, b->arg2, thread_id, b->threads);
  b->benchmark->Run(st);
  // The benchmark body must have consumed every iteration it was given.
  CHECK(st.iterations() == st.max_iterations) <<
    "Benchmark returned before State::KeepRunning() returned false!";
  {
    // *total is shared between all threads of the run.
    MutexLock l(GetBenchmarkLock());
    total->bytes_processed += st.bytes_processed();
    total->items_processed += st.items_processed();
  }

  timer_manager->Finalize();
}
614
RunBenchmark(const benchmark::internal::Benchmark::Instance & b,BenchmarkReporter * br)615 void RunBenchmark(const benchmark::internal::Benchmark::Instance& b,
616 BenchmarkReporter* br) EXCLUDES(GetBenchmarkLock()) {
617 size_t iters = 1;
618
619 std::vector<BenchmarkReporter::Run> reports;
620
621 std::vector<std::thread> pool;
622 if (b.multithreaded)
623 pool.resize(b.threads);
624
625 for (int i = 0; i < FLAGS_benchmark_repetitions; i++) {
626 std::string mem;
627 for (;;) {
628 // Try benchmark
629 VLOG(2) << "Running " << b.name << " for " << iters << "\n";
630
631 {
632 MutexLock l(GetBenchmarkLock());
633 GetReportLabel()->clear();
634 }
635
636 Notification done;
637 timer_manager = std::unique_ptr<TimerManager>(new TimerManager(b.threads, &done));
638
639 ThreadStats total;
640 running_benchmark = true;
641 if (b.multithreaded) {
642 // If this is out first iteration of the while(true) loop then the
643 // threads haven't been started and can't be joined. Otherwise we need
644 // to join the thread before replacing them.
645 for (std::thread& thread : pool) {
646 if (thread.joinable())
647 thread.join();
648 }
649 for (std::size_t ti = 0; ti < pool.size(); ++ti) {
650 pool[ti] = std::thread(&RunInThread, &b, iters, ti, &total);
651 }
652 } else {
653 // Run directly in this thread
654 RunInThread(&b, iters, 0, &total);
655 }
656 done.WaitForNotification();
657 running_benchmark = false;
658
659 const double cpu_accumulated_time = timer_manager->cpu_time_used();
660 const double real_accumulated_time = timer_manager->real_time_used();
661 timer_manager.reset();
662
663 VLOG(2) << "Ran in " << cpu_accumulated_time << "/"
664 << real_accumulated_time << "\n";
665
666 // Base decisions off of real time if requested by this benchmark.
667 double seconds = cpu_accumulated_time;
668 if (b.use_real_time) {
669 seconds = real_accumulated_time;
670 }
671
672 std::string label;
673 {
674 MutexLock l(GetBenchmarkLock());
675 label = *GetReportLabel();
676 }
677
678 const double min_time = !IsZero(b.min_time) ? b.min_time
679 : FLAGS_benchmark_min_time;
680
681 // If this was the first run, was elapsed time or cpu time large enough?
682 // If this is not the first run, go with the current value of iter.
683 if ((i > 0) ||
684 (iters >= kMaxIterations) ||
685 (seconds >= min_time) ||
686 (real_accumulated_time >= 5*min_time)) {
687 double bytes_per_second = 0;
688 if (total.bytes_processed > 0 && seconds > 0.0) {
689 bytes_per_second = (total.bytes_processed / seconds);
690 }
691 double items_per_second = 0;
692 if (total.items_processed > 0 && seconds > 0.0) {
693 items_per_second = (total.items_processed / seconds);
694 }
695
696 // Create report about this benchmark run.
697 BenchmarkReporter::Run report;
698 report.benchmark_name = b.name;
699 report.report_label = label;
700 // Report the total iterations across all threads.
701 report.iterations = static_cast<int64_t>(iters) * b.threads;
702 report.real_accumulated_time = real_accumulated_time;
703 report.cpu_accumulated_time = cpu_accumulated_time;
704 report.bytes_per_second = bytes_per_second;
705 report.items_per_second = items_per_second;
706 reports.push_back(report);
707 break;
708 }
709
710 // See how much iterations should be increased by
711 // Note: Avoid division by zero with max(seconds, 1ns).
712 double multiplier = min_time * 1.4 / std::max(seconds, 1e-9);
713 // If our last run was at least 10% of FLAGS_benchmark_min_time then we
714 // use the multiplier directly. Otherwise we use at most 10 times
715 // expansion.
716 // NOTE: When the last run was at least 10% of the min time the max
717 // expansion should be 14x.
718 bool is_significant = (seconds / min_time) > 0.1;
719 multiplier = is_significant ? multiplier : std::min(10.0, multiplier);
720 if (multiplier <= 1.0) multiplier = 2.0;
721 double next_iters = std::max(multiplier * iters, iters + 1.0);
722 if (next_iters > kMaxIterations) {
723 next_iters = kMaxIterations;
724 }
725 VLOG(3) << "Next iters: " << next_iters << ", " << multiplier << "\n";
726 iters = static_cast<int>(next_iters + 0.5);
727 }
728 }
729 br->ReportRuns(reports);
730 if (b.multithreaded) {
731 for (std::thread& thread : pool)
732 thread.join();
733 }
734 }
735
736 } // namespace
737
// Builds the state object handed to one thread of a benchmark run.
//
// max_iters:        number of iterations to run; must not be zero.
// has_x/x, has_y/y: optional first and second range arguments.
// thread_i:         index of the calling thread; must be below n_threads.
// n_threads:        number of threads taking part in the run.
State::State(size_t max_iters, bool has_x, int x, bool has_y, int y,
             int thread_i, int n_threads)
    : started_(false), total_iterations_(0),
      has_range_x_(has_x), range_x_(x),
      has_range_y_(has_y), range_y_(y),
      bytes_processed_(0), items_processed_(0),
      thread_index(thread_i),
      threads(n_threads),
      max_iterations(max_iters)
{
    CHECK(max_iterations != 0) << "At least one iteration must be run";
    CHECK_LT(thread_index, threads) << "thread_index must be less than threads";
}
751
// Stops the shared timer via the current timer_manager. Only valid while a
// benchmark is running.
void State::PauseTiming() {
  // Add in time accumulated so far
  CHECK(running_benchmark);
  timer_manager->StopTimer();
}
757
// Starts the shared timer via the current timer_manager. Only valid while a
// benchmark is running.
void State::ResumeTiming() {
  CHECK(running_benchmark);
  timer_manager->StartTimer();
}
762
// Stores `label` as the report label of the current run, replacing any
// previous value. Only valid while a benchmark is running.
void State::SetLabel(const char* label) {
  CHECK(running_benchmark);
  // The label string is shared, so it is written under the benchmark lock.
  MutexLock l(GetBenchmarkLock());
  *GetReportLabel() = label;
}
768
769 namespace internal {
770 namespace {
771
PrintBenchmarkList()772 void PrintBenchmarkList() {
773 std::vector<Benchmark::Instance> benchmarks;
774 auto families = BenchmarkFamilies::GetInstance();
775 if (!families->FindBenchmarks(".", &benchmarks)) return;
776
777 for (const internal::Benchmark::Instance& benchmark : benchmarks) {
778 std::cout << benchmark.name << "\n";
779 }
780 }
781
RunMatchingBenchmarks(const std::string & spec,BenchmarkReporter * reporter)782 void RunMatchingBenchmarks(const std::string& spec,
783 BenchmarkReporter* reporter) {
784 CHECK(reporter != nullptr);
785 if (spec.empty()) return;
786
787 std::vector<Benchmark::Instance> benchmarks;
788 auto families = BenchmarkFamilies::GetInstance();
789 if (!families->FindBenchmarks(spec, &benchmarks)) return;
790
791 // Determine the width of the name field using a minimum width of 10.
792 size_t name_field_width = 10;
793 for (const Benchmark::Instance& benchmark : benchmarks) {
794 name_field_width =
795 std::max<size_t>(name_field_width, benchmark.name.size());
796 }
797 if (FLAGS_benchmark_repetitions > 1)
798 name_field_width += std::strlen("_stddev");
799
800 // Print header here
801 BenchmarkReporter::Context context;
802 context.num_cpus = NumCPUs();
803 context.mhz_per_cpu = CyclesPerSecond() / 1000000.0f;
804
805 context.cpu_scaling_enabled = CpuScalingEnabled();
806 context.name_field_width = name_field_width;
807
808 if (reporter->ReportContext(context)) {
809 for (const auto& benchmark : benchmarks) {
810 RunBenchmark(benchmark, reporter);
811 }
812 }
813 }
814
GetDefaultReporter()815 std::unique_ptr<BenchmarkReporter> GetDefaultReporter() {
816 typedef std::unique_ptr<BenchmarkReporter> PtrType;
817 if (FLAGS_benchmark_format == "tabular") {
818 return PtrType(new ConsoleReporter);
819 } else if (FLAGS_benchmark_format == "json") {
820 return PtrType(new JSONReporter);
821 } else if (FLAGS_benchmark_format == "csv") {
822 return PtrType(new CSVReporter);
823 } else {
824 std::cerr << "Unexpected format: '" << FLAGS_benchmark_format << "'\n";
825 std::exit(1);
826 }
827 }
828
829 } // end namespace
830 } // end namespace internal
831
// Convenience overload: runs the selected benchmarks with the default
// reporter by passing a null reporter to the overload below.
void RunSpecifiedBenchmarks() {
  RunSpecifiedBenchmarks(nullptr);
}
835
RunSpecifiedBenchmarks(BenchmarkReporter * reporter)836 void RunSpecifiedBenchmarks(BenchmarkReporter* reporter) {
837 if (FLAGS_benchmark_list_tests) {
838 internal::PrintBenchmarkList();
839 return;
840 }
841 std::string spec = FLAGS_benchmark_filter;
842 if (spec.empty() || spec == "all")
843 spec = "."; // Regexp that matches all benchmarks
844
845 std::unique_ptr<BenchmarkReporter> default_reporter;
846 if (!reporter) {
847 default_reporter = internal::GetDefaultReporter();
848 reporter = default_reporter.get();
849 }
850 internal::RunMatchingBenchmarks(spec, reporter);
851 reporter->Finalize();
852 }
853
854 namespace internal {
855
// Prints the list of supported command-line flags to stdout and terminates
// the process with exit status 0.
void PrintUsageAndExit() {
  static const char kUsage[] =
      "benchmark"
      " [--benchmark_list_tests={true|false}]\n"
      "          [--benchmark_filter=<regex>]\n"
      "          [--benchmark_min_time=<min_time>]\n"
      "          [--benchmark_repetitions=<num_repetitions>]\n"
      "          [--benchmark_format=<tabular|json|csv>]\n"
      "          [--color_print={true|false}]\n"
      "          [--v=<verbosity>]\n";
  // The text contains no conversion specifiers, so it is written verbatim.
  fputs(kUsage, stdout);
  exit(0);
}
868
ParseCommandLineFlags(int * argc,char ** argv)869 void ParseCommandLineFlags(int* argc, char** argv) {
870 using namespace benchmark;
871 for (int i = 1; i < *argc; ++i) {
872 if (
873 ParseBoolFlag(argv[i], "benchmark_list_tests",
874 &FLAGS_benchmark_list_tests) ||
875 ParseStringFlag(argv[i], "benchmark_filter",
876 &FLAGS_benchmark_filter) ||
877 ParseDoubleFlag(argv[i], "benchmark_min_time",
878 &FLAGS_benchmark_min_time) ||
879 ParseInt32Flag(argv[i], "benchmark_repetitions",
880 &FLAGS_benchmark_repetitions) ||
881 ParseStringFlag(argv[i], "benchmark_format",
882 &FLAGS_benchmark_format) ||
883 ParseBoolFlag(argv[i], "color_print",
884 &FLAGS_color_print) ||
885 ParseInt32Flag(argv[i], "v", &FLAGS_v)) {
886 for (int j = i; j != *argc; ++j) argv[j] = argv[j + 1];
887
888 --(*argc);
889 --i;
890 } else if (IsFlag(argv[i], "help")) {
891 PrintUsageAndExit();
892 }
893 }
894 if (FLAGS_benchmark_format != "tabular" &&
895 FLAGS_benchmark_format != "json" &&
896 FLAGS_benchmark_format != "csv") {
897 PrintUsageAndExit();
898 }
899 }
900
RegisterBenchmarkInternal(Benchmark * bench)901 Benchmark* RegisterBenchmarkInternal(Benchmark* bench) {
902 std::unique_ptr<Benchmark> bench_ptr(bench);
903 BenchmarkFamilies* families = BenchmarkFamilies::GetInstance();
904 families->AddBenchmark(std::move(bench_ptr));
905 return bench;
906 }
907
908 } // end namespace internal
909
// Library initialization: parses the benchmark flags out of argv, applies the
// requested log level and warms up the timing helpers.
void Initialize(int* argc, char** argv) {
  internal::ParseCommandLineFlags(argc, argv);
  internal::SetLogLevel(FLAGS_v);
  // TODO remove this. It prints some output the first time it is called.
  // We don't want to have this output printed during benchmarking.
  MyCPUUsage();
  // The first call to walltime::Now initializes it. Call it once to
  // prevent the initialization from happening in a benchmark.
  walltime::Now();
}
920
921 } // end namespace benchmark
922