1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include <stddef.h>
6
7 #include <memory>
8 #include <vector>
9
10 #include "base/base_switches.h"
11 #include "base/bind.h"
12 #include "base/command_line.h"
13 #include "base/location.h"
14 #include "base/memory/ptr_util.h"
15 #include "base/message_loop/message_loop.h"
16 #include "base/single_thread_task_runner.h"
17 #include "base/strings/stringprintf.h"
18 #include "base/synchronization/condition_variable.h"
19 #include "base/synchronization/lock.h"
20 #include "base/synchronization/waitable_event.h"
21 #include "base/threading/thread.h"
22 #include "base/time/time.h"
23 #include "build/build_config.h"
24 #include "testing/gtest/include/gtest/gtest.h"
25 #include "testing/perf/perf_test.h"
26
27 #if defined(OS_POSIX)
28 #include <pthread.h>
29 #endif
30
31 namespace base {
32
33 namespace {
34
// Number of hops (task posts or event signals) performed per measurement.
constexpr int kNumRuns = 100000;
36
37 // Base class for a threading perf-test. This sets up some threads for the
38 // test and measures the clock-time in addition to time spent on each thread.
39 class ThreadPerfTest : public testing::Test {
40 public:
ThreadPerfTest()41 ThreadPerfTest()
42 : done_(WaitableEvent::ResetPolicy::AUTOMATIC,
43 WaitableEvent::InitialState::NOT_SIGNALED) {}
44
45 // To be implemented by each test. Subclass must uses threads_ such that
46 // their cpu-time can be measured. Test must return from PingPong() _and_
47 // call FinishMeasurement from any thread to complete the test.
Init()48 virtual void Init() {
49 if (ThreadTicks::IsSupported())
50 ThreadTicks::WaitUntilInitialized();
51 }
52 virtual void PingPong(int hops) = 0;
Reset()53 virtual void Reset() {}
54
TimeOnThread(base::ThreadTicks * ticks,base::WaitableEvent * done)55 void TimeOnThread(base::ThreadTicks* ticks, base::WaitableEvent* done) {
56 *ticks = base::ThreadTicks::Now();
57 done->Signal();
58 }
59
ThreadNow(const base::Thread & thread)60 base::ThreadTicks ThreadNow(const base::Thread& thread) {
61 base::WaitableEvent done(WaitableEvent::ResetPolicy::AUTOMATIC,
62 WaitableEvent::InitialState::NOT_SIGNALED);
63 base::ThreadTicks ticks;
64 thread.task_runner()->PostTask(
65 FROM_HERE, base::BindOnce(&ThreadPerfTest::TimeOnThread,
66 base::Unretained(this), &ticks, &done));
67 done.Wait();
68 return ticks;
69 }
70
RunPingPongTest(const std::string & name,unsigned num_threads)71 void RunPingPongTest(const std::string& name, unsigned num_threads) {
72 // Create threads and collect starting cpu-time for each thread.
73 std::vector<base::ThreadTicks> thread_starts;
74 while (threads_.size() < num_threads) {
75 threads_.push_back(std::make_unique<base::Thread>("PingPonger"));
76 threads_.back()->Start();
77 if (base::ThreadTicks::IsSupported())
78 thread_starts.push_back(ThreadNow(*threads_.back()));
79 }
80
81 Init();
82
83 base::TimeTicks start = base::TimeTicks::Now();
84 PingPong(kNumRuns);
85 done_.Wait();
86 base::TimeTicks end = base::TimeTicks::Now();
87
88 // Gather the cpu-time spent on each thread. This does one extra tasks,
89 // but that should be in the noise given enough runs.
90 base::TimeDelta thread_time;
91 while (threads_.size()) {
92 if (base::ThreadTicks::IsSupported()) {
93 thread_time += ThreadNow(*threads_.back()) - thread_starts.back();
94 thread_starts.pop_back();
95 }
96 threads_.pop_back();
97 }
98
99 Reset();
100
101 double num_runs = static_cast<double>(kNumRuns);
102 double us_per_task_clock = (end - start).InMicroseconds() / num_runs;
103 double us_per_task_cpu = thread_time.InMicroseconds() / num_runs;
104
105 // Clock time per task.
106 perf_test::PrintResult(
107 "task", "", name + "_time ", us_per_task_clock, "us/hop", true);
108
109 // Total utilization across threads if available (likely higher).
110 if (base::ThreadTicks::IsSupported()) {
111 perf_test::PrintResult(
112 "task", "", name + "_cpu ", us_per_task_cpu, "us/hop", true);
113 }
114 }
115
116 protected:
FinishMeasurement()117 void FinishMeasurement() { done_.Signal(); }
118 std::vector<std::unique_ptr<base::Thread>> threads_;
119
120 private:
121 base::WaitableEvent done_;
122 };
123
124 // Class to test task performance by posting empty tasks back and forth.
125 class TaskPerfTest : public ThreadPerfTest {
NextThread(int count)126 base::Thread* NextThread(int count) {
127 return threads_[count % threads_.size()].get();
128 }
129
PingPong(int hops)130 void PingPong(int hops) override {
131 if (!hops) {
132 FinishMeasurement();
133 return;
134 }
135 NextThread(hops)->task_runner()->PostTask(
136 FROM_HERE, base::BindOnce(&ThreadPerfTest::PingPong,
137 base::Unretained(this), hops - 1));
138 }
139 };
140
141 // This tries to test the 'best-case' as well as the 'worst-case' task posting
142 // performance. The best-case keeps one thread alive such that it never yeilds,
143 // while the worse-case forces a context switch for every task. Four threads are
144 // used to ensure the threads do yeild (with just two it might be possible for
145 // both threads to stay awake if they can signal each other fast enough).
TEST_F(TaskPerfTest,TaskPingPong)146 TEST_F(TaskPerfTest, TaskPingPong) {
147 RunPingPongTest("1_Task_Threads", 1);
148 RunPingPongTest("4_Task_Threads", 4);
149 }
150
151
152 // Same as above, but add observers to test their perf impact.
153 class MessageLoopObserver : public base::MessageLoop::TaskObserver {
154 public:
WillProcessTask(const base::PendingTask & pending_task)155 void WillProcessTask(const base::PendingTask& pending_task) override {}
DidProcessTask(const base::PendingTask & pending_task)156 void DidProcessTask(const base::PendingTask& pending_task) override {}
157 };
158 MessageLoopObserver message_loop_observer;
159
160 class TaskObserverPerfTest : public TaskPerfTest {
161 public:
Init()162 void Init() override {
163 TaskPerfTest::Init();
164 for (size_t i = 0; i < threads_.size(); i++) {
165 threads_[i]->message_loop()->task_runner()->PostTask(
166 FROM_HERE, BindOnce(&MessageLoop::AddTaskObserver,
167 Unretained(threads_[i]->message_loop()),
168 Unretained(&message_loop_observer)));
169 }
170 }
171 };
172
// Task ping-pong on one and on four threads, with a task observer installed.
TEST_F(TaskObserverPerfTest, TaskPingPong) {
  const struct {
    const char* name;
    unsigned num_threads;
  } kCases[] = {
      {"1_Task_Threads_With_Observer", 1},
      {"4_Task_Threads_With_Observer", 4},
  };
  for (const auto& test_case : kCases)
    RunPingPongTest(test_case.name, test_case.num_threads);
}
177
178 // Class to test our WaitableEvent performance by signaling back and fort.
179 // WaitableEvent is templated so we can also compare with other versions.
180 template <typename WaitableEventType>
181 class EventPerfTest : public ThreadPerfTest {
182 public:
Init()183 void Init() override {
184 for (size_t i = 0; i < threads_.size(); i++) {
185 events_.push_back(std::make_unique<WaitableEventType>(
186 WaitableEvent::ResetPolicy::AUTOMATIC,
187 WaitableEvent::InitialState::NOT_SIGNALED));
188 }
189 }
190
Reset()191 void Reset() override { events_.clear(); }
192
WaitAndSignalOnThread(size_t event)193 void WaitAndSignalOnThread(size_t event) {
194 size_t next_event = (event + 1) % events_.size();
195 int my_hops = 0;
196 do {
197 events_[event]->Wait();
198 my_hops = --remaining_hops_; // We own 'hops' between Wait and Signal.
199 events_[next_event]->Signal();
200 } while (my_hops > 0);
201 // Once we are done, all threads will signal as hops passes zero.
202 // We only signal completion once, on the thread that reaches zero.
203 if (!my_hops)
204 FinishMeasurement();
205 }
206
PingPong(int hops)207 void PingPong(int hops) override {
208 remaining_hops_ = hops;
209 for (size_t i = 0; i < threads_.size(); i++) {
210 threads_[i]->task_runner()->PostTask(
211 FROM_HERE, base::BindOnce(&EventPerfTest::WaitAndSignalOnThread,
212 base::Unretained(this), i));
213 }
214
215 // Kick off the Signal ping-ponging.
216 events_.front()->Signal();
217 }
218
219 int remaining_hops_;
220 std::vector<std::unique_ptr<WaitableEventType>> events_;
221 };
222
223 // Similar to the task posting test, this just tests similar functionality
224 // using WaitableEvents. We only test four threads (worst-case), but we
225 // might want to craft a way to test the best-case (where the thread doesn't
226 // end up blocking because the event is already signalled).
227 typedef EventPerfTest<base::WaitableEvent> WaitableEventThreadPerfTest;
TEST_F(WaitableEventThreadPerfTest,EventPingPong)228 TEST_F(WaitableEventThreadPerfTest, EventPingPong) {
229 RunPingPongTest("4_WaitableEvent_Threads", 4);
230 }
231
232 // Build a minimal event using ConditionVariable.
233 class ConditionVariableEvent {
234 public:
ConditionVariableEvent(WaitableEvent::ResetPolicy reset_policy,WaitableEvent::InitialState initial_state)235 ConditionVariableEvent(WaitableEvent::ResetPolicy reset_policy,
236 WaitableEvent::InitialState initial_state)
237 : cond_(&lock_), signaled_(false) {
238 DCHECK_EQ(WaitableEvent::ResetPolicy::AUTOMATIC, reset_policy);
239 DCHECK_EQ(WaitableEvent::InitialState::NOT_SIGNALED, initial_state);
240 }
241
Signal()242 void Signal() {
243 {
244 base::AutoLock scoped_lock(lock_);
245 signaled_ = true;
246 }
247 cond_.Signal();
248 }
249
Wait()250 void Wait() {
251 base::AutoLock scoped_lock(lock_);
252 while (!signaled_)
253 cond_.Wait();
254 signaled_ = false;
255 }
256
257 private:
258 base::Lock lock_;
259 base::ConditionVariable cond_;
260 bool signaled_;
261 };
262
263 // This is meant to test the absolute minimal context switching time
264 // using our own base synchronization code.
265 typedef EventPerfTest<ConditionVariableEvent> ConditionVariablePerfTest;
TEST_F(ConditionVariablePerfTest,EventPingPong)266 TEST_F(ConditionVariablePerfTest, EventPingPong) {
267 RunPingPongTest("4_ConditionVariable_Threads", 4);
268 }
269 #if defined(OS_POSIX)
270
271 // Absolutely 100% minimal posix waitable event. If there is a better/faster
272 // way to force a context switch, we should use that instead.
273 class PthreadEvent {
274 public:
PthreadEvent(WaitableEvent::ResetPolicy reset_policy,WaitableEvent::InitialState initial_state)275 PthreadEvent(WaitableEvent::ResetPolicy reset_policy,
276 WaitableEvent::InitialState initial_state) {
277 DCHECK_EQ(WaitableEvent::ResetPolicy::AUTOMATIC, reset_policy);
278 DCHECK_EQ(WaitableEvent::InitialState::NOT_SIGNALED, initial_state);
279 pthread_mutex_init(&mutex_, nullptr);
280 pthread_cond_init(&cond_, nullptr);
281 signaled_ = false;
282 }
283
~PthreadEvent()284 ~PthreadEvent() {
285 pthread_cond_destroy(&cond_);
286 pthread_mutex_destroy(&mutex_);
287 }
288
Signal()289 void Signal() {
290 pthread_mutex_lock(&mutex_);
291 signaled_ = true;
292 pthread_mutex_unlock(&mutex_);
293 pthread_cond_signal(&cond_);
294 }
295
Wait()296 void Wait() {
297 pthread_mutex_lock(&mutex_);
298 while (!signaled_)
299 pthread_cond_wait(&cond_, &mutex_);
300 signaled_ = false;
301 pthread_mutex_unlock(&mutex_);
302 }
303
304 private:
305 bool signaled_;
306 pthread_mutex_t mutex_;
307 pthread_cond_t cond_;
308 };
309
310 // This is meant to test the absolute minimal context switching time.
311 // If there is any faster way to do this we should substitute it in.
312 typedef EventPerfTest<PthreadEvent> PthreadEventPerfTest;
TEST_F(PthreadEventPerfTest,EventPingPong)313 TEST_F(PthreadEventPerfTest, EventPingPong) {
314 RunPingPongTest("4_PthreadCondVar_Threads", 4);
315 }
316
317 #endif
318
319 } // namespace
320
321 } // namespace base
322