1 /*-------------------------------------------------------------------------
2 * drawElements C++ Base Library
3 * -----------------------------
4 *
5 * Copyright 2015 The Android Open Source Project
6 *
7 * Licensed under the Apache License, Version 2.0 (the "License");
8 * you may not use this file except in compliance with the License.
9 * You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
18 *
19 *//*!
20 * \file
21 * \brief Cross-thread barrier.
22 *//*--------------------------------------------------------------------*/
23
24 #include "deSpinBarrier.hpp"
25 #include "deThread.hpp"
26 #include "deRandom.hpp"
27 #include "deInt32.h"
28
29 #include <vector>
30
31 namespace de
32 {
33
SpinBarrier(deInt32 numThreads)34 SpinBarrier::SpinBarrier (deInt32 numThreads)
35 : m_numCores (deGetNumAvailableLogicalCores())
36 , m_numThreads (numThreads)
37 , m_numEntered (0)
38 , m_numLeaving (0)
39 , m_numRemoved (0)
40 {
41 DE_ASSERT(numThreads > 0);
42 }
43
~SpinBarrier(void)44 SpinBarrier::~SpinBarrier (void)
45 {
46 DE_ASSERT(m_numEntered == 0 && m_numLeaving == 0);
47 }
48
reset(deUint32 numThreads)49 void SpinBarrier::reset (deUint32 numThreads)
50 {
51 // If last threads were removed, m_numEntered > 0 && m_numRemoved > 0
52 DE_ASSERT(m_numLeaving == 0);
53 DE_ASSERT(numThreads > 0);
54 m_numThreads = numThreads;
55 m_numEntered = 0;
56 m_numLeaving = 0;
57 m_numRemoved = 0;
58 }
59
getWaitMode(SpinBarrier::WaitMode requested,deUint32 numCores,deInt32 numThreads)60 inline SpinBarrier::WaitMode getWaitMode (SpinBarrier::WaitMode requested, deUint32 numCores, deInt32 numThreads)
61 {
62 if (requested == SpinBarrier::WAIT_MODE_AUTO)
63 return ((deUint32)numThreads <= numCores) ? SpinBarrier::WAIT_MODE_BUSY : SpinBarrier::WAIT_MODE_YIELD;
64 else
65 return requested;
66 }
67
wait(SpinBarrier::WaitMode mode)68 inline void wait (SpinBarrier::WaitMode mode)
69 {
70 DE_ASSERT(mode == SpinBarrier::WAIT_MODE_YIELD || mode == SpinBarrier::WAIT_MODE_BUSY);
71
72 if (mode == SpinBarrier::WAIT_MODE_YIELD)
73 deYield();
74 }
75
sync(WaitMode requestedMode)76 void SpinBarrier::sync (WaitMode requestedMode)
77 {
78 const deInt32 cachedNumThreads = m_numThreads;
79 const WaitMode waitMode = getWaitMode(requestedMode, m_numCores, cachedNumThreads);
80
81 deMemoryReadWriteFence();
82
83 // m_numEntered must not be touched until all threads have had
84 // a chance to observe it being 0.
85 if (m_numLeaving > 0)
86 {
87 for (;;)
88 {
89 if (m_numLeaving == 0)
90 break;
91
92 wait(waitMode);
93 }
94 }
95
96 // If m_numRemoved > 0, m_numThreads will decrease. If m_numThreads is decreased
97 // just after atomicOp and before comparison, the branch could be taken by multiple
98 // threads. Since m_numThreads only changes if all threads are inside the spinbarrier,
99 // cached value at snapshotted at the beginning of the function will be equal for
100 // all threads.
101 if (deAtomicIncrement32(&m_numEntered) == cachedNumThreads)
102 {
103 // Release all waiting threads. Since this thread has not been removed, m_numLeaving will
104 // be >= 1 until m_numLeaving is decremented at the end of this function.
105 m_numThreads -= m_numRemoved;
106 m_numLeaving = m_numThreads;
107 m_numRemoved = 0;
108
109 deMemoryReadWriteFence();
110 m_numEntered = 0;
111 }
112 else
113 {
114 for (;;)
115 {
116 if (m_numEntered == 0)
117 break;
118
119 wait(waitMode);
120 }
121 }
122
123 deAtomicDecrement32(&m_numLeaving);
124 deMemoryReadWriteFence();
125 }
126
removeThread(WaitMode requestedMode)127 void SpinBarrier::removeThread (WaitMode requestedMode)
128 {
129 const deInt32 cachedNumThreads = m_numThreads;
130 const WaitMode waitMode = getWaitMode(requestedMode, m_numCores, cachedNumThreads);
131
132 // Wait for other threads exiting previous barrier
133 if (m_numLeaving > 0)
134 {
135 for (;;)
136 {
137 if (m_numLeaving == 0)
138 break;
139
140 wait(waitMode);
141 }
142 }
143
144 // Ask for last thread entering barrier to adjust thread count
145 deAtomicIncrement32(&m_numRemoved);
146
147 // See sync() - use cached value
148 if (deAtomicIncrement32(&m_numEntered) == cachedNumThreads)
149 {
150 // Release all waiting threads.
151 m_numThreads -= m_numRemoved;
152 m_numLeaving = m_numThreads;
153 m_numRemoved = 0;
154
155 deMemoryReadWriteFence();
156 m_numEntered = 0;
157 }
158 }
159
160 namespace
161 {
162
singleThreadTest(SpinBarrier::WaitMode mode)163 void singleThreadTest (SpinBarrier::WaitMode mode)
164 {
165 SpinBarrier barrier(1);
166
167 barrier.sync(mode);
168 barrier.sync(mode);
169 barrier.sync(mode);
170 }
171
172 class TestThread : public de::Thread
173 {
174 public:
TestThread(SpinBarrier & barrier,volatile deInt32 * sharedVar,int numThreads,int threadNdx)175 TestThread (SpinBarrier& barrier, volatile deInt32* sharedVar, int numThreads, int threadNdx)
176 : m_barrier (barrier)
177 , m_sharedVar (sharedVar)
178 , m_numThreads (numThreads)
179 , m_threadNdx (threadNdx)
180 , m_busyOk ((deUint32)m_numThreads <= deGetNumAvailableLogicalCores())
181 {
182 }
183
run(void)184 void run (void)
185 {
186 const int numIters = 10000;
187 de::Random rnd (deInt32Hash(m_numThreads) ^ deInt32Hash(m_threadNdx));
188
189 for (int iterNdx = 0; iterNdx < numIters; iterNdx++)
190 {
191 // Phase 1: count up
192 deAtomicIncrement32(m_sharedVar);
193
194 // Verify
195 m_barrier.sync(getWaitMode(rnd));
196
197 DE_TEST_ASSERT(*m_sharedVar == m_numThreads);
198
199 m_barrier.sync(getWaitMode(rnd));
200
201 // Phase 2: count down
202 deAtomicDecrement32(m_sharedVar);
203
204 // Verify
205 m_barrier.sync(getWaitMode(rnd));
206
207 DE_TEST_ASSERT(*m_sharedVar == 0);
208
209 m_barrier.sync(getWaitMode(rnd));
210 }
211 }
212
213 private:
214 SpinBarrier& m_barrier;
215 volatile deInt32* const m_sharedVar;
216 const int m_numThreads;
217 const int m_threadNdx;
218 const bool m_busyOk;
219
getWaitMode(de::Random & rnd)220 SpinBarrier::WaitMode getWaitMode (de::Random& rnd)
221 {
222 static const SpinBarrier::WaitMode s_allModes[] =
223 {
224 SpinBarrier::WAIT_MODE_YIELD,
225 SpinBarrier::WAIT_MODE_AUTO,
226 SpinBarrier::WAIT_MODE_BUSY,
227 };
228 const int numModes = DE_LENGTH_OF_ARRAY(s_allModes) - (m_busyOk ? 0 : 1);
229
230 return rnd.choose<SpinBarrier::WaitMode>(DE_ARRAY_BEGIN(s_allModes), DE_ARRAY_BEGIN(s_allModes) + numModes);
231 }
232 };
233
multiThreadTest(int numThreads)234 void multiThreadTest (int numThreads)
235 {
236 SpinBarrier barrier (numThreads);
237 volatile deInt32 sharedVar = 0;
238 std::vector<TestThread*> threads (numThreads, static_cast<TestThread*>(DE_NULL));
239
240 for (int ndx = 0; ndx < numThreads; ndx++)
241 {
242 threads[ndx] = new TestThread(barrier, &sharedVar, numThreads, ndx);
243 DE_TEST_ASSERT(threads[ndx]);
244 threads[ndx]->start();
245 }
246
247 for (int ndx = 0; ndx < numThreads; ndx++)
248 {
249 threads[ndx]->join();
250 delete threads[ndx];
251 }
252
253 DE_TEST_ASSERT(sharedVar == 0);
254 }
255
singleThreadRemoveTest(SpinBarrier::WaitMode mode)256 void singleThreadRemoveTest (SpinBarrier::WaitMode mode)
257 {
258 SpinBarrier barrier(3);
259
260 barrier.removeThread(mode);
261 barrier.removeThread(mode);
262 barrier.sync(mode);
263 barrier.removeThread(mode);
264
265 barrier.reset(1);
266 barrier.sync(mode);
267
268 barrier.reset(2);
269 barrier.removeThread(mode);
270 barrier.sync(mode);
271 }
272
273 class TestExitThread : public de::Thread
274 {
275 public:
TestExitThread(SpinBarrier & barrier,int numThreads,int threadNdx,SpinBarrier::WaitMode waitMode)276 TestExitThread (SpinBarrier& barrier, int numThreads, int threadNdx, SpinBarrier::WaitMode waitMode)
277 : m_barrier (barrier)
278 , m_numThreads (numThreads)
279 , m_threadNdx (threadNdx)
280 , m_waitMode (waitMode)
281 {
282 }
283
run(void)284 void run (void)
285 {
286 const int numIters = 10000;
287 de::Random rnd (deInt32Hash(m_numThreads) ^ deInt32Hash(m_threadNdx) ^ deInt32Hash((deInt32)m_waitMode));
288 const int invExitProb = 1000;
289
290 for (int iterNdx = 0; iterNdx < numIters; iterNdx++)
291 {
292 if (rnd.getInt(0, invExitProb) == 0)
293 {
294 m_barrier.removeThread(m_waitMode);
295 break;
296 }
297 else
298 m_barrier.sync(m_waitMode);
299 }
300 }
301
302 private:
303 SpinBarrier& m_barrier;
304 const int m_numThreads;
305 const int m_threadNdx;
306 const SpinBarrier::WaitMode m_waitMode;
307 };
308
multiThreadRemoveTest(int numThreads,SpinBarrier::WaitMode waitMode)309 void multiThreadRemoveTest (int numThreads, SpinBarrier::WaitMode waitMode)
310 {
311 SpinBarrier barrier (numThreads);
312 std::vector<TestExitThread*> threads (numThreads, static_cast<TestExitThread*>(DE_NULL));
313
314 for (int ndx = 0; ndx < numThreads; ndx++)
315 {
316 threads[ndx] = new TestExitThread(barrier, numThreads, ndx, waitMode);
317 DE_TEST_ASSERT(threads[ndx]);
318 threads[ndx]->start();
319 }
320
321 for (int ndx = 0; ndx < numThreads; ndx++)
322 {
323 threads[ndx]->join();
324 delete threads[ndx];
325 }
326 }
327
328 } // anonymous
329
SpinBarrier_selfTest(void)330 void SpinBarrier_selfTest (void)
331 {
332 singleThreadTest(SpinBarrier::WAIT_MODE_YIELD);
333 singleThreadTest(SpinBarrier::WAIT_MODE_BUSY);
334 singleThreadTest(SpinBarrier::WAIT_MODE_AUTO);
335 multiThreadTest(1);
336 multiThreadTest(2);
337 multiThreadTest(4);
338 multiThreadTest(8);
339 multiThreadTest(16);
340
341 singleThreadRemoveTest(SpinBarrier::WAIT_MODE_YIELD);
342 singleThreadRemoveTest(SpinBarrier::WAIT_MODE_BUSY);
343 singleThreadRemoveTest(SpinBarrier::WAIT_MODE_AUTO);
344 multiThreadRemoveTest(1, SpinBarrier::WAIT_MODE_BUSY);
345 multiThreadRemoveTest(2, SpinBarrier::WAIT_MODE_AUTO);
346 multiThreadRemoveTest(4, SpinBarrier::WAIT_MODE_AUTO);
347 multiThreadRemoveTest(8, SpinBarrier::WAIT_MODE_AUTO);
348 multiThreadRemoveTest(16, SpinBarrier::WAIT_MODE_AUTO);
349 multiThreadRemoveTest(1, SpinBarrier::WAIT_MODE_YIELD);
350 multiThreadRemoveTest(2, SpinBarrier::WAIT_MODE_YIELD);
351 multiThreadRemoveTest(4, SpinBarrier::WAIT_MODE_YIELD);
352 multiThreadRemoveTest(8, SpinBarrier::WAIT_MODE_YIELD);
353 multiThreadRemoveTest(16, SpinBarrier::WAIT_MODE_YIELD);
354 }
355
356 } // de
357