#include <fcntl.h>
#include <poll.h>
#include <unistd.h>

#include <algorithm>
#include <atomic>
#include <cassert>
#include <cerrno>
#include <iostream>
#include <limits>
#include <mutex>
#include <random>
#include <string>
#include <thread>
#include <tuple>
#include <unordered_map>
#include <vector>

#include <gtest/gtest.h>
#include <sync/sync.h>
#include <sw_sync.h>
17
18 // TODO: better stress tests?
19 // Handle more than 64 fd's simultaneously, i.e. fix sync_fence_info's 4k limit.
20 // Handle wraparound in timelines like nvidia.
21
22 using namespace std;
23
24 namespace {
25
26 // C++ wrapper class for sync timeline.
27 class SyncTimeline {
28 int m_fd = -1;
29 bool m_fdInitialized = false;
30 public:
31 SyncTimeline(const SyncTimeline &) = delete;
32 SyncTimeline& operator=(SyncTimeline&) = delete;
SyncTimeline()33 SyncTimeline() noexcept {
34 int fd = sw_sync_timeline_create();
35 if (fd == -1)
36 return;
37 m_fdInitialized = true;
38 m_fd = fd;
39 }
destroy()40 void destroy() {
41 if (m_fdInitialized) {
42 close(m_fd);
43 m_fd = -1;
44 m_fdInitialized = false;
45 }
46 }
~SyncTimeline()47 ~SyncTimeline() {
48 destroy();
49 }
isValid() const50 bool isValid() const {
51 if (m_fdInitialized) {
52 int status = fcntl(m_fd, F_GETFD, 0);
53 if (status >= 0)
54 return true;
55 else
56 return false;
57 }
58 else {
59 return false;
60 }
61 }
getFd() const62 int getFd() const {
63 return m_fd;
64 }
inc(int val=1)65 int inc(int val = 1) {
66 return sw_sync_timeline_inc(m_fd, val);
67 }
68 };
69
// Plain value copy of one sync point's description.
// Field order matters: instances are brace-initialized positionally.
struct SyncPointInfo {
    std::string driverName;  // name of the driver that owns the sync point
    std::string objectName;  // name of the object the sync point belongs to
    uint64_t timeStampNs;    // timestamp in nanoseconds
    int status;              // 1 = signaled, 0 = active, negative = error
};
76
77 // Wrapper class for sync fence.
78 class SyncFence {
79 int m_fd = -1;
80 bool m_fdInitialized = false;
81 static int s_fenceCount;
82
setFd(int fd)83 void setFd(int fd) {
84 m_fd = fd;
85 m_fdInitialized = true;
86 }
clearFd()87 void clearFd() {
88 m_fd = -1;
89 m_fdInitialized = false;
90 }
91 public:
isValid() const92 bool isValid() const {
93 if (m_fdInitialized) {
94 int status = fcntl(m_fd, F_GETFD, 0);
95 if (status >= 0)
96 return true;
97 else
98 return false;
99 }
100 else {
101 return false;
102 }
103 }
operator =(SyncFence && rhs)104 SyncFence& operator=(SyncFence &&rhs) noexcept {
105 destroy();
106 if (rhs.isValid()) {
107 setFd(rhs.getFd());
108 rhs.clearFd();
109 }
110 return *this;
111 }
SyncFence(SyncFence && fence)112 SyncFence(SyncFence &&fence) noexcept {
113 if (fence.isValid()) {
114 setFd(fence.getFd());
115 fence.clearFd();
116 }
117 }
SyncFence(const SyncFence & fence)118 SyncFence(const SyncFence &fence) noexcept {
119 // This is ok, as sync fences are immutable after construction, so a dup
120 // is basically the same thing as a copy.
121 if (fence.isValid()) {
122 int fd = dup(fence.getFd());
123 if (fd == -1)
124 return;
125 setFd(fd);
126 }
127 }
SyncFence(const SyncTimeline & timeline,int value,const char * name=nullptr)128 SyncFence(const SyncTimeline &timeline,
129 int value,
130 const char *name = nullptr) noexcept {
131 std::string autoName = "allocFence";
132 autoName += s_fenceCount;
133 s_fenceCount++;
134 int fd = sw_sync_fence_create(timeline.getFd(), name ? name : autoName.c_str(), value);
135 if (fd == -1)
136 return;
137 setFd(fd);
138 }
SyncFence(const SyncFence & a,const SyncFence & b,const char * name=nullptr)139 SyncFence(const SyncFence &a, const SyncFence &b, const char *name = nullptr) noexcept {
140 std::string autoName = "mergeFence";
141 autoName += s_fenceCount;
142 s_fenceCount++;
143 int fd = sync_merge(name ? name : autoName.c_str(), a.getFd(), b.getFd());
144 if (fd == -1)
145 return;
146 setFd(fd);
147 }
SyncFence(const vector<SyncFence> & sources)148 SyncFence(const vector<SyncFence> &sources) noexcept {
149 assert(sources.size());
150 SyncFence temp(*begin(sources));
151 for (auto itr = ++begin(sources); itr != end(sources); ++itr) {
152 temp = SyncFence(*itr, temp);
153 }
154 if (temp.isValid()) {
155 setFd(temp.getFd());
156 temp.clearFd();
157 }
158 }
destroy()159 void destroy() {
160 if (isValid()) {
161 close(m_fd);
162 clearFd();
163 }
164 }
~SyncFence()165 ~SyncFence() {
166 destroy();
167 }
getFd() const168 int getFd() const {
169 return m_fd;
170 }
wait(int timeout=-1)171 int wait(int timeout = -1) {
172 return sync_wait(m_fd, timeout);
173 }
getInfo() const174 vector<SyncPointInfo> getInfo() const {
175 struct sync_pt_info *pointInfo = nullptr;
176 vector<SyncPointInfo> fenceInfo;
177 sync_fence_info_data *info = sync_fence_info(getFd());
178 if (!info) {
179 return fenceInfo;
180 }
181 while ((pointInfo = sync_pt_info(info, pointInfo))) {
182 fenceInfo.push_back(SyncPointInfo{
183 pointInfo->driver_name,
184 pointInfo->obj_name,
185 pointInfo->timestamp_ns,
186 pointInfo->status});
187 }
188 sync_fence_info_free(info);
189 return fenceInfo;
190 }
getSize() const191 int getSize() const {
192 return getInfo().size();
193 }
getSignaledCount() const194 int getSignaledCount() const {
195 return countWithStatus(1);
196 }
getActiveCount() const197 int getActiveCount() const {
198 return countWithStatus(0);
199 }
getErrorCount() const200 int getErrorCount() const {
201 return countWithStatus(-1);
202 }
203 private:
countWithStatus(int status) const204 int countWithStatus(int status) const {
205 int count = 0;
206 for (auto &info : getInfo()) {
207 if (info.status == status) {
208 count++;
209 }
210 }
211 return count;
212 }
213 };
214
215 int SyncFence::s_fenceCount = 0;
216
// A default-constructed timeline must report itself as valid.
TEST(AllocTest, Timeline) {
    SyncTimeline tl;
    ASSERT_TRUE(tl.isValid());
}
221
// A fence created on a valid timeline must itself be valid.
TEST(AllocTest, Fence) {
    SyncTimeline tl;
    ASSERT_TRUE(tl.isValid());

    SyncFence firstPoint(tl, 1);
    ASSERT_TRUE(firstPoint.isValid());
}
229
// Creating a fence on an invalid timeline descriptor must fail.
TEST(AllocTest, FenceNegative) {
    int timeline = sw_sync_timeline_create();
    // Every non-negative value is a valid descriptor, 0 included.
    ASSERT_GE(timeline, 0);

    // bad fd. EXPECT rather than ASSERT so that a failure here does not
    // return early and skip the close() below.
    EXPECT_LT(sw_sync_fence_create(-1, "fence", 1), 0);

    // No name - segfaults in user space.
    // Maybe we should be friendlier here?
    /*
    ASSERT_LT(sw_sync_fence_create(timeline, nullptr, 1), 0);
    */
    close(timeline);
}
244
// Walks one timeline past a fence at value 5 and checks wait() at each step.
TEST(FenceTest, OneTimelineWait) {
    SyncTimeline tl;
    ASSERT_TRUE(tl.isValid());

    SyncFence atFive(tl, 5);
    ASSERT_TRUE(atFive.isValid());

    // Timeline at 0: a zero-timeout wait has to time out.
    ASSERT_EQ(atFive.wait(0), -1);
    ASSERT_EQ(errno, ETIME);

    // Timeline 0 -> 1: still short of the fence.
    ASSERT_EQ(tl.inc(1), 0);
    ASSERT_EQ(atFive.wait(0), -1);
    ASSERT_EQ(errno, ETIME);

    // Timeline 1 -> 5: the fence signals and the wait succeeds.
    ASSERT_EQ(tl.inc(4), 0);
    ASSERT_EQ(atFive.wait(0), 0);

    // Go even further, and confirm wait still succeeds.
    ASSERT_EQ(tl.inc(10), 0);
    ASSERT_EQ(atFive.wait(0), 0);
}
273
// Checks that select() reports a fence fd as readable only once it signals.
TEST(FenceTest, OneTimelinePoll) {
    SyncTimeline tl;
    ASSERT_TRUE(tl.isValid());

    SyncFence fence(tl, 100);
    ASSERT_TRUE(fence.isValid());

    fd_set readable;
    timeval noWait = {0};

    // Fence not signaled yet: select() with a zero timeout reports nothing.
    FD_ZERO(&readable);
    FD_SET(fence.getFd(), &readable);
    ASSERT_EQ(select(fence.getFd() + 1, &readable, nullptr, nullptr, &noWait), 0);

    // Move the timeline past the fence.
    tl.inc(100);
    tl.inc(100);

    // Re-arm the set; select() should now report the fd as ready for reading.
    FD_ZERO(&readable);
    FD_SET(fence.getFd(), &readable);
    ASSERT_EQ(select(fence.getFd() + 1, &readable, nullptr, nullptr, &noWait), 1);
    ASSERT_TRUE(FD_ISSET(fence.getFd(), &readable));
}
300
// Merges three fences from one timeline and checks that the merged fence
// signals only when the latest of them does.
TEST(FenceTest, OneTimelineMerge) {
    SyncTimeline tl;
    ASSERT_TRUE(tl.isValid());

    // Fences a, b, c sit at values 1, 2, 3; d is the merge of all three.
    SyncFence a(tl, 1);
    SyncFence b(tl, 2);
    SyncFence c(tl, 3);
    ASSERT_TRUE(a.isValid());
    ASSERT_TRUE(b.isValid());
    ASSERT_TRUE(c.isValid());

    SyncFence d({a, b, c});
    ASSERT_TRUE(d.isValid());

    // Every fence, the merged one included, holds exactly one active point.
    ASSERT_EQ(a.getActiveCount(), 1);
    ASSERT_EQ(b.getActiveCount(), 1);
    ASSERT_EQ(c.getActiveCount(), 1);
    ASSERT_EQ(d.getActiveCount(), 1);

    // Step to 1: a signals, d stays active.
    tl.inc(1);
    ASSERT_EQ(a.getSignaledCount(), 1);
    ASSERT_EQ(d.getActiveCount(), 1);

    // Step to 2: b signals, d stays active.
    tl.inc(1);
    ASSERT_EQ(b.getSignaledCount(), 1);
    ASSERT_EQ(d.getActiveCount(), 1);

    // Step to 3: c signals, and so does d.
    tl.inc(1);
    ASSERT_EQ(c.getSignaledCount(), 1);
    ASSERT_EQ(d.getActiveCount(), 0);
    ASSERT_EQ(d.getSignaledCount(), 1);
}
334
// Merging a fence with itself must give a valid fence with a single point.
TEST(FenceTest, MergeSameFence) {
    SyncTimeline tl;
    ASSERT_TRUE(tl.isValid());

    SyncFence original(tl, 5);
    ASSERT_TRUE(original.isValid());

    SyncFence selfMergeFence(original, original);
    ASSERT_TRUE(selfMergeFence.isValid());

    // Nothing signaled before the timeline moves...
    ASSERT_EQ(selfMergeFence.getSignaledCount(), 0);

    // ...and exactly one signaled point once it reaches 5.
    tl.inc(5);
    ASSERT_EQ(selfMergeFence.getSignaledCount(), 1);
}
350
// A blocking wait on a fence must fail with ENOENT when the fence's timeline
// is destroyed underneath it.
TEST(FenceTest, WaitOnDestroyedTimeline) {
    SyncTimeline tl;
    ASSERT_TRUE(tl.isValid());

    SyncFence fenceSig(tl, 100);
    SyncFence fenceKill(tl, 200);

    // Runs on the helper thread: signal fenceSig, then block on fenceKill
    // until the timeline is killed.
    auto waiter = [&]() {
        ASSERT_EQ(tl.inc(100), 0);

        ASSERT_EQ(fenceKill.wait(-1), -1);
        ASSERT_EQ(errno, ENOENT);
    };
    thread waitThread{waiter};

    // fenceSig signals once the helper thread is up and running.
    fenceSig.wait();

    // Kill the timeline while the helper is waiting on fenceKill.
    tl.destroy();

    // Let the helper thread finish.
    waitThread.join();
}
377
// poll() on a fence must report POLLERR when the fence's timeline is
// destroyed underneath it.
TEST(FenceTest, PollOnDestroyedTimeline) {
    SyncTimeline tl;
    ASSERT_TRUE(tl.isValid());

    SyncFence fenceSig(tl, 100);
    SyncFence fenceKill(tl, 200);

    // Runs on the helper thread: signal fenceSig, then poll fenceKill with
    // no timeout until the timeline is killed.
    auto poller = [&]() {
        ASSERT_EQ(tl.inc(100), 0);

        struct pollfd pfd;
        pfd.fd = fenceKill.getFd();
        pfd.events = POLLIN | POLLERR;
        ASSERT_EQ(poll(&pfd, 1, -1), 1);
        ASSERT_TRUE(pfd.revents & POLLERR);
    };
    thread waitThread{poller};

    // fenceSig signals once the helper thread is up and running.
    fenceSig.wait();

    // Kill the timeline while the helper is polling fenceKill.
    tl.destroy();

    // Let the helper thread finish.
    waitThread.join();
}
408
// Merges fences from three separate timelines and checks the active/signaled
// counts as each timeline is advanced in turn.
TEST(FenceTest, MultiTimelineWait) {
    SyncTimeline tlA;
    SyncTimeline tlB;
    SyncTimeline tlC;

    SyncFence fenceA(tlA, 5);
    SyncFence fenceB(tlB, 5);
    SyncFence fenceC(tlC, 5);

    // One fence covering a point on each of the three timelines.
    SyncFence mergedFence({fenceA, fenceB, fenceC});
    ASSERT_TRUE(mergedFence.isValid());

    // Nothing has moved yet: three active points and a timed-out wait.
    ASSERT_EQ(mergedFence.getActiveCount(), 3);
    ASSERT_EQ(mergedFence.wait(0), -1);
    ASSERT_EQ(errno, ETIME);

    tlA.inc(5);
    ASSERT_EQ(mergedFence.getActiveCount(), 2);
    ASSERT_EQ(mergedFence.getSignaledCount(), 1);

    tlB.inc(5);
    ASSERT_EQ(mergedFence.getActiveCount(), 1);
    ASSERT_EQ(mergedFence.getSignaledCount(), 2);

    tlC.inc(5);
    ASSERT_EQ(mergedFence.getActiveCount(), 0);
    ASSERT_EQ(mergedFence.getSignaledCount(), 3);

    // With every point signaled the wait must now succeed.
    ASSERT_EQ(mergedFence.wait(100), 0);
}
440
// Two threads take strict turns incrementing a shared counter, using fences
// on one shared timeline as the only hand-off mechanism.
TEST(StressTest, TwoThreadsSharedTimeline) {
    const int iterations = 1 << 16;
    int counter = 0;
    SyncTimeline timeline;
    ASSERT_TRUE(timeline.isValid());

    // Thread 0 owns the even timeline values, thread 1 the odd ones.
    auto threadMain = [&](int threadId) {
        for (int i = 0; i < iterations; i++) {
            const int turn = i * 2 + threadId;

            SyncFence fence(timeline, turn);
            ASSERT_TRUE(fence.isValid());

            // Block until the other thread has finished its previous turn.
            ASSERT_EQ(fence.wait(), 0);

            // The other thread's write must be visible before we add ours.
            ASSERT_EQ(counter, turn);
            counter++;

            // Hand the turn over to the other thread.
            ASSERT_EQ(timeline.inc(), 0);
        }
    };

    thread a{threadMain, 0};
    thread b{threadMain, 1};
    a.join();
    b.join();

    // Each thread contributed exactly `iterations` increments.
    ASSERT_EQ(counter, iterations * 2);
}
474
475 class ConsumerStressTest : public ::testing::TestWithParam<int> {};
476
TEST_P(ConsumerStressTest,MultiProducerSingleConsumer)477 TEST_P(ConsumerStressTest, MultiProducerSingleConsumer) {
478 mutex lock;
479 int counter = 0;
480 int iterations = 1 << 12;
481
482 vector<SyncTimeline> producerTimelines(GetParam());
483 vector<thread> threads;
484 SyncTimeline consumerTimeline;
485
486 // Producer threads run this lambda.
487 auto threadMain = [&](int threadId) {
488 for (int i = 0; i < iterations; i++) {
489 SyncFence fence(consumerTimeline, i);
490 ASSERT_TRUE(fence.isValid());
491
492 // Wait for the consumer to finish. Use alternate
493 // means of waiting on the fence.
494 if ((iterations + threadId) % 8 != 0) {
495 ASSERT_EQ(fence.wait(), 0);
496 }
497 else {
498 while (fence.getSignaledCount() != 1) {
499 ASSERT_EQ(fence.getErrorCount(), 0);
500 }
501 }
502
503 // Every producer increments the counter, the consumer checks + erases it.
504 lock.lock();
505 counter++;
506 lock.unlock();
507
508 ASSERT_EQ(producerTimelines[threadId].inc(), 0);
509 }
510 };
511
512 for (int i = 0; i < GetParam(); i++) {
513 threads.push_back(thread{threadMain, i});
514 }
515
516 // Consumer thread runs this loop.
517 for (int i = 1; i <= iterations; i++) {
518 // Create a fence representing all producers final timelines.
519 vector<SyncFence> fences;
520 for (auto& timeline : producerTimelines) {
521 fences.push_back(SyncFence(timeline, i));
522 }
523 SyncFence mergeFence(fences);
524 ASSERT_TRUE(mergeFence.isValid());
525
526 // Make sure we see an increment from every producer thread. Vary
527 // the means by which we wait.
528 if (iterations % 8 != 0) {
529 ASSERT_EQ(mergeFence.wait(), 0);
530 }
531 else {
532 while (mergeFence.getSignaledCount() != mergeFence.getSize()) {
533 ASSERT_EQ(mergeFence.getErrorCount(), 0);
534 }
535 }
536 ASSERT_EQ(counter, GetParam()*i);
537
538 // Release the producer threads.
539 ASSERT_EQ(consumerTimeline.inc(), 0);
540 }
541
542 for_each(begin(threads), end(threads), [](thread& thread) { thread.join(); });
543 }
544 INSTANTIATE_TEST_CASE_P(
545 ParameterizedStressTest,
546 ConsumerStressTest,
547 ::testing::Values(2,4,16));
548
549 class MergeStressTest : public ::testing::TestWithParam<tuple<int, int>> {};
550
551 template <typename K, typename V> using dict = unordered_map<K,V>;
552
TEST_P(MergeStressTest,RandomMerge)553 TEST_P(MergeStressTest, RandomMerge) {
554 int timelineCount = get<0>(GetParam());
555 int mergeCount = get<1>(GetParam());
556
557 vector<SyncTimeline> timelines(timelineCount);
558
559 default_random_engine generator;
560 uniform_int_distribution<int> timelineDist(0, timelines.size()-1);
561 uniform_int_distribution<int> syncPointDist(0, numeric_limits<int>::max());
562
563 SyncFence fence(timelines[0], 0);
564 ASSERT_TRUE(fence.isValid());
565
566 unordered_map<int, int> fenceMap;
567 fenceMap.insert(make_tuple(0, 0));
568
569 // Randomly create syncpoints out of a fixed set of timelines, and merge them together.
570 for (int i = 0; i < mergeCount; i++) {
571
572 // Generate syncpoint.
573 int timelineOffset = timelineDist(generator);
574 const SyncTimeline& timeline = timelines[timelineOffset];
575 int syncPoint = syncPointDist(generator);
576
577 // Keep track of the latest syncpoint in each timeline.
578 auto itr = fenceMap.find(timelineOffset);
579 if (itr == end(fenceMap)) {
580 fenceMap.insert(tie(timelineOffset, syncPoint));
581 }
582 else {
583 int oldSyncPoint = itr->second;
584 fenceMap.erase(itr);
585 fenceMap.insert(tie(timelineOffset, max(syncPoint, oldSyncPoint)));
586 }
587
588 // Merge.
589 fence = SyncFence(fence, SyncFence(timeline, syncPoint));
590 ASSERT_TRUE(fence.isValid());
591 }
592
593 // Confirm our map matches the fence.
594 ASSERT_EQ(fence.getSize(), fenceMap.size());
595
596 // Trigger the merged fence.
597 for (auto& item: fenceMap) {
598 ASSERT_EQ(fence.wait(0), -1);
599 ASSERT_EQ(errno, ETIME);
600
601 // Increment the timeline to the last syncpoint.
602 timelines[item.first].inc(item.second);
603 }
604
605 // Check that the fence is triggered.
606 ASSERT_EQ(fence.wait(0), 0);
607 }
608
609 INSTANTIATE_TEST_CASE_P(
610 ParameterizedMergeStressTest,
611 MergeStressTest,
612 ::testing::Combine(::testing::Values(16,32), ::testing::Values(32, 1024, 1024*32)));
613
614 }
615
616