1 /**************************************************************************** 2 * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 * 23 * @file fifo.hpp 24 * 25 * @brief Definitions for our fifos used for thread communication. 26 * 27 ******************************************************************************/ 28 #pragma once 29 30 #include "common/os.h" 31 #include "arena.h" 32 33 #include <vector> 34 #include <cassert> 35 36 template <class T> 37 struct QUEUE 38 { OSALIGNLINEQUEUE39 OSALIGNLINE(volatile uint32_t) mLock{0}; OSALIGNLINEQUEUE40 OSALIGNLINE(volatile uint32_t) mNumEntries{0}; 41 std::vector<T*> mBlocks; 42 T* mCurBlock{nullptr}; 43 uint32_t mHead{0}; 44 uint32_t mTail{0}; 45 uint32_t mCurBlockIdx{0}; 46 47 // power of 2 48 static const uint32_t mBlockSizeShift = 6; 49 static const uint32_t mBlockSize = 1 << mBlockSizeShift; 50 51 template <typename ArenaT> clearQUEUE52 void clear(ArenaT& arena) 53 { 54 mHead = 0; 55 mTail = 0; 56 mBlocks.clear(); 57 T* pNewBlock = (T*)arena.AllocAligned(sizeof(T) * mBlockSize, KNOB_SIMD_WIDTH * 4); 58 mBlocks.push_back(pNewBlock); 59 mCurBlock = pNewBlock; 60 mCurBlockIdx = 0; 61 mNumEntries = 0; 62 mLock = 0; 63 } 64 getNumQueuedQUEUE65 uint32_t getNumQueued() { return mNumEntries; } 66 tryLockQUEUE67 bool tryLock() 68 { 69 if (mLock) 70 { 71 return false; 72 } 73 74 // try to lock the FIFO 75 long initial = InterlockedCompareExchange(&mLock, 1, 0); 76 return (initial == 0); 77 } 78 unlockQUEUE79 void unlock() { mLock = 0; } 80 peekQUEUE81 T* peek() 82 { 83 if (mNumEntries == 0) 84 { 85 return nullptr; 86 } 87 uint32_t block = mHead >> mBlockSizeShift; 88 return &mBlocks[block][mHead & (mBlockSize - 1)]; 89 } 90 dequeue_noincQUEUE91 void dequeue_noinc() 92 { 93 mHead++; 94 mNumEntries--; 95 } 96 97 template <typename ArenaT> enqueue_try_nosyncQUEUE98 bool enqueue_try_nosync(ArenaT& arena, const T* entry) 99 { 100 const float* pSrc = (const float*)entry; 101 float* pDst = (float*)&mCurBlock[mTail]; 102 103 auto lambda = [&](int32_t i) { 104 __m256 vSrc = _mm256_load_ps(pSrc + i * KNOB_SIMD_WIDTH); 105 _mm256_stream_ps(pDst + i * KNOB_SIMD_WIDTH, vSrc); 106 }; 107 108 const uint32_t numSimdLines = sizeof(T) / (KNOB_SIMD_WIDTH * 4); 109 static_assert(numSimdLines * KNOB_SIMD_WIDTH * 4 == sizeof(T), 110 "FIFO element size should be multiple of SIMD width."); 111 112 UnrollerL<0, numSimdLines, 1>::step(lambda); 113 114 mTail++; 115 if (mTail == mBlockSize) 116 { 117 if (++mCurBlockIdx < mBlocks.size()) 118 { 119 mCurBlock = mBlocks[mCurBlockIdx]; 120 } 121 else 122 { 123 T* newBlock = (T*)arena.AllocAligned(sizeof(T) * mBlockSize, KNOB_SIMD_WIDTH * 4); 124 SWR_ASSERT(newBlock); 125 126 mBlocks.push_back(newBlock); 127 mCurBlock = newBlock; 128 } 129 130 mTail = 0; 131 } 132 133 mNumEntries++; 134 return true; 135 } 136 destroyQUEUE137 void destroy() {} 138 }; 139