1 /**************************************************************************** 2 * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 * 23 * @file fifo.hpp 24 * 25 * @brief Definitions for our fifos used for thread communication. 26 * 27 ******************************************************************************/ 28 #pragma once 29 30 31 #include "common/os.h" 32 #include "arena.h" 33 34 #include <vector> 35 #include <cassert> 36 37 template<class T> 38 struct QUEUE 39 { OSALIGNLINEQUEUE40 OSALIGNLINE(volatile uint32_t) mLock{ 0 }; OSALIGNLINEQUEUE41 OSALIGNLINE(volatile uint32_t) mNumEntries{ 0 }; 42 std::vector<T*> mBlocks; 43 T* mCurBlock{ nullptr }; 44 uint32_t mHead{ 0 }; 45 uint32_t mTail{ 0 }; 46 uint32_t mCurBlockIdx{ 0 }; 47 48 // power of 2 49 static const uint32_t mBlockSizeShift = 6; 50 static const uint32_t mBlockSize = 1 << mBlockSizeShift; 51 52 template <typename ArenaT> clearQUEUE53 void clear(ArenaT& arena) 54 { 55 mHead = 0; 56 mTail = 0; 57 mBlocks.clear(); 58 T* pNewBlock = (T*)arena.AllocAligned(sizeof(T)*mBlockSize, KNOB_SIMD_WIDTH*4); 59 mBlocks.push_back(pNewBlock); 60 mCurBlock = pNewBlock; 61 mCurBlockIdx = 0; 62 mNumEntries = 0; 63 mLock = 0; 64 } 65 getNumQueuedQUEUE66 uint32_t getNumQueued() 67 { 68 return mNumEntries; 69 } 70 tryLockQUEUE71 bool tryLock() 72 { 73 if (mLock) 74 { 75 return false; 76 } 77 78 // try to lock the FIFO 79 long initial = InterlockedCompareExchange(&mLock, 1, 0); 80 return (initial == 0); 81 } 82 unlockQUEUE83 void unlock() 84 { 85 mLock = 0; 86 } 87 peekQUEUE88 T* peek() 89 { 90 if (mNumEntries == 0) 91 { 92 return nullptr; 93 } 94 uint32_t block = mHead >> mBlockSizeShift; 95 return &mBlocks[block][mHead & (mBlockSize-1)]; 96 } 97 dequeue_noincQUEUE98 void dequeue_noinc() 99 { 100 mHead ++; 101 mNumEntries --; 102 } 103 104 template <typename ArenaT> enqueue_try_nosyncQUEUE105 bool enqueue_try_nosync(ArenaT& arena, const T* entry) 106 { 107 const float* pSrc = (const float*)entry; 108 float* pDst = (float*)&mCurBlock[mTail]; 109 110 auto lambda = [&](int32_t i) 111 { 112 __m256 vSrc = _mm256_load_ps(pSrc + i*KNOB_SIMD_WIDTH); 113 _mm256_stream_ps(pDst + i*KNOB_SIMD_WIDTH, vSrc); 114 }; 115 116 const uint32_t numSimdLines = sizeof(T) / (KNOB_SIMD_WIDTH*4); 117 static_assert(numSimdLines * KNOB_SIMD_WIDTH * 4 == sizeof(T), 118 "FIFO element size should be multiple of SIMD width."); 119 120 UnrollerL<0, numSimdLines, 1>::step(lambda); 121 122 mTail ++; 123 if (mTail == mBlockSize) 124 { 125 if (++mCurBlockIdx < mBlocks.size()) 126 { 127 mCurBlock = mBlocks[mCurBlockIdx]; 128 } 129 else 130 { 131 T* newBlock = (T*)arena.AllocAligned(sizeof(T)*mBlockSize, KNOB_SIMD_WIDTH*4); 132 SWR_ASSERT(newBlock); 133 134 mBlocks.push_back(newBlock); 135 mCurBlock = newBlock; 136 } 137 138 mTail = 0; 139 } 140 141 mNumEntries ++; 142 return true; 143 } 144 destroyQUEUE145 void destroy() 146 { 147 } 148 149 }; 150