/****************************************************************************
 * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * @file arena.h
 *
 * @brief Arena memory manager
 *        The arena provides fast management of allocations that are tied to an
 *        operation and can all be released together once that operation has
 *        completed. Allocations are cheap since most of the time they are simply
 *        an increment of an offset. There is also no need to free individual
 *        allocations: all of the arena memory can be freed at once.
 *
 ******************************************************************************/
#pragma once

#include <mutex>
#include <algorithm>
#include <atomic>
#include "core/utils.h"

static const size_t ARENA_BLOCK_ALIGN = 64;

struct ArenaBlock
{
    size_t      blockSize = 0;
    ArenaBlock* pNext     = nullptr;
};
static_assert(sizeof(ArenaBlock) <= ARENA_BLOCK_ALIGN,
              "Increase BLOCK_ALIGN size");

class DefaultAllocator
{
public:
    ArenaBlock* AllocateAligned(size_t size, size_t align)
    {
        SWR_ASSUME_ASSERT(size >= sizeof(ArenaBlock));

        ArenaBlock* p = new (AlignedMalloc(size, align)) ArenaBlock();
        p->blockSize = size;
        return p;
    }

    void Free(ArenaBlock* pMem)
    {
        if (pMem)
        {
            SWR_ASSUME_ASSERT(pMem->blockSize < size_t(0xdddddddd));
            AlignedFree(pMem);
        }
    }
};

// Caching Allocator for Arena
template<uint32_t NumBucketsT = 8, uint32_t StartBucketBitT = 12>
struct CachingAllocatorT : DefaultAllocator
{
    ArenaBlock* AllocateAligned(size_t size, size_t align)
    {
        SWR_ASSUME_ASSERT(size >= sizeof(ArenaBlock));
        SWR_ASSUME_ASSERT(size <= uint32_t(-1));

        uint32_t bucket = GetBucketId(size);

        {
            // search cached blocks
            std::lock_guard<std::mutex> l(m_mutex);
            ArenaBlock* pPrevBlock = &m_cachedBlocks[bucket];
            ArenaBlock* pBlock = SearchBlocks(pPrevBlock, size, align);

            if (pBlock)
            {
                m_cachedSize -= pBlock->blockSize;
                if (pBlock == m_pLastCachedBlocks[bucket])
                {
                    m_pLastCachedBlocks[bucket] = pPrevBlock;
                }
            }
            else
            {
                pPrevBlock = &m_oldCachedBlocks[bucket];
                pBlock = SearchBlocks(pPrevBlock, size, align);

                if (pBlock)
                {
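                    // Hit in the old cache: account for the block being removed and,
                    // if it was this bucket's list tail, pull the tail pointer back.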
                    m_oldCachedSize -= pBlock->blockSize;
                    if (pBlock == m_pOldLastCachedBlocks[bucket])
                    {
                        m_pOldLastCachedBlocks[bucket] = pPrevBlock;
                    }
                }
            }

            if (pBlock)
            {
                SWR_ASSUME_ASSERT(pPrevBlock && pPrevBlock->pNext == pBlock);
                pPrevBlock->pNext = pBlock->pNext;
                pBlock->pNext = nullptr;

                return pBlock;
            }

            m_totalAllocated += size;

#if 0
            {
                static uint32_t count = 0;
                char buf[128];
                sprintf_s(buf, "Arena Alloc %d 0x%llx bytes - 0x%llx total\n", ++count, uint64_t(size), uint64_t(m_totalAllocated));
                OutputDebugStringA(buf);
            }
#endif
        }

        if (bucket && bucket < (CACHE_NUM_BUCKETS - 1))
        {
            // Make all blocks in this bucket the same size
            size = size_t(1) << (bucket + 1 + CACHE_START_BUCKET_BIT);
        }

        return this->DefaultAllocator::AllocateAligned(size, align);
    }

    void Free(ArenaBlock* pMem)
    {
        if (pMem)
        {
            std::unique_lock<std::mutex> l(m_mutex);
            InsertCachedBlock(GetBucketId(pMem->blockSize), pMem);
        }
    }

    void FreeOldBlocks()
    {
        if (!m_cachedSize) { return; }
        std::lock_guard<std::mutex> l(m_mutex);

        bool doFree = (m_oldCachedSize > MAX_UNUSED_SIZE);

        for (uint32_t i = 0; i < CACHE_NUM_BUCKETS; ++i)
        {
            if (doFree)
            {
                ArenaBlock* pBlock = m_oldCachedBlocks[i].pNext;
                while (pBlock)
                {
                    ArenaBlock* pNext = pBlock->pNext;
                    m_oldCachedSize -= pBlock->blockSize;
                    m_totalAllocated -= pBlock->blockSize;
                    this->DefaultAllocator::Free(pBlock);
                    pBlock = pNext;
                }
                m_oldCachedBlocks[i].pNext = nullptr;
                m_pOldLastCachedBlocks[i] = &m_oldCachedBlocks[i];
            }

            if (m_pLastCachedBlocks[i] != &m_cachedBlocks[i])
            {
                if (i && i < (CACHE_NUM_BUCKETS - 1))
                {
                    // We know that all blocks are the same size.
                    // Just move the list over.
                    m_pLastCachedBlocks[i]->pNext = m_oldCachedBlocks[i].pNext;
                    m_oldCachedBlocks[i].pNext = m_cachedBlocks[i].pNext;
                    m_cachedBlocks[i].pNext = nullptr;
                    if (m_pOldLastCachedBlocks[i]->pNext)
                    {
                        m_pOldLastCachedBlocks[i] = m_pLastCachedBlocks[i];
                    }
                    m_pLastCachedBlocks[i] = &m_cachedBlocks[i];
                }
                else
                {
                    // The end buckets can have variable sized lists.
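                    // (Only the middle buckets round allocations up to a fixed size,
                    //  so the first and last buckets hold blocks of varying sizes.)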
                    // Insert each block based on size
                    ArenaBlock* pBlock = m_cachedBlocks[i].pNext;
                    while (pBlock)
                    {
                        ArenaBlock* pNext = pBlock->pNext;
                        pBlock->pNext = nullptr;
                        m_cachedSize -= pBlock->blockSize;
                        InsertCachedBlock<true>(i, pBlock);
                        pBlock = pNext;
                    }

                    m_pLastCachedBlocks[i] = &m_cachedBlocks[i];
                    m_cachedBlocks[i].pNext = nullptr;
                }
            }
        }

        m_oldCachedSize += m_cachedSize;
        m_cachedSize = 0;
    }

    CachingAllocatorT()
    {
        for (uint32_t i = 0; i < CACHE_NUM_BUCKETS; ++i)
        {
            m_pLastCachedBlocks[i] = &m_cachedBlocks[i];
            m_pOldLastCachedBlocks[i] = &m_oldCachedBlocks[i];
        }
    }

    ~CachingAllocatorT()
    {
        // Free all cached blocks
        for (uint32_t i = 0; i < CACHE_NUM_BUCKETS; ++i)
        {
            ArenaBlock* pBlock = m_cachedBlocks[i].pNext;
            while (pBlock)
            {
                ArenaBlock* pNext = pBlock->pNext;
                this->DefaultAllocator::Free(pBlock);
                pBlock = pNext;
            }
            pBlock = m_oldCachedBlocks[i].pNext;
            while (pBlock)
            {
                ArenaBlock* pNext = pBlock->pNext;
                this->DefaultAllocator::Free(pBlock);
                pBlock = pNext;
            }
        }
    }

private:
    static uint32_t GetBucketId(size_t blockSize)
    {
        uint32_t bucketId = 0;

#if defined(BitScanReverseSizeT)
        BitScanReverseSizeT((unsigned long*)&bucketId, (blockSize - 1) >> CACHE_START_BUCKET_BIT);
        bucketId = std::min<uint32_t>(bucketId, CACHE_NUM_BUCKETS - 1);
#endif

        return bucketId;
    }

    template <bool OldBlockT = false>
    void InsertCachedBlock(uint32_t bucketId, ArenaBlock* pNewBlock)
    {
        SWR_ASSUME_ASSERT(bucketId < CACHE_NUM_BUCKETS);

        ArenaBlock* pPrevBlock = OldBlockT ? &m_oldCachedBlocks[bucketId] : &m_cachedBlocks[bucketId];
        ArenaBlock* pBlock = pPrevBlock->pNext;

        while (pBlock)
        {
            if (pNewBlock->blockSize >= pBlock->blockSize)
            {
                // Insert here
                break;
            }
            pPrevBlock = pBlock;
            pBlock = pBlock->pNext;
        }

        // Insert into list
        SWR_ASSUME_ASSERT(pPrevBlock);
        pPrevBlock->pNext = pNewBlock;
        pNewBlock->pNext = pBlock;

        if (OldBlockT)
        {
            if (m_pOldLastCachedBlocks[bucketId] == pPrevBlock)
            {
                m_pOldLastCachedBlocks[bucketId] = pNewBlock;
            }

            m_oldCachedSize += pNewBlock->blockSize;
        }
        else
        {
            if (m_pLastCachedBlocks[bucketId] == pPrevBlock)
            {
                m_pLastCachedBlocks[bucketId] = pNewBlock;
            }

            m_cachedSize += pNewBlock->blockSize;
        }
    }

    static ArenaBlock* SearchBlocks(ArenaBlock*& pPrevBlock, size_t blockSize, size_t align)
    {
        ArenaBlock* pBlock = pPrevBlock->pNext;
        ArenaBlock* pPotentialBlock = nullptr;
        ArenaBlock* pPotentialPrev = nullptr;

        while (pBlock)
        {
            if (pBlock->blockSize >= blockSize)
            {
                if (pBlock == AlignUp(pBlock, align))
                {
                    if (pBlock->blockSize == blockSize)
                    {
                        // Won't find a better match
                        break;
                    }

                    // We could use this as it is larger than we wanted, but
                    // continue to search for a better match
                    pPotentialBlock = pBlock;
                    pPotentialPrev = pPrevBlock;
                }
            }
            else
            {
                // Blocks are sorted by size (biggest first)
                // So, if we get here, there are no blocks
                // large enough, fall through to allocation.
                pBlock = nullptr;
                break;
            }

            pPrevBlock = pBlock;
            pBlock = pBlock->pNext;
        }

        if (!pBlock)
        {
            // Couldn't find an exact match, use next biggest size
            pBlock = pPotentialBlock;
            pPrevBlock = pPotentialPrev;
        }

        return pBlock;
    }

    // buckets, for block sizes < (1 << (start+1)), < (1 << (start+2)), ...
    static const uint32_t CACHE_NUM_BUCKETS      = NumBucketsT;
    static const uint32_t CACHE_START_BUCKET_BIT = StartBucketBitT;
    static const size_t   MAX_UNUSED_SIZE        = sizeof(MEGABYTE);
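    // Two-level cache: blocks freed via Free() land in m_cachedBlocks; each call to
    // FreeOldBlocks() migrates them into m_oldCachedBlocks, and once the old cache
    // grows past MAX_UNUSED_SIZE those blocks (unused for at least one full interval)
    // are released back to the default allocator. The m_pLast* pointers track each
    // bucket's list tail.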
    ArenaBlock  m_cachedBlocks[CACHE_NUM_BUCKETS];
    ArenaBlock* m_pLastCachedBlocks[CACHE_NUM_BUCKETS];
    ArenaBlock  m_oldCachedBlocks[CACHE_NUM_BUCKETS];
    ArenaBlock* m_pOldLastCachedBlocks[CACHE_NUM_BUCKETS];
    std::mutex  m_mutex;

    size_t      m_totalAllocated = 0;

    size_t      m_cachedSize = 0;
    size_t      m_oldCachedSize = 0;
};
typedef CachingAllocatorT<> CachingAllocator;

template<typename T = DefaultAllocator, size_t BlockSizeT = 128 * sizeof(KILOBYTE)>
class TArena
{
public:
    TArena(T& in_allocator) : m_allocator(in_allocator) {}
    TArena()                : m_allocator(m_defAllocator) {}
    ~TArena()
    {
        Reset(true);
    }

    void* AllocAligned(size_t size, size_t align)
    {
        if (0 == size)
        {
            return nullptr;
        }

        SWR_ASSERT(align <= ARENA_BLOCK_ALIGN);

        if (m_pCurBlock)
        {
            ArenaBlock* pCurBlock = m_pCurBlock;
            size_t offset = AlignUp(m_offset, align);

            if ((offset + size) <= pCurBlock->blockSize)
            {
                void* pMem = PtrAdd(pCurBlock, offset);
                m_offset = offset + size;
                return pMem;
            }

            // Not enough memory in this block, fall through to allocate
            // a new block
        }

        static const size_t ArenaBlockSize = BlockSizeT;
        size_t blockSize = std::max(size + ARENA_BLOCK_ALIGN, ArenaBlockSize);

        // Add in one BLOCK_ALIGN unit to store ArenaBlock in.
        blockSize = AlignUp(blockSize, ARENA_BLOCK_ALIGN);

        ArenaBlock* pNewBlock = m_allocator.AllocateAligned(blockSize, ARENA_BLOCK_ALIGN); // Arena blocks are always simd byte aligned.
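        // On success, the new block is pushed onto the head of the block list below.
        // Its first ARENA_BLOCK_ALIGN bytes hold the ArenaBlock header, which is why
        // m_offset restarts at ARENA_BLOCK_ALIGN rather than 0.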
        SWR_ASSERT(pNewBlock != nullptr);

        if (pNewBlock != nullptr)
        {
            m_offset = ARENA_BLOCK_ALIGN;
            pNewBlock->pNext = m_pCurBlock;

            m_pCurBlock = pNewBlock;
        }

        return AllocAligned(size, align);
    }

    void* Alloc(size_t size)
    {
        return AllocAligned(size, 1);
    }

    void* AllocAlignedSync(size_t size, size_t align)
    {
        void* pAlloc = nullptr;

        m_mutex.lock();
        pAlloc = AllocAligned(size, align);
        m_mutex.unlock();

        return pAlloc;
    }

    void* AllocSync(size_t size)
    {
        void* pAlloc = nullptr;

        m_mutex.lock();
        pAlloc = Alloc(size);
        m_mutex.unlock();

        return pAlloc;
    }

    void Reset(bool removeAll = false)
    {
        m_offset = ARENA_BLOCK_ALIGN;

        if (m_pCurBlock)
        {
            ArenaBlock* pUsedBlocks = m_pCurBlock->pNext;
            m_pCurBlock->pNext = nullptr;
            while (pUsedBlocks)
            {
                ArenaBlock* pBlock = pUsedBlocks;
                pUsedBlocks = pBlock->pNext;

                m_allocator.Free(pBlock);
            }

            if (removeAll)
            {
                m_allocator.Free(m_pCurBlock);
                m_pCurBlock = nullptr;
            }
        }
    }

    bool IsEmpty()
    {
        return (m_pCurBlock == nullptr) || (m_offset == ARENA_BLOCK_ALIGN && m_pCurBlock->pNext == nullptr);
    }

private:

    ArenaBlock* m_pCurBlock = nullptr;
    size_t      m_offset    = ARENA_BLOCK_ALIGN;

    /// @note Mutex is only used by sync allocation functions.
    std::mutex  m_mutex;

    DefaultAllocator m_defAllocator;
    T&               m_allocator;
};

using StdArena     = TArena<DefaultAllocator>;
using CachingArena = TArena<CachingAllocator>;
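// Illustrative usage sketch (not part of this header's API; the call pattern below is
// one possible way to combine these types, and the sizes/alignments are arbitrary
// example values):
//
//     CachingAllocator allocator;          // shared backing allocator that caches freed blocks
//     CachingArena     arena(allocator);   // per-operation arena drawing from that allocator
//
//     void* p0 = arena.Alloc(128);             // cheap pointer-bump allocation
//     void* p1 = arena.AllocAligned(256, 64);  // align must be <= ARENA_BLOCK_ALIGN
//
//     arena.Reset();                // operation complete: blocks return to the allocator's cache
//     allocator.FreeOldBlocks();    // called periodically to trim cached blocks that go unused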