/****************************************************************************
 * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * @file arena.h
 *
 * @brief Arena memory manager
 *        The arena is a convenient, fast memory manager for allocations that are
 *        associated with an operation and can all be freed together once that
 *        operation has completed. Allocations are cheap: most of the time it's
 *        simply an increment of an offset, and there is no need to free individual
 *        allocations. All of the arena memory can be freed at once. A short usage
 *        sketch appears at the end of this file.
 *
 ******************************************************************************/
#pragma once

#include <mutex>
#include <algorithm>
#include <atomic>
#include "core/utils.h"

static const size_t ARENA_BLOCK_ALIGN = 64;

struct ArenaBlock
{
    size_t      blockSize = 0;
    ArenaBlock* pNext     = nullptr;
};
static_assert(sizeof(ArenaBlock) <= ARENA_BLOCK_ALIGN, "Increase ARENA_BLOCK_ALIGN size");

class DefaultAllocator
{
public:
    ArenaBlock* AllocateAligned(size_t size, size_t align)
    {
        SWR_ASSUME_ASSERT(size >= sizeof(ArenaBlock));

        ArenaBlock* p = new (AlignedMalloc(size, align)) ArenaBlock();
        p->blockSize  = size;
        return p;
    }

    void Free(ArenaBlock* pMem)
    {
        if (pMem)
        {
            SWR_ASSUME_ASSERT(pMem->blockSize < size_t(0xdddddddd));
            AlignedFree(pMem);
        }
    }
};

// Caching Allocator for Arena: freed blocks are kept in per-size buckets and
// reused by later allocations instead of returning to the heap.
template <uint32_t NumBucketsT = 8, uint32_t StartBucketBitT = 12>
struct CachingAllocatorT : DefaultAllocator
{
    ArenaBlock* AllocateAligned(size_t size, size_t align)
    {
        SWR_ASSUME_ASSERT(size >= sizeof(ArenaBlock));
        SWR_ASSUME_ASSERT(size <= uint32_t(-1));

        uint32_t bucket = GetBucketId(size);

        {
            // search cached blocks
            std::lock_guard<std::mutex> l(m_mutex);
            ArenaBlock* pPrevBlock = &m_cachedBlocks[bucket];
            ArenaBlock* pBlock     = SearchBlocks(pPrevBlock, size, align);

            if (pBlock)
            {
                m_cachedSize -= pBlock->blockSize;
                if (pBlock == m_pLastCachedBlocks[bucket])
                {
                    m_pLastCachedBlocks[bucket] = pPrevBlock;
                }
            }
            else
            {
                pPrevBlock = &m_oldCachedBlocks[bucket];
                pBlock     = SearchBlocks(pPrevBlock, size, align);

                if (pBlock)
                {
                    m_oldCachedSize -= pBlock->blockSize;
                    if (pBlock == m_pOldLastCachedBlocks[bucket])
                    {
                        m_pOldLastCachedBlocks[bucket] = pPrevBlock;
                    }
                }
            }

            if (pBlock)
            {
                assert(pPrevBlock && pPrevBlock->pNext == pBlock);
                pPrevBlock->pNext = pBlock->pNext;
                pBlock->pNext     = nullptr;

                return pBlock;
            }

            m_totalAllocated += size;

#if 0
            {
                static uint32_t count = 0;
                char buf[128];
                sprintf_s(buf, "Arena Alloc %d 0x%llx bytes - 0x%llx total\n", ++count, uint64_t(size), uint64_t(m_totalAllocated));
                OutputDebugStringA(buf);
            }
#endif
        }

        if (bucket && bucket < (CACHE_NUM_BUCKETS - 1))
        {
            // Make all blocks in this bucket the same size
            size = size_t(1) << (bucket + 1 + CACHE_START_BUCKET_BIT);
        }

        return this->DefaultAllocator::AllocateAligned(size, align);
    }

    void Free(ArenaBlock* pMem)
    {
        if (pMem)
        {
            std::unique_lock<std::mutex> l(m_mutex);
            InsertCachedBlock(GetBucketId(pMem->blockSize), pMem);
        }
    }

    void FreeOldBlocks()
    {
        if (!m_cachedSize)
        {
            return;
        }
        std::lock_guard<std::mutex> l(m_mutex);

        bool doFree = (m_oldCachedSize > MAX_UNUSED_SIZE);

        for (uint32_t i = 0; i < CACHE_NUM_BUCKETS; ++i)
        {
            if (doFree)
            {
                ArenaBlock* pBlock = m_oldCachedBlocks[i].pNext;
                while (pBlock)
                {
                    ArenaBlock* pNext = pBlock->pNext;
                    m_oldCachedSize -= pBlock->blockSize;
                    m_totalAllocated -= pBlock->blockSize;
                    this->DefaultAllocator::Free(pBlock);
                    pBlock = pNext;
                }
                m_oldCachedBlocks[i].pNext = nullptr;
                m_pOldLastCachedBlocks[i]  = &m_oldCachedBlocks[i];
            }

            if (m_pLastCachedBlocks[i] != &m_cachedBlocks[i])
            {
                if (i && i < (CACHE_NUM_BUCKETS - 1))
                {
                    // We know that all blocks are the same size.
                    // Just move the list over.
                    m_pLastCachedBlocks[i]->pNext = m_oldCachedBlocks[i].pNext;
                    m_oldCachedBlocks[i].pNext    = m_cachedBlocks[i].pNext;
                    m_cachedBlocks[i].pNext       = nullptr;
                    if (m_pOldLastCachedBlocks[i]->pNext)
                    {
                        m_pOldLastCachedBlocks[i] = m_pLastCachedBlocks[i];
                    }
                    m_pLastCachedBlocks[i] = &m_cachedBlocks[i];
                }
                else
                {
                    // The end buckets can have variable sized lists.
                    // Insert each block based on size
                    ArenaBlock* pBlock = m_cachedBlocks[i].pNext;
                    while (pBlock)
                    {
                        ArenaBlock* pNext = pBlock->pNext;
                        pBlock->pNext     = nullptr;
                        m_cachedSize -= pBlock->blockSize;
                        InsertCachedBlock<true>(i, pBlock);
                        pBlock = pNext;
                    }

                    m_pLastCachedBlocks[i]  = &m_cachedBlocks[i];
                    m_cachedBlocks[i].pNext = nullptr;
                }
            }
        }

        m_oldCachedSize += m_cachedSize;
        m_cachedSize = 0;
    }

    CachingAllocatorT()
    {
        for (uint32_t i = 0; i < CACHE_NUM_BUCKETS; ++i)
        {
            m_pLastCachedBlocks[i]    = &m_cachedBlocks[i];
            m_pOldLastCachedBlocks[i] = &m_oldCachedBlocks[i];
        }
    }

    ~CachingAllocatorT()
    {
        // Free all cached blocks
        for (uint32_t i = 0; i < CACHE_NUM_BUCKETS; ++i)
        {
            ArenaBlock* pBlock = m_cachedBlocks[i].pNext;
            while (pBlock)
            {
                ArenaBlock* pNext = pBlock->pNext;
                this->DefaultAllocator::Free(pBlock);
                pBlock = pNext;
            }
            pBlock = m_oldCachedBlocks[i].pNext;
            while (pBlock)
            {
                ArenaBlock* pNext = pBlock->pNext;
                this->DefaultAllocator::Free(pBlock);
                pBlock = pNext;
            }
        }
    }

private:
    static uint32_t GetBucketId(size_t blockSize)
    {
        uint32_t bucketId = 0;

#if defined(BitScanReverseSizeT)
        BitScanReverseSizeT((unsigned long*)&bucketId, (blockSize - 1) >> CACHE_START_BUCKET_BIT);
        bucketId = std::min<uint32_t>(bucketId, CACHE_NUM_BUCKETS - 1);
#endif

        return bucketId;
    }

    template <bool OldBlockT = false>
    void InsertCachedBlock(uint32_t bucketId, ArenaBlock* pNewBlock)
    {
        SWR_ASSUME_ASSERT(bucketId < CACHE_NUM_BUCKETS);

        ArenaBlock* pPrevBlock =
            OldBlockT ? &m_oldCachedBlocks[bucketId] : &m_cachedBlocks[bucketId];
        ArenaBlock* pBlock = pPrevBlock->pNext;

        while (pBlock)
        {
            if (pNewBlock->blockSize >= pBlock->blockSize)
            {
                // Insert here
                break;
            }
            pPrevBlock = pBlock;
            pBlock     = pBlock->pNext;
        }

        // Insert into list
        SWR_ASSUME_ASSERT(pPrevBlock);
        pPrevBlock->pNext = pNewBlock;
        pNewBlock->pNext  = pBlock;

        if (OldBlockT)
        {
            if (m_pOldLastCachedBlocks[bucketId] == pPrevBlock)
            {
                m_pOldLastCachedBlocks[bucketId] = pNewBlock;
            }

            m_oldCachedSize += pNewBlock->blockSize;
        }
        else
        {
            if (m_pLastCachedBlocks[bucketId] == pPrevBlock)
            {
                m_pLastCachedBlocks[bucketId] = pNewBlock;
            }

            m_cachedSize += pNewBlock->blockSize;
        }
    }

    static ArenaBlock* SearchBlocks(ArenaBlock*& pPrevBlock, size_t blockSize, size_t align)
    {
        ArenaBlock* pBlock          = pPrevBlock->pNext;
        ArenaBlock* pPotentialBlock = nullptr;
        ArenaBlock* pPotentialPrev  = nullptr;

        while (pBlock)
        {
            if (pBlock->blockSize >= blockSize)
            {
                if (pBlock == AlignUp(pBlock, align))
                {
                    if (pBlock->blockSize == blockSize)
                    {
                        // Won't find a better match
                        break;
                    }

                    // We could use this as it is larger than we wanted, but
                    // continue to search for a better match
                    pPotentialBlock = pBlock;
                    pPotentialPrev  = pPrevBlock;
                }
            }
            else
            {
                // Blocks are sorted by size (biggest first)
                // So, if we get here, there are no blocks
                // large enough, fall through to allocation.
                pBlock = nullptr;
                break;
            }

            pPrevBlock = pBlock;
            pBlock     = pBlock->pNext;
        }

        if (!pBlock)
        {
            // Couldn't find an exact match, use next biggest size
            pBlock     = pPotentialBlock;
            pPrevBlock = pPotentialPrev;
        }

        return pBlock;
    }

    // buckets, for block sizes < (1 << (start+1)), < (1 << (start+2)), ...
    static const uint32_t CACHE_NUM_BUCKETS      = NumBucketsT;
    static const uint32_t CACHE_START_BUCKET_BIT = StartBucketBitT;
    static const size_t   MAX_UNUSED_SIZE        = sizeof(MEGABYTE);

    ArenaBlock  m_cachedBlocks[CACHE_NUM_BUCKETS];
    ArenaBlock* m_pLastCachedBlocks[CACHE_NUM_BUCKETS];
    ArenaBlock  m_oldCachedBlocks[CACHE_NUM_BUCKETS];
    ArenaBlock* m_pOldLastCachedBlocks[CACHE_NUM_BUCKETS];
    std::mutex  m_mutex;

    size_t m_totalAllocated = 0;

    size_t m_cachedSize    = 0;
    size_t m_oldCachedSize = 0;
};
typedef CachingAllocatorT<> CachingAllocator;

template <typename T = DefaultAllocator, size_t BlockSizeT = 128 * sizeof(KILOBYTE)>
class TArena
{
public:
    TArena(T& in_allocator) : m_allocator(in_allocator) {}
    TArena() : m_allocator(m_defAllocator) {}
    ~TArena() { Reset(true); }

    void* AllocAligned(size_t size, size_t align)
    {
        if (0 == size)
        {
            return nullptr;
        }

        SWR_ASSERT(align <= ARENA_BLOCK_ALIGN);

        if (m_pCurBlock)
        {
            ArenaBlock* pCurBlock = m_pCurBlock;
            size_t      offset    = AlignUp(m_offset, align);

            if ((offset + size) <= pCurBlock->blockSize)
            {
                void* pMem = PtrAdd(pCurBlock, offset);
                m_offset   = offset + size;
                return pMem;
            }

            // Not enough memory in this block, fall through to allocate
            // a new block
        }

        static const size_t ArenaBlockSize = BlockSizeT;
        size_t blockSize = std::max(size + ARENA_BLOCK_ALIGN, ArenaBlockSize);

        // Add in one BLOCK_ALIGN unit to store ArenaBlock in.
        blockSize = AlignUp(blockSize, ARENA_BLOCK_ALIGN);

        ArenaBlock* pNewBlock = m_allocator.AllocateAligned(
            blockSize, ARENA_BLOCK_ALIGN); // Arena blocks are always simd byte aligned.
        SWR_ASSERT(pNewBlock != nullptr);

        if (pNewBlock != nullptr)
        {
            m_offset         = ARENA_BLOCK_ALIGN;
            pNewBlock->pNext = m_pCurBlock;

            m_pCurBlock = pNewBlock;
        }

        return AllocAligned(size, align);
    }

    void* Alloc(size_t size) { return AllocAligned(size, 1); }

    void* AllocAlignedSync(size_t size, size_t align)
    {
        void* pAlloc = nullptr;

        m_mutex.lock();
        pAlloc = AllocAligned(size, align);
        m_mutex.unlock();

        return pAlloc;
    }

    void* AllocSync(size_t size)
    {
        void* pAlloc = nullptr;

        m_mutex.lock();
        pAlloc = Alloc(size);
        m_mutex.unlock();

        return pAlloc;
    }

    void Reset(bool removeAll = false)
    {
        m_offset = ARENA_BLOCK_ALIGN;

        if (m_pCurBlock)
        {
            ArenaBlock* pUsedBlocks = m_pCurBlock->pNext;
            m_pCurBlock->pNext      = nullptr;
            while (pUsedBlocks)
            {
                ArenaBlock* pBlock = pUsedBlocks;
                pUsedBlocks        = pBlock->pNext;

                m_allocator.Free(pBlock);
            }

            if (removeAll)
            {
                m_allocator.Free(m_pCurBlock);
                m_pCurBlock = nullptr;
            }
        }
    }

    bool IsEmpty()
    {
        return (m_pCurBlock == nullptr) ||
               (m_offset == ARENA_BLOCK_ALIGN && m_pCurBlock->pNext == nullptr);
    }

private:
    ArenaBlock* m_pCurBlock = nullptr;
    size_t      m_offset    = ARENA_BLOCK_ALIGN;

    /// @note Mutex is only used by sync allocation functions.
    std::mutex m_mutex;

    DefaultAllocator m_defAllocator;
    T&               m_allocator;
};

using StdArena     = TArena<DefaultAllocator>;
using CachingArena = TArena<CachingAllocator>;
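
////////////////////////////////////////////////////////////////////////////////
/// @brief Illustrative usage sketch (editorial addition, not part of the
///        original interface). It shows the intended pattern: share one
///        CachingAllocator as a long-lived block cache, give each operation
///        its own arena, and reset the arena when the operation completes.
///        The function name and allocation sizes below are hypothetical.
///
/// @code
///     CachingAllocator g_blockCache;                 // long-lived, shared block cache
///
///     void ProcessOperation()
///     {
///         CachingArena arena(g_blockCache);          // scratch memory for this operation
///
///         void* pVerts = arena.AllocAligned(4096, 64); // mostly just an offset increment
///         void* pMisc  = arena.Alloc(128);             // no individual free required
///
///         // ... use pVerts / pMisc ...
///
///         arena.Reset();                             // return blocks to the shared cache
///     }
///
///     // Called occasionally (e.g. once per frame) to trim cached blocks that
///     // have stayed unused:
///     //     g_blockCache.FreeOldBlocks();
/// @endcode
////////////////////////////////////////////////////////////////////////////////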