/*
 * Copyright 2021 Google LLC
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */

#ifndef sktext_gpu_SubRunAllocator_DEFINED
#define sktext_gpu_SubRunAllocator_DEFINED

#include "include/core/SkSpan.h"
#include "include/private/base/SkAssert.h"
#include "include/private/base/SkMath.h"
#include "include/private/base/SkTemplates.h"
#include "include/private/base/SkTo.h"
#include "src/base/SkArenaAlloc.h"

#include <algorithm>
#include <array>
#include <climits>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <limits>
#include <memory>
#include <new>
#include <tuple>
#include <type_traits>
#include <utility>

namespace sktext::gpu {

// BagOfBytes parcels out bytes with a given size and alignment.
class BagOfBytes {
public:
    BagOfBytes(char* block, size_t blockSize, size_t firstHeapAllocation);
    explicit BagOfBytes(size_t firstHeapAllocation = 0);
    BagOfBytes(const BagOfBytes&) = delete;
    BagOfBytes& operator=(const BagOfBytes&) = delete;
    BagOfBytes(BagOfBytes&& that)
            : fEndByte{std::exchange(that.fEndByte, nullptr)}
            , fCapacity{that.fCapacity}
            , fFibProgression{that.fFibProgression} {}
    BagOfBytes& operator=(BagOfBytes&& that) {
        this->~BagOfBytes();
        new (this) BagOfBytes{std::move(that)};
        return *this;
    }

    ~BagOfBytes();

    // Given a requestedSize, round up to the smallest size that accounts for all the per-block
    // overhead and alignment. It crashes if requestedSize is negative or too big.
    static constexpr int PlatformMinimumSizeWithOverhead(int requestedSize, int assumedAlignment) {
        return MinimumSizeWithOverhead(
                requestedSize, assumedAlignment, sizeof(Block), kMaxAlignment);
    }

    static constexpr int MinimumSizeWithOverhead(
            int requestedSize, int assumedAlignment, int blockSize, int maxAlignment) {
        SkASSERT_RELEASE(0 <= requestedSize && requestedSize < kMaxByteSize);
        SkASSERT_RELEASE(SkIsPow2(assumedAlignment) && SkIsPow2(maxAlignment));

        const int minAlignment = std::min(maxAlignment, assumedAlignment);
        // There are two cases, one easy and one subtle. The easy case is when minAlignment ==
        // maxAlignment. When that happens, the term maxAlignment - minAlignment is zero, and
        // the Block will be placed at the proper alignment because AlignUp's result is already
        // properly aligned.
        // The subtle case is where minAlignment < maxAlignment. Because
        // minAlignment < maxAlignment, AlignUp(requestedSize, minAlignment) + blockSize does not
        // guarantee that the Block can be placed at a maxAlignment address. The Block can be
        // placed at maxAlignment/minAlignment different addresses to achieve alignment, so we
        // need to add memory to allow the Block to be placed on a maxAlignment address.
        // For example, if assumedAlignment = 4 and maxAlignment = 16 then the Block can be
        // placed at the following address offsets at the end of minimumSize bytes.
        //     0 * minAlignment =  0
        //     1 * minAlignment =  4
        //     2 * minAlignment =  8
        //     3 * minAlignment = 12
        // Following this logic, the equation for the additional bytes is
        //     (maxAlignment/minAlignment - 1) * minAlignment
        //         = maxAlignment - minAlignment.
        int minimumSize = SkToInt(AlignUp(requestedSize, minAlignment))
                          + blockSize
                          + maxAlignment - minAlignment;
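
        // A worked example with illustrative numbers (not taken from any particular caller):
        // requestedSize = 100, assumedAlignment = 4, blockSize = 16, and maxAlignment = 16 give
        // minAlignment = 4 and
        //     minimumSize = AlignUp(100, 4) + 16 + (16 - 4) = 100 + 16 + 12 = 128,
        // leaving 12 bytes of slack so the Block can always land on a 16-byte boundary.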

        // If minimumSize is > 32K then round it up to a 4K boundary unless it is too close to
        // the maximum int. The > 32K heuristic is from the JEMalloc behavior.
        constexpr int k32K = (1 << 15);
        if (minimumSize >= k32K && minimumSize < std::numeric_limits<int>::max() - k4K) {
            minimumSize = SkToInt(AlignUp(minimumSize, k4K));
        }

        return minimumSize;
    }

    template <int size>
    using Storage = std::array<char, PlatformMinimumSizeWithOverhead(size, 1)>;

    // Returns true if n * sizeof(T) will fit in an allocation block.
    template <typename T>
    static bool WillCountFit(int n) {
        constexpr int kMaxN = kMaxByteSize / sizeof(T);
        return 0 <= n && n < kMaxN;
    }

    // Returns a pointer to memory suitable for holding n Ts.
    template <typename T>
    char* allocateBytesFor(int n = 1) {
        static_assert(alignof(T) <= kMaxAlignment, "Alignment is too big for arena");
        static_assert(sizeof(T) < kMaxByteSize, "Size is too big for arena");
        SkASSERT_RELEASE(WillCountFit<T>(n));

        int size = n ? n * sizeof(T) : 1;
        return this->allocateBytes(size, alignof(T));
    }

    void* alignedBytes(int unsafeSize, int unsafeAlignment);

private:
    // The maximum alignment supported by BagOfBytes. 16 seems to be a good number for alignment.
    // If a use case for larger alignments is found, we can turn this into a template parameter.
    inline static constexpr int kMaxAlignment = std::max(16, (int)alignof(std::max_align_t));
    // For larger sizes, blocks are rounded up to 4K chunks.
    inline static constexpr int k4K = (1 << 12);
    // The largest size that can be allocated; leave 4K of slop. This should never overflow
    // with the calculations done in this code.
    inline static constexpr int kMaxByteSize = std::numeric_limits<int>::max() - k4K;
    // The assumed alignment of new char[] given the platform.
    // There is a bug in Emscripten's allocator that makes its alignment different from
    // max_align_t. kAllocationAlignment accounts for this difference. For more information see:
    // https://github.com/emscripten-core/emscripten/issues/10072
#if !defined(SK_FORCE_8_BYTE_ALIGNMENT)
    inline static constexpr int kAllocationAlignment = alignof(std::max_align_t);
#else
    inline static constexpr int kAllocationAlignment = 8;
#endif

    static constexpr size_t AlignUp(int size, int alignment) {
        return (size + (alignment - 1)) & -alignment;
    }

    // The Block starts at the location pointed to by fEndByte.
    // Beware: order is important here. The destructor for fPrevious must be called first because
    // the Block is embedded in fBlockStart. Destructors are run in reverse order.
    struct Block {
        Block(char* previous, char* startOfBlock);
        // The start of the originally allocated bytes. This is the thing that must be deleted.
        char* const fBlockStart;
        Block* const fPrevious;
    };

    // Note: fCapacity is the number of bytes remaining in the current block, and is subtracted
    // from fEndByte to generate the location of the object.
    char* allocateBytes(int size, int alignment) {
        fCapacity = fCapacity & -alignment;
        if (fCapacity < size) {
            this->needMoreBytes(size, alignment);
        }
        char* const ptr = fEndByte - fCapacity;
        SkASSERT(((intptr_t)ptr & (alignment - 1)) == 0);
        SkASSERT(fCapacity >= size);
        fCapacity -= size;
        return ptr;
    }
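
    // For example (illustrative values): with fEndByte at address 128 and fCapacity = 30, a
    // request for size = 10 with alignment = 8 first rounds fCapacity down to 24, which places
    // the object at 128 - 24 = 104 (an 8-aligned address) and leaves fCapacity = 14 for the
    // next request.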

    // Adjust fEndByte and fCapacity given a new block starting at bytes with size.
    void setupBytesAndCapacity(char* bytes, int size);

    // Adjust fEndByte and fCapacity to satisfy the size and alignment request.
    void needMoreBytes(int size, int alignment);

    // This points to the highest kMaxAlignment address in the allocated block. The address of
    // the current end of allocated data is given by fEndByte - fCapacity. The negative side of
    // this pointer holds the bytes to be allocated, while the positive side holds the Block for
    // this memory. In other words, the pointer to the Block structure for these bytes is
    // reinterpret_cast<Block*>(fEndByte).
    char* fEndByte{nullptr};

    // The number of bytes remaining in this block.
    int fCapacity{0};

    SkFibBlockSizes<kMaxByteSize> fFibProgression;
};

template <typename T>
class SubRunInitializer {
public:
    SubRunInitializer(void* memory) : fMemory{memory} { SkASSERT(memory != nullptr); }
    ~SubRunInitializer() {
        ::operator delete(fMemory);
    }
    template <typename... Args>
    T* initialize(Args&&... args) {
        // Assert if initialize is called more than once.
        SkASSERT(fMemory != nullptr);
        return new (std::exchange(fMemory, nullptr)) T(std::forward<Args>(args)...);
    }

private:
    void* fMemory;
};

// SubRunAllocator provides fast allocation where the user takes care of calling the destructors
// of the returned pointers, and the SubRunAllocator takes care of deleting the storage. The
// unique_ptrs returned help ensure the object's destructor is called.
// A note on zero-length arrays: according to the standard a pointer must be returned, and it
// can't be nullptr. In such a case, the allocator allocates one byte, but does not initialize it.
class SubRunAllocator {
public:
    struct Destroyer {
        template <typename T>
        void operator()(T* ptr) { ptr->~T(); }
    };

    struct ArrayDestroyer {
        int n;
        template <typename T>
        void operator()(T* ptr) {
            for (int i = 0; i < n; i++) { ptr[i].~T(); }
        }
    };

    template <class T>
    inline static constexpr bool HasNoDestructor = std::is_trivially_destructible<T>::value;

    SubRunAllocator(char* block, int blockSize, int firstHeapAllocation);
    explicit SubRunAllocator(int firstHeapAllocation = 0);
    SubRunAllocator(const SubRunAllocator&) = delete;
    SubRunAllocator& operator=(const SubRunAllocator&) = delete;
    SubRunAllocator(SubRunAllocator&&) = default;
    SubRunAllocator& operator=(SubRunAllocator&&) = default;

    // Allocate one heap block that holds a T followed by an arena of at least allocSizeHint
    // bytes. The returned SubRunInitializer must be used to construct the T in place.
    template <typename T>
    static std::tuple<SubRunInitializer<T>, int, SubRunAllocator>
    AllocateClassMemoryAndArena(int allocSizeHint) {
        SkASSERT_RELEASE(allocSizeHint >= 0);
        // Round the size of the arena after the object up by the optimal amount.
        int extraSize = BagOfBytes::PlatformMinimumSizeWithOverhead(allocSizeHint, alignof(T));

        // Don't overflow, or die.
        SkASSERT_RELEASE(INT_MAX - SkTo<int>(sizeof(T)) > extraSize);
        int totalMemorySize = sizeof(T) + extraSize;

        void* memory = ::operator new(totalMemorySize);
        SubRunAllocator alloc{SkTAddOffset<char>(memory, sizeof(T)), extraSize, extraSize/2};
        return {memory, totalMemorySize, std::move(alloc)};
    }
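
    // A usage sketch (SomeSubRun and the size hint are hypothetical, for illustration only):
    //     auto [initializer, totalSize, alloc] =
    //             SubRunAllocator::AllocateClassMemoryAndArena<SomeSubRun>(128);
    //     SomeSubRun* subRun = initializer.initialize(std::move(alloc));
    // The object and its arena share a single heap block, so the code that destroys the object
    // must also release that block.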

    template <typename T, typename... Args>
    T* makePOD(Args&&... args) {
        static_assert(HasNoDestructor<T>, "This is not POD. Use makeUnique.");
        char* bytes = fAlloc.template allocateBytesFor<T>();
        return new (bytes) T(std::forward<Args>(args)...);
    }

    template <typename T, typename... Args>
    std::unique_ptr<T, Destroyer> makeUnique(Args&&... args) {
        static_assert(!HasNoDestructor<T>, "This is POD. Use makePOD.");
        char* bytes = fAlloc.template allocateBytesFor<T>();
        return std::unique_ptr<T, Destroyer>{new (bytes) T(std::forward<Args>(args)...)};
    }

    template <typename T>
    T* makePODArray(int n) {
        static_assert(HasNoDestructor<T>, "This is not POD. Use makeUniqueArray.");
        return reinterpret_cast<T*>(fAlloc.template allocateBytesFor<T>(n));
    }

    template <typename T>
    SkSpan<T> makePODSpan(SkSpan<const T> s) {
        static_assert(HasNoDestructor<T>, "This is not POD. Use makeUniqueArray.");
        if (s.empty()) {
            return SkSpan<T>{};
        }

        T* result = this->makePODArray<T>(SkTo<int>(s.size()));
        memcpy(result, s.data(), s.size_bytes());
        return {result, s.size()};
    }

    template <typename T, typename Src, typename Map>
    SkSpan<T> makePODArray(const Src& src, Map map) {
        static_assert(HasNoDestructor<T>, "This is not POD. Use makeUniqueArray.");
        int size = SkTo<int>(src.size());
        T* result = this->template makePODArray<T>(size);
        for (int i = 0; i < size; i++) {
            new (&result[i]) T(map(src[i]));
        }
        return {result, src.size()};
    }

    template <typename T>
    std::unique_ptr<T[], ArrayDestroyer> makeUniqueArray(int n) {
        static_assert(!HasNoDestructor<T>, "This is POD. Use makePODArray.");
        T* array = reinterpret_cast<T*>(fAlloc.template allocateBytesFor<T>(n));
        for (int i = 0; i < n; i++) {
            new (&array[i]) T{};
        }
        return std::unique_ptr<T[], ArrayDestroyer>{array, ArrayDestroyer{n}};
    }

    template <typename T, typename I>
    std::unique_ptr<T[], ArrayDestroyer> makeUniqueArray(int n, I initializer) {
        static_assert(!HasNoDestructor<T>, "This is POD. Use makePODArray.");
        T* array = reinterpret_cast<T*>(fAlloc.template allocateBytesFor<T>(n));
        for (int i = 0; i < n; i++) {
            new (&array[i]) T(initializer(i));
        }
        return std::unique_ptr<T[], ArrayDestroyer>{array, ArrayDestroyer{n}};
    }

    void* alignedBytes(int size, int alignment);

private:
    BagOfBytes fAlloc;
};

// Helper for defining allocators with inline/reserved storage.
// For argument declarations, stick to the base type (SubRunAllocator).
// Note: Inheriting from the storage first means the storage will outlive the
// SubRunAllocator, letting ~SubRunAllocator read it as it calls destructors.
// (This is mostly only relevant for strict tools like MSAN.)
template <size_t InlineStorageSize, size_t InlineStorageAlignment>
class STSubRunAllocator : private std::array<char,
                                             BagOfBytes::PlatformMinimumSizeWithOverhead(
                                                     InlineStorageSize, InlineStorageAlignment)>,
                          public SubRunAllocator {
public:
    explicit STSubRunAllocator(size_t firstHeapAllocation = InlineStorageSize)
            : SubRunAllocator{this->data(), SkToInt(this->size()), SkToInt(firstHeapAllocation)} {}
};
}  // namespace sktext::gpu

#endif  // sktext_gpu_SubRunAllocator_DEFINED
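
/*
 * A usage sketch for the allocators above (MyPod and MyObject are hypothetical types used only
 * for illustration):
 *
 *     STSubRunAllocator<64, 8> alloc;                 // 64 bytes of inline storage
 *     MyPod* pod = alloc.makePOD<MyPod>(1, 2);        // trivially destructible; no cleanup
 *     auto obj = alloc.makeUnique<MyObject>("name");  // Destroyer runs ~MyObject
 *     auto arr = alloc.makeUniqueArray<MyObject>(4);  // ArrayDestroyer runs four destructors
 *
 * The unique_ptr deleters only run destructors; the underlying memory is reclaimed when alloc
 * itself is destroyed.
 */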