• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2021 Google LLC
3  *
4  * Use of this source code is governed by a BSD-style license that can be
5  * found in the LICENSE file.
6  */
7 
8 #ifndef sktext_gpu_SubRunAllocator_DEFINED
9 #define sktext_gpu_SubRunAllocator_DEFINED
10 
11 #include "include/core/SkSpan.h"
12 #include "include/private/base/SkMath.h"
13 #include "include/private/base/SkTemplates.h"
14 #include "src/base/SkArenaAlloc.h"
15 
16 #include <algorithm>
17 #include <climits>
18 #include <memory>
19 #include <tuple>
20 #include <utility>
21 
22 namespace sktext::gpu {
23 
24 // BagOfBytes parcels out bytes with a given size and alignment.
25 class BagOfBytes {
26 public:
27     BagOfBytes(char* block, size_t blockSize, size_t firstHeapAllocation);
28     explicit BagOfBytes(size_t firstHeapAllocation = 0);
29     BagOfBytes(const BagOfBytes&) = delete;
30     BagOfBytes& operator=(const BagOfBytes&) = delete;
BagOfBytes(BagOfBytes && that)31     BagOfBytes(BagOfBytes&& that)
32             : fEndByte{std::exchange(that.fEndByte, nullptr)}
33             , fCapacity{that.fCapacity}
34             , fFibProgression{that.fFibProgression} {}
35     BagOfBytes& operator=(BagOfBytes&& that) {
36         this->~BagOfBytes();
new(this)37         new (this) BagOfBytes{std::move(that)};
38         return *this;
39     }
40 
41     ~BagOfBytes();
42 
43     // Given a requestedSize round up to the smallest size that accounts for all the per block
44     // overhead and alignment. It crashes if requestedSize is negative or too big.
PlatformMinimumSizeWithOverhead(int requestedSize,int assumedAlignment)45     static constexpr int PlatformMinimumSizeWithOverhead(int requestedSize, int assumedAlignment) {
46         return MinimumSizeWithOverhead(
47                 requestedSize, assumedAlignment, sizeof(Block), kMaxAlignment);
48     }
49 
MinimumSizeWithOverhead(int requestedSize,int assumedAlignment,int blockSize,int maxAlignment)50     static constexpr int MinimumSizeWithOverhead(
51             int requestedSize, int assumedAlignment, int blockSize, int maxAlignment) {
52         SkASSERT_RELEASE(0 <= requestedSize && requestedSize < kMaxByteSize);
53         SkASSERT_RELEASE(SkIsPow2(assumedAlignment) && SkIsPow2(maxAlignment));
54 
55         const int minAlignment = std::min(maxAlignment, assumedAlignment);
56         // There are two cases, one easy and one subtle. The easy case is when minAlignment ==
57         // maxAlignment. When that happens, the term maxAlignment - minAlignment is zero, and the
58         // block will be placed at the proper alignment because alignUp is properly
59         // aligned.
60         // The subtle case is where minAlignment < maxAlignment. Because
61         // minAlignment < maxAlignment, alignUp(requestedSize, minAlignment) + blockSize does not
62         // guarantee that block can be placed at a maxAlignment address. Block can be placed at
63         // maxAlignment/minAlignment different address to achieve alignment, so we need
64         // to add memory to allow the block to be placed on a maxAlignment address.
65         // For example, if assumedAlignment = 4 and maxAlignment = 16 then block can be placed at
66         // the following address offsets at the end of minimumSize bytes.
67         //   0 * minAlignment =  0
68         //   1 * minAlignment =  4
69         //   2 * minAlignment =  8
70         //   3 * minAlignment = 12
71         // Following this logic, the equation for the additional bytes is
72         //   (maxAlignment/minAlignment - 1) * minAlignment
73         //     = maxAlignment - minAlignment.
74         int minimumSize = SkToInt(AlignUp(requestedSize, minAlignment))
75                           + blockSize
76                           + maxAlignment - minAlignment;
77 
78         // If minimumSize is > 32k then round to a 4K boundary unless it is too close to the
79         // maximum int. The > 32K heuristic is from the JEMalloc behavior.
80         constexpr int k32K = (1 << 15);
81         if (minimumSize >= k32K && minimumSize < std::numeric_limits<int>::max() - k4K) {
82             minimumSize = SkToInt(AlignUp(minimumSize, k4K));
83         }
84 
85         return minimumSize;
86     }
87 
88     template <int size>
89     using Storage = std::array<char, PlatformMinimumSizeWithOverhead(size, 1)>;
90 
91     // Returns true if n * sizeof(T) will fit in an allocation block.
92     template <typename T>
WillCountFit(int n)93     static bool WillCountFit(int n) {
94         constexpr int kMaxN = kMaxByteSize / sizeof(T);
95         return 0 <= n && n < kMaxN;
96     }
97 
98     // Returns a pointer to memory suitable for holding n Ts.
99     template <typename T> char* allocateBytesFor(int n = 1) {
100         static_assert(alignof(T) <= kMaxAlignment, "Alignment is too big for arena");
101         static_assert(sizeof(T) < kMaxByteSize, "Size is too big for arena");
102         SkASSERT_RELEASE(WillCountFit<T>(n));
103 
104         int size = n ? n * sizeof(T) : 1;
105         return this->allocateBytes(size, alignof(T));
106     }
107 
108     void* alignedBytes(int unsafeSize, int unsafeAlignment);
109 
110 private:
111     // The maximum alignment supported by GrBagOfBytes. 16 seems to be a good number for alignment.
112     // If a use case for larger alignments is found, we can turn this into a template parameter.
113     inline static constexpr int kMaxAlignment = std::max(16, (int)alignof(std::max_align_t));
114     // The largest size that can be allocated. In larger sizes, the block is rounded up to 4K
115     // chunks. Leave a 4K of slop.
116     inline static constexpr int k4K = (1 << 12);
117     // This should never overflow with the calculations done on the code.
118     inline static constexpr int kMaxByteSize = std::numeric_limits<int>::max() - k4K;
119     // The assumed alignment of new char[] given the platform.
120     // There is a bug in Emscripten's allocator that make alignment different than max_align_t.
121     // kAllocationAlignment accounts for this difference. For more information see:
122     // https://github.com/emscripten-core/emscripten/issues/10072
123     #if !defined(SK_FORCE_8_BYTE_ALIGNMENT)
124         inline static constexpr int kAllocationAlignment = alignof(std::max_align_t);
125     #else
126         inline static constexpr int kAllocationAlignment = 8;
127     #endif
128 
AlignUp(int size,int alignment)129     static constexpr size_t AlignUp(int size, int alignment) {
130         return (size + (alignment - 1)) & -alignment;
131     }
132 
133     // The Block starts at the location pointed to by fEndByte.
134     // Beware. Order is important here. The destructor for fPrevious must be called first because
135     // the Block is embedded in fBlockStart. Destructors are run in reverse order.
136     struct Block {
137         Block(char* previous, char* startOfBlock);
138         // The start of the originally allocated bytes. This is the thing that must be deleted.
139         char* const fBlockStart;
140         Block* const fPrevious;
141     };
142 
143     // Note: fCapacity is the number of bytes remaining, and is subtracted from fEndByte to
144     // generate the location of the object.
allocateBytes(int size,int alignment)145     char* allocateBytes(int size, int alignment) {
146         fCapacity = fCapacity & -alignment;
147         if (fCapacity < size) {
148             this->needMoreBytes(size, alignment);
149         }
150         char* const ptr = fEndByte - fCapacity;
151         SkASSERT(((intptr_t)ptr & (alignment - 1)) == 0);
152         SkASSERT(fCapacity >= size);
153         fCapacity -= size;
154         return ptr;
155     }
156 
157     // Adjust fEndByte and fCapacity give a new block starting at bytes with size.
158     void setupBytesAndCapacity(char* bytes, int size);
159 
160     // Adjust fEndByte and fCapacity to satisfy the size and alignment request.
161     void needMoreBytes(int size, int alignment);
162 
163     // This points to the highest kMaxAlignment address in the allocated block. The address of
164     // the current end of allocated data is given by fEndByte - fCapacity. While the negative side
165     // of this pointer are the bytes to be allocated. The positive side points to the Block for
166     // this memory. In other words, the pointer to the Block structure for these bytes is
167     // reinterpret_cast<Block*>(fEndByte).
168     char* fEndByte{nullptr};
169 
170     // The number of bytes remaining in this block.
171     int fCapacity{0};
172 
173     SkFibBlockSizes<kMaxByteSize> fFibProgression;
174 };
175 
176 template <typename T>
177 class SubRunInitializer {
178 public:
SubRunInitializer(void * memory)179     SubRunInitializer(void* memory) : fMemory{memory} { SkASSERT(memory != nullptr); }
~SubRunInitializer()180     ~SubRunInitializer() {
181         ::operator delete(fMemory);
182     }
183     template <typename... Args>
initialize(Args &&...args)184     T* initialize(Args&&... args) {
185         // Warn on more than one initialization.
186         SkASSERT(fMemory != nullptr);
187         return new (std::exchange(fMemory, nullptr)) T(std::forward<Args>(args)...);
188     }
189 
190 private:
191     void* fMemory;
192 };
193 
194 // GrSubRunAllocator provides fast allocation where the user takes care of calling the destructors
195 // of the returned pointers, and GrSubRunAllocator takes care of deleting the storage. The
196 // unique_ptrs returned, are to assist in assuring the object's destructor is called.
197 // A note on zero length arrays: according to the standard a pointer must be returned, and it
198 // can't be a nullptr. In such a case, SkArena allocates one byte, but does not initialize it.
199 class SubRunAllocator {
200 public:
201     struct Destroyer {
202         template <typename T>
operatorDestroyer203         void operator()(T* ptr) { ptr->~T(); }
204     };
205 
206     struct ArrayDestroyer {
207         int n;
208         template <typename T>
operatorArrayDestroyer209         void operator()(T* ptr) {
210             for (int i = 0; i < n; i++) { ptr[i].~T(); }
211         }
212     };
213 
214     template<class T>
215     inline static constexpr bool HasNoDestructor = std::is_trivially_destructible<T>::value;
216 
217     SubRunAllocator(char* block, int blockSize, int firstHeapAllocation);
218     explicit SubRunAllocator(int firstHeapAllocation = 0);
219     SubRunAllocator(const SubRunAllocator&) = delete;
220     SubRunAllocator& operator=(const SubRunAllocator&) = delete;
221     SubRunAllocator(SubRunAllocator&&) = default;
222     SubRunAllocator& operator=(SubRunAllocator&&) = default;
223 
224     template <typename T>
225     static std::tuple<SubRunInitializer<T>, int, SubRunAllocator>
AllocateClassMemoryAndArena(int allocSizeHint)226     AllocateClassMemoryAndArena(int allocSizeHint) {
227         SkASSERT_RELEASE(allocSizeHint >= 0);
228         // Round the size after the object the optimal amount.
229         int extraSize = BagOfBytes::PlatformMinimumSizeWithOverhead(allocSizeHint, alignof(T));
230 
231         // Don't overflow or die.
232         SkASSERT_RELEASE(INT_MAX - SkTo<int>(sizeof(T)) > extraSize);
233         int totalMemorySize = sizeof(T) + extraSize;
234 
235         void* memory = ::operator new (totalMemorySize);
236         SubRunAllocator alloc{SkTAddOffset<char>(memory, sizeof(T)), extraSize, extraSize/2};
237         return {memory, totalMemorySize, std::move(alloc)};
238     }
239 
makePOD(Args &&...args)240     template <typename T, typename... Args> T* makePOD(Args&&... args) {
241         static_assert(HasNoDestructor<T>, "This is not POD. Use makeUnique.");
242         char* bytes = fAlloc.template allocateBytesFor<T>();
243         return new (bytes) T(std::forward<Args>(args)...);
244     }
245 
246     template <typename T, typename... Args>
makeUnique(Args &&...args)247     std::unique_ptr<T, Destroyer> makeUnique(Args&&... args) {
248         static_assert(!HasNoDestructor<T>, "This is POD. Use makePOD.");
249         char* bytes = fAlloc.template allocateBytesFor<T>();
250         return std::unique_ptr<T, Destroyer>{new (bytes) T(std::forward<Args>(args)...)};
251     }
252 
makePODArray(int n)253     template<typename T> T* makePODArray(int n) {
254         static_assert(HasNoDestructor<T>, "This is not POD. Use makeUniqueArray.");
255         return reinterpret_cast<T*>(fAlloc.template allocateBytesFor<T>(n));
256     }
257 
258     template<typename T>
makePODSpan(SkSpan<const T> s)259     SkSpan<T> makePODSpan(SkSpan<const T> s) {
260         static_assert(HasNoDestructor<T>, "This is not POD. Use makeUniqueArray.");
261         if (s.empty()) {
262             return SkSpan<T>{};
263         }
264 
265         T* result = this->makePODArray<T>(SkTo<int>(s.size()));
266         memcpy(result, s.data(), s.size_bytes());
267         return {result, s.size()};
268     }
269 
270     template<typename T, typename Src, typename Map>
makePODArray(const Src & src,Map map)271     SkSpan<T> makePODArray(const Src& src, Map map) {
272         static_assert(HasNoDestructor<T>, "This is not POD. Use makeUniqueArray.");
273         int size = SkTo<int>(src.size());
274         T* result = this->template makePODArray<T>(size);
275         for (int i = 0; i < size; i++) {
276             new (&result[i]) T(map(src[i]));
277         }
278         return {result, src.size()};
279     }
280 
281     template<typename T>
makeUniqueArray(int n)282     std::unique_ptr<T[], ArrayDestroyer> makeUniqueArray(int n) {
283         static_assert(!HasNoDestructor<T>, "This is POD. Use makePODArray.");
284         T* array = reinterpret_cast<T*>(fAlloc.template allocateBytesFor<T>(n));
285         for (int i = 0; i < n; i++) {
286             new (&array[i]) T{};
287         }
288         return std::unique_ptr<T[], ArrayDestroyer>{array, ArrayDestroyer{n}};
289     }
290 
291     template<typename T, typename I>
makeUniqueArray(int n,I initializer)292     std::unique_ptr<T[], ArrayDestroyer> makeUniqueArray(int n, I initializer) {
293         static_assert(!HasNoDestructor<T>, "This is POD. Use makePODArray.");
294         T* array = reinterpret_cast<T*>(fAlloc.template allocateBytesFor<T>(n));
295         for (int i = 0; i < n; i++) {
296             new (&array[i]) T(initializer(i));
297         }
298         return std::unique_ptr<T[], ArrayDestroyer>{array, ArrayDestroyer{n}};
299     }
300 
301     void* alignedBytes(int size, int alignment);
302 
303 private:
304     BagOfBytes fAlloc;
305 };
306 
307 // Helper for defining allocators with inline/reserved storage.
308 // For argument declarations, stick to the base type (SubRunAllocator).
309 // Note: Inheriting from the storage first means the storage will outlive the
310 // SubRunAllocator, letting ~SubRunAllocator read it as it calls destructors.
311 // (This is mostly only relevant for strict tools like MSAN.)
312 template <size_t InlineStorageSize, size_t InlineStorageAlignment>
313 class STSubRunAllocator : private std::array<char,
314                                              BagOfBytes::PlatformMinimumSizeWithOverhead(
315                                                      InlineStorageSize, InlineStorageAlignment)>,
316                           public SubRunAllocator {
317 public:
318     explicit STSubRunAllocator(size_t firstHeapAllocation = InlineStorageSize)
319             : SubRunAllocator{this->data(), SkToInt(this->size()), SkToInt(firstHeapAllocation)} {}
320 };
321 }  // namespace sktext::gpu
322 
323 #endif // sktext_gpu_SubRunAllocator_DEFINED
324