// Copyright 2021 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef BASE_ALLOCATOR_PARTITION_ALLOCATOR_PARTITION_BUCKET_LOOKUP_H_
#define BASE_ALLOCATOR_PARTITION_ALLOCATOR_PARTITION_BUCKET_LOOKUP_H_

#include <cstdint>

#include "base/allocator/partition_allocator/partition_alloc_base/bits.h"
#include "base/allocator/partition_allocator/partition_alloc_base/compiler_specific.h"
#include "base/allocator/partition_allocator/partition_alloc_buildflags.h"
#include "base/allocator/partition_allocator/partition_alloc_check.h"
#include "base/allocator/partition_allocator/partition_alloc_constants.h"

namespace partition_alloc::internal {

// Don't use an anonymous namespace for the constants because it can inhibit
// collapsing them together, even when they are tagged as inline.

// Precalculate some shift and mask constants used in the hot path.
// Example: malloc(41) == 101001 in binary.
// The order is 6: (1 << (6 - 1)) == 32 is the highest bit set.
// order_index is the next three MSBs == 010 == 2.
// sub_order_index_mask is a mask for the remaining bits == 11 (masking to 01
// for the sub_order_index).
constexpr uint8_t OrderIndexShift(uint8_t order) {
  if (order < kNumBucketsPerOrderBits + 1) {
    return 0;
  }

  return order - (kNumBucketsPerOrderBits + 1);
}

constexpr size_t OrderSubIndexMask(uint8_t order) {
  if (order == kBitsPerSizeT) {
    return static_cast<size_t>(-1) >> (kNumBucketsPerOrderBits + 1);
  }

  return ((static_cast<size_t>(1) << order) - 1) >>
         (kNumBucketsPerOrderBits + 1);
}
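
// Continuing the malloc(41) example above (a sketch, assuming
// kNumBucketsPerOrderBits == 3, i.e. 8 buckets per order, as the "next three
// MSBs" comment implies):
//   OrderIndexShift(6)   == 6 - (3 + 1) == 2
//   OrderSubIndexMask(6) == ((1 << 6) - 1) >> 4 == 0b11
// so order_index == (41 >> 2) & 0b111 == 0b010 == 2, and
// sub_order_index == 41 & 0b11 == 0b01.
//
// The checks below hold for any kNumBucketsPerOrderBits: the first order wide
// enough to carry an order index needs no shift, and each subsequent order
// shifts by one more bit.
static_assert(OrderIndexShift(kNumBucketsPerOrderBits + 1) == 0, "");
static_assert(OrderIndexShift(kNumBucketsPerOrderBits + 2) == 1, "");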

#if BUILDFLAG(HAS_64_BIT_POINTERS)
#define PA_BITS_PER_SIZE_T 64
static_assert(kBitsPerSizeT == 64, "");
#else
#define PA_BITS_PER_SIZE_T 32
static_assert(kBitsPerSizeT == 32, "");
#endif  // BUILDFLAG(HAS_64_BIT_POINTERS)

inline constexpr uint8_t kOrderIndexShift[PA_BITS_PER_SIZE_T + 1] = {
    OrderIndexShift(0),  OrderIndexShift(1),  OrderIndexShift(2),
    OrderIndexShift(3),  OrderIndexShift(4),  OrderIndexShift(5),
    OrderIndexShift(6),  OrderIndexShift(7),  OrderIndexShift(8),
    OrderIndexShift(9),  OrderIndexShift(10), OrderIndexShift(11),
    OrderIndexShift(12), OrderIndexShift(13), OrderIndexShift(14),
    OrderIndexShift(15), OrderIndexShift(16), OrderIndexShift(17),
    OrderIndexShift(18), OrderIndexShift(19), OrderIndexShift(20),
    OrderIndexShift(21), OrderIndexShift(22), OrderIndexShift(23),
    OrderIndexShift(24), OrderIndexShift(25), OrderIndexShift(26),
    OrderIndexShift(27), OrderIndexShift(28), OrderIndexShift(29),
    OrderIndexShift(30), OrderIndexShift(31), OrderIndexShift(32),
#if PA_BITS_PER_SIZE_T == 64
    OrderIndexShift(33), OrderIndexShift(34), OrderIndexShift(35),
    OrderIndexShift(36), OrderIndexShift(37), OrderIndexShift(38),
    OrderIndexShift(39), OrderIndexShift(40), OrderIndexShift(41),
    OrderIndexShift(42), OrderIndexShift(43), OrderIndexShift(44),
    OrderIndexShift(45), OrderIndexShift(46), OrderIndexShift(47),
    OrderIndexShift(48), OrderIndexShift(49), OrderIndexShift(50),
    OrderIndexShift(51), OrderIndexShift(52), OrderIndexShift(53),
    OrderIndexShift(54), OrderIndexShift(55), OrderIndexShift(56),
    OrderIndexShift(57), OrderIndexShift(58), OrderIndexShift(59),
    OrderIndexShift(60), OrderIndexShift(61), OrderIndexShift(62),
    OrderIndexShift(63), OrderIndexShift(64)
#endif
};

inline constexpr size_t kOrderSubIndexMask[PA_BITS_PER_SIZE_T + 1] = {
    OrderSubIndexMask(0),  OrderSubIndexMask(1),  OrderSubIndexMask(2),
    OrderSubIndexMask(3),  OrderSubIndexMask(4),  OrderSubIndexMask(5),
    OrderSubIndexMask(6),  OrderSubIndexMask(7),  OrderSubIndexMask(8),
    OrderSubIndexMask(9),  OrderSubIndexMask(10), OrderSubIndexMask(11),
    OrderSubIndexMask(12), OrderSubIndexMask(13), OrderSubIndexMask(14),
    OrderSubIndexMask(15), OrderSubIndexMask(16), OrderSubIndexMask(17),
    OrderSubIndexMask(18), OrderSubIndexMask(19), OrderSubIndexMask(20),
    OrderSubIndexMask(21), OrderSubIndexMask(22), OrderSubIndexMask(23),
    OrderSubIndexMask(24), OrderSubIndexMask(25), OrderSubIndexMask(26),
    OrderSubIndexMask(27), OrderSubIndexMask(28), OrderSubIndexMask(29),
    OrderSubIndexMask(30), OrderSubIndexMask(31), OrderSubIndexMask(32),
#if PA_BITS_PER_SIZE_T == 64
    OrderSubIndexMask(33), OrderSubIndexMask(34), OrderSubIndexMask(35),
    OrderSubIndexMask(36), OrderSubIndexMask(37), OrderSubIndexMask(38),
    OrderSubIndexMask(39), OrderSubIndexMask(40), OrderSubIndexMask(41),
    OrderSubIndexMask(42), OrderSubIndexMask(43), OrderSubIndexMask(44),
    OrderSubIndexMask(45), OrderSubIndexMask(46), OrderSubIndexMask(47),
    OrderSubIndexMask(48), OrderSubIndexMask(49), OrderSubIndexMask(50),
    OrderSubIndexMask(51), OrderSubIndexMask(52), OrderSubIndexMask(53),
    OrderSubIndexMask(54), OrderSubIndexMask(55), OrderSubIndexMask(56),
    OrderSubIndexMask(57), OrderSubIndexMask(58), OrderSubIndexMask(59),
    OrderSubIndexMask(60), OrderSubIndexMask(61), OrderSubIndexMask(62),
    OrderSubIndexMask(63), OrderSubIndexMask(64)
#endif
};

// The class used to generate the bucket lookup table at compile-time.
class BucketIndexLookup final {
 public:
  PA_ALWAYS_INLINE static constexpr uint16_t GetIndexForDefaultBuckets(
      size_t size);
  PA_ALWAYS_INLINE static constexpr uint16_t GetIndexForDenserBuckets(
      size_t size);
  PA_ALWAYS_INLINE static constexpr uint16_t GetIndex(size_t size);

  constexpr BucketIndexLookup() {
    constexpr uint16_t sentinel_bucket_index = kNumBuckets;

    InitBucketSizes();

    uint16_t* bucket_index_ptr = &bucket_index_lookup_[0];
    uint16_t bucket_index = 0;

    // Very small allocations, smaller than the first bucketed order ->
    // everything goes to the first bucket.
    for (uint8_t order = 0; order < kMinBucketedOrder; ++order) {
      for (uint16_t j = 0; j < kNumBucketsPerOrder; ++j) {
        *bucket_index_ptr++ = 0;
      }
    }

    // Normal buckets.
    for (uint8_t order = kMinBucketedOrder; order <= kMaxBucketedOrder;
         ++order) {
      size_t size = static_cast<size_t>(1) << (order - 1);
      size_t current_increment = size >> kNumBucketsPerOrderBits;
      for (uint16_t j = 0; j < kNumBucketsPerOrder; ++j) {
        *bucket_index_ptr++ = bucket_index;

        // For small sizes, buckets are close together (current_increment is
        // small). For instance, for:
        // - kAlignment == 16 (which is the case on most 64-bit systems)
        // - kNumBucketsPerOrder == 4
        //
        // The next 3 buckets after 16 are {20, 24, 28}. None of these is a
        // multiple of kAlignment, so they all use the next valid bucket,
        // which is 32 here.
        if (size % kAlignment != 0) {
          PA_DCHECK(bucket_sizes_[bucket_index] > size);
          // Do not increment bucket_index, since in the example above |size|
          // may be 20 while bucket_sizes_[bucket_index] == 32.
        } else {
          PA_DCHECK(bucket_sizes_[bucket_index] == size);
          bucket_index++;
        }

        size += current_increment;
      }
    }

    // Direct-mapped, and overflow.
    for (uint8_t order = kMaxBucketedOrder + 1; order <= kBitsPerSizeT;
         ++order) {
      for (uint16_t j = 0; j < kNumBucketsPerOrder; ++j) {
        *bucket_index_ptr++ = sentinel_bucket_index;
      }
    }

    // |bucket_index| stays smaller than kNumBuckets because some buckets are
    // not valid due to alignment constraints.
    PA_DCHECK(bucket_index < kNumBuckets);
    PA_DCHECK(bucket_index_ptr ==
              bucket_index_lookup_ +
                  ((kBitsPerSizeT + 1) * kNumBucketsPerOrder));
    // And there's one last bucket lookup that will be hit for e.g. malloc(-1),
    // which tries to overflow to a non-existent order.
    *bucket_index_ptr = sentinel_bucket_index;
  }
  constexpr const size_t* bucket_sizes() const { return &bucket_sizes_[0]; }

 private:
  constexpr void InitBucketSizes() {
    size_t current_size = kSmallestBucket;
    size_t current_increment = kSmallestBucket >> kNumBucketsPerOrderBits;
    size_t* bucket_size = &bucket_sizes_[0];
    for (size_t i = 0; i < kNumBucketedOrders; ++i) {
      for (size_t j = 0; j < kNumBucketsPerOrder; ++j) {
        // All bucket sizes have to be multiples of kAlignment, skip otherwise.
        if (current_size % kAlignment == 0) {
          *bucket_size = current_size;
          ++bucket_size;
        }
        current_size += current_increment;
      }
      current_increment <<= 1;
    }

    // The remaining buckets are invalid.
    while (bucket_size < bucket_sizes_ + kNumBuckets) {
      *(bucket_size++) = kInvalidBucketSize;
    }
  }

  size_t bucket_sizes_[kNumBuckets]{};
  // The bucket lookup table lets us map a size_t to a bucket quickly.
  // The trailing +1 caters for the overflow case for very large allocation
  // sizes. It is one flat array instead of a 2D array because in the 2D
  // world, we'd need to index array[blah][max+1] which risks undefined
  // behavior.
  uint16_t
      bucket_index_lookup_[((kBitsPerSizeT + 1) * kNumBucketsPerOrder) + 1]{};
};
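
// A minimal usage sketch (illustrative only; callers go through GetIndex()
// and friends below rather than using the class directly). The whole object
// is built during constant evaluation, so the tables cost nothing to compute
// at runtime:
//
//   constexpr BucketIndexLookup lookup{};
//   const size_t* sizes = lookup.bucket_sizes();
//   // sizes[0] == kSmallestBucket: a power of two, hence kAlignment-aligned,
//   // so it is the first size kept by InitBucketSizes().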

PA_ALWAYS_INLINE constexpr size_t RoundUpToPowerOfTwo(size_t size) {
  const size_t n = 1 << base::bits::Log2Ceiling(static_cast<uint32_t>(size));
  PA_DCHECK(size <= n);
  return n;
}
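
// Compile-time sanity checks for the helper above; the values follow directly
// from its definition, and rely on the same constant evaluation the bucket
// table construction already uses.
static_assert(RoundUpToPowerOfTwo(32) == 32, "");
static_assert(RoundUpToPowerOfTwo(41) == 64, "");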

PA_ALWAYS_INLINE constexpr size_t RoundUpSize(size_t size) {
  const size_t next_power = RoundUpToPowerOfTwo(size);
  const size_t prev_power = next_power >> 1;
  PA_DCHECK(size <= next_power);
  PA_DCHECK(prev_power < size);
  if (size <= prev_power * 5 / 4) {
    return prev_power * 5 / 4;
  } else {
    return next_power;
  }
}
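
// RoundUpSize() snaps a size onto the sparser {2^N, 1.25 * 2^N} grid used by
// the default distribution (see GetIndex() below). For example:
static_assert(RoundUpSize(33) == 40, "");  // 33 <= 1.25 * 32, so 1.25 * 2^5.
static_assert(RoundUpSize(41) == 64, "");  // 41 > 1.25 * 32, so 2^6.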

PA_ALWAYS_INLINE constexpr uint16_t RoundUpToOdd(uint16_t size) {
  return (size % 2 == 0) + size;
}
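
// Even values get bumped to the next odd one; odd values are unchanged:
static_assert(RoundUpToOdd(6) == 7, "");
static_assert(RoundUpToOdd(7) == 7, "");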

// static
PA_ALWAYS_INLINE constexpr uint16_t
BucketIndexLookup::GetIndexForDenserBuckets(size_t size) {
  // This forces the bucket table to be constant-initialized and immediately
  // materialized in the binary.
  constexpr BucketIndexLookup lookup{};
  const size_t order =
      kBitsPerSizeT -
      static_cast<size_t>(base::bits::CountLeadingZeroBits(size));
  // The order index is simply the next few bits after the most significant
  // bit.
  const size_t order_index =
      (size >> kOrderIndexShift[order]) & (kNumBucketsPerOrder - 1);
  // And if the remaining bits are non-zero we must bump the bucket up.
  const size_t sub_order_index = size & kOrderSubIndexMask[order];
  const uint16_t index =
      lookup.bucket_index_lookup_[(order << kNumBucketsPerOrderBits) +
                                  order_index + !!sub_order_index];
  PA_DCHECK(index <= kNumBuckets);  // Last one is the sentinel bucket.
  return index;
}
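
// Tracing the malloc(41) example through the lookup above (a sketch, assuming
// kNumBucketsPerOrderBits == 3 on a 64-bit build): CountLeadingZeroBits(41)
// == 58, so order == 64 - 58 == 6. Then order_index ==
// (41 >> kOrderIndexShift[6]) & 7 == (41 >> 2) & 7 == 2, and sub_order_index
// == 41 & 0b11 == 1, which is non-zero, so the entry at (6 << 3) + 2 + 1 ==
// 51 is used: the first bucket whose size is >= 41 and a multiple of
// kAlignment (the 48-byte bucket when kAlignment == 16).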

// static
PA_ALWAYS_INLINE constexpr uint16_t
BucketIndexLookup::GetIndexForDefaultBuckets(size_t size) {
  const auto index = GetIndexForDenserBuckets(size);
  // Below the minimum size, the 4- and 8-buckets-per-order distributions are
  // the same, since we can't fit any more buckets per order; this is due to
  // alignment requirements: each bucket must be a multiple of the alignment,
  // which implies the difference between buckets must also be a multiple of
  // the alignment. In smaller orders, this limits the number of buckets we
  // can have per order. So, for these small orders, we do not want to skip
  // every second bucket.
  //
  // We also do not want to go above the index for the maximum bucketed size.
  if (size > kAlignment * kNumBucketsPerOrder &&
      index < GetIndexForDenserBuckets(kMaxBucketed)) {
    return RoundUpToOdd(index);
  } else {
    return index;
  }
}
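
// Net effect of the test above: past the smallest orders, only every other
// bucket of the denser distribution is used. For instance, a hypothetical
// denser index of 10 is redirected to bucket 11 by RoundUpToOdd(), while 11
// stays put; the skipped even buckets are never returned for the default
// distribution.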

// static
PA_ALWAYS_INLINE constexpr uint16_t BucketIndexLookup::GetIndex(size_t size) {
  // For any order 2^N, under the denser bucket distribution ("Distribution
  // A"), we have 4 evenly distributed buckets: 2^N, 1.25*2^N, 1.5*2^N, and
  // 1.75*2^N. These numbers represent the maximum size of an allocation that
  // can go into a given bucket.
  //
  // Under the less dense bucket distribution ("Distribution B"), we only have
  // 2 buckets for the same order 2^N: 2^N and 1.25*2^N.
  //
  // Everything that would be mapped to the last two buckets of an order under
  // Distribution A is instead mapped to the first bucket of the next order
  // under Distribution B. The following diagram shows roughly what this looks
  // like for the order starting from 2^10, as an example.
  //
  // A: ... | 2^10 | 1.25*2^10 | 1.5*2^10 | 1.75*2^10 | 2^11 | ...
  // B: ... | 2^10 | 1.25*2^10 | -------- | --------- | 2^11 | ...
  //
  // So, an allocation of size 1.4*2^10 would go into the 1.5*2^10 bucket under
  // Distribution A, but to the 2^11 bucket under Distribution B.
  if (1 << 8 < size && size < kHighThresholdForAlternateDistribution) {
    return BucketIndexLookup::GetIndexForDefaultBuckets(RoundUpSize(size));
  }
  return BucketIndexLookup::GetIndexForDefaultBuckets(size);
}
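
// Concretely (a sketch, assuming 1434 is below
// kHighThresholdForAlternateDistribution): an allocation of 1434 bytes
// (roughly 1.4*2^10) is first rounded up by RoundUpSize() to 2048, because
// 1434 > 1.25 * 1024, and thus lands in the 2^11 bucket, as the diagram
// above shows.
static_assert(RoundUpSize(1434) == 2048, "");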

}  // namespace partition_alloc::internal

#endif  // BASE_ALLOCATOR_PARTITION_ALLOCATOR_PARTITION_BUCKET_LOOKUP_H_