// Copyright 2021 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef BASE_ALLOCATOR_PARTITION_ALLOCATOR_SRC_PARTITION_ALLOC_PARTITION_BUCKET_LOOKUP_H_
#define BASE_ALLOCATOR_PARTITION_ALLOCATOR_SRC_PARTITION_ALLOC_PARTITION_BUCKET_LOOKUP_H_

#include <bit>
#include <cstdint>

#include "partition_alloc/partition_alloc_base/bits.h"
#include "partition_alloc/partition_alloc_base/compiler_specific.h"
#include "partition_alloc/partition_alloc_buildflags.h"
#include "partition_alloc/partition_alloc_check.h"
#include "partition_alloc/partition_alloc_constants.h"

namespace partition_alloc::internal {

// Don't use an anonymous namespace for the constants because it can inhibit
// collapsing them together, even when they are tagged as inline.

// Precalculate some shift and mask constants used in the hot path.
// Example: malloc(41) == 101001 in binary.
// The order is 6: (1 << (6 - 1)) == 32 is the highest bit set.
// order_index is the next three MSBs == 010 == 2.
// sub_order_index_mask is a mask for the remaining bits == 11 (masking to 01
// for the sub_order_index).
constexpr uint8_t OrderIndexShift(uint8_t order) {
  if (order < kNumBucketsPerOrderBits + 1) {
    return 0;
  }

  return order - (kNumBucketsPerOrderBits + 1);
}

constexpr size_t OrderSubIndexMask(uint8_t order) {
  if (order == kBitsPerSizeT) {
    return static_cast<size_t>(-1) >> (kNumBucketsPerOrderBits + 1);
  }

  return ((static_cast<size_t>(1) << order) - 1) >>
         (kNumBucketsPerOrderBits + 1);
}
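
// A small illustrative sketch (not needed for correctness): OrderIndexShift()
// stays at zero through order kNumBucketsPerOrderBits + 1 and then grows by
// one per order, while OrderSubIndexMask() covers the bits below the
// order_index bits.
static_assert(OrderIndexShift(kNumBucketsPerOrderBits + 2) == 1, "");
static_assert(OrderSubIndexMask(kNumBucketsPerOrderBits + 2) == 1, "");
// For the malloc(41) example above, assuming kNumBucketsPerOrderBits == 3 (as
// the "next three MSBs" comment implies): OrderIndexShift(6) == 2 and
// OrderSubIndexMask(6) == 0b11, so order_index == (41 >> 2) & 0b111 == 2 and
// sub_order_index == 41 & 0b11 == 1.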

#if BUILDFLAG(HAS_64_BIT_POINTERS)
#define PA_BITS_PER_SIZE_T 64
static_assert(kBitsPerSizeT == 64, "");
#else
#define PA_BITS_PER_SIZE_T 32
static_assert(kBitsPerSizeT == 32, "");
#endif  // BUILDFLAG(HAS_64_BIT_POINTERS)

inline constexpr uint8_t kOrderIndexShift[PA_BITS_PER_SIZE_T + 1] = {
    OrderIndexShift(0),  OrderIndexShift(1),  OrderIndexShift(2),
    OrderIndexShift(3),  OrderIndexShift(4),  OrderIndexShift(5),
    OrderIndexShift(6),  OrderIndexShift(7),  OrderIndexShift(8),
    OrderIndexShift(9),  OrderIndexShift(10), OrderIndexShift(11),
    OrderIndexShift(12), OrderIndexShift(13), OrderIndexShift(14),
    OrderIndexShift(15), OrderIndexShift(16), OrderIndexShift(17),
    OrderIndexShift(18), OrderIndexShift(19), OrderIndexShift(20),
    OrderIndexShift(21), OrderIndexShift(22), OrderIndexShift(23),
    OrderIndexShift(24), OrderIndexShift(25), OrderIndexShift(26),
    OrderIndexShift(27), OrderIndexShift(28), OrderIndexShift(29),
    OrderIndexShift(30), OrderIndexShift(31), OrderIndexShift(32),
#if PA_BITS_PER_SIZE_T == 64
    OrderIndexShift(33), OrderIndexShift(34), OrderIndexShift(35),
    OrderIndexShift(36), OrderIndexShift(37), OrderIndexShift(38),
    OrderIndexShift(39), OrderIndexShift(40), OrderIndexShift(41),
    OrderIndexShift(42), OrderIndexShift(43), OrderIndexShift(44),
    OrderIndexShift(45), OrderIndexShift(46), OrderIndexShift(47),
    OrderIndexShift(48), OrderIndexShift(49), OrderIndexShift(50),
    OrderIndexShift(51), OrderIndexShift(52), OrderIndexShift(53),
    OrderIndexShift(54), OrderIndexShift(55), OrderIndexShift(56),
    OrderIndexShift(57), OrderIndexShift(58), OrderIndexShift(59),
    OrderIndexShift(60), OrderIndexShift(61), OrderIndexShift(62),
    OrderIndexShift(63), OrderIndexShift(64)
#endif
};

inline constexpr size_t kOrderSubIndexMask[PA_BITS_PER_SIZE_T + 1] = {
    OrderSubIndexMask(0),  OrderSubIndexMask(1),  OrderSubIndexMask(2),
    OrderSubIndexMask(3),  OrderSubIndexMask(4),  OrderSubIndexMask(5),
    OrderSubIndexMask(6),  OrderSubIndexMask(7),  OrderSubIndexMask(8),
    OrderSubIndexMask(9),  OrderSubIndexMask(10), OrderSubIndexMask(11),
    OrderSubIndexMask(12), OrderSubIndexMask(13), OrderSubIndexMask(14),
    OrderSubIndexMask(15), OrderSubIndexMask(16), OrderSubIndexMask(17),
    OrderSubIndexMask(18), OrderSubIndexMask(19), OrderSubIndexMask(20),
    OrderSubIndexMask(21), OrderSubIndexMask(22), OrderSubIndexMask(23),
    OrderSubIndexMask(24), OrderSubIndexMask(25), OrderSubIndexMask(26),
    OrderSubIndexMask(27), OrderSubIndexMask(28), OrderSubIndexMask(29),
    OrderSubIndexMask(30), OrderSubIndexMask(31), OrderSubIndexMask(32),
#if PA_BITS_PER_SIZE_T == 64
    OrderSubIndexMask(33), OrderSubIndexMask(34), OrderSubIndexMask(35),
    OrderSubIndexMask(36), OrderSubIndexMask(37), OrderSubIndexMask(38),
    OrderSubIndexMask(39), OrderSubIndexMask(40), OrderSubIndexMask(41),
    OrderSubIndexMask(42), OrderSubIndexMask(43), OrderSubIndexMask(44),
    OrderSubIndexMask(45), OrderSubIndexMask(46), OrderSubIndexMask(47),
    OrderSubIndexMask(48), OrderSubIndexMask(49), OrderSubIndexMask(50),
    OrderSubIndexMask(51), OrderSubIndexMask(52), OrderSubIndexMask(53),
    OrderSubIndexMask(54), OrderSubIndexMask(55), OrderSubIndexMask(56),
    OrderSubIndexMask(57), OrderSubIndexMask(58), OrderSubIndexMask(59),
    OrderSubIndexMask(60), OrderSubIndexMask(61), OrderSubIndexMask(62),
    OrderSubIndexMask(63), OrderSubIndexMask(64)
#endif
};

// The class used to generate the bucket lookup table at compile-time.
class BucketIndexLookup final {
 public:
  PA_ALWAYS_INLINE static constexpr uint16_t GetIndexForNeutralBuckets(
      size_t size);
  PA_ALWAYS_INLINE static constexpr uint16_t GetIndexForDenserBuckets(
      size_t size);
  PA_ALWAYS_INLINE static constexpr uint16_t GetIndex(size_t size);

  constexpr BucketIndexLookup() {
    constexpr uint16_t sentinel_bucket_index = kNumBuckets;

    InitBucketSizes();

    uint16_t* bucket_index_ptr = &bucket_index_lookup_[0];
    uint16_t bucket_index = 0;

    // Very small allocations, smaller than the first bucketed order ->
    // everything goes to the first bucket.
    for (uint8_t order = 0; order < kMinBucketedOrder; ++order) {
      for (uint16_t j = 0; j < kNumBucketsPerOrder; ++j) {
        *bucket_index_ptr++ = 0;
      }
    }

    // Normal buckets.
    for (uint8_t order = kMinBucketedOrder; order <= kMaxBucketedOrder;
         ++order) {
      size_t size = static_cast<size_t>(1) << (order - 1);
      size_t current_increment = size >> kNumBucketsPerOrderBits;
      for (uint16_t j = 0; j < kNumBucketsPerOrder; ++j) {
        *bucket_index_ptr++ = bucket_index;

        // For small sizes, buckets are close together (current_increment is
        // small). For instance, for:
        // - kAlignment == 16 (which is the case on most 64-bit systems)
        // - kNumBucketsPerOrder == 4
        //
        // The next 3 buckets after 16 are {20, 24, 28}. None of these is a
        // multiple of kAlignment, so they use the next bucket, which is 32 here.
        if (size % kAlignment != 0) {
          PA_DCHECK(bucket_sizes_[bucket_index] > size);
          // Do not increment bucket_index, since in the example above
          // size may be 20, and bucket_sizes_[bucket_index] == 32.
        } else {
          PA_DCHECK(bucket_sizes_[bucket_index] == size);
          bucket_index++;
        }

        size += current_increment;
      }
    }

    // Direct-mapped, and overflow.
    for (uint8_t order = kMaxBucketedOrder + 1; order <= kBitsPerSizeT;
         ++order) {
      for (uint16_t j = 0; j < kNumBucketsPerOrder; ++j) {
        *bucket_index_ptr++ = sentinel_bucket_index;
      }
    }

    // Smaller because some buckets are not valid due to alignment constraints.
    PA_DCHECK(bucket_index < kNumBuckets);
    PA_DCHECK(bucket_index_ptr == bucket_index_lookup_ + ((kBitsPerSizeT + 1) *
                                                          kNumBucketsPerOrder));
    // And there's one last bucket lookup that will be hit for e.g. malloc(-1),
    // which tries to overflow to a non-existent order.
    *bucket_index_ptr = sentinel_bucket_index;
  }
  constexpr const size_t* bucket_sizes() const { return &bucket_sizes_[0]; }

 private:
  constexpr void InitBucketSizes() {
    size_t current_size = kSmallestBucket;
    size_t current_increment = kSmallestBucket >> kNumBucketsPerOrderBits;
    size_t* bucket_size = &bucket_sizes_[0];
    for (size_t i = 0; i < kNumBucketedOrders; ++i) {
      for (size_t j = 0; j < kNumBucketsPerOrder; ++j) {
        // All bucket sizes have to be multiples of kAlignment, skip otherwise.
        if (current_size % kAlignment == 0) {
          *bucket_size = current_size;
          ++bucket_size;
        }
        current_size += current_increment;
      }
      current_increment <<= 1;
    }

    // The remaining buckets are invalid.
    while (bucket_size < bucket_sizes_ + kNumBuckets) {
      *(bucket_size++) = kInvalidBucketSize;
    }
  }
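
  // Illustrative walk-through, using the example values from the constructor
  // comment above (kAlignment == 16, kNumBucketsPerOrder == 4) and assuming
  // kSmallestBucket == 16: InitBucketSizes() visits 16, 20, 24, 28, 32, 40,
  // 48, 56, 64, 80, ... and keeps only the multiples of kAlignment, so
  // bucket_sizes_ would begin 16, 32, 48, 64, 80, 96, 112, 128, 160, ...; any
  // remaining slots are set to kInvalidBucketSize.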

  size_t bucket_sizes_[kNumBuckets]{};
  // The bucket lookup table lets us map a size_t to a bucket quickly.
  // The trailing +1 caters for the overflow case for very large allocation
  // sizes.  It is one flat array instead of a 2D array because in the 2D
  // world, we'd need to index array[blah][max+1] which risks undefined
  // behavior.
  uint16_t
      bucket_index_lookup_[((kBitsPerSizeT + 1) * kNumBucketsPerOrder) + 1]{};
};

PA_ALWAYS_INLINE constexpr size_t RoundUpSize(size_t size) {
  const size_t next_power = std::bit_ceil(size);
  const size_t prev_power = next_power >> 1;
  PA_DCHECK(size <= next_power);
  PA_DCHECK(prev_power < size);
  if (size <= prev_power * 5 / 4) {
    return prev_power * 5 / 4;
  } else {
    return next_power;
  }
}
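
// A few illustrative spot checks of RoundUpSize() (documentation only): a
// power of two maps to itself, anything up to 1.25x the previous power of two
// maps to that 1.25x point, and everything else maps to the next power of two.
static_assert(RoundUpSize(1024) == 1024, "");
static_assert(RoundUpSize(1100) == 1280, "");
static_assert(RoundUpSize(1500) == 2048, "");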

PA_ALWAYS_INLINE constexpr uint16_t RoundUpToOdd(uint16_t size) {
  return (size % 2 == 0) + size;
}

// static
PA_ALWAYS_INLINE constexpr uint16_t BucketIndexLookup::GetIndexForDenserBuckets(
    size_t size) {
  // This forces the bucket table to be constant-initialized and immediately
  // materialized in the binary.
  constexpr BucketIndexLookup lookup{};
  const size_t order =
      kBitsPerSizeT - static_cast<size_t>(std::countl_zero(size));
  // The order index is simply the next few bits after the most significant
  // bit.
  const size_t order_index =
      (size >> kOrderIndexShift[order]) & (kNumBucketsPerOrder - 1);
  // And if the remaining bits are non-zero we must bump the bucket up.
  const size_t sub_order_index = size & kOrderSubIndexMask[order];
  const uint16_t index =
      lookup.bucket_index_lookup_[(order << kNumBucketsPerOrderBits) +
                                  order_index + !!sub_order_index];
  PA_DCHECK(index <= kNumBuckets);  // Last one is the sentinel bucket.
  return index;
}
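
// Continuing the malloc(41) example from the top of the file (still assuming
// kNumBucketsPerOrderBits == 3): order == 6, order_index == 2 and
// sub_order_index == 1, so the lookup above reads
// bucket_index_lookup_[(6 << 3) + 2 + 1] == bucket_index_lookup_[51]; the
// non-zero sub_order_index bumps the request into the next, larger bucket.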

// static
PA_ALWAYS_INLINE constexpr uint16_t
BucketIndexLookup::GetIndexForNeutralBuckets(size_t size) {
  const auto index = GetIndexForDenserBuckets(size);
  // Below the minimum size, the 4- and 8-bucket distributions are the same,
  // since we can't fit any more buckets per order; this is due to alignment
  // requirements: each bucket must be a multiple of the alignment, which
  // implies the difference between buckets must also be a multiple of the
  // alignment. In smaller orders, this limits the number of buckets we can
  // have per order. So, for these small orders, we do not want to skip every
  // second bucket.
  //
  // We also do not want to go above the index for the max bucketed size.
  if (size > kAlignment * kNumBucketsPerOrder &&
      index < GetIndexForDenserBuckets(kMaxBucketed)) {
    return RoundUpToOdd(index);
  } else {
    return index;
  }
}
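
// Because RoundUpToOdd() maps every even index to the following odd one, the
// neutral distribution effectively merges each pair of adjacent dense buckets
// in that mid-size range, halving the number of distinct buckets used there.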

// static
PA_ALWAYS_INLINE constexpr uint16_t BucketIndexLookup::GetIndex(size_t size) {
  // For any order 2^N, under the denser bucket distribution ("Distribution A"),
  // we have 4 evenly distributed buckets: 2^N, 1.25*2^N, 1.5*2^N, and 1.75*2^N.
  // These numbers represent the maximum size of an allocation that can go into
  // a given bucket.
  //
  // Under the less dense bucket distribution ("Distribution B"), we only have
  // 2 buckets for the same order 2^N: 2^N and 1.25*2^N.
  //
  // Everything that would be mapped to the last two buckets of an order under
  // Distribution A is instead mapped to the first bucket of the next order
  // under Distribution B. The following diagram shows roughly what this looks
  // like for the order starting from 2^10, as an example.
  //
  // A: ... | 2^10 | 1.25*2^10 | 1.5*2^10 | 1.75*2^10 | 2^11 | ...
  // B: ... | 2^10 | 1.25*2^10 | -------- | --------- | 2^11 | ...
  //
  // So, an allocation of size 1.4*2^10 would go into the 1.5*2^10 bucket under
  // Distribution A, but to the 2^11 bucket under Distribution B.
  if (1 << 8 < size && size < kHighThresholdForAlternateDistribution) {
    return BucketIndexLookup::GetIndexForNeutralBuckets(RoundUpSize(size));
  }
  return BucketIndexLookup::GetIndexForNeutralBuckets(size);
}
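
// Tying the diagram above to the code: 1.4*2^10 is roughly 1434 bytes, which
// is past the 1.25*2^10 == 1280 cut-off, so RoundUpSize() pushes it to 2^11
// and it is looked up as 2048; a size of 1229 (~1.2*2^10) stays in the
// 1.25*2^10 bucket instead.
static_assert(RoundUpSize(1434) == 2048, "");
static_assert(RoundUpSize(1229) == 1280, "");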

}  // namespace partition_alloc::internal

#endif  // BASE_ALLOCATOR_PARTITION_ALLOCATOR_SRC_PARTITION_ALLOC_PARTITION_BUCKET_LOOKUP_H_