// Copyright 2018 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "absl/container/internal/raw_hash_set.h"

#include <atomic>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstring>

#include "absl/base/attributes.h"
#include "absl/base/config.h"
#include "absl/base/dynamic_annotations.h"
#include "absl/container/internal/container_memory.h"
#include "absl/hash/hash.h"

namespace absl {
ABSL_NAMESPACE_BEGIN
namespace container_internal {

// We have space for `growth_left` before a single block of control bytes. This
// single block of empty control bytes is shared by all tables that have no
// slots allocated; it enables removing a branch in the hot path of find(). To
// ensure that the control bytes are aligned to 16, we place 16 bytes before
// them even though growth_left only needs 8.
constexpr ctrl_t ZeroCtrlT() { return static_cast<ctrl_t>(0); }
alignas(16) ABSL_CONST_INIT ABSL_DLL const ctrl_t kEmptyGroup[32] = {
    ZeroCtrlT(),       ZeroCtrlT(),    ZeroCtrlT(),    ZeroCtrlT(),
    ZeroCtrlT(),       ZeroCtrlT(),    ZeroCtrlT(),    ZeroCtrlT(),
    ZeroCtrlT(),       ZeroCtrlT(),    ZeroCtrlT(),    ZeroCtrlT(),
    ZeroCtrlT(),       ZeroCtrlT(),    ZeroCtrlT(),    ZeroCtrlT(),
    ctrl_t::kSentinel, ctrl_t::kEmpty, ctrl_t::kEmpty, ctrl_t::kEmpty,
    ctrl_t::kEmpty,    ctrl_t::kEmpty, ctrl_t::kEmpty, ctrl_t::kEmpty,
    ctrl_t::kEmpty,    ctrl_t::kEmpty, ctrl_t::kEmpty, ctrl_t::kEmpty,
    ctrl_t::kEmpty,    ctrl_t::kEmpty, ctrl_t::kEmpty, ctrl_t::kEmpty};
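// NOTE: EmptyGroup() in raw_hash_set.h is expected to hand out a pointer 16
// bytes into this array, so the kSentinel above is control byte 0 of every
// table without an allocated backing array, and the leading zero bytes are the
// (unused) space reserved for growth_left.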

#ifdef ABSL_INTERNAL_NEED_REDUNDANT_CONSTEXPR_DECL
constexpr size_t Group::kWidth;
#endif

namespace {

// Returns "random" seed.
inline size_t RandomSeed() {
#ifdef ABSL_HAVE_THREAD_LOCAL
  static thread_local size_t counter = 0;
  // On Linux kernels >= 5.4 the MSAN runtime has a false-positive when
  // accessing thread local storage data from loaded libraries
  // (https://github.com/google/sanitizers/issues/1265); for this reason
  // `counter` needs to be annotated as initialized.
  ABSL_ANNOTATE_MEMORY_IS_INITIALIZED(&counter, sizeof(size_t));
  size_t value = ++counter;
#else   // ABSL_HAVE_THREAD_LOCAL
  static std::atomic<size_t> counter(0);
  size_t value = counter.fetch_add(1, std::memory_order_relaxed);
#endif  // ABSL_HAVE_THREAD_LOCAL
  return value ^ static_cast<size_t>(reinterpret_cast<uintptr_t>(&counter));
}

bool ShouldRehashForBugDetection(const ctrl_t* ctrl, size_t capacity) {
  // Note: we can't use the abseil-random library because abseil-random
  // depends on swisstable. We want to return true with probability
  // `min(1, RehashProbabilityConstant() / capacity())`. In order to do this,
  // we probe based on a random hash and see if the offset is less than
  // RehashProbabilityConstant().
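  // probe(...).offset() below is the start of the probe sequence, i.e. H1 of
  // the random hash masked down to [0, capacity], which is roughly uniform
  // over the table; so the comparison succeeds with probability about
  // RehashProbabilityConstant() / capacity.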
  return probe(ctrl, capacity, absl::HashOf(RandomSeed())).offset() <
         RehashProbabilityConstant();
}

}  // namespace

GenerationType* EmptyGeneration() {
  if (SwisstableGenerationsEnabled()) {
    constexpr size_t kNumEmptyGenerations = 1024;
    static constexpr GenerationType kEmptyGenerations[kNumEmptyGenerations]{};
    return const_cast<GenerationType*>(
        &kEmptyGenerations[RandomSeed() % kNumEmptyGenerations]);
  }
  return nullptr;
}

bool CommonFieldsGenerationInfoEnabled::
    should_rehash_for_bug_detection_on_insert(const ctrl_t* ctrl,
                                              size_t capacity) const {
  if (reserved_growth_ == kReservedGrowthJustRanOut) return true;
  if (reserved_growth_ > 0) return false;
  return ShouldRehashForBugDetection(ctrl, capacity);
}

bool CommonFieldsGenerationInfoEnabled::should_rehash_for_bug_detection_on_move(
    const ctrl_t* ctrl, size_t capacity) const {
  return ShouldRehashForBugDetection(ctrl, capacity);
}

bool ShouldInsertBackwards(size_t hash, const ctrl_t* ctrl) {
  // To avoid problems with weak hashes and single bit tests, we use % 13.
  // TODO(kfm,sbenza): revisit after we do unconditional mixing
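  // `x % 13 > 6` holds for 6 of the 13 possible residues (7..12), so this
  // returns true for roughly 46% of (hash, seed) combinations.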
  return (H1(hash, ctrl) ^ RandomSeed()) % 13 > 6;
}

void ConvertDeletedToEmptyAndFullToDeleted(ctrl_t* ctrl, size_t capacity) {
  assert(ctrl[capacity] == ctrl_t::kSentinel);
  assert(IsValidCapacity(capacity));
  for (ctrl_t* pos = ctrl; pos < ctrl + capacity; pos += Group::kWidth) {
    Group{pos}.ConvertSpecialToEmptyAndFullToDeleted(pos);
  }
  // Copy the cloned ctrl bytes.
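  // The NumClonedBytes() bytes after the sentinel mirror the first control
  // bytes so that group loads starting near the end of the table can read past
  // the sentinel without wrapping.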
  std::memcpy(ctrl + capacity + 1, ctrl, NumClonedBytes());
  ctrl[capacity] = ctrl_t::kSentinel;
}
// Extern template instantiation for inline function.
template FindInfo find_first_non_full(const CommonFields&, size_t);

FindInfo find_first_non_full_outofline(const CommonFields& common,
                                       size_t hash) {
  return find_first_non_full(common, hash);
}

// Returns the address of the slot just after `slot`, assuming each slot has
// the specified size.
static inline void* NextSlot(void* slot, size_t slot_size) {
  return reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(slot) + slot_size);
}

// Returns the address of the slot just before `slot`, assuming each slot has
// the specified size.
static inline void* PrevSlot(void* slot, size_t slot_size) {
  return reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(slot) - slot_size);
}

void DropDeletesWithoutResize(CommonFields& common,
                              const PolicyFunctions& policy, void* tmp_space) {
  void* set = &common;
  void* slot_array = common.slot_array();
  const size_t capacity = common.capacity();
  assert(IsValidCapacity(capacity));
  assert(!is_small(capacity));
  // Algorithm:
  // - mark all DELETED slots as EMPTY
  // - mark all FULL slots as DELETED
  // - for each slot marked as DELETED
  //     hash = Hash(element)
  //     target = find_first_non_full(hash)
  //     if target is in the same group
  //       mark slot as FULL
  //     else if target is EMPTY
  //       transfer element to target
  //       mark slot as EMPTY
  //       mark target as FULL
  //     else if target is DELETED
  //       swap current element with target element
  //       mark target as FULL
  //       repeat procedure for current slot with moved from element (target)
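  //
  // Note: while this runs, DELETED therefore temporarily means "was FULL and
  // still holds an element that has not been re-placed yet", and EMPTY means
  // "free".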
  ctrl_t* ctrl = common.control();
  ConvertDeletedToEmptyAndFullToDeleted(ctrl, capacity);
  auto hasher = policy.hash_slot;
  auto transfer = policy.transfer;
  const size_t slot_size = policy.slot_size;

  size_t total_probe_length = 0;
  void* slot_ptr = SlotAddress(slot_array, 0, slot_size);
  for (size_t i = 0; i != capacity;
       ++i, slot_ptr = NextSlot(slot_ptr, slot_size)) {
    assert(slot_ptr == SlotAddress(slot_array, i, slot_size));
    if (!IsDeleted(ctrl[i])) continue;
    const size_t hash = (*hasher)(set, slot_ptr);
    const FindInfo target = find_first_non_full(common, hash);
    const size_t new_i = target.offset;
    total_probe_length += target.probe_length;

    // Check whether the old and the new position fall within the same group
    // with respect to the hash. If they do, we don't need to move the object,
    // as it already sits in the best probe group we can hope for.
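    // `capacity` is of the form 2^k - 1, so `& capacity` wraps the distance
    // from the probe start modulo the table size; dividing by Group::kWidth
    // then yields the probe-group index relative to the hash's home position.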
    const size_t probe_offset = probe(common, hash).offset();
    const auto probe_index = [probe_offset, capacity](size_t pos) {
      return ((pos - probe_offset) & capacity) / Group::kWidth;
    };

    // Element doesn't move.
    if (ABSL_PREDICT_TRUE(probe_index(new_i) == probe_index(i))) {
      SetCtrl(common, i, H2(hash), slot_size);
      continue;
    }

    void* new_slot_ptr = SlotAddress(slot_array, new_i, slot_size);
    if (IsEmpty(ctrl[new_i])) {
      // Transfer element to the empty spot.
      // SetCtrl poisons/unpoisons the slots so we have to call it at the
      // right time.
      SetCtrl(common, new_i, H2(hash), slot_size);
      (*transfer)(set, new_slot_ptr, slot_ptr);
      SetCtrl(common, i, ctrl_t::kEmpty, slot_size);
    } else {
      assert(IsDeleted(ctrl[new_i]));
      SetCtrl(common, new_i, H2(hash), slot_size);
      // Until we are done rehashing, DELETED marks previously FULL slots.

      // Swap i and new_i elements.
      (*transfer)(set, tmp_space, new_slot_ptr);
      (*transfer)(set, new_slot_ptr, slot_ptr);
      (*transfer)(set, slot_ptr, tmp_space);

      // repeat the processing of the ith slot
      --i;
      slot_ptr = PrevSlot(slot_ptr, slot_size);
    }
  }
  ResetGrowthLeft(common);
  common.infoz().RecordRehash(total_probe_length);
}

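// Returns whether the slot at `index` can safely be reset to kEmpty on erase,
// which is the case iff no probe window containing `index` could ever have
// been completely full, so no probe sequence can terminate early as a result.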
static bool WasNeverFull(CommonFields& c, size_t index) {
  if (is_single_group(c.capacity())) {
    return true;
  }
  const size_t index_before = (index - Group::kWidth) & c.capacity();
  const auto empty_after = Group(c.control() + index).MaskEmpty();
  const auto empty_before = Group(c.control() + index_before).MaskEmpty();

  // We count how many consecutive non-empty slots we have to the right and to
  // the left of `index`. If the sum is >= kWidth then there is at least one
  // probe window that might have seen a full group.
  return empty_before && empty_after &&
         static_cast<size_t>(empty_after.TrailingZeros()) +
                 empty_before.LeadingZeros() <
             Group::kWidth;
}

void EraseMetaOnly(CommonFields& c, size_t index, size_t slot_size) {
  assert(IsFull(c.control()[index]) && "erasing a dangling iterator");
  c.decrement_size();
  c.infoz().RecordErase();

  if (WasNeverFull(c, index)) {
    SetCtrl(c, index, ctrl_t::kEmpty, slot_size);
    c.set_growth_left(c.growth_left() + 1);
    return;
  }

  SetCtrl(c, index, ctrl_t::kDeleted, slot_size);
}

void ClearBackingArray(CommonFields& c, const PolicyFunctions& policy,
                       bool reuse) {
  c.set_size(0);
  if (reuse) {
    ResetCtrl(c, policy.slot_size);
    ResetGrowthLeft(c);
    c.infoz().RecordStorageChanged(0, c.capacity());
  } else {
    // We need to record infoz before calling dealloc, which will unregister
    // infoz.
    c.infoz().RecordClearedReservation();
    c.infoz().RecordStorageChanged(0, 0);
    (*policy.dealloc)(c, policy);
    c.set_control(EmptyGroup());
    c.set_generation_ptr(EmptyGeneration());
    c.set_slots(nullptr);
    c.set_capacity(0);
  }
}

void HashSetResizeHelper::GrowIntoSingleGroupShuffleControlBytes(
    ctrl_t* new_ctrl, size_t new_capacity) const {
  assert(is_single_group(new_capacity));
  constexpr size_t kHalfWidth = Group::kWidth / 2;
  assert(old_capacity_ < kHalfWidth);

  const size_t half_old_capacity = old_capacity_ / 2;

  // NOTE: operations are done with a compile-time-known size (kHalfWidth),
  // which the compiler optimizes into a single instruction.

  // Copy the second half of the bytes to the beginning.
  // We potentially copy more bytes in order to have a compile-time-known size;
  // mirrored bytes from old_ctrl_ will also be copied.
  // In the case of old_capacity_ == 3 we copy the 1st element twice.
  // Examples:
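  // (Digits label the control bytes of full slots by their old position,
  //  S = kSentinel, E = kEmpty, ? = unspecified bytes.)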
  // old_ctrl = 0S0EEEEEEE...
  // new_ctrl = S0EEEEEEEE...
  //
  // old_ctrl = 01S01EEEEE...
  // new_ctrl = 1S01EEEEEE...
  //
  // old_ctrl = 0123456S0123456EE...
  // new_ctrl = 456S0123?????????...
  std::memcpy(new_ctrl, old_ctrl_ + half_old_capacity + 1, kHalfWidth);
  // Clean up copied kSentinel from old_ctrl.
  new_ctrl[half_old_capacity] = ctrl_t::kEmpty;

  // Clean up damaged or uninitialized bytes.

  // Clean bytes after the intended size of the copy.
  // Example:
  // new_ctrl = 1E01EEEEEEE????
  // *new_ctrl= 1E0EEEEEEEE????
  // position      /
  std::memset(new_ctrl + old_capacity_ + 1, static_cast<int8_t>(ctrl_t::kEmpty),
              kHalfWidth);
  // Clean non-mirrored bytes that are not initialized.
  // For small old_capacity that may be inside of mirrored bytes zone.
  // Examples:
  // new_ctrl = 1E0EEEEEEEE??????????....
  // *new_ctrl= 1E0EEEEEEEEEEEEE?????....
  // position           /
  //
  // new_ctrl = 456E0123???????????...
  // *new_ctrl= 456E0123EEEEEEEE???...
  // position           /
  std::memset(new_ctrl + kHalfWidth, static_cast<int8_t>(ctrl_t::kEmpty),
              kHalfWidth);
  // Clean last mirrored bytes that are not initialized
  // and will not be overwritten by mirroring.
  // Examples:
  // new_ctrl = 1E0EEEEEEEEEEEEE????????
  // *new_ctrl= 1E0EEEEEEEEEEEEEEEEEEEEE
  // position           S       /
  //
  // new_ctrl = 456E0123EEEEEEEE???????????????
  // *new_ctrl= 456E0123EEEEEEEE???????EEEEEEEE
  // position                  S       /
  std::memset(new_ctrl + new_capacity + kHalfWidth,
              static_cast<int8_t>(ctrl_t::kEmpty), kHalfWidth);

  // Create mirrored bytes. old_capacity_ < kHalfWidth
  // Example:
  // new_ctrl = 456E0123EEEEEEEE???????EEEEEEEE
  // *new_ctrl= 456E0123EEEEEEEE456E0123EEEEEEE
  // position                  S/
  ctrl_t g[kHalfWidth];
  std::memcpy(g, new_ctrl, kHalfWidth);
  std::memcpy(new_ctrl + new_capacity + 1, g, kHalfWidth);

  // Finally set sentinel to its place.
  new_ctrl[new_capacity] = ctrl_t::kSentinel;
}

void HashSetResizeHelper::GrowIntoSingleGroupShuffleTransferableSlots(
    void* old_slots, void* new_slots, size_t slot_size) const {
  assert(old_capacity_ > 0);
  const size_t half_old_capacity = old_capacity_ / 2;

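  // Apply the same shuffle to the slots as to the control bytes above: the
  // second half of the old slots moves to the front and the first half (plus
  // the middle slot) follows it. E.g. for old_capacity_ == 3, old slots
  // [a0 a1 a2] end up as [a2 _ a0 a1 ...].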
  SanitizerUnpoisonMemoryRegion(old_slots, slot_size * old_capacity_);
  std::memcpy(new_slots,
              SlotAddress(old_slots, half_old_capacity + 1, slot_size),
              slot_size * half_old_capacity);
  std::memcpy(SlotAddress(new_slots, half_old_capacity + 1, slot_size),
              old_slots, slot_size * (half_old_capacity + 1));
}

void HashSetResizeHelper::GrowSizeIntoSingleGroupTransferable(
    CommonFields& c, void* old_slots, size_t slot_size) {
  assert(old_capacity_ < Group::kWidth / 2);
  assert(is_single_group(c.capacity()));
  assert(IsGrowingIntoSingleGroupApplicable(old_capacity_, c.capacity()));

  GrowIntoSingleGroupShuffleControlBytes(c.control(), c.capacity());
  GrowIntoSingleGroupShuffleTransferableSlots(old_slots, c.slot_array(),
                                              slot_size);

  // We poison since GrowIntoSingleGroupShuffleTransferableSlots
  // may leave empty slots unpoisoned.
  PoisonSingleGroupEmptySlots(c, slot_size);
}

}  // namespace container_internal
ABSL_NAMESPACE_END
}  // namespace absl