• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2021 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // Collection of swiss table helpers that are independent from a specific
6 // container, like SwissNameDictionary. Taken almost in verbatim from Abseil,
7 // comments in this file indicate what is taken from what Abseil file.
8 
#include <climits>
#include <cstdint>
#include <cstring>
#include <type_traits>

#include "src/base/bits.h"
#include "src/base/logging.h"
#include "src/base/memory.h"
16 
17 #ifndef V8_OBJECTS_SWISS_HASH_TABLE_HELPERS_H_
18 #define V8_OBJECTS_SWISS_HASH_TABLE_HELPERS_H_
19 
// The following #defines are taken from Abseil's have_sse.h (but renamed).
// They describe the *host* (build machine) SIMD capabilities.
#ifndef V8_SWISS_TABLE_HAVE_SSE2_HOST
#if (defined(__SSE2__) ||  \
     (defined(_MSC_VER) && \
      (defined(_M_X64) || (defined(_M_IX86) && _M_IX86_FP >= 2))))
#define V8_SWISS_TABLE_HAVE_SSE2_HOST 1
#else
#define V8_SWISS_TABLE_HAVE_SSE2_HOST 0
#endif
#endif

#ifndef V8_SWISS_TABLE_HAVE_SSSE3_HOST
#if defined(__SSSE3__)
#define V8_SWISS_TABLE_HAVE_SSSE3_HOST 1
#else
#define V8_SWISS_TABLE_HAVE_SSSE3_HOST 0
#endif
#endif

// SSSE3 is a superset of SSE2; detecting SSSE3 without SSE2 means the
// detection above is broken.
#if V8_SWISS_TABLE_HAVE_SSSE3_HOST && !V8_SWISS_TABLE_HAVE_SSE2_HOST
#error "Bad configuration!"
#endif

// Unlike Abseil, we cannot select SSE purely by host capabilities. When
// creating a snapshot, the group width must be compatible. The SSE
// implementation uses a group width of 16, whereas the non-SSE version uses 8.
// Thus we select the group size based on target capabilities and, if the host
// does not match, select a polyfill implementation. This means, in supported
// cross-compiling configurations, we must be able to determine matching target
// capabilities from the host.
#ifndef V8_SWISS_TABLE_HAVE_SSE2_TARGET
#if V8_TARGET_ARCH_IA32 || V8_TARGET_ARCH_X64
// x64 always has SSE2, and ia32 without SSE2 is not supported by V8.
#define V8_SWISS_TABLE_HAVE_SSE2_TARGET 1
#else
#define V8_SWISS_TABLE_HAVE_SSE2_TARGET 0
#endif
#endif

// Pull in the intrinsics headers only when the host can actually use them.
#if V8_SWISS_TABLE_HAVE_SSE2_HOST
#include <emmintrin.h>
#endif

#if V8_SWISS_TABLE_HAVE_SSSE3_HOST
#include <tmmintrin.h>
#endif
66 
67 namespace v8 {
68 namespace internal {
69 namespace swiss_table {
70 
71 // All definitions below are taken from Abseil's raw_hash_set.h with only minor
72 // changes, like using existing V8 versions of certain helper functions.
73 
74 // Denotes the group of the control table currently being probed.
75 // Implements quadratic probing by advancing by i groups after the i-th
76 // (unsuccesful) probe.
77 template <size_t GroupSize>
78 class ProbeSequence {
79  public:
ProbeSequence(uint32_t hash,uint32_t mask)80   ProbeSequence(uint32_t hash, uint32_t mask) {
81     // Mask must be a power of 2 minus 1.
82     DCHECK_EQ(0, ((mask + 1) & mask));
83     mask_ = mask;
84     offset_ = hash & mask_;
85   }
offset()86   uint32_t offset() const { return offset_; }
offset(int i)87   uint32_t offset(int i) const { return (offset_ + i) & mask_; }
88 
next()89   void next() {
90     index_ += GroupSize;
91     offset_ += index_;
92     offset_ &= mask_;
93   }
94 
index()95   size_t index() const { return index_; }
96 
97  private:
98   // Used for modulo calculation.
99   uint32_t mask_;
100 
101   // The index/offset into the control table, meaning that {ctrl[offset_]} is
102   // the start of the group currently being probed, assuming that |ctrl| is the
103   // pointer to the beginning of the control table.
104   uint32_t offset_;
105 
106   // States the number of probes that have been performed (starting at 0),
107   // multiplied by GroupSize.
108   uint32_t index_ = 0;
109 };
110 
111 // An abstraction over a bitmask. It provides an easy way to iterate through the
112 // indexes of the set bits of a bitmask. When Shift=0 (platforms with SSE),
113 // this is a true bitmask.
114 // When Shift=3 (used on non-SSE platforms), we obtain a "byte mask", where each
115 // logical bit is represented by a full byte. The logical bit 0 is represented
116 // as 0x00, whereas 1 is represented as 0x80. Other values must not appear.
117 //
118 // For example:
119 //   for (int i : BitMask<uint32_t, 16>(0x5)) -> yields 0, 2
120 //   for (int i : BitMask<uint64_t, 8, 3>(0x0000000080800000)) -> yields 2, 3
121 template <class T, int SignificantBits, int Shift = 0>
122 class BitMask {
123   STATIC_ASSERT(std::is_unsigned<T>::value);
124   STATIC_ASSERT(Shift == 0 || Shift == 3);
125 
126  public:
127   // These are useful for unit tests (gunit).
128   using value_type = int;
129   using iterator = BitMask;
130   using const_iterator = BitMask;
131 
BitMask(T mask)132   explicit BitMask(T mask) : mask_(mask) {}
133   BitMask& operator++() {
134     // Clear the least significant bit that is set.
135     mask_ &= (mask_ - 1);
136     return *this;
137   }
138   explicit operator bool() const { return mask_ != 0; }
139   int operator*() const { return LowestBitSet(); }
LowestBitSet()140   int LowestBitSet() const { return TrailingZeros(); }
HighestBitSet()141   int HighestBitSet() const {
142     return (sizeof(T) * CHAR_BIT - base::bits::CountLeadingZeros(mask_) - 1) >>
143            Shift;
144   }
145 
begin()146   BitMask begin() const { return *this; }
end()147   BitMask end() const { return BitMask(0); }
148 
TrailingZeros()149   int TrailingZeros() const {
150     DCHECK_NE(mask_, 0);
151     return base::bits::CountTrailingZerosNonZero(mask_) >> Shift;
152   }
153 
LeadingZeros()154   int LeadingZeros() const {
155     constexpr int total_significant_bits = SignificantBits << Shift;
156     constexpr int extra_bits = sizeof(T) * 8 - total_significant_bits;
157     return base::bits::CountLeadingZeros(mask_ << extra_bits) >> Shift;
158   }
159 
160  private:
161   friend bool operator==(const BitMask& a, const BitMask& b) {
162     return a.mask_ == b.mask_;
163   }
164   friend bool operator!=(const BitMask& a, const BitMask& b) {
165     return a.mask_ != b.mask_;
166   }
167 
168   T mask_;
169 };
170 
// Type of a control byte: one per slot of the table.
using ctrl_t = signed char;
// Type of the 7-bit hash fragment (H2) stored in a control byte.
using h2_t = uint8_t;

// Special control byte values. The concrete values are selected for maximum
// performance; see the static asserts below for details.
enum Ctrl : ctrl_t {
  kEmpty = -128,   // 0b10000000
  kDeleted = -2,   // 0b11111110
  kSentinel = -1,  // 0b11111111
};
static_assert(
    kEmpty & kDeleted & kSentinel & 0x80,
    "Special markers need to have the MSB to make checking for them efficient");
static_assert(kEmpty < kSentinel && kDeleted < kSentinel,
              "kEmpty and kDeleted must be smaller than kSentinel to make the "
              "SIMD test of IsEmptyOrDeleted() efficient");
static_assert(kSentinel == -1,
              "kSentinel must be -1 to elide loading it from memory into SIMD "
              "registers (pcmpeqd xmm, xmm)");
static_assert(kEmpty == -128,
              "kEmpty must be -128 to make the SIMD check for its "
              "existence efficient (psignb xmm, xmm)");
static_assert(~kEmpty & ~kDeleted & kSentinel & 0x7F,
              "kEmpty and kDeleted must share an unset bit that is not shared "
              "by kSentinel to make the scalar test for MatchEmptyOrDeleted() "
              "efficient");
static_assert(kDeleted == -2,
              "kDeleted must be -2 to make the implementation of "
              "ConvertSpecialToEmptyAndFullToDeleted efficient");

// See below for explanation of H2. Just here for documentation purposes, Swiss
// Table implementations rely on this being 7.
static constexpr int kH2Bits = 7;

static constexpr int kNotFullMask = (1 << kH2Bits);
static_assert(
    kEmpty & kDeleted & kSentinel & kNotFullMask,
    "Special markers need to have the MSB to make checking for them efficient");

// Extracts H1 from the given overall hash, i.e. everything except the lowest
// 7 bits. H1 is used to determine the first group to probe.
inline static uint32_t H1(uint32_t hash) { return hash >> kH2Bits; }

// Extracts H2 from the given overall hash, i.e. only the lowest 7 bits. H2 is
// stored in the control table byte for each present entry.
inline static ctrl_t H2(uint32_t hash) { return hash & ((1 << kH2Bits) - 1); }
220 
221 #if V8_SWISS_TABLE_HAVE_SSE2_HOST
// Workaround for:
//   https://github.com/abseil/abseil-cpp/issues/209
//   https://gcc.gnu.org/bugzilla/show_bug.cgi?id=87853
// _mm_cmpgt_epi8 is broken under GCC with -funsigned-char. In that
// configuration, emulate the signed byte comparison by hand.
inline __m128i _mm_cmpgt_epi8_fixed(__m128i a, __m128i b) {
#if defined(__GNUC__) && !defined(__clang__)
  if (std::is_unsigned<char>::value) {
    // a > b (signed, per byte) iff the saturated difference b - a is
    // negative, i.e. has its sign bit set.
    const __m128i sign_bits = _mm_set1_epi8(0x80);
    const __m128i difference = _mm_subs_epi8(b, a);
    return _mm_cmpeq_epi8(_mm_and_si128(difference, sign_bits), sign_bits);
  }
#endif
  return _mm_cmpgt_epi8(a, b);
}
237 
238 struct GroupSse2Impl {
239   static constexpr size_t kWidth = 16;  // the number of slots per group
240 
GroupSse2ImplGroupSse2Impl241   explicit GroupSse2Impl(const ctrl_t* pos) {
242     ctrl = _mm_loadu_si128(reinterpret_cast<const __m128i*>(pos));
243   }
244 
245   // Returns a bitmask representing the positions of slots that match |hash|.
MatchGroupSse2Impl246   BitMask<uint32_t, kWidth> Match(h2_t hash) const {
247     auto match = _mm_set1_epi8(hash);
248     return BitMask<uint32_t, kWidth>(
249         _mm_movemask_epi8(_mm_cmpeq_epi8(match, ctrl)));
250   }
251 
252   // Returns a bitmask representing the positions of empty slots.
MatchEmptyGroupSse2Impl253   BitMask<uint32_t, kWidth> MatchEmpty() const {
254 #if V8_SWISS_TABLE_HAVE_SSSE3_HOST
255     // This only works because kEmpty is -128.
256     return BitMask<uint32_t, kWidth>(
257         _mm_movemask_epi8(_mm_sign_epi8(ctrl, ctrl)));
258 #else
259     return Match(static_cast<h2_t>(kEmpty));
260 #endif
261   }
262 
263   // Returns a bitmask representing the positions of empty or deleted slots.
MatchEmptyOrDeletedGroupSse2Impl264   BitMask<uint32_t, kWidth> MatchEmptyOrDeleted() const {
265     auto special = _mm_set1_epi8(kSentinel);
266     return BitMask<uint32_t, kWidth>(
267         _mm_movemask_epi8(_mm_cmpgt_epi8_fixed(special, ctrl)));
268   }
269 
270   // Returns the number of trailing empty or deleted elements in the group.
CountLeadingEmptyOrDeletedGroupSse2Impl271   uint32_t CountLeadingEmptyOrDeleted() const {
272     auto special = _mm_set1_epi8(kSentinel);
273     return base::bits::CountTrailingZerosNonZero(
274         _mm_movemask_epi8(_mm_cmpgt_epi8_fixed(special, ctrl)) + 1);
275   }
276 
ConvertSpecialToEmptyAndFullToDeletedGroupSse2Impl277   void ConvertSpecialToEmptyAndFullToDeleted(ctrl_t* dst) const {
278     auto msbs = _mm_set1_epi8(static_cast<char>(-128));
279     auto x126 = _mm_set1_epi8(126);
280 #if V8_SWISS_TABLE_HAVE_SSSE3_HOST
281     auto res = _mm_or_si128(_mm_shuffle_epi8(x126, ctrl), msbs);
282 #else
283     auto zero = _mm_setzero_si128();
284     auto special_mask = _mm_cmpgt_epi8_fixed(zero, ctrl);
285     auto res = _mm_or_si128(msbs, _mm_andnot_si128(special_mask, x126));
286 #endif
287     _mm_storeu_si128(reinterpret_cast<__m128i*>(dst), res);
288   }
289 
290   __m128i ctrl;
291 };
292 #endif  // V8_SWISS_TABLE_HAVE_SSE2_HOST
293 
294 // A portable, inefficient version of GroupSse2Impl. This exists so SSE2-less
295 // hosts can generate snapshots for SSE2-capable targets.
296 struct GroupSse2Polyfill {
297   static constexpr size_t kWidth = 16;  // the number of slots per group
298 
GroupSse2PolyfillGroupSse2Polyfill299   explicit GroupSse2Polyfill(const ctrl_t* pos) { memcpy(ctrl_, pos, kWidth); }
300 
301   // Returns a bitmask representing the positions of slots that match |hash|.
MatchGroupSse2Polyfill302   BitMask<uint32_t, kWidth> Match(h2_t hash) const {
303     uint32_t mask = 0;
304     for (size_t i = 0; i < kWidth; i++) {
305       if (static_cast<h2_t>(ctrl_[i]) == hash) {
306         mask |= 1u << i;
307       }
308     }
309     return BitMask<uint32_t, kWidth>(mask);
310   }
311 
312   // Returns a bitmask representing the positions of empty slots.
MatchEmptyGroupSse2Polyfill313   BitMask<uint32_t, kWidth> MatchEmpty() const {
314     return Match(static_cast<h2_t>(kEmpty));
315   }
316 
317   // Returns a bitmask representing the positions of empty or deleted slots.
MatchEmptyOrDeletedGroupSse2Polyfill318   BitMask<uint32_t, kWidth> MatchEmptyOrDeleted() const {
319     return BitMask<uint32_t, kWidth>(MatchEmptyOrDeletedMask());
320   }
321 
322   // Returns the number of trailing empty or deleted elements in the group.
CountLeadingEmptyOrDeletedGroupSse2Polyfill323   uint32_t CountLeadingEmptyOrDeleted() const {
324     return base::bits::CountTrailingZerosNonZero(MatchEmptyOrDeletedMask() + 1);
325   }
326 
ConvertSpecialToEmptyAndFullToDeletedGroupSse2Polyfill327   void ConvertSpecialToEmptyAndFullToDeleted(ctrl_t* dst) const {
328     for (size_t i = 0; i < kWidth; i++) {
329       if (ctrl_[i] < 0) {
330         dst[i] = kEmpty;
331       } else {
332         dst[i] = kDeleted;
333       }
334     }
335   }
336 
337  private:
MatchEmptyOrDeletedMaskGroupSse2Polyfill338   uint32_t MatchEmptyOrDeletedMask() const {
339     uint32_t mask = 0;
340     for (size_t i = 0; i < kWidth; i++) {
341       if (ctrl_[i] < kSentinel) {
342         mask |= 1u << i;
343       }
344     }
345     return mask;
346   }
347 
348   ctrl_t ctrl_[kWidth];
349 };
350 
351 struct GroupPortableImpl {
352   static constexpr size_t kWidth = 8;  // the number of slots per group
353 
GroupPortableImplGroupPortableImpl354   explicit GroupPortableImpl(const ctrl_t* pos)
355       : ctrl(base::ReadLittleEndianValue<uint64_t>(
356             reinterpret_cast<uintptr_t>(const_cast<ctrl_t*>(pos)))) {}
357 
358   static constexpr uint64_t kMsbs = 0x8080808080808080ULL;
359   static constexpr uint64_t kLsbs = 0x0101010101010101ULL;
360 
361   // Returns a bitmask representing the positions of slots that match |hash|.
MatchGroupPortableImpl362   BitMask<uint64_t, kWidth, 3> Match(h2_t hash) const {
363     // For the technique, see:
364     // http://graphics.stanford.edu/~seander/bithacks.html##ValueInWord
365     // (Determine if a word has a byte equal to n).
366     //
367     // Caveat: there are false positives but:
368     // - they only occur if |hash| actually appears elsewhere in |ctrl|
369     // - they never occur on kEmpty, kDeleted, kSentinel
370     // - they will be handled gracefully by subsequent checks in code
371     //
372     // Example:
373     //   v = 0x1716151413121110
374     //   hash = 0x12
375     //   retval = (v - lsbs) & ~v & msbs = 0x0000000080800000
376     auto x = ctrl ^ (kLsbs * hash);
377     return BitMask<uint64_t, kWidth, 3>((x - kLsbs) & ~x & kMsbs);
378   }
379 
380   // Returns a bitmask representing the positions of empty slots.
MatchEmptyGroupPortableImpl381   BitMask<uint64_t, kWidth, 3> MatchEmpty() const {
382     return BitMask<uint64_t, kWidth, 3>((ctrl & (~ctrl << 6)) & kMsbs);
383   }
384 
385   // Returns a bitmask representing the positions of empty or deleted slots.
MatchEmptyOrDeletedGroupPortableImpl386   BitMask<uint64_t, kWidth, 3> MatchEmptyOrDeleted() const {
387     return BitMask<uint64_t, kWidth, 3>((ctrl & (~ctrl << 7)) & kMsbs);
388   }
389 
390   // Returns the number of trailing empty or deleted elements in the group.
CountLeadingEmptyOrDeletedGroupPortableImpl391   uint32_t CountLeadingEmptyOrDeleted() const {
392     constexpr uint64_t gaps = 0x00FEFEFEFEFEFEFEULL;
393     return (base::bits::CountTrailingZerosNonZero(
394                 ((~ctrl & (ctrl >> 7)) | gaps) + 1) +
395             7) >>
396            3;
397   }
398 
ConvertSpecialToEmptyAndFullToDeletedGroupPortableImpl399   void ConvertSpecialToEmptyAndFullToDeleted(ctrl_t* dst) const {
400     auto x = ctrl & kMsbs;
401     auto res = (~x + (x >> 7)) & ~kLsbs;
402     base::WriteLittleEndianValue(reinterpret_cast<uint64_t*>(dst), res);
403   }
404 
405   uint64_t ctrl;
406 };
407 
// Determine which Group implementation SwissNameDictionary uses.
#if defined(V8_ENABLE_SWISS_NAME_DICTIONARY) && DEBUG
// TODO(v8:11388) If v8_enable_swiss_name_dictionary is enabled, we are supposed
// to use SwissNameDictionary as the dictionary backing store. If we want to use
// the SIMD version of SwissNameDictionary, that would require us to compile SSE
// instructions into the snapshot that exceed the minimum requirements for V8
// SSE support. Therefore, this fails a DCHECK. However, given the experimental
// nature of v8_enable_swiss_name_dictionary mode, we only expect it to be run
// by developers/bots that always have the necessary instructions. This means
// that if v8_enable_swiss_name_dictionary is enabled and debug mode isn't, we
// ignore the DCHECK that would fail in debug mode. However, if both
// v8_enable_swiss_name_dictionary and debug mode are enabled, we must fall
// back to the non-SSE implementation. Given that V8 requires SSE2, there
// should be a solution that doesn't require the workaround present here.
// Instead, the backend should only use SSE2 when compiling the SIMD version of
// SwissNameDictionary into the builtin.
using Group = GroupPortableImpl;
#elif V8_SWISS_TABLE_HAVE_SSE2_TARGET
// Use a matching group size between host and target.
#if V8_SWISS_TABLE_HAVE_SSE2_HOST
using Group = GroupSse2Impl;
#else
#if V8_HOST_ARCH_IA32 || V8_HOST_ARCH_X64
// If we do not detect SSE2 when building for the ia32/x64 target, the
// V8_SWISS_TABLE_HAVE_SSE2_TARGET logic will incorrectly cause the final output
// to use the inefficient polyfill implementation. Detect this case and warn if
// it happens.
#warning "Did not detect required SSE2 support on ia32/x64."
#endif
using Group = GroupSse2Polyfill;
#endif
#else
using Group = GroupPortableImpl;
#endif
442 
443 }  // namespace swiss_table
444 }  // namespace internal
445 }  // namespace v8
446 
447 #endif  // V8_OBJECTS_SWISS_HASH_TABLE_HELPERS_H_
448