• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 use super::super::{BitMask, Tag};
2 use core::mem;
3 use core::num::NonZeroU16;
4 
5 #[cfg(target_arch = "x86")]
6 use core::arch::x86;
7 #[cfg(target_arch = "x86_64")]
8 use core::arch::x86_64 as x86;
9 
10 pub(crate) type BitMaskWord = u16;
11 pub(crate) type NonZeroBitMaskWord = NonZeroU16;
12 pub(crate) const BITMASK_STRIDE: usize = 1;
13 pub(crate) const BITMASK_MASK: BitMaskWord = 0xffff;
14 pub(crate) const BITMASK_ITER_MASK: BitMaskWord = !0;
15 
16 /// Abstraction over a group of control tags which can be scanned in
17 /// parallel.
18 ///
19 /// This implementation uses a 128-bit SSE value.
20 #[derive(Copy, Clone)]
21 pub(crate) struct Group(x86::__m128i);
22 
23 // FIXME: https://github.com/rust-lang/rust-clippy/issues/3859
24 #[allow(clippy::use_self)]
25 impl Group {
26     /// Number of bytes in the group.
27     pub(crate) const WIDTH: usize = mem::size_of::<Self>();
28 
29     /// Returns a full group of empty tags, suitable for use as the initial
30     /// value for an empty hash table.
31     ///
32     /// This is guaranteed to be aligned to the group size.
33     #[inline]
34     #[allow(clippy::items_after_statements)]
static_empty() -> &'static [Tag; Group::WIDTH]35     pub(crate) const fn static_empty() -> &'static [Tag; Group::WIDTH] {
36         #[repr(C)]
37         struct AlignedTags {
38             _align: [Group; 0],
39             tags: [Tag; Group::WIDTH],
40         }
41         const ALIGNED_TAGS: AlignedTags = AlignedTags {
42             _align: [],
43             tags: [Tag::EMPTY; Group::WIDTH],
44         };
45         &ALIGNED_TAGS.tags
46     }
47 
48     /// Loads a group of tags starting at the given address.
49     #[inline]
50     #[allow(clippy::cast_ptr_alignment)] // unaligned load
load(ptr: *const Tag) -> Self51     pub(crate) unsafe fn load(ptr: *const Tag) -> Self {
52         Group(x86::_mm_loadu_si128(ptr.cast()))
53     }
54 
55     /// Loads a group of tags starting at the given address, which must be
56     /// aligned to `mem::align_of::<Group>()`.
57     #[inline]
58     #[allow(clippy::cast_ptr_alignment)]
load_aligned(ptr: *const Tag) -> Self59     pub(crate) unsafe fn load_aligned(ptr: *const Tag) -> Self {
60         debug_assert_eq!(ptr.align_offset(mem::align_of::<Self>()), 0);
61         Group(x86::_mm_load_si128(ptr.cast()))
62     }
63 
64     /// Stores the group of tags to the given address, which must be
65     /// aligned to `mem::align_of::<Group>()`.
66     #[inline]
67     #[allow(clippy::cast_ptr_alignment)]
store_aligned(self, ptr: *mut Tag)68     pub(crate) unsafe fn store_aligned(self, ptr: *mut Tag) {
69         debug_assert_eq!(ptr.align_offset(mem::align_of::<Self>()), 0);
70         x86::_mm_store_si128(ptr.cast(), self.0);
71     }
72 
73     /// Returns a `BitMask` indicating all tags in the group which have
74     /// the given value.
75     #[inline]
match_tag(self, tag: Tag) -> BitMask76     pub(crate) fn match_tag(self, tag: Tag) -> BitMask {
77         #[allow(
78             clippy::cast_possible_wrap, // tag.0: Tag as i8
79             // tag: i32 as u16
80             //   note: _mm_movemask_epi8 returns a 16-bit mask in a i32, the
81             //   upper 16-bits of the i32 are zeroed:
82             clippy::cast_sign_loss,
83             clippy::cast_possible_truncation
84         )]
85         unsafe {
86             let cmp = x86::_mm_cmpeq_epi8(self.0, x86::_mm_set1_epi8(tag.0 as i8));
87             BitMask(x86::_mm_movemask_epi8(cmp) as u16)
88         }
89     }
90 
91     /// Returns a `BitMask` indicating all tags in the group which are
92     /// `EMPTY`.
93     #[inline]
match_empty(self) -> BitMask94     pub(crate) fn match_empty(self) -> BitMask {
95         self.match_tag(Tag::EMPTY)
96     }
97 
98     /// Returns a `BitMask` indicating all tags in the group which are
99     /// `EMPTY` or `DELETED`.
100     #[inline]
match_empty_or_deleted(self) -> BitMask101     pub(crate) fn match_empty_or_deleted(self) -> BitMask {
102         #[allow(
103             // tag: i32 as u16
104             //   note: _mm_movemask_epi8 returns a 16-bit mask in a i32, the
105             //   upper 16-bits of the i32 are zeroed:
106             clippy::cast_sign_loss,
107             clippy::cast_possible_truncation
108         )]
109         unsafe {
110             // A tag is EMPTY or DELETED iff the high bit is set
111             BitMask(x86::_mm_movemask_epi8(self.0) as u16)
112         }
113     }
114 
115     /// Returns a `BitMask` indicating all tags in the group which are full.
116     #[inline]
match_full(&self) -> BitMask117     pub(crate) fn match_full(&self) -> BitMask {
118         self.match_empty_or_deleted().invert()
119     }
120 
121     /// Performs the following transformation on all tags in the group:
122     /// - `EMPTY => EMPTY`
123     /// - `DELETED => EMPTY`
124     /// - `FULL => DELETED`
125     #[inline]
convert_special_to_empty_and_full_to_deleted(self) -> Self126     pub(crate) fn convert_special_to_empty_and_full_to_deleted(self) -> Self {
127         // Map high_bit = 1 (EMPTY or DELETED) to 1111_1111
128         // and high_bit = 0 (FULL) to 1000_0000
129         //
130         // Here's this logic expanded to concrete values:
131         //   let special = 0 > tag = 1111_1111 (true) or 0000_0000 (false)
132         //   1111_1111 | 1000_0000 = 1111_1111
133         //   0000_0000 | 1000_0000 = 1000_0000
134         #[allow(
135             clippy::cast_possible_wrap, // tag: Tag::DELETED.0 as i8
136         )]
137         unsafe {
138             let zero = x86::_mm_setzero_si128();
139             let special = x86::_mm_cmpgt_epi8(zero, self.0);
140             Group(x86::_mm_or_si128(
141                 special,
142                 x86::_mm_set1_epi8(Tag::DELETED.0 as i8),
143             ))
144         }
145     }
146 }
147