// Needed because trait and associated-type names such as `u32x4` are lowercase.
#![allow(non_camel_case_types)]
use core::ops::{Add, AddAssign, BitAnd, BitOr, BitXor, BitXorAssign, Not};

/// Two-operand "and-not" bit operation.
///
/// NOTE(review): which operand is complemented is decided by each implementation; it
/// presumably mirrors x86 `pandn` (`!self & rhs`) — confirm against the implementors.
pub trait AndNot {
    /// Type returned by [`AndNot::andnot`].
    type Output;
    /// Combines `self` and `rhs` with the and-not operation, consuming both.
    fn andnot(self, rhs: Self) -> Self::Output;
}
/// Byte-swap operation on a value.
///
/// NOTE(review): the granularity of the swap (whole value vs. each word) is not specified
/// here — confirm against the implementors.
pub trait BSwap {
    /// Returns the byte-swapped form of `self`.
    fn bswap(self) -> Self;
}
11 /// Ops that depend on word size
12 pub trait ArithOps: Add<Output = Self> + AddAssign + Sized + Copy + Clone + BSwap {}
13 /// Ops that are independent of word size and endian
14 pub trait BitOps0:
15     BitAnd<Output = Self>
16     + BitOr<Output = Self>
17     + BitXor<Output = Self>
18     + BitXorAssign
19     + Not<Output = Self>
20     + AndNot<Output = Self>
21     + Sized
22     + Copy
23     + Clone
24 {
25 }
26 
27 pub trait BitOps32: BitOps0 + RotateEachWord32 {}
28 pub trait BitOps64: BitOps32 + RotateEachWord64 {}
29 pub trait BitOps128: BitOps64 + RotateEachWord128 {}
30 
/// Fixed-distance right rotations applied to every word of a value.
///
/// As the method names state, each method rotates each word right by the number of bits in
/// its suffix. The word width itself is defined by the implementing type.
pub trait RotateEachWord32 {
    /// Rotates each word right by 7 bits.
    fn rotate_each_word_right7(self) -> Self;
    /// Rotates each word right by 8 bits.
    fn rotate_each_word_right8(self) -> Self;
    /// Rotates each word right by 11 bits.
    fn rotate_each_word_right11(self) -> Self;
    /// Rotates each word right by 12 bits.
    fn rotate_each_word_right12(self) -> Self;
    /// Rotates each word right by 16 bits.
    fn rotate_each_word_right16(self) -> Self;
    /// Rotates each word right by 20 bits.
    fn rotate_each_word_right20(self) -> Self;
    /// Rotates each word right by 24 bits.
    fn rotate_each_word_right24(self) -> Self;
    /// Rotates each word right by 25 bits.
    fn rotate_each_word_right25(self) -> Self;
}

/// Additional per-word rotation required by [`BitOps64`]-class types.
pub trait RotateEachWord64 {
    /// Rotates each word right by 32 bits, as the method name states.
    fn rotate_each_word_right32(self) -> Self;
}

/// Marker for types usable as [`BitOps128`]; it currently declares no methods.
pub trait RotateEachWord128 {}

// Vector type naming scheme:
// uN[xP]xL
// Unsigned; N-bit words * P words per lane * L lanes
//
// A lane is always 128 bits, chosen because common SIMD architectures treat 128-bit units of
// wide vectors specially (supporting e.g. intra-lane shuffles), and tend to have limited and
// slow inter-lane operations.

use crate::arch::{vec128_storage, vec256_storage, vec512_storage};

/// Unsafe counterpart of a `From`-style conversion from `T`.
///
/// NOTE(review): the conditions a caller must uphold are not documented here (hence the
/// lint allowance below) — consult each implementation.
#[allow(clippy::missing_safety_doc)]
pub trait UnsafeFrom<T> {
    /// Converts `t` into `Self`.
    unsafe fn unsafe_from(t: T) -> Self;
}

/// A vector composed of two elements, which may be words or themselves vectors.
pub trait Vec2<W> {
    /// Returns the element at index `i`.
    ///
    /// NOTE(review): behaviour for `i >= 2` is implementation-defined as far as this trait
    /// shows.
    fn extract(self, i: u32) -> W;
    /// Returns a copy of `self` in which the element at index `i` is replaced by `w`.
    fn insert(self, w: W, i: u32) -> Self;
}

/// A vector composed of four elements, which may be words or themselves vectors.
pub trait Vec4<W> {
    /// Returns the element at index `i`.
    ///
    /// NOTE(review): behaviour for `i >= 4` is implementation-defined as far as this trait
    /// shows.
    fn extract(self, i: u32) -> W;
    /// Returns a copy of `self` in which the element at index `i` is replaced by `w`.
    fn insert(self, w: W, i: u32) -> Self;
}
/// Vec4 functions which may not be implemented yet for all Vec4 types.
/// NOTE: functions in this trait may be moved to Vec4 in any patch release. To avoid breakage,
/// import Vec4Ext only together with Vec4, and don't qualify its methods.
pub trait Vec4Ext<W> {
    /// Transposes the four vectors `a`, `b`, `c`, `d`, returning the four resulting vectors.
    ///
    /// NOTE(review): presumably the 4x4 matrix whose rows are the inputs is transposed at
    /// element (`W`) granularity — confirm against the implementors.
    fn transpose4(a: Self, b: Self, c: Self, d: Self) -> (Self, Self, Self, Self)
    where
        Self: Sized;
}
/// A vector that can be converted into a scalar representation `T`.
pub trait Vector<T> {
    /// Consumes the vector and returns its contents as `T`.
    fn to_scalars(self) -> T;
}

// TODO: multiples of 4 should inherit this
/// A vector composed of four words; depending on their size, operations may cross lanes.
///
/// NOTE(review): the digit suffix of each method presumably encodes the resulting word
/// order — confirm the exact convention against the implementors.
pub trait Words4 {
    /// Returns `self` with its words permuted according to the `1230` pattern.
    fn shuffle1230(self) -> Self;
    /// Returns `self` with its words permuted according to the `2301` pattern.
    fn shuffle2301(self) -> Self;
    /// Returns `self` with its words permuted according to the `3012` pattern.
    fn shuffle3012(self) -> Self;
}

/// A vector composed of one or more lanes, each composed of four words.
///
/// NOTE(review): the digit suffix of each method presumably encodes the resulting word
/// order within every lane — confirm the exact convention against the implementors.
pub trait LaneWords4 {
    /// Returns `self` with the words of each lane permuted according to the `1230` pattern.
    fn shuffle_lane_words1230(self) -> Self;
    /// Returns `self` with the words of each lane permuted according to the `2301` pattern.
    fn shuffle_lane_words2301(self) -> Self;
    /// Returns `self` with the words of each lane permuted according to the `3012` pattern.
    fn shuffle_lane_words3012(self) -> Self;
}

// TODO: make this a part of BitOps
/// Exchange neighboring ranges of bits of the specified size.
///
/// The numeric suffix of each method is the size, in bits, of the ranges being exchanged.
pub trait Swap64 {
    /// Exchanges neighboring 1-bit ranges.
    fn swap1(self) -> Self;
    /// Exchanges neighboring 2-bit ranges.
    fn swap2(self) -> Self;
    /// Exchanges neighboring 4-bit ranges.
    fn swap4(self) -> Self;
    /// Exchanges neighboring 8-bit ranges.
    fn swap8(self) -> Self;
    /// Exchanges neighboring 16-bit ranges.
    fn swap16(self) -> Self;
    /// Exchanges neighboring 32-bit ranges.
    fn swap32(self) -> Self;
    /// Exchanges neighboring 64-bit ranges.
    fn swap64(self) -> Self;
}

113 pub trait u32x4<M: Machine>:
114     BitOps32
115     + Store<vec128_storage>
116     + ArithOps
117     + Vec4<u32>
118     + Words4
119     + LaneWords4
120     + StoreBytes
121     + MultiLane<[u32; 4]>
122     + Into<vec128_storage>
123 {
124 }
125 pub trait u64x2<M: Machine>:
126     BitOps64 + Store<vec128_storage> + ArithOps + Vec2<u64> + MultiLane<[u64; 2]> + Into<vec128_storage>
127 {
128 }
129 pub trait u128x1<M: Machine>:
130     BitOps128 + Store<vec128_storage> + Swap64 + MultiLane<[u128; 1]> + Into<vec128_storage>
131 {
132 }
133 
134 pub trait u32x4x2<M: Machine>:
135     BitOps32
136     + Store<vec256_storage>
137     + Vec2<M::u32x4>
138     + MultiLane<[M::u32x4; 2]>
139     + ArithOps
140     + Into<vec256_storage>
141     + StoreBytes
142 {
143 }
144 pub trait u64x2x2<M: Machine>:
145     BitOps64
146     + Store<vec256_storage>
147     + Vec2<M::u64x2>
148     + MultiLane<[M::u64x2; 2]>
149     + ArithOps
150     + StoreBytes
151     + Into<vec256_storage>
152 {
153 }
154 pub trait u64x4<M: Machine>:
155     BitOps64
156     + Store<vec256_storage>
157     + Vec4<u64>
158     + MultiLane<[u64; 4]>
159     + ArithOps
160     + Words4
161     + StoreBytes
162     + Into<vec256_storage>
163 {
164 }
165 pub trait u128x2<M: Machine>:
166     BitOps128
167     + Store<vec256_storage>
168     + Vec2<M::u128x1>
169     + MultiLane<[M::u128x1; 2]>
170     + Swap64
171     + Into<vec256_storage>
172 {
173 }
174 
175 pub trait u32x4x4<M: Machine>:
176     BitOps32
177     + Store<vec512_storage>
178     + Vec4<M::u32x4>
179     + Vec4Ext<M::u32x4>
180     + Vector<[u32; 16]>
181     + MultiLane<[M::u32x4; 4]>
182     + ArithOps
183     + LaneWords4
184     + Into<vec512_storage>
185     + StoreBytes
186 {
187 }
188 pub trait u64x2x4<M: Machine>:
189     BitOps64
190     + Store<vec512_storage>
191     + Vec4<M::u64x2>
192     + MultiLane<[M::u64x2; 4]>
193     + ArithOps
194     + Into<vec512_storage>
195 {
196 }
197 // TODO: Words4
198 pub trait u128x4<M: Machine>:
199     BitOps128
200     + Store<vec512_storage>
201     + Vec4<M::u128x1>
202     + MultiLane<[M::u128x1; 4]>
203     + Swap64
204     + Into<vec512_storage>
205 {
206 }
207 
/// A vector composed of multiple 128-bit lanes.
///
/// `Lanes` is the collection type holding the individual lanes (the vector traits in this
/// file use fixed-size arrays).
pub trait MultiLane<Lanes> {
    /// Split a multi-lane vector into single-lane vectors.
    fn to_lanes(self) -> Lanes;
    /// Build a multi-lane vector from individual lanes.
    fn from_lanes(lanes: Lanes) -> Self;
}

/// Combine single vectors into a multi-lane vector.
///
/// Implemented on the collection of lanes; `V` is the multi-lane vector being produced.
pub trait VZip<V> {
    /// Consumes the lanes in `self` and assembles them into a `V`.
    fn vzip(self) -> V;
}

221 impl<V, T> VZip<V> for T
222 where
223     V: MultiLane<T>,
224 {
225     #[inline(always)]
vzip(self) -> V226     fn vzip(self) -> V {
227         V::from_lanes(self)
228     }
229 }
230 
231 pub trait Machine: Sized + Copy {
232     type u32x4: u32x4<Self>;
233     type u64x2: u64x2<Self>;
234     type u128x1: u128x1<Self>;
235 
236     type u32x4x2: u32x4x2<Self>;
237     type u64x2x2: u64x2x2<Self>;
238     type u64x4: u64x4<Self>;
239     type u128x2: u128x2<Self>;
240 
241     type u32x4x4: u32x4x4<Self>;
242     type u64x2x4: u64x2x4<Self>;
243     type u128x4: u128x4<Self>;
244 
245     #[inline(always)]
unpack<S, V: Store<S>>(self, s: S) -> V246     fn unpack<S, V: Store<S>>(self, s: S) -> V {
247         unsafe { V::unpack(s) }
248     }
249 
250     #[inline(always)]
vec<V, A>(self, a: A) -> V where V: MultiLane<A>,251     fn vec<V, A>(self, a: A) -> V
252     where
253         V: MultiLane<A>,
254     {
255         V::from_lanes(a)
256     }
257 
258     #[inline(always)]
read_le<V>(self, input: &[u8]) -> V where V: StoreBytes,259     fn read_le<V>(self, input: &[u8]) -> V
260     where
261         V: StoreBytes,
262     {
263         unsafe { V::unsafe_read_le(input) }
264     }
265 
266     #[inline(always)]
read_be<V>(self, input: &[u8]) -> V where V: StoreBytes,267     fn read_be<V>(self, input: &[u8]) -> V
268     where
269         V: StoreBytes,
270     {
271         unsafe { V::unsafe_read_be(input) }
272     }
273 
274     /// # Safety
275     /// Caller must ensure the type of Self is appropriate for the hardware of the execution
276     /// environment.
instance() -> Self277     unsafe fn instance() -> Self;
278 }
279 
/// Construction of a vector from its storage representation `S`.
pub trait Store<S> {
    /// Builds `Self` from the storage value `p`.
    ///
    /// # Safety
    /// Caller must ensure the type of Self is appropriate for the hardware of the execution
    /// environment.
    unsafe fn unpack(p: S) -> Self;
}

/// Conversion between a vector and a byte slice.
///
/// NOTE(review): the `le`/`be` suffixes presumably select little- and big-endian byte
/// order, and the required slice lengths are left to the implementations — confirm both.
pub trait StoreBytes {
    /// Reads `Self` from `input` (`le` variant).
    ///
    /// # Safety
    /// Caller must ensure the type of Self is appropriate for the hardware of the execution
    /// environment.
    unsafe fn unsafe_read_le(input: &[u8]) -> Self;
    /// Reads `Self` from `input` (`be` variant).
    ///
    /// # Safety
    /// Caller must ensure the type of Self is appropriate for the hardware of the execution
    /// environment.
    unsafe fn unsafe_read_be(input: &[u8]) -> Self;
    /// Writes `self` into `out` (`le` variant).
    fn write_le(self, out: &mut [u8]);
    /// Writes `self` into `out` (`be` variant).
    fn write_be(self, out: &mut [u8]);
}