• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 #![allow(non_camel_case_types)]
2 
3 use crate::soft::{x2, x4};
4 use crate::types::*;
5 use core::ops::*;
6 
/// 128 bits of untyped vector storage.
///
/// The same 16 bytes can be viewed either as four `u32` words (`d`) or as two `u64` words (`q`).
#[derive(Copy, Clone)]
#[repr(C)]
pub union vec128_storage {
    /// View as two 64-bit words.
    q: [u64; 2],
    /// View as four 32-bit words.
    d: [u32; 4],
}
13 impl From<[u32; 4]> for vec128_storage {
14     #[inline(always)]
from(d: [u32; 4]) -> Self15     fn from(d: [u32; 4]) -> Self {
16         Self { d }
17     }
18 }
19 impl From<vec128_storage> for [u32; 4] {
20     #[inline(always)]
from(d: vec128_storage) -> Self21     fn from(d: vec128_storage) -> Self {
22         unsafe { d.d }
23     }
24 }
25 impl From<[u64; 2]> for vec128_storage {
26     #[inline(always)]
from(q: [u64; 2]) -> Self27     fn from(q: [u64; 2]) -> Self {
28         Self { q }
29     }
30 }
31 impl From<vec128_storage> for [u64; 2] {
32     #[inline(always)]
from(q: vec128_storage) -> Self33     fn from(q: vec128_storage) -> Self {
34         unsafe { q.q }
35     }
36 }
37 impl Default for vec128_storage {
38     #[inline(always)]
default() -> Self39     fn default() -> Self {
40         Self { q: [0, 0] }
41     }
42 }
43 impl Eq for vec128_storage {}
44 impl PartialEq<vec128_storage> for vec128_storage {
45     #[inline(always)]
eq(&self, rhs: &Self) -> bool46     fn eq(&self, rhs: &Self) -> bool {
47         unsafe { self.q == rhs.q }
48     }
49 }
50 #[derive(Clone, Copy, PartialEq, Eq, Default)]
51 pub struct vec256_storage {
52     v128: [vec128_storage; 2],
53 }
54 impl vec256_storage {
55     #[inline(always)]
new128(v128: [vec128_storage; 2]) -> Self56     pub fn new128(v128: [vec128_storage; 2]) -> Self {
57         Self { v128 }
58     }
59     #[inline(always)]
split128(self) -> [vec128_storage; 2]60     pub fn split128(self) -> [vec128_storage; 2] {
61         self.v128
62     }
63 }
64 impl From<vec256_storage> for [u64; 4] {
65     #[inline(always)]
from(q: vec256_storage) -> Self66     fn from(q: vec256_storage) -> Self {
67         let [a, b]: [u64; 2] = q.v128[0].into();
68         let [c, d]: [u64; 2] = q.v128[1].into();
69         [a, b, c, d]
70     }
71 }
72 impl From<[u64; 4]> for vec256_storage {
73     #[inline(always)]
from([a, b, c, d]: [u64; 4]) -> Self74     fn from([a, b, c, d]: [u64; 4]) -> Self {
75         Self {
76             v128: [[a, b].into(), [c, d].into()],
77         }
78     }
79 }
80 #[derive(Clone, Copy, PartialEq, Eq, Default)]
81 pub struct vec512_storage {
82     v128: [vec128_storage; 4],
83 }
84 impl vec512_storage {
85     #[inline(always)]
new128(v128: [vec128_storage; 4]) -> Self86     pub fn new128(v128: [vec128_storage; 4]) -> Self {
87         Self { v128 }
88     }
89     #[inline(always)]
split128(self) -> [vec128_storage; 4]90     pub fn split128(self) -> [vec128_storage; 4] {
91         self.v128
92     }
93 }
94 
95 #[inline(always)]
dmap<T, F>(t: T, f: F) -> T where T: Store<vec128_storage> + Into<vec128_storage>, F: Fn(u32) -> u32,96 fn dmap<T, F>(t: T, f: F) -> T
97 where
98     T: Store<vec128_storage> + Into<vec128_storage>,
99     F: Fn(u32) -> u32,
100 {
101     let t: vec128_storage = t.into();
102     let d = unsafe { t.d };
103     let d = vec128_storage {
104         d: [f(d[0]), f(d[1]), f(d[2]), f(d[3])],
105     };
106     unsafe { T::unpack(d) }
107 }
108 
dmap2<T, F>(a: T, b: T, f: F) -> T where T: Store<vec128_storage> + Into<vec128_storage>, F: Fn(u32, u32) -> u32,109 fn dmap2<T, F>(a: T, b: T, f: F) -> T
110 where
111     T: Store<vec128_storage> + Into<vec128_storage>,
112     F: Fn(u32, u32) -> u32,
113 {
114     let a: vec128_storage = a.into();
115     let b: vec128_storage = b.into();
116     let ao = unsafe { a.d };
117     let bo = unsafe { b.d };
118     let d = vec128_storage {
119         d: [
120             f(ao[0], bo[0]),
121             f(ao[1], bo[1]),
122             f(ao[2], bo[2]),
123             f(ao[3], bo[3]),
124         ],
125     };
126     unsafe { T::unpack(d) }
127 }
128 
129 #[inline(always)]
qmap<T, F>(t: T, f: F) -> T where T: Store<vec128_storage> + Into<vec128_storage>, F: Fn(u64) -> u64,130 fn qmap<T, F>(t: T, f: F) -> T
131 where
132     T: Store<vec128_storage> + Into<vec128_storage>,
133     F: Fn(u64) -> u64,
134 {
135     let t: vec128_storage = t.into();
136     let q = unsafe { t.q };
137     let q = vec128_storage {
138         q: [f(q[0]), f(q[1])],
139     };
140     unsafe { T::unpack(q) }
141 }
142 
143 #[inline(always)]
qmap2<T, F>(a: T, b: T, f: F) -> T where T: Store<vec128_storage> + Into<vec128_storage>, F: Fn(u64, u64) -> u64,144 fn qmap2<T, F>(a: T, b: T, f: F) -> T
145 where
146     T: Store<vec128_storage> + Into<vec128_storage>,
147     F: Fn(u64, u64) -> u64,
148 {
149     let a: vec128_storage = a.into();
150     let b: vec128_storage = b.into();
151     let ao = unsafe { a.q };
152     let bo = unsafe { b.q };
153     let q = vec128_storage {
154         q: [f(ao[0], bo[0]), f(ao[1], bo[1])],
155     };
156     unsafe { T::unpack(q) }
157 }
158 
/// Joins two 64-bit words into one `u128`; `q[0]` becomes the low half and `q[1]` the high half.
#[inline(always)]
fn o_of_q(q: [u64; 2]) -> u128 {
    let [low, high] = q;
    (u128::from(high) << 64) | u128::from(low)
}
163 
/// Splits a `u128` into two 64-bit words: the low half first, then the high half.
#[inline(always)]
fn q_of_o(o: u128) -> [u64; 2] {
    let low = o as u64; // truncation keeps the low 64 bits
    let high = (o >> 64) as u64;
    [low, high]
}
168 
169 #[inline(always)]
omap<T, F>(a: T, f: F) -> T where T: Store<vec128_storage> + Into<vec128_storage>, F: Fn(u128) -> u128,170 fn omap<T, F>(a: T, f: F) -> T
171 where
172     T: Store<vec128_storage> + Into<vec128_storage>,
173     F: Fn(u128) -> u128,
174 {
175     let a: vec128_storage = a.into();
176     let ao = o_of_q(unsafe { a.q });
177     let o = vec128_storage { q: q_of_o(f(ao)) };
178     unsafe { T::unpack(o) }
179 }
180 
181 #[inline(always)]
omap2<T, F>(a: T, b: T, f: F) -> T where T: Store<vec128_storage> + Into<vec128_storage>, F: Fn(u128, u128) -> u128,182 fn omap2<T, F>(a: T, b: T, f: F) -> T
183 where
184     T: Store<vec128_storage> + Into<vec128_storage>,
185     F: Fn(u128, u128) -> u128,
186 {
187     let a: vec128_storage = a.into();
188     let b: vec128_storage = b.into();
189     let ao = o_of_q(unsafe { a.q });
190     let bo = o_of_q(unsafe { b.q });
191     let o = vec128_storage {
192         q: q_of_o(f(ao, bo)),
193     };
194     unsafe { T::unpack(o) }
195 }
196 
197 impl RotateEachWord128 for u128x1_generic {}
198 impl BitOps128 for u128x1_generic {}
199 impl BitOps64 for u128x1_generic {}
200 impl BitOps64 for u64x2_generic {}
201 impl BitOps32 for u128x1_generic {}
202 impl BitOps32 for u64x2_generic {}
203 impl BitOps32 for u32x4_generic {}
204 impl BitOps0 for u128x1_generic {}
205 impl BitOps0 for u64x2_generic {}
206 impl BitOps0 for u32x4_generic {}
207 
208 macro_rules! impl_bitops {
209     ($vec:ident) => {
210         impl Not for $vec {
211             type Output = Self;
212             #[inline(always)]
213             fn not(self) -> Self::Output {
214                 omap(self, |x| !x)
215             }
216         }
217         impl BitAnd for $vec {
218             type Output = Self;
219             #[inline(always)]
220             fn bitand(self, rhs: Self) -> Self::Output {
221                 omap2(self, rhs, |x, y| x & y)
222             }
223         }
224         impl BitOr for $vec {
225             type Output = Self;
226             #[inline(always)]
227             fn bitor(self, rhs: Self) -> Self::Output {
228                 omap2(self, rhs, |x, y| x | y)
229             }
230         }
231         impl BitXor for $vec {
232             type Output = Self;
233             #[inline(always)]
234             fn bitxor(self, rhs: Self) -> Self::Output {
235                 omap2(self, rhs, |x, y| x ^ y)
236             }
237         }
238         impl AndNot for $vec {
239             type Output = Self;
240             #[inline(always)]
241             fn andnot(self, rhs: Self) -> Self::Output {
242                 omap2(self, rhs, |x, y| !x & y)
243             }
244         }
245         impl BitAndAssign for $vec {
246             #[inline(always)]
247             fn bitand_assign(&mut self, rhs: Self) {
248                 *self = *self & rhs
249             }
250         }
251         impl BitOrAssign for $vec {
252             #[inline(always)]
253             fn bitor_assign(&mut self, rhs: Self) {
254                 *self = *self | rhs
255             }
256         }
257         impl BitXorAssign for $vec {
258             #[inline(always)]
259             fn bitxor_assign(&mut self, rhs: Self) {
260                 *self = *self ^ rhs
261             }
262         }
263 
264         impl Swap64 for $vec {
265             #[inline(always)]
266             fn swap1(self) -> Self {
267                 qmap(self, |x| {
268                     ((x & 0x5555555555555555) << 1) | ((x & 0xaaaaaaaaaaaaaaaa) >> 1)
269                 })
270             }
271             #[inline(always)]
272             fn swap2(self) -> Self {
273                 qmap(self, |x| {
274                     ((x & 0x3333333333333333) << 2) | ((x & 0xcccccccccccccccc) >> 2)
275                 })
276             }
277             #[inline(always)]
278             fn swap4(self) -> Self {
279                 qmap(self, |x| {
280                     ((x & 0x0f0f0f0f0f0f0f0f) << 4) | ((x & 0xf0f0f0f0f0f0f0f0) >> 4)
281                 })
282             }
283             #[inline(always)]
284             fn swap8(self) -> Self {
285                 qmap(self, |x| {
286                     ((x & 0x00ff00ff00ff00ff) << 8) | ((x & 0xff00ff00ff00ff00) >> 8)
287                 })
288             }
289             #[inline(always)]
290             fn swap16(self) -> Self {
291                 dmap(self, |x| x.rotate_left(16))
292             }
293             #[inline(always)]
294             fn swap32(self) -> Self {
295                 qmap(self, |x| x.rotate_left(32))
296             }
297             #[inline(always)]
298             fn swap64(self) -> Self {
299                 omap(self, |x| (x << 64) | (x >> 64))
300             }
301         }
302     };
303 }
304 impl_bitops!(u32x4_generic);
305 impl_bitops!(u64x2_generic);
306 impl_bitops!(u128x1_generic);
307 
308 impl RotateEachWord32 for u32x4_generic {
309     #[inline(always)]
rotate_each_word_right7(self) -> Self310     fn rotate_each_word_right7(self) -> Self {
311         dmap(self, |x| x.rotate_right(7))
312     }
313     #[inline(always)]
rotate_each_word_right8(self) -> Self314     fn rotate_each_word_right8(self) -> Self {
315         dmap(self, |x| x.rotate_right(8))
316     }
317     #[inline(always)]
rotate_each_word_right11(self) -> Self318     fn rotate_each_word_right11(self) -> Self {
319         dmap(self, |x| x.rotate_right(11))
320     }
321     #[inline(always)]
rotate_each_word_right12(self) -> Self322     fn rotate_each_word_right12(self) -> Self {
323         dmap(self, |x| x.rotate_right(12))
324     }
325     #[inline(always)]
rotate_each_word_right16(self) -> Self326     fn rotate_each_word_right16(self) -> Self {
327         dmap(self, |x| x.rotate_right(16))
328     }
329     #[inline(always)]
rotate_each_word_right20(self) -> Self330     fn rotate_each_word_right20(self) -> Self {
331         dmap(self, |x| x.rotate_right(20))
332     }
333     #[inline(always)]
rotate_each_word_right24(self) -> Self334     fn rotate_each_word_right24(self) -> Self {
335         dmap(self, |x| x.rotate_right(24))
336     }
337     #[inline(always)]
rotate_each_word_right25(self) -> Self338     fn rotate_each_word_right25(self) -> Self {
339         dmap(self, |x| x.rotate_right(25))
340     }
341 }
342 
343 impl RotateEachWord32 for u64x2_generic {
344     #[inline(always)]
rotate_each_word_right7(self) -> Self345     fn rotate_each_word_right7(self) -> Self {
346         qmap(self, |x| x.rotate_right(7))
347     }
348     #[inline(always)]
rotate_each_word_right8(self) -> Self349     fn rotate_each_word_right8(self) -> Self {
350         qmap(self, |x| x.rotate_right(8))
351     }
352     #[inline(always)]
rotate_each_word_right11(self) -> Self353     fn rotate_each_word_right11(self) -> Self {
354         qmap(self, |x| x.rotate_right(11))
355     }
356     #[inline(always)]
rotate_each_word_right12(self) -> Self357     fn rotate_each_word_right12(self) -> Self {
358         qmap(self, |x| x.rotate_right(12))
359     }
360     #[inline(always)]
rotate_each_word_right16(self) -> Self361     fn rotate_each_word_right16(self) -> Self {
362         qmap(self, |x| x.rotate_right(16))
363     }
364     #[inline(always)]
rotate_each_word_right20(self) -> Self365     fn rotate_each_word_right20(self) -> Self {
366         qmap(self, |x| x.rotate_right(20))
367     }
368     #[inline(always)]
rotate_each_word_right24(self) -> Self369     fn rotate_each_word_right24(self) -> Self {
370         qmap(self, |x| x.rotate_right(24))
371     }
372     #[inline(always)]
rotate_each_word_right25(self) -> Self373     fn rotate_each_word_right25(self) -> Self {
374         qmap(self, |x| x.rotate_right(25))
375     }
376 }
377 impl RotateEachWord64 for u64x2_generic {
378     #[inline(always)]
rotate_each_word_right32(self) -> Self379     fn rotate_each_word_right32(self) -> Self {
380         qmap(self, |x| x.rotate_right(32))
381     }
382 }
383 
// workaround for koute/cargo-web#52 (u128::rotate_* broken with cargo web)
/// Rotates `x` right by `i` bits using plain shifts instead of `u128::rotate_right`.
///
/// The rotation amount is taken modulo 128. This makes `i == 0` (and any multiple of 128) return
/// `x` unchanged; without the reduction the left shift would be by 128 bits, which overflows and
/// panics in debug builds.
#[inline(always)]
fn rotate_u128_right(x: u128, i: u32) -> u128 {
    let right = i & 127;
    // `(128 - right) & 127` maps the `right == 0` case to a shift by 0 rather than by 128.
    let left = (128 - right) & 127;
    (x >> right) | (x << left)
}
/// Checks the shift-based rotation against `u128::rotate_right` for a 17-bit rotation.
#[test]
fn test_rotate_u128() {
    const X: u128 = 0x0001_0203_0405_0607_0809_0a0b_0c0d_0e0f;
    let actual = rotate_u128_right(X, 17);
    let expected = X.rotate_right(17);
    assert_eq!(actual, expected);
}
394 
395 impl RotateEachWord32 for u128x1_generic {
396     #[inline(always)]
rotate_each_word_right7(self) -> Self397     fn rotate_each_word_right7(self) -> Self {
398         Self([rotate_u128_right(self.0[0], 7)])
399     }
400     #[inline(always)]
rotate_each_word_right8(self) -> Self401     fn rotate_each_word_right8(self) -> Self {
402         Self([rotate_u128_right(self.0[0], 8)])
403     }
404     #[inline(always)]
rotate_each_word_right11(self) -> Self405     fn rotate_each_word_right11(self) -> Self {
406         Self([rotate_u128_right(self.0[0], 11)])
407     }
408     #[inline(always)]
rotate_each_word_right12(self) -> Self409     fn rotate_each_word_right12(self) -> Self {
410         Self([rotate_u128_right(self.0[0], 12)])
411     }
412     #[inline(always)]
rotate_each_word_right16(self) -> Self413     fn rotate_each_word_right16(self) -> Self {
414         Self([rotate_u128_right(self.0[0], 16)])
415     }
416     #[inline(always)]
rotate_each_word_right20(self) -> Self417     fn rotate_each_word_right20(self) -> Self {
418         Self([rotate_u128_right(self.0[0], 20)])
419     }
420     #[inline(always)]
rotate_each_word_right24(self) -> Self421     fn rotate_each_word_right24(self) -> Self {
422         Self([rotate_u128_right(self.0[0], 24)])
423     }
424     #[inline(always)]
rotate_each_word_right25(self) -> Self425     fn rotate_each_word_right25(self) -> Self {
426         Self([rotate_u128_right(self.0[0], 25)])
427     }
428 }
429 impl RotateEachWord64 for u128x1_generic {
430     #[inline(always)]
rotate_each_word_right32(self) -> Self431     fn rotate_each_word_right32(self) -> Self {
432         Self([rotate_u128_right(self.0[0], 32)])
433     }
434 }
435 
436 #[derive(Copy, Clone)]
437 pub struct GenericMachine;
438 impl Machine for GenericMachine {
439     type u32x4 = u32x4_generic;
440     type u64x2 = u64x2_generic;
441     type u128x1 = u128x1_generic;
442     type u32x4x2 = u32x4x2_generic;
443     type u64x2x2 = u64x2x2_generic;
444     type u64x4 = u64x4_generic;
445     type u128x2 = u128x2_generic;
446     type u32x4x4 = u32x4x4_generic;
447     type u64x2x4 = u64x2x4_generic;
448     type u128x4 = u128x4_generic;
449     #[inline(always)]
instance() -> Self450     unsafe fn instance() -> Self {
451         Self
452     }
453 }
454 
/// Four 32-bit lanes stored as a plain array.
#[derive(Clone, Copy, PartialEq, Debug)]
pub struct u32x4_generic([u32; 4]);
/// Two 64-bit lanes stored as a plain array.
#[derive(Clone, Copy, PartialEq, Debug)]
pub struct u64x2_generic([u64; 2]);
/// A single 128-bit lane stored as a one-element array.
#[derive(Clone, Copy, PartialEq, Debug)]
pub struct u128x1_generic([u128; 1]);
461 
462 impl From<u32x4_generic> for vec128_storage {
463     #[inline(always)]
from(d: u32x4_generic) -> Self464     fn from(d: u32x4_generic) -> Self {
465         Self { d: d.0 }
466     }
467 }
468 impl From<u64x2_generic> for vec128_storage {
469     #[inline(always)]
from(q: u64x2_generic) -> Self470     fn from(q: u64x2_generic) -> Self {
471         Self { q: q.0 }
472     }
473 }
474 impl From<u128x1_generic> for vec128_storage {
475     #[inline(always)]
from(o: u128x1_generic) -> Self476     fn from(o: u128x1_generic) -> Self {
477         Self { q: q_of_o(o.0[0]) }
478     }
479 }
480 
481 impl Store<vec128_storage> for u32x4_generic {
482     #[inline(always)]
unpack(s: vec128_storage) -> Self483     unsafe fn unpack(s: vec128_storage) -> Self {
484         Self(s.d)
485     }
486 }
487 impl Store<vec128_storage> for u64x2_generic {
488     #[inline(always)]
unpack(s: vec128_storage) -> Self489     unsafe fn unpack(s: vec128_storage) -> Self {
490         Self(s.q)
491     }
492 }
493 impl Store<vec128_storage> for u128x1_generic {
494     #[inline(always)]
unpack(s: vec128_storage) -> Self495     unsafe fn unpack(s: vec128_storage) -> Self {
496         Self([o_of_q(s.q); 1])
497     }
498 }
499 
500 impl ArithOps for u32x4_generic {}
501 impl ArithOps for u64x2_generic {}
502 impl ArithOps for u128x1_generic {}
503 
504 impl Add for u32x4_generic {
505     type Output = Self;
506     #[inline(always)]
add(self, rhs: Self) -> Self::Output507     fn add(self, rhs: Self) -> Self::Output {
508         dmap2(self, rhs, |x, y| x.wrapping_add(y))
509     }
510 }
511 impl Add for u64x2_generic {
512     type Output = Self;
513     #[inline(always)]
add(self, rhs: Self) -> Self::Output514     fn add(self, rhs: Self) -> Self::Output {
515         qmap2(self, rhs, |x, y| x.wrapping_add(y))
516     }
517 }
518 impl Add for u128x1_generic {
519     type Output = Self;
520     #[inline(always)]
add(self, rhs: Self) -> Self::Output521     fn add(self, rhs: Self) -> Self::Output {
522         omap2(self, rhs, |x, y| x.wrapping_add(y))
523     }
524 }
525 impl AddAssign for u32x4_generic {
526     #[inline(always)]
add_assign(&mut self, rhs: Self)527     fn add_assign(&mut self, rhs: Self) {
528         *self = *self + rhs
529     }
530 }
531 impl AddAssign for u64x2_generic {
532     #[inline(always)]
add_assign(&mut self, rhs: Self)533     fn add_assign(&mut self, rhs: Self) {
534         *self = *self + rhs
535     }
536 }
537 impl AddAssign for u128x1_generic {
538     #[inline(always)]
add_assign(&mut self, rhs: Self)539     fn add_assign(&mut self, rhs: Self) {
540         *self = *self + rhs
541     }
542 }
543 impl BSwap for u32x4_generic {
544     #[inline(always)]
bswap(self) -> Self545     fn bswap(self) -> Self {
546         dmap(self, |x| x.swap_bytes())
547     }
548 }
549 impl BSwap for u64x2_generic {
550     #[inline(always)]
bswap(self) -> Self551     fn bswap(self) -> Self {
552         qmap(self, |x| x.swap_bytes())
553     }
554 }
555 impl BSwap for u128x1_generic {
556     #[inline(always)]
bswap(self) -> Self557     fn bswap(self) -> Self {
558         omap(self, |x| x.swap_bytes())
559     }
560 }
561 impl StoreBytes for u32x4_generic {
562     #[inline(always)]
unsafe_read_le(input: &[u8]) -> Self563     unsafe fn unsafe_read_le(input: &[u8]) -> Self {
564         assert_eq!(input.len(), 16);
565         let x = core::mem::transmute(core::ptr::read(input as *const _ as *const [u8; 16]));
566         dmap(x, |x| x.to_le())
567     }
568     #[inline(always)]
unsafe_read_be(input: &[u8]) -> Self569     unsafe fn unsafe_read_be(input: &[u8]) -> Self {
570         assert_eq!(input.len(), 16);
571         let x = core::mem::transmute(core::ptr::read(input as *const _ as *const [u8; 16]));
572         dmap(x, |x| x.to_be())
573     }
574     #[inline(always)]
write_le(self, out: &mut [u8])575     fn write_le(self, out: &mut [u8]) {
576         assert_eq!(out.len(), 16);
577         let x = dmap(self, |x| x.to_le());
578         unsafe { core::ptr::write(out as *mut _ as *mut [u8; 16], core::mem::transmute(x)) }
579     }
580     #[inline(always)]
write_be(self, out: &mut [u8])581     fn write_be(self, out: &mut [u8]) {
582         assert_eq!(out.len(), 16);
583         let x = dmap(self, |x| x.to_be());
584         unsafe { core::ptr::write(out as *mut _ as *mut [u8; 16], core::mem::transmute(x)) }
585     }
586 }
587 impl StoreBytes for u64x2_generic {
588     #[inline(always)]
unsafe_read_le(input: &[u8]) -> Self589     unsafe fn unsafe_read_le(input: &[u8]) -> Self {
590         assert_eq!(input.len(), 16);
591         let x = core::mem::transmute(core::ptr::read(input as *const _ as *const [u8; 16]));
592         qmap(x, |x| x.to_le())
593     }
594     #[inline(always)]
unsafe_read_be(input: &[u8]) -> Self595     unsafe fn unsafe_read_be(input: &[u8]) -> Self {
596         assert_eq!(input.len(), 16);
597         let x = core::mem::transmute(core::ptr::read(input as *const _ as *const [u8; 16]));
598         qmap(x, |x| x.to_be())
599     }
600     #[inline(always)]
write_le(self, out: &mut [u8])601     fn write_le(self, out: &mut [u8]) {
602         assert_eq!(out.len(), 16);
603         let x = qmap(self, |x| x.to_le());
604         unsafe { core::ptr::write(out as *mut _ as *mut [u8; 16], core::mem::transmute(x)) }
605     }
606     #[inline(always)]
write_be(self, out: &mut [u8])607     fn write_be(self, out: &mut [u8]) {
608         assert_eq!(out.len(), 16);
609         let x = qmap(self, |x| x.to_be());
610         unsafe { core::ptr::write(out as *mut _ as *mut [u8; 16], core::mem::transmute(x)) }
611     }
612 }
613 
614 #[derive(Copy, Clone)]
615 pub struct G0;
616 #[derive(Copy, Clone)]
617 pub struct G1;
618 pub type u32x4x2_generic = x2<u32x4_generic, G0>;
619 pub type u64x2x2_generic = x2<u64x2_generic, G0>;
620 pub type u64x4_generic = x2<u64x2_generic, G1>;
621 pub type u128x2_generic = x2<u128x1_generic, G0>;
622 pub type u32x4x4_generic = x4<u32x4_generic>;
623 pub type u64x2x4_generic = x4<u64x2_generic>;
624 pub type u128x4_generic = x4<u128x1_generic>;
625 
626 impl Vector<[u32; 16]> for u32x4x4_generic {
to_scalars(self) -> [u32; 16]627     fn to_scalars(self) -> [u32; 16] {
628         let [a, b, c, d] = self.0;
629         let a = a.0;
630         let b = b.0;
631         let c = c.0;
632         let d = d.0;
633         [
634             a[0], a[1], a[2], a[3], //
635             b[0], b[1], b[2], b[3], //
636             c[0], c[1], c[2], c[3], //
637             d[0], d[1], d[2], d[3], //
638         ]
639     }
640 }
641 
642 impl MultiLane<[u32; 4]> for u32x4_generic {
643     #[inline(always)]
to_lanes(self) -> [u32; 4]644     fn to_lanes(self) -> [u32; 4] {
645         self.0
646     }
647     #[inline(always)]
from_lanes(xs: [u32; 4]) -> Self648     fn from_lanes(xs: [u32; 4]) -> Self {
649         Self(xs)
650     }
651 }
652 impl MultiLane<[u64; 2]> for u64x2_generic {
653     #[inline(always)]
to_lanes(self) -> [u64; 2]654     fn to_lanes(self) -> [u64; 2] {
655         self.0
656     }
657     #[inline(always)]
from_lanes(xs: [u64; 2]) -> Self658     fn from_lanes(xs: [u64; 2]) -> Self {
659         Self(xs)
660     }
661 }
662 impl MultiLane<[u64; 4]> for u64x4_generic {
663     #[inline(always)]
to_lanes(self) -> [u64; 4]664     fn to_lanes(self) -> [u64; 4] {
665         let (a, b) = (self.0[0].to_lanes(), self.0[1].to_lanes());
666         [a[0], a[1], b[0], b[1]]
667     }
668     #[inline(always)]
from_lanes(xs: [u64; 4]) -> Self669     fn from_lanes(xs: [u64; 4]) -> Self {
670         let (a, b) = (
671             u64x2_generic::from_lanes([xs[0], xs[1]]),
672             u64x2_generic::from_lanes([xs[2], xs[3]]),
673         );
674         x2::new([a, b])
675     }
676 }
677 impl MultiLane<[u128; 1]> for u128x1_generic {
678     #[inline(always)]
to_lanes(self) -> [u128; 1]679     fn to_lanes(self) -> [u128; 1] {
680         self.0
681     }
682     #[inline(always)]
from_lanes(xs: [u128; 1]) -> Self683     fn from_lanes(xs: [u128; 1]) -> Self {
684         Self(xs)
685     }
686 }
687 impl Vec4<u32> for u32x4_generic {
688     #[inline(always)]
extract(self, i: u32) -> u32689     fn extract(self, i: u32) -> u32 {
690         self.0[i as usize]
691     }
692     #[inline(always)]
insert(mut self, v: u32, i: u32) -> Self693     fn insert(mut self, v: u32, i: u32) -> Self {
694         self.0[i as usize] = v;
695         self
696     }
697 }
698 impl Vec4<u64> for u64x4_generic {
699     #[inline(always)]
extract(self, i: u32) -> u64700     fn extract(self, i: u32) -> u64 {
701         let d: [u64; 4] = self.to_lanes();
702         d[i as usize]
703     }
704     #[inline(always)]
insert(self, v: u64, i: u32) -> Self705     fn insert(self, v: u64, i: u32) -> Self {
706         self.0[(i / 2) as usize].insert(v, i % 2);
707         self
708     }
709 }
710 impl Vec2<u64> for u64x2_generic {
711     #[inline(always)]
extract(self, i: u32) -> u64712     fn extract(self, i: u32) -> u64 {
713         self.0[i as usize]
714     }
715     #[inline(always)]
insert(mut self, v: u64, i: u32) -> Self716     fn insert(mut self, v: u64, i: u32) -> Self {
717         self.0[i as usize] = v;
718         self
719     }
720 }
721 
722 impl Words4 for u32x4_generic {
723     #[inline(always)]
shuffle2301(self) -> Self724     fn shuffle2301(self) -> Self {
725         self.swap64()
726     }
727     #[inline(always)]
shuffle1230(self) -> Self728     fn shuffle1230(self) -> Self {
729         let x = self.0;
730         Self([x[3], x[0], x[1], x[2]])
731     }
732     #[inline(always)]
shuffle3012(self) -> Self733     fn shuffle3012(self) -> Self {
734         let x = self.0;
735         Self([x[1], x[2], x[3], x[0]])
736     }
737 }
738 impl LaneWords4 for u32x4_generic {
739     #[inline(always)]
shuffle_lane_words2301(self) -> Self740     fn shuffle_lane_words2301(self) -> Self {
741         self.shuffle2301()
742     }
743     #[inline(always)]
shuffle_lane_words1230(self) -> Self744     fn shuffle_lane_words1230(self) -> Self {
745         self.shuffle1230()
746     }
747     #[inline(always)]
shuffle_lane_words3012(self) -> Self748     fn shuffle_lane_words3012(self) -> Self {
749         self.shuffle3012()
750     }
751 }
752 
753 impl Words4 for u64x4_generic {
754     #[inline(always)]
shuffle2301(self) -> Self755     fn shuffle2301(self) -> Self {
756         x2::new([self.0[1], self.0[0]])
757     }
758     #[inline(always)]
shuffle1230(self) -> Self759     fn shuffle1230(self) -> Self {
760         unimplemented!()
761     }
762     #[inline(always)]
shuffle3012(self) -> Self763     fn shuffle3012(self) -> Self {
764         unimplemented!()
765     }
766 }
767 
768 impl u32x4<GenericMachine> for u32x4_generic {}
769 impl u64x2<GenericMachine> for u64x2_generic {}
770 impl u128x1<GenericMachine> for u128x1_generic {}
771 impl u32x4x2<GenericMachine> for u32x4x2_generic {}
772 impl u64x2x2<GenericMachine> for u64x2x2_generic {}
773 impl u64x4<GenericMachine> for u64x4_generic {}
774 impl u128x2<GenericMachine> for u128x2_generic {}
775 impl u32x4x4<GenericMachine> for u32x4x4_generic {}
776 impl u64x2x4<GenericMachine> for u64x2x4_generic {}
777 impl u128x4<GenericMachine> for u128x4_generic {}
778 
/// Defines a function `$name` whose body runs against the `GenericMachine` backend.
///
/// The generated function obtains a `GenericMachine`, binds it to `$mach`, and forwards its
/// arguments to an inner function that is generic over `$MTy: Machine` and contains `$body`.
/// Visibility tokens are passed in brackets before `fn`, e.g. `[pub]` or `[pub(crate)]`.
///
/// The second arm accepts a function without a return type and forwards to the first arm with
/// `-> ()`. It recurses through `$crate::dispatch!` so the expansion also resolves when the
/// caller invoked the macro by path and has no `dispatch` macro name in scope.
#[macro_export]
macro_rules! dispatch {
    ($mach:ident, $MTy:ident, { $([$pub:tt$(($krate:tt))*])* fn $name:ident($($arg:ident: $argty:ty),* $(,)*) -> $ret:ty $body:block }) => {
        #[inline(always)]
        $($pub$(($krate))*)* fn $name($($arg: $argty),*) -> $ret {
            let $mach = unsafe { $crate::generic::GenericMachine::instance() };
            #[inline(always)]
            fn fn_impl<$MTy: $crate::Machine>($mach: $MTy, $($arg: $argty),*) -> $ret $body
            fn_impl($mach, $($arg),*)
        }
    };
    ($mach:ident, $MTy:ident, { $([$pub:tt $(($krate:tt))*])* fn $name:ident($($arg:ident: $argty:ty),* $(,)*) $body:block }) => {
        $crate::dispatch!($mach, $MTy, {
            $([$pub $(($krate))*])* fn $name($($arg: $argty),*) -> () $body
        });
    }
}
/// Same as `dispatch!` for this backend: both arms expand exactly as `dispatch!`'s arms do,
/// running `$body` against `GenericMachine`.
///
/// The arm without a return type forwards to `$crate::dispatch!`, so a caller that imports only
/// `dispatch_light128` does not also need the `dispatch` macro name in scope.
#[macro_export]
macro_rules! dispatch_light128 {
    ($mach:ident, $MTy:ident, { $([$pub:tt$(($krate:tt))*])* fn $name:ident($($arg:ident: $argty:ty),* $(,)*) -> $ret:ty $body:block }) => {
        #[inline(always)]
        $($pub$(($krate))*)* fn $name($($arg: $argty),*) -> $ret {
            let $mach = unsafe { $crate::generic::GenericMachine::instance() };
            #[inline(always)]
            fn fn_impl<$MTy: $crate::Machine>($mach: $MTy, $($arg: $argty),*) -> $ret $body
            fn_impl($mach, $($arg),*)
        }
    };
    ($mach:ident, $MTy:ident, { $([$pub:tt $(($krate:tt))*])* fn $name:ident($($arg:ident: $argty:ty),* $(,)*) $body:block }) => {
        $crate::dispatch!($mach, $MTy, {
            $([$pub $(($krate))*])* fn $name($($arg: $argty),*) -> () $body
        });
    }
}
/// Same as `dispatch!` for this backend: both arms expand exactly as `dispatch!`'s arms do,
/// running `$body` against `GenericMachine`.
///
/// The arm without a return type forwards to `$crate::dispatch!`, so a caller that imports only
/// `dispatch_light256` does not also need the `dispatch` macro name in scope.
#[macro_export]
macro_rules! dispatch_light256 {
    ($mach:ident, $MTy:ident, { $([$pub:tt$(($krate:tt))*])* fn $name:ident($($arg:ident: $argty:ty),* $(,)*) -> $ret:ty $body:block }) => {
        #[inline(always)]
        $($pub$(($krate))*)* fn $name($($arg: $argty),*) -> $ret {
            let $mach = unsafe { $crate::generic::GenericMachine::instance() };
            #[inline(always)]
            fn fn_impl<$MTy: $crate::Machine>($mach: $MTy, $($arg: $argty),*) -> $ret $body
            fn_impl($mach, $($arg),*)
        }
    };
    ($mach:ident, $MTy:ident, { $([$pub:tt $(($krate:tt))*])* fn $name:ident($($arg:ident: $argty:ty),* $(,)*) $body:block }) => {
        $crate::dispatch!($mach, $MTy, {
            $([$pub $(($krate))*])* fn $name($($arg: $argty),*) -> () $body
        });
    }
}
/// Same as `dispatch!` for this backend: both arms expand exactly as `dispatch!`'s arms do,
/// running `$body` against `GenericMachine`.
///
/// The arm without a return type forwards to `$crate::dispatch!`, so a caller that imports only
/// `dispatch_light512` does not also need the `dispatch` macro name in scope.
#[macro_export]
macro_rules! dispatch_light512 {
    ($mach:ident, $MTy:ident, { $([$pub:tt$(($krate:tt))*])* fn $name:ident($($arg:ident: $argty:ty),* $(,)*) -> $ret:ty $body:block }) => {
        #[inline(always)]
        $($pub$(($krate))*)* fn $name($($arg: $argty),*) -> $ret {
            let $mach = unsafe { $crate::generic::GenericMachine::instance() };
            #[inline(always)]
            fn fn_impl<$MTy: $crate::Machine>($mach: $MTy, $($arg: $argty),*) -> $ret $body
            fn_impl($mach, $($arg),*)
        }
    };
    ($mach:ident, $MTy:ident, { $([$pub:tt $(($krate:tt))*])* fn $name:ident($($arg:ident: $argty:ty),* $(,)*) $body:block }) => {
        $crate::dispatch!($mach, $MTy, {
            $([$pub $(($krate))*])* fn $name($($arg: $argty),*) -> () $body
        });
    }
}
847 
#[cfg(test)]
mod test {
    use super::*;

    /// `bswap` on a `u32x4` reverses the bytes of each 32-bit word independently.
    #[test]
    fn test_bswap32() {
        let input = [0x0f0e_0d0c, 0x0b0a_0908, 0x0706_0504, 0x0302_0100];
        let swapped = [0x0c0d_0e0f, 0x0809_0a0b, 0x0405_0607, 0x0001_0203];

        let machine = unsafe { GenericMachine::instance() };

        let vector: <GenericMachine as Machine>::u32x4 = machine.vec(input);
        let actual = vector.bswap();

        let expected = machine.vec(swapped);
        assert_eq!(actual, expected);
    }
}
866