#![allow(non_camel_case_types)]

use crate::soft::{x2, x4};
use crate::types::*;
use core::ops::*;

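// A 128-bit register emulated in software. The union lets the same 16 bytes be
// viewed either as four u32 lanes (`d`) or as two u64 lanes (`q`); `#[repr(C)]`
// keeps the layout stable so both views alias the same storage.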
#[repr(C)]
#[derive(Clone, Copy)]
pub union vec128_storage {
    d: [u32; 4],
    q: [u64; 2],
}
impl From<[u32; 4]> for vec128_storage {
    #[inline(always)]
    fn from(d: [u32; 4]) -> Self {
        Self { d }
    }
}
impl From<vec128_storage> for [u32; 4] {
    #[inline(always)]
    fn from(d: vec128_storage) -> Self {
        unsafe { d.d }
    }
}
impl From<[u64; 2]> for vec128_storage {
    #[inline(always)]
    fn from(q: [u64; 2]) -> Self {
        Self { q }
    }
}
impl From<vec128_storage> for [u64; 2] {
    #[inline(always)]
    fn from(q: vec128_storage) -> Self {
        unsafe { q.q }
    }
}
impl Default for vec128_storage {
    #[inline(always)]
    fn default() -> Self {
        Self { q: [0, 0] }
    }
}
impl Eq for vec128_storage {}
impl PartialEq<vec128_storage> for vec128_storage {
    #[inline(always)]
    fn eq(&self, rhs: &Self) -> bool {
        unsafe { self.q == rhs.q }
    }
}
#[derive(Clone, Copy, PartialEq, Eq, Default)]
pub struct vec256_storage {
    v128: [vec128_storage; 2],
}
impl vec256_storage {
    #[inline(always)]
    pub fn new128(v128: [vec128_storage; 2]) -> Self {
        Self { v128 }
    }
    #[inline(always)]
    pub fn split128(self) -> [vec128_storage; 2] {
        self.v128
    }
}
impl From<vec256_storage> for [u64; 4] {
    #[inline(always)]
    fn from(q: vec256_storage) -> Self {
        let [a, b]: [u64; 2] = q.v128[0].into();
        let [c, d]: [u64; 2] = q.v128[1].into();
        [a, b, c, d]
    }
}
impl From<[u64; 4]> for vec256_storage {
    #[inline(always)]
    fn from([a, b, c, d]: [u64; 4]) -> Self {
        Self {
            v128: [[a, b].into(), [c, d].into()],
        }
    }
}
#[derive(Clone, Copy, PartialEq, Eq, Default)]
pub struct vec512_storage {
    v128: [vec128_storage; 4],
}
impl vec512_storage {
    #[inline(always)]
    pub fn new128(v128: [vec128_storage; 4]) -> Self {
        Self { v128 }
    }
    #[inline(always)]
    pub fn split128(self) -> [vec128_storage; 4] {
        self.v128
    }
}

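// The *map helpers below reinterpret a vector as u32 lanes (`d`), u64 lanes
// (`q`), or a single u128 (`o`), apply a scalar closure to each lane, and
// repack the result. All generic-backend arithmetic is built from these.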
#[inline(always)]
fn dmap<T, F>(t: T, f: F) -> T
where
    T: Store<vec128_storage> + Into<vec128_storage>,
    F: Fn(u32) -> u32,
{
    let t: vec128_storage = t.into();
    let d = unsafe { t.d };
    let d = vec128_storage {
        d: [f(d[0]), f(d[1]), f(d[2]), f(d[3])],
    };
    unsafe { T::unpack(d) }
}

#[inline(always)]
fn dmap2<T, F>(a: T, b: T, f: F) -> T
where
    T: Store<vec128_storage> + Into<vec128_storage>,
    F: Fn(u32, u32) -> u32,
{
    let a: vec128_storage = a.into();
    let b: vec128_storage = b.into();
    let ao = unsafe { a.d };
    let bo = unsafe { b.d };
    let d = vec128_storage {
        d: [
            f(ao[0], bo[0]),
            f(ao[1], bo[1]),
            f(ao[2], bo[2]),
            f(ao[3], bo[3]),
        ],
    };
    unsafe { T::unpack(d) }
}

#[inline(always)]
fn qmap<T, F>(t: T, f: F) -> T
where
    T: Store<vec128_storage> + Into<vec128_storage>,
    F: Fn(u64) -> u64,
{
    let t: vec128_storage = t.into();
    let q = unsafe { t.q };
    let q = vec128_storage {
        q: [f(q[0]), f(q[1])],
    };
    unsafe { T::unpack(q) }
}

#[inline(always)]
fn qmap2<T, F>(a: T, b: T, f: F) -> T
where
    T: Store<vec128_storage> + Into<vec128_storage>,
    F: Fn(u64, u64) -> u64,
{
    let a: vec128_storage = a.into();
    let b: vec128_storage = b.into();
    let ao = unsafe { a.q };
    let bo = unsafe { b.q };
    let q = vec128_storage {
        q: [f(ao[0], bo[0]), f(ao[1], bo[1])],
    };
    unsafe { T::unpack(q) }
}

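// Convert between a u128 and its two little-endian u64 halves: q[0] holds the
// low 64 bits, q[1] the high 64 bits.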
#[inline(always)]
fn o_of_q(q: [u64; 2]) -> u128 {
    u128::from(q[0]) | (u128::from(q[1]) << 64)
}

#[inline(always)]
fn q_of_o(o: u128) -> [u64; 2] {
    [o as u64, (o >> 64) as u64]
}

#[inline(always)]
fn omap<T, F>(a: T, f: F) -> T
where
    T: Store<vec128_storage> + Into<vec128_storage>,
    F: Fn(u128) -> u128,
{
    let a: vec128_storage = a.into();
    let ao = o_of_q(unsafe { a.q });
    let o = vec128_storage { q: q_of_o(f(ao)) };
    unsafe { T::unpack(o) }
}

#[inline(always)]
fn omap2<T, F>(a: T, b: T, f: F) -> T
where
    T: Store<vec128_storage> + Into<vec128_storage>,
    F: Fn(u128, u128) -> u128,
{
    let a: vec128_storage = a.into();
    let b: vec128_storage = b.into();
    let ao = o_of_q(unsafe { a.q });
    let bo = o_of_q(unsafe { b.q });
    let o = vec128_storage {
        q: q_of_o(f(ao, bo)),
    };
    unsafe { T::unpack(o) }
}

impl RotateEachWord128 for u128x1_generic {}
impl BitOps128 for u128x1_generic {}
impl BitOps64 for u128x1_generic {}
impl BitOps64 for u64x2_generic {}
impl BitOps32 for u128x1_generic {}
impl BitOps32 for u64x2_generic {}
impl BitOps32 for u32x4_generic {}
impl BitOps0 for u128x1_generic {}
impl BitOps0 for u64x2_generic {}
impl BitOps0 for u32x4_generic {}

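// impl_bitops! provides the bitwise operators and the Swap64 bit/byte swaps for
// each vector type via the scalar map helpers. The swapN masks exchange adjacent
// N-bit groups within every 64-bit word (e.g. 0x5555... selects the even bits so
// swap1 exchanges neighbouring bits).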
macro_rules! impl_bitops {
    ($vec:ident) => {
        impl Not for $vec {
            type Output = Self;
            #[inline(always)]
            fn not(self) -> Self::Output {
                omap(self, |x| !x)
            }
        }
        impl BitAnd for $vec {
            type Output = Self;
            #[inline(always)]
            fn bitand(self, rhs: Self) -> Self::Output {
                omap2(self, rhs, |x, y| x & y)
            }
        }
        impl BitOr for $vec {
            type Output = Self;
            #[inline(always)]
            fn bitor(self, rhs: Self) -> Self::Output {
                omap2(self, rhs, |x, y| x | y)
            }
        }
        impl BitXor for $vec {
            type Output = Self;
            #[inline(always)]
            fn bitxor(self, rhs: Self) -> Self::Output {
                omap2(self, rhs, |x, y| x ^ y)
            }
        }
        impl AndNot for $vec {
            type Output = Self;
            #[inline(always)]
            fn andnot(self, rhs: Self) -> Self::Output {
                omap2(self, rhs, |x, y| !x & y)
            }
        }
        impl BitAndAssign for $vec {
            #[inline(always)]
            fn bitand_assign(&mut self, rhs: Self) {
                *self = *self & rhs
            }
        }
        impl BitOrAssign for $vec {
            #[inline(always)]
            fn bitor_assign(&mut self, rhs: Self) {
                *self = *self | rhs
            }
        }
        impl BitXorAssign for $vec {
            #[inline(always)]
            fn bitxor_assign(&mut self, rhs: Self) {
                *self = *self ^ rhs
            }
        }

        impl Swap64 for $vec {
            #[inline(always)]
            fn swap1(self) -> Self {
                qmap(self, |x| {
                    ((x & 0x5555555555555555) << 1) | ((x & 0xaaaaaaaaaaaaaaaa) >> 1)
                })
            }
            #[inline(always)]
            fn swap2(self) -> Self {
                qmap(self, |x| {
                    ((x & 0x3333333333333333) << 2) | ((x & 0xcccccccccccccccc) >> 2)
                })
            }
            #[inline(always)]
            fn swap4(self) -> Self {
                qmap(self, |x| {
                    ((x & 0x0f0f0f0f0f0f0f0f) << 4) | ((x & 0xf0f0f0f0f0f0f0f0) >> 4)
                })
            }
            #[inline(always)]
            fn swap8(self) -> Self {
                qmap(self, |x| {
                    ((x & 0x00ff00ff00ff00ff) << 8) | ((x & 0xff00ff00ff00ff00) >> 8)
                })
            }
            #[inline(always)]
            fn swap16(self) -> Self {
                dmap(self, |x| x.rotate_left(16))
            }
            #[inline(always)]
            fn swap32(self) -> Self {
                qmap(self, |x| x.rotate_left(32))
            }
            #[inline(always)]
            fn swap64(self) -> Self {
                omap(self, |x| (x << 64) | (x >> 64))
            }
        }
    };
}
impl_bitops!(u32x4_generic);
impl_bitops!(u64x2_generic);
impl_bitops!(u128x1_generic);

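// Per-word rotations delegate to the scalar rotate_right of each lane.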
impl RotateEachWord32 for u32x4_generic {
    #[inline(always)]
    fn rotate_each_word_right7(self) -> Self {
        dmap(self, |x| x.rotate_right(7))
    }
    #[inline(always)]
    fn rotate_each_word_right8(self) -> Self {
        dmap(self, |x| x.rotate_right(8))
    }
    #[inline(always)]
    fn rotate_each_word_right11(self) -> Self {
        dmap(self, |x| x.rotate_right(11))
    }
    #[inline(always)]
    fn rotate_each_word_right12(self) -> Self {
        dmap(self, |x| x.rotate_right(12))
    }
    #[inline(always)]
    fn rotate_each_word_right16(self) -> Self {
        dmap(self, |x| x.rotate_right(16))
    }
    #[inline(always)]
    fn rotate_each_word_right20(self) -> Self {
        dmap(self, |x| x.rotate_right(20))
    }
    #[inline(always)]
    fn rotate_each_word_right24(self) -> Self {
        dmap(self, |x| x.rotate_right(24))
    }
    #[inline(always)]
    fn rotate_each_word_right25(self) -> Self {
        dmap(self, |x| x.rotate_right(25))
    }
}

impl RotateEachWord32 for u64x2_generic {
    #[inline(always)]
    fn rotate_each_word_right7(self) -> Self {
        qmap(self, |x| x.rotate_right(7))
    }
    #[inline(always)]
    fn rotate_each_word_right8(self) -> Self {
        qmap(self, |x| x.rotate_right(8))
    }
    #[inline(always)]
    fn rotate_each_word_right11(self) -> Self {
        qmap(self, |x| x.rotate_right(11))
    }
    #[inline(always)]
    fn rotate_each_word_right12(self) -> Self {
        qmap(self, |x| x.rotate_right(12))
    }
    #[inline(always)]
    fn rotate_each_word_right16(self) -> Self {
        qmap(self, |x| x.rotate_right(16))
    }
    #[inline(always)]
    fn rotate_each_word_right20(self) -> Self {
        qmap(self, |x| x.rotate_right(20))
    }
    #[inline(always)]
    fn rotate_each_word_right24(self) -> Self {
        qmap(self, |x| x.rotate_right(24))
    }
    #[inline(always)]
    fn rotate_each_word_right25(self) -> Self {
        qmap(self, |x| x.rotate_right(25))
    }
}
impl RotateEachWord64 for u64x2_generic {
    #[inline(always)]
    fn rotate_each_word_right32(self) -> Self {
        qmap(self, |x| x.rotate_right(32))
    }
}

// workaround for koute/cargo-web#52 (u128::rotate_* broken with cargo web)
// Note: only valid for shift amounts in 1..=127, which covers every use below.
#[inline(always)]
fn rotate_u128_right(x: u128, i: u32) -> u128 {
    (x >> i) | (x << (128 - i))
}
#[test]
fn test_rotate_u128() {
    const X: u128 = 0x0001_0203_0405_0607_0809_0a0b_0c0d_0e0f;
    assert_eq!(rotate_u128_right(X, 17), X.rotate_right(17));
}

impl RotateEachWord32 for u128x1_generic {
    #[inline(always)]
    fn rotate_each_word_right7(self) -> Self {
        Self([rotate_u128_right(self.0[0], 7)])
    }
    #[inline(always)]
    fn rotate_each_word_right8(self) -> Self {
        Self([rotate_u128_right(self.0[0], 8)])
    }
    #[inline(always)]
    fn rotate_each_word_right11(self) -> Self {
        Self([rotate_u128_right(self.0[0], 11)])
    }
    #[inline(always)]
    fn rotate_each_word_right12(self) -> Self {
        Self([rotate_u128_right(self.0[0], 12)])
    }
    #[inline(always)]
    fn rotate_each_word_right16(self) -> Self {
        Self([rotate_u128_right(self.0[0], 16)])
    }
    #[inline(always)]
    fn rotate_each_word_right20(self) -> Self {
        Self([rotate_u128_right(self.0[0], 20)])
    }
    #[inline(always)]
    fn rotate_each_word_right24(self) -> Self {
        Self([rotate_u128_right(self.0[0], 24)])
    }
    #[inline(always)]
    fn rotate_each_word_right25(self) -> Self {
        Self([rotate_u128_right(self.0[0], 25)])
    }
}
impl RotateEachWord64 for u128x1_generic {
    #[inline(always)]
    fn rotate_each_word_right32(self) -> Self {
        Self([rotate_u128_right(self.0[0], 32)])
    }
}

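// The portable fallback Machine: every associated vector type is a plain array
// wrapper and all operations are scalar, so it works on any target.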
#[derive(Copy, Clone)]
pub struct GenericMachine;
impl Machine for GenericMachine {
    type u32x4 = u32x4_generic;
    type u64x2 = u64x2_generic;
    type u128x1 = u128x1_generic;
    type u32x4x2 = u32x4x2_generic;
    type u64x2x2 = u64x2x2_generic;
    type u64x4 = u64x4_generic;
    type u128x2 = u128x2_generic;
    type u32x4x4 = u32x4x4_generic;
    type u64x2x4 = u64x2x4_generic;
    type u128x4 = u128x4_generic;
    #[inline(always)]
    unsafe fn instance() -> Self {
        Self
    }
}

#[derive(Copy, Clone, Debug, PartialEq)]
pub struct u32x4_generic([u32; 4]);
#[derive(Copy, Clone, Debug, PartialEq)]
pub struct u64x2_generic([u64; 2]);
#[derive(Copy, Clone, Debug, PartialEq)]
pub struct u128x1_generic([u128; 1]);

impl From<u32x4_generic> for vec128_storage {
    #[inline(always)]
    fn from(d: u32x4_generic) -> Self {
        Self { d: d.0 }
    }
}
impl From<u64x2_generic> for vec128_storage {
    #[inline(always)]
    fn from(q: u64x2_generic) -> Self {
        Self { q: q.0 }
    }
}
impl From<u128x1_generic> for vec128_storage {
    #[inline(always)]
    fn from(o: u128x1_generic) -> Self {
        Self { q: q_of_o(o.0[0]) }
    }
}

impl Store<vec128_storage> for u32x4_generic {
    #[inline(always)]
    unsafe fn unpack(s: vec128_storage) -> Self {
        Self(s.d)
    }
}
impl Store<vec128_storage> for u64x2_generic {
    #[inline(always)]
    unsafe fn unpack(s: vec128_storage) -> Self {
        Self(s.q)
    }
}
impl Store<vec128_storage> for u128x1_generic {
    #[inline(always)]
    unsafe fn unpack(s: vec128_storage) -> Self {
        Self([o_of_q(s.q); 1])
    }
}

impl ArithOps for u32x4_generic {}
impl ArithOps for u64x2_generic {}
impl ArithOps for u128x1_generic {}

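// Addition is lane-wise and wrapping: each u32/u64 lane (or the single u128)
// wraps on overflow, and carries never cross lane boundaries.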
impl Add for u32x4_generic {
    type Output = Self;
    #[inline(always)]
    fn add(self, rhs: Self) -> Self::Output {
        dmap2(self, rhs, |x, y| x.wrapping_add(y))
    }
}
impl Add for u64x2_generic {
    type Output = Self;
    #[inline(always)]
    fn add(self, rhs: Self) -> Self::Output {
        qmap2(self, rhs, |x, y| x.wrapping_add(y))
    }
}
impl Add for u128x1_generic {
    type Output = Self;
    #[inline(always)]
    fn add(self, rhs: Self) -> Self::Output {
        omap2(self, rhs, |x, y| x.wrapping_add(y))
    }
}
impl AddAssign for u32x4_generic {
    #[inline(always)]
    fn add_assign(&mut self, rhs: Self) {
        *self = *self + rhs
    }
}
impl AddAssign for u64x2_generic {
    #[inline(always)]
    fn add_assign(&mut self, rhs: Self) {
        *self = *self + rhs
    }
}
impl AddAssign for u128x1_generic {
    #[inline(always)]
    fn add_assign(&mut self, rhs: Self) {
        *self = *self + rhs
    }
}
impl BSwap for u32x4_generic {
    #[inline(always)]
    fn bswap(self) -> Self {
        dmap(self, |x| x.swap_bytes())
    }
}
impl BSwap for u64x2_generic {
    #[inline(always)]
    fn bswap(self) -> Self {
        qmap(self, |x| x.swap_bytes())
    }
}
impl BSwap for u128x1_generic {
    #[inline(always)]
    fn bswap(self) -> Self {
        omap(self, |x| x.swap_bytes())
    }
}
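// Byte-level loads and stores with explicit endianness. The slices must be
// exactly 16 bytes; the asserts enforce this before the raw pointer reads and
// writes below.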
impl StoreBytes for u32x4_generic {
    #[inline(always)]
    unsafe fn unsafe_read_le(input: &[u8]) -> Self {
        assert_eq!(input.len(), 16);
        let x = core::mem::transmute(core::ptr::read(input as *const _ as *const [u8; 16]));
        dmap(x, |x| x.to_le())
    }
    #[inline(always)]
    unsafe fn unsafe_read_be(input: &[u8]) -> Self {
        assert_eq!(input.len(), 16);
        let x = core::mem::transmute(core::ptr::read(input as *const _ as *const [u8; 16]));
        dmap(x, |x| x.to_be())
    }
    #[inline(always)]
    fn write_le(self, out: &mut [u8]) {
        assert_eq!(out.len(), 16);
        let x = dmap(self, |x| x.to_le());
        unsafe { core::ptr::write(out as *mut _ as *mut [u8; 16], core::mem::transmute(x)) }
    }
    #[inline(always)]
    fn write_be(self, out: &mut [u8]) {
        assert_eq!(out.len(), 16);
        let x = dmap(self, |x| x.to_be());
        unsafe { core::ptr::write(out as *mut _ as *mut [u8; 16], core::mem::transmute(x)) }
    }
}
impl StoreBytes for u64x2_generic {
    #[inline(always)]
    unsafe fn unsafe_read_le(input: &[u8]) -> Self {
        assert_eq!(input.len(), 16);
        let x = core::mem::transmute(core::ptr::read(input as *const _ as *const [u8; 16]));
        qmap(x, |x| x.to_le())
    }
    #[inline(always)]
    unsafe fn unsafe_read_be(input: &[u8]) -> Self {
        assert_eq!(input.len(), 16);
        let x = core::mem::transmute(core::ptr::read(input as *const _ as *const [u8; 16]));
        qmap(x, |x| x.to_be())
    }
    #[inline(always)]
    fn write_le(self, out: &mut [u8]) {
        assert_eq!(out.len(), 16);
        let x = qmap(self, |x| x.to_le());
        unsafe { core::ptr::write(out as *mut _ as *mut [u8; 16], core::mem::transmute(x)) }
    }
    #[inline(always)]
    fn write_be(self, out: &mut [u8]) {
        assert_eq!(out.len(), 16);
        let x = qmap(self, |x| x.to_be());
        unsafe { core::ptr::write(out as *mut _ as *mut [u8; 16], core::mem::transmute(x)) }
    }
}

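// Wider vectors are modeled as pairs/quads of 128-bit vectors from crate::soft.
// G0 and G1 are zero-sized tag types that keep otherwise identical x2-based
// types (e.g. u64x2x2 vs. u64x4) distinct so their trait impls cannot collide.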
#[derive(Copy, Clone)]
pub struct G0;
#[derive(Copy, Clone)]
pub struct G1;
pub type u32x4x2_generic = x2<u32x4_generic, G0>;
pub type u64x2x2_generic = x2<u64x2_generic, G0>;
pub type u64x4_generic = x2<u64x2_generic, G1>;
pub type u128x2_generic = x2<u128x1_generic, G0>;
pub type u32x4x4_generic = x4<u32x4_generic>;
pub type u64x2x4_generic = x4<u64x2_generic>;
pub type u128x4_generic = x4<u128x1_generic>;

impl Vector<[u32; 16]> for u32x4x4_generic {
    fn to_scalars(self) -> [u32; 16] {
        let [a, b, c, d] = self.0;
        let a = a.0;
        let b = b.0;
        let c = c.0;
        let d = d.0;
        [
            a[0], a[1], a[2], a[3], //
            b[0], b[1], b[2], b[3], //
            c[0], c[1], c[2], c[3], //
            d[0], d[1], d[2], d[3], //
        ]
    }
}

impl MultiLane<[u32; 4]> for u32x4_generic {
    #[inline(always)]
    fn to_lanes(self) -> [u32; 4] {
        self.0
    }
    #[inline(always)]
    fn from_lanes(xs: [u32; 4]) -> Self {
        Self(xs)
    }
}
impl MultiLane<[u64; 2]> for u64x2_generic {
    #[inline(always)]
    fn to_lanes(self) -> [u64; 2] {
        self.0
    }
    #[inline(always)]
    fn from_lanes(xs: [u64; 2]) -> Self {
        Self(xs)
    }
}
impl MultiLane<[u64; 4]> for u64x4_generic {
    #[inline(always)]
    fn to_lanes(self) -> [u64; 4] {
        let (a, b) = (self.0[0].to_lanes(), self.0[1].to_lanes());
        [a[0], a[1], b[0], b[1]]
    }
    #[inline(always)]
    fn from_lanes(xs: [u64; 4]) -> Self {
        let (a, b) = (
            u64x2_generic::from_lanes([xs[0], xs[1]]),
            u64x2_generic::from_lanes([xs[2], xs[3]]),
        );
        x2::new([a, b])
    }
}
impl MultiLane<[u128; 1]> for u128x1_generic {
    #[inline(always)]
    fn to_lanes(self) -> [u128; 1] {
        self.0
    }
    #[inline(always)]
    fn from_lanes(xs: [u128; 1]) -> Self {
        Self(xs)
    }
}
impl Vec4<u32> for u32x4_generic {
    #[inline(always)]
    fn extract(self, i: u32) -> u32 {
        self.0[i as usize]
    }
    #[inline(always)]
    fn insert(mut self, v: u32, i: u32) -> Self {
        self.0[i as usize] = v;
        self
    }
}
impl Vec4<u64> for u64x4_generic {
    #[inline(always)]
    fn extract(self, i: u32) -> u64 {
        let d: [u64; 4] = self.to_lanes();
        d[i as usize]
    }
    #[inline(always)]
    fn insert(mut self, v: u64, i: u32) -> Self {
        // `u64x2_generic::insert` takes `self` by value, so its result must be
        // written back into the selected half rather than discarded.
        self.0[(i / 2) as usize] = self.0[(i / 2) as usize].insert(v, i % 2);
        self
    }
}
impl Vec2<u64> for u64x2_generic {
    #[inline(always)]
    fn extract(self, i: u32) -> u64 {
        self.0[i as usize]
    }
    #[inline(always)]
    fn insert(mut self, v: u64, i: u32) -> Self {
        self.0[i as usize] = v;
        self
    }
}

impl Words4 for u32x4_generic {
    #[inline(always)]
    fn shuffle2301(self) -> Self {
        self.swap64()
    }
    #[inline(always)]
    fn shuffle1230(self) -> Self {
        let x = self.0;
        Self([x[3], x[0], x[1], x[2]])
    }
    #[inline(always)]
    fn shuffle3012(self) -> Self {
        let x = self.0;
        Self([x[1], x[2], x[3], x[0]])
    }
}
impl LaneWords4 for u32x4_generic {
    #[inline(always)]
    fn shuffle_lane_words2301(self) -> Self {
        self.shuffle2301()
    }
    #[inline(always)]
    fn shuffle_lane_words1230(self) -> Self {
        self.shuffle1230()
    }
    #[inline(always)]
    fn shuffle_lane_words3012(self) -> Self {
        self.shuffle3012()
    }
}

impl Words4 for u64x4_generic {
    #[inline(always)]
    fn shuffle2301(self) -> Self {
        x2::new([self.0[1], self.0[0]])
    }
    #[inline(always)]
    fn shuffle1230(self) -> Self {
        unimplemented!()
    }
    #[inline(always)]
    fn shuffle3012(self) -> Self {
        unimplemented!()
    }
}

impl u32x4<GenericMachine> for u32x4_generic {}
impl u64x2<GenericMachine> for u64x2_generic {}
impl u128x1<GenericMachine> for u128x1_generic {}
impl u32x4x2<GenericMachine> for u32x4x2_generic {}
impl u64x2x2<GenericMachine> for u64x2x2_generic {}
impl u64x4<GenericMachine> for u64x4_generic {}
impl u128x2<GenericMachine> for u128x2_generic {}
impl u32x4x4<GenericMachine> for u32x4x4_generic {}
impl u64x2x4<GenericMachine> for u64x2x4_generic {}
impl u128x4<GenericMachine> for u128x4_generic {}

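// The dispatch! macros below let a caller write a function body that is generic
// over a `Machine` while exporting a concrete function; on this generic backend
// they all simply instantiate `GenericMachine`. A hypothetical invocation might
// look like:
//
//     dispatch!(mach, Mach, {
//         [pub] fn example(x: u32) -> u32 {
//             // `mach: Mach` is in scope inside the body.
//             x.wrapping_add(1)
//         }
//     });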
#[macro_export]
macro_rules! dispatch {
    ($mach:ident, $MTy:ident, { $([$pub:tt$(($krate:tt))*])* fn $name:ident($($arg:ident: $argty:ty),* $(,)*) -> $ret:ty $body:block }) => {
        #[inline(always)]
        $($pub$(($krate))*)* fn $name($($arg: $argty),*) -> $ret {
            let $mach = unsafe { $crate::generic::GenericMachine::instance() };
            #[inline(always)]
            fn fn_impl<$MTy: $crate::Machine>($mach: $MTy, $($arg: $argty),*) -> $ret $body
            fn_impl($mach, $($arg),*)
        }
    };
    ($mach:ident, $MTy:ident, { $([$pub:tt $(($krate:tt))*])* fn $name:ident($($arg:ident: $argty:ty),* $(,)*) $body:block }) => {
        dispatch!($mach, $MTy, {
            $([$pub $(($krate))*])* fn $name($($arg: $argty),*) -> () $body
        });
    }
}
#[macro_export]
macro_rules! dispatch_light128 {
    ($mach:ident, $MTy:ident, { $([$pub:tt$(($krate:tt))*])* fn $name:ident($($arg:ident: $argty:ty),* $(,)*) -> $ret:ty $body:block }) => {
        #[inline(always)]
        $($pub$(($krate))*)* fn $name($($arg: $argty),*) -> $ret {
            let $mach = unsafe { $crate::generic::GenericMachine::instance() };
            #[inline(always)]
            fn fn_impl<$MTy: $crate::Machine>($mach: $MTy, $($arg: $argty),*) -> $ret $body
            fn_impl($mach, $($arg),*)
        }
    };
    ($mach:ident, $MTy:ident, { $([$pub:tt $(($krate:tt))*])* fn $name:ident($($arg:ident: $argty:ty),* $(,)*) $body:block }) => {
        dispatch!($mach, $MTy, {
            $([$pub $(($krate))*])* fn $name($($arg: $argty),*) -> () $body
        });
    }
}
#[macro_export]
macro_rules! dispatch_light256 {
    ($mach:ident, $MTy:ident, { $([$pub:tt$(($krate:tt))*])* fn $name:ident($($arg:ident: $argty:ty),* $(,)*) -> $ret:ty $body:block }) => {
        #[inline(always)]
        $($pub$(($krate))*)* fn $name($($arg: $argty),*) -> $ret {
            let $mach = unsafe { $crate::generic::GenericMachine::instance() };
            #[inline(always)]
            fn fn_impl<$MTy: $crate::Machine>($mach: $MTy, $($arg: $argty),*) -> $ret $body
            fn_impl($mach, $($arg),*)
        }
    };
    ($mach:ident, $MTy:ident, { $([$pub:tt $(($krate:tt))*])* fn $name:ident($($arg:ident: $argty:ty),* $(,)*) $body:block }) => {
        dispatch!($mach, $MTy, {
            $([$pub $(($krate))*])* fn $name($($arg: $argty),*) -> () $body
        });
    }
}
#[macro_export]
macro_rules! dispatch_light512 {
    ($mach:ident, $MTy:ident, { $([$pub:tt$(($krate:tt))*])* fn $name:ident($($arg:ident: $argty:ty),* $(,)*) -> $ret:ty $body:block }) => {
        #[inline(always)]
        $($pub$(($krate))*)* fn $name($($arg: $argty),*) -> $ret {
            let $mach = unsafe { $crate::generic::GenericMachine::instance() };
            #[inline(always)]
            fn fn_impl<$MTy: $crate::Machine>($mach: $MTy, $($arg: $argty),*) -> $ret $body
            fn_impl($mach, $($arg),*)
        }
    };
    ($mach:ident, $MTy:ident, { $([$pub:tt $(($krate:tt))*])* fn $name:ident($($arg:ident: $argty:ty),* $(,)*) $body:block }) => {
        dispatch!($mach, $MTy, {
            $([$pub $(($krate))*])* fn $name($($arg: $argty),*) -> () $body
        });
    }
}

#[cfg(test)]
mod test {
    use super::*;

    #[test]
    fn test_bswap32() {
        let xs = [0x0f0e_0d0c, 0x0b0a_0908, 0x0706_0504, 0x0302_0100];
        let ys = [0x0c0d_0e0f, 0x0809_0a0b, 0x0405_0607, 0x0001_0203];

        let m = unsafe { GenericMachine::instance() };

        let x: <GenericMachine as Machine>::u32x4 = m.vec(xs);
        let x = x.bswap();

        let y = m.vec(ys);
        assert_eq!(x, y);
    }
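
    // Sanity check: on the generic backend, rotate_each_word_right16 should
    // agree with a per-lane u32::rotate_right(16).
    #[test]
    fn test_rotate_each_word_right16() {
        let xs: [u32; 4] = [0x1234_5678, 0x9abc_def0, 0x0f0e_0d0c, 0x0b0a_0908];
        let ys = [
            xs[0].rotate_right(16),
            xs[1].rotate_right(16),
            xs[2].rotate_right(16),
            xs[3].rotate_right(16),
        ];

        let m = unsafe { GenericMachine::instance() };

        let x: <GenericMachine as Machine>::u32x4 = m.vec(xs);
        let x = x.rotate_each_word_right16();

        let y = m.vec(ys);
        assert_eq!(x, y);
    }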
}