1 #![allow(non_camel_case_types)]
2
3 use crate::soft::{x2, x4};
4 use crate::types::*;
5 use core::ops::*;
6
/// 128 bits of SIMD-register-like storage, viewable either as four 32-bit
/// lanes (`d`) or as two 64-bit lanes (`q`). `#[repr(C)]` pins the layout so
/// both views alias the same 16 bytes.
#[repr(C)]
#[derive(Clone, Copy)]
pub union vec128_storage {
    // Doubleword (32-bit) lane view.
    d: [u32; 4],
    // Quadword (64-bit) lane view.
    q: [u64; 2],
}
impl From<[u32; 4]> for vec128_storage {
    /// Wrap four 32-bit lanes.
    #[inline]
    fn from(d: [u32; 4]) -> Self {
        Self { d }
    }
}
impl From<vec128_storage> for [u32; 4] {
    /// Reinterpret the storage as four 32-bit lanes.
    #[inline]
    fn from(d: vec128_storage) -> Self {
        // SAFETY: both union fields are plain-old-data arrays covering the
        // same 16 bytes, so reading either view is always valid.
        unsafe { d.d }
    }
}
impl From<[u64; 2]> for vec128_storage {
    /// Wrap two 64-bit lanes.
    #[inline]
    fn from(q: [u64; 2]) -> Self {
        Self { q }
    }
}
impl From<vec128_storage> for [u64; 2] {
    /// Reinterpret the storage as two 64-bit lanes.
    #[inline]
    fn from(q: vec128_storage) -> Self {
        // SAFETY: see `From<vec128_storage> for [u32; 4]` — POD union.
        unsafe { q.q }
    }
}
impl Default for vec128_storage {
    /// All-zero storage (identical bit pattern through either lane view).
    #[inline]
    fn default() -> Self {
        Self { q: [0, 0] }
    }
}
impl Eq for vec128_storage {}
impl PartialEq<vec128_storage> for vec128_storage {
    #[inline]
    fn eq(&self, rhs: &Self) -> bool {
        // SAFETY: the `q` view spans the full 128 bits, so comparing the two
        // u64 lanes compares the entire storage.
        unsafe { self.q == rhs.q }
    }
}
/// 256 bits of storage, modeled as a pair of 128-bit registers.
#[derive(Clone, Copy, PartialEq, Eq, Default)]
pub struct vec256_storage {
    v128: [vec128_storage; 2],
}
impl vec256_storage {
    /// Build from two 128-bit halves.
    #[inline(always)]
    pub fn new128(v128: [vec128_storage; 2]) -> Self {
        Self { v128 }
    }
    /// Split back into the two 128-bit halves.
    #[inline(always)]
    pub fn split128(self) -> [vec128_storage; 2] {
        self.v128
    }
}
64 impl From<vec256_storage> for [u64; 4] {
65 #[inline]
from(q: vec256_storage) -> Self66 fn from(q: vec256_storage) -> Self {
67 let [a, b]: [u64; 2] = q.v128[0].into();
68 let [c, d]: [u64; 2] = q.v128[1].into();
69 [a, b, c, d]
70 }
71 }
/// 512 bits of storage, modeled as four 128-bit registers.
#[derive(Clone, Copy, PartialEq, Eq, Default)]
pub struct vec512_storage {
    v128: [vec128_storage; 4],
}
impl vec512_storage {
    /// Build from four 128-bit quarters.
    #[inline(always)]
    pub fn new128(v128: [vec128_storage; 4]) -> Self {
        Self { v128 }
    }
    /// Split back into the four 128-bit quarters.
    #[inline(always)]
    pub fn split128(self) -> [vec128_storage; 4] {
        self.v128
    }
}
86
dmap<T, F>(t: T, f: F) -> T where T: Store<vec128_storage> + Into<vec128_storage>, F: Fn(u32) -> u32,87 fn dmap<T, F>(t: T, f: F) -> T
88 where
89 T: Store<vec128_storage> + Into<vec128_storage>,
90 F: Fn(u32) -> u32,
91 {
92 let t: vec128_storage = t.into();
93 let d = unsafe { t.d };
94 let d = vec128_storage {
95 d: [f(d[0]), f(d[1]), f(d[2]), f(d[3])],
96 };
97 unsafe { T::unpack(d) }
98 }
99
dmap2<T, F>(a: T, b: T, f: F) -> T where T: Store<vec128_storage> + Into<vec128_storage>, F: Fn(u32, u32) -> u32,100 fn dmap2<T, F>(a: T, b: T, f: F) -> T
101 where
102 T: Store<vec128_storage> + Into<vec128_storage>,
103 F: Fn(u32, u32) -> u32,
104 {
105 let a: vec128_storage = a.into();
106 let b: vec128_storage = b.into();
107 let ao = unsafe { a.d };
108 let bo = unsafe { b.d };
109 let d = vec128_storage {
110 d: [
111 f(ao[0], bo[0]),
112 f(ao[1], bo[1]),
113 f(ao[2], bo[2]),
114 f(ao[3], bo[3]),
115 ],
116 };
117 unsafe { T::unpack(d) }
118 }
119
qmap<T, F>(t: T, f: F) -> T where T: Store<vec128_storage> + Into<vec128_storage>, F: Fn(u64) -> u64,120 fn qmap<T, F>(t: T, f: F) -> T
121 where
122 T: Store<vec128_storage> + Into<vec128_storage>,
123 F: Fn(u64) -> u64,
124 {
125 let t: vec128_storage = t.into();
126 let q = unsafe { t.q };
127 let q = vec128_storage {
128 q: [f(q[0]), f(q[1])],
129 };
130 unsafe { T::unpack(q) }
131 }
132
qmap2<T, F>(a: T, b: T, f: F) -> T where T: Store<vec128_storage> + Into<vec128_storage>, F: Fn(u64, u64) -> u64,133 fn qmap2<T, F>(a: T, b: T, f: F) -> T
134 where
135 T: Store<vec128_storage> + Into<vec128_storage>,
136 F: Fn(u64, u64) -> u64,
137 {
138 let a: vec128_storage = a.into();
139 let b: vec128_storage = b.into();
140 let ao = unsafe { a.q };
141 let bo = unsafe { b.q };
142 let q = vec128_storage {
143 q: [f(ao[0], bo[0]), f(ao[1], bo[1])],
144 };
145 unsafe { T::unpack(q) }
146 }
147
/// Combine two u64 lanes into one u128 (`q[0]` is the low half).
fn o_of_q(q: [u64; 2]) -> u128 {
    (u128::from(q[1]) << 64) | u128::from(q[0])
}
151
/// Split a u128 into two u64 lanes (low half first); inverse of `o_of_q`.
fn q_of_o(o: u128) -> [u64; 2] {
    let lo = o as u64;
    let hi = (o >> 64) as u64;
    [lo, hi]
}
155
omap<T, F>(a: T, f: F) -> T where T: Store<vec128_storage> + Into<vec128_storage>, F: Fn(u128) -> u128,156 fn omap<T, F>(a: T, f: F) -> T
157 where
158 T: Store<vec128_storage> + Into<vec128_storage>,
159 F: Fn(u128) -> u128,
160 {
161 let a: vec128_storage = a.into();
162 let ao = o_of_q(unsafe { a.q });
163 let o = vec128_storage { q: q_of_o(f(ao)) };
164 unsafe { T::unpack(o) }
165 }
166
omap2<T, F>(a: T, b: T, f: F) -> T where T: Store<vec128_storage> + Into<vec128_storage>, F: Fn(u128, u128) -> u128,167 fn omap2<T, F>(a: T, b: T, f: F) -> T
168 where
169 T: Store<vec128_storage> + Into<vec128_storage>,
170 F: Fn(u128, u128) -> u128,
171 {
172 let a: vec128_storage = a.into();
173 let b: vec128_storage = b.into();
174 let ao = o_of_q(unsafe { a.q });
175 let bo = o_of_q(unsafe { b.q });
176 let o = vec128_storage {
177 q: q_of_o(f(ao, bo)),
178 };
179 unsafe { T::unpack(o) }
180 }
181
// Marker-trait plumbing: opt each generic vector type into the bit-operation
// trait tiers at the lane widths it supports (u128x1 down to 32-bit ops,
// u64x2 down to 32-bit ops, u32x4 only 32-bit ops).
impl RotateEachWord128 for u128x1_generic {}
impl BitOps128 for u128x1_generic {}
impl BitOps64 for u128x1_generic {}
impl BitOps64 for u64x2_generic {}
impl BitOps32 for u128x1_generic {}
impl BitOps32 for u64x2_generic {}
impl BitOps32 for u32x4_generic {}
impl BitOps0 for u128x1_generic {}
impl BitOps0 for u64x2_generic {}
impl BitOps0 for u32x4_generic {}
192
// Implement the whole bitwise-operator surface (Not/And/Or/Xor/AndNot, the
// *Assign forms, and the Swap64 bit-group swaps) for one vector type in
// terms of the lane-map helpers above. All three generic vector types get
// identical definitions, since they all view the same 128-bit storage.
macro_rules! impl_bitops {
    ($vec:ident) => {
        impl Not for $vec {
            type Output = Self;
            #[inline(always)]
            fn not(self) -> Self::Output {
                omap(self, |x| !x)
            }
        }
        impl BitAnd for $vec {
            type Output = Self;
            #[inline(always)]
            fn bitand(self, rhs: Self) -> Self::Output {
                omap2(self, rhs, |x, y| x & y)
            }
        }
        impl BitOr for $vec {
            type Output = Self;
            #[inline(always)]
            fn bitor(self, rhs: Self) -> Self::Output {
                omap2(self, rhs, |x, y| x | y)
            }
        }
        impl BitXor for $vec {
            type Output = Self;
            #[inline(always)]
            fn bitxor(self, rhs: Self) -> Self::Output {
                omap2(self, rhs, |x, y| x ^ y)
            }
        }
        impl AndNot for $vec {
            type Output = Self;
            // Computes `!self & rhs`.
            #[inline(always)]
            fn andnot(self, rhs: Self) -> Self::Output {
                omap2(self, rhs, |x, y| !x & y)
            }
        }
        impl BitAndAssign for $vec {
            #[inline(always)]
            fn bitand_assign(&mut self, rhs: Self) {
                *self = *self & rhs
            }
        }
        impl BitOrAssign for $vec {
            #[inline(always)]
            fn bitor_assign(&mut self, rhs: Self) {
                *self = *self | rhs
            }
        }
        impl BitXorAssign for $vec {
            #[inline(always)]
            fn bitxor_assign(&mut self, rhs: Self) {
                *self = *self ^ rhs
            }
        }

        // Swap adjacent bit groups of the given width. Widths 1/2/4/8 use
        // mask-and-shift within the 64-bit lanes; 16/32/64 reduce to
        // rotations of the 32-/64-/128-bit views of the same storage.
        impl Swap64 for $vec {
            #[inline]
            fn swap1(self) -> Self {
                qmap(self, |x| {
                    ((x & 0x5555555555555555) << 1) | ((x & 0xaaaaaaaaaaaaaaaa) >> 1)
                })
            }
            #[inline]
            fn swap2(self) -> Self {
                qmap(self, |x| {
                    ((x & 0x3333333333333333) << 2) | ((x & 0xcccccccccccccccc) >> 2)
                })
            }
            #[inline]
            fn swap4(self) -> Self {
                qmap(self, |x| {
                    ((x & 0x0f0f0f0f0f0f0f0f) << 4) | ((x & 0xf0f0f0f0f0f0f0f0) >> 4)
                })
            }
            #[inline]
            fn swap8(self) -> Self {
                qmap(self, |x| {
                    ((x & 0x00ff00ff00ff00ff) << 8) | ((x & 0xff00ff00ff00ff00) >> 8)
                })
            }
            #[inline]
            fn swap16(self) -> Self {
                // Rotating each 32-bit lane by 16 swaps its two halfwords.
                dmap(self, |x| x.rotate_left(16))
            }
            #[inline]
            fn swap32(self) -> Self {
                // Rotating each 64-bit lane by 32 swaps its two words.
                qmap(self, |x| x.rotate_left(32))
            }
            #[inline]
            fn swap64(self) -> Self {
                // Manual 128-bit rotation by 64 swaps the two quadwords.
                omap(self, |x| (x << 64) | (x >> 64))
            }
        }
    };
}
impl_bitops!(u32x4_generic);
impl_bitops!(u64x2_generic);
impl_bitops!(u128x1_generic);
292
293 impl RotateEachWord32 for u32x4_generic {
294 #[inline]
rotate_each_word_right7(self) -> Self295 fn rotate_each_word_right7(self) -> Self {
296 dmap(self, |x| x.rotate_right(7))
297 }
298 #[inline]
rotate_each_word_right8(self) -> Self299 fn rotate_each_word_right8(self) -> Self {
300 dmap(self, |x| x.rotate_right(8))
301 }
302 #[inline]
rotate_each_word_right11(self) -> Self303 fn rotate_each_word_right11(self) -> Self {
304 dmap(self, |x| x.rotate_right(11))
305 }
306 #[inline]
rotate_each_word_right12(self) -> Self307 fn rotate_each_word_right12(self) -> Self {
308 dmap(self, |x| x.rotate_right(12))
309 }
310 #[inline]
rotate_each_word_right16(self) -> Self311 fn rotate_each_word_right16(self) -> Self {
312 dmap(self, |x| x.rotate_right(16))
313 }
314 #[inline]
rotate_each_word_right20(self) -> Self315 fn rotate_each_word_right20(self) -> Self {
316 dmap(self, |x| x.rotate_right(20))
317 }
318 #[inline]
rotate_each_word_right24(self) -> Self319 fn rotate_each_word_right24(self) -> Self {
320 dmap(self, |x| x.rotate_right(24))
321 }
322 #[inline]
rotate_each_word_right25(self) -> Self323 fn rotate_each_word_right25(self) -> Self {
324 dmap(self, |x| x.rotate_right(25))
325 }
326 }
327
328 impl RotateEachWord32 for u64x2_generic {
329 #[inline]
rotate_each_word_right7(self) -> Self330 fn rotate_each_word_right7(self) -> Self {
331 qmap(self, |x| x.rotate_right(7))
332 }
333 #[inline]
rotate_each_word_right8(self) -> Self334 fn rotate_each_word_right8(self) -> Self {
335 qmap(self, |x| x.rotate_right(8))
336 }
337 #[inline]
rotate_each_word_right11(self) -> Self338 fn rotate_each_word_right11(self) -> Self {
339 qmap(self, |x| x.rotate_right(11))
340 }
341 #[inline]
rotate_each_word_right12(self) -> Self342 fn rotate_each_word_right12(self) -> Self {
343 qmap(self, |x| x.rotate_right(12))
344 }
345 #[inline]
rotate_each_word_right16(self) -> Self346 fn rotate_each_word_right16(self) -> Self {
347 qmap(self, |x| x.rotate_right(16))
348 }
349 #[inline]
rotate_each_word_right20(self) -> Self350 fn rotate_each_word_right20(self) -> Self {
351 qmap(self, |x| x.rotate_right(20))
352 }
353 #[inline]
rotate_each_word_right24(self) -> Self354 fn rotate_each_word_right24(self) -> Self {
355 qmap(self, |x| x.rotate_right(24))
356 }
357 #[inline]
rotate_each_word_right25(self) -> Self358 fn rotate_each_word_right25(self) -> Self {
359 qmap(self, |x| x.rotate_right(25))
360 }
361 }
362 impl RotateEachWord64 for u64x2_generic {
363 #[inline]
rotate_each_word_right32(self) -> Self364 fn rotate_each_word_right32(self) -> Self {
365 qmap(self, |x| x.rotate_right(32))
366 }
367 }
368
// workaround for koute/cargo-web#52 (u128::rotate_* broken with cargo web)
/// Rotate `x` right by `i` bits.
/// NOTE(review): `x << (128 - i)` overflows the shift width when `i == 0`
/// (and `i` must also be < 128), so `i` must be in 1..=127. All call sites
/// in this file pass constants in 7..=32.
fn rotate_u128_right(x: u128, i: u32) -> u128 {
    (x >> i) | (x << (128 - i))
}
// Sanity check: the workaround matches the intrinsic rotation.
#[test]
fn test_rotate_u128() {
    const X: u128 = 0x0001_0203_0405_0607_0809_0a0b_0c0d_0e0f;
    assert_eq!(rotate_u128_right(X, 17), X.rotate_right(17));
}
378
379 impl RotateEachWord32 for u128x1_generic {
380 #[inline]
rotate_each_word_right7(self) -> Self381 fn rotate_each_word_right7(self) -> Self {
382 Self([rotate_u128_right(self.0[0], 7)])
383 }
384 #[inline]
rotate_each_word_right8(self) -> Self385 fn rotate_each_word_right8(self) -> Self {
386 Self([rotate_u128_right(self.0[0], 8)])
387 }
388 #[inline]
rotate_each_word_right11(self) -> Self389 fn rotate_each_word_right11(self) -> Self {
390 Self([rotate_u128_right(self.0[0], 11)])
391 }
392 #[inline]
rotate_each_word_right12(self) -> Self393 fn rotate_each_word_right12(self) -> Self {
394 Self([rotate_u128_right(self.0[0], 12)])
395 }
396 #[inline]
rotate_each_word_right16(self) -> Self397 fn rotate_each_word_right16(self) -> Self {
398 Self([rotate_u128_right(self.0[0], 16)])
399 }
400 #[inline]
rotate_each_word_right20(self) -> Self401 fn rotate_each_word_right20(self) -> Self {
402 Self([rotate_u128_right(self.0[0], 20)])
403 }
404 #[inline]
rotate_each_word_right24(self) -> Self405 fn rotate_each_word_right24(self) -> Self {
406 Self([rotate_u128_right(self.0[0], 24)])
407 }
408 #[inline]
rotate_each_word_right25(self) -> Self409 fn rotate_each_word_right25(self) -> Self {
410 Self([rotate_u128_right(self.0[0], 25)])
411 }
412 }
413 impl RotateEachWord64 for u128x1_generic {
414 #[inline]
rotate_each_word_right32(self) -> Self415 fn rotate_each_word_right32(self) -> Self {
416 Self([rotate_u128_right(self.0[0], 32)])
417 }
418 }
419
/// Portable scalar fallback "machine": every SIMD vector type is backed by
/// the generic implementations in this module.
#[derive(Copy, Clone)]
pub struct GenericMachine;
impl Machine for GenericMachine {
    type u32x4 = u32x4_generic;
    type u64x2 = u64x2_generic;
    type u128x1 = u128x1_generic;
    type u32x4x2 = u32x4x2_generic;
    type u64x2x2 = u64x2x2_generic;
    type u64x4 = u64x4_generic;
    type u128x2 = u128x2_generic;
    type u32x4x4 = u32x4x4_generic;
    type u64x2x4 = u64x2x4_generic;
    type u128x4 = u128x4_generic;
    #[inline]
    unsafe fn instance() -> Self {
        // The generic machine needs no CPU-feature checks, so this is always
        // safe to call; `unsafe` is inherited from the trait's contract
        // (presumably for SIMD backends — confirm against the trait docs).
        Self
    }
}
438
// Scalar-backed vector types: plain lane arrays wrapped in newtypes.
/// Four 32-bit lanes.
#[derive(Copy, Clone, Debug, PartialEq)]
pub struct u32x4_generic([u32; 4]);
/// Two 64-bit lanes.
#[derive(Copy, Clone, Debug, PartialEq)]
pub struct u64x2_generic([u64; 2]);
/// One 128-bit lane.
#[derive(Copy, Clone, Debug, PartialEq)]
pub struct u128x1_generic([u128; 1]);
445
impl From<u32x4_generic> for vec128_storage {
    /// Pack four u32 lanes into untyped storage.
    #[inline(always)]
    fn from(d: u32x4_generic) -> Self {
        Self { d: d.0 }
    }
}
impl From<u64x2_generic> for vec128_storage {
    /// Pack two u64 lanes into untyped storage.
    #[inline(always)]
    fn from(q: u64x2_generic) -> Self {
        Self { q: q.0 }
    }
}
impl From<u128x1_generic> for vec128_storage {
    /// Pack the single u128 lane, split into low/high u64 halves.
    #[inline(always)]
    fn from(o: u128x1_generic) -> Self {
        Self { q: q_of_o(o.0[0]) }
    }
}
464
impl Store<vec128_storage> for u32x4_generic {
    #[inline(always)]
    unsafe fn unpack(s: vec128_storage) -> Self {
        // SAFETY: both union views are POD over the same 16 bytes, so
        // reading `d` is valid regardless of which view was written.
        Self(s.d)
    }
}
impl Store<vec128_storage> for u64x2_generic {
    #[inline(always)]
    unsafe fn unpack(s: vec128_storage) -> Self {
        // SAFETY: as above — POD union, `q` view always readable.
        Self(s.q)
    }
}
impl Store<vec128_storage> for u128x1_generic {
    #[inline(always)]
    unsafe fn unpack(s: vec128_storage) -> Self {
        // SAFETY: as above; the two u64 lanes are recombined into one u128.
        Self([o_of_q(s.q); 1])
    }
}
483
// Marker impls: the generic vectors support the arithmetic-op trait bundle.
impl ArithOps for u32x4_generic {}
impl ArithOps for u64x2_generic {}
impl ArithOps for u128x1_generic {}
487
488 impl Add for u32x4_generic {
489 type Output = Self;
490 #[inline(always)]
add(self, rhs: Self) -> Self::Output491 fn add(self, rhs: Self) -> Self::Output {
492 dmap2(self, rhs, |x, y| x.wrapping_add(y))
493 }
494 }
495 impl Add for u64x2_generic {
496 type Output = Self;
497 #[inline(always)]
add(self, rhs: Self) -> Self::Output498 fn add(self, rhs: Self) -> Self::Output {
499 qmap2(self, rhs, |x, y| x.wrapping_add(y))
500 }
501 }
502 impl Add for u128x1_generic {
503 type Output = Self;
504 #[inline(always)]
add(self, rhs: Self) -> Self::Output505 fn add(self, rhs: Self) -> Self::Output {
506 omap2(self, rhs, |x, y| x.wrapping_add(y))
507 }
508 }
509 impl AddAssign for u32x4_generic {
510 #[inline(always)]
add_assign(&mut self, rhs: Self)511 fn add_assign(&mut self, rhs: Self) {
512 *self = *self + rhs
513 }
514 }
515 impl AddAssign for u64x2_generic {
516 #[inline(always)]
add_assign(&mut self, rhs: Self)517 fn add_assign(&mut self, rhs: Self) {
518 *self = *self + rhs
519 }
520 }
521 impl AddAssign for u128x1_generic {
522 #[inline(always)]
add_assign(&mut self, rhs: Self)523 fn add_assign(&mut self, rhs: Self) {
524 *self = *self + rhs
525 }
526 }
527 impl BSwap for u32x4_generic {
528 #[inline(always)]
bswap(self) -> Self529 fn bswap(self) -> Self {
530 dmap(self, |x| x.swap_bytes())
531 }
532 }
533 impl BSwap for u64x2_generic {
534 #[inline(always)]
bswap(self) -> Self535 fn bswap(self) -> Self {
536 qmap(self, |x| x.swap_bytes())
537 }
538 }
539 impl BSwap for u128x1_generic {
540 #[inline(always)]
bswap(self) -> Self541 fn bswap(self) -> Self {
542 omap(self, |x| x.swap_bytes())
543 }
544 }
impl StoreBytes for u32x4_generic {
    /// Read 16 bytes as four little-endian u32 lanes.
    #[inline(always)]
    unsafe fn unsafe_read_le(input: &[u8]) -> Self {
        assert_eq!(input.len(), 16);
        // Copy the 16 bytes and reinterpret them as lanes in native order;
        // `to_le` then byte-swaps on big-endian targets (no-op on LE) so the
        // bytes are interpreted as little-endian.
        let x = core::mem::transmute(core::ptr::read(input as *const _ as *const [u8; 16]));
        dmap(x, |x| x.to_le())
    }
    /// Read 16 bytes as four big-endian u32 lanes.
    #[inline(always)]
    unsafe fn unsafe_read_be(input: &[u8]) -> Self {
        assert_eq!(input.len(), 16);
        let x = core::mem::transmute(core::ptr::read(input as *const _ as *const [u8; 16]));
        dmap(x, |x| x.to_be())
    }
    /// Write the four lanes as 16 little-endian bytes.
    #[inline(always)]
    fn write_le(self, out: &mut [u8]) {
        assert_eq!(out.len(), 16);
        // `to_le` is its own inverse, so the same conversion serializes.
        let x = dmap(self, |x| x.to_le());
        unsafe { core::ptr::write(out as *mut _ as *mut [u8; 16], core::mem::transmute(x)) }
    }
    /// Write the four lanes as 16 big-endian bytes.
    #[inline(always)]
    fn write_be(self, out: &mut [u8]) {
        assert_eq!(out.len(), 16);
        let x = dmap(self, |x| x.to_be());
        unsafe { core::ptr::write(out as *mut _ as *mut [u8; 16], core::mem::transmute(x)) }
    }
}
impl StoreBytes for u64x2_generic {
    /// Read 16 bytes as two little-endian u64 lanes.
    #[inline(always)]
    unsafe fn unsafe_read_le(input: &[u8]) -> Self {
        assert_eq!(input.len(), 16);
        let x = core::mem::transmute(core::ptr::read(input as *const _ as *const [u8; 16]));
        qmap(x, |x| x.to_le())
    }
    /// Read 16 bytes as two big-endian u64 lanes.
    #[inline(always)]
    unsafe fn unsafe_read_be(input: &[u8]) -> Self {
        assert_eq!(input.len(), 16);
        let x = core::mem::transmute(core::ptr::read(input as *const _ as *const [u8; 16]));
        qmap(x, |x| x.to_be())
    }
    /// Write the two lanes as 16 little-endian bytes.
    #[inline(always)]
    fn write_le(self, out: &mut [u8]) {
        assert_eq!(out.len(), 16);
        let x = qmap(self, |x| x.to_le());
        unsafe { core::ptr::write(out as *mut _ as *mut [u8; 16], core::mem::transmute(x)) }
    }
    /// Write the two lanes as 16 big-endian bytes.
    #[inline(always)]
    fn write_be(self, out: &mut [u8]) {
        assert_eq!(out.len(), 16);
        let x = qmap(self, |x| x.to_be());
        unsafe { core::ptr::write(out as *mut _ as *mut [u8; 16], core::mem::transmute(x)) }
    }
}
597
// Zero-sized tag types: they give otherwise-identical `x2` compositions
// distinct types (e.g. `u64x2x2_generic` vs `u64x4_generic` below differ
// only in the tag).
#[derive(Copy, Clone)]
pub struct G0;
#[derive(Copy, Clone)]
pub struct G1;
// Wider vectors are built by composing the 128-bit generic vectors.
pub type u32x4x2_generic = x2<u32x4_generic, G0>;
pub type u64x2x2_generic = x2<u64x2_generic, G0>;
pub type u64x4_generic = x2<u64x2_generic, G1>;
pub type u128x2_generic = x2<u128x1_generic, G0>;
pub type u32x4x4_generic = x4<u32x4_generic>;
pub type u64x2x4_generic = x4<u64x2_generic>;
pub type u128x4_generic = x4<u128x1_generic>;
609
610 impl MultiLane<[u32; 4]> for u32x4_generic {
611 #[inline(always)]
to_lanes(self) -> [u32; 4]612 fn to_lanes(self) -> [u32; 4] {
613 self.0
614 }
615 #[inline(always)]
from_lanes(xs: [u32; 4]) -> Self616 fn from_lanes(xs: [u32; 4]) -> Self {
617 Self(xs)
618 }
619 }
620 impl MultiLane<[u64; 2]> for u64x2_generic {
621 #[inline(always)]
to_lanes(self) -> [u64; 2]622 fn to_lanes(self) -> [u64; 2] {
623 self.0
624 }
625 #[inline(always)]
from_lanes(xs: [u64; 2]) -> Self626 fn from_lanes(xs: [u64; 2]) -> Self {
627 Self(xs)
628 }
629 }
630 impl MultiLane<[u64; 4]> for u64x4_generic {
631 #[inline(always)]
to_lanes(self) -> [u64; 4]632 fn to_lanes(self) -> [u64; 4] {
633 let (a, b) = (self.0[0].to_lanes(), self.0[1].to_lanes());
634 [a[0], a[1], b[0], b[1]]
635 }
636 #[inline(always)]
from_lanes(xs: [u64; 4]) -> Self637 fn from_lanes(xs: [u64; 4]) -> Self {
638 let (a, b) = (
639 u64x2_generic::from_lanes([xs[0], xs[1]]),
640 u64x2_generic::from_lanes([xs[2], xs[3]]),
641 );
642 x2::new([a, b])
643 }
644 }
645 impl MultiLane<[u128; 1]> for u128x1_generic {
646 #[inline(always)]
to_lanes(self) -> [u128; 1]647 fn to_lanes(self) -> [u128; 1] {
648 self.0
649 }
650 #[inline(always)]
from_lanes(xs: [u128; 1]) -> Self651 fn from_lanes(xs: [u128; 1]) -> Self {
652 Self(xs)
653 }
654 }
655 impl Vec4<u32> for u32x4_generic {
656 #[inline(always)]
extract(self, i: u32) -> u32657 fn extract(self, i: u32) -> u32 {
658 self.0[i as usize]
659 }
660 #[inline(always)]
insert(mut self, v: u32, i: u32) -> Self661 fn insert(mut self, v: u32, i: u32) -> Self {
662 self.0[i as usize] = v;
663 self
664 }
665 }
666 impl Vec4<u64> for u64x4_generic {
667 #[inline(always)]
extract(self, i: u32) -> u64668 fn extract(self, i: u32) -> u64 {
669 let d: [u64; 4] = self.to_lanes();
670 d[i as usize]
671 }
672 #[inline(always)]
insert(self, v: u64, i: u32) -> Self673 fn insert(self, v: u64, i: u32) -> Self {
674 self.0[(i / 2) as usize].insert(v, i % 2);
675 self
676 }
677 }
678 impl Vec2<u64> for u64x2_generic {
679 #[inline(always)]
extract(self, i: u32) -> u64680 fn extract(self, i: u32) -> u64 {
681 self.0[i as usize]
682 }
683 #[inline(always)]
insert(mut self, v: u64, i: u32) -> Self684 fn insert(mut self, v: u64, i: u32) -> Self {
685 self.0[i as usize] = v;
686 self
687 }
688 }
689
690 impl Words4 for u32x4_generic {
691 #[inline(always)]
shuffle2301(self) -> Self692 fn shuffle2301(self) -> Self {
693 self.swap64()
694 }
695 #[inline(always)]
shuffle1230(self) -> Self696 fn shuffle1230(self) -> Self {
697 let x = self.0;
698 Self([x[3], x[0], x[1], x[2]])
699 }
700 #[inline(always)]
shuffle3012(self) -> Self701 fn shuffle3012(self) -> Self {
702 let x = self.0;
703 Self([x[1], x[2], x[3], x[0]])
704 }
705 }
706 impl LaneWords4 for u32x4_generic {
707 #[inline(always)]
shuffle_lane_words2301(self) -> Self708 fn shuffle_lane_words2301(self) -> Self {
709 self.shuffle2301()
710 }
711 #[inline(always)]
shuffle_lane_words1230(self) -> Self712 fn shuffle_lane_words1230(self) -> Self {
713 self.shuffle1230()
714 }
715 #[inline(always)]
shuffle_lane_words3012(self) -> Self716 fn shuffle_lane_words3012(self) -> Self {
717 self.shuffle3012()
718 }
719 }
720
721 impl Words4 for u64x4_generic {
722 #[inline(always)]
shuffle2301(self) -> Self723 fn shuffle2301(self) -> Self {
724 x2::new([self.0[1], self.0[0]])
725 }
726 #[inline(always)]
shuffle1230(self) -> Self727 fn shuffle1230(self) -> Self {
728 unimplemented!()
729 }
730 #[inline(always)]
shuffle3012(self) -> Self731 fn shuffle3012(self) -> Self {
732 unimplemented!()
733 }
734 }
735
// Register each generic type as the implementation of the corresponding
// width-specific vector trait for `GenericMachine`.
impl u32x4<GenericMachine> for u32x4_generic {}
impl u64x2<GenericMachine> for u64x2_generic {}
impl u128x1<GenericMachine> for u128x1_generic {}
impl u32x4x2<GenericMachine> for u32x4x2_generic {}
impl u64x2x2<GenericMachine> for u64x2x2_generic {}
impl u64x4<GenericMachine> for u64x4_generic {}
impl u128x2<GenericMachine> for u128x2_generic {}
impl u32x4x4<GenericMachine> for u32x4x4_generic {}
impl u64x2x4<GenericMachine> for u64x2x4_generic {}
impl u128x4<GenericMachine> for u128x4_generic {}
746
/// Wrap a `Machine`-generic function body in a concrete entry point that
/// always selects `GenericMachine`. The optional `[$pub(($krate))]` fragment
/// forwards a visibility such as `pub` or `pub(crate)`. The second arm adds
/// the elided `-> ()` and recurses into the first.
#[macro_export]
macro_rules! dispatch {
    ($mach:ident, $MTy:ident, { $([$pub:tt$(($krate:tt))*])* fn $name:ident($($arg:ident: $argty:ty),* $(,)*) -> $ret:ty $body:block }) => {
        #[inline]
        $($pub$(($krate))*)* fn $name($($arg: $argty),*) -> $ret {
            // SAFETY: GenericMachine::instance has no preconditions.
            let $mach = unsafe { $crate::generic::GenericMachine::instance() };
            #[inline(always)]
            fn fn_impl<$MTy: $crate::Machine>($mach: $MTy, $($arg: $argty),*) -> $ret $body
            fn_impl($mach, $($arg),*)
        }
    };
    ($mach:ident, $MTy:ident, { $([$pub:tt $(($krate:tt))*])* fn $name:ident($($arg:ident: $argty:ty),* $(,)*) $body:block }) => {
        dispatch!($mach, $MTy, {
            $([$pub $(($krate))*])* fn $name($($arg: $argty),*) -> () $body
        });
    }
}
/// Width-specific dispatch for 128-bit operations. On the generic machine
/// every width uses the same scalar backend, so this expands exactly like
/// `dispatch!` (its no-return arm even delegates to `dispatch!`).
#[macro_export]
macro_rules! dispatch_light128 {
    ($mach:ident, $MTy:ident, { $([$pub:tt$(($krate:tt))*])* fn $name:ident($($arg:ident: $argty:ty),* $(,)*) -> $ret:ty $body:block }) => {
        #[inline]
        $($pub$(($krate))*)* fn $name($($arg: $argty),*) -> $ret {
            // SAFETY: GenericMachine::instance has no preconditions.
            let $mach = unsafe { $crate::generic::GenericMachine::instance() };
            #[inline(always)]
            fn fn_impl<$MTy: $crate::Machine>($mach: $MTy, $($arg: $argty),*) -> $ret $body
            fn_impl($mach, $($arg),*)
        }
    };
    ($mach:ident, $MTy:ident, { $([$pub:tt $(($krate:tt))*])* fn $name:ident($($arg:ident: $argty:ty),* $(,)*) $body:block }) => {
        dispatch!($mach, $MTy, {
            $([$pub $(($krate))*])* fn $name($($arg: $argty),*) -> () $body
        });
    }
}
/// Width-specific dispatch for 256-bit operations; identical to `dispatch!`
/// on the generic machine.
#[macro_export]
macro_rules! dispatch_light256 {
    ($mach:ident, $MTy:ident, { $([$pub:tt$(($krate:tt))*])* fn $name:ident($($arg:ident: $argty:ty),* $(,)*) -> $ret:ty $body:block }) => {
        #[inline]
        $($pub$(($krate))*)* fn $name($($arg: $argty),*) -> $ret {
            // SAFETY: GenericMachine::instance has no preconditions.
            let $mach = unsafe { $crate::generic::GenericMachine::instance() };
            #[inline(always)]
            fn fn_impl<$MTy: $crate::Machine>($mach: $MTy, $($arg: $argty),*) -> $ret $body
            fn_impl($mach, $($arg),*)
        }
    };
    ($mach:ident, $MTy:ident, { $([$pub:tt $(($krate:tt))*])* fn $name:ident($($arg:ident: $argty:ty),* $(,)*) $body:block }) => {
        dispatch!($mach, $MTy, {
            $([$pub $(($krate))*])* fn $name($($arg: $argty),*) -> () $body
        });
    }
}
/// Width-specific dispatch for 512-bit operations; identical to `dispatch!`
/// on the generic machine.
#[macro_export]
macro_rules! dispatch_light512 {
    ($mach:ident, $MTy:ident, { $([$pub:tt$(($krate:tt))*])* fn $name:ident($($arg:ident: $argty:ty),* $(,)*) -> $ret:ty $body:block }) => {
        #[inline]
        $($pub$(($krate))*)* fn $name($($arg: $argty),*) -> $ret {
            // SAFETY: GenericMachine::instance has no preconditions.
            let $mach = unsafe { $crate::generic::GenericMachine::instance() };
            #[inline(always)]
            fn fn_impl<$MTy: $crate::Machine>($mach: $MTy, $($arg: $argty),*) -> $ret $body
            fn_impl($mach, $($arg),*)
        }
    };
    ($mach:ident, $MTy:ident, { $([$pub:tt $(($krate:tt))*])* fn $name:ident($($arg:ident: $argty:ty),* $(,)*) $body:block }) => {
        dispatch!($mach, $MTy, {
            $([$pub $(($krate))*])* fn $name($($arg: $argty),*) -> () $body
        });
    }
}
815
#[cfg(test)]
mod test {
    use super::*;

    /// `bswap` must reverse the bytes within each 32-bit lane, without
    /// moving bytes across lanes.
    #[test]
    fn test_bswap32() {
        let xs = [0x0f0e_0d0c, 0x0b0a_0908, 0x0706_0504, 0x0302_0100];
        let ys = [0x0c0d_0e0f, 0x0809_0a0b, 0x0405_0607, 0x0001_0203];

        let m = unsafe { GenericMachine::instance() };

        let x: <GenericMachine as Machine>::u32x4 = m.vec(xs);
        let x = x.bswap();

        let y = m.vec(ys);
        assert_eq!(x, y);
    }
}
834