/*
 * Repack three vectors of 8-bit lanes into four vectors of 6-bit values.
 *
 * Each of the 16 lanes is handled independently: the 24 bits formed by the
 * same lane of in.val[0], in.val[1] and in.val[2] are cut into four 6-bit
 * groups, and each group is stored right-aligned (upper two bits zero) in
 * the same lane of out.val[0] .. out.val[3].
 *
 * in:      three 16-lane byte vectors, passed by value.
 * returns: four 16-lane byte vectors, every lane in the range 0..63.
 */
static inline uint8x16x4_t
enc_reshuffle (uint8x16x3_t in)
{
	// Keeps the low six bits of a lane and zeroes the top two.
	const uint8x16_t low6_mask = vdupq_n_u8(0x3F);
	uint8x16x4_t out;

	// Per-lane bit layout, most significant bit first.
	//
	// Input:
	// in[0]  = a7 a6 a5 a4 a3 a2 a1 a0
	// in[1]  = b7 b6 b5 b4 b3 b2 b1 b0
	// in[2]  = c7 c6 c5 c4 c3 c2 c1 c0
	//
	// Output:
	// out[0] = 00 00 a7 a6 a5 a4 a3 a2
	// out[1] = 00 00 a1 a0 b7 b6 b5 b4
	// out[2] = 00 00 b3 b2 b1 b0 c7 c6
	// out[3] = 00 00 c5 c4 c3 c2 c1 c0

	// Right-shift the bits that form the low part of each output into
	// place. The logical shift already zero-fills the top of out[0], so
	// that vector is final after this step.
	out.val[0] = vshrq_n_u8(in.val[0], 2);
	out.val[1] = vshrq_n_u8(in.val[1], 4);
	out.val[2] = vshrq_n_u8(in.val[2], 6);

	// Shift-left-and-insert: the second operand is shifted left by n and
	// written over the destination, leaving the destination's low n bits
	// untouched. This merges the high part of each output from the
	// previous input byte, but also drags two unwanted bits into the top
	// of the lane (a3 a2 for out[1], b5 b4 for out[2]).
	out.val[1] = vsliq_n_u8(out.val[1], in.val[0], 4);
	out.val[2] = vsliq_n_u8(out.val[2], in.val[1], 2);

	// Clear the top two bits of the second, third and fourth outputs.
	// The fourth output is simply the low six bits of in[2].
	out.val[1] = vandq_u8(out.val[1], low6_mask);
	out.val[2] = vandq_u8(out.val[2], low6_mask);
	out.val[3] = vandq_u8(in.val[2], low6_mask);

	return out;
}
