• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
// AVX512 algorithm is based on permutevar and multishift. The code is based on
// https://github.com/WojciechMula/base64simd which is under BSD-2 license.

4 static inline __m512i
enc_reshuffle_translate(const __m512i input)5 enc_reshuffle_translate (const __m512i input)
6 {
7 	// 32-bit input
8 	// [ 0  0  0  0  0  0  0  0|c1 c0 d5 d4 d3 d2 d1 d0|
9 	//  b3 b2 b1 b0 c5 c4 c3 c2|a5 a4 a3 a2 a1 a0 b5 b4]
10 	// output order  [1, 2, 0, 1]
11 	// [b3 b2 b1 b0 c5 c4 c3 c2|c1 c0 d5 d4 d3 d2 d1 d0|
12 	//  a5 a4 a3 a2 a1 a0 b5 b4|b3 b2 b1 b0 c3 c2 c1 c0]
13 
14 	const __m512i shuffle_input = _mm512_setr_epi32(0x01020001,
15 	                                                0x04050304,
16 	                                                0x07080607,
17 	                                                0x0a0b090a,
18 	                                                0x0d0e0c0d,
19 	                                                0x10110f10,
20 	                                                0x13141213,
21 	                                                0x16171516,
22 	                                                0x191a1819,
23 	                                                0x1c1d1b1c,
24 	                                                0x1f201e1f,
25 	                                                0x22232122,
26 	                                                0x25262425,
27 	                                                0x28292728,
28 	                                                0x2b2c2a2b,
29 	                                                0x2e2f2d2e);
30 
31 	// Reorder bytes
32 	// [b3 b2 b1 b0 c5 c4 c3 c2|c1 c0 d5 d4 d3 d2 d1 d0|
33 	//  a5 a4 a3 a2 a1 a0 b5 b4|b3 b2 b1 b0 c3 c2 c1 c0]
34 	const __m512i in = _mm512_permutexvar_epi8(shuffle_input, input);
35 
36 	// After multishift a single 32-bit lane has following layout
37 	// [c1 c0 d5 d4 d3 d2 d1 d0|b1 b0 c5 c4 c3 c2 c1 c0|
38 	//  a1 a0 b5 b4 b3 b2 b1 b0|d1 d0 a5 a4 a3 a2 a1 a0]
39 	// (a = [10:17], b = [4:11], c = [22:27], d = [16:21])
40 
41 	// 48, 54, 36, 42, 16, 22, 4, 10
42 	const __m512i shifts = _mm512_set1_epi64(0x3036242a1016040alu);
43 	__m512i shuffled_in = _mm512_multishift_epi64_epi8(shifts, in);
44 
45 	// Translate immediatedly after reshuffled.
46 	const __m512i lookup = _mm512_loadu_si512(base64_table_enc_6bit);
47 
48 	// Translation 6-bit values to ASCII.
49 	return _mm512_permutexvar_epi8(shuffled_in, lookup);
50 }
51