• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 #include <stdint.h>
2 #include <stddef.h>
3 #include <string.h>
4 
5 #include "../../../include/libbase64.h"
6 #include "../../tables/tables.h"
7 #include "../../codecs.h"
8 #include "config.h"
9 #include "../../env.h"
10 
11 #ifdef __aarch64__
12 #  if (defined(__ARM_NEON__) || defined(__ARM_NEON)) && HAVE_NEON64
13 #    define BASE64_USE_NEON64
14 #  endif
15 #endif
16 
17 #ifdef BASE64_USE_NEON64
18 #include <arm_neon.h>
19 
20 // Only enable inline assembly on supported compilers.
21 #if defined(__GNUC__) || defined(__clang__)
22 #define BASE64_NEON64_USE_ASM
23 #endif
24 
25 static inline uint8x16x4_t
load_64byte_table(const uint8_t * p)26 load_64byte_table (const uint8_t *p)
27 {
28 #ifdef BASE64_NEON64_USE_ASM
29 
30 	// Force the table to be loaded into contiguous registers. GCC will not
31 	// normally allocate contiguous registers for a `uint8x16x4_t'. These
32 	// registers are chosen to not conflict with the ones in the enc loop.
33 	register uint8x16_t t0 __asm__ ("v8");
34 	register uint8x16_t t1 __asm__ ("v9");
35 	register uint8x16_t t2 __asm__ ("v10");
36 	register uint8x16_t t3 __asm__ ("v11");
37 
38 	__asm__ (
39 		"ld1 {%[t0].16b, %[t1].16b, %[t2].16b, %[t3].16b}, [%[src]], #64 \n\t"
40 		: [src] "+r" (p),
41 		  [t0]  "=w" (t0),
42 		  [t1]  "=w" (t1),
43 		  [t2]  "=w" (t2),
44 		  [t3]  "=w" (t3)
45 	);
46 
47 	return (uint8x16x4_t) {
48 		.val[0] = t0,
49 		.val[1] = t1,
50 		.val[2] = t2,
51 		.val[3] = t3,
52 	};
53 #else
54 	return vld1q_u8_x4(p);
55 #endif
56 }
57 
58 #include "../generic/32/dec_loop.c"
59 #include "../generic/64/enc_loop.c"
60 #include "dec_loop.c"
61 
62 #ifdef BASE64_NEON64_USE_ASM
63 # include "enc_loop_asm.c"
64 #else
65 # include "enc_reshuffle.c"
66 # include "enc_loop.c"
67 #endif
68 
69 #endif	// BASE64_USE_NEON64
70 
// The stride size of these NEON 64-bit functions is so large
// (48 bytes encode, 64 bytes decode) that we also inline the generic
// codecs (64-bit for encoding, 32-bit for decoding) to stay performant
// on smaller inputs.
74 
BASE64_ENC_FUNCTION(neon64)75 BASE64_ENC_FUNCTION(neon64)
76 {
77 #ifdef BASE64_USE_NEON64
78 	#include "../generic/enc_head.c"
79 	enc_loop_neon64(&s, &slen, &o, &olen);
80 	enc_loop_generic_64(&s, &slen, &o, &olen);
81 	#include "../generic/enc_tail.c"
82 #else
83 	BASE64_ENC_STUB
84 #endif
85 }
86 
BASE64_DEC_FUNCTION(neon64)87 BASE64_DEC_FUNCTION(neon64)
88 {
89 #ifdef BASE64_USE_NEON64
90 	#include "../generic/dec_head.c"
91 	dec_loop_neon64(&s, &slen, &o, &olen);
92 	dec_loop_generic_32(&s, &slen, &o, &olen);
93 	#include "../generic/dec_tail.c"
94 #else
95 	BASE64_DEC_STUB
96 #endif
97 }
98