1 #include <stdint.h>
2 #include <stddef.h>
3 #include <string.h>
4
5 #include "../../../include/libbase64.h"
6 #include "../../tables/tables.h"
7 #include "../../codecs.h"
8 #include "config.h"
9 #include "../../env.h"
10
// Build the NEON64 codec only for AArch64 targets where the compiler
// advertises NEON and the build configuration sets HAVE_NEON64.
#if defined(__aarch64__)
#  if HAVE_NEON64 && (defined(__ARM_NEON) || defined(__ARM_NEON__))
#    define BASE64_USE_NEON64
#  endif
#endif
16
17 #ifdef BASE64_USE_NEON64
18 #include <arm_neon.h>
19
// The inline assembly variants use GNU extended asm syntax, so they are
// only switched on for compilers that identify as GCC or Clang.
#if defined(__clang__) || defined(__GNUC__)
#  define BASE64_NEON64_USE_ASM
#endif
24
// Load a 64-byte lookup table into four 128-bit NEON vectors.
//
// p: address of the first of 64 readable table bytes.
//
// Returns the table as a `uint8x16x4_t'; `val[i]' holds table bytes
// 16*i through 16*i + 15.
static inline uint8x16x4_t
load_64byte_table (const uint8_t *p)
{
#ifdef BASE64_NEON64_USE_ASM

	// Force the table to be loaded into contiguous registers. GCC will not
	// normally allocate contiguous registers for a `uint8x16x4_t'. These
	// registers are chosen to not conflict with the ones in the enc loop.
	register uint8x16_t t0 __asm__ ("v8");
	register uint8x16_t t1 __asm__ ("v9");
	register uint8x16_t t2 __asm__ ("v10");
	register uint8x16_t t3 __asm__ ("v11");

	// The post-indexed `ld1' writes the advanced address back into `src',
	// so `p' has to be a read-write operand even though the updated value
	// is never used afterwards.
	//
	// The "memory" clobber is what tells the compiler that this statement
	// reads memory through `p'. Without it the only visible dependency is
	// the pointer register itself, and the compiler would be free to move
	// or drop earlier stores to the table.
	__asm__ (
		"ld1 {%[t0].16b, %[t1].16b, %[t2].16b, %[t3].16b}, [%[src]], #64 \n\t"
		: [src] "+r" (p),
		  [t0]  "=w" (t0),
		  [t1]  "=w" (t1),
		  [t2]  "=w" (t2),
		  [t3]  "=w" (t3)
		: // No pure inputs.
		: "memory"
	);

	return (uint8x16x4_t) {
		.val[0] = t0,
		.val[1] = t1,
		.val[2] = t2,
		.val[3] = t3,
	};
#else
	// No inline assembly available: let the intrinsic do the load.
	return vld1q_u8_x4(p);
#endif
}
57
58 #include "../generic/32/dec_loop.c"
59 #include "../generic/64/enc_loop.c"
60 #include "dec_loop.c"
61
62 #ifdef BASE64_NEON64_USE_ASM
63 # include "enc_loop_asm.c"
64 #else
65 # include "enc_reshuffle.c"
66 # include "enc_loop.c"
67 #endif
68
69 #endif // BASE64_USE_NEON64
70
// Stride size is so large on these NEON 64-bit functions
// (48 bytes encode, 64 bytes decode) that we also inline the
// generic codec loops (64-bit for encoding, 32-bit for decoding)
// to stay performant on smaller inputs.
74
BASE64_ENC_FUNCTION(neon64)75 BASE64_ENC_FUNCTION(neon64)
76 {
77 #ifdef BASE64_USE_NEON64
78 #include "../generic/enc_head.c"
79 enc_loop_neon64(&s, &slen, &o, &olen);
80 enc_loop_generic_64(&s, &slen, &o, &olen);
81 #include "../generic/enc_tail.c"
82 #else
83 BASE64_ENC_STUB
84 #endif
85 }
86
BASE64_DEC_FUNCTION(neon64)87 BASE64_DEC_FUNCTION(neon64)
88 {
89 #ifdef BASE64_USE_NEON64
90 #include "../generic/dec_head.c"
91 dec_loop_neon64(&s, &slen, &o, &olen);
92 dec_loop_generic_32(&s, &slen, &o, &olen);
93 #include "../generic/dec_tail.c"
94 #else
95 BASE64_DEC_STUB
96 #endif
97 }
98