1 #include <stdbool.h>
2 #include <stdint.h>
3 #include <stddef.h>
4 #include <stdint.h>
5
6 #include "../include/libbase64.h"
7 #include "codecs.h"
8 #include "config.h"
9 #include "env.h"
10
// Detect x86 targets. GCC and Clang predefine __x86_64__ / __i386__, while
// MSVC predefines _M_X64 for 64-bit and _M_IX86 for 32-bit x86 builds. The
// older _M_X86 spelling is still honored so existing build setups that
// define it by hand keep working.
#if defined(__x86_64__) || defined(__i386__) || defined(_M_X64) || defined(_M_IX86) || defined(_M_X86)
	#define BASE64_X86
	// Runtime CPU dispatch is only needed when at least one of the x86
	// SIMD codecs is switched on through its HAVE_* setting:
	#if (HAVE_SSSE3 || HAVE_SSE41 || HAVE_SSE42 || HAVE_AVX || HAVE_AVX2)
		#define BASE64_X86_SIMD
	#endif
#endif
17
18 #ifdef BASE64_X86
19 #ifdef _MSC_VER
20 #include <intrin.h>
// Emulate the __cpuid_count() and __cpuid() macros that the non-MSVC branch
// gets from <cpuid.h>, built on top of the __cpuidex() intrinsic.
//
// __level and __count select the CPUID leaf and subleaf; __eax, __ebx,
// __ecx and __edx must be assignable lvalues and receive the four result
// registers.
//
// The expansion is wrapped in do { } while (0) so that it behaves as a
// single statement, e.g. as the unbraced body of an if/else. The scratch
// array has a deliberately unusual name so it cannot shadow a variable
// that the caller passes in as one of the arguments.
#define __cpuid_count(__level, __count, __eax, __ebx, __ecx, __edx) \
	do { \
		int base64_cpuid_regs_[4]; \
		__cpuidex(base64_cpuid_regs_, (__level), (__count)); \
		(__eax) = base64_cpuid_regs_[0]; \
		(__ebx) = base64_cpuid_regs_[1]; \
		(__ecx) = base64_cpuid_regs_[2]; \
		(__edx) = base64_cpuid_regs_[3]; \
	} while (0)

// Same as __cpuid_count() with the subleaf fixed to 0.
#define __cpuid(__level, __eax, __ebx, __ecx, __edx) \
	__cpuid_count(__level, 0, __eax, __ebx, __ecx, __edx)
32 #else
33 #include <cpuid.h>
34 #if HAVE_AVX2 || HAVE_AVX
35 #if ((__GNUC__ > 4 || __GNUC__ == 4 && __GNUC_MINOR__ >= 2) || (__clang_major__ >= 3))
// Execute the XGETBV instruction for the extended control register selected
// by `index` and return the 64-bit result.
static inline uint64_t
_xgetbv (uint32_t index)
{
	uint32_t lo;
	uint32_t hi;

	// The index goes in through ECX; the value comes back split over
	// EDX (upper half) and EAX (lower half):
	__asm__ __volatile__("xgetbv" : "=a"(lo), "=d"(hi) : "c"(index));

	uint64_t value = hi;
	value <<= 32;
	value |= lo;
	return value;
}
42 #else
43 #error "Platform not supported"
44 #endif
45 #endif
46 #endif
47
// Feature bit masks used by the runtime detection code in this file. The
// guarded ones are only defined here when an included header has not
// already provided them.

// Tested against EBX of CPUID leaf 7:
#if !defined(bit_AVX2)
	#define bit_AVX2 (1 << 5)
#endif

// Tested against ECX of CPUID leaf 1:
#if !defined(bit_SSSE3)
	#define bit_SSSE3 (1 << 9)
#endif
#if !defined(bit_SSE41)
	#define bit_SSE41 (1 << 19)
#endif
#if !defined(bit_SSE42)
	#define bit_SSE42 (1 << 20)
#endif
#if !defined(bit_AVX)
	#define bit_AVX (1 << 28)
#endif

// Also tested against ECX of CPUID leaf 1, as the gate for calling _xgetbv():
#define bit_XSAVE_XRSTORE (1 << 27)

// Index of the extended control register passed to _xgetbv():
#if !defined(_XCR_XFEATURE_ENABLED_MASK)
	#define _XCR_XFEATURE_ENABLED_MASK 0
#endif

// Bits 1 and 2 of the value returned by _xgetbv():
#define _XCR_XMM_AND_YMM_STATE_ENABLED_BY_OS 0x6
71 #endif
72
// Function declarations.
//
// BASE64_CODEC_FUNCS(arch) emits the encoder and the decoder declaration
// for one codec via BASE64_ENC_FUNCTION / BASE64_DEC_FUNCTION (presumably
// the base64_stream_encode_<arch> / base64_stream_decode_<arch> prototypes
// used further down; confirm against the header that defines them).
#define BASE64_CODEC_FUNCS(arch) \
	BASE64_ENC_FUNCTION(arch); \
	BASE64_DEC_FUNCTION(arch);

// Portable fallback:
BASE64_CODEC_FUNCS(plain)

// ARM codecs:
BASE64_CODEC_FUNCS(neon32)
BASE64_CODEC_FUNCS(neon64)

// x86 codecs:
BASE64_CODEC_FUNCS(ssse3)
BASE64_CODEC_FUNCS(sse41)
BASE64_CODEC_FUNCS(sse42)
BASE64_CODEC_FUNCS(avx)
BASE64_CODEC_FUNCS(avx2)
86
87 static bool
88 codec_choose_forced (struct codec *codec, int flags)
89 {
90 // If the user wants to use a certain codec,
91 // always allow it, even if the codec is a no-op.
92 // For testing purposes.
93
94 if (!(flags & 0xFF)) {
95 return false;
96 }
97 if (flags & BASE64_FORCE_AVX2) {
98 codec->enc = base64_stream_encode_avx2;
99 codec->dec = base64_stream_decode_avx2;
100 return true;
101 }
102 if (flags & BASE64_FORCE_NEON32) {
103 codec->enc = base64_stream_encode_neon32;
104 codec->dec = base64_stream_decode_neon32;
105 return true;
106 }
107 if (flags & BASE64_FORCE_NEON64) {
108 codec->enc = base64_stream_encode_neon64;
109 codec->dec = base64_stream_decode_neon64;
110 return true;
111 }
112 if (flags & BASE64_FORCE_PLAIN) {
113 codec->enc = base64_stream_encode_plain;
114 codec->dec = base64_stream_decode_plain;
115 return true;
116 }
117 if (flags & BASE64_FORCE_SSSE3) {
118 codec->enc = base64_stream_encode_ssse3;
119 codec->dec = base64_stream_decode_ssse3;
120 return true;
121 }
122 if (flags & BASE64_FORCE_SSE41) {
123 codec->enc = base64_stream_encode_sse41;
124 codec->dec = base64_stream_decode_sse41;
125 return true;
126 }
127 if (flags & BASE64_FORCE_SSE42) {
128 codec->enc = base64_stream_encode_sse42;
129 codec->dec = base64_stream_decode_sse42;
130 return true;
131 }
132 if (flags & BASE64_FORCE_AVX) {
133 codec->enc = base64_stream_encode_avx;
134 codec->dec = base64_stream_decode_avx;
135 return true;
136 }
137 return false;
138 }
139
// Select a NEON codec based purely on the compile-time configuration.
//
// Returns true after storing the NEON encoder/decoder pair in `codec`, or
// false (leaving `codec` untouched) when the build has no usable NEON codec.
static bool
codec_choose_arm (struct codec *codec)
{
#if !((defined(__ARM_NEON__) || defined(__ARM_NEON)) && ((defined(__aarch64__) && HAVE_NEON64) || HAVE_NEON32))

	// Not a NEON build, or no NEON codec was enabled:
	(void)codec;
	return false;

#elif defined(__aarch64__) && HAVE_NEON64

	// There is no portable userland equivalent of x86's cpuid for probing
	// NEON at runtime, so the compile-time configuration decides.
	codec->enc = base64_stream_encode_neon64;
	codec->dec = base64_stream_decode_neon64;
	return true;

#else

	// Same reasoning as above; NEON is enabled but the 64-bit codec is
	// not selected, so use the 32-bit one.
	codec->enc = base64_stream_encode_neon32;
	codec->dec = base64_stream_decode_neon32;
	return true;

#endif
}
164
// Pick the fastest x86 SIMD codec that is compiled in and that the running
// CPU (and, for AVX/AVX2, the operating system) supports. Candidates are
// tried in the order AVX2, AVX, SSE4.2, SSE4.1, SSSE3.
//
// Returns true after storing the encoder/decoder pair in `codec`. Returns
// false, leaving `codec` untouched, when nothing applies; this includes
// builds in which BASE64_X86_SIMD is not defined.
static bool
codec_choose_x86 (struct codec *codec)
{
#ifdef BASE64_X86_SIMD

	unsigned int eax, ebx = 0, ecx = 0, edx;
	unsigned int max_level;
	unsigned int features;

	#ifdef _MSC_VER
	int info[4];
	__cpuidex(info, 0, 0);
	max_level = info[0];
	#else
	max_level = __get_cpuid_max(0, NULL);
	#endif

	// Every codec below depends on a feature bit from CPUID leaf 1:
	if (max_level < 1) {
		return false;
	}

	// Query leaf 1 once and keep its ECX value. The register variables
	// are reused (and overwritten) by the leaf 7 query further down.
	__cpuid(1, eax, ebx, ecx, edx);
	features = ecx;

	#if HAVE_AVX2 || HAVE_AVX
	// Check for AVX/AVX2 support:
	// Checking for AVX requires 3 things:
	// 1) CPUID indicates that the OS uses XSAVE and XRSTORE instructions
	//    (allowing saving YMM registers on context switch)
	// 2) CPUID indicates support for AVX
	// 3) XGETBV indicates the AVX registers will be saved and restored on
	//    context switch
	//
	// XGETBV is not available on every CPU, so it is only executed after
	// the CPUID bit from step 1 has been seen.
	if (features & bit_XSAVE_XRSTORE) {
		const uint64_t xcr_mask = _xgetbv(_XCR_XFEATURE_ENABLED_MASK);

		// Both the XMM and the YMM state bit must be set. Testing for
		// "any bit set" would wrongly accept a system that preserves
		// XMM state only, and AVX code would then corrupt registers
		// across context switches.
		if ((xcr_mask & _XCR_XMM_AND_YMM_STATE_ENABLED_BY_OS) == _XCR_XMM_AND_YMM_STATE_ENABLED_BY_OS) {
			#if HAVE_AVX2
			if (max_level >= 7) {
				__cpuid_count(7, 0, eax, ebx, ecx, edx);
				if (ebx & bit_AVX2) {
					codec->enc = base64_stream_encode_avx2;
					codec->dec = base64_stream_decode_avx2;
					return true;
				}
			}
			#endif
			#if HAVE_AVX
			if (features & bit_AVX) {
				codec->enc = base64_stream_encode_avx;
				codec->dec = base64_stream_decode_avx;
				return true;
			}
			#endif
		}
	}
	#endif

	#if HAVE_SSE42
	// Check for SSE42 support:
	if (features & bit_SSE42) {
		codec->enc = base64_stream_encode_sse42;
		codec->dec = base64_stream_decode_sse42;
		return true;
	}
	#endif

	#if HAVE_SSE41
	// Check for SSE41 support:
	if (features & bit_SSE41) {
		codec->enc = base64_stream_encode_sse41;
		codec->dec = base64_stream_decode_sse41;
		return true;
	}
	#endif

	#if HAVE_SSSE3
	// Check for SSSE3 support:
	if (features & bit_SSSE3) {
		codec->enc = base64_stream_encode_ssse3;
		codec->dec = base64_stream_decode_ssse3;
		return true;
	}
	#endif

#else
	(void)codec;
#endif

	return false;
}
263
264 void
codec_choose(struct codec * codec,int flags)265 codec_choose (struct codec *codec, int flags)
266 {
267 // User forced a codec:
268 if (codec_choose_forced(codec, flags)) {
269 return;
270 }
271
272 // Runtime feature detection:
273 if (codec_choose_arm(codec)) {
274 return;
275 }
276 if (codec_choose_x86(codec)) {
277 return;
278 }
279 codec->enc = base64_stream_encode_plain;
280 codec->dec = base64_stream_decode_plain;
281 }
282