1 #include <stdbool.h>
2 #include <stdint.h>
3 #include <stddef.h>
4 #include <stdint.h>
5 #include <stdio.h>
6
7 #include "../include/libbase64.h"
8 #include "codecs.h"
9 #include "config.h"
10 #include "env.h"
11
// Detect x86 targets. GCC and Clang define __x86_64__ / __i386__; MSVC
// defines _M_X64 for x64 and _M_IX86 for 32-bit x86. _M_X86 is kept only for
// backward compatibility: it does not appear to be a documented compiler
// macro, so on its own it left 32-bit MSVC builds undetected.
#if (__x86_64__ || __i386__ || _M_IX86 || _M_X86 || _M_X64)
	#define BASE64_X86
	// BASE64_X86_SIMD is set when at least one x86 SIMD codec is enabled via
	// the HAVE_* switches (presumably provided by config.h).
	#if (HAVE_SSSE3 || HAVE_SSE41 || HAVE_SSE42 || HAVE_AVX || HAVE_AVX2 || HAVE_AVX512)
		#define BASE64_X86_SIMD
	#endif
#endif
18
19 #ifdef BASE64_X86
20 #ifdef _MSC_VER
21 #include <intrin.h>
// GCC-style __cpuid_count() / __cpuid() statement macros built on top of
// __cpuidex(), for the compiler branch that does not include <cpuid.h>.
// The four output arguments must be assignable lvalues; they receive the
// EAX, EBX, ECX and EDX results in that order.
//
// The body is wrapped in do { } while (0) so that the macro behaves as a
// single statement (safe inside an unbraced if/else), and the scratch array
// has a name that is unlikely to shadow a caller's variable.
#define __cpuid_count(level, count, eax, ebx, ecx, edx) \
	do { \
		int b64_cpuid_regs_[4]; \
		__cpuidex(b64_cpuid_regs_, (level), (count)); \
		(eax) = b64_cpuid_regs_[0]; \
		(ebx) = b64_cpuid_regs_[1]; \
		(ecx) = b64_cpuid_regs_[2]; \
		(edx) = b64_cpuid_regs_[3]; \
	} while (0)

// Same as __cpuid_count() with the sub-leaf fixed to 0.
#define __cpuid(level, eax, ebx, ecx, edx) \
	__cpuid_count(level, 0, eax, ebx, ecx, edx)
33 #else
34 #include <cpuid.h>
35 #if HAVE_AVX512 || HAVE_AVX2 || HAVE_AVX
36 #if ((__GNUC__ > 4 || __GNUC__ == 4 && __GNUC_MINOR__ >= 2) || (__clang_major__ >= 3))
// Execute XGETBV for the extended control register selected by `index`
// (passed in ECX) and return the 64-bit result assembled from EDX:EAX.
static inline uint64_t
_xgetbv (uint32_t index)
{
	uint32_t lo, hi;

	__asm__ __volatile__("xgetbv" : "=a"(lo), "=d"(hi) : "c"(index));

	// EDX holds the upper half, EAX the lower half.
	return (uint64_t) lo | ((uint64_t) hi << 32);
}
43 #else
44 #error "Platform not supported"
45 #endif
46 #endif
47 #endif
48
// Fallback definitions for the CPUID feature bits tested in this file, used
// when the compiler's headers do not already provide them. All masks are
// unsigned: they are ANDed with unsigned register values, and (1 << 31) on
// a 32-bit signed int would shift into the sign bit (undefined behavior).

// CPUID leaf 7, EBX:
#ifndef bit_AVX512vl
#define bit_AVX512vl (1U << 31)
#endif
// CPUID leaf 7, ECX:
#ifndef bit_AVX512vbmi
#define bit_AVX512vbmi (1U << 1)
#endif
// CPUID leaf 7, EBX:
#ifndef bit_AVX2
#define bit_AVX2 (1U << 5)
#endif
// CPUID leaf 1, ECX:
#ifndef bit_SSSE3
#define bit_SSSE3 (1U << 9)
#endif
#ifndef bit_SSE41
#define bit_SSE41 (1U << 19)
#endif
#ifndef bit_SSE42
#define bit_SSE42 (1U << 20)
#endif
#ifndef bit_AVX
#define bit_AVX (1U << 28)
#endif

// CPUID leaf 1, ECX: the OS uses XSAVE/XRSTORE, so XGETBV may be executed.
#define bit_XSAVE_XRSTORE (1U << 27)

// Index of the extended control register read through _xgetbv().
#ifndef _XCR_XFEATURE_ENABLED_MASK
#define _XCR_XFEATURE_ENABLED_MASK 0
#endif

// Bits of that register that must both be set before AVX code may run
// (XMM and YMM state enabled by the OS).
#define _XCR_XMM_AND_YMM_STATE_ENABLED_BY_OS 0x6
78 #endif
79
// Function declarations.
//
// BASE64_CODEC_FUNCS(arch) emits the encoder and decoder declarations for one
// codec via BASE64_ENC_FUNCTION / BASE64_DEC_FUNCTION (defined outside this
// file; presumably they declare the base64_stream_encode_<arch> and
// base64_stream_decode_<arch> functions referenced below). The expansion
// already ends in a semicolon, so the invocations take none.
#define BASE64_CODEC_FUNCS(arch) \
	BASE64_ENC_FUNCTION(arch); \
	BASE64_DEC_FUNCTION(arch);

// Portable fallback:
BASE64_CODEC_FUNCS(plain)

// x86 codecs:
BASE64_CODEC_FUNCS(ssse3)
BASE64_CODEC_FUNCS(sse41)
BASE64_CODEC_FUNCS(sse42)
BASE64_CODEC_FUNCS(avx)
BASE64_CODEC_FUNCS(avx2)
BASE64_CODEC_FUNCS(avx512)

// ARM codecs:
BASE64_CODEC_FUNCS(neon32)
BASE64_CODEC_FUNCS(neon64)
94
95 static bool
96 codec_choose_forced (struct codec *codec, int flags)
97 {
98 // If the user wants to use a certain codec,
99 // always allow it, even if the codec is a no-op.
100 // For testing purposes.
101
102 if (!(flags & 0xFFFF)) {
103 return false;
104 }
105
106 if (flags & BASE64_FORCE_AVX2) {
107 codec->enc = base64_stream_encode_avx2;
108 codec->dec = base64_stream_decode_avx2;
109 return true;
110 }
111 if (flags & BASE64_FORCE_NEON32) {
112 codec->enc = base64_stream_encode_neon32;
113 codec->dec = base64_stream_decode_neon32;
114 return true;
115 }
116 if (flags & BASE64_FORCE_NEON64) {
117 codec->enc = base64_stream_encode_neon64;
118 codec->dec = base64_stream_decode_neon64;
119 return true;
120 }
121 if (flags & BASE64_FORCE_PLAIN) {
122 codec->enc = base64_stream_encode_plain;
123 codec->dec = base64_stream_decode_plain;
124 return true;
125 }
126 if (flags & BASE64_FORCE_SSSE3) {
127 codec->enc = base64_stream_encode_ssse3;
128 codec->dec = base64_stream_decode_ssse3;
129 return true;
130 }
131 if (flags & BASE64_FORCE_SSE41) {
132 codec->enc = base64_stream_encode_sse41;
133 codec->dec = base64_stream_decode_sse41;
134 return true;
135 }
136 if (flags & BASE64_FORCE_SSE42) {
137 codec->enc = base64_stream_encode_sse42;
138 codec->dec = base64_stream_decode_sse42;
139 return true;
140 }
141 if (flags & BASE64_FORCE_AVX) {
142 codec->enc = base64_stream_encode_avx;
143 codec->dec = base64_stream_decode_avx;
144 return true;
145 }
146 if (flags & BASE64_FORCE_AVX512) {
147 codec->enc = base64_stream_encode_avx512;
148 codec->dec = base64_stream_decode_avx512;
149 return true;
150 }
151 return false;
152 }
153
// Select a NEON codec when the build targets ARM with NEON enabled.
// Returns true after filling in codec->enc and codec->dec, or false
// (leaving the codec untouched) when no NEON codec is configured.
static bool
codec_choose_arm (struct codec *codec)
{
#if !((defined(__ARM_NEON__) || defined(__ARM_NEON)) && ((defined(__aarch64__) && HAVE_NEON64) || HAVE_NEON32))

	// Not an ARM NEON build: nothing to choose from.
	(void)codec;
	return false;

#elif defined(__aarch64__) && HAVE_NEON64

	// Unfortunately there is no portable way to check for NEON support
	// at runtime from userland in the same way that x86 has cpuid, so
	// the choice follows the compile-time configuration.
	codec->enc = base64_stream_encode_neon64;
	codec->dec = base64_stream_decode_neon64;
	return true;

#else

	// Same compile-time reasoning as above, for the 32-bit NEON codec.
	codec->enc = base64_stream_encode_neon32;
	codec->dec = base64_stream_decode_neon32;
	return true;

#endif
}
178
// Select the most capable x86 SIMD codec that is both enabled at build time
// (HAVE_* switches) and reported as usable by CPUID/XGETBV at run time.
// Candidates are tried in the order AVX512, AVX2, AVX, SSE4.2, SSE4.1, SSSE3.
// Returns true after filling in codec->enc and codec->dec; returns false and
// leaves the codec untouched when nothing matches or when this is not an x86
// SIMD build.
static bool
codec_choose_x86 (struct codec *codec)
{
#ifdef BASE64_X86_SIMD

	unsigned int eax, ebx = 0, ecx = 0, edx;
	unsigned int max_level;

	// Highest basic CPUID leaf supported by this CPU:
	#ifdef _MSC_VER
	int info[4];
	__cpuidex(info, 0, 0);
	max_level = info[0];
	#else
	max_level = __get_cpuid_max(0, NULL);
	#endif

	#if HAVE_AVX512 || HAVE_AVX2 || HAVE_AVX
	// Check for AVX/AVX2/AVX512 support:
	// Checking for AVX requires 3 things:
	// 1) CPUID indicates that the OS uses XSAVE and XRSTORE instructions
	//    (allowing saving YMM registers on context switch)
	// 2) CPUID indicates support for AVX
	// 3) XGETBV indicates the AVX registers will be saved and restored on
	//    context switch
	//
	// Note that XGETBV is only available on 686 or later CPUs, so the
	// instruction needs to be conditionally run.
	if (max_level >= 1) {
		__cpuid_count(1, 0, eax, ebx, ecx, edx);
		if (ecx & bit_XSAVE_XRSTORE) {
			const uint64_t xcr_mask = _xgetbv(_XCR_XFEATURE_ENABLED_MASK);
			#if HAVE_AVX512
			// AVX-512 additionally needs the OS to manage the opmask,
			// ZMM_Hi256 and Hi16_ZMM state (XCR0 bits 5, 6 and 7).
			// Without this check, AVX-512 instructions could fault on
			// a system that only enables XMM/YMM state.
			const uint64_t zmm_state = 0xE0;
			const bool os_saves_zmm = (xcr_mask & zmm_state) == zmm_state;
			#endif

			// Both the XMM and the YMM state bit must be set:
			if ((xcr_mask & _XCR_XMM_AND_YMM_STATE_ENABLED_BY_OS) == _XCR_XMM_AND_YMM_STATE_ENABLED_BY_OS) {
				#if HAVE_AVX512 || HAVE_AVX2
				// AVX512 and AVX2 feature bits both live in leaf 7,
				// so a single query serves both checks.
				if (max_level >= 7) {
					__cpuid_count(7, 0, eax, ebx, ecx, edx);
					#if HAVE_AVX512
					if (os_saves_zmm && (ebx & bit_AVX512vl) && (ecx & bit_AVX512vbmi)) {
						codec->enc = base64_stream_encode_avx512;
						codec->dec = base64_stream_decode_avx512;
						return true;
					}
					#endif
					#if HAVE_AVX2
					if (ebx & bit_AVX2) {
						codec->enc = base64_stream_encode_avx2;
						codec->dec = base64_stream_decode_avx2;
						return true;
					}
					#endif
				}
				#endif
				#if HAVE_AVX
				// Re-read leaf 1: the leaf 7 query above may have
				// overwritten ecx.
				__cpuid_count(1, 0, eax, ebx, ecx, edx);
				if (ecx & bit_AVX) {
					codec->enc = base64_stream_encode_avx;
					codec->dec = base64_stream_decode_avx;
					return true;
				}
				#endif
			}
		}
	}
	#endif

	#if HAVE_SSE42 || HAVE_SSE41 || HAVE_SSSE3
	// The SSE-family feature bits all live in leaf 1 ECX, so one query
	// covers every check below; the most capable codec is tried first.
	if (max_level >= 1) {
		__cpuid(1, eax, ebx, ecx, edx);

		#if HAVE_SSE42
		// Check for SSE42 support:
		if (ecx & bit_SSE42) {
			codec->enc = base64_stream_encode_sse42;
			codec->dec = base64_stream_decode_sse42;
			return true;
		}
		#endif

		#if HAVE_SSE41
		// Check for SSE41 support:
		if (ecx & bit_SSE41) {
			codec->enc = base64_stream_encode_sse41;
			codec->dec = base64_stream_decode_sse41;
			return true;
		}
		#endif

		#if HAVE_SSSE3
		// Check for SSSE3 support:
		if (ecx & bit_SSSE3) {
			codec->enc = base64_stream_encode_ssse3;
			codec->dec = base64_stream_decode_ssse3;
			return true;
		}
		#endif
	}
	#endif

#else
	(void)codec;
#endif

	return false;
}
287
288 void
codec_choose(struct codec * codec,int flags)289 codec_choose (struct codec *codec, int flags)
290 {
291 // User forced a codec:
292 if (codec_choose_forced(codec, flags)) {
293 return;
294 }
295
296 // Runtime feature detection:
297 if (codec_choose_arm(codec)) {
298 return;
299 }
300 if (codec_choose_x86(codec)) {
301 return;
302 }
303 codec->enc = base64_stream_encode_plain;
304 codec->dec = base64_stream_decode_plain;
305 }
306