• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 #include <stdbool.h>
2 #include <stdint.h>
3 #include <stddef.h>
4 #include <stdint.h>
5 #include <stdio.h>
6 
7 #include "../include/libbase64.h"
8 #include "codecs.h"
9 #include "config.h"
10 #include "env.h"
11 
// Detect x86 targets. GCC/Clang define __x86_64__ / __i386__; MSVC defines
// _M_X64 for 64-bit and _M_IX86 for 32-bit x86. The _M_X86 spelling is kept
// so that build setups which define it by hand keep working.
#if (__x86_64__ || __i386__ || _M_IX86 || _M_X86 || _M_X64)
  #define BASE64_X86
  // Runtime SIMD dispatch is only needed if at least one x86 SIMD codec
  // was enabled at build time.
  #if (HAVE_SSSE3 || HAVE_SSE41 || HAVE_SSE42 || HAVE_AVX || HAVE_AVX2 || HAVE_AVX512)
    #define BASE64_X86_SIMD
  #endif
#endif

#ifdef BASE64_X86
#ifdef _MSC_VER
	#include <intrin.h>
	// Emulate the GCC-style cpuid macros on top of the __cpuidex intrinsic,
	// so that the detection code below can use one spelling everywhere.
	#define __cpuid_count(__level, __count, __eax, __ebx, __ecx, __edx) \
	{						\
		int info[4];				\
		__cpuidex(info, __level, __count);	\
		__eax = info[0];			\
		__ebx = info[1];			\
		__ecx = info[2];			\
		__edx = info[3];			\
	}
	#define __cpuid(__level, __eax, __ebx, __ecx, __edx) \
		__cpuid_count(__level, 0, __eax, __ebx, __ecx, __edx)
#else
	#include <cpuid.h>
	#if HAVE_AVX512 || HAVE_AVX2 || HAVE_AVX
		#if ((__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 2)) || (__clang_major__ >= 3))
			// Read the extended control register selected by `index`
			// using the XGETBV instruction. The instruction returns the
			// value split over EDX:EAX; the halves are merged into one
			// 64-bit result.
			static inline uint64_t _xgetbv (uint32_t index)
			{
				uint32_t eax, edx;
				__asm__ __volatile__("xgetbv" : "=a"(eax), "=d"(edx) : "c"(index));
				return ((uint64_t)edx << 32) | eax;
			}
		#else
			#error "Platform not supported"
		#endif
	#endif
#endif

// Fallback definitions for CPUID feature bits that the platform headers
// may not provide under these names.
#ifndef bit_AVX512vl
// Unsigned literal: shifting a signed 1 into bit 31 is undefined behaviour.
#define bit_AVX512vl (1U << 31)
#endif
#ifndef bit_AVX512vbmi
#define bit_AVX512vbmi (1 << 1)
#endif
#ifndef bit_AVX2
#define bit_AVX2 (1 << 5)
#endif
#ifndef bit_SSSE3
#define bit_SSSE3 (1 << 9)
#endif
#ifndef bit_SSE41
#define bit_SSE41 (1 << 19)
#endif
#ifndef bit_SSE42
#define bit_SSE42 (1 << 20)
#endif
#ifndef bit_AVX
#define bit_AVX (1 << 28)
#endif

// CPUID leaf 1 ECX bit tested before XGETBV is executed.
#define bit_XSAVE_XRSTORE (1 << 27)

// Index of the extended control register passed to _xgetbv.
#ifndef _XCR_XFEATURE_ENABLED_MASK
#define _XCR_XFEATURE_ENABLED_MASK 0
#endif

// Mask of the two XCR bits (bits 1 and 2) that must both be set for the
// XMM and YMM register state to be enabled by the OS.
#define _XCR_XMM_AND_YMM_STATE_ENABLED_BY_OS 0x6
#endif
79 
// Forward declarations of the per-architecture stream codecs.
// BASE64_CODEC_FUNCS(arch) declares both the encoder and the decoder for
// one architecture through the BASE64_ENC_FUNCTION / BASE64_DEC_FUNCTION
// macros (defined outside this part of the file). The closing semicolon is
// supplied at the point of use.
#define BASE64_CODEC_FUNCS(arch)	\
	BASE64_ENC_FUNCTION(arch);	\
	BASE64_DEC_FUNCTION(arch)

// Portable fallback:
BASE64_CODEC_FUNCS(plain);

// ARM:
BASE64_CODEC_FUNCS(neon32);
BASE64_CODEC_FUNCS(neon64);

// x86:
BASE64_CODEC_FUNCS(ssse3);
BASE64_CODEC_FUNCS(sse41);
BASE64_CODEC_FUNCS(sse42);
BASE64_CODEC_FUNCS(avx);
BASE64_CODEC_FUNCS(avx2);
BASE64_CODEC_FUNCS(avx512);
94 
95 static bool
96 codec_choose_forced (struct codec *codec, int flags)
97 {
98 	// If the user wants to use a certain codec,
99 	// always allow it, even if the codec is a no-op.
100 	// For testing purposes.
101 
102 	if (!(flags & 0xFFFF)) {
103 		return false;
104 	}
105 
106 	if (flags & BASE64_FORCE_AVX2) {
107 		codec->enc = base64_stream_encode_avx2;
108 		codec->dec = base64_stream_decode_avx2;
109 		return true;
110 	}
111 	if (flags & BASE64_FORCE_NEON32) {
112 		codec->enc = base64_stream_encode_neon32;
113 		codec->dec = base64_stream_decode_neon32;
114 		return true;
115 	}
116 	if (flags & BASE64_FORCE_NEON64) {
117 		codec->enc = base64_stream_encode_neon64;
118 		codec->dec = base64_stream_decode_neon64;
119 		return true;
120 	}
121 	if (flags & BASE64_FORCE_PLAIN) {
122 		codec->enc = base64_stream_encode_plain;
123 		codec->dec = base64_stream_decode_plain;
124 		return true;
125 	}
126 	if (flags & BASE64_FORCE_SSSE3) {
127 		codec->enc = base64_stream_encode_ssse3;
128 		codec->dec = base64_stream_decode_ssse3;
129 		return true;
130 	}
131 	if (flags & BASE64_FORCE_SSE41) {
132 		codec->enc = base64_stream_encode_sse41;
133 		codec->dec = base64_stream_decode_sse41;
134 		return true;
135 	}
136 	if (flags & BASE64_FORCE_SSE42) {
137 		codec->enc = base64_stream_encode_sse42;
138 		codec->dec = base64_stream_decode_sse42;
139 		return true;
140 	}
141 	if (flags & BASE64_FORCE_AVX) {
142 		codec->enc = base64_stream_encode_avx;
143 		codec->dec = base64_stream_decode_avx;
144 		return true;
145 	}
146 	if (flags & BASE64_FORCE_AVX512) {
147 		codec->enc = base64_stream_encode_avx512;
148 		codec->dec = base64_stream_decode_avx512;
149 		return true;
150 	}
151 	return false;
152 }
153 
// Pick a NEON codec on ARM builds.
//
// The choice is made entirely at compile time: there is no portable way to
// query NEON support at runtime from userland comparable to cpuid on x86.
// Returns true if a NEON codec was installed, false otherwise.
static bool
codec_choose_arm (struct codec *codec)
{
#if !(defined(__ARM_NEON__) || defined(__ARM_NEON))

	// Compiler does not target NEON: nothing to select.
	(void)codec;
	return false;

#elif defined(__aarch64__) && HAVE_NEON64

	// 64-bit ARM with the NEON64 codec compiled in.
	codec->enc = base64_stream_encode_neon64;
	codec->dec = base64_stream_decode_neon64;
	return true;

#elif HAVE_NEON32

	// Otherwise fall back to the NEON32 codec when it is compiled in.
	codec->enc = base64_stream_encode_neon32;
	codec->dec = base64_stream_decode_neon32;
	return true;

#else

	// NEON target, but no NEON codec was enabled in the build.
	(void)codec;
	return false;

#endif
}
178 
// Select the most capable x86 SIMD codec supported by the running system.
//
// Only codecs enabled at build time (HAVE_* macros) are considered. They
// are tried in this order: AVX512, AVX2, AVX, SSE4.2, SSE4.1, SSSE3.
// On success the enc/dec pointers of `codec` are set and true is returned.
// If nothing matches, `codec` is left untouched and false is returned.
static bool
codec_choose_x86 (struct codec *codec)
{
#ifdef BASE64_X86_SIMD

	unsigned int eax, ebx = 0, ecx = 0, edx;
	unsigned int max_level;

	// Highest basic CPUID leaf supported by this CPU:
	#ifdef _MSC_VER
	int info[4];
	__cpuidex(info, 0, 0);
	max_level = info[0];
	#else
	max_level = __get_cpuid_max(0, NULL);
	#endif

	#if HAVE_AVX512 || HAVE_AVX2 || HAVE_AVX
	// Check for AVX/AVX2/AVX512 support:
	// Checking for AVX requires 3 things:
	// 1) CPUID indicates that the OS uses XSAVE and XRSTORE instructions
	//    (allowing saving YMM registers on context switch)
	// 2) CPUID indicates support for AVX
	// 3) XGETBV indicates the AVX registers will be saved and restored on
	//    context switch
	//
	// Note that XGETBV is only available on 686 or later CPUs, so the
	// instruction needs to be conditionally run.
	if (max_level >= 1) {
		__cpuid_count(1, 0, eax, ebx, ecx, edx);
		if (ecx & bit_XSAVE_XRSTORE) {
			const uint64_t xcr_mask = _xgetbv(_XCR_XFEATURE_ENABLED_MASK);

			// Both the XMM and the YMM state bits must be set:
			if ((xcr_mask & _XCR_XMM_AND_YMM_STATE_ENABLED_BY_OS) == _XCR_XMM_AND_YMM_STATE_ENABLED_BY_OS) {
				#if HAVE_AVX512
				// AVX-512 additionally needs the OS to manage the
				// opmask, upper-ZMM and high-16-ZMM state, reported in
				// XCR0 bits 5, 6 and 7. Without that, AVX-512
				// instructions fault even if CPUID advertises them.
				const uint64_t zmm_state_mask = 0xE0;

				if (max_level >= 7 && (xcr_mask & zmm_state_mask) == zmm_state_mask) {
					__cpuid_count(7, 0, eax, ebx, ecx, edx);
					if ((ebx & bit_AVX512vl) && (ecx & bit_AVX512vbmi)) {
						codec->enc = base64_stream_encode_avx512;
						codec->dec = base64_stream_decode_avx512;
						return true;
					}
				}
				#endif
				#if HAVE_AVX2
				if (max_level >= 7) {
					__cpuid_count(7, 0, eax, ebx, ecx, edx);
					if (ebx & bit_AVX2) {
						codec->enc = base64_stream_encode_avx2;
						codec->dec = base64_stream_decode_avx2;
						return true;
					}
				}
				#endif
				#if HAVE_AVX
				// Leaf 1 is queried again because the leaf 7 queries
				// above may have overwritten ecx.
				__cpuid_count(1, 0, eax, ebx, ecx, edx);
				if (ecx & bit_AVX) {
					codec->enc = base64_stream_encode_avx;
					codec->dec = base64_stream_decode_avx;
					return true;
				}
				#endif
			}
		}
	}
	#endif

	#if HAVE_SSE42
	// Check for SSE42 support:
	if (max_level >= 1) {
		__cpuid(1, eax, ebx, ecx, edx);
		if (ecx & bit_SSE42) {
			codec->enc = base64_stream_encode_sse42;
			codec->dec = base64_stream_decode_sse42;
			return true;
		}
	}
	#endif

	#if HAVE_SSE41
	// Check for SSE41 support:
	if (max_level >= 1) {
		__cpuid(1, eax, ebx, ecx, edx);
		if (ecx & bit_SSE41) {
			codec->enc = base64_stream_encode_sse41;
			codec->dec = base64_stream_decode_sse41;
			return true;
		}
	}
	#endif

	#if HAVE_SSSE3
	// Check for SSSE3 support:
	if (max_level >= 1) {
		__cpuid(1, eax, ebx, ecx, edx);
		if (ecx & bit_SSSE3) {
			codec->enc = base64_stream_encode_ssse3;
			codec->dec = base64_stream_decode_ssse3;
			return true;
		}
	}
	#endif

#else
	// No x86 SIMD codec in this build: the argument is intentionally unused.
	(void)codec;
#endif

	return false;
}
287 
288 void
codec_choose(struct codec * codec,int flags)289 codec_choose (struct codec *codec, int flags)
290 {
291 	// User forced a codec:
292 	if (codec_choose_forced(codec, flags)) {
293 		return;
294 	}
295 
296 	// Runtime feature detection:
297 	if (codec_choose_arm(codec)) {
298 		return;
299 	}
300 	if (codec_choose_x86(codec)) {
301 		return;
302 	}
303 	codec->enc = base64_stream_encode_plain;
304 	codec->dec = base64_stream_decode_plain;
305 }
306