• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 #include <stdbool.h>
2 #include <stdint.h>
3 #include <stddef.h>
4 #include <stdint.h>
5 
6 #include "../include/libbase64.h"
7 #include "codecs.h"
8 #include "config.h"
9 #include "env.h"
10 
// Detect x86 targets. BASE64_X86 enables the CPUID helpers below;
// BASE64_X86_SIMD is additionally defined when at least one of the x86
// SIMD codecs is switched on through its HAVE_* macro (presumably set in
// config.h).
// MSVC predefines _M_IX86 (not _M_X86) for 32-bit x86 targets; the old
// spelling is kept as well so that builds defining it by hand still work.
#if (__x86_64__ || __i386__ || _M_X86 || _M_IX86 || _M_X64)
  #define BASE64_X86
  #if (HAVE_SSSE3 || HAVE_SSE41 || HAVE_SSE42 || HAVE_AVX || HAVE_AVX2)
    #define BASE64_X86_SIMD
  #endif
#endif

#ifdef BASE64_X86
#ifdef _MSC_VER
	#include <intrin.h>

	// MSVC ships no <cpuid.h>, so provide wrappers with the same calling
	// shape on top of the __cpuidex intrinsic. The do/while(0) wrapper
	// makes each expansion behave as one statement, so it is safe in an
	// unbraced if/else as well.
	#define __cpuid_count(__level, __count, __eax, __ebx, __ecx, __edx) \
	do {								\
		int b64_cpuid_regs[4];					\
		__cpuidex(b64_cpuid_regs, (__level), (__count));	\
		(__eax) = b64_cpuid_regs[0];				\
		(__ebx) = b64_cpuid_regs[1];				\
		(__ecx) = b64_cpuid_regs[2];				\
		(__edx) = b64_cpuid_regs[3];				\
	} while (0)
	#define __cpuid(__level, __eax, __ebx, __ecx, __edx) \
		__cpuid_count(__level, 0, __eax, __ebx, __ecx, __edx)
#else
	#include <cpuid.h>
	#if HAVE_AVX2 || HAVE_AVX
		#if ((__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 2)) || (__clang_major__ >= 3))
			// Read the extended control register selected by
			// `index` with the XGETBV instruction and return
			// EDX:EAX combined into a single 64-bit value.
			static inline uint64_t _xgetbv (uint32_t index)
			{
				uint32_t eax, edx;
				__asm__ __volatile__("xgetbv" : "=a"(eax), "=d"(edx) : "c"(index));
				return ((uint64_t)edx << 32) | eax;
			}
		#else
			#error "Platform not supported"
		#endif
	#endif
#endif

// CPUID feature bits; only defined here when the compiler's headers did
// not already provide them under these names.
#ifndef bit_AVX2
#define bit_AVX2 (1 << 5)
#endif
#ifndef bit_SSSE3
#define bit_SSSE3 (1 << 9)
#endif
#ifndef bit_SSE41
#define bit_SSE41 (1 << 19)
#endif
#ifndef bit_SSE42
#define bit_SSE42 (1 << 20)
#endif
#ifndef bit_AVX
#define bit_AVX (1 << 28)
#endif

// Tested against ECX of CPUID leaf 1 before XGETBV is executed.
#define bit_XSAVE_XRSTORE (1 << 27)

// Index handed to _xgetbv() to read the feature-enabled mask.
#ifndef _XCR_XFEATURE_ENABLED_MASK
#define _XCR_XFEATURE_ENABLED_MASK 0
#endif

// Bits of the feature-enabled mask that must be set before the AVX
// codecs are used.
#define _XCR_XMM_AND_YMM_STATE_ENABLED_BY_OS 0x6
#endif
72 
// Declare the encoder and decoder of every codec, whatever the build
// configuration, so that the selection code below can name all of them.
// BASE64_ENC_FUNCTION and BASE64_DEC_FUNCTION are defined outside this
// file; presumably they expand to the base64_stream_encode_<arch> and
// base64_stream_decode_<arch> prototypes.
#define BASE64_CODEC_FUNCS(arch)	\
	BASE64_ENC_FUNCTION(arch);	\
	BASE64_DEC_FUNCTION(arch);

// Portable fallback:
BASE64_CODEC_FUNCS(plain)

// ARM:
BASE64_CODEC_FUNCS(neon32)
BASE64_CODEC_FUNCS(neon64)

// x86:
BASE64_CODEC_FUNCS(ssse3)
BASE64_CODEC_FUNCS(sse41)
BASE64_CODEC_FUNCS(sse42)
BASE64_CODEC_FUNCS(avx)
BASE64_CODEC_FUNCS(avx2)
86 
87 static bool
88 codec_choose_forced (struct codec *codec, int flags)
89 {
90 	// If the user wants to use a certain codec,
91 	// always allow it, even if the codec is a no-op.
92 	// For testing purposes.
93 
94 	if (!(flags & 0xFF)) {
95 		return false;
96 	}
97 	if (flags & BASE64_FORCE_AVX2) {
98 		codec->enc = base64_stream_encode_avx2;
99 		codec->dec = base64_stream_decode_avx2;
100 		return true;
101 	}
102 	if (flags & BASE64_FORCE_NEON32) {
103 		codec->enc = base64_stream_encode_neon32;
104 		codec->dec = base64_stream_decode_neon32;
105 		return true;
106 	}
107 	if (flags & BASE64_FORCE_NEON64) {
108 		codec->enc = base64_stream_encode_neon64;
109 		codec->dec = base64_stream_decode_neon64;
110 		return true;
111 	}
112 	if (flags & BASE64_FORCE_PLAIN) {
113 		codec->enc = base64_stream_encode_plain;
114 		codec->dec = base64_stream_decode_plain;
115 		return true;
116 	}
117 	if (flags & BASE64_FORCE_SSSE3) {
118 		codec->enc = base64_stream_encode_ssse3;
119 		codec->dec = base64_stream_decode_ssse3;
120 		return true;
121 	}
122 	if (flags & BASE64_FORCE_SSE41) {
123 		codec->enc = base64_stream_encode_sse41;
124 		codec->dec = base64_stream_decode_sse41;
125 		return true;
126 	}
127 	if (flags & BASE64_FORCE_SSE42) {
128 		codec->enc = base64_stream_encode_sse42;
129 		codec->dec = base64_stream_decode_sse42;
130 		return true;
131 	}
132 	if (flags & BASE64_FORCE_AVX) {
133 		codec->enc = base64_stream_encode_avx;
134 		codec->dec = base64_stream_decode_avx;
135 		return true;
136 	}
137 	return false;
138 }
139 
// Select a NEON codec purely from the compile-time configuration.
//
// There is no portable way to query NEON support at runtime from
// userland (nothing comparable to cpuid on x86), so the decision is made
// entirely by the preprocessor.
//
// Returns true when `codec` was set to the NEON64 or NEON32 functions,
// false (with `codec` untouched) when no NEON codec applies to this build.
static bool
codec_choose_arm (struct codec *codec)
{
#if !(defined(__ARM_NEON__) || defined(__ARM_NEON))

	// The compiler is not targeting NEON at all.
	(void)codec;
	return false;

#elif defined(__aarch64__) && HAVE_NEON64

	codec->enc = base64_stream_encode_neon64;
	codec->dec = base64_stream_decode_neon64;
	return true;

#elif HAVE_NEON32

	codec->enc = base64_stream_encode_neon32;
	codec->dec = base64_stream_decode_neon32;
	return true;

#else

	// NEON is available, but no NEON codec was enabled for this build.
	(void)codec;
	return false;

#endif
}
164 
// Pick the fastest x86 SIMD codec that was compiled in and that the
// running machine can execute, probing the CPU with CPUID.
//
// codec: receives the encoder/decoder pair of the selected codec.
//
// Returns true when a SIMD codec was stored in `codec`. Returns false and
// leaves `codec` untouched when no x86 SIMD codec was compiled in or when
// none of the compiled-in codecs is usable here.
// Preference order: AVX2, AVX, SSE4.2, SSE4.1, SSSE3.
static bool
codec_choose_x86 (struct codec *codec)
{
#ifdef BASE64_X86_SIMD

	unsigned int eax = 0, ebx = 0, ecx = 0, edx = 0;
	unsigned int max_level;
	unsigned int leaf1_ecx;

	#ifdef _MSC_VER
	int info[4];
	__cpuidex(info, 0, 0);
	max_level = info[0];
	#else
	max_level = __get_cpuid_max(0, NULL);
	#endif

	// Every check below depends on CPUID leaf 1, so give up early when
	// the CPU does not report it.
	if (max_level < 1) {
		return false;
	}

	// Query leaf 1 once and keep its ECX word: the leaf 7 query further
	// down overwrites the register variables.
	__cpuid_count(1, 0, eax, ebx, ecx, edx);
	leaf1_ecx = ecx;

	#if HAVE_AVX2 || HAVE_AVX
	// Check for AVX/AVX2 support:
	// Checking for AVX requires 3 things:
	// 1) CPUID indicates that the OS uses XSAVE and XRSTORE instructions
	//    (allowing saving YMM registers on context switch)
	// 2) CPUID indicates support for AVX
	// 3) XGETBV indicates the AVX registers will be saved and restored on
	//    context switch
	//
	// Note that XGETBV is only available on 686 or later CPUs, so the
	// instruction needs to be conditionally run.
	if (leaf1_ecx & bit_XSAVE_XRSTORE) {
		const uint64_t xcr_mask = _xgetbv(_XCR_XFEATURE_ENABLED_MASK);

		// Both the XMM and the YMM state bit must be set. A plain
		// `xcr_mask & mask` would also accept a mask in which only
		// one of the two bits is enabled.
		if ((xcr_mask & _XCR_XMM_AND_YMM_STATE_ENABLED_BY_OS)
				== _XCR_XMM_AND_YMM_STATE_ENABLED_BY_OS) {
			#if HAVE_AVX2
			if (max_level >= 7) {
				__cpuid_count(7, 0, eax, ebx, ecx, edx);
				if (ebx & bit_AVX2) {
					codec->enc = base64_stream_encode_avx2;
					codec->dec = base64_stream_decode_avx2;
					return true;
				}
			}
			#endif
			#if HAVE_AVX
			if (leaf1_ecx & bit_AVX) {
				codec->enc = base64_stream_encode_avx;
				codec->dec = base64_stream_decode_avx;
				return true;
			}
			#endif
		}
	}
	#endif

	#if HAVE_SSE42
	// Check for SSE42 support:
	if (leaf1_ecx & bit_SSE42) {
		codec->enc = base64_stream_encode_sse42;
		codec->dec = base64_stream_decode_sse42;
		return true;
	}
	#endif

	#if HAVE_SSE41
	// Check for SSE41 support:
	if (leaf1_ecx & bit_SSE41) {
		codec->enc = base64_stream_encode_sse41;
		codec->dec = base64_stream_decode_sse41;
		return true;
	}
	#endif

	#if HAVE_SSSE3
	// Check for SSSE3 support:
	if (leaf1_ecx & bit_SSSE3) {
		codec->enc = base64_stream_encode_ssse3;
		codec->dec = base64_stream_decode_ssse3;
		return true;
	}
	#endif

#else
	(void)codec;
#endif

	return false;
}
263 
264 void
codec_choose(struct codec * codec,int flags)265 codec_choose (struct codec *codec, int flags)
266 {
267 	// User forced a codec:
268 	if (codec_choose_forced(codec, flags)) {
269 		return;
270 	}
271 
272 	// Runtime feature detection:
273 	if (codec_choose_arm(codec)) {
274 		return;
275 	}
276 	if (codec_choose_x86(codec)) {
277 		return;
278 	}
279 	codec->enc = base64_stream_encode_plain;
280 	codec->dec = base64_stream_decode_plain;
281 }
282