1 /* CpuArch.c -- CPU specific code
2 2021-07-13 : Igor Pavlov : Public domain */
3
4 #include "Precomp.h"
5
6 #include "CpuArch.h"
7
8 #ifdef MY_CPU_X86_OR_AMD64
9
10 #if (defined(_MSC_VER) && !defined(MY_CPU_AMD64)) || defined(__GNUC__)
11 #define USE_ASM
12 #endif
13
14 #if !defined(USE_ASM) && _MSC_VER >= 1500
15 #include <intrin.h>
16 #endif
17
#if defined(USE_ASM) && !defined(MY_CPU_AMD64)

/*
  CheckFlag() tests whether the EFLAGS bits selected by (flag) can be
  changed by software:
    1) EFLAGS is read and a copy is kept in EDX,
    2) EFLAGS is written back with the (flag) bits inverted,
    3) EFLAGS is read again and XORed with the saved copy,
       so EAX holds the bits that really changed,
    4) the saved EFLAGS value is restored.
  Returns (flag) masked with the changed bits: the result is 0,
  if none of the requested bits could be toggled.
*/
static UInt32 CheckFlag(UInt32 flag)
{
  #ifdef _MSC_VER

  __asm
  {
    pushfd
    pop   EAX
    mov   EDX, EAX
    xor   EAX, flag
    push  EAX
    popfd
    pushfd
    pop   EAX
    xor   EAX, EDX
    push  EDX
    popfd
    and   flag, EAX
  }

  #else

  __asm__ __volatile__ (
    "pushf\n\t"                 /* read EFLAGS ...                  */
    "pop %%EAX\n\t"             /* ... to EAX                       */
    "movl %%EAX,%%EDX\n\t"      /* EDX = saved EFLAGS               */
    "xorl %0,%%EAX\n\t"         /* invert the requested bits        */
    "push %%EAX\n\t"
    "popf\n\t"                  /* try to write modified EFLAGS     */
    "pushf\n\t"
    "pop %%EAX\n\t"             /* read EFLAGS back                 */
    "xorl %%EDX,%%EAX\n\t"      /* EAX = bits that really changed   */
    "push %%EDX\n\t"
    "popf\n\t"                  /* restore saved EFLAGS             */
    "andl %%EAX, %0\n\t"        /* flag &= changed bits             */
    : "=c" (flag)
    : "c" (flag)
    : "%eax", "%edx");

  #endif

  return flag;
}

/*
  Guard statement for functions that return BoolInt:
  it returns False, if EFLAGS bit 18 or EFLAGS bit 21 can not be toggled.
  (bit 21 is the ID flag: CPUID is available only if that flag is changeable).
  The macro is used without trailing semicolon.
*/
#define CHECK_CPUID_IS_SUPPORTED if (CheckFlag(1 << 18) == 0 || CheckFlag(1 << 21) == 0) return False;

#else

/* No EFLAGS probing in this configuration: the guard expands to nothing. */
#define CHECK_CPUID_IS_SUPPORTED

#endif
57
/*
  Selection of MY__cpuidex(info, func, func2) for builds without inline asm:
    - MSVC 1600+ : the __cpuidex intrinsic is used directly,
    - older MSVC : MY__cpuidex_HACK() (see below),
    - other compilers : plain __cpuid(info, func); (func2) is ignored there.
*/
#ifndef USE_ASM
#ifdef _MSC_VER
#if _MSC_VER >= 1600
#define MY__cpuidex __cpuidex
#else

/*
  The cpuid instruction requires the sub-function number in ECX
  for some functions: for example, (function == 4).
  MSDN: The __cpuid intrinsic clears the ECX register before calling the cpuid instruction.
  __cpuid() in new MSVC clears ECX.
  __cpuid() in old MSVC (14.00) doesn't clear ECX.
  We still can use __cpuid() for low (function) values that don't require ECX,
  but __cpuid() in old MSVC will be incorrect for some function values: (function == 4).
  So here we use a hack for old MSVC to pass (subFunction) in the ECX register
  to the cpuid instruction:
    - ECX holds the first parameter of a MY_FAST_CALL / MY_NO_INLINE function,
    - so the caller of MY__cpuidex_HACK() sets ECX to (subFunction),
    - old MSVC doesn't change ECX in __cpuid(), and the cpuid instruction
      gets the (subFunction) value.

  DON'T remove MY_NO_INLINE and MY_FAST_CALL for MY__cpuidex_HACK() !!!
*/

static
MY_NO_INLINE
void MY_FAST_CALL MY__cpuidex_HACK(UInt32 subFunction, int *CPUInfo, UInt32 function)
{
  /* (subFunction) is not referenced in C code: it must only be the first parameter */
  UNUSED_VAR(subFunction);
  __cpuid(CPUInfo, function);
}

/* the argument order is changed here: (func2) becomes the first parameter of the hack function */
#define MY__cpuidex(info, func, func2) MY__cpuidex_HACK(func2, info, func)
#pragma message("======== MY__cpuidex_HACK WAS USED ========")
#endif
#else
/* (func2) is dropped here, so the sub-function value is not passed to cpuid */
#define MY__cpuidex(info, func, func2) __cpuid(info, func)
#pragma message("======== (INCORRECT ?) cpuid WAS USED ========")
#endif
#endif
95
96
97
98
/*
  MyCPUID() executes the cpuid instruction for the requested (function)
  and stores the resulting EAX, EBX, ECX, EDX values to *a, *b, *c, *d.
  The asm code paths set ECX to 0 before cpuid,
  and the intrinsic code path calls MY__cpuidex() with sub-function 0.
*/
void MyCPUID(UInt32 function, UInt32 *a, UInt32 *b, UInt32 *c, UInt32 *d)
{
  #ifdef USE_ASM

  #ifdef _MSC_VER

  UInt32 eaxOut, ebxOut, ecxOut, edxOut;

  __asm
  {
    xor   EBX, EBX
    xor   ECX, ECX
    xor   EDX, EDX
    mov   EAX, function
    cpuid
    mov   eaxOut, EAX
    mov   ebxOut, EBX
    mov   ecxOut, ECX
    mov   edxOut, EDX
  }

  *a = eaxOut;
  *b = ebxOut;
  *c = ecxOut;
  *d = edxOut;

  #else

  /*
    In __PIC__ builds the code keeps the original (E/R)BX value in (E/R)DI
    during cpuid, and then exchanges the registers:
    so the EBX result is returned via the "D" register, and (E/R)BX is restored.
  */
  __asm__ __volatile__ (
  #if defined(MY_CPU_AMD64) && defined(__PIC__)
    "mov %%rbx, %%rdi;"
    "cpuid;"
    "xchg %%rbx, %%rdi;"
    : "=a" (*a) ,
      "=D" (*b) ,
  #elif defined(MY_CPU_X86) && defined(__PIC__)
    "mov %%ebx, %%edi;"
    "cpuid;"
    "xchgl %%ebx, %%edi;"
    : "=a" (*a) ,
      "=D" (*b) ,
  #else
    "cpuid"
    : "=a" (*a) ,
      "=b" (*b) ,
  #endif
      "=c" (*c) ,
      "=d" (*d)
    : "0" (function), "c"(0) ) ;

  #endif

  #else

  /* intrinsic code path: regs[] receives EAX, EBX, ECX, EDX in that order */
  int regs[4];

  MY__cpuidex(regs, (int)function, 0);

  *a = (UInt32)regs[0];
  *b = (UInt32)regs[1];
  *c = (UInt32)regs[2];
  *d = (UInt32)regs[3];

  #endif
}
160
x86cpuid_CheckAndRead(Cx86cpuid * p)161 BoolInt x86cpuid_CheckAndRead(Cx86cpuid *p)
162 {
163 CHECK_CPUID_IS_SUPPORTED
164 MyCPUID(0, &p->maxFunc, &p->vendor[0], &p->vendor[2], &p->vendor[1]);
165 MyCPUID(1, &p->ver, &p->b, &p->c, &p->d);
166 return True;
167 }
168
169 static const UInt32 kVendors[][3] =
170 {
171 { 0x756E6547, 0x49656E69, 0x6C65746E},
172 { 0x68747541, 0x69746E65, 0x444D4163},
173 { 0x746E6543, 0x48727561, 0x736C7561}
174 };
175
x86cpuid_GetFirm(const Cx86cpuid * p)176 int x86cpuid_GetFirm(const Cx86cpuid *p)
177 {
178 unsigned i;
179 for (i = 0; i < sizeof(kVendors) / sizeof(kVendors[i]); i++)
180 {
181 const UInt32 *v = kVendors[i];
182 if (v[0] == p->vendor[0] &&
183 v[1] == p->vendor[1] &&
184 v[2] == p->vendor[2])
185 return (int)i;
186 }
187 return -1;
188 }
189
CPU_Is_InOrder()190 BoolInt CPU_Is_InOrder()
191 {
192 Cx86cpuid p;
193 int firm;
194 UInt32 family, model;
195 if (!x86cpuid_CheckAndRead(&p))
196 return True;
197
198 family = x86cpuid_GetFamily(p.ver);
199 model = x86cpuid_GetModel(p.ver);
200
201 firm = x86cpuid_GetFirm(&p);
202
203 switch (firm)
204 {
205 case CPU_FIRM_INTEL: return (family < 6 || (family == 6 && (
206 /* In-Order Atom CPU */
207 model == 0x1C /* 45 nm, N4xx, D4xx, N5xx, D5xx, 230, 330 */
208 || model == 0x26 /* 45 nm, Z6xx */
209 || model == 0x27 /* 32 nm, Z2460 */
210 || model == 0x35 /* 32 nm, Z2760 */
211 || model == 0x36 /* 32 nm, N2xxx, D2xxx */
212 )));
213 case CPU_FIRM_AMD: return (family < 5 || (family == 5 && (model < 6 || model == 0xA)));
214 case CPU_FIRM_VIA: return (family < 6 || (family == 6 && model < 0xF));
215 }
216 return True;
217 }
218
#if !defined(MY_CPU_AMD64) && defined(_WIN32)

#include <windows.h>

/*
  CPU_Sys_Is_SSE_Supported() checks the Windows version for 32-bit x86 builds.
  Returns False, if GetVersionEx() fails.
  Otherwise returns nonzero, if the major version of the system is 5 or later.
*/
static BoolInt CPU_Sys_Is_SSE_Supported()
{
  OSVERSIONINFO osInfo;
  osInfo.dwOSVersionInfoSize = sizeof(osInfo);
  return GetVersionEx(&osInfo) ? (osInfo.dwMajorVersion >= 5) : False;
}

/*
  Guard statement: it returns False from the calling function,
  if the system check fails. The macro is used without trailing semicolon.
*/
#define CHECK_SYS_SSE_SUPPORT if (!CPU_Sys_Is_SSE_Supported()) return False;

#else

/* No system check in this configuration: the guard expands to nothing. */
#define CHECK_SYS_SSE_SUPPORT

#endif
233
234
X86_CPUID_ECX_Get_Flags()235 static UInt32 X86_CPUID_ECX_Get_Flags()
236 {
237 Cx86cpuid p;
238 CHECK_SYS_SSE_SUPPORT
239 if (!x86cpuid_CheckAndRead(&p))
240 return 0;
241 return p.c;
242 }
243
CPU_IsSupported_AES()244 BoolInt CPU_IsSupported_AES()
245 {
246 return (X86_CPUID_ECX_Get_Flags() >> 25) & 1;
247 }
248
CPU_IsSupported_SSSE3()249 BoolInt CPU_IsSupported_SSSE3()
250 {
251 return (X86_CPUID_ECX_Get_Flags() >> 9) & 1;
252 }
253
CPU_IsSupported_SSE41()254 BoolInt CPU_IsSupported_SSE41()
255 {
256 return (X86_CPUID_ECX_Get_Flags() >> 19) & 1;
257 }
258
CPU_IsSupported_SHA()259 BoolInt CPU_IsSupported_SHA()
260 {
261 Cx86cpuid p;
262 CHECK_SYS_SSE_SUPPORT
263 if (!x86cpuid_CheckAndRead(&p))
264 return False;
265
266 if (p.maxFunc < 7)
267 return False;
268 {
269 UInt32 d[4] = { 0 };
270 MyCPUID(7, &d[0], &d[1], &d[2], &d[3]);
271 return (d[1] >> 29) & 1;
272 }
273 }
274
275 // #include <stdio.h>
276
277 #ifdef _WIN32
278 #include <windows.h>
279 #endif
280
CPU_IsSupported_AVX2()281 BoolInt CPU_IsSupported_AVX2()
282 {
283 Cx86cpuid p;
284 CHECK_SYS_SSE_SUPPORT
285
286 #ifdef _WIN32
287 #define MY__PF_XSAVE_ENABLED 17
288 if (!IsProcessorFeaturePresent(MY__PF_XSAVE_ENABLED))
289 return False;
290 #endif
291
292 if (!x86cpuid_CheckAndRead(&p))
293 return False;
294 if (p.maxFunc < 7)
295 return False;
296 {
297 UInt32 d[4] = { 0 };
298 MyCPUID(7, &d[0], &d[1], &d[2], &d[3]);
299 // printf("\ncpuid(7): ebx=%8x ecx=%8x\n", d[1], d[2]);
300 return 1
301 & (d[1] >> 5); // avx2
302 }
303 }
304
CPU_IsSupported_VAES_AVX2()305 BoolInt CPU_IsSupported_VAES_AVX2()
306 {
307 Cx86cpuid p;
308 CHECK_SYS_SSE_SUPPORT
309
310 #ifdef _WIN32
311 #define MY__PF_XSAVE_ENABLED 17
312 if (!IsProcessorFeaturePresent(MY__PF_XSAVE_ENABLED))
313 return False;
314 #endif
315
316 if (!x86cpuid_CheckAndRead(&p))
317 return False;
318 if (p.maxFunc < 7)
319 return False;
320 {
321 UInt32 d[4] = { 0 };
322 MyCPUID(7, &d[0], &d[1], &d[2], &d[3]);
323 // printf("\ncpuid(7): ebx=%8x ecx=%8x\n", d[1], d[2]);
324 return 1
325 & (d[1] >> 5) // avx2
326 // & (d[1] >> 31) // avx512vl
327 & (d[2] >> 9); // vaes // VEX-256/EVEX
328 }
329 }
330
CPU_IsSupported_PageGB()331 BoolInt CPU_IsSupported_PageGB()
332 {
333 Cx86cpuid cpuid;
334 if (!x86cpuid_CheckAndRead(&cpuid))
335 return False;
336 {
337 UInt32 d[4] = { 0 };
338 MyCPUID(0x80000000, &d[0], &d[1], &d[2], &d[3]);
339 if (d[0] < 0x80000001)
340 return False;
341 }
342 {
343 UInt32 d[4] = { 0 };
344 MyCPUID(0x80000001, &d[0], &d[1], &d[2], &d[3]);
345 return (d[3] >> 26) & 1;
346 }
347 }
348
349
350 #elif defined(MY_CPU_ARM_OR_ARM64)
351
352 #ifdef _WIN32
353
354 #include <windows.h>
355
CPU_IsSupported_CRC32()356 BoolInt CPU_IsSupported_CRC32() { return IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE) ? 1 : 0; }
CPU_IsSupported_CRYPTO()357 BoolInt CPU_IsSupported_CRYPTO() { return IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE) ? 1 : 0; }
CPU_IsSupported_NEON()358 BoolInt CPU_IsSupported_NEON() { return IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE) ? 1 : 0; }
359
360 #else
361
362 #if defined(__APPLE__)
363
364 /*
365 #include <stdio.h>
366 #include <string.h>
367 static void Print_sysctlbyname(const char *name)
368 {
369 size_t bufSize = 256;
370 char buf[256];
371 int res = sysctlbyname(name, &buf, &bufSize, NULL, 0);
372 {
373 int i;
374 printf("\nres = %d : %s : '%s' : bufSize = %d, numeric", res, name, buf, (unsigned)bufSize);
375 for (i = 0; i < 20; i++)
376 printf(" %2x", (unsigned)(Byte)buf[i]);
377
378 }
379 }
380 */
381
My_sysctlbyname_Get_BoolInt(const char * name)382 static BoolInt My_sysctlbyname_Get_BoolInt(const char *name)
383 {
384 UInt32 val = 0;
385 if (My_sysctlbyname_Get_UInt32(name, &val) == 0 && val == 1)
386 return 1;
387 return 0;
388 }
389
390 /*
391 Print_sysctlbyname("hw.pagesize");
392 Print_sysctlbyname("machdep.cpu.brand_string");
393 */
394
CPU_IsSupported_CRC32(void)395 BoolInt CPU_IsSupported_CRC32(void)
396 {
397 return My_sysctlbyname_Get_BoolInt("hw.optional.armv8_crc32");
398 }
399
CPU_IsSupported_NEON(void)400 BoolInt CPU_IsSupported_NEON(void)
401 {
402 return My_sysctlbyname_Get_BoolInt("hw.optional.neon");
403 }
404
405 #ifdef MY_CPU_ARM64
406 #define APPLE_CRYPTO_SUPPORT_VAL 1
407 #else
408 #define APPLE_CRYPTO_SUPPORT_VAL 0
409 #endif
410
CPU_IsSupported_SHA1(void)411 BoolInt CPU_IsSupported_SHA1(void) { return APPLE_CRYPTO_SUPPORT_VAL; }
CPU_IsSupported_SHA2(void)412 BoolInt CPU_IsSupported_SHA2(void) { return APPLE_CRYPTO_SUPPORT_VAL; }
CPU_IsSupported_AES(void)413 BoolInt CPU_IsSupported_AES (void) { return APPLE_CRYPTO_SUPPORT_VAL; }
414
415
416 #else // __APPLE__
417
418 #include <sys/auxv.h>
419
420 #define USE_HWCAP
421
422 #ifdef USE_HWCAP
423
424 #include <asm/hwcap.h>
425
426 #define MY_HWCAP_CHECK_FUNC_2(name1, name2) \
427 BoolInt CPU_IsSupported_ ## name1() { return (getauxval(AT_HWCAP) & (HWCAP_ ## name2)) ? 1 : 0; }
428
429 #ifdef MY_CPU_ARM64
430 #define MY_HWCAP_CHECK_FUNC(name) \
431 MY_HWCAP_CHECK_FUNC_2(name, name)
MY_HWCAP_CHECK_FUNC_2(NEON,ASIMD)432 MY_HWCAP_CHECK_FUNC_2(NEON, ASIMD)
433 // MY_HWCAP_CHECK_FUNC (ASIMD)
434 #elif defined(MY_CPU_ARM)
435 #define MY_HWCAP_CHECK_FUNC(name) \
436 BoolInt CPU_IsSupported_ ## name() { return (getauxval(AT_HWCAP2) & (HWCAP2_ ## name)) ? 1 : 0; }
437 MY_HWCAP_CHECK_FUNC_2(NEON, NEON)
438 #endif
439
440 #else // USE_HWCAP
441
442 #define MY_HWCAP_CHECK_FUNC(name) \
443 BoolInt CPU_IsSupported_ ## name() { return 0; }
444 MY_HWCAP_CHECK_FUNC(NEON)
445
446 #endif // USE_HWCAP
447
448 MY_HWCAP_CHECK_FUNC (CRC32)
449 MY_HWCAP_CHECK_FUNC (SHA1)
450 MY_HWCAP_CHECK_FUNC (SHA2)
451 MY_HWCAP_CHECK_FUNC (AES)
452
453 #endif // __APPLE__
454 #endif // _WIN32
455
456 #endif // MY_CPU_ARM_OR_ARM64
457
458
459
460 #ifdef __APPLE__
461
462 #include <sys/sysctl.h>
463
/*
  My_sysctlbyname_Get() is a read-only wrapper for sysctlbyname():
  no new value is passed (NULL, 0).
    buf     : the buffer that receives the value
    bufSize : passed to sysctlbyname() as the in/out size argument
  Returns the result of sysctlbyname().
*/
int My_sysctlbyname_Get(const char *name, void *buf, size_t *bufSize)
{
  const int res = sysctlbyname(name, buf, bufSize, NULL, 0);
  return res;
}
468
/*
  My_sysctlbyname_Get_UInt32() reads the sysctl value (name) to (*val).
  Returns:
    0        : OK, and the size of the value is sizeof(UInt32)
    EFAULT   : the read call succeeded, but the size of the value differs from sizeof(UInt32)
    another  : the nonzero result of My_sysctlbyname_Get()
*/
int My_sysctlbyname_Get_UInt32(const char *name, UInt32 *val)
{
  size_t size = sizeof(*val);
  const int res = My_sysctlbyname_Get(name, val, &size);
  if (res != 0)
    return res;
  return (size == sizeof(*val)) ? 0 : EFAULT;
}
477
478 #endif
479