1 /*!
2 * \copy
3 * Copyright (c) 2009-2013, Cisco Systems
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * * Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 *
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
21 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
22 * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
23 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
24 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
28 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 *
31 *
32 * \file cpu.cpp
33 *
34 * \brief CPU compatibility detection
35 *
36 * \date 04/29/2009 Created
37 *
38 *************************************************************************************
39 */
40 #include <string.h>
41 #include <stdio.h>
42 #ifdef ANDROID_NDK
43 #include <cpu-features.h>
44 #endif
45 #include "cpu.h"
46 #include "cpu_core.h"
47
48
49
50 #define CPU_Vendor_AMD "AuthenticAMD"
51 #define CPU_Vendor_INTEL "GenuineIntel"
52 #define CPU_Vendor_CYRIX "CyrixInstead"
53
54 #if defined(X86_ASM)
55
WelsCPUFeatureDetect(int32_t * pNumberOfLogicProcessors)56 uint32_t WelsCPUFeatureDetect (int32_t* pNumberOfLogicProcessors) {
57 uint32_t uiCPU = 0;
58 uint32_t uiFeatureA = 0, uiFeatureB = 0, uiFeatureC = 0, uiFeatureD = 0;
59 int32_t CacheLineSize = 0;
60 int8_t chVendorName[16] = { 0 };
61 uint32_t uiMaxCpuidLevel = 0;
62
63 if (!WelsCPUIdVerify()) {
64 /* cpuid is not supported in cpu */
65 return 0;
66 }
67
68 WelsCPUId (0, &uiFeatureA, (uint32_t*)&chVendorName[0], (uint32_t*)&chVendorName[8], (uint32_t*)&chVendorName[4]);
69 uiMaxCpuidLevel = uiFeatureA;
70 if (uiMaxCpuidLevel == 0) {
71 /* maximum input value for basic cpuid information */
72 return 0;
73 }
74
75 WelsCPUId (1, &uiFeatureA, &uiFeatureB, &uiFeatureC, &uiFeatureD);
76 if ((uiFeatureD & 0x00800000) == 0) {
77 /* Basic MMX technology is not support in cpu, mean nothing for us so return here */
78 return 0;
79 }
80
81 uiCPU = WELS_CPU_MMX;
82 if (uiFeatureD & 0x02000000) {
83 /* SSE technology is identical to AMD MMX extensions */
84 uiCPU |= WELS_CPU_MMXEXT | WELS_CPU_SSE;
85 }
86 if (uiFeatureD & 0x04000000) {
87 /* SSE2 support here */
88 uiCPU |= WELS_CPU_SSE2;
89 }
90 if (uiFeatureD & 0x00000001) {
91 /* x87 FPU on-chip checking */
92 uiCPU |= WELS_CPU_FPU;
93 }
94 if (uiFeatureD & 0x00008000) {
95 /* CMOV instruction checking */
96 uiCPU |= WELS_CPU_CMOV;
97 }
98 if ((!strcmp ((const char*)chVendorName, CPU_Vendor_INTEL)) ||
99 (!strcmp ((const char*)chVendorName, CPU_Vendor_AMD))) { // confirmed_safe_unsafe_usage
100 if (uiFeatureD & 0x10000000) {
101 /* Multi-Threading checking: contains of multiple logic processors */
102 uiCPU |= WELS_CPU_HTT;
103 }
104 }
105
106 if (uiFeatureC & 0x00000001) {
107 /* SSE3 support here */
108 uiCPU |= WELS_CPU_SSE3;
109 }
110 if (uiFeatureC & 0x00000200) {
111 /* SSSE3 support here */
112 uiCPU |= WELS_CPU_SSSE3;
113 }
114 if (uiFeatureC & 0x00080000) {
115 /* SSE4.1 support here, 45nm Penryn processor */
116 uiCPU |= WELS_CPU_SSE41;
117 }
118 if (uiFeatureC & 0x00100000) {
119 /* SSE4.2 support here, next generation Nehalem processor */
120 uiCPU |= WELS_CPU_SSE42;
121 }
122 if (WelsCPUSupportAVX (uiFeatureA, uiFeatureC)) {
123 /* AVX supported */
124 uiCPU |= WELS_CPU_AVX;
125 }
126 if (WelsCPUSupportFMA (uiFeatureA, uiFeatureC)) {
127 /* AVX FMA supported */
128 uiCPU |= WELS_CPU_FMA;
129 }
130 if (uiFeatureC & 0x02000000) {
131 /* AES checking */
132 uiCPU |= WELS_CPU_AES;
133 }
134 if (uiFeatureC & 0x00400000) {
135 /* MOVBE checking */
136 uiCPU |= WELS_CPU_MOVBE;
137 }
138
139 if (uiMaxCpuidLevel >= 7) {
140 uiFeatureC = 0;
141 WelsCPUId (7, &uiFeatureA, &uiFeatureB, &uiFeatureC, &uiFeatureD);
142 if ((uiCPU & WELS_CPU_AVX) && (uiFeatureB & 0x00000020)) {
143 /* AVX2 supported */
144 uiCPU |= WELS_CPU_AVX2;
145 }
146 }
147
148 if (uiMaxCpuidLevel >= 7) {
149 uiFeatureC = WelsCPUDetectAVX512();
150 if (uiFeatureC & 0x10000) uiCPU |= WELS_CPU_AVX512F;
151 if (uiFeatureC & 0x10000000) uiCPU |= WELS_CPU_AVX512CD;
152 if (uiFeatureC & 0x20000) uiCPU |= WELS_CPU_AVX512DQ;
153 if (uiFeatureC & 0x40000000) uiCPU |= WELS_CPU_AVX512BW;
154 if (uiFeatureC & 0x80000000) uiCPU |= WELS_CPU_AVX512VL;
155 }
156
157 if (pNumberOfLogicProcessors != NULL) {
158 if (uiCPU & WELS_CPU_HTT) {
159 *pNumberOfLogicProcessors = (uiFeatureB & 0x00ff0000) >> 16; // feature bits: 23-16 on returned EBX
160 } else {
161 *pNumberOfLogicProcessors = 0;
162 }
163 if (!strcmp ((const char*)chVendorName, CPU_Vendor_INTEL)) {
164 if (uiMaxCpuidLevel >= 4) {
165 uiFeatureC = 0;
166 WelsCPUId (0x4, &uiFeatureA, &uiFeatureB, &uiFeatureC, &uiFeatureD);
167 if (uiFeatureA != 0) {
168 *pNumberOfLogicProcessors = ((uiFeatureA & 0xfc000000) >> 26) + 1;
169 }
170 }
171 }
172 }
173
174 WelsCPUId (0x80000000, &uiFeatureA, &uiFeatureB, &uiFeatureC, &uiFeatureD);
175
176 if ((!strcmp ((const char*)chVendorName, CPU_Vendor_AMD))
177 && (uiFeatureA >= 0x80000001)) { // confirmed_safe_unsafe_usage
178 WelsCPUId (0x80000001, &uiFeatureA, &uiFeatureB, &uiFeatureC, &uiFeatureD);
179 if (uiFeatureD & 0x00400000) {
180 uiCPU |= WELS_CPU_MMXEXT;
181 }
182 if (uiFeatureD & 0x80000000) {
183 uiCPU |= WELS_CPU_3DNOW;
184 }
185 }
186
187 if (!strcmp ((const char*)chVendorName, CPU_Vendor_INTEL)) { // confirmed_safe_unsafe_usage
188 int32_t family, model;
189
190 WelsCPUId (1, &uiFeatureA, &uiFeatureB, &uiFeatureC, &uiFeatureD);
191 family = ((uiFeatureA >> 8) & 0xf) + ((uiFeatureA >> 20) & 0xff);
192 model = ((uiFeatureA >> 4) & 0xf) + ((uiFeatureA >> 12) & 0xf0);
193
194 if ((family == 6) && (model == 9 || model == 13 || model == 14)) {
195 uiCPU &= ~ (WELS_CPU_SSE2 | WELS_CPU_SSE3);
196 }
197 }
198
199 // get cache line size
200 if ((!strcmp ((const char*)chVendorName, CPU_Vendor_INTEL))
201 || ! (strcmp ((const char*)chVendorName, CPU_Vendor_CYRIX))) { // confirmed_safe_unsafe_usage
202 WelsCPUId (1, &uiFeatureA, &uiFeatureB, &uiFeatureC, &uiFeatureD);
203
204 CacheLineSize = (uiFeatureB & 0xff00) >>
205 5; // ((clflush_line_size >> 8) << 3), CLFLUSH_line_size * 8 = CacheLineSize_in_byte
206
207 if (CacheLineSize == 128) {
208 uiCPU |= WELS_CPU_CACHELINE_128;
209 } else if (CacheLineSize == 64) {
210 uiCPU |= WELS_CPU_CACHELINE_64;
211 } else if (CacheLineSize == 32) {
212 uiCPU |= WELS_CPU_CACHELINE_32;
213 } else if (CacheLineSize == 16) {
214 uiCPU |= WELS_CPU_CACHELINE_16;
215 }
216 }
217
218 return uiCPU;
219 }
220
221
WelsCPURestore(const uint32_t kuiCPU)222 void WelsCPURestore (const uint32_t kuiCPU) {
223 if (kuiCPU & (WELS_CPU_MMX | WELS_CPU_MMXEXT | WELS_CPU_3DNOW | WELS_CPU_3DNOWEXT)) {
224 WelsEmms();
225 }
226 }
227
228 #elif defined(HAVE_NEON) //For supporting both android platform and iOS platform
229 #if defined(ANDROID_NDK)
WelsCPUFeatureDetect(int32_t * pNumberOfLogicProcessors)230 uint32_t WelsCPUFeatureDetect (int32_t* pNumberOfLogicProcessors) {
231 uint32_t uiCPU = 0;
232 AndroidCpuFamily cpuFamily = ANDROID_CPU_FAMILY_UNKNOWN;
233 uint64_t uiFeatures = 0;
234 cpuFamily = android_getCpuFamily();
235 if (cpuFamily == ANDROID_CPU_FAMILY_ARM) {
236 uiFeatures = android_getCpuFeatures();
237 if (uiFeatures & ANDROID_CPU_ARM_FEATURE_ARMv7) {
238 uiCPU |= WELS_CPU_ARMv7;
239 }
240 if (uiFeatures & ANDROID_CPU_ARM_FEATURE_VFPv3) {
241 uiCPU |= WELS_CPU_VFPv3;
242 }
243 if (uiFeatures & ANDROID_CPU_ARM_FEATURE_NEON) {
244 uiCPU |= WELS_CPU_NEON;
245 }
246 }
247
248 if (pNumberOfLogicProcessors != NULL) {
249 *pNumberOfLogicProcessors = android_getCpuCount();
250 }
251
252 return uiCPU;
253 }
254
255 #elif defined(__APPLE__)
/*!
 * \brief   Apple variant: the answer is fixed at compile time.
 *
 * \param   pNumberOfLogicProcessors  not read and not written by this variant.
 *
 * \return  WELS_CPU_ARMv7 | WELS_CPU_VFPv3 | WELS_CPU_NEON when the compiler defines
 *          __ARM_NEON__, otherwise 0.
 */
uint32_t WelsCPUFeatureDetect (int32_t* pNumberOfLogicProcessors) {
#if defined(__ARM_NEON__)
  const uint32_t kuiCompileTimeFlags = WELS_CPU_ARMv7 | WELS_CPU_VFPv3 | WELS_CPU_NEON;
#else
  const uint32_t kuiCompileTimeFlags = 0;
#endif

  return kuiCompileTimeFlags;
}
266 #elif defined(__linux__)
267
268 /* Generic arm/linux cpu feature detection */
WelsCPUFeatureDetect(int32_t * pNumberOfLogicProcessors)269 uint32_t WelsCPUFeatureDetect (int32_t* pNumberOfLogicProcessors) {
270 int flags = 0;
271 FILE* f = fopen ("/proc/cpuinfo", "r");
272
273 #if defined(__chromeos__)
274 flags |= WELS_CPU_NEON;
275 #endif
276
277 if (!f) {
278 return flags;
279 }
280
281 char buf[200];
282 while (fgets (buf, sizeof (buf), f)) {
283 if (!strncmp (buf, "Features", strlen ("Features"))) {
284 // The asimd and fp features are listed on 64 bit ARMv8 kernels
285 if (strstr (buf, " neon ") || strstr (buf, " asimd "))
286 flags |= WELS_CPU_NEON;
287 if (strstr (buf, " vfpv3 ") || strstr (buf, " fp "))
288 flags |= WELS_CPU_VFPv3;
289 break;
290 }
291 }
292 fclose (f);
293 return flags;
294 }
295
296 #else /* HAVE_NEON enabled but no runtime detection */
297
298 /* No runtime feature detection available, but built with HAVE_NEON - assume
299 * that NEON and all associated features are available. */
300
WelsCPUFeatureDetect(int32_t * pNumberOfLogicProcessors)301 uint32_t WelsCPUFeatureDetect (int32_t* pNumberOfLogicProcessors) {
302 return WELS_CPU_ARMv7 |
303 WELS_CPU_VFPv3 |
304 WELS_CPU_NEON;
305 }
306 #endif
307 #elif defined(HAVE_NEON_AARCH64)
308
/* For AArch64, no runtime detection is actually necessary for now, since
 * NEON and VFPv3 are mandatory on all such CPUs. (/proc/cpuinfo doesn't
 * contain neon, and the android cpufeatures library doesn't return it
 * either.) */
313
WelsCPUFeatureDetect(int32_t * pNumberOfLogicProcessors)314 uint32_t WelsCPUFeatureDetect (int32_t* pNumberOfLogicProcessors) {
315 return WELS_CPU_VFPv3 |
316 WELS_CPU_NEON;
317 }
318
319 #elif defined(mips)
320 /* Get cpu features from cpuinfo. */
get_cpu_flags_from_cpuinfo(void)321 static uint32_t get_cpu_flags_from_cpuinfo(void)
322 {
323 uint32_t flags = 0;
324
325 # ifdef __linux__
326 FILE* fp = fopen("/proc/cpuinfo", "r");
327 if (!fp)
328 return flags;
329
330 char buf[200];
331 memset(buf, 0, sizeof(buf));
332 while (fgets(buf, sizeof(buf), fp)) {
333 if (!strncmp(buf, "model name", strlen("model name"))) {
334 if (strstr(buf, "Loongson-3A") || strstr(buf, "Loongson-3B") ||
335 strstr(buf, "Loongson-2K")) {
336 flags |= WELS_CPU_MMI;
337 }
338 break;
339 }
340 }
341 while (fgets(buf, sizeof(buf), fp)) {
342 if(!strncmp(buf, "ASEs implemented", strlen("ASEs implemented"))) {
343 if (strstr(buf, "loongson-mmi") && strstr(buf, "loongson-ext")) {
344 flags |= WELS_CPU_MMI;
345 }
346 if (strstr(buf, "msa")) {
347 flags |= WELS_CPU_MSA;
348 }
349 break;
350 }
351 }
352 fclose(fp);
353 # endif
354
355 return flags;
356 }
357
WelsCPUFeatureDetect(int32_t * pNumberOfLogicProcessors)358 uint32_t WelsCPUFeatureDetect (int32_t* pNumberOfLogicProcessors) {
359 return get_cpu_flags_from_cpuinfo();
360 }
361
362 #elif defined(__loongarch__) && defined(__linux__)
363 /* The CPUCFG instruction is used to dynamically identify the characteristics
364 * of the loongarch in the running processor during software execution. */
365 #define LOONGARCH_CFG2 0x02
366 #define LOONGARCH_CFG2_LSX (1<<6)
367 #define LOONGARCH_CFG2_LASX (1<<7)
368
get_cpu_flags_from_cpucfg(void)369 static uint32_t get_cpu_flags_from_cpucfg(void) {
370 uint32_t reg = 0;
371 uint32_t flags = 0;
372
373 __asm__ volatile(
374 "cpucfg %0, %1 \n\t"
375 : "+&r"(reg)
376 : "r"(LOONGARCH_CFG2)
377 );
378 if (reg & LOONGARCH_CFG2_LSX)
379 flags |= WELS_CPU_LSX;
380 if (reg & LOONGARCH_CFG2_LASX)
381 flags |= WELS_CPU_LASX;
382 return flags;
383 }
384
WelsCPUFeatureDetect(int32_t * pNumberOfLogicProcessors)385 uint32_t WelsCPUFeatureDetect (int32_t* pNumberOfLogicProcessors) {
386 return get_cpu_flags_from_cpucfg();
387 }
388
389 #else /* Neither X86_ASM, HAVE_NEON, HAVE_NEON_AARCH64 nor mips */
390
/*!
 * \brief   Fallback used when none of the platform-specific variants is compiled in:
 *          no CPU features are reported.
 *
 * \param   pNumberOfLogicProcessors  not read and not written by this variant.
 *
 * \return  always 0.
 */
uint32_t WelsCPUFeatureDetect (int32_t* pNumberOfLogicProcessors) {
  const uint32_t kuiNoFeatures = 0;

  (void) pNumberOfLogicProcessors; /* intentionally unused */
  return kuiNoFeatures;
}
394
395 #endif
396