• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*!
2  * \copy
3  *     Copyright (c)  2009-2013, Cisco Systems
4  *     All rights reserved.
5  *
6  *     Redistribution and use in source and binary forms, with or without
7  *     modification, are permitted provided that the following conditions
8  *     are met:
9  *
10  *        * Redistributions of source code must retain the above copyright
11  *          notice, this list of conditions and the following disclaimer.
12  *
13  *        * Redistributions in binary form must reproduce the above copyright
14  *          notice, this list of conditions and the following disclaimer in
15  *          the documentation and/or other materials provided with the
16  *          distribution.
17  *
18  *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19  *     "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20  *     LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
21  *     FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
22  *     COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
23  *     INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
24  *     BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25  *     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26  *     CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  *     LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
28  *     ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  *     POSSIBILITY OF SUCH DAMAGE.
30  *
31  *
32  * \file    cpu.cpp
33  *
34  * \brief   CPU compatibility detection
35  *
36  * \date    04/29/2009 Created
37  *
38  *************************************************************************************
39  */
40 #include <string.h>
41 #include <stdio.h>
42 #ifdef ANDROID_NDK
43 #include <cpu-features.h>
44 #endif
45 #include "cpu.h"
46 #include "cpu_core.h"
47 
48 
49 
50 #define    CPU_Vendor_AMD    "AuthenticAMD"
51 #define    CPU_Vendor_INTEL  "GenuineIntel"
52 #define    CPU_Vendor_CYRIX  "CyrixInstead"
53 
54 #if defined(X86_ASM)
55 
WelsCPUFeatureDetect(int32_t * pNumberOfLogicProcessors)56 uint32_t WelsCPUFeatureDetect (int32_t* pNumberOfLogicProcessors) {
57   uint32_t uiCPU = 0;
58   uint32_t uiFeatureA = 0, uiFeatureB = 0, uiFeatureC = 0, uiFeatureD = 0;
59   int32_t  CacheLineSize = 0;
60   int8_t   chVendorName[16] = { 0 };
61   uint32_t uiMaxCpuidLevel = 0;
62 
63   if (!WelsCPUIdVerify()) {
64     /* cpuid is not supported in cpu */
65     return 0;
66   }
67 
68   WelsCPUId (0, &uiFeatureA, (uint32_t*)&chVendorName[0], (uint32_t*)&chVendorName[8], (uint32_t*)&chVendorName[4]);
69   uiMaxCpuidLevel = uiFeatureA;
70   if (uiMaxCpuidLevel == 0) {
71     /* maximum input value for basic cpuid information */
72     return 0;
73   }
74 
75   WelsCPUId (1, &uiFeatureA, &uiFeatureB, &uiFeatureC, &uiFeatureD);
76   if ((uiFeatureD & 0x00800000) == 0) {
77     /* Basic MMX technology is not support in cpu, mean nothing for us so return here */
78     return 0;
79   }
80 
81   uiCPU = WELS_CPU_MMX;
82   if (uiFeatureD & 0x02000000) {
83     /* SSE technology is identical to AMD MMX extensions */
84     uiCPU |= WELS_CPU_MMXEXT | WELS_CPU_SSE;
85   }
86   if (uiFeatureD & 0x04000000) {
87     /* SSE2 support here */
88     uiCPU |= WELS_CPU_SSE2;
89   }
90   if (uiFeatureD & 0x00000001) {
91     /* x87 FPU on-chip checking */
92     uiCPU |= WELS_CPU_FPU;
93   }
94   if (uiFeatureD & 0x00008000) {
95     /* CMOV instruction checking */
96     uiCPU |= WELS_CPU_CMOV;
97   }
98   if ((!strcmp ((const char*)chVendorName, CPU_Vendor_INTEL)) ||
99       (!strcmp ((const char*)chVendorName, CPU_Vendor_AMD))) { // confirmed_safe_unsafe_usage
100     if (uiFeatureD & 0x10000000) {
101       /* Multi-Threading checking: contains of multiple logic processors */
102       uiCPU |= WELS_CPU_HTT;
103     }
104   }
105 
106   if (uiFeatureC & 0x00000001) {
107     /* SSE3 support here */
108     uiCPU |= WELS_CPU_SSE3;
109   }
110   if (uiFeatureC & 0x00000200) {
111     /* SSSE3 support here */
112     uiCPU |= WELS_CPU_SSSE3;
113   }
114   if (uiFeatureC & 0x00080000) {
115     /* SSE4.1 support here, 45nm Penryn processor */
116     uiCPU |= WELS_CPU_SSE41;
117   }
118   if (uiFeatureC & 0x00100000) {
119     /* SSE4.2 support here, next generation Nehalem processor */
120     uiCPU |= WELS_CPU_SSE42;
121   }
122   if (WelsCPUSupportAVX (uiFeatureA, uiFeatureC)) {
123     /* AVX supported */
124     uiCPU |= WELS_CPU_AVX;
125   }
126   if (WelsCPUSupportFMA (uiFeatureA, uiFeatureC)) {
127     /* AVX FMA supported */
128     uiCPU |= WELS_CPU_FMA;
129   }
130   if (uiFeatureC & 0x02000000) {
131     /* AES checking */
132     uiCPU |= WELS_CPU_AES;
133   }
134   if (uiFeatureC & 0x00400000) {
135     /* MOVBE checking */
136     uiCPU |= WELS_CPU_MOVBE;
137   }
138 
139   if (uiMaxCpuidLevel >= 7) {
140     uiFeatureC = 0;
141     WelsCPUId (7, &uiFeatureA, &uiFeatureB, &uiFeatureC, &uiFeatureD);
142     if ((uiCPU & WELS_CPU_AVX) && (uiFeatureB & 0x00000020)) {
143       /* AVX2 supported */
144       uiCPU |= WELS_CPU_AVX2;
145     }
146   }
147 
148   if (uiMaxCpuidLevel >= 7) {
149     uiFeatureC = WelsCPUDetectAVX512();
150     if (uiFeatureC & 0x10000) uiCPU |= WELS_CPU_AVX512F;
151     if (uiFeatureC & 0x10000000) uiCPU |= WELS_CPU_AVX512CD;
152     if (uiFeatureC & 0x20000) uiCPU |= WELS_CPU_AVX512DQ;
153     if (uiFeatureC & 0x40000000) uiCPU |= WELS_CPU_AVX512BW;
154     if (uiFeatureC & 0x80000000) uiCPU |= WELS_CPU_AVX512VL;
155   }
156 
157   if (pNumberOfLogicProcessors != NULL) {
158     if (uiCPU & WELS_CPU_HTT) {
159       *pNumberOfLogicProcessors = (uiFeatureB & 0x00ff0000) >> 16; // feature bits: 23-16 on returned EBX
160     } else {
161       *pNumberOfLogicProcessors = 0;
162     }
163     if (!strcmp ((const char*)chVendorName, CPU_Vendor_INTEL)) {
164       if (uiMaxCpuidLevel >= 4) {
165         uiFeatureC = 0;
166         WelsCPUId (0x4, &uiFeatureA, &uiFeatureB, &uiFeatureC, &uiFeatureD);
167         if (uiFeatureA != 0) {
168           *pNumberOfLogicProcessors = ((uiFeatureA & 0xfc000000) >> 26) + 1;
169         }
170       }
171     }
172   }
173 
174   WelsCPUId (0x80000000, &uiFeatureA, &uiFeatureB, &uiFeatureC, &uiFeatureD);
175 
176   if ((!strcmp ((const char*)chVendorName, CPU_Vendor_AMD))
177       && (uiFeatureA >= 0x80000001)) { // confirmed_safe_unsafe_usage
178     WelsCPUId (0x80000001, &uiFeatureA, &uiFeatureB, &uiFeatureC, &uiFeatureD);
179     if (uiFeatureD & 0x00400000) {
180       uiCPU |= WELS_CPU_MMXEXT;
181     }
182     if (uiFeatureD & 0x80000000) {
183       uiCPU |= WELS_CPU_3DNOW;
184     }
185   }
186 
187   if (!strcmp ((const char*)chVendorName, CPU_Vendor_INTEL)) { // confirmed_safe_unsafe_usage
188     int32_t  family, model;
189 
190     WelsCPUId (1, &uiFeatureA, &uiFeatureB, &uiFeatureC, &uiFeatureD);
191     family = ((uiFeatureA >> 8) & 0xf) + ((uiFeatureA >> 20) & 0xff);
192     model  = ((uiFeatureA >> 4) & 0xf) + ((uiFeatureA >> 12) & 0xf0);
193 
194     if ((family == 6) && (model == 9 || model == 13 || model == 14)) {
195       uiCPU &= ~ (WELS_CPU_SSE2 | WELS_CPU_SSE3);
196     }
197   }
198 
199   // get cache line size
200   if ((!strcmp ((const char*)chVendorName, CPU_Vendor_INTEL))
201       || ! (strcmp ((const char*)chVendorName, CPU_Vendor_CYRIX))) { // confirmed_safe_unsafe_usage
202     WelsCPUId (1, &uiFeatureA, &uiFeatureB, &uiFeatureC, &uiFeatureD);
203 
204     CacheLineSize = (uiFeatureB & 0xff00) >>
205                     5; // ((clflush_line_size >> 8) << 3), CLFLUSH_line_size * 8 = CacheLineSize_in_byte
206 
207     if (CacheLineSize == 128) {
208       uiCPU |= WELS_CPU_CACHELINE_128;
209     } else if (CacheLineSize == 64) {
210       uiCPU |= WELS_CPU_CACHELINE_64;
211     } else if (CacheLineSize == 32) {
212       uiCPU |= WELS_CPU_CACHELINE_32;
213     } else if (CacheLineSize == 16) {
214       uiCPU |= WELS_CPU_CACHELINE_16;
215     }
216   }
217 
218   return uiCPU;
219 }
220 
221 
WelsCPURestore(const uint32_t kuiCPU)222 void WelsCPURestore (const uint32_t kuiCPU) {
223   if (kuiCPU & (WELS_CPU_MMX | WELS_CPU_MMXEXT | WELS_CPU_3DNOW | WELS_CPU_3DNOWEXT)) {
224     WelsEmms();
225   }
226 }
227 
228 #elif defined(HAVE_NEON) //For supporting both android platform and iOS platform
229 #if defined(ANDROID_NDK)
WelsCPUFeatureDetect(int32_t * pNumberOfLogicProcessors)230 uint32_t WelsCPUFeatureDetect (int32_t* pNumberOfLogicProcessors) {
231   uint32_t         uiCPU = 0;
232   AndroidCpuFamily cpuFamily = ANDROID_CPU_FAMILY_UNKNOWN;
233   uint64_t         uiFeatures = 0;
234   cpuFamily = android_getCpuFamily();
235   if (cpuFamily == ANDROID_CPU_FAMILY_ARM) {
236     uiFeatures = android_getCpuFeatures();
237     if (uiFeatures & ANDROID_CPU_ARM_FEATURE_ARMv7) {
238       uiCPU |= WELS_CPU_ARMv7;
239     }
240     if (uiFeatures & ANDROID_CPU_ARM_FEATURE_VFPv3) {
241       uiCPU |= WELS_CPU_VFPv3;
242     }
243     if (uiFeatures & ANDROID_CPU_ARM_FEATURE_NEON) {
244       uiCPU |= WELS_CPU_NEON;
245     }
246   }
247 
248   if (pNumberOfLogicProcessors != NULL) {
249     *pNumberOfLogicProcessors = android_getCpuCount();
250   }
251 
252   return uiCPU;
253 }
254 
255 #elif defined(__APPLE__)
/*!
 * \brief   Compile-time feature selection for Apple ARM targets.
 *
 * \param   pNumberOfLogicProcessors  not read and not written by this variant.
 *
 * \return  WELS_CPU_ARMv7 | WELS_CPU_VFPv3 | WELS_CPU_NEON when the compiler
 *          defines __ARM_NEON__, otherwise 0.
 */
uint32_t WelsCPUFeatureDetect (int32_t* pNumberOfLogicProcessors) {
#if defined(__ARM_NEON__)
  const uint32_t kuiBuildFeatures = WELS_CPU_ARMv7 | WELS_CPU_VFPv3 | WELS_CPU_NEON;
#else
  const uint32_t kuiBuildFeatures = 0;
#endif

  return kuiBuildFeatures;
}
266 #elif defined(__linux__)
267 
268 /* Generic arm/linux cpu feature detection */
WelsCPUFeatureDetect(int32_t * pNumberOfLogicProcessors)269 uint32_t WelsCPUFeatureDetect (int32_t* pNumberOfLogicProcessors) {
270   int flags = 0;
271   FILE* f = fopen ("/proc/cpuinfo", "r");
272 
273 #if defined(__chromeos__)
274   flags |= WELS_CPU_NEON;
275 #endif
276 
277   if (!f) {
278     return flags;
279   }
280 
281   char buf[200];
282   while (fgets (buf, sizeof (buf), f)) {
283     if (!strncmp (buf, "Features", strlen ("Features"))) {
284       // The asimd and fp features are listed on 64 bit ARMv8 kernels
285       if (strstr (buf, " neon ") || strstr (buf, " asimd "))
286         flags |= WELS_CPU_NEON;
287       if (strstr (buf, " vfpv3 ") || strstr (buf, " fp "))
288         flags |= WELS_CPU_VFPv3;
289       break;
290     }
291   }
292   fclose (f);
293   return flags;
294 }
295 
296 #else /* HAVE_NEON enabled but no runtime detection */
297 
298 /* No runtime feature detection available, but built with HAVE_NEON - assume
299  * that NEON and all associated features are available. */
300 
WelsCPUFeatureDetect(int32_t * pNumberOfLogicProcessors)301 uint32_t WelsCPUFeatureDetect (int32_t* pNumberOfLogicProcessors) {
302   return WELS_CPU_ARMv7 |
303          WELS_CPU_VFPv3 |
304          WELS_CPU_NEON;
305 }
306 #endif
307 #elif defined(HAVE_NEON_AARCH64)
308 
/* For AArch64, no runtime detection is actually necessary for now, since
 * NEON and VFPv3 are mandatory on all such CPUs. (/proc/cpuinfo doesn't
 * contain neon, and the android cpufeatures library doesn't return it
 * either.) */
313 
WelsCPUFeatureDetect(int32_t * pNumberOfLogicProcessors)314 uint32_t WelsCPUFeatureDetect (int32_t* pNumberOfLogicProcessors) {
315   return WELS_CPU_VFPv3 |
316          WELS_CPU_NEON;
317 }
318 
319 #elif defined(mips)
320 /* Get cpu features from cpuinfo. */
get_cpu_flags_from_cpuinfo(void)321 static uint32_t get_cpu_flags_from_cpuinfo(void)
322 {
323     uint32_t flags = 0;
324 
325 # ifdef __linux__
326     FILE* fp = fopen("/proc/cpuinfo", "r");
327     if (!fp)
328         return flags;
329 
330     char buf[200];
331     memset(buf, 0, sizeof(buf));
332     while (fgets(buf, sizeof(buf), fp)) {
333         if (!strncmp(buf, "model name", strlen("model name"))) {
334             if (strstr(buf, "Loongson-3A") || strstr(buf, "Loongson-3B") ||
335                 strstr(buf, "Loongson-2K")) {
336                 flags |= WELS_CPU_MMI;
337             }
338             break;
339         }
340     }
341     while (fgets(buf, sizeof(buf), fp)) {
342         if(!strncmp(buf, "ASEs implemented", strlen("ASEs implemented"))) {
343             if (strstr(buf, "loongson-mmi") && strstr(buf, "loongson-ext")) {
344                 flags |= WELS_CPU_MMI;
345             }
346             if (strstr(buf, "msa")) {
347                 flags |= WELS_CPU_MSA;
348             }
349             break;
350         }
351     }
352     fclose(fp);
353 # endif
354 
355     return flags;
356 }
357 
WelsCPUFeatureDetect(int32_t * pNumberOfLogicProcessors)358 uint32_t WelsCPUFeatureDetect (int32_t* pNumberOfLogicProcessors) {
359     return get_cpu_flags_from_cpuinfo();
360 }
361 
362 #elif defined(__loongarch__) && defined(__linux__)
363 /* The CPUCFG instruction is used to dynamically identify the characteristics
364  * of the loongarch in the running processor during software execution. */
365 #define LOONGARCH_CFG2 0x02
366 #define LOONGARCH_CFG2_LSX  (1<<6)
367 #define LOONGARCH_CFG2_LASX (1<<7)
368 
get_cpu_flags_from_cpucfg(void)369 static uint32_t get_cpu_flags_from_cpucfg(void) {
370   uint32_t reg = 0;
371   uint32_t flags = 0;
372 
373   __asm__ volatile(
374     "cpucfg %0, %1 \n\t"
375     : "+&r"(reg)
376     : "r"(LOONGARCH_CFG2)
377   );
378   if (reg & LOONGARCH_CFG2_LSX)
379       flags |= WELS_CPU_LSX;
380   if (reg & LOONGARCH_CFG2_LASX)
381       flags |= WELS_CPU_LASX;
382   return flags;
383 }
384 
WelsCPUFeatureDetect(int32_t * pNumberOfLogicProcessors)385 uint32_t WelsCPUFeatureDetect (int32_t* pNumberOfLogicProcessors) {
386     return get_cpu_flags_from_cpucfg();
387 }
388 
389 #else /* Neither X86_ASM, HAVE_NEON, HAVE_NEON_AARCH64 nor mips */
390 
/*!
 * \brief   Fallback used when none of the platform-specific variants is
 *          compiled in: reports no CPU features.
 *
 * \param   pNumberOfLogicProcessors  not read and not written by this variant.
 *
 * \return  Always 0.
 */
uint32_t WelsCPUFeatureDetect (int32_t* pNumberOfLogicProcessors) {
  const uint32_t kuiNoFeatures = 0;

  return kuiNoFeatures;
}
394 
395 #endif
396