1 /*
2 * Copyright (C) 2010 The Android Open Source Project
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * * Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * * Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in
12 * the documentation and/or other materials provided with the
13 * distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
16 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
17 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
18 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
19 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
21 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
22 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
23 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
25 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28
29 /* ChangeLog for this library:
30 *
31 * NDK r9?: Support for 64-bit CPUs (Intel, ARM & MIPS).
32 *
33 * NDK r8d: Add android_setCpu().
34 *
35 * NDK r8c: Add new ARM CPU features: VFPv2, VFP_D32, VFP_FP16,
36 * VFP_FMA, NEON_FMA, IDIV_ARM, IDIV_THUMB2 and iWMMXt.
37 *
38 * Rewrite the code to parse /proc/self/auxv instead of
39 * the "Features" field in /proc/cpuinfo.
40 *
41 * Dynamically allocate the buffer that holds the content
42 * of /proc/cpuinfo to deal with newer hardware.
43 *
44 * NDK r7c: Fix CPU count computation. The old method only reported the
45 * number of _active_ CPUs when the library was initialized,
46 * which could be less than the real total.
47 *
48 * NDK r5: Handle buggy kernels which report a CPU Architecture number of 7
49 * for an ARMv6 CPU (see below).
50 *
51 * Handle kernels that only report 'neon', and not 'vfpv3'
52 * (VFPv3 is mandated by the ARM architecture if Neon is implemented)
53 *
54 * Handle kernels that only report 'vfpv3d16', and not 'vfpv3'
55 *
56 * Fix x86 compilation. Report ANDROID_CPU_FAMILY_X86 in
57 * android_getCpuFamily().
58 *
59 * NDK r4: Initial release
60 */
61
62 #if defined(__le32__) || defined(__le64__)
63
64 // When users enter this, we should only provide interface and
65 // libportable will give the implementations.
66
67 #else // !__le32__ && !__le64__
68
69 #include "cpu-features.h"
70
71 #include <dlfcn.h>
72 #include <errno.h>
73 #include <fcntl.h>
74 #include <pthread.h>
75 #include <stdio.h>
76 #include <stdlib.h>
77 #include <sys/system_properties.h>
78
79 static pthread_once_t g_once;
80 static int g_inited;
81 static AndroidCpuFamily g_cpuFamily;
82 static uint64_t g_cpuFeatures;
83 static int g_cpuCount;
84
85 #ifdef __arm__
86 static uint32_t g_cpuIdArm;
87 #endif
88
89 static const int android_cpufeatures_debug = 0;
90
91 #define D(...) \
92 do { \
93 if (android_cpufeatures_debug) { \
94 printf(__VA_ARGS__); fflush(stdout); \
95 } \
96 } while (0)
97
#ifdef __i386__
/* Run the CPUID instruction for leaf 'func' and store the resulting
 * EAX, EBX, ECX and EDX values into values[0], values[1], values[2]
 * and values[3] respectively.
 */
static __inline__ void x86_cpuid(int func, int values[4])
{
    int out_eax, out_ebx, out_ecx, out_edx;

    /* EBX must be preserved since this is compiled as PIC code, which
     * means "=b" cannot be used for the second output register. EBX is
     * saved on the stack, copied into a compiler-chosen register after
     * CPUID, then restored.
     */
    __asm__ __volatile__ (
      "push %%ebx\n"
      "cpuid\n"
      "mov %%ebx, %1\n"
      "pop %%ebx\n"
      : "=a" (out_eax), "=r" (out_ebx), "=c" (out_ecx), "=d" (out_edx)
      : "a" (func)
    );

    values[0] = out_eax;
    values[1] = out_ebx;
    values[2] = out_ecx;
    values[3] = out_edx;
}
#endif
118
/* Get the size of a file by reading it until the end. This is needed
 * because files under /proc do not always return a valid size when
 * using fseek(0, SEEK_END) + ftell(). Nor can they be mmap()-ed.
 *
 * Returns the number of bytes that could be read, or -1 if the file
 * cannot be opened. A read error other than EINTR stops the scan; the
 * byte count accumulated so far is returned in that case.
 */
static int
get_file_size(const char* pathname)
{
    char buffer[256];
    int result = 0;
    int fd;

    fd = open(pathname, O_RDONLY);
    if (fd < 0) {
        D("Can't open %s: %s\n", pathname, strerror(errno));
        return -1;
    }

    for (;;) {
        int ret = read(fd, buffer, sizeof buffer);
        if (ret < 0) {
            /* Interrupted by a signal: simply retry. */
            if (errno == EINTR)
                continue;
            D("Error while reading %s: %s\n", pathname, strerror(errno));
            break;
        }
        /* End of file. */
        if (ret == 0)
            break;

        result += ret;
    }
    close(fd);
    return result;
}
151
/* Read the content of the file at 'pathname' into a user-provided buffer.
 * At most 'buffsize' bytes are stored and the data is *not*
 * zero-terminated.
 *
 * Returns the number of bytes stored. Returns -1 if the file cannot be
 * opened, or if a read error happens before any byte was stored; a read
 * error after some data was stored returns the partial length.
 */
static int
read_file(const char* pathname, char* buffer, size_t buffsize)
{
    const int capacity = (int)buffsize;
    int total = 0;
    int fd = open(pathname, O_RDONLY);

    if (fd < 0) {
        D("Could not open %s: %s\n", pathname, strerror(errno));
        return -1;
    }

    while (total < capacity) {
        int got = read(fd, buffer + total, buffsize - total);

        if (got > 0) {
            total += got;
            continue;
        }
        if (got == 0) {
            /* End of file. */
            break;
        }
        if (errno == EINTR) {
            /* Interrupted by a signal: retry the same read. */
            continue;
        }
        D("Error while reading from %s: %s\n", pathname, strerror(errno));
        if (total == 0)
            total = -1;
        break;
    }

    close(fd);
    return total;
}
185
/* Extract the content of the first occurrence of a given field in
 * the content of /proc/cpuinfo and return it as a heap-allocated,
 * zero-terminated string that must be freed by the caller.
 *
 * 'buffer' holds 'buflen' bytes and does not need to be zero-terminated;
 * no byte at or after buffer + buflen is ever read.
 * 'field' is the field name, which must appear at the start of a line.
 * The value is the text that follows the first ": " found after the
 * field name, up to the end of the line (or of the buffer).
 *
 * Return NULL if the field is not found, if its ':' is not followed by
 * a space inside the buffer, or if the allocation fails.
 */
static char*
extract_cpuinfo_field(const char* buffer, int buflen, const char* field)
{
    int  fieldlen = strlen(field);
    const char* bufend = buffer + buflen;
    char* result = NULL;
    const char *p, *q;

    /* Look for first field occurrence, and ensure it starts the line. */
    p = buffer;
    for (;;) {
        p = memmem(p, bufend-p, field, fieldlen);
        if (p == NULL)
            return NULL;

        if (p == buffer || p[-1] == '\n')
            break;

        p += fieldlen;
    }

    /* Skip to the first colon followed by a space. Both characters must
     * lie inside the buffer: checking the remaining length first avoids
     * reading p[1] past the end when ':' is the very last byte. */
    p += fieldlen;
    p = memchr(p, ':', bufend-p);
    if (p == NULL || bufend - p < 2 || p[1] != ' ')
        return NULL;

    /* Find the end of the line */
    p += 2;
    q = memchr(p, '\n', bufend-p);
    if (q == NULL)
        q = bufend;

    /* Copy the line into a heap-allocated buffer */
    {
        size_t len = (size_t)(q - p);

        result = malloc(len + 1);
        if (result == NULL)
            return NULL;

        memcpy(result, p, len);
        result[len] = '\0';
    }
    return result;
}
238
/* Tell whether 'item' appears as a whole word in 'list', a string of
 * items separated by spaces and/or tabs.
 * Returns 1 if found, 0 otherwise (including when 'list' is NULL).
 */
static int
has_list_item(const char* list, const char* item)
{
    const int wanted = strlen(item);
    const char* cursor;

    if (list == NULL)
        return 0;

    for (cursor = list; *cursor != '\0'; ) {
        const char* token;
        int token_len;

        /* Step over the separators in front of the next item. */
        while (*cursor == ' ' || *cursor == '\t')
            cursor++;

        /* Advance to the end of the item. */
        token = cursor;
        while (*cursor != '\0' && *cursor != ' ' && *cursor != '\t')
            cursor++;

        /* Only an exact, full-length match counts. */
        token_len = cursor - token;
        if (token_len == wanted && memcmp(token, item, wanted) == 0)
            return 1;
    }
    return 0;
}
271
272 /* Parse a number starting from 'input', but not going further
273 * than 'limit'. Return the value into '*result'.
274 *
275 * NOTE: Does not skip over leading spaces, or deal with sign characters.
276 * NOTE: Ignores overflows.
277 *
278 * The function returns NULL in case of error (bad format), or the new
279 * position after the decimal number in case of success (which will always
280 * be <= 'limit').
281 */
282 static const char*
parse_number(const char * input,const char * limit,int base,int * result)283 parse_number(const char* input, const char* limit, int base, int* result)
284 {
285 const char* p = input;
286 int val = 0;
287 while (p < limit) {
288 int d = (*p - '0');
289 if ((unsigned)d >= 10U) {
290 d = (*p - 'a');
291 if ((unsigned)d >= 6U)
292 d = (*p - 'A');
293 if ((unsigned)d >= 6U)
294 break;
295 d += 10;
296 }
297 if (d >= base)
298 break;
299 val = val*base + d;
300 p++;
301 }
302 if (p == input)
303 return NULL;
304
305 *result = val;
306 return p;
307 }
308
309 static const char*
parse_decimal(const char * input,const char * limit,int * result)310 parse_decimal(const char* input, const char* limit, int* result)
311 {
312 return parse_number(input, limit, 10, result);
313 }
314
315 static const char*
parse_hexadecimal(const char * input,const char * limit,int * result)316 parse_hexadecimal(const char* input, const char* limit, int* result)
317 {
318 return parse_number(input, limit, 16, result);
319 }
320
/* A set of CPU indices, as read from the cpu list files in sysfs.
 * See http://www.kernel.org/doc/Documentation/cputopology.txt
 *
 * Mobile devices are not expected to have more than 32 cores for now,
 * so a single 32-bit mask (bit N set == CPU N is in the set) is enough.
 */
typedef struct {
    uint32_t mask;
} CpuList;

/* Make 'list' the empty set. */
static __inline__ void
cpulist_init(CpuList* list)
{
    list->mask = 0;
}

/* Replace 'list1' with the intersection of 'list1' and 'list2'. */
static __inline__ void
cpulist_and(CpuList* list1, CpuList* list2)
{
    list1->mask = list1->mask & list2->mask;
}

/* Add CPU 'index' to 'list'. Indices outside of 0..31 are ignored. */
static __inline__ void
cpulist_set(CpuList* list, int index)
{
    if ((unsigned)index >= 32)
        return;

    list->mask |= ((uint32_t)1 << index);
}

/* Return the number of CPUs in 'list'. */
static __inline__ int
cpulist_count(CpuList* list)
{
    return __builtin_popcount(list->mask);
}
352
353 /* Parse a textual list of cpus and store the result inside a CpuList object.
354 * Input format is the following:
355 * - comma-separated list of items (no spaces)
356 * - each item is either a single decimal number (cpu index), or a range made
357 * of two numbers separated by a single dash (-). Ranges are inclusive.
358 *
359 * Examples: 0
360 * 2,4-127,128-143
361 * 0-1
362 */
363 static void
cpulist_parse(CpuList * list,const char * line,int line_len)364 cpulist_parse(CpuList* list, const char* line, int line_len)
365 {
366 const char* p = line;
367 const char* end = p + line_len;
368 const char* q;
369
370 /* NOTE: the input line coming from sysfs typically contains a
371 * trailing newline, so take care of it in the code below
372 */
373 while (p < end && *p != '\n')
374 {
375 int val, start_value, end_value;
376
377 /* Find the end of current item, and put it into 'q' */
378 q = memchr(p, ',', end-p);
379 if (q == NULL) {
380 q = end;
381 }
382
383 /* Get first value */
384 p = parse_decimal(p, q, &start_value);
385 if (p == NULL)
386 goto BAD_FORMAT;
387
388 end_value = start_value;
389
390 /* If we're not at the end of the item, expect a dash and
391 * and integer; extract end value.
392 */
393 if (p < q && *p == '-') {
394 p = parse_decimal(p+1, q, &end_value);
395 if (p == NULL)
396 goto BAD_FORMAT;
397 }
398
399 /* Set bits CPU list bits */
400 for (val = start_value; val <= end_value; val++) {
401 cpulist_set(list, val);
402 }
403
404 /* Jump to next item */
405 p = q;
406 if (p < end)
407 p++;
408 }
409
410 BAD_FORMAT:
411 ;
412 }
413
414 /* Read a CPU list from one sysfs file */
415 static void
cpulist_read_from(CpuList * list,const char * filename)416 cpulist_read_from(CpuList* list, const char* filename)
417 {
418 char file[64];
419 int filelen;
420
421 cpulist_init(list);
422
423 filelen = read_file(filename, file, sizeof file);
424 if (filelen < 0) {
425 D("Could not read %s: %s\n", filename, strerror(errno));
426 return;
427 }
428
429 cpulist_parse(list, file, filelen);
430 }
431
432 #if defined(__arm__)
433
// ELF hardware capability bits for 32-bit ARM, mirroring the kernel's
// <asm/hwcap.h> header. The name in each trailing comment is the item
// that get_elf_hwcap_from_proc_cpuinfo() maps to the flag.
#define HWCAP_VFP       0x00040   /* bit 6:  "vfp"      */
#define HWCAP_IWMMXT    0x00200   /* bit 9:  "iwmmxt"   */
#define HWCAP_NEON      0x01000   /* bit 12: "neon"     */
#define HWCAP_VFPv3     0x02000   /* bit 13: "vfpv3"    */
#define HWCAP_VFPv3D16  0x04000   /* bit 14: "vfpv3d16" */
#define HWCAP_VFPv4     0x10000   /* bit 16: "vfpv4"    */
#define HWCAP_IDIVA     0x20000   /* bit 17: "idiva"    */
#define HWCAP_IDIVT     0x40000   /* bit 18: "idivt"    */

// The 32-bit ARMv7 optional features that ARMv8 CPUs _always_ support,
// as mandated by the ARM Architecture Reference Manual.
#define HWCAP_SET_FOR_ARMV8 \
    ( HWCAP_VFP   | HWCAP_NEON  | \
      HWCAP_VFPv3 | HWCAP_VFPv4 | \
      HWCAP_IDIVA | HWCAP_IDIVT )

// Tag of the auxiliary vector entry that carries the hwcaps bitmap.
#define AT_HWCAP 16
456
457 // Probe the system's C library for a 'getauxval' function and call it if
458 // it exits, or return 0 for failure. This function is available since API
459 // level 20.
460 //
461 // This code does *NOT* check for '__ANDROID_API__ >= 20' to support the
462 // edge case where some NDK developers use headers for a platform that is
463 // newer than the one really targetted by their application.
464 // This is typically done to use newer native APIs only when running on more
465 // recent Android versions, and requires careful symbol management.
466 //
467 // Note that getauxval() can't really be re-implemented here, because
468 // its implementation does not parse /proc/self/auxv. Instead it depends
469 // on values that are passed by the kernel at process-init time to the
470 // C runtime initialization layer.
471 static uint32_t
get_elf_hwcap_from_getauxval(void)472 get_elf_hwcap_from_getauxval(void) {
473 typedef unsigned long getauxval_func_t(unsigned long);
474
475 dlerror();
476 void* libc_handle = dlopen("libc.so", RTLD_NOW);
477 if (!libc_handle) {
478 D("Could not dlopen() C library: %s\n", dlerror());
479 return 0;
480 }
481
482 uint32_t ret = 0;
483 getauxval_func_t* func = (getauxval_func_t*)
484 dlsym(libc_handle, "getauxval");
485 if (!func) {
486 D("Could not find getauxval() in C library\n");
487 } else {
488 // Note: getauxval() returns 0 on failure. Doesn't touch errno.
489 ret = (uint32_t)(*func)(AT_HWCAP);
490 }
491 dlclose(libc_handle);
492 return ret;
493 }
494
495 // Parse /proc/self/auxv to extract the ELF HW capabilities bitmap for the
496 // current CPU. Note that this file is not accessible from regular
497 // application processes on some Android platform releases.
498 // On success, return new ELF hwcaps, or 0 on failure.
499 static uint32_t
get_elf_hwcap_from_proc_self_auxv(void)500 get_elf_hwcap_from_proc_self_auxv(void) {
501 const char filepath[] = "/proc/self/auxv";
502 int fd = TEMP_FAILURE_RETRY(open(filepath, O_RDONLY));
503 if (fd < 0) {
504 D("Could not open %s: %s\n", filepath, strerror(errno));
505 return 0;
506 }
507
508 struct { uint32_t tag; uint32_t value; } entry;
509
510 uint32_t result = 0;
511 for (;;) {
512 int ret = TEMP_FAILURE_RETRY(read(fd, (char*)&entry, sizeof entry));
513 if (ret < 0) {
514 D("Error while reading %s: %s\n", filepath, strerror(errno));
515 break;
516 }
517 // Detect end of list.
518 if (ret == 0 || (entry.tag == 0 && entry.value == 0))
519 break;
520 if (entry.tag == AT_HWCAP) {
521 result = entry.value;
522 break;
523 }
524 }
525 close(fd);
526 return result;
527 }
528
529 /* Compute the ELF HWCAP flags from the content of /proc/cpuinfo.
530 * This works by parsing the 'Features' line, which lists which optional
531 * features the device's CPU supports, on top of its reference
532 * architecture.
533 */
534 static uint32_t
get_elf_hwcap_from_proc_cpuinfo(const char * cpuinfo,int cpuinfo_len)535 get_elf_hwcap_from_proc_cpuinfo(const char* cpuinfo, int cpuinfo_len) {
536 uint32_t hwcaps = 0;
537 long architecture = 0;
538 char* cpuArch = extract_cpuinfo_field(cpuinfo, cpuinfo_len, "CPU architecture");
539 if (cpuArch) {
540 architecture = strtol(cpuArch, NULL, 10);
541 free(cpuArch);
542
543 if (architecture >= 8L) {
544 // This is a 32-bit ARM binary running on a 64-bit ARM64 kernel.
545 // The 'Features' line only lists the optional features that the
546 // device's CPU supports, compared to its reference architecture
547 // which are of no use for this process.
548 D("Faking 32-bit ARM HWCaps on ARMv%ld CPU\n", architecture);
549 return HWCAP_SET_FOR_ARMV8;
550 }
551 }
552
553 char* cpuFeatures = extract_cpuinfo_field(cpuinfo, cpuinfo_len, "Features");
554 if (cpuFeatures != NULL) {
555 D("Found cpuFeatures = '%s'\n", cpuFeatures);
556
557 if (has_list_item(cpuFeatures, "vfp"))
558 hwcaps |= HWCAP_VFP;
559 if (has_list_item(cpuFeatures, "vfpv3"))
560 hwcaps |= HWCAP_VFPv3;
561 if (has_list_item(cpuFeatures, "vfpv3d16"))
562 hwcaps |= HWCAP_VFPv3D16;
563 if (has_list_item(cpuFeatures, "vfpv4"))
564 hwcaps |= HWCAP_VFPv4;
565 if (has_list_item(cpuFeatures, "neon"))
566 hwcaps |= HWCAP_NEON;
567 if (has_list_item(cpuFeatures, "idiva"))
568 hwcaps |= HWCAP_IDIVA;
569 if (has_list_item(cpuFeatures, "idivt"))
570 hwcaps |= HWCAP_IDIVT;
571 if (has_list_item(cpuFeatures, "idiv"))
572 hwcaps |= HWCAP_IDIVA | HWCAP_IDIVT;
573 if (has_list_item(cpuFeatures, "iwmmxt"))
574 hwcaps |= HWCAP_IWMMXT;
575
576 free(cpuFeatures);
577 }
578 return hwcaps;
579 }
580 #endif /* __arm__ */
581
582 /* Return the number of cpus present on a given device.
583 *
584 * To handle all weird kernel configurations, we need to compute the
585 * intersection of the 'present' and 'possible' CPU lists and count
586 * the result.
587 */
588 static int
get_cpu_count(void)589 get_cpu_count(void)
590 {
591 CpuList cpus_present[1];
592 CpuList cpus_possible[1];
593
594 cpulist_read_from(cpus_present, "/sys/devices/system/cpu/present");
595 cpulist_read_from(cpus_possible, "/sys/devices/system/cpu/possible");
596
597 /* Compute the intersection of both sets to get the actual number of
598 * CPU cores that can be used on this device by the kernel.
599 */
600 cpulist_and(cpus_present, cpus_possible);
601
602 return cpulist_count(cpus_present);
603 }
604
605 static void
android_cpuInitFamily(void)606 android_cpuInitFamily(void)
607 {
608 #if defined(__arm__)
609 g_cpuFamily = ANDROID_CPU_FAMILY_ARM;
610 #elif defined(__i386__)
611 g_cpuFamily = ANDROID_CPU_FAMILY_X86;
612 #elif defined(__mips64)
613 /* Needs to be before __mips__ since the compiler defines both */
614 g_cpuFamily = ANDROID_CPU_FAMILY_MIPS64;
615 #elif defined(__mips__)
616 g_cpuFamily = ANDROID_CPU_FAMILY_MIPS;
617 #elif defined(__aarch64__)
618 g_cpuFamily = ANDROID_CPU_FAMILY_ARM64;
619 #elif defined(__x86_64__)
620 g_cpuFamily = ANDROID_CPU_FAMILY_X86_64;
621 #else
622 g_cpuFamily = ANDROID_CPU_FAMILY_UNKNOWN;
623 #endif
624 }
625
626 static void
android_cpuInit(void)627 android_cpuInit(void)
628 {
629 char* cpuinfo = NULL;
630 int cpuinfo_len;
631
632 android_cpuInitFamily();
633
634 g_cpuFeatures = 0;
635 g_cpuCount = 1;
636 g_inited = 1;
637
638 cpuinfo_len = get_file_size("/proc/cpuinfo");
639 if (cpuinfo_len < 0) {
640 D("cpuinfo_len cannot be computed!");
641 return;
642 }
643 cpuinfo = malloc(cpuinfo_len);
644 if (cpuinfo == NULL) {
645 D("cpuinfo buffer could not be allocated");
646 return;
647 }
648 cpuinfo_len = read_file("/proc/cpuinfo", cpuinfo, cpuinfo_len);
649 D("cpuinfo_len is (%d):\n%.*s\n", cpuinfo_len,
650 cpuinfo_len >= 0 ? cpuinfo_len : 0, cpuinfo);
651
652 if (cpuinfo_len < 0) /* should not happen */ {
653 free(cpuinfo);
654 return;
655 }
656
657 /* Count the CPU cores, the value may be 0 for single-core CPUs */
658 g_cpuCount = get_cpu_count();
659 if (g_cpuCount == 0) {
660 g_cpuCount = 1;
661 }
662
663 D("found cpuCount = %d\n", g_cpuCount);
664
665 #ifdef __arm__
666 {
667 /* Extract architecture from the "CPU Architecture" field.
668 * The list is well-known, unlike the the output of
669 * the 'Processor' field which can vary greatly.
670 *
671 * See the definition of the 'proc_arch' array in
672 * $KERNEL/arch/arm/kernel/setup.c and the 'c_show' function in
673 * same file.
674 */
675 char* cpuArch = extract_cpuinfo_field(cpuinfo, cpuinfo_len, "CPU architecture");
676
677 if (cpuArch != NULL) {
678 char* end;
679 long archNumber;
680 int hasARMv7 = 0;
681
682 D("found cpuArch = '%s'\n", cpuArch);
683
684 /* read the initial decimal number, ignore the rest */
685 archNumber = strtol(cpuArch, &end, 10);
686
687 /* Note that ARMv8 is upwards compatible with ARMv7. */
688 if (end > cpuArch && archNumber >= 7) {
689 hasARMv7 = 1;
690 }
691
692 /* Unfortunately, it seems that certain ARMv6-based CPUs
693 * report an incorrect architecture number of 7!
694 *
695 * See http://code.google.com/p/android/issues/detail?id=10812
696 *
697 * We try to correct this by looking at the 'elf_format'
698 * field reported by the 'Processor' field, which is of the
699 * form of "(v7l)" for an ARMv7-based CPU, and "(v6l)" for
700 * an ARMv6-one.
701 */
702 if (hasARMv7) {
703 char* cpuProc = extract_cpuinfo_field(cpuinfo, cpuinfo_len,
704 "Processor");
705 if (cpuProc != NULL) {
706 D("found cpuProc = '%s'\n", cpuProc);
707 if (has_list_item(cpuProc, "(v6l)")) {
708 D("CPU processor and architecture mismatch!!\n");
709 hasARMv7 = 0;
710 }
711 free(cpuProc);
712 }
713 }
714
715 if (hasARMv7) {
716 g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_ARMv7;
717 }
718
719 /* The LDREX / STREX instructions are available from ARMv6 */
720 if (archNumber >= 6) {
721 g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_LDREX_STREX;
722 }
723
724 free(cpuArch);
725 }
726
727 /* Extract the list of CPU features from ELF hwcaps */
728 uint32_t hwcaps = 0;
729 hwcaps = get_elf_hwcap_from_getauxval();
730 if (!hwcaps) {
731 D("Parsing /proc/self/auxv to extract ELF hwcaps!\n");
732 hwcaps = get_elf_hwcap_from_proc_self_auxv();
733 }
734 if (!hwcaps) {
735 // Parsing /proc/self/auxv will fail from regular application
736 // processes on some Android platform versions, when this happens
737 // parse proc/cpuinfo instead.
738 D("Parsing /proc/cpuinfo to extract ELF hwcaps!\n");
739 hwcaps = get_elf_hwcap_from_proc_cpuinfo(cpuinfo, cpuinfo_len);
740 }
741
742 if (hwcaps != 0) {
743 int has_vfp = (hwcaps & HWCAP_VFP);
744 int has_vfpv3 = (hwcaps & HWCAP_VFPv3);
745 int has_vfpv3d16 = (hwcaps & HWCAP_VFPv3D16);
746 int has_vfpv4 = (hwcaps & HWCAP_VFPv4);
747 int has_neon = (hwcaps & HWCAP_NEON);
748 int has_idiva = (hwcaps & HWCAP_IDIVA);
749 int has_idivt = (hwcaps & HWCAP_IDIVT);
750 int has_iwmmxt = (hwcaps & HWCAP_IWMMXT);
751
752 // The kernel does a poor job at ensuring consistency when
753 // describing CPU features. So lots of guessing is needed.
754
755 // 'vfpv4' implies VFPv3|VFP_FMA|FP16
756 if (has_vfpv4)
757 g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_VFPv3 |
758 ANDROID_CPU_ARM_FEATURE_VFP_FP16 |
759 ANDROID_CPU_ARM_FEATURE_VFP_FMA;
760
761 // 'vfpv3' or 'vfpv3d16' imply VFPv3. Note that unlike GCC,
762 // a value of 'vfpv3' doesn't necessarily mean that the D32
763 // feature is present, so be conservative. All CPUs in the
764 // field that support D32 also support NEON, so this should
765 // not be a problem in practice.
766 if (has_vfpv3 || has_vfpv3d16)
767 g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_VFPv3;
768
769 // 'vfp' is super ambiguous. Depending on the kernel, it can
770 // either mean VFPv2 or VFPv3. Make it depend on ARMv7.
771 if (has_vfp) {
772 if (g_cpuFeatures & ANDROID_CPU_ARM_FEATURE_ARMv7)
773 g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_VFPv3;
774 else
775 g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_VFPv2;
776 }
777
778 // Neon implies VFPv3|D32, and if vfpv4 is detected, NEON_FMA
779 if (has_neon) {
780 g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_VFPv3 |
781 ANDROID_CPU_ARM_FEATURE_NEON |
782 ANDROID_CPU_ARM_FEATURE_VFP_D32;
783 if (has_vfpv4)
784 g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_NEON_FMA;
785 }
786
787 // VFPv3 implies VFPv2 and ARMv7
788 if (g_cpuFeatures & ANDROID_CPU_ARM_FEATURE_VFPv3)
789 g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_VFPv2 |
790 ANDROID_CPU_ARM_FEATURE_ARMv7;
791
792 if (has_idiva)
793 g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_IDIV_ARM;
794 if (has_idivt)
795 g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_IDIV_THUMB2;
796
797 if (has_iwmmxt)
798 g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_iWMMXt;
799 }
800
801 /* Extract the cpuid value from various fields */
802 // The CPUID value is broken up in several entries in /proc/cpuinfo.
803 // This table is used to rebuild it from the entries.
804 static const struct CpuIdEntry {
805 const char* field;
806 char format;
807 char bit_lshift;
808 char bit_length;
809 } cpu_id_entries[] = {
810 { "CPU implementer", 'x', 24, 8 },
811 { "CPU variant", 'x', 20, 4 },
812 { "CPU part", 'x', 4, 12 },
813 { "CPU revision", 'd', 0, 4 },
814 };
815 size_t i;
816 D("Parsing /proc/cpuinfo to recover CPUID\n");
817 for (i = 0;
818 i < sizeof(cpu_id_entries)/sizeof(cpu_id_entries[0]);
819 ++i) {
820 const struct CpuIdEntry* entry = &cpu_id_entries[i];
821 char* value = extract_cpuinfo_field(cpuinfo,
822 cpuinfo_len,
823 entry->field);
824 if (value == NULL)
825 continue;
826
827 D("field=%s value='%s'\n", entry->field, value);
828 char* value_end = value + strlen(value);
829 int val = 0;
830 const char* start = value;
831 const char* p;
832 if (value[0] == '0' && (value[1] == 'x' || value[1] == 'X')) {
833 start += 2;
834 p = parse_hexadecimal(start, value_end, &val);
835 } else if (entry->format == 'x')
836 p = parse_hexadecimal(value, value_end, &val);
837 else
838 p = parse_decimal(value, value_end, &val);
839
840 if (p > (const char*)start) {
841 val &= ((1 << entry->bit_length)-1);
842 val <<= entry->bit_lshift;
843 g_cpuIdArm |= (uint32_t) val;
844 }
845
846 free(value);
847 }
848
849 // Handle kernel configuration bugs that prevent the correct
850 // reporting of CPU features.
851 static const struct CpuFix {
852 uint32_t cpuid;
853 uint64_t or_flags;
854 } cpu_fixes[] = {
855 /* The Nexus 4 (Qualcomm Krait) kernel configuration
856 * forgets to report IDIV support. */
857 { 0x510006f2, ANDROID_CPU_ARM_FEATURE_IDIV_ARM |
858 ANDROID_CPU_ARM_FEATURE_IDIV_THUMB2 },
859 };
860 size_t n;
861 for (n = 0; n < sizeof(cpu_fixes)/sizeof(cpu_fixes[0]); ++n) {
862 const struct CpuFix* entry = &cpu_fixes[n];
863
864 if (g_cpuIdArm == entry->cpuid)
865 g_cpuFeatures |= entry->or_flags;
866 }
867
868 // Special case: The emulator-specific Android 4.2 kernel fails
869 // to report support for the 32-bit ARM IDIV instruction.
870 // Technically, this is a feature of the virtual CPU implemented
871 // by the emulator. Note that it could also support Thumb IDIV
872 // in the future, and this will have to be slightly updated.
873 char* hardware = extract_cpuinfo_field(cpuinfo,
874 cpuinfo_len,
875 "Hardware");
876 if (hardware) {
877 if (!strcmp(hardware, "Goldfish") &&
878 g_cpuIdArm == 0x4100c080 &&
879 (g_cpuFamily & ANDROID_CPU_ARM_FEATURE_ARMv7) != 0) {
880 g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_IDIV_ARM;
881 }
882 free(hardware);
883 }
884 }
885 #endif /* __arm__ */
886
887 #ifdef __i386__
888 int regs[4];
889
890 /* According to http://en.wikipedia.org/wiki/CPUID */
891 #define VENDOR_INTEL_b 0x756e6547
892 #define VENDOR_INTEL_c 0x6c65746e
893 #define VENDOR_INTEL_d 0x49656e69
894
895 x86_cpuid(0, regs);
896 int vendorIsIntel = (regs[1] == VENDOR_INTEL_b &&
897 regs[2] == VENDOR_INTEL_c &&
898 regs[3] == VENDOR_INTEL_d);
899
900 x86_cpuid(1, regs);
901 if ((regs[2] & (1 << 9)) != 0) {
902 g_cpuFeatures |= ANDROID_CPU_X86_FEATURE_SSSE3;
903 }
904 if ((regs[2] & (1 << 23)) != 0) {
905 g_cpuFeatures |= ANDROID_CPU_X86_FEATURE_POPCNT;
906 }
907 if (vendorIsIntel && (regs[2] & (1 << 22)) != 0) {
908 g_cpuFeatures |= ANDROID_CPU_X86_FEATURE_MOVBE;
909 }
910 #endif
911
912 free(cpuinfo);
913 }
914
915
916 AndroidCpuFamily
android_getCpuFamily(void)917 android_getCpuFamily(void)
918 {
919 pthread_once(&g_once, android_cpuInit);
920 return g_cpuFamily;
921 }
922
923
924 uint64_t
android_getCpuFeatures(void)925 android_getCpuFeatures(void)
926 {
927 pthread_once(&g_once, android_cpuInit);
928 return g_cpuFeatures;
929 }
930
931
932 int
android_getCpuCount(void)933 android_getCpuCount(void)
934 {
935 pthread_once(&g_once, android_cpuInit);
936 return g_cpuCount;
937 }
938
939 static void
android_cpuInitDummy(void)940 android_cpuInitDummy(void)
941 {
942 g_inited = 1;
943 }
944
945 int
android_setCpu(int cpu_count,uint64_t cpu_features)946 android_setCpu(int cpu_count, uint64_t cpu_features)
947 {
948 /* Fail if the library was already initialized. */
949 if (g_inited)
950 return 0;
951
952 android_cpuInitFamily();
953 g_cpuCount = (cpu_count <= 0 ? 1 : cpu_count);
954 g_cpuFeatures = cpu_features;
955 pthread_once(&g_once, android_cpuInitDummy);
956
957 return 1;
958 }
959
960 #ifdef __arm__
961 uint32_t
android_getCpuIdArm(void)962 android_getCpuIdArm(void)
963 {
964 pthread_once(&g_once, android_cpuInit);
965 return g_cpuIdArm;
966 }
967
968 int
android_setCpuArm(int cpu_count,uint64_t cpu_features,uint32_t cpu_id)969 android_setCpuArm(int cpu_count, uint64_t cpu_features, uint32_t cpu_id)
970 {
971 if (!android_setCpu(cpu_count, cpu_features))
972 return 0;
973
974 g_cpuIdArm = cpu_id;
975 return 1;
976 }
977 #endif /* __arm__ */
978
979 /*
980 * Technical note: Making sense of ARM's FPU architecture versions.
981 *
982 * FPA was ARM's first attempt at an FPU architecture. There is no Android
983 * device that actually uses it since this technology was already obsolete
984 * when the project started. If you see references to FPA instructions
985 * somewhere, you can be sure that this doesn't apply to Android at all.
986 *
987 * FPA was followed by "VFP", soon renamed "VFPv1" due to the emergence of
988 * new versions / additions to it. ARM considers this obsolete right now,
989 * and no known Android device implements it either.
990 *
991 * VFPv2 added a few instructions to VFPv1, and is an *optional* extension
992 * supported by some ARMv5TE, ARMv6 and ARMv6T2 CPUs. Note that a device
993 * supporting the 'armeabi' ABI doesn't necessarily support these.
994 *
995 * VFPv3-D16 adds a few instructions on top of VFPv2 and is typically used
996 * on ARMv7-A CPUs which implement a FPU. Note that it is also mandated
997 * by the Android 'armeabi-v7a' ABI. The -D16 suffix in its name means
998 * that it provides 16 double-precision FPU registers (d0-d15) and 32
999 * single-precision ones (s0-s31) which happen to be mapped to the same
1000 * register banks.
1001 *
1002 * VFPv3-D32 is the name of an extension to VFPv3-D16 that provides 16
1003 * additional double precision registers (d16-d31). Note that there are
1004 * still only 32 single precision registers.
1005 *
1006 * VFPv3xD is a *subset* of VFPv3-D16 that only provides single-precision
1007 * registers. It is only used on ARMv7-M (i.e. on micro-controllers) which
1008 * are not supported by Android. Note that it is not compatible with VFPv2.
1009 *
1010 * NOTE: The term 'VFPv3' usually designates either VFPv3-D16 or VFPv3-D32
1011 * depending on context. For example GCC uses it for VFPv3-D32, but
1012 * the Linux kernel code uses it for VFPv3-D16 (especially in
1013 * /proc/cpuinfo). Always try to use the full designation when
1014 * possible.
1015 *
1016 * NEON, a.k.a. "ARM Advanced SIMD" is an extension that provides
1017 * instructions to perform parallel computations on vectors of 8, 16,
1018 * 32, 64 and 128 bit quantities. NEON requires VFPv3-D32 since all
1019 * NEON registers are also mapped to the same register banks.
1020 *
1021 * VFPv4-D16, adds a few instructions on top of VFPv3-D16 in order to
1022 * perform fused multiply-accumulate on VFP registers, as well as
1023 * half-precision (16-bit) conversion operations.
1024 *
1025 * VFPv4-D32 is VFPv4-D16 with 32, instead of 16, FPU double precision
1026 * registers.
1027 *
1028 * VFPv4-NEON is VFPv4-D32 with NEON instructions. It also adds fused
1029 * multiply-accumulate instructions that work on the NEON registers.
1030 *
1031 * NOTE: Similarly, "VFPv4" might either reference VFPv4-D16 or VFPv4-D32
1032 * depending on context.
1033 *
1034 * The following information was determined by scanning the binutils-2.22
1035 * sources:
1036 *
1037 * Basic VFP instruction subsets:
1038 *
1039 * #define FPU_VFP_EXT_V1xD 0x08000000 // Base VFP instruction set.
1040 * #define FPU_VFP_EXT_V1 0x04000000 // Double-precision insns.
1041 * #define FPU_VFP_EXT_V2 0x02000000 // ARM10E VFPr1.
1042 * #define FPU_VFP_EXT_V3xD 0x01000000 // VFPv3 single-precision.
1043 * #define FPU_VFP_EXT_V3 0x00800000 // VFPv3 double-precision.
1044 * #define FPU_NEON_EXT_V1 0x00400000 // Neon (SIMD) insns.
1045 * #define FPU_VFP_EXT_D32 0x00200000 // Registers D16-D31.
1046 * #define FPU_VFP_EXT_FP16 0x00100000 // Half-precision extensions.
1047 * #define FPU_NEON_EXT_FMA 0x00080000 // Neon fused multiply-add
1048 * #define FPU_VFP_EXT_FMA 0x00040000 // VFP fused multiply-add
1049 *
1050 * FPU types (excluding NEON)
1051 *
1052 * FPU_VFP_V1xD (EXT_V1xD)
1053 * |
1054 * +--------------------------+
1055 * | |
1056 * FPU_VFP_V1 (+EXT_V1) FPU_VFP_V3xD (+EXT_V2+EXT_V3xD)
1057 * | |
1058 * | |
1059 * FPU_VFP_V2 (+EXT_V2) FPU_VFP_V4_SP_D16 (+EXT_FP16+EXT_FMA)
1060 * |
1061 * FPU_VFP_V3D16 (+EXT_V3xD+EXT_V3)
1062 * |
1063 * +--------------------------+
1064 * | |
1065 * FPU_VFP_V3 (+EXT_D32) FPU_VFP_V4D16 (+EXT_FP16+EXT_FMA)
1066 * | |
1067 * | FPU_VFP_V4 (+EXT_D32)
1068 * |
1069 * FPU_VFP_HARD (+EXT_FMA+NEON_EXT_FMA)
1070 *
1071 * VFP architectures:
1072 *
1073 * ARCH_VFP_V1xD (EXT_V1xD)
1074 * |
1075 * +------------------+
1076 * | |
1077 * | ARCH_VFP_V3xD (+EXT_V2+EXT_V3xD)
1078 * | |
1079 * | ARCH_VFP_V3xD_FP16 (+EXT_FP16)
1080 * | |
1081 * | ARCH_VFP_V4_SP_D16 (+EXT_FMA)
1082 * |
1083 * ARCH_VFP_V1 (+EXT_V1)
1084 * |
1085 * ARCH_VFP_V2 (+EXT_V2)
1086 * |
1087 * ARCH_VFP_V3D16 (+EXT_V3xD+EXT_V3)
1088 * |
1089 * +-------------------+
1090 * | |
1091 * | ARCH_VFP_V3D16_FP16 (+EXT_FP16)
1092 * |
1093 * +-------------------+
1094 * | |
1095 * | ARCH_VFP_V4_D16 (+EXT_FP16+EXT_FMA)
1096 * | |
1097 * | ARCH_VFP_V4 (+EXT_D32)
1098 * | |
1099 * | ARCH_NEON_VFP_V4 (+EXT_NEON+EXT_NEON_FMA)
1100 * |
1101 * ARCH_VFP_V3 (+EXT_D32)
1102 * |
1103 * +-------------------+
1104 * | |
1105 * | ARCH_VFP_V3_FP16 (+EXT_FP16)
1106 * |
1107 * ARCH_VFP_V3_PLUS_NEON_V1 (+EXT_NEON)
1108 * |
1109 * ARCH_NEON_FP16 (+EXT_FP16)
1110 *
1111 * -mfpu=<name> values and their correspondence with FPU architectures above:
1112 *
1113 * {"vfp", FPU_ARCH_VFP_V2},
1114 * {"vfp9", FPU_ARCH_VFP_V2},
1115 * {"vfp3", FPU_ARCH_VFP_V3}, // For backwards compatibility.
1116 * {"vfp10", FPU_ARCH_VFP_V2},
1117 * {"vfp10-r0", FPU_ARCH_VFP_V1},
1118 * {"vfpxd", FPU_ARCH_VFP_V1xD},
1119 * {"vfpv2", FPU_ARCH_VFP_V2},
1120 * {"vfpv3", FPU_ARCH_VFP_V3},
1121 * {"vfpv3-fp16", FPU_ARCH_VFP_V3_FP16},
1122 * {"vfpv3-d16", FPU_ARCH_VFP_V3D16},
1123 * {"vfpv3-d16-fp16", FPU_ARCH_VFP_V3D16_FP16},
1124 * {"vfpv3xd", FPU_ARCH_VFP_V3xD},
1125 * {"vfpv3xd-fp16", FPU_ARCH_VFP_V3xD_FP16},
1126 * {"neon", FPU_ARCH_VFP_V3_PLUS_NEON_V1},
1127 * {"neon-fp16", FPU_ARCH_NEON_FP16},
1128 * {"vfpv4", FPU_ARCH_VFP_V4},
1129 * {"vfpv4-d16", FPU_ARCH_VFP_V4D16},
1130 * {"fpv4-sp-d16", FPU_ARCH_VFP_V4_SP_D16},
1131 * {"neon-vfpv4", FPU_ARCH_NEON_VFP_V4},
1132 *
1133 *
1134 * Simplified diagram that only includes FPUs supported by Android:
1135 * Only ARCH_VFP_V3D16 is actually mandated by the armeabi-v7a ABI,
1136 * all others are optional and must be probed at runtime.
1137 *
1138 * ARCH_VFP_V3D16 (EXT_V1xD+EXT_V1+EXT_V2+EXT_V3xD+EXT_V3)
1139 * |
1140 * +-------------------+
1141 * | |
1142 * | ARCH_VFP_V3D16_FP16 (+EXT_FP16)
1143 * |
1144 * +-------------------+
1145 * | |
1146 * | ARCH_VFP_V4_D16 (+EXT_FP16+EXT_FMA)
1147 * | |
1148 * | ARCH_VFP_V4 (+EXT_D32)
1149 * | |
1150 * | ARCH_NEON_VFP_V4 (+EXT_NEON+EXT_NEON_FMA)
1151 * |
1152 * ARCH_VFP_V3 (+EXT_D32)
1153 * |
1154 * +-------------------+
1155 * | |
1156 * | ARCH_VFP_V3_FP16 (+EXT_FP16)
1157 * |
1158 * ARCH_VFP_V3_PLUS_NEON_V1 (+EXT_NEON)
1159 * |
1160 * ARCH_NEON_FP16 (+EXT_FP16)
1161 *
1162 */
1163
1164 #endif // defined(__le32__) || defined(__le64__)
1165