1 /*
2 * Copyright (c) 2023, Alliance for Open Media. All rights reserved
3 *
4 * This source code is subject to the terms of the BSD 2 Clause License and
5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 * was not distributed with this source code in the LICENSE file, you can
7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 * Media Patent License 1.0 was not distributed with this source code in the
9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10 */
11
12 #include "config/aom_config.h"
13
14 #include "arm_cpudetect.h"
15
16 #include "aom_ports/arm.h"
17
18 #if defined(__APPLE__)
19 #include <sys/sysctl.h>
20 #endif
21
22 #if !CONFIG_RUNTIME_CPU_DETECT
23
// Capability report for builds without runtime CPU detection. The value is
// effectively a no-op: the RTCD tables do not exist in this configuration and
// every function is called statically, so nothing can be adjusted from it.
static int arm_get_cpu_caps(void) {
#if HAVE_NEON
  return HAS_NEON;
#else
  return 0;
#endif  // HAVE_NEON
}
34
35 #elif defined(__APPLE__) // end !CONFIG_RUNTIME_CPU_DETECT
36
37 // sysctlbyname() parameter documentation for instruction set characteristics:
38 // https://developer.apple.com/documentation/kernel/1387446-sysctlbyname/determining_instruction_set_characteristics
have_feature(const char * feature)39 static INLINE bool have_feature(const char *feature) {
40 int64_t feature_present = 0;
41 size_t size = sizeof(feature_present);
42 if (sysctlbyname(feature, &feature_present, &size, NULL, 0) != 0) {
43 return false;
44 }
45 return feature_present;
46 }
47
// Apple: Neon is reported whenever it is compiled in; each optional extension
// is reported only if it is compiled in and have_feature() finds the matching
// hw.optional.* sysctl entry set.
static int arm_get_cpu_caps(void) {
  int caps = 0;
#if HAVE_NEON
  caps |= HAS_NEON;
#endif  // HAVE_NEON
#if HAVE_ARM_CRC32
  caps |= have_feature("hw.optional.armv8_crc32") ? HAS_ARM_CRC32 : 0;
#endif  // HAVE_ARM_CRC32
#if HAVE_NEON_DOTPROD
  caps |= have_feature("hw.optional.arm.FEAT_DotProd") ? HAS_NEON_DOTPROD : 0;
#endif  // HAVE_NEON_DOTPROD
#if HAVE_NEON_I8MM
  caps |= have_feature("hw.optional.arm.FEAT_I8MM") ? HAS_NEON_I8MM : 0;
#endif  // HAVE_NEON_I8MM
  return caps;
}
64
65 #elif defined(_WIN32) // end __APPLE__
66
// Windows: optional extensions are probed with IsProcessorFeaturePresent().
// Parameter documentation:
// https://learn.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-isprocessorfeaturepresent#parameters
static int arm_get_cpu_caps(void) {
  int caps = 0;
#if HAVE_NEON
  caps |= HAS_NEON;  // Neon is mandatory in Armv8.0-A.
#endif  // HAVE_NEON
#if HAVE_ARM_CRC32
  caps |= IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE)
              ? HAS_ARM_CRC32
              : 0;
#endif  // HAVE_ARM_CRC32
// Support for PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE was added in Windows SDK
// 20348, supported by Windows 11 and Windows Server 2022. When the constant is
// not defined, DotProd is left unreported.
#if HAVE_NEON_DOTPROD && defined(PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE)
  caps |= IsProcessorFeaturePresent(PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE)
              ? HAS_NEON_DOTPROD
              : 0;
#endif  // HAVE_NEON_DOTPROD && defined(PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE)
  // No I8MM or SVE feature detection available on Windows at time of writing.
  return caps;
}
91
92 #elif defined(ANDROID_USE_CPU_FEATURES_LIB)
93
// Branch selected by ANDROID_USE_CPU_FEATURES_LIB: only Neon is reported here,
// and only when it is compiled in.
static int arm_get_cpu_caps(void) {
#if HAVE_NEON
  return HAS_NEON;  // Neon is mandatory in Armv8.0-A.
#else
  return 0;
#endif  // HAVE_NEON
}
101
102 #elif defined(__linux__) // end defined(ANDROID_USE_CPU_FEATURES_LIB)
103
104 #include <sys/auxv.h>
105
106 // Define hwcap values ourselves: building with an old auxv header where these
107 // hwcap values are not defined should not prevent features from being enabled.
108 #define AOM_AARCH64_HWCAP_CRC32 (1 << 7)
109 #define AOM_AARCH64_HWCAP_ASIMDDP (1 << 20)
110 #define AOM_AARCH64_HWCAP_SVE (1 << 22)
111 #define AOM_AARCH64_HWCAP2_SVE2 (1 << 1)
112 #define AOM_AARCH64_HWCAP2_I8MM (1 << 13)
113
// Linux: builds the capability flags from getauxval(). The AT_HWCAP word is
// tested for the CRC32, DotProd and SVE bits and the AT_HWCAP2 word for the
// I8MM and SVE2 bits; each word is fetched only when a feature that needs it
// is compiled in.
static int arm_get_cpu_caps(void) {
#if HAVE_ARM_CRC32 || HAVE_NEON_DOTPROD || HAVE_SVE
  const unsigned long hwcap_bits = getauxval(AT_HWCAP);
#endif
#if HAVE_NEON_I8MM || HAVE_SVE2
  const unsigned long hwcap2_bits = getauxval(AT_HWCAP2);
#endif
  int caps = 0;

#if HAVE_NEON
  caps |= HAS_NEON;  // Neon is mandatory in Armv8.0-A.
#endif  // HAVE_NEON

  // Features tested against AT_HWCAP.
#if HAVE_ARM_CRC32
  if ((hwcap_bits & AOM_AARCH64_HWCAP_CRC32) != 0) {
    caps |= HAS_ARM_CRC32;
  }
#endif  // HAVE_ARM_CRC32
#if HAVE_NEON_DOTPROD
  if ((hwcap_bits & AOM_AARCH64_HWCAP_ASIMDDP) != 0) {
    caps |= HAS_NEON_DOTPROD;
  }
#endif  // HAVE_NEON_DOTPROD
#if HAVE_SVE
  if ((hwcap_bits & AOM_AARCH64_HWCAP_SVE) != 0) {
    caps |= HAS_SVE;
  }
#endif  // HAVE_SVE

  // Features tested against AT_HWCAP2.
#if HAVE_NEON_I8MM
  if ((hwcap2_bits & AOM_AARCH64_HWCAP2_I8MM) != 0) {
    caps |= HAS_NEON_I8MM;
  }
#endif  // HAVE_NEON_I8MM
#if HAVE_SVE2
  if ((hwcap2_bits & AOM_AARCH64_HWCAP2_SVE2) != 0) {
    caps |= HAS_SVE2;
  }
#endif  // HAVE_SVE2
  return caps;
}
143
144 #elif defined(__Fuchsia__) // end __linux__
145
146 #include <zircon/features.h>
147 #include <zircon/syscalls.h>
148
149 // Added in https://fuchsia-review.googlesource.com/c/fuchsia/+/894282.
150 #ifndef ZX_ARM64_FEATURE_ISA_I8MM
151 #define ZX_ARM64_FEATURE_ISA_I8MM ((uint32_t)(1u << 19))
152 #endif
153 // Added in https://fuchsia-review.googlesource.com/c/fuchsia/+/895083.
154 #ifndef ZX_ARM64_FEATURE_ISA_SVE
155 #define ZX_ARM64_FEATURE_ISA_SVE ((uint32_t)(1u << 20))
156 #endif
157
arm_get_cpu_caps(void)158 static int arm_get_cpu_caps(void) {
159 int flags = 0;
160 #if HAVE_NEON
161 flags |= HAS_NEON; // Neon is mandatory in Armv8.0-A.
162 #endif // HAVE_NEON
163 uint32_t features;
164 zx_status_t status = zx_system_get_features(ZX_FEATURE_KIND_CPU, &features);
165 if (status != ZX_OK) return flags;
166 #if HAVE_ARM_CRC32
167 if (features & ZX_ARM64_FEATURE_ISA_CRC32) flags |= HAS_ARM_CRC32;
168 #endif // HAVE_ARM_CRC32
169 #if HAVE_NEON_DOTPROD
170 if (features & ZX_ARM64_FEATURE_ISA_DP) flags |= HAS_NEON_DOTPROD;
171 #endif // HAVE_NEON_DOTPROD
172 #if HAVE_NEON_I8MM
173 if (features & ZX_ARM64_FEATURE_ISA_I8MM) flags |= HAS_NEON_I8MM;
174 #endif // HAVE_NEON_I8MM
175 #if HAVE_SVE
176 if (features & ZX_ARM64_FEATURE_ISA_SVE) flags |= HAS_SVE;
177 #endif // HAVE_SVE
178 return flags;
179 }
180
181 #else // end __Fuchsia__
182 #error \
183 "Runtime CPU detection selected, but no CPU detection method " \
184 "available for your platform. Rerun cmake with -DCONFIG_RUNTIME_CPU_DETECT=0."
185 #endif
186
aom_arm_cpu_caps(void)187 int aom_arm_cpu_caps(void) {
188 int flags = 0;
189 if (!arm_cpu_env_flags(&flags)) {
190 flags = arm_get_cpu_caps() & arm_cpu_env_mask();
191 }
192
193 // Restrict flags: FEAT_I8MM assumes that FEAT_DotProd is available.
194 if (!(flags & HAS_NEON_DOTPROD)) flags &= ~HAS_NEON_I8MM;
195
196 // Restrict flags: SVE assumes that FEAT_{DotProd,I8MM} are available.
197 if (!(flags & HAS_NEON_DOTPROD)) flags &= ~HAS_SVE;
198 if (!(flags & HAS_NEON_I8MM)) flags &= ~HAS_SVE;
199
200 // Restrict flags: SVE2 assumes that FEAT_SVE is available.
201 if (!(flags & HAS_SVE)) flags &= ~HAS_SVE2;
202
203 return flags;
204 }
205