1 /**************************************************************************
2 *
3 * Copyright 2008 Dennis Smit
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * on the rights to use, copy, modify, merge, publish, distribute, sub
10 * license, and/or sell copies of the Software, and to permit persons to whom
11 * the Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the next
14 * paragraph) shall be included in all copies or substantial portions of the
15 * Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
20 * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
21 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
22 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
23 * USE OR OTHER DEALINGS IN THE SOFTWARE.
24 *
25 ***************************************************************************/
26
27 /**
28 * @file
29 * CPU feature detection.
30 *
31 * @author Dennis Smit
32 * @author Based on the work of Eric Anholt <anholt@FreeBSD.org>
33 */
34
35 #ifndef _UTIL_CPU_DETECT_H
36 #define _UTIL_CPU_DETECT_H
37
38 #include <stdbool.h>
39
40 #include "util/macros.h"
41 #include "util/u_atomic.h"
42 #include "util/u_thread.h"
43
44
/* Upper bound on the number of CPUs this header can describe.  It sizes the
 * per-CPU table util_cpu_caps_t::cpu_to_L3 and, divided by 32, gives the
 * number of 32-bit words in util_affinity_mask.
 */
#define UTIL_MAX_CPUS 1024 /* this should be enough */
47
48 #ifdef __cplusplus
49 extern "C" {
50 #endif
51
/**
 * Coarse CPU family classification, stored in util_cpu_caps_t::family.
 *
 * Only the first enumerator is given an explicit value; the rest follow
 * sequentially, so the declaration order below is significant.
 */
enum cpu_family {
   CPU_UNKNOWN = 0,

   /* AMD Zen entries. */
   CPU_AMD_ZEN1_ZEN2,
   CPU_AMD_ZEN_HYGON,
   CPU_AMD_ZEN3,
   CPU_AMD_ZEN_NEXT,
   /* NOTE(review): presumably an end marker for the AMD range rather than a
    * real family -- confirm against the code that compares against it.
    */
   CPU_AMD_LAST,

   CPU_S390X,
};
63
/* Bitmask of UTIL_MAX_CPUS bits stored as an array of 32-bit words.
 * NOTE(review): presumably bit N of the mask stands for CPU N -- confirm
 * against the code that fills and consumes these masks.
 */
typedef uint32_t util_affinity_mask[UTIL_MAX_CPUS / 32];
65
/**
 * CPU capability description handed out by util_get_cpu_caps().
 *
 * The fields are filled in by detection code that is not part of this
 * header; callers receive a const pointer and only read them.
 */
struct util_cpu_caps_t {
   /**
    * Number of CPUs available to the process.
    *
    * This will be less than or equal to \c max_cpus.  This is the number of
    * CPUs that are online and available to the process.
    */
   int16_t nr_cpus;

   /**
    * Maximum number of CPUs that can be online in the system.
    *
    * This will be greater than or equal to \c nr_cpus.  This is the number of
    * CPUs installed in the system.  \c nr_cpus will be less if some CPUs are
    * offline.
    */
   int16_t max_cpus;

   /** Coarse family classification; see enum cpu_family. */
   enum cpu_family family;

   /* Feature flags */
   /* NOTE(review): the encoding of x86_cpu_type is not visible in this
    * header -- confirm against the detection code before relying on it.
    */
   int x86_cpu_type;
   /* NOTE(review): presumably the cache line size in bytes -- confirm. */
   unsigned cacheline;

   /* Single-bit flags.  NOTE(review): the names suggest each bit is set when
    * the corresponding vendor / instruction-set feature has been detected;
    * the code that sets them is outside this header.
    */
   unsigned has_intel:1;
   unsigned has_mmx:1;
   unsigned has_mmx2:1;
   unsigned has_sse:1;
   unsigned has_sse2:1;
   unsigned has_sse3:1;
   unsigned has_ssse3:1;
   unsigned has_sse4_1:1;
   unsigned has_sse4_2:1;
   unsigned has_popcnt:1;
   unsigned has_avx:1;
   unsigned has_avx2:1;
   unsigned has_f16c:1;
   unsigned has_fma:1;
   unsigned has_3dnow:1;
   unsigned has_3dnow_ext:1;
   unsigned has_xop:1;
   unsigned has_altivec:1;
   unsigned has_vsx:1;
   unsigned has_daz:1;
   unsigned has_neon:1;
   unsigned has_msa:1;

   /* AVX-512 sub-feature bits, one per extension. */
   unsigned has_avx512f:1;
   unsigned has_avx512dq:1;
   unsigned has_avx512ifma:1;
   unsigned has_avx512pf:1;
   unsigned has_avx512er:1;
   unsigned has_avx512cd:1;
   unsigned has_avx512bw:1;
   unsigned has_avx512vl:1;
   unsigned has_avx512vbmi:1;

   unsigned has_clflushopt:1;

   /* NOTE(review): presumably the number of entries reachable through
    * L3_affinity_mask -- confirm against the detection code.
    */
   unsigned num_L3_caches;
   /* NOTE(review): presumably how many bits of a util_affinity_mask are
    * meaningful on this system -- confirm.
    */
   unsigned num_cpu_mask_bits;
   /* NOTE(review): presumably the widest usable SIMD vector, in bits --
    * confirm.
    */
   unsigned max_vector_bits;

   /* Per-CPU table with UTIL_MAX_CPUS entries.  NOTE(review): presumably maps
    * a CPU index to its L3 cache index, with U_CPU_INVALID_L3 meaning "none"
    * -- confirm.
    */
   uint16_t cpu_to_L3[UTIL_MAX_CPUS];

   /* Affinity masks for each L3 cache.
    * NOTE(review): allocation and ownership of this array are not visible in
    * this header.
    */
   util_affinity_mask *L3_affinity_mask;
   /**
    * Number of "big" CPUs in a big.LITTLE configuration.
    *
    * A "big" CPU is defined as anything with >= 50% the capacity of the
    * largest CPU.  This is useful for drivers determining how many and what
    * kinds of threads to use.
    * Example: 1x prime + 3x big + 4x little = 4x "big" cores.
    *
    * A value of zero indicates that CPUs are homogeneous.
    */
   int16_t nr_big_cpus;
};
144
/**
 * Detection state backing util_get_cpu_caps().
 */
struct _util_cpu_caps_state_t {
   /* Passed to call_once() by util_get_cpu_caps() together with
    * _util_cpu_detect_once().
    */
   once_flag once_flag;
   /**
    * Initialized to 0 and set to non-zero with an atomic after the entire
    * struct has been initialized.
    *
    * util_get_cpu_caps() reads it with p_atomic_read() and only goes through
    * call_once() while it is still zero.
    */
   uint32_t detect_done;
   /* The capabilities whose address util_get_cpu_caps() returns. */
   struct util_cpu_caps_t caps;
};
154
/* NOTE(review): presumably the marker stored in util_cpu_caps_t::cpu_to_L3
 * for a CPU without a known L3 cache (0xffff is the largest uint16_t value)
 * -- confirm against the detection code.
 */
#define U_CPU_INVALID_L3 0xffff
156
157 static inline ATTRIBUTE_CONST const struct util_cpu_caps_t *
util_get_cpu_caps(void)158 util_get_cpu_caps(void)
159 {
160 extern void _util_cpu_detect_once(void);
161 extern struct _util_cpu_caps_state_t _util_cpu_caps_state;
162
163 /* On most CPU architectures, an atomic read is simply a regular memory
164 * load instruction with some extra compiler magic to prevent code
165 * re-ordering around it. The perf impact of doing this check should be
166 * negligible in most cases.
167 *
168 * Also, even though it looks like a bit of a lie, we've declared this
169 * function with ATTRIBUTE_CONST. The GCC docs say:
170 *
171 * "Calls to functions whose return value is not affected by changes to
172 * the observable state of the program and that have no observable
173 * effects on such state other than to return a value may lend
174 * themselves to optimizations such as common subexpression elimination.
175 * Declaring such functions with the const attribute allows GCC to avoid
176 * emitting some calls in repeated invocations of the function with the
177 * same argument values."
178 *
179 * The word "observable" is important here. With the exception of a
180 * llvmpipe debug flag behind an environment variable and a few unit tests,
181 * all of which emulate worse CPUs, this function neither affects nor is
182 * affected by any "observable" state. It has its own internal state for
183 * sure, but that state is such that it appears to return exactly the same
184 * value with the same internal data every time.
185 */
186 if (unlikely(!p_atomic_read(&_util_cpu_caps_state.detect_done)))
187 call_once(&_util_cpu_caps_state.once_flag, _util_cpu_detect_once);
188
189 return &_util_cpu_caps_state.caps;
190 }
191
192 #ifdef __cplusplus
193 }
194 #endif
195
196
197 #endif /* _UTIL_CPU_DETECT_H */
198