#include <inttypes.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

#include <cpuinfo.h>
#include <cpuinfo/internal-api.h>
#include <cpuinfo/log.h>
#include <mach/api.h>
#include <x86/api.h>

static inline uint32_t max(uint32_t a, uint32_t b) {
	return a > b ? a : b;
}

static inline uint32_t bit_mask(uint32_t bits) {
	return (UINT32_C(1) << bits) - UINT32_C(1);
}
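
/*
 * Note: bit_mask(n) returns a mask of the n low-order bits (assuming n < 32),
 * e.g. bit_mask(2) == 0x3 and bit_mask(0) == 0x0. Together with max() above,
 * it is used below to reconstruct APIC IDs from topology components.
 */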

void cpuinfo_x86_mach_init(void) {
	struct cpuinfo_processor* processors = NULL;
	struct cpuinfo_core* cores = NULL;
	struct cpuinfo_cluster* clusters = NULL;
	struct cpuinfo_package* packages = NULL;
	struct cpuinfo_cache* l1i = NULL;
	struct cpuinfo_cache* l1d = NULL;
	struct cpuinfo_cache* l2 = NULL;
	struct cpuinfo_cache* l3 = NULL;
	struct cpuinfo_cache* l4 = NULL;
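
	/*
	 * All descriptor arrays are allocated before anything is published to
	 * the global cpuinfo_* tables; on any allocation failure, control jumps
	 * to the cleanup label, which frees whatever has been allocated so far.
	 */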
	struct cpuinfo_mach_topology mach_topology = cpuinfo_mach_detect_topology();
	processors = calloc(mach_topology.threads, sizeof(struct cpuinfo_processor));
	if (processors == NULL) {
		cpuinfo_log_error(
			"failed to allocate %zu bytes for descriptions of %" PRIu32 " logical processors",
			mach_topology.threads * sizeof(struct cpuinfo_processor),
			mach_topology.threads);
		goto cleanup;
	}
	cores = calloc(mach_topology.cores, sizeof(struct cpuinfo_core));
	if (cores == NULL) {
		cpuinfo_log_error(
			"failed to allocate %zu bytes for descriptions of %" PRIu32 " cores",
			mach_topology.cores * sizeof(struct cpuinfo_core),
			mach_topology.cores);
		goto cleanup;
	}
	/* On x86, a cluster of cores is a physical package */
	clusters = calloc(mach_topology.packages, sizeof(struct cpuinfo_cluster));
	if (clusters == NULL) {
		cpuinfo_log_error(
			"failed to allocate %zu bytes for descriptions of %" PRIu32 " core clusters",
			mach_topology.packages * sizeof(struct cpuinfo_cluster),
			mach_topology.packages);
		goto cleanup;
	}
	packages = calloc(mach_topology.packages, sizeof(struct cpuinfo_package));
	if (packages == NULL) {
		cpuinfo_log_error(
			"failed to allocate %zu bytes for descriptions of %" PRIu32 " physical packages",
			mach_topology.packages * sizeof(struct cpuinfo_package),
			mach_topology.packages);
		goto cleanup;
	}

	struct cpuinfo_x86_processor x86_processor;
	memset(&x86_processor, 0, sizeof(x86_processor));
	cpuinfo_x86_init_processor(&x86_processor);
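	/*
	 * The CPUID brand string (leaves 0x80000002-0x80000004) is at most 48
	 * bytes, including the terminating NUL, hence the fixed-size buffer.
	 */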
	char brand_string[48];
	cpuinfo_x86_normalize_brand_string(x86_processor.brand_string, brand_string);

	const uint32_t threads_per_core = mach_topology.threads / mach_topology.cores;
	const uint32_t threads_per_package = mach_topology.threads / mach_topology.packages;
	const uint32_t cores_per_package = mach_topology.cores / mach_topology.packages;
	for (uint32_t i = 0; i < mach_topology.packages; i++) {
		clusters[i] = (struct cpuinfo_cluster){
			.processor_start = i * threads_per_package,
			.processor_count = threads_per_package,
			.core_start = i * cores_per_package,
			.core_count = cores_per_package,
			.cluster_id = 0,
			.package = packages + i,
			.vendor = x86_processor.vendor,
			.uarch = x86_processor.uarch,
			.cpuid = x86_processor.cpuid,
		};
		packages[i].processor_start = i * threads_per_package;
		packages[i].processor_count = threads_per_package;
		packages[i].core_start = i * cores_per_package;
		packages[i].core_count = cores_per_package;
		packages[i].cluster_start = i;
		packages[i].cluster_count = 1;
		cpuinfo_x86_format_package_name(x86_processor.vendor, brand_string, packages[i].name);
	}
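	/*
	 * Worked example (hypothetical) for the index arithmetic above and
	 * below: on a 2-package system with 4 cores per package and 2 threads
	 * per core, threads_per_package = 8, so package 1 covers processors
	 * 8..15 and cores 4..7. The topology is assumed to be uniform, as Mach
	 * reports only aggregate counts.
	 */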
	for (uint32_t i = 0; i < mach_topology.cores; i++) {
		cores[i] = (struct cpuinfo_core){
			.processor_start = i * threads_per_core,
			.processor_count = threads_per_core,
			.core_id = i % cores_per_package,
			.cluster = clusters + i / cores_per_package,
			.package = packages + i / cores_per_package,
			.vendor = x86_processor.vendor,
			.uarch = x86_processor.uarch,
			.cpuid = x86_processor.cpuid,
		};
	}
	for (uint32_t i = 0; i < mach_topology.threads; i++) {
		const uint32_t smt_id = i % threads_per_core;
		const uint32_t core_id = i / threads_per_core;
		const uint32_t package_id = i / threads_per_package;

		/* Reconstruct APIC IDs from topology components */
		const uint32_t thread_bits_mask = bit_mask(x86_processor.topology.thread_bits_length);
		const uint32_t core_bits_mask = bit_mask(x86_processor.topology.core_bits_length);
		const uint32_t package_bits_offset =
			max(x86_processor.topology.thread_bits_offset + x86_processor.topology.thread_bits_length,
			    x86_processor.topology.core_bits_offset + x86_processor.topology.core_bits_length);
		const uint32_t apic_id = ((smt_id & thread_bits_mask) << x86_processor.topology.thread_bits_offset) |
			((core_id & core_bits_mask) << x86_processor.topology.core_bits_offset) |
			(package_id << package_bits_offset);
		cpuinfo_log_debug("reconstructed APIC ID 0x%08" PRIx32 " for thread %" PRIu32, apic_id, i);

		processors[i].smt_id = smt_id;
		processors[i].core = cores + i / threads_per_core;
		processors[i].cluster = clusters + i / threads_per_package;
		processors[i].package = packages + i / threads_per_package;
		processors[i].apic_id = apic_id;
	}
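	/*
	 * APIC ID layout example (hypothetical field widths): with
	 * thread_bits_offset = 0, thread_bits_length = 1, core_bits_offset = 1
	 * and core_bits_length = 2, thread 5 on an 8-thread, 4-core,
	 * single-package CPU has smt_id = 1 and core_id = 2, giving
	 * apic_id = (1 << 0) | (2 << 1) | (0 << 3) = 0x5.
	 */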

	uint32_t threads_per_l1 = 0, l1_count = 0;
	if (x86_processor.cache.l1i.size != 0 || x86_processor.cache.l1d.size != 0) {
		threads_per_l1 = mach_topology.threads_per_cache[1];
		if (threads_per_l1 == 0) {
			/* Assume that threads on the same core share L1 */
			threads_per_l1 = mach_topology.threads / mach_topology.cores;
			cpuinfo_log_warning(
				"Mach kernel did not report number of threads sharing L1 cache; assume %" PRIu32,
				threads_per_l1);
		}
		l1_count = mach_topology.threads / threads_per_l1;
		cpuinfo_log_debug("detected %" PRIu32 " L1 caches", l1_count);
	}

	uint32_t threads_per_l2 = 0, l2_count = 0;
	if (x86_processor.cache.l2.size != 0) {
		threads_per_l2 = mach_topology.threads_per_cache[2];
		if (threads_per_l2 == 0) {
			if (x86_processor.cache.l3.size != 0) {
				/* This is not a last-level cache; assume that
				 * threads on the same core share L2 */
				threads_per_l2 = mach_topology.threads / mach_topology.cores;
			} else {
				/* This is a last-level cache; assume that
				 * threads on the same package share L2 */
				threads_per_l2 = mach_topology.threads / mach_topology.packages;
			}
			cpuinfo_log_warning(
				"Mach kernel did not report number of threads sharing L2 cache; assume %" PRIu32,
				threads_per_l2);
		}
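		/*
		 * Example (hypothetical): an 8-thread, 4-core, single-package
		 * CPU with a per-core L2 and an L3 present falls back to
		 * threads_per_l2 = 8 / 4 = 2, yielding l2_count = 4 below.
		 */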
		l2_count = mach_topology.threads / threads_per_l2;
		cpuinfo_log_debug("detected %" PRIu32 " L2 caches", l2_count);
	}

	uint32_t threads_per_l3 = 0, l3_count = 0;
	if (x86_processor.cache.l3.size != 0) {
		threads_per_l3 = mach_topology.threads_per_cache[3];
		if (threads_per_l3 == 0) {
			/*
			 * Assume that threads on the same package share L3.
			 * However, it is not necessarily the last-level cache
			 * (there may be an L4 cache as well).
			 */
			threads_per_l3 = mach_topology.threads / mach_topology.packages;
			cpuinfo_log_warning(
				"Mach kernel did not report number of threads sharing L3 cache; assume %" PRIu32,
				threads_per_l3);
		}
		l3_count = mach_topology.threads / threads_per_l3;
		cpuinfo_log_debug("detected %" PRIu32 " L3 caches", l3_count);
	}

	uint32_t threads_per_l4 = 0, l4_count = 0;
	if (x86_processor.cache.l4.size != 0) {
		threads_per_l4 = mach_topology.threads_per_cache[4];
		if (threads_per_l4 == 0) {
			/*
			 * Assume that all threads share this L4.
			 * As of now, L4 cache exists only on notebook x86 CPUs,
			 * which are single-package, but multi-socket systems
			 * could have shared L4 (like on IBM POWER8).
			 */
			threads_per_l4 = mach_topology.threads;
			cpuinfo_log_warning(
				"Mach kernel did not report number of threads sharing L4 cache; assume %" PRIu32,
				threads_per_l4);
		}
		l4_count = mach_topology.threads / threads_per_l4;
		cpuinfo_log_debug("detected %" PRIu32 " L4 caches", l4_count);
	}

	if (x86_processor.cache.l1i.size != 0) {
		l1i = calloc(l1_count, sizeof(struct cpuinfo_cache));
		if (l1i == NULL) {
			cpuinfo_log_error(
				"failed to allocate %zu bytes for descriptions of %" PRIu32 " L1I caches",
				l1_count * sizeof(struct cpuinfo_cache),
				l1_count);
			goto cleanup;
		}
		for (uint32_t c = 0; c < l1_count; c++) {
			l1i[c] = (struct cpuinfo_cache){
				.size = x86_processor.cache.l1i.size,
				.associativity = x86_processor.cache.l1i.associativity,
				.sets = x86_processor.cache.l1i.sets,
				.partitions = x86_processor.cache.l1i.partitions,
				.line_size = x86_processor.cache.l1i.line_size,
				.flags = x86_processor.cache.l1i.flags,
				.processor_start = c * threads_per_l1,
				.processor_count = threads_per_l1,
			};
		}
		for (uint32_t t = 0; t < mach_topology.threads; t++) {
			processors[t].cache.l1i = &l1i[t / threads_per_l1];
		}
	}

	if (x86_processor.cache.l1d.size != 0) {
		l1d = calloc(l1_count, sizeof(struct cpuinfo_cache));
		if (l1d == NULL) {
			cpuinfo_log_error(
				"failed to allocate %zu bytes for descriptions of %" PRIu32 " L1D caches",
				l1_count * sizeof(struct cpuinfo_cache),
				l1_count);
			goto cleanup;
		}
		for (uint32_t c = 0; c < l1_count; c++) {
			l1d[c] = (struct cpuinfo_cache){
				.size = x86_processor.cache.l1d.size,
				.associativity = x86_processor.cache.l1d.associativity,
				.sets = x86_processor.cache.l1d.sets,
				.partitions = x86_processor.cache.l1d.partitions,
				.line_size = x86_processor.cache.l1d.line_size,
				.flags = x86_processor.cache.l1d.flags,
				.processor_start = c * threads_per_l1,
				.processor_count = threads_per_l1,
			};
		}
		for (uint32_t t = 0; t < mach_topology.threads; t++) {
			processors[t].cache.l1d = &l1d[t / threads_per_l1];
		}
	}

	if (l2_count != 0) {
		l2 = calloc(l2_count, sizeof(struct cpuinfo_cache));
		if (l2 == NULL) {
			cpuinfo_log_error(
				"failed to allocate %zu bytes for descriptions of %" PRIu32 " L2 caches",
				l2_count * sizeof(struct cpuinfo_cache),
				l2_count);
			goto cleanup;
		}
		for (uint32_t c = 0; c < l2_count; c++) {
			l2[c] = (struct cpuinfo_cache){
				.size = x86_processor.cache.l2.size,
				.associativity = x86_processor.cache.l2.associativity,
				.sets = x86_processor.cache.l2.sets,
				.partitions = x86_processor.cache.l2.partitions,
				.line_size = x86_processor.cache.l2.line_size,
				.flags = x86_processor.cache.l2.flags,
				.processor_start = c * threads_per_l2,
				.processor_count = threads_per_l2,
			};
		}
		for (uint32_t t = 0; t < mach_topology.threads; t++) {
			processors[t].cache.l2 = &l2[t / threads_per_l2];
		}
	}

	if (l3_count != 0) {
		l3 = calloc(l3_count, sizeof(struct cpuinfo_cache));
		if (l3 == NULL) {
			cpuinfo_log_error(
				"failed to allocate %zu bytes for descriptions of %" PRIu32 " L3 caches",
				l3_count * sizeof(struct cpuinfo_cache),
				l3_count);
			goto cleanup;
		}
		for (uint32_t c = 0; c < l3_count; c++) {
			l3[c] = (struct cpuinfo_cache){
				.size = x86_processor.cache.l3.size,
				.associativity = x86_processor.cache.l3.associativity,
				.sets = x86_processor.cache.l3.sets,
				.partitions = x86_processor.cache.l3.partitions,
				.line_size = x86_processor.cache.l3.line_size,
				.flags = x86_processor.cache.l3.flags,
				.processor_start = c * threads_per_l3,
				.processor_count = threads_per_l3,
			};
		}
		for (uint32_t t = 0; t < mach_topology.threads; t++) {
			processors[t].cache.l3 = &l3[t / threads_per_l3];
		}
	}

	if (l4_count != 0) {
		l4 = calloc(l4_count, sizeof(struct cpuinfo_cache));
		if (l4 == NULL) {
			cpuinfo_log_error(
				"failed to allocate %zu bytes for descriptions of %" PRIu32 " L4 caches",
				l4_count * sizeof(struct cpuinfo_cache),
				l4_count);
			goto cleanup;
		}
		for (uint32_t c = 0; c < l4_count; c++) {
			l4[c] = (struct cpuinfo_cache){
				.size = x86_processor.cache.l4.size,
				.associativity = x86_processor.cache.l4.associativity,
				.sets = x86_processor.cache.l4.sets,
				.partitions = x86_processor.cache.l4.partitions,
				.line_size = x86_processor.cache.l4.line_size,
				.flags = x86_processor.cache.l4.flags,
				.processor_start = c * threads_per_l4,
				.processor_count = threads_per_l4,
			};
		}
		for (uint32_t t = 0; t < mach_topology.threads; t++) {
			processors[t].cache.l4 = &l4[t / threads_per_l4];
		}
	}

	/* Commit changes */
	cpuinfo_processors = processors;
	cpuinfo_cores = cores;
	cpuinfo_clusters = clusters;
	cpuinfo_packages = packages;
	cpuinfo_cache[cpuinfo_cache_level_1i] = l1i;
	cpuinfo_cache[cpuinfo_cache_level_1d] = l1d;
	cpuinfo_cache[cpuinfo_cache_level_2] = l2;
	cpuinfo_cache[cpuinfo_cache_level_3] = l3;
	cpuinfo_cache[cpuinfo_cache_level_4] = l4;

	cpuinfo_processors_count = mach_topology.threads;
	cpuinfo_cores_count = mach_topology.cores;
	cpuinfo_clusters_count = mach_topology.packages;
	cpuinfo_packages_count = mach_topology.packages;
	cpuinfo_cache_count[cpuinfo_cache_level_1i] = l1_count;
	cpuinfo_cache_count[cpuinfo_cache_level_1d] = l1_count;
	cpuinfo_cache_count[cpuinfo_cache_level_2] = l2_count;
	cpuinfo_cache_count[cpuinfo_cache_level_3] = l3_count;
	cpuinfo_cache_count[cpuinfo_cache_level_4] = l4_count;
	cpuinfo_max_cache_size = cpuinfo_compute_max_cache_size(&processors[0]);

	cpuinfo_global_uarch = (struct cpuinfo_uarch_info){
		.uarch = x86_processor.uarch,
		.cpuid = x86_processor.cpuid,
		.processor_count = mach_topology.threads,
		.core_count = mach_topology.cores,
	};

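	/*
	 * Full memory barrier: ensure all stores to the global tables above are
	 * visible to other threads before cpuinfo_is_initialized is set.
	 */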
	__sync_synchronize();

	cpuinfo_is_initialized = true;

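	/*
	 * Ownership of the arrays has been transferred to the globals; clear
	 * the local pointers so the cleanup path below does not free them.
	 */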
	processors = NULL;
	cores = NULL;
	clusters = NULL;
	packages = NULL;
	l1i = l1d = l2 = l3 = l4 = NULL;

cleanup:
	free(processors);
	free(cores);
	free(clusters);
	free(packages);
	free(l1i);
	free(l1d);
	free(l2);
	free(l3);
	free(l4);
}