#include <inttypes.h> /* PRIu32/PRIx32 format specifiers used in log messages */
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

#include <cpuinfo.h>
#include <cpuinfo/internal-api.h>
#include <cpuinfo/log.h>
#include <mach/api.h>
#include <x86/api.h>

static inline uint32_t max(uint32_t a, uint32_t b) {
	return a > b ? a : b;
}

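/* Mask with the low `bits` bits set; used below to extract the SMT and core
 * fields when reconstructing APIC IDs. */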
static inline uint32_t bit_mask(uint32_t bits) {
	return (UINT32_C(1) << bits) - UINT32_C(1);
}

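/*
 * Entry point for x86 initialization on Mach-based systems (macOS): queries
 * the Mach kernel for the machine topology, runs CPUID-based detection of the
 * processor, builds the processor/core/cluster/package and cache descriptor
 * arrays, and publishes them to the global cpuinfo state.
 */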
void cpuinfo_x86_mach_init(void) {
	struct cpuinfo_processor* processors = NULL;
	struct cpuinfo_core* cores = NULL;
	struct cpuinfo_cluster* clusters = NULL;
	struct cpuinfo_package* packages = NULL;
	struct cpuinfo_cache* l1i = NULL;
	struct cpuinfo_cache* l1d = NULL;
	struct cpuinfo_cache* l2 = NULL;
	struct cpuinfo_cache* l3 = NULL;
	struct cpuinfo_cache* l4 = NULL;

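	/*
	 * Query the Mach kernel for counts of logical processors, cores, and
	 * physical packages, then allocate one descriptor per entity. On any
	 * allocation failure, release everything allocated so far and return
	 * with cpuinfo left uninitialized.
	 */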
	struct cpuinfo_mach_topology mach_topology = cpuinfo_mach_detect_topology();
	processors = calloc(mach_topology.threads, sizeof(struct cpuinfo_processor));
	if (processors == NULL) {
		cpuinfo_log_error(
			"failed to allocate %zu bytes for descriptions of %" PRIu32 " logical processors",
			mach_topology.threads * sizeof(struct cpuinfo_processor),
			mach_topology.threads);
		goto cleanup;
	}
	cores = calloc(mach_topology.cores, sizeof(struct cpuinfo_core));
	if (cores == NULL) {
		cpuinfo_log_error(
			"failed to allocate %zu bytes for descriptions of %" PRIu32 " cores",
			mach_topology.cores * sizeof(struct cpuinfo_core),
			mach_topology.cores);
		goto cleanup;
	}
	/* On x86, a cluster of cores is a physical package */
	clusters = calloc(mach_topology.packages, sizeof(struct cpuinfo_cluster));
	if (clusters == NULL) {
		cpuinfo_log_error(
			"failed to allocate %zu bytes for descriptions of %" PRIu32 " core clusters",
			mach_topology.packages * sizeof(struct cpuinfo_cluster),
			mach_topology.packages);
		goto cleanup;
	}
	packages = calloc(mach_topology.packages, sizeof(struct cpuinfo_package));
	if (packages == NULL) {
		cpuinfo_log_error(
			"failed to allocate %zu bytes for descriptions of %" PRIu32 " physical packages",
			mach_topology.packages * sizeof(struct cpuinfo_package),
			mach_topology.packages);
		goto cleanup;
	}

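	/*
	 * Run CPUID-based detection of the processor: vendor,
	 * microarchitecture, APIC topology bit fields, and cache parameters.
	 * The raw brand string is normalized for use as the package name.
	 */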
	struct cpuinfo_x86_processor x86_processor;
	memset(&x86_processor, 0, sizeof(x86_processor));
	cpuinfo_x86_init_processor(&x86_processor);
	char brand_string[48];
	cpuinfo_x86_normalize_brand_string(x86_processor.brand_string, brand_string);

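	/*
	 * Mach reports only machine-wide totals, so assume that logical
	 * processors and cores are distributed uniformly across cores and
	 * packages.
	 */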
	const uint32_t threads_per_core = mach_topology.threads / mach_topology.cores;
	const uint32_t threads_per_package = mach_topology.threads / mach_topology.packages;
	const uint32_t cores_per_package = mach_topology.cores / mach_topology.packages;
	for (uint32_t i = 0; i < mach_topology.packages; i++) {
		clusters[i] = (struct cpuinfo_cluster){
			.processor_start = i * threads_per_package,
			.processor_count = threads_per_package,
			.core_start = i * cores_per_package,
			.core_count = cores_per_package,
			.cluster_id = 0,
			.package = packages + i,
			.vendor = x86_processor.vendor,
			.uarch = x86_processor.uarch,
			.cpuid = x86_processor.cpuid,
		};
		packages[i].processor_start = i * threads_per_package;
		packages[i].processor_count = threads_per_package;
		packages[i].core_start = i * cores_per_package;
		packages[i].core_count = cores_per_package;
		packages[i].cluster_start = i;
		packages[i].cluster_count = 1;
		cpuinfo_x86_format_package_name(x86_processor.vendor, brand_string, packages[i].name);
	}
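	/* Populate core descriptors; core_id is the core's index within its package. */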
	for (uint32_t i = 0; i < mach_topology.cores; i++) {
		cores[i] = (struct cpuinfo_core){
			.processor_start = i * threads_per_core,
			.processor_count = threads_per_core,
			.core_id = i % cores_per_package,
			.cluster = clusters + i / cores_per_package,
			.package = packages + i / cores_per_package,
			.vendor = x86_processor.vendor,
			.uarch = x86_processor.uarch,
			.cpuid = x86_processor.cpuid,
		};
	}
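	/*
	 * Populate logical-processor descriptors. The APIC ID is rebuilt from
	 * its components as (package | core | SMT) bit fields, using the field
	 * offsets and widths reported by the x86 topology detection.
	 */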
	for (uint32_t i = 0; i < mach_topology.threads; i++) {
		const uint32_t smt_id = i % threads_per_core;
		const uint32_t core_id = i / threads_per_core;
		const uint32_t package_id = i / threads_per_package;

		/* Reconstruct APIC IDs from topology components */
		const uint32_t thread_bits_mask = bit_mask(x86_processor.topology.thread_bits_length);
		const uint32_t core_bits_mask = bit_mask(x86_processor.topology.core_bits_length);
		const uint32_t package_bits_offset =
			max(x86_processor.topology.thread_bits_offset + x86_processor.topology.thread_bits_length,
			    x86_processor.topology.core_bits_offset + x86_processor.topology.core_bits_length);
		const uint32_t apic_id = ((smt_id & thread_bits_mask) << x86_processor.topology.thread_bits_offset) |
			((core_id & core_bits_mask) << x86_processor.topology.core_bits_offset) |
			(package_id << package_bits_offset);
		cpuinfo_log_debug("reconstructed APIC ID 0x%08" PRIx32 " for thread %" PRIu32, apic_id, i);

		processors[i].smt_id = smt_id;
		processors[i].core = cores + i / threads_per_core;
		processors[i].cluster = clusters + i / threads_per_package;
		processors[i].package = packages + i / threads_per_package;
		processors[i].apic_id = apic_id;
	}

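	/*
	 * For each cache level, the number of logical processors sharing one
	 * cache comes from the Mach topology (threads_per_cache[]); a zero
	 * entry means the kernel did not report it, in which case a heuristic
	 * based on the cache level's position in the hierarchy is used.
	 */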
	uint32_t threads_per_l1 = 0, l1_count = 0;
	if (x86_processor.cache.l1i.size != 0 || x86_processor.cache.l1d.size != 0) {
		threads_per_l1 = mach_topology.threads_per_cache[1];
		if (threads_per_l1 == 0) {
			/* Assume that threads on the same core share L1 */
			threads_per_l1 = mach_topology.threads / mach_topology.cores;
			cpuinfo_log_warning(
				"Mach kernel did not report number of threads sharing L1 cache; assume %" PRIu32,
				threads_per_l1);
		}
		l1_count = mach_topology.threads / threads_per_l1;
		cpuinfo_log_debug("detected %" PRIu32 " L1 caches", l1_count);
	}

	uint32_t threads_per_l2 = 0, l2_count = 0;
	if (x86_processor.cache.l2.size != 0) {
		threads_per_l2 = mach_topology.threads_per_cache[2];
		if (threads_per_l2 == 0) {
			if (x86_processor.cache.l3.size != 0) {
				/* This is not a last-level cache; assume that
				 * threads on the same core share L2 */
				threads_per_l2 = mach_topology.threads / mach_topology.cores;
			} else {
				/* This is a last-level cache; assume that
				 * threads on the same package share L2 */
				threads_per_l2 = mach_topology.threads / mach_topology.packages;
			}
			cpuinfo_log_warning(
				"Mach kernel did not report number of threads sharing L2 cache; assume %" PRIu32,
				threads_per_l2);
		}
		l2_count = mach_topology.threads / threads_per_l2;
		cpuinfo_log_debug("detected %" PRIu32 " L2 caches", l2_count);
	}

	uint32_t threads_per_l3 = 0, l3_count = 0;
	if (x86_processor.cache.l3.size != 0) {
		threads_per_l3 = mach_topology.threads_per_cache[3];
		if (threads_per_l3 == 0) {
			/*
			 * Assume that threads on the same package share L3.
			 * However, it is not necessarily the last-level cache
			 * (there may be an L4 cache as well).
			 */
			threads_per_l3 = mach_topology.threads / mach_topology.packages;
			cpuinfo_log_warning(
				"Mach kernel did not report number of threads sharing L3 cache; assume %" PRIu32,
				threads_per_l3);
		}
		l3_count = mach_topology.threads / threads_per_l3;
		cpuinfo_log_debug("detected %" PRIu32 " L3 caches", l3_count);
	}

	uint32_t threads_per_l4 = 0, l4_count = 0;
	if (x86_processor.cache.l4.size != 0) {
		threads_per_l4 = mach_topology.threads_per_cache[4];
		if (threads_per_l4 == 0) {
			/*
			 * Assume that all threads share this L4.
			 * As of now, L4 cache exists only on notebook x86 CPUs,
			 * which are single-package, but multi-socket systems
			 * could have shared L4 (like on IBM POWER8).
			 */
			threads_per_l4 = mach_topology.threads;
			cpuinfo_log_warning(
				"Mach kernel did not report number of threads sharing L4 cache; assume %" PRIu32,
				threads_per_l4);
		}
		l4_count = mach_topology.threads / threads_per_l4;
		cpuinfo_log_debug("detected %" PRIu32 " L4 caches", l4_count);
	}

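	/*
	 * Allocate and populate the per-level cache descriptors: each
	 * descriptor copies the parameters detected above and covers a
	 * contiguous range of logical processors, and every processor is
	 * pointed at the cache instance it belongs to. The same pattern
	 * repeats for L1I, L1D, L2, L3, and L4.
	 */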
	if (x86_processor.cache.l1i.size != 0) {
		l1i = calloc(l1_count, sizeof(struct cpuinfo_cache));
		if (l1i == NULL) {
			cpuinfo_log_error(
				"failed to allocate %zu bytes for descriptions of %" PRIu32 " L1I caches",
				l1_count * sizeof(struct cpuinfo_cache),
				l1_count);
			goto cleanup;
		}
		for (uint32_t c = 0; c < l1_count; c++) {
			l1i[c] = (struct cpuinfo_cache){
				.size = x86_processor.cache.l1i.size,
				.associativity = x86_processor.cache.l1i.associativity,
				.sets = x86_processor.cache.l1i.sets,
				.partitions = x86_processor.cache.l1i.partitions,
				.line_size = x86_processor.cache.l1i.line_size,
				.flags = x86_processor.cache.l1i.flags,
				.processor_start = c * threads_per_l1,
				.processor_count = threads_per_l1,
			};
		}
		for (uint32_t t = 0; t < mach_topology.threads; t++) {
			processors[t].cache.l1i = &l1i[t / threads_per_l1];
		}
	}

	if (x86_processor.cache.l1d.size != 0) {
		l1d = calloc(l1_count, sizeof(struct cpuinfo_cache));
		if (l1d == NULL) {
			cpuinfo_log_error(
				"failed to allocate %zu bytes for descriptions of %" PRIu32 " L1D caches",
				l1_count * sizeof(struct cpuinfo_cache),
				l1_count);
			goto cleanup;
		}
		for (uint32_t c = 0; c < l1_count; c++) {
			l1d[c] = (struct cpuinfo_cache){
				.size = x86_processor.cache.l1d.size,
				.associativity = x86_processor.cache.l1d.associativity,
				.sets = x86_processor.cache.l1d.sets,
				.partitions = x86_processor.cache.l1d.partitions,
				.line_size = x86_processor.cache.l1d.line_size,
				.flags = x86_processor.cache.l1d.flags,
				.processor_start = c * threads_per_l1,
				.processor_count = threads_per_l1,
			};
		}
		for (uint32_t t = 0; t < mach_topology.threads; t++) {
			processors[t].cache.l1d = &l1d[t / threads_per_l1];
		}
	}

	if (l2_count != 0) {
		l2 = calloc(l2_count, sizeof(struct cpuinfo_cache));
		if (l2 == NULL) {
			cpuinfo_log_error(
				"failed to allocate %zu bytes for descriptions of %" PRIu32 " L2 caches",
				l2_count * sizeof(struct cpuinfo_cache),
				l2_count);
			goto cleanup;
		}
		for (uint32_t c = 0; c < l2_count; c++) {
			l2[c] = (struct cpuinfo_cache){
				.size = x86_processor.cache.l2.size,
				.associativity = x86_processor.cache.l2.associativity,
				.sets = x86_processor.cache.l2.sets,
				.partitions = x86_processor.cache.l2.partitions,
				.line_size = x86_processor.cache.l2.line_size,
				.flags = x86_processor.cache.l2.flags,
				.processor_start = c * threads_per_l2,
				.processor_count = threads_per_l2,
			};
		}
		for (uint32_t t = 0; t < mach_topology.threads; t++) {
			processors[t].cache.l2 = &l2[t / threads_per_l2];
		}
	}

	if (l3_count != 0) {
		l3 = calloc(l3_count, sizeof(struct cpuinfo_cache));
		if (l3 == NULL) {
			cpuinfo_log_error(
				"failed to allocate %zu bytes for descriptions of %" PRIu32 " L3 caches",
				l3_count * sizeof(struct cpuinfo_cache),
				l3_count);
			goto cleanup;
		}
		for (uint32_t c = 0; c < l3_count; c++) {
			l3[c] = (struct cpuinfo_cache){
				.size = x86_processor.cache.l3.size,
				.associativity = x86_processor.cache.l3.associativity,
				.sets = x86_processor.cache.l3.sets,
				.partitions = x86_processor.cache.l3.partitions,
				.line_size = x86_processor.cache.l3.line_size,
				.flags = x86_processor.cache.l3.flags,
				.processor_start = c * threads_per_l3,
				.processor_count = threads_per_l3,
			};
		}
		for (uint32_t t = 0; t < mach_topology.threads; t++) {
			processors[t].cache.l3 = &l3[t / threads_per_l3];
		}
	}

	if (l4_count != 0) {
		l4 = calloc(l4_count, sizeof(struct cpuinfo_cache));
		if (l4 == NULL) {
			cpuinfo_log_error(
				"failed to allocate %zu bytes for descriptions of %" PRIu32 " L4 caches",
				l4_count * sizeof(struct cpuinfo_cache),
				l4_count);
			goto cleanup;
		}
		for (uint32_t c = 0; c < l4_count; c++) {
			l4[c] = (struct cpuinfo_cache){
				.size = x86_processor.cache.l4.size,
				.associativity = x86_processor.cache.l4.associativity,
				.sets = x86_processor.cache.l4.sets,
				.partitions = x86_processor.cache.l4.partitions,
				.line_size = x86_processor.cache.l4.line_size,
				.flags = x86_processor.cache.l4.flags,
				.processor_start = c * threads_per_l4,
				.processor_count = threads_per_l4,
			};
		}
		for (uint32_t t = 0; t < mach_topology.threads; t++) {
			processors[t].cache.l4 = &l4[t / threads_per_l4];
		}
	}

	/* Commit changes */
	cpuinfo_processors = processors;
	cpuinfo_cores = cores;
	cpuinfo_clusters = clusters;
	cpuinfo_packages = packages;
	cpuinfo_cache[cpuinfo_cache_level_1i] = l1i;
	cpuinfo_cache[cpuinfo_cache_level_1d] = l1d;
	cpuinfo_cache[cpuinfo_cache_level_2] = l2;
	cpuinfo_cache[cpuinfo_cache_level_3] = l3;
	cpuinfo_cache[cpuinfo_cache_level_4] = l4;

	cpuinfo_processors_count = mach_topology.threads;
	cpuinfo_cores_count = mach_topology.cores;
	cpuinfo_clusters_count = mach_topology.packages;
	cpuinfo_packages_count = mach_topology.packages;
	cpuinfo_cache_count[cpuinfo_cache_level_1i] = l1_count;
	cpuinfo_cache_count[cpuinfo_cache_level_1d] = l1_count;
	cpuinfo_cache_count[cpuinfo_cache_level_2] = l2_count;
	cpuinfo_cache_count[cpuinfo_cache_level_3] = l3_count;
	cpuinfo_cache_count[cpuinfo_cache_level_4] = l4_count;
	cpuinfo_max_cache_size = cpuinfo_compute_max_cache_size(&processors[0]);

	cpuinfo_global_uarch = (struct cpuinfo_uarch_info){
		.uarch = x86_processor.uarch,
		.cpuid = x86_processor.cpuid,
		.processor_count = mach_topology.threads,
		.core_count = mach_topology.cores,
	};

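	/*
	 * Full memory barrier: make all of the stores above visible to other
	 * threads before cpuinfo_is_initialized is set.
	 */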
	__sync_synchronize();

	cpuinfo_is_initialized = true;

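	/*
	 * Ownership of the arrays has been transferred to the global state;
	 * reset the local pointers so the cleanup path below does not free
	 * them.
	 */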
	processors = NULL;
	cores = NULL;
	clusters = NULL;
	packages = NULL;
	l1i = l1d = l2 = l3 = l4 = NULL;

cleanup:
	free(processors);
	free(cores);
	free(clusters);
	free(packages);
	free(l1i);
	free(l1d);
	free(l2);
	free(l3);
	free(l4);
}