#include <inttypes.h> /* PRIu32 / PRIx32 format macros used in log messages */
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

#include <cpuinfo.h>
#include <x86/api.h>
#include <mach/api.h>
#include <cpuinfo/internal-api.h>
#include <cpuinfo/log.h>

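/*
 * Mach (macOS) initialization path for x86/x86-64: queries processor topology
 * counts from the Mach kernel, runs CPUID-based detection, and publishes the
 * processor, core, cluster, package, and cache tables to the library globals.
 */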

/* Returns the larger of two unsigned 32-bit values. */
static inline uint32_t max(uint32_t a, uint32_t b) {
	return a > b ? a : b;
}

/* Returns a mask with the low `bits` bits set. */
static inline uint32_t bit_mask(uint32_t bits) {
	return (UINT32_C(1) << bits) - UINT32_C(1);
}

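/*
 * One-time initializer for the Mach/x86 port: allocates and fills all global
 * descriptor tables. On any allocation failure, everything allocated so far
 * is released and the library is left uninitialized.
 */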
void cpuinfo_x86_mach_init(void) {
	struct cpuinfo_processor* processors = NULL;
	struct cpuinfo_core* cores = NULL;
	struct cpuinfo_cluster* clusters = NULL;
	struct cpuinfo_package* packages = NULL;
	struct cpuinfo_cache* l1i = NULL;
	struct cpuinfo_cache* l1d = NULL;
	struct cpuinfo_cache* l2 = NULL;
	struct cpuinfo_cache* l3 = NULL;
	struct cpuinfo_cache* l4 = NULL;

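	/* Query aggregate thread/core/package counts from the Mach kernel. */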
	struct cpuinfo_mach_topology mach_topology = cpuinfo_mach_detect_topology();
	processors = calloc(mach_topology.threads, sizeof(struct cpuinfo_processor));
	if (processors == NULL) {
		cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" logical processors",
			mach_topology.threads * sizeof(struct cpuinfo_processor), mach_topology.threads);
		goto cleanup;
	}
	cores = calloc(mach_topology.cores, sizeof(struct cpuinfo_core));
	if (cores == NULL) {
		cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" cores",
			mach_topology.cores * sizeof(struct cpuinfo_core), mach_topology.cores);
		goto cleanup;
	}
	/* On x86, a cluster of cores corresponds to a physical package */
	clusters = calloc(mach_topology.packages, sizeof(struct cpuinfo_cluster));
	if (clusters == NULL) {
		cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" core clusters",
			mach_topology.packages * sizeof(struct cpuinfo_cluster), mach_topology.packages);
		goto cleanup;
	}
	packages = calloc(mach_topology.packages, sizeof(struct cpuinfo_package));
	if (packages == NULL) {
		cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" physical packages",
			mach_topology.packages * sizeof(struct cpuinfo_package), mach_topology.packages);
		goto cleanup;
	}

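	/*
	 * Run CPUID-based detection: vendor, microarchitecture, cache parameters,
	 * and the APIC ID bit-field layout used for topology reconstruction below.
	 */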
	struct cpuinfo_x86_processor x86_processor;
	memset(&x86_processor, 0, sizeof(x86_processor));
	cpuinfo_x86_init_processor(&x86_processor);
	char brand_string[48];
	cpuinfo_x86_normalize_brand_string(x86_processor.brand_string, brand_string);

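	/*
	 * Mach reports only aggregate counts, so the topology is assumed to be
	 * uniform: every package has the same number of cores, and every core
	 * has the same number of hardware threads.
	 */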
	const uint32_t threads_per_core = mach_topology.threads / mach_topology.cores;
	const uint32_t threads_per_package = mach_topology.threads / mach_topology.packages;
	const uint32_t cores_per_package = mach_topology.cores / mach_topology.packages;
	for (uint32_t i = 0; i < mach_topology.packages; i++) {
		clusters[i] = (struct cpuinfo_cluster) {
			.processor_start = i * threads_per_package,
			.processor_count = threads_per_package,
			.core_start = i * cores_per_package,
			.core_count = cores_per_package,
			.cluster_id = 0,
			.package = packages + i,
			.vendor = x86_processor.vendor,
			.uarch = x86_processor.uarch,
			.cpuid = x86_processor.cpuid,
		};
		packages[i].processor_start = i * threads_per_package;
		packages[i].processor_count = threads_per_package;
		packages[i].core_start = i * cores_per_package;
		packages[i].core_count = cores_per_package;
		packages[i].cluster_start = i;
		packages[i].cluster_count = 1;
		cpuinfo_x86_format_package_name(x86_processor.vendor, brand_string, packages[i].name);
	}
	for (uint32_t i = 0; i < mach_topology.cores; i++) {
		cores[i] = (struct cpuinfo_core) {
			.processor_start = i * threads_per_core,
			.processor_count = threads_per_core,
			.core_id = i % cores_per_package,
			.cluster = clusters + i / cores_per_package,
			.package = packages + i / cores_per_package,
			.vendor = x86_processor.vendor,
			.uarch = x86_processor.uarch,
			.cpuid = x86_processor.cpuid,
		};
	}
	for (uint32_t i = 0; i < mach_topology.threads; i++) {
		const uint32_t smt_id = i % threads_per_core;
		const uint32_t core_id = i / threads_per_core;
		const uint32_t package_id = i / threads_per_package;

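		/*
		 * The statements below pack the SMT/core/package indices into an APIC
		 * ID using the CPUID-reported bit-field layout. As an illustration,
		 * under a hypothetical layout with thread_bits_offset = 0,
		 * thread_bits_length = 1, core_bits_offset = 1, core_bits_length = 3,
		 * the package field starts at bit max(0 + 1, 1 + 3) = 4, so a thread
		 * with smt_id = 1, core_id = 5, package_id = 1 reconstructs to
		 * apic_id = (1 << 0) | (5 << 1) | (1 << 4) = 0x1B.
		 */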
		/* Reconstruct APIC IDs from topology components */
		const uint32_t thread_bits_mask = bit_mask(x86_processor.topology.thread_bits_length);
		const uint32_t core_bits_mask   = bit_mask(x86_processor.topology.core_bits_length);
		const uint32_t package_bits_offset = max(
			x86_processor.topology.thread_bits_offset + x86_processor.topology.thread_bits_length,
			x86_processor.topology.core_bits_offset + x86_processor.topology.core_bits_length);
		const uint32_t apic_id =
			((smt_id & thread_bits_mask) << x86_processor.topology.thread_bits_offset) |
			((core_id & core_bits_mask) << x86_processor.topology.core_bits_offset) |
			(package_id << package_bits_offset);
		cpuinfo_log_debug("reconstructed APIC ID 0x%08"PRIx32" for thread %"PRIu32, apic_id, i);

		processors[i].smt_id = smt_id;
		processors[i].core = cores + i / threads_per_core;
		processors[i].cluster = clusters + i / threads_per_package;
		processors[i].package = packages + i / threads_per_package;
		processors[i].apic_id = apic_id;
	}

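	/*
	 * Infer cache sharing. threads_per_cache[level] is the number of logical
	 * processors sharing one cache at that level, as reported by the Mach
	 * kernel; zero means the kernel did not report it, in which case a
	 * per-level heuristic fallback is used instead.
	 */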
	uint32_t threads_per_l1 = 0, l1_count = 0;
	if (x86_processor.cache.l1i.size != 0 || x86_processor.cache.l1d.size != 0) {
		threads_per_l1 = mach_topology.threads_per_cache[1];
		if (threads_per_l1 == 0) {
			/* Assume that threads on the same core share L1 */
			threads_per_l1 = mach_topology.threads / mach_topology.cores;
			cpuinfo_log_warning("Mach kernel did not report number of threads sharing L1 cache; assume %"PRIu32,
				threads_per_l1);
		}
		l1_count = mach_topology.threads / threads_per_l1;
		cpuinfo_log_debug("detected %"PRIu32" L1 caches", l1_count);
	}

	uint32_t threads_per_l2 = 0, l2_count = 0;
	if (x86_processor.cache.l2.size != 0) {
		threads_per_l2 = mach_topology.threads_per_cache[2];
		if (threads_per_l2 == 0) {
			if (x86_processor.cache.l3.size != 0) {
				/* This is not a last-level cache; assume that threads on the same core share L2 */
				threads_per_l2 = mach_topology.threads / mach_topology.cores;
			} else {
				/* This is a last-level cache; assume that threads on the same package share L2 */
				threads_per_l2 = mach_topology.threads / mach_topology.packages;
			}
			cpuinfo_log_warning("Mach kernel did not report number of threads sharing L2 cache; assume %"PRIu32,
				threads_per_l2);
		}
		l2_count = mach_topology.threads / threads_per_l2;
		cpuinfo_log_debug("detected %"PRIu32" L2 caches", l2_count);
	}

	uint32_t threads_per_l3 = 0, l3_count = 0;
	if (x86_processor.cache.l3.size != 0) {
		threads_per_l3 = mach_topology.threads_per_cache[3];
		if (threads_per_l3 == 0) {
			/*
			 * Assume that threads on the same package share L3.
			 * However, it is not necessarily the last-level cache (there may be an L4 cache as well).
			 */
			threads_per_l3 = mach_topology.threads / mach_topology.packages;
			cpuinfo_log_warning("Mach kernel did not report number of threads sharing L3 cache; assume %"PRIu32,
				threads_per_l3);
		}
		l3_count = mach_topology.threads / threads_per_l3;
		cpuinfo_log_debug("detected %"PRIu32" L3 caches", l3_count);
	}

	uint32_t threads_per_l4 = 0, l4_count = 0;
	if (x86_processor.cache.l4.size != 0) {
		threads_per_l4 = mach_topology.threads_per_cache[4];
		if (threads_per_l4 == 0) {
			/*
			 * Assume that all threads share this L4.
			 * As of now, L4 cache exists only on notebook x86 CPUs, which are single-package,
			 * but multi-socket systems could have shared L4 (like on IBM POWER8).
			 */
			threads_per_l4 = mach_topology.threads;
			cpuinfo_log_warning("Mach kernel did not report number of threads sharing L4 cache; assume %"PRIu32,
				threads_per_l4);
		}
		l4_count = mach_topology.threads / threads_per_l4;
		cpuinfo_log_debug("detected %"PRIu32" L4 caches", l4_count);
	}

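	/*
	 * Allocate the per-level cache descriptor arrays and point every logical
	 * processor at the cache instances it shares.
	 */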
	if (x86_processor.cache.l1i.size != 0) {
		l1i = calloc(l1_count, sizeof(struct cpuinfo_cache));
		if (l1i == NULL) {
			cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" L1I caches",
				l1_count * sizeof(struct cpuinfo_cache), l1_count);
			goto cleanup;
		}
		for (uint32_t c = 0; c < l1_count; c++) {
			l1i[c] = (struct cpuinfo_cache) {
				.size            = x86_processor.cache.l1i.size,
				.associativity   = x86_processor.cache.l1i.associativity,
				.sets            = x86_processor.cache.l1i.sets,
				.partitions      = x86_processor.cache.l1i.partitions,
				.line_size       = x86_processor.cache.l1i.line_size,
				.flags           = x86_processor.cache.l1i.flags,
				.processor_start = c * threads_per_l1,
				.processor_count = threads_per_l1,
			};
		}
		for (uint32_t t = 0; t < mach_topology.threads; t++) {
			processors[t].cache.l1i = &l1i[t / threads_per_l1];
		}
	}

	if (x86_processor.cache.l1d.size != 0) {
		l1d = calloc(l1_count, sizeof(struct cpuinfo_cache));
		if (l1d == NULL) {
			cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" L1D caches",
				l1_count * sizeof(struct cpuinfo_cache), l1_count);
			goto cleanup;
		}
		for (uint32_t c = 0; c < l1_count; c++) {
			l1d[c] = (struct cpuinfo_cache) {
				.size            = x86_processor.cache.l1d.size,
				.associativity   = x86_processor.cache.l1d.associativity,
				.sets            = x86_processor.cache.l1d.sets,
				.partitions      = x86_processor.cache.l1d.partitions,
				.line_size       = x86_processor.cache.l1d.line_size,
				.flags           = x86_processor.cache.l1d.flags,
				.processor_start = c * threads_per_l1,
				.processor_count = threads_per_l1,
			};
		}
		for (uint32_t t = 0; t < mach_topology.threads; t++) {
			processors[t].cache.l1d = &l1d[t / threads_per_l1];
		}
	}

	if (l2_count != 0) {
		l2 = calloc(l2_count, sizeof(struct cpuinfo_cache));
		if (l2 == NULL) {
			cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" L2 caches",
				l2_count * sizeof(struct cpuinfo_cache), l2_count);
			goto cleanup;
		}
		for (uint32_t c = 0; c < l2_count; c++) {
			l2[c] = (struct cpuinfo_cache) {
				.size            = x86_processor.cache.l2.size,
				.associativity   = x86_processor.cache.l2.associativity,
				.sets            = x86_processor.cache.l2.sets,
				.partitions      = x86_processor.cache.l2.partitions,
				.line_size       = x86_processor.cache.l2.line_size,
				.flags           = x86_processor.cache.l2.flags,
				.processor_start = c * threads_per_l2,
				.processor_count = threads_per_l2,
			};
		}
		for (uint32_t t = 0; t < mach_topology.threads; t++) {
			processors[t].cache.l2 = &l2[t / threads_per_l2];
		}
	}

	if (l3_count != 0) {
		l3 = calloc(l3_count, sizeof(struct cpuinfo_cache));
		if (l3 == NULL) {
			cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" L3 caches",
				l3_count * sizeof(struct cpuinfo_cache), l3_count);
			goto cleanup;
		}
		for (uint32_t c = 0; c < l3_count; c++) {
			l3[c] = (struct cpuinfo_cache) {
				.size            = x86_processor.cache.l3.size,
				.associativity   = x86_processor.cache.l3.associativity,
				.sets            = x86_processor.cache.l3.sets,
				.partitions      = x86_processor.cache.l3.partitions,
				.line_size       = x86_processor.cache.l3.line_size,
				.flags           = x86_processor.cache.l3.flags,
				.processor_start = c * threads_per_l3,
				.processor_count = threads_per_l3,
			};
		}
		for (uint32_t t = 0; t < mach_topology.threads; t++) {
			processors[t].cache.l3 = &l3[t / threads_per_l3];
		}
	}

	if (l4_count != 0) {
		l4 = calloc(l4_count, sizeof(struct cpuinfo_cache));
		if (l4 == NULL) {
			cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" L4 caches",
				l4_count * sizeof(struct cpuinfo_cache), l4_count);
			goto cleanup;
		}
		for (uint32_t c = 0; c < l4_count; c++) {
			l4[c] = (struct cpuinfo_cache) {
				.size            = x86_processor.cache.l4.size,
				.associativity   = x86_processor.cache.l4.associativity,
				.sets            = x86_processor.cache.l4.sets,
				.partitions      = x86_processor.cache.l4.partitions,
				.line_size       = x86_processor.cache.l4.line_size,
				.flags           = x86_processor.cache.l4.flags,
				.processor_start = c * threads_per_l4,
				.processor_count = threads_per_l4,
			};
		}
		for (uint32_t t = 0; t < mach_topology.threads; t++) {
			processors[t].cache.l4 = &l4[t / threads_per_l4];
		}
	}

	/*
	 * Commit changes: publish the fully constructed tables to the library
	 * globals. Consumers only read them after initialization is flagged below.
	 */
	cpuinfo_processors = processors;
	cpuinfo_cores = cores;
	cpuinfo_clusters = clusters;
	cpuinfo_packages = packages;
	cpuinfo_cache[cpuinfo_cache_level_1i] = l1i;
	cpuinfo_cache[cpuinfo_cache_level_1d] = l1d;
	cpuinfo_cache[cpuinfo_cache_level_2]  = l2;
	cpuinfo_cache[cpuinfo_cache_level_3]  = l3;
	cpuinfo_cache[cpuinfo_cache_level_4]  = l4;

	cpuinfo_processors_count = mach_topology.threads;
	cpuinfo_cores_count = mach_topology.cores;
	cpuinfo_clusters_count = mach_topology.packages;
	cpuinfo_packages_count = mach_topology.packages;
	cpuinfo_cache_count[cpuinfo_cache_level_1i] = l1_count;
	cpuinfo_cache_count[cpuinfo_cache_level_1d] = l1_count;
	cpuinfo_cache_count[cpuinfo_cache_level_2]  = l2_count;
	cpuinfo_cache_count[cpuinfo_cache_level_3]  = l3_count;
	cpuinfo_cache_count[cpuinfo_cache_level_4]  = l4_count;
	cpuinfo_max_cache_size = cpuinfo_compute_max_cache_size(&processors[0]);

	cpuinfo_global_uarch = (struct cpuinfo_uarch_info) {
		.uarch = x86_processor.uarch,
		.cpuid = x86_processor.cpuid,
		.processor_count = mach_topology.threads,
		.core_count = mach_topology.cores,
	};

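	/*
	 * Full memory barrier: make the stores to the global tables above visible
	 * to other threads before the initialization flag is set.
	 */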
	__sync_synchronize();

	cpuinfo_is_initialized = true;

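	/*
	 * Ownership of the arrays has been transferred to the globals; clear the
	 * local pointers so the fall-through into cleanup does not free them.
	 */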
	processors = NULL;
	cores = NULL;
	clusters = NULL;
	packages = NULL;
	l1i = l1d = l2 = l3 = l4 = NULL;

cleanup:
	free(processors);
	free(cores);
	free(clusters);
	free(packages);
	free(l1i);
	free(l1d);
	free(l2);
	free(l3);
	free(l4);
}