#include <errno.h>
#include <malloc.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>

#include <cpuinfo.h>
#include <cpuinfo/internal-api.h>
#include <cpuinfo/log.h>

#include "windows-arm-init.h"

#define MAX_NR_OF_CACHES (cpuinfo_cache_level_max - 1)

/* Call chain:
 * cpu_info_init_by_logical_sys_info
 * 		read_packages_for_processors
 * 		read_cores_for_processors
 * 		read_caches_for_processors
 * 			read_all_logical_processor_info_of_relation
 * 				parse_relation_processor_info
 * 					store_package_info_per_processor
 * 					store_core_info_per_processor
 * 				parse_relation_cache_info
 * 					store_cache_info_per_processor
 */

static uint32_t count_logical_processors(const uint32_t max_group_count, uint32_t* global_proc_index_per_group);

static uint32_t read_packages_for_processors(
	struct cpuinfo_processor* processors,
	const uint32_t number_of_processors,
	const uint32_t* global_proc_index_per_group,
	const struct woa_chip_info* chip_info);

static uint32_t read_cores_for_processors(
	struct cpuinfo_processor* processors,
	const uint32_t number_of_processors,
	const uint32_t* global_proc_index_per_group,
	struct cpuinfo_core* cores,
	const struct woa_chip_info* chip_info);

static uint32_t read_caches_for_processors(
	struct cpuinfo_processor* processors,
	const uint32_t number_of_processors,
	struct cpuinfo_cache* caches,
	uint32_t* numbers_of_caches,
	const uint32_t* global_proc_index_per_group,
	const struct woa_chip_info* chip_info);

static uint32_t read_all_logical_processor_info_of_relation(
	LOGICAL_PROCESSOR_RELATIONSHIP info_type,
	struct cpuinfo_processor* processors,
	const uint32_t number_of_processors,
	struct cpuinfo_cache* caches,
	uint32_t* numbers_of_caches,
	struct cpuinfo_core* cores,
	const uint32_t* global_proc_index_per_group,
	const struct woa_chip_info* chip_info);

static bool parse_relation_processor_info(
	struct cpuinfo_processor* processors,
	uint32_t nr_of_processors,
	const uint32_t* global_proc_index_per_group,
	PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX info,
	const uint32_t info_id,
	struct cpuinfo_core* cores,
	const struct woa_chip_info* chip_info);

static bool parse_relation_cache_info(
	struct cpuinfo_processor* processors,
	struct cpuinfo_cache* caches,
	uint32_t* numbers_of_caches,
	const uint32_t* global_proc_index_per_group,
	PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX info);

static void store_package_info_per_processor(
	struct cpuinfo_processor* processors,
	const uint32_t processor_global_index,
	const uint32_t package_id,
	const uint32_t group_id,
	const uint32_t processor_id_in_group);

static void store_core_info_per_processor(
	struct cpuinfo_processor* processors,
	const uint32_t processor_global_index,
	const uint32_t core_id,
	PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX core_info,
	struct cpuinfo_core* cores,
	const struct woa_chip_info* chip_info);

static void store_cache_info_per_processor(
	struct cpuinfo_processor* processors,
	const uint32_t processor_global_index,
	PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX info,
	struct cpuinfo_cache* current_cache);

static bool connect_packages_cores_clusters_by_processors(
	struct cpuinfo_processor* processors,
	const uint32_t nr_of_processors,
	struct cpuinfo_package* packages,
	const uint32_t nr_of_packages,
	struct cpuinfo_cluster* clusters,
	struct cpuinfo_core* cores,
	const uint32_t nr_of_cores,
	const struct woa_chip_info* chip_info,
	enum cpuinfo_vendor vendor);

static inline uint32_t low_index_from_kaffinity(KAFFINITY kaffinity);

bool cpu_info_init_by_logical_sys_info(const struct woa_chip_info* chip_info, const enum cpuinfo_vendor vendor) {
	struct cpuinfo_processor* processors = NULL;
	struct cpuinfo_package* packages = NULL;
	struct cpuinfo_cluster* clusters = NULL;
	struct cpuinfo_core* cores = NULL;
	struct cpuinfo_cache* caches = NULL;
	struct cpuinfo_uarch_info* uarchs = NULL;

	uint32_t nr_of_packages = 0;
	uint32_t nr_of_cores = 0;
	uint32_t nr_of_all_caches = 0;
	uint32_t numbers_of_caches[MAX_NR_OF_CACHES] = {0};

	uint32_t nr_of_uarchs = 0;
	bool result = false;

	HANDLE heap = GetProcessHeap();

	/* 1. Count available logical processor groups and processors */
	const uint32_t max_group_count = (uint32_t)GetMaximumProcessorGroupCount();
	cpuinfo_log_debug("detected %" PRIu32 " processor group(s)", max_group_count);
	/* We need to store the absolute processor ID offset for every group,
	 * because
	 *  1. We can't assume every processor group includes the same number
	 *     of logical processors.
	 *  2. Every processor group knows its group number and the processor
	 *     IDs within the group, but not the global processor IDs.
	 *  3. We need to list every logical processor by global ID.
	 */
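	/* Worked example (hypothetical group sizes): if group 0 holds 64
	 * logical processors and group 1 holds 8, the offsets become
	 * global_proc_index_per_group = {0, 64}, so processor 3 of group 1
	 * gets global index 64 + 3 = 67.
	 */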
	uint32_t* global_proc_index_per_group = (uint32_t*)HeapAlloc(heap, 0, max_group_count * sizeof(uint32_t));
	if (global_proc_index_per_group == NULL) {
		cpuinfo_log_error(
			"failed to allocate %zu bytes for offsets of %" PRIu32 " processor groups",
			max_group_count * sizeof(uint32_t),
			max_group_count);
		goto clean_up;
	}

	uint32_t nr_of_processors = count_logical_processors(max_group_count, global_proc_index_per_group);
	processors = HeapAlloc(heap, HEAP_ZERO_MEMORY, nr_of_processors * sizeof(struct cpuinfo_processor));
	if (processors == NULL) {
		cpuinfo_log_error(
			"failed to allocate %zu bytes for descriptions of %" PRIu32 " logical processors",
			nr_of_processors * sizeof(struct cpuinfo_processor),
			nr_of_processors);
		goto clean_up;
	}

	/* 2. Read topology information via the Windows API: packages, cores
	 * and caches */
	nr_of_packages =
		read_packages_for_processors(processors, nr_of_processors, global_proc_index_per_group, chip_info);
	if (!nr_of_packages) {
		cpuinfo_log_error("error in reading package information");
		goto clean_up;
	}
	cpuinfo_log_debug("detected %" PRIu32 " processor package(s)", nr_of_packages);

	/* We need the EfficiencyClass to parse the uarch from the core
	 * information, but we first need to iterate to count the cores and
	 * allocate memory; then we iterate again to read the data and store
	 * it in the cpuinfo_core structures.
	 */
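	/* The same helper serves both passes: the call below passes NULL for
	 * the cores array and only counts, while the second call (after
	 * allocation in step 3) passes the real array and fills it.
	 */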
	nr_of_cores =
		read_cores_for_processors(processors, nr_of_processors, global_proc_index_per_group, NULL, chip_info);
	if (!nr_of_cores) {
		cpuinfo_log_error("error in reading core information");
		goto clean_up;
	}
	cpuinfo_log_debug("detected %" PRIu32 " processor core(s)", nr_of_cores);

	/* There is no API to read the number of caches, so we need to iterate
	   twice over the caches:
		1. Count the caches of each type -> allocate memory
		2. Read the cache data and store it in the allocated memory
	 */
	nr_of_all_caches = read_caches_for_processors(
		processors, nr_of_processors, caches, numbers_of_caches, global_proc_index_per_group, chip_info);
	if (!nr_of_all_caches) {
		cpuinfo_log_error("error in reading cache information");
		goto clean_up;
	}
	cpuinfo_log_debug("detected %" PRIu32 " processor cache(s)", nr_of_all_caches);

	/* 3. Allocate memory for package, cluster, core and cache structures */
	packages = HeapAlloc(heap, HEAP_ZERO_MEMORY, nr_of_packages * sizeof(struct cpuinfo_package));
	if (packages == NULL) {
		cpuinfo_log_error(
			"failed to allocate %zu bytes for descriptions of %" PRIu32 " physical packages",
			nr_of_packages * sizeof(struct cpuinfo_package),
			nr_of_packages);
		goto clean_up;
	}

	/* We don't have cluster information, so we explicitly set the number
	 * of clusters equal to the number of cores. */
	clusters = HeapAlloc(heap, HEAP_ZERO_MEMORY, nr_of_cores * sizeof(struct cpuinfo_cluster));
	if (clusters == NULL) {
		cpuinfo_log_error(
			"failed to allocate %zu bytes for descriptions of %" PRIu32 " core clusters",
			nr_of_cores * sizeof(struct cpuinfo_cluster),
			nr_of_cores);
		goto clean_up;
	}

	cores = HeapAlloc(heap, HEAP_ZERO_MEMORY, nr_of_cores * sizeof(struct cpuinfo_core));
	if (cores == NULL) {
		cpuinfo_log_error(
			"failed to allocate %zu bytes for descriptions of %" PRIu32 " cores",
			nr_of_cores * sizeof(struct cpuinfo_core),
			nr_of_cores);
		goto clean_up;
	}

	/* We allocate one contiguous cache array for all caches, then use
	 * offsets per cache type. */
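	/* Illustration (hypothetical counts): with numbers_of_caches =
	 * {2, 2, 2, 1} the array is laid out as
	 * [L1i#0 L1i#1 | L1d#0 L1d#1 | L2#0 L2#1 | L3#0],
	 * and step 7 below derives the per-level base pointers from these
	 * counts.
	 */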
	caches = HeapAlloc(heap, HEAP_ZERO_MEMORY, nr_of_all_caches * sizeof(struct cpuinfo_cache));
	if (caches == NULL) {
		cpuinfo_log_error(
			"failed to allocate %zu bytes for descriptions of %" PRIu32 " caches",
			nr_of_all_caches * sizeof(struct cpuinfo_cache),
			nr_of_all_caches);
		goto clean_up;
	}

	/* 4. Read the missing topology information that couldn't be stored
	 * before the counted structures were allocated in the first round.
	 */
	nr_of_all_caches = read_caches_for_processors(
		processors, nr_of_processors, caches, numbers_of_caches, global_proc_index_per_group, chip_info);
	if (!nr_of_all_caches) {
		cpuinfo_log_error("error in reading cache information");
		goto clean_up;
	}

	nr_of_cores =
		read_cores_for_processors(processors, nr_of_processors, global_proc_index_per_group, cores, chip_info);
	if (!nr_of_cores) {
		cpuinfo_log_error("error in reading core information");
		goto clean_up;
	}

	/* 5. Now that we have read everything we can from the system, fill
	 * the package, cluster and core structures respectively.
	 */
	result = connect_packages_cores_clusters_by_processors(
		processors,
		nr_of_processors,
		packages,
		nr_of_packages,
		clusters,
		cores,
		nr_of_cores,
		chip_info,
		vendor);
	if (!result) {
		cpuinfo_log_error("error in connecting information");
		goto clean_up;
	}

	/* 6. Count and store the uarchs of the cores, assuming that cores
	 * with the same uarch are adjacent */
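	/* Example (hypothetical core list): uarchs
	 * [X1, X1, A78, A78, A78, A55, A55] produce nr_of_uarchs = 3; a
	 * non-adjacent layout such as [X1, A78, X1] would be over-counted.
	 */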
	enum cpuinfo_uarch prev_uarch = cpuinfo_uarch_unknown;
	for (uint32_t i = 0; i < nr_of_cores; i++) {
		if (prev_uarch != cores[i].uarch) {
			nr_of_uarchs++;
			prev_uarch = cores[i].uarch;
		}
	}
	uarchs = HeapAlloc(heap, HEAP_ZERO_MEMORY, nr_of_uarchs * sizeof(struct cpuinfo_uarch_info));
	if (uarchs == NULL) {
		cpuinfo_log_error(
			"failed to allocate %zu bytes for descriptions of %" PRIu32 " uarchs",
			nr_of_uarchs * sizeof(struct cpuinfo_uarch_info),
			nr_of_uarchs);
		goto clean_up;
	}
	prev_uarch = cpuinfo_uarch_unknown;
	for (uint32_t i = 0, uarch_index = 0; i < nr_of_cores; i++) {
		if (prev_uarch != cores[i].uarch) {
			if (i != 0) {
				uarch_index++;
			}
			if (uarch_index >= nr_of_uarchs) {
				cpuinfo_log_error("more uarchs detected than reported");
				result = false;
				goto clean_up;
			}
			prev_uarch = cores[i].uarch;
			uarchs[uarch_index].uarch = cores[i].uarch;
			uarchs[uarch_index].core_count = 1;
			uarchs[uarch_index].processor_count = cores[i].processor_count;
		} else if (prev_uarch != cpuinfo_uarch_unknown) {
			uarchs[uarch_index].core_count++;
			uarchs[uarch_index].processor_count += cores[i].processor_count;
		}
	}

	/* 7. Commit changes */
	cpuinfo_processors = processors;
	cpuinfo_packages = packages;
	cpuinfo_clusters = clusters;
	cpuinfo_cores = cores;
	cpuinfo_uarchs = uarchs;

	cpuinfo_processors_count = nr_of_processors;
	cpuinfo_packages_count = nr_of_packages;
	cpuinfo_clusters_count = nr_of_cores;
	cpuinfo_cores_count = nr_of_cores;
	cpuinfo_uarchs_count = nr_of_uarchs;

	for (uint32_t i = 0; i < MAX_NR_OF_CACHES; i++) {
		cpuinfo_cache_count[i] = numbers_of_caches[i];
	}
	cpuinfo_cache[cpuinfo_cache_level_1i] = caches;
	cpuinfo_cache[cpuinfo_cache_level_1d] =
		cpuinfo_cache[cpuinfo_cache_level_1i] + cpuinfo_cache_count[cpuinfo_cache_level_1i];
	cpuinfo_cache[cpuinfo_cache_level_2] =
		cpuinfo_cache[cpuinfo_cache_level_1d] + cpuinfo_cache_count[cpuinfo_cache_level_1d];
	cpuinfo_cache[cpuinfo_cache_level_3] =
		cpuinfo_cache[cpuinfo_cache_level_2] + cpuinfo_cache_count[cpuinfo_cache_level_2];
	cpuinfo_cache[cpuinfo_cache_level_4] =
		cpuinfo_cache[cpuinfo_cache_level_3] + cpuinfo_cache_count[cpuinfo_cache_level_3];
	cpuinfo_max_cache_size = cpuinfo_compute_max_cache_size(&processors[0]);

	result = true;
	MemoryBarrier();

	processors = NULL;
	packages = NULL;
	clusters = NULL;
	cores = NULL;
	caches = NULL;
	uarchs = NULL;

clean_up:
	/* The propagated pointers should not be freed; they are still
	 * non-NULL only after an error, when the init is unfinished.
	 */
	if (processors != NULL) {
		HeapFree(heap, 0, processors);
	}
	if (packages != NULL) {
		HeapFree(heap, 0, packages);
	}
	if (clusters != NULL) {
		HeapFree(heap, 0, clusters);
	}
	if (cores != NULL) {
		HeapFree(heap, 0, cores);
	}
	if (caches != NULL) {
		HeapFree(heap, 0, caches);
	}
	if (uarchs != NULL) {
		HeapFree(heap, 0, uarchs);
	}

	/* Free the locally used temporary pointers */
	HeapFree(heap, 0, global_proc_index_per_group);
	global_proc_index_per_group = NULL;
	return result;
}

static uint32_t count_logical_processors(const uint32_t max_group_count, uint32_t* global_proc_index_per_group) {
	uint32_t nr_of_processors = 0;

	for (uint32_t i = 0; i < max_group_count; i++) {
		uint32_t nr_of_processors_per_group = GetMaximumProcessorCount((WORD)i);
		cpuinfo_log_debug(
			"detected %" PRIu32 " processor(s) in group %" PRIu32 "", nr_of_processors_per_group, i);
		global_proc_index_per_group[i] = nr_of_processors;
		nr_of_processors += nr_of_processors_per_group;
	}
	return nr_of_processors;
}

static uint32_t read_packages_for_processors(
	struct cpuinfo_processor* processors,
	const uint32_t number_of_processors,
	const uint32_t* global_proc_index_per_group,
	const struct woa_chip_info* chip_info) {
	return read_all_logical_processor_info_of_relation(
		RelationProcessorPackage,
		processors,
		number_of_processors,
		NULL,
		NULL,
		NULL,
		global_proc_index_per_group,
		chip_info);
}

static uint32_t read_cores_for_processors(
	struct cpuinfo_processor* processors,
	const uint32_t number_of_processors,
	const uint32_t* global_proc_index_per_group,
	struct cpuinfo_core* cores,
	const struct woa_chip_info* chip_info) {
	return read_all_logical_processor_info_of_relation(
		RelationProcessorCore,
		processors,
		number_of_processors,
		NULL,
		NULL,
		cores,
		global_proc_index_per_group,
		chip_info);
}

static uint32_t read_caches_for_processors(
	struct cpuinfo_processor* processors,
	const uint32_t number_of_processors,
	struct cpuinfo_cache* caches,
	uint32_t* numbers_of_caches,
	const uint32_t* global_proc_index_per_group,
	const struct woa_chip_info* chip_info) {
	/* Reset processor start indexes */
	if (caches) {
		uint32_t cache_offset = 0;
		for (uint32_t i = 0; i < MAX_NR_OF_CACHES; i++) {
			for (uint32_t j = 0; j < numbers_of_caches[i]; j++) {
				caches[cache_offset + j].processor_start = UINT32_MAX;
			}
			cache_offset += numbers_of_caches[i];
		}
	}
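	/* UINT32_MAX is a sentinel: it compares greater than any real
	 * processor index, so the first processor that reaches
	 * store_cache_info_per_processor() claims processor_start.
	 */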

	return read_all_logical_processor_info_of_relation(
		RelationCache,
		processors,
		number_of_processors,
		caches,
		numbers_of_caches,
		NULL,
		global_proc_index_per_group,
		chip_info);
}

static uint32_t read_all_logical_processor_info_of_relation(
	LOGICAL_PROCESSOR_RELATIONSHIP info_type,
	struct cpuinfo_processor* processors,
	const uint32_t number_of_processors,
	struct cpuinfo_cache* caches,
	uint32_t* numbers_of_caches,
	struct cpuinfo_core* cores,
	const uint32_t* global_proc_index_per_group,
	const struct woa_chip_info* chip_info) {
	PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX infos = NULL;
	uint32_t nr_of_structs = 0;
	DWORD info_size = 0;
	bool result = false;
	HANDLE heap = GetProcessHeap();

	/* 1. Query the size of the information structure first */
	if (GetLogicalProcessorInformationEx(info_type, NULL, &info_size) == FALSE) {
		const DWORD last_error = GetLastError();
		if (last_error != ERROR_INSUFFICIENT_BUFFER) {
			cpuinfo_log_error(
				"failed to query size of processor %" PRIu32 " information: error %" PRIu32 "",
				(uint32_t)info_type,
				(uint32_t)last_error);
			goto clean_up;
		}
	}
	/* 2. Allocate memory for the information structure */
	infos = HeapAlloc(heap, 0, info_size);
	if (infos == NULL) {
		cpuinfo_log_error(
			"failed to allocate %" PRIu32 " bytes for logical processor information", (uint32_t)info_size);
		goto clean_up;
	}
	/* 3. Read the information structure */
	if (GetLogicalProcessorInformationEx(info_type, infos, &info_size) == FALSE) {
		cpuinfo_log_error(
			"failed to query processor %" PRIu32 " information: error %" PRIu32 "",
			(uint32_t)info_type,
			(uint32_t)GetLastError());
		goto clean_up;
	}

	/* 4. Parse the structure and store relevant data */
	PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX info_end =
		(PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX)((uintptr_t)infos + info_size);
	for (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX info = infos; info < info_end;
	     info = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX)((uintptr_t)info + info->Size)) {
		if (info->Relationship != info_type) {
			cpuinfo_log_warning(
				"unexpected processor info type (%" PRIu32 ") for processor information",
				(uint32_t)info->Relationship);
			continue;
		}

		const uint32_t info_id = nr_of_structs++;

		switch (info_type) {
			case RelationProcessorPackage:
				result = parse_relation_processor_info(
					processors,
					number_of_processors,
					global_proc_index_per_group,
					info,
					info_id,
					cores,
					chip_info);
				break;
			case RelationProcessorCore:
				result = parse_relation_processor_info(
					processors,
					number_of_processors,
					global_proc_index_per_group,
					info,
					info_id,
					cores,
					chip_info);
				break;
			case RelationCache:
				result = parse_relation_cache_info(
					processors, caches, numbers_of_caches, global_proc_index_per_group, info);
				break;
			default:
				cpuinfo_log_error(
					"unexpected processor info type (%" PRIu32 ") for processor information",
					(uint32_t)info->Relationship);
				result = false;
				break;
		}
		if (!result) {
			nr_of_structs = 0;
			goto clean_up;
		}
	}
clean_up:
	/* 5. Release dynamically allocated info structure. */
	HeapFree(heap, 0, infos);
	infos = NULL;
	return nr_of_structs;
}

static bool parse_relation_processor_info(
	struct cpuinfo_processor* processors,
	uint32_t nr_of_processors,
	const uint32_t* global_proc_index_per_group,
	PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX info,
	const uint32_t info_id,
	struct cpuinfo_core* cores,
	const struct woa_chip_info* chip_info) {
	for (uint32_t i = 0; i < info->Processor.GroupCount; i++) {
		const uint32_t group_id = info->Processor.GroupMask[i].Group;
		/* Bitmask representing processors in this group belonging to
		 * this package
		 */
		KAFFINITY group_processors_mask = info->Processor.GroupMask[i].Mask;
		while (group_processors_mask != 0) {
			const uint32_t processor_id_in_group = low_index_from_kaffinity(group_processors_mask);
			const uint32_t processor_global_index =
				global_proc_index_per_group[group_id] + processor_id_in_group;

			if (processor_global_index >= nr_of_processors) {
				cpuinfo_log_error("unexpected processor index %" PRIu32 "", processor_global_index);
				return false;
			}

			switch (info->Relationship) {
				case RelationProcessorPackage:
					store_package_info_per_processor(
						processors,
						processor_global_index,
						info_id,
						group_id,
						processor_id_in_group);
					break;
				case RelationProcessorCore:
					store_core_info_per_processor(
						processors, processor_global_index, info_id, info, cores, chip_info);
					break;
				default:
					cpuinfo_log_error(
						"unexpected processor info type (%" PRIu32
						") for processor information",
						(uint32_t)info->Relationship);
					break;
			}
			/* Clear the lowest set bit of the affinity mask. */
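			/* Worked example: mask 0b101000 -> lowest index 3;
			 * 0b101000 & 0b100111 = 0b100000 -> next index 5;
			 * then the mask becomes 0 and the loop ends.
			 */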
			group_processors_mask &= (group_processors_mask - 1);
		}
	}
	return true;
}

static bool parse_relation_cache_info(
	struct cpuinfo_processor* processors,
	struct cpuinfo_cache* caches,
	uint32_t* numbers_of_caches,
	const uint32_t* global_proc_index_per_group,
	PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX info) {
	static uint32_t l1i_counter = 0;
	static uint32_t l1d_counter = 0;
	static uint32_t l2_counter = 0;
	static uint32_t l3_counter = 0;

	/* On the first pass, only count the caches of each type for
	 * allocation. */
	if (caches == NULL) {
		switch (info->Cache.Level) {
			case 1:
				switch (info->Cache.Type) {
					case CacheInstruction:
						numbers_of_caches[cpuinfo_cache_level_1i]++;
						break;
					case CacheData:
						numbers_of_caches[cpuinfo_cache_level_1d]++;
						break;
					case CacheUnified:
						break;
					case CacheTrace:
						break;
					default:
						break;
				}
				break;
			case 2:
				numbers_of_caches[cpuinfo_cache_level_2]++;
				break;
			case 3:
				numbers_of_caches[cpuinfo_cache_level_3]++;
				break;
		}
		return true;
	}
	struct cpuinfo_cache* l1i_base = caches;
	struct cpuinfo_cache* l1d_base = l1i_base + numbers_of_caches[cpuinfo_cache_level_1i];
	struct cpuinfo_cache* l2_base = l1d_base + numbers_of_caches[cpuinfo_cache_level_1d];
	struct cpuinfo_cache* l3_base = l2_base + numbers_of_caches[cpuinfo_cache_level_2];

	cpuinfo_log_debug(
		"info->Cache.GroupCount:%" PRIu32 ", info->Cache.GroupMask:%" PRIu32
		","
		"info->Cache.Level:%" PRIu32 ", info->Cache.Associativity:%" PRIu32
		","
		"info->Cache.LineSize:%" PRIu32
		","
		"info->Cache.CacheSize:%" PRIu32 ", info->Cache.Type:%" PRIu32 "",
		info->Cache.GroupCount,
		(unsigned int)info->Cache.GroupMask.Mask,
		info->Cache.Level,
		info->Cache.Associativity,
		info->Cache.LineSize,
		info->Cache.CacheSize,
		info->Cache.Type);

	struct cpuinfo_cache* current_cache = NULL;
	switch (info->Cache.Level) {
		case 1:
			switch (info->Cache.Type) {
				case CacheInstruction:
					current_cache = l1i_base + l1i_counter;
					l1i_counter++;
					break;
				case CacheData:
					current_cache = l1d_base + l1d_counter;
					l1d_counter++;
					break;
				case CacheUnified:
					break;
				case CacheTrace:
					break;
				default:
					break;
			}
			break;
		case 2:
			current_cache = l2_base + l2_counter;
			l2_counter++;
			break;
		case 3:
			current_cache = l3_base + l3_counter;
			l3_counter++;
			break;
	}
	if (current_cache == NULL) {
		/* Level-1 unified and trace caches are not stored above. */
		return true;
	}
	current_cache->size = info->Cache.CacheSize;
	current_cache->line_size = info->Cache.LineSize;
	current_cache->associativity = info->Cache.Associativity;
	/* We don't have partition and set information of caches on Windows,
	 * so we set partitions to 1 and calculate the expected sets.
	 */
	current_cache->partitions = 1;
	current_cache->sets = current_cache->size / current_cache->line_size / current_cache->associativity;
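	/* Worked example (hypothetical cache): 64 KiB, 4-way, 64-byte lines
	 * -> sets = 65536 / 64 / 4 = 256.
	 */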
	if (info->Cache.Type == CacheUnified) {
		current_cache->flags = CPUINFO_CACHE_UNIFIED;
	}

	/* A GroupCount of zero is valid; the single GroupMask can still have
	 * bits set, so fall back to reading one group in that case.
	 */
	const uint32_t group_count = info->Cache.GroupCount != 0 ? info->Cache.GroupCount : 1;
	for (uint32_t i = 0; i < group_count; i++) {
		const uint32_t group_id = info->Cache.GroupMasks[i].Group;
		/* Bitmask representing processors in this group belonging to
		 * this cache.
		 */
		KAFFINITY group_processors_mask = info->Cache.GroupMasks[i].Mask;
		while (group_processors_mask != 0) {
			const uint32_t processor_id_in_group = low_index_from_kaffinity(group_processors_mask);
			const uint32_t processor_global_index =
				global_proc_index_per_group[group_id] + processor_id_in_group;

			store_cache_info_per_processor(processors, processor_global_index, info, current_cache);

			/* Clear the lowest set bit of the affinity mask. */
			group_processors_mask &= (group_processors_mask - 1);
		}
	}
	return true;
}

static void store_package_info_per_processor(
	struct cpuinfo_processor* processors,
	const uint32_t processor_global_index,
	const uint32_t package_id,
	const uint32_t group_id,
	const uint32_t processor_id_in_group) {
	processors[processor_global_index].windows_group_id = (uint16_t)group_id;
	processors[processor_global_index].windows_processor_id = (uint16_t)processor_id_in_group;

	/* As we are counting the number of packages now, memory for
	 * cpuinfo_packages hasn't been allocated yet, so for now we store
	 * only the package offset in the pointer.
	 */
	processors[processor_global_index].package = (const struct cpuinfo_package*)NULL + package_id;
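	/* Example: package_id = 2 stores the pointer-sized value
	 * 2 * sizeof(struct cpuinfo_package);
	 * connect_packages_cores_clusters_by_processors() later adds the real
	 * base address to recover &packages[2].
	 */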
}

static void store_core_info_per_processor(
	struct cpuinfo_processor* processors,
	const uint32_t processor_global_index,
	const uint32_t core_id,
	PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX core_info,
	struct cpuinfo_core* cores,
	const struct woa_chip_info* chip_info) {
	if (cores) {
		processors[processor_global_index].core = cores + core_id;
		cores[core_id].core_id = core_id;
		get_core_uarch_for_efficiency(
			chip_info->chip_name,
			core_info->Processor.EfficiencyClass,
			&(cores[core_id].uarch),
			&(cores[core_id].frequency));

		/* We don't have cluster information, so we use a fixed
		 * one-to-one cluster-to-core mapping. Store the cluster offset
		 * ID now; as soon as we have the cluster base address, we'll
		 * set the absolute address.
		 */
		processors[processor_global_index].cluster = (const struct cpuinfo_cluster*)NULL + core_id;
	}
}

static void store_cache_info_per_processor(
	struct cpuinfo_processor* processors,
	const uint32_t processor_global_index,
	PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX info,
	struct cpuinfo_cache* current_cache) {
	if (current_cache->processor_start > processor_global_index) {
		current_cache->processor_start = processor_global_index;
	}
	current_cache->processor_count++;

	switch (info->Cache.Level) {
		case 1:
			switch (info->Cache.Type) {
				case CacheInstruction:
					processors[processor_global_index].cache.l1i = current_cache;
					break;
				case CacheData:
					processors[processor_global_index].cache.l1d = current_cache;
					break;
				case CacheUnified:
					break;
				case CacheTrace:
					break;
				default:
					break;
			}
			break;
		case 2:
			processors[processor_global_index].cache.l2 = current_cache;
			break;
		case 3:
			processors[processor_global_index].cache.l3 = current_cache;
			break;
	}
}

static bool connect_packages_cores_clusters_by_processors(
	struct cpuinfo_processor* processors,
	const uint32_t nr_of_processors,
	struct cpuinfo_package* packages,
	const uint32_t nr_of_packages,
	struct cpuinfo_cluster* clusters,
	struct cpuinfo_core* cores,
	const uint32_t nr_of_cores,
	const struct woa_chip_info* chip_info,
	enum cpuinfo_vendor vendor) {
	/* Adjust core and package pointers for all logical processors. */
	for (uint32_t i = nr_of_processors; i != 0; i--) {
		const uint32_t processor_id = i - 1;
		struct cpuinfo_processor* processor = processors + processor_id;

		struct cpuinfo_core* core = (struct cpuinfo_core*)processor->core;

		/* We stored pointer offsets while memory for the packages and
		 * clusters was not yet allocated, so now add those offsets to
		 * the base addresses.
		 */
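		/* Example: a stored offset of 2 (written as
		 * (const struct cpuinfo_package*)NULL + 2) casts to
		 * 2 * sizeof(struct cpuinfo_package), so the sum below yields
		 * &packages[2].
		 */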
		struct cpuinfo_package* package =
			(struct cpuinfo_package*)((uintptr_t)packages + (uintptr_t)processor->package);
		if (package < packages || package >= (packages + nr_of_packages)) {
			cpuinfo_log_error("invalid package indexing");
			return false;
		}
		processor->package = package;

		struct cpuinfo_cluster* cluster =
			(struct cpuinfo_cluster*)((uintptr_t)clusters + (uintptr_t)processor->cluster);
		if (cluster < clusters || cluster >= (clusters + nr_of_cores)) {
			cpuinfo_log_error("invalid cluster indexing");
			return false;
		}
		processor->cluster = cluster;

		if (chip_info) {
			if (!WideCharToMultiByte(
				    CP_UTF8,
				    WC_ERR_INVALID_CHARS,
				    chip_info->chip_name_string,
				    -1,
				    package->name,
				    CPUINFO_PACKAGE_NAME_MAX,
				    NULL,
				    NULL)) {
				cpuinfo_log_error("cpu name character conversion error");
				return false;
			}
		}

		/* Set start indexes and counts per packages / clusters / cores
		 * - going backwards */

		/* This can be overwritten by lower-index processors on the same
		 * package. */
		package->processor_start = processor_id;
		package->processor_count++;

		/* This can be overwritten by lower-index processors on the same
		 * cluster. */
		cluster->processor_start = processor_id;
		cluster->processor_count++;

		/* This can be overwritten by lower-index processors on the same
		 * core. */
		core->processor_start = processor_id;
		core->processor_count++;
	}
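	/* Example: if (hypothetical) processors 4..7 share one core, the
	 * backward walk visits 7, 6, 5, 4, so core->processor_start settles
	 * at 4 while core->processor_count is incremented to 4.
	 */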
	/* Fill cores */
	for (uint32_t i = nr_of_cores; i != 0; i--) {
		const uint32_t global_core_id = i - 1;
		struct cpuinfo_core* core = cores + global_core_id;
		const struct cpuinfo_processor* processor = processors + core->processor_start;
		struct cpuinfo_package* package = (struct cpuinfo_package*)processor->package;
		struct cpuinfo_cluster* cluster = (struct cpuinfo_cluster*)processor->cluster;

		core->package = package;
		core->cluster = cluster;
		core->vendor = vendor;

		/* This can be overwritten by lower-index cores on the same
		 * cluster/package.
		 */
		cluster->core_start = global_core_id;
		cluster->core_count++;
		package->core_start = global_core_id;
		package->core_count++;
		package->cluster_start = global_core_id;
		package->cluster_count = package->core_count;

		cluster->package = package;
		cluster->vendor = cores[cluster->core_start].vendor;
		cluster->uarch = cores[cluster->core_start].uarch;
		cluster->frequency = cores[cluster->core_start].frequency;
	}
	return true;
}

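/* Example: low_index_from_kaffinity(0x28) == 3, since bit 3 is the lowest
 * set bit of 0x28 == 0b101000.
 */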
static inline uint32_t low_index_from_kaffinity(KAFFINITY kaffinity) {
	unsigned long index;
	_BitScanForward64(&index, (unsigned __int64)kaffinity);
	return (uint32_t)index;
}