// SPDX-License-Identifier: GPL-2.0
/*
 * Arch specific cpu topology information
 *
 * Copyright (C) 2016, ARM Ltd.
 * Written by: Juri Lelli, ARM Ltd.
 */

#include <linux/acpi.h>
#include <linux/cacheinfo.h>
#include <linux/cpu.h>
#include <linux/cpufreq.h>
#include <linux/device.h>
#include <linux/of.h>
#include <linux/slab.h>
#include <linux/sched/topology.h>
#include <linux/cpuset.h>
#include <linux/cpumask.h>
#include <linux/init.h>
#include <linux/rcupdate.h>
#include <linux/sched.h>

#define CREATE_TRACE_POINTS
#include <trace/events/thermal_pressure.h>

#undef CREATE_TRACE_POINTS
#include <trace/hooks/sched.h>
#include <trace/hooks/topology.h>

static DEFINE_PER_CPU(struct scale_freq_data __rcu *, sft_data);
static struct cpumask scale_freq_counters_mask;
static bool scale_freq_invariant;
static DEFINE_PER_CPU(u32, freq_factor) = 1;

static bool supports_scale_freq_counters(const struct cpumask *cpus)
{
	bool use_amu_fie = true;

	trace_android_vh_use_amu_fie(&use_amu_fie);
	if (!use_amu_fie)
		return false;

	return cpumask_subset(cpus, &scale_freq_counters_mask);
}

bool topology_scale_freq_invariant(void)
{
	return cpufreq_supports_freq_invariance() ||
	       supports_scale_freq_counters(cpu_online_mask);
}

static void update_scale_freq_invariant(bool status)
{
	if (scale_freq_invariant == status)
		return;

	/*
	 * Task scheduler behavior depends on frequency invariance support,
	 * either cpufreq or counter driven. If the support status changes as
	 * a result of counter initialisation and use, retrigger the build of
	 * scheduling domains to ensure the information is propagated properly.
	 */
	if (topology_scale_freq_invariant() == status) {
		scale_freq_invariant = status;
		rebuild_sched_domains_energy();
	}
}

void topology_set_scale_freq_source(struct scale_freq_data *data,
				    const struct cpumask *cpus)
{
	struct scale_freq_data *sfd;
	int cpu;

	/*
	 * Avoid calling rebuild_sched_domains() unnecessarily if FIE is
	 * supported by cpufreq.
	 */
	if (cpumask_empty(&scale_freq_counters_mask))
		scale_freq_invariant = topology_scale_freq_invariant();

	rcu_read_lock();

	for_each_cpu(cpu, cpus) {
		sfd = rcu_dereference(*per_cpu_ptr(&sft_data, cpu));

		/* Use ARCH provided counters whenever possible */
		if (!sfd || sfd->source != SCALE_FREQ_SOURCE_ARCH) {
			rcu_assign_pointer(per_cpu(sft_data, cpu), data);
			cpumask_set_cpu(cpu, &scale_freq_counters_mask);
		}
	}

	rcu_read_unlock();

	update_scale_freq_invariant(true);
}
EXPORT_SYMBOL_GPL(topology_set_scale_freq_source);

void topology_clear_scale_freq_source(enum scale_freq_source source,
				      const struct cpumask *cpus)
{
	struct scale_freq_data *sfd;
	int cpu;

	rcu_read_lock();

	for_each_cpu(cpu, cpus) {
		sfd = rcu_dereference(*per_cpu_ptr(&sft_data, cpu));

		if (sfd && sfd->source == source) {
			rcu_assign_pointer(per_cpu(sft_data, cpu), NULL);
			cpumask_clear_cpu(cpu, &scale_freq_counters_mask);
		}
	}

	rcu_read_unlock();

	/*
	 * Make sure all references to previous sft_data are dropped to avoid
	 * use-after-free races.
	 */
	synchronize_rcu();

	update_scale_freq_invariant(false);
}
EXPORT_SYMBOL_GPL(topology_clear_scale_freq_source);

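/*
 * Called from the arch's scheduler tick hook: let the scale factor source
 * registered for this CPU, if any, update the frequency scale factor.
 */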
void topology_scale_freq_tick(void)
{
	struct scale_freq_data *sfd = rcu_dereference_sched(*this_cpu_ptr(&sft_data));

	if (sfd)
		sfd->set_freq_scale();
}

DEFINE_PER_CPU(unsigned long, arch_freq_scale) = SCHED_CAPACITY_SCALE;
EXPORT_PER_CPU_SYMBOL_GPL(arch_freq_scale);

void topology_set_freq_scale(const struct cpumask *cpus, unsigned long cur_freq,
			     unsigned long max_freq)
{
	unsigned long scale;
	int i;

	if (WARN_ON_ONCE(!cur_freq || !max_freq))
		return;

	/*
	 * If the use of counters for FIE is enabled, just return as we don't
	 * want to update the scale factor with information from CPUFREQ.
	 * Instead the scale factor will be updated from arch_scale_freq_tick.
	 */
	if (supports_scale_freq_counters(cpus))
		return;

	scale = (cur_freq << SCHED_CAPACITY_SHIFT) / max_freq;

	trace_android_vh_arch_set_freq_scale(cpus, cur_freq, max_freq, &scale);

	for_each_cpu(i, cpus)
		per_cpu(arch_freq_scale, i) = scale;
}

DEFINE_PER_CPU(unsigned long, cpu_scale) = SCHED_CAPACITY_SCALE;
EXPORT_PER_CPU_SYMBOL_GPL(cpu_scale);

void topology_set_cpu_scale(unsigned int cpu, unsigned long capacity)
{
	per_cpu(cpu_scale, cpu) = capacity;
}

DEFINE_PER_CPU(unsigned long, thermal_pressure);
EXPORT_PER_CPU_SYMBOL_GPL(thermal_pressure);

/**
 * topology_update_thermal_pressure() - Update thermal pressure for CPUs
 * @cpus        : The related CPUs for which capacity has been reduced
 * @capped_freq : The maximum allowed frequency that CPUs can run at
 *
 * Update the value of thermal pressure for all @cpus in the mask. The
 * cpumask should include all (online+offline) affected CPUs, to avoid
 * operating on stale data when hot-plug is used for some CPUs. The
 * @capped_freq reflects the currently allowed max CPU frequency due to
 * thermal capping. It might also be a boost frequency value, which is bigger
 * than the internal 'freq_factor' max frequency. In such a case the pressure
 * value should simply be removed, since this is an indication that there is
 * no thermal throttling. The @capped_freq must be provided in kHz.
 */
void topology_update_thermal_pressure(const struct cpumask *cpus,
				      unsigned long capped_freq)
{
	unsigned long max_capacity, capacity, th_pressure;
	u32 max_freq;
	int cpu;

	cpu = cpumask_first(cpus);
	max_capacity = arch_scale_cpu_capacity(cpu);
	max_freq = per_cpu(freq_factor, cpu);

	/* Convert to MHz scale which is used in 'freq_factor' */
	capped_freq /= 1000;

	/*
	 * Handle boost frequencies properly: they should simply clear the
	 * thermal pressure value.
	 */
	if (max_freq <= capped_freq)
		capacity = max_capacity;
	else
		capacity = mult_frac(max_capacity, capped_freq, max_freq);

	th_pressure = max_capacity - capacity;

	trace_thermal_pressure_update(cpu, th_pressure);

	for_each_cpu(cpu, cpus) {
		WRITE_ONCE(per_cpu(thermal_pressure, cpu), th_pressure);
		trace_android_rvh_update_thermal_stats(cpu);
	}
}
EXPORT_SYMBOL_GPL(topology_update_thermal_pressure);

static ssize_t cpu_capacity_show(struct device *dev,
				 struct device_attribute *attr,
				 char *buf)
{
	struct cpu *cpu = container_of(dev, struct cpu, dev);

	return sysfs_emit(buf, "%lu\n", topology_get_cpu_scale(cpu->dev.id));
}

static void update_topology_flags_workfn(struct work_struct *work);
static DECLARE_WORK(update_topology_flags_work, update_topology_flags_workfn);

static DEVICE_ATTR_RO(cpu_capacity);

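/* Expose each CPU's capacity through a read-only cpu_capacity sysfs file. */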
static int register_cpu_capacity_sysctl(void)
{
	int i;
	struct device *cpu;

	for_each_possible_cpu(i) {
		cpu = get_cpu_device(i);
		if (!cpu) {
			pr_err("%s: too early to get CPU%d device!\n",
			       __func__, i);
			continue;
		}
		device_create_file(cpu, &dev_attr_cpu_capacity);
	}

	return 0;
}
subsys_initcall(register_cpu_capacity_sysctl);

static int update_topology;
bool topology_update_done;
EXPORT_SYMBOL_GPL(topology_update_done);

int topology_update_cpu_topology(void)
{
	return update_topology;
}

/*
 * Updating the sched_domains can't be done directly from cpufreq callbacks
 * due to locking, so queue the work for later.
 */
static void update_topology_flags_workfn(struct work_struct *work)
{
	update_topology = 1;
	rebuild_sched_domains();
	topology_update_done = true;
	trace_android_vh_update_topology_flags_workfn(NULL);
	pr_debug("sched_domain hierarchy rebuilt, flags updated\n");
	update_topology = 0;
}

static u32 *raw_capacity;

static int free_raw_capacity(void)
{
	kfree(raw_capacity);
	raw_capacity = NULL;

	return 0;
}

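/*
 * Scale the raw capacities by each CPU's freq_factor and normalize so that
 * the largest result maps to SCHED_CAPACITY_SCALE.
 */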
void topology_normalize_cpu_scale(void)
{
	u64 capacity;
	u64 capacity_scale;
	int cpu;

	if (!raw_capacity)
		return;

	capacity_scale = 1;
	for_each_possible_cpu(cpu) {
		capacity = raw_capacity[cpu] * per_cpu(freq_factor, cpu);
		capacity_scale = max(capacity, capacity_scale);
	}

	pr_debug("cpu_capacity: capacity_scale=%llu\n", capacity_scale);
	for_each_possible_cpu(cpu) {
		capacity = raw_capacity[cpu] * per_cpu(freq_factor, cpu);
		capacity = div64_u64(capacity << SCHED_CAPACITY_SHIFT,
			capacity_scale);
		topology_set_cpu_scale(cpu, capacity);
		pr_debug("cpu_capacity: CPU%d cpu_capacity=%lu\n",
			cpu, topology_get_cpu_scale(cpu));
	}
}

bool __init topology_parse_cpu_capacity(struct device_node *cpu_node, int cpu)
{
	struct clk *cpu_clk;
	static bool cap_parsing_failed;
	int ret;
	u32 cpu_capacity;

	if (cap_parsing_failed)
		return false;

	ret = of_property_read_u32(cpu_node, "capacity-dmips-mhz",
				   &cpu_capacity);
	if (!ret) {
		if (!raw_capacity) {
			raw_capacity = kcalloc(num_possible_cpus(),
					       sizeof(*raw_capacity),
					       GFP_KERNEL);
			if (!raw_capacity) {
				cap_parsing_failed = true;
				return false;
			}
		}
		raw_capacity[cpu] = cpu_capacity;
		pr_debug("cpu_capacity: %pOF cpu_capacity=%u (raw)\n",
			cpu_node, raw_capacity[cpu]);

		/*
		 * Update freq_factor for calculating early boot CPU capacities.
		 * For non-clk CPU DVFS mechanisms there is no way to get the
		 * frequency value now, so assume all CPUs are running at the
		 * same frequency (by keeping the initial freq_factor value).
		 */
		cpu_clk = of_clk_get(cpu_node, 0);
		if (!PTR_ERR_OR_ZERO(cpu_clk)) {
			per_cpu(freq_factor, cpu) =
				clk_get_rate(cpu_clk) / 1000;
			clk_put(cpu_clk);
		}
	} else {
		if (raw_capacity) {
			pr_err("cpu_capacity: missing %pOF raw capacity\n",
				cpu_node);
			pr_err("cpu_capacity: partial information: fallback to 1024 for all CPUs\n");
		}
		cap_parsing_failed = true;
		free_raw_capacity();
	}

	return !ret;
}

#ifdef CONFIG_ACPI_CPPC_LIB
#include <acpi/cppc_acpi.h>

void topology_init_cpu_capacity_cppc(void)
{
	struct cppc_perf_caps perf_caps;
	int cpu;

	if (likely(!acpi_cpc_valid()))
		return;

	raw_capacity = kcalloc(num_possible_cpus(), sizeof(*raw_capacity),
			       GFP_KERNEL);
	if (!raw_capacity)
		return;

	for_each_possible_cpu(cpu) {
		if (!cppc_get_perf_caps(cpu, &perf_caps) &&
		    (perf_caps.highest_perf >= perf_caps.nominal_perf) &&
		    (perf_caps.highest_perf >= perf_caps.lowest_perf)) {
			raw_capacity[cpu] = perf_caps.highest_perf;
			pr_debug("cpu_capacity: CPU%d cpu_capacity=%u (raw).\n",
				 cpu, raw_capacity[cpu]);
			continue;
		}

		pr_err("cpu_capacity: CPU%d missing/invalid highest performance.\n", cpu);
		pr_err("cpu_capacity: partial information: fallback to 1024 for all CPUs\n");
		goto exit;
	}

	topology_normalize_cpu_scale();
	schedule_work(&update_topology_flags_work);
	pr_debug("cpu_capacity: cpu_capacity initialization done\n");

exit:
	free_raw_capacity();
}
#endif

#ifdef CONFIG_CPU_FREQ
static cpumask_var_t cpus_to_visit;
static void parsing_done_workfn(struct work_struct *work);
static DECLARE_WORK(parsing_done_work, parsing_done_workfn);

static int
init_cpu_capacity_callback(struct notifier_block *nb,
			   unsigned long val,
			   void *data)
{
	struct cpufreq_policy *policy = data;
	int cpu;

	if (!raw_capacity)
		return 0;

	if (val != CPUFREQ_CREATE_POLICY)
		return 0;

	pr_debug("cpu_capacity: init cpu capacity for CPUs [%*pbl] (to_visit=%*pbl)\n",
		 cpumask_pr_args(policy->related_cpus),
		 cpumask_pr_args(cpus_to_visit));

	cpumask_andnot(cpus_to_visit, cpus_to_visit, policy->related_cpus);

	for_each_cpu(cpu, policy->related_cpus)
		per_cpu(freq_factor, cpu) = policy->cpuinfo.max_freq / 1000;

	if (cpumask_empty(cpus_to_visit)) {
		topology_normalize_cpu_scale();
		schedule_work(&update_topology_flags_work);
		free_raw_capacity();
		pr_debug("cpu_capacity: parsing done\n");
		schedule_work(&parsing_done_work);
	}

	return 0;
}

static struct notifier_block init_cpu_capacity_notifier = {
	.notifier_call = init_cpu_capacity_callback,
};

static int __init register_cpufreq_notifier(void)
{
	int ret;

	/*
	 * On ACPI-based systems skip registering cpufreq notifier as cpufreq
	 * information is not needed for cpu capacity initialization.
	 */
	if (!acpi_disabled || !raw_capacity)
		return -EINVAL;

	if (!alloc_cpumask_var(&cpus_to_visit, GFP_KERNEL))
		return -ENOMEM;

	cpumask_copy(cpus_to_visit, cpu_possible_mask);

	ret = cpufreq_register_notifier(&init_cpu_capacity_notifier,
					CPUFREQ_POLICY_NOTIFIER);

	if (ret)
		free_cpumask_var(cpus_to_visit);

	return ret;
}
core_initcall(register_cpufreq_notifier);

static void parsing_done_workfn(struct work_struct *work)
{
	cpufreq_unregister_notifier(&init_cpu_capacity_notifier,
				    CPUFREQ_POLICY_NOTIFIER);
	free_cpumask_var(cpus_to_visit);
}

#else
core_initcall(free_raw_capacity);
#endif

#if defined(CONFIG_ARM64) || defined(CONFIG_RISCV)
/*
 * This function returns the logical cpu number of the node.
 * There are basically three kinds of return values:
 * (1) logical cpu number which is >= 0.
 * (2) -ENODEV when the device tree (DT) node is valid and found in the DT but
 * there is no possible logical CPU in the kernel to match. This happens
 * when CONFIG_NR_CPUS is configured to be smaller than the number of
 * CPU nodes in DT. We need to just ignore this case.
 * (3) -1 if the node does not exist in the device tree
 */
static int __init get_cpu_for_node(struct device_node *node)
{
	struct device_node *cpu_node;
	int cpu;

	cpu_node = of_parse_phandle(node, "cpu", 0);
	if (!cpu_node)
		return -1;

	cpu = of_cpu_node_to_id(cpu_node);
	if (cpu >= 0)
		topology_parse_cpu_capacity(cpu_node, cpu);
	else
		pr_info("CPU node for %pOF exists but the possible cpu range is: %*pbl\n",
			cpu_node, cpumask_pr_args(cpu_possible_mask));

	of_node_put(cpu_node);
	return cpu;
}

static int __init parse_core(struct device_node *core, int package_id,
			     int cluster_id, int core_id)
{
	char name[20];
	bool leaf = true;
	int i = 0;
	int cpu;
	struct device_node *t;

	do {
		snprintf(name, sizeof(name), "thread%d", i);
		t = of_get_child_by_name(core, name);
		if (t) {
			leaf = false;
			cpu = get_cpu_for_node(t);
			if (cpu >= 0) {
				cpu_topology[cpu].package_id = package_id;
				cpu_topology[cpu].cluster_id = cluster_id;
				cpu_topology[cpu].core_id = core_id;
				cpu_topology[cpu].thread_id = i;
			} else if (cpu != -ENODEV) {
				pr_err("%pOF: Can't get CPU for thread\n", t);
				of_node_put(t);
				return -EINVAL;
			}
			of_node_put(t);
		}
		i++;
	} while (t);

	cpu = get_cpu_for_node(core);
	if (cpu >= 0) {
		if (!leaf) {
			pr_err("%pOF: Core has both threads and CPU\n",
			       core);
			return -EINVAL;
		}

		cpu_topology[cpu].package_id = package_id;
		cpu_topology[cpu].cluster_id = cluster_id;
		cpu_topology[cpu].core_id = core_id;
	} else if (leaf && cpu != -ENODEV) {
		pr_err("%pOF: Can't get CPU for leaf core\n", core);
		return -EINVAL;
	}

	return 0;
}

static int __init parse_cluster(struct device_node *cluster, int package_id,
				int cluster_id, int depth)
{
	char name[20];
	bool leaf = true;
	bool has_cores = false;
	struct device_node *c;
	int core_id = 0;
	int i, ret;

	/*
	 * First check for child clusters; we currently ignore any
	 * information about the nesting of clusters and present the
	 * scheduler with a flat list of them.
	 */
	i = 0;
	do {
		snprintf(name, sizeof(name), "cluster%d", i);
		c = of_get_child_by_name(cluster, name);
		if (c) {
			leaf = false;
			ret = parse_cluster(c, package_id, i, depth + 1);
			if (depth > 0)
				pr_warn("Topology for clusters of clusters not yet supported\n");
			of_node_put(c);
			if (ret != 0)
				return ret;
		}
		i++;
	} while (c);

	/* Now check for cores */
	i = 0;
	do {
		snprintf(name, sizeof(name), "core%d", i);
		c = of_get_child_by_name(cluster, name);
		if (c) {
			has_cores = true;

			if (depth == 0) {
				pr_err("%pOF: cpu-map children should be clusters\n",
				       c);
				of_node_put(c);
				return -EINVAL;
			}

			if (leaf) {
				ret = parse_core(c, package_id, cluster_id,
						 core_id++);
			} else {
				pr_err("%pOF: Non-leaf cluster with core %s\n",
				       cluster, name);
				ret = -EINVAL;
			}

			of_node_put(c);
			if (ret != 0)
				return ret;
		}
		i++;
	} while (c);

	if (leaf && !has_cores)
		pr_warn("%pOF: empty cluster\n", cluster);

	return 0;
}

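/*
 * Look for socketN children of the cpu-map node; if none are present,
 * treat the whole cpu-map as a single socket (package 0).
 */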
static int __init parse_socket(struct device_node *socket)
{
	char name[20];
	struct device_node *c;
	bool has_socket = false;
	int package_id = 0, ret;

	do {
		snprintf(name, sizeof(name), "socket%d", package_id);
		c = of_get_child_by_name(socket, name);
		if (c) {
			has_socket = true;
			ret = parse_cluster(c, package_id, -1, 0);
			of_node_put(c);
			if (ret != 0)
				return ret;
		}
		package_id++;
	} while (c);

	if (!has_socket)
		ret = parse_cluster(socket, 0, -1, 0);

	return ret;
}

static int __init parse_dt_topology(void)
{
	struct device_node *cn, *map;
	int ret = 0;
	int cpu;

	cn = of_find_node_by_path("/cpus");
	if (!cn) {
		pr_err("No CPU information found in DT\n");
		return 0;
	}

	/*
	 * When topology is provided, cpu-map is essentially a root
	 * cluster with restricted subnodes.
	 */
	map = of_get_child_by_name(cn, "cpu-map");
	if (!map)
		goto out;

	ret = parse_socket(map);
	if (ret != 0)
		goto out_map;

	topology_normalize_cpu_scale();

	/*
	 * Check that all cores are in the topology; the SMP code will
	 * only mark cores described in the DT as possible.
	 */
	for_each_possible_cpu(cpu)
		if (cpu_topology[cpu].package_id < 0) {
			ret = -EINVAL;
			break;
		}

out_map:
	of_node_put(map);
out:
	of_node_put(cn);
	return ret;
}
#endif

/*
 * cpu topology table
 */
struct cpu_topology cpu_topology[NR_CPUS];
EXPORT_SYMBOL_GPL(cpu_topology);

const struct cpumask *cpu_coregroup_mask(int cpu)
{
	const cpumask_t *core_mask = cpumask_of_node(cpu_to_node(cpu));

	/* Find the smaller of NUMA, core or LLC siblings */
	if (cpumask_subset(&cpu_topology[cpu].core_sibling, core_mask)) {
		/* not NUMA in package, let's use the package siblings */
		core_mask = &cpu_topology[cpu].core_sibling;
	}

	if (last_level_cache_is_valid(cpu)) {
		if (cpumask_subset(&cpu_topology[cpu].llc_sibling, core_mask))
			core_mask = &cpu_topology[cpu].llc_sibling;
	}

	/*
	 * For systems with no shared cpu-side LLC but with clusters defined,
	 * extend core_mask to cluster_siblings. The sched domain builder will
	 * then remove MC as redundant with CLS if SCHED_CLUSTER is enabled.
	 */
	if (IS_ENABLED(CONFIG_SCHED_CLUSTER) &&
	    cpumask_subset(core_mask, &cpu_topology[cpu].cluster_sibling))
		core_mask = &cpu_topology[cpu].cluster_sibling;

	return core_mask;
}

const struct cpumask *cpu_clustergroup_mask(int cpu)
{
	/*
	 * Prevent cpu_clustergroup_mask() from spanning the same CPUs as, or
	 * more CPUs than, cpu_coregroup_mask().
	 */
	if (cpumask_subset(cpu_coregroup_mask(cpu),
			   &cpu_topology[cpu].cluster_sibling))
		return topology_sibling_cpumask(cpu);

	return &cpu_topology[cpu].cluster_sibling;
}

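/*
 * Rebuild the core/thread/cluster/LLC sibling masks of @cpuid against every
 * online CPU, typically when a CPU is brought online.
 */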
void update_siblings_masks(unsigned int cpuid)
{
	struct cpu_topology *cpu_topo, *cpuid_topo = &cpu_topology[cpuid];
	int cpu, ret;

	ret = detect_cache_attributes(cpuid);
	if (ret && ret != -ENOENT)
		pr_info("Early cacheinfo failed, ret = %d\n", ret);

	/* update core and thread sibling masks */
	for_each_online_cpu(cpu) {
		cpu_topo = &cpu_topology[cpu];

		if (last_level_cache_is_shared(cpu, cpuid)) {
			cpumask_set_cpu(cpu, &cpuid_topo->llc_sibling);
			cpumask_set_cpu(cpuid, &cpu_topo->llc_sibling);
		}

		if (cpuid_topo->package_id != cpu_topo->package_id)
			continue;

		cpumask_set_cpu(cpuid, &cpu_topo->core_sibling);
		cpumask_set_cpu(cpu, &cpuid_topo->core_sibling);

		if (cpuid_topo->cluster_id != cpu_topo->cluster_id)
			continue;

		if (cpuid_topo->cluster_id >= 0) {
			cpumask_set_cpu(cpu, &cpuid_topo->cluster_sibling);
			cpumask_set_cpu(cpuid, &cpu_topo->cluster_sibling);
		}

		if (cpuid_topo->core_id != cpu_topo->core_id)
			continue;

		cpumask_set_cpu(cpuid, &cpu_topo->thread_sibling);
		cpumask_set_cpu(cpu, &cpuid_topo->thread_sibling);
	}
}

static void clear_cpu_topology(int cpu)
{
	struct cpu_topology *cpu_topo = &cpu_topology[cpu];

	cpumask_clear(&cpu_topo->llc_sibling);
	cpumask_set_cpu(cpu, &cpu_topo->llc_sibling);

	cpumask_clear(&cpu_topo->cluster_sibling);
	cpumask_set_cpu(cpu, &cpu_topo->cluster_sibling);

	cpumask_clear(&cpu_topo->core_sibling);
	cpumask_set_cpu(cpu, &cpu_topo->core_sibling);
	cpumask_clear(&cpu_topo->thread_sibling);
	cpumask_set_cpu(cpu, &cpu_topo->thread_sibling);
}

void __init reset_cpu_topology(void)
{
	unsigned int cpu;

	for_each_possible_cpu(cpu) {
		struct cpu_topology *cpu_topo = &cpu_topology[cpu];

		cpu_topo->thread_id = -1;
		cpu_topo->core_id = -1;
		cpu_topo->cluster_id = -1;
		cpu_topo->package_id = -1;

		clear_cpu_topology(cpu);
	}
}

void remove_cpu_topology(unsigned int cpu)
{
	int sibling;

	for_each_cpu(sibling, topology_core_cpumask(cpu))
		cpumask_clear_cpu(cpu, topology_core_cpumask(sibling));
	for_each_cpu(sibling, topology_sibling_cpumask(cpu))
		cpumask_clear_cpu(cpu, topology_sibling_cpumask(sibling));
	for_each_cpu(sibling, topology_cluster_cpumask(cpu))
		cpumask_clear_cpu(cpu, topology_cluster_cpumask(sibling));
	for_each_cpu(sibling, topology_llc_cpumask(cpu))
		cpumask_clear_cpu(cpu, topology_llc_cpumask(sibling));

	clear_cpu_topology(cpu);
}

__weak int __init parse_acpi_topology(void)
{
	return 0;
}

#if defined(CONFIG_ARM64) || defined(CONFIG_RISCV)
void __init init_cpu_topology(void)
{
	int ret;

	reset_cpu_topology();
	ret = parse_acpi_topology();
	if (!ret)
		ret = of_have_populated_dt() && parse_dt_topology();

	if (ret) {
		/*
		 * Discard anything that was parsed if we hit an error so we
		 * don't use partial information.
		 */
		reset_cpu_topology();
		return;
	}
}

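/*
 * Fallback topology for CPUs not described by DT/ACPI: treat each CPU as its
 * own core and use the NUMA node as the package, then update sibling masks.
 */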
void store_cpu_topology(unsigned int cpuid)
{
	struct cpu_topology *cpuid_topo = &cpu_topology[cpuid];

	if (cpuid_topo->package_id != -1)
		goto topology_populated;

	cpuid_topo->thread_id = -1;
	cpuid_topo->core_id = cpuid;
	cpuid_topo->package_id = cpu_to_node(cpuid);

	pr_debug("CPU%u: package %d core %d thread %d\n",
		 cpuid, cpuid_topo->package_id, cpuid_topo->core_id,
		 cpuid_topo->thread_id);

topology_populated:
	update_siblings_masks(cpuid);
}
#endif