
Lines Matching +full:cpu +full:- +full:3

1 // SPDX-License-Identifier: GPL-2.0
34 static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level, in sched_domain_debug_one() argument
37 struct sched_group *group = sd->groups; in sched_domain_debug_one()
38 unsigned long flags = sd->flags; in sched_domain_debug_one()
43 printk(KERN_DEBUG "%*s domain-%d: ", level, "", level); in sched_domain_debug_one()
45 cpumask_pr_args(sched_domain_span(sd)), sd->name); in sched_domain_debug_one()
47 if (!cpumask_test_cpu(cpu, sched_domain_span(sd))) { in sched_domain_debug_one()
48 printk(KERN_ERR "ERROR: domain->span does not contain CPU%d\n", cpu); in sched_domain_debug_one()
50 if (group && !cpumask_test_cpu(cpu, sched_group_span(group))) { in sched_domain_debug_one()
51 printk(KERN_ERR "ERROR: domain->groups does not contain CPU%d\n", cpu); in sched_domain_debug_one()
58 if ((meta_flags & SDF_SHARED_CHILD) && sd->child && in sched_domain_debug_one()
59 !(sd->child->flags & flag)) in sched_domain_debug_one()
63 if ((meta_flags & SDF_SHARED_PARENT) && sd->parent && in sched_domain_debug_one()
64 !(sd->parent->flags & flag)) in sched_domain_debug_one()
83 if (!(sd->flags & SD_OVERLAP) && in sched_domain_debug_one()
93 group->sgc->id, in sched_domain_debug_one()
96 if ((sd->flags & SD_OVERLAP) && in sched_domain_debug_one()
102 if (group->sgc->capacity != SCHED_CAPACITY_SCALE) in sched_domain_debug_one()
103 printk(KERN_CONT " cap=%lu", group->sgc->capacity); in sched_domain_debug_one()
105 if (group == sd->groups && sd->child && in sched_domain_debug_one()
106 !cpumask_equal(sched_domain_span(sd->child), in sched_domain_debug_one()
108 printk(KERN_ERR "ERROR: domain->groups does not match domain->child\n"); in sched_domain_debug_one()
113 group = group->next; in sched_domain_debug_one()
115 if (group != sd->groups) in sched_domain_debug_one()
118 } while (group != sd->groups); in sched_domain_debug_one()
122 printk(KERN_ERR "ERROR: groups don't span domain->span\n"); in sched_domain_debug_one()
124 if (sd->parent && in sched_domain_debug_one()
125 !cpumask_subset(groupmask, sched_domain_span(sd->parent))) in sched_domain_debug_one()
126 printk(KERN_ERR "ERROR: parent span is not a superset of domain->span\n"); in sched_domain_debug_one()
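
sched_domain_debug_one() ORs every group's span into the scratch mask and then checks two containment invariants: the accumulated group mask must equal the domain's span, and it must fit inside the parent's span when a parent exists. A minimal userspace sketch of those two checks, with plain bitmasks standing in for cpumasks and with invented spans:

    #include <stdio.h>

    int main(void)
    {
    	/* Toy spans: domain covers CPUs 0-3, parent covers CPUs 0-7. */
    	const unsigned int domain_span = 0x0f;
    	const unsigned int parent_span = 0xff;
    	const unsigned int group_span[] = { 0x03, 0x0c };	/* {0-1}, {2-3} */
    	unsigned int groupmask = 0;

    	for (int i = 0; i < 2; i++)
    		groupmask |= group_span[i];

    	if (groupmask != domain_span)
    		printf("ERROR: groups don't span domain->span\n");

    	if (groupmask & ~parent_span)
    		printf("ERROR: parent span is not a superset of domain->span\n");

    	printf("groupmask=0x%x domain=0x%x parent=0x%x\n",
    	       groupmask, domain_span, parent_span);
    	return 0;
    }
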
130 static void sched_domain_debug(struct sched_domain *sd, int cpu) in sched_domain_debug() argument
138 printk(KERN_DEBUG "CPU%d attaching NULL sched-domain.\n", cpu); in sched_domain_debug()
142 printk(KERN_DEBUG "CPU%d attaching sched-domain(s):\n", cpu); in sched_domain_debug()
145 if (sched_domain_debug_one(sd, cpu, level, sched_domains_tmpmask)) in sched_domain_debug()
148 sd = sd->parent; in sched_domain_debug()
156 # define sched_domain_debug(sd, cpu) do { } while (0) argument
176 if ((sd->flags & SD_DEGENERATE_GROUPS_MASK) && in sd_degenerate()
177 (sd->groups != sd->groups->next)) in sd_degenerate()
181 if (sd->flags & (SD_WAKE_AFFINE)) in sd_degenerate()
190 unsigned long cflags = sd->flags, pflags = parent->flags; in sd_parent_degenerate()
199 if (parent->groups == parent->groups->next) in sd_parent_degenerate()
221 return -EPERM; in sched_energy_aware_handler()
244 tmp = pd->next; in free_pd()
250 static struct perf_domain *find_pd(struct perf_domain *pd, int cpu) in find_pd() argument
253 if (cpumask_test_cpu(cpu, perf_domain_span(pd))) in find_pd()
255 pd = pd->next; in find_pd()
261 static struct perf_domain *pd_init(int cpu) in pd_init() argument
263 struct em_perf_domain *obj = em_cpu_get(cpu); in pd_init()
268 pr_info("%s: no EM found for CPU%d\n", __func__, cpu); in pd_init()
275 pd->em_pd = obj; in pd_init()
292 em_pd_nr_perf_states(pd->em_pd)); in perf_domain_debug()
293 pd = pd->next; in perf_domain_debug()
324 * 3. no SMT is detected.
333 * - nr_pd: the number of performance domains
334 * - nr_cpus: the number of CPUs
335 * - nr_ps: the sum of the number of performance states of all performance
339 * It is generally not a good idea to use such a model in the wake-up path on
342 * with per-CPU DVFS and less than 8 performance states each, for example.
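
The nr_pd/nr_cpus/nr_ps terms listed above feed a complexity estimate of the form C = nr_pd * (nr_cpus + nr_ps), as described in the full comment in kernel/sched/topology.c. A throwaway sketch of that arithmetic for an invented platform (2 performance domains, 8 CPUs, 8 performance states per domain; all numbers are assumptions, not measured values):

    #include <stdio.h>

    int main(void)
    {
    	/* Hypothetical platform, numbers chosen only for illustration. */
    	const int nr_pd = 2;		/* performance domains               */
    	const int nr_cpus = 8;		/* CPUs in the root domain           */
    	const int nr_ps = 2 * 8;	/* total performance states (2 x 8)  */

    	printf("C = nr_pd * (nr_cpus + nr_ps) = %d\n",
    	       nr_pd * (nr_cpus + nr_ps));	/* 2 * (8 + 16) = 48 */
    	return 0;
    }
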
351 int cpu = cpumask_first(cpu_map); in build_perf_domains() local
352 struct root_domain *rd = cpu_rq(cpu)->rd; in build_perf_domains()
359 /* EAS is enabled for asymmetric CPU capacity topologies. */ in build_perf_domains()
360 if (!per_cpu(sd_asym_cpucapacity, cpu)) { in build_perf_domains()
384 gov = policy->governor; in build_perf_domains()
387 if (rd->pd) in build_perf_domains()
397 tmp->next = pd; in build_perf_domains()
405 nr_ps += em_pd_nr_perf_states(pd->em_pd); in build_perf_domains()
418 tmp = rd->pd; in build_perf_domains()
419 rcu_assign_pointer(rd->pd, pd); in build_perf_domains()
421 call_rcu(&tmp->rcu, destroy_perf_domain_rcu); in build_perf_domains()
427 tmp = rd->pd; in build_perf_domains()
428 rcu_assign_pointer(rd->pd, NULL); in build_perf_domains()
430 call_rcu(&tmp->rcu, destroy_perf_domain_rcu); in build_perf_domains()
442 cpupri_cleanup(&rd->cpupri); in free_rootdomain()
443 cpudl_cleanup(&rd->cpudl); in free_rootdomain()
444 free_cpumask_var(rd->dlo_mask); in free_rootdomain()
445 free_cpumask_var(rd->rto_mask); in free_rootdomain()
446 free_cpumask_var(rd->online); in free_rootdomain()
447 free_cpumask_var(rd->span); in free_rootdomain()
448 free_pd(rd->pd); in free_rootdomain()
457 raw_spin_lock_irqsave(&rq->lock, flags); in rq_attach_root()
459 if (rq->rd) { in rq_attach_root()
460 old_rd = rq->rd; in rq_attach_root()
462 if (cpumask_test_cpu(rq->cpu, old_rd->online)) in rq_attach_root()
465 cpumask_clear_cpu(rq->cpu, old_rd->span); in rq_attach_root()
472 if (!atomic_dec_and_test(&old_rd->refcount)) in rq_attach_root()
476 atomic_inc(&rd->refcount); in rq_attach_root()
477 rq->rd = rd; in rq_attach_root()
479 cpumask_set_cpu(rq->cpu, rd->span); in rq_attach_root()
480 if (cpumask_test_cpu(rq->cpu, cpu_active_mask)) in rq_attach_root()
483 raw_spin_unlock_irqrestore(&rq->lock, flags); in rq_attach_root()
486 call_rcu(&old_rd->rcu, free_rootdomain); in rq_attach_root()
491 atomic_inc(&rd->refcount); in sched_get_rd()
496 if (!atomic_dec_and_test(&rd->refcount)) in sched_put_rd()
499 call_rcu(&rd->rcu, free_rootdomain); in sched_put_rd()
504 if (!zalloc_cpumask_var(&rd->span, GFP_KERNEL)) in init_rootdomain()
506 if (!zalloc_cpumask_var(&rd->online, GFP_KERNEL)) in init_rootdomain()
508 if (!zalloc_cpumask_var(&rd->dlo_mask, GFP_KERNEL)) in init_rootdomain()
510 if (!zalloc_cpumask_var(&rd->rto_mask, GFP_KERNEL)) in init_rootdomain()
514 rd->rto_cpu = -1; in init_rootdomain()
515 raw_spin_lock_init(&rd->rto_lock); in init_rootdomain()
516 init_irq_work(&rd->rto_push_work, rto_push_irq_work_func); in init_rootdomain()
519 init_dl_bw(&rd->dl_bw); in init_rootdomain()
520 if (cpudl_init(&rd->cpudl) != 0) in init_rootdomain()
523 if (cpupri_init(&rd->cpupri) != 0) in init_rootdomain()
527 rd->max_cap_orig_cpu = -1; in init_rootdomain()
532 cpudl_cleanup(&rd->cpudl); in init_rootdomain()
534 free_cpumask_var(rd->rto_mask); in init_rootdomain()
536 free_cpumask_var(rd->dlo_mask); in init_rootdomain()
538 free_cpumask_var(rd->online); in init_rootdomain()
540 free_cpumask_var(rd->span); in init_rootdomain()
542 return -ENOMEM; in init_rootdomain()
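
The free_cpumask_var() calls just before "return -ENOMEM" are the unwind half of init_rootdomain()'s staged-allocation error handling; the matching goto labels sit on lines the filter did not pick up. A compact userspace sketch of the same alloc/goto-cleanup shape, with made-up struct and member names:

    #include <stdlib.h>

    struct thing {
    	void *a, *b, *c;
    };

    /* Staged allocation with reverse-order unwinding, mirroring the shape of
     * init_rootdomain(); 'thing' and its members are invented for illustration. */
    static int thing_init(struct thing *t)
    {
    	t->a = calloc(1, 32);
    	if (!t->a)
    		goto out;
    	t->b = calloc(1, 32);
    	if (!t->b)
    		goto free_a;
    	t->c = calloc(1, 32);
    	if (!t->c)
    		goto free_b;
    	return 0;

    free_b:
    	free(t->b);
    free_a:
    	free(t->a);
    out:
    	return -1;	/* init_rootdomain() returns -ENOMEM at this point */
    }

    int main(void)
    {
    	struct thing t;

    	if (thing_init(&t))
    		return 1;
    	free(t.c);
    	free(t.b);
    	free(t.a);
    	return 0;
    }
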
546 * By default the system creates a single root-domain with all CPUs as
583 tmp = sg->next; in free_sched_groups()
585 if (free_sgc && atomic_dec_and_test(&sg->sgc->ref)) in free_sched_groups()
586 kfree(sg->sgc); in free_sched_groups()
588 if (atomic_dec_and_test(&sg->ref)) in free_sched_groups()
601 free_sched_groups(sd->groups, 1); in destroy_sched_domain()
603 if (sd->shared && atomic_dec_and_test(&sd->shared->ref)) in destroy_sched_domain()
604 kfree(sd->shared); in destroy_sched_domain()
613 struct sched_domain *parent = sd->parent; in destroy_sched_domains_rcu()
622 call_rcu(&sd->rcu, destroy_sched_domains_rcu); in destroy_sched_domains()
630 * Also keep a unique ID per domain (we use the first CPU number in
643 static void update_top_cache_domain(int cpu) in update_top_cache_domain() argument
647 int id = cpu; in update_top_cache_domain()
650 sd = highest_flag_domain(cpu, SD_SHARE_PKG_RESOURCES); in update_top_cache_domain()
654 sds = sd->shared; in update_top_cache_domain()
657 rcu_assign_pointer(per_cpu(sd_llc, cpu), sd); in update_top_cache_domain()
658 per_cpu(sd_llc_size, cpu) = size; in update_top_cache_domain()
659 per_cpu(sd_llc_id, cpu) = id; in update_top_cache_domain()
660 rcu_assign_pointer(per_cpu(sd_llc_shared, cpu), sds); in update_top_cache_domain()
662 sd = lowest_flag_domain(cpu, SD_NUMA); in update_top_cache_domain()
663 rcu_assign_pointer(per_cpu(sd_numa, cpu), sd); in update_top_cache_domain()
665 sd = highest_flag_domain(cpu, SD_ASYM_PACKING); in update_top_cache_domain()
666 rcu_assign_pointer(per_cpu(sd_asym_packing, cpu), sd); in update_top_cache_domain()
668 sd = lowest_flag_domain(cpu, SD_ASYM_CPUCAPACITY); in update_top_cache_domain()
669 rcu_assign_pointer(per_cpu(sd_asym_cpucapacity, cpu), sd); in update_top_cache_domain()
673 * Attach the domain 'sd' to 'cpu' as its base domain. Callers must
677 cpu_attach_domain(struct sched_domain *sd, struct root_domain *rd, int cpu) in cpu_attach_domain() argument
679 struct rq *rq = cpu_rq(cpu); in cpu_attach_domain()
685 struct sched_domain *parent = tmp->parent; in cpu_attach_domain()
690 tmp->parent = parent->parent; in cpu_attach_domain()
691 if (parent->parent) in cpu_attach_domain()
692 parent->parent->child = tmp; in cpu_attach_domain()
698 if (parent->flags & SD_PREFER_SIBLING) in cpu_attach_domain()
699 tmp->flags |= SD_PREFER_SIBLING; in cpu_attach_domain()
702 tmp = tmp->parent; in cpu_attach_domain()
707 sd = sd->parent; in cpu_attach_domain()
710 sd->child = NULL; in cpu_attach_domain()
713 for (tmp = sd; tmp; tmp = tmp->parent) in cpu_attach_domain()
714 numa_distance += !!(tmp->flags & SD_NUMA); in cpu_attach_domain()
716 sched_domain_debug(sd, cpu); in cpu_attach_domain()
719 tmp = rq->sd; in cpu_attach_domain()
720 rcu_assign_pointer(rq->sd, sd); in cpu_attach_domain()
721 dirty_sched_domain_sysctl(cpu); in cpu_attach_domain()
724 update_top_cache_domain(cpu); in cpu_attach_domain()
740 * Return the canonical balance CPU for this group, this is the first CPU
757 * Given a node-distance table, for example:
759 * node 0 1 2 3
763 * 3: 20 30 20 10
767 * 0 ----- 1
771 * 3 ----- 2
777 * For the above NUMA topology that gives 3 levels:
779 * NUMA-2 0-3 0-3 0-3 0-3
780 * groups: {0-1,3},{1-3} {0-2},{0,2-3} {1-3},{0-1,3} {0,2-3},{0-2}
782 * NUMA-1 0-1,3 0-2 1-3 0,2-3
783 * groups: {0},{1},{3} {0},{1},{2} {1},{2},{3} {0},{2},{3}
785 * NUMA-0 0 1 2 3
790 * represented multiple times -- hence the "overlap" naming for this part of
794 * domain. For instance Node-0 NUMA-2 would only get groups: 0-1,3 and 1-3.
798 * - the first group of each domain is its child domain; this
799 * gets us the first 0-1,3
800 * - the only uncovered node is 2, whose child domain is 1-3. in build_balance_mask()
802 * However, because of the overlap, computing a unique CPU for each group is
803 * more complicated. Consider for instance the groups of NODE-1 NUMA-2, both
804 * groups include the CPUs of Node-0, while those CPUs would not in fact ever
805 * end up at those groups (they would end up in group: 0-1,3).
820 * node 0 1 2 3
824 * 3: 30 20 20 10
828 * 0 ----- 1
832 * 2 ----- 3
834 * This topology is asymmetric, nodes 1,2 are fully connected, but nodes 0,3
838 * not of the same number for each CPU. Consider:
840 * NUMA-2 0-3 0-3
841 * groups: {0-2},{1-3} {1-3},{0-2}
843 * NUMA-1 0-2 0-3 0-3 1-3
845 * NUMA-0 0 1 2 3
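
The per-level spans in the first (ring) table above follow mechanically from the node-distance matrix: for every distinct distance value, each node's span is the set of nodes no farther away than that value, which is in spirit what sched_init_numa() stores in sched_domains_numa_masks[level][node]. A self-contained sketch of that derivation; only row 3 of the table appears in the matched lines, so the other rows are filled in from the ring topology the comment describes, and everything else is illustrative:

    #include <stdio.h>

    #define NR_NODES 4

    /* Node-distance table for the 4-node ring example above. */
    static const int dist[NR_NODES][NR_NODES] = {
    	{ 10, 20, 30, 20 },
    	{ 20, 10, 20, 30 },
    	{ 30, 20, 10, 20 },
    	{ 20, 30, 20, 10 },
    };

    int main(void)
    {
    	const int level_dist[] = { 10, 20, 30 };	/* distinct distances */

    	for (int level = 0; level < 3; level++) {
    		printf("NUMA-%d (distance <= %d):\n", level, level_dist[level]);
    		for (int i = 0; i < NR_NODES; i++) {
    			printf("  node %d: {", i);
    			for (int j = 0; j < NR_NODES; j++)
    				if (dist[i][j] <= level_dist[level])
    					printf(" %d", j);
    			printf(" }\n");
    		}
    	}
    	return 0;
    }

Running this reproduces the spans quoted in the comment: NUMA-0 is each node alone, NUMA-1 gives 0-1,3 / 0-2 / 1-3 / 0,2-3, and NUMA-2 covers 0-3 everywhere.
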
863 struct sd_data *sdd = sd->private; in build_balance_mask()
870 sibling = *per_cpu_ptr(sdd->sd, i); in build_balance_mask()
877 if (!sibling->child) in build_balance_mask()
881 if (!cpumask_equal(sg_span, sched_domain_span(sibling->child))) in build_balance_mask()
892 * XXX: This creates per-node group entries; since the load-balancer will
893 * immediately access remote memory to construct this group's load-balance
897 build_group_from_child_sched_domain(struct sched_domain *sd, int cpu) in build_group_from_child_sched_domain() argument
903 GFP_KERNEL, cpu_to_node(cpu)); in build_group_from_child_sched_domain()
909 if (sd->child) in build_group_from_child_sched_domain()
910 cpumask_copy(sg_span, sched_domain_span(sd->child)); in build_group_from_child_sched_domain()
914 atomic_inc(&sg->ref); in build_group_from_child_sched_domain()
922 struct sd_data *sdd = sd->private; in init_overlap_sched_group()
924 int cpu; in init_overlap_sched_group() local
927 cpu = cpumask_first_and(sched_group_span(sg), mask); in init_overlap_sched_group()
929 sg->sgc = *per_cpu_ptr(sdd->sgc, cpu); in init_overlap_sched_group()
930 if (atomic_inc_return(&sg->sgc->ref) == 1) in init_overlap_sched_group()
936 * Initialize sgc->capacity such that even if we mess up the in init_overlap_sched_group()
941 sg->sgc->capacity = SCHED_CAPACITY_SCALE * cpumask_weight(sg_span); in init_overlap_sched_group()
942 sg->sgc->min_capacity = SCHED_CAPACITY_SCALE; in init_overlap_sched_group()
943 sg->sgc->max_capacity = SCHED_CAPACITY_SCALE; in init_overlap_sched_group()
953 while (sibling->child && in find_descended_sibling()
954 !cpumask_subset(sched_domain_span(sibling->child), in find_descended_sibling()
956 sibling = sibling->child; in find_descended_sibling()
963 while (sibling->child && in find_descended_sibling()
964 cpumask_equal(sched_domain_span(sibling->child), in find_descended_sibling()
966 sibling = sibling->child; in find_descended_sibling()
972 build_overlap_sched_groups(struct sched_domain *sd, int cpu) in build_overlap_sched_groups() argument
977 struct sd_data *sdd = sd->private; in build_overlap_sched_groups()
983 for_each_cpu_wrap(i, span, cpu) { in build_overlap_sched_groups()
989 sibling = *per_cpu_ptr(sdd->sd, i); in build_overlap_sched_groups()
998 * Domains should always include the CPU they're built on, so in build_overlap_sched_groups()
1006 * But for machines whose NUMA diameter is 3 or above, we move in build_overlap_sched_groups()
1011 * Smallest diameter=3 topology is: in build_overlap_sched_groups()
1013 * node 0 1 2 3 in build_overlap_sched_groups()
1017 * 3: 40 30 20 10 in build_overlap_sched_groups()
1019 * 0 --- 1 --- 2 --- 3 in build_overlap_sched_groups()
1021 * NUMA-3 0-3 N/A N/A 0-3 in build_overlap_sched_groups()
1022 * groups: {0-2},{1-3} {1-3},{0-2} in build_overlap_sched_groups()
1024 * NUMA-2 0-2 0-3 0-3 1-3 in build_overlap_sched_groups()
1025 * groups: {0-1},{1-3} {0-2},{2-3} {1-3},{0-1} {2-3},{0-2} in build_overlap_sched_groups()
1027 * NUMA-1 0-1 0-2 1-3 2-3 in build_overlap_sched_groups()
1028 * groups: {0},{1} {1},{2},{0} {2},{3},{1} {3},{2} in build_overlap_sched_groups()
1030 * NUMA-0 0 1 2 3 in build_overlap_sched_groups()
1032 * The NUMA-2 groups for nodes 0 and 3 are obviously buggered, as the in build_overlap_sched_groups()
1035 if (sibling->child && in build_overlap_sched_groups()
1036 !cpumask_subset(sched_domain_span(sibling->child), span)) in build_overlap_sched_groups()
1039 sg = build_group_from_child_sched_domain(sibling, cpu); in build_overlap_sched_groups()
1051 last->next = sg; in build_overlap_sched_groups()
1053 last->next = first; in build_overlap_sched_groups()
1055 sd->groups = first; in build_overlap_sched_groups()
1062 return -ENOMEM; in build_overlap_sched_groups()
1067 * Package topology (also see the load-balance blurb in fair.c)
1072 * - Simultaneous multithreading (SMT)
1073 * - Multi-Core Cache (MC)
1074 * - Package (DIE)
1078 * The tree consists of 3 primary data structures:
1080 * sched_domain -> sched_group -> sched_group_capacity
1082 * `-' `-'
1084 * The sched_domains are per-CPU and have a two way link (parent & child) and
1090 * CPU of that sched_domain [*].
1094 * CPU 0 1 2 3 4 5 6 7
1100 * - or -
1102 * DIE 0-7 0-7 0-7 0-7 0-7 0-7 0-7 0-7
1103 * MC 0-3 0-3 0-3 0-3 4-7 4-7 4-7 4-7
1104 * SMT 0-1 0-1 2-3 2-3 4-5 4-5 6-7 6-7
1106 * CPU 0 1 2 3 4 5 6 7
1114 * There are two related construction problems, both require a CPU that
1117 * - The first is the balance_cpu (see should_we_balance() and the
1118 * load-balance blurb in fair.c); for each group we only want 1 CPU to
1121 * - The second is the sched_group_capacity; we want all identical groups
1127 * for each CPU in the hierarchy.
1129 * Therefore computing a unique CPU for each group is trivial (the iteration
1131 * group), we can simply pick the first CPU in each group.
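
The sched_group ring referenced throughout (sd->groups, group->next, and the do/while loops in sched_domain_debug_one() and init_sched_groups_capacity()) is a circular singly-linked list headed by the group containing the domain's own CPU. A minimal userspace sketch of that structure and the canonical walk, using simplified stand-in types rather than the kernel's real struct layouts:

    #include <stdio.h>

    /* Simplified stand-ins for struct sched_group / struct sched_domain. */
    struct group {
    	int id;
    	struct group *next;	/* circular: last->next == first */
    };

    struct domain {
    	struct group *groups;	/* first group: the one holding this CPU */
    };

    int main(void)
    {
    	struct group g0 = { .id = 0 }, g1 = { .id = 1 }, g2 = { .id = 2 };
    	struct domain sd = { .groups = &g0 };
    	struct group *sg;

    	/* Close the ring, as build_sched_groups() does with last->next. */
    	g0.next = &g1;
    	g1.next = &g2;
    	g2.next = &g0;

    	/* Canonical walk: visit every group once, stop back at the head. */
    	sg = sd.groups;
    	do {
    		printf("group %d\n", sg->id);
    		sg = sg->next;
    	} while (sg != sd.groups);

    	return 0;
    }
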
1137 static struct sched_group *get_group(int cpu, struct sd_data *sdd) in get_group() argument
1139 struct sched_domain *sd = *per_cpu_ptr(sdd->sd, cpu); in get_group()
1140 struct sched_domain *child = sd->child; in get_group()
1145 cpu = cpumask_first(sched_domain_span(child)); in get_group()
1147 sg = *per_cpu_ptr(sdd->sg, cpu); in get_group()
1148 sg->sgc = *per_cpu_ptr(sdd->sgc, cpu); in get_group()
1151 already_visited = atomic_inc_return(&sg->ref) > 1; in get_group()
1153 WARN_ON(already_visited != (atomic_inc_return(&sg->sgc->ref) > 1)); in get_group()
1163 cpumask_set_cpu(cpu, sched_group_span(sg)); in get_group()
1164 cpumask_set_cpu(cpu, group_balance_mask(sg)); in get_group()
1167 sg->sgc->capacity = SCHED_CAPACITY_SCALE * cpumask_weight(sched_group_span(sg)); in get_group()
1168 sg->sgc->min_capacity = SCHED_CAPACITY_SCALE; in get_group()
1169 sg->sgc->max_capacity = SCHED_CAPACITY_SCALE; in get_group()
1176 * covered by the given span, will set each group's ->cpumask correctly,
1177 * and will initialize their ->sgc.
1182 build_sched_groups(struct sched_domain *sd, int cpu) in build_sched_groups() argument
1185 struct sd_data *sdd = sd->private; in build_sched_groups()
1195 for_each_cpu_wrap(i, span, cpu) { in build_sched_groups()
1208 last->next = sg; in build_sched_groups()
1211 last->next = first; in build_sched_groups()
1212 sd->groups = first; in build_sched_groups()
1227 void init_sched_groups_capacity(int cpu, struct sched_domain *sd) in init_sched_groups_capacity() argument
1229 struct sched_group *sg = sd->groups; in init_sched_groups_capacity()
1237 int cpu, max_cpu = -1; in init_sched_groups_capacity() local
1242 sg->group_weight = cpumask_weight(&avail_mask); in init_sched_groups_capacity()
1244 sg->group_weight = cpumask_weight(sched_group_span(sg)); in init_sched_groups_capacity()
1247 if (!(sd->flags & SD_ASYM_PACKING)) in init_sched_groups_capacity()
1250 for_each_cpu(cpu, sched_group_span(sg)) { in init_sched_groups_capacity()
1252 max_cpu = cpu; in init_sched_groups_capacity()
1253 else if (sched_asym_prefer(cpu, max_cpu)) in init_sched_groups_capacity()
1254 max_cpu = cpu; in init_sched_groups_capacity()
1256 sg->asym_prefer_cpu = max_cpu; in init_sched_groups_capacity()
1259 sg = sg->next; in init_sched_groups_capacity()
1260 } while (sg != sd->groups); in init_sched_groups_capacity()
1262 if (cpu != group_balance_cpu(sg)) in init_sched_groups_capacity()
1265 update_group_capacity(sd, cpu); in init_sched_groups_capacity()
1270 * Non-inlined to reduce accumulated stack pressure in build_sched_domains()
1273 static int default_relax_domain_level = -1;
1290 if (!attr || attr->relax_domain_level < 0) { in set_domain_attribute()
1295 request = attr->relax_domain_level; in set_domain_attribute()
1297 if (sd->level > request) { in set_domain_attribute()
1299 sd->flags &= ~(SD_BALANCE_WAKE|SD_BALANCE_NEWIDLE); in set_domain_attribute()
1311 if (!atomic_read(&d->rd->refcount)) in __free_domain_allocs()
1312 free_rootdomain(&d->rd->rcu); in __free_domain_allocs()
1315 free_percpu(d->sd); in __free_domain_allocs()
1332 d->sd = alloc_percpu(struct sched_domain *); in __visit_domain_allocation_hell()
1333 if (!d->sd) in __visit_domain_allocation_hell()
1335 d->rd = alloc_rootdomain(); in __visit_domain_allocation_hell()
1336 if (!d->rd) in __visit_domain_allocation_hell()
1347 static void claim_allocations(int cpu, struct sched_domain *sd) in claim_allocations() argument
1349 struct sd_data *sdd = sd->private; in claim_allocations()
1351 WARN_ON_ONCE(*per_cpu_ptr(sdd->sd, cpu) != sd); in claim_allocations()
1352 *per_cpu_ptr(sdd->sd, cpu) = NULL; in claim_allocations()
1354 if (atomic_read(&(*per_cpu_ptr(sdd->sds, cpu))->ref)) in claim_allocations()
1355 *per_cpu_ptr(sdd->sds, cpu) = NULL; in claim_allocations()
1357 if (atomic_read(&(*per_cpu_ptr(sdd->sg, cpu))->ref)) in claim_allocations()
1358 *per_cpu_ptr(sdd->sg, cpu) = NULL; in claim_allocations()
1360 if (atomic_read(&(*per_cpu_ptr(sdd->sgc, cpu))->ref)) in claim_allocations()
1361 *per_cpu_ptr(sdd->sgc, cpu) = NULL; in claim_allocations()
1383 * SD_SHARE_CPUCAPACITY - describes SMT topologies
1384 * SD_SHARE_PKG_RESOURCES - describes shared caches
1385 * SD_NUMA - describes NUMA topologies
1390 * SD_ASYM_PACKING - describes SMT quirks
1401 struct sched_domain *child, int dflags, int cpu) in sd_init() argument
1403 struct sd_data *sdd = &tl->data; in sd_init()
1404 struct sched_domain *sd = *per_cpu_ptr(sdd->sd, cpu); in sd_init()
1411 sched_domains_curr_level = tl->numa_level; in sd_init()
1414 sd_weight = cpumask_weight(tl->mask(cpu)); in sd_init()
1416 if (tl->sd_flags) in sd_init()
1417 sd_flags = (*tl->sd_flags)(); in sd_init()
1452 .name = tl->name, in sd_init()
1456 cpumask_and(sched_domain_span(sd), cpu_map, tl->mask(cpu)); in sd_init()
1464 if ((sd->flags & SD_ASYM_CPUCAPACITY) && sd->child) in sd_init()
1465 sd->child->flags &= ~SD_PREFER_SIBLING; in sd_init()
1467 if (sd->flags & SD_SHARE_CPUCAPACITY) { in sd_init()
1468 sd->imbalance_pct = 110; in sd_init()
1470 } else if (sd->flags & SD_SHARE_PKG_RESOURCES) { in sd_init()
1471 sd->imbalance_pct = 117; in sd_init()
1472 sd->cache_nice_tries = 1; in sd_init()
1475 } else if (sd->flags & SD_NUMA) { in sd_init()
1476 sd->cache_nice_tries = 2; in sd_init()
1478 sd->flags &= ~SD_PREFER_SIBLING; in sd_init()
1479 sd->flags |= SD_SERIALIZE; in sd_init()
1480 if (sched_domains_numa_distance[tl->numa_level] > node_reclaim_distance) { in sd_init()
1481 sd->flags &= ~(SD_BALANCE_EXEC | in sd_init()
1488 sd->cache_nice_tries = 1; in sd_init()
1495 if (sd->flags & SD_SHARE_PKG_RESOURCES) { in sd_init()
1496 sd->shared = *per_cpu_ptr(sdd->sds, sd_id); in sd_init()
1497 atomic_inc(&sd->shared->ref); in sd_init()
1498 atomic_set(&sd->shared->nr_busy_cpus, sd_weight); in sd_init()
1501 sd->private = sdd; in sd_init()
1507 * Topology list, bottom-up.
1524 for (tl = sched_domain_topology; tl->mask; tl++)
1536 static const struct cpumask *sd_numa_mask(int cpu) in sd_numa_mask() argument
1538 return sched_domains_numa_masks[sched_domains_curr_level][cpu_to_node(cpu)]; in sd_numa_mask()
1590 * - If the maximum distance between any nodes is 1 hop, the system
1592 * - If for two nodes A and B, located N > 1 hops away from each other,
1639 * O(nr_nodes^2) deduplicating selection sort -- in order to find the in sched_init_numa()
1721 sched_numa_warn("Node-distance not symmetric"); in sched_init_numa()
1770 sched_max_numa_distance = sched_domains_numa_distance[nr_levels - 1]; in sched_init_numa()
1775 void sched_domains_numa_masks_set(unsigned int cpu) in sched_domains_numa_masks_set() argument
1777 int node = cpu_to_node(cpu); in sched_domains_numa_masks_set()
1783 cpumask_set_cpu(cpu, sched_domains_numa_masks[i][j]); in sched_domains_numa_masks_set()
1788 void sched_domains_numa_masks_clear(unsigned int cpu) in sched_domains_numa_masks_clear() argument
1794 cpumask_clear_cpu(cpu, sched_domains_numa_masks[i][j]); in sched_domains_numa_masks_clear()
1799 * sched_numa_find_closest() - given the NUMA topology, find the cpu
1800 * closest to @cpu from @cpus.
1801 * cpus: cpumask to find a cpu from
1802 * cpu: cpu to be close to
1804 * returns: cpu, or nr_cpu_ids when nothing found.
1806 int sched_numa_find_closest(const struct cpumask *cpus, int cpu) in sched_numa_find_closest() argument
1808 int i, j = cpu_to_node(cpu); in sched_numa_find_closest()
1811 cpu = cpumask_any_and(cpus, sched_domains_numa_masks[i][j]); in sched_numa_find_closest()
1812 if (cpu < nr_cpu_ids) in sched_numa_find_closest()
1813 return cpu; in sched_numa_find_closest()
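
sched_numa_find_closest() scans the per-level node masks outward from @cpu's own node and returns the first allowed CPU it finds. A standalone sketch of that nearest-level-first search over a toy masks table; bitmasks stand in for cpumasks, and the two-node layout and values are invented purely for illustration:

    #include <stdio.h>

    #define NR_LEVELS 3
    #define NR_CPUS   8

    /*
     * masks[level][node]: CPUs within that distance level of 'node', closest
     * level first -- a toy stand-in for sched_domains_numa_masks[][].
     */
    static const unsigned int masks[NR_LEVELS][2] = {
    	{ 0x0f, 0xf0 },	/* level 0: the node itself    */
    	{ 0x3f, 0xfc },	/* level 1: one hop away       */
    	{ 0xff, 0xff },	/* level 2: the whole machine  */
    };

    /* Same search order as sched_numa_find_closest(): nearest level first. */
    static int find_closest(unsigned int cpus, int node)
    {
    	for (int level = 0; level < NR_LEVELS; level++) {
    		unsigned int hit = cpus & masks[level][node];

    		for (int cpu = 0; cpu < NR_CPUS; cpu++)
    			if (hit & (1u << cpu))
    				return cpu;
    	}
    	return NR_CPUS;	/* nothing found, like returning nr_cpu_ids */
    }

    int main(void)
    {
    	/* Allowed CPUs are 6 and 7; the caller sits on node 0. */
    	printf("closest = %d\n", find_closest(0xc0, 0));	/* prints 6 */
    	return 0;
    }
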
1826 struct sd_data *sdd = &tl->data; in __sdt_alloc()
1828 sdd->sd = alloc_percpu(struct sched_domain *); in __sdt_alloc()
1829 if (!sdd->sd) in __sdt_alloc()
1830 return -ENOMEM; in __sdt_alloc()
1832 sdd->sds = alloc_percpu(struct sched_domain_shared *); in __sdt_alloc()
1833 if (!sdd->sds) in __sdt_alloc()
1834 return -ENOMEM; in __sdt_alloc()
1836 sdd->sg = alloc_percpu(struct sched_group *); in __sdt_alloc()
1837 if (!sdd->sg) in __sdt_alloc()
1838 return -ENOMEM; in __sdt_alloc()
1840 sdd->sgc = alloc_percpu(struct sched_group_capacity *); in __sdt_alloc()
1841 if (!sdd->sgc) in __sdt_alloc()
1842 return -ENOMEM; in __sdt_alloc()
1853 return -ENOMEM; in __sdt_alloc()
1855 *per_cpu_ptr(sdd->sd, j) = sd; in __sdt_alloc()
1860 return -ENOMEM; in __sdt_alloc()
1862 *per_cpu_ptr(sdd->sds, j) = sds; in __sdt_alloc()
1867 return -ENOMEM; in __sdt_alloc()
1869 sg->next = sg; in __sdt_alloc()
1871 *per_cpu_ptr(sdd->sg, j) = sg; in __sdt_alloc()
1876 return -ENOMEM; in __sdt_alloc()
1879 sgc->id = j; in __sdt_alloc()
1882 *per_cpu_ptr(sdd->sgc, j) = sgc; in __sdt_alloc()
1895 struct sd_data *sdd = &tl->data; in __sdt_free()
1900 if (sdd->sd) { in __sdt_free()
1901 sd = *per_cpu_ptr(sdd->sd, j); in __sdt_free()
1902 if (sd && (sd->flags & SD_OVERLAP)) in __sdt_free()
1903 free_sched_groups(sd->groups, 0); in __sdt_free()
1904 kfree(*per_cpu_ptr(sdd->sd, j)); in __sdt_free()
1907 if (sdd->sds) in __sdt_free()
1908 kfree(*per_cpu_ptr(sdd->sds, j)); in __sdt_free()
1909 if (sdd->sg) in __sdt_free()
1910 kfree(*per_cpu_ptr(sdd->sg, j)); in __sdt_free()
1911 if (sdd->sgc) in __sdt_free()
1912 kfree(*per_cpu_ptr(sdd->sgc, j)); in __sdt_free()
1914 free_percpu(sdd->sd); in __sdt_free()
1915 sdd->sd = NULL; in __sdt_free()
1916 free_percpu(sdd->sds); in __sdt_free()
1917 sdd->sds = NULL; in __sdt_free()
1918 free_percpu(sdd->sg); in __sdt_free()
1919 sdd->sg = NULL; in __sdt_free()
1920 free_percpu(sdd->sgc); in __sdt_free()
1921 sdd->sgc = NULL; in __sdt_free()
1927 struct sched_domain *child, int dflags, int cpu) in build_sched_domain() argument
1929 struct sched_domain *sd = sd_init(tl, cpu_map, child, dflags, cpu); in build_sched_domain()
1932 sd->level = child->level + 1; in build_sched_domain()
1933 sched_domain_level_max = max(sched_domain_level_max, sd->level); in build_sched_domain()
1934 child->parent = sd; in build_sched_domain()
1941 child->name, sd->name); in build_sched_domain()
1957 * any two given CPUs at this (non-NUMA) topology level.
1960 const struct cpumask *cpu_map, int cpu) in topology_span_sane() argument
1965 if (tl->flags & SDTL_OVERLAP) in topology_span_sane()
1969 * Non-NUMA levels cannot partially overlap - they must be either in topology_span_sane()
1971 * breaking the sched_group lists - i.e. a later get_group() pass in topology_span_sane()
1975 if (i == cpu) in topology_span_sane()
1983 if (!cpumask_equal(tl->mask(cpu), tl->mask(i)) && in topology_span_sane()
1984 cpumask_intersects(tl->mask(cpu), tl->mask(i))) in topology_span_sane()
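
topology_span_sane() enforces that, at a non-NUMA level, any two CPUs' masks are either identical or completely disjoint; partial overlap would corrupt the sched_group lists built later. A tiny sketch of that invariant with plain bitmasks in place of cpumasks (the kernel checks one CPU against the rest per call, this sketch just checks all pairs at once; the mask values are made up):

    #include <stdbool.h>
    #include <stdio.h>

    /* Per-CPU level masks as plain bitmasks (toy data, not a real topology). */
    static const unsigned int good_mask[4] = { 0x3, 0x3, 0xc, 0xc };	/* sane     */
    static const unsigned int bad_mask[4]  = { 0x3, 0x6, 0xc, 0xc };	/* overlaps */

    static bool span_sane(const unsigned int *mask, int nr_cpus)
    {
    	for (int i = 0; i < nr_cpus; i++)
    		for (int j = i + 1; j < nr_cpus; j++)
    			/* Masks must be either equal or disjoint. */
    			if (mask[i] != mask[j] && (mask[i] & mask[j]))
    				return false;
    	return true;
    }

    int main(void)
    {
    	printf("good_mask sane: %d\n", span_sane(good_mask, 4));	/* 1 */
    	printf("bad_mask  sane: %d\n", span_sane(bad_mask, 4));	/* 0 */
    	return 0;
    }
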
1992 * Find the sched_domain_topology_level where all CPU capacities are visible
2017 * Examine topology from all CPU's point of views to detect the lowest in asym_cpu_capacity_level()
2018 * sched_domain_topology_level where a highest capacity CPU is visible in asym_cpu_capacity_level()
2029 for_each_cpu_and(j, tl->mask(i), cpu_map) { in asym_cpu_capacity_level()
2061 int i, ret = -ENOMEM; in build_sched_domains()
2093 if (tl->flags & SDTL_OVERLAP) in build_sched_domains()
2094 sd->flags |= SD_OVERLAP; in build_sched_domains()
2102 for (sd = *per_cpu_ptr(d.sd, i); sd; sd = sd->parent) { in build_sched_domains()
2103 sd->span_weight = cpumask_weight(sched_domain_span(sd)); in build_sched_domains()
2104 if (sd->flags & SD_OVERLAP) { in build_sched_domains()
2114 /* Calculate CPU capacity for physical packages and nodes */ in build_sched_domains()
2115 for (i = nr_cpumask_bits-1; i >= 0; i--) { in build_sched_domains()
2119 for (sd = *per_cpu_ptr(d.sd, i); sd; sd = sd->parent) { in build_sched_domains()
2129 int max_cpu = READ_ONCE(d.rd->max_cap_orig_cpu); in build_sched_domains()
2138 WRITE_ONCE(d.rd->max_cap_orig_cpu, i); in build_sched_domains()
2142 if (rq->cpu_capacity_orig > READ_ONCE(d.rd->max_cpu_capacity)) in build_sched_domains()
2143 WRITE_ONCE(d.rd->max_cpu_capacity, rq->cpu_capacity_orig); in build_sched_domains()
2154 cpumask_pr_args(cpu_map), rq->rd->max_cpu_capacity); in build_sched_domains()
2182 * CPU core maps. It is supposed to return 1 if the topology changed
2245 unsigned int cpu = cpumask_any(cpu_map); in detach_destroy_domains() local
2248 if (rcu_access_pointer(per_cpu(sd_asym_cpucapacity, cpu))) in detach_destroy_domains()
2312 /* Let the architecture update CPU core mappings: */ in partition_sched_domains_locked()
2337 * its dl_bw->total_bw needs to be cleared. It in partition_sched_domains_locked()
2341 rd = cpu_rq(cpumask_any(doms_cur[i]))->rd; in partition_sched_domains_locked()
2346 /* No match - a current sched domain not in new doms_new[] */ in partition_sched_domains_locked()
2367 /* No match - add a new doms_new */ in partition_sched_domains_locked()
2378 cpu_rq(cpumask_first(doms_cur[j]))->rd->pd) { in partition_sched_domains_locked()
2383 /* No match - add perf. domains for a new rd */ in partition_sched_domains_locked()