1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (c) 2014-2021, The Linux Foundation. All rights reserved.
4  */
5 
6 #define pr_fmt(fmt)	"core_ctl: " fmt
7 
8 #include <linux/init.h>
9 #include <linux/cpu.h>
10 #include <linux/cpumask.h>
11 #include <linux/cpufreq.h>
12 #include <linux/kthread.h>
13 #include <linux/sched.h>
14 #include <linux/sched/rt.h>
15 #include <linux/syscore_ops.h>
16 #include <uapi/linux/sched/types.h>
17 #include <linux/sched/core_ctl.h>
18 
19 #include <trace/events/sched.h>
20 #include "sched.h"
21 #include "walt.h"
22 
23 #define MAX_CPUS_PER_CLUSTER 6
24 #define MAX_CLUSTERS 3
25 
26 struct cluster_data {
27 	bool inited;
28 	unsigned int min_cpus;
29 	unsigned int max_cpus;
30 	unsigned int offline_delay_ms;
31 	unsigned int busy_up_thres[MAX_CPUS_PER_CLUSTER];
32 	unsigned int busy_down_thres[MAX_CPUS_PER_CLUSTER];
33 	unsigned int active_cpus;
34 	unsigned int num_cpus;
35 	unsigned int nr_isolated_cpus;
36 	unsigned int nr_not_preferred_cpus;
37 	cpumask_t cpu_mask;
38 	unsigned int need_cpus;
39 	unsigned int task_thres;
40 	unsigned int max_nr;
41 	unsigned int nr_prev_assist;
42 	unsigned int nr_prev_assist_thresh;
43 	s64 need_ts;
44 	struct list_head lru;
45 	bool pending;
46 	spinlock_t pending_lock;
47 	bool enable;
48 	int nrrun;
49 	struct task_struct *core_ctl_thread;
50 	unsigned int first_cpu;
51 	unsigned int boost;
52 	struct kobject kobj;
53 };
54 
55 struct cpu_data {
56 	bool is_busy;
57 	unsigned int busy;
58 	unsigned int cpu;
59 	bool not_preferred;
60 	struct cluster_data *cluster;
61 	struct list_head sib;
62 	bool isolated_by_us;
63 };
64 
65 static DEFINE_PER_CPU(struct cpu_data, cpu_state);
66 static struct cluster_data cluster_state[MAX_CLUSTERS];
67 static unsigned int num_clusters;
68 
69 #define for_each_cluster(cluster, idx) \
70 	for (; (idx) < num_clusters && ((cluster) = &cluster_state[idx]);\
71 		(idx)++)
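/*
 * Note: for_each_cluster() does not initialise idx; callers set it before
 * use and it keeps advancing, which lets compute_cluster_nr_need() walk
 * from a given cluster upwards through cluster_state[].
 */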
72 
73 static DEFINE_SPINLOCK(state_lock);
74 static void apply_need(struct cluster_data *state);
75 static void wake_up_core_ctl_thread(struct cluster_data *state);
76 static bool initialized;
77 
78 ATOMIC_NOTIFIER_HEAD(core_ctl_notifier);
79 static unsigned int last_nr_big;
80 
81 static unsigned int get_active_cpu_count(const struct cluster_data *cluster);
82 
83 /* ========================= sysfs interface =========================== */
84 
85 static ssize_t store_min_cpus(struct cluster_data *state,
86 				const char *buf, size_t count)
87 {
88 	unsigned int val;
89 
90 	if (sscanf(buf, "%u\n", &val) != 1)
91 		return -EINVAL;
92 
93 	state->min_cpus = min(val, state->max_cpus);
94 	wake_up_core_ctl_thread(state);
95 
96 	return count;
97 }
98 
99 static ssize_t show_min_cpus(const struct cluster_data *state, char *buf)
100 {
101 	return sysfs_emit(buf, "%u\n", state->min_cpus);
102 }
103 
104 static ssize_t store_max_cpus(struct cluster_data *state,
105 				const char *buf, size_t count)
106 {
107 	unsigned int val;
108 
109 	if (sscanf(buf, "%u\n", &val) != 1)
110 		return -EINVAL;
111 
112 	val = min(val, state->num_cpus);
113 	state->max_cpus = val;
114 	state->min_cpus = min(state->min_cpus, state->max_cpus);
115 	wake_up_core_ctl_thread(state);
116 
117 	return count;
118 }
119 
120 static ssize_t show_max_cpus(const struct cluster_data *state, char *buf)
121 {
122 	return sysfs_emit(buf, "%u\n", state->max_cpus);
123 }
124 
125 static ssize_t store_enable(struct cluster_data *state,
126 				const char *buf, size_t count)
127 {
128 	unsigned int val;
129 	bool bval;
130 
131 	if (sscanf(buf, "%u\n", &val) != 1)
132 		return -EINVAL;
133 
134 	bval = !!val;
135 	if (bval != state->enable) {
136 		state->enable = bval;
137 		apply_need(state);
138 	}
139 
140 	return count;
141 }
142 
143 static ssize_t show_enable(const struct cluster_data *state, char *buf)
144 {
145 	return sysfs_emit(buf, "%u\n", state->enable);
146 }
147 
148 static ssize_t show_need_cpus(const struct cluster_data *state, char *buf)
149 {
150 	return sysfs_emit(buf, "%u\n", state->need_cpus);
151 }
152 
153 static ssize_t show_active_cpus(const struct cluster_data *state, char *buf)
154 {
155 	return sysfs_emit(buf, "%u\n", state->active_cpus);
156 }
157 
158 static ssize_t show_global_state(const struct cluster_data *state, char *buf)
159 {
160 	struct cpu_data *c;
161 	struct cluster_data *cluster;
162 	ssize_t count = 0;
163 	unsigned int cpu;
164 
165 	spin_lock_irq(&state_lock);
166 	for_each_possible_cpu(cpu) {
167 		c = &per_cpu(cpu_state, cpu);
168 		cluster = c->cluster;
169 		if (!cluster || !cluster->inited)
170 			continue;
171 
172 		count += sysfs_emit_at(buf, count,
173 					"CPU%u\n", cpu);
174 		count += sysfs_emit_at(buf, count,
175 					"\tCPU: %u\n", c->cpu);
176 		count += sysfs_emit_at(buf, count,
177 					"\tOnline: %u\n",
178 					cpu_online(c->cpu));
179 		count += sysfs_emit_at(buf, count,
180 					"\tIsolated: %u\n",
181 					cpu_isolated(c->cpu));
182 		count += sysfs_emit_at(buf, count,
183 					"\tFirst CPU: %u\n",
184 						cluster->first_cpu);
185 		count += sysfs_emit_at(buf, count,
186 					"\tBusy%%: %u\n", c->busy);
187 		count += sysfs_emit_at(buf, count,
188 					"\tIs busy: %u\n", c->is_busy);
189 		count += sysfs_emit_at(buf, count,
190 					"\tNot preferred: %u\n",
191 						c->not_preferred);
192 		count += sysfs_emit_at(buf, count,
193 					"\tNr running: %u\n", cluster->nrrun);
194 		count += sysfs_emit_at(buf, count,
195 			"\tActive CPUs: %u\n", get_active_cpu_count(cluster));
196 		count += sysfs_emit_at(buf, count,
197 				"\tNeed CPUs: %u\n", cluster->need_cpus);
198 		count += sysfs_emit_at(buf, count,
199 				"\tNr isolated CPUs: %u\n",
200 						cluster->nr_isolated_cpus);
201 		count += sysfs_emit_at(buf, count,
202 				"\tBoost: %u\n", (unsigned int) cluster->boost);
203 	}
204 	spin_unlock_irq(&state_lock);
205 
206 	return count;
207 }
208 
209 struct core_ctl_attr {
210 	struct attribute attr;
211 	ssize_t (*show)(const struct cluster_data *, char *);
212 	ssize_t (*store)(struct cluster_data *, const char *, size_t count);
213 };
214 
215 #define core_ctl_attr_ro(_name)		\
216 static struct core_ctl_attr _name =	\
217 __ATTR(_name, 0444, show_##_name, NULL)
218 
219 #define core_ctl_attr_rw(_name)			\
220 static struct core_ctl_attr _name =		\
221 __ATTR(_name, 0644, show_##_name, store_##_name)
222 
223 core_ctl_attr_rw(min_cpus);
224 core_ctl_attr_rw(max_cpus);
225 core_ctl_attr_ro(need_cpus);
226 core_ctl_attr_ro(active_cpus);
227 core_ctl_attr_ro(global_state);
228 core_ctl_attr_rw(enable);
229 
230 static struct attribute *default_attrs[] = {
231 	&min_cpus.attr,
232 	&max_cpus.attr,
233 	&enable.attr,
234 	&need_cpus.attr,
235 	&active_cpus.attr,
236 	&global_state.attr,
237 	NULL
238 };
239 
240 #define to_cluster_data(k) container_of(k, struct cluster_data, kobj)
241 #define to_attr(a) container_of(a, struct core_ctl_attr, attr)
242 static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
243 {
244 	struct cluster_data *data = to_cluster_data(kobj);
245 	struct core_ctl_attr *cattr = to_attr(attr);
246 	ssize_t ret = -EIO;
247 
248 	if (cattr->show)
249 		ret = cattr->show(data, buf);
250 
251 	return ret;
252 }
253 
254 static ssize_t store(struct kobject *kobj, struct attribute *attr,
255 		     const char *buf, size_t count)
256 {
257 	struct cluster_data *data = to_cluster_data(kobj);
258 	struct core_ctl_attr *cattr = to_attr(attr);
259 	ssize_t ret = -EIO;
260 
261 	if (cattr->store)
262 		ret = cattr->store(data, buf, count);
263 
264 	return ret;
265 }
266 
267 static const struct sysfs_ops sysfs_ops = {
268 	.show	= show,
269 	.store	= store,
270 };
271 
272 static struct kobj_type ktype_core_ctl = {
273 	.sysfs_ops	= &sysfs_ops,
274 	.default_attrs	= default_attrs,
275 };
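/*
 * Usage sketch (paths assumed from cluster_init() below, which adds this
 * kobject as "core_ctl" under each cluster's first CPU device): the
 * tunables typically appear as
 *   /sys/devices/system/cpu/cpu<first_cpu>/core_ctl/{min_cpus,max_cpus,enable,...}
 * e.g. writing 2 to min_cpus keeps at least two CPUs of that cluster
 * unisolated, subject to max_cpus.
 */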
276 
277 /* ==================== runqueue based core count =================== */
278 
279 static struct sched_avg_stats nr_stats[NR_CPUS];
280 
281 /*
282  * nr_need:
283  *   Number of tasks running on this cluster plus
284  *   tasks running on higher capacity clusters.
285  *   Used to find out how many CPUs are needed from this cluster.
286  *
287  * For example:
288  *   On dual cluster system with 4 min capacity
289  *   CPUs and 4 max capacity CPUs, if there are
290  *   4 small tasks running on min capacity CPUs
291  *   and 2 big tasks running on 2 max capacity
292  *   CPUs, nr_need has to be 6 for min capacity
293  *   cluster and 2 for max capacity cluster.
294  *   This is because, min capacity cluster has to
295  *   account for tasks running on max capacity
296  *   cluster, so that the min capacity cluster
297  *   can be ready to accommodate tasks running on max
298  *   capacity CPUs if the demand of tasks goes down.
299  */
300 static int compute_cluster_nr_need(int index)
301 {
302 	int cpu;
303 	struct cluster_data *cluster;
304 	int nr_need = 0;
305 
306 	for_each_cluster(cluster, index) {
307 		for_each_cpu(cpu, &cluster->cpu_mask)
308 			nr_need += nr_stats[cpu].nr;
309 	}
310 
311 	return nr_need;
312 }
313 
314 /*
315  * prev_misfit_need:
316  *   Tasks running on a smaller capacity cluster which
317  *   need to be migrated to a higher capacity cluster.
318  *   To find out how many tasks need higher capacity CPUs.
319  *
320  * For example:
321  *   On dual cluster system with 4 min capacity
322  *   CPUs and 4 max capacity CPUs, if there are
323  *   2 small tasks and 2 big tasks running on
324  *   min capacity CPUs and no tasks running on
325  *   max capacity, prev_misfit_need of min capacity
326  *   cluster will be 0 and prev_misfit_need of
327  *   max capacity cluster will be 2.
328  */
329 static int compute_prev_cluster_misfit_need(int index)
330 {
331 	int cpu;
332 	struct cluster_data *prev_cluster;
333 	int prev_misfit_need = 0;
334 
335 	/*
336 	 * Lowest capacity cluster does not have to
337 	 * accommodate any misfit tasks.
338 	 */
339 	if (index == 0)
340 		return 0;
341 
342 	prev_cluster = &cluster_state[index - 1];
343 
344 	for_each_cpu(cpu, &prev_cluster->cpu_mask)
345 		prev_misfit_need += nr_stats[cpu].nr_misfit;
346 
347 	return prev_misfit_need;
348 }
349 
350 static int compute_cluster_max_nr(int index)
351 {
352 	int cpu;
353 	struct cluster_data *cluster = &cluster_state[index];
354 	int max_nr = 0;
355 
356 	for_each_cpu(cpu, &cluster->cpu_mask)
357 		max_nr = max(max_nr, nr_stats[cpu].nr_max);
358 
359 	return max_nr;
360 }
361 
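/*
 * "Real" big tasks for a cluster: on the lowest capacity cluster only
 * misfit tasks are counted (they want a bigger CPU); on every other
 * cluster all running tasks are counted as big.
 */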
362 static int cluster_real_big_tasks(int index)
363 {
364 	int nr_big = 0;
365 	int cpu;
366 	struct cluster_data *cluster = &cluster_state[index];
367 
368 	if (index == 0) {
369 		for_each_cpu(cpu, &cluster->cpu_mask)
370 			nr_big += nr_stats[cpu].nr_misfit;
371 	} else {
372 		for_each_cpu(cpu, &cluster->cpu_mask)
373 			nr_big += nr_stats[cpu].nr;
374 	}
375 
376 	return nr_big;
377 }
378 
379 /*
380  * prev_nr_need_assist:
381  *   Tasks that are eligible to run on the previous
382  *   cluster but cannot run because of insufficient
383  *   CPUs there. prev_nr_need_assist is indicative
384  *   of the number of CPUs in this cluster that should
385  *   assist its previous cluster to make up for
386  *   insufficient CPUs there.
387  *
388  * For example:
389  *   On tri-cluster system with 4 min capacity
390  *   CPUs, 3 intermediate capacity CPUs and 1
391  *   max capacity CPU, if there are 4 small
392  *   tasks running on min capacity CPUs, 4 big
393  *   tasks running on intermediate capacity CPUs
394  *   and no tasks running on max capacity CPU,
395  *   prev_nr_need_assist for min & max capacity
396  *   clusters will be 0, but, for intermediate
397  *   capacity cluster prev_nr_need_assist will
398  *   be 1 as it has 3 CPUs, but, there are 4 big
399  *   tasks to be served.
400  */
401 static int prev_cluster_nr_need_assist(int index)
402 {
403 	int need = 0;
404 	int cpu;
405 	struct cluster_data *prev_cluster;
406 
407 	if (index == 0)
408 		return 0;
409 
410 	index--;
411 	prev_cluster = &cluster_state[index];
412 
413 	/*
414 	 * The next cluster should not assist while there are isolated CPUs
415 	 * in this cluster.
416 	 */
417 	if (prev_cluster->nr_isolated_cpus)
418 		return 0;
419 
420 	for_each_cpu(cpu, &prev_cluster->cpu_mask)
421 		need += nr_stats[cpu].nr;
422 
423 	need += compute_prev_cluster_misfit_need(index);
424 
425 	if (need > prev_cluster->active_cpus)
426 		need = need - prev_cluster->active_cpus;
427 	else
428 		need = 0;
429 
430 	return need;
431 }
432 
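/*
 * Refresh per-cluster runqueue statistics from sched_get_nr_running_avg():
 * nrrun, max_nr and nr_prev_assist feed the need evaluation below, and the
 * system-wide big task count is cached in last_nr_big.
 */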
433 static void update_running_avg(void)
434 {
435 	struct cluster_data *cluster;
436 	unsigned int index = 0;
437 	unsigned long flags;
438 	int big_avg = 0;
439 
440 	sched_get_nr_running_avg(nr_stats);
441 
442 	spin_lock_irqsave(&state_lock, flags);
443 	for_each_cluster(cluster, index) {
444 		int nr_need, prev_misfit_need;
445 
446 		if (!cluster->inited)
447 			continue;
448 
449 		nr_need = compute_cluster_nr_need(index);
450 		prev_misfit_need = compute_prev_cluster_misfit_need(index);
451 
452 
453 		cluster->nrrun = nr_need + prev_misfit_need;
454 		cluster->max_nr = compute_cluster_max_nr(index);
455 		cluster->nr_prev_assist = prev_cluster_nr_need_assist(index);
456 		trace_core_ctl_update_nr_need(cluster->first_cpu, nr_need,
457 					prev_misfit_need,
458 					cluster->nrrun, cluster->max_nr,
459 					cluster->nr_prev_assist);
460 		big_avg += cluster_real_big_tasks(index);
461 	}
462 	spin_unlock_irqrestore(&state_lock, flags);
463 
464 	last_nr_big = big_avg;
465 }
466 
467 #define MAX_NR_THRESHOLD	4
468 /* adjust needed CPUs based on current runqueue information */
469 static unsigned int apply_task_need(const struct cluster_data *cluster,
470 				    unsigned int new_need)
471 {
472 	/* unisolate all cores if there are enough tasks */
473 	if (cluster->nrrun >= cluster->task_thres)
474 		return cluster->num_cpus;
475 
476 	/*
477 	 * unisolate as many cores as the previous cluster
478 	 * needs assistance with.
479 	 */
480 	if (cluster->nr_prev_assist >= cluster->nr_prev_assist_thresh)
481 		new_need = new_need + cluster->nr_prev_assist;
482 
483 	/* only unisolate more cores if there are tasks to run */
484 	if (cluster->nrrun > new_need)
485 		new_need = new_need + 1;
486 
487 	/*
488 	 * We don't want tasks to be overcrowded in a cluster.
489 	 * If any CPU has more than MAX_NR_THRESHOLD in the last
490 	 * window, bring another CPU to help out.
491 	 */
492 	if (cluster->max_nr > MAX_NR_THRESHOLD)
493 		new_need = new_need + 1;
494 
495 	return new_need;
496 }
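/*
 * Worked example (assuming nr_prev_assist stays below its threshold):
 * with new_need = 2 busy CPUs, nrrun = 3 and max_nr = 5, one CPU is added
 * because nrrun > new_need (-> 3) and another because
 * max_nr > MAX_NR_THRESHOLD (-> 4); the caller then clamps the result
 * with apply_limits().
 */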
497 
498 /* ======================= load based core count  ====================== */
499 
500 static unsigned int apply_limits(const struct cluster_data *cluster,
501 				 unsigned int need_cpus)
502 {
503 	return min(max(cluster->min_cpus, need_cpus), cluster->max_cpus);
504 }
505 
506 static unsigned int get_active_cpu_count(const struct cluster_data *cluster)
507 {
508 	return cluster->num_cpus -
509 				sched_isolate_count(&cluster->cpu_mask, true);
510 }
511 
512 static bool is_active(const struct cpu_data *state)
513 {
514 	return cpu_online(state->cpu) && !cpu_isolated(state->cpu);
515 }
516 
517 static bool adjustment_possible(const struct cluster_data *cluster,
518 							unsigned int need)
519 {
520 	return (need < cluster->active_cpus || (need > cluster->active_cpus &&
521 						cluster->nr_isolated_cpus));
522 }
523 
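/*
 * Re-evaluate how many CPUs the cluster needs. Needing more CPUs than are
 * currently active takes effect immediately; a lower need is only applied
 * after it has persisted for offline_delay_ms.
 */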
524 static bool eval_need(struct cluster_data *cluster)
525 {
526 	unsigned long flags;
527 	struct cpu_data *c;
528 	unsigned int need_cpus = 0, last_need, thres_idx;
529 	int ret = 0;
530 	bool need_flag = false;
531 	unsigned int new_need;
532 	s64 now, elapsed;
533 
534 	if (unlikely(!cluster->inited))
535 		return 0;
536 
537 	spin_lock_irqsave(&state_lock, flags);
538 
539 	if (cluster->boost || !cluster->enable) {
540 		need_cpus = cluster->max_cpus;
541 	} else {
542 		cluster->active_cpus = get_active_cpu_count(cluster);
543 		thres_idx = cluster->active_cpus ? cluster->active_cpus - 1 : 0;
544 		list_for_each_entry(c, &cluster->lru, sib) {
545 			bool old_is_busy = c->is_busy;
546 			int high_irqload = sched_cpu_high_irqload(c->cpu);
547 
548 			if (c->busy >= cluster->busy_up_thres[thres_idx] ||
549 			    high_irqload)
550 				c->is_busy = true;
551 			else if (c->busy < cluster->busy_down_thres[thres_idx])
552 				c->is_busy = false;
553 			trace_core_ctl_set_busy(c->cpu, c->busy, old_is_busy,
554 						c->is_busy, high_irqload);
555 			need_cpus += c->is_busy;
556 		}
557 		need_cpus = apply_task_need(cluster, need_cpus);
558 	}
559 	new_need = apply_limits(cluster, need_cpus);
560 	need_flag = adjustment_possible(cluster, new_need);
561 
562 	last_need = cluster->need_cpus;
563 	now = ktime_to_ms(ktime_get());
564 
565 	if (new_need > cluster->active_cpus) {
566 		ret = 1;
567 	} else {
568 		/*
569 		 * When there is no change in need and there are no more
570 		 * active CPUs than currently needed, just update the
571 		 * need time stamp and return.
572 		 */
573 		if (new_need == last_need && new_need == cluster->active_cpus) {
574 			cluster->need_ts = now;
575 			spin_unlock_irqrestore(&state_lock, flags);
576 			return 0;
577 		}
578 
579 		elapsed =  now - cluster->need_ts;
580 		ret = elapsed >= cluster->offline_delay_ms;
581 	}
582 
583 	if (ret) {
584 		cluster->need_ts = now;
585 		cluster->need_cpus = new_need;
586 	}
587 	trace_core_ctl_eval_need(cluster->first_cpu, last_need, new_need,
588 				 ret && need_flag);
589 	spin_unlock_irqrestore(&state_lock, flags);
590 
591 	return ret && need_flag;
592 }
593 
594 static void apply_need(struct cluster_data *cluster)
595 {
596 	if (eval_need(cluster))
597 		wake_up_core_ctl_thread(cluster);
598 }
599 
600 /* ========================= core count enforcement ==================== */
601 
602 static void wake_up_core_ctl_thread(struct cluster_data *cluster)
603 {
604 	unsigned long flags;
605 
606 	spin_lock_irqsave(&cluster->pending_lock, flags);
607 	cluster->pending = true;
608 	spin_unlock_irqrestore(&cluster->pending_lock, flags);
609 
610 	wake_up_process(cluster->core_ctl_thread);
611 }
612 
613 static u64 core_ctl_check_timestamp;
614 
614 
615 int core_ctl_set_boost(bool boost)
616 {
617 	unsigned int index = 0;
618 	struct cluster_data *cluster = NULL;
619 	unsigned long flags;
620 	int ret = 0;
621 	bool boost_state_changed = false;
622 
623 	if (unlikely(!initialized))
624 		return 0;
625 
626 	spin_lock_irqsave(&state_lock, flags);
627 	for_each_cluster(cluster, index) {
628 		if (boost) {
629 			boost_state_changed = !cluster->boost;
630 			++cluster->boost;
631 		} else {
632 			if (!cluster->boost) {
633 				ret = -EINVAL;
634 				break;
635 			} else {
636 				--cluster->boost;
637 				boost_state_changed = !cluster->boost;
638 			}
639 		}
640 	}
641 	spin_unlock_irqrestore(&state_lock, flags);
642 
643 	if (boost_state_changed) {
644 		index = 0;
645 		for_each_cluster(cluster, index)
646 			apply_need(cluster);
647 	}
648 
649 	if (cluster)
650 		trace_core_ctl_set_boost(cluster->boost, ret);
651 
652 	return ret;
653 }
654 EXPORT_SYMBOL(core_ctl_set_boost);
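/*
 * The boost is a per-cluster refcount: each core_ctl_set_boost(true) must
 * be paired with a core_ctl_set_boost(false), and an unbalanced "false"
 * returns -EINVAL. While boosted, eval_need() pins need_cpus at max_cpus.
 */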
655 
656 void core_ctl_check(u64 window_start)
657 {
658 	int cpu;
659 	struct cpu_data *c;
660 	struct cluster_data *cluster;
661 	unsigned int index = 0;
662 	unsigned long flags;
663 
664 	if (unlikely(!initialized))
665 		return;
666 
667 	if (window_start == core_ctl_check_timestamp)
668 		return;
669 
670 	core_ctl_check_timestamp = window_start;
671 
672 	spin_lock_irqsave(&state_lock, flags);
673 	for_each_possible_cpu(cpu) {
674 
675 		c = &per_cpu(cpu_state, cpu);
676 		cluster = c->cluster;
677 
678 		if (!cluster || !cluster->inited)
679 			continue;
680 
681 		c->busy = sched_get_cpu_util(cpu);
682 	}
683 	spin_unlock_irqrestore(&state_lock, flags);
684 
685 	update_running_avg();
686 
687 	for_each_cluster(cluster, index) {
688 		if (eval_need(cluster))
689 			wake_up_core_ctl_thread(cluster);
690 	}
691 }
692 
693 static void move_cpu_lru(struct cpu_data *cpu_data)
694 {
695 	unsigned long flags;
696 
697 	spin_lock_irqsave(&state_lock, flags);
698 	list_del(&cpu_data->sib);
699 	list_add_tail(&cpu_data->sib, &cpu_data->cluster->lru);
700 	spin_unlock_irqrestore(&state_lock, flags);
701 }
702 
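/*
 * Isolate CPUs down to "need". The first loop only isolates idle,
 * not_preferred CPUs (or any idle CPU when none are marked not_preferred).
 * If the cluster is still above max_cpus, the later passes may also force
 * out preferred and busy CPUs.
 */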
703 static void try_to_isolate(struct cluster_data *cluster, unsigned int need)
704 {
705 	struct cpu_data *c, *tmp;
706 	unsigned long flags;
707 	unsigned int num_cpus = cluster->num_cpus;
708 	unsigned int nr_isolated = 0;
709 	bool first_pass = cluster->nr_not_preferred_cpus;
710 
711 	/*
712 	 * Protect against entry being removed (and added at tail) by other
713 	 * thread (hotplug).
714 	 */
715 	spin_lock_irqsave(&state_lock, flags);
716 	list_for_each_entry_safe(c, tmp, &cluster->lru, sib) {
717 		if (!num_cpus--)
718 			break;
719 
720 		if (!is_active(c))
721 			continue;
722 		if (cluster->active_cpus == need)
723 			break;
724 		/* Don't isolate busy CPUs. */
725 		if (c->is_busy)
726 			continue;
727 
728 		/*
729 		 * We isolate only the not_preferred CPUs. If none
730 		 * of the CPUs are selected as not_preferred, then
731 		 * all CPUs are eligible for isolation.
732 		 */
733 		if (cluster->nr_not_preferred_cpus && !c->not_preferred)
734 			continue;
735 
736 		spin_unlock_irqrestore(&state_lock, flags);
737 
738 		pr_debug("Trying to isolate CPU%u\n", c->cpu);
739 		if (!sched_isolate_cpu(c->cpu)) {
740 			c->isolated_by_us = true;
741 			move_cpu_lru(c);
742 			nr_isolated++;
743 		} else {
744 			pr_debug("Unable to isolate CPU%u\n", c->cpu);
745 		}
746 		cluster->active_cpus = get_active_cpu_count(cluster);
747 		spin_lock_irqsave(&state_lock, flags);
748 	}
749 	cluster->nr_isolated_cpus += nr_isolated;
750 	spin_unlock_irqrestore(&state_lock, flags);
751 
752 again:
753 	/*
754 	 * If the number of active CPUs is within the limits, then
755 	 * don't force isolation of any busy CPUs.
756 	 */
757 	if (cluster->active_cpus <= cluster->max_cpus)
758 		return;
759 
760 	nr_isolated = 0;
761 	num_cpus = cluster->num_cpus;
762 	spin_lock_irqsave(&state_lock, flags);
763 	list_for_each_entry_safe(c, tmp, &cluster->lru, sib) {
764 		if (!num_cpus--)
765 			break;
766 
767 		if (!is_active(c))
768 			continue;
769 		if (cluster->active_cpus <= cluster->max_cpus)
770 			break;
771 
772 		if (first_pass && !c->not_preferred)
773 			continue;
774 
775 		spin_unlock_irqrestore(&state_lock, flags);
776 
777 		pr_debug("Trying to isolate CPU%u\n", c->cpu);
778 		if (!sched_isolate_cpu(c->cpu)) {
779 			c->isolated_by_us = true;
780 			move_cpu_lru(c);
781 			nr_isolated++;
782 		} else {
783 			pr_debug("Unable to isolate CPU%u\n", c->cpu);
784 		}
785 		cluster->active_cpus = get_active_cpu_count(cluster);
786 		spin_lock_irqsave(&state_lock, flags);
787 	}
788 	cluster->nr_isolated_cpus += nr_isolated;
789 	spin_unlock_irqrestore(&state_lock, flags);
790 
791 	if (first_pass && cluster->active_cpus > cluster->max_cpus) {
792 		first_pass = false;
793 		goto again;
794 	}
795 }
796 
797 static void __try_to_unisolate(struct cluster_data *cluster,
798 			       unsigned int need, bool force)
799 {
800 	struct cpu_data *c, *tmp;
801 	unsigned long flags;
802 	unsigned int num_cpus = cluster->num_cpus;
803 	unsigned int nr_unisolated = 0;
804 
805 	/*
806 	 * Protect against entry being removed (and added at tail) by other
807 	 * thread (hotplug).
808 	 */
809 	spin_lock_irqsave(&state_lock, flags);
810 	list_for_each_entry_safe(c, tmp, &cluster->lru, sib) {
811 		if (!num_cpus--)
812 			break;
813 
814 		if (!c->isolated_by_us)
815 			continue;
816 		if ((cpu_online(c->cpu) && !cpu_isolated(c->cpu)) ||
817 			(!force && c->not_preferred))
818 			continue;
819 		if (cluster->active_cpus == need)
820 			break;
821 
822 		spin_unlock_irqrestore(&state_lock, flags);
823 
824 		pr_debug("Trying to unisolate CPU%u\n", c->cpu);
825 		if (!sched_unisolate_cpu(c->cpu)) {
826 			c->isolated_by_us = false;
827 			move_cpu_lru(c);
828 			nr_unisolated++;
829 		} else {
830 			pr_debug("Unable to unisolate CPU%u\n", c->cpu);
831 		}
832 		cluster->active_cpus = get_active_cpu_count(cluster);
833 		spin_lock_irqsave(&state_lock, flags);
834 	}
835 	cluster->nr_isolated_cpus -= nr_unisolated;
836 	spin_unlock_irqrestore(&state_lock, flags);
837 }
838 
839 static void try_to_unisolate(struct cluster_data *cluster, unsigned int need)
840 {
841 	bool force_use_non_preferred = false;
842 
843 	__try_to_unisolate(cluster, need, force_use_non_preferred);
844 
845 	if (cluster->active_cpus == need)
846 		return;
847 
848 	force_use_non_preferred = true;
849 	__try_to_unisolate(cluster, need, force_use_non_preferred);
850 }
851 
852 static void __ref do_core_ctl(struct cluster_data *cluster)
853 {
854 	unsigned int need;
855 
856 	need = apply_limits(cluster, cluster->need_cpus);
857 
858 	if (adjustment_possible(cluster, need)) {
859 		pr_debug("Trying to adjust group %u from %u to %u\n",
860 				cluster->first_cpu, cluster->active_cpus, need);
861 
862 		if (cluster->active_cpus > need)
863 			try_to_isolate(cluster, need);
864 		else if (cluster->active_cpus < need)
865 			try_to_unisolate(cluster, need);
866 	}
867 }
868 
869 static int __ref try_core_ctl(void *data)
870 {
871 	struct cluster_data *cluster = data;
872 	unsigned long flags;
873 
874 	while (1) {
875 		set_current_state(TASK_INTERRUPTIBLE);
876 		spin_lock_irqsave(&cluster->pending_lock, flags);
877 		if (!cluster->pending) {
878 			spin_unlock_irqrestore(&cluster->pending_lock, flags);
879 			schedule();
880 			if (kthread_should_stop())
881 				break;
882 			spin_lock_irqsave(&cluster->pending_lock, flags);
883 		}
884 		set_current_state(TASK_RUNNING);
885 		cluster->pending = false;
886 		spin_unlock_irqrestore(&cluster->pending_lock, flags);
887 
888 		do_core_ctl(cluster);
889 	}
890 
891 	return 0;
892 }
893 
894 static int isolation_cpuhp_state(unsigned int cpu, bool online)
895 {
896 	struct cpu_data *state = &per_cpu(cpu_state, cpu);
897 	struct cluster_data *cluster = state->cluster;
898 	unsigned int need;
899 	bool do_wakeup = false, unisolated = false;
900 	unsigned long flags;
901 
902 	if (unlikely(!cluster || !cluster->inited))
903 		return 0;
904 
905 	if (online) {
906 		cluster->active_cpus = get_active_cpu_count(cluster);
907 
908 		/*
909 		 * Moving to the end of the list should only happen in
910 		 * CPU_ONLINE and not on CPU_UP_PREPARE to prevent an
911 		 * infinite list traversal when thermal (or other entities)
912 		 * reject trying to online CPUs.
913 		 */
914 		move_cpu_lru(state);
915 	} else {
916 		/*
917 		 * We don't want to have a CPU both offline and isolated.
918 		 * So unisolate a CPU that went down if it was isolated by us.
919 		 */
920 		if (state->isolated_by_us) {
921 			sched_unisolate_cpu_unlocked(cpu);
922 			state->isolated_by_us = false;
923 			unisolated = true;
924 		}
925 
926 		/* Move a CPU to the end of the LRU when it goes offline. */
927 		move_cpu_lru(state);
928 
929 		state->busy = 0;
930 		cluster->active_cpus = get_active_cpu_count(cluster);
931 	}
932 
933 	need = apply_limits(cluster, cluster->need_cpus);
934 	spin_lock_irqsave(&state_lock, flags);
935 	if (unisolated)
936 		cluster->nr_isolated_cpus--;
937 	do_wakeup = adjustment_possible(cluster, need);
938 	spin_unlock_irqrestore(&state_lock, flags);
939 	if (do_wakeup)
940 		wake_up_core_ctl_thread(cluster);
941 
942 	return 0;
943 }
944 
945 static int core_ctl_isolation_online_cpu(unsigned int cpu)
946 {
947 	return isolation_cpuhp_state(cpu, true);
948 }
949 
950 static int core_ctl_isolation_dead_cpu(unsigned int cpu)
951 {
952 	return isolation_cpuhp_state(cpu, false);
953 }
954 
955 /* ============================ init code ============================== */
956 
957 static struct cluster_data *find_cluster_by_first_cpu(unsigned int first_cpu)
958 {
959 	unsigned int i;
960 
961 	for (i = 0; i < num_clusters; ++i) {
962 		if (cluster_state[i].first_cpu == first_cpu)
963 			return &cluster_state[i];
964 	}
965 
966 	return NULL;
967 }
968 
969 static int cluster_init(const struct cpumask *mask)
970 {
971 	struct device *dev;
972 	unsigned int first_cpu = cpumask_first(mask);
973 	struct cluster_data *cluster;
974 	struct cpu_data *state;
975 	unsigned int cpu;
976 	struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
977 
978 	if (find_cluster_by_first_cpu(first_cpu))
979 		return 0;
980 
981 	dev = get_cpu_device(first_cpu);
982 	if (!dev)
983 		return -ENODEV;
984 
985 	pr_info("Creating CPU group %d\n", first_cpu);
986 
987 	if (num_clusters == MAX_CLUSTERS) {
988 		pr_err("Unsupported number of clusters. Only %u supported\n",
989 								MAX_CLUSTERS);
990 		return -EINVAL;
991 	}
992 	cluster = &cluster_state[num_clusters];
993 	++num_clusters;
994 
995 	cpumask_copy(&cluster->cpu_mask, mask);
996 	cluster->num_cpus = cpumask_weight(mask);
997 	if (cluster->num_cpus > MAX_CPUS_PER_CLUSTER) {
998 		pr_err("HW configuration not supported\n");
999 		return -EINVAL;
1000 	}
1001 	cluster->first_cpu = first_cpu;
1002 	cluster->min_cpus = 1;
1003 	cluster->max_cpus = cluster->num_cpus;
1004 	cluster->need_cpus = cluster->num_cpus;
1005 	cluster->offline_delay_ms = 100;
1006 	cluster->task_thres = UINT_MAX;
1007 	cluster->nr_prev_assist_thresh = UINT_MAX;
1008 	cluster->nrrun = cluster->num_cpus;
1009 	cluster->enable = true;
1010 	cluster->nr_not_preferred_cpus = 0;
1011 	INIT_LIST_HEAD(&cluster->lru);
1012 	spin_lock_init(&cluster->pending_lock);
1013 
1014 	for_each_cpu(cpu, mask) {
1015 		pr_info("Init CPU%u state\n", cpu);
1016 
1017 		state = &per_cpu(cpu_state, cpu);
1018 		state->cluster = cluster;
1019 		state->cpu = cpu;
1020 		list_add_tail(&state->sib, &cluster->lru);
1021 	}
1022 	cluster->active_cpus = get_active_cpu_count(cluster);
1023 
1024 	cluster->core_ctl_thread = kthread_run(try_core_ctl, (void *) cluster,
1025 					"core_ctl/%d", first_cpu);
1026 	if (IS_ERR(cluster->core_ctl_thread))
1027 		return PTR_ERR(cluster->core_ctl_thread);
1028 
1029 	sched_setscheduler_nocheck(cluster->core_ctl_thread, SCHED_FIFO,
1030 				   &param);
1031 
1032 	cluster->inited = true;
1033 
1034 	kobject_init(&cluster->kobj, &ktype_core_ctl);
1035 	return kobject_add(&cluster->kobj, &dev->kobj, "core_ctl");
1036 }
1037 
1038 static int __init core_ctl_init(void)
1039 {
1040 	struct sched_cluster *cluster;
1041 	int ret;
1042 
1043 	cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
1044 			"core_ctl/isolation:online",
1045 			core_ctl_isolation_online_cpu, NULL);
1046 
1047 	cpuhp_setup_state_nocalls(CPUHP_CORE_CTL_ISOLATION_DEAD,
1048 			"core_ctl/isolation:dead",
1049 			NULL, core_ctl_isolation_dead_cpu);
1050 
1051 	for_each_sched_cluster(cluster) {
1052 		ret = cluster_init(&cluster->cpus);
1053 		if (ret)
1054 			pr_warn("unable to create core ctl group: %d\n", ret);
1055 	}
1056 
1057 	initialized = true;
1058 	return 0;
1059 }
1060 
1061 late_initcall(core_ctl_init);
1062