// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2014-2021, The Linux Foundation. All rights reserved.
 */

#define pr_fmt(fmt) "core_ctl: " fmt

#include <linux/init.h>
#include <linux/cpu.h>
#include <linux/cpumask.h>
#include <linux/cpufreq.h>
#include <linux/kthread.h>
#include <linux/sched.h>
#include <linux/sched/rt.h>
#include <linux/syscore_ops.h>
#include <uapi/linux/sched/types.h>
#include <linux/sched/core_ctl.h>

#include <trace/events/sched.h>
#include "sched.h"
#include "walt.h"

#define MAX_CPUS_PER_CLUSTER 6
#define MAX_CLUSTERS 3

struct cluster_data {
	bool inited;
	unsigned int min_cpus;
	unsigned int max_cpus;
	unsigned int offline_delay_ms;
	unsigned int busy_up_thres[MAX_CPUS_PER_CLUSTER];
	unsigned int busy_down_thres[MAX_CPUS_PER_CLUSTER];
	unsigned int active_cpus;
	unsigned int num_cpus;
	unsigned int nr_isolated_cpus;
	unsigned int nr_not_preferred_cpus;
	cpumask_t cpu_mask;
	unsigned int need_cpus;
	unsigned int task_thres;
	unsigned int max_nr;
	unsigned int nr_prev_assist;
	unsigned int nr_prev_assist_thresh;
	s64 need_ts;
	struct list_head lru;
	bool pending;
	spinlock_t pending_lock;
	bool enable;
	int nrrun;
	struct task_struct *core_ctl_thread;
	unsigned int first_cpu;
	unsigned int boost;
	struct kobject kobj;
};

struct cpu_data {
	bool is_busy;
	unsigned int busy;
	unsigned int cpu;
	bool not_preferred;
	struct cluster_data *cluster;
	struct list_head sib;
	bool isolated_by_us;
};

static DEFINE_PER_CPU(struct cpu_data, cpu_state);
static struct cluster_data cluster_state[MAX_CLUSTERS];
static unsigned int num_clusters;

#define for_each_cluster(cluster, idx) \
	for (; (idx) < num_clusters && ((cluster) = &cluster_state[idx]);\
		(idx)++)

static DEFINE_SPINLOCK(state_lock);
static void apply_need(struct cluster_data *state);
static void wake_up_core_ctl_thread(struct cluster_data *state);
static bool initialized;

ATOMIC_NOTIFIER_HEAD(core_ctl_notifier);
static unsigned int last_nr_big;

static unsigned int get_active_cpu_count(const struct cluster_data *cluster);

/* ========================= sysfs interface =========================== */

static ssize_t store_min_cpus(struct cluster_data *state,
			      const char *buf, size_t count)
{
	unsigned int val;

	if (sscanf(buf, "%u\n", &val) != 1)
		return -EINVAL;

	state->min_cpus = min(val, state->max_cpus);
	wake_up_core_ctl_thread(state);

	return count;
}

static ssize_t show_min_cpus(const struct cluster_data *state, char *buf)
{
	return sysfs_emit(buf, "%u\n", state->min_cpus);
}

static ssize_t store_max_cpus(struct cluster_data *state,
			      const char *buf, size_t count)
{
	unsigned int val;

	if (sscanf(buf, "%u\n", &val) != 1)
		return -EINVAL;

	val = min(val, state->num_cpus);
	state->max_cpus = val;
	state->min_cpus = min(state->min_cpus, state->max_cpus);
	wake_up_core_ctl_thread(state);

	return count;
}

static ssize_t show_max_cpus(const struct cluster_data *state, char *buf)
{
	return sysfs_emit(buf, "%u\n", state->max_cpus);
}

static ssize_t store_enable(struct cluster_data *state,
			    const char *buf, size_t count)
{
	unsigned int val;
	bool bval;

	if (sscanf(buf, "%u\n", &val) != 1)
		return -EINVAL;

	bval = !!val;
	if (bval != state->enable) {
		state->enable = bval;
		apply_need(state);
	}

	return count;
}

static ssize_t show_enable(const struct cluster_data *state, char *buf)
{
	return sysfs_emit(buf, "%u\n", state->enable);
}

static ssize_t show_need_cpus(const struct cluster_data *state, char *buf)
{
	return sysfs_emit(buf, "%u\n", state->need_cpus);
}

static ssize_t show_active_cpus(const struct cluster_data *state, char *buf)
{
	return sysfs_emit(buf, "%u\n", state->active_cpus);
}

static ssize_t show_global_state(const struct cluster_data *state, char *buf)
{
	struct cpu_data *c;
	struct cluster_data *cluster;
	ssize_t count = 0;
	unsigned int cpu;

	spin_lock_irq(&state_lock);
	for_each_possible_cpu(cpu) {
		c = &per_cpu(cpu_state, cpu);
		cluster = c->cluster;
		if (!cluster || !cluster->inited)
			continue;

		count += sysfs_emit_at(buf, count,
				       "CPU%u\n", cpu);
		count += sysfs_emit_at(buf, count,
				       "\tCPU: %u\n", c->cpu);
		count += sysfs_emit_at(buf, count,
				       "\tOnline: %u\n",
				       cpu_online(c->cpu));
		count += sysfs_emit_at(buf, count,
				       "\tIsolated: %u\n",
				       cpu_isolated(c->cpu));
		count += sysfs_emit_at(buf, count,
				       "\tFirst CPU: %u\n",
				       cluster->first_cpu);
		count += sysfs_emit_at(buf, count,
				       "\tBusy%%: %u\n", c->busy);
		count += sysfs_emit_at(buf, count,
				       "\tIs busy: %u\n", c->is_busy);
		count += sysfs_emit_at(buf, count,
				       "\tNot preferred: %u\n",
				       c->not_preferred);
		count += sysfs_emit_at(buf, count,
				       "\tNr running: %u\n", cluster->nrrun);
		count += sysfs_emit_at(buf, count,
				       "\tActive CPUs: %u\n",
				       get_active_cpu_count(cluster));
		count += sysfs_emit_at(buf, count,
				       "\tNeed CPUs: %u\n", cluster->need_cpus);
		count += sysfs_emit_at(buf, count,
				       "\tNr isolated CPUs: %u\n",
				       cluster->nr_isolated_cpus);
		count += sysfs_emit_at(buf, count,
				       "\tBoost: %u\n",
				       (unsigned int) cluster->boost);
	}
	spin_unlock_irq(&state_lock);

	return count;
}

struct core_ctl_attr {
	struct attribute attr;
	ssize_t (*show)(const struct cluster_data *, char *);
	ssize_t (*store)(struct cluster_data *, const char *, size_t count);
};

#define core_ctl_attr_ro(_name) \
static struct core_ctl_attr _name = \
__ATTR(_name, 0444, show_##_name, NULL)

#define core_ctl_attr_rw(_name) \
static struct core_ctl_attr _name = \
__ATTR(_name, 0644, show_##_name, store_##_name)

core_ctl_attr_rw(min_cpus);
core_ctl_attr_rw(max_cpus);
core_ctl_attr_ro(need_cpus);
core_ctl_attr_ro(active_cpus);
core_ctl_attr_ro(global_state);
core_ctl_attr_rw(enable);

static struct attribute *default_attrs[] = {
	&min_cpus.attr,
	&max_cpus.attr,
	&enable.attr,
	&need_cpus.attr,
	&active_cpus.attr,
	&global_state.attr,
	NULL
};

#define to_cluster_data(k) container_of(k, struct cluster_data, kobj)
#define to_attr(a) container_of(a, struct core_ctl_attr, attr)
static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
{
	struct cluster_data *data = to_cluster_data(kobj);
	struct core_ctl_attr *cattr = to_attr(attr);
	ssize_t ret = -EIO;

	if (cattr->show)
		ret = cattr->show(data, buf);

	return ret;
}

static ssize_t store(struct kobject *kobj, struct attribute *attr,
		     const char *buf, size_t count)
{
	struct cluster_data *data = to_cluster_data(kobj);
	struct core_ctl_attr *cattr = to_attr(attr);
	ssize_t ret = -EIO;

	if (cattr->store)
		ret = cattr->store(data, buf, count);

	return ret;
}

static const struct sysfs_ops sysfs_ops = {
	.show = show,
	.store = store,
};

static struct kobj_type ktype_core_ctl = {
	.sysfs_ops = &sysfs_ops,
	.default_attrs = default_attrs,
};

/* ==================== runqueue based core count =================== */

static struct sched_avg_stats nr_stats[NR_CPUS];

/*
 * nr_need:
 * Number of tasks running on this cluster plus the number of tasks
 * running on higher capacity clusters. Used to determine how many
 * CPUs are needed in this cluster.
 *
 * For example:
 * On a dual cluster system with 4 min capacity CPUs and 4 max
 * capacity CPUs, if there are 4 small tasks running on the min
 * capacity CPUs and 2 big tasks running on 2 max capacity CPUs,
 * nr_need is 6 for the min capacity cluster and 2 for the max
 * capacity cluster. The min capacity cluster accounts for the tasks
 * running on the max capacity cluster, so that it is ready to
 * accommodate those tasks if their demand goes down.
 */
static int compute_cluster_nr_need(int index)
{
	int cpu;
	struct cluster_data *cluster;
	int nr_need = 0;

	for_each_cluster(cluster, index) {
		for_each_cpu(cpu, &cluster->cpu_mask)
			nr_need += nr_stats[cpu].nr;
	}

	return nr_need;
}

/*
 * prev_misfit_need:
 * Number of tasks running on a smaller capacity cluster that need
 * to be migrated to a higher capacity cluster. Used to determine
 * how many tasks need higher capacity CPUs.
 *
 * For example:
 * On a dual cluster system with 4 min capacity CPUs and 4 max
 * capacity CPUs, if there are 2 small tasks and 2 big tasks running
 * on the min capacity CPUs and no tasks running on max capacity
 * CPUs, prev_misfit_need of the min capacity cluster will be 0 and
 * prev_misfit_need of the max capacity cluster will be 2.
 */
static int compute_prev_cluster_misfit_need(int index)
{
	int cpu;
	struct cluster_data *prev_cluster;
	int prev_misfit_need = 0;

	/*
	 * Lowest capacity cluster does not have to
	 * accommodate any misfit tasks.
	 */
	if (index == 0)
		return 0;

	prev_cluster = &cluster_state[index - 1];

	for_each_cpu(cpu, &prev_cluster->cpu_mask)
		prev_misfit_need += nr_stats[cpu].nr_misfit;

	return prev_misfit_need;
}

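/*
 * Maximum number of runnable tasks seen on any single CPU of the
 * cluster in the last window; used to detect per-CPU overcrowding.
 */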
static int compute_cluster_max_nr(int index)
{
	int cpu;
	struct cluster_data *cluster = &cluster_state[index];
	int max_nr = 0;

	for_each_cpu(cpu, &cluster->cpu_mask)
		max_nr = max(max_nr, nr_stats[cpu].nr_max);

	return max_nr;
}

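/*
 * Count of big tasks attributable to this cluster: for the lowest
 * capacity cluster these are its misfit tasks, for higher capacity
 * clusters they are the tasks already running there.
 */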
static int cluster_real_big_tasks(int index)
{
	int nr_big = 0;
	int cpu;
	struct cluster_data *cluster = &cluster_state[index];

	if (index == 0) {
		for_each_cpu(cpu, &cluster->cpu_mask)
			nr_big += nr_stats[cpu].nr_misfit;
	} else {
		for_each_cpu(cpu, &cluster->cpu_mask)
			nr_big += nr_stats[cpu].nr;
	}

	return nr_big;
}

/*
 * prev_nr_need_assist:
 * Number of tasks that are eligible to run on the previous (lower
 * capacity) cluster but cannot run there because it has too few
 * CPUs. prev_nr_need_assist indicates how many CPUs in this cluster
 * should assist the previous cluster to make up for the shortfall.
 *
 * For example:
 * On a tri-cluster system with 4 min capacity CPUs, 3 intermediate
 * capacity CPUs and 1 max capacity CPU, if there are 4 small tasks
 * running on the min capacity CPUs, 4 big tasks running on the
 * intermediate capacity CPUs and no tasks running on the max
 * capacity CPU, prev_nr_need_assist will be 0 for the min and
 * intermediate capacity clusters, but 1 for the max capacity
 * cluster, since the intermediate cluster has only 3 CPUs for its
 * 4 big tasks.
 */
static int prev_cluster_nr_need_assist(int index)
{
	int need = 0;
	int cpu;
	struct cluster_data *prev_cluster;

	if (index == 0)
		return 0;

	index--;
	prev_cluster = &cluster_state[index];

	/*
	 * Do not assist the previous cluster while it still has
	 * isolated CPUs of its own.
	 */
	if (prev_cluster->nr_isolated_cpus)
		return 0;

	for_each_cpu(cpu, &prev_cluster->cpu_mask)
		need += nr_stats[cpu].nr;

	need += compute_prev_cluster_misfit_need(index);

	if (need > prev_cluster->active_cpus)
		need = need - prev_cluster->active_cpus;
	else
		need = 0;

	return need;
}

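/*
 * Refresh the per-CPU runqueue statistics and recompute each cluster's
 * nrrun, max_nr and nr_prev_assist, along with the system-wide count of
 * big tasks (last_nr_big).
 */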
static void update_running_avg(void)
{
	struct cluster_data *cluster;
	unsigned int index = 0;
	unsigned long flags;
	int big_avg = 0;

	sched_get_nr_running_avg(nr_stats);

	spin_lock_irqsave(&state_lock, flags);
	for_each_cluster(cluster, index) {
		int nr_need, prev_misfit_need;

		if (!cluster->inited)
			continue;

		nr_need = compute_cluster_nr_need(index);
		prev_misfit_need = compute_prev_cluster_misfit_need(index);

		cluster->nrrun = nr_need + prev_misfit_need;
		cluster->max_nr = compute_cluster_max_nr(index);
		cluster->nr_prev_assist = prev_cluster_nr_need_assist(index);
		trace_core_ctl_update_nr_need(cluster->first_cpu, nr_need,
					      prev_misfit_need,
					      cluster->nrrun, cluster->max_nr,
					      cluster->nr_prev_assist);
		big_avg += cluster_real_big_tasks(index);
	}
	spin_unlock_irqrestore(&state_lock, flags);

	last_nr_big = big_avg;
}

#define MAX_NR_THRESHOLD 4
/* adjust needed CPUs based on current runqueue information */
static unsigned int apply_task_need(const struct cluster_data *cluster,
				    unsigned int new_need)
{
	/* unisolate all cores if there are enough tasks */
	if (cluster->nrrun >= cluster->task_thres)
		return cluster->num_cpus;

	/*
	 * unisolate as many cores as the previous cluster
	 * needs assistance with.
	 */
	if (cluster->nr_prev_assist >= cluster->nr_prev_assist_thresh)
		new_need = new_need + cluster->nr_prev_assist;

	/* only unisolate more cores if there are tasks to run */
	if (cluster->nrrun > new_need)
		new_need = new_need + 1;

	/*
	 * We don't want tasks to be overcrowded in a cluster.
	 * If any CPU has more than MAX_NR_THRESHOLD in the last
	 * window, bring another CPU to help out.
	 */
	if (cluster->max_nr > MAX_NR_THRESHOLD)
		new_need = new_need + 1;

	return new_need;
}

/* ======================= load based core count ====================== */

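/* Clamp the needed CPU count to the cluster's min_cpus/max_cpus limits. */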
static unsigned int apply_limits(const struct cluster_data *cluster,
				 unsigned int need_cpus)
{
	return min(max(cluster->min_cpus, need_cpus), cluster->max_cpus);
}

static unsigned int get_active_cpu_count(const struct cluster_data *cluster)
{
	return cluster->num_cpus -
	       sched_isolate_count(&cluster->cpu_mask, true);
}

static bool is_active(const struct cpu_data *state)
{
	return cpu_online(state->cpu) && !cpu_isolated(state->cpu);
}

static bool adjustment_possible(const struct cluster_data *cluster,
				unsigned int need)
{
	return (need < cluster->active_cpus || (need > cluster->active_cpus &&
						cluster->nr_isolated_cpus));
}

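/*
 * Evaluate how many CPUs the cluster needs, based on per-CPU busy
 * percentages, IRQ load and runqueue statistics, and record the result
 * in cluster->need_cpus. Returns true when the need changed in a way
 * the core_ctl thread can act on (isolate or unisolate CPUs).
 */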
static bool eval_need(struct cluster_data *cluster)
{
	unsigned long flags;
	struct cpu_data *c;
	unsigned int need_cpus = 0, last_need, thres_idx;
	int ret = 0;
	bool need_flag = false;
	unsigned int new_need;
	s64 now, elapsed;

	if (unlikely(!cluster->inited))
		return 0;

	spin_lock_irqsave(&state_lock, flags);

	if (cluster->boost || !cluster->enable) {
		need_cpus = cluster->max_cpus;
	} else {
		cluster->active_cpus = get_active_cpu_count(cluster);
		thres_idx = cluster->active_cpus ? cluster->active_cpus - 1 : 0;
		list_for_each_entry(c, &cluster->lru, sib) {
			bool old_is_busy = c->is_busy;
			int high_irqload = sched_cpu_high_irqload(c->cpu);

			if (c->busy >= cluster->busy_up_thres[thres_idx] ||
			    high_irqload)
				c->is_busy = true;
			else if (c->busy < cluster->busy_down_thres[thres_idx])
				c->is_busy = false;
			trace_core_ctl_set_busy(c->cpu, c->busy, old_is_busy,
						c->is_busy, high_irqload);
			need_cpus += c->is_busy;
		}
		need_cpus = apply_task_need(cluster, need_cpus);
	}
	new_need = apply_limits(cluster, need_cpus);
	need_flag = adjustment_possible(cluster, new_need);

	last_need = cluster->need_cpus;
	now = ktime_to_ms(ktime_get());

	if (new_need > cluster->active_cpus) {
		ret = 1;
	} else {
		/*
		 * When there is no change in need and there are no more
		 * active CPUs than currently needed, just update the
		 * need time stamp and return.
		 */
		if (new_need == last_need && new_need == cluster->active_cpus) {
			cluster->need_ts = now;
			spin_unlock_irqrestore(&state_lock, flags);
			return 0;
		}

		elapsed = now - cluster->need_ts;
		ret = elapsed >= cluster->offline_delay_ms;
	}

	if (ret) {
		cluster->need_ts = now;
		cluster->need_cpus = new_need;
	}
	trace_core_ctl_eval_need(cluster->first_cpu, last_need, new_need,
				 ret && need_flag);
	spin_unlock_irqrestore(&state_lock, flags);

	return ret && need_flag;
}

static void apply_need(struct cluster_data *cluster)
{
	if (eval_need(cluster))
		wake_up_core_ctl_thread(cluster);
}

/* ========================= core count enforcement ==================== */

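/* Mark work as pending and wake the cluster's core_ctl thread. */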
static void wake_up_core_ctl_thread(struct cluster_data *cluster)
{
	unsigned long flags;

	spin_lock_irqsave(&cluster->pending_lock, flags);
	cluster->pending = true;
	spin_unlock_irqrestore(&cluster->pending_lock, flags);

	wake_up_process(cluster->core_ctl_thread);
}

static u64 core_ctl_check_timestamp;

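/*
 * Increment or decrement the boost reference count on every cluster.
 * While any boost reference is held, eval_need() asks for max_cpus.
 * Returns -EINVAL when asked to drop a boost that was never set.
 */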
int core_ctl_set_boost(bool boost)
{
	unsigned int index = 0;
	struct cluster_data *cluster = NULL;
	unsigned long flags;
	int ret = 0;
	bool boost_state_changed = false;

	if (unlikely(!initialized))
		return 0;

	spin_lock_irqsave(&state_lock, flags);
	for_each_cluster(cluster, index) {
		if (boost) {
			boost_state_changed = !cluster->boost;
			++cluster->boost;
		} else {
			if (!cluster->boost) {
				ret = -EINVAL;
				break;
			} else {
				--cluster->boost;
				boost_state_changed = !cluster->boost;
			}
		}
	}
	spin_unlock_irqrestore(&state_lock, flags);

	if (boost_state_changed) {
		index = 0;
		for_each_cluster(cluster, index)
			apply_need(cluster);
	}

	if (cluster)
		trace_core_ctl_set_boost(cluster->boost, ret);

	return ret;
}
EXPORT_SYMBOL(core_ctl_set_boost);

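/*
 * Entry point invoked with the current window start time. At most once
 * per window, refresh per-CPU utilization and runqueue statistics and
 * wake the core_ctl thread of any cluster whose CPU need changed.
 */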
void core_ctl_check(u64 window_start)
{
	int cpu;
	struct cpu_data *c;
	struct cluster_data *cluster;
	unsigned int index = 0;
	unsigned long flags;

	if (unlikely(!initialized))
		return;

	if (window_start == core_ctl_check_timestamp)
		return;

	core_ctl_check_timestamp = window_start;

	spin_lock_irqsave(&state_lock, flags);
	for_each_possible_cpu(cpu) {
		c = &per_cpu(cpu_state, cpu);
		cluster = c->cluster;

		if (!cluster || !cluster->inited)
			continue;

		c->busy = sched_get_cpu_util(cpu);
	}
	spin_unlock_irqrestore(&state_lock, flags);

	update_running_avg();

	for_each_cluster(cluster, index) {
		if (eval_need(cluster))
			wake_up_core_ctl_thread(cluster);
	}
}

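/* Move a CPU to the tail of its cluster's LRU list. */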
static void move_cpu_lru(struct cpu_data *cpu_data)
{
	unsigned long flags;

	spin_lock_irqsave(&state_lock, flags);
	list_del(&cpu_data->sib);
	list_add_tail(&cpu_data->sib, &cpu_data->cluster->lru);
	spin_unlock_irqrestore(&state_lock, flags);
}

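/*
 * Isolate CPUs from the cluster, walking its LRU list, until only @need
 * CPUs remain active. Busy CPUs (and preferred CPUs, when some CPUs are
 * marked not_preferred) are skipped first; if the cluster still exceeds
 * max_cpus after that, further passes force isolation regardless.
 */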
static void try_to_isolate(struct cluster_data *cluster, unsigned int need)
{
	struct cpu_data *c, *tmp;
	unsigned long flags;
	unsigned int num_cpus = cluster->num_cpus;
	unsigned int nr_isolated = 0;
	bool first_pass = cluster->nr_not_preferred_cpus;

	/*
	 * Protect against an entry being removed from (and re-added to
	 * the tail of) the LRU by another thread (hotplug).
	 */
	spin_lock_irqsave(&state_lock, flags);
	list_for_each_entry_safe(c, tmp, &cluster->lru, sib) {
		if (!num_cpus--)
			break;

		if (!is_active(c))
			continue;
		if (cluster->active_cpus == need)
			break;
		/* Don't isolate busy CPUs. */
		if (c->is_busy)
			continue;

		/*
		 * We isolate only the not_preferred CPUs. If none
		 * of the CPUs are selected as not_preferred, then
		 * all CPUs are eligible for isolation.
		 */
		if (cluster->nr_not_preferred_cpus && !c->not_preferred)
			continue;

		spin_unlock_irqrestore(&state_lock, flags);

		pr_debug("Trying to isolate CPU%u\n", c->cpu);
		if (!sched_isolate_cpu(c->cpu)) {
			c->isolated_by_us = true;
			move_cpu_lru(c);
			nr_isolated++;
		} else {
			pr_debug("Unable to isolate CPU%u\n", c->cpu);
		}
		cluster->active_cpus = get_active_cpu_count(cluster);
		spin_lock_irqsave(&state_lock, flags);
	}
	cluster->nr_isolated_cpus += nr_isolated;
	spin_unlock_irqrestore(&state_lock, flags);

again:
	/*
	 * If the number of active CPUs is within the limits, then
	 * don't force isolation of any busy CPUs.
	 */
	if (cluster->active_cpus <= cluster->max_cpus)
		return;

	nr_isolated = 0;
	num_cpus = cluster->num_cpus;
	spin_lock_irqsave(&state_lock, flags);
	list_for_each_entry_safe(c, tmp, &cluster->lru, sib) {
		if (!num_cpus--)
			break;

		if (!is_active(c))
			continue;
		if (cluster->active_cpus <= cluster->max_cpus)
			break;

		if (first_pass && !c->not_preferred)
			continue;

		spin_unlock_irqrestore(&state_lock, flags);

		pr_debug("Trying to isolate CPU%u\n", c->cpu);
		if (!sched_isolate_cpu(c->cpu)) {
			c->isolated_by_us = true;
			move_cpu_lru(c);
			nr_isolated++;
		} else {
			pr_debug("Unable to isolate CPU%u\n", c->cpu);
		}
		cluster->active_cpus = get_active_cpu_count(cluster);
		spin_lock_irqsave(&state_lock, flags);
	}
	cluster->nr_isolated_cpus += nr_isolated;
	spin_unlock_irqrestore(&state_lock, flags);

	if (first_pass && cluster->active_cpus > cluster->max_cpus) {
		first_pass = false;
		goto again;
	}
}

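/*
 * Bring back CPUs that were isolated by core_ctl until @need CPUs are
 * active. Unless @force is set, CPUs marked not_preferred are left
 * isolated.
 */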
static void __try_to_unisolate(struct cluster_data *cluster,
			       unsigned int need, bool force)
{
	struct cpu_data *c, *tmp;
	unsigned long flags;
	unsigned int num_cpus = cluster->num_cpus;
	unsigned int nr_unisolated = 0;

	/*
	 * Protect against an entry being removed from (and re-added to
	 * the tail of) the LRU by another thread (hotplug).
	 */
	spin_lock_irqsave(&state_lock, flags);
	list_for_each_entry_safe(c, tmp, &cluster->lru, sib) {
		if (!num_cpus--)
			break;

		if (!c->isolated_by_us)
			continue;
		if ((cpu_online(c->cpu) && !cpu_isolated(c->cpu)) ||
		    (!force && c->not_preferred))
			continue;
		if (cluster->active_cpus == need)
			break;

		spin_unlock_irqrestore(&state_lock, flags);

		pr_debug("Trying to unisolate CPU%u\n", c->cpu);
		if (!sched_unisolate_cpu(c->cpu)) {
			c->isolated_by_us = false;
			move_cpu_lru(c);
			nr_unisolated++;
		} else {
			pr_debug("Unable to unisolate CPU%u\n", c->cpu);
		}
		cluster->active_cpus = get_active_cpu_count(cluster);
		spin_lock_irqsave(&state_lock, flags);
	}
	cluster->nr_isolated_cpus -= nr_unisolated;
	spin_unlock_irqrestore(&state_lock, flags);
}

static void try_to_unisolate(struct cluster_data *cluster, unsigned int need)
{
	bool force_use_non_preferred = false;

	__try_to_unisolate(cluster, need, force_use_non_preferred);

	if (cluster->active_cpus == need)
		return;

	force_use_non_preferred = true;
	__try_to_unisolate(cluster, need, force_use_non_preferred);
}

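/*
 * Enforce the most recently evaluated CPU need for the cluster by
 * isolating or unisolating CPUs as required.
 */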
static void __ref do_core_ctl(struct cluster_data *cluster)
{
	unsigned int need;

	need = apply_limits(cluster, cluster->need_cpus);

	if (adjustment_possible(cluster, need)) {
		pr_debug("Trying to adjust group %u from %u to %u\n",
			 cluster->first_cpu, cluster->active_cpus, need);

		if (cluster->active_cpus > need)
			try_to_isolate(cluster, need);
		else if (cluster->active_cpus < need)
			try_to_unisolate(cluster, need);
	}
}

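/*
 * Per-cluster kthread: sleep until work is flagged as pending, then run
 * do_core_ctl() to adjust the number of active CPUs.
 */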
static int __ref try_core_ctl(void *data)
{
	struct cluster_data *cluster = data;
	unsigned long flags;

	while (1) {
		set_current_state(TASK_INTERRUPTIBLE);
		spin_lock_irqsave(&cluster->pending_lock, flags);
		if (!cluster->pending) {
			spin_unlock_irqrestore(&cluster->pending_lock, flags);
			schedule();
			if (kthread_should_stop())
				break;
			spin_lock_irqsave(&cluster->pending_lock, flags);
		}
		set_current_state(TASK_RUNNING);
		cluster->pending = false;
		spin_unlock_irqrestore(&cluster->pending_lock, flags);

		do_core_ctl(cluster);
	}

	return 0;
}

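/*
 * CPU hotplug callback. On online, move the CPU to the LRU tail; on
 * going down, drop any isolation this driver applied so the CPU is not
 * both offline and isolated. Then re-evaluate whether the core_ctl
 * thread needs to run.
 */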
static int isolation_cpuhp_state(unsigned int cpu, bool online)
{
	struct cpu_data *state = &per_cpu(cpu_state, cpu);
	struct cluster_data *cluster = state->cluster;
	unsigned int need;
	bool do_wakeup = false, unisolated = false;
	unsigned long flags;

	if (unlikely(!cluster || !cluster->inited))
		return 0;

	if (online) {
		cluster->active_cpus = get_active_cpu_count(cluster);

		/*
		 * Moving to the end of the list should only happen in
		 * CPU_ONLINE and not on CPU_UP_PREPARE to prevent an
		 * infinite list traversal when thermal (or other entities)
		 * reject trying to online CPUs.
		 */
		move_cpu_lru(state);
	} else {
		/*
		 * We don't want to have a CPU both offline and isolated.
		 * So unisolate a CPU that went down if it was isolated by us.
		 */
		if (state->isolated_by_us) {
			sched_unisolate_cpu_unlocked(cpu);
			state->isolated_by_us = false;
			unisolated = true;
		}

		/* Move a CPU to the end of the LRU when it goes offline. */
		move_cpu_lru(state);

		state->busy = 0;
		cluster->active_cpus = get_active_cpu_count(cluster);
	}

	need = apply_limits(cluster, cluster->need_cpus);
	spin_lock_irqsave(&state_lock, flags);
	if (unisolated)
		cluster->nr_isolated_cpus--;
	do_wakeup = adjustment_possible(cluster, need);
	spin_unlock_irqrestore(&state_lock, flags);
	if (do_wakeup)
		wake_up_core_ctl_thread(cluster);

	return 0;
}

static int core_ctl_isolation_online_cpu(unsigned int cpu)
{
	return isolation_cpuhp_state(cpu, true);
}

static int core_ctl_isolation_dead_cpu(unsigned int cpu)
{
	return isolation_cpuhp_state(cpu, false);
}

/* ============================ init code ============================== */

static struct cluster_data *find_cluster_by_first_cpu(unsigned int first_cpu)
{
	unsigned int i;

	for (i = 0; i < num_clusters; ++i) {
		if (cluster_state[i].first_cpu == first_cpu)
			return &cluster_state[i];
	}

	return NULL;
}

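/*
 * Set up the cluster described by @mask: initialize its tunables and
 * per-CPU state, start its SCHED_FIFO core_ctl thread and expose the
 * sysfs interface under the first CPU's device as "core_ctl".
 */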
static int cluster_init(const struct cpumask *mask)
{
	struct device *dev;
	unsigned int first_cpu = cpumask_first(mask);
	struct cluster_data *cluster;
	struct cpu_data *state;
	unsigned int cpu;
	struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };

	if (find_cluster_by_first_cpu(first_cpu))
		return 0;

	dev = get_cpu_device(first_cpu);
	if (!dev)
		return -ENODEV;

	pr_info("Creating CPU group %d\n", first_cpu);

	if (num_clusters == MAX_CLUSTERS) {
		pr_err("Unsupported number of clusters. Only %u supported\n",
		       MAX_CLUSTERS);
		return -EINVAL;
	}
	cluster = &cluster_state[num_clusters];
	++num_clusters;

	cpumask_copy(&cluster->cpu_mask, mask);
	cluster->num_cpus = cpumask_weight(mask);
	if (cluster->num_cpus > MAX_CPUS_PER_CLUSTER) {
		pr_err("HW configuration not supported\n");
		return -EINVAL;
	}
	cluster->first_cpu = first_cpu;
	cluster->min_cpus = 1;
	cluster->max_cpus = cluster->num_cpus;
	cluster->need_cpus = cluster->num_cpus;
	cluster->offline_delay_ms = 100;
	cluster->task_thres = UINT_MAX;
	cluster->nr_prev_assist_thresh = UINT_MAX;
	cluster->nrrun = cluster->num_cpus;
	cluster->enable = true;
	cluster->nr_not_preferred_cpus = 0;
	INIT_LIST_HEAD(&cluster->lru);
	spin_lock_init(&cluster->pending_lock);

	for_each_cpu(cpu, mask) {
		pr_info("Init CPU%u state\n", cpu);

		state = &per_cpu(cpu_state, cpu);
		state->cluster = cluster;
		state->cpu = cpu;
		list_add_tail(&state->sib, &cluster->lru);
	}
	cluster->active_cpus = get_active_cpu_count(cluster);

	cluster->core_ctl_thread = kthread_run(try_core_ctl, (void *) cluster,
					       "core_ctl/%d", first_cpu);
	if (IS_ERR(cluster->core_ctl_thread))
		return PTR_ERR(cluster->core_ctl_thread);

	sched_setscheduler_nocheck(cluster->core_ctl_thread, SCHED_FIFO,
				   &param);

	cluster->inited = true;

	kobject_init(&cluster->kobj, &ktype_core_ctl);
	return kobject_add(&cluster->kobj, &dev->kobj, "core_ctl");
}

static int __init core_ctl_init(void)
{
	struct sched_cluster *cluster;
	int ret;

	cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
				  "core_ctl/isolation:online",
				  core_ctl_isolation_online_cpu, NULL);

	cpuhp_setup_state_nocalls(CPUHP_CORE_CTL_ISOLATION_DEAD,
				  "core_ctl/isolation:dead",
				  NULL, core_ctl_isolation_dead_cpu);

	for_each_sched_cluster(cluster) {
		ret = cluster_init(&cluster->cpus);
		if (ret)
			pr_warn("unable to create core ctl group: %d\n", ret);
	}

	initialized = true;
	return 0;
}

late_initcall(core_ctl_init);