// SPDX-License-Identifier: GPL-2.0
/*
 * Related thread group (RTG) scheduling support.
 */
#include <linux/sched.h>
#include <linux/cpumask.h>
#include <trace/events/walt.h>
#define CREATE_TRACE_POINTS
#include <trace/events/rtg.h>
#undef CREATE_TRACE_POINTS

#include "../sched.h"
#include "rtg.h"
#include "../walt.h"

#ifdef CONFIG_SCHED_RTG_FRAME
#include "frame_rtg.h"
#endif

#define ADD_TASK 0
#define REM_TASK 1

#define DEFAULT_GROUP_RATE 60 /* 60FPS */
#define DEFAULT_UTIL_INVALID_INTERVAL (~0U) /* ns */
#define DEFAULT_UTIL_UPDATE_TIMEOUT 20000000 /* ns */
#define DEFAULT_FREQ_UPDATE_INTERVAL 8000000 /* ns */
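/*
 * Note on units: the default window size derived from DEFAULT_GROUP_RATE is
 * NSEC_PER_SEC / 60 (~16.7 ms per frame), and the interval/timeout defaults
 * above are in nanoseconds, while the interval setters further down take
 * milliseconds and convert with NSEC_PER_MSEC.
 */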

struct related_thread_group *related_thread_groups[MAX_NUM_CGROUP_COLOC_ID];
static DEFINE_RWLOCK(related_thread_group_lock);
static LIST_HEAD(active_related_thread_groups);

#define for_each_related_thread_group(grp) \
	list_for_each_entry(grp, &active_related_thread_groups, list)

void init_task_rtg(struct task_struct *p)
{
	rcu_assign_pointer(p->grp, NULL);
	INIT_LIST_HEAD(&p->grp_list);
}

struct related_thread_group *task_related_thread_group(struct task_struct *p)
{
	return rcu_dereference(p->grp);
}

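/*
 * Return the group registered under @group_id. The index is not range-checked
 * here; callers must guarantee group_id < MAX_NUM_CGROUP_COLOC_ID.
 */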
struct related_thread_group *
lookup_related_thread_group(unsigned int group_id)
{
	return related_thread_groups[group_id];
}

int alloc_related_thread_groups(void)
{
	int i, ret;
	struct related_thread_group *grp = NULL;

	/* group_id = 0 is invalid as it is the special id used to remove a group. */
	for (i = 1; i < MAX_NUM_CGROUP_COLOC_ID; i++) {
		grp = kzalloc(sizeof(*grp), GFP_NOWAIT);
		if (!grp) {
			ret = -ENOMEM;
			goto err;
		}

		grp->id = i;
		INIT_LIST_HEAD(&grp->tasks);
		INIT_LIST_HEAD(&grp->list);
		grp->window_size = NSEC_PER_SEC / DEFAULT_GROUP_RATE;
		grp->util_invalid_interval = DEFAULT_UTIL_INVALID_INTERVAL;
		grp->util_update_timeout = DEFAULT_UTIL_UPDATE_TIMEOUT;
		grp->max_boost = 0;
		grp->freq_update_interval = DEFAULT_FREQ_UPDATE_INTERVAL;
		raw_spin_lock_init(&grp->lock);

		related_thread_groups[i] = grp;
	}

	return 0;

err:
	for (i = 1; i < MAX_NUM_CGROUP_COLOC_ID; i++) {
		grp = lookup_related_thread_group(i);
		if (grp) {
			kfree(grp);
			related_thread_groups[i] = NULL;
		} else {
			break;
		}
	}

	return ret;
}

/*
 * A task's CPU usage is accounted in:
 *	rq->curr/prev_runnable_sum, when its ->grp is NULL
 *	grp->cpu_time[cpu]->curr/prev_runnable_sum, when its ->grp is !NULL
 *
 * Transfer the task's CPU usage between those counters when it transitions
 * between groups.
 */
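/*
 * Both callers invoke this via __task_rq_lock(), i.e. with the task's rq lock
 * held, and only take grp->lock afterwards.
 */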
static void transfer_busy_time(struct rq *rq, struct related_thread_group *grp,
			       struct task_struct *p, int event)
{
	u64 wallclock;
	struct group_cpu_time *cpu_time;
	u64 *src_curr_runnable_sum, *dst_curr_runnable_sum;
	u64 *src_prev_runnable_sum, *dst_prev_runnable_sum;
	u64 *src_nt_curr_runnable_sum, *dst_nt_curr_runnable_sum;
	u64 *src_nt_prev_runnable_sum, *dst_nt_prev_runnable_sum;
	int migrate_type;
	int cpu = cpu_of(rq);
	bool new_task;
	int i;

	wallclock = sched_ktime_clock();

	update_task_ravg(rq->curr, rq, TASK_UPDATE, wallclock, 0);
	update_task_ravg(p, rq, TASK_UPDATE, wallclock, 0);
	new_task = is_new_task(p);

	cpu_time = &rq->grp_time;
	if (event == ADD_TASK) {
		migrate_type = RQ_TO_GROUP;

		src_curr_runnable_sum = &rq->curr_runnable_sum;
		dst_curr_runnable_sum = &cpu_time->curr_runnable_sum;
		src_prev_runnable_sum = &rq->prev_runnable_sum;
		dst_prev_runnable_sum = &cpu_time->prev_runnable_sum;

		src_nt_curr_runnable_sum = &rq->nt_curr_runnable_sum;
		dst_nt_curr_runnable_sum = &cpu_time->nt_curr_runnable_sum;
		src_nt_prev_runnable_sum = &rq->nt_prev_runnable_sum;
		dst_nt_prev_runnable_sum = &cpu_time->nt_prev_runnable_sum;

		*src_curr_runnable_sum -= p->ravg.curr_window_cpu[cpu];
		*src_prev_runnable_sum -= p->ravg.prev_window_cpu[cpu];
		if (new_task) {
			*src_nt_curr_runnable_sum -=
				p->ravg.curr_window_cpu[cpu];
			*src_nt_prev_runnable_sum -=
				p->ravg.prev_window_cpu[cpu];
		}

		update_cluster_load_subtractions(p, cpu,
				rq->window_start, new_task);

	} else {
		migrate_type = GROUP_TO_RQ;

		src_curr_runnable_sum = &cpu_time->curr_runnable_sum;
		dst_curr_runnable_sum = &rq->curr_runnable_sum;
		src_prev_runnable_sum = &cpu_time->prev_runnable_sum;
		dst_prev_runnable_sum = &rq->prev_runnable_sum;

		src_nt_curr_runnable_sum = &cpu_time->nt_curr_runnable_sum;
		dst_nt_curr_runnable_sum = &rq->nt_curr_runnable_sum;
		src_nt_prev_runnable_sum = &cpu_time->nt_prev_runnable_sum;
		dst_nt_prev_runnable_sum = &rq->nt_prev_runnable_sum;

		*src_curr_runnable_sum -= p->ravg.curr_window;
		*src_prev_runnable_sum -= p->ravg.prev_window;
		if (new_task) {
			*src_nt_curr_runnable_sum -= p->ravg.curr_window;
			*src_nt_prev_runnable_sum -= p->ravg.prev_window;
		}

		/*
		 * Need to reset curr/prev windows for all CPUs, not just the
		 * ones in the same cluster. Since inter-cluster migrations
		 * did not result in the appropriate bookkeeping, the values
		 * per CPU would be inaccurate.
		 */
		for_each_possible_cpu(i) {
			p->ravg.curr_window_cpu[i] = 0;
			p->ravg.prev_window_cpu[i] = 0;
		}
	}

	*dst_curr_runnable_sum += p->ravg.curr_window;
	*dst_prev_runnable_sum += p->ravg.prev_window;
	if (new_task) {
		*dst_nt_curr_runnable_sum += p->ravg.curr_window;
		*dst_nt_prev_runnable_sum += p->ravg.prev_window;
	}

	/*
	 * When a task enters or exits a group, its curr and prev windows are
	 * moved to a single CPU. This behavior might be sub-optimal in the
	 * exit case; however, it saves us the overhead of handling
	 * inter-cluster migration fixups while the task is part of a related
	 * group.
	 */
	p->ravg.curr_window_cpu[cpu] = p->ravg.curr_window;
	p->ravg.prev_window_cpu[cpu] = p->ravg.prev_window;

	trace_sched_migration_update_sum(p, migrate_type, rq);
}

static void _set_preferred_cluster(struct related_thread_group *grp,
				   int sched_cluster_id);

static void remove_task_from_group(struct task_struct *p)
{
	struct related_thread_group *grp = p->grp;
	struct rq *rq = NULL;
	bool empty_group = true;
	struct rq_flags flag;
	unsigned long irqflag;

	rq = __task_rq_lock(p, &flag);
	transfer_busy_time(rq, p->grp, p, REM_TASK);

	raw_spin_lock_irqsave(&grp->lock, irqflag);
	list_del_init(&p->grp_list);
	rcu_assign_pointer(p->grp, NULL);

	if (p->on_cpu)
		grp->nr_running--;

	if ((int)grp->nr_running < 0) {
		WARN_ON(1);
		grp->nr_running = 0;
	}

	if (!list_empty(&grp->tasks)) {
		empty_group = false;
	} else {
#ifdef CONFIG_UCLAMP_TASK
		grp->max_boost = 0;
#endif
		_set_preferred_cluster(grp, -1);
		grp->ravg.normalized_util = 0;
	}

	raw_spin_unlock_irqrestore(&grp->lock, irqflag);
	__task_rq_unlock(rq, &flag);

	/* Reserved groups cannot be destroyed. */
	if (empty_group && grp->id != DEFAULT_CGROUP_COLOC_ID) {
		/*
		 * We test whether grp->list is attached with list_empty(),
		 * hence re-init the list after deletion.
		 */
		write_lock(&related_thread_group_lock);
		list_del_init(&grp->list);
		write_unlock(&related_thread_group_lock);
	}
}

static int
add_task_to_group(struct task_struct *p, struct related_thread_group *grp)
{
	struct rq *rq = NULL;
	struct rq_flags flag;
	unsigned long irqflag;
#ifdef CONFIG_UCLAMP_TASK
	int boost;
#endif

	/*
	 * Change p->grp under rq->lock. This prevents races with read-side
	 * references of p->grp in various hot paths.
	 */
	rq = __task_rq_lock(p, &flag);
	transfer_busy_time(rq, grp, p, ADD_TASK);

	raw_spin_lock_irqsave(&grp->lock, irqflag);
	list_add(&p->grp_list, &grp->tasks);
	rcu_assign_pointer(p->grp, grp);
	if (p->on_cpu) {
		grp->nr_running++;
		if (grp->nr_running == 1)
			grp->mark_start = max(grp->mark_start,
					      sched_ktime_clock());
	}

#ifdef CONFIG_UCLAMP_TASK
	boost = (int)uclamp_eff_value(p, UCLAMP_MIN);
	if (boost > grp->max_boost)
		grp->max_boost = boost;
#endif
	raw_spin_unlock_irqrestore(&grp->lock, irqflag);
	__task_rq_unlock(rq, &flag);

	return 0;
}

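/*
 * Group membership changes are serialised on p->pi_lock; the rq lock and
 * grp->lock are taken further down in add_task_to_group() and
 * remove_task_from_group().
 */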
static int __sched_set_group_id(struct task_struct *p, unsigned int group_id)
{
	int rc = 0;
	unsigned long flags;
	struct related_thread_group *grp = NULL;
	struct related_thread_group *old_grp = NULL;

	if (group_id >= MAX_NUM_CGROUP_COLOC_ID)
		return -EINVAL;

	raw_spin_lock_irqsave(&p->pi_lock, flags);
	old_grp = p->grp;
	if ((current != p && (p->flags & PF_EXITING)) ||
	    (!old_grp && !group_id))
		goto done;

	/*
	 * With CONFIG_SCHED_RTG_CGROUP, only tasks in the DEFAULT group may
	 * be switched directly to another group.
	 *
	 * In all other cases, switching directly from one group to another
	 * is not permitted.
	 */
	if (old_grp && group_id) {
#ifdef CONFIG_SCHED_RTG_CGROUP
		if (old_grp->id == DEFAULT_CGROUP_COLOC_ID) {
			remove_task_from_group(p);
		} else {
#endif
			rc = -EINVAL;
			goto done;
#ifdef CONFIG_SCHED_RTG_CGROUP
		}
#endif
	}

	if (!group_id) {
		remove_task_from_group(p);
		goto done;
	}

	grp = lookup_related_thread_group(group_id);
	write_lock(&related_thread_group_lock);
	if (list_empty(&grp->list))
		list_add(&grp->list, &active_related_thread_groups);
	write_unlock(&related_thread_group_lock);

	rc = add_task_to_group(p, grp);
done:
	raw_spin_unlock_irqrestore(&p->pi_lock, flags);

	return rc;
}

/* group_id == 0: remove the task from its rtg */
int sched_set_group_id(struct task_struct *p, unsigned int group_id)
{
	/* DEFAULT_CGROUP_COLOC_ID is a reserved id */
	if (group_id == DEFAULT_CGROUP_COLOC_ID)
		return -EINVAL;

	return __sched_set_group_id(p, group_id);
}

unsigned int sched_get_group_id(struct task_struct *p)
{
	unsigned int group_id;
	struct related_thread_group *grp = NULL;

	rcu_read_lock();
	grp = task_related_thread_group(p);
	group_id = grp ? grp->id : 0;
	rcu_read_unlock();

	return group_id;
}

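/*
 * Keep the group's nr_running in sync with its tasks. The PICK_NEXT_TASK /
 * PUT_PREV_TASK events suggest this is driven from the WALT context-switch
 * accounting path, though that is inferred from the event names rather than
 * stated here.
 */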
void update_group_nr_running(struct task_struct *p, int event, u64 wallclock)
{
	struct related_thread_group *grp;
	bool need_update = false;

	rcu_read_lock();
	grp = task_related_thread_group(p);
	if (!grp) {
		rcu_read_unlock();
		return;
	}

	raw_spin_lock(&grp->lock);

	if (event == PICK_NEXT_TASK)
		grp->nr_running++;
	else if (event == PUT_PREV_TASK)
		grp->nr_running--;

	if ((int)grp->nr_running < 0) {
		WARN_ON(1);
		grp->nr_running = 0;
	}

	/* update the preferred cluster if there was no update for too long */
	if (wallclock - grp->last_util_update_time > grp->util_update_timeout)
		need_update = true;

	raw_spin_unlock(&grp->lock);

	rcu_read_unlock();

	if (need_update && grp->rtg_class && grp->rtg_class->sched_update_rtg_tick &&
	    grp->id != DEFAULT_CGROUP_COLOC_ID)
		grp->rtg_class->sched_update_rtg_tick(grp);
}

int sched_set_group_window_size(unsigned int grp_id, unsigned int window_size)
{
	struct related_thread_group *grp = NULL;
	unsigned long flag;

	if (!window_size)
		return -EINVAL;

	grp = lookup_related_thread_group(grp_id);
	if (!grp) {
		pr_err("failed to set window size for group %d\n", grp_id);
		return -ENODEV;
	}

	raw_spin_lock_irqsave(&grp->lock, flag);
	grp->window_size = window_size;
	raw_spin_unlock_irqrestore(&grp->lock, flag);

	return 0;
}

void group_time_rollover(struct group_ravg *ravg)
{
	ravg->prev_window_load = ravg->curr_window_load;
	ravg->curr_window_load = 0;
	ravg->prev_window_exec = ravg->curr_window_exec;
	ravg->curr_window_exec = 0;
}

int sched_set_group_window_rollover(unsigned int grp_id)
{
	struct related_thread_group *grp = NULL;
	u64 wallclock;
	unsigned long flag;
#ifdef CONFIG_UCLAMP_TASK
	struct task_struct *p = NULL;
	int boost;
#endif

	grp = lookup_related_thread_group(grp_id);
	if (!grp) {
		pr_err("failed to set window start for group %d\n", grp_id);
		return -ENODEV;
	}

	raw_spin_lock_irqsave(&grp->lock, flag);

	wallclock = sched_ktime_clock();
	grp->prev_window_time = wallclock - grp->window_start;
	grp->window_start = wallclock;
	grp->max_boost = 0;

#ifdef CONFIG_UCLAMP_TASK
	/* recompute the maximum uclamp-min boost across the group's tasks */
	list_for_each_entry(p, &grp->tasks, grp_list) {
		boost = (int)uclamp_eff_value(p, UCLAMP_MIN);
		if (boost > grp->max_boost)
			grp->max_boost = boost;
	}
#endif

	group_time_rollover(&grp->ravg);
	raw_spin_unlock_irqrestore(&grp->lock, flag);

	return 0;
}

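/*
 * Accumulate the execution time since grp->mark_start into the group's window
 * statistics; when mark_start precedes window_start, the slice is split
 * between the previous and current windows. Called with grp->lock held.
 */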
static void add_to_group_time(struct related_thread_group *grp, struct rq *rq, u64 wallclock)
{
	u64 delta_exec, delta_load;
	u64 mark_start = grp->mark_start;
	u64 window_start = grp->window_start;

	if (unlikely(wallclock <= mark_start))
		return;

	/* per-group load tracking in RTG */
	if (likely(mark_start >= window_start)) {
		/*
		 *   ws   ms   wc
		 *   |    |    |
		 *   V    V    V
		 *   |---------------|
		 */
		delta_exec = wallclock - mark_start;
		grp->ravg.curr_window_exec += delta_exec;

		delta_load = scale_exec_time(delta_exec, rq);
		grp->ravg.curr_window_load += delta_load;
	} else {
		/*
		 *   ms   ws   wc
		 *   |    |    |
		 *   V    V    V
		 *   -----|----------
		 */
		/* previous window statistics */
		delta_exec = window_start - mark_start;
		grp->ravg.prev_window_exec += delta_exec;

		delta_load = scale_exec_time(delta_exec, rq);
		grp->ravg.prev_window_load += delta_load;

		/* current window statistics */
		delta_exec = wallclock - window_start;
		grp->ravg.curr_window_exec += delta_exec;

		delta_load = scale_exec_time(delta_exec, rq);
		grp->ravg.curr_window_load += delta_load;
	}
}

static inline void add_to_group_demand(struct related_thread_group *grp,
				       struct rq *rq, u64 wallclock)
{
	if (unlikely(wallclock <= grp->window_start))
		return;

	add_to_group_time(grp, rq, wallclock);
}

static int account_busy_for_group_demand(struct task_struct *p, int event)
{
	/*
	 * No need to bother updating task demand for exiting tasks
	 * or the idle task.
	 */
	if (exiting_task(p) || is_idle_task(p))
		return 0;

	if (event == TASK_WAKE || event == TASK_MIGRATE)
		return 0;

	return 1;
}

void update_group_demand(struct task_struct *p, struct rq *rq,
			 int event, u64 wallclock)
{
	struct related_thread_group *grp;

	if (!account_busy_for_group_demand(p, event))
		return;

	rcu_read_lock();
	grp = task_related_thread_group(p);
	if (!grp) {
		rcu_read_unlock();
		return;
	}

	raw_spin_lock(&grp->lock);

	if (grp->nr_running == 1)
		grp->mark_start = max(grp->mark_start, p->ravg.mark_start);

	add_to_group_demand(grp, rq, wallclock);

	grp->mark_start = wallclock;

	raw_spin_unlock(&grp->lock);

	rcu_read_unlock();
}

void sched_update_rtg_tick(struct task_struct *p)
{
	struct related_thread_group *grp = NULL;

	rcu_read_lock();
	grp = task_related_thread_group(p);
	if (!grp || list_empty(&grp->tasks)) {
		rcu_read_unlock();
		return;
	}

	if (grp->rtg_class && grp->rtg_class->sched_update_rtg_tick)
		grp->rtg_class->sched_update_rtg_tick(grp);

	rcu_read_unlock();
}

int preferred_cluster(struct sched_cluster *cluster, struct task_struct *p)
{
	struct related_thread_group *grp = NULL;
	int rc = 1;

	rcu_read_lock();

	grp = task_related_thread_group(p);
	if (grp != NULL)
		rc = (grp->preferred_cluster == cluster);

	rcu_read_unlock();
	return rc;
}

unsigned int get_cluster_grp_running(int cluster_id)
{
	struct related_thread_group *grp = NULL;
	unsigned int total_grp_running = 0;
	unsigned long flag, rtg_flag;
	unsigned int i;

	read_lock_irqsave(&related_thread_group_lock, rtg_flag);

	/* grp_id 0 is used for exited tasks */
	for (i = 1; i < MAX_NUM_CGROUP_COLOC_ID; i++) {
		grp = lookup_related_thread_group(i);
		if (!grp)
			continue;

		raw_spin_lock_irqsave(&grp->lock, flag);
		if (grp->preferred_cluster != NULL &&
		    grp->preferred_cluster->id == cluster_id)
			total_grp_running += grp->nr_running;
		raw_spin_unlock_irqrestore(&grp->lock, flag);
	}
	read_unlock_irqrestore(&related_thread_group_lock, rtg_flag);

	return total_grp_running;
}

static void _set_preferred_cluster(struct related_thread_group *grp,
				   int sched_cluster_id)
{
	struct sched_cluster *cluster = NULL;
	struct sched_cluster *cluster_found = NULL;

	if (sched_cluster_id == -1) {
		grp->preferred_cluster = NULL;
		return;
	}

	for_each_sched_cluster_reverse(cluster) {
		if (cluster->id == sched_cluster_id) {
			cluster_found = cluster;
			break;
		}
	}

	if (cluster_found != NULL)
		grp->preferred_cluster = cluster_found;
	else
		pr_err("cannot find sched_cluster_id=%d\n", sched_cluster_id);
}

/*
 * sched_cluster_id == -1: the group's preferred cluster is cleared (set to NULL).
 */
static void set_preferred_cluster(struct related_thread_group *grp,
				  int sched_cluster_id)
{
	unsigned long flag;

	raw_spin_lock_irqsave(&grp->lock, flag);
	_set_preferred_cluster(grp, sched_cluster_id);
	raw_spin_unlock_irqrestore(&grp->lock, flag);
}

int sched_set_group_preferred_cluster(unsigned int grp_id, int sched_cluster_id)
{
	struct related_thread_group *grp = NULL;

	/* DEFAULT_CGROUP_COLOC_ID is a reserved id */
	if (grp_id == DEFAULT_CGROUP_COLOC_ID ||
	    grp_id >= MAX_NUM_CGROUP_COLOC_ID)
		return -EINVAL;

	grp = lookup_related_thread_group(grp_id);
	if (!grp) {
		pr_err("failed to set preferred cluster for group %d\n", grp_id);
		return -ENODEV;
	}
	set_preferred_cluster(grp, sched_cluster_id);

	return 0;
}

struct cpumask *find_rtg_target(struct task_struct *p)
{
	struct related_thread_group *grp = NULL;
	struct sched_cluster *preferred_cluster = NULL;
	struct cpumask *rtg_target = NULL;

	rcu_read_lock();
	grp = task_related_thread_group(p);
	rcu_read_unlock();

	if (!grp)
		return NULL;

	preferred_cluster = grp->preferred_cluster;
	if (!preferred_cluster)
		return NULL;

	rtg_target = &preferred_cluster->cpus;
	if (!task_fits_max(p, cpumask_first(rtg_target)))
		return NULL;

	return rtg_target;
}

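/*
 * CPU selection order: (1) an unreserved idle CPU in the preferred cluster
 * (or the task's current CPU while it is running), (2) the highest-capacity
 * idle backup CPU in the allowed mask, (3) the CPU with the most spare
 * capacity. Returns -1 when the task has no RTG target.
 */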
int find_rtg_cpu(struct task_struct *p)
{
	int i;
	cpumask_t search_cpus = CPU_MASK_NONE;
	int max_spare_cap_cpu = -1;
	unsigned long max_spare_cap = 0;
	int idle_backup_cpu = -1;
	struct cpumask *preferred_cpus = find_rtg_target(p);

	if (!preferred_cpus)
		return -1;

	cpumask_and(&search_cpus, p->cpus_ptr, cpu_online_mask);
#ifdef CONFIG_CPU_ISOLATION_OPT
	cpumask_andnot(&search_cpus, &search_cpus, cpu_isolated_mask);
#endif

	/* search for an idle CPU in the preferred cluster first */
	for_each_cpu_and(i, &search_cpus, preferred_cpus) {
		if (is_reserved(i))
			continue;

		if (idle_cpu(i) || (i == task_cpu(p) && p->state == TASK_RUNNING)) {
			trace_find_rtg_cpu(p, preferred_cpus, "prefer_idle", i);
			return i;
		}
	}

	for_each_cpu(i, &search_cpus) {
		unsigned long spare_cap;

		if (sched_cpu_high_irqload(i))
			continue;

		if (is_reserved(i))
			continue;

		/* take the active LB CPU as idle_backup_cpu */
		if (idle_cpu(i) || (i == task_cpu(p) && p->state == TASK_RUNNING)) {
			/* find the idle_backup_cpu with the maximum capacity */
			if (idle_backup_cpu == -1 ||
			    capacity_orig_of(i) > capacity_orig_of(idle_backup_cpu))
				idle_backup_cpu = i;

			continue;
		}

		spare_cap = capacity_spare_without(i, p);
		if (spare_cap > max_spare_cap) {
			max_spare_cap = spare_cap;
			max_spare_cap_cpu = i;
		}
	}

	if (idle_backup_cpu != -1) {
		trace_find_rtg_cpu(p, preferred_cpus, "idle_backup", idle_backup_cpu);
		return idle_backup_cpu;
	}

	trace_find_rtg_cpu(p, preferred_cpus, "max_spare", max_spare_cap_cpu);

	return max_spare_cap_cpu;
}

int sched_set_group_util_invalid_interval(unsigned int grp_id,
					  unsigned int interval)
{
	struct related_thread_group *grp = NULL;
	unsigned long flag;

	if (interval == 0)
		return -EINVAL;

	/* DEFAULT_CGROUP_COLOC_ID is a reserved id */
	if (grp_id == DEFAULT_CGROUP_COLOC_ID ||
	    grp_id >= MAX_NUM_CGROUP_COLOC_ID)
		return -EINVAL;

	grp = lookup_related_thread_group(grp_id);
	if (!grp) {
		pr_err("failed to set invalid interval for group %d\n", grp_id);
		return -ENODEV;
	}

	raw_spin_lock_irqsave(&grp->lock, flag);
	if ((signed int)interval < 0)
		grp->util_invalid_interval = DEFAULT_UTIL_INVALID_INTERVAL;
	else
		grp->util_invalid_interval = interval * NSEC_PER_MSEC;

	raw_spin_unlock_irqrestore(&grp->lock, flag);

	return 0;
}

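/*
 * A group's utilization is treated as stale once no frequency update has
 * happened for util_invalid_interval; a value of DEFAULT_UTIL_INVALID_INTERVAL
 * (~0U) disables the check.
 */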
static inline bool
group_should_invalid_util(struct related_thread_group *grp, u64 now)
{
	if (grp->util_invalid_interval == DEFAULT_UTIL_INVALID_INTERVAL)
		return false;

	return (now - grp->last_freq_update_time >= grp->util_invalid_interval);
}

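/*
 * True when at least one runnable task of the group is placed on a CPU of the
 * group's preferred cluster. grp->preferred_cluster is dereferenced without a
 * check here, so a preferred cluster must be set whenever the group has
 * runnable tasks.
 */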
static inline bool valid_normalized_util(struct related_thread_group *grp)
{
	struct task_struct *p = NULL;
	cpumask_t rtg_cpus = CPU_MASK_NONE;
	bool valid = false;

	if (grp->nr_running != 0) {
		list_for_each_entry(p, &grp->tasks, grp_list) {
			get_task_struct(p);
			if (p->state == TASK_RUNNING)
				cpumask_set_cpu(task_cpu(p), &rtg_cpus);
			trace_sched_rtg_task_each(grp->id, grp->nr_running, p);
			put_task_struct(p);
		}

		valid = cpumask_intersects(&rtg_cpus,
					   &grp->preferred_cluster->cpus);
	}
	trace_sched_rtg_valid_normalized_util(grp->id, grp->nr_running, &rtg_cpus, valid);

	return valid;
}

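/*
 * Report the maximum normalized utilization among active groups whose
 * preferred cluster intersects @query_cpus and whose utilization is not
 * stale. @freq is currently always reported as 0 since max_grp_freq is never
 * raised in the loop below.
 */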
void sched_get_max_group_util(const struct cpumask *query_cpus,
			      unsigned long *util, unsigned int *freq)
{
	struct related_thread_group *grp = NULL;
	unsigned long max_grp_util = 0;
	unsigned int max_grp_freq = 0;
	u64 now = ktime_get_ns();
	unsigned long rtg_flag;
	unsigned long flag;

	/*
	 * Walk each active rtg and return the maximum
	 * normalized utilization found.
	 */
	read_lock_irqsave(&related_thread_group_lock, rtg_flag);
	if (list_empty(&active_related_thread_groups))
		goto unlock;

	for_each_related_thread_group(grp) {
		raw_spin_lock_irqsave(&grp->lock, flag);
		if (!list_empty(&grp->tasks) &&
		    grp->preferred_cluster != NULL &&
		    cpumask_intersects(query_cpus,
				       &grp->preferred_cluster->cpus) &&
		    !group_should_invalid_util(grp, now)) {
			if (grp->ravg.normalized_util > max_grp_util)
				max_grp_util = grp->ravg.normalized_util;
		}
		raw_spin_unlock_irqrestore(&grp->lock, flag);
	}

unlock:
	read_unlock_irqrestore(&related_thread_group_lock, rtg_flag);

	*freq = max_grp_freq;
	*util = max_grp_util;
}

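/*
 * Pick the cluster for the group's boosted utilization. Assuming
 * for_each_sched_cluster() walks clusters from smallest to largest capacity
 * (as in WALT), this returns the first cluster that can fit
 * normalized_util + max_boost, falling back to the largest-capacity cluster
 * seen otherwise.
 */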
static struct sched_cluster *best_cluster(struct related_thread_group *grp)
{
	struct sched_cluster *cluster = NULL;
	struct sched_cluster *max_cluster = NULL;
	int cpu;
	unsigned long util = grp->ravg.normalized_util;
	unsigned long boosted_grp_util = util + grp->max_boost;
	unsigned long max_cap = 0;
	unsigned long cap = 0;

	/* find the new cluster */
	for_each_sched_cluster(cluster) {
		cpu = cpumask_first(&cluster->cpus);
		cap = capacity_orig_of(cpu);
		if (cap > max_cap) {
			max_cap = cap;
			max_cluster = cluster;
		}

		if (boosted_grp_util <= cap)
			return cluster;
	}

	return max_cluster;
}

static bool group_should_update_freq(struct related_thread_group *grp,
				     int cpu, unsigned int flags, u64 now)
{
	if (!grp)
		return true;

	if (flags & RTG_FREQ_FORCE_UPDATE) {
		return true;
	} else if (flags & RTG_FREQ_NORMAL_UPDATE) {
		if (now - grp->last_freq_update_time >=
		    grp->freq_update_interval)
			return true;
	}

	return false;
}

int sched_set_group_normalized_util(unsigned int grp_id, unsigned long util,
				    unsigned int flag)
{
	struct related_thread_group *grp = NULL;
	bool need_update_prev_freq = false;
	bool need_update_next_freq = false;
	u64 now;
	unsigned long flags;
	struct sched_cluster *preferred_cluster = NULL;
	int prev_cpu;
	int next_cpu;

	grp = lookup_related_thread_group(grp_id);
	if (!grp) {
		pr_err("failed to set normalized util for group %d\n", grp_id);
		return -ENODEV;
	}

	raw_spin_lock_irqsave(&grp->lock, flags);

	if (list_empty(&grp->tasks)) {
		raw_spin_unlock_irqrestore(&grp->lock, flags);
		return 0;
	}

	grp->ravg.normalized_util = util;

	preferred_cluster = best_cluster(grp);

	/* force a frequency update on the previous cluster when the preferred cluster changes */
	if (!grp->preferred_cluster) {
		grp->preferred_cluster = preferred_cluster;
	} else if (grp->preferred_cluster != preferred_cluster) {
		prev_cpu = cpumask_first(&grp->preferred_cluster->cpus);
		grp->preferred_cluster = preferred_cluster;

		need_update_prev_freq = true;
	}

	if (grp->preferred_cluster != NULL)
		next_cpu = cpumask_first(&grp->preferred_cluster->cpus);
	else
		next_cpu = 0;

	now = ktime_get_ns();
	grp->last_util_update_time = now;
	need_update_next_freq =
		group_should_update_freq(grp, next_cpu, flag, now);
	if (need_update_next_freq)
		grp->last_freq_update_time = now;

	raw_spin_unlock_irqrestore(&grp->lock, flags);

	if (need_update_prev_freq)
		cpufreq_update_util(cpu_rq(prev_cpu),
				    SCHED_CPUFREQ_FORCE_UPDATE | SCHED_CPUFREQ_WALT);

	if (need_update_next_freq)
		cpufreq_update_util(cpu_rq(next_cpu),
				    SCHED_CPUFREQ_FORCE_UPDATE | SCHED_CPUFREQ_WALT);

	return 0;
}

int sched_set_group_freq_update_interval(unsigned int grp_id, unsigned int interval)
{
	struct related_thread_group *grp = NULL;
	unsigned long flag;

	if ((signed int)interval <= 0)
		return -EINVAL;

	/* DEFAULT_CGROUP_COLOC_ID is a reserved id */
	if (grp_id == DEFAULT_CGROUP_COLOC_ID ||
	    grp_id >= MAX_NUM_CGROUP_COLOC_ID)
		return -EINVAL;

	grp = lookup_related_thread_group(grp_id);
	if (!grp) {
		pr_err("failed to set update interval for group %d\n", grp_id);
		return -ENODEV;
	}

	raw_spin_lock_irqsave(&grp->lock, flag);
	grp->freq_update_interval = interval * NSEC_PER_MSEC;
	raw_spin_unlock_irqrestore(&grp->lock, flag);

	return 0;
}

#ifdef CONFIG_SCHED_RTG_CGROUP
#ifdef CONFIG_UCLAMP_TASK_GROUP
static inline bool uclamp_task_colocated(struct task_struct *p)
{
	struct cgroup_subsys_state *css;
	struct task_group *tg;
	bool colocate;

	rcu_read_lock();
	css = task_css(p, cpu_cgrp_id);
	if (!css) {
		rcu_read_unlock();
		return false;
	}
	tg = container_of(css, struct task_group, css);
	colocate = tg->colocate;
	rcu_read_unlock();

	return colocate;
}
#else
static inline bool uclamp_task_colocated(struct task_struct *p)
{
	return false;
}
#endif /* CONFIG_UCLAMP_TASK_GROUP */

void add_new_task_to_grp(struct task_struct *new)
{
	struct related_thread_group *grp = NULL;
	unsigned long flag;

	/*
	 * If the task does not belong to the colocated schedtune cgroup,
	 * there is nothing to do. We check this without the lock; even if
	 * there is a race, the task will be added to the colocated cgroup
	 * via cgroup attach.
	 */
	if (!uclamp_task_colocated(new))
		return;

	grp = lookup_related_thread_group(DEFAULT_CGROUP_COLOC_ID);
	write_lock_irqsave(&related_thread_group_lock, flag);

	/*
	 * It is possible that someone already added the new task to the
	 * group, or that it has been taken out of the colocated schedtune
	 * cgroup. Check these conditions under the lock.
	 */
	if (!uclamp_task_colocated(new) || new->grp) {
		write_unlock_irqrestore(&related_thread_group_lock, flag);
		return;
	}

	raw_spin_lock(&grp->lock);

	rcu_assign_pointer(new->grp, grp);
	list_add(&new->grp_list, &grp->tasks);

	raw_spin_unlock(&grp->lock);
	write_unlock_irqrestore(&related_thread_group_lock, flag);
}

/*
 * We create a default colocation group at boot. There is no need to
 * synchronize tasks between cgroups at creation time because the
 * correct cgroup hierarchy is not available at boot. Therefore cgroup
 * colocation is turned off by default even though the colocation group
 * itself has been allocated. Furthermore, this colocation group cannot
 * be destroyed once it has been created. All of this is done as part
 * of runtime optimizations.
 *
 * The job of synchronizing tasks to the colocation group is done when
 * the colocation flag in the cgroup is turned on.
 */
static int __init create_default_coloc_group(void)
{
	struct related_thread_group *grp = NULL;
	unsigned long flags;

	grp = lookup_related_thread_group(DEFAULT_CGROUP_COLOC_ID);
	write_lock_irqsave(&related_thread_group_lock, flags);
	list_add(&grp->list, &active_related_thread_groups);
	write_unlock_irqrestore(&related_thread_group_lock, flags);

	return 0;
}
late_initcall(create_default_coloc_group);

int sync_cgroup_colocation(struct task_struct *p, bool insert)
{
	unsigned int grp_id = insert ? DEFAULT_CGROUP_COLOC_ID : 0;
	unsigned int old_grp_id;

	if (p) {
		old_grp_id = sched_get_group_id(p);
		/*
		 * If the task is already in a group other than
		 * DEFAULT_CGROUP_COLOC_ID, do not change its group id during
		 * the switch to background.
		 */
		if ((old_grp_id != DEFAULT_CGROUP_COLOC_ID) && (grp_id == 0))
			return 0;
	}

	return __sched_set_group_id(p, grp_id);
}
#endif /* CONFIG_SCHED_RTG_CGROUP */

#ifdef CONFIG_SCHED_RTG_DEBUG
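/*
 * Debug interface: the groups and their member threads are dumped through
 * /proc/sched_rtg_debug (created in init_sched_rtg_debug_procfs() below).
 * seq_printf_rtg() falls back to printk() when no seq_file is supplied.
 */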
#define seq_printf_rtg(m, x...) \
do { \
	if (m) \
		seq_printf(m, x); \
	else \
		printk(x); \
} while (0)

static void print_rtg_info(struct seq_file *file,
			   const struct related_thread_group *grp)
{
	seq_printf_rtg(file, "RTG_ID : %d\n", grp->id);
	seq_printf_rtg(file, "RTG_INTERVAL : UPDATE:%lums#INVALID:%lums\n",
		       grp->freq_update_interval / NSEC_PER_MSEC,
		       grp->util_invalid_interval / NSEC_PER_MSEC);
	seq_printf_rtg(file, "RTG_CLUSTER : %d\n",
		       grp->preferred_cluster ? grp->preferred_cluster->id : -1);
#ifdef CONFIG_SCHED_RTG_RT_THREAD_LIMIT
	seq_printf_rtg(file, "RTG_RT_THREAD_NUM : %d/%d\n",
		       read_rtg_rt_thread_num(), RTG_MAX_RT_THREAD_NUM);
#endif
}

static char rtg_task_state_to_char(const struct task_struct *tsk)
{
	static const char state_char[] = "RSDTtXZPI";
	unsigned int tsk_state = READ_ONCE(tsk->state);
	unsigned int state = (tsk_state | tsk->exit_state) & TASK_REPORT;

	BUILD_BUG_ON_NOT_POWER_OF_2(TASK_REPORT_MAX);
	BUILD_BUG_ON(1 + ilog2(TASK_REPORT_MAX) != sizeof(state_char) - 1);

	if (tsk_state == TASK_IDLE)
		state = TASK_REPORT_IDLE;
	return state_char[fls(state)];
}

static inline void print_rtg_task_header(struct seq_file *file,
					 const char *header, int run, int nr)
{
	seq_printf_rtg(file,
		       "%s : %d/%d\n"
		       "STATE COMM PID PRIO CPU\n"
		       "---------------------------------------------------------\n",
		       header, run, nr);
}

static inline void print_rtg_task(struct seq_file *file,
				  const struct task_struct *tsk)
{
	seq_printf_rtg(file, "%5c %15s %5d %5d %5d(%*pbl)\n",
		       rtg_task_state_to_char(tsk), tsk->comm, tsk->pid,
		       tsk->prio, task_cpu(tsk), cpumask_pr_args(tsk->cpus_ptr));
}

static void print_rtg_threads(struct seq_file *file,
			      const struct related_thread_group *grp)
{
	struct task_struct *tsk = NULL;
	int nr_thread = 0;

	list_for_each_entry(tsk, &grp->tasks, grp_list)
		nr_thread++;

	if (!nr_thread)
		return;

	print_rtg_task_header(file, "RTG_THREADS",
			      grp->nr_running, nr_thread);
	list_for_each_entry(tsk, &grp->tasks, grp_list) {
		if (unlikely(!tsk))
			continue;
		get_task_struct(tsk);
		print_rtg_task(file, tsk);
		put_task_struct(tsk);
	}
	seq_printf_rtg(file, "---------------------------------------------------------\n");
}

static int sched_rtg_debug_show(struct seq_file *file, void *param)
{
	struct related_thread_group *grp = NULL;
	unsigned long flags;
	bool have_task = false;

	for_each_related_thread_group(grp) {
		if (unlikely(!grp)) {
			seq_printf_rtg(file, "RTG none\n");
			return 0;
		}

		raw_spin_lock_irqsave(&grp->lock, flags);
		if (list_empty(&grp->tasks)) {
			raw_spin_unlock_irqrestore(&grp->lock, flags);
			continue;
		}

		if (!have_task)
			have_task = true;

		seq_printf_rtg(file, "\n\n");
		print_rtg_info(file, grp);
		print_rtg_threads(file, grp);
		raw_spin_unlock_irqrestore(&grp->lock, flags);
	}

	if (!have_task)
		seq_printf_rtg(file, "RTG tasklist empty\n");

	return 0;
}

static int sched_rtg_debug_release(struct inode *inode, struct file *file)
{
	/* single_open() allocated the seq_operations; use single_release() to free it */
	single_release(inode, file);
	return 0;
}

static int sched_rtg_debug_open(struct inode *inode, struct file *filp)
{
	return single_open(filp, sched_rtg_debug_show, NULL);
}

static const struct proc_ops sched_rtg_debug_fops = {
	.proc_open = sched_rtg_debug_open,
	.proc_read = seq_read,
	.proc_lseek = seq_lseek,
	.proc_release = sched_rtg_debug_release,
};

static int __init init_sched_rtg_debug_procfs(void)
{
	struct proc_dir_entry *pe = NULL;

	pe = proc_create("sched_rtg_debug",
			 0400, NULL, &sched_rtg_debug_fops);
	if (unlikely(!pe))
		return -ENOMEM;
	return 0;
}
late_initcall(init_sched_rtg_debug_procfs);
#endif