1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Deadline Scheduling Class (SCHED_DEADLINE)
4  *
5  * Earliest Deadline First (EDF) + Constant Bandwidth Server (CBS).
6  *
7  * Tasks that periodically execute their instances for less than their
8  * runtime won't miss any of their deadlines.
9  * Tasks that are not periodic or sporadic, or that try to execute more
10  * than their reserved bandwidth, will be slowed down (and may potentially
11  * miss some of their deadlines), and won't affect any other task.
12  *
13  * Copyright (C) 2012 Dario Faggioli <raistlin@linux.it>,
14  *                    Juri Lelli <juri.lelli@gmail.com>,
15  *                    Michael Trimarchi <michael@amarulasolutions.com>,
16  *                    Fabio Checconi <fchecconi@gmail.com>
17  */
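/*
 * Illustrative userspace sketch (not taken from this file; the values are
 * made up): a task becomes a -deadline task by describing its reservation,
 * e.g. 10ms of runtime every 100ms, via sched_setattr():
 *
 *	struct sched_attr attr = {
 *		.size		= sizeof(attr),
 *		.sched_policy	= SCHED_DEADLINE,
 *		.sched_runtime	= 10 * 1000 * 1000,
 *		.sched_deadline	= 100 * 1000 * 1000,
 *		.sched_period	= 100 * 1000 * 1000,
 *	};
 *
 *	syscall(__NR_sched_setattr, 0, &attr, 0);
 *
 * All three parameters are in nanoseconds; see
 * Documentation/scheduler/sched-deadline.rst for the full interface.
 */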
18 #include "sched.h"
19 #include "pelt.h"
20 #include <linux/cpuset.h>
21 #include <trace/hooks/sched.h>
22 
23 struct dl_bandwidth def_dl_bandwidth;
24 
25 static inline struct task_struct *dl_task_of(struct sched_dl_entity *dl_se)
26 {
27 	return container_of(dl_se, struct task_struct, dl);
28 }
29 
30 static inline struct rq *rq_of_dl_rq(struct dl_rq *dl_rq)
31 {
32 	return container_of(dl_rq, struct rq, dl);
33 }
34 
35 static inline struct dl_rq *dl_rq_of_se(struct sched_dl_entity *dl_se)
36 {
37 	struct task_struct *p = dl_task_of(dl_se);
38 	struct rq *rq = task_rq(p);
39 
40 	return &rq->dl;
41 }
42 
43 static inline int on_dl_rq(struct sched_dl_entity *dl_se)
44 {
45 	return !RB_EMPTY_NODE(&dl_se->rb_node);
46 }
47 
48 #ifdef CONFIG_RT_MUTEXES
49 static inline struct sched_dl_entity *pi_of(struct sched_dl_entity *dl_se)
50 {
51 	return dl_se->pi_se;
52 }
53 
54 static inline bool is_dl_boosted(struct sched_dl_entity *dl_se)
55 {
56 	return pi_of(dl_se) != dl_se;
57 }
58 #else
59 static inline struct sched_dl_entity *pi_of(struct sched_dl_entity *dl_se)
60 {
61 	return dl_se;
62 }
63 
64 static inline bool is_dl_boosted(struct sched_dl_entity *dl_se)
65 {
66 	return false;
67 }
68 #endif
69 
70 #ifdef CONFIG_SMP
71 static inline struct dl_bw *dl_bw_of(int i)
72 {
73 	RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held(),
74 			 "sched RCU must be held");
75 	return &cpu_rq(i)->rd->dl_bw;
76 }
77 
78 static inline int dl_bw_cpus(int i)
79 {
80 	struct root_domain *rd = cpu_rq(i)->rd;
81 	int cpus;
82 
83 	RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held(),
84 			 "sched RCU must be held");
85 
86 	if (cpumask_subset(rd->span, cpu_active_mask))
87 		return cpumask_weight(rd->span);
88 
89 	cpus = 0;
90 
91 	for_each_cpu_and(i, rd->span, cpu_active_mask)
92 		cpus++;
93 
94 	return cpus;
95 }
96 
97 static inline unsigned long __dl_bw_capacity(int i)
98 {
99 	struct root_domain *rd = cpu_rq(i)->rd;
100 	unsigned long cap = 0;
101 
102 	RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held(),
103 			 "sched RCU must be held");
104 
105 	for_each_cpu_and(i, rd->span, cpu_active_mask)
106 		cap += capacity_orig_of(i);
107 
108 	return cap;
109 }
110 
111 /*
112  * XXX Fix: If 'rq->rd == def_root_domain' perform AC against capacity
113  * of the CPU the task is running on rather than rd's \Sum CPU capacity.
114  */
115 static inline unsigned long dl_bw_capacity(int i)
116 {
117 	if (!sched_asym_cpucap_active() &&
118 	    capacity_orig_of(i) == SCHED_CAPACITY_SCALE) {
119 		return dl_bw_cpus(i) << SCHED_CAPACITY_SHIFT;
120 	} else {
121 		return __dl_bw_capacity(i);
122 	}
123 }
124 
125 static inline bool dl_bw_visited(int cpu, u64 gen)
126 {
127 	struct root_domain *rd = cpu_rq(cpu)->rd;
128 
129 	if (rd->visit_gen == gen)
130 		return true;
131 
132 	rd->visit_gen = gen;
133 	return false;
134 }
135 #else
136 static inline struct dl_bw *dl_bw_of(int i)
137 {
138 	return &cpu_rq(i)->dl.dl_bw;
139 }
140 
141 static inline int dl_bw_cpus(int i)
142 {
143 	return 1;
144 }
145 
146 static inline unsigned long dl_bw_capacity(int i)
147 {
148 	return SCHED_CAPACITY_SCALE;
149 }
150 
151 static inline bool dl_bw_visited(int cpu, u64 gen)
152 {
153 	return false;
154 }
155 #endif
156 
157 static inline
158 void __add_running_bw(u64 dl_bw, struct dl_rq *dl_rq)
159 {
160 	u64 old = dl_rq->running_bw;
161 
162 	lockdep_assert_rq_held(rq_of_dl_rq(dl_rq));
163 	dl_rq->running_bw += dl_bw;
164 	SCHED_WARN_ON(dl_rq->running_bw < old); /* overflow */
165 	SCHED_WARN_ON(dl_rq->running_bw > dl_rq->this_bw);
166 	/* kick cpufreq (see the comment in kernel/sched/sched.h). */
167 	cpufreq_update_util(rq_of_dl_rq(dl_rq), 0);
168 }
169 
170 static inline
171 void __sub_running_bw(u64 dl_bw, struct dl_rq *dl_rq)
172 {
173 	u64 old = dl_rq->running_bw;
174 
175 	lockdep_assert_rq_held(rq_of_dl_rq(dl_rq));
176 	dl_rq->running_bw -= dl_bw;
177 	SCHED_WARN_ON(dl_rq->running_bw > old); /* underflow */
178 	if (dl_rq->running_bw > old)
179 		dl_rq->running_bw = 0;
180 	/* kick cpufreq (see the comment in kernel/sched/sched.h). */
181 	cpufreq_update_util(rq_of_dl_rq(dl_rq), 0);
182 }
183 
184 static inline
185 void __add_rq_bw(u64 dl_bw, struct dl_rq *dl_rq)
186 {
187 	u64 old = dl_rq->this_bw;
188 
189 	lockdep_assert_rq_held(rq_of_dl_rq(dl_rq));
190 	dl_rq->this_bw += dl_bw;
191 	SCHED_WARN_ON(dl_rq->this_bw < old); /* overflow */
192 }
193 
194 static inline
195 void __sub_rq_bw(u64 dl_bw, struct dl_rq *dl_rq)
196 {
197 	u64 old = dl_rq->this_bw;
198 
199 	lockdep_assert_rq_held(rq_of_dl_rq(dl_rq));
200 	dl_rq->this_bw -= dl_bw;
201 	SCHED_WARN_ON(dl_rq->this_bw > old); /* underflow */
202 	if (dl_rq->this_bw > old)
203 		dl_rq->this_bw = 0;
204 	SCHED_WARN_ON(dl_rq->running_bw > dl_rq->this_bw);
205 }
206 
207 static inline
208 void add_rq_bw(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
209 {
210 	if (!dl_entity_is_special(dl_se))
211 		__add_rq_bw(dl_se->dl_bw, dl_rq);
212 }
213 
214 static inline
215 void sub_rq_bw(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
216 {
217 	if (!dl_entity_is_special(dl_se))
218 		__sub_rq_bw(dl_se->dl_bw, dl_rq);
219 }
220 
221 static inline
222 void add_running_bw(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
223 {
224 	if (!dl_entity_is_special(dl_se))
225 		__add_running_bw(dl_se->dl_bw, dl_rq);
226 }
227 
228 static inline
229 void sub_running_bw(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
230 {
231 	if (!dl_entity_is_special(dl_se))
232 		__sub_running_bw(dl_se->dl_bw, dl_rq);
233 }
234 
235 static void dl_change_utilization(struct task_struct *p, u64 new_bw)
236 {
237 	struct rq *rq;
238 
239 	BUG_ON(p->dl.flags & SCHED_FLAG_SUGOV);
240 
241 	if (task_on_rq_queued(p))
242 		return;
243 
244 	rq = task_rq(p);
245 	if (p->dl.dl_non_contending) {
246 		sub_running_bw(&p->dl, &rq->dl);
247 		p->dl.dl_non_contending = 0;
248 		/*
249 		 * If the timer handler is currently running and the
250 		 * timer cannot be canceled, inactive_task_timer()
251 		 * will see that dl_non_contending is not set, and
252 		 * will not touch the rq's active utilization,
253 		 * so we are still safe.
254 		 */
255 		if (hrtimer_try_to_cancel(&p->dl.inactive_timer) == 1)
256 			put_task_struct(p);
257 	}
258 	__sub_rq_bw(p->dl.dl_bw, &rq->dl);
259 	__add_rq_bw(new_bw, &rq->dl);
260 }
261 
262 /*
263  * The utilization of a task cannot be immediately removed from
264  * the rq active utilization (running_bw) when the task blocks.
265  * Instead, we have to wait for the so called "0-lag time".
266  *
267  * If a task blocks before the "0-lag time", a timer (the inactive
268  * timer) is armed, and running_bw is decreased when the timer
269  * fires.
270  *
271  * If the task wakes up again before the inactive timer fires,
272  * the timer is canceled, whereas if the task wakes up after the
273  * inactive timer fired (and running_bw has been decreased) the
274  * task's utilization has to be added to running_bw again.
275  * A flag in the deadline scheduling entity (dl_non_contending)
276  * is used to avoid race conditions between the inactive timer handler
277  * and task wakeups.
278  *
279  * The following diagram shows how running_bw is updated. A task is
280  * "ACTIVE" when its utilization contributes to running_bw; an
281  * "ACTIVE contending" task is in the TASK_RUNNING state, while an
282  * "ACTIVE non contending" task is a blocked task for which the "0-lag time"
283  * has not passed yet. An "INACTIVE" task is a task for which the "0-lag"
284  * time has already passed; it does not contribute to running_bw anymore.
285  *                              +------------------+
286  *             wakeup           |    ACTIVE        |
287  *          +------------------>+   contending     |
288  *          | add_running_bw    |                  |
289  *          |                   +----+------+------+
290  *          |                        |      ^
291  *          |                dequeue |      |
292  * +--------+-------+                |      |
293  * |                |   t >= 0-lag   |      | wakeup
294  * |    INACTIVE    |<---------------+      |
295  * |                | sub_running_bw |      |
296  * +--------+-------+                |      |
297  *          ^                        |      |
298  *          |              t < 0-lag |      |
299  *          |                        |      |
300  *          |                        V      |
301  *          |                   +----+------+------+
302  *          | sub_running_bw    |    ACTIVE        |
303  *          +-------------------+                  |
304  *            inactive timer    |  non contending  |
305  *            fired             +------------------+
306  *
307  * The task_non_contending() function is invoked when a task
308  * blocks, and checks if the 0-lag time already passed or
309  * not (in the first case, it directly updates running_bw;
310  * in the second case, it arms the inactive timer).
311  *
312  * The task_contending() function is invoked when a task wakes
313  * up, and checks if the task is still in the "ACTIVE non contending"
314  * state or not (in the second case, it updates running_bw).
315  */
316 static void task_non_contending(struct task_struct *p)
317 {
318 	struct sched_dl_entity *dl_se = &p->dl;
319 	struct hrtimer *timer = &dl_se->inactive_timer;
320 	struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
321 	struct rq *rq = rq_of_dl_rq(dl_rq);
322 	s64 zerolag_time;
323 
324 	/*
325 	 * If this is a non-deadline task that has been boosted,
326 	 * do nothing
327 	 */
328 	if (dl_se->dl_runtime == 0)
329 		return;
330 
331 	if (dl_entity_is_special(dl_se))
332 		return;
333 
334 	WARN_ON(dl_se->dl_non_contending);
335 
336 	zerolag_time = dl_se->deadline -
337 		 div64_long((dl_se->runtime * dl_se->dl_period),
338 			dl_se->dl_runtime);
339 
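	/*
	 * Worked example (made-up parameters): dl_runtime = 10ms,
	 * dl_period = 100ms, runtime = 4ms left, deadline = now + 30ms.
	 * The remaining 4ms of budget correspond to 4 * 100 / 10 = 40ms of
	 * wall time at the reserved bandwidth, so the 0-lag time is
	 * (now + 30ms) - 40ms = now - 10ms: it is already in the past and
	 * running_bw can be decreased right away, without arming the
	 * inactive timer.
	 */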
340 	/*
341 	 * Using relative times instead of the absolute "0-lag time"
342 	 * simplifies the code
343 	 */
344 	zerolag_time -= rq_clock(rq);
345 
346 	/*
347 	 * If the "0-lag time" already passed, decrease the active
348 	 * utilization now, instead of starting a timer
349 	 */
350 	if ((zerolag_time < 0) || hrtimer_active(&dl_se->inactive_timer)) {
351 		if (dl_task(p))
352 			sub_running_bw(dl_se, dl_rq);
353 		if (!dl_task(p) || READ_ONCE(p->__state) == TASK_DEAD) {
354 			struct dl_bw *dl_b = dl_bw_of(task_cpu(p));
355 
356 			if (READ_ONCE(p->__state) == TASK_DEAD)
357 				sub_rq_bw(&p->dl, &rq->dl);
358 			raw_spin_lock(&dl_b->lock);
359 			__dl_sub(dl_b, p->dl.dl_bw, dl_bw_cpus(task_cpu(p)));
360 			__dl_clear_params(p);
361 			raw_spin_unlock(&dl_b->lock);
362 		}
363 
364 		return;
365 	}
366 
367 	dl_se->dl_non_contending = 1;
368 	get_task_struct(p);
369 	hrtimer_start(timer, ns_to_ktime(zerolag_time), HRTIMER_MODE_REL_HARD);
370 }
371 
372 static void task_contending(struct sched_dl_entity *dl_se, int flags)
373 {
374 	struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
375 
376 	/*
377 	 * If this is a non-deadline task that has been boosted,
378 	 * do nothing
379 	 */
380 	if (dl_se->dl_runtime == 0)
381 		return;
382 
383 	if (flags & ENQUEUE_MIGRATED)
384 		add_rq_bw(dl_se, dl_rq);
385 
386 	if (dl_se->dl_non_contending) {
387 		dl_se->dl_non_contending = 0;
388 		/*
389 		 * If the timer handler is currently running and the
390 		 * timer cannot be canceled, inactive_task_timer()
391 		 * will see that dl_non_contending is not set, and
392 		 * will not touch the rq's active utilization,
393 		 * so we are still safe.
394 		 */
395 		if (hrtimer_try_to_cancel(&dl_se->inactive_timer) == 1)
396 			put_task_struct(dl_task_of(dl_se));
397 	} else {
398 		/*
399 		 * Since "dl_non_contending" is not set, the
400 		 * task's utilization has already been removed from
401 		 * active utilization (either when the task blocked,
402 		 * or when the "inactive timer" fired).
403 		 * So, add it back.
404 		 */
405 		add_running_bw(dl_se, dl_rq);
406 	}
407 }
408 
409 static inline int is_leftmost(struct task_struct *p, struct dl_rq *dl_rq)
410 {
411 	struct sched_dl_entity *dl_se = &p->dl;
412 
413 	return dl_rq->root.rb_leftmost == &dl_se->rb_node;
414 }
415 
416 static void init_dl_rq_bw_ratio(struct dl_rq *dl_rq);
417 
418 void init_dl_bandwidth(struct dl_bandwidth *dl_b, u64 period, u64 runtime)
419 {
420 	raw_spin_lock_init(&dl_b->dl_runtime_lock);
421 	dl_b->dl_period = period;
422 	dl_b->dl_runtime = runtime;
423 }
424 
425 void init_dl_bw(struct dl_bw *dl_b)
426 {
427 	raw_spin_lock_init(&dl_b->lock);
428 	raw_spin_lock(&def_dl_bandwidth.dl_runtime_lock);
429 	if (global_rt_runtime() == RUNTIME_INF)
430 		dl_b->bw = -1;
431 	else
432 		dl_b->bw = to_ratio(global_rt_period(), global_rt_runtime());
433 	raw_spin_unlock(&def_dl_bandwidth.dl_runtime_lock);
434 	dl_b->total_bw = 0;
435 }
436 
437 void init_dl_rq(struct dl_rq *dl_rq)
438 {
439 	dl_rq->root = RB_ROOT_CACHED;
440 
441 #ifdef CONFIG_SMP
442 	/* zero means no -deadline tasks */
443 	dl_rq->earliest_dl.curr = dl_rq->earliest_dl.next = 0;
444 
445 	dl_rq->dl_nr_migratory = 0;
446 	dl_rq->overloaded = 0;
447 	dl_rq->pushable_dl_tasks_root = RB_ROOT_CACHED;
448 #else
449 	init_dl_bw(&dl_rq->dl_bw);
450 #endif
451 
452 	dl_rq->running_bw = 0;
453 	dl_rq->this_bw = 0;
454 	init_dl_rq_bw_ratio(dl_rq);
455 }
456 
457 #ifdef CONFIG_SMP
458 
459 static inline int dl_overloaded(struct rq *rq)
460 {
461 	return atomic_read(&rq->rd->dlo_count);
462 }
463 
464 static inline void dl_set_overload(struct rq *rq)
465 {
466 	if (!rq->online)
467 		return;
468 
469 	cpumask_set_cpu(rq->cpu, rq->rd->dlo_mask);
470 	/*
471 	 * Must be visible before the overload count is
472 	 * set (as in sched_rt.c).
473 	 *
474 	 * Matched by the barrier in pull_dl_task().
475 	 */
476 	smp_wmb();
477 	atomic_inc(&rq->rd->dlo_count);
478 }
479 
480 static inline void dl_clear_overload(struct rq *rq)
481 {
482 	if (!rq->online)
483 		return;
484 
485 	atomic_dec(&rq->rd->dlo_count);
486 	cpumask_clear_cpu(rq->cpu, rq->rd->dlo_mask);
487 }
488 
489 static void update_dl_migration(struct dl_rq *dl_rq)
490 {
491 	if (dl_rq->dl_nr_migratory && dl_rq->dl_nr_running > 1) {
492 		if (!dl_rq->overloaded) {
493 			dl_set_overload(rq_of_dl_rq(dl_rq));
494 			dl_rq->overloaded = 1;
495 		}
496 	} else if (dl_rq->overloaded) {
497 		dl_clear_overload(rq_of_dl_rq(dl_rq));
498 		dl_rq->overloaded = 0;
499 	}
500 }
501 
502 static void inc_dl_migration(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
503 {
504 	struct task_struct *p = dl_task_of(dl_se);
505 
506 	if (p->nr_cpus_allowed > 1)
507 		dl_rq->dl_nr_migratory++;
508 
509 	update_dl_migration(dl_rq);
510 }
511 
512 static void dec_dl_migration(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
513 {
514 	struct task_struct *p = dl_task_of(dl_se);
515 
516 	if (p->nr_cpus_allowed > 1)
517 		dl_rq->dl_nr_migratory--;
518 
519 	update_dl_migration(dl_rq);
520 }
521 
522 #define __node_2_pdl(node) \
523 	rb_entry((node), struct task_struct, pushable_dl_tasks)
524 
525 static inline bool __pushable_less(struct rb_node *a, const struct rb_node *b)
526 {
527 	return dl_entity_preempt(&__node_2_pdl(a)->dl, &__node_2_pdl(b)->dl);
528 }
529 
530 /*
531  * The list of pushable -deadline task is not a plist, like in
532  * sched_rt.c, it is an rb-tree with tasks ordered by deadline.
533  */
534 static void enqueue_pushable_dl_task(struct rq *rq, struct task_struct *p)
535 {
536 	struct rb_node *leftmost;
537 
538 	BUG_ON(!RB_EMPTY_NODE(&p->pushable_dl_tasks));
539 
540 	leftmost = rb_add_cached(&p->pushable_dl_tasks,
541 				 &rq->dl.pushable_dl_tasks_root,
542 				 __pushable_less);
543 	if (leftmost)
544 		rq->dl.earliest_dl.next = p->dl.deadline;
545 }
546 
547 static void dequeue_pushable_dl_task(struct rq *rq, struct task_struct *p)
548 {
549 	struct dl_rq *dl_rq = &rq->dl;
550 	struct rb_root_cached *root = &dl_rq->pushable_dl_tasks_root;
551 	struct rb_node *leftmost;
552 
553 	if (RB_EMPTY_NODE(&p->pushable_dl_tasks))
554 		return;
555 
556 	leftmost = rb_erase_cached(&p->pushable_dl_tasks, root);
557 	if (leftmost)
558 		dl_rq->earliest_dl.next = __node_2_pdl(leftmost)->dl.deadline;
559 
560 	RB_CLEAR_NODE(&p->pushable_dl_tasks);
561 }
562 
563 static inline int has_pushable_dl_tasks(struct rq *rq)
564 {
565 	return !RB_EMPTY_ROOT(&rq->dl.pushable_dl_tasks_root.rb_root);
566 }
567 
568 static int push_dl_task(struct rq *rq);
569 
570 static inline bool need_pull_dl_task(struct rq *rq, struct task_struct *prev)
571 {
572 	return rq->online && dl_task(prev);
573 }
574 
575 static DEFINE_PER_CPU(struct callback_head, dl_push_head);
576 static DEFINE_PER_CPU(struct callback_head, dl_pull_head);
577 
578 static void push_dl_tasks(struct rq *);
579 static void pull_dl_task(struct rq *);
580 
581 static inline void deadline_queue_push_tasks(struct rq *rq)
582 {
583 	if (!has_pushable_dl_tasks(rq))
584 		return;
585 
586 	queue_balance_callback(rq, &per_cpu(dl_push_head, rq->cpu), push_dl_tasks);
587 }
588 
589 static inline void deadline_queue_pull_task(struct rq *rq)
590 {
591 	queue_balance_callback(rq, &per_cpu(dl_pull_head, rq->cpu), pull_dl_task);
592 }
593 
594 static struct rq *find_lock_later_rq(struct task_struct *task, struct rq *rq);
595 
596 static struct rq *dl_task_offline_migration(struct rq *rq, struct task_struct *p)
597 {
598 	struct rq *later_rq = NULL;
599 	struct dl_bw *dl_b;
600 
601 	later_rq = find_lock_later_rq(p, rq);
602 	if (!later_rq) {
603 		int cpu;
604 
605 		/*
606 		 * If we cannot preempt any rq, fall back to pick any
607 		 * online CPU:
608 		 */
609 		cpu = cpumask_any_and(cpu_active_mask, p->cpus_ptr);
610 		if (cpu >= nr_cpu_ids) {
611 			/*
612 			 * Failed to find any suitable CPU.
613 			 * The task will never come back!
614 			 */
615 			BUG_ON(dl_bandwidth_enabled());
616 
617 			/*
618 			 * If admission control is disabled we
619 			 * try a little harder to let the task
620 			 * run.
621 			 */
622 			cpu = cpumask_any(cpu_active_mask);
623 		}
624 		later_rq = cpu_rq(cpu);
625 		double_lock_balance(rq, later_rq);
626 	}
627 
628 	if (p->dl.dl_non_contending || p->dl.dl_throttled) {
629 		/*
630 		 * Inactive timer is armed (or callback is running, but
631 		 * waiting for us to release rq locks). In any case, when it
632 		 * will fire (or continue), it will see running_bw of this
633 		 * task migrated to later_rq (and correctly handle it).
634 		 */
635 		sub_running_bw(&p->dl, &rq->dl);
636 		sub_rq_bw(&p->dl, &rq->dl);
637 
638 		add_rq_bw(&p->dl, &later_rq->dl);
639 		add_running_bw(&p->dl, &later_rq->dl);
640 	} else {
641 		sub_rq_bw(&p->dl, &rq->dl);
642 		add_rq_bw(&p->dl, &later_rq->dl);
643 	}
644 
645 	/*
646 	 * And we finally need to fixup root_domain(s) bandwidth accounting,
647 	 * since p is still hanging out in the old (now moved to default) root
648 	 * domain.
649 	 */
650 	dl_b = &rq->rd->dl_bw;
651 	raw_spin_lock(&dl_b->lock);
652 	__dl_sub(dl_b, p->dl.dl_bw, cpumask_weight(rq->rd->span));
653 	raw_spin_unlock(&dl_b->lock);
654 
655 	dl_b = &later_rq->rd->dl_bw;
656 	raw_spin_lock(&dl_b->lock);
657 	__dl_add(dl_b, p->dl.dl_bw, cpumask_weight(later_rq->rd->span));
658 	raw_spin_unlock(&dl_b->lock);
659 
660 	set_task_cpu(p, later_rq->cpu);
661 	double_unlock_balance(later_rq, rq);
662 
663 	return later_rq;
664 }
665 
666 #else
667 
668 static inline
669 void enqueue_pushable_dl_task(struct rq *rq, struct task_struct *p)
670 {
671 }
672 
673 static inline
674 void dequeue_pushable_dl_task(struct rq *rq, struct task_struct *p)
675 {
676 }
677 
678 static inline
679 void inc_dl_migration(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
680 {
681 }
682 
683 static inline
684 void dec_dl_migration(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
685 {
686 }
687 
688 static inline bool need_pull_dl_task(struct rq *rq, struct task_struct *prev)
689 {
690 	return false;
691 }
692 
693 static inline void pull_dl_task(struct rq *rq)
694 {
695 }
696 
697 static inline void deadline_queue_push_tasks(struct rq *rq)
698 {
699 }
700 
701 static inline void deadline_queue_pull_task(struct rq *rq)
702 {
703 }
704 #endif /* CONFIG_SMP */
705 
706 static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags);
707 static void __dequeue_task_dl(struct rq *rq, struct task_struct *p, int flags);
708 static void check_preempt_curr_dl(struct rq *rq, struct task_struct *p, int flags);
709 
710 /*
711  * We are being explicitly informed that a new instance is starting,
712  * and this means that:
713  *  - the absolute deadline of the entity has to be placed at
714  *    current time + relative deadline;
715  *  - the runtime of the entity has to be set to the maximum value.
716  *
717  * The capability of specifying such an event is useful whenever a -deadline
718  * entity wants to (try to!) synchronize its behaviour with the scheduler's
719  * one, and to (try to!) reconcile itself with its own scheduling
720  * parameters.
721  */
722 static inline void setup_new_dl_entity(struct sched_dl_entity *dl_se)
723 {
724 	struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
725 	struct rq *rq = rq_of_dl_rq(dl_rq);
726 
727 	WARN_ON(is_dl_boosted(dl_se));
728 	WARN_ON(dl_time_before(rq_clock(rq), dl_se->deadline));
729 
730 	/*
731 	 * We are racing with the deadline timer. So, do nothing because
732 	 * the deadline timer handler will take care of properly recharging
733 	 * the runtime and postponing the deadline
734 	 */
735 	if (dl_se->dl_throttled)
736 		return;
737 
738 	/*
739 	 * We use the regular wall clock time to set deadlines in the
740 	 * future; in fact, we must consider execution overheads (time
741 	 * spent on hardirq context, etc.).
742 	 */
743 	dl_se->deadline = rq_clock(rq) + dl_se->dl_deadline;
744 	dl_se->runtime = dl_se->dl_runtime;
745 }
746 
747 /*
748  * Pure Earliest Deadline First (EDF) scheduling does not deal with the
749  * possibility of an entity lasting more than what it declared, and thus
750  * exhausting its runtime.
751  *
752  * Here we are interested in making runtime overrun possible, but we do
753  * not want an entity which is misbehaving to affect the scheduling of all
754  * other entities.
755  * Therefore, a budgeting strategy called Constant Bandwidth Server (CBS)
756  * is used, in order to confine each entity within its own bandwidth.
757  *
758  * This function deals exactly with that, and ensures that when the runtime
759  * of an entity is replenished, its deadline is also postponed. That ensures
760  * the overrunning entity can't interfere with other entities in the system and
761  * can't make them miss their deadlines. Reasons why this kind of overrun
762  * could happen are, typically, an entity voluntarily trying to exceed its
763  * runtime, or having underestimated it during sched_setattr().
764  */
765 static void replenish_dl_entity(struct sched_dl_entity *dl_se)
766 {
767 	struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
768 	struct rq *rq = rq_of_dl_rq(dl_rq);
769 
770 	BUG_ON(pi_of(dl_se)->dl_runtime <= 0);
771 
772 	/*
773 	 * This could be the case for a !-dl task that is boosted.
774 	 * Just go with full inherited parameters.
775 	 */
776 	if (dl_se->dl_deadline == 0) {
777 		dl_se->deadline = rq_clock(rq) + pi_of(dl_se)->dl_deadline;
778 		dl_se->runtime = pi_of(dl_se)->dl_runtime;
779 	}
780 
781 	if (dl_se->dl_yielded && dl_se->runtime > 0)
782 		dl_se->runtime = 0;
783 
784 	/*
785 	 * We keep moving the deadline away until we get some
786 	 * available runtime for the entity. This ensures correct
787 	 * handling of situations where the runtime overrun is
788 	 * arbitrarily large.
789 	 */
790 	while (dl_se->runtime <= 0) {
791 		dl_se->deadline += pi_of(dl_se)->dl_period;
792 		dl_se->runtime += pi_of(dl_se)->dl_runtime;
793 	}
794 
795 	/*
796 	 * At this point, the deadline really should be "in
797 	 * the future" with respect to rq->clock. If it's
798 	 * not, we are, for some reason, lagging too much!
799 	 * Anyway, after having warned userspace about that,
800 	 * we still try to keep things running by
801 	 * resetting the deadline and the budget of the
802 	 * entity.
803 	 */
804 	if (dl_time_before(dl_se->deadline, rq_clock(rq))) {
805 		printk_deferred_once("sched: DL replenish lagged too much\n");
806 		dl_se->deadline = rq_clock(rq) + pi_of(dl_se)->dl_deadline;
807 		dl_se->runtime = pi_of(dl_se)->dl_runtime;
808 	}
809 
810 	if (dl_se->dl_yielded)
811 		dl_se->dl_yielded = 0;
812 	if (dl_se->dl_throttled)
813 		dl_se->dl_throttled = 0;
814 }
815 
816 /*
817  * Here we check if --at time t-- an entity (which is probably being
818  * [re]activated or, in general, enqueued) can use its remaining runtime
819  * and its current deadline _without_ exceeding the bandwidth it is
820  * assigned (function returns true if it can't). We are in fact applying
821  * one of the CBS rules: when a task wakes up, if the residual runtime
822  * over residual deadline fits within the allocated bandwidth, then we
823  * can keep the current (absolute) deadline and residual budget without
824  * disrupting the schedulability of the system. Otherwise, we should
825  * refill the runtime and set the deadline a period in the future,
826  * because keeping the current (absolute) deadline of the task would
827  * result in breaking guarantees promised to other tasks (refer to
828  * Documentation/scheduler/sched-deadline.rst for more information).
829  *
830  * This function returns true if:
831  *
832  *   runtime / (deadline - t) > dl_runtime / dl_deadline ,
833  *
834  * IOW we can't recycle current parameters.
835  *
836  * Notice that the bandwidth check is done against the deadline. For
837  * tasks with a deadline equal to the period this is the same as using
838  * dl_period instead of dl_deadline in the equation above.
839  */
840 static bool dl_entity_overflow(struct sched_dl_entity *dl_se, u64 t)
841 {
842 	u64 left, right;
843 
844 	/*
845 	 * left and right are the two sides of the equation above,
846 	 * after a bit of shuffling to use multiplications instead
847 	 * of divisions.
848 	 *
849 	 * Note that none of the time values involved in the two
850 	 * multiplications are absolute: dl_deadline and dl_runtime
851 	 * are the relative deadline and the maximum runtime of each
852 	 * instance, runtime is the runtime left for the last instance
853 	 * and (deadline - t), since t is rq->clock, is the time left
854 	 * to the (absolute) deadline. Even if overflowing the u64 type
855 	 * is very unlikely to occur in both cases, here we scale down
856 	 * as we want to avoid that risk at all. Scaling down by 10
857 	 * means that we reduce granularity to 1us. We are fine with it,
858 	 * since this is only a true/false check and, anyway, thinking
859 	 * of anything below microseconds resolution is actually fiction
860 	 * (but still we want to give the user that illusion >;).
861 	 */
862 	left = (pi_of(dl_se)->dl_deadline >> DL_SCALE) * (dl_se->runtime >> DL_SCALE);
863 	right = ((dl_se->deadline - t) >> DL_SCALE) *
864 		(pi_of(dl_se)->dl_runtime >> DL_SCALE);
865 
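	/*
	 * Worked example (invented numbers, DL_SCALE shifts ignored):
	 * dl_runtime = 10ms, dl_deadline = 100ms, 4ms of runtime left and
	 * 30ms to the absolute deadline. Then 4/30 > 10/100, i.e.
	 * left = 100 * 4 = 400 > right = 30 * 10 = 300, so the check fires
	 * and the current runtime/deadline pair cannot be reused.
	 */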
866 	return dl_time_before(right, left);
867 }
868 
869 /*
870  * Revised wakeup rule [1]: For self-suspending tasks, rather than
871  * re-initializing the task's runtime and deadline, the revised wakeup
872  * rule adjusts the task's runtime so that the task does not overrun its
873  * density.
874  *
875  * Reasoning: a task may overrun the density if:
876  *    runtime / (deadline - t) > dl_runtime / dl_deadline
877  *
878  * Therefore, runtime can be adjusted to:
879  *     runtime = (dl_runtime / dl_deadline) * (deadline - t)
880  *
881  * This way, the runtime will be equal to the maximum density
882  * the task can use without breaking any rule.
883  *
884  * [1] Luca Abeni, Giuseppe Lipari, and Juri Lelli. 2015. Constant
885  * bandwidth server revisited. SIGBED Rev. 11, 4 (January 2015), 19-24.
886  */
887 static void
888 update_dl_revised_wakeup(struct sched_dl_entity *dl_se, struct rq *rq)
889 {
890 	u64 laxity = dl_se->deadline - rq_clock(rq);
891 
892 	/*
893 	 * If the task has deadline < period, and the deadline is in the past,
894 	 * it should already be throttled before this check.
895 	 *
896 	 * See update_dl_entity() comments for further details.
897 	 */
898 	WARN_ON(dl_time_before(dl_se->deadline, rq_clock(rq)));
899 
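	/*
	 * Example with made-up numbers: dl_runtime = 10ms, dl_deadline = 100ms
	 * and 30ms of laxity give a clipped runtime of (10/100) * 30ms = 3ms,
	 * so the task keeps at most the dl_runtime / dl_deadline density it
	 * was admitted with (dl_density is that ratio, scaled by 2^BW_SHIFT).
	 */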
900 	dl_se->runtime = (dl_se->dl_density * laxity) >> BW_SHIFT;
901 }
902 
903 /*
904  * Regarding the deadline, a task with implicit deadline has a relative
905  * deadline == relative period. A task with constrained deadline has a
906  * relative deadline <= relative period.
907  *
908  * We support constrained deadline tasks. However, there are some restrictions
909  * applied only for tasks which do not have an implicit deadline. See
910  * update_dl_entity() to know more about such restrictions.
911  *
912  * The dl_is_implicit() returns true if the task has an implicit deadline.
913  */
914 static inline bool dl_is_implicit(struct sched_dl_entity *dl_se)
915 {
916 	return dl_se->dl_deadline == dl_se->dl_period;
917 }
918 
919 /*
920  * When a deadline entity is placed in the runqueue, its runtime and deadline
921  * might need to be updated. This is done by a CBS wake up rule. There are two
922  * different rules: 1) the original CBS; and 2) the Revisited CBS.
923  *
924  * When the task is starting a new period, the Original CBS is used. In this
925  * case, the runtime is replenished and a new absolute deadline is set.
926  *
927  * When a task is queued before the beginning of the next period, using the
928  * remaining runtime and deadline could make the entity overflow; see
929  * dl_entity_overflow() to find more about runtime overflow. When such case
930  * is detected, the runtime and deadline need to be updated.
931  *
932  * If the task has an implicit deadline, i.e., deadline == period, the Original
933  * CBS is applied: the runtime is replenished and a new absolute deadline is
934  * set, as in the previous cases.
935  *
936  * However, the Original CBS does not work properly for tasks with
937  * deadline < period, which are said to have a constrained deadline. By
938  * applying the Original CBS, a constrained deadline task would be able to run
939  * runtime/deadline in a period. With deadline < period, the task would
940  * overrun the runtime/period allowed bandwidth, breaking the admission test.
941  *
942  * In order to prevent this misbehavior, the Revisited CBS is used for
943  * constrained deadline tasks when a runtime overflow is detected. In the
944  * Revisited CBS, rather than replenishing & setting a new absolute deadline,
945  * the remaining runtime of the task is reduced to avoid runtime overflow.
946  * Please refer to the comments in the update_dl_revised_wakeup() function to find
947  * more about the Revised CBS rule.
948  */
949 static void update_dl_entity(struct sched_dl_entity *dl_se)
950 {
951 	struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
952 	struct rq *rq = rq_of_dl_rq(dl_rq);
953 
954 	if (dl_time_before(dl_se->deadline, rq_clock(rq)) ||
955 	    dl_entity_overflow(dl_se, rq_clock(rq))) {
956 
957 		if (unlikely(!dl_is_implicit(dl_se) &&
958 			     !dl_time_before(dl_se->deadline, rq_clock(rq)) &&
959 			     !is_dl_boosted(dl_se))) {
960 			update_dl_revised_wakeup(dl_se, rq);
961 			return;
962 		}
963 
964 		dl_se->deadline = rq_clock(rq) + pi_of(dl_se)->dl_deadline;
965 		dl_se->runtime = pi_of(dl_se)->dl_runtime;
966 	}
967 }
968 
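/*
 * The next period starts one relative period after the activation of the
 * current instance, i.e. (deadline - dl_deadline) + dl_period. For example
 * (hypothetical values): with dl_deadline = 50ms, dl_period = 100ms and an
 * absolute deadline at t = 150ms, the current instance was activated at
 * t = 100ms and the next period begins at t = 200ms.
 */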
969 static inline u64 dl_next_period(struct sched_dl_entity *dl_se)
970 {
971 	return dl_se->deadline - dl_se->dl_deadline + dl_se->dl_period;
972 }
973 
974 /*
975  * If the entity depleted all its runtime, and if we want it to sleep
976  * while waiting for some new execution time to become available, we
977  * set the bandwidth replenishment timer to the replenishment instant
978  * and try to activate it.
979  *
980  * Notice that it is important for the caller to know if the timer
981  * actually started or not (i.e., the replenishment instant is in
982  * the future or in the past).
983  */
984 static int start_dl_timer(struct task_struct *p)
985 {
986 	struct sched_dl_entity *dl_se = &p->dl;
987 	struct hrtimer *timer = &dl_se->dl_timer;
988 	struct rq *rq = task_rq(p);
989 	ktime_t now, act;
990 	s64 delta;
991 
992 	lockdep_assert_rq_held(rq);
993 
994 	/*
995 	 * We want the timer to fire at the deadline, but considering
996 	 * that it is actually coming from rq->clock and not from
997 	 * hrtimer's time base reading.
998 	 */
999 	act = ns_to_ktime(dl_next_period(dl_se));
1000 	now = hrtimer_cb_get_time(timer);
1001 	delta = ktime_to_ns(now) - rq_clock(rq);
1002 	act = ktime_add_ns(act, delta);
1003 
1004 	/*
1005 	 * If the expiry time already passed, e.g., because the value
1006 	 * chosen as the deadline is too small, don't even try to
1007 	 * start the timer in the past!
1008 	 */
1009 	if (ktime_us_delta(act, now) < 0)
1010 		return 0;
1011 
1012 	/*
1013 	 * !enqueued will guarantee another callback; even if one is already in
1014 	 * progress. This ensures a balanced {get,put}_task_struct().
1015 	 *
1016 	 * The race against __run_timer() clearing the enqueued state is
1017 	 * harmless because we're holding task_rq()->lock, therefore the timer
1018 	 * expiring after we've done the check will wait on its task_rq_lock()
1019 	 * and observe our state.
1020 	 */
1021 	if (!hrtimer_is_queued(timer)) {
1022 		get_task_struct(p);
1023 		hrtimer_start(timer, act, HRTIMER_MODE_ABS_HARD);
1024 	}
1025 
1026 	return 1;
1027 }
1028 
1029 /*
1030  * This is the bandwidth enforcement timer callback. If here, we know
1031  * a task is not on its dl_rq, since the fact that the timer was running
1032  * means the task is throttled and needs a runtime replenishment.
1033  *
1034  * However, what we actually do depends on the fact the task is active,
1035  * (it is on its rq) or has been removed from there by a call to
1036  * dequeue_task_dl(). In the former case we must issue the runtime
1037  * replenishment and add the task back to the dl_rq; in the latter, we just
1038  * do nothing but clearing dl_throttled, so that runtime and deadline
1039  * updating (and the queueing back to dl_rq) will be done by the
1040  * next call to enqueue_task_dl().
1041  */
1042 static enum hrtimer_restart dl_task_timer(struct hrtimer *timer)
1043 {
1044 	struct sched_dl_entity *dl_se = container_of(timer,
1045 						     struct sched_dl_entity,
1046 						     dl_timer);
1047 	struct task_struct *p = dl_task_of(dl_se);
1048 	struct rq_flags rf;
1049 	struct rq *rq;
1050 
1051 	rq = task_rq_lock(p, &rf);
1052 
1053 	/*
1054 	 * The task might have changed its scheduling policy to something
1055 	 * different than SCHED_DEADLINE (through switched_from_dl()).
1056 	 */
1057 	if (!dl_task(p))
1058 		goto unlock;
1059 
1060 	/*
1061 	 * The task might have been boosted by someone else and might be in the
1062 	 * boosting/deboosting path; in that case it's not throttled.
1063 	 */
1064 	if (is_dl_boosted(dl_se))
1065 		goto unlock;
1066 
1067 	/*
1068 	 * Spurious timer due to start_dl_timer() race; or we already received
1069 	 * a replenishment from rt_mutex_setprio().
1070 	 */
1071 	if (!dl_se->dl_throttled)
1072 		goto unlock;
1073 
1074 	sched_clock_tick();
1075 	update_rq_clock(rq);
1076 
1077 	/*
1078 	 * If the throttle happened during sched-out; like:
1079 	 *
1080 	 *   schedule()
1081 	 *     deactivate_task()
1082 	 *       dequeue_task_dl()
1083 	 *         update_curr_dl()
1084 	 *           start_dl_timer()
1085 	 *         __dequeue_task_dl()
1086 	 *     prev->on_rq = 0;
1087 	 *
1088 	 * We can be both throttled and !queued. Replenish the counter
1089 	 * but do not enqueue -- wait for our wakeup to do that.
1090 	 */
1091 	if (!task_on_rq_queued(p)) {
1092 		replenish_dl_entity(dl_se);
1093 		goto unlock;
1094 	}
1095 
1096 #ifdef CONFIG_SMP
1097 	if (unlikely(!rq->online)) {
1098 		/*
1099 		 * If the runqueue is no longer available, migrate the
1100 		 * task elsewhere. This necessarily changes rq.
1101 		 */
1102 		lockdep_unpin_lock(__rq_lockp(rq), rf.cookie);
1103 		rq = dl_task_offline_migration(rq, p);
1104 		rf.cookie = lockdep_pin_lock(__rq_lockp(rq));
1105 		update_rq_clock(rq);
1106 
1107 		/*
1108 		 * Now that the task has been migrated to the new RQ and we
1109 		 * have that locked, proceed as normal and enqueue the task
1110 		 * there.
1111 		 */
1112 	}
1113 #endif
1114 
1115 	enqueue_task_dl(rq, p, ENQUEUE_REPLENISH);
1116 	if (dl_task(rq->curr))
1117 		check_preempt_curr_dl(rq, p, 0);
1118 	else
1119 		resched_curr(rq);
1120 
1121 #ifdef CONFIG_SMP
1122 	/*
1123 	 * Queueing this task back might have overloaded rq, check if we need
1124 	 * to kick someone away.
1125 	 */
1126 	if (has_pushable_dl_tasks(rq)) {
1127 		/*
1128 		 * Nothing relies on rq->lock after this, so it's safe to drop
1129 		 * rq->lock.
1130 		 */
1131 		rq_unpin_lock(rq, &rf);
1132 		push_dl_task(rq);
1133 		rq_repin_lock(rq, &rf);
1134 	}
1135 #endif
1136 
1137 unlock:
1138 	task_rq_unlock(rq, p, &rf);
1139 
1140 	/*
1141 	 * This can free the task_struct, including this hrtimer, do not touch
1142 	 * anything related to that after this.
1143 	 */
1144 	put_task_struct(p);
1145 
1146 	return HRTIMER_NORESTART;
1147 }
1148 
1149 void init_dl_task_timer(struct sched_dl_entity *dl_se)
1150 {
1151 	struct hrtimer *timer = &dl_se->dl_timer;
1152 
1153 	hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD);
1154 	timer->function = dl_task_timer;
1155 }
1156 
1157 /*
1158  * During the activation, CBS checks if it can reuse the current task's
1159  * runtime and period. If the deadline of the task is in the past, CBS
1160  * cannot use the runtime, and so it replenishes the task. This rule
1161  * works fine for implicit deadline tasks (deadline == period), and the
1162  * CBS was designed for implicit deadline tasks. However, a task with
1163  * constrained deadline (deadline < period) might be awakened after the
1164  * deadline, but before the next period. In this case, replenishing the
1165  * task would allow it to run for runtime / deadline. As in this case
1166  * deadline < period, CBS enables a task to run for more than the
1167  * runtime / period. In a very loaded system, this can cause a domino
1168  * effect, making other tasks miss their deadlines.
1169  *
1170  * To avoid this problem, in the activation of a constrained deadline
1171  * task after the deadline but before the next period, throttle the
1172 	 * task and set the replenishing timer to the beginning of the next period,
1173  * unless it is boosted.
1174  */
1175 static inline void dl_check_constrained_dl(struct sched_dl_entity *dl_se)
1176 {
1177 	struct task_struct *p = dl_task_of(dl_se);
1178 	struct rq *rq = rq_of_dl_rq(dl_rq_of_se(dl_se));
1179 
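	/*
	 * Example (invented parameters): dl_runtime = 5ms, dl_deadline = 50ms,
	 * dl_period = 100ms, instance activated at t = 0. If the task wakes up
	 * at t = 70ms, i.e. after its absolute deadline (t = 50ms) but before
	 * the next period (t = 100ms), it is throttled here and the
	 * replenishment is deferred to t = 100ms instead of being granted
	 * immediately.
	 */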
1180 	if (dl_time_before(dl_se->deadline, rq_clock(rq)) &&
1181 	    dl_time_before(rq_clock(rq), dl_next_period(dl_se))) {
1182 		if (unlikely(is_dl_boosted(dl_se) || !start_dl_timer(p)))
1183 			return;
1184 		dl_se->dl_throttled = 1;
1185 		if (dl_se->runtime > 0)
1186 			dl_se->runtime = 0;
1187 	}
1188 }
1189 
1190 static
1191 int dl_runtime_exceeded(struct sched_dl_entity *dl_se)
1192 {
1193 	return (dl_se->runtime <= 0);
1194 }
1195 
1196 extern bool sched_rt_bandwidth_account(struct rt_rq *rt_rq);
1197 
1198 /*
1199  * This function implements the GRUB accounting rule:
1200  * according to the GRUB reclaiming algorithm, the runtime is
1201  * not decreased as "dq = -dt", but as
1202  * "dq = -max{u / Umax, (1 - Uinact - Uextra)} dt",
1203  * where u is the utilization of the task, Umax is the maximum reclaimable
1204  * utilization, Uinact is the (per-runqueue) inactive utilization, computed
1205  * as the difference between the "total runqueue utilization" and the
1206  * runqueue active utilization, and Uextra is the (per runqueue) extra
1207  * reclaimable utilization.
1208  * Since rq->dl.running_bw and rq->dl.this_bw contain utilizations
1209  * multiplied by 2^BW_SHIFT, the result has to be shifted right by
1210  * BW_SHIFT.
1211  * Since rq->dl.bw_ratio contains 1 / Umax multiplied by 2^RATIO_SHIFT,
1212  * dl_bw is multiplied by rq->dl.bw_ratio and shifted right by RATIO_SHIFT.
1213  * Since delta is a 64 bit variable, to have an overflow its value
1214  * should be larger than 2^(64 - 20 - 8), which is more than 64 seconds.
1215  * So, overflow is not an issue here.
1216  */
1217 static u64 grub_reclaim(u64 delta, struct rq *rq, struct sched_dl_entity *dl_se)
1218 {
1219 	u64 u_inact = rq->dl.this_bw - rq->dl.running_bw; /* Utot - Uact */
1220 	u64 u_act;
1221 	u64 u_act_min = (dl_se->dl_bw * rq->dl.bw_ratio) >> RATIO_SHIFT;
1222 
1223 	/*
1224 	 * Instead of computing max{u * bw_ratio, (1 - u_inact - u_extra)},
1225 	 * we compare u_inact + rq->dl.extra_bw with
1226 	 * 1 - (u * rq->dl.bw_ratio >> RATIO_SHIFT), because
1227 	 * u_inact + rq->dl.extra_bw can be larger than
1228 	 * 1 * (so, 1 - u_inact - rq->dl.extra_bw would be negative
1229 	 * leading to wrong results)
1230 	 */
1231 	if (u_inact + rq->dl.extra_bw > BW_UNIT - u_act_min)
1232 		u_act = u_act_min;
1233 	else
1234 		u_act = BW_UNIT - u_inact - rq->dl.extra_bw;
1235 
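	/*
	 * Rough example (invented utilizations): u = 0.25, Umax = 0.95,
	 * u_inact = 0.1 and extra_bw = 0.5. Then u / Umax ~= 0.26 while
	 * 1 - u_inact - extra_bw = 0.4, so u_act = 0.4 and only 40% of the
	 * elapsed time is charged to the runtime: dq = -0.4 * dt. All the
	 * values above are fixed point, scaled by 2^BW_SHIFT.
	 */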
1236 	return (delta * u_act) >> BW_SHIFT;
1237 }
1238 
1239 /*
1240  * Update the current task's runtime statistics (provided it is still
1241  * a -deadline task and has not been removed from the dl_rq).
1242  */
1243 static void update_curr_dl(struct rq *rq)
1244 {
1245 	struct task_struct *curr = rq->curr;
1246 	struct sched_dl_entity *dl_se = &curr->dl;
1247 	u64 delta_exec, scaled_delta_exec;
1248 	int cpu = cpu_of(rq);
1249 	u64 now;
1250 
1251 	if (!dl_task(curr) || !on_dl_rq(dl_se))
1252 		return;
1253 
1254 	/*
1255 	 * Consumed budget is computed considering the time as
1256 	 * observed by schedulable tasks (excluding time spent
1257 	 * in hardirq context, etc.). Deadlines are instead
1258 	 * computed using hard walltime. This seems to be the more
1259 	 * natural solution, but the full ramifications of this
1260 	 * approach need further study.
1261 	 */
1262 	now = rq_clock_task(rq);
1263 	delta_exec = now - curr->se.exec_start;
1264 	if (unlikely((s64)delta_exec <= 0)) {
1265 		if (unlikely(dl_se->dl_yielded))
1266 			goto throttle;
1267 		return;
1268 	}
1269 
1270 	schedstat_set(curr->stats.exec_max,
1271 		      max(curr->stats.exec_max, delta_exec));
1272 
1273 	curr->se.sum_exec_runtime += delta_exec;
1274 	account_group_exec_runtime(curr, delta_exec);
1275 
1276 	curr->se.exec_start = now;
1277 	cgroup_account_cputime(curr, delta_exec);
1278 
1279 	if (dl_entity_is_special(dl_se))
1280 		return;
1281 
1282 	/*
1283 	 * For tasks that participate in GRUB, we implement GRUB-PA: the
1284 	 * spare reclaimed bandwidth is used to clock down frequency.
1285 	 *
1286 	 * For the others, we still need to scale reservation parameters
1287 	 * according to current frequency and CPU maximum capacity.
1288 	 */
1289 	if (unlikely(dl_se->flags & SCHED_FLAG_RECLAIM)) {
1290 		scaled_delta_exec = grub_reclaim(delta_exec,
1291 						 rq,
1292 						 &curr->dl);
1293 	} else {
1294 		unsigned long scale_freq = arch_scale_freq_capacity(cpu);
1295 		unsigned long scale_cpu = arch_scale_cpu_capacity(cpu);
1296 
1297 		scaled_delta_exec = cap_scale(delta_exec, scale_freq);
1298 		scaled_delta_exec = cap_scale(scaled_delta_exec, scale_cpu);
1299 	}
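	/*
	 * For the non-GRUB case above, e.g. (illustrative values):
	 * delta_exec = 1ms spent at half the maximum frequency
	 * (scale_freq = 512) on a CPU with half the maximum capacity
	 * (scale_cpu = 512) charges only 1ms * 0.5 * 0.5 = 0.25ms of runtime,
	 * since that is how long the same work would have taken on the
	 * fastest CPU running at full speed.
	 */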
1300 
1301 	dl_se->runtime -= scaled_delta_exec;
1302 
1303 throttle:
1304 	if (dl_runtime_exceeded(dl_se) || dl_se->dl_yielded) {
1305 		dl_se->dl_throttled = 1;
1306 
1307 		/* If requested, inform the user about runtime overruns. */
1308 		if (dl_runtime_exceeded(dl_se) &&
1309 		    (dl_se->flags & SCHED_FLAG_DL_OVERRUN))
1310 			dl_se->dl_overrun = 1;
1311 
1312 		__dequeue_task_dl(rq, curr, 0);
1313 		if (unlikely(is_dl_boosted(dl_se) || !start_dl_timer(curr)))
1314 			enqueue_task_dl(rq, curr, ENQUEUE_REPLENISH);
1315 
1316 		if (!is_leftmost(curr, &rq->dl))
1317 			resched_curr(rq);
1318 	}
1319 
1320 	/*
1321 	 * Because -- for now -- we share the rt bandwidth, we need to
1322 	 * account our runtime there too, otherwise actual rt tasks
1323 	 * would be able to exceed the shared quota.
1324 	 *
1325 	 * Account to the root rt group for now.
1326 	 *
1327 	 * The solution we're working towards is having the RT groups scheduled
1328 	 * using deadline servers -- however there's a few nasties to figure
1329 	 * out before that can happen.
1330 	 */
1331 	if (rt_bandwidth_enabled()) {
1332 		struct rt_rq *rt_rq = &rq->rt;
1333 
1334 		raw_spin_lock(&rt_rq->rt_runtime_lock);
1335 		/*
1336 		 * We'll let actual RT tasks worry about the overflow here, we
1337 		 * have our own CBS to keep us inline; only account when RT
1338 		 * bandwidth is relevant.
1339 		 */
1340 		if (sched_rt_bandwidth_account(rt_rq))
1341 			rt_rq->rt_time += delta_exec;
1342 		raw_spin_unlock(&rt_rq->rt_runtime_lock);
1343 	}
1344 }
1345 
1346 static enum hrtimer_restart inactive_task_timer(struct hrtimer *timer)
1347 {
1348 	struct sched_dl_entity *dl_se = container_of(timer,
1349 						     struct sched_dl_entity,
1350 						     inactive_timer);
1351 	struct task_struct *p = dl_task_of(dl_se);
1352 	struct rq_flags rf;
1353 	struct rq *rq;
1354 
1355 	rq = task_rq_lock(p, &rf);
1356 
1357 	sched_clock_tick();
1358 	update_rq_clock(rq);
1359 
1360 	if (!dl_task(p) || READ_ONCE(p->__state) == TASK_DEAD) {
1361 		struct dl_bw *dl_b = dl_bw_of(task_cpu(p));
1362 
1363 		if (READ_ONCE(p->__state) == TASK_DEAD && dl_se->dl_non_contending) {
1364 			sub_running_bw(&p->dl, dl_rq_of_se(&p->dl));
1365 			sub_rq_bw(&p->dl, dl_rq_of_se(&p->dl));
1366 			dl_se->dl_non_contending = 0;
1367 		}
1368 
1369 		raw_spin_lock(&dl_b->lock);
1370 		__dl_sub(dl_b, p->dl.dl_bw, dl_bw_cpus(task_cpu(p)));
1371 		raw_spin_unlock(&dl_b->lock);
1372 		__dl_clear_params(p);
1373 
1374 		goto unlock;
1375 	}
1376 	if (dl_se->dl_non_contending == 0)
1377 		goto unlock;
1378 
1379 	sub_running_bw(dl_se, &rq->dl);
1380 	dl_se->dl_non_contending = 0;
1381 unlock:
1382 	task_rq_unlock(rq, p, &rf);
1383 	put_task_struct(p);
1384 
1385 	return HRTIMER_NORESTART;
1386 }
1387 
1388 void init_dl_inactive_task_timer(struct sched_dl_entity *dl_se)
1389 {
1390 	struct hrtimer *timer = &dl_se->inactive_timer;
1391 
1392 	hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD);
1393 	timer->function = inactive_task_timer;
1394 }
1395 
1396 #ifdef CONFIG_SMP
1397 
1398 static void inc_dl_deadline(struct dl_rq *dl_rq, u64 deadline)
1399 {
1400 	struct rq *rq = rq_of_dl_rq(dl_rq);
1401 
1402 	if (dl_rq->earliest_dl.curr == 0 ||
1403 	    dl_time_before(deadline, dl_rq->earliest_dl.curr)) {
1404 		if (dl_rq->earliest_dl.curr == 0)
1405 			cpupri_set(&rq->rd->cpupri, rq->cpu, CPUPRI_HIGHER);
1406 		dl_rq->earliest_dl.curr = deadline;
1407 		cpudl_set(&rq->rd->cpudl, rq->cpu, deadline);
1408 	}
1409 }
1410 
1411 static void dec_dl_deadline(struct dl_rq *dl_rq, u64 deadline)
1412 {
1413 	struct rq *rq = rq_of_dl_rq(dl_rq);
1414 
1415 	/*
1416 	 * Since we may have removed our earliest (and/or next earliest)
1417 	 * task we must recompute them.
1418 	 */
1419 	if (!dl_rq->dl_nr_running) {
1420 		dl_rq->earliest_dl.curr = 0;
1421 		dl_rq->earliest_dl.next = 0;
1422 		cpudl_clear(&rq->rd->cpudl, rq->cpu);
1423 		cpupri_set(&rq->rd->cpupri, rq->cpu, rq->rt.highest_prio.curr);
1424 	} else {
1425 		struct rb_node *leftmost = dl_rq->root.rb_leftmost;
1426 		struct sched_dl_entity *entry;
1427 
1428 		entry = rb_entry(leftmost, struct sched_dl_entity, rb_node);
1429 		dl_rq->earliest_dl.curr = entry->deadline;
1430 		cpudl_set(&rq->rd->cpudl, rq->cpu, entry->deadline);
1431 	}
1432 }
1433 
1434 #else
1435 
1436 static inline void inc_dl_deadline(struct dl_rq *dl_rq, u64 deadline) {}
1437 static inline void dec_dl_deadline(struct dl_rq *dl_rq, u64 deadline) {}
1438 
1439 #endif /* CONFIG_SMP */
1440 
1441 static inline
1442 void inc_dl_tasks(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
1443 {
1444 	int prio = dl_task_of(dl_se)->prio;
1445 	u64 deadline = dl_se->deadline;
1446 
1447 	WARN_ON(!dl_prio(prio));
1448 	dl_rq->dl_nr_running++;
1449 	add_nr_running(rq_of_dl_rq(dl_rq), 1);
1450 
1451 	inc_dl_deadline(dl_rq, deadline);
1452 	inc_dl_migration(dl_se, dl_rq);
1453 }
1454 
1455 static inline
1456 void dec_dl_tasks(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
1457 {
1458 	int prio = dl_task_of(dl_se)->prio;
1459 
1460 	WARN_ON(!dl_prio(prio));
1461 	WARN_ON(!dl_rq->dl_nr_running);
1462 	dl_rq->dl_nr_running--;
1463 	sub_nr_running(rq_of_dl_rq(dl_rq), 1);
1464 
1465 	dec_dl_deadline(dl_rq, dl_se->deadline);
1466 	dec_dl_migration(dl_se, dl_rq);
1467 }
1468 
1469 #define __node_2_dle(node) \
1470 	rb_entry((node), struct sched_dl_entity, rb_node)
1471 
1472 static inline bool __dl_less(struct rb_node *a, const struct rb_node *b)
1473 {
1474 	return dl_time_before(__node_2_dle(a)->deadline, __node_2_dle(b)->deadline);
1475 }
1476 
1477 static void __enqueue_dl_entity(struct sched_dl_entity *dl_se)
1478 {
1479 	struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
1480 
1481 	BUG_ON(!RB_EMPTY_NODE(&dl_se->rb_node));
1482 
1483 	rb_add_cached(&dl_se->rb_node, &dl_rq->root, __dl_less);
1484 
1485 	inc_dl_tasks(dl_se, dl_rq);
1486 }
1487 
1488 static void __dequeue_dl_entity(struct sched_dl_entity *dl_se)
1489 {
1490 	struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
1491 
1492 	if (RB_EMPTY_NODE(&dl_se->rb_node))
1493 		return;
1494 
1495 	rb_erase_cached(&dl_se->rb_node, &dl_rq->root);
1496 
1497 	RB_CLEAR_NODE(&dl_se->rb_node);
1498 
1499 	dec_dl_tasks(dl_se, dl_rq);
1500 }
1501 
1502 static void
1503 enqueue_dl_entity(struct sched_dl_entity *dl_se, int flags)
1504 {
1505 	BUG_ON(on_dl_rq(dl_se));
1506 
1507 	/*
1508 	 * If this is a wakeup or a new instance, the scheduling
1509 	 * parameters of the task might need updating. Otherwise,
1510 	 * we want a replenishment of its runtime.
1511 	 */
1512 	if (flags & ENQUEUE_WAKEUP) {
1513 		task_contending(dl_se, flags);
1514 		update_dl_entity(dl_se);
1515 	} else if (flags & ENQUEUE_REPLENISH) {
1516 		replenish_dl_entity(dl_se);
1517 	} else if ((flags & ENQUEUE_RESTORE) &&
1518 		  dl_time_before(dl_se->deadline,
1519 				 rq_clock(rq_of_dl_rq(dl_rq_of_se(dl_se))))) {
1520 		setup_new_dl_entity(dl_se);
1521 	}
1522 
1523 	__enqueue_dl_entity(dl_se);
1524 }
1525 
1526 static void dequeue_dl_entity(struct sched_dl_entity *dl_se)
1527 {
1528 	__dequeue_dl_entity(dl_se);
1529 }
1530 
1531 static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags)
1532 {
1533 	if (is_dl_boosted(&p->dl)) {
1534 		/*
1535 		 * Because of delays in the detection of the overrun of a
1536 		 * thread's runtime, it might be the case that a thread
1537 		 * goes to sleep in a rt mutex with negative runtime. As
1538 		 * goes to sleep in an rt mutex with negative runtime. As
1539 		 *
1540 		 * While waiting for the mutex, this thread can also be
1541 		 * boosted via PI, resulting in a thread that is throttled
1542 		 * and boosted at the same time.
1543 		 *
1544 		 * In this case, the boost overrides the throttle.
1545 		 */
1546 		if (p->dl.dl_throttled) {
1547 			/*
1548 			 * The replenish timer needs to be canceled. No
1549 			 * problem if it fires concurrently: boosted threads
1550 			 * are ignored in dl_task_timer().
1551 			 */
1552 			hrtimer_try_to_cancel(&p->dl.dl_timer);
1553 			p->dl.dl_throttled = 0;
1554 		}
1555 	} else if (!dl_prio(p->normal_prio)) {
1556 		/*
1557 		 * Special case in which we have a !SCHED_DEADLINE task that is going
1558 		 * to be deboosted, but exceeds its runtime while doing so. No point in
1559 		 * replenishing it, as it's going to return back to its original
1560 		 * scheduling class after this. If it has been throttled, we need to
1561 		 * clear the flag, otherwise the task may wake up as throttled after
1562 		 * being boosted again with no means to replenish the runtime and clear
1563 		 * the throttle.
1564 		 */
1565 		p->dl.dl_throttled = 0;
1566 		if (!(flags & ENQUEUE_REPLENISH))
1567 			printk_deferred_once("sched: DL de-boosted task PID %d: REPLENISH flag missing\n",
1568 					     task_pid_nr(p));
1569 
1570 		return;
1571 	}
1572 
1573 	/*
1574 	 * Check if a constrained deadline task was activated
1575 	 * after the deadline but before the next period.
1576 	 * If that is the case, the task will be throttled and
1577 	 * the replenishment timer will be set to the next period.
1578 	 */
1579 	if (!p->dl.dl_throttled && !dl_is_implicit(&p->dl))
1580 		dl_check_constrained_dl(&p->dl);
1581 
1582 	if (p->on_rq == TASK_ON_RQ_MIGRATING || flags & ENQUEUE_RESTORE) {
1583 		add_rq_bw(&p->dl, &rq->dl);
1584 		add_running_bw(&p->dl, &rq->dl);
1585 	}
1586 
1587 	/*
1588 	 * If p is throttled, we do not enqueue it. In fact, if it exhausted
1589 	 * its budget it needs a replenishment and, since it now is on
1590 	 * its rq, the bandwidth timer callback (which clearly has not
1591 	 * run yet) will take care of this.
1592 	 * However, the active utilization does not depend on whether the
1593 	 * task is on the runqueue or not (but depends on the
1594 	 * task's state - in GRUB parlance, "inactive" vs "active contending").
1595 	 * In other words, even if a task is throttled its utilization must
1596 	 * be counted in the active utilization; hence, we need to call
1597 	 * add_running_bw().
1598 	 */
1599 	if (p->dl.dl_throttled && !(flags & ENQUEUE_REPLENISH)) {
1600 		if (flags & ENQUEUE_WAKEUP)
1601 			task_contending(&p->dl, flags);
1602 
1603 		return;
1604 	}
1605 
1606 	enqueue_dl_entity(&p->dl, flags);
1607 
1608 	if (!task_current(rq, p) && p->nr_cpus_allowed > 1)
1609 		enqueue_pushable_dl_task(rq, p);
1610 }
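/*
 * Clarifying note (added; not part of the original sources): in GRUB
 * parlance a task's bandwidth contribution moves between three states:
 *
 *  - "inactive":              not counted in running_bw at all;
 *  - "active contending":     runnable (even if throttled) -> counted;
 *  - "active non-contending": blocked, but its 0-lag time has not passed
 *                             yet (inactive_timer still armed) -> counted.
 *
 * This is why, above, a throttled task being woken up still goes through
 * task_contending() even though it is not inserted into the rb-tree.
 */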
1611 
1612 static void __dequeue_task_dl(struct rq *rq, struct task_struct *p, int flags)
1613 {
1614 	dequeue_dl_entity(&p->dl);
1615 	dequeue_pushable_dl_task(rq, p);
1616 }
1617 
1618 static void dequeue_task_dl(struct rq *rq, struct task_struct *p, int flags)
1619 {
1620 	update_curr_dl(rq);
1621 	__dequeue_task_dl(rq, p, flags);
1622 
1623 	if (p->on_rq == TASK_ON_RQ_MIGRATING || flags & DEQUEUE_SAVE) {
1624 		sub_running_bw(&p->dl, &rq->dl);
1625 		sub_rq_bw(&p->dl, &rq->dl);
1626 	}
1627 
1628 	/*
1629 	 * This check allows us to start the inactive timer (or to immediately
1630 	 * decrease the active utilization, if needed) in two cases:
1631 	 * when the task blocks and when it is terminating
1632 	 * (p->state == TASK_DEAD). We can handle the two cases in the same
1633 	 * way, because from GRUB's point of view the same thing is happening
1634 	 * (the task moves from "active contending" to "active non contending"
1635 	 * or "inactive")
1636 	 */
1637 	if (flags & DEQUEUE_SLEEP)
1638 		task_non_contending(p);
1639 }
1640 
1641 /*
1642  * Yield task semantic for -deadline tasks is:
1643  *
1644  *   get off from the CPU until our next instance, with
1645  *   a new runtime. This is of little use now, since we
1646  *   don't have a bandwidth reclaiming mechanism. Anyway,
1647  *   bandwidth reclaiming is planned for the future, and
1648  *   yield_task_dl will indicate that some spare budget
1649  *   is available for other task instances to use.
1650  */
1651 static void yield_task_dl(struct rq *rq)
1652 {
1653 	/*
1654 	 * We make the task go to sleep until its current deadline by
1655 	 * forcing its runtime to zero. This way, update_curr_dl() stops
1656 	 * it and the bandwidth timer will wake it up and will give it
1657 	 * new scheduling parameters (thanks to dl_yielded=1).
1658 	 */
1659 	rq->curr->dl.dl_yielded = 1;
1660 
1661 	update_rq_clock(rq);
1662 	update_curr_dl(rq);
1663 	/*
1664 	 * Tell update_rq_clock() that we've just updated,
1665 	 * so we don't do a microscopic update in schedule()
1666 	 * and double the fastpath cost.
1667 	 */
1668 	rq_clock_skip_update(rq);
1669 }
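/*
 * Usage sketch (illustrative only, not part of this file): a SCHED_DEADLINE
 * task that finishes its job early can hand back the rest of its budget for
 * the current period with sched_yield() and will be made runnable again at
 * its next replenishment:
 *
 *	// userspace; assumes the task already runs under SCHED_DEADLINE
 *	for (;;) {
 *		do_periodic_work();
 *		sched_yield();	// suspend until the next period's new runtime
 *	}
 */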
1670 
1671 #ifdef CONFIG_SMP
1672 
1673 static int find_later_rq(struct task_struct *task);
1674 
1675 static int
1676 select_task_rq_dl(struct task_struct *p, int cpu, int flags)
1677 {
1678 	struct task_struct *curr;
1679 	bool select_rq;
1680 	struct rq *rq;
1681 	int target_cpu = -1;
1682 
1683 	trace_android_rvh_select_task_rq_dl(p, cpu, flags & 0xF,
1684 						flags, &target_cpu);
1685 	if (target_cpu >= 0)
1686 		return target_cpu;
1687 
1688 	if (!(flags & WF_TTWU))
1689 		goto out;
1690 
1691 	rq = cpu_rq(cpu);
1692 
1693 	rcu_read_lock();
1694 	curr = READ_ONCE(rq->curr); /* unlocked access */
1695 
1696 	/*
1697 	 * If we are dealing with a -deadline task, we must
1698 	 * decide where to wake it up.
1699 	 * If it has a later deadline and the current task
1700 	 * on this rq can't move (provided the waking task
1701 	 * can!) we prefer to send it somewhere else. On the
1702 	 * other hand, if it has a shorter deadline, we
1703 	 * try to make it stay here, it might be important.
1704 	 */
1705 	select_rq = unlikely(dl_task(curr)) &&
1706 		    (curr->nr_cpus_allowed < 2 ||
1707 		     !dl_entity_preempt(&p->dl, &curr->dl)) &&
1708 		    p->nr_cpus_allowed > 1;
1709 
1710 	/*
1711 	 * Take the capacity of the CPU into account to
1712 	 * ensure it fits the requirement of the task.
1713 	 */
1714 	if (sched_asym_cpucap_active())
1715 		select_rq |= !dl_task_fits_capacity(p, cpu);
1716 
1717 	if (select_rq) {
1718 		int target = find_later_rq(p);
1719 
1720 		if (target != -1 &&
1721 				(dl_time_before(p->dl.deadline,
1722 					cpu_rq(target)->dl.earliest_dl.curr) ||
1723 				(cpu_rq(target)->dl.dl_nr_running == 0)))
1724 			cpu = target;
1725 	}
1726 	rcu_read_unlock();
1727 
1728 out:
1729 	return cpu;
1730 }
1731 
1732 static void migrate_task_rq_dl(struct task_struct *p, int new_cpu __maybe_unused)
1733 {
1734 	struct rq_flags rf;
1735 	struct rq *rq;
1736 
1737 	if (READ_ONCE(p->__state) != TASK_WAKING)
1738 		return;
1739 
1740 	rq = task_rq(p);
1741 	/*
1742 	 * Since p->state == TASK_WAKING, set_task_cpu() has been called
1743 	 * from try_to_wake_up(). Hence, p->pi_lock is locked, but
1744 	 * rq->lock is not... So, lock it
1745 	 */
1746 	rq_lock(rq, &rf);
1747 	if (p->dl.dl_non_contending) {
1748 		update_rq_clock(rq);
1749 		sub_running_bw(&p->dl, &rq->dl);
1750 		p->dl.dl_non_contending = 0;
1751 		/*
1752 		 * If the timer handler is currently running and the
1753 		 * timer cannot be canceled, inactive_task_timer()
1754 		 * will see that dl_non_contending is not set, and
1755 		 * will not touch the rq's active utilization,
1756 		 * so we are still safe.
1757 		 */
1758 		if (hrtimer_try_to_cancel(&p->dl.inactive_timer) == 1)
1759 			put_task_struct(p);
1760 	}
1761 	sub_rq_bw(&p->dl, &rq->dl);
1762 	rq_unlock(rq, &rf);
1763 }
1764 
1765 static void check_preempt_equal_dl(struct rq *rq, struct task_struct *p)
1766 {
1767 	/*
1768 	 * Current can't be migrated, useless to reschedule,
1769 	 * let's hope p can move out.
1770 	 */
1771 	if (rq->curr->nr_cpus_allowed == 1 ||
1772 	    !cpudl_find(&rq->rd->cpudl, rq->curr, NULL))
1773 		return;
1774 
1775 	/*
1776 	 * p is migratable, so let's not schedule it and
1777 	 * see if it is pushed or pulled somewhere else.
1778 	 */
1779 	if (p->nr_cpus_allowed != 1 &&
1780 	    cpudl_find(&rq->rd->cpudl, p, NULL))
1781 		return;
1782 
1783 	resched_curr(rq);
1784 }
1785 
1786 static int balance_dl(struct rq *rq, struct task_struct *p, struct rq_flags *rf)
1787 {
1788 	if (!on_dl_rq(&p->dl) && need_pull_dl_task(rq, p)) {
1789 		/*
1790 		 * This is OK, because current is on_cpu, which avoids it being
1791 		 * picked for load balancing; preemption/IRQs are still
1792 		 * disabled, avoiding further scheduler activity on it, and we've
1793 		 * not yet started the picking loop.
1794 		 */
1795 		rq_unpin_lock(rq, rf);
1796 		pull_dl_task(rq);
1797 		rq_repin_lock(rq, rf);
1798 	}
1799 
1800 	return sched_stop_runnable(rq) || sched_dl_runnable(rq);
1801 }
1802 #endif /* CONFIG_SMP */
1803 
1804 /*
1805  * Only called when both the current and waking task are -deadline
1806  * tasks.
1807  */
1808 static void check_preempt_curr_dl(struct rq *rq, struct task_struct *p,
1809 				  int flags)
1810 {
1811 	if (dl_entity_preempt(&p->dl, &rq->curr->dl)) {
1812 		resched_curr(rq);
1813 		return;
1814 	}
1815 
1816 #ifdef CONFIG_SMP
1817 	/*
1818 	 * In the unlikely case current and p have the same deadline
1819 	 * let us try to decide what's the best thing to do...
1820 	 */
1821 	if ((p->dl.deadline == rq->curr->dl.deadline) &&
1822 	    !test_tsk_need_resched(rq->curr))
1823 		check_preempt_equal_dl(rq, p);
1824 #endif /* CONFIG_SMP */
1825 }
1826 
1827 #ifdef CONFIG_SCHED_HRTICK
1828 static void start_hrtick_dl(struct rq *rq, struct task_struct *p)
1829 {
1830 	hrtick_start(rq, p->dl.runtime);
1831 }
1832 #else /* !CONFIG_SCHED_HRTICK */
1833 static void start_hrtick_dl(struct rq *rq, struct task_struct *p)
1834 {
1835 }
1836 #endif
1837 
1838 static void set_next_task_dl(struct rq *rq, struct task_struct *p, bool first)
1839 {
1840 	p->se.exec_start = rq_clock_task(rq);
1841 
1842 	/* You can't push away the running task */
1843 	dequeue_pushable_dl_task(rq, p);
1844 
1845 	if (!first)
1846 		return;
1847 
1848 	if (hrtick_enabled_dl(rq))
1849 		start_hrtick_dl(rq, p);
1850 
1851 	if (rq->curr->sched_class != &dl_sched_class)
1852 		update_dl_rq_load_avg(rq_clock_pelt(rq), rq, 0);
1853 
1854 	deadline_queue_push_tasks(rq);
1855 }
1856 
1857 static struct sched_dl_entity *pick_next_dl_entity(struct dl_rq *dl_rq)
1858 {
1859 	struct rb_node *left = rb_first_cached(&dl_rq->root);
1860 
1861 	if (!left)
1862 		return NULL;
1863 
1864 	return rb_entry(left, struct sched_dl_entity, rb_node);
1865 }
1866 
1867 static struct task_struct *pick_task_dl(struct rq *rq)
1868 {
1869 	struct sched_dl_entity *dl_se;
1870 	struct dl_rq *dl_rq = &rq->dl;
1871 	struct task_struct *p;
1872 
1873 	if (!sched_dl_runnable(rq))
1874 		return NULL;
1875 
1876 	dl_se = pick_next_dl_entity(dl_rq);
1877 	BUG_ON(!dl_se);
1878 	p = dl_task_of(dl_se);
1879 
1880 	return p;
1881 }
1882 
1883 static struct task_struct *pick_next_task_dl(struct rq *rq)
1884 {
1885 	struct task_struct *p;
1886 
1887 	p = pick_task_dl(rq);
1888 	if (p)
1889 		set_next_task_dl(rq, p, true);
1890 
1891 	return p;
1892 }
1893 
1894 static void put_prev_task_dl(struct rq *rq, struct task_struct *p)
1895 {
1896 	update_curr_dl(rq);
1897 
1898 	update_dl_rq_load_avg(rq_clock_pelt(rq), rq, 1);
1899 	if (on_dl_rq(&p->dl) && p->nr_cpus_allowed > 1)
1900 		enqueue_pushable_dl_task(rq, p);
1901 }
1902 
1903 /*
1904  * scheduler tick hitting a task of our scheduling class.
1905  *
1906  * NOTE: This function can be called remotely by the tick offload that
1907  * goes along full dynticks. Therefore no local assumption can be made
1908  * and everything must be accessed through the @rq and @curr passed in
1909  * parameters.
1910  */
1911 static void task_tick_dl(struct rq *rq, struct task_struct *p, int queued)
1912 {
1913 	update_curr_dl(rq);
1914 
1915 	update_dl_rq_load_avg(rq_clock_pelt(rq), rq, 1);
1916 	/*
1917 	 * Even when we have runtime, update_curr_dl() might have resulted in us
1918 	 * not being the leftmost task anymore. In that case NEED_RESCHED will
1919 	 * be set and schedule() will start a new hrtick for the next task.
1920 	 */
1921 	if (hrtick_enabled_dl(rq) && queued && p->dl.runtime > 0 &&
1922 	    is_leftmost(p, &rq->dl))
1923 		start_hrtick_dl(rq, p);
1924 }
1925 
1926 static void task_fork_dl(struct task_struct *p)
1927 {
1928 	/*
1929 	 * SCHED_DEADLINE tasks cannot fork and this is achieved through
1930 	 * sched_fork()
1931 	 */
1932 }
1933 
1934 #ifdef CONFIG_SMP
1935 
1936 /* Only try algorithms three times */
1937 #define DL_MAX_TRIES 3
1938 
1939 static int pick_dl_task(struct rq *rq, struct task_struct *p, int cpu)
1940 {
1941 	if (!task_running(rq, p) &&
1942 	    cpumask_test_cpu(cpu, &p->cpus_mask))
1943 		return 1;
1944 	return 0;
1945 }
1946 
1947 /*
1948  * Return the earliest pushable task on this rq that is suitable to be
1949  * executed on the given CPU, or NULL if there is none:
1950  */
1951 static struct task_struct *pick_earliest_pushable_dl_task(struct rq *rq, int cpu)
1952 {
1953 	struct rb_node *next_node = rq->dl.pushable_dl_tasks_root.rb_leftmost;
1954 	struct task_struct *p = NULL;
1955 
1956 	if (!has_pushable_dl_tasks(rq))
1957 		return NULL;
1958 
1959 next_node:
1960 	if (next_node) {
1961 		p = rb_entry(next_node, struct task_struct, pushable_dl_tasks);
1962 
1963 		if (pick_dl_task(rq, p, cpu))
1964 			return p;
1965 
1966 		next_node = rb_next(next_node);
1967 		goto next_node;
1968 	}
1969 
1970 	return NULL;
1971 }
1972 
1973 static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask_dl);
1974 
1975 static int find_later_rq(struct task_struct *task)
1976 {
1977 	struct sched_domain *sd;
1978 	struct cpumask *later_mask = this_cpu_cpumask_var_ptr(local_cpu_mask_dl);
1979 	int this_cpu = smp_processor_id();
1980 	int cpu = task_cpu(task);
1981 
1982 	/* Make sure the mask is initialized first */
1983 	if (unlikely(!later_mask))
1984 		return -1;
1985 
1986 	if (task->nr_cpus_allowed == 1)
1987 		return -1;
1988 
1989 	/*
1990 	 * We have to consider system topology and task affinity
1991 	 * first, then we can look for a suitable CPU.
1992 	 */
1993 	if (!cpudl_find(&task_rq(task)->rd->cpudl, task, later_mask))
1994 		return -1;
1995 
1996 	/*
1997 	 * If we are here, some targets have been found, including
1998 	 * the most suitable which is, among the runqueues where the
1999 	 * current tasks have later deadlines than the task's one, the
2000 	 * rq with the latest possible one.
2001 	 *
2002 	 * Now we check how well this matches with task's
2003 	 * affinity and system topology.
2004 	 *
2005 	 * The last CPU where the task ran is our first
2006 	 * guess, since it is most likely cache-hot there.
2007 	 */
2008 	if (cpumask_test_cpu(cpu, later_mask))
2009 		return cpu;
2010 	/*
2011 	 * Check if this_cpu is to be skipped (i.e., it is
2012 	 * not in the mask) or not.
2013 	 */
2014 	if (!cpumask_test_cpu(this_cpu, later_mask))
2015 		this_cpu = -1;
2016 
2017 	rcu_read_lock();
2018 	for_each_domain(cpu, sd) {
2019 		if (sd->flags & SD_WAKE_AFFINE) {
2020 			int best_cpu;
2021 
2022 			/*
2023 			 * If possible, preempting this_cpu is
2024 			 * cheaper than migrating.
2025 			 */
2026 			if (this_cpu != -1 &&
2027 			    cpumask_test_cpu(this_cpu, sched_domain_span(sd))) {
2028 				rcu_read_unlock();
2029 				return this_cpu;
2030 			}
2031 
2032 			best_cpu = cpumask_any_and_distribute(later_mask,
2033 							      sched_domain_span(sd));
2034 			/*
2035 			 * Last chance: if a CPU that is in both later_mask
2036 			 * and the current sd span is valid, that becomes our
2037 			 * choice. Of course, the latest possible CPU is
2038 			 * already under consideration through later_mask.
2039 			 */
2040 			if (best_cpu < nr_cpu_ids) {
2041 				rcu_read_unlock();
2042 				return best_cpu;
2043 			}
2044 		}
2045 	}
2046 	rcu_read_unlock();
2047 
2048 	/*
2049 	 * At this point, all our guesses failed, we just return
2050  * 'something', and let the caller sort things out.
2051 	 */
2052 	if (this_cpu != -1)
2053 		return this_cpu;
2054 
2055 	cpu = cpumask_any_distribute(later_mask);
2056 	if (cpu < nr_cpu_ids)
2057 		return cpu;
2058 
2059 	return -1;
2060 }
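/*
 * Summary of the heuristic above (added for clarity): among the CPUs that
 * cpudl_find() puts in later_mask, prefer (1) the CPU the task last ran on
 * (likely cache-hot), then (2) this_cpu if it shares a SD_WAKE_AFFINE domain
 * with that CPU (preempting is cheaper than migrating), then (3) any CPU of
 * the mask within such a domain, and only then (4) this_cpu or any CPU of
 * the mask as a last resort.
 */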
2061 
2062 /* Locks the rq it finds */
2063 static struct rq *find_lock_later_rq(struct task_struct *task, struct rq *rq)
2064 {
2065 	struct rq *later_rq = NULL;
2066 	int tries;
2067 	int cpu;
2068 
2069 	for (tries = 0; tries < DL_MAX_TRIES; tries++) {
2070 		cpu = find_later_rq(task);
2071 
2072 		if ((cpu == -1) || (cpu == rq->cpu))
2073 			break;
2074 
2075 		later_rq = cpu_rq(cpu);
2076 
2077 		if (later_rq->dl.dl_nr_running &&
2078 		    !dl_time_before(task->dl.deadline,
2079 					later_rq->dl.earliest_dl.curr)) {
2080 			/*
2081 			 * Target rq has tasks of equal or earlier deadline,
2082 			 * retrying does not release any lock and is unlikely
2083 			 * to yield a different result.
2084 			 */
2085 			later_rq = NULL;
2086 			break;
2087 		}
2088 
2089 		/* Retry if something changed. */
2090 		if (double_lock_balance(rq, later_rq)) {
2091 			if (unlikely(task_rq(task) != rq ||
2092 				     !cpumask_test_cpu(later_rq->cpu, &task->cpus_mask) ||
2093 				     task_running(rq, task) ||
2094 				     !dl_task(task) ||
2095 				     is_migration_disabled(task) ||
2096 				     !task_on_rq_queued(task))) {
2097 				double_unlock_balance(rq, later_rq);
2098 				later_rq = NULL;
2099 				break;
2100 			}
2101 		}
2102 
2103 		/*
2104 		 * If the rq we found has no -deadline task, or
2105 		 * its earliest one has a later deadline than our
2106 		 * task, the rq is a good one.
2107 		 */
2108 		if (!later_rq->dl.dl_nr_running ||
2109 		    dl_time_before(task->dl.deadline,
2110 				   later_rq->dl.earliest_dl.curr))
2111 			break;
2112 
2113 		/* Otherwise we try again. */
2114 		double_unlock_balance(rq, later_rq);
2115 		later_rq = NULL;
2116 	}
2117 
2118 	return later_rq;
2119 }
2120 
2121 static struct task_struct *pick_next_pushable_dl_task(struct rq *rq)
2122 {
2123 	struct task_struct *p;
2124 
2125 	if (!has_pushable_dl_tasks(rq))
2126 		return NULL;
2127 
2128 	p = rb_entry(rq->dl.pushable_dl_tasks_root.rb_leftmost,
2129 		     struct task_struct, pushable_dl_tasks);
2130 
2131 	BUG_ON(rq->cpu != task_cpu(p));
2132 	BUG_ON(task_current(rq, p));
2133 	BUG_ON(p->nr_cpus_allowed <= 1);
2134 
2135 	BUG_ON(!task_on_rq_queued(p));
2136 	BUG_ON(!dl_task(p));
2137 
2138 	return p;
2139 }
2140 
2141 /*
2142  * See if the non running -deadline tasks on this rq
2143  * can be sent to some other CPU where they can preempt
2144  * and start executing.
2145  */
2146 static int push_dl_task(struct rq *rq)
2147 {
2148 	struct task_struct *next_task;
2149 	struct rq *later_rq;
2150 	int ret = 0;
2151 
2152 	if (!rq->dl.overloaded)
2153 		return 0;
2154 
2155 	next_task = pick_next_pushable_dl_task(rq);
2156 	if (!next_task)
2157 		return 0;
2158 
2159 retry:
2160 	/*
2161 	 * If next_task preempts rq->curr, and rq->curr
2162 	 * can move away, it makes sense to just reschedule
2163 	 * without going further in pushing next_task.
2164 	 */
2165 	if (dl_task(rq->curr) &&
2166 	    dl_time_before(next_task->dl.deadline, rq->curr->dl.deadline) &&
2167 	    rq->curr->nr_cpus_allowed > 1) {
2168 		resched_curr(rq);
2169 		return 0;
2170 	}
2171 
2172 	if (is_migration_disabled(next_task))
2173 		return 0;
2174 
2175 	if (WARN_ON(next_task == rq->curr))
2176 		return 0;
2177 
2178 	/* We might release rq lock */
2179 	get_task_struct(next_task);
2180 
2181 	/* Will lock the rq it'll find */
2182 	later_rq = find_lock_later_rq(next_task, rq);
2183 	if (!later_rq) {
2184 		struct task_struct *task;
2185 
2186 		/*
2187 		 * We must check all this again, since
2188 		 * find_lock_later_rq releases rq->lock and it is
2189 		 * then possible that next_task has migrated.
2190 		 */
2191 		task = pick_next_pushable_dl_task(rq);
2192 		if (task == next_task) {
2193 			/*
2194 			 * The task is still there. We don't try
2195 			 * again, some other CPU will pull it when ready.
2196 			 */
2197 			goto out;
2198 		}
2199 
2200 		if (!task)
2201 			/* No more tasks */
2202 			goto out;
2203 
2204 		put_task_struct(next_task);
2205 		next_task = task;
2206 		goto retry;
2207 	}
2208 
2209 	deactivate_task(rq, next_task, 0);
2210 	set_task_cpu(next_task, later_rq->cpu);
2211 
2212 	/*
2213 	 * Update the later_rq clock here, because the clock is used
2214 	 * by the cpufreq_update_util() inside __add_running_bw().
2215 	 */
2216 	update_rq_clock(later_rq);
2217 	activate_task(later_rq, next_task, ENQUEUE_NOCLOCK);
2218 	ret = 1;
2219 
2220 	resched_curr(later_rq);
2221 
2222 	double_unlock_balance(rq, later_rq);
2223 
2224 out:
2225 	put_task_struct(next_task);
2226 
2227 	return ret;
2228 }
2229 
2230 static void push_dl_tasks(struct rq *rq)
2231 {
2232 	/* push_dl_task() will return true if it moved a -deadline task */
2233 	while (push_dl_task(rq))
2234 		;
2235 }
2236 
2237 static void pull_dl_task(struct rq *this_rq)
2238 {
2239 	int this_cpu = this_rq->cpu, cpu;
2240 	struct task_struct *p, *push_task;
2241 	bool resched = false;
2242 	struct rq *src_rq;
2243 	u64 dmin = LONG_MAX;
2244 
2245 	if (likely(!dl_overloaded(this_rq)))
2246 		return;
2247 
2248 	/*
2249 	 * Match the barrier from dl_set_overload(); this guarantees that if we
2250 	 * see overloaded we must also see the dlo_mask bit.
2251 	 */
2252 	smp_rmb();
2253 
2254 	for_each_cpu(cpu, this_rq->rd->dlo_mask) {
2255 		if (this_cpu == cpu)
2256 			continue;
2257 
2258 		src_rq = cpu_rq(cpu);
2259 
2260 		/*
2261 		 * It looks racy, and it is! However, as in sched_rt.c,
2262 		 * we are fine with this.
2263 		 */
2264 		if (this_rq->dl.dl_nr_running &&
2265 		    dl_time_before(this_rq->dl.earliest_dl.curr,
2266 				   src_rq->dl.earliest_dl.next))
2267 			continue;
2268 
2269 		/* Might drop this_rq->lock */
2270 		push_task = NULL;
2271 		double_lock_balance(this_rq, src_rq);
2272 
2273 		/*
2274 		 * If there are no more pullable tasks on the
2275 		 * rq, we're done with it.
2276 		 */
2277 		if (src_rq->dl.dl_nr_running <= 1)
2278 			goto skip;
2279 
2280 		p = pick_earliest_pushable_dl_task(src_rq, this_cpu);
2281 
2282 		/*
2283 		 * We found a task to be pulled if:
2284 		 *  - it preempts our current (if there's one),
2285 		 *  - it will preempt the last one we pulled (if any).
2286 		 */
2287 		if (p && dl_time_before(p->dl.deadline, dmin) &&
2288 		    (!this_rq->dl.dl_nr_running ||
2289 		     dl_time_before(p->dl.deadline,
2290 				    this_rq->dl.earliest_dl.curr))) {
2291 			WARN_ON(p == src_rq->curr);
2292 			WARN_ON(!task_on_rq_queued(p));
2293 
2294 			/*
2295 			 * Then we pull iff p has actually an earlier
2296 			 * deadline than the current task of its runqueue.
2297 			 */
2298 			if (dl_time_before(p->dl.deadline,
2299 					   src_rq->curr->dl.deadline))
2300 				goto skip;
2301 
2302 			if (is_migration_disabled(p)) {
2303 				push_task = get_push_task(src_rq);
2304 			} else {
2305 				deactivate_task(src_rq, p, 0);
2306 				set_task_cpu(p, this_cpu);
2307 				activate_task(this_rq, p, 0);
2308 				dmin = p->dl.deadline;
2309 				resched = true;
2310 			}
2311 
2312 			/* Is there any other task even earlier? */
2313 		}
2314 skip:
2315 		double_unlock_balance(this_rq, src_rq);
2316 
2317 		if (push_task) {
2318 			preempt_disable();
2319 			raw_spin_rq_unlock(this_rq);
2320 			stop_one_cpu_nowait(src_rq->cpu, push_cpu_stop,
2321 					    push_task, &src_rq->push_work);
2322 			preempt_enable();
2323 			raw_spin_rq_lock(this_rq);
2324 		}
2325 	}
2326 
2327 	if (resched)
2328 		resched_curr(this_rq);
2329 }
2330 
2331 /*
2332  * Since the task is not running and a reschedule is not going to happen
2333  * anytime soon on its runqueue, we try pushing it away now.
2334  */
2335 static void task_woken_dl(struct rq *rq, struct task_struct *p)
2336 {
2337 	if (!task_running(rq, p) &&
2338 	    !test_tsk_need_resched(rq->curr) &&
2339 	    p->nr_cpus_allowed > 1 &&
2340 	    dl_task(rq->curr) &&
2341 	    (rq->curr->nr_cpus_allowed < 2 ||
2342 	     !dl_entity_preempt(&p->dl, &rq->curr->dl))) {
2343 		push_dl_tasks(rq);
2344 	}
2345 }
2346 
2347 static void set_cpus_allowed_dl(struct task_struct *p,
2348 				const struct cpumask *new_mask,
2349 				u32 flags)
2350 {
2351 	struct root_domain *src_rd;
2352 	struct rq *rq;
2353 
2354 	BUG_ON(!dl_task(p));
2355 
2356 	rq = task_rq(p);
2357 	src_rd = rq->rd;
2358 	/*
2359 	 * Migrating a SCHED_DEADLINE task between exclusive
2360 	 * cpusets (different root_domains) entails a bandwidth
2361 	 * update. We already made space for us in the destination
2362 	 * domain (see cpuset_can_attach()).
2363 	 */
2364 	if (!cpumask_intersects(src_rd->span, new_mask)) {
2365 		struct dl_bw *src_dl_b;
2366 
2367 		src_dl_b = dl_bw_of(cpu_of(rq));
2368 		/*
2369 		 * We now free resources of the root_domain we are migrating
2370 	 * off. In the worst case, sched_setattr() may temporarily fail
2371 		 * until we complete the update.
2372 		 */
2373 		raw_spin_lock(&src_dl_b->lock);
2374 		__dl_sub(src_dl_b, p->dl.dl_bw, dl_bw_cpus(task_cpu(p)));
2375 		raw_spin_unlock(&src_dl_b->lock);
2376 	}
2377 
2378 	set_cpus_allowed_common(p, new_mask, flags);
2379 }
2380 
2381 /* Assumes rq->lock is held */
2382 static void rq_online_dl(struct rq *rq)
2383 {
2384 	if (rq->dl.overloaded)
2385 		dl_set_overload(rq);
2386 
2387 	cpudl_set_freecpu(&rq->rd->cpudl, rq->cpu);
2388 	if (rq->dl.dl_nr_running > 0)
2389 		cpudl_set(&rq->rd->cpudl, rq->cpu, rq->dl.earliest_dl.curr);
2390 }
2391 
2392 /* Assumes rq->lock is held */
2393 static void rq_offline_dl(struct rq *rq)
2394 {
2395 	if (rq->dl.overloaded)
2396 		dl_clear_overload(rq);
2397 
2398 	cpudl_clear(&rq->rd->cpudl, rq->cpu);
2399 	cpudl_clear_freecpu(&rq->rd->cpudl, rq->cpu);
2400 }
2401 
2402 void __init init_sched_dl_class(void)
2403 {
2404 	unsigned int i;
2405 
2406 	for_each_possible_cpu(i)
2407 		zalloc_cpumask_var_node(&per_cpu(local_cpu_mask_dl, i),
2408 					GFP_KERNEL, cpu_to_node(i));
2409 }
2410 
2411 void dl_add_task_root_domain(struct task_struct *p)
2412 {
2413 	struct rq_flags rf;
2414 	struct rq *rq;
2415 	struct dl_bw *dl_b;
2416 
2417 	raw_spin_lock_irqsave(&p->pi_lock, rf.flags);
2418 	if (!dl_task(p)) {
2419 		raw_spin_unlock_irqrestore(&p->pi_lock, rf.flags);
2420 		return;
2421 	}
2422 
2423 	rq = __task_rq_lock(p, &rf);
2424 
2425 	dl_b = &rq->rd->dl_bw;
2426 	raw_spin_lock(&dl_b->lock);
2427 
2428 	__dl_add(dl_b, p->dl.dl_bw, cpumask_weight(rq->rd->span));
2429 
2430 	raw_spin_unlock(&dl_b->lock);
2431 
2432 	task_rq_unlock(rq, p, &rf);
2433 }
2434 
2435 void dl_clear_root_domain(struct root_domain *rd)
2436 {
2437 	unsigned long flags;
2438 
2439 	raw_spin_lock_irqsave(&rd->dl_bw.lock, flags);
2440 	rd->dl_bw.total_bw = 0;
2441 	raw_spin_unlock_irqrestore(&rd->dl_bw.lock, flags);
2442 }
2443 
2444 #endif /* CONFIG_SMP */
2445 
2446 static void switched_from_dl(struct rq *rq, struct task_struct *p)
2447 {
2448 	/*
2449 	 * task_non_contending() can start the "inactive timer" (if the 0-lag
2450 	 * time is in the future). If the task switches back to dl before
2451 	 * the "inactive timer" fires, it can continue to consume its current
2452 	 * runtime using its current deadline. If it stays outside of
2453 	 * SCHED_DEADLINE until the 0-lag time passes, inactive_task_timer()
2454 	 * will reset the task parameters.
2455 	 */
2456 	if (task_on_rq_queued(p) && p->dl.dl_runtime)
2457 		task_non_contending(p);
2458 
2459 	/*
2460 	 * In case a task is setscheduled out from SCHED_DEADLINE we need to
2461 	 * keep track of that on its cpuset (for correct bandwidth tracking).
2462 	 */
2463 	dec_dl_tasks_cs(p);
2464 
2465 	if (!task_on_rq_queued(p)) {
2466 		/*
2467 		 * Inactive timer is armed. However, p is leaving DEADLINE and
2468 		 * might migrate away from this rq while continuing to run on
2469 		 * some other class. We need to remove its contribution from
2470 		 * this rq running_bw now, or sub_rq_bw (below) will complain.
2471 		 */
2472 		if (p->dl.dl_non_contending)
2473 			sub_running_bw(&p->dl, &rq->dl);
2474 		sub_rq_bw(&p->dl, &rq->dl);
2475 	}
2476 
2477 	/*
2478 	 * We cannot use inactive_task_timer() to invoke sub_running_bw()
2479 	 * at the 0-lag time, because the task could have been migrated
2480 	 * while SCHED_OTHER in the meantime.
2481 	 */
2482 	if (p->dl.dl_non_contending)
2483 		p->dl.dl_non_contending = 0;
2484 
2485 	/*
2486 	 * Since this might be the only -deadline task on the rq,
2487 	 * this is the right place to try to pull some other one
2488 	 * from an overloaded CPU, if any.
2489 	 */
2490 	if (!task_on_rq_queued(p) || rq->dl.dl_nr_running)
2491 		return;
2492 
2493 	deadline_queue_pull_task(rq);
2494 }
2495 
2496 /*
2497  * When switching to -deadline, we may overload the rq, then
2498  * we try to push someone off, if possible.
2499  */
2500 static void switched_to_dl(struct rq *rq, struct task_struct *p)
2501 {
2502 	if (hrtimer_try_to_cancel(&p->dl.inactive_timer) == 1)
2503 		put_task_struct(p);
2504 
2505 	/*
2506 	 * In case a task is setscheduled to SCHED_DEADLINE we need to keep
2507 	 * track of that on its cpuset (for correct bandwidth tracking).
2508 	 */
2509 	inc_dl_tasks_cs(p);
2510 
2511 	/* If p is not queued we will update its parameters at next wakeup. */
2512 	if (!task_on_rq_queued(p)) {
2513 		add_rq_bw(&p->dl, &rq->dl);
2514 
2515 		return;
2516 	}
2517 
2518 	if (rq->curr != p) {
2519 #ifdef CONFIG_SMP
2520 		if (p->nr_cpus_allowed > 1 && rq->dl.overloaded)
2521 			deadline_queue_push_tasks(rq);
2522 #endif
2523 		if (dl_task(rq->curr))
2524 			check_preempt_curr_dl(rq, p, 0);
2525 		else
2526 			resched_curr(rq);
2527 	} else {
2528 		update_dl_rq_load_avg(rq_clock_pelt(rq), rq, 0);
2529 	}
2530 }
2531 
2532 /*
2533  * If the scheduling parameters of a -deadline task changed,
2534  * a push or pull operation might be needed.
2535  */
2536 static void prio_changed_dl(struct rq *rq, struct task_struct *p,
2537 			    int oldprio)
2538 {
2539 	if (task_on_rq_queued(p) || task_current(rq, p)) {
2540 #ifdef CONFIG_SMP
2541 		/*
2542 		 * This might be too much, but unfortunately
2543 		 * we don't have the old deadline value, and
2544 		 * we can't argue if the task is increasing
2545 		 * we can't tell whether the task is increasing
2546 		 */
2547 		if (!rq->dl.overloaded)
2548 			deadline_queue_pull_task(rq);
2549 
2550 		/*
2551 		 * If we now have an earlier deadline task than p,
2552 		 * then reschedule, provided p is still on this
2553 		 * runqueue.
2554 		 */
2555 		if (dl_time_before(rq->dl.earliest_dl.curr, p->dl.deadline))
2556 			resched_curr(rq);
2557 #else
2558 		/*
2559 		 * Again, we don't know if p has an earlier
2560 		 * or later deadline, so let's blindly set a
2561 		 * (maybe not needed) rescheduling point.
2562 		 */
2563 		resched_curr(rq);
2564 #endif /* CONFIG_SMP */
2565 	}
2566 }
2567 
2568 DEFINE_SCHED_CLASS(dl) = {
2569 
2570 	.enqueue_task		= enqueue_task_dl,
2571 	.dequeue_task		= dequeue_task_dl,
2572 	.yield_task		= yield_task_dl,
2573 
2574 	.check_preempt_curr	= check_preempt_curr_dl,
2575 
2576 	.pick_next_task		= pick_next_task_dl,
2577 	.put_prev_task		= put_prev_task_dl,
2578 	.set_next_task		= set_next_task_dl,
2579 
2580 #ifdef CONFIG_SMP
2581 	.balance		= balance_dl,
2582 	.pick_task		= pick_task_dl,
2583 	.select_task_rq		= select_task_rq_dl,
2584 	.migrate_task_rq	= migrate_task_rq_dl,
2585 	.set_cpus_allowed       = set_cpus_allowed_dl,
2586 	.rq_online              = rq_online_dl,
2587 	.rq_offline             = rq_offline_dl,
2588 	.task_woken		= task_woken_dl,
2589 	.find_lock_rq		= find_lock_later_rq,
2590 #endif
2591 
2592 	.task_tick		= task_tick_dl,
2593 	.task_fork              = task_fork_dl,
2594 
2595 	.prio_changed           = prio_changed_dl,
2596 	.switched_from		= switched_from_dl,
2597 	.switched_to		= switched_to_dl,
2598 
2599 	.update_curr		= update_curr_dl,
2600 };
2601 
2602 /* Used for dl_bw check and update, used under sched_rt_handler()::mutex */
2603 static u64 dl_generation;
2604 
2605 int sched_dl_global_validate(void)
2606 {
2607 	u64 runtime = global_rt_runtime();
2608 	u64 period = global_rt_period();
2609 	u64 new_bw = to_ratio(period, runtime);
2610 	u64 gen = ++dl_generation;
2611 	struct dl_bw *dl_b;
2612 	int cpu, cpus, ret = 0;
2613 	unsigned long flags;
2614 
2615 	/*
2616 	 * Here we want to check that the bandwidth is not being set to a
2617 	 * value smaller than the currently allocated bandwidth in
2618 	 * any of the root_domains.
2619 	 */
2620 	for_each_possible_cpu(cpu) {
2621 		rcu_read_lock_sched();
2622 
2623 		if (dl_bw_visited(cpu, gen))
2624 			goto next;
2625 
2626 		dl_b = dl_bw_of(cpu);
2627 		cpus = dl_bw_cpus(cpu);
2628 
2629 		raw_spin_lock_irqsave(&dl_b->lock, flags);
2630 		if (new_bw * cpus < dl_b->total_bw)
2631 			ret = -EBUSY;
2632 		raw_spin_unlock_irqrestore(&dl_b->lock, flags);
2633 
2634 next:
2635 		rcu_read_unlock_sched();
2636 
2637 		if (ret)
2638 			break;
2639 	}
2640 
2641 	return ret;
2642 }
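/*
 * Worked example (illustrative; assumes BW_SHIFT == 20 from sched.h): with
 * the default sysctls sched_rt_period_us = 1000000 and
 * sched_rt_runtime_us = 950000, new_bw = to_ratio(period, runtime), i.e.
 * roughly 0.95 * (1 << 20) = 996147. On a 4-CPU root domain, a sysctl write
 * that would make new_bw * 4 drop below the already-reserved total_bw is
 * rejected above with -EBUSY.
 */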
2643 
2644 static void init_dl_rq_bw_ratio(struct dl_rq *dl_rq)
2645 {
2646 	if (global_rt_runtime() == RUNTIME_INF) {
2647 		dl_rq->bw_ratio = 1 << RATIO_SHIFT;
2648 		dl_rq->extra_bw = 1 << BW_SHIFT;
2649 	} else {
2650 		dl_rq->bw_ratio = to_ratio(global_rt_runtime(),
2651 			  global_rt_period()) >> (BW_SHIFT - RATIO_SHIFT);
2652 		dl_rq->extra_bw = to_ratio(global_rt_period(),
2653 						    global_rt_runtime());
2654 	}
2655 }
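/*
 * Worked example (illustrative; assumes BW_SHIFT == 20 and RATIO_SHIFT == 8
 * from sched.h): with the default 95% limit, bw_ratio ends up holding
 * period/runtime in 8-bit fixed point, roughly 1.05 * 256 ~= 269, while
 * extra_bw holds the usable fraction in 20-bit fixed point, roughly
 * 0.95 * (1 << 20) ~= 996147. Both are consumed by grub_reclaim() to scale
 * how much runtime a reclaiming (SCHED_FLAG_RECLAIM) task is charged.
 */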
2656 
2657 void sched_dl_do_global(void)
2658 {
2659 	u64 new_bw = -1;
2660 	u64 gen = ++dl_generation;
2661 	struct dl_bw *dl_b;
2662 	int cpu;
2663 	unsigned long flags;
2664 
2665 	def_dl_bandwidth.dl_period = global_rt_period();
2666 	def_dl_bandwidth.dl_runtime = global_rt_runtime();
2667 
2668 	if (global_rt_runtime() != RUNTIME_INF)
2669 		new_bw = to_ratio(global_rt_period(), global_rt_runtime());
2670 
2671 	for_each_possible_cpu(cpu) {
2672 		rcu_read_lock_sched();
2673 
2674 		if (dl_bw_visited(cpu, gen)) {
2675 			rcu_read_unlock_sched();
2676 			continue;
2677 		}
2678 
2679 		dl_b = dl_bw_of(cpu);
2680 
2681 		raw_spin_lock_irqsave(&dl_b->lock, flags);
2682 		dl_b->bw = new_bw;
2683 		raw_spin_unlock_irqrestore(&dl_b->lock, flags);
2684 
2685 		rcu_read_unlock_sched();
2686 		init_dl_rq_bw_ratio(&cpu_rq(cpu)->dl);
2687 	}
2688 }
2689 
2690 /*
2691  * We must be sure that accepting a new task (or allowing changing the
2692  * parameters of an existing one) is consistent with the bandwidth
2693  * constraints. If yes, this function also accordingly updates the currently
2694  * allocated bandwidth to reflect the new situation.
2695  *
2696  * This function is called while holding p's rq->lock.
2697  */
2698 int sched_dl_overflow(struct task_struct *p, int policy,
2699 		      const struct sched_attr *attr)
2700 {
2701 	u64 period = attr->sched_period ?: attr->sched_deadline;
2702 	u64 runtime = attr->sched_runtime;
2703 	u64 new_bw = dl_policy(policy) ? to_ratio(period, runtime) : 0;
2704 	int cpus, err = -1, cpu = task_cpu(p);
2705 	struct dl_bw *dl_b = dl_bw_of(cpu);
2706 	unsigned long cap;
2707 
2708 	if (attr->sched_flags & SCHED_FLAG_SUGOV)
2709 		return 0;
2710 
2711 	/* !deadline task may carry old deadline bandwidth */
2712 	if (new_bw == p->dl.dl_bw && task_has_dl_policy(p))
2713 		return 0;
2714 
2715 	/*
2716 	 * Whether a task enters, leaves, or stays -deadline but changes
2717 	 * its parameters, we may need to update accordingly the total
2718 	 * allocated bandwidth of the container.
2719 	 */
2720 	raw_spin_lock(&dl_b->lock);
2721 	cpus = dl_bw_cpus(cpu);
2722 	cap = dl_bw_capacity(cpu);
2723 
2724 	if (dl_policy(policy) && !task_has_dl_policy(p) &&
2725 	    !__dl_overflow(dl_b, cap, 0, new_bw)) {
2726 		if (hrtimer_active(&p->dl.inactive_timer))
2727 			__dl_sub(dl_b, p->dl.dl_bw, cpus);
2728 		__dl_add(dl_b, new_bw, cpus);
2729 		err = 0;
2730 	} else if (dl_policy(policy) && task_has_dl_policy(p) &&
2731 		   !__dl_overflow(dl_b, cap, p->dl.dl_bw, new_bw)) {
2732 		/*
2733 		 * XXX this is slightly incorrect: when the task
2734 		 * utilization decreases, we should delay the total
2735 		 * utilization change until the task's 0-lag point.
2736 		 * But this would require to set the task's "inactive
2737 		 * timer" when the task is not inactive.
2738 		 */
2739 		__dl_sub(dl_b, p->dl.dl_bw, cpus);
2740 		__dl_add(dl_b, new_bw, cpus);
2741 		dl_change_utilization(p, new_bw);
2742 		err = 0;
2743 	} else if (!dl_policy(policy) && task_has_dl_policy(p)) {
2744 		/*
2745 		 * Do not decrease the total deadline utilization here,
2746 		 * switched_from_dl() will take care to do it at the correct
2747 		 * (0-lag) time.
2748 		 */
2749 		err = 0;
2750 	}
2751 	raw_spin_unlock(&dl_b->lock);
2752 
2753 	return err;
2754 }
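/*
 * Worked example (illustrative; assumes BW_SHIFT == 20): a task asking for
 * sched_runtime = 10ms with sched_deadline = sched_period = 100ms gets
 * new_bw = to_ratio(100ms, 10ms) ~= (1 << 20) / 10 = 104857, i.e. ~10% of a
 * CPU in fixed point. Admission succeeds only if adding that 10% to the
 * root domain's total_bw stays within the fraction of the domain capacity
 * allowed by __dl_overflow().
 */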
2755 
2756 /*
2757  * This function initializes the sched_dl_entity of a newly becoming
2758  * SCHED_DEADLINE task.
2759  *
2760  * Only the static values are considered here, the actual runtime and the
2761  * absolute deadline will be properly calculated when the task is enqueued
2762  * for the first time with its new policy.
2763  */
2764 void __setparam_dl(struct task_struct *p, const struct sched_attr *attr)
2765 {
2766 	struct sched_dl_entity *dl_se = &p->dl;
2767 
2768 	dl_se->dl_runtime = attr->sched_runtime;
2769 	dl_se->dl_deadline = attr->sched_deadline;
2770 	dl_se->dl_period = attr->sched_period ?: dl_se->dl_deadline;
2771 	dl_se->flags = attr->sched_flags & SCHED_DL_FLAGS;
2772 	dl_se->dl_bw = to_ratio(dl_se->dl_period, dl_se->dl_runtime);
2773 	dl_se->dl_density = to_ratio(dl_se->dl_deadline, dl_se->dl_runtime);
2774 }
2775 
2776 void __getparam_dl(struct task_struct *p, struct sched_attr *attr)
2777 {
2778 	struct sched_dl_entity *dl_se = &p->dl;
2779 
2780 	attr->sched_priority = p->rt_priority;
2781 	attr->sched_runtime = dl_se->dl_runtime;
2782 	attr->sched_deadline = dl_se->dl_deadline;
2783 	attr->sched_period = dl_se->dl_period;
2784 	attr->sched_flags &= ~SCHED_DL_FLAGS;
2785 	attr->sched_flags |= dl_se->flags;
2786 }
2787 
2788 /*
2789  * Default limits for DL period; on the top end we guard against small util
2790  * tasks still getting ridiculously long effective runtimes, on the bottom end we
2791  * guard against timer DoS.
2792  */
2793 unsigned int sysctl_sched_dl_period_max = 1 << 22; /* ~4 seconds */
2794 unsigned int sysctl_sched_dl_period_min = 100;     /* 100 us */
2795 
2796 /*
2797  * This function validates the new parameters of a -deadline task.
2798  * We ask for the deadline to be non-zero and greater than or equal
2799  * to the runtime, and for the period to be either zero or greater
2800  * than or equal to the deadline. Furthermore, we have to be sure that
2801  * user parameters are above the internal resolution of 1us (we
2802  * check sched_runtime only since it is always the smaller one) and
2803  * below 2^63 ns (we have to check both sched_deadline and
2804  * sched_period, as the latter can be zero).
2805  */
2806 bool __checkparam_dl(const struct sched_attr *attr)
2807 {
2808 	u64 period, max, min;
2809 
2810 	/* special dl tasks don't actually use any parameter */
2811 	if (attr->sched_flags & SCHED_FLAG_SUGOV)
2812 		return true;
2813 
2814 	/* deadline != 0 */
2815 	if (attr->sched_deadline == 0)
2816 		return false;
2817 
2818 	/*
2819 	 * Since we truncate DL_SCALE bits, make sure we're at least
2820 	 * that big.
2821 	 */
2822 	if (attr->sched_runtime < (1ULL << DL_SCALE))
2823 		return false;
2824 
2825 	/*
2826 	 * Since we use the MSB for wrap-around and sign issues, make
2827 	 * sure it's not set (mind that period can be equal to zero).
2828 	 */
2829 	if (attr->sched_deadline & (1ULL << 63) ||
2830 	    attr->sched_period & (1ULL << 63))
2831 		return false;
2832 
2833 	period = attr->sched_period;
2834 	if (!period)
2835 		period = attr->sched_deadline;
2836 
2837 	/* runtime <= deadline <= period (if period != 0) */
2838 	if (period < attr->sched_deadline ||
2839 	    attr->sched_deadline < attr->sched_runtime)
2840 		return false;
2841 
2842 	max = (u64)READ_ONCE(sysctl_sched_dl_period_max) * NSEC_PER_USEC;
2843 	min = (u64)READ_ONCE(sysctl_sched_dl_period_min) * NSEC_PER_USEC;
2844 
2845 	if (period < min || period > max)
2846 		return false;
2847 
2848 	return true;
2849 }
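/*
 * Parameter sketch (illustrative only): values accepted above must satisfy
 * runtime <= deadline <= period, with runtime >= 1 << DL_SCALE ns and the
 * period within [sysctl_sched_dl_period_min, sysctl_sched_dl_period_max]
 * microseconds. A typical userspace request (sched_setattr() has no glibc
 * wrapper, hence syscall(2)):
 *
 *	struct sched_attr attr = {
 *		.size		= sizeof(attr),
 *		.sched_policy	= SCHED_DEADLINE,
 *		.sched_runtime	=  10 * 1000 * 1000,	//  10 ms
 *		.sched_deadline	=  30 * 1000 * 1000,	//  30 ms
 *		.sched_period	= 100 * 1000 * 1000,	// 100 ms
 *	};
 *	syscall(SYS_sched_setattr, 0, &attr, 0);
 */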
2850 
2851 /*
2852  * This function clears the sched_dl_entity static params.
2853  */
2854 void __dl_clear_params(struct task_struct *p)
2855 {
2856 	struct sched_dl_entity *dl_se = &p->dl;
2857 
2858 	dl_se->dl_runtime		= 0;
2859 	dl_se->dl_deadline		= 0;
2860 	dl_se->dl_period		= 0;
2861 	dl_se->flags			= 0;
2862 	dl_se->dl_bw			= 0;
2863 	dl_se->dl_density		= 0;
2864 
2865 	dl_se->dl_throttled		= 0;
2866 	dl_se->dl_yielded		= 0;
2867 	dl_se->dl_non_contending	= 0;
2868 	dl_se->dl_overrun		= 0;
2869 
2870 #ifdef CONFIG_RT_MUTEXES
2871 	dl_se->pi_se			= dl_se;
2872 #endif
2873 }
2874 
2875 bool dl_param_changed(struct task_struct *p, const struct sched_attr *attr)
2876 {
2877 	struct sched_dl_entity *dl_se = &p->dl;
2878 
2879 	if (dl_se->dl_runtime != attr->sched_runtime ||
2880 	    dl_se->dl_deadline != attr->sched_deadline ||
2881 	    dl_se->dl_period != attr->sched_period ||
2882 	    dl_se->flags != (attr->sched_flags & SCHED_DL_FLAGS))
2883 		return true;
2884 
2885 	return false;
2886 }
2887 
2888 #ifdef CONFIG_SMP
2889 int dl_cpuset_cpumask_can_shrink(const struct cpumask *cur,
2890 				 const struct cpumask *trial)
2891 {
2892 	int ret = 1, trial_cpus;
2893 	struct dl_bw *cur_dl_b;
2894 	unsigned long flags;
2895 
2896 	rcu_read_lock_sched();
2897 	cur_dl_b = dl_bw_of(cpumask_any(cur));
2898 	trial_cpus = cpumask_weight(trial);
2899 
2900 	raw_spin_lock_irqsave(&cur_dl_b->lock, flags);
2901 	if (cur_dl_b->bw != -1 &&
2902 	    cur_dl_b->bw * trial_cpus < cur_dl_b->total_bw)
2903 		ret = 0;
2904 	raw_spin_unlock_irqrestore(&cur_dl_b->lock, flags);
2905 	rcu_read_unlock_sched();
2906 
2907 	return ret;
2908 }
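/*
 * Example (illustrative; assumes BW_SHIFT == 20 and the default 95% limit,
 * so bw ~= 996147 per CPU): if the -deadline tasks in a 4-CPU cpuset have
 * reserved a total_bw worth ~2.5 CPUs (~2621440), shrinking the cpuset to
 * 2 CPUs is refused here, since 2 * bw ~= 1992294 < total_bw.
 */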
2909 
2910 enum dl_bw_request {
2911 	dl_bw_req_check_overflow = 0,
2912 	dl_bw_req_alloc,
2913 	dl_bw_req_free
2914 };
2915 
2916 static int dl_bw_manage(enum dl_bw_request req, int cpu, u64 dl_bw)
2917 {
2918 	unsigned long flags;
2919 	struct dl_bw *dl_b;
2920 	bool overflow = 0;
2921 
2922 	rcu_read_lock_sched();
2923 	dl_b = dl_bw_of(cpu);
2924 	raw_spin_lock_irqsave(&dl_b->lock, flags);
2925 
2926 	if (req == dl_bw_req_free) {
2927 		__dl_sub(dl_b, dl_bw, dl_bw_cpus(cpu));
2928 	} else {
2929 		unsigned long cap = dl_bw_capacity(cpu);
2930 
2931 		overflow = __dl_overflow(dl_b, cap, 0, dl_bw);
2932 
2933 		if (req == dl_bw_req_alloc && !overflow) {
2934 			/*
2935 			 * We reserve space in the destination
2936 			 * root_domain, as we can't fail after this point.
2937 			 * We will free resources in the source root_domain
2938 			 * later on (see set_cpus_allowed_dl()).
2939 			 */
2940 			__dl_add(dl_b, dl_bw, dl_bw_cpus(cpu));
2941 		}
2942 	}
2943 
2944 	raw_spin_unlock_irqrestore(&dl_b->lock, flags);
2945 	rcu_read_unlock_sched();
2946 
2947 	return overflow ? -EBUSY : 0;
2948 }
2949 
2950 int dl_bw_check_overflow(int cpu)
2951 {
2952 	return dl_bw_manage(dl_bw_req_check_overflow, cpu, 0);
2953 }
2954 
2955 int dl_bw_alloc(int cpu, u64 dl_bw)
2956 {
2957 	return dl_bw_manage(dl_bw_req_alloc, cpu, dl_bw);
2958 }
2959 
2960 void dl_bw_free(int cpu, u64 dl_bw)
2961 {
2962 	dl_bw_manage(dl_bw_req_free, cpu, dl_bw);
2963 }
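/*
 * Note (added for clarity): dl_bw_alloc() and dl_bw_free() are typically
 * used in pairs by the cpuset attach path: bandwidth is reserved in the
 * destination root domain before a -deadline task is moved, and given back
 * if the attach is cancelled or once the reservation is no longer needed.
 */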
2964 #endif
2965 
2966 #ifdef CONFIG_SCHED_DEBUG
2967 void print_dl_stats(struct seq_file *m, int cpu)
2968 {
2969 	print_dl_rq(m, cpu, &cpu_rq(cpu)->dl);
2970 }
2971 #endif /* CONFIG_SCHED_DEBUG */
2972