1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3 * RT-Mutexes: simple blocking mutual exclusion locks with PI support
4 *
5 * started by Ingo Molnar and Thomas Gleixner.
6 *
7 * Copyright (C) 2004-2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
8 * Copyright (C) 2005-2006 Timesys Corp., Thomas Gleixner <tglx@timesys.com>
9 * Copyright (C) 2005 Kihon Technologies Inc., Steven Rostedt
10 * Copyright (C) 2006 Esben Nielsen
11 * Adaptive Spinlocks:
12 * Copyright (C) 2008 Novell, Inc., Gregory Haskins, Sven Dietrich,
13 * and Peter Morreale,
14 * Adaptive Spinlocks simplification:
15 * Copyright (C) 2008 Red Hat, Inc., Steven Rostedt <srostedt@redhat.com>
16 *
17 * See Documentation/locking/rt-mutex-design.rst for details.
18 */
19 #include <linux/sched.h>
20 #include <linux/sched/debug.h>
21 #include <linux/sched/deadline.h>
22 #include <linux/sched/signal.h>
23 #include <linux/sched/rt.h>
24 #include <linux/sched/wake_q.h>
25 #include <linux/ww_mutex.h>
26 #include <trace/hooks/dtask.h>
27
28 #include "rtmutex_common.h"
29
30 #ifndef WW_RT
31 # define build_ww_mutex() (false)
32 # define ww_container_of(rtm) NULL
33
static inline int __ww_mutex_add_waiter(struct rt_mutex_waiter *waiter,
					struct rt_mutex *lock,
					struct ww_acquire_ctx *ww_ctx)
37 {
38 return 0;
39 }
40
static inline void __ww_mutex_check_waiters(struct rt_mutex *lock,
					    struct ww_acquire_ctx *ww_ctx)
43 {
44 }
45
static inline void ww_mutex_lock_acquired(struct ww_mutex *lock,
					  struct ww_acquire_ctx *ww_ctx)
48 {
49 }
50
static inline int __ww_mutex_check_kill(struct rt_mutex *lock,
					struct rt_mutex_waiter *waiter,
					struct ww_acquire_ctx *ww_ctx)
54 {
55 return 0;
56 }
57
58 #else
59 # define build_ww_mutex() (true)
60 # define ww_container_of(rtm) container_of(rtm, struct ww_mutex, base)
61 # include "ww_mutex.h"
62 #endif
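
/*
 * build_ww_mutex() expands to a compile-time constant, so the
 * "if (build_ww_mutex() && ww_ctx)" branches below are optimized out
 * entirely when this file is not built with ww_mutex (WW_RT) support.
 */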
63
64 /*
65 * lock->owner state tracking:
66 *
67 * lock->owner holds the task_struct pointer of the owner. Bit 0
68 * is used to keep track of the "lock has waiters" state.
69 *
70 * owner bit0
71 * NULL 0 lock is free (fast acquire possible)
72 * NULL 1 lock is free and has waiters and the top waiter
73 * is going to take the lock*
74 * taskpointer 0 lock is held (fast release possible)
75 * taskpointer 1 lock is held and has waiters**
76 *
77 * The fast atomic compare exchange based acquire and release is only
78 * possible when bit 0 of lock->owner is 0.
79 *
80 * (*) It also can be a transitional state when grabbing the lock
* with ->wait_lock held. To prevent any fast path cmpxchg to the lock,
82 * we need to set the bit0 before looking at the lock, and the owner may be
83 * NULL in this small time, hence this can be a transitional state.
84 *
85 * (**) There is a small time when bit 0 is set but there are no
86 * waiters. This can happen when grabbing the lock in the slow path.
87 * To prevent a cmpxchg of the owner releasing the lock, we need to
88 * set this bit before looking at the lock.
89 */
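
/*
 * A minimal sketch of the encoding described above, assuming
 * RT_MUTEX_HAS_WAITERS == 1UL as defined in rtmutex_common.h:
 *
 *	encoded = (unsigned long)owner | (has_waiters ? RT_MUTEX_HAS_WAITERS : 0);
 *	owner   = (struct task_struct *)(encoded & ~RT_MUTEX_HAS_WAITERS);
 *
 * rt_mutex_owner_encode() below produces the former; rt_mutex_owner()
 * reverses it before the pointer is dereferenced.
 */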
90
91 static __always_inline struct task_struct *
rt_mutex_owner_encode(struct rt_mutex_base *lock, struct task_struct *owner)
93 {
94 unsigned long val = (unsigned long)owner;
95
96 if (rt_mutex_has_waiters(lock))
97 val |= RT_MUTEX_HAS_WAITERS;
98
99 return (struct task_struct *)val;
100 }
101
102 static __always_inline void
rt_mutex_set_owner(struct rt_mutex_base *lock, struct task_struct *owner)
104 {
105 /*
106 * lock->wait_lock is held but explicit acquire semantics are needed
107 * for a new lock owner so WRITE_ONCE is insufficient.
108 */
109 xchg_acquire(&lock->owner, rt_mutex_owner_encode(lock, owner));
110 }
111
static __always_inline void rt_mutex_clear_owner(struct rt_mutex_base *lock)
113 {
114 /* lock->wait_lock is held so the unlock provides release semantics. */
115 WRITE_ONCE(lock->owner, rt_mutex_owner_encode(lock, NULL));
116 }
117
static __always_inline void clear_rt_mutex_waiters(struct rt_mutex_base *lock)
119 {
120 lock->owner = (struct task_struct *)
121 ((unsigned long)lock->owner & ~RT_MUTEX_HAS_WAITERS);
122 }
123
124 static __always_inline void
fixup_rt_mutex_waiters(struct rt_mutex_base *lock, bool acquire_lock)
126 {
127 unsigned long owner, *p = (unsigned long *) &lock->owner;
128
129 if (rt_mutex_has_waiters(lock))
130 return;
131
132 /*
133 * The rbtree has no waiters enqueued, now make sure that the
134 * lock->owner still has the waiters bit set, otherwise the
135 * following can happen:
136 *
137 * CPU 0 CPU 1 CPU2
138 * l->owner=T1
139 * rt_mutex_lock(l)
140 * lock(l->lock)
141 * l->owner = T1 | HAS_WAITERS;
142 * enqueue(T2)
143 * boost()
144 * unlock(l->lock)
145 * block()
146 *
147 * rt_mutex_lock(l)
148 * lock(l->lock)
149 * l->owner = T1 | HAS_WAITERS;
150 * enqueue(T3)
151 * boost()
152 * unlock(l->lock)
153 * block()
154 * signal(->T2) signal(->T3)
155 * lock(l->lock)
156 * dequeue(T2)
157 * deboost()
158 * unlock(l->lock)
159 * lock(l->lock)
160 * dequeue(T3)
161 * ==> wait list is empty
162 * deboost()
163 * unlock(l->lock)
164 * lock(l->lock)
165 * fixup_rt_mutex_waiters()
166 * if (wait_list_empty(l) {
167 * l->owner = owner
168 * owner = l->owner & ~HAS_WAITERS;
169 * ==> l->owner = T1
170 * }
171 * lock(l->lock)
172 * rt_mutex_unlock(l) fixup_rt_mutex_waiters()
173 * if (wait_list_empty(l) {
174 * owner = l->owner & ~HAS_WAITERS;
175 * cmpxchg(l->owner, T1, NULL)
176 * ===> Success (l->owner = NULL)
177 *
178 * l->owner = owner
179 * ==> l->owner = T1
180 * }
181 *
182 * With the check for the waiter bit in place T3 on CPU2 will not
183 * overwrite. All tasks fiddling with the waiters bit are
184 * serialized by l->lock, so nothing else can modify the waiters
185 * bit. If the bit is set then nothing can change l->owner either
186 * so the simple RMW is safe. The cmpxchg() will simply fail if it
187 * happens in the middle of the RMW because the waiters bit is
188 * still set.
189 */
190 owner = READ_ONCE(*p);
191 if (owner & RT_MUTEX_HAS_WAITERS) {
192 /*
193 * See rt_mutex_set_owner() and rt_mutex_clear_owner() on
194 * why xchg_acquire() is used for updating owner for
195 * locking and WRITE_ONCE() for unlocking.
196 *
197 * WRITE_ONCE() would work for the acquire case too, but
198 * in case that the lock acquisition failed it might
199 * force other lockers into the slow path unnecessarily.
200 */
201 if (acquire_lock)
202 xchg_acquire(p, owner & ~RT_MUTEX_HAS_WAITERS);
203 else
204 WRITE_ONCE(*p, owner & ~RT_MUTEX_HAS_WAITERS);
205 }
206 }
207
208 /*
209 * We can speed up the acquire/release, if there's no debugging state to be
210 * set up.
211 */
212 #ifndef CONFIG_DEBUG_RT_MUTEXES
static __always_inline bool rt_mutex_cmpxchg_acquire(struct rt_mutex_base *lock,
						     struct task_struct *old,
						     struct task_struct *new)
216 {
217 return try_cmpxchg_acquire(&lock->owner, &old, new);
218 }
219
static __always_inline bool rt_mutex_cmpxchg_release(struct rt_mutex_base *lock,
						     struct task_struct *old,
						     struct task_struct *new)
223 {
224 return try_cmpxchg_release(&lock->owner, &old, new);
225 }
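
/*
 * These helpers back the lock/unlock fast paths further down, roughly
 * (sketch; see __rt_mutex_lock() and __rt_mutex_unlock()):
 *
 *	if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current)))
 *		return 0;		<- acquired, wait_lock never taken
 *	...
 *	if (likely(rt_mutex_cmpxchg_release(lock, current, NULL)))
 *		return;			<- released, wait_lock never taken
 */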
226
227 /*
228 * Callers must hold the ->wait_lock -- which is the whole purpose as we force
229 * all future threads that attempt to [Rmw] the lock to the slowpath. As such
230 * relaxed semantics suffice.
231 */
static __always_inline void mark_rt_mutex_waiters(struct rt_mutex_base *lock)
233 {
234 unsigned long owner, *p = (unsigned long *) &lock->owner;
235
236 do {
237 owner = *p;
238 } while (cmpxchg_relaxed(p, owner,
239 owner | RT_MUTEX_HAS_WAITERS) != owner);
240
241 /*
242 * The cmpxchg loop above is relaxed to avoid back-to-back ACQUIRE
243 * operations in the event of contention. Ensure the successful
244 * cmpxchg is visible.
245 */
246 smp_mb__after_atomic();
247 }
248
249 /*
250 * Safe fastpath aware unlock:
251 * 1) Clear the waiters bit
252 * 2) Drop lock->wait_lock
253 * 3) Try to unlock the lock with cmpxchg
254 */
static __always_inline bool unlock_rt_mutex_safe(struct rt_mutex_base *lock,
						 unsigned long flags)
257 __releases(lock->wait_lock)
258 {
259 struct task_struct *owner = rt_mutex_owner(lock);
260
261 clear_rt_mutex_waiters(lock);
262 raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
263 /*
264 * If a new waiter comes in between the unlock and the cmpxchg
265 * we have two situations:
266 *
267 * unlock(wait_lock);
268 * lock(wait_lock);
269 * cmpxchg(p, owner, 0) == owner
270 * mark_rt_mutex_waiters(lock);
271 * acquire(lock);
272 * or:
273 *
274 * unlock(wait_lock);
275 * lock(wait_lock);
276 * mark_rt_mutex_waiters(lock);
277 *
278 * cmpxchg(p, owner, 0) != owner
279 * enqueue_waiter();
280 * unlock(wait_lock);
281 * lock(wait_lock);
282 * wake waiter();
283 * unlock(wait_lock);
284 * lock(wait_lock);
285 * acquire(lock);
286 */
287 return rt_mutex_cmpxchg_release(lock, owner, NULL);
288 }
289
290 #else
static __always_inline bool rt_mutex_cmpxchg_acquire(struct rt_mutex_base *lock,
						     struct task_struct *old,
						     struct task_struct *new)
{
	return false;
}
298
static __always_inline bool rt_mutex_cmpxchg_release(struct rt_mutex_base *lock,
						     struct task_struct *old,
						     struct task_struct *new)
302 {
303 return false;
304 }
305
static __always_inline void mark_rt_mutex_waiters(struct rt_mutex_base *lock)
307 {
308 lock->owner = (struct task_struct *)
309 ((unsigned long)lock->owner | RT_MUTEX_HAS_WAITERS);
310 }
311
312 /*
313 * Simple slow path only version: lock->owner is protected by lock->wait_lock.
314 */
static __always_inline bool unlock_rt_mutex_safe(struct rt_mutex_base *lock,
						 unsigned long flags)
317 __releases(lock->wait_lock)
318 {
319 lock->owner = NULL;
320 raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
321 return true;
322 }
323 #endif
324
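/*
 * Normalize the priority used for waiter ordering: any non-RT (and non-DL)
 * priority is mapped to DEFAULT_PRIO, so all SCHED_OTHER waiters compare
 * equal and only RT priorities and DL deadlines order the waiter trees.
 */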
static __always_inline int __waiter_prio(struct task_struct *task)
326 {
327 int prio = task->prio;
328
329 if (!rt_prio(prio))
330 return DEFAULT_PRIO;
331
332 return prio;
333 }
334
335 static __always_inline void
waiter_update_prio(struct rt_mutex_waiter *waiter, struct task_struct *task)
337 {
338 waiter->prio = __waiter_prio(task);
339 waiter->deadline = task->dl.deadline;
340 }
341
342 /*
343 * Only use with rt_mutex_waiter_{less,equal}()
344 */
345 #define task_to_waiter(p) \
346 &(struct rt_mutex_waiter){ .prio = __waiter_prio(p), .deadline = (p)->dl.deadline }
347
static __always_inline int rt_mutex_waiter_less(struct rt_mutex_waiter *left,
						struct rt_mutex_waiter *right)
350 {
351 if (left->prio < right->prio)
352 return 1;
353
354 /*
355 * If both waiters have dl_prio(), we check the deadlines of the
356 * associated tasks.
357 * If left waiter has a dl_prio(), and we didn't return 1 above,
358 * then right waiter has a dl_prio() too.
359 */
360 if (dl_prio(left->prio))
361 return dl_time_before(left->deadline, right->deadline);
362
363 return 0;
364 }
365
static __always_inline int rt_mutex_waiter_equal(struct rt_mutex_waiter *left,
						 struct rt_mutex_waiter *right)
368 {
369 if (left->prio != right->prio)
370 return 0;
371
372 /*
373 * If both waiters have dl_prio(), we check the deadlines of the
374 * associated tasks.
375 * If left waiter has a dl_prio(), and we didn't return 0 above,
376 * then right waiter has a dl_prio() too.
377 */
378 if (dl_prio(left->prio))
379 return left->deadline == right->deadline;
380
381 return 1;
382 }
383
static inline bool rt_mutex_steal(struct rt_mutex_waiter *waiter,
				  struct rt_mutex_waiter *top_waiter)
386 {
387 if (rt_mutex_waiter_less(waiter, top_waiter))
388 return true;
389
390 #ifdef RT_MUTEX_BUILD_SPINLOCKS
391 /*
392 * Note that RT tasks are excluded from same priority (lateral)
393 * steals to prevent the introduction of an unbounded latency.
394 */
395 if (rt_prio(waiter->prio) || dl_prio(waiter->prio))
396 return false;
397
398 return rt_mutex_waiter_equal(waiter, top_waiter);
399 #else
400 return false;
401 #endif
402 }
403
404 #define __node_2_waiter(node) \
405 rb_entry((node), struct rt_mutex_waiter, tree_entry)
406
static __always_inline bool __waiter_less(struct rb_node *a, const struct rb_node *b)
408 {
409 struct rt_mutex_waiter *aw = __node_2_waiter(a);
410 struct rt_mutex_waiter *bw = __node_2_waiter(b);
411
412 if (rt_mutex_waiter_less(aw, bw))
413 return 1;
414
415 if (!build_ww_mutex())
416 return 0;
417
418 if (rt_mutex_waiter_less(bw, aw))
419 return 0;
420
421 /* NOTE: relies on waiter->ww_ctx being set before insertion */
422 if (aw->ww_ctx) {
423 if (!bw->ww_ctx)
424 return 1;
425
426 return (signed long)(aw->ww_ctx->stamp -
427 bw->ww_ctx->stamp) < 0;
428 }
429
430 return 0;
431 }
432
433 static __always_inline void
rt_mutex_enqueue(struct rt_mutex_base *lock, struct rt_mutex_waiter *waiter)
435 {
436 rb_add_cached(&waiter->tree_entry, &lock->waiters, __waiter_less);
437 }
438
439 static __always_inline void
rt_mutex_dequeue(struct rt_mutex_base *lock, struct rt_mutex_waiter *waiter)
441 {
442 if (RB_EMPTY_NODE(&waiter->tree_entry))
443 return;
444
445 rb_erase_cached(&waiter->tree_entry, &lock->waiters);
446 RB_CLEAR_NODE(&waiter->tree_entry);
447 }
448
449 #define __node_2_pi_waiter(node) \
450 rb_entry((node), struct rt_mutex_waiter, pi_tree_entry)
451
452 static __always_inline bool
__pi_waiter_less(struct rb_node *a, const struct rb_node *b)
454 {
455 return rt_mutex_waiter_less(__node_2_pi_waiter(a), __node_2_pi_waiter(b));
456 }
457
458 static __always_inline void
rt_mutex_enqueue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter)
460 {
461 rb_add_cached(&waiter->pi_tree_entry, &task->pi_waiters, __pi_waiter_less);
462 }
463
464 static __always_inline void
rt_mutex_dequeue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter)
466 {
467 if (RB_EMPTY_NODE(&waiter->pi_tree_entry))
468 return;
469
470 rb_erase_cached(&waiter->pi_tree_entry, &task->pi_waiters);
471 RB_CLEAR_NODE(&waiter->pi_tree_entry);
472 }
473
static __always_inline void rt_mutex_adjust_prio(struct task_struct *p)
475 {
476 struct task_struct *pi_task = NULL;
477
478 lockdep_assert_held(&p->pi_lock);
479
480 if (task_has_pi_waiters(p))
481 pi_task = task_top_pi_waiter(p)->task;
482
483 rt_mutex_setprio(p, pi_task);
484 }
485
486 /* RT mutex specific wake_q wrappers */
static __always_inline void rt_mutex_wake_q_add(struct rt_wake_q_head *wqh,
						struct rt_mutex_waiter *w)
489 {
490 if (IS_ENABLED(CONFIG_PREEMPT_RT) && w->wake_state != TASK_NORMAL) {
491 if (IS_ENABLED(CONFIG_PROVE_LOCKING))
492 WARN_ON_ONCE(wqh->rtlock_task);
493 get_task_struct(w->task);
494 wqh->rtlock_task = w->task;
495 } else {
496 wake_q_add(&wqh->head, w->task);
497 }
498 }
499
static __always_inline void rt_mutex_wake_up_q(struct rt_wake_q_head *wqh)
501 {
502 if (IS_ENABLED(CONFIG_PREEMPT_RT) && wqh->rtlock_task) {
503 wake_up_state(wqh->rtlock_task, TASK_RTLOCK_WAIT);
504 put_task_struct(wqh->rtlock_task);
505 wqh->rtlock_task = NULL;
506 }
507
508 if (!wake_q_empty(&wqh->head))
509 wake_up_q(&wqh->head);
510
511 /* Pairs with preempt_disable() in mark_wakeup_next_waiter() */
512 preempt_enable();
513 }
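
/*
 * Typical usage pattern (a sketch; rt_mutex_slowunlock() below is the real
 * thing):
 *
 *	DEFINE_RT_WAKE_Q(wqh);
 *
 *	raw_spin_lock_irqsave(&lock->wait_lock, flags);
 *	mark_wakeup_next_waiter(&wqh, lock);	<- queues top waiter, disables preemption
 *	raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
 *	rt_mutex_wake_up_q(&wqh);		<- wakes it, reenables preemption
 */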
514
515 /*
516 * Deadlock detection is conditional:
517 *
518 * If CONFIG_DEBUG_RT_MUTEXES=n, deadlock detection is only conducted
519 * if the detect argument is == RT_MUTEX_FULL_CHAINWALK.
520 *
521 * If CONFIG_DEBUG_RT_MUTEXES=y, deadlock detection is always
522 * conducted independent of the detect argument.
523 *
524 * If the waiter argument is NULL this indicates the deboost path and
525 * deadlock detection is disabled independent of the detect argument
526 * and the config settings.
527 */
528 static __always_inline bool
rt_mutex_cond_detect_deadlock(struct rt_mutex_waiter *waiter,
			      enum rtmutex_chainwalk chwalk)
531 {
532 if (IS_ENABLED(CONFIG_DEBUG_RT_MUTEXES))
533 return waiter != NULL;
534 return chwalk == RT_MUTEX_FULL_CHAINWALK;
535 }
536
static __always_inline struct rt_mutex_base *task_blocked_on_lock(struct task_struct *p)
538 {
539 return p->pi_blocked_on ? p->pi_blocked_on->lock : NULL;
540 }
541
542 /*
543 * Adjust the priority chain. Also used for deadlock detection.
544 * Decreases task's usage by one - may thus free the task.
545 *
546 * @task: the task owning the mutex (owner) for which a chain walk is
547 * probably needed
548 * @chwalk: do we have to carry out deadlock detection?
549 * @orig_lock: the mutex (can be NULL if we are walking the chain to recheck
550 * things for a task that has just got its priority adjusted, and
551 * is waiting on a mutex)
552 * @next_lock: the mutex on which the owner of @orig_lock was blocked before
553 * we dropped its pi_lock. Is never dereferenced, only used for
554 * comparison to detect lock chain changes.
555 * @orig_waiter: rt_mutex_waiter struct for the task that has just donated
556 * its priority to the mutex owner (can be NULL in the case
557 * depicted above or if the top waiter is gone away and we are
558 * actually deboosting the owner)
559 * @top_task: the current top waiter
560 *
561 * Returns 0 or -EDEADLK.
562 *
563 * Chain walk basics and protection scope
564 *
565 * [R] refcount on task
566 * [P] task->pi_lock held
567 * [L] rtmutex->wait_lock held
568 *
569 * Step Description Protected by
570 * function arguments:
571 * @task [R]
572 * @orig_lock if != NULL @top_task is blocked on it
573 * @next_lock Unprotected. Cannot be
574 * dereferenced. Only used for
575 * comparison.
576 * @orig_waiter if != NULL @top_task is blocked on it
577 * @top_task current, or in case of proxy
578 * locking protected by calling
579 * code
580 * again:
581 * loop_sanity_check();
582 * retry:
583 * [1] lock(task->pi_lock); [R] acquire [P]
584 * [2] waiter = task->pi_blocked_on; [P]
585 * [3] check_exit_conditions_1(); [P]
586 * [4] lock = waiter->lock; [P]
587 * [5] if (!try_lock(lock->wait_lock)) { [P] try to acquire [L]
588 * unlock(task->pi_lock); release [P]
589 * goto retry;
590 * }
591 * [6] check_exit_conditions_2(); [P] + [L]
592 * [7] requeue_lock_waiter(lock, waiter); [P] + [L]
593 * [8] unlock(task->pi_lock); release [P]
594 * put_task_struct(task); release [R]
595 * [9] check_exit_conditions_3(); [L]
596 * [10] task = owner(lock); [L]
597 * get_task_struct(task); [L] acquire [R]
598 * lock(task->pi_lock); [L] acquire [P]
599 * [11] requeue_pi_waiter(tsk, waiters(lock));[P] + [L]
600 * [12] check_exit_conditions_4(); [P] + [L]
601 * [13] unlock(task->pi_lock); release [P]
602 * unlock(lock->wait_lock); release [L]
603 * goto again;
604 */
static int __sched rt_mutex_adjust_prio_chain(struct task_struct *task,
					      enum rtmutex_chainwalk chwalk,
					      struct rt_mutex_base *orig_lock,
					      struct rt_mutex_base *next_lock,
					      struct rt_mutex_waiter *orig_waiter,
					      struct task_struct *top_task)
611 {
612 struct rt_mutex_waiter *waiter, *top_waiter = orig_waiter;
613 struct rt_mutex_waiter *prerequeue_top_waiter;
614 int ret = 0, depth = 0;
615 struct rt_mutex_base *lock;
616 bool detect_deadlock;
617 bool requeue = true;
618
619 detect_deadlock = rt_mutex_cond_detect_deadlock(orig_waiter, chwalk);
620
621 /*
622 * The (de)boosting is a step by step approach with a lot of
* pitfalls. We want this to be preemptible and we want to hold a
624 * maximum of two locks per step. So we have to check
625 * carefully whether things change under us.
626 */
627 again:
628 /*
629 * We limit the lock chain length for each invocation.
630 */
631 if (++depth > max_lock_depth) {
632 static int prev_max;
633
634 /*
635 * Print this only once. If the admin changes the limit,
636 * print a new message when reaching the limit again.
637 */
638 if (prev_max != max_lock_depth) {
639 prev_max = max_lock_depth;
640 printk(KERN_WARNING "Maximum lock depth %d reached "
641 "task: %s (%d)\n", max_lock_depth,
642 top_task->comm, task_pid_nr(top_task));
643 }
644 put_task_struct(task);
645
646 return -EDEADLK;
647 }
648
649 /*
650 * We are fully preemptible here and only hold the refcount on
651 * @task. So everything can have changed under us since the
652 * caller or our own code below (goto retry/again) dropped all
653 * locks.
654 */
655 retry:
656 /*
657 * [1] Task cannot go away as we did a get_task() before !
658 */
659 raw_spin_lock_irq(&task->pi_lock);
660
661 /*
662 * [2] Get the waiter on which @task is blocked on.
663 */
664 waiter = task->pi_blocked_on;
665
666 /*
667 * [3] check_exit_conditions_1() protected by task->pi_lock.
668 */
669
670 /*
671 * Check whether the end of the boosting chain has been
672 * reached or the state of the chain has changed while we
673 * dropped the locks.
674 */
675 if (!waiter)
676 goto out_unlock_pi;
677
678 /*
679 * Check the orig_waiter state. After we dropped the locks,
680 * the previous owner of the lock might have released the lock.
681 */
682 if (orig_waiter && !rt_mutex_owner(orig_lock))
683 goto out_unlock_pi;
684
685 /*
686 * We dropped all locks after taking a refcount on @task, so
687 * the task might have moved on in the lock chain or even left
688 * the chain completely and blocks now on an unrelated lock or
689 * on @orig_lock.
690 *
691 * We stored the lock on which @task was blocked in @next_lock,
692 * so we can detect the chain change.
693 */
694 if (next_lock != waiter->lock)
695 goto out_unlock_pi;
696
697 /*
698 * There could be 'spurious' loops in the lock graph due to ww_mutex,
699 * consider:
700 *
701 * P1: A, ww_A, ww_B
702 * P2: ww_B, ww_A
703 * P3: A
704 *
705 * P3 should not return -EDEADLK because it gets trapped in the cycle
706 * created by P1 and P2 (which will resolve -- and runs into
707 * max_lock_depth above). Therefore disable detect_deadlock such that
708 * the below termination condition can trigger once all relevant tasks
709 * are boosted.
710 *
711 * Even when we start with ww_mutex we can disable deadlock detection,
* since we would suppress a ww_mutex induced deadlock at [6] anyway.
* Suppressing it here however is not sufficient since we might still
714 * hit [6] due to adjustment driven iteration.
715 *
716 * NOTE: if someone were to create a deadlock between 2 ww_classes we'd
717 * utterly fail to report it; lockdep should.
718 */
719 if (IS_ENABLED(CONFIG_PREEMPT_RT) && waiter->ww_ctx && detect_deadlock)
720 detect_deadlock = false;
721
722 /*
723 * Drop out, when the task has no waiters. Note,
724 * top_waiter can be NULL, when we are in the deboosting
725 * mode!
726 */
727 if (top_waiter) {
728 if (!task_has_pi_waiters(task))
729 goto out_unlock_pi;
730 /*
731 * If deadlock detection is off, we stop here if we
732 * are not the top pi waiter of the task. If deadlock
733 * detection is enabled we continue, but stop the
734 * requeueing in the chain walk.
735 */
736 if (top_waiter != task_top_pi_waiter(task)) {
737 if (!detect_deadlock)
738 goto out_unlock_pi;
739 else
740 requeue = false;
741 }
742 }
743
744 /*
745 * If the waiter priority is the same as the task priority
746 * then there is no further priority adjustment necessary. If
* deadlock detection is off, we stop the chain walk. If it's
748 * enabled we continue, but stop the requeueing in the chain
749 * walk.
750 */
751 if (rt_mutex_waiter_equal(waiter, task_to_waiter(task))) {
752 if (!detect_deadlock)
753 goto out_unlock_pi;
754 else
755 requeue = false;
756 }
757
758 /*
759 * [4] Get the next lock
760 */
761 lock = waiter->lock;
762 /*
763 * [5] We need to trylock here as we are holding task->pi_lock,
764 * which is the reverse lock order versus the other rtmutex
765 * operations.
766 */
767 if (!raw_spin_trylock(&lock->wait_lock)) {
768 raw_spin_unlock_irq(&task->pi_lock);
769 cpu_relax();
770 goto retry;
771 }
772
773 /*
774 * [6] check_exit_conditions_2() protected by task->pi_lock and
775 * lock->wait_lock.
776 *
777 * Deadlock detection. If the lock is the same as the original
778 * lock which caused us to walk the lock chain or if the
779 * current lock is owned by the task which initiated the chain
780 * walk, we detected a deadlock.
781 */
782 if (lock == orig_lock || rt_mutex_owner(lock) == top_task) {
783 ret = -EDEADLK;
784
785 /*
786 * When the deadlock is due to ww_mutex; also see above. Don't
787 * report the deadlock and instead let the ww_mutex wound/die
788 * logic pick which of the contending threads gets -EDEADLK.
789 *
790 * NOTE: assumes the cycle only contains a single ww_class; any
791 * other configuration and we fail to report; also, see
792 * lockdep.
793 */
794 if (IS_ENABLED(CONFIG_PREEMPT_RT) && orig_waiter && orig_waiter->ww_ctx)
795 ret = 0;
796
797 raw_spin_unlock(&lock->wait_lock);
798 goto out_unlock_pi;
799 }
800
801 /*
802 * If we just follow the lock chain for deadlock detection, no
803 * need to do all the requeue operations. To avoid a truckload
804 * of conditionals around the various places below, just do the
805 * minimum chain walk checks.
806 */
807 if (!requeue) {
808 /*
809 * No requeue[7] here. Just release @task [8]
810 */
811 raw_spin_unlock(&task->pi_lock);
812 put_task_struct(task);
813
814 /*
815 * [9] check_exit_conditions_3 protected by lock->wait_lock.
816 * If there is no owner of the lock, end of chain.
817 */
818 if (!rt_mutex_owner(lock)) {
819 raw_spin_unlock_irq(&lock->wait_lock);
820 return 0;
821 }
822
823 /* [10] Grab the next task, i.e. owner of @lock */
824 task = get_task_struct(rt_mutex_owner(lock));
825 raw_spin_lock(&task->pi_lock);
826
827 /*
828 * No requeue [11] here. We just do deadlock detection.
829 *
830 * [12] Store whether owner is blocked
831 * itself. Decision is made after dropping the locks
832 */
833 next_lock = task_blocked_on_lock(task);
834 /*
835 * Get the top waiter for the next iteration
836 */
837 top_waiter = rt_mutex_top_waiter(lock);
838
839 /* [13] Drop locks */
840 raw_spin_unlock(&task->pi_lock);
841 raw_spin_unlock_irq(&lock->wait_lock);
842
843 /* If owner is not blocked, end of chain. */
844 if (!next_lock)
845 goto out_put_task;
846 goto again;
847 }
848
849 /*
850 * Store the current top waiter before doing the requeue
851 * operation on @lock. We need it for the boost/deboost
852 * decision below.
853 */
854 prerequeue_top_waiter = rt_mutex_top_waiter(lock);
855
856 /* [7] Requeue the waiter in the lock waiter tree. */
857 rt_mutex_dequeue(lock, waiter);
858
859 /*
860 * Update the waiter prio fields now that we're dequeued.
861 *
862 * These values can have changed through either:
863 *
864 * sys_sched_set_scheduler() / sys_sched_setattr()
865 *
866 * or
867 *
868 * DL CBS enforcement advancing the effective deadline.
869 *
870 * Even though pi_waiters also uses these fields, and that tree is only
871 * updated in [11], we can do this here, since we hold [L], which
872 * serializes all pi_waiters access and rb_erase() does not care about
873 * the values of the node being removed.
874 */
875 waiter_update_prio(waiter, task);
876
877 rt_mutex_enqueue(lock, waiter);
878
879 /* [8] Release the task */
880 raw_spin_unlock(&task->pi_lock);
881 put_task_struct(task);
882
883 /*
884 * [9] check_exit_conditions_3 protected by lock->wait_lock.
885 *
886 * We must abort the chain walk if there is no lock owner even
* in the deadlock detection case, as we have nothing to
888 * follow here. This is the end of the chain we are walking.
889 */
890 if (!rt_mutex_owner(lock)) {
891 /*
892 * If the requeue [7] above changed the top waiter,
893 * then we need to wake the new top waiter up to try
894 * to get the lock.
895 */
896 top_waiter = rt_mutex_top_waiter(lock);
897 if (prerequeue_top_waiter != top_waiter)
898 wake_up_state(top_waiter->task, top_waiter->wake_state);
899 raw_spin_unlock_irq(&lock->wait_lock);
900 return 0;
901 }
902
903 /* [10] Grab the next task, i.e. the owner of @lock */
904 task = get_task_struct(rt_mutex_owner(lock));
905 raw_spin_lock(&task->pi_lock);
906
907 /* [11] requeue the pi waiters if necessary */
908 if (waiter == rt_mutex_top_waiter(lock)) {
909 /*
910 * The waiter became the new top (highest priority)
911 * waiter on the lock. Replace the previous top waiter
912 * in the owner tasks pi waiters tree with this waiter
913 * and adjust the priority of the owner.
914 */
915 rt_mutex_dequeue_pi(task, prerequeue_top_waiter);
916 rt_mutex_enqueue_pi(task, waiter);
917 rt_mutex_adjust_prio(task);
918
919 } else if (prerequeue_top_waiter == waiter) {
920 /*
921 * The waiter was the top waiter on the lock, but is
922 * no longer the top priority waiter. Replace waiter in
923 * the owner tasks pi waiters tree with the new top
924 * (highest priority) waiter and adjust the priority
925 * of the owner.
926 * The new top waiter is stored in @waiter so that
927 * @waiter == @top_waiter evaluates to true below and
928 * we continue to deboost the rest of the chain.
929 */
930 rt_mutex_dequeue_pi(task, waiter);
931 waiter = rt_mutex_top_waiter(lock);
932 rt_mutex_enqueue_pi(task, waiter);
933 rt_mutex_adjust_prio(task);
934 } else {
935 /*
936 * Nothing changed. No need to do any priority
937 * adjustment.
938 */
939 }
940
941 /*
942 * [12] check_exit_conditions_4() protected by task->pi_lock
943 * and lock->wait_lock. The actual decisions are made after we
944 * dropped the locks.
945 *
946 * Check whether the task which owns the current lock is pi
947 * blocked itself. If yes we store a pointer to the lock for
948 * the lock chain change detection above. After we dropped
949 * task->pi_lock next_lock cannot be dereferenced anymore.
950 */
951 next_lock = task_blocked_on_lock(task);
952 /*
953 * Store the top waiter of @lock for the end of chain walk
954 * decision below.
955 */
956 top_waiter = rt_mutex_top_waiter(lock);
957
958 /* [13] Drop the locks */
959 raw_spin_unlock(&task->pi_lock);
960 raw_spin_unlock_irq(&lock->wait_lock);
961
962 /*
963 * Make the actual exit decisions [12], based on the stored
964 * values.
965 *
966 * We reached the end of the lock chain. Stop right here. No
967 * point to go back just to figure that out.
968 */
969 if (!next_lock)
970 goto out_put_task;
971
972 /*
973 * If the current waiter is not the top waiter on the lock,
974 * then we can stop the chain walk here if we are not in full
975 * deadlock detection mode.
976 */
977 if (!detect_deadlock && waiter != top_waiter)
978 goto out_put_task;
979
980 goto again;
981
982 out_unlock_pi:
983 raw_spin_unlock_irq(&task->pi_lock);
984 out_put_task:
985 put_task_struct(task);
986
987 return ret;
988 }
989
990 /*
991 * Try to take an rt-mutex
992 *
993 * Must be called with lock->wait_lock held and interrupts disabled
994 *
995 * @lock: The lock to be acquired.
996 * @task: The task which wants to acquire the lock
997 * @waiter: The waiter that is queued to the lock's wait tree if the
998 * callsite called task_blocked_on_lock(), otherwise NULL
999 */
1000 static int __sched
try_to_take_rt_mutex(struct rt_mutex_base *lock, struct task_struct *task,
		     struct rt_mutex_waiter *waiter)
1003 {
1004 lockdep_assert_held(&lock->wait_lock);
1005
1006 /*
1007 * Before testing whether we can acquire @lock, we set the
1008 * RT_MUTEX_HAS_WAITERS bit in @lock->owner. This forces all
1009 * other tasks which try to modify @lock into the slow path
1010 * and they serialize on @lock->wait_lock.
1011 *
1012 * The RT_MUTEX_HAS_WAITERS bit can have a transitional state
1013 * as explained at the top of this file if and only if:
1014 *
1015 * - There is a lock owner. The caller must fixup the
1016 * transient state if it does a trylock or leaves the lock
1017 * function due to a signal or timeout.
1018 *
1019 * - @task acquires the lock and there are no other
1020 * waiters. This is undone in rt_mutex_set_owner(@task) at
1021 * the end of this function.
1022 */
1023 mark_rt_mutex_waiters(lock);
1024
1025 /*
1026 * If @lock has an owner, give up.
1027 */
1028 if (rt_mutex_owner(lock))
1029 return 0;
1030
1031 /*
1032 * If @waiter != NULL, @task has already enqueued the waiter
1033 * into @lock waiter tree. If @waiter == NULL then this is a
1034 * trylock attempt.
1035 */
1036 if (waiter) {
1037 struct rt_mutex_waiter *top_waiter = rt_mutex_top_waiter(lock);
1038
1039 /*
1040 * If waiter is the highest priority waiter of @lock,
1041 * or allowed to steal it, take it over.
1042 */
1043 if (waiter == top_waiter || rt_mutex_steal(waiter, top_waiter)) {
1044 /*
1045 * We can acquire the lock. Remove the waiter from the
1046 * lock waiters tree.
1047 */
1048 rt_mutex_dequeue(lock, waiter);
1049 } else {
1050 return 0;
1051 }
1052 } else {
1053 /*
1054 * If the lock has waiters already we check whether @task is
1055 * eligible to take over the lock.
1056 *
1057 * If there are no other waiters, @task can acquire
1058 * the lock. @task->pi_blocked_on is NULL, so it does
1059 * not need to be dequeued.
1060 */
1061 if (rt_mutex_has_waiters(lock)) {
1062 /* Check whether the trylock can steal it. */
1063 if (!rt_mutex_steal(task_to_waiter(task),
1064 rt_mutex_top_waiter(lock)))
1065 return 0;
1066
1067 /*
1068 * The current top waiter stays enqueued. We
1069 * don't have to change anything in the lock
1070 * waiters order.
1071 */
1072 } else {
1073 /*
1074 * No waiters. Take the lock without the
* pi_lock dance. @task->pi_blocked_on is NULL
1076 * and we have no waiters to enqueue in @task
1077 * pi waiters tree.
1078 */
1079 goto takeit;
1080 }
1081 }
1082
1083 /*
1084 * Clear @task->pi_blocked_on. Requires protection by
1085 * @task->pi_lock. Redundant operation for the @waiter == NULL
1086 * case, but conditionals are more expensive than a redundant
1087 * store.
1088 */
1089 raw_spin_lock(&task->pi_lock);
1090 task->pi_blocked_on = NULL;
1091 /*
1092 * Finish the lock acquisition. @task is the new owner. If
1093 * other waiters exist we have to insert the highest priority
1094 * waiter into @task->pi_waiters tree.
1095 */
1096 if (rt_mutex_has_waiters(lock))
1097 rt_mutex_enqueue_pi(task, rt_mutex_top_waiter(lock));
1098 raw_spin_unlock(&task->pi_lock);
1099
1100 takeit:
1101 /*
1102 * This either preserves the RT_MUTEX_HAS_WAITERS bit if there
1103 * are still waiters or clears it.
1104 */
1105 rt_mutex_set_owner(lock, task);
1106
1107 return 1;
1108 }
1109
1110 /*
1111 * Task blocks on lock.
1112 *
1113 * Prepare waiter and propagate pi chain
1114 *
1115 * This must be called with lock->wait_lock held and interrupts disabled
1116 */
static int __sched task_blocks_on_rt_mutex(struct rt_mutex_base *lock,
					   struct rt_mutex_waiter *waiter,
					   struct task_struct *task,
					   struct ww_acquire_ctx *ww_ctx,
					   enum rtmutex_chainwalk chwalk)
1122 {
1123 struct task_struct *owner = rt_mutex_owner(lock);
1124 struct rt_mutex_waiter *top_waiter = waiter;
1125 struct rt_mutex_base *next_lock;
1126 int chain_walk = 0, res;
1127
1128 lockdep_assert_held(&lock->wait_lock);
1129
1130 /*
1131 * Early deadlock detection. We really don't want the task to
1132 * enqueue on itself just to untangle the mess later. It's not
1133 * only an optimization. We drop the locks, so another waiter
1134 * can come in before the chain walk detects the deadlock. So
1135 * the other will detect the deadlock and return -EDEADLOCK,
1136 * which is wrong, as the other waiter is not in a deadlock
1137 * situation.
1138 */
1139 if (owner == task)
1140 return -EDEADLK;
1141
1142 raw_spin_lock(&task->pi_lock);
1143 waiter->task = task;
1144 waiter->lock = lock;
1145 waiter_update_prio(waiter, task);
1146
1147 /* Get the top priority waiter on the lock */
1148 if (rt_mutex_has_waiters(lock))
1149 top_waiter = rt_mutex_top_waiter(lock);
1150 rt_mutex_enqueue(lock, waiter);
1151
1152 task->pi_blocked_on = waiter;
1153
1154 raw_spin_unlock(&task->pi_lock);
1155
1156 if (build_ww_mutex() && ww_ctx) {
1157 struct rt_mutex *rtm;
1158
1159 /* Check whether the waiter should back out immediately */
1160 rtm = container_of(lock, struct rt_mutex, rtmutex);
1161 res = __ww_mutex_add_waiter(waiter, rtm, ww_ctx);
1162 if (res) {
1163 raw_spin_lock(&task->pi_lock);
1164 rt_mutex_dequeue(lock, waiter);
1165 task->pi_blocked_on = NULL;
1166 raw_spin_unlock(&task->pi_lock);
1167 return res;
1168 }
1169 }
1170
1171 if (!owner)
1172 return 0;
1173
1174 raw_spin_lock(&owner->pi_lock);
1175 if (waiter == rt_mutex_top_waiter(lock)) {
1176 rt_mutex_dequeue_pi(owner, top_waiter);
1177 rt_mutex_enqueue_pi(owner, waiter);
1178
1179 rt_mutex_adjust_prio(owner);
1180 if (owner->pi_blocked_on)
1181 chain_walk = 1;
1182 } else if (rt_mutex_cond_detect_deadlock(waiter, chwalk)) {
1183 chain_walk = 1;
1184 }
1185
1186 /* Store the lock on which owner is blocked or NULL */
1187 next_lock = task_blocked_on_lock(owner);
1188
1189 raw_spin_unlock(&owner->pi_lock);
1190 /*
1191 * Even if full deadlock detection is on, if the owner is not
1192 * blocked itself, we can avoid finding this out in the chain
1193 * walk.
1194 */
1195 if (!chain_walk || !next_lock)
1196 return 0;
1197
1198 /*
1199 * The owner can't disappear while holding a lock,
1200 * so the owner struct is protected by wait_lock.
1201 * Gets dropped in rt_mutex_adjust_prio_chain()!
1202 */
1203 get_task_struct(owner);
1204
1205 raw_spin_unlock_irq(&lock->wait_lock);
1206
1207 res = rt_mutex_adjust_prio_chain(owner, chwalk, lock,
1208 next_lock, waiter, task);
1209
1210 raw_spin_lock_irq(&lock->wait_lock);
1211
1212 return res;
1213 }
1214
1215 /*
1216 * Remove the top waiter from the current tasks pi waiter tree and
1217 * queue it up.
1218 *
1219 * Called with lock->wait_lock held and interrupts disabled.
1220 */
static void __sched mark_wakeup_next_waiter(struct rt_wake_q_head *wqh,
					    struct rt_mutex_base *lock)
1223 {
1224 struct rt_mutex_waiter *waiter;
1225
raw_spin_lock(&current->pi_lock);
1227
1228 waiter = rt_mutex_top_waiter(lock);
1229
1230 /*
1231 * Remove it from current->pi_waiters and deboost.
1232 *
1233 * We must in fact deboost here in order to ensure we call
1234 * rt_mutex_setprio() to update p->pi_top_task before the
1235 * task unblocks.
1236 */
1237 rt_mutex_dequeue_pi(current, waiter);
1238 rt_mutex_adjust_prio(current);
1239
1240 /*
1241 * As we are waking up the top waiter, and the waiter stays
1242 * queued on the lock until it gets the lock, this lock
1243 * obviously has waiters. Just set the bit here and this has
1244 * the added benefit of forcing all new tasks into the
1245 * slow path making sure no task of lower priority than
1246 * the top waiter can steal this lock.
1247 */
1248 lock->owner = (void *) RT_MUTEX_HAS_WAITERS;
1249
1250 /*
1251 * We deboosted before waking the top waiter task such that we don't
1252 * run two tasks with the 'same' priority (and ensure the
1253 * p->pi_top_task pointer points to a blocked task). This however can
1254 * lead to priority inversion if we would get preempted after the
1255 * deboost but before waking our donor task, hence the preempt_disable()
1256 * before unlock.
1257 *
1258 * Pairs with preempt_enable() in rt_mutex_wake_up_q();
1259 */
1260 preempt_disable();
1261 rt_mutex_wake_q_add(wqh, waiter);
raw_spin_unlock(&current->pi_lock);
1263 }
1264
static int __sched __rt_mutex_slowtrylock(struct rt_mutex_base *lock)
1266 {
1267 int ret = try_to_take_rt_mutex(lock, current, NULL);
1268
1269 /*
1270 * try_to_take_rt_mutex() sets the lock waiters bit
1271 * unconditionally. Clean this up.
1272 */
1273 fixup_rt_mutex_waiters(lock, true);
1274
1275 return ret;
1276 }
1277
1278 /*
1279 * Slow path try-lock function:
1280 */
static int __sched rt_mutex_slowtrylock(struct rt_mutex_base *lock)
1282 {
1283 unsigned long flags;
1284 int ret;
1285
1286 /*
1287 * If the lock already has an owner we fail to get the lock.
1288 * This can be done without taking the @lock->wait_lock as
1289 * it is only being read, and this is a trylock anyway.
1290 */
1291 if (rt_mutex_owner(lock))
1292 return 0;
1293
1294 /*
1295 * The mutex has currently no owner. Lock the wait lock and try to
1296 * acquire the lock. We use irqsave here to support early boot calls.
1297 */
1298 raw_spin_lock_irqsave(&lock->wait_lock, flags);
1299
1300 ret = __rt_mutex_slowtrylock(lock);
1301
1302 raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
1303
1304 return ret;
1305 }
1306
static __always_inline int __rt_mutex_trylock(struct rt_mutex_base *lock)
1308 {
1309 if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current)))
1310 return 1;
1311
1312 return rt_mutex_slowtrylock(lock);
1313 }
1314
1315 /*
1316 * Slow path to release a rt-mutex.
1317 */
static void __sched rt_mutex_slowunlock(struct rt_mutex_base *lock)
1319 {
1320 DEFINE_RT_WAKE_Q(wqh);
1321 unsigned long flags;
1322
1323 /* irqsave required to support early boot calls */
1324 raw_spin_lock_irqsave(&lock->wait_lock, flags);
1325
1326 debug_rt_mutex_unlock(lock);
1327
1328 /*
1329 * We must be careful here if the fast path is enabled. If we
1330 * have no waiters queued we cannot set owner to NULL here
1331 * because of:
1332 *
1333 * foo->lock->owner = NULL;
1334 * rtmutex_lock(foo->lock); <- fast path
1335 * free = atomic_dec_and_test(foo->refcnt);
1336 * rtmutex_unlock(foo->lock); <- fast path
1337 * if (free)
1338 * kfree(foo);
1339 * raw_spin_unlock(foo->lock->wait_lock);
1340 *
1341 * So for the fastpath enabled kernel:
1342 *
1343 * Nothing can set the waiters bit as long as we hold
1344 * lock->wait_lock. So we do the following sequence:
1345 *
1346 * owner = rt_mutex_owner(lock);
1347 * clear_rt_mutex_waiters(lock);
1348 * raw_spin_unlock(&lock->wait_lock);
1349 * if (cmpxchg(&lock->owner, owner, 0) == owner)
1350 * return;
1351 * goto retry;
1352 *
1353 * The fastpath disabled variant is simple as all access to
1354 * lock->owner is serialized by lock->wait_lock:
1355 *
1356 * lock->owner = NULL;
1357 * raw_spin_unlock(&lock->wait_lock);
1358 */
1359 while (!rt_mutex_has_waiters(lock)) {
1360 /* Drops lock->wait_lock ! */
1361 if (unlock_rt_mutex_safe(lock, flags) == true)
1362 return;
1363 /* Relock the rtmutex and try again */
1364 raw_spin_lock_irqsave(&lock->wait_lock, flags);
1365 }
1366
1367 /*
1368 * The wakeup next waiter path does not suffer from the above
1369 * race. See the comments there.
1370 *
1371 * Queue the next waiter for wakeup once we release the wait_lock.
1372 */
1373 mark_wakeup_next_waiter(&wqh, lock);
1374 raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
1375
1376 rt_mutex_wake_up_q(&wqh);
1377 }
1378
static __always_inline void __rt_mutex_unlock(struct rt_mutex_base *lock)
1380 {
1381 if (likely(rt_mutex_cmpxchg_release(lock, current, NULL)))
1382 return;
1383
1384 rt_mutex_slowunlock(lock);
1385 }
1386
1387 #ifdef CONFIG_SMP
static bool rtmutex_spin_on_owner(struct rt_mutex_base *lock,
				  struct rt_mutex_waiter *waiter,
				  struct task_struct *owner)
1391 {
1392 bool res = true;
1393
1394 rcu_read_lock();
1395 for (;;) {
1396 /* If owner changed, trylock again. */
1397 if (owner != rt_mutex_owner(lock))
1398 break;
1399 /*
1400 * Ensure that @owner is dereferenced after checking that
1401 * the lock owner still matches @owner. If that fails,
1402 * @owner might point to freed memory. If it still matches,
1403 * the rcu_read_lock() ensures the memory stays valid.
1404 */
1405 barrier();
1406 /*
1407 * Stop spinning when:
1408 * - the lock owner has been scheduled out
* - current is no longer the top waiter
1410 * - current is requested to reschedule (redundant
1411 * for CONFIG_PREEMPT_RCU=y)
1412 * - the VCPU on which owner runs is preempted
1413 */
1414 if (!owner->on_cpu || need_resched() ||
1415 !rt_mutex_waiter_is_top_waiter(lock, waiter) ||
1416 vcpu_is_preempted(task_cpu(owner))) {
1417 res = false;
1418 break;
1419 }
1420 cpu_relax();
1421 }
1422 rcu_read_unlock();
1423 return res;
1424 }
1425 #else
static bool rtmutex_spin_on_owner(struct rt_mutex_base *lock,
				  struct rt_mutex_waiter *waiter,
				  struct task_struct *owner)
1429 {
1430 return false;
1431 }
1432 #endif
1433
1434 #ifdef RT_MUTEX_BUILD_MUTEX
1435 /*
1436 * Functions required for:
1437 * - rtmutex, futex on all kernels
1438 * - mutex and rwsem substitutions on RT kernels
1439 */
1440
1441 /*
1442 * Remove a waiter from a lock and give up
1443 *
1444 * Must be called with lock->wait_lock held and interrupts disabled. It must
1445 * have just failed to try_to_take_rt_mutex().
1446 */
static void __sched remove_waiter(struct rt_mutex_base *lock,
				  struct rt_mutex_waiter *waiter)
1449 {
1450 bool is_top_waiter = (waiter == rt_mutex_top_waiter(lock));
1451 struct task_struct *owner = rt_mutex_owner(lock);
1452 struct rt_mutex_base *next_lock;
1453
1454 lockdep_assert_held(&lock->wait_lock);
1455
raw_spin_lock(&current->pi_lock);
1457 rt_mutex_dequeue(lock, waiter);
1458 current->pi_blocked_on = NULL;
raw_spin_unlock(&current->pi_lock);
1460
1461 /*
1462 * Only update priority if the waiter was the highest priority
1463 * waiter of the lock and there is an owner to update.
1464 */
1465 if (!owner || !is_top_waiter)
1466 return;
1467
1468 raw_spin_lock(&owner->pi_lock);
1469
1470 rt_mutex_dequeue_pi(owner, waiter);
1471
1472 if (rt_mutex_has_waiters(lock))
1473 rt_mutex_enqueue_pi(owner, rt_mutex_top_waiter(lock));
1474
1475 rt_mutex_adjust_prio(owner);
1476
1477 /* Store the lock on which owner is blocked or NULL */
1478 next_lock = task_blocked_on_lock(owner);
1479
1480 raw_spin_unlock(&owner->pi_lock);
1481
1482 /*
1483 * Don't walk the chain, if the owner task is not blocked
1484 * itself.
1485 */
1486 if (!next_lock)
1487 return;
1488
1489 /* gets dropped in rt_mutex_adjust_prio_chain()! */
1490 get_task_struct(owner);
1491
1492 raw_spin_unlock_irq(&lock->wait_lock);
1493
1494 rt_mutex_adjust_prio_chain(owner, RT_MUTEX_MIN_CHAINWALK, lock,
1495 next_lock, NULL, current);
1496
1497 raw_spin_lock_irq(&lock->wait_lock);
1498 }
1499
1500 /**
1501 * rt_mutex_slowlock_block() - Perform the wait-wake-try-to-take loop
1502 * @lock: the rt_mutex to take
1503 * @ww_ctx: WW mutex context pointer
1504 * @state: the state the task should block in (TASK_INTERRUPTIBLE
1505 * or TASK_UNINTERRUPTIBLE)
1506 * @timeout: the pre-initialized and started timer, or NULL for none
1507 * @waiter: the pre-initialized rt_mutex_waiter
1508 *
1509 * Must be called with lock->wait_lock held and interrupts disabled
1510 */
static int __sched rt_mutex_slowlock_block(struct rt_mutex_base *lock,
					   struct ww_acquire_ctx *ww_ctx,
					   unsigned int state,
					   struct hrtimer_sleeper *timeout,
					   struct rt_mutex_waiter *waiter)
1516 {
1517 struct rt_mutex *rtm = container_of(lock, struct rt_mutex, rtmutex);
1518 struct task_struct *owner;
1519 int ret = 0;
1520
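/*
 * Android vendor hooks: let attached vendor modules observe the start and
 * end of the wait loop, e.g. for lock contention accounting.
 */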
1521 trace_android_vh_rtmutex_wait_start(lock);
1522 for (;;) {
1523 /* Try to acquire the lock: */
1524 if (try_to_take_rt_mutex(lock, current, waiter))
1525 break;
1526
1527 if (timeout && !timeout->task) {
1528 ret = -ETIMEDOUT;
1529 break;
1530 }
1531 if (signal_pending_state(state, current)) {
1532 ret = -EINTR;
1533 break;
1534 }
1535
1536 if (build_ww_mutex() && ww_ctx) {
1537 ret = __ww_mutex_check_kill(rtm, waiter, ww_ctx);
1538 if (ret)
1539 break;
1540 }
1541
1542 if (waiter == rt_mutex_top_waiter(lock))
1543 owner = rt_mutex_owner(lock);
1544 else
1545 owner = NULL;
1546 raw_spin_unlock_irq(&lock->wait_lock);
1547
1548 if (!owner || !rtmutex_spin_on_owner(lock, waiter, owner))
1549 schedule();
1550
1551 raw_spin_lock_irq(&lock->wait_lock);
1552 set_current_state(state);
1553 }
1554
1555 trace_android_vh_rtmutex_wait_finish(lock);
1556 __set_current_state(TASK_RUNNING);
1557 return ret;
1558 }
1559
static void __sched rt_mutex_handle_deadlock(int res, int detect_deadlock,
					     struct rt_mutex_waiter *w)
1562 {
1563 /*
1564 * If the result is not -EDEADLOCK or the caller requested
1565 * deadlock detection, nothing to do here.
1566 */
1567 if (res != -EDEADLOCK || detect_deadlock)
1568 return;
1569
1570 if (build_ww_mutex() && w->ww_ctx)
1571 return;
1572
1573 /*
1574 * Yell loudly and stop the task right here.
1575 */
1576 WARN(1, "rtmutex deadlock detected\n");
1577 while (1) {
1578 set_current_state(TASK_INTERRUPTIBLE);
1579 schedule();
1580 }
1581 }
1582
1583 /**
1584 * __rt_mutex_slowlock - Locking slowpath invoked with lock::wait_lock held
1585 * @lock: The rtmutex to block lock
1586 * @ww_ctx: WW mutex context pointer
1587 * @state: The task state for sleeping
1588 * @chwalk: Indicator whether full or partial chainwalk is requested
* @waiter: Initialized waiter for blocking
1590 */
static int __sched __rt_mutex_slowlock(struct rt_mutex_base *lock,
				       struct ww_acquire_ctx *ww_ctx,
				       unsigned int state,
				       enum rtmutex_chainwalk chwalk,
				       struct rt_mutex_waiter *waiter)
1596 {
1597 struct rt_mutex *rtm = container_of(lock, struct rt_mutex, rtmutex);
1598 struct ww_mutex *ww = ww_container_of(rtm);
1599 int ret;
1600
1601 lockdep_assert_held(&lock->wait_lock);
1602
1603 /* Try to acquire the lock again: */
1604 if (try_to_take_rt_mutex(lock, current, NULL)) {
1605 if (build_ww_mutex() && ww_ctx) {
1606 __ww_mutex_check_waiters(rtm, ww_ctx);
1607 ww_mutex_lock_acquired(ww, ww_ctx);
1608 }
1609 return 0;
1610 }
1611
1612 set_current_state(state);
1613
1614 ret = task_blocks_on_rt_mutex(lock, waiter, current, ww_ctx, chwalk);
1615 if (likely(!ret))
1616 ret = rt_mutex_slowlock_block(lock, ww_ctx, state, NULL, waiter);
1617
1618 if (likely(!ret)) {
1619 /* acquired the lock */
1620 if (build_ww_mutex() && ww_ctx) {
1621 if (!ww_ctx->is_wait_die)
1622 __ww_mutex_check_waiters(rtm, ww_ctx);
1623 ww_mutex_lock_acquired(ww, ww_ctx);
1624 }
1625 } else {
1626 __set_current_state(TASK_RUNNING);
1627 remove_waiter(lock, waiter);
1628 rt_mutex_handle_deadlock(ret, chwalk, waiter);
1629 }
1630
1631 /*
1632 * try_to_take_rt_mutex() sets the waiter bit
1633 * unconditionally. We might have to fix that up.
1634 */
1635 fixup_rt_mutex_waiters(lock, true);
1636 return ret;
1637 }
1638
static inline int __rt_mutex_slowlock_locked(struct rt_mutex_base *lock,
					     struct ww_acquire_ctx *ww_ctx,
					     unsigned int state)
1642 {
1643 struct rt_mutex_waiter waiter;
1644 int ret;
1645
1646 rt_mutex_init_waiter(&waiter);
1647 waiter.ww_ctx = ww_ctx;
1648
1649 ret = __rt_mutex_slowlock(lock, ww_ctx, state, RT_MUTEX_MIN_CHAINWALK,
1650 &waiter);
1651
1652 debug_rt_mutex_free_waiter(&waiter);
1653 return ret;
1654 }
1655
1656 /*
1657 * rt_mutex_slowlock - Locking slowpath invoked when fast path fails
1658 * @lock: The rtmutex to block lock
1659 * @ww_ctx: WW mutex context pointer
1660 * @state: The task state for sleeping
1661 */
static int __sched rt_mutex_slowlock(struct rt_mutex_base *lock,
				     struct ww_acquire_ctx *ww_ctx,
				     unsigned int state)
1665 {
1666 unsigned long flags;
1667 int ret;
1668
1669 /*
1670 * Technically we could use raw_spin_[un]lock_irq() here, but this can
1671 * be called in early boot if the cmpxchg() fast path is disabled
1672 * (debug, no architecture support). In this case we will acquire the
1673 * rtmutex with lock->wait_lock held. But we cannot unconditionally
1674 * enable interrupts in that early boot case. So we need to use the
1675 * irqsave/restore variants.
1676 */
1677 raw_spin_lock_irqsave(&lock->wait_lock, flags);
1678 ret = __rt_mutex_slowlock_locked(lock, ww_ctx, state);
1679 raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
1680
1681 return ret;
1682 }
1683
static __always_inline int __rt_mutex_lock(struct rt_mutex_base *lock,
					   unsigned int state)
1686 {
1687 if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current)))
1688 return 0;
1689
1690 return rt_mutex_slowlock(lock, NULL, state);
1691 }
1692 #endif /* RT_MUTEX_BUILD_MUTEX */
1693
1694 #ifdef RT_MUTEX_BUILD_SPINLOCKS
1695 /*
1696 * Functions required for spin/rw_lock substitution on RT kernels
1697 */
1698
1699 /**
1700 * rtlock_slowlock_locked - Slow path lock acquisition for RT locks
1701 * @lock: The underlying RT mutex
1702 */
static void __sched rtlock_slowlock_locked(struct rt_mutex_base *lock)
1704 {
1705 struct rt_mutex_waiter waiter;
1706 struct task_struct *owner;
1707
1708 lockdep_assert_held(&lock->wait_lock);
1709
1710 if (try_to_take_rt_mutex(lock, current, NULL))
1711 return;
1712
1713 rt_mutex_init_rtlock_waiter(&waiter);
1714
1715 /* Save current state and set state to TASK_RTLOCK_WAIT */
1716 current_save_and_set_rtlock_wait_state();
1717
1718 task_blocks_on_rt_mutex(lock, &waiter, current, NULL, RT_MUTEX_MIN_CHAINWALK);
1719
1720 for (;;) {
1721 /* Try to acquire the lock again */
1722 if (try_to_take_rt_mutex(lock, current, &waiter))
1723 break;
1724
1725 if (&waiter == rt_mutex_top_waiter(lock))
1726 owner = rt_mutex_owner(lock);
1727 else
1728 owner = NULL;
1729 raw_spin_unlock_irq(&lock->wait_lock);
1730
1731 if (!owner || !rtmutex_spin_on_owner(lock, &waiter, owner))
1732 schedule_rtlock();
1733
1734 raw_spin_lock_irq(&lock->wait_lock);
1735 set_current_state(TASK_RTLOCK_WAIT);
1736 }
1737
1738 /* Restore the task state */
1739 current_restore_rtlock_saved_state();
1740
1741 /*
1742 * try_to_take_rt_mutex() sets the waiter bit unconditionally.
1743 * We might have to fix that up:
1744 */
1745 fixup_rt_mutex_waiters(lock, true);
1746 debug_rt_mutex_free_waiter(&waiter);
1747 }
1748
static __always_inline void __sched rtlock_slowlock(struct rt_mutex_base *lock)
1750 {
1751 unsigned long flags;
1752
1753 raw_spin_lock_irqsave(&lock->wait_lock, flags);
1754 rtlock_slowlock_locked(lock);
1755 raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
1756 }
1757
1758 #endif /* RT_MUTEX_BUILD_SPINLOCKS */
1759