Lines Matching +full:per +full:- +full:hart

1 // SPDX-License-Identifier: GPL-2.0-or-later
16 * PI-futex support started by Ingo Molnar and Thomas Gleixner
23 * Requeue-PI support by Darren Hart <dvhltc@us.ibm.com>
29 * Kirkwood for proof-of-concept implementation.
40 #include <linux/fault-inject.h>
101 * smp_mb(); (A) <-- paired with -.
110 * `--------> smp_mb(); (B)
117 * waiters--; (b) unlock(hash_bucket(futex));
141 * acquiring the lock. It then decrements them again after releasing it -
161 * NOMMU does not have per-process address space. Let the compiler optimize
174 * list of 'owned' pi_state instances - these have to be
191 * struct futex_q - The hashed futex queue entry, one per waiting task
192 * @list: priority-sorted list of tasks waiting on this futex
205 * It is considered woken when plist_node_empty(&q->list) || q->lock_ptr == 0.
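For orientation, the queue entry those fields belong to, reconstructed as a sketch from the fragments in this file (field order is not guaranteed by this excerpt):

struct futex_q {
	struct plist_node list;			/* priority-sorted, one node per waiter */

	struct task_struct *task;		/* the waiting task */
	spinlock_t *lock_ptr;			/* &hb->lock; NULL once woken */
	union futex_key key;			/* which futex we wait on */
	struct futex_pi_state *pi_state;	/* PI futexes only */
	struct rt_mutex_waiter *rt_waiter;	/* requeue-PI only */
	union futex_key *requeue_pi_key;	/* expected requeue target key */
	u32 bitset;				/* FUTEX_WAIT_BITSET wakeup mask */
};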
294 debugfs_create_bool("ignore-private", mode, dir, in fail_futex_debugfs()
322 atomic_inc(&hb->waiters); in hb_waiters_inc()
337 atomic_dec(&hb->waiters); in hb_waiters_dec()
348 return atomic_read(&hb->waiters); in hb_waiters_pending()
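The barriers (A) and (B) from the ordering comment live in these helpers; a reconstructed sketch, assuming CONFIG_SMP (the !SMP variants compile to nothing or a constant):

static inline void hb_waiters_inc(struct futex_hash_bucket *hb)
{
#ifdef CONFIG_SMP
	atomic_inc(&hb->waiters);
	/* Full barrier (A), pairs with (B) in hb_waiters_pending(). */
	smp_mb__after_atomic();
#endif
}

static inline int hb_waiters_pending(struct futex_hash_bucket *hb)
{
#ifdef CONFIG_SMP
	/* Full barrier (B), pairs with (A). */
	smp_mb();
	return atomic_read(&hb->waiters);
#else
	return 1;
#endif
}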
355 * hash_futex - Return the hash bucket in the global hash
364 key->both.offset); in hash_futex()
366 return &futex_queues[hash & (futex_hashsize - 1)]; in hash_futex()
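Reconstructed in full as a sketch; it assumes key->both.offset is the last word of the key, so jhash2() hashes everything before it and folds the offset in as the initval:

static struct futex_hash_bucket *hash_futex(union futex_key *key)
{
	u32 hash = jhash2((u32 *)key,
			  offsetof(typeof(*key), both.offset) / 4,
			  key->both.offset);

	return &futex_queues[hash & (futex_hashsize - 1)];
}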
371 * match_futex - Check whether two futex keys are equal
380 && key1->both.word == key2->both.word in match_futex()
381 && key1->both.ptr == key2->both.ptr in match_futex()
382 && key1->both.offset == key2->both.offset); in match_futex()
391 * futex_setup_timer - set up the sleeping hrtimer.
414 hrtimer_set_expires_range_ns(&timeout->timer, *time, range_ns); in futex_setup_timer()
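A sketch of the whole helper, assuming the hrtimer_init_sleeper_on_stack() era of the hrtimer API:

static inline struct hrtimer_sleeper *
futex_setup_timer(ktime_t *time, struct hrtimer_sleeper *timeout,
		  int flags, u64 range_ns)
{
	if (!time)
		return NULL;

	hrtimer_init_sleeper_on_stack(timeout, (flags & FLAGS_CLOCKRT) ?
				      CLOCK_REALTIME : CLOCK_MONOTONIC,
				      HRTIMER_MODE_ABS);
	/* range_ns == 0 degenerates to a plain hrtimer_set_expires(). */
	hrtimer_set_expires_range_ns(&timeout->timer, *time, range_ns);

	return timeout;
}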
422 * This relies on u64 not wrapping in the life-time of the machine; which with
433 * It is important that match_futex() will never have a false-positive, esp.
434 * for PI futexes that can mess up the state. The above argues that false-negatives
443 old = atomic64_read(&inode->i_sequence); in get_inode_sequence_number()
452 old = atomic64_cmpxchg_relaxed(&inode->i_sequence, 0, new); in get_inode_sequence_number()
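The loop around that cmpxchg, reconstructed as a sketch: a static global counter hands out sequence numbers, and the relaxed cmpxchg resolves races between first users of the same inode:

static u64 get_inode_sequence_number(struct inode *inode)
{
	static atomic64_t i_seq;
	u64 old;

	/* Does the inode already have a sequence number? */
	old = atomic64_read(&inode->i_sequence);
	if (likely(old))
		return old;

	for (;;) {
		u64 new = atomic64_add_return(1, &i_seq);

		if (WARN_ON_ONCE(!new))
			continue;	/* 0 is reserved, see the wrap comment */

		old = atomic64_cmpxchg_relaxed(&inode->i_sequence, 0, new);
		if (old)
			return old;	/* somebody else won the race */
		return new;
	}
}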
460 * get_futex_key() - Get parameters which are the keys for a futex
473 * ( inode->i_sequence, page->index, offset_within_page )
479 * ( current->mm, address, 0 )
490 struct mm_struct *mm = current->mm; in get_futex_key()
498 key->both.offset = address % PAGE_SIZE; in get_futex_key()
500 return -EINVAL; in get_futex_key()
501 address -= key->both.offset; in get_futex_key()
504 return -EFAULT; in get_futex_key()
507 return -EFAULT; in get_futex_key()
517 key->private.mm = mm; in get_futex_key()
518 key->private.address = address; in get_futex_key()
525 return -EFAULT; in get_futex_key()
530 * and get read-only access. in get_futex_key()
532 if (err == -EFAULT && rw == FUTEX_READ) { in get_futex_key()
545 * file-backed region case and guards against movement to swap cache. in get_futex_key()
549 * From this point on, mapping will be re-verified if necessary and in get_futex_key()
555 * based on the address. For filesystem-backed pages, the tail is in get_futex_key()
561 mapping = READ_ONCE(page->mapping); in get_futex_key()
564 * If page->mapping is NULL, then it cannot be a PageAnon in get_futex_key()
576 * an unlikely race, but we do need to retry for page->mapping. in get_futex_key()
587 shmem_swizzled = PageSwapCache(page) || page->mapping; in get_futex_key()
594 return -EFAULT; in get_futex_key()
604 * it's a read-only handle, it's expected that futexes attach to in get_futex_key()
613 err = -EFAULT; in get_futex_key()
617 key->both.offset |= FUT_OFF_MMSHARED; /* ref taken on mm */ in get_futex_key()
618 key->private.mm = mm; in get_futex_key()
619 key->private.address = address; in get_futex_key()
626 * the page->mapping must be traversed. Ordinarily this should in get_futex_key()
633 * mapping->host can be safely accessed as being a valid inode. in get_futex_key()
637 if (READ_ONCE(page->mapping) != mapping) { in get_futex_key()
644 inode = READ_ONCE(mapping->host); in get_futex_key()
652 key->both.offset |= FUT_OFF_INODE; /* inode-based key */ in get_futex_key()
653 key->shared.i_seq = get_inode_sequence_number(inode); in get_futex_key()
654 key->shared.pgoff = page_to_pgoff(tail); in get_futex_key()
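Both key layouts named in the comment above live in one union; a sketch of the definition (the real one pads private.mm to 64 bits so both.ptr always covers it):

union futex_key {
	struct {
		u64 i_seq;			/* inode->i_sequence */
		unsigned long pgoff;		/* page offset within the file */
		unsigned int offset;		/* offset within page + type bits */
	} shared;
	struct {
		union {
			struct mm_struct *mm;	/* current->mm */
			u64 __tmp;
		};
		unsigned long address;
		unsigned int offset;
	} private;
	struct {
		u64 ptr;
		unsigned long word;
		unsigned int offset;		/* what match_futex() compares */
	} both;
};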
664 * fault_in_user_writeable() - Fault in user address and verify RW access
670 * We have no generic implementation of a non-destructive write to the
677 struct mm_struct *mm = current->mm; in fault_in_user_writeable()
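A sketch of the fault-in helper, assuming the mmap_read_lock()/fixup_user_fault() API of recent kernels; FAULT_FLAG_WRITE is what verifies RW access:

static int fault_in_user_writeable(u32 __user *uaddr)
{
	struct mm_struct *mm = current->mm;
	int ret;

	mmap_read_lock(mm);
	ret = fixup_user_fault(mm, (unsigned long)uaddr,
			       FAULT_FLAG_WRITE, NULL);
	mmap_read_unlock(mm);

	return ret < 0 ? ret : 0;
}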
689 * futex_top_waiter() - Return the highest priority waiter on a futex
700 plist_for_each_entry(this, &hb->chain, list) { in futex_top_waiter()
701 if (match_futex(&this->key, key)) in futex_top_waiter()
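Completed as a sketch: the chain is priority-sorted, so the first key match is the top waiter:

static struct futex_q *futex_top_waiter(struct futex_hash_bucket *hb,
					union futex_key *key)
{
	struct futex_q *this;

	plist_for_each_entry(this, &hb->chain, list) {
		if (match_futex(&this->key, key))
			return this;
	}
	return NULL;
}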
727 return ret ? -EFAULT : 0; in get_futex_value_locked()
738 if (likely(current->pi_state_cache)) in refill_pi_state_cache()
744 return -ENOMEM; in refill_pi_state_cache()
746 INIT_LIST_HEAD(&pi_state->list); in refill_pi_state_cache()
748 pi_state->owner = NULL; in refill_pi_state_cache()
749 refcount_set(&pi_state->refcount, 1); in refill_pi_state_cache()
750 pi_state->key = FUTEX_KEY_INIT; in refill_pi_state_cache()
752 current->pi_state_cache = pi_state; in refill_pi_state_cache()
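The excerpt skips the allocation itself; a reconstructed sketch of the whole refill:

static int refill_pi_state_cache(void)
{
	struct futex_pi_state *pi_state;

	if (likely(current->pi_state_cache))
		return 0;

	pi_state = kzalloc(sizeof(*pi_state), GFP_KERNEL);
	if (!pi_state)
		return -ENOMEM;

	INIT_LIST_HEAD(&pi_state->list);
	/* pi_mutex gets initialized later */
	pi_state->owner = NULL;
	refcount_set(&pi_state->refcount, 1);
	pi_state->key = FUTEX_KEY_INIT;

	current->pi_state_cache = pi_state;
	return 0;
}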
759 struct futex_pi_state *pi_state = current->pi_state_cache; in alloc_pi_state()
762 current->pi_state_cache = NULL; in alloc_pi_state()
770 struct task_struct *old_owner = pi_state->owner; in pi_state_update_owner()
772 lockdep_assert_held(&pi_state->pi_mutex.wait_lock); in pi_state_update_owner()
775 raw_spin_lock(&old_owner->pi_lock); in pi_state_update_owner()
776 WARN_ON(list_empty(&pi_state->list)); in pi_state_update_owner()
777 list_del_init(&pi_state->list); in pi_state_update_owner()
778 raw_spin_unlock(&old_owner->pi_lock); in pi_state_update_owner()
782 raw_spin_lock(&new_owner->pi_lock); in pi_state_update_owner()
783 WARN_ON(!list_empty(&pi_state->list)); in pi_state_update_owner()
784 list_add(&pi_state->list, &new_owner->pi_state_list); in pi_state_update_owner()
785 pi_state->owner = new_owner; in pi_state_update_owner()
786 raw_spin_unlock(&new_owner->pi_lock); in pi_state_update_owner()
792 WARN_ON_ONCE(!refcount_inc_not_zero(&pi_state->refcount)); in get_pi_state()
804 if (!refcount_dec_and_test(&pi_state->refcount)) in put_pi_state()
808 * If pi_state->owner is NULL, the owner is most probably dying in put_pi_state()
811 if (pi_state->owner) { in put_pi_state()
814 raw_spin_lock_irqsave(&pi_state->pi_mutex.wait_lock, flags); in put_pi_state()
816 rt_mutex_proxy_unlock(&pi_state->pi_mutex); in put_pi_state()
817 raw_spin_unlock_irqrestore(&pi_state->pi_mutex.wait_lock, flags); in put_pi_state()
820 if (current->pi_state_cache) { in put_pi_state()
824 * pi_state->list is already empty. in put_pi_state()
825 * clear pi_state->owner. in put_pi_state()
826 * refcount is at 0 - put it back to 1. in put_pi_state()
828 pi_state->owner = NULL; in put_pi_state()
829 refcount_set(&pi_state->refcount, 1); in put_pi_state()
830 current->pi_state_cache = pi_state; in put_pi_state()
838 * Kernel cleans up PI-state, but userspace is likely hosed.
839 * (Robust-futex cleanup is separate and might save the day for userspace.)
843 struct list_head *next, *head = &curr->pi_state_list; in exit_pi_state_list()
855 raw_spin_lock_irq(&curr->pi_lock); in exit_pi_state_list()
857 next = head->next; in exit_pi_state_list()
859 key = pi_state->key; in exit_pi_state_list()
872 if (!refcount_inc_not_zero(&pi_state->refcount)) { in exit_pi_state_list()
873 raw_spin_unlock_irq(&curr->pi_lock); in exit_pi_state_list()
875 raw_spin_lock_irq(&curr->pi_lock); in exit_pi_state_list()
878 raw_spin_unlock_irq(&curr->pi_lock); in exit_pi_state_list()
880 spin_lock(&hb->lock); in exit_pi_state_list()
881 raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock); in exit_pi_state_list()
882 raw_spin_lock(&curr->pi_lock); in exit_pi_state_list()
884 * We dropped the pi-lock, so re-check whether this in exit_pi_state_list()
885 * task still owns the PI-state: in exit_pi_state_list()
887 if (head->next != next) { in exit_pi_state_list()
888 /* retain curr->pi_lock for the loop invariant */ in exit_pi_state_list()
889 raw_spin_unlock(&pi_state->pi_mutex.wait_lock); in exit_pi_state_list()
890 spin_unlock(&hb->lock); in exit_pi_state_list()
895 WARN_ON(pi_state->owner != curr); in exit_pi_state_list()
896 WARN_ON(list_empty(&pi_state->list)); in exit_pi_state_list()
897 list_del_init(&pi_state->list); in exit_pi_state_list()
898 pi_state->owner = NULL; in exit_pi_state_list()
900 raw_spin_unlock(&curr->pi_lock); in exit_pi_state_list()
901 raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); in exit_pi_state_list()
902 spin_unlock(&hb->lock); in exit_pi_state_list()
904 rt_mutex_futex_unlock(&pi_state->pi_mutex); in exit_pi_state_list()
907 raw_spin_lock_irq(&curr->pi_lock); in exit_pi_state_list()
909 raw_spin_unlock_irq(&curr->pi_lock); in exit_pi_state_list()
918 * Waiter | pi_state | pi->owner | uTID | uODIED | ?
920 * [1] NULL | --- | --- | 0 | 0/1 | Valid
921 * [2] NULL | --- | --- | >0 | 0/1 | Valid
923 * [3] Found | NULL | -- | Any | 0/1 | Invalid
953 * [7] pi_state->owner can only be NULL when the OWNER_DIED bit is set.
968 * hb->lock:
970 * hb -> futex_q, relation
971 * futex_q -> pi_state, relation
976 * pi_mutex->wait_lock:
982 * p->pi_lock:
984 * p->pi_state_list -> pi_state->list, relation
986 * pi_state->refcount:
993 * hb->lock
994 * pi_mutex->wait_lock
995 * p->pi_lock
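Taken together, the nesting reads as below. A sketch only (the function name is illustrative), but the order matches what exit_pi_state_list() above actually takes:

static void pi_state_locking_example(struct futex_hash_bucket *hb,
				     struct futex_pi_state *pi_state,
				     struct task_struct *curr)
{
	spin_lock(&hb->lock);				/* hb -> futex_q */
	raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);	/* pi_state */
	raw_spin_lock(&curr->pi_lock);			/* innermost: task pi lists */

	/* ... pi_state ownership updates go here ... */

	raw_spin_unlock(&curr->pi_lock);
	raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
	spin_unlock(&hb->lock);
}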
1013 * Userspace might have messed up non-PI and PI futexes [3] in attach_to_pi_state()
1016 return -EINVAL; in attach_to_pi_state()
1019 * We get here with hb->lock held, and having found a in attach_to_pi_state()
1021 * has dropped the hb->lock in between queue_me() and unqueue_me_pi(), in attach_to_pi_state()
1030 WARN_ON(!refcount_read(&pi_state->refcount)); in attach_to_pi_state()
1036 raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock); in attach_to_pi_state()
1057 * pi_state->rt_mutex will fixup owner. in attach_to_pi_state()
1059 if (!pi_state->owner) { in attach_to_pi_state()
1087 if (!pi_state->owner) in attach_to_pi_state()
1096 if (pid != task_pid_vnr(pi_state->owner)) in attach_to_pi_state()
1101 raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); in attach_to_pi_state()
1106 ret = -EINVAL; in attach_to_pi_state()
1110 ret = -EAGAIN; in attach_to_pi_state()
1114 ret = -EFAULT; in attach_to_pi_state()
1118 raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); in attach_to_pi_state()
1123 * wait_for_owner_exiting - Block until the owner has exited
1131 if (ret != -EBUSY) { in wait_for_owner_exiting()
1136 if (WARN_ON_ONCE(ret == -EBUSY && !exiting)) in wait_for_owner_exiting()
1139 mutex_lock(&exiting->futex_exit_mutex); in wait_for_owner_exiting()
1142 * while the task was in exec()->exec_futex_release() then it can in wait_for_owner_exiting()
1148 mutex_unlock(&exiting->futex_exit_mutex); in wait_for_owner_exiting()
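Reconstructed as a sketch: acquiring and immediately releasing futex_exit_mutex is the whole synchronization, since the mutex cannot be taken until the exiting owner has finished its futex cleanup:

static void wait_for_owner_exiting(int ret, struct task_struct *exiting)
{
	if (ret != -EBUSY) {
		WARN_ON_ONCE(exiting);
		return;
	}

	if (WARN_ON_ONCE(ret == -EBUSY && !exiting))
		return;

	/* Blocks until the exiting task released the mutex in futex_exit_release(). */
	mutex_lock(&exiting->futex_exit_mutex);
	mutex_unlock(&exiting->futex_exit_mutex);

	put_task_struct(exiting);
}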
1162 if (tsk && tsk->futex_state != FUTEX_STATE_DEAD) in handle_exit_race()
1163 return -EBUSY; in handle_exit_race()
1174 * tsk->flags |= PF_EXITING; *uaddr == 0x00000PID in handle_exit_race()
1179 * } if (!tsk->flags & PF_EXITING) { in handle_exit_race()
1181 * tsk->futex_state = } else { in handle_exit_race()
1182 * FUTEX_STATE_DEAD; if (tsk->futex_state != in handle_exit_race()
1184 * return -EAGAIN; in handle_exit_race()
1185 * return -ESRCH; <--- FAIL in handle_exit_race()
1195 return -EFAULT; in handle_exit_race()
1199 return -EAGAIN; in handle_exit_race()
1206 return -ESRCH; in handle_exit_race()
1222 * We are the first waiter - try to look up the real owner and attach in attach_to_pi_owner()
1229 return -EAGAIN; in attach_to_pi_owner()
1234 if (unlikely(p->flags & PF_KTHREAD)) { in attach_to_pi_owner()
1236 return -EPERM; in attach_to_pi_owner()
1242 * in futex_exit_release(), we do this protected by p->pi_lock: in attach_to_pi_owner()
1244 raw_spin_lock_irq(&p->pi_lock); in attach_to_pi_owner()
1245 if (unlikely(p->futex_state != FUTEX_STATE_OK)) { in attach_to_pi_owner()
1253 raw_spin_unlock_irq(&p->pi_lock); in attach_to_pi_owner()
1263 if (ret == -EBUSY) in attach_to_pi_owner()
1273 * This creates pi_state, we have hb->lock held, this means nothing can in attach_to_pi_owner()
1282 rt_mutex_init_proxy_locked(&pi_state->pi_mutex, p); in attach_to_pi_owner()
1285 pi_state->key = *key; in attach_to_pi_owner()
1287 WARN_ON(!list_empty(&pi_state->list)); in attach_to_pi_owner()
1288 list_add(&pi_state->list, &p->pi_state_list); in attach_to_pi_owner()
1290 * Assignment without holding pi_state->pi_mutex.wait_lock is safe in attach_to_pi_owner()
1293 pi_state->owner = p; in attach_to_pi_owner()
1294 raw_spin_unlock_irq(&p->pi_lock); in attach_to_pi_owner()
1315 return attach_to_pi_state(uaddr, uval, top_waiter->pi_state, ps); in lookup_pi_state()
1318 * We are the first waiter - try to look up the owner based on in lookup_pi_state()
1330 return -EFAULT; in lock_pi_update_atomic()
1337 return curval != uval ? -EAGAIN : 0; in lock_pi_update_atomic()
1341 * futex_lock_pi_atomic() - Atomic work required to acquire a pi aware futex
1354 * - 0 - ready to wait;
1355 * - 1 - acquired the lock;
1356 * - <0 - error
1358 * The hb->lock and futex_key refs shall be held by the caller.
1360 * @exiting is only set when the return value is -EBUSY. If so, this holds
1380 return -EFAULT; in futex_lock_pi_atomic()
1383 return -EFAULT; in futex_lock_pi_atomic()
1389 return -EDEADLK; in futex_lock_pi_atomic()
1392 return -EDEADLK; in futex_lock_pi_atomic()
1400 return attach_to_pi_state(uaddr, uval, top_waiter->pi_state, ps); in futex_lock_pi_atomic()
1428 * the kernel and blocked on hb->lock. in futex_lock_pi_atomic()
1443 * __unqueue_futex() - Remove the futex_q from its futex_hash_bucket
1446 * The q->lock_ptr must not be NULL and must be held by the caller.
1452 if (WARN_ON_SMP(!q->lock_ptr) || WARN_ON(plist_node_empty(&q->list))) in __unqueue_futex()
1454 lockdep_assert_held(q->lock_ptr); in __unqueue_futex()
1456 hb = container_of(q->lock_ptr, struct futex_hash_bucket, lock); in __unqueue_futex()
1457 plist_del(&q->list, &hb->chain); in __unqueue_futex()
1469 struct task_struct *p = q->task; in mark_wake_futex()
1471 if (WARN(q->pi_state || q->rt_waiter, "refusing to wake PI futex\n")) in mark_wake_futex()
1477 * The waiting task can free the futex_q as soon as q->lock_ptr = NULL in mark_wake_futex()
1483 smp_store_release(&q->lock_ptr, NULL); in mark_wake_futex()
1487 * the hb->lock. in mark_wake_futex()
1503 new_owner = rt_mutex_next_owner(&pi_state->pi_mutex); in wake_futex_pi()
1506 * As per the comment in futex_unlock_pi() this should not happen. in wake_futex_pi()
1513 ret = -EAGAIN; in wake_futex_pi()
1525 ret = -EFAULT; in wake_futex_pi()
1533 * try the TID->0 transition) raced with a waiter setting the in wake_futex_pi()
1538 ret = -EAGAIN; in wake_futex_pi()
1540 ret = -EINVAL; in wake_futex_pi()
1550 postunlock = __rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q); in wake_futex_pi()
1554 raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); in wake_futex_pi()
1569 spin_lock(&hb1->lock); in double_lock_hb()
1571 spin_lock_nested(&hb2->lock, SINGLE_DEPTH_NESTING); in double_lock_hb()
1573 spin_lock(&hb2->lock); in double_lock_hb()
1574 spin_lock_nested(&hb1->lock, SINGLE_DEPTH_NESTING); in double_lock_hb()
1581 spin_unlock(&hb1->lock); in double_unlock_hb()
1583 spin_unlock(&hb2->lock); in double_unlock_hb()
1599 return -EINVAL; in futex_wake()
1611 spin_lock(&hb->lock); in futex_wake()
1613 plist_for_each_entry_safe(this, next, &hb->chain, list) { in futex_wake()
1614 if (match_futex (&this->key, &key)) { in futex_wake()
1615 if (this->pi_state || this->rt_waiter) { in futex_wake()
1616 ret = -EINVAL; in futex_wake()
1621 if (!(this->bitset & bitset)) in futex_wake()
1630 spin_unlock(&hb->lock); in futex_wake()
1645 char comm[sizeof(current->comm)]; in futex_atomic_op_inuser()
1647 * kill this print and return -EINVAL when userspace in futex_atomic_op_inuser()
1677 return -ENOSYS; in futex_atomic_op_inuser()
1713 unlikely(op_ret != -EFAULT && op_ret != -EAGAIN)) { in futex_wake_op()
1722 if (op_ret == -EFAULT) { in futex_wake_op()
1737 plist_for_each_entry_safe(this, next, &hb1->chain, list) { in futex_wake_op()
1738 if (match_futex (&this->key, &key1)) { in futex_wake_op()
1739 if (this->pi_state || this->rt_waiter) { in futex_wake_op()
1740 ret = -EINVAL; in futex_wake_op()
1751 plist_for_each_entry_safe(this, next, &hb2->chain, list) { in futex_wake_op()
1752 if (match_futex (&this->key, &key2)) { in futex_wake_op()
1753 if (this->pi_state || this->rt_waiter) { in futex_wake_op()
1754 ret = -EINVAL; in futex_wake_op()
1772 * requeue_futex() - Requeue a futex_q from one hb to another
1787 if (likely(&hb1->chain != &hb2->chain)) { in requeue_futex()
1788 plist_del(&q->list, &hb1->chain); in requeue_futex()
1791 plist_add(&q->list, &hb2->chain); in requeue_futex()
1792 q->lock_ptr = &hb2->lock; in requeue_futex()
1794 q->key = *key2; in requeue_futex()
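Reconstructed as a sketch; note the waiter counts move with the queue entry so hb_waiters_pending() stays accurate on both buckets:

static inline
void requeue_futex(struct futex_q *q, struct futex_hash_bucket *hb1,
		   struct futex_hash_bucket *hb2, union futex_key *key2)
{
	/*
	 * If key1 and key2 hash to the same bucket, no need to
	 * requeue.
	 */
	if (likely(&hb1->chain != &hb2->chain)) {
		plist_del(&q->list, &hb1->chain);
		hb_waiters_dec(hb1);
		hb_waiters_inc(hb2);
		plist_add(&q->list, &hb2->chain);
		q->lock_ptr = &hb2->lock;
	}
	q->key = *key2;
}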
1798 * requeue_pi_wake_futex() - Wake a task that acquired the lock during requeue
1807 * atomic lock acquisition. Set the q->lock_ptr to the requeue target hb->lock
1809 * with both q->lock_ptr and hb->lock held.
1815 q->key = *key; in requeue_pi_wake_futex()
1819 WARN_ON(!q->rt_waiter); in requeue_pi_wake_futex()
1820 q->rt_waiter = NULL; in requeue_pi_wake_futex()
1822 q->lock_ptr = &hb->lock; in requeue_pi_wake_futex()
1824 wake_up_state(q->task, TASK_NORMAL); in requeue_pi_wake_futex()
1828 * futex_proxy_trylock_atomic() - Attempt an atomic lock for the top waiter
1844 * @exiting is only set when the return value is -EBUSY. If so, this holds
1849 * - 0 - failed to acquire the lock atomically;
1850 * - >0 - acquired the lock, return value is vpid of the top_waiter
1851 * - <0 - error
1864 return -EFAULT; in futex_proxy_trylock_atomic()
1867 return -EFAULT; in futex_proxy_trylock_atomic()
1884 if (!match_futex(top_waiter->requeue_pi_key, key2)) in futex_proxy_trylock_atomic()
1885 return -EINVAL; in futex_proxy_trylock_atomic()
1892 vpid = task_pid_vnr(top_waiter->task); in futex_proxy_trylock_atomic()
1893 ret = futex_lock_pi_atomic(pifutex, hb2, key2, ps, top_waiter->task, in futex_proxy_trylock_atomic()
1903 * futex_requeue() - Requeue waiters from uaddr1 to uaddr2
1908 * @nr_requeue: number of waiters to requeue (0-INT_MAX)
1910 * @requeue_pi: if we are attempting to requeue from a non-pi futex to a
1917 * - >=0 - on success, the number of tasks requeued or woken;
1918 * - <0 - on error
1932 return -EINVAL; in futex_requeue()
1935 * When PI not supported: return -ENOSYS if requeue_pi is true, in futex_requeue()
1941 return -ENOSYS; in futex_requeue()
1949 return -EINVAL; in futex_requeue()
1956 return -ENOMEM; in futex_requeue()
1961 * waiters and no owner. However, second and third wake-ups in futex_requeue()
1968 return -EINVAL; in futex_requeue()
1985 return -EINVAL; in futex_requeue()
2013 ret = -EAGAIN; in futex_requeue()
2018 if (requeue_pi && (task_count - nr_wake < nr_requeue)) { in futex_requeue()
2065 case -EFAULT: in futex_requeue()
2072 case -EBUSY: in futex_requeue()
2073 case -EAGAIN: in futex_requeue()
2076 * - EBUSY: Owner is exiting and we just wait for the in futex_requeue()
2078 * - EAGAIN: The user space value changed. in futex_requeue()
2095 plist_for_each_entry_safe(this, next, &hb1->chain, list) { in futex_requeue()
2096 if (task_count - nr_wake >= nr_requeue) in futex_requeue()
2099 if (!match_futex(&this->key, &key1)) in futex_requeue()
2109 if ((requeue_pi && !this->rt_waiter) || in futex_requeue()
2110 (!requeue_pi && this->rt_waiter) || in futex_requeue()
2111 this->pi_state) { in futex_requeue()
2112 ret = -EINVAL; in futex_requeue()
2127 if (requeue_pi && !match_futex(this->requeue_pi_key, &key2)) { in futex_requeue()
2128 ret = -EINVAL; in futex_requeue()
2143 this->pi_state = pi_state; in futex_requeue()
2144 ret = rt_mutex_start_proxy_lock(&pi_state->pi_mutex, in futex_requeue()
2145 this->rt_waiter, in futex_requeue()
2146 this->task); in futex_requeue()
2151 * this->pi_state because the waiter needs the in futex_requeue()
2167 this->pi_state = NULL; in futex_requeue()
2193 /* The key must be already stored in q->key. */
2195 __acquires(&hb->lock) in queue_lock()
2199 hb = hash_futex(&q->key); in queue_lock()
2203 * a potential waker won't miss a to-be-slept task that is in queue_lock()
2211 q->lock_ptr = &hb->lock; in queue_lock()
2213 spin_lock(&hb->lock); in queue_lock()
2219 __releases(&hb->lock) in queue_unlock()
2221 spin_unlock(&hb->lock); in queue_unlock()
2231 * - either the real thread-priority for the real-time threads in __queue_me()
2233 * - or MAX_RT_PRIO for non-RT threads. in __queue_me()
2234 * Thus, all RT-threads are woken first in priority order, and in __queue_me()
2237 prio = min(current->normal_prio, MAX_RT_PRIO); in __queue_me()
2239 plist_node_init(&q->list, prio); in __queue_me()
2240 plist_add(&q->list, &hb->chain); in __queue_me()
2241 q->task = current; in __queue_me()
2245 * queue_me() - Enqueue the futex_q on the futex_hash_bucket
2249 * The hb->lock must be held by the caller, and is released here. A call to
2257 __releases(&hb->lock) in queue_me()
2260 spin_unlock(&hb->lock); in queue_me()
2264 * unqueue_me() - Remove the futex_q from its futex_hash_bucket
2267 * The q->lock_ptr must not be held by the caller. A call to unqueue_me() must
2271 * - 1 - if the futex_q was still queued (and we unqueued it);
2272 * - 0 - if the futex_q was already removed by the waking thread
2282 * q->lock_ptr can change between this read and the following spin_lock. in unqueue_me()
2283 * Use READ_ONCE to forbid the compiler from reloading q->lock_ptr and in unqueue_me()
2286 lock_ptr = READ_ONCE(q->lock_ptr); in unqueue_me()
2290 * q->lock_ptr can change between reading it and in unqueue_me()
2295 * q->lock_ptr must have changed (maybe several times) in unqueue_me()
2302 if (unlikely(lock_ptr != q->lock_ptr)) { in unqueue_me()
2308 BUG_ON(q->pi_state); in unqueue_me()
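A reconstructed sketch of the retry dance around q->lock_ptr described in the comments above:

static int unqueue_me(struct futex_q *q)
{
	spinlock_t *lock_ptr;
	int ret = 0;

retry:
	lock_ptr = READ_ONCE(q->lock_ptr);
	if (lock_ptr != NULL) {
		spin_lock(lock_ptr);
		/*
		 * A waker may have changed q->lock_ptr (or NULLed it)
		 * between the READ_ONCE() and the spin_lock().
		 */
		if (unlikely(lock_ptr != q->lock_ptr)) {
			spin_unlock(lock_ptr);
			goto retry;
		}
		__unqueue_futex(q);

		BUG_ON(q->pi_state);

		spin_unlock(lock_ptr);
		ret = 1;
	}

	return ret;
}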
2323 __releases(q->lock_ptr) in unqueue_me_pi()
2327 BUG_ON(!q->pi_state); in unqueue_me_pi()
2328 put_pi_state(q->pi_state); in unqueue_me_pi()
2329 q->pi_state = NULL; in unqueue_me_pi()
2331 spin_unlock(q->lock_ptr); in unqueue_me_pi()
2337 struct futex_pi_state *pi_state = q->pi_state; in __fixup_pi_state_owner()
2342 oldowner = pi_state->owner; in __fixup_pi_state_owner()
2347 * - we stole the lock and pi_state->owner needs updating to reflect in __fixup_pi_state_owner()
2352 * - someone stole our lock and we need to fix things to point to the in __fixup_pi_state_owner()
2377 if (__rt_mutex_futex_trylock(&pi_state->pi_mutex)) { in __fixup_pi_state_owner()
2386 newowner = rt_mutex_owner(&pi_state->pi_mutex); in __fixup_pi_state_owner()
2396 err = -EAGAIN; in __fixup_pi_state_owner()
2413 if (!pi_state->owner) in __fixup_pi_state_owner()
2449 * Note: we hold both hb->lock and pi_mutex->wait_lock. We can safely in __fixup_pi_state_owner()
2450 * drop hb->lock since the caller owns the hb -> futex_q relation. in __fixup_pi_state_owner()
2451 * Dropping the pi_mutex->wait_lock requires the state revalidate. in __fixup_pi_state_owner()
2454 raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); in __fixup_pi_state_owner()
2455 spin_unlock(q->lock_ptr); in __fixup_pi_state_owner()
2458 case -EFAULT: in __fixup_pi_state_owner()
2462 case -EAGAIN: in __fixup_pi_state_owner()
2472 spin_lock(q->lock_ptr); in __fixup_pi_state_owner()
2473 raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock); in __fixup_pi_state_owner()
2478 if (pi_state->owner != oldowner) in __fixup_pi_state_owner()
2481 /* Retry if err was -EAGAIN or the fault in succeeded */ in __fixup_pi_state_owner()
2498 * The rtmutex has an owner - either current or some other in __fixup_pi_state_owner()
2501 pi_state_update_owner(pi_state, rt_mutex_owner(&pi_state->pi_mutex)); in __fixup_pi_state_owner()
2509 struct futex_pi_state *pi_state = q->pi_state; in fixup_pi_state_owner()
2512 lockdep_assert_held(q->lock_ptr); in fixup_pi_state_owner()
2514 raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock); in fixup_pi_state_owner()
2516 raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); in fixup_pi_state_owner()
2523 * fixup_owner() - Post lock pi_state and corner case management
2533 * - 1 - success, lock taken;
2534 * - 0 - success, lock not taken;
2535 * - <0 - on error (-EFAULT)
2542 * did a lock-steal - fix up the PI-state in that case: in fixup_owner()
2544 * Speculative pi_state->owner read (we don't hold wait_lock); in fixup_owner()
2545 * since we own the lock pi_state->owner == current is the in fixup_owner()
2548 if (q->pi_state->owner != current) in fixup_owner()
2558 * Another speculative read; pi_state->owner == current is unstable in fixup_owner()
2561 if (q->pi_state->owner == current) in fixup_owner()
2568 if (WARN_ON_ONCE(rt_mutex_owner(&q->pi_state->pi_mutex) == current)) in fixup_owner()
2575 * futex_wait_queue_me() - queue_me() and wait for wakeup, timeout, or signal
2600 if (likely(!plist_node_empty(&q->list))) { in futex_wait_queue_me()
2606 if (!timeout || timeout->task) in futex_wait_queue_me()
2613 * futex_wait_setup() - Prepare to wait on a futex
2626 * - 0 - uaddr contains val and hb has been locked;
2627 * - <0 - -EFAULT or -EWOULDBLOCK (uaddr does not contain val) and hb is unlocked
2636 * Access the page AFTER the hash-bucket is locked. in futex_wait_setup()
2644 * any cond. If we locked the hash-bucket after testing *uaddr, that in futex_wait_setup()
2648 * On the other hand, we insert q and release the hash-bucket only in futex_wait_setup()
2654 ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q->key, FUTEX_READ); in futex_wait_setup()
2678 ret = -EWOULDBLOCK; in futex_wait_setup()
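Condensed into a sketch (the name is illustrative, and the real function retries through the fault-in path instead of returning on a failed read); the point is that *uaddr is read only after the bucket is locked:

static int futex_wait_setup_sketch(u32 __user *uaddr, u32 val,
				   unsigned int flags, struct futex_q *q,
				   struct futex_hash_bucket **hb)
{
	u32 uval;
	int ret;

	ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q->key, FUTEX_READ);
	if (unlikely(ret != 0))
		return ret;

	*hb = queue_lock(q);			/* lock BEFORE reading *uaddr */

	ret = get_futex_value_locked(&uval, uaddr);
	if (ret) {
		queue_unlock(*hb);
		return ret;			/* real code faults in and retries */
	}

	if (uval != val) {
		queue_unlock(*hb);
		ret = -EWOULDBLOCK;		/* value changed: don't sleep */
	}

	return ret;
}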
2694 return -EINVAL; in futex_wait()
2698 current->timer_slack_ns); in futex_wait()
2716 ret = -ETIMEDOUT; in futex_wait()
2717 if (to && !to->task) in futex_wait()
2727 ret = -ERESTARTSYS; in futex_wait()
2731 restart = &current->restart_block; in futex_wait()
2732 restart->futex.uaddr = uaddr; in futex_wait()
2733 restart->futex.val = val; in futex_wait()
2734 restart->futex.time = *abs_time; in futex_wait()
2735 restart->futex.bitset = bitset; in futex_wait()
2736 restart->futex.flags = flags | FLAGS_HAS_TIMEOUT; in futex_wait()
2742 hrtimer_cancel(&to->timer); in futex_wait()
2743 destroy_hrtimer_on_stack(&to->timer); in futex_wait()
2751 u32 __user *uaddr = restart->futex.uaddr; in futex_wait_restart()
2754 if (restart->futex.flags & FLAGS_HAS_TIMEOUT) { in futex_wait_restart()
2755 t = restart->futex.time; in futex_wait_restart()
2758 restart->fn = do_no_restart_syscall; in futex_wait_restart()
2760 return (long)futex_wait(uaddr, restart->futex.flags, in futex_wait_restart()
2761 restart->futex.val, tp, restart->futex.bitset); in futex_wait_restart()
2766 * Userspace tried a 0 -> TID atomic transition of the futex value
2769 * on rt-mutexes, it does PI, etc. (Due to races the kernel might see
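Seen from the other side, a hypothetical userspace fastpath for that 0 -> TID transition (pi_lock() and futex_word are illustrative, not from this file):

#include <linux/futex.h>
#include <stdatomic.h>
#include <sys/syscall.h>
#include <unistd.h>

/* Hypothetical userspace sketch; not part of futex.c. */
static int pi_lock(atomic_uint *futex_word)
{
	unsigned int expected = 0;
	unsigned int tid = (unsigned int)syscall(SYS_gettid);

	/* Uncontended: 0 -> TID in userspace, no kernel entry. */
	if (atomic_compare_exchange_strong(futex_word, &expected, tid))
		return 0;

	/* Contended: the kernel queues us and does PI boosting on the rt-mutex. */
	return (int)syscall(SYS_futex, futex_word, FUTEX_LOCK_PI, 0,
			    NULL, NULL, 0);
}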
2785 return -ENOSYS; in futex_lock_pi()
2788 return -ENOMEM; in futex_lock_pi()
2812 case -EFAULT: in futex_lock_pi()
2814 case -EBUSY: in futex_lock_pi()
2815 case -EAGAIN: in futex_lock_pi()
2818 * - EBUSY: Task is exiting and we just wait for the in futex_lock_pi()
2820 * - EAGAIN: The user space value changed. in futex_lock_pi()
2844 ret = rt_mutex_futex_trylock(&q.pi_state->pi_mutex); in futex_lock_pi()
2846 ret = ret ? 0 : -EWOULDBLOCK; in futex_lock_pi()
2853 * On PREEMPT_RT_FULL, when hb->lock becomes an rt_mutex, we must not in futex_lock_pi()
2855 * include hb->lock in the blocking chain, even though we'll not in in futex_lock_pi()
2856 * fact hold it while blocking. This will lead it to report -EDEADLK in futex_lock_pi()
2859 * Therefore acquire wait_lock while holding hb->lock, but drop the in futex_lock_pi()
2861 * interleaves with futex_unlock_pi() -- which does a similar lock in futex_lock_pi()
2862 * handoff -- such that the latter can observe the futex_q::pi_state in futex_lock_pi()
2865 raw_spin_lock_irq(&q.pi_state->pi_mutex.wait_lock); in futex_lock_pi()
2872 ret = __rt_mutex_start_proxy_lock(&q.pi_state->pi_mutex, &rt_waiter, current); in futex_lock_pi()
2873 raw_spin_unlock_irq(&q.pi_state->pi_mutex.wait_lock); in futex_lock_pi()
2884 ret = rt_mutex_wait_proxy_lock(&q.pi_state->pi_mutex, to, &rt_waiter); in futex_lock_pi()
2890 * first acquire the hb->lock before removing the lock from the in futex_lock_pi()
2897 if (ret && !rt_mutex_cleanup_proxy_lock(&q.pi_state->pi_mutex, &rt_waiter)) in futex_lock_pi()
2908 * the lock, clear our -ETIMEDOUT or -EINTR. in futex_lock_pi()
2922 hrtimer_cancel(&to->timer); in futex_lock_pi()
2923 destroy_hrtimer_on_stack(&to->timer); in futex_lock_pi()
2925 return ret != -EINTR ? ret : -ERESTARTNOINTR; in futex_lock_pi()
2941 * Userspace attempted a TID -> 0 atomic transition, and failed.
2942 * This is the in-kernel slowpath: we look up the PI state (if any),
2943 * and do the rt-mutex unlock.
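And the matching hypothetical unlock fastpath (same illustrative userspace sketch, same includes as the pi_lock() example above):

/* Hypothetical userspace sketch; not part of futex.c. */
static int pi_unlock(atomic_uint *futex_word)
{
	unsigned int tid = (unsigned int)syscall(SYS_gettid);
	unsigned int expected = tid;

	/* Uncontended: TID -> 0 in userspace, no kernel entry. */
	if (atomic_compare_exchange_strong(futex_word, &expected, 0))
		return 0;

	/* FUTEX_WAITERS (or OWNER_DIED) set: take the in-kernel slowpath. */
	return (int)syscall(SYS_futex, futex_word, FUTEX_UNLOCK_PI, 0,
			    NULL, NULL, 0);
}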
2954 return -ENOSYS; in futex_unlock_pi()
2958 return -EFAULT; in futex_unlock_pi()
2963 return -EPERM; in futex_unlock_pi()
2970 spin_lock(&hb->lock); in futex_unlock_pi()
2979 struct futex_pi_state *pi_state = top_waiter->pi_state; in futex_unlock_pi()
2981 ret = -EINVAL; in futex_unlock_pi()
2989 if (pi_state->owner != current) in futex_unlock_pi()
2994 * By taking wait_lock while still holding hb->lock, we ensure in futex_unlock_pi()
3003 raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock); in futex_unlock_pi()
3004 spin_unlock(&hb->lock); in futex_unlock_pi()
3006 /* drops pi_state->pi_mutex.wait_lock */ in futex_unlock_pi()
3018 * pagefault, so retry the user-access and the wakeup: in futex_unlock_pi()
3020 if (ret == -EFAULT) in futex_unlock_pi()
3026 if (ret == -EAGAIN) in futex_unlock_pi()
3038 * on hb->lock. So we can safely ignore them. We do neither in futex_unlock_pi()
3043 spin_unlock(&hb->lock); in futex_unlock_pi()
3045 case -EFAULT: in futex_unlock_pi()
3048 case -EAGAIN: in futex_unlock_pi()
3060 ret = (curval == uval) ? 0 : -EAGAIN; in futex_unlock_pi()
3063 spin_unlock(&hb->lock); in futex_unlock_pi()
3081 * handle_early_requeue_pi_wakeup() - Detect early wakeup on the initial futex
3093 * - 0 = no early wakeup detected;
3094 * - <0 = -ETIMEDOUT or -ERESTARTNOINTR
3110 if (!match_futex(&q->key, key2)) { in handle_early_requeue_pi_wakeup()
3111 WARN_ON(q->lock_ptr && (&hb->lock != q->lock_ptr)); in handle_early_requeue_pi_wakeup()
3116 plist_del(&q->list, &hb->chain); in handle_early_requeue_pi_wakeup()
3120 ret = -EWOULDBLOCK; in handle_early_requeue_pi_wakeup()
3121 if (timeout && !timeout->task) in handle_early_requeue_pi_wakeup()
3122 ret = -ETIMEDOUT; in handle_early_requeue_pi_wakeup()
3124 ret = -ERESTARTNOINTR; in handle_early_requeue_pi_wakeup()
3130 * futex_wait_requeue_pi() - Wait on uaddr and take uaddr2
3131 * @uaddr: the futex we initially wait on (non-pi)
3137 * @uaddr2: the pi futex we will take prior to returning to user-space
3147 * via the following--
3153 * If 3, cleanup and return -ERESTARTNOINTR.
3161 * If 6, return -EWOULDBLOCK (restarting the syscall would do the same).
3163 * If 4 or 7, we cleanup and return with -ETIMEDOUT.
3166 * - 0 - On success;
3167 * - <0 - On error
3181 return -ENOSYS; in futex_wait_requeue_pi()
3184 return -EINVAL; in futex_wait_requeue_pi()
3187 return -EINVAL; in futex_wait_requeue_pi()
3190 current->timer_slack_ns); in futex_wait_requeue_pi()
3220 ret = -EINVAL; in futex_wait_requeue_pi()
3227 spin_lock(&hb->lock); in futex_wait_requeue_pi()
3229 spin_unlock(&hb->lock); in futex_wait_requeue_pi()
3235 * we took the hb->lock above, we also know that futex_requeue() has in futex_wait_requeue_pi()
3246 * did a lock-steal - fix up the PI-state in that case. in futex_wait_requeue_pi()
3248 if (q.pi_state && (q.pi_state->owner != current)) { in futex_wait_requeue_pi()
3258 * Adjust the return value. It's either -EFAULT or in futex_wait_requeue_pi()
3272 pi_mutex = &q.pi_state->pi_mutex; in futex_wait_requeue_pi()
3287 * acquired the lock, clear -ETIMEDOUT or -EINTR. in futex_wait_requeue_pi()
3296 if (ret == -EINTR) { in futex_wait_requeue_pi()
3301 * -EWOULDBLOCK. Save the overhead of the restart and return in futex_wait_requeue_pi()
3302 * -EWOULDBLOCK directly. in futex_wait_requeue_pi()
3304 ret = -EWOULDBLOCK; in futex_wait_requeue_pi()
3309 hrtimer_cancel(&to->timer); in futex_wait_requeue_pi()
3310 destroy_hrtimer_on_stack(&to->timer); in futex_wait_requeue_pi()
3319 * Implementation: user-space maintains a per-thread list of locks it
3324 * per-thread. Userspace also maintains a per-thread 'list_op_pending'
3331 * sys_set_robust_list() - Set the robust-futex list head of a task
3332 * @head: pointer to the list-head
3333 * @len: length of the list-head, as userspace expects
3339 return -ENOSYS; in SYSCALL_DEFINE2()
3344 return -EINVAL; in SYSCALL_DEFINE2()
3346 current->robust_list = head; in SYSCALL_DEFINE2()
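The structure being registered, a sketch of the uapi layout (the pointers are userspace pointers; the uapi header carries no __user annotations):

struct robust_list {
	struct robust_list __user *next;
};

struct robust_list_head {
	struct robust_list list;		/* head of the per-thread lock list */
	long futex_offset;			/* entry -> futex word offset */
	struct robust_list __user *list_op_pending; /* lock/unlock in flight */
};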
3352 * sys_get_robust_list() - Get the robust-futex list head of a task
3354 * @head_ptr: pointer to a list-head pointer, the kernel fills it in
3366 return -ENOSYS; in SYSCALL_DEFINE3()
3370 ret = -ESRCH; in SYSCALL_DEFINE3()
3379 ret = -EPERM; in SYSCALL_DEFINE3()
3383 head = p->robust_list; in SYSCALL_DEFINE3()
3387 return -EFAULT; in SYSCALL_DEFINE3()
3401 * Process a futex-list entry, check whether it's owned by the
3412 return -1; in handle_futex_death()
3416 return -1; in handle_futex_death()
3435 * 1) task->robust_list->list_op_pending != NULL in handle_futex_death()
3462 * futex_wake() even if OWNER_DIED is already set - in handle_futex_death()
3464 * thread-death.) The rest of the cleanup is done in in handle_futex_death()
3480 case -EFAULT: in handle_futex_death()
3482 return -1; in handle_futex_death()
3485 case -EAGAIN: in handle_futex_death()
3499 * Wake robust non-PI futexes here. The wakeup of in handle_futex_death()
3509 * Fetch a robust-list pointer. Bit 0 signals PI futexes:
3518 return -EFAULT; in fetch_robust_entry()
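Reconstructed as a sketch: one get_user() fetches the tagged pointer, then bit 0 is split out as the PI flag:

static inline int fetch_robust_entry(struct robust_list __user **entry,
				     struct robust_list __user * __user *head,
				     unsigned int *pi)
{
	unsigned long uentry;

	if (get_user(uentry, (unsigned long __user *)head))
		return -EFAULT;

	*entry = (void __user *)(uentry & ~1UL);	/* strip the PI bit */
	*pi = uentry & 1;

	return 0;
}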
3527 * Walk curr->robust_list (very carefully, it's a userspace list!)
3530 * We silently return on any sign of list-walking problem.
3534 struct robust_list_head __user *head = curr->robust_list; in exit_robust_list()
3548 if (fetch_robust_entry(&entry, &head->list.next, &pi)) in exit_robust_list()
3553 if (get_user(futex_offset, &head->futex_offset)) in exit_robust_list()
3556 * Fetch any possibly pending lock-add first, and handle it in exit_robust_list()
3559 if (fetch_robust_entry(&pending, &head->list_op_pending, &pip)) in exit_robust_list()
3563 while (entry != &head->list) { in exit_robust_list()
3568 rc = fetch_robust_entry(&next_entry, &entry->next, &next_pi); in exit_robust_list()
3585 if (!--limit) in exit_robust_list()
3599 if (unlikely(tsk->robust_list)) { in futex_cleanup()
3601 tsk->robust_list = NULL; in futex_cleanup()
3605 if (unlikely(tsk->compat_robust_list)) { in futex_cleanup()
3607 tsk->compat_robust_list = NULL; in futex_cleanup()
3611 if (unlikely(!list_empty(&tsk->pi_state_list))) in futex_cleanup()
3616 * futex_exit_recursive - Set the tasks futex state to FUTEX_STATE_DEAD
3635 if (tsk->futex_state == FUTEX_STATE_EXITING) in futex_exit_recursive()
3636 mutex_unlock(&tsk->futex_exit_mutex); in futex_exit_recursive()
3637 tsk->futex_state = FUTEX_STATE_DEAD; in futex_exit_recursive()
3645 * tsk->futex_exit_mutex when it observes FUTEX_STATE_EXITING in in futex_cleanup_begin()
3648 mutex_lock(&tsk->futex_exit_mutex); in futex_cleanup_begin()
3651 * Switch the state to FUTEX_STATE_EXITING under tsk->pi_lock. in futex_cleanup_begin()
3653 * This ensures that all subsequent checks of tsk->futex_state in in futex_cleanup_begin()
3655 * tsk->pi_lock held. in futex_cleanup_begin()
3658 * the state change under tsk->pi_lock by a concurrent waiter must in futex_cleanup_begin()
3661 raw_spin_lock_irq(&tsk->pi_lock); in futex_cleanup_begin()
3662 tsk->futex_state = FUTEX_STATE_EXITING; in futex_cleanup_begin()
3663 raw_spin_unlock_irq(&tsk->pi_lock); in futex_cleanup_begin()
3672 tsk->futex_state = state; in futex_cleanup_end()
3677 mutex_unlock(&tsk->futex_exit_mutex); in futex_cleanup_end()
3717 return -ENOSYS; in do_futex()
3727 return -ENOSYS; in do_futex()
3760 return -ENOSYS; in do_futex()
3777 return -EFAULT; in SYSCALL_DEFINE6()
3779 return -EFAULT; in SYSCALL_DEFINE6()
3781 return -EINVAL; in SYSCALL_DEFINE6()
3803 * Fetch a robust-list pointer. Bit 0 signals PI futexes:
3810 return -EFAULT; in compat_fetch_robust_entry()
3828 * Walk curr->robust_list (very carefully, it's a userspace list!)
3831 * We silently return on any sign of list-walking problem.
3835 struct compat_robust_list_head __user *head = curr->compat_robust_list; in compat_exit_robust_list()
3850 if (compat_fetch_robust_entry(&uentry, &entry, &head->list.next, &pi)) in compat_exit_robust_list()
3855 if (get_user(futex_offset, &head->futex_offset)) in compat_exit_robust_list()
3858 * Fetch any possibly pending lock-add first, and handle it in compat_exit_robust_list()
3862 &head->list_op_pending, &pip)) in compat_exit_robust_list()
3866 while (entry != (struct robust_list __user *) &head->list) { in compat_exit_robust_list()
3872 (compat_uptr_t __user *)&entry->next, &next_pi); in compat_exit_robust_list()
3892 if (!--limit) in compat_exit_robust_list()
3909 return -ENOSYS; in COMPAT_SYSCALL_DEFINE2()
3912 return -EINVAL; in COMPAT_SYSCALL_DEFINE2()
3914 current->compat_robust_list = head; in COMPAT_SYSCALL_DEFINE2()
3928 return -ENOSYS; in COMPAT_SYSCALL_DEFINE3()
3932 ret = -ESRCH; in COMPAT_SYSCALL_DEFINE3()
3941 ret = -EPERM; in COMPAT_SYSCALL_DEFINE3()
3945 head = p->compat_robust_list; in COMPAT_SYSCALL_DEFINE3()
3949 return -EFAULT; in COMPAT_SYSCALL_DEFINE3()
3973 return -EFAULT; in SYSCALL_DEFINE6()
3975 return -EINVAL; in SYSCALL_DEFINE6()
4003 * guaranteed to fault and we get -EFAULT on functional in futex_detect_cmpxchg()
4004 * implementation, the non-functional ones will return in futex_detect_cmpxchg()
4005 * -ENOSYS. in futex_detect_cmpxchg()
4007 if (cmpxchg_futex_value_locked(&curval, NULL, 0, 0) == -EFAULT) in futex_detect_cmpxchg()
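The whole detection helper, reconstructed as a sketch (only compiled when the architecture does not select CONFIG_HAVE_FUTEX_CMPXCHG):

static void __init futex_detect_cmpxchg(void)
{
#ifndef CONFIG_HAVE_FUTEX_CMPXCHG
	u32 curval;

	/*
	 * The NULL user address is guaranteed to fault: functional
	 * implementations of futex_atomic_cmpxchg_inatomic() return
	 * -EFAULT, non-functional stubs return -ENOSYS.
	 */
	if (cmpxchg_futex_value_locked(&curval, NULL, 0, 0) == -EFAULT)
		futex_cmpxchg_enabled = 1;
#endif
}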