Lines Matching +full:poll-timeout-ms
1 // SPDX-License-Identifier: GPL-2.0-or-later
18 #include <linux/poll.h>
47 * 2) ep->mtx (mutex)
48 * 3) ep->lock (rwlock)
51 * We need a rwlock (ep->lock) because we manipulate objects
52 * from inside the poll callback, that might be triggered from
54 * So we can't sleep inside the poll callback and hence we need
58 * mutex (ep->mtx). It is acquired during the event transfer loop,
73 * It is necessary to acquire multiple "ep->mtx"es at once in the
76 * epoll_ctl(e1, EPOLL_CTL_ADD, e2), e1->mtx will always be acquired
77 * before e2->mtx). Since we disallow cycles of epoll file
78 * descriptors, this ensures that the mutexes are well-ordered. In
82 * It is possible to drop the "ep->mtx" and to use the global
83 * mutex "epmutex" (together with "ep->lock") to have it working,
84 * but having "ep->mtx" will make the interface more scalable.
86 * normal operations the epoll private "ep->mtx" will guarantee
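The ordering described above matters because one epoll instance can itself be watched by another. A minimal user-space sketch of that nesting (illustrative only, not code from this file) shows the situation that makes epoll_ctl(e1, EPOLL_CTL_ADD, e2) take e1->mtx before e2->mtx:

#include <stdio.h>
#include <stdlib.h>
#include <sys/epoll.h>

int main(void)
{
	int e2 = epoll_create1(0);		/* inner epoll instance */
	int e1 = epoll_create1(0);		/* outer epoll instance */
	struct epoll_event ev = { .events = EPOLLIN, .data.fd = e2 };

	if (e1 < 0 || e2 < 0) {
		perror("epoll_create1");
		return EXIT_FAILURE;
	}
	/* e1 now watches e2: readiness of e2 propagates to e1's waiters. */
	if (epoll_ctl(e1, EPOLL_CTL_ADD, e2, &ev) < 0) {
		perror("epoll_ctl");
		return EXIT_FAILURE;
	}
	return EXIT_SUCCESS;
}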
103 #define EP_UNACTIVE_PTR ((void *) -1L)
149 * Works together with "struct eventpoll"->ovflist in keeping the
157 /* Number of active wait queues attached to poll operations */
160 /* List containing poll wait queues */
193 /* Wait queue used by file->poll() */
208 * holding ->lock.
224 /* used to track busy poll napi_id */
234 /* Wait structure used by the poll hooks */
252 /* Wrapper struct used by poll queueing */
318 return f->f_op == &eventpoll_fops; in is_file_epoll()
325 ffd->file = file; in ep_set_ffd()
326 ffd->fd = fd; in ep_set_ffd()
333 return (p1->file > p2->file ? +1: in ep_cmp_ffd()
334 (p1->file < p2->file ? -1 : p1->fd - p2->fd)); in ep_cmp_ffd()
340 return !list_empty(&epi->rdllink); in ep_is_linked()
351 return container_of(p, struct eppoll_entry, wait)->base; in ep_item_from_wait()
357 return container_of(p, struct ep_pqueue, pt)->epi; in ep_item_from_epqueue()
360 /* Initialize the poll safe wake up structure */
363 INIT_LIST_HEAD(&ncalls->tasks_call_list); in ep_nested_calls_init()
364 spin_lock_init(&ncalls->lock); in ep_nested_calls_init()
368 * ep_events_available - Checks if ready events might be available.
377 return !list_empty_careful(&ep->rdllist) || in ep_events_available()
378 READ_ONCE(ep->ovflist) != EP_UNACTIVE_PTR; in ep_events_available()
390 * Busy poll if globally on and supporting sockets found && no events,
397 unsigned int napi_id = READ_ONCE(ep->napi_id); in ep_busy_loop()
405 if (ep->napi_id) in ep_reset_busy_poll_napi_id()
406 ep->napi_id = 0; in ep_reset_busy_poll_napi_id()
410 * Set epoll busy poll NAPI ID from sk.
423 sock = sock_from_file(epi->ffd.file, &err); in ep_set_busy_poll_napi_id()
427 sk = sock->sk; in ep_set_busy_poll_napi_id()
431 napi_id = READ_ONCE(sk->sk_napi_id); in ep_set_busy_poll_napi_id()
432 ep = epi->ep; in ep_set_busy_poll_napi_id()
434 /* Non-NAPI IDs can be rejected in ep_set_busy_poll_napi_id()
438 if (napi_id < MIN_NAPI_ID || napi_id == ep->napi_id) in ep_set_busy_poll_napi_id()
441 /* record NAPI ID for use in next busy poll */ in ep_set_busy_poll_napi_id()
442 ep->napi_id = napi_id; in ep_set_busy_poll_napi_id()
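ep_set_busy_poll_napi_id() records the NAPI ID it reads from sk->sk_napi_id. User space can observe the same value through the SO_INCOMING_NAPI_ID socket option (available since kernel 4.12); the sketch below is an assumed typical usage, not part of this file:

#include <stdio.h>
#include <sys/socket.h>

/* Print the NAPI ID the kernel has associated with a connected socket.
 * Requires headers that define SO_INCOMING_NAPI_ID (kernel >= 4.12). */
static void print_napi_id(int sockfd)
{
	unsigned int napi_id = 0;
	socklen_t len = sizeof(napi_id);

	/* Stays 0 until the socket has received traffic on a NAPI device. */
	if (getsockopt(sockfd, SOL_SOCKET, SO_INCOMING_NAPI_ID,
		       &napi_id, &len) == 0)
		printf("NAPI ID: %u\n", napi_id);
	else
		perror("getsockopt(SO_INCOMING_NAPI_ID)");
}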
462 * ep_call_nested - Perform a bound (possibly) nested call, by checking
465 * not re-entered.
473 * Returns: the code returned by the @nproc callback, or -1 if
482 struct list_head *lsthead = &ncalls->tasks_call_list; in ep_call_nested()
486 spin_lock_irqsave(&ncalls->lock, flags); in ep_call_nested()
494 if (tncur->ctx == ctx && in ep_call_nested()
495 (tncur->cookie == cookie || ++call_nests > EP_MAX_NESTS)) { in ep_call_nested()
500 error = -1; in ep_call_nested()
510 spin_unlock_irqrestore(&ncalls->lock, flags); in ep_call_nested()
516 spin_lock_irqsave(&ncalls->lock, flags); in ep_call_nested()
519 spin_unlock_irqrestore(&ncalls->lock, flags); in ep_call_nested()
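The fragments above are ep_call_nested(): each invocation is recorded with its context and cookie, and the call is refused when the same cookie re-enters on the same context or the nesting depth exceeds EP_MAX_NESTS. A generic, simplified sketch of that guard (names and limit are illustrative; no locking; not the kernel's implementation):

#define MAX_NESTS 4

struct nest_slot { void *ctx; void *cookie; };

static int call_nested(struct nest_slot *slots, int *depth,
		       int (*nproc)(void *), void *priv,
		       void *cookie, void *ctx)
{
	int i, ret;

	if (*depth >= MAX_NESTS)
		return -1;			/* recursion bound exceeded */
	for (i = 0; i < *depth; i++)
		if (slots[i].ctx == ctx && slots[i].cookie == cookie)
			return -1;		/* same cookie re-entered */

	slots[*depth].ctx = ctx;
	slots[*depth].cookie = cookie;
	(*depth)++;
	ret = nproc(priv);			/* may recurse back in here */
	(*depth)--;
	return ret;
}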
537 * issue a wake_up() on its poll wake list. Epoll (efd1) has installed a
541 * the waiters on its poll wait list (efd2). So it calls ep_poll_safewake()
560 * it might be natural to create a per-cpu nest count. However, since in ep_poll_safewake()
561 * we can recurse on ep->poll_wait.lock, and a non-raw spinlock can in ep_poll_safewake()
562 * schedule() in the -rt kernel, the per-cpu variables are no longer in ep_poll_safewake()
574 if ((is_file_epoll(epi->ffd.file))) { in ep_poll_safewake()
575 ep_src = epi->ffd.file->private_data; in ep_poll_safewake()
576 nests = ep_src->nests; in ep_poll_safewake()
581 spin_lock_irqsave_nested(&ep->poll_wait.lock, flags, nests); in ep_poll_safewake()
582 ep->nests = nests + 1; in ep_poll_safewake()
583 wake_up_locked_poll(&ep->poll_wait, EPOLLIN | pollflags); in ep_poll_safewake()
584 ep->nests = 0; in ep_poll_safewake()
585 spin_unlock_irqrestore(&ep->poll_wait.lock, flags); in ep_poll_safewake()
593 wake_up_poll(&ep->poll_wait, EPOLLIN | pollflags); in ep_poll_safewake()
604 * If it is cleared by POLLFREE, it should be rcu-safe. in ep_remove_wait_queue()
607 * we rely on whead->lock. in ep_remove_wait_queue()
609 whead = smp_load_acquire(&pwq->whead); in ep_remove_wait_queue()
611 remove_wait_queue(whead, &pwq->wait); in ep_remove_wait_queue()
616 * This function unregisters poll callbacks from the associated file
622 struct list_head *lsthead = &epi->pwqlist; in ep_unregister_pollwait()
628 list_del(&pwq->llink); in ep_unregister_pollwait()
634 /* call only when ep->mtx is held */
637 return rcu_dereference_check(epi->ws, lockdep_is_held(&epi->ep->mtx)); in ep_wakeup_source()
640 /* call only when ep->mtx is held */
651 return rcu_access_pointer(epi->ws) ? true : false; in ep_has_wakeup_source()
654 /* call when ep->mtx cannot be held (ep_poll_callback) */
660 ws = rcu_dereference(epi->ws); in ep_pm_stay_awake_rcu()
667 * ep_scan_ready_list - Scans the ready list in a way that makes it possible
668 * for the scan code to call f_op->poll(). Also allows for
674 * @depth: The current depth of recursive f_op->poll calls.
675 * @ep_locked: caller already holds ep->mtx
696 mutex_lock_nested(&ep->mtx, depth); in ep_scan_ready_list()
699 * Steal the ready list, and re-init the original one to the in ep_scan_ready_list()
700 * empty list. Also, set ep->ovflist to NULL so that events in ep_scan_ready_list()
702 * have the poll callback to queue directly on ep->rdllist, in ep_scan_ready_list()
706 write_lock_irq(&ep->lock); in ep_scan_ready_list()
707 list_splice_init(&ep->rdllist, &txlist); in ep_scan_ready_list()
708 WRITE_ONCE(ep->ovflist, NULL); in ep_scan_ready_list()
709 write_unlock_irq(&ep->lock); in ep_scan_ready_list()
716 write_lock_irq(&ep->lock); in ep_scan_ready_list()
719 * other events might have been queued by the poll callback. in ep_scan_ready_list()
720 * We re-insert them inside the main ready-list here. in ep_scan_ready_list()
722 for (nepi = READ_ONCE(ep->ovflist); (epi = nepi) != NULL; in ep_scan_ready_list()
723 nepi = epi->next, epi->next = EP_UNACTIVE_PTR) { in ep_scan_ready_list()
727 * queued into ->ovflist but the "txlist" might already in ep_scan_ready_list()
732 * ->ovflist is LIFO, so we have to reverse it in order in ep_scan_ready_list()
735 list_add(&epi->rdllink, &ep->rdllist); in ep_scan_ready_list()
740 * We need to set back ep->ovflist to EP_UNACTIVE_PTR, so that after in ep_scan_ready_list()
742 * ep->rdllist. in ep_scan_ready_list()
744 WRITE_ONCE(ep->ovflist, EP_UNACTIVE_PTR); in ep_scan_ready_list()
747 * Quickly re-inject items left on "txlist". in ep_scan_ready_list()
749 list_splice(&txlist, &ep->rdllist); in ep_scan_ready_list()
750 __pm_relax(ep->ws); in ep_scan_ready_list()
752 if (!list_empty(&ep->rdllist)) { in ep_scan_ready_list()
753 if (waitqueue_active(&ep->wq)) in ep_scan_ready_list()
754 wake_up(&ep->wq); in ep_scan_ready_list()
757 write_unlock_irq(&ep->lock); in ep_scan_ready_list()
760 mutex_unlock(&ep->mtx); in ep_scan_ready_list()
777 struct file *file = epi->ffd.file; in ep_remove()
782 * Removes poll wait queue hooks. in ep_remove()
787 spin_lock(&file->f_lock); in ep_remove()
788 list_del_rcu(&epi->fllink); in ep_remove()
789 spin_unlock(&file->f_lock); in ep_remove()
791 rb_erase_cached(&epi->rbn, &ep->rbr); in ep_remove()
793 write_lock_irq(&ep->lock); in ep_remove()
795 list_del_init(&epi->rdllink); in ep_remove()
796 write_unlock_irq(&ep->lock); in ep_remove()
801 * field epi->rcu, since we are trying to minimize the size of in ep_remove()
803 * ep->mtx. The rcu read side, reverse_path_check_proc(), does not make in ep_remove()
806 call_rcu(&epi->rcu, epi_rcu_free); in ep_remove()
808 atomic_long_dec(&ep->user->epoll_watches); in ep_remove()
819 if (waitqueue_active(&ep->poll_wait)) in ep_free()
825 * We do not need to hold "ep->mtx" here because the epoll file in ep_free()
833 * Walks through the whole tree by unregistering poll callbacks. in ep_free()
835 for (rbp = rb_first_cached(&ep->rbr); rbp; rbp = rb_next(rbp)) { in ep_free()
844 * point we are sure no poll callbacks will be lingering around, and also by in ep_free()
846 * us during this operation. So we can avoid the lock on "ep->lock". in ep_free()
847 * We do not need to lock ep->mtx, either, we only do it to prevent in ep_free()
850 mutex_lock(&ep->mtx); in ep_free()
851 while ((rbp = rb_first_cached(&ep->rbr)) != NULL) { in ep_free()
856 mutex_unlock(&ep->mtx); in ep_free()
859 mutex_destroy(&ep->mtx); in ep_free()
860 free_uid(ep->user); in ep_free()
861 wakeup_source_unregister(ep->ws); in ep_free()
867 struct eventpoll *ep = file->private_data; in ep_eventpoll_release()
882 * the ep->mtx so we need to start from depth=1, such that mutex_lock_nested()
891 pt->_key = epi->event.events; in ep_item_poll()
892 if (!is_file_epoll(epi->ffd.file)) in ep_item_poll()
893 return vfs_poll(epi->ffd.file, pt) & epi->event.events; in ep_item_poll()
895 ep = epi->ffd.file->private_data; in ep_item_poll()
896 poll_wait(epi->ffd.file, &ep->poll_wait, pt); in ep_item_poll()
897 locked = pt && (pt->_qproc == ep_ptable_queue_proc); in ep_item_poll()
899 return ep_scan_ready_list(epi->ffd.file->private_data, in ep_item_poll()
901 locked) & epi->event.events; in ep_item_poll()
919 * Item has been dropped into the ready list by the poll in ep_read_events_proc()
924 list_del_init(&epi->rdllink); in ep_read_events_proc()
933 struct eventpoll *ep = file->private_data; in ep_eventpoll_poll()
936 /* Insert inside our poll wait queue */ in ep_eventpoll_poll()
937 poll_wait(file, &ep->poll_wait, wait); in ep_eventpoll_poll()
950 struct eventpoll *ep = f->private_data; in ep_show_fdinfo()
953 mutex_lock(&ep->mtx); in ep_show_fdinfo()
954 for (rbp = rb_first_cached(&ep->rbr); rbp; rbp = rb_next(rbp)) { in ep_show_fdinfo()
956 struct inode *inode = file_inode(epi->ffd.file); in ep_show_fdinfo()
960 epi->ffd.fd, epi->event.events, in ep_show_fdinfo()
961 (long long)epi->event.data, in ep_show_fdinfo()
962 (long long)epi->ffd.file->f_pos, in ep_show_fdinfo()
963 inode->i_ino, inode->i_sb->s_dev); in ep_show_fdinfo()
967 mutex_unlock(&ep->mtx); in ep_show_fdinfo()
977 .poll = ep_eventpoll_poll,
992 * We don't want to get "file->f_lock" because it is not in eventpoll_release_file()
999 * "ep->mtx" after "epmutex" because ep_remove() requires it when called in eventpoll_release_file()
1005 list_for_each_entry_safe(epi, next, &file->f_ep_links, fllink) { in eventpoll_release_file()
1006 ep = epi->ep; in eventpoll_release_file()
1007 mutex_lock_nested(&ep->mtx, 0); in eventpoll_release_file()
1009 mutex_unlock(&ep->mtx); in eventpoll_release_file()
1021 error = -ENOMEM; in ep_alloc()
1026 mutex_init(&ep->mtx); in ep_alloc()
1027 rwlock_init(&ep->lock); in ep_alloc()
1028 init_waitqueue_head(&ep->wq); in ep_alloc()
1029 init_waitqueue_head(&ep->poll_wait); in ep_alloc()
1030 INIT_LIST_HEAD(&ep->rdllist); in ep_alloc()
1031 ep->rbr = RB_ROOT_CACHED; in ep_alloc()
1032 ep->ovflist = EP_UNACTIVE_PTR; in ep_alloc()
1033 ep->user = user; in ep_alloc()
1057 for (rbp = ep->rbr.rb_root.rb_node; rbp; ) { in ep_find()
1059 kcmp = ep_cmp_ffd(&ffd, &epi->ffd); in ep_find()
1061 rbp = rbp->rb_right; in ep_find()
1063 rbp = rbp->rb_left; in ep_find()
1079 for (rbp = rb_first_cached(&ep->rbr); rbp; rbp = rb_next(rbp)) { in ep_find_tfd()
1081 if (epi->ffd.fd == tfd) { in ep_find_tfd()
1085 toff--; in ep_find_tfd()
1101 return ERR_PTR(-EINVAL); in get_epoll_tfile_raw_ptr()
1103 ep = file->private_data; in get_epoll_tfile_raw_ptr()
1105 mutex_lock(&ep->mtx); in get_epoll_tfile_raw_ptr()
1108 file_raw = epi->ffd.file; in get_epoll_tfile_raw_ptr()
1110 file_raw = ERR_PTR(-ENOENT); in get_epoll_tfile_raw_ptr()
1111 mutex_unlock(&ep->mtx); in get_epoll_tfile_raw_ptr()
1141 * This is a simple 'new->next = head' operation, but cmpxchg() in list_add_tail_lockless()
1144 * new->next == new. in list_add_tail_lockless()
1146 if (cmpxchg(&new->next, new, head) != new) in list_add_tail_lockless()
1150 * Initially ->next of a new element must be updated with the head in list_add_tail_lockless()
1152 * exchanged. XCHG guarantees memory ordering, thus ->next should be in list_add_tail_lockless()
1154 * swapped before prev->next is updated. in list_add_tail_lockless()
1157 prev = xchg(&head->prev, new); in list_add_tail_lockless()
1160 * It is safe to modify prev->next and new->prev, because a new element in list_add_tail_lockless()
1161 * is added only to the tail and new->next is updated before XCHG. in list_add_tail_lockless()
1164 prev->next = new; in list_add_tail_lockless()
1165 new->prev = prev; in list_add_tail_lockless()
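The fragments above make up the lockless tail append used for the ready list under the read side of ep->lock. As a rough user-space analogue (C11 atomics with sequentially consistent operations standing in for the kernel's cmpxchg()/xchg(); an approximation, not the kernel code):

#include <stdatomic.h>
#include <stdbool.h>

/* Circular list node; a free-standing node points to itself. */
struct lnode {
	_Atomic(struct lnode *) next;
	_Atomic(struct lnode *) prev;
};

/* Append @node to the tail of the ring headed by @head.  Returns false if
 * @node is already on a list (its ->next no longer points to itself). */
static bool tail_add_lockless(struct lnode *node, struct lnode *head)
{
	struct lnode *expected = node;
	struct lnode *prev;

	/* Claim the node: only one thread can swing node->next away from node. */
	if (!atomic_compare_exchange_strong(&node->next, &expected, head))
		return false;

	/* Publish the node as the new tail; concurrent adders each receive a
	 * distinct @prev and therefore own the prev->next slot they update. */
	prev = atomic_exchange(&head->prev, node);
	atomic_store(&prev->next, node);
	atomic_store(&node->prev, prev);
	return true;
}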
1171 * Chains a new epi entry to the tail of the ep->ovflist in a lockless way,
1178 struct eventpoll *ep = epi->ep; in chain_epi_lockless()
1181 if (epi->next != EP_UNACTIVE_PTR) in chain_epi_lockless()
1185 if (cmpxchg(&epi->next, EP_UNACTIVE_PTR, NULL) != EP_UNACTIVE_PTR) in chain_epi_lockless()
1189 epi->next = xchg(&ep->ovflist, epi); in chain_epi_lockless()
1200 * events from other file descriptors, thus all modifications to ->rdllist
1201 * or ->ovflist are lockless. Read lock is paired with the write lock from
1206 * concurrently for the same @epi from different CPUs if poll table was inited
1216 struct eventpoll *ep = epi->ep; in ep_poll_callback()
1221 read_lock_irqsave(&ep->lock, flags); in ep_poll_callback()
1226 * If the event mask does not contain any poll(2) event, we consider the in ep_poll_callback()
1231 if (!(epi->event.events & ~EP_PRIVATE_BITS)) in ep_poll_callback()
1240 if (pollflags && !(pollflags & epi->event.events)) in ep_poll_callback()
1245 * (because we're accessing user memory, and because of linux f_op->poll() in ep_poll_callback()
1247 * chained in ep->ovflist and requeued later on. in ep_poll_callback()
1249 if (READ_ONCE(ep->ovflist) != EP_UNACTIVE_PTR) { in ep_poll_callback()
1254 if (list_add_tail_lockless(&epi->rdllink, &ep->rdllist)) in ep_poll_callback()
1259 * Wake up ( if active ) both the eventpoll wait list and the ->poll() in ep_poll_callback()
1262 if (waitqueue_active(&ep->wq)) { in ep_poll_callback()
1263 if ((epi->event.events & EPOLLEXCLUSIVE) && in ep_poll_callback()
1267 if (epi->event.events & EPOLLIN) in ep_poll_callback()
1271 if (epi->event.events & EPOLLOUT) in ep_poll_callback()
1279 wake_up(&ep->wq); in ep_poll_callback()
1281 if (waitqueue_active(&ep->poll_wait)) in ep_poll_callback()
1285 read_unlock_irqrestore(&ep->lock, flags); in ep_poll_callback()
1291 if (!(epi->event.events & EPOLLEXCLUSIVE)) in ep_poll_callback()
1297 * ->whead = NULL and do another remove_wait_queue() after in ep_poll_callback()
1300 list_del_init(&wait->entry); in ep_poll_callback()
1302 * ->whead != NULL protects us from the race with ep_free() in ep_poll_callback()
1303 * or ep_remove(), ep_remove_wait_queue() takes whead->lock in ep_poll_callback()
1307 smp_store_release(&ep_pwq_from_wait(wait)->whead, NULL); in ep_poll_callback()
1323 if (epi->nwait >= 0 && (pwq = kmem_cache_alloc(pwq_cache, GFP_KERNEL))) { in ep_ptable_queue_proc()
1324 init_waitqueue_func_entry(&pwq->wait, ep_poll_callback); in ep_ptable_queue_proc()
1325 pwq->whead = whead; in ep_ptable_queue_proc()
1326 pwq->base = epi; in ep_ptable_queue_proc()
1327 if (epi->event.events & EPOLLEXCLUSIVE) in ep_ptable_queue_proc()
1328 add_wait_queue_exclusive(whead, &pwq->wait); in ep_ptable_queue_proc()
1330 add_wait_queue(whead, &pwq->wait); in ep_ptable_queue_proc()
1331 list_add_tail(&pwq->llink, &epi->pwqlist); in ep_ptable_queue_proc()
1332 epi->nwait++; in ep_ptable_queue_proc()
1335 epi->nwait = -1; in ep_ptable_queue_proc()
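When EPOLLEXCLUSIVE is set, the queueing callback above uses add_wait_queue_exclusive(), so one event wakes only one of the epoll instances waiting on the same file. A minimal user-space illustration (an assumed per-worker accept pattern, not code from this file):

#include <sys/epoll.h>

/* Each worker calls this with the shared listening socket and then
 * epoll_wait()s on its own epoll fd; a new connection wakes one worker. */
static int watch_listener_exclusive(int listen_fd)
{
	struct epoll_event ev = {
		.events = EPOLLIN | EPOLLEXCLUSIVE,
		.data.fd = listen_fd,
	};
	int epfd = epoll_create1(0);

	if (epfd < 0)
		return -1;
	if (epoll_ctl(epfd, EPOLL_CTL_ADD, listen_fd, &ev) < 0)
		return -1;
	return epfd;
}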
1342 struct rb_node **p = &ep->rbr.rb_root.rb_node, *parent = NULL; in ep_rbtree_insert()
1349 kcmp = ep_cmp_ffd(&epi->ffd, &epic->ffd); in ep_rbtree_insert()
1351 p = &parent->rb_right; in ep_rbtree_insert()
1354 p = &parent->rb_left; in ep_rbtree_insert()
1356 rb_link_node(&epi->rbn, parent, p); in ep_rbtree_insert()
1357 rb_insert_color_cached(&epi->rbn, &ep->rbr, leftmost); in ep_rbtree_insert()
1384 return -1; in path_count_inc()
1405 list_for_each_entry_rcu(epi, &file->f_ep_links, fllink) { in reverse_path_check_proc()
1406 child_file = epi->ep->file; in reverse_path_check_proc()
1408 if (list_empty(&child_file->f_ep_links)) { in reverse_path_check_proc()
1410 error = -1; in reverse_path_check_proc()
1431 * reverse_path_check - The tfile_check_list is a list of file *, which have
1438 * -1 otherwise.
1462 if (!epi->ep->ws) { in ep_create_wakeup_source()
1463 epi->ep->ws = wakeup_source_register(NULL, "eventpoll"); in ep_create_wakeup_source()
1464 if (!epi->ep->ws) in ep_create_wakeup_source()
1465 return -ENOMEM; in ep_create_wakeup_source()
1468 take_dentry_name_snapshot(&n, epi->ffd.file->f_path.dentry); in ep_create_wakeup_source()
1473 return -ENOMEM; in ep_create_wakeup_source()
1474 rcu_assign_pointer(epi->ws, ws); in ep_create_wakeup_source()
1484 RCU_INIT_POINTER(epi->ws, NULL); in ep_destroy_wakeup_source()
1509 user_watches = atomic_long_read(&ep->user->epoll_watches); in ep_insert()
1511 return -ENOSPC; in ep_insert()
1513 return -ENOMEM; in ep_insert()
1516 INIT_LIST_HEAD(&epi->rdllink); in ep_insert()
1517 INIT_LIST_HEAD(&epi->fllink); in ep_insert()
1518 INIT_LIST_HEAD(&epi->pwqlist); in ep_insert()
1519 epi->ep = ep; in ep_insert()
1520 ep_set_ffd(&epi->ffd, tfile, fd); in ep_insert()
1521 epi->event = *event; in ep_insert()
1522 epi->nwait = 0; in ep_insert()
1523 epi->next = EP_UNACTIVE_PTR; in ep_insert()
1524 if (epi->event.events & EPOLLWAKEUP) { in ep_insert()
1529 RCU_INIT_POINTER(epi->ws, NULL); in ep_insert()
1533 spin_lock(&tfile->f_lock); in ep_insert()
1534 list_add_tail_rcu(&epi->fllink, &tfile->f_ep_links); in ep_insert()
1535 spin_unlock(&tfile->f_lock); in ep_insert()
1544 error = -EINVAL; in ep_insert()
1548 /* Initialize the poll table using the queue callback */ in ep_insert()
1553 * Attach the item to the poll hooks and get current event bits. in ep_insert()
1556 * this operation completes, the poll callback can start hitting in ep_insert()
1562 * We have to check if something went wrong during the poll wait queue in ep_insert()
1566 error = -ENOMEM; in ep_insert()
1567 if (epi->nwait < 0) in ep_insert()
1571 write_lock_irq(&ep->lock); in ep_insert()
1578 list_add_tail(&epi->rdllink, &ep->rdllist); in ep_insert()
1582 if (waitqueue_active(&ep->wq)) in ep_insert()
1583 wake_up(&ep->wq); in ep_insert()
1584 if (waitqueue_active(&ep->poll_wait)) in ep_insert()
1588 write_unlock_irq(&ep->lock); in ep_insert()
1590 atomic_long_inc(&ep->user->epoll_watches); in ep_insert()
1601 spin_lock(&tfile->f_lock); in ep_insert()
1602 list_del_rcu(&epi->fllink); in ep_insert()
1603 spin_unlock(&tfile->f_lock); in ep_insert()
1605 rb_erase_cached(&epi->rbn, &ep->rbr); in ep_insert()
1609 * allocated wait queue. Note that we don't care about the ep->ovflist in ep_insert()
1613 write_lock_irq(&ep->lock); in ep_insert()
1615 list_del_init(&epi->rdllink); in ep_insert()
1616 write_unlock_irq(&ep->lock); in ep_insert()
1641 * Set the new event interest mask before calling f_op->poll(); in ep_modify()
1643 * f_op->poll() call and the new event set registering. in ep_modify()
1645 epi->event.events = event->events; /* need barrier below */ in ep_modify()
1646 epi->event.data = event->data; /* protected by mtx */ in ep_modify()
1647 if (epi->event.events & EPOLLWAKEUP) { in ep_modify()
1659 * event occurs immediately after we call f_op->poll(). in ep_modify()
1660 * We need this because we did not take ep->lock while in ep_modify()
1662 * ep->lock). in ep_modify()
1665 * when calling f_op->poll(). This barrier also in ep_modify()
1669 * This barrier will now guarantee ep_poll_callback or f_op->poll in ep_modify()
1681 write_lock_irq(&ep->lock); in ep_modify()
1683 list_add_tail(&epi->rdllink, &ep->rdllist); in ep_modify()
1687 if (waitqueue_active(&ep->wq)) in ep_modify()
1688 wake_up(&ep->wq); in ep_modify()
1689 if (waitqueue_active(&ep->poll_wait)) in ep_modify()
1692 write_unlock_irq(&ep->lock); in ep_modify()
1708 struct epoll_event __user *uevent = esed->events; in ep_send_events_proc()
1713 esed->res = 0; in ep_send_events_proc()
1720 lockdep_assert_held(&ep->mtx); in ep_send_events_proc()
1723 if (esed->res >= esed->maxevents) in ep_send_events_proc()
1727 * Activate ep->ws before deactivating epi->ws to prevent in ep_send_events_proc()
1728 * triggering auto-suspend here (in case we reactivate epi->ws in ep_send_events_proc()
1731 * This could be rearranged to delay the deactivation of epi->ws in ep_send_events_proc()
1732 * instead, but then epi->ws would temporarily be out of sync in ep_send_events_proc()
1737 if (ws->active) in ep_send_events_proc()
1738 __pm_stay_awake(ep->ws); in ep_send_events_proc()
1742 list_del_init(&epi->rdllink); in ep_send_events_proc()
1745 * If the event mask intersects the caller-requested one, in ep_send_events_proc()
1747 * is holding ep->mtx, so no operations coming from userspace in ep_send_events_proc()
1754 if (__put_user(revents, &uevent->events) || in ep_send_events_proc()
1755 __put_user(epi->event.data, &uevent->data)) { in ep_send_events_proc()
1756 list_add(&epi->rdllink, head); in ep_send_events_proc()
1758 if (!esed->res) in ep_send_events_proc()
1759 esed->res = -EFAULT; in ep_send_events_proc()
1762 esed->res++; in ep_send_events_proc()
1764 if (epi->event.events & EPOLLONESHOT) in ep_send_events_proc()
1765 epi->event.events &= EP_PRIVATE_BITS; in ep_send_events_proc()
1766 else if (!(epi->event.events & EPOLLET)) { in ep_send_events_proc()
1773 * into ep->rdllist besides us. The epoll_ctl() in ep_send_events_proc()
1776 * poll callback will queue them in ep->ovflist. in ep_send_events_proc()
1778 list_add_tail(&epi->rdllink, &ep->rdllist); in ep_send_events_proc()
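The branch above strips everything but EP_PRIVATE_BITS once an EPOLLONESHOT event has been delivered, so the descriptor reports nothing further until it is re-armed. A hedged user-space sketch of the re-arm step:

#include <sys/epoll.h>

/* Re-arm a one-shot descriptor after its event has been handled. */
static int rearm_oneshot(int epfd, int fd)
{
	struct epoll_event ev = {
		.events = EPOLLIN | EPOLLONESHOT,
		.data.fd = fd,
	};

	/* After an EPOLLONESHOT delivery, only EPOLL_CTL_MOD (or DEL + ADD)
	 * makes the descriptor report events again. */
	return epoll_ctl(epfd, EPOLL_CTL_MOD, fd, &ev);
}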
1798 static inline struct timespec64 ep_set_mstimeout(long ms) in ep_set_mstimeout() argument
1801 .tv_sec = ms / MSEC_PER_SEC, in ep_set_mstimeout()
1802 .tv_nsec = NSEC_PER_MSEC * (ms % MSEC_PER_SEC), in ep_set_mstimeout()
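The two lines above split a millisecond timeout into seconds and nanoseconds. A user-space analogue of just that conversion, for illustration:

#include <time.h>

static struct timespec ms_to_timespec(long ms)
{
	struct timespec ts = {
		.tv_sec  = ms / 1000,			/* MSEC_PER_SEC */
		.tv_nsec = (ms % 1000) * 1000000L,	/* NSEC_PER_MSEC */
	};
	return ts;
}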
1820 list_del_init(&wq_entry->entry); in ep_autoremove_wake_function()
1825 * ep_poll - Retrieves ready events, and delivers them to the caller supplied
1832 * @timeout: Maximum timeout for the ready events fetch operation, in
1833 * milliseconds. If the @timeout is zero, the function will not block,
1834 * while if the @timeout is less than zero, the function will block
1842 int maxevents, long timeout) in ep_poll() argument
1851 if (timeout > 0) { in ep_poll()
1852 struct timespec64 end_time = ep_set_mstimeout(timeout); in ep_poll()
1857 } else if (timeout == 0) { in ep_poll()
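The branches above implement the documented epoll_wait() timeout contract: a positive value is an upper bound in milliseconds, zero returns immediately, and a negative value blocks until an event or a signal. A small user-space example for illustration:

#include <stdio.h>
#include <sys/epoll.h>

#define MAX_EVENTS 16

static void wait_examples(int epfd)
{
	struct epoll_event events[MAX_EVENTS];
	int n;

	n = epoll_wait(epfd, events, MAX_EVENTS, 0);	/* poll, never block */
	n = epoll_wait(epfd, events, MAX_EVENTS, 250);	/* block up to 250 ms */
	n = epoll_wait(epfd, events, MAX_EVENTS, -1);	/* block indefinitely */
	if (n < 0)
		perror("epoll_wait");
}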
1867 write_lock_irq(&ep->lock); in ep_poll()
1869 write_unlock_irq(&ep->lock); in ep_poll()
1884 * Busy poll timed out. Drop NAPI ID for now, we can add in ep_poll()
1897 * lost. This is also good performance-wise, because on in ep_poll()
1899 * explicitly, thus ep->lock is not taken, which halts the in ep_poll()
1911 write_lock_irq(&ep->lock); in ep_poll()
1921 * plays with two lists (->rdllist and ->ovflist) and there in ep_poll()
1929 res = -EINTR; in ep_poll()
1931 __add_wait_queue_exclusive(&ep->wq, &wait); in ep_poll()
1933 write_unlock_irq(&ep->lock); in ep_poll()
1950 write_lock_irq(&ep->lock); in ep_poll()
1953 * means that the thread was woken up after its timeout expired in ep_poll()
1959 __remove_wait_queue(&ep->wq, &wait); in ep_poll()
1960 write_unlock_irq(&ep->lock); in ep_poll()
1966 * Always short-circuit for fatal signals to allow in ep_poll()
1971 res = -EINTR; in ep_poll()
1975 * there's still timeout left over, we try again in search of in ep_poll()
1986 * ep_loop_check_proc - Callback function to be passed to the @ep_call_nested()
1993 * @cookie: Original cookie for this call. This is the top-of-the-chain epoll
1998 * structure @ep does not violate the constraints, or -1 otherwise.
2004 struct eventpoll *ep = file->private_data; in ep_loop_check_proc()
2009 mutex_lock_nested(&ep->mtx, call_nests + 1); in ep_loop_check_proc()
2010 ep->gen = loop_check_gen; in ep_loop_check_proc()
2011 for (rbp = rb_first_cached(&ep->rbr); rbp; rbp = rb_next(rbp)) { in ep_loop_check_proc()
2013 if (unlikely(is_file_epoll(epi->ffd.file))) { in ep_loop_check_proc()
2014 ep_tovisit = epi->ffd.file->private_data; in ep_loop_check_proc()
2015 if (ep_tovisit->gen == loop_check_gen) in ep_loop_check_proc()
2018 ep_loop_check_proc, epi->ffd.file, in ep_loop_check_proc()
2031 if (list_empty(&epi->ffd.file->f_tfile_llink)) { in ep_loop_check_proc()
2032 if (get_file_rcu(epi->ffd.file)) in ep_loop_check_proc()
2033 list_add(&epi->ffd.file->f_tfile_llink, in ep_loop_check_proc()
2038 mutex_unlock(&ep->mtx); in ep_loop_check_proc()
2044 * ep_loop_check - Performs a check to verify that adding an epoll file (@file)
2052 * structure @ep does not violate the constraints, or -1 otherwise.
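ep_loop_check() is what ultimately rejects a cyclic nesting attempt; from user space the failure surfaces as EPOLL_CTL_ADD returning -1 with errno set to ELOOP. An illustrative sketch:

#include <errno.h>
#include <stdio.h>
#include <sys/epoll.h>

static void demo_eloop(void)
{
	int e1 = epoll_create1(0);
	int e2 = epoll_create1(0);
	struct epoll_event ev = { .events = EPOLLIN };

	ev.data.fd = e2;
	epoll_ctl(e1, EPOLL_CTL_ADD, e2, &ev);		/* e1 now watches e2 */

	ev.data.fd = e1;
	/* Closing the loop is refused by the kernel's cycle check. */
	if (epoll_ctl(e2, EPOLL_CTL_ADD, e1, &ev) < 0 && errno == ELOOP)
		fprintf(stderr, "cycle rejected with ELOOP\n");
}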
2068 list_del_init(&file->f_tfile_llink); in clear_tfile_check_list()
2087 return -EINVAL; in do_epoll_create()
2109 ep->file = file; in do_epoll_create()
2128 return -EINVAL; in SYSCALL_DEFINE1()
2142 return -EAGAIN; in epoll_mutex_lock()
2155 error = -EBADF; in do_epoll_ctl()
2165 /* The target file descriptor must support poll */ in do_epoll_ctl()
2166 error = -EPERM; in do_epoll_ctl()
2179 error = -EINVAL; in do_epoll_ctl()
2188 if (ep_op_has_event(op) && (epds->events & EPOLLEXCLUSIVE)) { in do_epoll_ctl()
2192 (epds->events & ~EPOLLEXCLUSIVE_OK_BITS))) in do_epoll_ctl()
2200 ep = f.file->private_data; in do_epoll_ctl()
2217 error = epoll_mutex_lock(&ep->mtx, 0, nonblock); in do_epoll_ctl()
2221 if (!list_empty(&f.file->f_ep_links) || in do_epoll_ctl()
2222 ep->gen == loop_check_gen || in do_epoll_ctl()
2224 mutex_unlock(&ep->mtx); in do_epoll_ctl()
2231 error = -ELOOP; in do_epoll_ctl()
2236 list_add(&tf.file->f_tfile_llink, in do_epoll_ctl()
2239 error = epoll_mutex_lock(&ep->mtx, 0, nonblock); in do_epoll_ctl()
2243 tep = tf.file->private_data; in do_epoll_ctl()
2244 error = epoll_mutex_lock(&tep->mtx, 1, nonblock); in do_epoll_ctl()
2246 mutex_unlock(&ep->mtx); in do_epoll_ctl()
2260 error = -EINVAL; in do_epoll_ctl()
2264 epds->events |= EPOLLERR | EPOLLHUP; in do_epoll_ctl()
2267 error = -EEXIST; in do_epoll_ctl()
2273 error = -ENOENT; in do_epoll_ctl()
2277 if (!(epi->event.events & EPOLLEXCLUSIVE)) { in do_epoll_ctl()
2278 epds->events |= EPOLLERR | EPOLLHUP; in do_epoll_ctl()
2282 error = -ENOENT; in do_epoll_ctl()
2286 mutex_unlock(&tep->mtx); in do_epoll_ctl()
2287 mutex_unlock(&ep->mtx); in do_epoll_ctl()
2316 return -EFAULT; in SYSCALL_DEFINE4()
2326 int maxevents, int timeout) in do_epoll_wait() argument
2334 return -EINVAL; in do_epoll_wait()
2338 return -EFAULT; in do_epoll_wait()
2343 return -EBADF; in do_epoll_wait()
2349 error = -EINVAL; in do_epoll_wait()
2357 ep = f.file->private_data; in do_epoll_wait()
2360 error = ep_poll(ep, events, maxevents, timeout); in do_epoll_wait()
2368 int, maxevents, int, timeout) in SYSCALL_DEFINE4() argument
2370 return do_epoll_wait(epfd, events, maxevents, timeout); in SYSCALL_DEFINE4()
2378 int, maxevents, int, timeout, const sigset_t __user *, sigmask, in SYSCALL_DEFINE6() argument
2391 error = do_epoll_wait(epfd, events, maxevents, timeout); in SYSCALL_DEFINE6()
2392 restore_saved_sigmask_unless(error == -EINTR); in SYSCALL_DEFINE6()
2400 int, maxevents, int, timeout, in COMPAT_SYSCALL_DEFINE6() argument
2414 err = do_epoll_wait(epfd, events, maxevents, timeout); in COMPAT_SYSCALL_DEFINE6()
2415 restore_saved_sigmask_unless(err == -EINTR); in COMPAT_SYSCALL_DEFINE6()
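The epoll_pwait() entry points above install the caller-supplied sigmask around the wait and restore it unless the wait returned -EINTR, which is what lets user space unblock a signal and wait atomically. A minimal usage sketch, for illustration:

#include <signal.h>
#include <sys/epoll.h>

/* Wait for events while atomically allowing only SIGTERM to interrupt. */
static int wait_allow_sigterm(int epfd, struct epoll_event *events,
			      int maxevents)
{
	sigset_t mask;

	sigfillset(&mask);
	sigdelset(&mask, SIGTERM);
	/* The mask applies only for the duration of the wait, avoiding the
	 * race of a separate sigprocmask() followed by epoll_wait(). */
	return epoll_pwait(epfd, events, maxevents, -1, &mask);
}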
2429 max_user_watches = (((si.totalram - si.totalhigh) / 25) << PAGE_SHIFT) / in eventpoll_init()
2441 * using an extra cache line on 64-bit (and smaller) CPUs in eventpoll_init()