commit 3f8d5361ed1695de9f967dc2cb34b5f5a54d7c34
Author: zhaoxc0502 <zhaoxc0502@thundersoft.com>
Date:   Thu Jun 16 17:12:47 2022 +0800

    linux_fs

    Change-Id: I873ae7d7b33cb7dc5143952c18515768fcb2ea55

diff --git a/fs/aio.c b/fs/aio.c
index 5e5333d72..bd182bcca 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -182,9 +182,8 @@ struct poll_iocb {
 	struct file		*file;
 	struct wait_queue_head	*head;
 	__poll_t		events;
+	bool			done;
 	bool			cancelled;
-	bool			work_scheduled;
-	bool			work_need_resched;
 	struct wait_queue_entry	wait;
 	struct work_struct	work;
 };
@@ -1622,51 +1621,6 @@ static void aio_poll_put_work(struct work_struct *work)
 	iocb_put(iocb);
 }
 
-/*
- * Safely lock the waitqueue which the request is on, synchronizing with the
- * case where the ->poll() provider decides to free its waitqueue early.
- *
- * Returns true on success, meaning that req->head->lock was locked, req->wait
- * is on req->head, and an RCU read lock was taken. Returns false if the
- * request was already removed from its waitqueue (which might no longer exist).
- */
-static bool poll_iocb_lock_wq(struct poll_iocb *req)
-{
-	wait_queue_head_t *head;
-
-	/*
-	 * While we hold the waitqueue lock and the waitqueue is nonempty,
-	 * wake_up_pollfree() will wait for us. However, taking the waitqueue
-	 * lock in the first place can race with the waitqueue being freed.
-	 *
-	 * We solve this as eventpoll does: by taking advantage of the fact that
-	 * all users of wake_up_pollfree() will RCU-delay the actual free. If
-	 * we enter rcu_read_lock() and see that the pointer to the queue is
-	 * non-NULL, we can then lock it without the memory being freed out from
-	 * under us, then check whether the request is still on the queue.
-	 *
-	 * Keep holding rcu_read_lock() as long as we hold the queue lock, in
-	 * case the caller deletes the entry from the queue, leaving it empty.
-	 * In that case, only RCU prevents the queue memory from being freed.
-	 */
-	rcu_read_lock();
-	head = smp_load_acquire(&req->head);
-	if (head) {
-		spin_lock(&head->lock);
-		if (!list_empty(&req->wait.entry))
-			return true;
-		spin_unlock(&head->lock);
-	}
-	rcu_read_unlock();
-	return false;
-}
-
-static void poll_iocb_unlock_wq(struct poll_iocb *req)
-{
-	spin_unlock(&req->head->lock);
-	rcu_read_unlock();
-}
-
 static void aio_poll_complete_work(struct work_struct *work)
 {
 	struct poll_iocb *req = container_of(work, struct poll_iocb, work);
@@ -1686,27 +1640,14 @@ static void aio_poll_complete_work(struct work_struct *work)
 	 * avoid further branches in the fast path.
 	 */
 	spin_lock_irq(&ctx->ctx_lock);
-	if (poll_iocb_lock_wq(req)) {
-		if (!mask && !READ_ONCE(req->cancelled)) {
-			/*
-			 * The request isn't actually ready to be completed yet.
-			 * Reschedule completion if another wakeup came in.
-			 */
-			if (req->work_need_resched) {
-				schedule_work(&req->work);
-				req->work_need_resched = false;
-			} else {
-				req->work_scheduled = false;
-			}
-			poll_iocb_unlock_wq(req);
-			spin_unlock_irq(&ctx->ctx_lock);
-			return;
-		}
-		list_del_init(&req->wait.entry);
-		poll_iocb_unlock_wq(req);
-	} /* else, POLLFREE has freed the waitqueue, so we must complete */
+	if (!mask && !READ_ONCE(req->cancelled)) {
+		add_wait_queue(req->head, &req->wait);
+		spin_unlock_irq(&ctx->ctx_lock);
+		return;
+	}
 	list_del_init(&iocb->ki_list);
 	iocb->ki_res.res = mangle_poll(mask);
+	req->done = true;
 	spin_unlock_irq(&ctx->ctx_lock);
 
 	iocb_put(iocb);
@@ -1718,14 +1659,13 @@ static int aio_poll_cancel(struct kiocb *iocb)
 	struct aio_kiocb *aiocb = container_of(iocb, struct aio_kiocb, rw);
 	struct poll_iocb *req = &aiocb->poll;
 
-	if (poll_iocb_lock_wq(req)) {
-		WRITE_ONCE(req->cancelled, true);
-		if (!req->work_scheduled) {
-			schedule_work(&aiocb->poll.work);
-			req->work_scheduled = true;
-		}
-		poll_iocb_unlock_wq(req);
-	} /* else, the request was force-cancelled by POLLFREE already */
+	spin_lock(&req->head->lock);
+	WRITE_ONCE(req->cancelled, true);
+	if (!list_empty(&req->wait.entry)) {
+		list_del_init(&req->wait.entry);
+		schedule_work(&aiocb->poll.work);
+	}
+	spin_unlock(&req->head->lock);
 
 	return 0;
 }
@@ -1742,26 +1682,20 @@ static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
 	if (mask && !(mask & req->events))
 		return 0;
 
-	/*
-	 * Complete the request inline if possible. This requires that three
-	 * conditions be met:
-	 *   1. An event mask must have been passed. If a plain wakeup was done
-	 *      instead, then mask == 0 and we have to call vfs_poll() to get
-	 *      the events, so inline completion isn't possible.
-	 *   2. The completion work must not have already been scheduled.
-	 *   3. ctx_lock must not be busy. We have to use trylock because we
-	 *      already hold the waitqueue lock, so this inverts the normal
-	 *      locking order. Use irqsave/irqrestore because not all
-	 *      filesystems (e.g. fuse) call this function with IRQs disabled,
-	 *      yet IRQs have to be disabled before ctx_lock is obtained.
-	 */
-	if (mask && !req->work_scheduled &&
-	    spin_trylock_irqsave(&iocb->ki_ctx->ctx_lock, flags)) {
+	list_del_init(&req->wait.entry);
+
+	if (mask && spin_trylock_irqsave(&iocb->ki_ctx->ctx_lock, flags)) {
 		struct kioctx *ctx = iocb->ki_ctx;
 
-		list_del_init(&req->wait.entry);
+		/*
+		 * Try to complete the iocb inline if we can. Use
+		 * irqsave/irqrestore because not all filesystems (e.g. fuse)
+		 * call this function with IRQs disabled and because IRQs
+		 * have to be disabled before ctx_lock is obtained.
+		 */
 		list_del(&iocb->ki_list);
 		iocb->ki_res.res = mangle_poll(mask);
+		req->done = true;
 		if (iocb->ki_eventfd && eventfd_signal_count()) {
 			iocb = NULL;
 			INIT_WORK(&req->work, aio_poll_put_work);
@@ -1771,43 +1705,7 @@ static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
 		if (iocb)
 			iocb_put(iocb);
 	} else {
-		/*
-		 * Schedule the completion work if needed. If it was already
-		 * scheduled, record that another wakeup came in.
-		 *
-		 * Don't remove the request from the waitqueue here, as it might
-		 * not actually be complete yet (we won't know until vfs_poll()
-		 * is called), and we must not miss any wakeups. POLLFREE is an
-		 * exception to this; see below.
-		 */
-		if (req->work_scheduled) {
-			req->work_need_resched = true;
-		} else {
-			schedule_work(&req->work);
-			req->work_scheduled = true;
-		}
-
-		/*
-		 * If the waitqueue is being freed early but we can't complete
-		 * the request inline, we have to tear down the request as best
-		 * we can. That means immediately removing the request from its
-		 * waitqueue and preventing all further accesses to the
-		 * waitqueue via the request. We also need to schedule the
-		 * completion work (done above). Also mark the request as
-		 * cancelled, to potentially skip an unneeded call to ->poll().
-		 */
-		if (mask & POLLFREE) {
-			WRITE_ONCE(req->cancelled, true);
-			list_del_init(&req->wait.entry);
-
-			/*
-			 * Careful: this *must* be the last step, since as soon
-			 * as req->head is NULL'ed out, the request can be
-			 * completed and freed, since aio_poll_complete_work()
-			 * will no longer need to take the waitqueue lock.
-			 */
-			smp_store_release(&req->head, NULL);
-		}
+		schedule_work(&req->work);
 	}
 	return 1;
 }
@@ -1815,7 +1713,6 @@ static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
 struct aio_poll_table {
 	struct poll_table_struct	pt;
 	struct aio_kiocb		*iocb;
-	bool				queued;
 	int				error;
 };
 
@@ -1826,12 +1723,11 @@ aio_poll_queue_proc(struct file *file, struct wait_queue_head *head,
 	struct aio_poll_table *pt = container_of(p, struct aio_poll_table, pt);
 
 	/* multiple wait queues per file are not supported */
-	if (unlikely(pt->queued)) {
+	if (unlikely(pt->iocb->poll.head)) {
 		pt->error = -EINVAL;
 		return;
 	}
 
-	pt->queued = true;
 	pt->error = 0;
 	pt->iocb->poll.head = head;
 	add_wait_queue(head, &pt->iocb->poll.wait);
@@ -1856,14 +1752,12 @@ static int aio_poll(struct aio_kiocb *aiocb, const struct iocb *iocb)
 	req->events = demangle_poll(iocb->aio_buf) | EPOLLERR | EPOLLHUP;
 
 	req->head = NULL;
+	req->done = false;
 	req->cancelled = false;
-	req->work_scheduled = false;
-	req->work_need_resched = false;
 
 	apt.pt._qproc = aio_poll_queue_proc;
 	apt.pt._key = req->events;
 	apt.iocb = aiocb;
-	apt.queued = false;
 	apt.error = -EINVAL; /* same as no support for IOCB_CMD_POLL */
 
 	/* initialized the list so that we can do list_empty checks */
@@ -1872,35 +1766,23 @@ static int aio_poll(struct aio_kiocb *aiocb, const struct iocb *iocb)
 
 	mask = vfs_poll(req->file, &apt.pt) & req->events;
 	spin_lock_irq(&ctx->ctx_lock);
-	if (likely(apt.queued)) {
-		bool on_queue = poll_iocb_lock_wq(req);
-
-		if (!on_queue || req->work_scheduled) {
-			/*
-			 * aio_poll_wake() already either scheduled the async
-			 * completion work, or completed the request inline.
-			 */
-			if (apt.error) /* unsupported case: multiple queues */
+	if (likely(req->head)) {
+		spin_lock(&req->head->lock);
+		if (unlikely(list_empty(&req->wait.entry))) {
+			if (apt.error)
 				cancel = true;
 			apt.error = 0;
 			mask = 0;
 		}
 		if (mask || apt.error) {
-			/* Steal to complete synchronously. */
 			list_del_init(&req->wait.entry);
 		} else if (cancel) {
-			/* Cancel if possible (may be too late though). */
 			WRITE_ONCE(req->cancelled, true);
-		} else if (on_queue) {
-			/*
-			 * Actually waiting for an event, so add the request to
-			 * active_reqs so that it can be cancelled if needed.
-			 */
+		} else if (!req->done) {	/* actually waiting for an event */
 			list_add_tail(&aiocb->ki_list, &ctx->active_reqs);
 			aiocb->ki_cancel = aio_poll_cancel;
 		}
-		if (on_queue)
-			poll_iocb_unlock_wq(req);
+		spin_unlock(&req->head->lock);
 	}
 	if (mask) { /* no async, we'd stolen it */
 		aiocb->ki_res.res = mangle_poll(mask);
diff --git a/fs/signalfd.c b/fs/signalfd.c
index b94fb5f81..456046e15 100644
--- a/fs/signalfd.c
+++ b/fs/signalfd.c
@@ -35,7 +35,17 @@
 
 void signalfd_cleanup(struct sighand_struct *sighand)
 {
-	wake_up_pollfree(&sighand->signalfd_wqh);
+	wait_queue_head_t *wqh = &sighand->signalfd_wqh;
+	/*
+	 * The lockless check can race with remove_wait_queue() in progress,
+	 * but in this case its caller should run under rcu_read_lock() and
+	 * sighand_cachep is SLAB_TYPESAFE_BY_RCU, we can safely return.
+	 */
+	if (likely(!waitqueue_active(wqh)))
+		return;
+
+	/* wait_queue_entry_t->func(POLLFREE) should do remove_wait_queue() */
+	wake_up_poll(wqh, EPOLLHUP | POLLFREE);
 }
 
 struct signalfd_ctx {