commit 3f8d5361ed1695de9f967dc2cb34b5f5a54d7c34
Author: zhaoxc0502 <zhaoxc0502@thundersoft.com>
Date:   Thu Jun 16 17:12:47 2022 +0800

    linux_fs

    Change-Id: I873ae7d7b33cb7dc5143952c18515768fcb2ea55

diff --git a/fs/aio.c b/fs/aio.c
index 5e5333d72..bd182bcca 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -182,9 +182,8 @@ struct poll_iocb {
 	struct file		*file;
 	struct wait_queue_head	*head;
 	__poll_t		events;
+	bool			done;
 	bool			cancelled;
-	bool			work_scheduled;
-	bool			work_need_resched;
 	struct wait_queue_entry	wait;
 	struct work_struct	work;
 };
@@ -1622,51 +1621,6 @@ static void aio_poll_put_work(struct work_struct *work)
 	iocb_put(iocb);
 }

-/*
- * Safely lock the waitqueue which the request is on, synchronizing with the
- * case where the ->poll() provider decides to free its waitqueue early.
- *
- * Returns true on success, meaning that req->head->lock was locked, req->wait
- * is on req->head, and an RCU read lock was taken.  Returns false if the
- * request was already removed from its waitqueue (which might no longer exist).
- */
-static bool poll_iocb_lock_wq(struct poll_iocb *req)
-{
-	wait_queue_head_t *head;
-
-	/*
-	 * While we hold the waitqueue lock and the waitqueue is nonempty,
-	 * wake_up_pollfree() will wait for us.  However, taking the waitqueue
-	 * lock in the first place can race with the waitqueue being freed.
-	 *
-	 * We solve this as eventpoll does: by taking advantage of the fact that
-	 * all users of wake_up_pollfree() will RCU-delay the actual free.  If
-	 * we enter rcu_read_lock() and see that the pointer to the queue is
-	 * non-NULL, we can then lock it without the memory being freed out from
-	 * under us, then check whether the request is still on the queue.
-	 *
-	 * Keep holding rcu_read_lock() as long as we hold the queue lock, in
-	 * case the caller deletes the entry from the queue, leaving it empty.
-	 * In that case, only RCU prevents the queue memory from being freed.
-	 */
-	rcu_read_lock();
-	head = smp_load_acquire(&req->head);
-	if (head) {
-		spin_lock(&head->lock);
-		if (!list_empty(&req->wait.entry))
-			return true;
-		spin_unlock(&head->lock);
-	}
-	rcu_read_unlock();
-	return false;
-}
-
-static void poll_iocb_unlock_wq(struct poll_iocb *req)
-{
-	spin_unlock(&req->head->lock);
-	rcu_read_unlock();
-}
-
 static void aio_poll_complete_work(struct work_struct *work)
 {
 	struct poll_iocb *req = container_of(work, struct poll_iocb, work);
@@ -1686,27 +1640,14 @@ static void aio_poll_complete_work(struct work_struct *work)
 	 * avoid further branches in the fast path.
 	 */
 	spin_lock_irq(&ctx->ctx_lock);
-	if (poll_iocb_lock_wq(req)) {
-		if (!mask && !READ_ONCE(req->cancelled)) {
-			/*
-			 * The request isn't actually ready to be completed yet.
-			 * Reschedule completion if another wakeup came in.
-			 */
-			if (req->work_need_resched) {
-				schedule_work(&req->work);
-				req->work_need_resched = false;
-			} else {
-				req->work_scheduled = false;
-			}
-			poll_iocb_unlock_wq(req);
-			spin_unlock_irq(&ctx->ctx_lock);
-			return;
-		}
-		list_del_init(&req->wait.entry);
-		poll_iocb_unlock_wq(req);
-	} /* else, POLLFREE has freed the waitqueue, so we must complete */
+	if (!mask && !READ_ONCE(req->cancelled)) {
+		add_wait_queue(req->head, &req->wait);
+		spin_unlock_irq(&ctx->ctx_lock);
+		return;
+	}
 	list_del_init(&iocb->ki_list);
 	iocb->ki_res.res = mangle_poll(mask);
+	req->done = true;
 	spin_unlock_irq(&ctx->ctx_lock);

 	iocb_put(iocb);
@@ -1718,14 +1659,13 @@ static int aio_poll_cancel(struct kiocb *iocb)
 	struct aio_kiocb *aiocb = container_of(iocb, struct aio_kiocb, rw);
 	struct poll_iocb *req = &aiocb->poll;

-	if (poll_iocb_lock_wq(req)) {
-		WRITE_ONCE(req->cancelled, true);
-		if (!req->work_scheduled) {
-			schedule_work(&aiocb->poll.work);
-			req->work_scheduled = true;
-		}
-		poll_iocb_unlock_wq(req);
-	} /* else, the request was force-cancelled by POLLFREE already */
+	spin_lock(&req->head->lock);
+	WRITE_ONCE(req->cancelled, true);
+	if (!list_empty(&req->wait.entry)) {
+		list_del_init(&req->wait.entry);
+		schedule_work(&aiocb->poll.work);
+	}
+	spin_unlock(&req->head->lock);

 	return 0;
 }
@@ -1742,26 +1682,20 @@ static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
 	if (mask && !(mask & req->events))
 		return 0;

-	/*
-	 * Complete the request inline if possible.  This requires that three
-	 * conditions be met:
-	 *   1. An event mask must have been passed.  If a plain wakeup was done
-	 *	instead, then mask == 0 and we have to call vfs_poll() to get
-	 *	the events, so inline completion isn't possible.
-	 *   2. The completion work must not have already been scheduled.
-	 *   3. ctx_lock must not be busy.  We have to use trylock because we
-	 *	already hold the waitqueue lock, so this inverts the normal
-	 *	locking order.  Use irqsave/irqrestore because not all
-	 *	filesystems (e.g. fuse) call this function with IRQs disabled,
-	 *	yet IRQs have to be disabled before ctx_lock is obtained.
-	 */
-	if (mask && !req->work_scheduled &&
-	    spin_trylock_irqsave(&iocb->ki_ctx->ctx_lock, flags)) {
+	list_del_init(&req->wait.entry);
+
+	if (mask && spin_trylock_irqsave(&iocb->ki_ctx->ctx_lock, flags)) {
 		struct kioctx *ctx = iocb->ki_ctx;

-		list_del_init(&req->wait.entry);
+		/*
+		 * Try to complete the iocb inline if we can. Use
+		 * irqsave/irqrestore because not all filesystems (e.g. fuse)
+		 * call this function with IRQs disabled and because IRQs
+		 * have to be disabled before ctx_lock is obtained.
+		 */
 		list_del(&iocb->ki_list);
 		iocb->ki_res.res = mangle_poll(mask);
+		req->done = true;
 		if (iocb->ki_eventfd && eventfd_signal_count()) {
 			iocb = NULL;
 			INIT_WORK(&req->work, aio_poll_put_work);
@@ -1771,43 +1705,7 @@ static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
 		if (iocb)
 			iocb_put(iocb);
 	} else {
-		/*
-		 * Schedule the completion work if needed.  If it was already
-		 * scheduled, record that another wakeup came in.
-		 *
-		 * Don't remove the request from the waitqueue here, as it might
-		 * not actually be complete yet (we won't know until vfs_poll()
-		 * is called), and we must not miss any wakeups.  POLLFREE is an
-		 * exception to this; see below.
-		 */
-		if (req->work_scheduled) {
-			req->work_need_resched = true;
-		} else {
-			schedule_work(&req->work);
-			req->work_scheduled = true;
-		}
-
-		/*
-		 * If the waitqueue is being freed early but we can't complete
-		 * the request inline, we have to tear down the request as best
-		 * we can.  That means immediately removing the request from its
-		 * waitqueue and preventing all further accesses to the
-		 * waitqueue via the request.  We also need to schedule the
-		 * completion work (done above).  Also mark the request as
-		 * cancelled, to potentially skip an unneeded call to ->poll().
-		 */
-		if (mask & POLLFREE) {
-			WRITE_ONCE(req->cancelled, true);
-			list_del_init(&req->wait.entry);
-
-			/*
-			 * Careful: this *must* be the last step, since as soon
-			 * as req->head is NULL'ed out, the request can be
-			 * completed and freed, since aio_poll_complete_work()
-			 * will no longer need to take the waitqueue lock.
-			 */
-			smp_store_release(&req->head, NULL);
-		}
+		schedule_work(&req->work);
 	}
 	return 1;
 }
@@ -1815,7 +1713,6 @@ static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
 struct aio_poll_table {
 	struct poll_table_struct	pt;
 	struct aio_kiocb		*iocb;
-	bool				queued;
 	int				error;
 };

@@ -1826,12 +1723,11 @@ aio_poll_queue_proc(struct file *file, struct wait_queue_head *head,
 	struct aio_poll_table *pt = container_of(p, struct aio_poll_table, pt);

 	/* multiple wait queues per file are not supported */
-	if (unlikely(pt->queued)) {
+	if (unlikely(pt->iocb->poll.head)) {
 		pt->error = -EINVAL;
 		return;
 	}

-	pt->queued = true;
 	pt->error = 0;
 	pt->iocb->poll.head = head;
 	add_wait_queue(head, &pt->iocb->poll.wait);
@@ -1856,14 +1752,12 @@ static int aio_poll(struct aio_kiocb *aiocb, const struct iocb *iocb)
 	req->events = demangle_poll(iocb->aio_buf) | EPOLLERR | EPOLLHUP;

 	req->head = NULL;
+	req->done = false;
 	req->cancelled = false;
-	req->work_scheduled = false;
-	req->work_need_resched = false;

 	apt.pt._qproc = aio_poll_queue_proc;
 	apt.pt._key = req->events;
 	apt.iocb = aiocb;
-	apt.queued = false;
 	apt.error = -EINVAL; /* same as no support for IOCB_CMD_POLL */

 	/* initialized the list so that we can do list_empty checks */
@@ -1872,35 +1766,23 @@ static int aio_poll(struct aio_kiocb *aiocb, const struct iocb *iocb)

 	mask = vfs_poll(req->file, &apt.pt) & req->events;
 	spin_lock_irq(&ctx->ctx_lock);
-	if (likely(apt.queued)) {
-		bool on_queue = poll_iocb_lock_wq(req);
-
-		if (!on_queue || req->work_scheduled) {
-			/*
-			 * aio_poll_wake() already either scheduled the async
-			 * completion work, or completed the request inline.
-			 */
-			if (apt.error) /* unsupported case: multiple queues */
+	if (likely(req->head)) {
+		spin_lock(&req->head->lock);
+		if (unlikely(list_empty(&req->wait.entry))) {
+			if (apt.error)
 				cancel = true;
 			apt.error = 0;
 			mask = 0;
 		}
 		if (mask || apt.error) {
-			/* Steal to complete synchronously. */
 			list_del_init(&req->wait.entry);
 		} else if (cancel) {
-			/* Cancel if possible (may be too late though). */
 			WRITE_ONCE(req->cancelled, true);
-		} else if (on_queue) {
-			/*
-			 * Actually waiting for an event, so add the request to
-			 * active_reqs so that it can be cancelled if needed.
-			 */
+		} else if (!req->done) { /* actually waiting for an event */
 			list_add_tail(&aiocb->ki_list, &ctx->active_reqs);
 			aiocb->ki_cancel = aio_poll_cancel;
 		}
-		if (on_queue)
-			poll_iocb_unlock_wq(req);
+		spin_unlock(&req->head->lock);
 	}
 	if (mask) { /* no async, we'd stolen it */
 		aiocb->ki_res.res = mangle_poll(mask);
diff --git a/fs/signalfd.c b/fs/signalfd.c
index b94fb5f81..456046e15 100644
--- a/fs/signalfd.c
+++ b/fs/signalfd.c
@@ -35,7 +35,17 @@

 void signalfd_cleanup(struct sighand_struct *sighand)
 {
-	wake_up_pollfree(&sighand->signalfd_wqh);
+	wait_queue_head_t *wqh = &sighand->signalfd_wqh;
+	/*
+	 * The lockless check can race with remove_wait_queue() in progress,
+	 * but in this case its caller should run under rcu_read_lock() and
+	 * sighand_cachep is SLAB_TYPESAFE_BY_RCU, we can safely return.
+	 */
+	if (likely(!waitqueue_active(wqh)))
+		return;
+
+	/* wait_queue_entry_t->func(POLLFREE) should do remove_wait_queue() */
+	wake_up_poll(wqh, EPOLLHUP | POLLFREE);
 }

 struct signalfd_ctx {