// SPDX-License-Identifier: GPL-2.0-only
/* Network filesystem read subrequest result collection, assessment and
 * retrying.
 *
 * Copyright (C) 2024 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 */

#include <linux/export.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/task_io_accounting_ops.h>
#include "internal.h"

/*
 * Clear the unread part of an I/O subrequest's buffer, noting if we hit the
 * EOF in the process.
 */
static void netfs_clear_unread(struct netfs_io_subrequest *subreq)
{
	netfs_reset_iter(subreq);
	WARN_ON_ONCE(subreq->len - subreq->transferred != iov_iter_count(&subreq->io_iter));
	iov_iter_zero(iov_iter_count(&subreq->io_iter), &subreq->io_iter);
	if (subreq->start + subreq->transferred >= subreq->rreq->i_size)
		__set_bit(NETFS_SREQ_HIT_EOF, &subreq->flags);
}

/*
 * Flush, mark and unlock a folio that's now completely read.  If we want to
 * cache the folio, we set the group to NETFS_FOLIO_COPY_TO_CACHE, mark it
 * dirty and let writeback handle it.
 */
static void netfs_unlock_read_folio(struct netfs_io_subrequest *subreq,
				    struct netfs_io_request *rreq,
				    struct folio_queue *folioq,
				    int slot)
{
	struct netfs_folio *finfo;
	struct folio *folio = folioq_folio(folioq, slot);

	flush_dcache_folio(folio);
	folio_mark_uptodate(folio);

	if (!test_bit(NETFS_RREQ_USE_PGPRIV2, &rreq->flags)) {
		finfo = netfs_folio_info(folio);
		if (finfo) {
			trace_netfs_folio(folio, netfs_folio_trace_filled_gaps);
			if (finfo->netfs_group)
				folio_change_private(folio, finfo->netfs_group);
			else
				folio_detach_private(folio);
			kfree(finfo);
		}

		if (test_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags)) {
			if (!WARN_ON_ONCE(folio_get_private(folio) != NULL)) {
				trace_netfs_folio(folio, netfs_folio_trace_copy_to_cache);
				folio_attach_private(folio, NETFS_FOLIO_COPY_TO_CACHE);
				folio_mark_dirty(folio);
			}
		} else {
			trace_netfs_folio(folio, netfs_folio_trace_read_done);
		}

		folioq_clear(folioq, slot);
	} else {
		// TODO: Use of PG_private_2 is deprecated.
		if (test_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags))
			netfs_pgpriv2_mark_copy_to_cache(subreq, rreq, folioq, slot);
		else
			folioq_clear(folioq, slot);
	}

	if (!test_bit(NETFS_RREQ_DONT_UNLOCK_FOLIOS, &rreq->flags)) {
		if (folio->index == rreq->no_unlock_folio &&
		    test_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags)) {
			_debug("no unlock");
		} else {
			trace_netfs_folio(folio, netfs_folio_trace_read_unlock);
			folio_unlock(folio);
		}
	}
}

/*
 * Unlock any folios that are now completely read.  Returns true if the
 * subrequest is removed from the list.
 */
static bool netfs_consume_read_data(struct netfs_io_subrequest *subreq, bool was_async)
{
	struct netfs_io_subrequest *prev, *next;
	struct netfs_io_request *rreq = subreq->rreq;
	struct folio_queue *folioq = subreq->curr_folioq;
	size_t avail, prev_donated, next_donated, fsize, part, excess;
	loff_t fpos, start;
	loff_t fend;
	int slot = subreq->curr_folioq_slot;

	if (WARN(subreq->transferred > subreq->len,
		 "Subreq overread: R%x[%x] %zu > %zu",
		 rreq->debug_id, subreq->debug_index,
		 subreq->transferred, subreq->len))
		subreq->transferred = subreq->len;

next_folio:
	fsize = PAGE_SIZE << subreq->curr_folio_order;
	fpos = round_down(subreq->start + subreq->consumed, fsize);
	fend = fpos + fsize;

	if (WARN_ON_ONCE(!folioq) ||
	    WARN_ON_ONCE(!folioq_folio(folioq, slot)) ||
	    WARN_ON_ONCE(folioq_folio(folioq, slot)->index != fpos / PAGE_SIZE)) {
		pr_err("R=%08x[%x] s=%llx-%llx ctl=%zx/%zx/%zx sl=%u\n",
		       rreq->debug_id, subreq->debug_index,
		       subreq->start, subreq->start + subreq->transferred - 1,
		       subreq->consumed, subreq->transferred, subreq->len,
		       slot);
		if (folioq) {
			struct folio *folio = folioq_folio(folioq, slot);

			pr_err("folioq: orders=%02x%02x%02x%02x\n",
			       folioq->orders[0], folioq->orders[1],
			       folioq->orders[2], folioq->orders[3]);
			if (folio)
				pr_err("folio: %llx-%llx ix=%llx o=%u qo=%u\n",
				       fpos, fend - 1, folio_pos(folio), folio_order(folio),
				       folioq_folio_order(folioq, slot));
		}
	}

donation_changed:
	/* Try to consume the current folio if we've hit or passed the end of
	 * it.  There's a possibility that this subreq doesn't start at the
	 * beginning of the folio, in which case we need to donate to/from the
	 * preceding subreq.
	 *
	 * We also need to include any potential donation back from the
	 * following subreq.
	 */
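	/* For instance (illustrative numbers, not from a real trace): if a
	 * 16KiB folio is covered by subreq A [0, 8K) and subreq B [8K, 16K)
	 * and B completes first, B can't unlock the folio, so it donates its
	 * 8K to A through A->next_donated; when A completes, its range is
	 * extended to cover the whole folio and A performs the unlock.
	 */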
	prev_donated = READ_ONCE(subreq->prev_donated);
	next_donated = READ_ONCE(subreq->next_donated);
	if (prev_donated || next_donated) {
		spin_lock_bh(&rreq->lock);
		prev_donated = subreq->prev_donated;
		next_donated = subreq->next_donated;
		subreq->start -= prev_donated;
		subreq->len += prev_donated;
		subreq->transferred += prev_donated;
		prev_donated = subreq->prev_donated = 0;
		if (subreq->transferred == subreq->len) {
			subreq->len += next_donated;
			subreq->transferred += next_donated;
			next_donated = subreq->next_donated = 0;
		}
		trace_netfs_sreq(subreq, netfs_sreq_trace_add_donations);
		spin_unlock_bh(&rreq->lock);
	}

	avail = subreq->transferred;
	if (avail == subreq->len)
		avail += next_donated;
	start = subreq->start;
	if (subreq->consumed == 0) {
		start -= prev_donated;
		avail += prev_donated;
	} else {
		start += subreq->consumed;
		avail -= subreq->consumed;
	}
	part = umin(avail, fsize);

	trace_netfs_progress(subreq, start, avail, part);

	if (start + avail >= fend) {
		if (fpos == start) {
			/* Flush, unlock and mark for caching any folio we've just read. */
			subreq->consumed = fend - subreq->start;
			netfs_unlock_read_folio(subreq, rreq, folioq, slot);
			folioq_mark2(folioq, slot);
			if (subreq->consumed >= subreq->len)
				goto remove_subreq;
		} else if (fpos < start) {
			excess = fend - subreq->start;

			spin_lock_bh(&rreq->lock);
			/* If we complete first on a folio split with the
			 * preceding subreq, donate to that subreq - otherwise
			 * we get the responsibility.
			 */
			if (subreq->prev_donated != prev_donated) {
				spin_unlock_bh(&rreq->lock);
				goto donation_changed;
			}

			if (list_is_first(&subreq->rreq_link, &rreq->subrequests)) {
				spin_unlock_bh(&rreq->lock);
				pr_err("Can't donate prior to front\n");
				goto bad;
			}

			prev = list_prev_entry(subreq, rreq_link);
			WRITE_ONCE(prev->next_donated, prev->next_donated + excess);
			subreq->start += excess;
			subreq->len -= excess;
			subreq->transferred -= excess;
			trace_netfs_donate(rreq, subreq, prev, excess,
					   netfs_trace_donate_tail_to_prev);
			trace_netfs_sreq(subreq, netfs_sreq_trace_donate_to_prev);

			if (subreq->consumed >= subreq->len)
				goto remove_subreq_locked;
			spin_unlock_bh(&rreq->lock);
		} else {
			pr_err("fpos > start\n");
			goto bad;
		}

		/* Advance the rolling buffer to the next folio. */
		slot++;
		if (slot >= folioq_nr_slots(folioq)) {
			slot = 0;
			folioq = folioq->next;
			subreq->curr_folioq = folioq;
		}
		subreq->curr_folioq_slot = slot;
		if (folioq && folioq_folio(folioq, slot))
			subreq->curr_folio_order = folioq->orders[slot];
		if (!was_async)
			cond_resched();
		goto next_folio;
	}

	/* Deal with partial progress. */
	if (subreq->transferred < subreq->len)
		return false;

	/* Donate the remaining downloaded data to one of the neighbouring
	 * subrequests.  Note that we may race with them doing the same thing.
	 */
	spin_lock_bh(&rreq->lock);

	if (subreq->prev_donated != prev_donated ||
	    subreq->next_donated != next_donated) {
		spin_unlock_bh(&rreq->lock);
		cond_resched();
		goto donation_changed;
	}

	/* Deal with the trickiest case: that this subreq is in the middle of a
	 * folio, not touching either edge, but finishes first.  In such a
	 * case, we donate to the previous subreq, if there is one and if it is
	 * contiguous, so that the donation is only handled when that completes
	 * - and remove this subreq from the list.
	 *
	 * If the previous subreq finished first, we will have acquired their
	 * donation and should be able to unlock folios and/or donate nextwards.
	 */
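	/* For instance (illustrative numbers, not from a real trace): if a
	 * 16KiB folio is covered by subreqs A [0, 4K), B [4K, 8K) and
	 * C [8K, 16K) and B completes first, B donates its 4K to A and is
	 * removed; when A completes it absorbs that donation and, as the
	 * folio still isn't fully read, passes the accumulated 8K on to C,
	 * which unlocks the folio once it finishes.
	 */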
	if (!subreq->consumed &&
	    !prev_donated &&
	    !list_is_first(&subreq->rreq_link, &rreq->subrequests)) {
		prev = list_prev_entry(subreq, rreq_link);
		if (subreq->start == prev->start + prev->len) {
			/* Trace the donation before zeroing subreq->len so
			 * that the trace records the amount donated.
			 */
			trace_netfs_donate(rreq, subreq, prev, subreq->len,
					   netfs_trace_donate_to_prev);
			WRITE_ONCE(prev->next_donated, prev->next_donated + subreq->len);
			subreq->start += subreq->len;
			subreq->len = 0;
			subreq->transferred = 0;
			trace_netfs_sreq(subreq, netfs_sreq_trace_donate_to_prev);
			goto remove_subreq_locked;
		}
	}

	/* If we can't donate down the chain, donate up the chain instead. */
	excess = subreq->len - subreq->consumed + next_donated;

	if (!subreq->consumed)
		excess += prev_donated;

	if (list_is_last(&subreq->rreq_link, &rreq->subrequests)) {
		rreq->prev_donated = excess;
		trace_netfs_donate(rreq, subreq, NULL, excess,
				   netfs_trace_donate_to_deferred_next);
	} else {
		next = list_next_entry(subreq, rreq_link);
		WRITE_ONCE(next->prev_donated, next->prev_donated + excess);
		trace_netfs_donate(rreq, subreq, next, excess,
				   netfs_trace_donate_to_next);
	}
	trace_netfs_sreq(subreq, netfs_sreq_trace_donate_to_next);
	subreq->len = subreq->consumed;
	subreq->transferred = subreq->consumed;
	goto remove_subreq_locked;

remove_subreq:
	spin_lock_bh(&rreq->lock);
remove_subreq_locked:
	subreq->consumed = subreq->len;
	list_del(&subreq->rreq_link);
	spin_unlock_bh(&rreq->lock);
	netfs_put_subrequest(subreq, false, netfs_sreq_trace_put_consumed);
	return true;

bad:
	/* Errr... prev and next both donated to us, but insufficient to finish
	 * the folio.
	 */
	printk("R=%08x[%x] s=%llx-%llx %zx/%zx/%zx\n",
	       rreq->debug_id, subreq->debug_index,
	       subreq->start, subreq->start + subreq->transferred - 1,
	       subreq->consumed, subreq->transferred, subreq->len);
	printk("folio: %llx-%llx\n", fpos, fend - 1);
	printk("donated: prev=%zx next=%zx\n", prev_donated, next_donated);
	printk("s=%llx av=%zx part=%zx\n", start, avail, part);
	BUG();
}

/*
 * Do page flushing and suchlike after DIO.
 */
static void netfs_rreq_assess_dio(struct netfs_io_request *rreq)
{
	struct netfs_io_subrequest *subreq;
	unsigned int i;

	/* Collect unbuffered reads and direct reads, adding up the transfer
	 * sizes until we find the first short or failed subrequest.
	 */
	list_for_each_entry(subreq, &rreq->subrequests, rreq_link) {
		rreq->transferred += subreq->transferred;

		if (subreq->transferred < subreq->len ||
		    test_bit(NETFS_SREQ_FAILED, &subreq->flags)) {
			rreq->error = subreq->error;
			break;
		}
	}

	if (rreq->origin == NETFS_DIO_READ) {
		for (i = 0; i < rreq->direct_bv_count; i++) {
			flush_dcache_page(rreq->direct_bv[i].bv_page);
			// TODO: cifs marks pages in the destination buffer
			// dirty under some circumstances after a read.  Do we
			// need to do that too?
			set_page_dirty(rreq->direct_bv[i].bv_page);
		}
	}

	if (rreq->iocb) {
		rreq->iocb->ki_pos += rreq->transferred;
		if (rreq->iocb->ki_complete)
			rreq->iocb->ki_complete(
				rreq->iocb, rreq->error ? rreq->error : rreq->transferred);
	}
	if (rreq->netfs_ops->done)
		rreq->netfs_ops->done(rreq);
	if (rreq->origin == NETFS_DIO_READ)
		inode_dio_end(rreq->inode);
}

/*
 * Assess the state of a read request and decide what to do next.
 *
 * Note that we're in normal kernel thread context at this point, possibly
 * running on a workqueue.
 */
static void netfs_rreq_assess(struct netfs_io_request *rreq)
{
	trace_netfs_rreq(rreq, netfs_rreq_trace_assess);

	//netfs_rreq_is_still_valid(rreq);

	if (test_and_clear_bit(NETFS_RREQ_NEED_RETRY, &rreq->flags)) {
		netfs_retry_reads(rreq);
		return;
	}

	if (rreq->origin == NETFS_DIO_READ ||
	    rreq->origin == NETFS_READ_GAPS)
		netfs_rreq_assess_dio(rreq);
	task_io_account_read(rreq->transferred);

	trace_netfs_rreq(rreq, netfs_rreq_trace_wake_ip);
	clear_and_wake_up_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags);

	trace_netfs_rreq(rreq, netfs_rreq_trace_done);
	netfs_clear_subrequests(rreq, false);
	netfs_unlock_abandoned_read_pages(rreq);
	if (unlikely(test_bit(NETFS_RREQ_USE_PGPRIV2, &rreq->flags)))
		netfs_pgpriv2_write_to_the_cache(rreq);
}

void netfs_read_termination_worker(struct work_struct *work)
{
	struct netfs_io_request *rreq =
		container_of(work, struct netfs_io_request, work);
	netfs_see_request(rreq, netfs_rreq_trace_see_work);
	netfs_rreq_assess(rreq);
	netfs_put_request(rreq, false, netfs_rreq_trace_put_work_complete);
}

/*
 * Handle the completion of all outstanding I/O operations on a read request.
 * We inherit a ref from the caller.
 */
void netfs_rreq_terminated(struct netfs_io_request *rreq, bool was_async)
{
	if (!was_async)
		return netfs_rreq_assess(rreq);
	if (!work_pending(&rreq->work)) {
		netfs_get_request(rreq, netfs_rreq_trace_get_work);
		if (!queue_work(system_unbound_wq, &rreq->work))
			netfs_put_request(rreq, was_async, netfs_rreq_trace_put_work_nq);
	}
}

/**
 * netfs_read_subreq_progress - Note progress of a read operation.
 * @subreq: The read subrequest that has made progress.
 * @was_async: True if we're in an asynchronous context.
 *
 * This tells the read side of the netfs library that a contributory I/O
 * operation has made some progress and that it may be possible to unlock some
 * folios.
 *
 * Before calling, the filesystem should update subreq->transferred to track
 * the amount of data copied into the output buffer.
 *
 * If @was_async is true, the caller might be running in softirq or interrupt
 * context and we can't sleep.
 */
void netfs_read_subreq_progress(struct netfs_io_subrequest *subreq,
				bool was_async)
{
	struct netfs_io_request *rreq = subreq->rreq;

	trace_netfs_sreq(subreq, netfs_sreq_trace_progress);

	if (subreq->transferred > subreq->consumed &&
	    (rreq->origin == NETFS_READAHEAD ||
	     rreq->origin == NETFS_READPAGE ||
	     rreq->origin == NETFS_READ_FOR_WRITE)) {
		netfs_consume_read_data(subreq, was_async);
		__clear_bit(NETFS_SREQ_NO_PROGRESS, &subreq->flags);
	}
}
EXPORT_SYMBOL(netfs_read_subreq_progress);
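
/* Illustrative sketch (not part of this file): a network filesystem's receive
 * path might report incremental progress to netfs roughly like this, e.g.
 * from its transport's data-ready handler.  struct my_fs_call and the
 * my_fs_*() names are hypothetical.
 *
 *	static void my_fs_data_received(struct my_fs_call *call, size_t count)
 *	{
 *		struct netfs_io_subrequest *subreq = call->subreq;
 *
 *		// Account the newly copied data before notifying netfs.
 *		subreq->transferred += count;
 *
 *		// Called from softirq context, so pass was_async = true.
 *		netfs_read_subreq_progress(subreq, true);
 *	}
 */
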
/**
 * netfs_read_subreq_terminated - Note the termination of an I/O operation.
 * @subreq: The I/O subrequest that has terminated.
 * @error: Error code indicating the type of completion.
 * @was_async: True if the termination happened in an asynchronous context.
 *
 * This tells the read helper that a contributory I/O operation has terminated,
 * one way or another, and that it should integrate the results.
 *
 * The caller indicates the outcome of the operation through @error, supplying
 * 0 to indicate a successful or retryable transfer (if NETFS_SREQ_NEED_RETRY
 * is set) or a negative error code.  The helper will look after reissuing I/O
 * operations as appropriate and writing downloaded data to the cache.
 *
 * Before calling, the filesystem should update subreq->transferred to track
 * the amount of data copied into the output buffer.
 *
 * If @was_async is true, the caller might be running in softirq or interrupt
 * context and we can't sleep.
 */
void netfs_read_subreq_terminated(struct netfs_io_subrequest *subreq,
				  int error, bool was_async)
{
	struct netfs_io_request *rreq = subreq->rreq;

	switch (subreq->source) {
	case NETFS_READ_FROM_CACHE:
		netfs_stat(&netfs_n_rh_read_done);
		break;
	case NETFS_DOWNLOAD_FROM_SERVER:
		netfs_stat(&netfs_n_rh_download_done);
		break;
	default:
		break;
	}

	if (rreq->origin != NETFS_DIO_READ) {
		/* Collect buffered reads.
		 *
		 * If the read completed validly short, then we can clear the
		 * tail before going on to unlock the folios.
		 */
		if (error == 0 && subreq->transferred < subreq->len &&
		    (test_bit(NETFS_SREQ_HIT_EOF, &subreq->flags) ||
		     test_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags))) {
			netfs_clear_unread(subreq);
			subreq->transferred = subreq->len;
			trace_netfs_sreq(subreq, netfs_sreq_trace_clear);
		}
		if (subreq->transferred > subreq->consumed &&
		    (rreq->origin == NETFS_READAHEAD ||
		     rreq->origin == NETFS_READPAGE ||
		     rreq->origin == NETFS_READ_FOR_WRITE)) {
			netfs_consume_read_data(subreq, was_async);
			__clear_bit(NETFS_SREQ_NO_PROGRESS, &subreq->flags);
		}
		rreq->transferred += subreq->transferred;
	}

	/* Deal with retry requests, short reads and errors.  If we retry
	 * but don't make progress, we abandon the attempt.
	 */
	if (!error && subreq->transferred < subreq->len) {
		if (test_bit(NETFS_SREQ_HIT_EOF, &subreq->flags)) {
			trace_netfs_sreq(subreq, netfs_sreq_trace_hit_eof);
		} else {
			trace_netfs_sreq(subreq, netfs_sreq_trace_short);
			if (subreq->transferred > subreq->consumed) {
				__set_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags);
				__clear_bit(NETFS_SREQ_NO_PROGRESS, &subreq->flags);
				set_bit(NETFS_RREQ_NEED_RETRY, &rreq->flags);
			} else if (!__test_and_set_bit(NETFS_SREQ_NO_PROGRESS, &subreq->flags)) {
				__set_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags);
				set_bit(NETFS_RREQ_NEED_RETRY, &rreq->flags);
			} else {
				__set_bit(NETFS_SREQ_FAILED, &subreq->flags);
				error = -ENODATA;
			}
		}
	}

	subreq->error = error;
	trace_netfs_sreq(subreq, netfs_sreq_trace_terminated);

	if (unlikely(error < 0)) {
		trace_netfs_failure(rreq, subreq, error, netfs_fail_read);
		if (subreq->source == NETFS_READ_FROM_CACHE) {
			netfs_stat(&netfs_n_rh_read_failed);
		} else {
			netfs_stat(&netfs_n_rh_download_failed);
			set_bit(NETFS_RREQ_FAILED, &rreq->flags);
			rreq->error = subreq->error;
		}
	}

	if (atomic_dec_and_test(&rreq->nr_outstanding))
		netfs_rreq_terminated(rreq, was_async);

	netfs_put_subrequest(subreq, was_async, netfs_sreq_trace_put_terminated);
}
EXPORT_SYMBOL(netfs_read_subreq_terminated);
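
/* Illustrative sketch (not part of this file): the same hypothetical
 * filesystem would call this once its transport has finished with a
 * subrequest, mapping its own status onto 0 or a negative errno first.
 *
 *	static void my_fs_call_done(struct my_fs_call *call)
 *	{
 *		struct netfs_io_subrequest *subreq = call->subreq;
 *
 *		// Record how much ended up in the output buffer, then let
 *		// netfs collect the result (process context here, so
 *		// was_async = false).
 *		subreq->transferred += call->bytes_received;
 *		netfs_read_subreq_terminated(subreq, call->error, false);
 *	}
 */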