• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (C) 1991, 1992  Linus Torvalds
4  * Copyright (C) 2001  Andrea Arcangeli <andrea@suse.de> SuSE
5  * Copyright (C) 2016 - 2020 Christoph Hellwig
6  */
7 #include <linux/init.h>
8 #include <linux/mm.h>
9 #include <linux/blkdev.h>
10 #include <linux/buffer_head.h>
11 #include <linux/mpage.h>
12 #include <linux/uio.h>
13 #include <linux/namei.h>
14 #include <linux/task_io_accounting_ops.h>
15 #include <linux/falloc.h>
16 #include <linux/suspend.h>
17 #include <linux/fs.h>
18 #include "blk.h"
19 
bdev_file_inode(struct file * file)20 static struct inode *bdev_file_inode(struct file *file)
21 {
22 	return file->f_mapping->host;
23 }
24 
/*
 * get_block callback used by the buffer-head helpers in this file.
 *
 * Block @iblock of the inode is mapped to the same block number on the
 * inode's block device, and @bh is marked mapped.  @create is not looked
 * at.  Always returns 0.
 */
static int blkdev_get_block(struct inode *inode, sector_t iblock,
		struct buffer_head *bh, int create)
{
	struct block_device *bdev = I_BDEV(inode);

	bh->b_blocknr = iblock;
	bh->b_bdev = bdev;
	set_buffer_mapped(bh);

	return 0;
}
33 
dio_bio_write_op(struct kiocb * iocb)34 static unsigned int dio_bio_write_op(struct kiocb *iocb)
35 {
36 	unsigned int op = REQ_OP_WRITE | REQ_SYNC | REQ_IDLE;
37 
38 	/* avoid the need for a I/O completion work item */
39 	if (iocb->ki_flags & IOCB_DSYNC)
40 		op |= REQ_FUA;
41 	return op;
42 }
43 
/*
 * Number of bio_vecs that __blkdev_direct_IO_simple() keeps on its stack;
 * requests that need more use a kmalloc'ed array instead.
 */
#define DIO_INLINE_BIO_VECS 4
45 
blkdev_bio_end_io_simple(struct bio * bio)46 static void blkdev_bio_end_io_simple(struct bio *bio)
47 {
48 	struct task_struct *waiter = bio->bi_private;
49 
50 	WRITE_ONCE(bio->bi_private, NULL);
51 	blk_wake_io_task(waiter);
52 }
53 
__blkdev_direct_IO_simple(struct kiocb * iocb,struct iov_iter * iter,unsigned int nr_pages)54 static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb,
55 		struct iov_iter *iter, unsigned int nr_pages)
56 {
57 	struct file *file = iocb->ki_filp;
58 	struct block_device *bdev = I_BDEV(bdev_file_inode(file));
59 	struct bio_vec inline_vecs[DIO_INLINE_BIO_VECS], *vecs;
60 	loff_t pos = iocb->ki_pos;
61 	bool should_dirty = false;
62 	struct bio bio;
63 	ssize_t ret;
64 	blk_qc_t qc;
65 
66 	if ((pos | iov_iter_alignment(iter)) &
67 	    (bdev_logical_block_size(bdev) - 1))
68 		return -EINVAL;
69 
70 	if (nr_pages <= DIO_INLINE_BIO_VECS)
71 		vecs = inline_vecs;
72 	else {
73 		vecs = kmalloc_array(nr_pages, sizeof(struct bio_vec),
74 				     GFP_KERNEL);
75 		if (!vecs)
76 			return -ENOMEM;
77 	}
78 
79 	bio_init(&bio, vecs, nr_pages);
80 	bio_set_dev(&bio, bdev);
81 	bio.bi_iter.bi_sector = pos >> 9;
82 	bio.bi_write_hint = iocb->ki_hint;
83 	bio.bi_private = current;
84 	bio.bi_end_io = blkdev_bio_end_io_simple;
85 	bio.bi_ioprio = iocb->ki_ioprio;
86 
87 	ret = bio_iov_iter_get_pages(&bio, iter);
88 	if (unlikely(ret))
89 		goto out;
90 	ret = bio.bi_iter.bi_size;
91 
92 	if (iov_iter_rw(iter) == READ) {
93 		bio.bi_opf = REQ_OP_READ;
94 		if (iter_is_iovec(iter))
95 			should_dirty = true;
96 	} else {
97 		bio.bi_opf = dio_bio_write_op(iocb);
98 		task_io_account_write(ret);
99 	}
100 	if (iocb->ki_flags & IOCB_NOWAIT)
101 		bio.bi_opf |= REQ_NOWAIT;
102 	if (iocb->ki_flags & IOCB_HIPRI)
103 		bio_set_polled(&bio, iocb);
104 
105 	qc = submit_bio(&bio);
106 	for (;;) {
107 		set_current_state(TASK_UNINTERRUPTIBLE);
108 		if (!READ_ONCE(bio.bi_private))
109 			break;
110 		if (!(iocb->ki_flags & IOCB_HIPRI) ||
111 		    !blk_poll(bdev_get_queue(bdev), qc, true))
112 			blk_io_schedule();
113 	}
114 	__set_current_state(TASK_RUNNING);
115 
116 	bio_release_pages(&bio, should_dirty);
117 	if (unlikely(bio.bi_status))
118 		ret = blk_status_to_errno(bio.bi_status);
119 
120 out:
121 	if (vecs != inline_vecs)
122 		kfree(vecs);
123 
124 	bio_uninit(&bio);
125 
126 	return ret;
127 }
128 
/*
 * Per-request state for __blkdev_direct_IO().  It is recovered from the
 * first bio with container_of(), and blkdev_init() uses
 * offsetof(struct blkdev_dio, bio) as the bio_set front padding, so @bio
 * has to remain the last member.
 */
struct blkdev_dio {
	union {
		struct kiocb		*iocb;		/* async: request to complete */
		struct task_struct	*waiter;	/* sync: task to wake up */
	};
	size_t			size;		/* bytes queued so far over all bios */
	atomic_t		ref;		/* completion count, used when multi_bio */
	bool			multi_bio : 1;	/* more than one bio was submitted */
	bool			should_dirty : 1; /* read into an iovec iterator */
	bool			is_sync : 1;	/* is_sync_kiocb() was true at submit */
	struct bio		bio;		/* first bio of the request */
};
141 
/*
 * bio_set used for the first bio of every __blkdev_direct_IO() request;
 * blkdev_init() sizes its front padding to hold a struct blkdev_dio.
 */
static struct bio_set blkdev_dio_pool;
143 
/*
 * ->iopoll handler: poll the request queue of the block device behind
 * @kiocb's file for the cookie stored in kiocb->ki_cookie, and return
 * whatever blk_poll() returns.
 */
static int blkdev_iopoll(struct kiocb *kiocb, bool wait)
{
	struct inode *bd_inode = kiocb->ki_filp->f_mapping->host;
	struct request_queue *queue = bdev_get_queue(I_BDEV(bd_inode));
	blk_qc_t cookie = READ_ONCE(kiocb->ki_cookie);

	return blk_poll(queue, cookie, wait);
}
151 
blkdev_bio_end_io(struct bio * bio)152 static void blkdev_bio_end_io(struct bio *bio)
153 {
154 	struct blkdev_dio *dio = bio->bi_private;
155 	bool should_dirty = dio->should_dirty;
156 
157 	if (bio->bi_status && !dio->bio.bi_status)
158 		dio->bio.bi_status = bio->bi_status;
159 
160 	if (!dio->multi_bio || atomic_dec_and_test(&dio->ref)) {
161 		if (!dio->is_sync) {
162 			struct kiocb *iocb = dio->iocb;
163 			ssize_t ret;
164 
165 			if (likely(!dio->bio.bi_status)) {
166 				ret = dio->size;
167 				iocb->ki_pos += ret;
168 			} else {
169 				ret = blk_status_to_errno(dio->bio.bi_status);
170 			}
171 
172 			dio->iocb->ki_complete(iocb, ret, 0);
173 			if (dio->multi_bio)
174 				bio_put(&dio->bio);
175 		} else {
176 			struct task_struct *waiter = dio->waiter;
177 
178 			WRITE_ONCE(dio->waiter, NULL);
179 			blk_wake_io_task(waiter);
180 		}
181 	}
182 
183 	if (should_dirty) {
184 		bio_check_pages_dirty(bio);
185 	} else {
186 		bio_release_pages(bio, false);
187 		bio_put(bio);
188 	}
189 }
190 
__blkdev_direct_IO(struct kiocb * iocb,struct iov_iter * iter,unsigned int nr_pages)191 static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
192 		unsigned int nr_pages)
193 {
194 	struct file *file = iocb->ki_filp;
195 	struct inode *inode = bdev_file_inode(file);
196 	struct block_device *bdev = I_BDEV(inode);
197 	struct blk_plug plug;
198 	struct blkdev_dio *dio;
199 	struct bio *bio;
200 	bool is_poll = (iocb->ki_flags & IOCB_HIPRI) != 0;
201 	bool is_read = (iov_iter_rw(iter) == READ), is_sync;
202 	loff_t pos = iocb->ki_pos;
203 	blk_qc_t qc = BLK_QC_T_NONE;
204 	int ret = 0;
205 
206 	if ((pos | iov_iter_alignment(iter)) &
207 	    (bdev_logical_block_size(bdev) - 1))
208 		return -EINVAL;
209 
210 	bio = bio_alloc_kiocb(iocb, nr_pages, &blkdev_dio_pool);
211 
212 	dio = container_of(bio, struct blkdev_dio, bio);
213 	dio->is_sync = is_sync = is_sync_kiocb(iocb);
214 	if (dio->is_sync) {
215 		dio->waiter = current;
216 		bio_get(bio);
217 	} else {
218 		dio->iocb = iocb;
219 	}
220 
221 	dio->size = 0;
222 	dio->multi_bio = false;
223 	dio->should_dirty = is_read && iter_is_iovec(iter);
224 
225 	/*
226 	 * Don't plug for HIPRI/polled IO, as those should go straight
227 	 * to issue
228 	 */
229 	if (!is_poll)
230 		blk_start_plug(&plug);
231 
232 	for (;;) {
233 		bio_set_dev(bio, bdev);
234 		bio->bi_iter.bi_sector = pos >> 9;
235 		bio->bi_write_hint = iocb->ki_hint;
236 		bio->bi_private = dio;
237 		bio->bi_end_io = blkdev_bio_end_io;
238 		bio->bi_ioprio = iocb->ki_ioprio;
239 
240 		ret = bio_iov_iter_get_pages(bio, iter);
241 		if (unlikely(ret)) {
242 			bio->bi_status = BLK_STS_IOERR;
243 			bio_endio(bio);
244 			break;
245 		}
246 		if (iocb->ki_flags & IOCB_NOWAIT) {
247 			/*
248 			 * This is nonblocking IO, and we need to allocate
249 			 * another bio if we have data left to map. As we
250 			 * cannot guarantee that one of the sub bios will not
251 			 * fail getting issued FOR NOWAIT and as error results
252 			 * are coalesced across all of them, be safe and ask for
253 			 * a retry of this from blocking context.
254 			 */
255 			if (unlikely(iov_iter_count(iter))) {
256 				bio_release_pages(bio, false);
257 				bio_clear_flag(bio, BIO_REFFED);
258 				bio_put(bio);
259 				blk_finish_plug(&plug);
260 				return -EAGAIN;
261 			}
262 			bio->bi_opf |= REQ_NOWAIT;
263 		}
264 
265 		if (is_read) {
266 			bio->bi_opf = REQ_OP_READ;
267 			if (dio->should_dirty)
268 				bio_set_pages_dirty(bio);
269 		} else {
270 			bio->bi_opf = dio_bio_write_op(iocb);
271 			task_io_account_write(bio->bi_iter.bi_size);
272 		}
273 		dio->size += bio->bi_iter.bi_size;
274 		pos += bio->bi_iter.bi_size;
275 
276 		nr_pages = bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS);
277 		if (!nr_pages) {
278 			bool polled = false;
279 
280 			if (iocb->ki_flags & IOCB_HIPRI) {
281 				bio_set_polled(bio, iocb);
282 				polled = true;
283 			}
284 
285 			qc = submit_bio(bio);
286 
287 			if (polled)
288 				WRITE_ONCE(iocb->ki_cookie, qc);
289 			break;
290 		}
291 
292 		if (!dio->multi_bio) {
293 			/*
294 			 * AIO needs an extra reference to ensure the dio
295 			 * structure which is embedded into the first bio
296 			 * stays around.
297 			 */
298 			if (!is_sync)
299 				bio_get(bio);
300 			dio->multi_bio = true;
301 			atomic_set(&dio->ref, 2);
302 		} else {
303 			atomic_inc(&dio->ref);
304 		}
305 
306 		submit_bio(bio);
307 		bio = bio_alloc(GFP_KERNEL, nr_pages);
308 	}
309 
310 	if (!is_poll)
311 		blk_finish_plug(&plug);
312 
313 	if (!is_sync)
314 		return -EIOCBQUEUED;
315 
316 	for (;;) {
317 		set_current_state(TASK_UNINTERRUPTIBLE);
318 		if (!READ_ONCE(dio->waiter))
319 			break;
320 
321 		if (!(iocb->ki_flags & IOCB_HIPRI) ||
322 		    !blk_poll(bdev_get_queue(bdev), qc, true))
323 			blk_io_schedule();
324 	}
325 	__set_current_state(TASK_RUNNING);
326 
327 	if (!ret)
328 		ret = blk_status_to_errno(dio->bio.bi_status);
329 	if (likely(!ret))
330 		ret = dio->size;
331 
332 	bio_put(&dio->bio);
333 	return ret;
334 }
335 
blkdev_direct_IO(struct kiocb * iocb,struct iov_iter * iter)336 static ssize_t blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
337 {
338 	unsigned int nr_pages;
339 
340 	if (!iov_iter_count(iter))
341 		return 0;
342 
343 	nr_pages = bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS + 1);
344 	if (is_sync_kiocb(iocb) && nr_pages <= BIO_MAX_VECS)
345 		return __blkdev_direct_IO_simple(iocb, iter, nr_pages);
346 
347 	return __blkdev_direct_IO(iocb, iter, bio_max_segs(nr_pages));
348 }
349 
blkdev_writepage(struct page * page,struct writeback_control * wbc)350 static int blkdev_writepage(struct page *page, struct writeback_control *wbc)
351 {
352 	return block_write_full_page(page, blkdev_get_block, wbc);
353 }
354 
blkdev_readpage(struct file * file,struct page * page)355 static int blkdev_readpage(struct file * file, struct page * page)
356 {
357 	return block_read_full_page(page, blkdev_get_block);
358 }
359 
blkdev_readahead(struct readahead_control * rac)360 static void blkdev_readahead(struct readahead_control *rac)
361 {
362 	mpage_readahead(rac, blkdev_get_block);
363 }
364 
/*
 * ->write_begin: forward to block_write_begin() with blkdev_get_block as
 * the mapping callback.  @file and @fsdata are not used.
 */
static int blkdev_write_begin(struct file *file, struct address_space *mapping,
		loff_t pos, unsigned len, unsigned flags, struct page **pagep,
		void **fsdata)
{
	int err;

	err = block_write_begin(mapping, pos, len, flags, pagep,
				blkdev_get_block);
	return err;
}
372 
/*
 * ->write_end: finish the write with block_write_end(), then unlock @page
 * and drop the page reference.  Returns block_write_end()'s result.
 */
static int blkdev_write_end(struct file *file, struct address_space *mapping,
		loff_t pos, unsigned len, unsigned copied, struct page *page,
		void *fsdata)
{
	int done = block_write_end(file, mapping, pos, len, copied, page,
				   fsdata);

	unlock_page(page);
	put_page(page);

	return done;
}
385 
/* ->writepages: plain generic_writepages() on the mapping. */
static int blkdev_writepages(struct address_space *mapping,
			     struct writeback_control *wbc)
{
	int err = generic_writepages(mapping, wbc);

	return err;
}
391 
392 const struct address_space_operations def_blk_aops = {
393 	.set_page_dirty	= __set_page_dirty_buffers,
394 	.readpage	= blkdev_readpage,
395 	.readahead	= blkdev_readahead,
396 	.writepage	= blkdev_writepage,
397 	.write_begin	= blkdev_write_begin,
398 	.write_end	= blkdev_write_end,
399 	.writepages	= blkdev_writepages,
400 	.direct_IO	= blkdev_direct_IO,
401 	.migratepage	= buffer_migrate_page_norefs,
402 	.is_dirty_writeback = buffer_check_dirty_writeback,
403 };
404 
405 /*
406  * for a block special file file_inode(file)->i_size is zero
407  * so we compute the size by hand (just as in block_read/write above)
408  */
blkdev_llseek(struct file * file,loff_t offset,int whence)409 static loff_t blkdev_llseek(struct file *file, loff_t offset, int whence)
410 {
411 	struct inode *bd_inode = bdev_file_inode(file);
412 	loff_t retval;
413 
414 	inode_lock(bd_inode);
415 	retval = fixed_size_llseek(file, offset, whence, i_size_read(bd_inode));
416 	inode_unlock(bd_inode);
417 	return retval;
418 }
419 
/*
 * ->fsync: write back and wait on the byte range [@start, @end], then
 * flush the device.  A device that answers the flush with -EOPNOTSUPP is
 * treated as success.  @datasync is not looked at.
 */
static int blkdev_fsync(struct file *filp, loff_t start, loff_t end,
		int datasync)
{
	struct block_device *bdev = I_BDEV(bdev_file_inode(filp));
	int err;

	err = file_write_and_wait_range(filp, start, end);
	if (err)
		return err;

	/*
	 * There is no need to serialise calls to blkdev_issue_flush with
	 * i_mutex and doing so causes performance issues with concurrent
	 * O_SYNC writers to a block device.
	 */
	err = blkdev_issue_flush(bdev);

	return err == -EOPNOTSUPP ? 0 : err;
}
442 
blkdev_open(struct inode * inode,struct file * filp)443 static int blkdev_open(struct inode *inode, struct file *filp)
444 {
445 	struct block_device *bdev;
446 
447 	/*
448 	 * Preserve backwards compatibility and allow large file access
449 	 * even if userspace doesn't ask for it explicitly. Some mkfs
450 	 * binary needs it. We might want to drop this workaround
451 	 * during an unstable branch.
452 	 */
453 	filp->f_flags |= O_LARGEFILE;
454 	filp->f_mode |= FMODE_NOWAIT | FMODE_BUF_RASYNC;
455 
456 	if (filp->f_flags & O_NDELAY)
457 		filp->f_mode |= FMODE_NDELAY;
458 	if (filp->f_flags & O_EXCL)
459 		filp->f_mode |= FMODE_EXCL;
460 	if ((filp->f_flags & O_ACCMODE) == 3)
461 		filp->f_mode |= FMODE_WRITE_IOCTL;
462 
463 	bdev = blkdev_get_by_dev(inode->i_rdev, filp->f_mode, filp);
464 	if (IS_ERR(bdev))
465 		return PTR_ERR(bdev);
466 	filp->f_mapping = bdev->bd_inode->i_mapping;
467 	filp->f_wb_err = filemap_sample_wb_err(filp->f_mapping);
468 	return 0;
469 }
470 
blkdev_close(struct inode * inode,struct file * filp)471 static int blkdev_close(struct inode *inode, struct file *filp)
472 {
473 	struct block_device *bdev = I_BDEV(bdev_file_inode(filp));
474 
475 	blkdev_put(bdev, filp->f_mode);
476 	return 0;
477 }
478 
block_ioctl(struct file * file,unsigned cmd,unsigned long arg)479 static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg)
480 {
481 	struct block_device *bdev = I_BDEV(bdev_file_inode(file));
482 	fmode_t mode = file->f_mode;
483 
484 	/*
485 	 * O_NDELAY can be altered using fcntl(.., F_SETFL, ..), so we have
486 	 * to updated it before every ioctl.
487 	 */
488 	if (file->f_flags & O_NDELAY)
489 		mode |= FMODE_NDELAY;
490 	else
491 		mode &= ~FMODE_NDELAY;
492 
493 	return blkdev_ioctl(bdev, mode, cmd, arg);
494 }
495 
496 /*
497  * Write data to the block device.  Only intended for the block device itself
498  * and the raw driver which basically is a fake block device.
499  *
500  * Does not take i_mutex for the write and thus is not for general purpose
501  * use.
502  */
blkdev_write_iter(struct kiocb * iocb,struct iov_iter * from)503 static ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from)
504 {
505 	struct file *file = iocb->ki_filp;
506 	struct inode *bd_inode = bdev_file_inode(file);
507 	loff_t size = i_size_read(bd_inode);
508 	struct blk_plug plug;
509 	size_t shorted = 0;
510 	ssize_t ret;
511 
512 	if (bdev_read_only(I_BDEV(bd_inode)))
513 		return -EPERM;
514 
515 	if (IS_SWAPFILE(bd_inode) && !is_hibernate_resume_dev(bd_inode->i_rdev))
516 		return -ETXTBSY;
517 
518 	if (!iov_iter_count(from))
519 		return 0;
520 
521 	if (iocb->ki_pos >= size)
522 		return -ENOSPC;
523 
524 	if ((iocb->ki_flags & (IOCB_NOWAIT | IOCB_DIRECT)) == IOCB_NOWAIT)
525 		return -EOPNOTSUPP;
526 
527 	size -= iocb->ki_pos;
528 	if (iov_iter_count(from) > size) {
529 		shorted = iov_iter_count(from) - size;
530 		iov_iter_truncate(from, size);
531 	}
532 
533 	blk_start_plug(&plug);
534 	ret = __generic_file_write_iter(iocb, from);
535 	if (ret > 0)
536 		ret = generic_write_sync(iocb, ret);
537 	iov_iter_reexpand(from, iov_iter_count(from) + shorted);
538 	blk_finish_plug(&plug);
539 	return ret;
540 }
541 
blkdev_read_iter(struct kiocb * iocb,struct iov_iter * to)542 static ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to)
543 {
544 	struct file *file = iocb->ki_filp;
545 	struct inode *bd_inode = bdev_file_inode(file);
546 	loff_t size = i_size_read(bd_inode);
547 	loff_t pos = iocb->ki_pos;
548 	size_t shorted = 0;
549 	ssize_t ret;
550 
551 	if (pos >= size)
552 		return 0;
553 
554 	size -= pos;
555 	if (iov_iter_count(to) > size) {
556 		shorted = iov_iter_count(to) - size;
557 		iov_iter_truncate(to, size);
558 	}
559 
560 	ret = generic_file_read_iter(iocb, to);
561 	iov_iter_reexpand(to, iov_iter_count(to) + shorted);
562 	return ret;
563 }
564 
565 #define	BLKDEV_FALLOC_FL_SUPPORTED					\
566 		(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |		\
567 		 FALLOC_FL_ZERO_RANGE | FALLOC_FL_NO_HIDE_STALE)
568 
blkdev_fallocate(struct file * file,int mode,loff_t start,loff_t len)569 static long blkdev_fallocate(struct file *file, int mode, loff_t start,
570 			     loff_t len)
571 {
572 	struct inode *inode = bdev_file_inode(file);
573 	struct block_device *bdev = I_BDEV(inode);
574 	loff_t end = start + len - 1;
575 	loff_t isize;
576 	int error;
577 
578 	/* Fail if we don't recognize the flags. */
579 	if (mode & ~BLKDEV_FALLOC_FL_SUPPORTED)
580 		return -EOPNOTSUPP;
581 
582 	/* Don't go off the end of the device. */
583 	isize = i_size_read(bdev->bd_inode);
584 	if (start >= isize)
585 		return -EINVAL;
586 	if (end >= isize) {
587 		if (mode & FALLOC_FL_KEEP_SIZE) {
588 			len = isize - start;
589 			end = start + len - 1;
590 		} else
591 			return -EINVAL;
592 	}
593 
594 	/*
595 	 * Don't allow IO that isn't aligned to logical block size.
596 	 */
597 	if ((start | len) & (bdev_logical_block_size(bdev) - 1))
598 		return -EINVAL;
599 
600 	filemap_invalidate_lock(inode->i_mapping);
601 
602 	/*
603 	 * Invalidate the page cache, including dirty pages, for valid
604 	 * de-allocate mode calls to fallocate().
605 	 */
606 	switch (mode) {
607 	case FALLOC_FL_ZERO_RANGE:
608 	case FALLOC_FL_ZERO_RANGE | FALLOC_FL_KEEP_SIZE:
609 		error = truncate_bdev_range(bdev, file->f_mode, start, end);
610 		if (error)
611 			goto fail;
612 
613 		error = blkdev_issue_zeroout(bdev, start >> 9, len >> 9,
614 					    GFP_KERNEL, BLKDEV_ZERO_NOUNMAP);
615 		break;
616 	case FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE:
617 		error = truncate_bdev_range(bdev, file->f_mode, start, end);
618 		if (error)
619 			goto fail;
620 
621 		error = blkdev_issue_zeroout(bdev, start >> 9, len >> 9,
622 					     GFP_KERNEL, BLKDEV_ZERO_NOFALLBACK);
623 		break;
624 	case FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE | FALLOC_FL_NO_HIDE_STALE:
625 		error = truncate_bdev_range(bdev, file->f_mode, start, end);
626 		if (error)
627 			goto fail;
628 
629 		error = blkdev_issue_discard(bdev, start >> 9, len >> 9,
630 					     GFP_KERNEL, 0);
631 		break;
632 	default:
633 		error = -EOPNOTSUPP;
634 	}
635 
636  fail:
637 	filemap_invalidate_unlock(inode->i_mapping);
638 	return error;
639 }
640 
641 const struct file_operations def_blk_fops = {
642 	.open		= blkdev_open,
643 	.release	= blkdev_close,
644 	.llseek		= blkdev_llseek,
645 	.read_iter	= blkdev_read_iter,
646 	.write_iter	= blkdev_write_iter,
647 	.iopoll		= blkdev_iopoll,
648 	.mmap		= generic_file_mmap,
649 	.fsync		= blkdev_fsync,
650 	.unlocked_ioctl	= block_ioctl,
651 #ifdef CONFIG_COMPAT
652 	.compat_ioctl	= compat_blkdev_ioctl,
653 #endif
654 	.splice_read	= generic_file_splice_read,
655 	.splice_write	= iter_file_splice_write,
656 	.fallocate	= blkdev_fallocate,
657 };
658 
blkdev_init(void)659 static __init int blkdev_init(void)
660 {
661 	return bioset_init(&blkdev_dio_pool, 4,
662 				offsetof(struct blkdev_dio, bio),
663 				BIOSET_NEED_BVECS|BIOSET_PERCPU_CACHE);
664 }
665 module_init(blkdev_init);
666