1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * linux/fs/read_write.c
4 *
5 * Copyright (C) 1991, 1992 Linus Torvalds
6 */
7
8 #include <linux/slab.h>
9 #include <linux/stat.h>
10 #include <linux/sched/xacct.h>
11 #include <linux/fcntl.h>
12 #include <linux/file.h>
13 #include <linux/uio.h>
14 #include <linux/fsnotify.h>
15 #include <linux/security.h>
16 #include <linux/export.h>
17 #include <linux/syscalls.h>
18 #include <linux/pagemap.h>
19 #include <linux/page_size_compat.h>
20 #include <linux/splice.h>
21 #include <linux/compat.h>
22 #include <linux/mount.h>
23 #include <linux/fs.h>
24 #include "internal.h"
25
26 #include <linux/uaccess.h>
27 #include <asm/unistd.h>
28
29 const struct file_operations generic_ro_fops = {
30 .llseek = generic_file_llseek,
31 .read_iter = generic_file_read_iter,
32 .mmap = generic_file_readonly_mmap,
33 .splice_read = filemap_splice_read,
34 };
35
36 EXPORT_SYMBOL(generic_ro_fops);
37
38 static inline bool unsigned_offsets(struct file *file)
39 {
40 return file->f_op->fop_flags & FOP_UNSIGNED_OFFSET;
41 }
42
43 /**
44 * vfs_setpos_cookie - update the file offset for lseek and reset cookie
45 * @file: file structure in question
46 * @offset: file offset to seek to
47 * @maxsize: maximum file size
48 * @cookie: cookie to reset
49 *
50 * Update the file offset to the value specified by @offset if the given
51 * offset is valid and it is not equal to the current file offset and
52 * reset the specified cookie to indicate that a seek happened.
53 *
54 * Return the specified offset on success and -EINVAL on invalid offset.
55 */
56 static loff_t vfs_setpos_cookie(struct file *file, loff_t offset,
57 loff_t maxsize, u64 *cookie)
58 {
59 if (offset < 0 && !unsigned_offsets(file))
60 return -EINVAL;
61 if (offset > maxsize)
62 return -EINVAL;
63
64 if (offset != file->f_pos) {
65 file->f_pos = offset;
66 if (cookie)
67 *cookie = 0;
68 }
69 return offset;
70 }
71
72 /**
73 * vfs_setpos - update the file offset for lseek
74 * @file: file structure in question
75 * @offset: file offset to seek to
76 * @maxsize: maximum file size
77 *
78 * This is a low-level filesystem helper for updating the file offset to
79 * the value specified by @offset if the given offset is valid and it is
80 * not equal to the current file offset.
81 *
82 * Return the specified offset on success and -EINVAL on invalid offset.
83 */
84 loff_t vfs_setpos(struct file *file, loff_t offset, loff_t maxsize)
85 {
86 return vfs_setpos_cookie(file, offset, maxsize, NULL);
87 }
88 EXPORT_SYMBOL(vfs_setpos);
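
/*
 * A minimal sketch of how a driver-specific ->llseek might build on
 * vfs_setpos(): compute the candidate offset for each whence value and let
 * vfs_setpos() do the validation and the f_pos update.  The device size
 * constant and the function name are hypothetical.
 */
#define EXAMPLE_DEV_SIZE	4096

static loff_t example_llseek(struct file *file, loff_t offset, int whence)
{
	loff_t newpos;

	switch (whence) {
	case SEEK_SET:
		newpos = offset;
		break;
	case SEEK_CUR:
		newpos = file->f_pos + offset;
		break;
	case SEEK_END:
		newpos = EXAMPLE_DEV_SIZE + offset;
		break;
	default:
		return -EINVAL;
	}
	/* Rejects negative or too-large offsets and updates f_pos. */
	return vfs_setpos(file, newpos, EXAMPLE_DEV_SIZE);
}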
89
90 /**
91 * must_set_pos - check whether f_pos has to be updated
92 * @file: file to seek on
93 * @offset: offset to use
94 * @whence: type of seek operation
95 * @eof: end of file
96 *
97 * Check whether f_pos needs to be updated and update @offset according
98 * to @whence.
99 *
100 * Return: 0 if f_pos doesn't need to be updated, 1 if f_pos has to be
101 * updated, and negative error code on failure.
102 */
103 static int must_set_pos(struct file *file, loff_t *offset, int whence, loff_t eof)
104 {
105 switch (whence) {
106 case SEEK_END:
107 *offset += eof;
108 break;
109 case SEEK_CUR:
110 /*
111 * Here we special-case the lseek(fd, 0, SEEK_CUR)
112 * position-querying operation. Avoid rewriting the "same"
113 * f_pos value back to the file because a concurrent read(),
114 * write() or lseek() might have altered it.
115 */
116 if (*offset == 0) {
117 *offset = file->f_pos;
118 return 0;
119 }
120 break;
121 case SEEK_DATA:
122 /*
123 * In the generic case the entire file is data, so as long as
124 * offset isn't at the end of the file then the offset is data.
125 */
126 if ((unsigned long long)*offset >= eof)
127 return -ENXIO;
128 break;
129 case SEEK_HOLE:
130 /*
131 * There is a virtual hole at the end of the file, so as long as
132 * offset isn't i_size or larger, return i_size.
133 */
134 if ((unsigned long long)*offset >= eof)
135 return -ENXIO;
136 *offset = eof;
137 break;
138 }
139
140 return 1;
141 }
142
143 /**
144 * generic_file_llseek_size - generic llseek implementation for regular files
145 * @file: file structure to seek on
146 * @offset: file offset to seek to
147 * @whence: type of seek
148 * @maxsize: max size of this file in file system
149 * @eof: offset used for SEEK_END position
150 *
151 * This is a variant of generic_file_llseek that allows passing in a custom
152 * maximum file size and a custom EOF position, e.g. for hashed directories.
153 *
154 * Synchronization:
155 * SEEK_SET and SEEK_END are unsynchronized (but atomic on 64bit platforms)
156 * SEEK_CUR is synchronized against other SEEK_CURs, but not read/writes.
157 * read/writes behave like SEEK_SET against seeks.
158 */
159 loff_t
160 generic_file_llseek_size(struct file *file, loff_t offset, int whence,
161 loff_t maxsize, loff_t eof)
162 {
163 int ret;
164
165 ret = must_set_pos(file, &offset, whence, eof);
166 if (ret < 0)
167 return ret;
168 if (ret == 0)
169 return offset;
170
171 if (whence == SEEK_CUR) {
172 /*
173 * f_lock protects against read/modify/write race with
174 * other SEEK_CURs. Note that parallel writes and reads
175 * behave like SEEK_SET.
176 */
177 guard(spinlock)(&file->f_lock);
178 return vfs_setpos(file, file->f_pos + offset, maxsize);
179 }
180
181 return vfs_setpos(file, offset, maxsize);
182 }
183 EXPORT_SYMBOL(generic_file_llseek_size);
184
185 /**
186 * generic_llseek_cookie - versioned llseek implementation
187 * @file: file structure to seek on
188 * @offset: file offset to seek to
189 * @whence: type of seek
190 * @cookie: cookie to update
191 *
192 * See generic_file_llseek for a general description and locking assumptions.
193 *
194 * In contrast to generic_file_llseek, this function also resets a
195 * specified cookie to indicate a seek took place.
196 */
197 loff_t generic_llseek_cookie(struct file *file, loff_t offset, int whence,
198 u64 *cookie)
199 {
200 struct inode *inode = file->f_mapping->host;
201 loff_t maxsize = inode->i_sb->s_maxbytes;
202 loff_t eof = i_size_read(inode);
203 int ret;
204
205 if (WARN_ON_ONCE(!cookie))
206 return -EINVAL;
207
208 /*
209 * Require that this is only used for directories that guarantee
210 * synchronization between readdir and seek so that an update to
211 * @cookie is correctly synchronized with concurrent readdir.
212 */
213 if (WARN_ON_ONCE(!(file->f_mode & FMODE_ATOMIC_POS)))
214 return -EINVAL;
215
216 ret = must_set_pos(file, &offset, whence, eof);
217 if (ret < 0)
218 return ret;
219 if (ret == 0)
220 return offset;
221
222 /* No need to hold f_lock because we know that f_pos_lock is held. */
223 if (whence == SEEK_CUR)
224 return vfs_setpos_cookie(file, file->f_pos + offset, maxsize, cookie);
225
226 return vfs_setpos_cookie(file, offset, maxsize, cookie);
227 }
228 EXPORT_SYMBOL(generic_llseek_cookie);
229
230 /**
231 * generic_file_llseek - generic llseek implementation for regular files
232 * @file: file structure to seek on
233 * @offset: file offset to seek to
234 * @whence: type of seek
235 *
236 * This is a generic implementation of ->llseek usable for all normal local
237 * filesystems. It just updates the file offset to the value specified by
238 * @offset and @whence.
239 */
240 loff_t generic_file_llseek(struct file *file, loff_t offset, int whence)
241 {
242 struct inode *inode = file->f_mapping->host;
243
244 return generic_file_llseek_size(file, offset, whence,
245 inode->i_sb->s_maxbytes,
246 i_size_read(inode));
247 }
248 EXPORT_SYMBOL(generic_file_llseek);
249
250 /**
251 * fixed_size_llseek - llseek implementation for fixed-sized devices
252 * @file: file structure to seek on
253 * @offset: file offset to seek to
254 * @whence: type of seek
255 * @size: size of the file
256 *
257 */
258 loff_t fixed_size_llseek(struct file *file, loff_t offset, int whence, loff_t size)
259 {
260 switch (whence) {
261 case SEEK_SET: case SEEK_CUR: case SEEK_END:
262 return generic_file_llseek_size(file, offset, whence,
263 size, size);
264 default:
265 return -EINVAL;
266 }
267 }
268 EXPORT_SYMBOL(fixed_size_llseek);
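
/*
 * A sketch of wiring fixed_size_llseek() into a character device whose
 * backing store has a known, fixed size.  The 1 MiB size, the helper and
 * the file_operations name are hypothetical.
 */
static loff_t example_fixed_llseek(struct file *file, loff_t offset, int whence)
{
	/* SEEK_SET/SEEK_CUR/SEEK_END within [0, 1 MiB]; anything else is -EINVAL */
	return fixed_size_llseek(file, offset, whence, 1024 * 1024);
}

static const struct file_operations example_fixed_fops = {
	.llseek	= example_fixed_llseek,
};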
269
270 /**
271 * no_seek_end_llseek - llseek implementation for fixed-sized devices
272 * @file: file structure to seek on
273 * @offset: file offset to seek to
274 * @whence: type of seek
275 *
276 */
277 loff_t no_seek_end_llseek(struct file *file, loff_t offset, int whence)
278 {
279 switch (whence) {
280 case SEEK_SET: case SEEK_CUR:
281 return generic_file_llseek_size(file, offset, whence,
282 OFFSET_MAX, 0);
283 default:
284 return -EINVAL;
285 }
286 }
287 EXPORT_SYMBOL(no_seek_end_llseek);
288
289 /**
290 * no_seek_end_llseek_size - llseek implementation for fixed-sized devices
291 * @file: file structure to seek on
292 * @offset: file offset to seek to
293 * @whence: type of seek
294 * @size: maximal offset allowed
295 *
296 */
297 loff_t no_seek_end_llseek_size(struct file *file, loff_t offset, int whence, loff_t size)
298 {
299 switch (whence) {
300 case SEEK_SET: case SEEK_CUR:
301 return generic_file_llseek_size(file, offset, whence,
302 size, 0);
303 default:
304 return -EINVAL;
305 }
306 }
307 EXPORT_SYMBOL(no_seek_end_llseek_size);
308
309 /**
310 * noop_llseek - No Operation Performed llseek implementation
311 * @file: file structure to seek on
312 * @offset: file offset to seek to
313 * @whence: type of seek
314 *
315 * This is an implementation of ->llseek usable for the rare special case when
316 * userspace expects the seek to succeed but the (device) file is actually not
317 * able to perform the seek. In this case you use noop_llseek() instead of
318 * falling back to the default implementation of ->llseek.
319 */
320 loff_t noop_llseek(struct file *file, loff_t offset, int whence)
321 {
322 return file->f_pos;
323 }
324 EXPORT_SYMBOL(noop_llseek);
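
/*
 * A sketch of the case described above: a device that ignores the file
 * position but whose users may still call lseek() and expect it to succeed.
 * The ops name is hypothetical.
 */
static const struct file_operations example_noseek_fops = {
	.llseek	= noop_llseek,
	/* .read/.write implementations would ignore *ppos entirely */
};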
325
326 loff_t default_llseek(struct file *file, loff_t offset, int whence)
327 {
328 struct inode *inode = file_inode(file);
329 loff_t retval;
330
331 inode_lock(inode);
332 switch (whence) {
333 case SEEK_END:
334 offset += i_size_read(inode);
335 break;
336 case SEEK_CUR:
337 if (offset == 0) {
338 retval = file->f_pos;
339 goto out;
340 }
341 offset += file->f_pos;
342 break;
343 case SEEK_DATA:
344 /*
345 * In the generic case the entire file is data, so as
346 * long as offset isn't at the end of the file then the
347 * offset is data.
348 */
349 if (offset >= inode->i_size) {
350 retval = -ENXIO;
351 goto out;
352 }
353 break;
354 case SEEK_HOLE:
355 /*
356 * There is a virtual hole at the end of the file, so
357 * as long as offset isn't i_size or larger, return
358 * i_size.
359 */
360 if (offset >= inode->i_size) {
361 retval = -ENXIO;
362 goto out;
363 }
364 offset = inode->i_size;
365 break;
366 }
367 retval = -EINVAL;
368 if (offset >= 0 || unsigned_offsets(file)) {
369 if (offset != file->f_pos)
370 file->f_pos = offset;
371 retval = offset;
372 }
373 out:
374 inode_unlock(inode);
375 return retval;
376 }
377 EXPORT_SYMBOL(default_llseek);
378
379 loff_t vfs_llseek(struct file *file, loff_t offset, int whence)
380 {
381 if (!(file->f_mode & FMODE_LSEEK))
382 return -ESPIPE;
383 return file->f_op->llseek(file, offset, whence);
384 }
385 EXPORT_SYMBOL(vfs_llseek);
386
387 static off_t ksys_lseek(unsigned int fd, off_t offset, unsigned int whence)
388 {
389 off_t retval;
390 struct fd f = fdget_pos(fd);
391 if (!fd_file(f))
392 return -EBADF;
393
394 retval = -EINVAL;
395 if (whence <= SEEK_MAX) {
396 loff_t res = vfs_llseek(fd_file(f), offset, whence);
397 retval = res;
398 if (res != (loff_t)retval)
399 retval = -EOVERFLOW; /* LFS: should only happen on 32 bit platforms */
400 }
401 fdput_pos(f);
402 return retval;
403 }
404
405 SYSCALL_DEFINE3(lseek, unsigned int, fd, off_t, offset, unsigned int, whence)
406 {
407 return ksys_lseek(fd, offset, whence);
408 }
409
410 #ifdef CONFIG_COMPAT
411 COMPAT_SYSCALL_DEFINE3(lseek, unsigned int, fd, compat_off_t, offset, unsigned int, whence)
412 {
413 return ksys_lseek(fd, offset, whence);
414 }
415 #endif
416
417 #if !defined(CONFIG_64BIT) || defined(CONFIG_COMPAT) || \
418 defined(__ARCH_WANT_SYS_LLSEEK)
419 SYSCALL_DEFINE5(llseek, unsigned int, fd, unsigned long, offset_high,
420 unsigned long, offset_low, loff_t __user *, result,
421 unsigned int, whence)
422 {
423 int retval;
424 struct fd f = fdget_pos(fd);
425 loff_t offset;
426
427 if (!fd_file(f))
428 return -EBADF;
429
430 retval = -EINVAL;
431 if (whence > SEEK_MAX)
432 goto out_putf;
433
434 offset = vfs_llseek(fd_file(f), ((loff_t) offset_high << 32) | offset_low,
435 whence);
436
437 retval = (int)offset;
438 if (offset >= 0) {
439 retval = -EFAULT;
440 if (!copy_to_user(result, &offset, sizeof(offset)))
441 retval = 0;
442 }
443 out_putf:
444 fdput_pos(f);
445 return retval;
446 }
447 #endif
448
449 int rw_verify_area(int read_write, struct file *file, const loff_t *ppos, size_t count)
450 {
451 int mask = read_write == READ ? MAY_READ : MAY_WRITE;
452 int ret;
453
454 if (unlikely((ssize_t) count < 0))
455 return -EINVAL;
456
457 if (ppos) {
458 loff_t pos = *ppos;
459
460 if (unlikely(pos < 0)) {
461 if (!unsigned_offsets(file))
462 return -EINVAL;
463 if (count >= -pos) /* both values are in 0..LLONG_MAX */
464 return -EOVERFLOW;
465 } else if (unlikely((loff_t) (pos + count) < 0)) {
466 if (!unsigned_offsets(file))
467 return -EINVAL;
468 }
469 }
470
471 ret = security_file_permission(file, mask);
472 if (ret)
473 return ret;
474
475 return fsnotify_file_area_perm(file, mask, ppos, count);
476 }
477 EXPORT_SYMBOL(rw_verify_area);
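
/*
 * A sketch mirroring how the helpers below use rw_verify_area(): validate
 * the range and the security/fsnotify permissions on both sides before
 * moving any data.  The transfer step itself is elided and the function
 * name is hypothetical.
 */
static ssize_t example_transfer(struct file *in, loff_t *in_pos,
				struct file *out, loff_t *out_pos,
				size_t count)
{
	int ret;

	ret = rw_verify_area(READ, in, in_pos, count);
	if (ret)
		return ret;
	ret = rw_verify_area(WRITE, out, out_pos, count);
	if (ret)
		return ret;

	/* ...move at most MAX_RW_COUNT bytes between the two files... */
	return 0;
}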
478
479 static ssize_t new_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos)
480 {
481 struct kiocb kiocb;
482 struct iov_iter iter;
483 ssize_t ret;
484
485 init_sync_kiocb(&kiocb, filp);
486 kiocb.ki_pos = (ppos ? *ppos : 0);
487 iov_iter_ubuf(&iter, ITER_DEST, buf, len);
488
489 ret = filp->f_op->read_iter(&kiocb, &iter);
490 BUG_ON(ret == -EIOCBQUEUED);
491 if (ppos)
492 *ppos = kiocb.ki_pos;
493 return ret;
494 }
495
496 static int warn_unsupported(struct file *file, const char *op)
497 {
498 pr_warn_ratelimited(
499 "kernel %s not supported for file %pD4 (pid: %d comm: %.20s)\n",
500 op, file, current->pid, current->comm);
501 return -EINVAL;
502 }
503
504 ssize_t __kernel_read(struct file *file, void *buf, size_t count, loff_t *pos)
505 {
506 struct kvec iov = {
507 .iov_base = buf,
508 .iov_len = min_t(size_t, count, MAX_RW_COUNT),
509 };
510 struct kiocb kiocb;
511 struct iov_iter iter;
512 ssize_t ret;
513
514 if (WARN_ON_ONCE(!(file->f_mode & FMODE_READ)))
515 return -EINVAL;
516 if (!(file->f_mode & FMODE_CAN_READ))
517 return -EINVAL;
518 /*
519 * Also fail if ->read_iter and ->read are both wired up as that
520 * implies very convoluted semantics.
521 */
522 if (unlikely(!file->f_op->read_iter || file->f_op->read))
523 return warn_unsupported(file, "read");
524
525 init_sync_kiocb(&kiocb, file);
526 kiocb.ki_pos = pos ? *pos : 0;
527 iov_iter_kvec(&iter, ITER_DEST, &iov, 1, iov.iov_len);
528 ret = file->f_op->read_iter(&kiocb, &iter);
529 if (ret > 0) {
530 if (pos)
531 *pos = kiocb.ki_pos;
532 fsnotify_access(file);
533 add_rchar(current, ret);
534 }
535 inc_syscr(current);
536 return ret;
537 }
538
539 ssize_t kernel_read(struct file *file, void *buf, size_t count, loff_t *pos)
540 {
541 ssize_t ret;
542
543 ret = rw_verify_area(READ, file, pos, count);
544 if (ret)
545 return ret;
546 return __kernel_read(file, buf, count, pos);
547 }
548 EXPORT_SYMBOL(kernel_read);
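
/*
 * A minimal sketch of reading a file from kernel context with kernel_read():
 * open, read into a kernel buffer while tracking the position explicitly,
 * then close.  The path, function name and error handling are reduced to
 * the essentials and are hypothetical.
 */
static ssize_t example_read_header(void *buf, size_t len)
{
	struct file *filp;
	loff_t pos = 0;
	ssize_t ret;

	filp = filp_open("/tmp/example", O_RDONLY, 0);
	if (IS_ERR(filp))
		return PTR_ERR(filp);

	/* kernel_read() advances @pos by the number of bytes read. */
	ret = kernel_read(filp, buf, len, &pos);

	filp_close(filp, NULL);
	return ret;
}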
549
550 ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos)
551 {
552 ssize_t ret;
553
554 if (!(file->f_mode & FMODE_READ))
555 return -EBADF;
556 if (!(file->f_mode & FMODE_CAN_READ))
557 return -EINVAL;
558 if (unlikely(!access_ok(buf, count)))
559 return -EFAULT;
560
561 ret = rw_verify_area(READ, file, pos, count);
562 if (ret)
563 return ret;
564 if (count > MAX_RW_COUNT)
565 count = MAX_RW_COUNT;
566
567 if (file->f_op->read)
568 ret = file->f_op->read(file, buf, count, pos);
569 else if (file->f_op->read_iter)
570 ret = new_sync_read(file, buf, count, pos);
571 else
572 ret = -EINVAL;
573 if (ret > 0) {
574 fsnotify_access(file);
575 add_rchar(current, ret);
576 }
577 inc_syscr(current);
578 return ret;
579 }
580
581 static ssize_t new_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos)
582 {
583 struct kiocb kiocb;
584 struct iov_iter iter;
585 ssize_t ret;
586
587 init_sync_kiocb(&kiocb, filp);
588 kiocb.ki_pos = (ppos ? *ppos : 0);
589 iov_iter_ubuf(&iter, ITER_SOURCE, (void __user *)buf, len);
590
591 ret = filp->f_op->write_iter(&kiocb, &iter);
592 BUG_ON(ret == -EIOCBQUEUED);
593 if (ret > 0 && ppos)
594 *ppos = kiocb.ki_pos;
595 return ret;
596 }
597
598 /* caller is responsible for file_start_write/file_end_write */
599 ssize_t __kernel_write_iter(struct file *file, struct iov_iter *from, loff_t *pos)
600 {
601 struct kiocb kiocb;
602 ssize_t ret;
603
604 if (WARN_ON_ONCE(!(file->f_mode & FMODE_WRITE)))
605 return -EBADF;
606 if (!(file->f_mode & FMODE_CAN_WRITE))
607 return -EINVAL;
608 /*
609 * Also fail if ->write_iter and ->write are both wired up as that
610 * implies very convoluted semantics.
611 */
612 if (unlikely(!file->f_op->write_iter || file->f_op->write))
613 return warn_unsupported(file, "write");
614
615 init_sync_kiocb(&kiocb, file);
616 kiocb.ki_pos = pos ? *pos : 0;
617 ret = file->f_op->write_iter(&kiocb, from);
618 if (ret > 0) {
619 if (pos)
620 *pos = kiocb.ki_pos;
621 fsnotify_modify(file);
622 add_wchar(current, ret);
623 }
624 inc_syscw(current);
625 return ret;
626 }
627
628 /* caller is responsible for file_start_write/file_end_write */
629 ssize_t __kernel_write(struct file *file, const void *buf, size_t count, loff_t *pos)
630 {
631 struct kvec iov = {
632 .iov_base = (void *)buf,
633 .iov_len = min_t(size_t, count, MAX_RW_COUNT),
634 };
635 struct iov_iter iter;
636 iov_iter_kvec(&iter, ITER_SOURCE, &iov, 1, iov.iov_len);
637 return __kernel_write_iter(file, &iter, pos);
638 }
639 /*
640 * This "EXPORT_SYMBOL_GPL()" is more of an "EXPORT_SYMBOL_DONTUSE()",
641 * but autofs is one of the few internal kernel users that actually
642 * wants this _and_ can be built as a module. So we need to export
643 * this symbol for autofs, even though it really isn't appropriate
644 * for any other kernel modules.
645 */
646 EXPORT_SYMBOL_GPL(__kernel_write);
647
648 ssize_t kernel_write(struct file *file, const void *buf, size_t count,
649 loff_t *pos)
650 {
651 ssize_t ret;
652
653 ret = rw_verify_area(WRITE, file, pos, count);
654 if (ret)
655 return ret;
656
657 file_start_write(file);
658 ret = __kernel_write(file, buf, count, pos);
659 file_end_write(file);
660 return ret;
661 }
662 EXPORT_SYMBOL(kernel_write);
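
/*
 * The write-side counterpart, as a sketch: write a kernel buffer out in
 * full, looping over short writes, at a caller-tracked position.  Assumes
 * the caller holds a reference on @filp; the helper name is hypothetical.
 */
static int example_write_all(struct file *filp, const void *buf, size_t len,
			     loff_t *pos)
{
	while (len) {
		/* kernel_write() handles freeze protection and fsnotify. */
		ssize_t ret = kernel_write(filp, buf, len, pos);

		if (ret < 0)
			return ret;
		if (!ret)
			return -EIO;
		buf += ret;
		len -= ret;
	}
	return 0;
}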
663
664 ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_t *pos)
665 {
666 ssize_t ret;
667
668 if (!(file->f_mode & FMODE_WRITE))
669 return -EBADF;
670 if (!(file->f_mode & FMODE_CAN_WRITE))
671 return -EINVAL;
672 if (unlikely(!access_ok(buf, count)))
673 return -EFAULT;
674
675 ret = rw_verify_area(WRITE, file, pos, count);
676 if (ret)
677 return ret;
678 if (count > MAX_RW_COUNT)
679 count = MAX_RW_COUNT;
680 file_start_write(file);
681 if (file->f_op->write)
682 ret = file->f_op->write(file, buf, count, pos);
683 else if (file->f_op->write_iter)
684 ret = new_sync_write(file, buf, count, pos);
685 else
686 ret = -EINVAL;
687 if (ret > 0) {
688 fsnotify_modify(file);
689 add_wchar(current, ret);
690 }
691 inc_syscw(current);
692 file_end_write(file);
693 return ret;
694 }
695
696 /* file_ppos returns &file->f_pos or NULL if file is stream */
697 static inline loff_t *file_ppos(struct file *file)
698 {
699 return file->f_mode & FMODE_STREAM ? NULL : &file->f_pos;
700 }
701
702 ssize_t ksys_read(unsigned int fd, char __user *buf, size_t count)
703 {
704 struct fd f = fdget_pos(fd);
705 ssize_t ret = -EBADF;
706
707 if (fd_file(f)) {
708 loff_t pos, *ppos = file_ppos(fd_file(f));
709 if (ppos) {
710 pos = *ppos;
711 ppos = &pos;
712 }
713 ret = vfs_read(fd_file(f), buf, count, ppos);
714 if (ret >= 0 && ppos)
715 fd_file(f)->f_pos = pos;
716 fdput_pos(f);
717 }
718 return ret;
719 }
720
721 SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count)
722 {
723 return ksys_read(fd, buf, count);
724 }
725
726 ssize_t ksys_write(unsigned int fd, const char __user *buf, size_t count)
727 {
728 struct fd f = fdget_pos(fd);
729 ssize_t ret = -EBADF;
730
731 if (fd_file(f)) {
732 loff_t pos, *ppos = file_ppos(fd_file(f));
733 if (ppos) {
734 pos = *ppos;
735 ppos = &pos;
736 }
737 ret = vfs_write(fd_file(f), buf, count, ppos);
738 if (ret >= 0 && ppos)
739 fd_file(f)->f_pos = pos;
740 fdput_pos(f);
741 }
742
743 return ret;
744 }
745
746 SYSCALL_DEFINE3(write, unsigned int, fd, const char __user *, buf,
747 size_t, count)
748 {
749 return ksys_write(fd, buf, count);
750 }
751
752 ssize_t ksys_pread64(unsigned int fd, char __user *buf, size_t count,
753 loff_t pos)
754 {
755 struct fd f;
756 ssize_t ret = -EBADF;
757
758 if (pos < 0)
759 return -EINVAL;
760
761 f = fdget(fd);
762 if (fd_file(f)) {
763 ret = -ESPIPE;
764
765 /*
766 * If userspace thinks the pages are larger than they actually are,
767 * adjust the offset and count to compensate.
768 *
769 * NOTE: We only need to adjust the position here since pagemap_read()
770 * handles updating the count.
771 */
772 if (__is_emulated_pagemap_file(fd_file(f)))
773 pos *= __PAGE_SIZE / PAGE_SIZE;
774
775 if (fd_file(f)->f_mode & FMODE_PREAD)
776 ret = vfs_read(fd_file(f), buf, count, &pos);
777 fdput(f);
778 }
779
780 return ret;
781 }
782
783 SYSCALL_DEFINE4(pread64, unsigned int, fd, char __user *, buf,
784 size_t, count, loff_t, pos)
785 {
786 return ksys_pread64(fd, buf, count, pos);
787 }
788
789 #if defined(CONFIG_COMPAT) && defined(__ARCH_WANT_COMPAT_PREAD64)
790 COMPAT_SYSCALL_DEFINE5(pread64, unsigned int, fd, char __user *, buf,
791 size_t, count, compat_arg_u64_dual(pos))
792 {
793 return ksys_pread64(fd, buf, count, compat_arg_u64_glue(pos));
794 }
795 #endif
796
797 ssize_t ksys_pwrite64(unsigned int fd, const char __user *buf,
798 size_t count, loff_t pos)
799 {
800 struct fd f;
801 ssize_t ret = -EBADF;
802
803 if (pos < 0)
804 return -EINVAL;
805
806 f = fdget(fd);
807 if (fd_file(f)) {
808 ret = -ESPIPE;
809 if (fd_file(f)->f_mode & FMODE_PWRITE)
810 ret = vfs_write(fd_file(f), buf, count, &pos);
811 fdput(f);
812 }
813
814 return ret;
815 }
816
817 SYSCALL_DEFINE4(pwrite64, unsigned int, fd, const char __user *, buf,
818 size_t, count, loff_t, pos)
819 {
820 return ksys_pwrite64(fd, buf, count, pos);
821 }
822
823 #if defined(CONFIG_COMPAT) && defined(__ARCH_WANT_COMPAT_PWRITE64)
824 COMPAT_SYSCALL_DEFINE5(pwrite64, unsigned int, fd, const char __user *, buf,
825 size_t, count, compat_arg_u64_dual(pos))
826 {
827 return ksys_pwrite64(fd, buf, count, compat_arg_u64_glue(pos));
828 }
829 #endif
830
831 static ssize_t do_iter_readv_writev(struct file *filp, struct iov_iter *iter,
832 loff_t *ppos, int type, rwf_t flags)
833 {
834 struct kiocb kiocb;
835 ssize_t ret;
836
837 init_sync_kiocb(&kiocb, filp);
838 ret = kiocb_set_rw_flags(&kiocb, flags, type);
839 if (ret)
840 return ret;
841 kiocb.ki_pos = (ppos ? *ppos : 0);
842
843 if (type == READ)
844 ret = filp->f_op->read_iter(&kiocb, iter);
845 else
846 ret = filp->f_op->write_iter(&kiocb, iter);
847 BUG_ON(ret == -EIOCBQUEUED);
848 if (ppos)
849 *ppos = kiocb.ki_pos;
850 return ret;
851 }
852
853 /* Do it by hand, with file-ops */
854 static ssize_t do_loop_readv_writev(struct file *filp, struct iov_iter *iter,
855 loff_t *ppos, int type, rwf_t flags)
856 {
857 ssize_t ret = 0;
858
859 if (flags & ~RWF_HIPRI)
860 return -EOPNOTSUPP;
861
862 while (iov_iter_count(iter)) {
863 ssize_t nr;
864
865 if (type == READ) {
866 nr = filp->f_op->read(filp, iter_iov_addr(iter),
867 iter_iov_len(iter), ppos);
868 } else {
869 nr = filp->f_op->write(filp, iter_iov_addr(iter),
870 iter_iov_len(iter), ppos);
871 }
872
873 if (nr < 0) {
874 if (!ret)
875 ret = nr;
876 break;
877 }
878 ret += nr;
879 if (nr != iter_iov_len(iter))
880 break;
881 iov_iter_advance(iter, nr);
882 }
883
884 return ret;
885 }
886
887 ssize_t vfs_iocb_iter_read(struct file *file, struct kiocb *iocb,
888 struct iov_iter *iter)
889 {
890 size_t tot_len;
891 ssize_t ret = 0;
892
893 if (!file->f_op->read_iter)
894 return -EINVAL;
895 if (!(file->f_mode & FMODE_READ))
896 return -EBADF;
897 if (!(file->f_mode & FMODE_CAN_READ))
898 return -EINVAL;
899
900 tot_len = iov_iter_count(iter);
901 if (!tot_len)
902 goto out;
903 ret = rw_verify_area(READ, file, &iocb->ki_pos, tot_len);
904 if (ret < 0)
905 return ret;
906
907 ret = file->f_op->read_iter(iocb, iter);
908 out:
909 if (ret >= 0)
910 fsnotify_access(file);
911 return ret;
912 }
913 EXPORT_SYMBOL(vfs_iocb_iter_read);
914
915 ssize_t vfs_iter_read(struct file *file, struct iov_iter *iter, loff_t *ppos,
916 rwf_t flags)
917 {
918 size_t tot_len;
919 ssize_t ret = 0;
920
921 if (!file->f_op->read_iter)
922 return -EINVAL;
923 if (!(file->f_mode & FMODE_READ))
924 return -EBADF;
925 if (!(file->f_mode & FMODE_CAN_READ))
926 return -EINVAL;
927
928 tot_len = iov_iter_count(iter);
929 if (!tot_len)
930 goto out;
931 ret = rw_verify_area(READ, file, ppos, tot_len);
932 if (ret < 0)
933 return ret;
934
935 ret = do_iter_readv_writev(file, iter, ppos, READ, flags);
936 out:
937 if (ret >= 0)
938 fsnotify_access(file);
939 return ret;
940 }
941 EXPORT_SYMBOL(vfs_iter_read);
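
/*
 * A sketch of driving vfs_iter_read() from kernel code: wrap a kernel
 * buffer in a kvec-backed iov_iter and read at an explicit position.
 * Assumes the caller already holds a reference on @file; the helper name
 * is hypothetical.
 */
static ssize_t example_iter_read(struct file *file, void *buf, size_t len,
				 loff_t *pos)
{
	struct kvec kv = { .iov_base = buf, .iov_len = len };
	struct iov_iter iter;

	iov_iter_kvec(&iter, ITER_DEST, &kv, 1, len);
	/* No RWF_* flags; vfs_iter_read() advances *pos on success. */
	return vfs_iter_read(file, &iter, pos, 0);
}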
942
943 /*
944 * Caller is responsible for calling kiocb_end_write() on completion
945 * if async iocb was queued.
946 */
947 ssize_t vfs_iocb_iter_write(struct file *file, struct kiocb *iocb,
948 struct iov_iter *iter)
949 {
950 size_t tot_len;
951 ssize_t ret = 0;
952
953 if (!file->f_op->write_iter)
954 return -EINVAL;
955 if (!(file->f_mode & FMODE_WRITE))
956 return -EBADF;
957 if (!(file->f_mode & FMODE_CAN_WRITE))
958 return -EINVAL;
959
960 tot_len = iov_iter_count(iter);
961 if (!tot_len)
962 return 0;
963 ret = rw_verify_area(WRITE, file, &iocb->ki_pos, tot_len);
964 if (ret < 0)
965 return ret;
966
967 kiocb_start_write(iocb);
968 ret = file->f_op->write_iter(iocb, iter);
969 if (ret != -EIOCBQUEUED)
970 kiocb_end_write(iocb);
971 if (ret > 0)
972 fsnotify_modify(file);
973
974 return ret;
975 }
976 EXPORT_SYMBOL(vfs_iocb_iter_write);
977
978 ssize_t vfs_iter_write(struct file *file, struct iov_iter *iter, loff_t *ppos,
979 rwf_t flags)
980 {
981 size_t tot_len;
982 ssize_t ret;
983
984 if (!(file->f_mode & FMODE_WRITE))
985 return -EBADF;
986 if (!(file->f_mode & FMODE_CAN_WRITE))
987 return -EINVAL;
988 if (!file->f_op->write_iter)
989 return -EINVAL;
990
991 tot_len = iov_iter_count(iter);
992 if (!tot_len)
993 return 0;
994
995 ret = rw_verify_area(WRITE, file, ppos, tot_len);
996 if (ret < 0)
997 return ret;
998
999 file_start_write(file);
1000 ret = do_iter_readv_writev(file, iter, ppos, WRITE, flags);
1001 if (ret > 0)
1002 fsnotify_modify(file);
1003 file_end_write(file);
1004
1005 return ret;
1006 }
1007 EXPORT_SYMBOL(vfs_iter_write);
1008
1009 static ssize_t vfs_readv(struct file *file, const struct iovec __user *vec,
1010 unsigned long vlen, loff_t *pos, rwf_t flags)
1011 {
1012 struct iovec iovstack[UIO_FASTIOV];
1013 struct iovec *iov = iovstack;
1014 struct iov_iter iter;
1015 size_t tot_len;
1016 ssize_t ret = 0;
1017
1018 if (!(file->f_mode & FMODE_READ))
1019 return -EBADF;
1020 if (!(file->f_mode & FMODE_CAN_READ))
1021 return -EINVAL;
1022
1023 ret = import_iovec(ITER_DEST, vec, vlen, ARRAY_SIZE(iovstack), &iov,
1024 &iter);
1025 if (ret < 0)
1026 return ret;
1027
1028 tot_len = iov_iter_count(&iter);
1029 if (!tot_len)
1030 goto out;
1031
1032 ret = rw_verify_area(READ, file, pos, tot_len);
1033 if (ret < 0)
1034 goto out;
1035
1036 if (file->f_op->read_iter)
1037 ret = do_iter_readv_writev(file, &iter, pos, READ, flags);
1038 else
1039 ret = do_loop_readv_writev(file, &iter, pos, READ, flags);
1040 out:
1041 if (ret >= 0)
1042 fsnotify_access(file);
1043 kfree(iov);
1044 return ret;
1045 }
1046
1047 static ssize_t vfs_writev(struct file *file, const struct iovec __user *vec,
1048 unsigned long vlen, loff_t *pos, rwf_t flags)
1049 {
1050 struct iovec iovstack[UIO_FASTIOV];
1051 struct iovec *iov = iovstack;
1052 struct iov_iter iter;
1053 size_t tot_len;
1054 ssize_t ret = 0;
1055
1056 if (!(file->f_mode & FMODE_WRITE))
1057 return -EBADF;
1058 if (!(file->f_mode & FMODE_CAN_WRITE))
1059 return -EINVAL;
1060
1061 ret = import_iovec(ITER_SOURCE, vec, vlen, ARRAY_SIZE(iovstack), &iov,
1062 &iter);
1063 if (ret < 0)
1064 return ret;
1065
1066 tot_len = iov_iter_count(&iter);
1067 if (!tot_len)
1068 goto out;
1069
1070 ret = rw_verify_area(WRITE, file, pos, tot_len);
1071 if (ret < 0)
1072 goto out;
1073
1074 file_start_write(file);
1075 if (file->f_op->write_iter)
1076 ret = do_iter_readv_writev(file, &iter, pos, WRITE, flags);
1077 else
1078 ret = do_loop_readv_writev(file, &iter, pos, WRITE, flags);
1079 if (ret > 0)
1080 fsnotify_modify(file);
1081 file_end_write(file);
1082 out:
1083 kfree(iov);
1084 return ret;
1085 }
1086
1087 static ssize_t do_readv(unsigned long fd, const struct iovec __user *vec,
1088 unsigned long vlen, rwf_t flags)
1089 {
1090 struct fd f = fdget_pos(fd);
1091 ssize_t ret = -EBADF;
1092
1093 if (fd_file(f)) {
1094 loff_t pos, *ppos = file_ppos(fd_file(f));
1095 if (ppos) {
1096 pos = *ppos;
1097 ppos = &pos;
1098 }
1099 ret = vfs_readv(fd_file(f), vec, vlen, ppos, flags);
1100 if (ret >= 0 && ppos)
1101 fd_file(f)->f_pos = pos;
1102 fdput_pos(f);
1103 }
1104
1105 if (ret > 0)
1106 add_rchar(current, ret);
1107 inc_syscr(current);
1108 return ret;
1109 }
1110
1111 static ssize_t do_writev(unsigned long fd, const struct iovec __user *vec,
1112 unsigned long vlen, rwf_t flags)
1113 {
1114 struct fd f = fdget_pos(fd);
1115 ssize_t ret = -EBADF;
1116
1117 if (fd_file(f)) {
1118 loff_t pos, *ppos = file_ppos(fd_file(f));
1119 if (ppos) {
1120 pos = *ppos;
1121 ppos = &pos;
1122 }
1123 ret = vfs_writev(fd_file(f), vec, vlen, ppos, flags);
1124 if (ret >= 0 && ppos)
1125 fd_file(f)->f_pos = pos;
1126 fdput_pos(f);
1127 }
1128
1129 if (ret > 0)
1130 add_wchar(current, ret);
1131 inc_syscw(current);
1132 return ret;
1133 }
1134
1135 static inline loff_t pos_from_hilo(unsigned long high, unsigned long low)
1136 {
1137 #define HALF_LONG_BITS (BITS_PER_LONG / 2)
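	/*
	 * Shift in two halves: on 64-bit kernels HALF_LONG_BITS * 2 equals the
	 * width of loff_t, and a single shift by the full width would be
	 * undefined behaviour.  Done this way, @high simply drops out on
	 * 64-bit and fills the upper 32 bits of the position on 32-bit.
	 */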
1138 return (((loff_t)high << HALF_LONG_BITS) << HALF_LONG_BITS) | low;
1139 }
1140
1141 static ssize_t do_preadv(unsigned long fd, const struct iovec __user *vec,
1142 unsigned long vlen, loff_t pos, rwf_t flags)
1143 {
1144 struct fd f;
1145 ssize_t ret = -EBADF;
1146
1147 if (pos < 0)
1148 return -EINVAL;
1149
1150 f = fdget(fd);
1151 if (fd_file(f)) {
1152 ret = -ESPIPE;
1153 if (fd_file(f)->f_mode & FMODE_PREAD)
1154 ret = vfs_readv(fd_file(f), vec, vlen, &pos, flags);
1155 fdput(f);
1156 }
1157
1158 if (ret > 0)
1159 add_rchar(current, ret);
1160 inc_syscr(current);
1161 return ret;
1162 }
1163
1164 static ssize_t do_pwritev(unsigned long fd, const struct iovec __user *vec,
1165 unsigned long vlen, loff_t pos, rwf_t flags)
1166 {
1167 struct fd f;
1168 ssize_t ret = -EBADF;
1169
1170 if (pos < 0)
1171 return -EINVAL;
1172
1173 f = fdget(fd);
1174 if (fd_file(f)) {
1175 ret = -ESPIPE;
1176 if (fd_file(f)->f_mode & FMODE_PWRITE)
1177 ret = vfs_writev(fd_file(f), vec, vlen, &pos, flags);
1178 fdput(f);
1179 }
1180
1181 if (ret > 0)
1182 add_wchar(current, ret);
1183 inc_syscw(current);
1184 return ret;
1185 }
1186
1187 SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec,
1188 unsigned long, vlen)
1189 {
1190 return do_readv(fd, vec, vlen, 0);
1191 }
1192
1193 SYSCALL_DEFINE3(writev, unsigned long, fd, const struct iovec __user *, vec,
1194 unsigned long, vlen)
1195 {
1196 return do_writev(fd, vec, vlen, 0);
1197 }
1198
1199 SYSCALL_DEFINE5(preadv, unsigned long, fd, const struct iovec __user *, vec,
1200 unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h)
1201 {
1202 loff_t pos = pos_from_hilo(pos_h, pos_l);
1203
1204 return do_preadv(fd, vec, vlen, pos, 0);
1205 }
1206
1207 SYSCALL_DEFINE6(preadv2, unsigned long, fd, const struct iovec __user *, vec,
1208 unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h,
1209 rwf_t, flags)
1210 {
1211 loff_t pos = pos_from_hilo(pos_h, pos_l);
1212
1213 if (pos == -1)
1214 return do_readv(fd, vec, vlen, flags);
1215
1216 return do_preadv(fd, vec, vlen, pos, flags);
1217 }
1218
1219 SYSCALL_DEFINE5(pwritev, unsigned long, fd, const struct iovec __user *, vec,
1220 unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h)
1221 {
1222 loff_t pos = pos_from_hilo(pos_h, pos_l);
1223
1224 return do_pwritev(fd, vec, vlen, pos, 0);
1225 }
1226
1227 SYSCALL_DEFINE6(pwritev2, unsigned long, fd, const struct iovec __user *, vec,
1228 unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h,
1229 rwf_t, flags)
1230 {
1231 loff_t pos = pos_from_hilo(pos_h, pos_l);
1232
1233 if (pos == -1)
1234 return do_writev(fd, vec, vlen, flags);
1235
1236 return do_pwritev(fd, vec, vlen, pos, flags);
1237 }
1238
1239 /*
1240 * Various compat syscalls. Note that they all pretend to take a native
1241 * iovec - import_iovec will properly treat those as compat_iovecs based on
1242 * in_compat_syscall().
1243 */
1244 #ifdef CONFIG_COMPAT
1245 #ifdef __ARCH_WANT_COMPAT_SYS_PREADV64
1246 COMPAT_SYSCALL_DEFINE4(preadv64, unsigned long, fd,
1247 const struct iovec __user *, vec,
1248 unsigned long, vlen, loff_t, pos)
1249 {
1250 return do_preadv(fd, vec, vlen, pos, 0);
1251 }
1252 #endif
1253
1254 COMPAT_SYSCALL_DEFINE5(preadv, compat_ulong_t, fd,
1255 const struct iovec __user *, vec,
1256 compat_ulong_t, vlen, u32, pos_low, u32, pos_high)
1257 {
1258 loff_t pos = ((loff_t)pos_high << 32) | pos_low;
1259
1260 return do_preadv(fd, vec, vlen, pos, 0);
1261 }
1262
1263 #ifdef __ARCH_WANT_COMPAT_SYS_PREADV64V2
1264 COMPAT_SYSCALL_DEFINE5(preadv64v2, unsigned long, fd,
1265 const struct iovec __user *, vec,
1266 unsigned long, vlen, loff_t, pos, rwf_t, flags)
1267 {
1268 if (pos == -1)
1269 return do_readv(fd, vec, vlen, flags);
1270 return do_preadv(fd, vec, vlen, pos, flags);
1271 }
1272 #endif
1273
1274 COMPAT_SYSCALL_DEFINE6(preadv2, compat_ulong_t, fd,
1275 const struct iovec __user *, vec,
1276 compat_ulong_t, vlen, u32, pos_low, u32, pos_high,
1277 rwf_t, flags)
1278 {
1279 loff_t pos = ((loff_t)pos_high << 32) | pos_low;
1280
1281 if (pos == -1)
1282 return do_readv(fd, vec, vlen, flags);
1283 return do_preadv(fd, vec, vlen, pos, flags);
1284 }
1285
1286 #ifdef __ARCH_WANT_COMPAT_SYS_PWRITEV64
1287 COMPAT_SYSCALL_DEFINE4(pwritev64, unsigned long, fd,
1288 const struct iovec __user *, vec,
1289 unsigned long, vlen, loff_t, pos)
1290 {
1291 return do_pwritev(fd, vec, vlen, pos, 0);
1292 }
1293 #endif
1294
1295 COMPAT_SYSCALL_DEFINE5(pwritev, compat_ulong_t, fd,
1296 const struct iovec __user *,vec,
1297 compat_ulong_t, vlen, u32, pos_low, u32, pos_high)
1298 {
1299 loff_t pos = ((loff_t)pos_high << 32) | pos_low;
1300
1301 return do_pwritev(fd, vec, vlen, pos, 0);
1302 }
1303
1304 #ifdef __ARCH_WANT_COMPAT_SYS_PWRITEV64V2
1305 COMPAT_SYSCALL_DEFINE5(pwritev64v2, unsigned long, fd,
1306 const struct iovec __user *, vec,
1307 unsigned long, vlen, loff_t, pos, rwf_t, flags)
1308 {
1309 if (pos == -1)
1310 return do_writev(fd, vec, vlen, flags);
1311 return do_pwritev(fd, vec, vlen, pos, flags);
1312 }
1313 #endif
1314
1315 COMPAT_SYSCALL_DEFINE6(pwritev2, compat_ulong_t, fd,
1316 const struct iovec __user *,vec,
1317 compat_ulong_t, vlen, u32, pos_low, u32, pos_high, rwf_t, flags)
1318 {
1319 loff_t pos = ((loff_t)pos_high << 32) | pos_low;
1320
1321 if (pos == -1)
1322 return do_writev(fd, vec, vlen, flags);
1323 return do_pwritev(fd, vec, vlen, pos, flags);
1324 }
1325 #endif /* CONFIG_COMPAT */
1326
1327 static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
1328 size_t count, loff_t max)
1329 {
1330 struct fd in, out;
1331 struct inode *in_inode, *out_inode;
1332 struct pipe_inode_info *opipe;
1333 loff_t pos;
1334 loff_t out_pos;
1335 ssize_t retval;
1336 int fl;
1337
1338 /*
1339 * Get input file, and verify that it is ok..
1340 */
1341 retval = -EBADF;
1342 in = fdget(in_fd);
1343 if (!fd_file(in))
1344 goto out;
1345 if (!(fd_file(in)->f_mode & FMODE_READ))
1346 goto fput_in;
1347 retval = -ESPIPE;
1348 if (!ppos) {
1349 pos = fd_file(in)->f_pos;
1350 } else {
1351 pos = *ppos;
1352 if (!(fd_file(in)->f_mode & FMODE_PREAD))
1353 goto fput_in;
1354 }
1355 retval = rw_verify_area(READ, fd_file(in), &pos, count);
1356 if (retval < 0)
1357 goto fput_in;
1358 if (count > MAX_RW_COUNT)
1359 count = MAX_RW_COUNT;
1360
1361 /*
1362 * Get output file, and verify that it is ok..
1363 */
1364 retval = -EBADF;
1365 out = fdget(out_fd);
1366 if (!fd_file(out))
1367 goto fput_in;
1368 if (!(fd_file(out)->f_mode & FMODE_WRITE))
1369 goto fput_out;
1370 in_inode = file_inode(fd_file(in));
1371 out_inode = file_inode(fd_file(out));
1372 out_pos = fd_file(out)->f_pos;
1373
1374 if (!max)
1375 max = min(in_inode->i_sb->s_maxbytes, out_inode->i_sb->s_maxbytes);
1376
1377 if (unlikely(pos + count > max)) {
1378 retval = -EOVERFLOW;
1379 if (pos >= max)
1380 goto fput_out;
1381 count = max - pos;
1382 }
1383
1384 fl = 0;
1385 #if 0
1386 /*
1387 * We need to debate whether we can enable this or not. The
1388 * man page documents EAGAIN return for the output at least,
1389 * and the application is arguably buggy if it doesn't expect
1390 * EAGAIN on a non-blocking file descriptor.
1391 */
1392 if (fd_file(in)->f_flags & O_NONBLOCK)
1393 fl = SPLICE_F_NONBLOCK;
1394 #endif
1395 opipe = get_pipe_info(fd_file(out), true);
1396 if (!opipe) {
1397 retval = rw_verify_area(WRITE, fd_file(out), &out_pos, count);
1398 if (retval < 0)
1399 goto fput_out;
1400 retval = do_splice_direct(fd_file(in), &pos, fd_file(out), &out_pos,
1401 count, fl);
1402 } else {
1403 if (fd_file(out)->f_flags & O_NONBLOCK)
1404 fl |= SPLICE_F_NONBLOCK;
1405
1406 retval = splice_file_to_pipe(fd_file(in), opipe, &pos, count, fl);
1407 }
1408
1409 if (retval > 0) {
1410 add_rchar(current, retval);
1411 add_wchar(current, retval);
1412 fsnotify_access(fd_file(in));
1413 fsnotify_modify(fd_file(out));
1414 fd_file(out)->f_pos = out_pos;
1415 if (ppos)
1416 *ppos = pos;
1417 else
1418 fd_file(in)->f_pos = pos;
1419 }
1420
1421 inc_syscr(current);
1422 inc_syscw(current);
1423 if (pos > max)
1424 retval = -EOVERFLOW;
1425
1426 fput_out:
1427 fdput(out);
1428 fput_in:
1429 fdput(in);
1430 out:
1431 return retval;
1432 }
1433
1434 SYSCALL_DEFINE4(sendfile, int, out_fd, int, in_fd, off_t __user *, offset, size_t, count)
1435 {
1436 loff_t pos;
1437 off_t off;
1438 ssize_t ret;
1439
1440 if (offset) {
1441 if (unlikely(get_user(off, offset)))
1442 return -EFAULT;
1443 pos = off;
1444 ret = do_sendfile(out_fd, in_fd, &pos, count, MAX_NON_LFS);
1445 if (unlikely(put_user(pos, offset)))
1446 return -EFAULT;
1447 return ret;
1448 }
1449
1450 return do_sendfile(out_fd, in_fd, NULL, count, 0);
1451 }
1452
1453 SYSCALL_DEFINE4(sendfile64, int, out_fd, int, in_fd, loff_t __user *, offset, size_t, count)
1454 {
1455 loff_t pos;
1456 ssize_t ret;
1457
1458 if (offset) {
1459 if (unlikely(copy_from_user(&pos, offset, sizeof(loff_t))))
1460 return -EFAULT;
1461 ret = do_sendfile(out_fd, in_fd, &pos, count, 0);
1462 if (unlikely(put_user(pos, offset)))
1463 return -EFAULT;
1464 return ret;
1465 }
1466
1467 return do_sendfile(out_fd, in_fd, NULL, count, 0);
1468 }
1469
1470 #ifdef CONFIG_COMPAT
1471 COMPAT_SYSCALL_DEFINE4(sendfile, int, out_fd, int, in_fd,
1472 compat_off_t __user *, offset, compat_size_t, count)
1473 {
1474 loff_t pos;
1475 off_t off;
1476 ssize_t ret;
1477
1478 if (offset) {
1479 if (unlikely(get_user(off, offset)))
1480 return -EFAULT;
1481 pos = off;
1482 ret = do_sendfile(out_fd, in_fd, &pos, count, MAX_NON_LFS);
1483 if (unlikely(put_user(pos, offset)))
1484 return -EFAULT;
1485 return ret;
1486 }
1487
1488 return do_sendfile(out_fd, in_fd, NULL, count, 0);
1489 }
1490
1491 COMPAT_SYSCALL_DEFINE4(sendfile64, int, out_fd, int, in_fd,
1492 compat_loff_t __user *, offset, compat_size_t, count)
1493 {
1494 loff_t pos;
1495 ssize_t ret;
1496
1497 if (offset) {
1498 if (unlikely(copy_from_user(&pos, offset, sizeof(loff_t))))
1499 return -EFAULT;
1500 ret = do_sendfile(out_fd, in_fd, &pos, count, 0);
1501 if (unlikely(put_user(pos, offset)))
1502 return -EFAULT;
1503 return ret;
1504 }
1505
1506 return do_sendfile(out_fd, in_fd, NULL, count, 0);
1507 }
1508 #endif
1509
1510 /*
1511 * Performs necessary checks before doing a file copy
1512 *
1513 * Can adjust the number of bytes to copy via the @req_count argument.
1514 * Returns an appropriate error code that the caller should return, or
1515 * zero in case the copy should be allowed.
1516 */
1517 static int generic_copy_file_checks(struct file *file_in, loff_t pos_in,
1518 struct file *file_out, loff_t pos_out,
1519 size_t *req_count, unsigned int flags)
1520 {
1521 struct inode *inode_in = file_inode(file_in);
1522 struct inode *inode_out = file_inode(file_out);
1523 uint64_t count = *req_count;
1524 loff_t size_in;
1525 int ret;
1526
1527 ret = generic_file_rw_checks(file_in, file_out);
1528 if (ret)
1529 return ret;
1530
1531 /*
1532 * We allow some filesystems to handle cross sb copy, but passing
1533 * a file of the wrong filesystem type to a filesystem driver can result
1534 * in an attempt to dereference the wrong type of ->private_data, so
1535 * avoid doing that until we really have a good reason.
1536 *
1537 * nfs and cifs define several different file_system_type structures
1538 * and several different sets of file_operations, but they all end up
1539 * using the same ->copy_file_range() function pointer.
1540 */
1541 if (flags & COPY_FILE_SPLICE) {
1542 /* cross sb splice is allowed */
1543 } else if (file_out->f_op->copy_file_range) {
1544 if (file_in->f_op->copy_file_range !=
1545 file_out->f_op->copy_file_range)
1546 return -EXDEV;
1547 } else if (file_inode(file_in)->i_sb != file_inode(file_out)->i_sb) {
1548 return -EXDEV;
1549 }
1550
1551 /* Don't touch certain kinds of inodes */
1552 if (IS_IMMUTABLE(inode_out))
1553 return -EPERM;
1554
1555 if (IS_SWAPFILE(inode_in) || IS_SWAPFILE(inode_out))
1556 return -ETXTBSY;
1557
1558 /* Ensure offsets don't wrap. */
1559 if (pos_in + count < pos_in || pos_out + count < pos_out)
1560 return -EOVERFLOW;
1561
1562 /* Shorten the copy to EOF */
1563 size_in = i_size_read(inode_in);
1564 if (pos_in >= size_in)
1565 count = 0;
1566 else
1567 count = min(count, size_in - (uint64_t)pos_in);
1568
1569 ret = generic_write_check_limits(file_out, pos_out, &count);
1570 if (ret)
1571 return ret;
1572
1573 /* Don't allow overlapped copying within the same file. */
1574 if (inode_in == inode_out &&
1575 pos_out + count > pos_in &&
1576 pos_out < pos_in + count)
1577 return -EINVAL;
1578
1579 *req_count = count;
1580 return 0;
1581 }
1582
1583 /*
1584 * copy_file_range() differs from regular file read and write in that it
1585 * specifically allows returning partial success. When it does so is up to
1586 * the copy_file_range method.
1587 */
1588 ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in,
1589 struct file *file_out, loff_t pos_out,
1590 size_t len, unsigned int flags)
1591 {
1592 ssize_t ret;
1593 bool splice = flags & COPY_FILE_SPLICE;
1594 bool samesb = file_inode(file_in)->i_sb == file_inode(file_out)->i_sb;
1595
1596 if (flags & ~COPY_FILE_SPLICE)
1597 return -EINVAL;
1598
1599 ret = generic_copy_file_checks(file_in, pos_in, file_out, pos_out, &len,
1600 flags);
1601 if (unlikely(ret))
1602 return ret;
1603
1604 ret = rw_verify_area(READ, file_in, &pos_in, len);
1605 if (unlikely(ret))
1606 return ret;
1607
1608 ret = rw_verify_area(WRITE, file_out, &pos_out, len);
1609 if (unlikely(ret))
1610 return ret;
1611
1612 if (len == 0)
1613 return 0;
1614
1615 file_start_write(file_out);
1616
1617 /*
1618 * Cloning is supported by more file systems, so we implement copy on the
1619 * same sb using clone, but for filesystems where both clone and copy
1620 * are supported (e.g. nfs, cifs), we only call the copy method.
1621 */
1622 if (!splice && file_out->f_op->copy_file_range) {
1623 ret = file_out->f_op->copy_file_range(file_in, pos_in,
1624 file_out, pos_out,
1625 len, flags);
1626 } else if (!splice && file_in->f_op->remap_file_range && samesb) {
1627 ret = file_in->f_op->remap_file_range(file_in, pos_in,
1628 file_out, pos_out,
1629 min_t(loff_t, MAX_RW_COUNT, len),
1630 REMAP_FILE_CAN_SHORTEN);
1631 /* fallback to splice */
1632 if (ret <= 0)
1633 splice = true;
1634 } else if (samesb) {
1635 /* Fallback to splice for same sb copy for backward compat */
1636 splice = true;
1637 }
1638
1639 file_end_write(file_out);
1640
1641 if (!splice)
1642 goto done;
1643
1644 /*
1645 * We can get here for a same-sb copy on filesystems that do not implement
1646 * ->copy_file_range(), either because the filesystem does not support
1647 * clone or because it supports clone but rejected the clone request
1648 * (e.g. because it was not block aligned).
1649 *
1650 * In both cases, fall back to kernel copy so we are able to maintain a
1651 * consistent story about which filesystems support copy_file_range()
1652 * and which filesystems do not, which will allow userspace tools to
1653 * make consistent decisions w.r.t. using copy_file_range().
1654 *
1655 * We also get here if caller (e.g. nfsd) requested COPY_FILE_SPLICE
1656 * for server-side-copy between any two sb.
1657 *
1658 * In any case, we call do_splice_direct() and not splice_file_range(),
1659 * without file_start_write() held, to avoid possible deadlocks related
1660 * to splicing from input file, while file_start_write() is held on
1661 * the output file on a different sb.
1662 */
1663 ret = do_splice_direct(file_in, &pos_in, file_out, &pos_out,
1664 min_t(size_t, len, MAX_RW_COUNT), 0);
1665 done:
1666 if (ret > 0) {
1667 fsnotify_access(file_in);
1668 add_rchar(current, ret);
1669 fsnotify_modify(file_out);
1670 add_wchar(current, ret);
1671 }
1672
1673 inc_syscr(current);
1674 inc_syscw(current);
1675
1676 return ret;
1677 }
1678 EXPORT_SYMBOL(vfs_copy_file_range);
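
/*
 * A sketch of an in-kernel caller (conceptually similar to what a network
 * file server doing server-side copy might do): copy @len bytes between two
 * files it already holds open, letting vfs_copy_file_range() pick clone,
 * ->copy_file_range() or the splice fallback.  Positions are caller-managed
 * and the function name is hypothetical.
 */
static ssize_t example_server_copy(struct file *src, loff_t src_pos,
				   struct file *dst, loff_t dst_pos,
				   size_t len)
{
	/*
	 * COPY_FILE_SPLICE enables the cross-sb splice fallback that plain
	 * userspace copy_file_range() callers do not get.
	 */
	return vfs_copy_file_range(src, src_pos, dst, dst_pos, len,
				   COPY_FILE_SPLICE);
}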
1679
1680 SYSCALL_DEFINE6(copy_file_range, int, fd_in, loff_t __user *, off_in,
1681 int, fd_out, loff_t __user *, off_out,
1682 size_t, len, unsigned int, flags)
1683 {
1684 loff_t pos_in;
1685 loff_t pos_out;
1686 struct fd f_in;
1687 struct fd f_out;
1688 ssize_t ret = -EBADF;
1689
1690 f_in = fdget(fd_in);
1691 if (!fd_file(f_in))
1692 goto out2;
1693
1694 f_out = fdget(fd_out);
1695 if (!fd_file(f_out))
1696 goto out1;
1697
1698 ret = -EFAULT;
1699 if (off_in) {
1700 if (copy_from_user(&pos_in, off_in, sizeof(loff_t)))
1701 goto out;
1702 } else {
1703 pos_in = fd_file(f_in)->f_pos;
1704 }
1705
1706 if (off_out) {
1707 if (copy_from_user(&pos_out, off_out, sizeof(loff_t)))
1708 goto out;
1709 } else {
1710 pos_out = fd_file(f_out)->f_pos;
1711 }
1712
1713 ret = -EINVAL;
1714 if (flags != 0)
1715 goto out;
1716
1717 ret = vfs_copy_file_range(fd_file(f_in), pos_in, fd_file(f_out), pos_out, len,
1718 flags);
1719 if (ret > 0) {
1720 pos_in += ret;
1721 pos_out += ret;
1722
1723 if (off_in) {
1724 if (copy_to_user(off_in, &pos_in, sizeof(loff_t)))
1725 ret = -EFAULT;
1726 } else {
1727 fd_file(f_in)->f_pos = pos_in;
1728 }
1729
1730 if (off_out) {
1731 if (copy_to_user(off_out, &pos_out, sizeof(loff_t)))
1732 ret = -EFAULT;
1733 } else {
1734 fd_file(f_out)->f_pos = pos_out;
1735 }
1736 }
1737
1738 out:
1739 fdput(f_out);
1740 out1:
1741 fdput(f_in);
1742 out2:
1743 return ret;
1744 }
1745
1746 /*
1747 * Don't operate on ranges the page cache doesn't support, and don't exceed the
1748 * LFS limits. If pos is under the limit it becomes a short access. If it
1749 * exceeds the limit we return -EFBIG.
1750 */
1751 int generic_write_check_limits(struct file *file, loff_t pos, loff_t *count)
1752 {
1753 struct inode *inode = file->f_mapping->host;
1754 loff_t max_size = inode->i_sb->s_maxbytes;
1755 loff_t limit = rlimit(RLIMIT_FSIZE);
1756
1757 if (limit != RLIM_INFINITY) {
1758 if (pos >= limit) {
1759 send_sig(SIGXFSZ, current, 0);
1760 return -EFBIG;
1761 }
1762 *count = min(*count, limit - pos);
1763 }
1764
1765 if (!(file->f_flags & O_LARGEFILE))
1766 max_size = MAX_NON_LFS;
1767
1768 if (unlikely(pos >= max_size))
1769 return -EFBIG;
1770
1771 *count = min(*count, max_size - pos);
1772
1773 return 0;
1774 }
1775 EXPORT_SYMBOL_GPL(generic_write_check_limits);
1776
1777 /* Like generic_write_checks(), but takes size of write instead of iter. */
1778 int generic_write_checks_count(struct kiocb *iocb, loff_t *count)
1779 {
1780 struct file *file = iocb->ki_filp;
1781 struct inode *inode = file->f_mapping->host;
1782
1783 if (IS_SWAPFILE(inode))
1784 return -ETXTBSY;
1785
1786 if (!*count)
1787 return 0;
1788
1789 if (iocb->ki_flags & IOCB_APPEND)
1790 iocb->ki_pos = i_size_read(inode);
1791
1792 if ((iocb->ki_flags & IOCB_NOWAIT) &&
1793 !((iocb->ki_flags & IOCB_DIRECT) ||
1794 (file->f_op->fop_flags & FOP_BUFFER_WASYNC)))
1795 return -EINVAL;
1796
1797 return generic_write_check_limits(iocb->ki_filp, iocb->ki_pos, count);
1798 }
1799 EXPORT_SYMBOL(generic_write_checks_count);
1800
1801 /*
1802 * Performs necessary checks before doing a write
1803 *
1804 * Can adjust the writing position or the number of bytes to write.
1805 * Returns an appropriate error code that the caller should return, or
1806 * zero in case the write should be allowed.
1807 */
1808 ssize_t generic_write_checks(struct kiocb *iocb, struct iov_iter *from)
1809 {
1810 loff_t count = iov_iter_count(from);
1811 int ret;
1812
1813 ret = generic_write_checks_count(iocb, &count);
1814 if (ret)
1815 return ret;
1816
1817 iov_iter_truncate(from, count);
1818 return iov_iter_count(from);
1819 }
1820 EXPORT_SYMBOL(generic_write_checks);
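
/*
 * A sketch of the usual call site for generic_write_checks(): the front of
 * a filesystem's ->write_iter(), before any data is transferred.  The
 * actual write step is elided and the function name is hypothetical.
 */
static ssize_t example_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
	ssize_t ret;

	ret = generic_write_checks(iocb, from);
	if (ret <= 0)
		return ret;

	/* @from has been truncated to the allowed size; write it out here. */
	return ret;
}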
1821
1822 /*
1823 * Performs common checks before doing a file copy/clone
1824 * from @file_in to @file_out.
1825 */
1826 int generic_file_rw_checks(struct file *file_in, struct file *file_out)
1827 {
1828 struct inode *inode_in = file_inode(file_in);
1829 struct inode *inode_out = file_inode(file_out);
1830
1831 /* Don't copy dirs, pipes, sockets... */
1832 if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode))
1833 return -EISDIR;
1834 if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode))
1835 return -EINVAL;
1836
1837 if (!(file_in->f_mode & FMODE_READ) ||
1838 !(file_out->f_mode & FMODE_WRITE) ||
1839 (file_out->f_flags & O_APPEND))
1840 return -EBADF;
1841
1842 return 0;
1843 }
1844
1845 int generic_atomic_write_valid(struct kiocb *iocb, struct iov_iter *iter)
1846 {
1847 size_t len = iov_iter_count(iter);
1848
1849 if (!iter_is_ubuf(iter))
1850 return -EINVAL;
1851
1852 if (!is_power_of_2(len))
1853 return -EINVAL;
1854
1855 if (!IS_ALIGNED(iocb->ki_pos, len))
1856 return -EINVAL;
1857
1858 if (!(iocb->ki_flags & IOCB_DIRECT))
1859 return -EOPNOTSUPP;
1860
1861 return 0;
1862 }
1863
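
/*
 * A sketch of how a ->write_iter() supporting torn-write protection might
 * use generic_atomic_write_valid(): validate the request up front, then
 * continue down the direct I/O path.  It assumes the IOCB_ATOMIC kiocb flag
 * used for such writes; the name and the placeholder return are hypothetical
 * and the real I/O path is elided.
 */
static ssize_t example_atomic_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
	if (iocb->ki_flags & IOCB_ATOMIC) {
		int ret = generic_atomic_write_valid(iocb, from);

		if (ret)
			return ret;
	}

	/* ...continue with the normal direct I/O write path... */
	return 0;
}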