1 /*
2 * linux/fs/open.c
3 *
4 * Copyright (C) 1991, 1992 Linus Torvalds
5 */
6
7 #include <linux/string.h>
8 #include <linux/mm.h>
9 #include <linux/file.h>
10 #include <linux/fdtable.h>
11 #include <linux/fsnotify.h>
12 #include <linux/module.h>
13 #include <linux/tty.h>
14 #include <linux/namei.h>
15 #include <linux/backing-dev.h>
16 #include <linux/capability.h>
17 #include <linux/securebits.h>
18 #include <linux/security.h>
19 #include <linux/mount.h>
20 #include <linux/fcntl.h>
21 #include <linux/slab.h>
22 #include <asm/uaccess.h>
23 #include <linux/fs.h>
24 #include <linux/personality.h>
25 #include <linux/pagemap.h>
26 #include <linux/syscalls.h>
27 #include <linux/rcupdate.h>
28 #include <linux/audit.h>
29 #include <linux/falloc.h>
30 #include <linux/fs_struct.h>
31 #include <linux/ima.h>
32 #include <linux/dnotify.h>
33 #include <linux/compat.h>
34
35 #include "internal.h"
36
/*
 * Change the size of @dentry's inode to @length through notify_change2().
 *
 * @mnt is passed straight on to notify_change2().  @time_attrs is OR-ed into
 * the attribute mask next to ATTR_SIZE.  When @filp is non-NULL it is stored
 * in the request and ATTR_FILE is set.
 *
 * Returns -EINVAL for a negative @length, otherwise the result of
 * notify_change2().
 */
int do_truncate2(struct vfsmount *mnt, struct dentry *dentry, loff_t length,
		unsigned int time_attrs, struct file *filp)
{
	struct iattr newattrs;
	struct inode *inode;
	int kill;
	int ret;

	/* Not pretty: "inode->i_size" shouldn't really be signed. But it is. */
	if (length < 0)
		return -EINVAL;

	newattrs.ia_valid = ATTR_SIZE | time_attrs;
	newattrs.ia_size = length;
	if (filp != NULL) {
		newattrs.ia_valid |= ATTR_FILE;
		newattrs.ia_file = filp;
	}

	/* Remove suid/sgid on truncate too */
	kill = should_remove_suid(dentry);
	if (kill != 0)
		newattrs.ia_valid |= kill | ATTR_FORCE;

	inode = dentry->d_inode;
	mutex_lock(&inode->i_mutex);
	/* Note any delegations or leases have already been broken: */
	ret = notify_change2(mnt, dentry, &newattrs, NULL);
	mutex_unlock(&inode->i_mutex);

	return ret;
}
/*
 * Variant of do_truncate2() for callers that have no vfsmount at hand:
 * NULL is passed in its place, every other argument is forwarded as is.
 */
int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs,
		struct file *filp)
{
	struct vfsmount *no_mnt = NULL;

	return do_truncate2(no_mnt, dentry, length, time_attrs, filp);
}
70
/*
 * Truncate the regular file at @path to @length bytes.
 *
 * Returns -EISDIR for a directory, -EINVAL for any other non-regular file,
 * -EPERM for an append-only inode, or the first error reported by the
 * write-access, permission, lease, lock, security or truncate steps below.
 */
long vfs_truncate(struct path *path, loff_t length)
{
	struct inode *inode = path->dentry->d_inode;
	struct vfsmount *mnt = path->mnt;
	long error;

	/* For directories it's -EISDIR, for other non-regulars - -EINVAL */
	if (S_ISDIR(inode->i_mode))
		return -EISDIR;
	if (!S_ISREG(inode->i_mode))
		return -EINVAL;

	error = mnt_want_write(mnt);
	if (error)
		return error;

	error = inode_permission2(mnt, inode, MAY_WRITE);
	if (error)
		goto drop_mnt_write;

	if (IS_APPEND(inode)) {
		error = -EPERM;
		goto drop_mnt_write;
	}

	error = get_write_access(inode);
	if (error)
		goto drop_mnt_write;

	/*
	 * Make sure that there are no leases.  get_write_access() protects
	 * against the truncate racing with a lease-granting setlease().
	 */
	error = break_lease(inode, O_WRONLY);
	if (error)
		goto drop_write_access;

	error = locks_verify_truncate(inode, NULL, length);
	if (error)
		goto drop_write_access;

	error = security_path_truncate(path);
	if (error)
		goto drop_write_access;

	error = do_truncate2(mnt, path->dentry, length, 0, NULL);

drop_write_access:
	put_write_access(inode);
drop_mnt_write:
	mnt_drop_write(mnt);
	return error;
}
EXPORT_SYMBOL_GPL(vfs_truncate);
124
/*
 * Common body of the path-based truncate syscalls: resolve @pathname
 * relative to the current working directory (following symlinks) and
 * truncate the result with vfs_truncate().  The lookup is repeated with
 * LOOKUP_REVAL whenever retry_estale() asks for it.
 */
static long do_sys_truncate(const char __user *pathname, loff_t length)
{
	unsigned int lookup_flags = LOOKUP_FOLLOW;
	struct path path;
	int error;

	if (length < 0)	/* sorry, but loff_t says... */
		return -EINVAL;

	for (;;) {
		error = user_path_at(AT_FDCWD, pathname, lookup_flags, &path);
		if (error == 0) {
			error = vfs_truncate(&path, length);
			path_put(&path);
		}
		if (!retry_estale(error, lookup_flags))
			break;
		lookup_flags |= LOOKUP_REVAL;
	}

	return error;
}
146
SYSCALL_DEFINE2(truncate, const char __user *, path, long, length)
{
	/* The native long is widened to loff_t by the call itself. */
	return do_sys_truncate(path, length);
}
151
152 #ifdef CONFIG_COMPAT
COMPAT_SYSCALL_DEFINE2(truncate, const char __user *, path, compat_off_t, length)
{
	/* compat_off_t is widened to loff_t by the call itself. */
	return do_sys_truncate(path, length);
}
157 #endif
158
/*
 * Common body of the descriptor-based truncate syscalls.
 *
 * @small is non-zero for the non-LFS entry points: lengths above
 * MAX_NON_LFS are then refused unless the file was opened with O_LARGEFILE.
 *
 * Returns -EINVAL for a negative length, a non-regular file, a file not
 * opened for writing or an over-large non-LFS length; -EBADF for a bad
 * descriptor; -EPERM for an append-only inode; otherwise the result of the
 * lock/security checks or of do_truncate2().
 */
static long do_sys_ftruncate(unsigned int fd, loff_t length, int small)
{
	struct vfsmount *mnt;
	struct dentry *dentry;
	struct inode *inode;
	struct fd f;
	int error;

	if (length < 0)
		return -EINVAL;

	f = fdget(fd);
	if (!f.file)
		return -EBADF;

	/* explicitly opened as large or we are on 64-bit box */
	if (f.file->f_flags & O_LARGEFILE)
		small = 0;

	dentry = f.file->f_path.dentry;
	mnt = f.file->f_path.mnt;
	inode = dentry->d_inode;

	error = -EINVAL;
	if (!S_ISREG(inode->i_mode))
		goto out_putf;
	if (!(f.file->f_mode & FMODE_WRITE))
		goto out_putf;

	/* Cannot ftruncate over 2^31 bytes without large file support */
	if (small && length > MAX_NON_LFS)
		goto out_putf;

	if (IS_APPEND(inode)) {
		error = -EPERM;
		goto out_putf;
	}

	sb_start_write(inode->i_sb);
	error = locks_verify_truncate(inode, f.file, length);
	if (error)
		goto out_end_write;
	error = security_path_truncate(&f.file->f_path);
	if (error)
		goto out_end_write;
	error = do_truncate2(mnt, dentry, length, ATTR_MTIME | ATTR_CTIME,
			     f.file);
out_end_write:
	sb_end_write(inode->i_sb);
out_putf:
	fdput(f);
	return error;
}
207
SYSCALL_DEFINE2(ftruncate, unsigned int, fd, unsigned long, length)
{
	/* small == 1: this is the non-LFS entry point. */
	return do_sys_ftruncate(fd, length, 1);
}
212
213 #ifdef CONFIG_COMPAT
COMPAT_SYSCALL_DEFINE2(ftruncate, unsigned int, fd, compat_ulong_t, length)
{
	/* small == 1: this is the non-LFS entry point. */
	return do_sys_ftruncate(fd, length, 1);
}
218 #endif
219
220 /* LFS versions of truncate are only needed on 32 bit machines */
221 #if BITS_PER_LONG == 32
SYSCALL_DEFINE2(truncate64, const char __user *, path, loff_t, length)
{
	/* Same helper as truncate(); the length is already a loff_t here. */
	return do_sys_truncate(path, length);
}
226
SYSCALL_DEFINE2(ftruncate64, unsigned int, fd, loff_t, length)
{
	/* small == 0: the MAX_NON_LFS limit does not apply to this entry. */
	return do_sys_ftruncate(fd, length, 0);
}
231 #endif /* BITS_PER_LONG == 32 */
232
233
/*
 * Validate a fallocate request on @file and hand it to the filesystem's
 * ->fallocate() method.
 *
 * @mode is a mask of FALLOC_FL_* flags; @offset and @len describe the byte
 * range.  The checks are made in this order and the first failure wins:
 *
 *   -EINVAL      @offset < 0, @len <= 0, or FALLOC_FL_COLLAPSE_RANGE
 *                combined with any other flag
 *   -EOPNOTSUPP  unknown flag, PUNCH_HOLE together with ZERO_RANGE,
 *                PUNCH_HOLE without KEEP_SIZE, or no ->fallocate() method
 *   -EBADF       @file is not open for writing
 *   -EPERM       append-only inode with anything beyond KEEP_SIZE, or an
 *                immutable inode
 *   -ETXTBSY     inode is an active swapfile
 *   -ESPIPE      inode is a FIFO
 *   -ENODEV      inode is neither a regular file nor a directory
 *   -EFBIG       range ends beyond the superblock's s_maxbytes
 *
 * Otherwise returns what security_file_permission() or ->fallocate()
 * returned.
 */
int do_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
{
	struct inode *inode = file_inode(file);
	long ret;

	if (offset < 0 || len <= 0)
		return -EINVAL;

	/* Return error if mode is not supported */
	if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |
		     FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE))
		return -EOPNOTSUPP;

	/* Punch hole and zero range are mutually exclusive */
	if ((mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE)) ==
	    (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE))
		return -EOPNOTSUPP;

	/* Punch hole must have keep size set */
	if ((mode & FALLOC_FL_PUNCH_HOLE) &&
	    !(mode & FALLOC_FL_KEEP_SIZE))
		return -EOPNOTSUPP;

	/* Collapse range should only be used exclusively. */
	if ((mode & FALLOC_FL_COLLAPSE_RANGE) &&
	    (mode & ~FALLOC_FL_COLLAPSE_RANGE))
		return -EINVAL;

	if (!(file->f_mode & FMODE_WRITE))
		return -EBADF;

	/*
	 * We can only allow pure fallocate on append only files
	 */
	if ((mode & ~FALLOC_FL_KEEP_SIZE) && IS_APPEND(inode))
		return -EPERM;

	if (IS_IMMUTABLE(inode))
		return -EPERM;

	/*
	 * We cannot allow any fallocate operation on an active swapfile
	 */
	if (IS_SWAPFILE(inode))
		return -ETXTBSY;

	/*
	 * Revalidate the write permissions, in case security policy has
	 * changed since the files were opened.
	 */
	ret = security_file_permission(file, MAY_WRITE);
	if (ret)
		return ret;

	if (S_ISFIFO(inode->i_mode))
		return -ESPIPE;

	/*
	 * Let individual file system decide if it supports preallocation
	 * for directories or not.
	 */
	if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode))
		return -ENODEV;

	/*
	 * Reject ranges that end beyond s_maxbytes.  offset >= 0 and len > 0
	 * are guaranteed by the first check, so the subtraction below cannot
	 * overflow, whereas "offset + len" could wrap through zero.  A sum
	 * that would wrap necessarily exceeds s_maxbytes - offset, so the
	 * wrap case is covered by the same comparison.
	 */
	if (len > inode->i_sb->s_maxbytes - offset)
		return -EFBIG;

	if (!file->f_op->fallocate)
		return -EOPNOTSUPP;

	sb_start_write(inode->i_sb);
	ret = file->f_op->fallocate(file, mode, offset, len);
	sb_end_write(inode->i_sb);
	return ret;
}
310
SYSCALL_DEFINE4(fallocate, int, fd, int, mode, loff_t, offset, loff_t, len)
{
	struct fd f = fdget(fd);
	int error;

	/* No file behind the descriptor: nothing to put back. */
	if (!f.file)
		return -EBADF;

	error = do_fallocate(f.file, mode, offset, len);
	fdput(f);

	return error;
}
322
323 /*
324 * access() needs to use the real uid/gid, not the effective uid/gid.
325 * We do this by temporarily clearing all FS-related capabilities and
326 * switching the fsuid/fsgid around to the real ones.
327 */
SYSCALL_DEFINE3(faccessat, int, dfd, const char __user *, filename, int, mode)
{
	unsigned int lookup_flags = LOOKUP_FOLLOW;
	const struct cred *old_cred;
	struct cred *override_cred;
	struct vfsmount *mnt;
	struct inode *inode;
	struct path path;
	int res;

	if (mode & ~S_IRWXO)	/* where's F_OK, X_OK, W_OK, R_OK? */
		return -EINVAL;

	override_cred = prepare_creds();
	if (!override_cred)
		return -ENOMEM;

	/* The check runs with the real ids in place of the fs ids. */
	override_cred->fsuid = override_cred->uid;
	override_cred->fsgid = override_cred->gid;

	if (!issecure(SECURE_NO_SETUID_FIXUP)) {
		/* Clear the capabilities if we switch to a non-root user */
		kuid_t root_uid = make_kuid(override_cred->user_ns, 0);

		if (uid_eq(override_cred->uid, root_uid)) {
			override_cred->cap_effective =
				override_cred->cap_permitted;
		} else {
			cap_clear(override_cred->cap_effective);
		}
	}

	old_cred = override_creds(override_cred);
retry:
	res = user_path_at(dfd, filename, lookup_flags, &path);
	if (res)
		goto out;

	inode = path.dentry->d_inode;
	mnt = path.mnt;

	/*
	 * MAY_EXEC on regular files is denied if the fs is mounted
	 * with the "noexec" flag.
	 */
	if ((mode & MAY_EXEC) && S_ISREG(inode->i_mode) &&
	    (mnt->mnt_flags & MNT_NOEXEC)) {
		res = -EACCES;
		goto out_path_release;
	}

	res = inode_permission2(mnt, inode, mode | MAY_ACCESS);
	if (res)
		goto out_path_release;

	/* SuS v2 requires we report a read only fs too */
	if (!(mode & S_IWOTH) || special_file(inode->i_mode))
		goto out_path_release;

	/*
	 * This is a rare case where using __mnt_is_readonly()
	 * is OK without a mnt_want/drop_write() pair.  Since
	 * no actual write to the fs is performed here, we do
	 * not need to telegraph to that to anyone.
	 *
	 * By doing this, we accept that this access is
	 * inherently racy and know that the fs may change
	 * state before we even see this result.
	 */
	if (__mnt_is_readonly(mnt))
		res = -EROFS;

out_path_release:
	path_put(&path);
	if (retry_estale(res, lookup_flags)) {
		lookup_flags |= LOOKUP_REVAL;
		goto retry;
	}
out:
	revert_creds(old_cred);
	put_cred(override_cred);
	return res;
}
405
SYSCALL_DEFINE2(access, const char __user *, filename, int, mode)
{
	/* access() is faccessat() relative to the current directory. */
	return sys_faccessat(AT_FDCWD, filename, mode);
}
410
SYSCALL_DEFINE1(chdir, const char __user *, filename)
{
	unsigned int lookup_flags = LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
	struct path path;
	int error;

retry:
	error = user_path_at(AT_FDCWD, filename, lookup_flags, &path);
	if (error)
		return error;

	/* Only switch the working directory once the permission check passes. */
	error = inode_permission2(path.mnt, path.dentry->d_inode,
				  MAY_EXEC | MAY_CHDIR);
	if (!error)
		set_fs_pwd(current->fs, &path);

	path_put(&path);
	if (retry_estale(error, lookup_flags)) {
		lookup_flags |= LOOKUP_REVAL;
		goto retry;
	}
	return error;
}
436
SYSCALL_DEFINE1(fchdir, unsigned int, fd)
{
	struct fd f = fdget_raw(fd);
	struct vfsmount *mnt;
	struct inode *inode;
	int error;

	if (!f.file)
		return -EBADF;

	inode = file_inode(f.file);
	mnt = f.file->f_path.mnt;

	if (!S_ISDIR(inode->i_mode)) {
		error = -ENOTDIR;
		goto out_putf;
	}

	/* Only switch the working directory once the permission check passes. */
	error = inode_permission2(mnt, inode, MAY_EXEC | MAY_CHDIR);
	if (error == 0)
		set_fs_pwd(current->fs, &f.file->f_path);

out_putf:
	fdput(f);
	return error;
}
463
SYSCALL_DEFINE1(chroot, const char __user *, filename)
{
	unsigned int lookup_flags = LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
	struct path path;
	int error;

retry:
	error = user_path_at(AT_FDCWD, filename, lookup_flags, &path);
	if (error)
		return error;

	error = inode_permission2(path.mnt, path.dentry->d_inode,
				  MAY_EXEC | MAY_CHDIR);
	if (error)
		goto dput_and_out;

	/* The caller needs CAP_SYS_CHROOT in its own user namespace. */
	if (!ns_capable(current_user_ns(), CAP_SYS_CHROOT)) {
		error = -EPERM;
		goto dput_and_out;
	}

	error = security_path_chroot(&path);
	if (error)
		goto dput_and_out;

	set_fs_root(current->fs, &path);
	error = 0;

dput_and_out:
	path_put(&path);
	if (retry_estale(error, lookup_flags)) {
		lookup_flags |= LOOKUP_REVAL;
		goto retry;
	}
	return error;
}
496
/*
 * Apply the permission bits of @mode to the inode behind @path.
 *
 * Only the S_IALLUGO bits of @mode are used; all other bits of the inode's
 * current i_mode are kept.  If notify_change2() hands back a delegated
 * inode, break_deleg_wait() is called on it and, when that succeeds, the
 * change is attempted again.
 */
static int chmod_common(struct path *path, umode_t mode)
{
	struct inode *inode = path->dentry->d_inode;
	struct inode *delegated_inode = NULL;
	struct iattr newattrs;
	int error;

	error = mnt_want_write(path->mnt);
	if (error)
		return error;

retry_deleg:
	mutex_lock(&inode->i_mutex);
	error = security_path_chmod(path, mode);
	if (!error) {
		newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
		newattrs.ia_mode = (mode & S_IALLUGO) |
				   (inode->i_mode & ~S_IALLUGO);
		error = notify_change2(path->mnt, path->dentry, &newattrs,
				       &delegated_inode);
	}
	mutex_unlock(&inode->i_mutex);

	if (delegated_inode) {
		error = break_deleg_wait(&delegated_inode);
		if (!error)
			goto retry_deleg;
	}

	mnt_drop_write(path->mnt);
	return error;
}
525
SYSCALL_DEFINE2(fchmod, unsigned int, fd, umode_t, mode)
{
	struct fd f = fdget(fd);
	int err;

	/* No file behind the descriptor: nothing to put back. */
	if (!f.file)
		return -EBADF;

	audit_inode(NULL, f.file->f_path.dentry, 0);
	err = chmod_common(&f.file->f_path, mode);
	fdput(f);

	return err;
}
538
SYSCALL_DEFINE3(fchmodat, int, dfd, const char __user *, filename, umode_t, mode)
{
	unsigned int lookup_flags = LOOKUP_FOLLOW;
	struct path path;
	int error;

retry:
	/* A failed lookup is returned as is; it is never retried here. */
	error = user_path_at(dfd, filename, lookup_flags, &path);
	if (error)
		return error;

	error = chmod_common(&path, mode);
	path_put(&path);
	if (retry_estale(error, lookup_flags)) {
		lookup_flags |= LOOKUP_REVAL;
		goto retry;
	}
	return error;
}
556
SYSCALL_DEFINE2(chmod, const char __user *, filename, umode_t, mode)
{
	/* chmod() is fchmodat() relative to the current directory. */
	return sys_fchmodat(AT_FDCWD, filename, mode);
}
561
/*
 * Change owner and/or group of the inode behind @path.
 *
 * @user and @group are mapped through the caller's user namespace.  A value
 * of -1 leaves the corresponding id untouched; any other value that does
 * not map to a valid id yields -EINVAL.  For anything but a directory the
 * ATTR_KILL_SUID, ATTR_KILL_SGID and ATTR_KILL_PRIV flags are requested as
 * well.  If notify_change2() hands back a delegated inode,
 * break_deleg_wait() is called on it and, when that succeeds, the change is
 * attempted again.
 */
static int chown_common(struct path *path, uid_t user, gid_t group)
{
	struct inode *inode = path->dentry->d_inode;
	struct inode *delegated_inode = NULL;
	struct iattr newattrs;
	kuid_t uid = make_kuid(current_user_ns(), user);
	kgid_t gid = make_kgid(current_user_ns(), group);
	int error;

retry_deleg:
	newattrs.ia_valid = ATTR_CTIME;

	if (user != (uid_t) -1) {
		if (!uid_valid(uid))
			return -EINVAL;
		newattrs.ia_uid = uid;
		newattrs.ia_valid |= ATTR_UID;
	}

	if (group != (gid_t) -1) {
		if (!gid_valid(gid))
			return -EINVAL;
		newattrs.ia_gid = gid;
		newattrs.ia_valid |= ATTR_GID;
	}

	if (!S_ISDIR(inode->i_mode)) {
		newattrs.ia_valid |= ATTR_KILL_SUID | ATTR_KILL_SGID |
				     ATTR_KILL_PRIV;
	}

	mutex_lock(&inode->i_mutex);
	error = security_path_chown(path, uid, gid);
	if (error == 0)
		error = notify_change2(path->mnt, path->dentry, &newattrs,
				       &delegated_inode);
	mutex_unlock(&inode->i_mutex);

	if (delegated_inode) {
		error = break_deleg_wait(&delegated_inode);
		if (error == 0)
			goto retry_deleg;
	}
	return error;
}
603
SYSCALL_DEFINE5(fchownat, int, dfd, const char __user *, filename, uid_t, user,
		gid_t, group, int, flag)
{
	struct path path;
	int lookup_flags;
	int error;

	/* Only AT_SYMLINK_NOFOLLOW and AT_EMPTY_PATH are accepted. */
	if (flag & ~(AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH))
		return -EINVAL;

	lookup_flags = 0;
	if (!(flag & AT_SYMLINK_NOFOLLOW))
		lookup_flags = LOOKUP_FOLLOW;
	if (flag & AT_EMPTY_PATH)
		lookup_flags |= LOOKUP_EMPTY;

retry:
	error = user_path_at(dfd, filename, lookup_flags, &path);
	if (error)
		return error;

	error = mnt_want_write(path.mnt);
	if (!error) {
		error = chown_common(&path, user, group);
		mnt_drop_write(path.mnt);
	}

	path_put(&path);
	if (retry_estale(error, lookup_flags)) {
		lookup_flags |= LOOKUP_REVAL;
		goto retry;
	}
	return error;
}
635
SYSCALL_DEFINE3(chown, const char __user *, filename, uid_t, user, gid_t, group)
{
	/* chown() is fchownat() relative to the current directory, no flags. */
	return sys_fchownat(AT_FDCWD, filename, user, group, 0);
}
640
SYSCALL_DEFINE3(lchown, const char __user *, filename, uid_t, user, gid_t, group)
{
	/* Same as chown(), but with AT_SYMLINK_NOFOLLOW. */
	const int flag = AT_SYMLINK_NOFOLLOW;

	return sys_fchownat(AT_FDCWD, filename, user, group, flag);
}
646
SYSCALL_DEFINE3(fchown, unsigned int, fd, uid_t, user, gid_t, group)
{
	struct fd f = fdget(fd);
	int error;

	if (!f.file)
		return -EBADF;

	/* Write access to the mount is held only around the change itself. */
	error = mnt_want_write_file(f.file);
	if (!error) {
		audit_inode(NULL, f.file->f_path.dentry, 0);
		error = chown_common(&f.file->f_path, user, group);
		mnt_drop_write_file(f.file);
	}

	fdput(f);
	return error;
}
666
open_check_o_direct(struct file * f)667 int open_check_o_direct(struct file *f)
668 {
669 /* NB: we're sure to have correct a_ops only after f_op->open */
670 if (f->f_flags & O_DIRECT) {
671 if (!f->f_mapping->a_ops ||
672 ((!f->f_mapping->a_ops->direct_IO) &&
673 (!f->f_mapping->a_ops->get_xip_mem))) {
674 return -EINVAL;
675 }
676 }
677 return 0;
678 }
679
do_dentry_open(struct file * f,struct inode * inode,int (* open)(struct inode *,struct file *),const struct cred * cred)680 static int do_dentry_open(struct file *f,
681 struct inode *inode,
682 int (*open)(struct inode *, struct file *),
683 const struct cred *cred)
684 {
685 static const struct file_operations empty_fops = {};
686 int error;
687
688 f->f_mode = OPEN_FMODE(f->f_flags) | FMODE_LSEEK |
689 FMODE_PREAD | FMODE_PWRITE;
690
691 path_get(&f->f_path);
692 f->f_inode = inode;
693 f->f_mapping = inode->i_mapping;
694
695 if (unlikely(f->f_flags & O_PATH)) {
696 f->f_mode = FMODE_PATH;
697 f->f_op = &empty_fops;
698 return 0;
699 }
700
701 if (f->f_mode & FMODE_WRITE && !special_file(inode->i_mode)) {
702 error = get_write_access(inode);
703 if (unlikely(error))
704 goto cleanup_file;
705 error = __mnt_want_write(f->f_path.mnt);
706 if (unlikely(error)) {
707 put_write_access(inode);
708 goto cleanup_file;
709 }
710 f->f_mode |= FMODE_WRITER;
711 }
712
713 /* POSIX.1-2008/SUSv4 Section XSI 2.9.7 */
714 if (S_ISREG(inode->i_mode))
715 f->f_mode |= FMODE_ATOMIC_POS;
716
717 f->f_op = fops_get(inode->i_fop);
718 if (unlikely(WARN_ON(!f->f_op))) {
719 error = -ENODEV;
720 goto cleanup_all;
721 }
722
723 error = security_file_open(f, cred);
724 if (error)
725 goto cleanup_all;
726
727 error = break_lease(inode, f->f_flags);
728 if (error)
729 goto cleanup_all;
730
731 if (!open)
732 open = f->f_op->open;
733 if (open) {
734 error = open(inode, f);
735 if (error)
736 goto cleanup_all;
737 }
738 if ((f->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
739 i_readcount_inc(inode);
740 if ((f->f_mode & FMODE_READ) &&
741 likely(f->f_op->read || f->f_op->aio_read || f->f_op->read_iter))
742 f->f_mode |= FMODE_CAN_READ;
743 if ((f->f_mode & FMODE_WRITE) &&
744 likely(f->f_op->write || f->f_op->aio_write || f->f_op->write_iter))
745 f->f_mode |= FMODE_CAN_WRITE;
746
747 f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
748
749 file_ra_state_init(&f->f_ra, f->f_mapping->host->i_mapping);
750
751 return 0;
752
753 cleanup_all:
754 fops_put(f->f_op);
755 if (f->f_mode & FMODE_WRITER) {
756 put_write_access(inode);
757 __mnt_drop_write(f->f_path.mnt);
758 }
759 cleanup_file:
760 path_put(&f->f_path);
761 f->f_path.mnt = NULL;
762 f->f_path.dentry = NULL;
763 f->f_inode = NULL;
764 return error;
765 }
766
767 /**
768 * finish_open - finish opening a file
769 * @file: file pointer
770 * @dentry: pointer to dentry
771 * @open: open callback
772 * @opened: state of open
773 *
774 * This can be used to finish opening a file passed to i_op->atomic_open().
775 *
776 * If the open callback is set to NULL, then the standard f_op->open()
777 * filesystem callback is substituted.
778 *
779 * NB: the dentry reference is _not_ consumed. If, for example, the dentry is
780 * the return value of d_splice_alias(), then the caller needs to perform dput()
781 * on it after finish_open().
782 *
783 * On successful return @file is a fully instantiated open file. After this, if
784 * an error occurs in ->atomic_open(), it needs to clean up with fput().
785 *
786 * Returns zero on success or -errno if the open failed.
787 */
finish_open(struct file * file,struct dentry * dentry,int (* open)(struct inode *,struct file *),int * opened)788 int finish_open(struct file *file, struct dentry *dentry,
789 int (*open)(struct inode *, struct file *),
790 int *opened)
791 {
792 int error;
793 BUG_ON(*opened & FILE_OPENED); /* once it's opened, it's opened */
794
795 file->f_path.dentry = dentry;
796 error = do_dentry_open(file, d_backing_inode(dentry), open,
797 current_cred());
798 if (!error)
799 *opened |= FILE_OPENED;
800
801 return error;
802 }
803 EXPORT_SYMBOL(finish_open);
804
805 /**
806 * finish_no_open - finish ->atomic_open() without opening the file
807 *
808 * @file: file pointer
809 * @dentry: dentry or NULL (as returned from ->lookup())
810 *
811 * This can be used to set the result of a successful lookup in ->atomic_open().
812 *
813 * NB: unlike finish_open() this function does consume the dentry reference and
814 * the caller need not dput() it.
815 *
816 * Returns "1" which must be the return value of ->atomic_open() after having
817 * called this function.
818 */
finish_no_open(struct file * file,struct dentry * dentry)819 int finish_no_open(struct file *file, struct dentry *dentry)
820 {
821 file->f_path.dentry = dentry;
822 return 1;
823 }
824 EXPORT_SYMBOL(finish_no_open);
825
826 /**
827 * vfs_open - open the file at the given path
828 * @path: path to open
829 * @file: newly allocated file with f_flag initialized
830 * @cred: credentials to use
831 */
vfs_open(const struct path * path,struct file * file,const struct cred * cred)832 int vfs_open(const struct path *path, struct file *file,
833 const struct cred *cred)
834 {
835 struct dentry *dentry = path->dentry;
836 struct inode *inode = dentry->d_inode;
837
838 file->f_path = *path;
839 if (dentry->d_flags & DCACHE_OP_SELECT_INODE) {
840 inode = dentry->d_op->d_select_inode(dentry, file->f_flags);
841 if (IS_ERR(inode))
842 return PTR_ERR(inode);
843 }
844
845 return do_dentry_open(file, inode, NULL, cred);
846 }
847
dentry_open(const struct path * path,int flags,const struct cred * cred)848 struct file *dentry_open(const struct path *path, int flags,
849 const struct cred *cred)
850 {
851 int error;
852 struct file *f;
853
854 validate_creds(cred);
855
856 /* We must always pass in a valid mount pointer. */
857 BUG_ON(!path->mnt);
858
859 f = get_empty_filp();
860 if (!IS_ERR(f)) {
861 f->f_flags = flags;
862 error = vfs_open(path, f, cred);
863 if (!error) {
864 /* from now on we need fput() to dispose of f */
865 error = open_check_o_direct(f);
866 if (error) {
867 fput(f);
868 f = ERR_PTR(error);
869 }
870 } else {
871 put_filp(f);
872 f = ERR_PTR(error);
873 }
874 }
875 return f;
876 }
877 EXPORT_SYMBOL(dentry_open);
878
/*
 * Translate open(2) style @flags and @mode into the open_flags structure
 * @op: creation mode, sanitized open flags, access mode, lookup intent and
 * lookup flags.
 *
 * Returns 0 on success, or -EINVAL for an __O_TMPFILE request that either
 * does not match O_TMPFILE under O_TMPFILE_MASK or does not ask for write
 * access.
 */
static inline int build_open_flags(int flags, umode_t mode, struct open_flags *op)
{
	int lookup_flags = 0;
	int acc_mode;

	/* The mode only matters when a file may be created. */
	if (flags & (O_CREAT | __O_TMPFILE))
		op->mode = (mode & S_IALLUGO) | S_IFREG;
	else
		op->mode = 0;

	/* Must never be set by userspace */
	flags &= ~FMODE_NONOTIFY;
	flags &= ~O_CLOEXEC;

	/*
	 * O_SYNC is implemented as __O_SYNC|O_DSYNC.  As many places only
	 * check for O_DSYNC if they need any syncing at all we enforce its
	 * always set instead of having to deal with possibly weird behaviour
	 * for malicious applications setting only __O_SYNC.
	 */
	if (flags & __O_SYNC)
		flags |= O_DSYNC;

	if (flags & __O_TMPFILE) {
		if ((flags & O_TMPFILE_MASK) != O_TMPFILE)
			return -EINVAL;
		acc_mode = MAY_OPEN | ACC_MODE(flags);
		if (!(acc_mode & MAY_WRITE))
			return -EINVAL;
	} else if (flags & O_PATH) {
		/*
		 * If we have O_PATH in the open flag, then we
		 * cannot have anything other than the below set of flags
		 */
		flags &= O_DIRECTORY | O_NOFOLLOW | O_PATH;
		acc_mode = 0;
	} else {
		acc_mode = MAY_OPEN | ACC_MODE(flags);
	}

	op->open_flag = flags;

	/* O_TRUNC implies we need access checks for write permissions */
	if (flags & O_TRUNC)
		acc_mode |= MAY_WRITE;

	/*
	 * Allow the LSM permission hook to distinguish append
	 * access from general write access.
	 */
	if (flags & O_APPEND)
		acc_mode |= MAY_APPEND;

	op->acc_mode = acc_mode;

	if (flags & O_PATH)
		op->intent = 0;
	else
		op->intent = LOOKUP_OPEN;

	if (flags & O_CREAT) {
		op->intent |= LOOKUP_CREATE;
		if (flags & O_EXCL)
			op->intent |= LOOKUP_EXCL;
	}

	if (flags & O_DIRECTORY)
		lookup_flags |= LOOKUP_DIRECTORY;
	if (!(flags & O_NOFOLLOW))
		lookup_flags |= LOOKUP_FOLLOW;
	op->lookup_flags = lookup_flags;

	return 0;
}
946
947 /**
948 * file_open_name - open file and return file pointer
949 *
950 * @name: struct filename containing path to open
951 * @flags: open flags as per the open(2) second argument
952 * @mode: mode for the new file if O_CREAT is set, else ignored
953 *
954 * This is the helper to open a file from kernelspace if you really
955 * have to. But in generally you should not do this, so please move
956 * along, nothing to see here..
957 */
file_open_name(struct filename * name,int flags,umode_t mode)958 struct file *file_open_name(struct filename *name, int flags, umode_t mode)
959 {
960 struct open_flags op;
961 int err = build_open_flags(flags, mode, &op);
962 return err ? ERR_PTR(err) : do_filp_open(AT_FDCWD, name, &op);
963 }
964
965 /**
966 * filp_open - open file and return file pointer
967 *
968 * @filename: path to open
969 * @flags: open flags as per the open(2) second argument
970 * @mode: mode for the new file if O_CREAT is set, else ignored
971 *
972 * This is the helper to open a file from kernelspace if you really
973 * have to. But in generally you should not do this, so please move
974 * along, nothing to see here..
975 */
filp_open(const char * filename,int flags,umode_t mode)976 struct file *filp_open(const char *filename, int flags, umode_t mode)
977 {
978 struct filename name = {.name = filename};
979 return file_open_name(&name, flags, mode);
980 }
981 EXPORT_SYMBOL(filp_open);
982
/*
 * Open @filename relative to the root given by @dentry and @mnt.
 *
 * Returns the result of do_file_open_root(), or an ERR_PTR(): the error
 * from build_open_flags(), or -ENOTDIR when no @filename is given,
 * O_DIRECTORY is requested and the root inode has no ->lookup method.
 */
struct file *file_open_root(struct dentry *dentry, struct vfsmount *mnt,
			    const char *filename, int flags, umode_t mode)
{
	struct open_flags op;
	int err;

	err = build_open_flags(flags, mode, &op);
	if (err)
		return ERR_PTR(err);

	if (!filename && (flags & O_DIRECTORY) &&
	    !dentry->d_inode->i_op->lookup)
		return ERR_PTR(-ENOTDIR);

	return do_file_open_root(dentry, mnt, filename, &op);
}
EXPORT_SYMBOL(file_open_root);
996
/*
 * Common body of the open syscalls: build the open flags, copy in the
 * user-space name, reserve a descriptor, open the file and install it.
 *
 * Returns the new descriptor, or a negative errno from whichever step
 * failed.
 */
long do_sys_open(int dfd, const char __user *filename, int flags, umode_t mode)
{
	struct open_flags op;
	struct filename *tmp;
	struct file *f;
	int fd;

	fd = build_open_flags(flags, mode, &op);
	if (fd)
		return fd;

	tmp = getname(filename);
	if (IS_ERR(tmp))
		return PTR_ERR(tmp);

	fd = get_unused_fd_flags(flags);
	if (fd < 0)
		goto out_putname;

	f = do_filp_open(dfd, tmp, &op);
	if (IS_ERR(f)) {
		/* Give the reserved descriptor back and report the error. */
		put_unused_fd(fd);
		fd = PTR_ERR(f);
		goto out_putname;
	}

	fsnotify_open(f);
	fd_install(fd, f);

out_putname:
	putname(tmp);
	return fd;
}
1024
SYSCALL_DEFINE3(open, const char __user *, filename, int, flags, umode_t, mode)
{
	/* O_LARGEFILE is added whenever force_o_largefile() says so. */
	if (force_o_largefile()) {
		flags |= O_LARGEFILE;
	}

	return do_sys_open(AT_FDCWD, filename, flags, mode);
}
1032
SYSCALL_DEFINE4(openat, int, dfd, const char __user *, filename, int, flags,
		umode_t, mode)
{
	/* O_LARGEFILE is added whenever force_o_largefile() says so. */
	if (force_o_largefile()) {
		flags |= O_LARGEFILE;
	}

	return do_sys_open(dfd, filename, flags, mode);
}
1041
1042 #ifndef __alpha__
1043
1044 /*
1045 * For backward compatibility? Maybe this should be moved
1046 * into arch/i386 instead?
1047 */
SYSCALL_DEFINE2(creat, const char __user *, pathname, umode_t, mode)
{
	/* creat() is open() with a fixed set of flags. */
	const int creat_flags = O_CREAT | O_WRONLY | O_TRUNC;

	return sys_open(pathname, creat_flags, mode);
}
1052
1053 #endif
1054
1055 /*
1056 * "id" is the POSIX thread ID. We use the
1057 * files pointer for this..
1058 */
int filp_close(struct file *filp, fl_owner_t id)
{
	int retval = 0;

	if (file_count(filp) == 0) {
		printk(KERN_ERR "VFS: Close: file count is 0\n");
		return 0;
	}

	/* The result of ->flush(), if there is one, is what the caller sees. */
	if (filp->f_op->flush != NULL)
		retval = filp->f_op->flush(filp, id);

	if (likely(!(filp->f_mode & FMODE_PATH))) {
		dnotify_flush(filp, id);
		locks_remove_posix(filp, id);
	}

	fput(filp);
	return retval;
}

EXPORT_SYMBOL(filp_close);
1080
1081 /*
1082 * Careful here! We test whether the file pointer is NULL before
1083 * releasing the fd. This ensures that one clone task can't release
1084 * an fd while another clone is opening it.
1085 */
SYSCALL_DEFINE1(close, unsigned int, fd)
{
	int retval = __close_fd(current->files, fd);

	/* can't restart close syscall because file table entry was cleared */
	switch (retval) {
	case -ERESTARTSYS:
	case -ERESTARTNOINTR:
	case -ERESTARTNOHAND:
	case -ERESTART_RESTARTBLOCK:
		return -EINTR;
	default:
		return retval;
	}
}
EXPORT_SYMBOL(sys_close);
1100
1101 /*
1102 * This routine simulates a hangup on the tty, to arrange that users
1103 * are given clean terminals at login time.
1104 */
SYSCALL_DEFINE0(vhangup)1105 SYSCALL_DEFINE0(vhangup)
1106 {
1107 if (capable(CAP_SYS_TTY_CONFIG)) {
1108 tty_vhangup_self();
1109 return 0;
1110 }
1111 return -EPERM;
1112 }
1113
1114 /*
1115 * Called when an inode is about to be open.
1116 * We use this to disallow opening large files on 32bit systems if
1117 * the caller didn't specify O_LARGEFILE. On 64bit systems we force
1118 * on this flag in sys_open.
1119 */
generic_file_open(struct inode * inode,struct file * filp)1120 int generic_file_open(struct inode * inode, struct file * filp)
1121 {
1122 if (!(filp->f_flags & O_LARGEFILE) && i_size_read(inode) > MAX_NON_LFS)
1123 return -EOVERFLOW;
1124 return 0;
1125 }
1126
1127 EXPORT_SYMBOL(generic_file_open);
1128
1129 /*
1130 * This is used by subsystems that don't want seekable
1131 * file descriptors. The function is not supposed to ever fail, the only
1132 * reason it returns an 'int' and not 'void' is so that it can be plugged
1133 * directly into file_operations structure.
1134 */
nonseekable_open(struct inode * inode,struct file * filp)1135 int nonseekable_open(struct inode *inode, struct file *filp)
1136 {
1137 filp->f_mode &= ~(FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE);
1138 return 0;
1139 }
1140
1141 EXPORT_SYMBOL(nonseekable_open);
1142