• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1  // SPDX-License-Identifier: GPL-2.0-only
2  /*
3   *  linux/fs/open.c
4   *
5   *  Copyright (C) 1991, 1992  Linus Torvalds
6   */
7  
8  #include <linux/string.h>
9  #include <linux/mm.h>
10  #include <linux/file.h>
11  #include <linux/fdtable.h>
12  #include <linux/fsnotify.h>
13  #include <linux/module.h>
14  #include <linux/tty.h>
15  #include <linux/namei.h>
16  #include <linux/backing-dev.h>
17  #include <linux/capability.h>
18  #include <linux/securebits.h>
19  #include <linux/security.h>
20  #include <linux/mount.h>
21  #include <linux/fcntl.h>
22  #include <linux/slab.h>
23  #include <linux/uaccess.h>
24  #include <linux/fs.h>
25  #include <linux/personality.h>
26  #include <linux/pagemap.h>
27  #include <linux/syscalls.h>
28  #include <linux/rcupdate.h>
29  #include <linux/audit.h>
30  #include <linux/falloc.h>
31  #include <linux/fs_struct.h>
32  #include <linux/ima.h>
33  #include <linux/dnotify.h>
34  #include <linux/compat.h>
35  
36  #include "internal.h"
37  
/**
 * do_truncate - set a new size on the inode behind @dentry
 * @dentry: dentry whose inode is resized
 * @length: requested size in bytes; negative values are rejected
 * @time_attrs: additional ATTR_* bits OR-ed into the attribute mask
 * @filp: open file the request came through, or NULL
 *
 * Builds a struct iattr describing the size change and passes it to
 * notify_change() while holding the inode lock.
 *
 * Returns 0 on success, -EINVAL for a negative @length, or whatever
 * dentry_needs_remove_privs() / notify_change() report.
 */
int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs,
	struct file *filp)
{
	struct iattr attr;
	struct inode *inode;
	int kill;
	int err;

	/* Not pretty: "inode->i_size" shouldn't really be signed. But it is. */
	if (length < 0)
		return -EINVAL;

	attr.ia_valid = ATTR_SIZE | time_attrs;
	attr.ia_size = length;
	if (filp != NULL) {
		attr.ia_valid |= ATTR_FILE;
		attr.ia_file = filp;
	}

	/* Truncation also strips suid, sgid and file capabilities. */
	kill = dentry_needs_remove_privs(dentry);
	if (kill < 0)
		return kill;
	if (kill != 0)
		attr.ia_valid |= kill | ATTR_FORCE;

	inode = dentry->d_inode;
	inode_lock(inode);
	/* Note any delegations or leases have already been broken: */
	err = notify_change(dentry, &attr, NULL);
	inode_unlock(inode);

	return err;
}
68  
/**
 * vfs_truncate - truncate the regular file at @path to @length bytes
 * @path: resolved path of the file
 * @length: new size, forwarded to do_truncate()
 *
 * Takes mount write access, checks write permission and the append-only
 * flag, grabs inode write access, breaks leases and consults the lock and
 * security hooks before calling do_truncate().
 *
 * Returns 0 on success, -EISDIR for directories, -EINVAL for other
 * non-regular files, -EPERM for append-only inodes, or the first error
 * reported by one of the checks.
 */
long vfs_truncate(const struct path *path, loff_t length)
{
	struct inode *inode = path->dentry->d_inode;
	long err;

	/* For directories it's -EISDIR, for other non-regulars - -EINVAL */
	if (S_ISDIR(inode->i_mode))
		return -EISDIR;
	if (!S_ISREG(inode->i_mode))
		return -EINVAL;

	err = mnt_want_write(path->mnt);
	if (err)
		return err;

	err = inode_permission(inode, MAY_WRITE);
	if (err)
		goto drop_mnt_write;

	if (IS_APPEND(inode)) {
		err = -EPERM;
		goto drop_mnt_write;
	}

	err = get_write_access(inode);
	if (err)
		goto drop_mnt_write;

	/*
	 * Make sure that there are no leases.  get_write_access() protects
	 * against the truncate racing with a lease-granting setlease().
	 */
	err = break_lease(inode, O_WRONLY);
	if (err)
		goto drop_write_access;

	err = locks_verify_truncate(inode, NULL, length);
	if (err)
		goto drop_write_access;

	err = security_path_truncate(path);
	if (err)
		goto drop_write_access;

	err = do_truncate(path->dentry, length, 0, NULL);

drop_write_access:
	put_write_access(inode);
drop_mnt_write:
	mnt_drop_write(path->mnt);
	return err;
}
EXPORT_SYMBOL_GPL(vfs_truncate);
120  
/**
 * do_sys_truncate - common implementation of the path-based truncate calls
 * @pathname: user-space path of the file
 * @length: new size; negative values yield -EINVAL
 *
 * Looks the path up relative to the current working directory (following
 * symlinks) and calls vfs_truncate().  If retry_estale() asks for it, the
 * lookup is repeated once more with LOOKUP_REVAL added.
 */
long do_sys_truncate(const char __user *pathname, loff_t length)
{
	unsigned int lookup_flags = LOOKUP_FOLLOW;
	struct path path;
	int error;

	if (length < 0)	/* sorry, but loff_t says... */
		return -EINVAL;

	for (;;) {
		error = user_path_at(AT_FDCWD, pathname, lookup_flags, &path);
		if (!error) {
			error = vfs_truncate(&path, length);
			path_put(&path);
		}
		if (!retry_estale(error, lookup_flags))
			break;
		lookup_flags |= LOOKUP_REVAL;
	}

	return error;
}
142  
/* truncate(2): native entry point, hands both arguments to do_sys_truncate(). */
SYSCALL_DEFINE2(truncate, const char __user *, path, long, length)
{
	return do_sys_truncate(path, length);
}
147  
#ifdef CONFIG_COMPAT
/* Compat truncate(2): same as the native call, with a compat_off_t length. */
COMPAT_SYSCALL_DEFINE2(truncate, const char __user *, path, compat_off_t, length)
{
	return do_sys_truncate(path, length);
}
#endif
154  
/**
 * do_sys_ftruncate - common implementation of the fd-based truncate calls
 * @fd: descriptor of the file to resize
 * @length: new size; negative values yield -EINVAL
 * @small: non-zero if the caller cannot express sizes above MAX_NON_LFS;
 *         ignored when the file was opened with O_LARGEFILE
 *
 * Returns 0 on success, -EBADF if @fd is not open, -EINVAL if the file is
 * not a regular file opened for writing or @length exceeds the non-LFS
 * limit, -EPERM for append-only inodes, or the error from the lock and
 * security hooks or do_truncate().
 */
long do_sys_ftruncate(unsigned int fd, loff_t length, int small)
{
	struct dentry *dentry;
	struct inode *inode;
	struct fd f;
	int error;

	if (length < 0)
		return -EINVAL;

	f = fdget(fd);
	if (!f.file)
		return -EBADF;

	/* explicitly opened as large or we are on 64-bit box */
	if (f.file->f_flags & O_LARGEFILE)
		small = 0;

	dentry = f.file->f_path.dentry;
	inode = dentry->d_inode;

	error = -EINVAL;
	if (!S_ISREG(inode->i_mode) || !(f.file->f_mode & FMODE_WRITE))
		goto out_putf;
	/* Cannot ftruncate over 2^31 bytes without large file support */
	if (small && length > MAX_NON_LFS)
		goto out_putf;

	/* Check IS_APPEND on real upper inode */
	error = -EPERM;
	if (IS_APPEND(file_inode(f.file)))
		goto out_putf;

	sb_start_write(inode->i_sb);
	error = locks_verify_truncate(inode, f.file, length);
	if (!error)
		error = security_path_truncate(&f.file->f_path);
	if (!error)
		error = do_truncate(dentry, length, ATTR_MTIME|ATTR_CTIME, f.file);
	sb_end_write(inode->i_sb);

out_putf:
	fdput(f);
	return error;
}
202  
/*
 * ftruncate(2): native entry point.  Passes small=1, so do_sys_ftruncate()
 * applies the MAX_NON_LFS limit unless the file was opened O_LARGEFILE.
 */
SYSCALL_DEFINE2(ftruncate, unsigned int, fd, unsigned long, length)
{
	return do_sys_ftruncate(fd, length, 1);
}
207  
#ifdef CONFIG_COMPAT
/* Compat ftruncate(2): compat_ulong_t length, otherwise as the native call. */
COMPAT_SYSCALL_DEFINE2(ftruncate, unsigned int, fd, compat_ulong_t, length)
{
	return do_sys_ftruncate(fd, length, 1);
}
#endif
214  
/* LFS versions of truncate are only needed on 32 bit machines */
#if BITS_PER_LONG == 32
/* truncate64(2): path-based truncate taking a full loff_t length. */
SYSCALL_DEFINE2(truncate64, const char __user *, path, loff_t, length)
{
	return do_sys_truncate(path, length);
}

/* ftruncate64(2): passes small=0, so no MAX_NON_LFS limit is applied. */
SYSCALL_DEFINE2(ftruncate64, unsigned int, fd, loff_t, length)
{
	return do_sys_ftruncate(fd, length, 0);
}
#endif /* BITS_PER_LONG == 32 */
227  
228  
/**
 * vfs_fallocate - validate a fallocate request and pass it to the filesystem
 * @file: open file to operate on; must have FMODE_WRITE
 * @mode: combination of FALLOC_FL_* flags
 * @offset: start of the range; must not be negative
 * @len: length of the range; must be positive
 *
 * Checks the flag combination, the file's mode and inode flags, the LSM
 * write permission and the range before calling ->fallocate().  A modify
 * notification is raised whenever the filesystem call succeeds.
 *
 * Returns 0 on success or a negative errno:
 *   -EINVAL      bad @offset/@len or an illegal flag combination
 *   -EOPNOTSUPP  unsupported flags or no ->fallocate() method
 *   -EBADF       @file not open for writing
 *   -EPERM       append-only (for anything but plain allocation) or
 *                immutable inode
 *   -ETXTBSY     inode is an active swapfile
 *   -ESPIPE, -EISDIR, -ENODEV  unsuitable file type
 *   -EFBIG       range ends beyond the filesystem's s_maxbytes
 * or whatever security_file_permission() / ->fallocate() return.
 */
int vfs_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
{
	struct inode *inode = file_inode(file);
	long ret;

	if (offset < 0 || len <= 0)
		return -EINVAL;

	/* Return error if mode is not supported */
	if (mode & ~FALLOC_FL_SUPPORTED_MASK)
		return -EOPNOTSUPP;

	/* Punch hole and zero range are mutually exclusive */
	if ((mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE)) ==
	    (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE))
		return -EOPNOTSUPP;

	/* Punch hole must have keep size set */
	if ((mode & FALLOC_FL_PUNCH_HOLE) &&
	    !(mode & FALLOC_FL_KEEP_SIZE))
		return -EOPNOTSUPP;

	/* Collapse range should only be used exclusively. */
	if ((mode & FALLOC_FL_COLLAPSE_RANGE) &&
	    (mode & ~FALLOC_FL_COLLAPSE_RANGE))
		return -EINVAL;

	/* Insert range should only be used exclusively. */
	if ((mode & FALLOC_FL_INSERT_RANGE) &&
	    (mode & ~FALLOC_FL_INSERT_RANGE))
		return -EINVAL;

	/* Unshare range should only be used with allocate mode. */
	if ((mode & FALLOC_FL_UNSHARE_RANGE) &&
	    (mode & ~(FALLOC_FL_UNSHARE_RANGE | FALLOC_FL_KEEP_SIZE)))
		return -EINVAL;

	if (!(file->f_mode & FMODE_WRITE))
		return -EBADF;

	/*
	 * We can only allow pure fallocate on append only files
	 */
	if ((mode & ~FALLOC_FL_KEEP_SIZE) && IS_APPEND(inode))
		return -EPERM;

	if (IS_IMMUTABLE(inode))
		return -EPERM;

	/*
	 * We cannot allow any fallocate operation on an active swapfile
	 */
	if (IS_SWAPFILE(inode))
		return -ETXTBSY;

	/*
	 * Revalidate the write permissions, in case security policy has
	 * changed since the files were opened.
	 */
	ret = security_file_permission(file, MAY_WRITE);
	if (ret)
		return ret;

	if (S_ISFIFO(inode->i_mode))
		return -ESPIPE;

	if (S_ISDIR(inode->i_mode))
		return -EISDIR;

	if (!S_ISREG(inode->i_mode) && !S_ISBLK(inode->i_mode))
		return -ENODEV;

	/*
	 * Reject ranges that end beyond s_maxbytes.  offset >= 0 and len > 0
	 * were established above, so comparing len with the remaining
	 * headroom also catches the case where offset + len would wrap,
	 * without ever computing the overflowing sum.
	 * NOTE(review): assumes s_maxbytes is non-negative - confirm.
	 */
	if (len > inode->i_sb->s_maxbytes - offset)
		return -EFBIG;

	if (!file->f_op->fallocate)
		return -EOPNOTSUPP;

	file_start_write(file);
	ret = file->f_op->fallocate(file, mode, offset, len);

	/*
	 * Create inotify and fanotify events.
	 *
	 * To keep the logic simple always create events if fallocate succeeds.
	 * This implies that events are even created if the file size remains
	 * unchanged, e.g. when using flag FALLOC_FL_KEEP_SIZE.
	 */
	if (ret == 0)
		fsnotify_modify(file);

	file_end_write(file);
	return ret;
}
EXPORT_SYMBOL_GPL(vfs_fallocate);
325  
/**
 * ksys_fallocate - fallocate on a file descriptor
 * @fd: descriptor to operate on
 * @mode: FALLOC_FL_* flags, forwarded to vfs_fallocate()
 * @offset: start of the range
 * @len: length of the range
 *
 * Returns -EBADF if @fd is not open, otherwise the vfs_fallocate() result.
 */
int ksys_fallocate(int fd, int mode, loff_t offset, loff_t len)
{
	struct fd f = fdget(fd);
	int error;

	if (!f.file)
		return -EBADF;

	error = vfs_fallocate(f.file, mode, offset, len);
	fdput(f);
	return error;
}
337  
/* fallocate(2): entry point, forwards all four arguments to ksys_fallocate(). */
SYSCALL_DEFINE4(fallocate, int, fd, int, mode, loff_t, offset, loff_t, len)
{
	return ksys_fallocate(fd, mode, offset, len);
}
342  
343  /*
344   * access() needs to use the real uid/gid, not the effective uid/gid.
345   * We do this by temporarily clearing all FS-related capabilities and
346   * switching the fsuid/fsgid around to the real ones.
347   */
access_override_creds(void)348  static const struct cred *access_override_creds(void)
349  {
350  	const struct cred *old_cred;
351  	struct cred *override_cred;
352  
353  	override_cred = prepare_creds();
354  	if (!override_cred)
355  		return NULL;
356  
357  	override_cred->fsuid = override_cred->uid;
358  	override_cred->fsgid = override_cred->gid;
359  
360  	if (!issecure(SECURE_NO_SETUID_FIXUP)) {
361  		/* Clear the capabilities if we switch to a non-root user */
362  		kuid_t root_uid = make_kuid(override_cred->user_ns, 0);
363  		if (!uid_eq(override_cred->uid, root_uid))
364  			cap_clear(override_cred->cap_effective);
365  		else
366  			override_cred->cap_effective =
367  				override_cred->cap_permitted;
368  	}
369  
370  	/*
371  	 * The new set of credentials can *only* be used in
372  	 * task-synchronous circumstances, and does not need
373  	 * RCU freeing, unless somebody then takes a separate
374  	 * reference to it.
375  	 *
376  	 * NOTE! This is _only_ true because this credential
377  	 * is used purely for override_creds() that installs
378  	 * it as the subjective cred. Other threads will be
379  	 * accessing ->real_cred, not the subjective cred.
380  	 *
381  	 * If somebody _does_ make a copy of this (using the
382  	 * 'get_current_cred()' function), that will clear the
383  	 * non_rcu field, because now that other user may be
384  	 * expecting RCU freeing. But normal thread-synchronous
385  	 * cred accesses will keep things non-RCY.
386  	 */
387  	override_cred->non_rcu = 1;
388  
389  	old_cred = override_creds(override_cred);
390  
391  	/* override_cred() gets its own ref */
392  	put_cred(override_cred);
393  
394  	return old_cred;
395  }
396  
/**
 * do_faccessat - common implementation of access(), faccessat(), faccessat2()
 * @dfd: directory descriptor the lookup is relative to
 * @filename: user-space path
 * @mode: requested access; only bits within S_IRWXO are accepted
 * @flags: subset of AT_EACCESS | AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH
 *
 * Unless AT_EACCESS is given, the check runs under the credentials set up
 * by access_override_creds(), which are reverted before returning.
 *
 * Returns 0 if access is allowed, -EINVAL for bad @mode/@flags, -ENOMEM if
 * the override credentials cannot be prepared, -EACCES for exec requests on
 * a regular file on a noexec path, -EROFS for write requests on a read-only
 * mount, or the error from the lookup or inode_permission().
 */
static long do_faccessat(int dfd, const char __user *filename, int mode, int flags)
{
	unsigned int lookup_flags = LOOKUP_FOLLOW;
	const struct cred *old_cred = NULL;
	struct inode *inode;
	struct path path;
	int res;

	if (mode & ~S_IRWXO)	/* where's F_OK, X_OK, W_OK, R_OK? */
		return -EINVAL;
	if (flags & ~(AT_EACCESS | AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH))
		return -EINVAL;

	if (flags & AT_SYMLINK_NOFOLLOW)
		lookup_flags &= ~LOOKUP_FOLLOW;
	if (flags & AT_EMPTY_PATH)
		lookup_flags |= LOOKUP_EMPTY;

	if (!(flags & AT_EACCESS)) {
		old_cred = access_override_creds();
		if (!old_cred)
			return -ENOMEM;
	}

retry:
	res = user_path_at(dfd, filename, lookup_flags, &path);
	if (res)
		goto restore_creds;

	inode = d_backing_inode(path.dentry);

	/*
	 * MAY_EXEC on regular files is denied if the fs is mounted
	 * with the "noexec" flag.
	 */
	if ((mode & MAY_EXEC) && S_ISREG(inode->i_mode) && path_noexec(&path)) {
		res = -EACCES;
		goto release_path;
	}

	res = inode_permission(inode, mode | MAY_ACCESS);

	/*
	 * SuS v2 requires we report a read only fs too.
	 *
	 * This is a rare case where using __mnt_is_readonly()
	 * is OK without a mnt_want/drop_write() pair.  Since
	 * no actual write to the fs is performed here, we do
	 * not need to telegraph that to anyone.
	 *
	 * By doing this, we accept that this access is
	 * inherently racy and know that the fs may change
	 * state before we even see this result.
	 */
	if (!res && (mode & S_IWOTH) && !special_file(inode->i_mode) &&
	    __mnt_is_readonly(path.mnt))
		res = -EROFS;

release_path:
	path_put(&path);
	if (retry_estale(res, lookup_flags)) {
		lookup_flags |= LOOKUP_REVAL;
		goto retry;
	}
restore_creds:
	if (old_cred)
		revert_creds(old_cred);

	return res;
}
468  
/* faccessat(2): the flag-less variant, calls do_faccessat() with flags 0. */
SYSCALL_DEFINE3(faccessat, int, dfd, const char __user *, filename, int, mode)
{
	return do_faccessat(dfd, filename, mode, 0);
}
473  
/* faccessat2(2): like faccessat(2) but passes the caller's flags through. */
SYSCALL_DEFINE4(faccessat2, int, dfd, const char __user *, filename, int, mode,
		int, flags)
{
	return do_faccessat(dfd, filename, mode, flags);
}
479  
/* access(2): do_faccessat() relative to the current directory, flags 0. */
SYSCALL_DEFINE2(access, const char __user *, filename, int, mode)
{
	return do_faccessat(AT_FDCWD, filename, mode, 0);
}
484  
/*
 * chdir(2): look up @filename as a directory (following symlinks), check
 * MAY_EXEC | MAY_CHDIR on it and, if permitted, make it the working
 * directory of the current task.  The lookup is retried with LOOKUP_REVAL
 * when retry_estale() asks for it.
 */
SYSCALL_DEFINE1(chdir, const char __user *, filename)
{
	unsigned int lookup_flags = LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
	struct path path;
	int error;

	for (;;) {
		error = user_path_at(AT_FDCWD, filename, lookup_flags, &path);
		if (error)
			return error;

		error = inode_permission(path.dentry->d_inode,
					 MAY_EXEC | MAY_CHDIR);
		if (!error)
			set_fs_pwd(current->fs, &path);

		path_put(&path);
		if (!retry_estale(error, lookup_flags))
			return error;
		lookup_flags |= LOOKUP_REVAL;
	}
}
510  
/*
 * fchdir(2): make the directory referred to by @fd the working directory
 * of the current task.
 *
 * Returns -EBADF if @fd is not open, -ENOTDIR if its dentry cannot be used
 * for lookups, or the inode_permission() result for MAY_EXEC | MAY_CHDIR.
 */
SYSCALL_DEFINE1(fchdir, unsigned int, fd)
{
	struct fd f = fdget_raw(fd);
	int error;

	if (!f.file)
		return -EBADF;

	if (!d_can_lookup(f.file->f_path.dentry)) {
		error = -ENOTDIR;
	} else {
		error = inode_permission(file_inode(f.file),
					 MAY_EXEC | MAY_CHDIR);
		if (!error)
			set_fs_pwd(current->fs, &f.file->f_path);
	}

	fdput(f);
	return error;
}
532  
/*
 * chroot(2): look up @filename as a directory and make it the root of the
 * current task.
 *
 * The caller needs MAY_EXEC | MAY_CHDIR permission on the directory and
 * CAP_SYS_CHROOT in its user namespace (-EPERM otherwise), and the
 * security_path_chroot() hook must agree.  The lookup is retried with
 * LOOKUP_REVAL when retry_estale() asks for it.
 */
SYSCALL_DEFINE1(chroot, const char __user *, filename)
{
	unsigned int lookup_flags = LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
	struct path path;
	int error;

	for (;;) {
		error = user_path_at(AT_FDCWD, filename, lookup_flags, &path);
		if (error)
			return error;

		error = inode_permission(path.dentry->d_inode,
					 MAY_EXEC | MAY_CHDIR);
		if (!error && !ns_capable(current_user_ns(), CAP_SYS_CHROOT))
			error = -EPERM;
		if (!error)
			error = security_path_chroot(&path);
		if (!error)
			set_fs_root(current->fs, &path);

		path_put(&path);
		if (!retry_estale(error, lookup_flags))
			return error;
		lookup_flags |= LOOKUP_REVAL;
	}
}
565  
/**
 * chmod_common - change the permission bits of the inode at @path
 * @path: resolved path of the object
 * @mode: new mode; only the S_IALLUGO bits are taken from it
 *
 * Holds mount write access for the whole operation and the inode lock
 * around the security hook and notify_change().  If notify_change()
 * reports a delegated inode, waits for the delegation to be broken and,
 * if that wait succeeds, tries again.
 *
 * Returns 0 on success or a negative errno.
 */
int chmod_common(const struct path *path, umode_t mode)
{
	struct inode *inode = path->dentry->d_inode;
	struct inode *delegated_inode = NULL;
	struct iattr attr;
	int error;

	error = mnt_want_write(path->mnt);
	if (error)
		return error;

	do {
		inode_lock(inode);
		error = security_path_chmod(path, mode);
		if (!error) {
			/* Keep the file-type bits, replace the rest. */
			attr.ia_mode = (mode & S_IALLUGO) |
				       (inode->i_mode & ~S_IALLUGO);
			attr.ia_valid = ATTR_MODE | ATTR_CTIME;
			error = notify_change(path->dentry, &attr,
					      &delegated_inode);
		}
		inode_unlock(inode);

		if (!delegated_inode)
			break;
		error = break_deleg_wait(&delegated_inode);
	} while (!error);

	mnt_drop_write(path->mnt);
	return error;
}
594  
/**
 * vfs_fchmod - change the mode of an open file
 * @file: file whose path is passed to chmod_common()
 * @mode: new mode
 *
 * Records @file with audit_file() first, then returns the
 * chmod_common() result.
 */
int vfs_fchmod(struct file *file, umode_t mode)
{
	audit_file(file);
	return chmod_common(&file->f_path, mode);
}
600  
/* fchmod(2): -EBADF if @fd is not open, otherwise the vfs_fchmod() result. */
SYSCALL_DEFINE2(fchmod, unsigned int, fd, umode_t, mode)
{
	struct fd f = fdget(fd);
	int err;

	if (!f.file)
		return -EBADF;

	err = vfs_fchmod(f.file, mode);
	fdput(f);
	return err;
}
612  
/*
 * Common implementation of chmod() and fchmodat(): resolve @filename
 * relative to @dfd (following symlinks) and apply @mode through
 * chmod_common().  The lookup is retried with LOOKUP_REVAL when
 * retry_estale() asks for it.
 */
static int do_fchmodat(int dfd, const char __user *filename, umode_t mode)
{
	unsigned int lookup_flags = LOOKUP_FOLLOW;
	struct path path;
	int error;

retry:
	error = user_path_at(dfd, filename, lookup_flags, &path);
	if (error)
		return error;

	error = chmod_common(&path, mode);
	path_put(&path);
	if (retry_estale(error, lookup_flags)) {
		lookup_flags |= LOOKUP_REVAL;
		goto retry;
	}
	return error;
}
630  
/* fchmodat(2): entry point, forwards to do_fchmodat(). */
SYSCALL_DEFINE3(fchmodat, int, dfd, const char __user *, filename,
		umode_t, mode)
{
	return do_fchmodat(dfd, filename, mode);
}
636  
/* chmod(2): do_fchmodat() relative to the current working directory. */
SYSCALL_DEFINE2(chmod, const char __user *, filename, umode_t, mode)
{
	return do_fchmodat(AT_FDCWD, filename, mode);
}
641  
/**
 * chown_common - change owner and/or group of the inode at @path
 * @path: resolved path of the object
 * @user: new owner id in the caller's user namespace, or (uid_t)-1 to
 *        leave the owner alone
 * @group: new group id in the caller's user namespace, or (gid_t)-1 to
 *         leave the group alone
 *
 * For anything but a directory the suid/sgid bits and file privileges are
 * scheduled for removal as well.  If notify_change() reports a delegated
 * inode, waits for the delegation to be broken and, if that wait succeeds,
 * tries again.
 *
 * Returns 0 on success, -EINVAL if a requested id does not map to a valid
 * kuid/kgid, or the error from the security hook, notify_change() or
 * break_deleg_wait().
 */
int chown_common(const struct path *path, uid_t user, gid_t group)
{
	struct inode *inode = path->dentry->d_inode;
	struct inode *delegated_inode = NULL;
	struct iattr attr;
	kuid_t uid = make_kuid(current_user_ns(), user);
	kgid_t gid = make_kgid(current_user_ns(), group);
	int error;

	do {
		attr.ia_valid = ATTR_CTIME;

		if (user != (uid_t) -1) {
			if (!uid_valid(uid))
				return -EINVAL;
			attr.ia_valid |= ATTR_UID;
			attr.ia_uid = uid;
		}
		if (group != (gid_t) -1) {
			if (!gid_valid(gid))
				return -EINVAL;
			attr.ia_valid |= ATTR_GID;
			attr.ia_gid = gid;
		}
		if (!S_ISDIR(inode->i_mode))
			attr.ia_valid |= ATTR_KILL_SUID | ATTR_KILL_SGID |
					 ATTR_KILL_PRIV;

		inode_lock(inode);
		error = security_path_chown(path, uid, gid);
		if (!error)
			error = notify_change(path->dentry, &attr,
					      &delegated_inode);
		inode_unlock(inode);

		if (!delegated_inode)
			break;
		error = break_deleg_wait(&delegated_inode);
	} while (!error);

	return error;
}
683  
/**
 * do_fchownat - common implementation of chown(), lchown() and fchownat()
 * @dfd: directory descriptor the lookup is relative to
 * @filename: user-space path
 * @user: new owner id, or (uid_t)-1 to leave it unchanged
 * @group: new group id, or (gid_t)-1 to leave it unchanged
 * @flag: subset of AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH
 *
 * Resolves the path, takes mount write access and calls chown_common().
 * The lookup is retried with LOOKUP_REVAL when retry_estale() asks for it.
 *
 * Returns 0 on success, -EINVAL for unknown bits in @flag, or the error
 * from the lookup, mnt_want_write() or chown_common().
 */
int do_fchownat(int dfd, const char __user *filename, uid_t user, gid_t group,
		int flag)
{
	struct path path;
	/* Unsigned bit mask, like the lookup flags in the other helpers here. */
	unsigned int lookup_flags;
	int error;

	if (flag & ~(AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH))
		return -EINVAL;

	lookup_flags = (flag & AT_SYMLINK_NOFOLLOW) ? 0 : LOOKUP_FOLLOW;
	if (flag & AT_EMPTY_PATH)
		lookup_flags |= LOOKUP_EMPTY;
retry:
	error = user_path_at(dfd, filename, lookup_flags, &path);
	if (error)
		return error;

	error = mnt_want_write(path.mnt);
	if (!error) {
		error = chown_common(&path, user, group);
		mnt_drop_write(path.mnt);
	}

	path_put(&path);
	if (retry_estale(error, lookup_flags)) {
		lookup_flags |= LOOKUP_REVAL;
		goto retry;
	}
	return error;
}
715  
/* fchownat(2): entry point, forwards all arguments to do_fchownat(). */
SYSCALL_DEFINE5(fchownat, int, dfd, const char __user *, filename, uid_t, user,
		gid_t, group, int, flag)
{
	return do_fchownat(dfd, filename, user, group, flag);
}
721  
/* chown(2): do_fchownat() relative to the current directory, no flags. */
SYSCALL_DEFINE3(chown, const char __user *, filename, uid_t, user, gid_t, group)
{
	return do_fchownat(AT_FDCWD, filename, user, group, 0);
}
726  
/* lchown(2): like chown(2), but passes AT_SYMLINK_NOFOLLOW to the lookup. */
SYSCALL_DEFINE3(lchown, const char __user *, filename, uid_t, user, gid_t, group)
{
	return do_fchownat(AT_FDCWD, filename, user, group,
			   AT_SYMLINK_NOFOLLOW);
}
732  
/**
 * vfs_fchown - change owner and/or group of an open file
 * @file: file whose path is passed to chown_common()
 * @user: new owner id, or (uid_t)-1 to leave it unchanged
 * @group: new group id, or (gid_t)-1 to leave it unchanged
 *
 * Holds mount write access on @file around the change.  Returns the
 * mnt_want_write_file() error if that fails, otherwise the
 * chown_common() result.
 */
int vfs_fchown(struct file *file, uid_t user, gid_t group)
{
	int error = mnt_want_write_file(file);

	if (error)
		return error;

	audit_file(file);
	error = chown_common(&file->f_path, user, group);
	mnt_drop_write_file(file);

	return error;
}
745  
/**
 * ksys_fchown - fchown on a file descriptor
 * @fd: descriptor to operate on
 * @user: new owner id, forwarded to vfs_fchown()
 * @group: new group id, forwarded to vfs_fchown()
 *
 * Returns -EBADF if @fd is not open, otherwise the vfs_fchown() result.
 */
int ksys_fchown(unsigned int fd, uid_t user, gid_t group)
{
	struct fd f = fdget(fd);
	int error;

	if (!f.file)
		return -EBADF;

	error = vfs_fchown(f.file, user, group);
	fdput(f);
	return error;
}
757  
/* fchown(2): entry point, forwards to ksys_fchown(). */
SYSCALL_DEFINE3(fchown, unsigned int, fd, uid_t, user, gid_t, group)
{
	return ksys_fchown(fd, user, group);
}
762  
do_dentry_open(struct file * f,struct inode * inode,int (* open)(struct inode *,struct file *))763  static int do_dentry_open(struct file *f,
764  			  struct inode *inode,
765  			  int (*open)(struct inode *, struct file *))
766  {
767  	static const struct file_operations empty_fops = {};
768  	int error;
769  
770  	path_get(&f->f_path);
771  	f->f_inode = inode;
772  	f->f_mapping = inode->i_mapping;
773  	f->f_wb_err = filemap_sample_wb_err(f->f_mapping);
774  	f->f_sb_err = file_sample_sb_err(f);
775  
776  	if (unlikely(f->f_flags & O_PATH)) {
777  		f->f_mode = FMODE_PATH | FMODE_OPENED;
778  		f->f_op = &empty_fops;
779  		return 0;
780  	}
781  
782  	if (f->f_mode & FMODE_WRITE && !special_file(inode->i_mode)) {
783  		error = get_write_access(inode);
784  		if (unlikely(error))
785  			goto cleanup_file;
786  		error = __mnt_want_write(f->f_path.mnt);
787  		if (unlikely(error)) {
788  			put_write_access(inode);
789  			goto cleanup_file;
790  		}
791  		f->f_mode |= FMODE_WRITER;
792  	}
793  
794  	/* POSIX.1-2008/SUSv4 Section XSI 2.9.7 */
795  	if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode))
796  		f->f_mode |= FMODE_ATOMIC_POS;
797  
798  	f->f_op = fops_get(inode->i_fop);
799  	if (WARN_ON(!f->f_op)) {
800  		error = -ENODEV;
801  		goto cleanup_all;
802  	}
803  
804  	error = security_file_open(f);
805  	if (error)
806  		goto cleanup_all;
807  
808  	error = break_lease(locks_inode(f), f->f_flags);
809  	if (error)
810  		goto cleanup_all;
811  
812  	/* normally all 3 are set; ->open() can clear them if needed */
813  	f->f_mode |= FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE;
814  	if (!open)
815  		open = f->f_op->open;
816  	if (open) {
817  		error = open(inode, f);
818  		if (error)
819  			goto cleanup_all;
820  	}
821  	f->f_mode |= FMODE_OPENED;
822  	if ((f->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
823  		i_readcount_inc(inode);
824  	if ((f->f_mode & FMODE_READ) &&
825  	     likely(f->f_op->read || f->f_op->read_iter))
826  		f->f_mode |= FMODE_CAN_READ;
827  	if ((f->f_mode & FMODE_WRITE) &&
828  	     likely(f->f_op->write || f->f_op->write_iter))
829  		f->f_mode |= FMODE_CAN_WRITE;
830  
831  	f->f_write_hint = WRITE_LIFE_NOT_SET;
832  	f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
833  
834  	file_ra_state_init(&f->f_ra, f->f_mapping->host->i_mapping);
835  
836  	/* NB: we're sure to have correct a_ops only after f_op->open */
837  	if (f->f_flags & O_DIRECT) {
838  		if (!f->f_mapping->a_ops || !f->f_mapping->a_ops->direct_IO)
839  			return -EINVAL;
840  	}
841  
842  	/*
843  	 * XXX: Huge page cache doesn't support writing yet. Drop all page
844  	 * cache for this file before processing writes.
845  	 */
846  	if ((f->f_mode & FMODE_WRITE) && filemap_nr_thps(inode->i_mapping))
847  		truncate_pagecache(inode, 0);
848  
849  	return 0;
850  
851  cleanup_all:
852  	if (WARN_ON_ONCE(error > 0))
853  		error = -EINVAL;
854  	fops_put(f->f_op);
855  	if (f->f_mode & FMODE_WRITER) {
856  		put_write_access(inode);
857  		__mnt_drop_write(f->f_path.mnt);
858  	}
859  cleanup_file:
860  	path_put(&f->f_path);
861  	f->f_path.mnt = NULL;
862  	f->f_path.dentry = NULL;
863  	f->f_inode = NULL;
864  	return error;
865  }
866  
867  /**
868   * finish_open - finish opening a file
869   * @file: file pointer
870   * @dentry: pointer to dentry
871   * @open: open callback
872   * @opened: state of open
873   *
874   * This can be used to finish opening a file passed to i_op->atomic_open().
875   *
876   * If the open callback is set to NULL, then the standard f_op->open()
877   * filesystem callback is substituted.
878   *
879   * NB: the dentry reference is _not_ consumed.  If, for example, the dentry is
880   * the return value of d_splice_alias(), then the caller needs to perform dput()
881   * on it after finish_open().
882   *
883   * Returns zero on success or -errno if the open failed.
884   */
finish_open(struct file * file,struct dentry * dentry,int (* open)(struct inode *,struct file *))885  int finish_open(struct file *file, struct dentry *dentry,
886  		int (*open)(struct inode *, struct file *))
887  {
888  	BUG_ON(file->f_mode & FMODE_OPENED); /* once it's opened, it's opened */
889  
890  	file->f_path.dentry = dentry;
891  	return do_dentry_open(file, d_backing_inode(dentry), open);
892  }
893  EXPORT_SYMBOL(finish_open);
894  
895  /**
896   * finish_no_open - finish ->atomic_open() without opening the file
897   *
898   * @file: file pointer
899   * @dentry: dentry or NULL (as returned from ->lookup())
900   *
901   * This can be used to set the result of a successful lookup in ->atomic_open().
902   *
903   * NB: unlike finish_open() this function does consume the dentry reference and
904   * the caller need not dput() it.
905   *
906   * Returns "0" which must be the return value of ->atomic_open() after having
907   * called this function.
908   */
finish_no_open(struct file * file,struct dentry * dentry)909  int finish_no_open(struct file *file, struct dentry *dentry)
910  {
911  	file->f_path.dentry = dentry;
912  	return 0;
913  }
914  EXPORT_SYMBOL(finish_no_open);
915  
file_path(struct file * filp,char * buf,int buflen)916  char *file_path(struct file *filp, char *buf, int buflen)
917  {
918  	return d_path(&filp->f_path, buf, buflen);
919  }
920  EXPORT_SYMBOL(file_path);
921  
922  /**
923   * vfs_open - open the file at the given path
924   * @path: path to open
925   * @file: newly allocated file with f_flag initialized
926   * @cred: credentials to use
927   */
vfs_open(const struct path * path,struct file * file)928  int vfs_open(const struct path *path, struct file *file)
929  {
930  	file->f_path = *path;
931  	return do_dentry_open(file, d_backing_inode(path->dentry), NULL);
932  }
933  
dentry_open(const struct path * path,int flags,const struct cred * cred)934  struct file *dentry_open(const struct path *path, int flags,
935  			 const struct cred *cred)
936  {
937  	int error;
938  	struct file *f;
939  
940  	validate_creds(cred);
941  
942  	/* We must always pass in a valid mount pointer. */
943  	BUG_ON(!path->mnt);
944  
945  	f = alloc_empty_file(flags, cred);
946  	if (!IS_ERR(f)) {
947  		error = vfs_open(path, f);
948  		if (error) {
949  			fput(f);
950  			f = ERR_PTR(error);
951  		}
952  	}
953  	return f;
954  }
955  EXPORT_SYMBOL(dentry_open);
956  
open_with_fake_path(const struct path * path,int flags,struct inode * inode,const struct cred * cred)957  struct file *open_with_fake_path(const struct path *path, int flags,
958  				struct inode *inode, const struct cred *cred)
959  {
960  	struct file *f = alloc_empty_file_noaccount(flags, cred);
961  	if (!IS_ERR(f)) {
962  		int error;
963  
964  		f->f_path = *path;
965  		error = do_dentry_open(f, inode, NULL);
966  		if (error) {
967  			fput(f);
968  			f = ERR_PTR(error);
969  		}
970  	}
971  	return f;
972  }
973  EXPORT_SYMBOL(open_with_fake_path);
974  
/*
 * WILL_CREATE() is non-zero when @flags contains O_CREAT or __O_TMPFILE.
 * The argument is parenthesized so that compound expressions such as
 * "a | b" are masked as a whole.
 */
#define WILL_CREATE(flags)	((flags) & (O_CREAT | __O_TMPFILE))
/* Mask applied to the open flags when O_PATH is set. */
#define O_PATH_FLAGS		(O_DIRECTORY | O_NOFOLLOW | O_PATH | O_CLOEXEC)
977  
/**
 * build_open_how - turn legacy open flags and mode into a struct open_how
 * @flags: open flags; bits outside VALID_OPEN_FLAGS are dropped
 * @mode: creation mode; bits outside S_IALLUGO are dropped
 *
 * With O_PATH set, only the flags in O_PATH_FLAGS are kept.  The mode is
 * zeroed unless the resulting flags ask for file creation.
 */
inline struct open_how build_open_how(int flags, umode_t mode)
{
	struct open_how how = {
		.flags = flags & VALID_OPEN_FLAGS,
		.mode = mode & S_IALLUGO,
	};

	/* O_PATH beats everything else. */
	if (how.flags & O_PATH)
		how.flags &= O_PATH_FLAGS;

	/* Modes should only be set for create-like flags. */
	if (WILL_CREATE(how.flags))
		return how;

	how.mode = 0;
	return how;
}
993  
build_open_flags(const struct open_how * how,struct open_flags * op)994  inline int build_open_flags(const struct open_how *how, struct open_flags *op)
995  {
996  	u64 flags = how->flags;
997  	u64 strip = FMODE_NONOTIFY | O_CLOEXEC;
998  	int lookup_flags = 0;
999  	int acc_mode = ACC_MODE(flags);
1000  
1001  	BUILD_BUG_ON_MSG(upper_32_bits(VALID_OPEN_FLAGS),
1002  			 "struct open_flags doesn't yet handle flags > 32 bits");
1003  
1004  	/*
1005  	 * Strip flags that either shouldn't be set by userspace like
1006  	 * FMODE_NONOTIFY or that aren't relevant in determining struct
1007  	 * open_flags like O_CLOEXEC.
1008  	 */
1009  	flags &= ~strip;
1010  
1011  	/*
1012  	 * Older syscalls implicitly clear all of the invalid flags or argument
1013  	 * values before calling build_open_flags(), but openat2(2) checks all
1014  	 * of its arguments.
1015  	 */
1016  	if (flags & ~VALID_OPEN_FLAGS)
1017  		return -EINVAL;
1018  	if (how->resolve & ~VALID_RESOLVE_FLAGS)
1019  		return -EINVAL;
1020  
1021  	/* Scoping flags are mutually exclusive. */
1022  	if ((how->resolve & RESOLVE_BENEATH) && (how->resolve & RESOLVE_IN_ROOT))
1023  		return -EINVAL;
1024  
1025  	/* Deal with the mode. */
1026  	if (WILL_CREATE(flags)) {
1027  		if (how->mode & ~S_IALLUGO)
1028  			return -EINVAL;
1029  		op->mode = how->mode | S_IFREG;
1030  	} else {
1031  		if (how->mode != 0)
1032  			return -EINVAL;
1033  		op->mode = 0;
1034  	}
1035  
1036  	/*
1037  	 * In order to ensure programs get explicit errors when trying to use
1038  	 * O_TMPFILE on old kernels, O_TMPFILE is implemented such that it
1039  	 * looks like (O_DIRECTORY|O_RDWR & ~O_CREAT) to old kernels. But we
1040  	 * have to require userspace to explicitly set it.
1041  	 */
1042  	if (flags & __O_TMPFILE) {
1043  		if ((flags & O_TMPFILE_MASK) != O_TMPFILE)
1044  			return -EINVAL;
1045  		if (!(acc_mode & MAY_WRITE))
1046  			return -EINVAL;
1047  	}
1048  	if (flags & O_PATH) {
1049  		/* O_PATH only permits certain other flags to be set. */
1050  		if (flags & ~O_PATH_FLAGS)
1051  			return -EINVAL;
1052  		acc_mode = 0;
1053  	}
1054  
1055  	/*
1056  	 * O_SYNC is implemented as __O_SYNC|O_DSYNC.  As many places only
1057  	 * check for O_DSYNC if the need any syncing at all we enforce it's
1058  	 * always set instead of having to deal with possibly weird behaviour
1059  	 * for malicious applications setting only __O_SYNC.
1060  	 */
1061  	if (flags & __O_SYNC)
1062  		flags |= O_DSYNC;
1063  
1064  	op->open_flag = flags;
1065  
1066  	/* O_TRUNC implies we need access checks for write permissions */
1067  	if (flags & O_TRUNC)
1068  		acc_mode |= MAY_WRITE;
1069  
1070  	/* Allow the LSM permission hook to distinguish append
1071  	   access from general write access. */
1072  	if (flags & O_APPEND)
1073  		acc_mode |= MAY_APPEND;
1074  
1075  	op->acc_mode = acc_mode;
1076  
1077  	op->intent = flags & O_PATH ? 0 : LOOKUP_OPEN;
1078  
1079  	if (flags & O_CREAT) {
1080  		op->intent |= LOOKUP_CREATE;
1081  		if (flags & O_EXCL) {
1082  			op->intent |= LOOKUP_EXCL;
1083  			flags |= O_NOFOLLOW;
1084  		}
1085  	}
1086  
1087  	if (flags & O_DIRECTORY)
1088  		lookup_flags |= LOOKUP_DIRECTORY;
1089  	if (!(flags & O_NOFOLLOW))
1090  		lookup_flags |= LOOKUP_FOLLOW;
1091  
1092  	if (how->resolve & RESOLVE_NO_XDEV)
1093  		lookup_flags |= LOOKUP_NO_XDEV;
1094  	if (how->resolve & RESOLVE_NO_MAGICLINKS)
1095  		lookup_flags |= LOOKUP_NO_MAGICLINKS;
1096  	if (how->resolve & RESOLVE_NO_SYMLINKS)
1097  		lookup_flags |= LOOKUP_NO_SYMLINKS;
1098  	if (how->resolve & RESOLVE_BENEATH)
1099  		lookup_flags |= LOOKUP_BENEATH;
1100  	if (how->resolve & RESOLVE_IN_ROOT)
1101  		lookup_flags |= LOOKUP_IN_ROOT;
1102  
1103  	op->lookup_flags = lookup_flags;
1104  	return 0;
1105  }
1106  
1107  /**
1108   * file_open_name - open file and return file pointer
1109   *
1110   * @name:	struct filename containing path to open
1111   * @flags:	open flags as per the open(2) second argument
1112   * @mode:	mode for the new file if O_CREAT is set, else ignored
1113   *
1114   * This is the helper to open a file from kernelspace if you really
1115   * have to.  But in generally you should not do this, so please move
1116   * along, nothing to see here..
1117   */
file_open_name(struct filename * name,int flags,umode_t mode)1118  struct file *file_open_name(struct filename *name, int flags, umode_t mode)
1119  {
1120  	struct open_flags op;
1121  	struct open_how how = build_open_how(flags, mode);
1122  	int err = build_open_flags(&how, &op);
1123  	if (err)
1124  		return ERR_PTR(err);
1125  	return do_filp_open(AT_FDCWD, name, &op);
1126  }
1127  
1128  /**
1129   * filp_open - open file and return file pointer
1130   *
1131   * @filename:	path to open
1132   * @flags:	open flags as per the open(2) second argument
1133   * @mode:	mode for the new file if O_CREAT is set, else ignored
1134   *
1135   * This is the helper to open a file from kernelspace if you really
1136   * have to.  But in generally you should not do this, so please move
1137   * along, nothing to see here..
1138   */
filp_open(const char * filename,int flags,umode_t mode)1139  struct file *filp_open(const char *filename, int flags, umode_t mode)
1140  {
1141  	struct filename *name = getname_kernel(filename);
1142  	struct file *file = ERR_CAST(name);
1143  
1144  	if (!IS_ERR(name)) {
1145  		file = file_open_name(name, flags, mode);
1146  		putname(name);
1147  	}
1148  	return file;
1149  }
1150  EXPORT_SYMBOL(filp_open);
1151  
/*
 * Open @filename through do_file_open_root() with @root as the root path.
 * @flags and @mode are interpreted as by build_open_how().
 *
 * Returns the opened file, or an ERR_PTR() on failure.
 */
struct file *file_open_root(const struct path *root,
			    const char *filename, int flags, umode_t mode)
{
	struct open_how how = build_open_how(flags, mode);
	struct open_flags op;
	int err;

	err = build_open_flags(&how, &op);
	if (!err)
		return do_file_open_root(root, filename, &op);

	return ERR_PTR(err);
}
EXPORT_SYMBOL(file_open_root);
1163  
/*
 * Common implementation behind the open-family syscalls: validate @how,
 * copy in the user path, reserve a descriptor, open the file and install
 * it.
 *
 * Returns the new file descriptor, or a negative errno.
 */
static long do_sys_openat2(int dfd, const char __user *filename,
			   struct open_how *how)
{
	struct open_flags op;
	struct filename *name;
	struct file *file;
	int ret;

	ret = build_open_flags(how, &op);
	if (ret)
		return ret;

	name = getname(filename);
	if (IS_ERR(name))
		return PTR_ERR(name);

	ret = get_unused_fd_flags(how->flags);
	if (ret < 0)
		goto out_putname;

	file = do_filp_open(dfd, name, &op);
	if (IS_ERR(file)) {
		/* Give the reserved descriptor back before reporting. */
		put_unused_fd(ret);
		ret = PTR_ERR(file);
		goto out_putname;
	}

	fsnotify_open(file);
	fd_install(ret, file);

out_putname:
	putname(name);
	return ret;
}
1192  
/*
 * open()/openat() style entry point: wrap @flags and @mode in a
 * struct open_how and hand over to do_sys_openat2().
 */
long do_sys_open(int dfd, const char __user *filename, int flags, umode_t mode)
{
	struct open_how how;

	how = build_open_how(flags, mode);
	return do_sys_openat2(dfd, filename, &how);
}
1198  
1199  
SYSCALL_DEFINE3(open, const char __user *, filename, int, flags, umode_t, mode)
{
	/* O_LARGEFILE is added whenever force_o_largefile() says so. */
	const int lfs = force_o_largefile() ? O_LARGEFILE : 0;

	return do_sys_open(AT_FDCWD, filename, flags | lfs, mode);
}
1206  
SYSCALL_DEFINE4(openat, int, dfd, const char __user *, filename, int, flags,
		umode_t, mode)
{
	/* O_LARGEFILE is added whenever force_o_largefile() says so. */
	const int lfs = force_o_largefile() ? O_LARGEFILE : 0;

	return do_sys_open(dfd, filename, flags | lfs, mode);
}
1214  
SYSCALL_DEFINE4(openat2, int, dfd, const char __user *, filename,
		struct open_how __user *, how, size_t, usize)
{
	struct open_how khow;
	int err;

	BUILD_BUG_ON(sizeof(struct open_how) < OPEN_HOW_SIZE_VER0);
	BUILD_BUG_ON(sizeof(struct open_how) != OPEN_HOW_SIZE_LATEST);

	/* The caller's struct must be at least as large as version 0. */
	if (unlikely(usize < OPEN_HOW_SIZE_VER0))
		return -EINVAL;

	err = copy_struct_from_user(&khow, sizeof(khow), how, usize);
	if (err)
		return err;

	/* O_LARGEFILE is only allowed for non-O_PATH. */
	if (!(khow.flags & O_PATH)) {
		if (force_o_largefile())
			khow.flags |= O_LARGEFILE;
	}

	return do_sys_openat2(dfd, filename, &khow);
}
1237  
1238  #ifdef CONFIG_COMPAT
1239  /*
1240   * Exactly like sys_open(), except that it doesn't set the
1241   * O_LARGEFILE flag.
1242   */
COMPAT_SYSCALL_DEFINE3(open,const char __user *,filename,int,flags,umode_t,mode)1243  COMPAT_SYSCALL_DEFINE3(open, const char __user *, filename, int, flags, umode_t, mode)
1244  {
1245  	return do_sys_open(AT_FDCWD, filename, flags, mode);
1246  }
1247  
1248  /*
1249   * Exactly like sys_openat(), except that it doesn't set the
1250   * O_LARGEFILE flag.
1251   */
COMPAT_SYSCALL_DEFINE4(openat,int,dfd,const char __user *,filename,int,flags,umode_t,mode)1252  COMPAT_SYSCALL_DEFINE4(openat, int, dfd, const char __user *, filename, int, flags, umode_t, mode)
1253  {
1254  	return do_sys_open(dfd, filename, flags, mode);
1255  }
1256  #endif
1257  
1258  #ifndef __alpha__
1259  
1260  /*
1261   * For backward compatibility?  Maybe this should be moved
1262   * into arch/i386 instead?
1263   */
SYSCALL_DEFINE2(creat,const char __user *,pathname,umode_t,mode)1264  SYSCALL_DEFINE2(creat, const char __user *, pathname, umode_t, mode)
1265  {
1266  	int flags = O_CREAT | O_WRONLY | O_TRUNC;
1267  
1268  	if (force_o_largefile())
1269  		flags |= O_LARGEFILE;
1270  	return do_sys_open(AT_FDCWD, pathname, flags, mode);
1271  }
1272  #endif
1273  
1274  /*
1275   * "id" is the POSIX thread ID. We use the
1276   * files pointer for this..
1277   */
filp_close(struct file * filp,fl_owner_t id)1278  int filp_close(struct file *filp, fl_owner_t id)
1279  {
1280  	int retval = 0;
1281  
1282  	if (!file_count(filp)) {
1283  		printk(KERN_ERR "VFS: Close: file count is 0\n");
1284  		return 0;
1285  	}
1286  
1287  	if (filp->f_op->flush)
1288  		retval = filp->f_op->flush(filp, id);
1289  
1290  	if (likely(!(filp->f_mode & FMODE_PATH))) {
1291  		dnotify_flush(filp, id);
1292  		locks_remove_posix(filp, id);
1293  	}
1294  	fput(filp);
1295  	return retval;
1296  }
1297  
1298  EXPORT_SYMBOL(filp_close);
1299  
1300  /*
1301   * Careful here! We test whether the file pointer is NULL before
1302   * releasing the fd. This ensures that one clone task can't release
1303   * an fd while another clone is opening it.
1304   */
SYSCALL_DEFINE1(close,unsigned int,fd)1305  SYSCALL_DEFINE1(close, unsigned int, fd)
1306  {
1307  	int retval = __close_fd(current->files, fd);
1308  
1309  	/* can't restart close syscall because file table entry was cleared */
1310  	if (unlikely(retval == -ERESTARTSYS ||
1311  		     retval == -ERESTARTNOINTR ||
1312  		     retval == -ERESTARTNOHAND ||
1313  		     retval == -ERESTART_RESTARTBLOCK))
1314  		retval = -EINTR;
1315  
1316  	return retval;
1317  }
1318  
1319  /**
1320   * close_range() - Close all file descriptors in a given range.
1321   *
1322   * @fd:     starting file descriptor to close
1323   * @max_fd: last file descriptor to close
1324   * @flags:  reserved for future extensions
1325   *
1326   * This closes a range of file descriptors. All file descriptors
1327   * from @fd up to and including @max_fd are closed.
1328   * Currently, errors to close a given file descriptor are ignored.
1329   */
SYSCALL_DEFINE3(close_range,unsigned int,fd,unsigned int,max_fd,unsigned int,flags)1330  SYSCALL_DEFINE3(close_range, unsigned int, fd, unsigned int, max_fd,
1331  		unsigned int, flags)
1332  {
1333  	return __close_range(fd, max_fd, flags);
1334  }
1335  
1336  /*
1337   * This routine simulates a hangup on the tty, to arrange that users
1338   * are given clean terminals at login time.
1339   */
SYSCALL_DEFINE0(vhangup)1340  SYSCALL_DEFINE0(vhangup)
1341  {
1342  	if (capable(CAP_SYS_TTY_CONFIG)) {
1343  		tty_vhangup_self();
1344  		return 0;
1345  	}
1346  	return -EPERM;
1347  }
1348  
1349  /*
1350   * Called when an inode is about to be open.
1351   * We use this to disallow opening large files on 32bit systems if
1352   * the caller didn't specify O_LARGEFILE.  On 64bit systems we force
1353   * on this flag in sys_open.
1354   */
generic_file_open(struct inode * inode,struct file * filp)1355  int generic_file_open(struct inode * inode, struct file * filp)
1356  {
1357  	if (!(filp->f_flags & O_LARGEFILE) && i_size_read(inode) > MAX_NON_LFS)
1358  		return -EOVERFLOW;
1359  	return 0;
1360  }
1361  
1362  EXPORT_SYMBOL(generic_file_open);
1363  
1364  /*
1365   * This is used by subsystems that don't want seekable
1366   * file descriptors. The function is not supposed to ever fail, the only
1367   * reason it returns an 'int' and not 'void' is so that it can be plugged
1368   * directly into file_operations structure.
1369   */
nonseekable_open(struct inode * inode,struct file * filp)1370  int nonseekable_open(struct inode *inode, struct file *filp)
1371  {
1372  	filp->f_mode &= ~(FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE);
1373  	return 0;
1374  }
1375  
1376  EXPORT_SYMBOL(nonseekable_open);
1377  
1378  /*
1379   * stream_open is used by subsystems that want stream-like file descriptors.
1380   * Such file descriptors are not seekable and don't have notion of position
1381   * (file.f_pos is always 0 and ppos passed to .read()/.write() is always NULL).
1382   * Contrary to file descriptors of other regular files, .read() and .write()
1383   * can run simultaneously.
1384   *
1385   * stream_open never fails and is marked to return int so that it could be
1386   * directly used as file_operations.open .
1387   */
stream_open(struct inode * inode,struct file * filp)1388  int stream_open(struct inode *inode, struct file *filp)
1389  {
1390  	filp->f_mode &= ~(FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE | FMODE_ATOMIC_POS);
1391  	filp->f_mode |= FMODE_STREAM;
1392  	return 0;
1393  }
1394  
1395  EXPORT_SYMBOL(stream_open);
1396