1 /*
2   FUSE: Filesystem in Userspace
3   Copyright (C) 2001-2008  Miklos Szeredi <miklos@szeredi.hu>
4 
5   This program can be distributed under the terms of the GNU GPL.
6   See the file COPYING.
7 */
8 
9 #include "fuse_i.h"
10 
11 #include <linux/fdtable.h>
12 #include <linux/pagemap.h>
13 #include <linux/file.h>
14 #include <linux/filter.h>
15 #include <linux/fs_context.h>
16 #include <linux/sched.h>
17 #include <linux/namei.h>
18 #include <linux/slab.h>
19 #include <linux/xattr.h>
20 #include <linux/iversion.h>
21 #include <linux/posix_acl.h>
22 #include <linux/security.h>
23 #include <linux/types.h>
24 #include <linux/kernel.h>
25 #include <trace/hooks/tmpfile.h>
26 
27 #include "../internal.h"
28 
29 static void fuse_advise_use_readdirplus(struct inode *dir)
30 {
31 	struct fuse_inode *fi = get_fuse_inode(dir);
32 
33 	set_bit(FUSE_I_ADVISE_RDPLUS, &fi->state);
34 }
35 
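/*
 * Dentry timeout storage: when a u64 fits in a pointer and no separate
 * struct fuse_dentry is allocated, the expiry time (in jiffies) is kept
 * directly in ->d_fsdata; otherwise it lives in the struct fuse_dentry
 * that ->d_fsdata points to.
 */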
36 #if BITS_PER_LONG >= 64 && !defined(CONFIG_FUSE_BPF)
37 static inline void __fuse_dentry_settime(struct dentry *entry, u64 time)
38 {
39 	entry->d_fsdata = (void *) time;
40 }
41 
42 static inline u64 fuse_dentry_time(const struct dentry *entry)
43 {
44 	return (u64)entry->d_fsdata;
45 }
46 
47 #else
48 
49 static inline void __fuse_dentry_settime(struct dentry *dentry, u64 time)
50 {
51 	((struct fuse_dentry *) dentry->d_fsdata)->time = time;
52 }
53 
54 static inline u64 fuse_dentry_time(const struct dentry *entry)
55 {
56 	return ((struct fuse_dentry *) entry->d_fsdata)->time;
57 }
58 #endif
59 
60 static void fuse_dentry_settime(struct dentry *dentry, u64 time)
61 {
62 	struct fuse_conn *fc = get_fuse_conn_super(dentry->d_sb);
63 	bool delete = !time && fc->delete_stale;
64 	/*
65 	 * Mess with DCACHE_OP_DELETE because dput() will be faster without it.
66 	 * Don't care about races, either way it's just an optimization
67 	 */
68 	if ((!delete && (dentry->d_flags & DCACHE_OP_DELETE)) ||
69 	    (delete && !(dentry->d_flags & DCACHE_OP_DELETE))) {
70 		spin_lock(&dentry->d_lock);
71 		if (!delete)
72 			dentry->d_flags &= ~DCACHE_OP_DELETE;
73 		else
74 			dentry->d_flags |= DCACHE_OP_DELETE;
75 		spin_unlock(&dentry->d_lock);
76 	}
77 
78 	__fuse_dentry_settime(dentry, time);
79 }
80 
81 void fuse_init_dentry_root(struct dentry *root, struct file *backing_dir)
82 {
83 #ifdef CONFIG_FUSE_BPF
84 	struct fuse_dentry *fuse_dentry = root->d_fsdata;
85 
86 	if (backing_dir) {
87 		fuse_dentry->backing_path = backing_dir->f_path;
88 		path_get(&fuse_dentry->backing_path);
89 	}
90 #endif
91 }
92 
93 /*
94  * Set dentry and possibly attribute timeouts from the lookup/mk*
95  * replies
96  */
97 void fuse_change_entry_timeout(struct dentry *entry, struct fuse_entry_out *o)
98 {
99 	fuse_dentry_settime(entry,
100 		time_to_jiffies(o->entry_valid, o->entry_valid_nsec));
101 }
102 
103 u64 entry_attr_timeout(struct fuse_entry_out *o)
104 {
105 	return time_to_jiffies(o->attr_valid, o->attr_valid_nsec);
106 }
107 
108 static void fuse_invalidate_attr_mask(struct inode *inode, u32 mask)
109 {
110 	set_mask_bits(&get_fuse_inode(inode)->inval_mask, 0, mask);
111 }
112 
113 /*
114  * Mark the attributes as stale, so that at the next call to
115  * ->getattr() they will be fetched from userspace
116  */
117 void fuse_invalidate_attr(struct inode *inode)
118 {
119 	fuse_invalidate_attr_mask(inode, STATX_BASIC_STATS);
120 }
121 
122 static void fuse_dir_changed(struct inode *dir)
123 {
124 	fuse_invalidate_attr(dir);
125 	inode_maybe_inc_iversion(dir, false);
126 }
127 
128 /*
129  * Mark the attributes as stale due to an atime change.  Avoid the invalidate if
130  * atime is not used.
131  */
132 void fuse_invalidate_atime(struct inode *inode)
133 {
134 	if (!IS_RDONLY(inode))
135 		fuse_invalidate_attr_mask(inode, STATX_ATIME);
136 }
137 
138 /*
139  * Just mark the entry as stale, so that a next attempt to look it up
140  * will result in a new lookup call to userspace
141  *
142  * This is called when a dentry is about to become negative and the
143  * timeout is unknown (unlink, rmdir, rename and in some cases
144  * lookup)
145  */
146 void fuse_invalidate_entry_cache(struct dentry *entry)
147 {
148 	fuse_dentry_settime(entry, 0);
149 }
150 
151 /*
152  * Same as fuse_invalidate_entry_cache(), but also try to remove the
153  * dentry from the hash
154  */
155 static void fuse_invalidate_entry(struct dentry *entry)
156 {
157 	d_invalidate(entry);
158 	fuse_invalidate_entry_cache(entry);
159 }
160 
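/*
 * Fill in a FUSE_LOOKUP request: the name is the single input argument;
 * the reply carries a fuse_entry_out and, for servers that support it,
 * an additional fuse_entry_bpf_out (hence the variable-size out args).
 */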
161 static void fuse_lookup_init(struct fuse_conn *fc, struct fuse_args *args,
162 			     u64 nodeid, const struct qstr *name,
163 			     struct fuse_entry_out *outarg,
164 			     struct fuse_entry_bpf_out *bpf_outarg)
165 {
166 	memset(outarg, 0, sizeof(struct fuse_entry_out));
167 	args->opcode = FUSE_LOOKUP;
168 	args->nodeid = nodeid;
169 	args->in_numargs = 1;
170 	args->in_args[0].size = name->len + 1;
171 	args->in_args[0].value = name->name;
172 	args->out_argvar = true;
173 	args->out_numargs = 2;
174 	args->out_args[0].size = sizeof(struct fuse_entry_out);
175 	args->out_args[0].value = outarg;
176 	args->out_args[1].size = sizeof(struct fuse_entry_bpf_out);
177 	args->out_args[1].value = bpf_outarg;
178 }
179 
180 #ifdef CONFIG_FUSE_BPF
181 static bool backing_data_changed(struct fuse_inode *fi, struct dentry *entry,
182 				 struct fuse_entry_bpf *bpf_arg)
183 {
184 	struct path new_backing_path;
185 	struct inode *new_backing_inode;
186 	struct bpf_prog *bpf = NULL;
187 	int err;
188 	bool ret = true;
189 
190 	if (!entry || !fi->backing_inode) {
191 		ret = false;
192 		goto put_backing_file;
193 	}
194 
195 	get_fuse_backing_path(entry, &new_backing_path);
196 	new_backing_inode = fi->backing_inode;
197 	ihold(new_backing_inode);
198 
199 	err = fuse_handle_backing(bpf_arg, &new_backing_inode, &new_backing_path);
200 
201 	if (err)
202 		goto put_inode;
203 
204 	err = fuse_handle_bpf_prog(bpf_arg, entry->d_parent->d_inode, &bpf);
205 	if (err)
206 		goto put_bpf;
207 
208 	ret = (bpf != fi->bpf || fi->backing_inode != new_backing_inode ||
209 			!path_equal(&get_fuse_dentry(entry)->backing_path, &new_backing_path));
210 put_bpf:
211 	if (bpf)
212 		bpf_prog_put(bpf);
213 put_inode:
214 	iput(new_backing_inode);
215 	path_put(&new_backing_path);
216 put_backing_file:
217 	if (bpf_arg->backing_file)
218 		fput(bpf_arg->backing_file);
219 	return ret;
220 }
221 #endif
222 
223 /*
224  * Check whether the dentry is still valid
225  *
226  * If the entry validity timeout has expired and the dentry is
227  * positive, try to redo the lookup.  If the lookup results in a
228  * different inode, then let the VFS invalidate the dentry and redo
229  * the lookup once more.  If the lookup results in the same inode,
230  * then refresh the attributes, timeouts and mark the dentry valid.
231  */
232 static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags)
233 {
234 	struct inode *inode;
235 	struct dentry *parent;
236 	struct fuse_mount *fm;
237 	struct fuse_inode *fi;
238 	int ret;
239 
240 	inode = d_inode_rcu(entry);
241 	if (inode && fuse_is_bad(inode))
242 		goto invalid;
243 
244 #ifdef CONFIG_FUSE_BPF
245 	/* TODO: Do we need bpf support for revalidate?
246 	 * If the lower filesystem says the entry is invalid, FUSE probably shouldn't
247 	 * try to fix that without going through the normal lookup path...
248 	 */
249 	if (get_fuse_dentry(entry)->backing_path.dentry) {
250 		ret = fuse_revalidate_backing(entry, flags);
251 		if (ret <= 0) {
252 			goto out;
253 		}
254 	}
255 #endif
256 	if (time_before64(fuse_dentry_time(entry), get_jiffies_64()) ||
257 		 (flags & (LOOKUP_EXCL | LOOKUP_REVAL | LOOKUP_RENAME_TARGET))) {
258 		struct fuse_entry_out outarg;
259 		struct fuse_entry_bpf bpf_arg;
260 		FUSE_ARGS(args);
261 		struct fuse_forget_link *forget;
262 		u64 attr_version;
263 
264 		/* For negative dentries, always do a fresh lookup */
265 		if (!inode)
266 			goto invalid;
267 
268 		ret = -ECHILD;
269 		if (flags & LOOKUP_RCU)
270 			goto out;
271 		fm = get_fuse_mount(inode);
272 
273 		parent = dget_parent(entry);
274 
275 #ifdef CONFIG_FUSE_BPF
276 		/* TODO: Once we're handling timeouts for backing inodes, do a
277 		 * bpf based lookup_revalidate here.
278 		 */
279 		if (get_fuse_inode(parent->d_inode)->backing_inode) {
280 			dput(parent);
281 			ret = 1;
282 			goto out;
283 		}
284 #endif
285 		forget = fuse_alloc_forget();
286 		ret = -ENOMEM;
287 		if (!forget) {
288 			dput(parent);
289 			goto out;
290 		}
291 
292 		attr_version = fuse_get_attr_version(fm->fc);
293 
294 		fuse_lookup_init(fm->fc, &args, get_node_id(d_inode(parent)),
295 				 &entry->d_name, &outarg, &bpf_arg.out);
296 		ret = fuse_simple_request(fm, &args);
297 		dput(parent);
298 
299 		/* Zero nodeid is same as -ENOENT */
300 		if (!ret && !outarg.nodeid)
301 			ret = -ENOENT;
302 		if (!ret || ret == sizeof(bpf_arg.out)) {
303 			fi = get_fuse_inode(inode);
304 			if (outarg.nodeid != get_node_id(inode) ||
305 #ifdef CONFIG_FUSE_BPF
306 			    (ret == sizeof(bpf_arg.out) &&
307 					    backing_data_changed(fi, entry, &bpf_arg)) ||
308 #endif
309 			    (bool) IS_AUTOMOUNT(inode) != (bool) (outarg.attr.flags & FUSE_ATTR_SUBMOUNT)) {
310 				fuse_queue_forget(fm->fc, forget,
311 						  outarg.nodeid, 1);
312 				goto invalid;
313 			}
314 			spin_lock(&fi->lock);
315 			fi->nlookup++;
316 			spin_unlock(&fi->lock);
317 		}
318 		kfree(forget);
319 		if (ret == -ENOMEM || ret == -EINTR)
320 			goto out;
321 		if (ret || fuse_invalid_attr(&outarg.attr) ||
322 		    fuse_stale_inode(inode, outarg.generation, &outarg.attr))
323 			goto invalid;
324 
325 		forget_all_cached_acls(inode);
326 		fuse_change_attributes(inode, &outarg.attr,
327 				       entry_attr_timeout(&outarg),
328 				       attr_version);
329 		fuse_change_entry_timeout(entry, &outarg);
330 	} else if (inode) {
331 		fi = get_fuse_inode(inode);
332 		if (flags & LOOKUP_RCU) {
333 			if (test_bit(FUSE_I_INIT_RDPLUS, &fi->state))
334 				return -ECHILD;
335 		} else if (test_and_clear_bit(FUSE_I_INIT_RDPLUS, &fi->state)) {
336 			parent = dget_parent(entry);
337 			fuse_advise_use_readdirplus(d_inode(parent));
338 			dput(parent);
339 		}
340 	}
341 	ret = 1;
342 out:
343 	return ret;
344 
345 invalid:
346 	ret = 0;
347 	goto out;
348 }
349 
350 #if BITS_PER_LONG < 64 || defined(CONFIG_FUSE_BPF)
351 static int fuse_dentry_init(struct dentry *dentry)
352 {
353 	dentry->d_fsdata = kzalloc(sizeof(struct fuse_dentry),
354 				   GFP_KERNEL_ACCOUNT | __GFP_RECLAIMABLE);
355 
356 	return dentry->d_fsdata ? 0 : -ENOMEM;
357 }
358 static void fuse_dentry_release(struct dentry *dentry)
359 {
360 	struct fuse_dentry *fd = dentry->d_fsdata;
361 
362 #ifdef CONFIG_FUSE_BPF
363 	if (fd && fd->backing_path.dentry)
364 		path_put(&fd->backing_path);
365 
366 	if (fd && fd->bpf)
367 		bpf_prog_put(fd->bpf);
368 #endif
369 
370 	kfree_rcu(fd, rcu);
371 }
372 #endif
373 
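/*
 * ->d_delete(): report an expired entry timeout so the dcache drops the
 * dentry on the last dput() instead of keeping it around.
 */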
374 static int fuse_dentry_delete(const struct dentry *dentry)
375 {
376 	return time_before64(fuse_dentry_time(dentry), get_jiffies_64());
377 }
378 
379 /*
380  * Create a fuse_mount object with a new superblock (with path->dentry
381  * as the root), and return that mount so it can be auto-mounted on
382  * @path.
383  */
384 static struct vfsmount *fuse_dentry_automount(struct path *path)
385 {
386 	struct fs_context *fsc;
387 	struct vfsmount *mnt;
388 	struct fuse_inode *mp_fi = get_fuse_inode(d_inode(path->dentry));
389 
390 	fsc = fs_context_for_submount(path->mnt->mnt_sb->s_type, path->dentry);
391 	if (IS_ERR(fsc))
392 		return ERR_CAST(fsc);
393 
394 	/* Pass the FUSE inode of the mount for fuse_get_tree_submount() */
395 	fsc->fs_private = mp_fi;
396 
397 	/* Create the submount */
398 	mnt = fc_mount(fsc);
399 	if (!IS_ERR(mnt))
400 		mntget(mnt);
401 
402 	put_fs_context(fsc);
403 	return mnt;
404 }
405 
406 /*
407  * Get the canonical path. Since we must translate to a path, this must be done
408  * in the context of the userspace daemon, however, the userspace daemon cannot
409  * look up paths on its own. Instead, we handle the lookup as a special case
410  * inside of the write request.
411  */
412 static void fuse_dentry_canonical_path(const struct path *path,
413 				       struct path *canonical_path)
414 {
415 	struct inode *inode = d_inode(path->dentry);
416 	//struct fuse_conn *fc = get_fuse_conn(inode);
417 	struct fuse_mount *fm = get_fuse_mount_super(path->mnt->mnt_sb);
418 	FUSE_ARGS(args);
419 	char *path_name;
420 	int err;
421 
422 #ifdef CONFIG_FUSE_BPF
423 	struct fuse_err_ret fer;
424 
425 	fer = fuse_bpf_backing(inode, struct fuse_dummy_io,
426 			       fuse_canonical_path_initialize,
427 			       fuse_canonical_path_backing,
428 			       fuse_canonical_path_finalize, path,
429 			       canonical_path);
430 	if (fer.ret) {
431 		if (IS_ERR(fer.result))
432 			canonical_path->dentry = fer.result;
433 		return;
434 	}
435 #endif
436 
437 	path_name = (char *)get_zeroed_page(GFP_KERNEL);
438 	if (!path_name) {
439 		canonical_path->dentry = ERR_PTR(-ENOMEM);
440 		return;
441 	}
442 
443 	args.opcode = FUSE_CANONICAL_PATH;
444 	args.nodeid = get_node_id(inode);
445 	args.in_numargs = 0;
446 	args.out_numargs = 1;
447 	args.out_args[0].size = PATH_MAX;
448 	args.out_args[0].value = path_name;
449 	args.canonical_path = canonical_path;
450 	args.out_argvar = 1;
451 
452 	err = fuse_simple_request(fm, &args);
453 	free_page((unsigned long)path_name);
454 	if (err > 0)
455 		return;
456 	if (err < 0) {
457 		canonical_path->dentry = ERR_PTR(err);
458 		return;
459 	}
460 
461 	canonical_path->dentry = path->dentry;
462 	canonical_path->mnt = path->mnt;
463 	path_get(canonical_path);
464 	return;
465 }
466 
467 const struct dentry_operations fuse_dentry_operations = {
468 	.d_revalidate	= fuse_dentry_revalidate,
469 	.d_delete	= fuse_dentry_delete,
470 #if BITS_PER_LONG < 64 || defined(CONFIG_FUSE_BPF)
471 	.d_init		= fuse_dentry_init,
472 	.d_release	= fuse_dentry_release,
473 #endif
474 	.d_automount	= fuse_dentry_automount,
475 	.d_canonical_path = fuse_dentry_canonical_path,
476 };
477 
478 const struct dentry_operations fuse_root_dentry_operations = {
479 #if BITS_PER_LONG < 64 || defined(CONFIG_FUSE_BPF)
480 	.d_init		= fuse_dentry_init,
481 	.d_release	= fuse_dentry_release,
482 #endif
483 };
484 
485 int fuse_valid_type(int m)
486 {
487 	return S_ISREG(m) || S_ISDIR(m) || S_ISLNK(m) || S_ISCHR(m) ||
488 		S_ISBLK(m) || S_ISFIFO(m) || S_ISSOCK(m);
489 }
490 
491 bool fuse_invalid_attr(struct fuse_attr *attr)
492 {
493 	return !fuse_valid_type(attr->mode) ||
494 		attr->size > LLONG_MAX;
495 }
496 
497 int fuse_lookup_name(struct super_block *sb, u64 nodeid, const struct qstr *name,
498 		     struct fuse_entry_out *outarg,
499 		     struct dentry *entry,
500 		     struct inode **inode)
501 {
502 	struct fuse_mount *fm = get_fuse_mount_super(sb);
503 	FUSE_ARGS(args);
504 	struct fuse_entry_bpf bpf_arg = {0};
505 	struct fuse_forget_link *forget;
506 	u64 attr_version;
507 	int err;
508 
509 	*inode = NULL;
510 	err = -ENAMETOOLONG;
511 	if (name->len > FUSE_NAME_MAX)
512 		goto out;
513 
514 	forget = fuse_alloc_forget();
515 	err = -ENOMEM;
516 	if (!forget)
517 		goto out;
518 
519 	attr_version = fuse_get_attr_version(fm->fc);
520 
521 	fuse_lookup_init(fm->fc, &args, nodeid, name, outarg, &bpf_arg.out);
522 	err = fuse_simple_request(fm, &args);
523 
524 #ifdef CONFIG_FUSE_BPF
525 	if (err == sizeof(bpf_arg.out)) {
526 		/* TODO Make sure this handles invalid handles */
527 		struct file *backing_file;
528 		struct inode *backing_inode;
529 
530 		err = -ENOENT;
531 		if (!entry)
532 			goto out_put_forget;
533 
534 		err = -EINVAL;
535 		backing_file = bpf_arg.backing_file;
536 		if (!backing_file)
537 			goto out_put_forget;
538 
539 		if (IS_ERR(backing_file)) {
540 			err = PTR_ERR(backing_file);
541 			goto out_put_forget;
542 		}
543 
544 		backing_inode = backing_file->f_inode;
545 		*inode = fuse_iget_backing(sb, outarg->nodeid, backing_inode);
546 		if (!*inode)
547 			goto out_put_forget;
548 
549 		err = fuse_handle_backing(&bpf_arg,
550 				&get_fuse_inode(*inode)->backing_inode,
551 				&get_fuse_dentry(entry)->backing_path);
552 		if (!err)
553 			err = fuse_handle_bpf_prog(&bpf_arg, NULL,
554 					   &get_fuse_inode(*inode)->bpf);
555 		if (err) {
556 			iput(*inode);
557 			*inode = NULL;
558 			goto out_put_forget;
559 		}
560 	} else
561 #endif
562 	{
563 		/* Zero nodeid is same as -ENOENT, but with valid timeout */
564 		if (err || !outarg->nodeid)
565 			goto out_put_forget;
566 
567 		err = -EIO;
568 		if (!outarg->nodeid)
569 			goto out_put_forget;
570 		if (fuse_invalid_attr(&outarg->attr))
571 			goto out_put_forget;
572 
573 		*inode = fuse_iget(sb, outarg->nodeid, outarg->generation,
574 				   &outarg->attr, entry_attr_timeout(outarg),
575 				   attr_version);
576 	}
577 
578 	err = -ENOMEM;
579 	if (!*inode && outarg->nodeid) {
580 		fuse_queue_forget(fm->fc, forget, outarg->nodeid, 1);
581 		goto out;
582 	}
583 	err = 0;
584 
585  out_put_forget:
586 	kfree(forget);
587  out:
588 	if (bpf_arg.backing_file)
589 		fput(bpf_arg.backing_file);
590 	return err;
591 }
592 
593 static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
594 				  unsigned int flags)
595 {
596 	int err;
597 	struct fuse_entry_out outarg;
598 	struct inode *inode;
599 	struct dentry *newent;
600 	bool outarg_valid = true;
601 	bool locked;
602 
603 #ifdef CONFIG_FUSE_BPF
604 	struct fuse_err_ret fer;
605 
606 	fer = fuse_bpf_backing(dir, struct fuse_lookup_io,
607 			       fuse_lookup_initialize, fuse_lookup_backing,
608 			       fuse_lookup_finalize,
609 			       dir, entry, flags);
610 	if (fer.ret)
611 		return fer.result;
612 #endif
613 
614 	if (fuse_is_bad(dir))
615 		return ERR_PTR(-EIO);
616 
617 	locked = fuse_lock_inode(dir);
618 	err = fuse_lookup_name(dir->i_sb, get_node_id(dir), &entry->d_name,
619 			       &outarg, entry, &inode);
620 	fuse_unlock_inode(dir, locked);
621 	if (err == -ENOENT) {
622 		outarg_valid = false;
623 		err = 0;
624 	}
625 	if (err)
626 		goto out_err;
627 
628 	err = -EIO;
629 	if (inode && get_node_id(inode) == FUSE_ROOT_ID)
630 		goto out_iput;
631 
632 	newent = d_splice_alias(inode, entry);
633 	err = PTR_ERR(newent);
634 	if (IS_ERR(newent))
635 		goto out_err;
636 
637 	entry = newent ? newent : entry;
638 	if (outarg_valid)
639 		fuse_change_entry_timeout(entry, &outarg);
640 	else
641 		fuse_invalidate_entry_cache(entry);
642 
643 	if (inode)
644 		fuse_advise_use_readdirplus(dir);
645 	return newent;
646 
647  out_iput:
648 	iput(inode);
649  out_err:
650 	return ERR_PTR(err);
651 }
652 
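/*
 * Build the security context payload sent with creation requests.  The
 * buffer holds a fuse_secctx_header followed (if an LSM provided a
 * context) by a fuse_secctx record, the NUL-terminated xattr name
 * (e.g. "security.selinux") and the raw context value, padded to
 * FUSE_REC_ALIGN.
 */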
653 static int get_security_context(struct dentry *entry, umode_t mode,
654 				void **security_ctx, u32 *security_ctxlen)
655 {
656 	struct fuse_secctx *fctx;
657 	struct fuse_secctx_header *header;
658 	void *ctx = NULL, *ptr;
659 	u32 ctxlen, total_len = sizeof(*header);
660 	int err, nr_ctx = 0;
661 	const char *name;
662 	size_t namelen;
663 
664 	err = security_dentry_init_security(entry, mode, &entry->d_name,
665 					    &name, &ctx, &ctxlen);
666 	if (err) {
667 		if (err != -EOPNOTSUPP)
668 			goto out_err;
669 		/* No LSM is supporting this security hook. Ignore error */
670 		ctxlen = 0;
671 		ctx = NULL;
672 	}
673 
674 	if (ctxlen) {
675 		nr_ctx = 1;
676 		namelen = strlen(name) + 1;
677 		err = -EIO;
678 		if (WARN_ON(namelen > XATTR_NAME_MAX + 1 || ctxlen > S32_MAX))
679 			goto out_err;
680 		total_len += FUSE_REC_ALIGN(sizeof(*fctx) + namelen + ctxlen);
681 	}
682 
683 	err = -ENOMEM;
684 	header = ptr = kzalloc(total_len, GFP_KERNEL);
685 	if (!ptr)
686 		goto out_err;
687 
688 	header->nr_secctx = nr_ctx;
689 	header->size = total_len;
690 	ptr += sizeof(*header);
691 	if (nr_ctx) {
692 		fctx = ptr;
693 		fctx->size = ctxlen;
694 		ptr += sizeof(*fctx);
695 
696 		strcpy(ptr, name);
697 		ptr += namelen;
698 
699 		memcpy(ptr, ctx, ctxlen);
700 	}
701 	*security_ctxlen = total_len;
702 	*security_ctx = header;
703 	err = 0;
704 out_err:
705 	kfree(ctx);
706 	return err;
707 }
708 
709 /*
710  * Atomic create+open operation
711  *
712  * If the filesystem doesn't support this, then fall back to separate
713  * 'mknod' + 'open' requests.
714  */
715 static int fuse_create_open(struct inode *dir, struct dentry *entry,
716 			    struct file *file, unsigned int flags,
717 			    umode_t mode)
718 {
719 	int err;
720 	struct inode *inode;
721 	struct fuse_conn *fc = get_fuse_conn(dir);
722 	struct fuse_mount *fm = get_fuse_mount(dir);
723 	FUSE_ARGS(args);
724 	struct fuse_forget_link *forget;
725 	struct fuse_create_in inarg;
726 	struct fuse_open_out outopen;
727 	struct fuse_entry_out outentry;
728 	struct fuse_inode *fi;
729 	struct fuse_file *ff;
730 	void *security_ctx = NULL;
731 	u32 security_ctxlen;
732 	bool trunc = flags & O_TRUNC;
733 
734 	/* Userspace expects S_IFREG in create mode */
735 	BUG_ON((mode & S_IFMT) != S_IFREG);
736 
737 #ifdef CONFIG_FUSE_BPF
738 	{
739 		struct fuse_err_ret fer;
740 
741 		fer = fuse_bpf_backing(dir, struct fuse_create_open_io,
742 				       fuse_create_open_initialize,
743 				       fuse_create_open_backing,
744 				       fuse_create_open_finalize,
745 				       dir, entry, file, flags, mode);
746 		if (fer.ret)
747 			return PTR_ERR(fer.result);
748 	}
749 #endif
750 
751 	forget = fuse_alloc_forget();
752 	err = -ENOMEM;
753 	if (!forget)
754 		goto out_err;
755 
756 	err = -ENOMEM;
757 	ff = fuse_file_alloc(fm);
758 	if (!ff)
759 		goto out_put_forget_req;
760 
761 	if (!fm->fc->dont_mask)
762 		mode &= ~current_umask();
763 
764 	flags &= ~O_NOCTTY;
765 	memset(&inarg, 0, sizeof(inarg));
766 	memset(&outentry, 0, sizeof(outentry));
767 	inarg.flags = flags;
768 	inarg.mode = mode;
769 	inarg.umask = current_umask();
770 
771 	if (fm->fc->handle_killpriv_v2 && trunc &&
772 	    !(flags & O_EXCL) && !capable(CAP_FSETID)) {
773 		inarg.open_flags |= FUSE_OPEN_KILL_SUIDGID;
774 	}
775 
776 	args.opcode = FUSE_CREATE;
777 	args.nodeid = get_node_id(dir);
778 	args.in_numargs = 2;
779 	args.in_args[0].size = sizeof(inarg);
780 	args.in_args[0].value = &inarg;
781 	args.in_args[1].size = entry->d_name.len + 1;
782 	args.in_args[1].value = entry->d_name.name;
783 	args.out_numargs = 2;
784 	args.out_args[0].size = sizeof(outentry);
785 	args.out_args[0].value = &outentry;
786 	args.out_args[1].size = sizeof(outopen);
787 	args.out_args[1].value = &outopen;
788 
789 	if (fm->fc->init_security) {
790 		err = get_security_context(entry, mode, &security_ctx,
791 					   &security_ctxlen);
792 		if (err)
793 			goto out_put_forget_req;
794 
795 		args.in_numargs = 3;
796 		args.in_args[2].size = security_ctxlen;
797 		args.in_args[2].value = security_ctx;
798 	}
799 
800 	err = fuse_simple_request(fm, &args);
801 	kfree(security_ctx);
802 	if (err)
803 		goto out_free_ff;
804 
805 	err = -EIO;
806 	if (!S_ISREG(outentry.attr.mode) || invalid_nodeid(outentry.nodeid) ||
807 	    fuse_invalid_attr(&outentry.attr))
808 		goto out_free_ff;
809 
810 	ff->fh = outopen.fh;
811 	ff->nodeid = outentry.nodeid;
812 	ff->open_flags = outopen.open_flags;
813 	fuse_passthrough_setup(fc, ff, &outopen);
814 	inode = fuse_iget(dir->i_sb, outentry.nodeid, outentry.generation,
815 			  &outentry.attr, entry_attr_timeout(&outentry), 0);
816 	if (!inode) {
817 		flags &= ~(O_CREAT | O_EXCL | O_TRUNC);
818 		fuse_sync_release(NULL, ff, flags);
819 		fuse_queue_forget(fm->fc, forget, outentry.nodeid, 1);
820 		err = -ENOMEM;
821 		goto out_err;
822 	}
823 	kfree(forget);
824 	d_instantiate(entry, inode);
825 	fuse_change_entry_timeout(entry, &outentry);
826 	fuse_dir_changed(dir);
827 	err = finish_open(file, entry, generic_file_open);
828 	if (err) {
829 		fi = get_fuse_inode(inode);
830 		fuse_sync_release(fi, ff, flags);
831 	} else {
832 		file->private_data = ff;
833 		fuse_finish_open(inode, file);
834 		if (fm->fc->atomic_o_trunc && trunc)
835 			truncate_pagecache(inode, 0);
836 		else if (!(ff->open_flags & FOPEN_KEEP_CACHE))
837 			invalidate_inode_pages2(inode->i_mapping);
838 	}
839 	return err;
840 
841 out_free_ff:
842 	fuse_file_free(ff);
843 out_put_forget_req:
844 	kfree(forget);
845 out_err:
846 	return err;
847 }
848 
849 static int fuse_mknod(struct user_namespace *, struct inode *, struct dentry *,
850 		      umode_t, dev_t);
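/*
 * ->atomic_open(): look the entry up if necessary, then try the atomic
 * FUSE_CREATE path.  If the server returns -ENOSYS, remember that and
 * fall back to separate mknod + open requests.
 */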
851 static int fuse_atomic_open(struct inode *dir, struct dentry *entry,
852 			    struct file *file, unsigned flags,
853 			    umode_t mode)
854 {
855 	int err;
856 	struct fuse_conn *fc = get_fuse_conn(dir);
857 	struct dentry *res = NULL;
858 
859 	if (fuse_is_bad(dir))
860 		return -EIO;
861 
862 	if (d_in_lookup(entry)) {
863 		res = fuse_lookup(dir, entry, 0);
864 		if (IS_ERR(res))
865 			return PTR_ERR(res);
866 
867 		if (res)
868 			entry = res;
869 	}
870 
871 	if (!(flags & O_CREAT) || d_really_is_positive(entry))
872 		goto no_open;
873 
874 	/* Only creates */
875 	file->f_mode |= FMODE_CREATED;
876 
877 	if (fc->no_create)
878 		goto mknod;
879 
880 	err = fuse_create_open(dir, entry, file, flags, mode);
881 	if (err == -ENOSYS) {
882 		fc->no_create = 1;
883 		goto mknod;
884 	}
885 out_dput:
886 	dput(res);
887 	return err;
888 
889 mknod:
890 	err = fuse_mknod(&init_user_ns, dir, entry, mode, 0);
891 	if (err)
892 		goto out_dput;
893 no_open:
894 	return finish_no_open(file, res);
895 }
896 
897 /*
898  * Code shared between mknod, mkdir, symlink and link
899  */
900 static int create_new_entry(struct fuse_mount *fm, struct fuse_args *args,
901 			    struct inode *dir, struct dentry *entry,
902 			    umode_t mode)
903 {
904 	struct fuse_entry_out outarg;
905 	struct inode *inode;
906 	struct dentry *d;
907 	int err;
908 	struct fuse_forget_link *forget;
909 	void *security_ctx = NULL;
910 	u32 security_ctxlen;
911 	int err_nlink = 0;
912 	bool skip_splice = false;
913 
914 	if (fuse_is_bad(dir))
915 		return -EIO;
916 
917 	forget = fuse_alloc_forget();
918 	if (!forget)
919 		return -ENOMEM;
920 
921 	memset(&outarg, 0, sizeof(outarg));
922 	args->nodeid = get_node_id(dir);
923 	args->out_numargs = 1;
924 	args->out_args[0].size = sizeof(outarg);
925 	args->out_args[0].value = &outarg;
926 
927 	if (fm->fc->init_security && args->opcode != FUSE_LINK) {
928 		bool skip_ctxargset = false;
929 		err = get_security_context(entry, mode, &security_ctx,
930 					   &security_ctxlen);
931 		if (err)
932 			goto out_put_forget_req;
933 
934 		trace_android_vh_tmpfile_secctx(args, security_ctxlen,
935 						security_ctx, &skip_ctxargset);
936 		BUG_ON(args->in_numargs != 2);
937 		if (!skip_ctxargset) {
938 			args->in_numargs = 3;
939 			args->in_args[2].size = security_ctxlen;
940 			args->in_args[2].value = security_ctx;
941 		}
942 	}
943 
944 	err = fuse_simple_request(fm, args);
945 	kfree(security_ctx);
946 	if (err)
947 		goto out_put_forget_req;
948 
949 	err = -EIO;
950 	if (invalid_nodeid(outarg.nodeid) || fuse_invalid_attr(&outarg.attr))
951 		goto out_put_forget_req;
952 
953 	if ((outarg.attr.mode ^ mode) & S_IFMT)
954 		goto out_put_forget_req;
955 
956 	inode = fuse_iget(dir->i_sb, outarg.nodeid, outarg.generation,
957 			  &outarg.attr, entry_attr_timeout(&outarg), 0);
958 	if (!inode) {
959 		fuse_queue_forget(fm->fc, forget, outarg.nodeid, 1);
960 		return -ENOMEM;
961 	}
962 	trace_android_vh_tmpfile_create_check_inode(args, inode, &err_nlink);
963 	if (err_nlink) {
964 		fuse_queue_forget(fm->fc, forget, outarg.nodeid, 1);
965 		return err_nlink;
966 	}
967 	kfree(forget);
968 
969 	d_drop(entry);
970 	trace_android_rvh_tmpfile_create(args, &d, entry, inode, &skip_splice);
971 	if (!skip_splice)
972 		d = d_splice_alias(inode, entry);
973 	if (IS_ERR(d))
974 		return PTR_ERR(d);
975 
976 	if (d) {
977 		fuse_change_entry_timeout(d, &outarg);
978 		dput(d);
979 	} else {
980 		fuse_change_entry_timeout(entry, &outarg);
981 	}
982 	fuse_dir_changed(dir);
983 	return 0;
984 
985  out_put_forget_req:
986 	kfree(forget);
987 	return err;
988 }
989 
990 static int fuse_mknod(struct user_namespace *mnt_userns, struct inode *dir,
991 		      struct dentry *entry, umode_t mode, dev_t rdev)
992 {
993 	struct fuse_mknod_in inarg;
994 	struct fuse_mount *fm = get_fuse_mount(dir);
995 	FUSE_ARGS(args);
996 
997 #ifdef CONFIG_FUSE_BPF
998 	struct fuse_err_ret fer;
999 
1000 	fer = fuse_bpf_backing(dir, struct fuse_mknod_in,
1001 			fuse_mknod_initialize, fuse_mknod_backing,
1002 			fuse_mknod_finalize,
1003 			dir, entry, mode, rdev);
1004 	if (fer.ret)
1005 		return PTR_ERR(fer.result);
1006 #endif
1007 
1008 	if (!fm->fc->dont_mask)
1009 		mode &= ~current_umask();
1010 
1011 	memset(&inarg, 0, sizeof(inarg));
1012 	inarg.mode = mode;
1013 	inarg.rdev = new_encode_dev(rdev);
1014 	inarg.umask = current_umask();
1015 	args.opcode = FUSE_MKNOD;
1016 	args.in_numargs = 2;
1017 	args.in_args[0].size = sizeof(inarg);
1018 	args.in_args[0].value = &inarg;
1019 	args.in_args[1].size = entry->d_name.len + 1;
1020 	args.in_args[1].value = entry->d_name.name;
1021 	return create_new_entry(fm, &args, dir, entry, mode);
1022 }
1023 
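/* Plain create is implemented as FUSE_MKNOD with a zero device number */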
1024 static int fuse_create(struct user_namespace *mnt_userns, struct inode *dir,
1025 		       struct dentry *entry, umode_t mode, bool excl)
1026 {
1027 	return fuse_mknod(&init_user_ns, dir, entry, mode, 0);
1028 }
1029 
1030 static int fuse_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
1031 		      struct dentry *entry, umode_t mode)
1032 {
1033 	struct fuse_mkdir_in inarg;
1034 	struct fuse_mount *fm = get_fuse_mount(dir);
1035 	FUSE_ARGS(args);
1036 
1037 #ifdef CONFIG_FUSE_BPF
1038 	struct fuse_err_ret fer;
1039 
1040 	fer = fuse_bpf_backing(dir, struct fuse_mkdir_in,
1041 			fuse_mkdir_initialize, fuse_mkdir_backing,
1042 			fuse_mkdir_finalize,
1043 			dir, entry, mode);
1044 	if (fer.ret)
1045 		return PTR_ERR(fer.result);
1046 #endif
1047 
1048 	if (!fm->fc->dont_mask)
1049 		mode &= ~current_umask();
1050 
1051 	memset(&inarg, 0, sizeof(inarg));
1052 	inarg.mode = mode;
1053 	inarg.umask = current_umask();
1054 	args.opcode = FUSE_MKDIR;
1055 	args.in_numargs = 2;
1056 	args.in_args[0].size = sizeof(inarg);
1057 	args.in_args[0].value = &inarg;
1058 	args.in_args[1].size = entry->d_name.len + 1;
1059 	args.in_args[1].value = entry->d_name.name;
1060 	return create_new_entry(fm, &args, dir, entry, S_IFDIR);
1061 }
1062 
1063 static int fuse_tmpfile(struct user_namespace *mnt_userns, struct inode *dir,
1064 			struct dentry *entry, umode_t mode)
1065 {
1066 	int ret = -EOPNOTSUPP;
1067 
1068 	trace_android_rvh_tmpfile_handle_op(dir, entry, mode, &create_new_entry,
1069 					    &ret);
1070 	return ret;
1071 }
1072 
1073 static int fuse_symlink(struct user_namespace *mnt_userns, struct inode *dir,
1074 			struct dentry *entry, const char *link)
1075 {
1076 	struct fuse_mount *fm = get_fuse_mount(dir);
1077 	unsigned len = strlen(link) + 1;
1078 	FUSE_ARGS(args);
1079 
1080 #ifdef CONFIG_FUSE_BPF
1081 	struct fuse_err_ret fer;
1082 
1083 	fer = fuse_bpf_backing(dir, struct fuse_dummy_io,
1084 			fuse_symlink_initialize, fuse_symlink_backing,
1085 			fuse_symlink_finalize,
1086 			dir, entry, link, len);
1087 	if (fer.ret)
1088 		return PTR_ERR(fer.result);
1089 #endif
1090 
1091 	args.opcode = FUSE_SYMLINK;
1092 	args.in_numargs = 2;
1093 	args.in_args[0].size = entry->d_name.len + 1;
1094 	args.in_args[0].value = entry->d_name.name;
1095 	args.in_args[1].size = len;
1096 	args.in_args[1].value = link;
1097 	return create_new_entry(fm, &args, dir, entry, S_IFLNK);
1098 }
1099 
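/*
 * Write back dirty inode metadata (e.g. cached time updates) and record
 * any resulting error on the mapping so it is reported later.
 */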
1100 void fuse_flush_time_update(struct inode *inode)
1101 {
1102 	int err = sync_inode_metadata(inode, 1);
1103 
1104 	mapping_set_error(inode->i_mapping, err);
1105 }
1106 
1107 void fuse_update_ctime(struct inode *inode)
1108 {
1109 	if (!IS_NOCMTIME(inode)) {
1110 		inode->i_ctime = current_time(inode);
1111 		mark_inode_dirty_sync(inode);
1112 		fuse_flush_time_update(inode);
1113 	}
1114 }
1115 
1116 static int fuse_unlink(struct inode *dir, struct dentry *entry)
1117 {
1118 	int err;
1119 	struct fuse_mount *fm = get_fuse_mount(dir);
1120 	FUSE_ARGS(args);
1121 
1122 	if (fuse_is_bad(dir))
1123 		return -EIO;
1124 
1125 #ifdef CONFIG_FUSE_BPF
1126 	{
1127 		struct fuse_err_ret fer;
1128 
1129 		fer = fuse_bpf_backing(dir, struct fuse_dummy_io,
1130 					fuse_unlink_initialize,
1131 					fuse_unlink_backing,
1132 					fuse_unlink_finalize,
1133 					dir, entry);
1134 		if (fer.ret)
1135 			return PTR_ERR(fer.result);
1136 	}
1137 #endif
1138 
1139 	args.opcode = FUSE_UNLINK;
1140 	args.nodeid = get_node_id(dir);
1141 	args.in_numargs = 1;
1142 	args.in_args[0].size = entry->d_name.len + 1;
1143 	args.in_args[0].value = entry->d_name.name;
1144 	err = fuse_simple_request(fm, &args);
1145 	if (!err) {
1146 		struct inode *inode = d_inode(entry);
1147 		struct fuse_inode *fi = get_fuse_inode(inode);
1148 
1149 		spin_lock(&fi->lock);
1150 		fi->attr_version = atomic64_inc_return(&fm->fc->attr_version);
1151 		/*
1152 		 * If i_nlink == 0 then unlink doesn't make sense, yet this can
1153 		 * happen if the userspace filesystem is careless.  It would be
1154 		 * difficult to enforce correct nlink usage so just ignore this
1155 		 * condition here
1156 		 */
1157 		if (inode->i_nlink > 0)
1158 			drop_nlink(inode);
1159 		spin_unlock(&fi->lock);
1160 		fuse_invalidate_attr(inode);
1161 		fuse_dir_changed(dir);
1162 		fuse_invalidate_entry_cache(entry);
1163 		fuse_update_ctime(inode);
1164 	} else if (err == -EINTR)
1165 		fuse_invalidate_entry(entry);
1166 	return err;
1167 }
1168 
1169 static int fuse_rmdir(struct inode *dir, struct dentry *entry)
1170 {
1171 	int err;
1172 	struct fuse_mount *fm = get_fuse_mount(dir);
1173 	FUSE_ARGS(args);
1174 
1175 	if (fuse_is_bad(dir))
1176 		return -EIO;
1177 
1178 #ifdef CONFIG_FUSE_BPF
1179 	{
1180 		struct fuse_err_ret fer;
1181 
1182 		fer = fuse_bpf_backing(dir, struct fuse_dummy_io,
1183 					fuse_rmdir_initialize,
1184 					fuse_rmdir_backing,
1185 					fuse_rmdir_finalize,
1186 					dir, entry);
1187 		if (fer.ret)
1188 			return PTR_ERR(fer.result);
1189 	}
1190 #endif
1191 
1192 	args.opcode = FUSE_RMDIR;
1193 	args.nodeid = get_node_id(dir);
1194 	args.in_numargs = 1;
1195 	args.in_args[0].size = entry->d_name.len + 1;
1196 	args.in_args[0].value = entry->d_name.name;
1197 	err = fuse_simple_request(fm, &args);
1198 	if (!err) {
1199 		clear_nlink(d_inode(entry));
1200 		fuse_dir_changed(dir);
1201 		fuse_invalidate_entry_cache(entry);
1202 	} else if (err == -EINTR)
1203 		fuse_invalidate_entry(entry);
1204 	return err;
1205 }
1206 
1207 static int fuse_rename_common(struct inode *olddir, struct dentry *oldent,
1208 			      struct inode *newdir, struct dentry *newent,
1209 			      unsigned int flags, int opcode, size_t argsize)
1210 {
1211 	int err;
1212 	struct fuse_rename2_in inarg;
1213 	struct fuse_mount *fm = get_fuse_mount(olddir);
1214 	FUSE_ARGS(args);
1215 
1216 	memset(&inarg, 0, argsize);
1217 	inarg.newdir = get_node_id(newdir);
1218 	inarg.flags = flags;
1219 	args.opcode = opcode;
1220 	args.nodeid = get_node_id(olddir);
1221 	args.in_numargs = 3;
1222 	args.in_args[0].size = argsize;
1223 	args.in_args[0].value = &inarg;
1224 	args.in_args[1].size = oldent->d_name.len + 1;
1225 	args.in_args[1].value = oldent->d_name.name;
1226 	args.in_args[2].size = newent->d_name.len + 1;
1227 	args.in_args[2].value = newent->d_name.name;
1228 	err = fuse_simple_request(fm, &args);
1229 	if (!err) {
1230 		/* ctime changes */
1231 		fuse_invalidate_attr(d_inode(oldent));
1232 		fuse_update_ctime(d_inode(oldent));
1233 
1234 		if (flags & RENAME_EXCHANGE) {
1235 			fuse_invalidate_attr(d_inode(newent));
1236 			fuse_update_ctime(d_inode(newent));
1237 		}
1238 
1239 		fuse_dir_changed(olddir);
1240 		if (olddir != newdir)
1241 			fuse_dir_changed(newdir);
1242 
1243 		/* newent will end up negative */
1244 		if (!(flags & RENAME_EXCHANGE) && d_really_is_positive(newent)) {
1245 			fuse_invalidate_attr(d_inode(newent));
1246 			fuse_invalidate_entry_cache(newent);
1247 			fuse_update_ctime(d_inode(newent));
1248 		}
1249 	} else if (err == -EINTR) {
1250 		/* If request was interrupted, DEITY only knows if the
1251 		   rename actually took place.  If the invalidation
1252 		   fails (e.g. some process has CWD under the renamed
1253 		   directory), then there can be inconsistency between
1254 		   the dcache and the real filesystem.  Tough luck. */
1255 		fuse_invalidate_entry(oldent);
1256 		if (d_really_is_positive(newent))
1257 			fuse_invalidate_entry(newent);
1258 	}
1259 
1260 	return err;
1261 }
1262 
1263 static int fuse_rename2(struct user_namespace *mnt_userns, struct inode *olddir,
1264 			struct dentry *oldent, struct inode *newdir,
1265 			struct dentry *newent, unsigned int flags)
1266 {
1267 	struct fuse_conn *fc = get_fuse_conn(olddir);
1268 	int err;
1269 
1270 	if (fuse_is_bad(olddir))
1271 		return -EIO;
1272 
1273 	if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT))
1274 		return -EINVAL;
1275 
1276 	if (flags) {
1277 #ifdef CONFIG_FUSE_BPF
1278 		struct fuse_err_ret fer;
1279 
1280 		fer = fuse_bpf_backing(olddir, struct fuse_rename2_in,
1281 						fuse_rename2_initialize, fuse_rename2_backing,
1282 						fuse_rename2_finalize,
1283 						olddir, oldent, newdir, newent, flags);
1284 		if (fer.ret)
1285 			return PTR_ERR(fer.result);
1286 #endif
1287 
1288 		/* TODO: how should this go with bpfs involved? */
1289 		if (fc->no_rename2 || fc->minor < 23)
1290 			return -EINVAL;
1291 
1292 		err = fuse_rename_common(olddir, oldent, newdir, newent, flags,
1293 					 FUSE_RENAME2,
1294 					 sizeof(struct fuse_rename2_in));
1295 		if (err == -ENOSYS) {
1296 			fc->no_rename2 = 1;
1297 			err = -EINVAL;
1298 		}
1299 	} else {
1300 #ifdef CONFIG_FUSE_BPF
1301 		struct fuse_err_ret fer;
1302 
1303 		fer = fuse_bpf_backing(olddir, struct fuse_rename_in,
1304 						fuse_rename_initialize, fuse_rename_backing,
1305 						fuse_rename_finalize,
1306 						olddir, oldent, newdir, newent);
1307 		if (fer.ret)
1308 			return PTR_ERR(fer.result);
1309 #endif
1310 
1311 		err = fuse_rename_common(olddir, oldent, newdir, newent, 0,
1312 					 FUSE_RENAME,
1313 					 sizeof(struct fuse_rename_in));
1314 	}
1315 
1316 	return err;
1317 }
1318 
1319 static int fuse_link(struct dentry *entry, struct inode *newdir,
1320 		     struct dentry *newent)
1321 {
1322 	int err;
1323 	struct fuse_link_in inarg;
1324 	struct inode *inode = d_inode(entry);
1325 	struct fuse_mount *fm = get_fuse_mount(inode);
1326 	FUSE_ARGS(args);
1327 
1328 #ifdef CONFIG_FUSE_BPF
1329 	struct fuse_err_ret fer;
1330 
1331 	fer = fuse_bpf_backing(inode, struct fuse_link_in, fuse_link_initialize,
1332 			       fuse_link_backing, fuse_link_finalize, entry,
1333 			       newdir, newent);
1334 	if (fer.ret)
1335 		return PTR_ERR(fer.result);
1336 #endif
1337 
1338 	memset(&inarg, 0, sizeof(inarg));
1339 	inarg.oldnodeid = get_node_id(inode);
1340 	args.opcode = FUSE_LINK;
1341 	args.in_numargs = 2;
1342 	args.in_args[0].size = sizeof(inarg);
1343 	args.in_args[0].value = &inarg;
1344 	args.in_args[1].size = newent->d_name.len + 1;
1345 	args.in_args[1].value = newent->d_name.name;
1346 	err = create_new_entry(fm, &args, newdir, newent, inode->i_mode);
1347 	/* Contrary to "normal" filesystems it can happen that link
1348 	   makes two "logical" inodes point to the same "physical"
1349 	   inode.  We invalidate the attributes of the old one, so it
1350 	   will reflect changes in the backing inode (link count,
1351 	   etc.)
1352 	*/
1353 	if (!err) {
1354 		struct fuse_inode *fi = get_fuse_inode(inode);
1355 
1356 		spin_lock(&fi->lock);
1357 		fi->attr_version = atomic64_inc_return(&fm->fc->attr_version);
1358 		if (likely(inode->i_nlink < UINT_MAX))
1359 			inc_nlink(inode);
1360 		spin_unlock(&fi->lock);
1361 		fuse_invalidate_attr(inode);
1362 		fuse_update_ctime(inode);
1363 	} else if (err == -EINTR) {
1364 		fuse_invalidate_attr(inode);
1365 	}
1366 	return err;
1367 }
1368 
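/*
 * Translate a fuse_attr reply into a kstat.  With writeback caching the
 * locally cached size and c/mtime of regular files are authoritative and
 * override the values reported by the server.
 */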
1369 void fuse_fillattr(struct inode *inode, struct fuse_attr *attr,
1370 			  struct kstat *stat)
1371 {
1372 	unsigned int blkbits;
1373 	struct fuse_conn *fc = get_fuse_conn(inode);
1374 
1375 	/* see the comment in fuse_change_attributes() */
1376 	if (fc->writeback_cache && S_ISREG(inode->i_mode)) {
1377 		attr->size = i_size_read(inode);
1378 		attr->mtime = inode->i_mtime.tv_sec;
1379 		attr->mtimensec = inode->i_mtime.tv_nsec;
1380 		attr->ctime = inode->i_ctime.tv_sec;
1381 		attr->ctimensec = inode->i_ctime.tv_nsec;
1382 	}
1383 
1384 	stat->dev = inode->i_sb->s_dev;
1385 	stat->ino = attr->ino;
1386 	stat->mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777);
1387 	stat->nlink = attr->nlink;
1388 	stat->uid = make_kuid(fc->user_ns, attr->uid);
1389 	stat->gid = make_kgid(fc->user_ns, attr->gid);
1390 	stat->rdev = inode->i_rdev;
1391 	stat->atime.tv_sec = attr->atime;
1392 	stat->atime.tv_nsec = attr->atimensec;
1393 	stat->mtime.tv_sec = attr->mtime;
1394 	stat->mtime.tv_nsec = attr->mtimensec;
1395 	stat->ctime.tv_sec = attr->ctime;
1396 	stat->ctime.tv_nsec = attr->ctimensec;
1397 	stat->size = attr->size;
1398 	stat->blocks = attr->blocks;
1399 
1400 	if (attr->blksize != 0)
1401 		blkbits = ilog2(attr->blksize);
1402 	else
1403 		blkbits = inode->i_sb->s_blocksize_bits;
1404 
1405 	stat->blksize = 1 << blkbits;
1406 }
1407 
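/*
 * Fetch fresh attributes from userspace with FUSE_GETATTR, passing the
 * file handle for regular files opened through @file.
 */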
1408 static int fuse_do_getattr(struct inode *inode, struct kstat *stat,
1409 			   struct file *file)
1410 {
1411 	int err;
1412 	struct fuse_getattr_in inarg;
1413 	struct fuse_attr_out outarg;
1414 	struct fuse_mount *fm = get_fuse_mount(inode);
1415 	FUSE_ARGS(args);
1416 	u64 attr_version;
1417 
1418 	attr_version = fuse_get_attr_version(fm->fc);
1419 
1420 	memset(&inarg, 0, sizeof(inarg));
1421 	memset(&outarg, 0, sizeof(outarg));
1422 	/* Directories have separate file-handle space */
1423 	if (file && S_ISREG(inode->i_mode)) {
1424 		struct fuse_file *ff = file->private_data;
1425 
1426 		inarg.getattr_flags |= FUSE_GETATTR_FH;
1427 		inarg.fh = ff->fh;
1428 	}
1429 	args.opcode = FUSE_GETATTR;
1430 	args.nodeid = get_node_id(inode);
1431 	args.in_numargs = 1;
1432 	args.in_args[0].size = sizeof(inarg);
1433 	args.in_args[0].value = &inarg;
1434 	args.out_numargs = 1;
1435 	args.out_args[0].size = sizeof(outarg);
1436 	args.out_args[0].value = &outarg;
1437 	err = fuse_simple_request(fm, &args);
1438 	if (!err)
1439 		err = finalize_attr(inode, &outarg, attr_version, stat);
1440 	return err;
1441 }
1442 
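/*
 * Decide whether cached attributes can be used.  AT_STATX_FORCE_SYNC and
 * AT_STATX_DONT_SYNC force the decision; otherwise refresh if any of the
 * requested fields has been invalidated or the attribute timeout expired.
 */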
1443 static int fuse_update_get_attr(struct inode *inode, struct file *file,
1444 				const struct path *path,
1445 				struct kstat *stat, u32 request_mask,
1446 				unsigned int flags)
1447 {
1448 	struct fuse_inode *fi = get_fuse_inode(inode);
1449 	int err = 0;
1450 	bool sync;
1451 
1452 #ifdef CONFIG_FUSE_BPF
1453 	struct fuse_err_ret fer;
1454 
1455 	fer = fuse_bpf_backing(inode, struct fuse_getattr_io,
1456 			       fuse_getattr_initialize,	fuse_getattr_backing,
1457 			       fuse_getattr_finalize,
1458 			       path->dentry, stat, request_mask, flags);
1459 	if (fer.ret)
1460 		return PTR_ERR(fer.result);
1461 #endif
1462 
1463 	if (flags & AT_STATX_FORCE_SYNC)
1464 		sync = true;
1465 	else if (flags & AT_STATX_DONT_SYNC)
1466 		sync = false;
1467 	else if (request_mask & READ_ONCE(fi->inval_mask))
1468 		sync = true;
1469 	else
1470 		sync = time_before64(fi->i_time, get_jiffies_64());
1471 
1472 	if (sync) {
1473 		forget_all_cached_acls(inode);
1474 		err = fuse_do_getattr(inode, stat, file);
1475 	} else if (stat) {
1476 		generic_fillattr(&init_user_ns, inode, stat);
1477 		stat->mode = fi->orig_i_mode;
1478 		stat->ino = fi->orig_ino;
1479 	}
1480 
1481 	return err;
1482 }
1483 
1484 int fuse_update_attributes(struct inode *inode, struct file *file)
1485 {
1486 	/* Do *not* need to get atime for internal purposes */
1487 	return fuse_update_get_attr(inode, file, &file->f_path, NULL,
1488 				    STATX_BASIC_STATS & ~STATX_ATIME, 0);
1489 }
1490 
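/*
 * Reverse invalidation requested by the filesystem daemon: find @name
 * under @parent_nodeid and invalidate the dentry.  If @child_nodeid is
 * given, additionally behave like unlink/rmdir, refusing if the child is
 * a mountpoint or a non-empty directory.
 */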
1491 int fuse_reverse_inval_entry(struct fuse_conn *fc, u64 parent_nodeid,
1492 			     u64 child_nodeid, struct qstr *name)
1493 {
1494 	int err = -ENOTDIR;
1495 	struct inode *parent;
1496 	struct dentry *dir;
1497 	struct dentry *entry;
1498 
1499 	parent = fuse_ilookup(fc, parent_nodeid, NULL);
1500 	if (!parent)
1501 		return -ENOENT;
1502 
1503 	inode_lock_nested(parent, I_MUTEX_PARENT);
1504 	if (!S_ISDIR(parent->i_mode))
1505 		goto unlock;
1506 
1507 	err = -ENOENT;
1508 	dir = d_find_alias(parent);
1509 	if (!dir)
1510 		goto unlock;
1511 
1512 	name->hash = full_name_hash(dir, name->name, name->len);
1513 	entry = d_lookup(dir, name);
1514 	dput(dir);
1515 	if (!entry)
1516 		goto unlock;
1517 
1518 	fuse_dir_changed(parent);
1519 	fuse_invalidate_entry(entry);
1520 
1521 	if (child_nodeid != 0 && d_really_is_positive(entry)) {
1522 		inode_lock(d_inode(entry));
1523 		if (get_node_id(d_inode(entry)) != child_nodeid) {
1524 			err = -ENOENT;
1525 			goto badentry;
1526 		}
1527 		if (d_mountpoint(entry)) {
1528 			err = -EBUSY;
1529 			goto badentry;
1530 		}
1531 		if (d_is_dir(entry)) {
1532 			shrink_dcache_parent(entry);
1533 			if (!simple_empty(entry)) {
1534 				err = -ENOTEMPTY;
1535 				goto badentry;
1536 			}
1537 			d_inode(entry)->i_flags |= S_DEAD;
1538 		}
1539 		dont_mount(entry);
1540 		clear_nlink(d_inode(entry));
1541 		err = 0;
1542  badentry:
1543 		inode_unlock(d_inode(entry));
1544 		if (!err)
1545 			d_delete(entry);
1546 	} else {
1547 		err = 0;
1548 	}
1549 	dput(entry);
1550 
1551  unlock:
1552 	inode_unlock(parent);
1553 	iput(parent);
1554 	return err;
1555 }
1556 
1557 /*
1558  * Calling into a user-controlled filesystem gives the filesystem
1559  * daemon ptrace-like capabilities over the current process.  This
1560  * means, that the filesystem daemon is able to record the exact
1561  * filesystem operations performed, and can also control the behavior
1562  * of the requester process in otherwise impossible ways.  For example
1563  * it can delay the operation for an arbitrary length of time allowing
1564  * DoS against the requester.
1565  *
1566  * For this reason only those processes can call into the filesystem,
1567  * for which the owner of the mount has ptrace privilege.  This
1568  * excludes processes started by other users, suid or sgid processes.
1569  */
1570 int fuse_allow_current_process(struct fuse_conn *fc)
1571 {
1572 	const struct cred *cred;
1573 
1574 	if (fc->allow_other)
1575 		return current_in_userns(fc->user_ns);
1576 
1577 	cred = current_cred();
1578 	if (uid_eq(cred->euid, fc->user_id) &&
1579 	    uid_eq(cred->suid, fc->user_id) &&
1580 	    uid_eq(cred->uid,  fc->user_id) &&
1581 	    gid_eq(cred->egid, fc->group_id) &&
1582 	    gid_eq(cred->sgid, fc->group_id) &&
1583 	    gid_eq(cred->gid,  fc->group_id))
1584 		return 1;
1585 
1586 	return 0;
1587 }
1588 
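/*
 * Send a FUSE_ACCESS request for access(2)/chdir style permission checks.
 * -ENOSYS means the server does not implement it; remember that and treat
 * such checks as successful from then on.
 */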
1589 static int fuse_access(struct inode *inode, int mask)
1590 {
1591 	struct fuse_mount *fm = get_fuse_mount(inode);
1592 	FUSE_ARGS(args);
1593 	struct fuse_access_in inarg;
1594 	int err;
1595 
1596 #ifdef CONFIG_FUSE_BPF
1597 	struct fuse_err_ret fer;
1598 
1599 	fer = fuse_bpf_backing(inode, struct fuse_access_in,
1600 			       fuse_access_initialize, fuse_access_backing,
1601 			       fuse_access_finalize, inode, mask);
1602 	if (fer.ret)
1603 		return PTR_ERR(fer.result);
1604 #endif
1605 
1606 	BUG_ON(mask & MAY_NOT_BLOCK);
1607 
1608 	if (fm->fc->no_access)
1609 		return 0;
1610 
1611 	memset(&inarg, 0, sizeof(inarg));
1612 	inarg.mask = mask & (MAY_READ | MAY_WRITE | MAY_EXEC);
1613 	args.opcode = FUSE_ACCESS;
1614 	args.nodeid = get_node_id(inode);
1615 	args.in_numargs = 1;
1616 	args.in_args[0].size = sizeof(inarg);
1617 	args.in_args[0].value = &inarg;
1618 	err = fuse_simple_request(fm, &args);
1619 	if (err == -ENOSYS) {
1620 		fm->fc->no_access = 1;
1621 		err = 0;
1622 	}
1623 	return err;
1624 }
1625 
1626 static int fuse_perm_getattr(struct inode *inode, int mask)
1627 {
1628 	if (mask & MAY_NOT_BLOCK)
1629 		return -ECHILD;
1630 
1631 	forget_all_cached_acls(inode);
1632 	return fuse_do_getattr(inode, NULL, NULL);
1633 }
1634 
1635 /*
1636  * Check permission.  The two basic access models of FUSE are:
1637  *
1638  * 1) Local access checking ('default_permissions' mount option) based
1639  * on file mode.  This is the plain old disk filesystem permission
1640  * model.
1641  *
1642  * 2) "Remote" access checking, where server is responsible for
1643  * checking permission in each inode operation.  An exception to this
1644  * is if ->permission() was invoked from sys_access() in which case an
1645  * access request is sent.  Execute permission is still checked
1646  * locally based on file mode.
1647  */
1648 static int fuse_permission(struct user_namespace *mnt_userns,
1649 			   struct inode *inode, int mask)
1650 {
1651 	struct fuse_conn *fc = get_fuse_conn(inode);
1652 	bool refreshed = false;
1653 	int err = 0;
1654 	struct fuse_inode *fi = get_fuse_inode(inode);
1655 #ifdef CONFIG_FUSE_BPF
1656 	struct fuse_err_ret fer;
1657 #endif
1658 
1659 	if (fuse_is_bad(inode))
1660 		return -EIO;
1661 
1662 	if (!fuse_allow_current_process(fc))
1663 		return -EACCES;
1664 
1665 #ifdef CONFIG_FUSE_BPF
1666 	fer = fuse_bpf_backing(inode, struct fuse_access_in,
1667 			       fuse_access_initialize, fuse_access_backing,
1668 			       fuse_access_finalize, inode, mask);
1669 	if (fer.ret)
1670 		return PTR_ERR(fer.result);
1671 #endif
1672 
1673 	/*
1674 	 * If attributes are needed, refresh them before proceeding
1675 	 */
1676 	if (fc->default_permissions ||
1677 	    ((mask & MAY_EXEC) && S_ISREG(inode->i_mode))) {
1678 		u32 perm_mask = STATX_MODE | STATX_UID | STATX_GID;
1679 
1680 		if (perm_mask & READ_ONCE(fi->inval_mask) ||
1681 		    time_before64(fi->i_time, get_jiffies_64())) {
1682 			refreshed = true;
1683 
1684 			err = fuse_perm_getattr(inode, mask);
1685 			if (err)
1686 				return err;
1687 		}
1688 	}
1689 
1690 	if (fc->default_permissions) {
1691 		err = generic_permission(&init_user_ns, inode, mask);
1692 
1693 		/* If permission is denied, try to refresh file
1694 		   attributes.  This is also needed, because the root
1695 		   node will at first have no permissions */
1696 		if (err == -EACCES && !refreshed) {
1697 			err = fuse_perm_getattr(inode, mask);
1698 			if (!err)
1699 				err = generic_permission(&init_user_ns,
1700 							 inode, mask);
1701 		}
1702 
1703 		/* Note: the opposite of the above test does not
1704 		   exist.  So if permissions are revoked this won't be
1705 		   noticed immediately, only after the attribute
1706 		   timeout has expired */
1707 	} else if (mask & (MAY_ACCESS | MAY_CHDIR)) {
1708 		err = fuse_access(inode, mask);
1709 	} else if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode)) {
1710 		if (!(inode->i_mode & S_IXUGO)) {
1711 			if (refreshed)
1712 				return -EACCES;
1713 
1714 			err = fuse_perm_getattr(inode, mask);
1715 			if (!err && !(inode->i_mode & S_IXUGO))
1716 				return -EACCES;
1717 		}
1718 	}
1719 	return err;
1720 }
1721 
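/*
 * Read the symlink target into @page with FUSE_READLINK and NUL-terminate
 * it; the target is limited to PAGE_SIZE - 1 bytes.
 */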
1722 static int fuse_readlink_page(struct inode *inode, struct page *page)
1723 {
1724 	struct fuse_mount *fm = get_fuse_mount(inode);
1725 	struct fuse_page_desc desc = { .length = PAGE_SIZE - 1 };
1726 	struct fuse_args_pages ap = {
1727 		.num_pages = 1,
1728 		.pages = &page,
1729 		.descs = &desc,
1730 	};
1731 	char *link;
1732 	ssize_t res;
1733 
1734 	ap.args.opcode = FUSE_READLINK;
1735 	ap.args.nodeid = get_node_id(inode);
1736 	ap.args.out_pages = true;
1737 	ap.args.out_argvar = true;
1738 	ap.args.page_zeroing = true;
1739 	ap.args.out_numargs = 1;
1740 	ap.args.out_args[0].size = desc.length;
1741 	res = fuse_simple_request(fm, &ap.args);
1742 
1743 	fuse_invalidate_atime(inode);
1744 
1745 	if (res < 0)
1746 		return res;
1747 
1748 	if (WARN_ON(res >= PAGE_SIZE))
1749 		return -EIO;
1750 
1751 	link = page_address(page);
1752 	link[res] = '\0';
1753 
1754 	return 0;
1755 }
1756 
1757 static const char *fuse_get_link(struct dentry *dentry, struct inode *inode,
1758 				 struct delayed_call *callback)
1759 {
1760 	struct fuse_conn *fc = get_fuse_conn(inode);
1761 	struct page *page;
1762 	int err;
1763 
1764 	err = -EIO;
1765 	if (fuse_is_bad(inode))
1766 		goto out_err;
1767 
1768 #ifdef CONFIG_FUSE_BPF
1769 	{
1770 		struct fuse_err_ret fer;
1771 		const char *out = NULL;
1772 
1773 		fer = fuse_bpf_backing(inode, struct fuse_dummy_io,
1774 				       fuse_get_link_initialize,
1775 				       fuse_get_link_backing,
1776 				       fuse_get_link_finalize,
1777 				       inode, dentry, callback, &out);
1778 		if (fer.ret)
1779 			return fer.result ?: out;
1780 	}
1781 #endif
1782 
1783 	if (fc->cache_symlinks)
1784 		return page_get_link(dentry, inode, callback);
1785 
1786 	err = -ECHILD;
1787 	if (!dentry)
1788 		goto out_err;
1789 
1790 	page = alloc_page(GFP_KERNEL);
1791 	err = -ENOMEM;
1792 	if (!page)
1793 		goto out_err;
1794 
1795 	err = fuse_readlink_page(inode, page);
1796 	if (err) {
1797 		__free_page(page);
1798 		goto out_err;
1799 	}
1800 
1801 	set_delayed_call(callback, page_put_link, page);
1802 
1803 	return page_address(page);
1804 
1805 out_err:
1806 	return ERR_PTR(err);
1807 }
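/*
 * Editor's note (illustrative, not part of this file): fc->cache_symlinks is
 * negotiated at FUSE_INIT time; when the daemon asks for it, the branch above
 * serves symlink targets from the page cache via page_get_link() and the
 * fuse_symlink_aops defined near the end of this file.  A libfuse3 init
 * callback sketch, assuming a libfuse version that defines
 * FUSE_CAP_CACHE_SYMLINKS (3.10+); names are hypothetical.
 */
#if 0
static void *sketch_init(struct fuse_conn_info *conn, struct fuse_config *cfg)
{
	(void) cfg;
	if (conn->capable & FUSE_CAP_CACHE_SYMLINKS)
		conn->want |= FUSE_CAP_CACHE_SYMLINKS;
	return NULL;	/* becomes fuse_get_context()->private_data */
}
#endif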
1808 
1809 static int fuse_dir_open(struct inode *inode, struct file *file)
1810 {
1811 	return fuse_open_common(inode, file, true);
1812 }
1813 
1814 static int fuse_dir_release(struct inode *inode, struct file *file)
1815 {
1816 	fuse_release_common(file, true);
1817 	return 0;
1818 }
1819 
1820 static int fuse_dir_fsync(struct file *file, loff_t start, loff_t end,
1821 			  int datasync)
1822 {
1823 	struct inode *inode = file->f_mapping->host;
1824 	struct fuse_conn *fc = get_fuse_conn(inode);
1825 	int err;
1826 
1827 	if (fuse_is_bad(inode))
1828 		return -EIO;
1829 
1830 #ifdef CONFIG_FUSE_BPF
1831 	{
1832 		struct fuse_err_ret fer;
1833 
1834 		fer = fuse_bpf_backing(inode, struct fuse_fsync_in,
1835 				fuse_dir_fsync_initialize, fuse_fsync_backing,
1836 				fuse_fsync_finalize,
1837 				file, start, end, datasync);
1838 		if (fer.ret)
1839 			return PTR_ERR(fer.result);
1840 	}
1841 #endif
1842 
1843 	if (fc->no_fsyncdir)
1844 		return 0;
1845 
1846 	inode_lock(inode);
1847 	err = fuse_fsync_common(file, start, end, datasync, FUSE_FSYNCDIR);
1848 	if (err == -ENOSYS) {
1849 		fc->no_fsyncdir = 1;
1850 		err = 0;
1851 	}
1852 	inode_unlock(inode);
1853 
1854 	return err;
1855 }
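/*
 * Illustrative userspace counterpart (editor's sketch): FUSE_FSYNCDIR lands
 * in the daemon's fsyncdir callback.  If the daemon leaves it unimplemented
 * the request fails with ENOSYS, and the fc->no_fsyncdir logic above then
 * suppresses further FUSE_FSYNCDIR requests on this connection.  A minimal
 * libfuse3 handler for a passthrough-style daemon (hypothetical names):
 */
#if 0
#include <fcntl.h>

static int sketch_fsyncdir(const char *path, int datasync,
			   struct fuse_file_info *fi)
{
	int res, fd = open(path, O_RDONLY | O_DIRECTORY);

	(void) fi;
	if (fd == -1)
		return -errno;
	res = (datasync ? fdatasync(fd) : fsync(fd)) == -1 ? -errno : 0;
	close(fd);
	return res;
}
#endif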
1856 
1857 static long fuse_dir_ioctl(struct file *file, unsigned int cmd,
1858 			    unsigned long arg)
1859 {
1860 	struct fuse_conn *fc = get_fuse_conn(file->f_mapping->host);
1861 
1862 	/* FUSE_IOCTL_DIR is only supported for API version >= 7.18 */
1863 	if (fc->minor < 18)
1864 		return -ENOTTY;
1865 
1866 	return fuse_ioctl_common(file, cmd, arg, FUSE_IOCTL_DIR);
1867 }
1868 
1869 static long fuse_dir_compat_ioctl(struct file *file, unsigned int cmd,
1870 				   unsigned long arg)
1871 {
1872 	struct fuse_conn *fc = get_fuse_conn(file->f_mapping->host);
1873 
1874 	if (fc->minor < 18)
1875 		return -ENOTTY;
1876 
1877 	return fuse_ioctl_common(file, cmd, arg,
1878 				 FUSE_IOCTL_COMPAT | FUSE_IOCTL_DIR);
1879 }
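/*
 * Illustrative application-side example (editor's sketch): an ioctl on a
 * FUSE directory reaches fuse_dir_ioctl() above only on protocol 7.18+,
 * otherwise it fails with ENOTTY.  EXAMPLE_DIR_IOC is a hypothetical
 * daemon-defined command, shown only to exercise the path:
 */
#if 0
#include <sys/ioctl.h>
#include <fcntl.h>
#include <unistd.h>
#include <stdio.h>

#define EXAMPLE_DIR_IOC	_IOR('E', 1, int)	/* hypothetical command */

int main(int argc, char *argv[])
{
	int val, fd = open(argc > 1 ? argv[1] : ".", O_RDONLY | O_DIRECTORY);

	if (fd == -1)
		return 1;
	if (ioctl(fd, EXAMPLE_DIR_IOC, &val) == -1)
		perror("ioctl");	/* ENOTTY on connections older than 7.18 */
	close(fd);
	return 0;
}
#endif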
1880 
1881 /*
1882  * Prevent concurrent writepages on inode
1883  *
1884  * This is done by adding a negative bias to the inode write counter
1885  * and waiting for all pending writes to finish.
1886  */
1887 void fuse_set_nowrite(struct inode *inode)
1888 {
1889 	struct fuse_inode *fi = get_fuse_inode(inode);
1890 
1891 	BUG_ON(!inode_is_locked(inode));
1892 
1893 	spin_lock(&fi->lock);
1894 	BUG_ON(fi->writectr < 0);
1895 	fi->writectr += FUSE_NOWRITE;
1896 	spin_unlock(&fi->lock);
1897 	wait_event(fi->page_waitq, fi->writectr == FUSE_NOWRITE);
1898 }
1899 
1900 /*
1901  * Allow writepages on inode
1902  *
1903  * Remove the bias from the writecounter and send any queued
1904  * writepages.
1905  */
1906 static void __fuse_release_nowrite(struct inode *inode)
1907 {
1908 	struct fuse_inode *fi = get_fuse_inode(inode);
1909 
1910 	BUG_ON(fi->writectr != FUSE_NOWRITE);
1911 	fi->writectr = 0;
1912 	fuse_flush_writepages(inode);
1913 }
1914 
1915 void fuse_release_nowrite(struct inode *inode)
1916 {
1917 	struct fuse_inode *fi = get_fuse_inode(inode);
1918 
1919 	spin_lock(&fi->lock);
1920 	__fuse_release_nowrite(inode);
1921 	spin_unlock(&fi->lock);
1922 }
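/*
 * Editor's illustration (not kernel code): a userspace model of the
 * FUSE_NOWRITE bias used above.  fuse_set_nowrite() adds a large negative
 * bias to the pending-write counter and sleeps until only the bias remains,
 * i.e. every in-flight writepage has completed; fuse_release_nowrite() drops
 * the bias so queued writepages can be flushed.  A pthread sketch with
 * hypothetical names:
 */
#if 0
#include <pthread.h>

#define NOWRITE_BIAS	(-1000000)	/* stands in for FUSE_NOWRITE */

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t idle = PTHREAD_COND_INITIALIZER;
static long writectr;	/* in-flight writes, plus the bias while blocked */

static void set_nowrite(void)
{
	pthread_mutex_lock(&lock);
	writectr += NOWRITE_BIAS;		/* new writes now queue instead of starting */
	while (writectr != NOWRITE_BIAS)
		pthread_cond_wait(&idle, &lock);	/* wait for in-flight writes */
	pthread_mutex_unlock(&lock);
}

static void release_nowrite(void)
{
	pthread_mutex_lock(&lock);
	writectr = 0;				/* drop the bias; queued writes may resume */
	pthread_mutex_unlock(&lock);
}

static void write_completed(void)		/* called as each write finishes */
{
	pthread_mutex_lock(&lock);
	if (--writectr == NOWRITE_BIAS)
		pthread_cond_broadcast(&idle);
	pthread_mutex_unlock(&lock);
}
#endif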
1923 
1924 static void fuse_setattr_fill(struct fuse_conn *fc, struct fuse_args *args,
1925 			      struct inode *inode,
1926 			      struct fuse_setattr_in *inarg_p,
1927 			      struct fuse_attr_out *outarg_p)
1928 {
1929 	args->opcode = FUSE_SETATTR;
1930 	args->nodeid = get_node_id(inode);
1931 	args->in_numargs = 1;
1932 	args->in_args[0].size = sizeof(*inarg_p);
1933 	args->in_args[0].value = inarg_p;
1934 	args->out_numargs = 1;
1935 	args->out_args[0].size = sizeof(*outarg_p);
1936 	args->out_args[0].value = outarg_p;
1937 }
1938 
1939 /*
1940  * Flush inode->i_mtime (and, on protocol 7.23+, i_ctime) to the server
1941  */
1942 int fuse_flush_times(struct inode *inode, struct fuse_file *ff)
1943 {
1944 	struct fuse_mount *fm = get_fuse_mount(inode);
1945 	FUSE_ARGS(args);
1946 	struct fuse_setattr_in inarg;
1947 	struct fuse_attr_out outarg;
1948 
1949 	memset(&inarg, 0, sizeof(inarg));
1950 	memset(&outarg, 0, sizeof(outarg));
1951 
1952 	inarg.valid = FATTR_MTIME;
1953 	inarg.mtime = inode->i_mtime.tv_sec;
1954 	inarg.mtimensec = inode->i_mtime.tv_nsec;
1955 	if (fm->fc->minor >= 23) {
1956 		inarg.valid |= FATTR_CTIME;
1957 		inarg.ctime = inode->i_ctime.tv_sec;
1958 		inarg.ctimensec = inode->i_ctime.tv_nsec;
1959 	}
1960 	if (ff) {
1961 		inarg.valid |= FATTR_FH;
1962 		inarg.fh = ff->fh;
1963 	}
1964 	fuse_setattr_fill(fm->fc, &args, inode, &inarg, &outarg);
1965 
1966 	return fuse_simple_request(fm, &args);
1967 }
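/*
 * Illustrative userspace counterpart (editor's sketch): the SETATTR sent by
 * fuse_flush_times() carries FATTR_MTIME (plus FATTR_CTIME on 7.23+); in the
 * high-level libfuse API the mtime part is delivered through the utimens
 * callback (ctime cannot be set directly from userspace and is typically
 * just refreshed).  Passthrough-style sketch, hypothetical names:
 */
#if 0
#include <sys/stat.h>
#include <fcntl.h>

static int sketch_utimens(const char *path, const struct timespec tv[2],
			  struct fuse_file_info *fi)
{
	(void) fi;
	if (utimensat(AT_FDCWD, path, tv, AT_SYMLINK_NOFOLLOW) == -1)
		return -errno;
	return 0;
}
#endif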
1968 
1969 /*
1970  * Set attributes, and at the same time refresh them.
1971  *
1972  * Truncation is slightly complicated, because the 'truncate' request
1973  * may fail, in which case we don't want to touch the mapping.
1974  * vmtruncate() doesn't allow for this case, so do the rlimit checking
1975  * and the actual truncation by hand.
1976  */
1977 int fuse_do_setattr(struct dentry *dentry, struct iattr *attr,
1978 		    struct file *file)
1979 {
1980 	struct inode *inode = d_inode(dentry);
1981 	struct fuse_mount *fm = get_fuse_mount(inode);
1982 	struct fuse_conn *fc = fm->fc;
1983 	struct fuse_inode *fi = get_fuse_inode(inode);
1984 	struct address_space *mapping = inode->i_mapping;
1985 	FUSE_ARGS(args);
1986 	struct fuse_setattr_in inarg;
1987 	struct fuse_attr_out outarg;
1988 	bool is_truncate = false;
1989 	bool is_wb = fc->writeback_cache;
1990 	loff_t oldsize;
1991 	int err;
1992 	bool trust_local_cmtime = is_wb && S_ISREG(inode->i_mode);
1993 	bool fault_blocked = false;
1994 
1995 #ifdef CONFIG_FUSE_BPF
1996 	struct fuse_err_ret fer;
1997 
1998 	fer = fuse_bpf_backing(inode, struct fuse_setattr_io,
1999 			       fuse_setattr_initialize, fuse_setattr_backing,
2000 			       fuse_setattr_finalize, dentry, attr, file);
2001 	if (fer.ret)
2002 		return PTR_ERR(fer.result);
2003 #endif
2004 
2005 	if (!fc->default_permissions)
2006 		attr->ia_valid |= ATTR_FORCE;
2007 
2008 	err = setattr_prepare(&init_user_ns, dentry, attr);
2009 	if (err)
2010 		return err;
2011 
2012 	if (attr->ia_valid & ATTR_SIZE) {
2013 		if (WARN_ON(!S_ISREG(inode->i_mode)))
2014 			return -EIO;
2015 		is_truncate = true;
2016 	}
2017 
2018 	if (FUSE_IS_DAX(inode) && is_truncate) {
2019 		filemap_invalidate_lock(mapping);
2020 		fault_blocked = true;
2021 		err = fuse_dax_break_layouts(inode, 0, 0);
2022 		if (err) {
2023 			filemap_invalidate_unlock(mapping);
2024 			return err;
2025 		}
2026 	}
2027 
2028 	if (attr->ia_valid & ATTR_OPEN) {
2029 		/* This is coming from open(..., ... | O_TRUNC); */
2030 		WARN_ON(!(attr->ia_valid & ATTR_SIZE));
2031 		WARN_ON(attr->ia_size != 0);
2032 		if (fc->atomic_o_trunc) {
2033 			/*
2034 			 * No need to send request to userspace, since actual
2035 			 * truncation has already been done by OPEN.  But still
2036 			 * need to truncate page cache.
2037 			 */
2038 			i_size_write(inode, 0);
2039 			truncate_pagecache(inode, 0);
2040 			goto out;
2041 		}
2042 		file = NULL;
2043 	}
2044 
2045 	/* Flush dirty data/metadata before non-truncate SETATTR */
2046 	if (is_wb && S_ISREG(inode->i_mode) &&
2047 	    attr->ia_valid &
2048 			(ATTR_MODE | ATTR_UID | ATTR_GID | ATTR_MTIME_SET |
2049 			 ATTR_TIMES_SET)) {
2050 		err = write_inode_now(inode, true);
2051 		if (err)
2052 			return err;
2053 
2054 		fuse_set_nowrite(inode);
2055 		fuse_release_nowrite(inode);
2056 	}
2057 
2058 	if (is_truncate) {
2059 		fuse_set_nowrite(inode);
2060 		set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
2061 		if (trust_local_cmtime && attr->ia_size != inode->i_size)
2062 			attr->ia_valid |= ATTR_MTIME | ATTR_CTIME;
2063 	}
2064 
2065 	memset(&inarg, 0, sizeof(inarg));
2066 	memset(&outarg, 0, sizeof(outarg));
2067 	iattr_to_fattr(fc, attr, &inarg, trust_local_cmtime);
2068 	if (file) {
2069 		struct fuse_file *ff = file->private_data;
2070 		inarg.valid |= FATTR_FH;
2071 		inarg.fh = ff->fh;
2072 	}
2073 
2074 	/* Kill suid/sgid for non-directory chown unconditionally */
2075 	if (fc->handle_killpriv_v2 && !S_ISDIR(inode->i_mode) &&
2076 	    attr->ia_valid & (ATTR_UID | ATTR_GID))
2077 		inarg.valid |= FATTR_KILL_SUIDGID;
2078 
2079 	if (attr->ia_valid & ATTR_SIZE) {
2080 		/* For mandatory locking in truncate */
2081 		inarg.valid |= FATTR_LOCKOWNER;
2082 		inarg.lock_owner = fuse_lock_owner_id(fc, current->files);
2083 
2084 		/* Kill suid/sgid for truncate only if no CAP_FSETID */
2085 		if (fc->handle_killpriv_v2 && !capable(CAP_FSETID))
2086 			inarg.valid |= FATTR_KILL_SUIDGID;
2087 	}
2088 	fuse_setattr_fill(fc, &args, inode, &inarg, &outarg);
2089 	err = fuse_simple_request(fm, &args);
2090 	if (err) {
2091 		if (err == -EINTR)
2092 			fuse_invalidate_attr(inode);
2093 		goto error;
2094 	}
2095 
2096 	if (fuse_invalid_attr(&outarg.attr) ||
2097 	    inode_wrong_type(inode, outarg.attr.mode)) {
2098 		fuse_make_bad(inode);
2099 		err = -EIO;
2100 		goto error;
2101 	}
2102 
2103 	spin_lock(&fi->lock);
2104 	/* the kernel maintains i_mtime locally */
2105 	if (trust_local_cmtime) {
2106 		if (attr->ia_valid & ATTR_MTIME)
2107 			inode->i_mtime = attr->ia_mtime;
2108 		if (attr->ia_valid & ATTR_CTIME)
2109 			inode->i_ctime = attr->ia_ctime;
2110 		/* FIXME: clear I_DIRTY_SYNC? */
2111 	}
2112 
2113 	fuse_change_attributes_common(inode, &outarg.attr,
2114 				      attr_timeout(&outarg));
2115 	oldsize = inode->i_size;
2116 	/* see the comment in fuse_change_attributes() */
2117 	if (!is_wb || is_truncate || !S_ISREG(inode->i_mode))
2118 		i_size_write(inode, outarg.attr.size);
2119 
2120 	if (is_truncate) {
2121 		/* NOTE: this may release/reacquire fi->lock */
2122 		__fuse_release_nowrite(inode);
2123 	}
2124 	spin_unlock(&fi->lock);
2125 
2126 	/*
2127 	 * Only call invalidate_inode_pages2() after removing
2128 	 * FUSE_NOWRITE, otherwise fuse_launder_page() would deadlock.
2129 	 */
2130 	if ((is_truncate || !is_wb) &&
2131 	    S_ISREG(inode->i_mode) && oldsize != outarg.attr.size) {
2132 		truncate_pagecache(inode, outarg.attr.size);
2133 		invalidate_inode_pages2(mapping);
2134 	}
2135 
2136 	clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
2137 out:
2138 	if (fault_blocked)
2139 		filemap_invalidate_unlock(mapping);
2140 
2141 	return 0;
2142 
2143 error:
2144 	if (is_truncate)
2145 		fuse_release_nowrite(inode);
2146 
2147 	clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
2148 
2149 	if (fault_blocked)
2150 		filemap_invalidate_unlock(mapping);
2151 	return err;
2152 }
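/*
 * Illustrative userspace counterpart (editor's sketch): the FATTR_SIZE case
 * above becomes a truncate in the daemon; when FATTR_FH is set, the
 * high-level libfuse API passes a non-NULL fi so the open file handle can be
 * used.  The page cache above is only shrunk after the request succeeds,
 * which is why a failing handler here leaves the kernel's view of the file
 * untouched.  Passthrough-style sketch, hypothetical names:
 */
#if 0
static int sketch_truncate(const char *path, off_t size,
			   struct fuse_file_info *fi)
{
	int res = fi ? ftruncate(fi->fh, size) : truncate(path, size);

	return res == -1 ? -errno : 0;
}
#endif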
2153 
2154 static int fuse_setattr(struct user_namespace *mnt_userns, struct dentry *entry,
2155 			struct iattr *attr)
2156 {
2157 	struct inode *inode = d_inode(entry);
2158 	struct fuse_conn *fc = get_fuse_conn(inode);
2159 	struct file *file = (attr->ia_valid & ATTR_FILE) ? attr->ia_file : NULL;
2160 	int ret;
2161 
2162 	if (fuse_is_bad(inode))
2163 		return -EIO;
2164 
2165 	if (!fuse_allow_current_process(get_fuse_conn(inode)))
2166 		return -EACCES;
2167 
2168 	if (attr->ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID)) {
2169 		attr->ia_valid &= ~(ATTR_KILL_SUID | ATTR_KILL_SGID |
2170 				    ATTR_MODE);
2171 
2172 		/*
2173 		 * The only sane way to reliably kill suid/sgid is to do it in
2174 		 * the userspace filesystem.
2175 		 *
2176 		 * This should be done on write(), truncate() and chown().
2177 		 */
2178 		if (!fc->handle_killpriv && !fc->handle_killpriv_v2) {
2179 #ifdef CONFIG_FUSE_BPF
2180 			struct fuse_err_ret fer;
2181 
2182 			/*
2183 			 * ia_mode calculation may have used stale i_mode.
2184 			 * Refresh and recalculate.
2185 			 */
2186 			fer = fuse_bpf_backing(inode, struct fuse_getattr_io,
2187 					       fuse_getattr_initialize,	fuse_getattr_backing,
2188 					       fuse_getattr_finalize,
2189 					       entry, NULL, 0, 0);
2190 			if (fer.ret)
2191 				ret = PTR_ERR(fer.result);
2192 			else
2193 #endif
2194 				ret = fuse_do_getattr(inode, NULL, file);
2195 			if (ret)
2196 				return ret;
2197 
2198 			attr->ia_mode = inode->i_mode;
2199 			if (inode->i_mode & S_ISUID) {
2200 				attr->ia_valid |= ATTR_MODE;
2201 				attr->ia_mode &= ~S_ISUID;
2202 			}
2203 			if ((inode->i_mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) {
2204 				attr->ia_valid |= ATTR_MODE;
2205 				attr->ia_mode &= ~S_ISGID;
2206 			}
2207 		}
2208 	}
2209 	if (!attr->ia_valid)
2210 		return 0;
2211 
2212 	ret = fuse_do_setattr(entry, attr, file);
2213 	if (!ret) {
2214 		/*
2215 		 * If filesystem supports acls it may have updated acl xattrs in
2216 		 * the filesystem, so forget cached acls for the inode.
2217 		 */
2218 		if (fc->posix_acl)
2219 			forget_all_cached_acls(inode);
2220 
2221 		/* Directory mode changed, may need to revalidate access */
2222 		if (d_is_dir(entry) && (attr->ia_valid & ATTR_MODE))
2223 			fuse_invalidate_entry_cache(entry);
2224 	}
2225 	return ret;
2226 }
2227 
2228 static int fuse_getattr(struct user_namespace *mnt_userns,
2229 			const struct path *path, struct kstat *stat,
2230 			u32 request_mask, unsigned int flags)
2231 {
2232 	struct inode *inode = d_inode(path->dentry);
2233 	struct fuse_conn *fc = get_fuse_conn(inode);
2234 
2235 	if (fuse_is_bad(inode))
2236 		return -EIO;
2237 
2238 	if (!fuse_allow_current_process(fc)) {
2239 		if (!request_mask) {
2240 			/*
2241 			 * If the user explicitly requested *nothing*, don't
2242 			 * error out, but return st_dev only.
2243 			 */
2244 			stat->result_mask = 0;
2245 			stat->dev = inode->i_sb->s_dev;
2246 			return 0;
2247 		}
2248 		return -EACCES;
2249 	}
2250 
2251 	return fuse_update_get_attr(inode, NULL, path, stat, request_mask,
2252 				    flags);
2253 }
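/*
 * Illustrative application-side example (editor's sketch) of the
 * request_mask == 0 branch above: statx() with an empty mask asks for
 * "nothing", so even a caller rejected by fuse_allow_current_process() gets
 * st_dev back instead of -EACCES.  Assumes a glibc with the statx() wrapper
 * (2.28+):
 */
#if 0
#define _GNU_SOURCE
#include <sys/stat.h>
#include <fcntl.h>
#include <stdio.h>

int main(int argc, char *argv[])
{
	struct statx stx;

	if (statx(AT_FDCWD, argc > 1 ? argv[1] : ".", 0, 0, &stx) == -1) {
		perror("statx");
		return 1;
	}
	printf("stx_mask=%#x dev=%u:%u\n", stx.stx_mask,
	       stx.stx_dev_major, stx.stx_dev_minor);
	return 0;
}
#endif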
2254 
2255 static const struct inode_operations fuse_dir_inode_operations = {
2256 	.lookup		= fuse_lookup,
2257 	.mkdir		= fuse_mkdir,
2258 	.symlink	= fuse_symlink,
2259 	.unlink		= fuse_unlink,
2260 	.rmdir		= fuse_rmdir,
2261 	.rename		= fuse_rename2,
2262 	.link		= fuse_link,
2263 	.setattr	= fuse_setattr,
2264 	.create		= fuse_create,
2265 	.atomic_open	= fuse_atomic_open,
2266 	.tmpfile        = fuse_tmpfile,
2267 	.mknod		= fuse_mknod,
2268 	.permission	= fuse_permission,
2269 	.getattr	= fuse_getattr,
2270 	.listxattr	= fuse_listxattr,
2271 	.get_acl	= fuse_get_acl,
2272 	.set_acl	= fuse_set_acl,
2273 	.fileattr_get	= fuse_fileattr_get,
2274 	.fileattr_set	= fuse_fileattr_set,
2275 };
2276 
2277 static const struct file_operations fuse_dir_operations = {
2278 	.llseek		= generic_file_llseek,
2279 	.read		= generic_read_dir,
2280 	.iterate_shared	= fuse_readdir,
2281 	.open		= fuse_dir_open,
2282 	.release	= fuse_dir_release,
2283 	.fsync		= fuse_dir_fsync,
2284 	.unlocked_ioctl	= fuse_dir_ioctl,
2285 	.compat_ioctl	= fuse_dir_compat_ioctl,
2286 };
2287 
2288 static const struct inode_operations fuse_common_inode_operations = {
2289 	.setattr	= fuse_setattr,
2290 	.permission	= fuse_permission,
2291 	.getattr	= fuse_getattr,
2292 	.listxattr	= fuse_listxattr,
2293 	.get_acl	= fuse_get_acl,
2294 	.set_acl	= fuse_set_acl,
2295 	.fileattr_get	= fuse_fileattr_get,
2296 	.fileattr_set	= fuse_fileattr_set,
2297 };
2298 
2299 static const struct inode_operations fuse_symlink_inode_operations = {
2300 	.setattr	= fuse_setattr,
2301 	.get_link	= fuse_get_link,
2302 	.getattr	= fuse_getattr,
2303 	.listxattr	= fuse_listxattr,
2304 };
2305 
2306 void fuse_init_common(struct inode *inode)
2307 {
2308 	inode->i_op = &fuse_common_inode_operations;
2309 }
2310 
2311 void fuse_init_dir(struct inode *inode)
2312 {
2313 	struct fuse_inode *fi = get_fuse_inode(inode);
2314 
2315 	inode->i_op = &fuse_dir_inode_operations;
2316 	inode->i_fop = &fuse_dir_operations;
2317 
2318 	spin_lock_init(&fi->rdc.lock);
2319 	fi->rdc.cached = false;
2320 	fi->rdc.size = 0;
2321 	fi->rdc.pos = 0;
2322 	fi->rdc.version = 0;
2323 }
2324 
2325 static int fuse_symlink_readpage(struct file *null, struct page *page)
2326 {
2327 	int err = fuse_readlink_page(page->mapping->host, page);
2328 
2329 	if (!err)
2330 		SetPageUptodate(page);
2331 
2332 	unlock_page(page);
2333 
2334 	return err;
2335 }
2336 
2337 static const struct address_space_operations fuse_symlink_aops = {
2338 	.readpage	= fuse_symlink_readpage,
2339 };
2340 
2341 void fuse_init_symlink(struct inode *inode)
2342 {
2343 	inode->i_op = &fuse_symlink_inode_operations;
2344 	inode->i_data.a_ops = &fuse_symlink_aops;
2345 	inode_nohighmem(inode);
2346 }
2347