/*
  FUSE: Filesystem in Userspace
  Copyright (C) 2001-2008  Miklos Szeredi <miklos@szeredi.hu>

  This program can be distributed under the terms of the GNU GPL.
  See the file COPYING.
*/

#include "fuse_i.h"

#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/file.h>
#include <linux/seq_file.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/fs_context.h>
#include <linux/fs_parser.h>
#include <linux/statfs.h>
#include <linux/random.h>
#include <linux/sched.h>
#include <linux/exportfs.h>
#include <linux/posix_acl.h>
#include <linux/pid_namespace.h>
#include <uapi/linux/magic.h>

MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>");
MODULE_DESCRIPTION("Filesystem in Userspace");
MODULE_LICENSE("GPL");

static struct kmem_cache *fuse_inode_cachep;
struct list_head fuse_conn_list;
DEFINE_MUTEX(fuse_mutex);
static int set_global_limit(const char *val, const struct kernel_param *kp);

unsigned max_user_bgreq;
module_param_call(max_user_bgreq, set_global_limit, param_get_uint,
		  &max_user_bgreq, 0644);
__MODULE_PARM_TYPE(max_user_bgreq, "uint");
MODULE_PARM_DESC(max_user_bgreq,
 "Global limit for the maximum number of backgrounded requests an "
 "unprivileged user can set");

unsigned max_user_congthresh;
module_param_call(max_user_congthresh, set_global_limit, param_get_uint,
		  &max_user_congthresh, 0644);
__MODULE_PARM_TYPE(max_user_congthresh, "uint");
MODULE_PARM_DESC(max_user_congthresh,
 "Global limit for the maximum congestion threshold an "
 "unprivileged user can set");

#define FUSE_DEFAULT_BLKSIZE 512

/** Maximum number of outstanding background requests */
#define FUSE_DEFAULT_MAX_BACKGROUND 12

/** Congestion starts at 75% of maximum */
#define FUSE_DEFAULT_CONGESTION_THRESHOLD (FUSE_DEFAULT_MAX_BACKGROUND * 3 / 4)

#ifdef CONFIG_BLOCK
static struct file_system_type fuseblk_fs_type;
#endif

struct fuse_forget_link *fuse_alloc_forget(void)
{
	return kzalloc(sizeof(struct fuse_forget_link), GFP_KERNEL_ACCOUNT);
}

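/*
 * Allocate the lookup state shared by all submounts of a nodeid; freed by
 * fuse_cleanup_submount_lookup() once the last reference is dropped.
 */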
static struct fuse_submount_lookup *fuse_alloc_submount_lookup(void)
{
	struct fuse_submount_lookup *sl;

	sl = kzalloc(sizeof(struct fuse_submount_lookup), GFP_KERNEL_ACCOUNT);
	if (!sl)
		return NULL;
	sl->forget = fuse_alloc_forget();
	if (!sl->forget)
		goto out_free;

	return sl;

out_free:
	kfree(sl);
	return NULL;
}

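/* Allocate a fuse_inode from the slab cache and reset its FUSE state */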
static struct inode *fuse_alloc_inode(struct super_block *sb)
{
	struct fuse_inode *fi;

	fi = alloc_inode_sb(sb, fuse_inode_cachep, GFP_KERNEL);
	if (!fi)
		return NULL;

	fi->i_time = 0;
	fi->inval_mask = ~0;
#ifdef CONFIG_FUSE_BPF
	fi->backing_inode = NULL;
	fi->bpf = NULL;
#endif

	fi->nodeid = 0;
	fi->nlookup = 0;
	fi->attr_version = 0;
	fi->orig_ino = 0;
	fi->state = 0;
	fi->submount_lookup = NULL;
	mutex_init(&fi->mutex);
	spin_lock_init(&fi->lock);
	fi->forget = fuse_alloc_forget();
	if (!fi->forget)
		goto out_free;

	if (IS_ENABLED(CONFIG_FUSE_DAX) && !fuse_dax_inode_alloc(sb, fi))
		goto out_free_forget;

	if (IS_ENABLED(CONFIG_FUSE_PASSTHROUGH))
		fuse_inode_backing_set(fi, NULL);

	return &fi->inode;

out_free_forget:
	kfree(fi->forget);
out_free:
	kmem_cache_free(fuse_inode_cachep, fi);
	return NULL;
}

static void fuse_free_inode(struct inode *inode)
{
	struct fuse_inode *fi = get_fuse_inode(inode);

	mutex_destroy(&fi->mutex);
	kfree(fi->forget);
#ifdef CONFIG_FUSE_DAX
	kfree(fi->dax);
#endif
	if (IS_ENABLED(CONFIG_FUSE_PASSTHROUGH))
		fuse_backing_put(fuse_inode_backing(fi));

#ifdef CONFIG_FUSE_BPF
	if (fi->bpf)
		bpf_prog_put(fi->bpf);
#endif
	kmem_cache_free(fuse_inode_cachep, fi);
}

static void fuse_cleanup_submount_lookup(struct fuse_conn *fc,
					 struct fuse_submount_lookup *sl)
{
	if (!refcount_dec_and_test(&sl->count))
		return;

	fuse_queue_forget(fc, sl->forget, sl->nodeid, 1);
	sl->forget = NULL;
	kfree(sl);
}

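/*
 * Drop all page cache for the inode and, while the superblock is still
 * active, queue FORGET messages so the server can release its references.
 */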
static void fuse_evict_inode(struct inode *inode)
{
	struct fuse_inode *fi = get_fuse_inode(inode);

	/* Will write inode on close/munmap and in all other dirtiers */
	WARN_ON(inode->i_state & I_DIRTY_INODE);
	truncate_inode_pages_final(&inode->i_data);
	clear_inode(inode);
	if (inode->i_sb->s_flags & SB_ACTIVE) {
		struct fuse_conn *fc = get_fuse_conn(inode);

		if (FUSE_IS_DAX(inode))
			fuse_dax_inode_cleanup(inode);
		if (fi->nlookup) {
			fuse_queue_forget(fc, fi->forget, fi->nodeid,
					  fi->nlookup);
			fi->forget = NULL;
		}

		if (fi->submount_lookup) {
			fuse_cleanup_submount_lookup(fc, fi->submount_lookup);
			fi->submount_lookup = NULL;
		}
	}
	if (S_ISREG(inode->i_mode) && !fuse_is_bad(inode)) {
		WARN_ON(fi->iocachectr != 0);
		WARN_ON(fi->iopassctr != 0);
		WARN_ON(!list_empty(&fi->write_files));
		WARN_ON(!list_empty(&fi->queued_writes));
	}
}

#ifdef CONFIG_FUSE_BPF
static void fuse_destroy_inode(struct inode *inode)
{
	struct fuse_inode *fi = get_fuse_inode(inode);

	iput(fi->backing_inode);
}
#endif

static int fuse_reconfigure(struct fs_context *fsc)
{
	struct super_block *sb = fsc->root->d_sb;

	sync_filesystem(sb);
	if (fsc->sb_flags & SB_MANDLOCK)
		return -EINVAL;

	return 0;
}

/*
 * ino_t is 32 bits on a 32-bit arch. We have to squash the 64-bit value down
 * so that it will fit.
 */
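/* e.g. on 32-bit: 0x1234567800000042 folds to 0x00000042 ^ 0x12345678 = 0x1234563a */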
static ino_t fuse_squash_ino(u64 ino64)
{
	ino_t ino = (ino_t) ino64;
	if (sizeof(ino_t) < sizeof(u64))
		ino ^= ino64 >> (sizeof(u64) - sizeof(ino_t)) * 8;
	return ino;
}

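/*
 * Synthesize a fuse_attr from an inode the kernel already holds (used for
 * submount roots and BPF backing inodes) instead of asking the server.
 */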
static void fuse_fill_attr_from_inode(struct fuse_attr *attr,
				      const struct fuse_inode *fi)
{
	struct timespec64 atime = inode_get_atime(&fi->inode);
	struct timespec64 mtime = inode_get_mtime(&fi->inode);
	struct timespec64 ctime = inode_get_ctime(&fi->inode);

	*attr = (struct fuse_attr){
		.ino = fi->inode.i_ino,
		.size = fi->inode.i_size,
		.blocks = fi->inode.i_blocks,
		.atime = atime.tv_sec,
		.mtime = mtime.tv_sec,
		.ctime = ctime.tv_sec,
		.atimensec = atime.tv_nsec,
		.mtimensec = mtime.tv_nsec,
		.ctimensec = ctime.tv_nsec,
		.mode = fi->inode.i_mode,
		.nlink = fi->inode.i_nlink,
		.uid = __kuid_val(fi->inode.i_uid),
		.gid = __kgid_val(fi->inode.i_gid),
		.rdev = fi->inode.i_rdev,
		.blksize = 1u << fi->inode.i_blkbits,
	};
}

void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr,
				   struct fuse_statx *sx,
				   u64 attr_valid, u32 cache_mask)
{
	struct fuse_conn *fc = get_fuse_conn(inode);
	struct fuse_inode *fi = get_fuse_inode(inode);

	lockdep_assert_held(&fi->lock);

	fi->attr_version = atomic64_inc_return(&fc->attr_version);
	fi->i_time = attr_valid;
	/* Clear basic stats from invalid mask */
	set_mask_bits(&fi->inval_mask, STATX_BASIC_STATS, 0);

	inode->i_ino = fuse_squash_ino(attr->ino);
	inode->i_mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777);
	set_nlink(inode, attr->nlink);
	inode->i_uid = make_kuid(fc->user_ns, attr->uid);
	inode->i_gid = make_kgid(fc->user_ns, attr->gid);
	inode->i_blocks = attr->blocks;

	/* Sanitize nsecs */
	attr->atimensec = min_t(u32, attr->atimensec, NSEC_PER_SEC - 1);
	attr->mtimensec = min_t(u32, attr->mtimensec, NSEC_PER_SEC - 1);
	attr->ctimensec = min_t(u32, attr->ctimensec, NSEC_PER_SEC - 1);

	inode_set_atime(inode, attr->atime, attr->atimensec);
	/* mtime from server may be stale due to local buffered write */
	if (!(cache_mask & STATX_MTIME)) {
		inode_set_mtime(inode, attr->mtime, attr->mtimensec);
	}
	if (!(cache_mask & STATX_CTIME)) {
		inode_set_ctime(inode, attr->ctime, attr->ctimensec);
	}
	if (sx) {
		/* Sanitize nsecs */
		sx->btime.tv_nsec =
			min_t(u32, sx->btime.tv_nsec, NSEC_PER_SEC - 1);

		/*
		 * Btime has been queried; the cache is valid whether or not
		 * btime is available, so clear STATX_BTIME from inval_mask.
		 *
		 * Availability of the btime attribute is indicated in
		 * FUSE_I_BTIME
		 */
		set_mask_bits(&fi->inval_mask, STATX_BTIME, 0);
		if (sx->mask & STATX_BTIME) {
			set_bit(FUSE_I_BTIME, &fi->state);
			fi->i_btime.tv_sec = sx->btime.tv_sec;
			fi->i_btime.tv_nsec = sx->btime.tv_nsec;
		}
	}

	if (attr->blksize != 0)
		inode->i_blkbits = ilog2(attr->blksize);
	else
		inode->i_blkbits = inode->i_sb->s_blocksize_bits;

	/*
	 * Don't set the sticky bit in i_mode, unless we want the VFS
	 * to check permissions.  This prevents failures due to the
	 * check in may_delete().
	 */
	fi->orig_i_mode = inode->i_mode;
	if (!fc->default_permissions)
		inode->i_mode &= ~S_ISVTX;

	fi->orig_ino = attr->ino;

	/*
	 * We are refreshing inode data and it is possible that another
	 * client set suid/sgid or security.capability xattr.  So clear
	 * S_NOSEC.  Ideally, we could have cleared it only if suid/sgid
	 * was set or if security.capability xattr was set.  But we don't
	 * know if security.capability has been set or not.  So clear it
	 * anyway.  It's less efficient but should be safe.
	 */
	inode->i_flags &= ~S_NOSEC;
}

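/*
 * Attributes for which the locally cached value, not the server's, is
 * authoritative: mtime/ctime/size of regular files in writeback-cache mode.
 */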
u32 fuse_get_cache_mask(struct inode *inode)
{
	struct fuse_conn *fc = get_fuse_conn(inode);

	if (!fc->writeback_cache || !S_ISREG(inode->i_mode))
		return 0;

	return STATX_MTIME | STATX_CTIME | STATX_SIZE;
}

void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
			    struct fuse_statx *sx,
			    u64 attr_valid, u64 attr_version)
{
	struct fuse_conn *fc = get_fuse_conn(inode);
	struct fuse_inode *fi = get_fuse_inode(inode);
	u32 cache_mask;
	loff_t oldsize;
	struct timespec64 old_mtime;

	spin_lock(&fi->lock);
	/*
	 * In case of writeback_cache enabled, writes update mtime, ctime and
	 * may update i_size.  In these cases trust the cached value in the
	 * inode.
	 */
	cache_mask = fuse_get_cache_mask(inode);
	if (cache_mask & STATX_SIZE)
		attr->size = i_size_read(inode);

	if (cache_mask & STATX_MTIME) {
		attr->mtime = inode_get_mtime_sec(inode);
		attr->mtimensec = inode_get_mtime_nsec(inode);
	}
	if (cache_mask & STATX_CTIME) {
		attr->ctime = inode_get_ctime_sec(inode);
		attr->ctimensec = inode_get_ctime_nsec(inode);
	}

	if ((attr_version != 0 && fi->attr_version > attr_version) ||
	    test_bit(FUSE_I_SIZE_UNSTABLE, &fi->state)) {
		spin_unlock(&fi->lock);
		return;
	}

	old_mtime = inode_get_mtime(inode);
	fuse_change_attributes_common(inode, attr, sx, attr_valid, cache_mask);

	oldsize = inode->i_size;
	/*
	 * In case of writeback_cache enabled, the cached writes beyond EOF
	 * extend local i_size without keeping userspace server in sync.  So,
	 * attr->size coming from server can be stale.  We cannot trust it.
	 */
	if (!(cache_mask & STATX_SIZE))
		i_size_write(inode, attr->size);
	spin_unlock(&fi->lock);

	if (!cache_mask && S_ISREG(inode->i_mode)) {
		bool inval = false;

		if (oldsize != attr->size) {
			truncate_pagecache(inode, attr->size);
			if (!fc->explicit_inval_data)
				inval = true;
		} else if (fc->auto_inval_data) {
			struct timespec64 new_mtime = {
				.tv_sec = attr->mtime,
				.tv_nsec = attr->mtimensec,
			};

			/*
			 * Auto inval mode also checks and invalidates if mtime
			 * has changed.
			 */
			if (!timespec64_equal(&old_mtime, &new_mtime))
				inval = true;
		}

		if (inval)
			invalidate_inode_pages2(inode->i_mapping);
	}

	if (IS_ENABLED(CONFIG_FUSE_DAX))
		fuse_dax_dontcache(inode, attr->flags);
}

static void fuse_init_submount_lookup(struct fuse_submount_lookup *sl,
				      u64 nodeid)
{
	sl->nodeid = nodeid;
	refcount_set(&sl->count, 1);
}

static void fuse_init_inode(struct inode *inode, struct fuse_attr *attr,
			    struct fuse_conn *fc)
{
	inode->i_mode = attr->mode & S_IFMT;
	inode->i_size = attr->size;
	inode_set_mtime(inode, attr->mtime, attr->mtimensec);
	inode_set_ctime(inode, attr->ctime, attr->ctimensec);
	if (S_ISREG(inode->i_mode)) {
		fuse_init_common(inode);
		fuse_init_file_inode(inode, attr->flags);
	} else if (S_ISDIR(inode->i_mode))
		fuse_init_dir(inode);
	else if (S_ISLNK(inode->i_mode))
		fuse_init_symlink(inode);
	else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) ||
		 S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
		fuse_init_common(inode);
		init_special_inode(inode, inode->i_mode, attr->rdev);
	} else
		BUG();
	/*
	 * Ensure that we don't cache acls for daemons without FUSE_POSIX_ACL
	 * so they see the exact same behavior as before.
	 */
	if (!fc->posix_acl)
		inode->i_acl = inode->i_default_acl = ACL_DONT_CACHE;
}

struct fuse_inode_identifier {
	u64 nodeid;
	struct inode *backing_inode;
};

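/*
 * Match/init callbacks for iget5_locked(): an inode is identified by its
 * nodeid and, when FUSE_BPF is enabled, additionally by its backing inode.
 */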
static int fuse_inode_eq(struct inode *inode, void *_nodeidp)
{
	struct fuse_inode_identifier *fii =
		(struct fuse_inode_identifier *) _nodeidp;
	struct fuse_inode *fi = get_fuse_inode(inode);

	return fii->nodeid == fi->nodeid;
}

static int fuse_inode_backing_eq(struct inode *inode, void *_nodeidp)
{
	struct fuse_inode_identifier *fii =
		(struct fuse_inode_identifier *) _nodeidp;
	struct fuse_inode *fi = get_fuse_inode(inode);

	return fii->nodeid == fi->nodeid
#ifdef CONFIG_FUSE_BPF
		&& fii->backing_inode == fi->backing_inode
#endif
		;
}

static int fuse_inode_set(struct inode *inode, void *_nodeidp)
{
	struct fuse_inode_identifier *fii =
		(struct fuse_inode_identifier *) _nodeidp;
	struct fuse_inode *fi = get_fuse_inode(inode);

	fi->nodeid = fii->nodeid;

	return 0;
}

static int fuse_inode_bpf_backing_set(struct inode *inode, void *_nodeidp)
{
	struct fuse_inode_identifier *fii =
		(struct fuse_inode_identifier *) _nodeidp;
	struct fuse_inode *fi = get_fuse_inode(inode);

	fi->nodeid = fii->nodeid;
#ifdef CONFIG_FUSE_BPF
	fi->backing_inode = fii->backing_inode;
	if (fi->backing_inode)
		ihold(fi->backing_inode);
#endif

	return 0;
}

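/*
 * Look up or create an inode on the FUSE_BPF path: hashed by nodeid when
 * one is available, otherwise by the address of the backing inode.
 */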
struct inode *fuse_iget_backing(struct super_block *sb, u64 nodeid,
				struct inode *backing_inode)
{
	struct inode *inode;
	struct fuse_inode *fi;
	struct fuse_conn *fc = get_fuse_conn_super(sb);
	struct fuse_inode_identifier fii = {
		.nodeid = nodeid,
		.backing_inode = backing_inode,
	};
	struct fuse_attr attr;
	unsigned long hash = (unsigned long) backing_inode;

	if (nodeid)
		hash = nodeid;

	fuse_fill_attr_from_inode(&attr, get_fuse_inode(backing_inode));
	inode = iget5_locked(sb, hash, fuse_inode_backing_eq,
			     fuse_inode_bpf_backing_set, &fii);
	if (!inode)
		return NULL;

	if ((inode->i_state & I_NEW)) {
		inode->i_flags |= S_NOATIME;
		if (!fc->writeback_cache)
			inode->i_flags |= S_NOCMTIME;
		fuse_init_common(inode);
		unlock_new_inode(inode);
	}

	fi = get_fuse_inode(inode);
	fuse_init_inode(inode, &attr, fc);
	spin_lock(&fi->lock);
	fi->nlookup++;
	spin_unlock(&fi->lock);

	return inode;
}

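/*
 * Look up or create the inode for @nodeid and account one lookup on it.
 * Submount roots are deliberately kept out of the inode hash (see below).
 */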
struct inode *fuse_iget(struct super_block *sb, u64 nodeid,
			int generation, struct fuse_attr *attr,
			u64 attr_valid, u64 attr_version)
{
	struct inode *inode;
	struct fuse_inode *fi;
	struct fuse_conn *fc = get_fuse_conn_super(sb);
	struct fuse_inode_identifier fii = {
		.nodeid = nodeid,
	};

	/*
	 * Auto mount points get their node id from the submount root, which is
	 * not a unique identifier within this filesystem.
	 *
	 * To avoid conflicts, do not place submount points into the inode hash
	 * table.
	 */
	if (fc->auto_submounts && (attr->flags & FUSE_ATTR_SUBMOUNT) &&
	    S_ISDIR(attr->mode)) {
		struct fuse_inode *fi;

		inode = new_inode(sb);
		if (!inode)
			return NULL;

		fuse_init_inode(inode, attr, fc);
		fi = get_fuse_inode(inode);
		fi->nodeid = nodeid;
		fi->submount_lookup = fuse_alloc_submount_lookup();
		if (!fi->submount_lookup) {
			iput(inode);
			return NULL;
		}
		/* Sets nlookup = 1 on fi->submount_lookup->nlookup */
		fuse_init_submount_lookup(fi->submount_lookup, nodeid);
		inode->i_flags |= S_AUTOMOUNT;
		goto done;
	}

retry:
	inode = iget5_locked(sb, nodeid, fuse_inode_eq, fuse_inode_set, &fii);
	if (!inode)
		return NULL;

	if ((inode->i_state & I_NEW)) {
		inode->i_flags |= S_NOATIME;
		if (!fc->writeback_cache || !S_ISREG(attr->mode))
			inode->i_flags |= S_NOCMTIME;
		inode->i_generation = generation;
		fuse_init_inode(inode, attr, fc);
		unlock_new_inode(inode);
	} else if (fuse_stale_inode(inode, generation, attr)) {
		/* nodeid was reused, any I/O on the old inode should fail */
		fuse_make_bad(inode);
		if (inode != d_inode(sb->s_root)) {
			remove_inode_hash(inode);
			iput(inode);
			goto retry;
		}
	}
	fi = get_fuse_inode(inode);
	spin_lock(&fi->lock);
	fi->nlookup++;
	spin_unlock(&fi->lock);
done:
	fuse_change_attributes(inode, attr, NULL, attr_valid, attr_version);

	return inode;
}

struct inode *fuse_ilookup(struct fuse_conn *fc, u64 nodeid,
			   struct fuse_mount **fm)
{
	struct fuse_mount *fm_iter;
	struct inode *inode;
	struct fuse_inode_identifier fii = {
		.nodeid = nodeid,
	};

	WARN_ON(!rwsem_is_locked(&fc->killsb));
	list_for_each_entry(fm_iter, &fc->mounts, fc_entry) {
		if (!fm_iter->sb)
			continue;

		inode = ilookup5(fm_iter->sb, nodeid, fuse_inode_eq, &fii);
		if (inode) {
			if (fm)
				*fm = fm_iter;
			return inode;
		}
	}

	return NULL;
}

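/*
 * Reverse-notification path (inode invalidation requested by the server):
 * invalidate cached attributes and, for offset >= 0, the given page range,
 * with len <= 0 meaning "to end of file".
 */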
int fuse_reverse_inval_inode(struct fuse_conn *fc, u64 nodeid,
			     loff_t offset, loff_t len)
{
	struct fuse_inode *fi;
	struct inode *inode;
	pgoff_t pg_start;
	pgoff_t pg_end;

	inode = fuse_ilookup(fc, nodeid, NULL);
	if (!inode)
		return -ENOENT;

	fi = get_fuse_inode(inode);
	spin_lock(&fi->lock);
	fi->attr_version = atomic64_inc_return(&fc->attr_version);
	spin_unlock(&fi->lock);

	fuse_invalidate_attr(inode);
	forget_all_cached_acls(inode);
	if (offset >= 0) {
		pg_start = offset >> PAGE_SHIFT;
		if (len <= 0)
			pg_end = -1;
		else
			pg_end = (offset + len - 1) >> PAGE_SHIFT;
		invalidate_inode_pages2_range(inode->i_mapping,
					      pg_start, pg_end);
	}
	iput(inode);
	return 0;
}

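/*
 * Serialize directory operations on the inode unless the server negotiated
 * FUSE_PARALLEL_DIROPS; the return value tells fuse_unlock_inode() whether
 * the mutex was actually taken.
 */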
bool fuse_lock_inode(struct inode *inode)
{
	bool locked = false;

	if (!get_fuse_conn(inode)->parallel_dirops) {
		mutex_lock(&get_fuse_inode(inode)->mutex);
		locked = true;
	}

	return locked;
}

void fuse_unlock_inode(struct inode *inode, bool locked)
{
	if (locked)
		mutex_unlock(&get_fuse_inode(inode)->mutex);
}

static void fuse_umount_begin(struct super_block *sb)
{
	struct fuse_conn *fc = get_fuse_conn_super(sb);

	if (fc->no_force_umount)
		return;

	fuse_abort_conn(fc);

	// Only retire block-device-based superblocks.
	if (sb->s_bdev != NULL)
		retire_super(sb);
}

static void fuse_send_destroy(struct fuse_mount *fm)
{
	if (fm->fc->conn_init) {
		FUSE_ARGS(args);

		args.opcode = FUSE_DESTROY;
		args.force = true;
		args.nocreds = true;
		fuse_simple_request(fm, &args);
	}
}

static int fuse_statfs(struct dentry *dentry, struct kstatfs *buf)
{
	struct super_block *sb = dentry->d_sb;
	struct fuse_mount *fm = get_fuse_mount_super(sb);
	FUSE_ARGS(args);
	struct fuse_statfs_out outarg;
	int err;
#ifdef CONFIG_FUSE_BPF
	struct fuse_err_ret fer;
#endif

	if (!fuse_allow_current_process(fm->fc)) {
		buf->f_type = FUSE_SUPER_MAGIC;
		return 0;
	}

#ifdef CONFIG_FUSE_BPF
	fer = fuse_bpf_backing(dentry->d_inode, struct fuse_statfs_out,
			       fuse_statfs_initialize, fuse_statfs_backing,
			       fuse_statfs_finalize,
			       dentry, buf);
	if (fer.ret)
		return PTR_ERR(fer.result);
#endif

	memset(&outarg, 0, sizeof(outarg));
	args.in_numargs = 0;
	args.opcode = FUSE_STATFS;
	args.nodeid = get_node_id(d_inode(dentry));
	args.out_numargs = 1;
	args.out_args[0].size = sizeof(outarg);
	args.out_args[0].value = &outarg;
	err = fuse_simple_request(fm, &args);
	if (!err)
		convert_fuse_statfs(buf, &outarg.st);
	return err;
}

static struct fuse_sync_bucket *fuse_sync_bucket_alloc(void)
{
	struct fuse_sync_bucket *bucket;

	bucket = kzalloc(sizeof(*bucket), GFP_KERNEL | __GFP_NOFAIL);
	if (bucket) {
		init_waitqueue_head(&bucket->waitq);
		/* Initial active count */
		atomic_set(&bucket->count, 1);
	}
	return bucket;
}

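/*
 * Wait for all currently outstanding writes to complete.  Writes join the
 * bucket current at submission time; here a fresh bucket is installed and
 * this task sleeps until the old bucket's count drains to zero.
 */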
static void fuse_sync_fs_writes(struct fuse_conn *fc)
{
	struct fuse_sync_bucket *bucket, *new_bucket;
	int count;

	new_bucket = fuse_sync_bucket_alloc();
	spin_lock(&fc->lock);
	bucket = rcu_dereference_protected(fc->curr_bucket, 1);
	count = atomic_read(&bucket->count);
	WARN_ON(count < 1);
	/* No outstanding writes? */
	if (count == 1) {
		spin_unlock(&fc->lock);
		kfree(new_bucket);
		return;
	}

	/*
	 * Completion of new bucket depends on completion of this bucket, so add
	 * one more count.
	 */
	atomic_inc(&new_bucket->count);
	rcu_assign_pointer(fc->curr_bucket, new_bucket);
	spin_unlock(&fc->lock);
	/*
	 * Drop initial active count.  At this point if all writes in this and
	 * ancestor buckets complete, the count will go to zero and this task
	 * will be woken up.
	 */
	atomic_dec(&bucket->count);

	wait_event(bucket->waitq, atomic_read(&bucket->count) == 0);

	/* Drop temp count on descendant bucket */
	fuse_sync_bucket_dec(new_bucket);
	kfree_rcu(bucket, rcu);
}

static int fuse_sync_fs(struct super_block *sb, int wait)
{
	struct fuse_mount *fm = get_fuse_mount_super(sb);
	struct fuse_conn *fc = fm->fc;
	struct fuse_syncfs_in inarg;
	FUSE_ARGS(args);
	int err;

	/*
	 * Userspace cannot handle the wait == 0 case.  Avoid a
	 * gratuitous roundtrip.
	 */
	if (!wait)
		return 0;

	/* The filesystem is being unmounted.  Nothing to do. */
	if (!sb->s_root)
		return 0;

	if (!fc->sync_fs)
		return 0;

	fuse_sync_fs_writes(fc);

	memset(&inarg, 0, sizeof(inarg));
	args.in_numargs = 1;
	args.in_args[0].size = sizeof(inarg);
	args.in_args[0].value = &inarg;
	args.opcode = FUSE_SYNCFS;
	args.nodeid = get_node_id(sb->s_root->d_inode);
	args.out_numargs = 0;

	err = fuse_simple_request(fm, &args);
	if (err == -ENOSYS) {
		fc->sync_fs = 0;
		err = 0;
	}

	return err;
}

enum {
	OPT_SOURCE,
	OPT_SUBTYPE,
	OPT_FD,
	OPT_ROOTMODE,
	OPT_USER_ID,
	OPT_GROUP_ID,
	OPT_DEFAULT_PERMISSIONS,
	OPT_ALLOW_OTHER,
	OPT_MAX_READ,
	OPT_BLKSIZE,
	OPT_ROOT_BPF,
	OPT_ROOT_DIR,
	OPT_NO_DAEMON,
	OPT_ERR
};

static const struct fs_parameter_spec fuse_fs_parameters[] = {
	fsparam_string	("source",		OPT_SOURCE),
	fsparam_u32	("fd",			OPT_FD),
	fsparam_u32oct	("rootmode",		OPT_ROOTMODE),
	fsparam_uid	("user_id",		OPT_USER_ID),
	fsparam_gid	("group_id",		OPT_GROUP_ID),
	fsparam_flag	("default_permissions",	OPT_DEFAULT_PERMISSIONS),
	fsparam_flag	("allow_other",		OPT_ALLOW_OTHER),
	fsparam_u32	("max_read",		OPT_MAX_READ),
	fsparam_u32	("blksize",		OPT_BLKSIZE),
	fsparam_string	("subtype",		OPT_SUBTYPE),
	fsparam_u32	("root_bpf",		OPT_ROOT_BPF),
	fsparam_u32	("root_dir",		OPT_ROOT_DIR),
	fsparam_flag	("no_daemon",		OPT_NO_DAEMON),
	{}
};

static int fuse_parse_param(struct fs_context *fsc, struct fs_parameter *param)
{
	struct fs_parse_result result;
	struct fuse_fs_context *ctx = fsc->fs_private;
	int opt;
	kuid_t kuid;
	kgid_t kgid;

	if (fsc->purpose == FS_CONTEXT_FOR_RECONFIGURE) {
		/*
		 * Ignore options coming from mount(MS_REMOUNT) for backward
		 * compatibility.
		 */
		if (fsc->oldapi)
			return 0;

		return invalfc(fsc, "No changes allowed in reconfigure");
	}

	opt = fs_parse(fsc, fuse_fs_parameters, param, &result);
	if (opt < 0)
		return opt;

	switch (opt) {
	case OPT_SOURCE:
		if (fsc->source)
			return invalfc(fsc, "Multiple sources specified");
		fsc->source = param->string;
		param->string = NULL;
		break;

	case OPT_SUBTYPE:
		if (ctx->subtype)
			return invalfc(fsc, "Multiple subtypes specified");
		ctx->subtype = param->string;
		param->string = NULL;
		return 0;

	case OPT_FD:
		ctx->fd = result.uint_32;
		ctx->fd_present = true;
		break;

	case OPT_ROOTMODE:
		if (!fuse_valid_type(result.uint_32))
			return invalfc(fsc, "Invalid rootmode");
		ctx->rootmode = result.uint_32;
		ctx->rootmode_present = true;
		break;

	case OPT_USER_ID:
		kuid = result.uid;
		/*
		 * The requested uid must be representable in the
		 * filesystem's idmapping.
		 */
		if (!kuid_has_mapping(fsc->user_ns, kuid))
			return invalfc(fsc, "Invalid user_id");
		ctx->user_id = kuid;
		ctx->user_id_present = true;
		break;

	case OPT_GROUP_ID:
		kgid = result.gid;
		/*
		 * The requested gid must be representable in the
		 * filesystem's idmapping.
		 */
		if (!kgid_has_mapping(fsc->user_ns, kgid))
			return invalfc(fsc, "Invalid group_id");
		ctx->group_id = kgid;
		ctx->group_id_present = true;
		break;

	case OPT_DEFAULT_PERMISSIONS:
		ctx->default_permissions = true;
		break;

	case OPT_ALLOW_OTHER:
		ctx->allow_other = true;
		break;

	case OPT_MAX_READ:
		ctx->max_read = result.uint_32;
		break;

	case OPT_BLKSIZE:
		if (!ctx->is_bdev)
			return invalfc(fsc, "blksize only supported for fuseblk");
		ctx->blksize = result.uint_32;
		break;

	case OPT_ROOT_BPF:
		ctx->root_bpf = bpf_prog_get_type_dev(result.uint_32,
						      BPF_PROG_TYPE_FUSE, false);
		if (IS_ERR(ctx->root_bpf)) {
			ctx->root_bpf = NULL;
			return invalfc(fsc, "Unable to open bpf program");
		}
		break;

	case OPT_ROOT_DIR:
		ctx->root_dir = fget(result.uint_32);
		if (!ctx->root_dir)
			return invalfc(fsc, "Unable to open root directory");
		break;

	case OPT_NO_DAEMON:
		ctx->no_daemon = true;
		ctx->fd_present = true;
		break;

	default:
		return -EINVAL;
	}

	return 0;
}

static void fuse_free_fsc(struct fs_context *fsc)
{
	struct fuse_fs_context *ctx = fsc->fs_private;

	if (ctx) {
		if (ctx->root_dir)
			fput(ctx->root_dir);
		if (ctx->root_bpf)
			bpf_prog_put(ctx->root_bpf);
		kfree(ctx->subtype);
		kfree(ctx);
	}
}

static int fuse_show_options(struct seq_file *m, struct dentry *root)
{
	struct super_block *sb = root->d_sb;
	struct fuse_conn *fc = get_fuse_conn_super(sb);

	if (fc->legacy_opts_show) {
		seq_printf(m, ",user_id=%u",
			   from_kuid_munged(fc->user_ns, fc->user_id));
		seq_printf(m, ",group_id=%u",
			   from_kgid_munged(fc->user_ns, fc->group_id));
		if (fc->default_permissions)
			seq_puts(m, ",default_permissions");
		if (fc->allow_other)
			seq_puts(m, ",allow_other");
		if (fc->max_read != ~0)
			seq_printf(m, ",max_read=%u", fc->max_read);
		if (sb->s_bdev && sb->s_blocksize != FUSE_DEFAULT_BLKSIZE)
			seq_printf(m, ",blksize=%lu", sb->s_blocksize);
	}
#ifdef CONFIG_FUSE_DAX
	if (fc->dax_mode == FUSE_DAX_ALWAYS)
		seq_puts(m, ",dax=always");
	else if (fc->dax_mode == FUSE_DAX_NEVER)
		seq_puts(m, ",dax=never");
	else if (fc->dax_mode == FUSE_DAX_INODE_USER)
		seq_puts(m, ",dax=inode");
#endif

	return 0;
}

static void fuse_iqueue_init(struct fuse_iqueue *fiq,
			     const struct fuse_iqueue_ops *ops,
			     void *priv)
{
	memset(fiq, 0, sizeof(struct fuse_iqueue));
	spin_lock_init(&fiq->lock);
	init_waitqueue_head(&fiq->waitq);
	INIT_LIST_HEAD(&fiq->pending);
	INIT_LIST_HEAD(&fiq->interrupts);
	fiq->forget_list_tail = &fiq->forget_list_head;
	fiq->connected = 1;
	fiq->ops = ops;
	fiq->priv = priv;
}

static void fuse_pqueue_init(struct fuse_pqueue *fpq)
{
	unsigned int i;

	spin_lock_init(&fpq->lock);
	for (i = 0; i < FUSE_PQ_HASH_SIZE; i++)
		INIT_LIST_HEAD(&fpq->processing[i]);
	INIT_LIST_HEAD(&fpq->io);
	fpq->connected = 1;
}

void fuse_conn_init(struct fuse_conn *fc, struct fuse_mount *fm,
		    struct user_namespace *user_ns,
		    const struct fuse_iqueue_ops *fiq_ops, void *fiq_priv)
{
	memset(fc, 0, sizeof(*fc));
	spin_lock_init(&fc->lock);
	spin_lock_init(&fc->bg_lock);
	init_rwsem(&fc->killsb);
	refcount_set(&fc->count, 1);
	atomic_set(&fc->dev_count, 1);
	init_waitqueue_head(&fc->blocked_waitq);
	fuse_iqueue_init(&fc->iq, fiq_ops, fiq_priv);
	INIT_LIST_HEAD(&fc->bg_queue);
	INIT_LIST_HEAD(&fc->entry);
	INIT_LIST_HEAD(&fc->devices);
	atomic_set(&fc->num_waiting, 0);
	fc->max_background = FUSE_DEFAULT_MAX_BACKGROUND;
	fc->congestion_threshold = FUSE_DEFAULT_CONGESTION_THRESHOLD;
	atomic64_set(&fc->khctr, 0);
	fc->polled_files = RB_ROOT;
	fc->blocked = 0;
	fc->initialized = 0;
	fc->connected = 1;
	atomic64_set(&fc->attr_version, 1);
	get_random_bytes(&fc->scramble_key, sizeof(fc->scramble_key));
	fc->pid_ns = get_pid_ns(task_active_pid_ns(current));
	fc->user_ns = get_user_ns(user_ns);
	fc->max_pages = FUSE_DEFAULT_MAX_PAGES_PER_REQ;
	fc->max_pages_limit = FUSE_MAX_MAX_PAGES;

	if (IS_ENABLED(CONFIG_FUSE_PASSTHROUGH))
		fuse_backing_files_init(fc);

	INIT_LIST_HEAD(&fc->mounts);
	list_add(&fm->fc_entry, &fc->mounts);
	fm->fc = fc;
}
EXPORT_SYMBOL_GPL(fuse_conn_init);

static void delayed_release(struct rcu_head *p)
{
	struct fuse_conn *fc = container_of(p, struct fuse_conn, rcu);

	put_user_ns(fc->user_ns);
	fc->release(fc);
}

void fuse_conn_put(struct fuse_conn *fc)
{
	if (refcount_dec_and_test(&fc->count)) {
		struct fuse_iqueue *fiq = &fc->iq;
		struct fuse_sync_bucket *bucket;

		if (IS_ENABLED(CONFIG_FUSE_DAX))
			fuse_dax_conn_free(fc);
		if (fiq->ops->release)
			fiq->ops->release(fiq);
		put_pid_ns(fc->pid_ns);
		bucket = rcu_dereference_protected(fc->curr_bucket, 1);
		if (bucket) {
			WARN_ON(atomic_read(&bucket->count) != 1);
			kfree(bucket);
		}
		if (IS_ENABLED(CONFIG_FUSE_PASSTHROUGH))
			fuse_backing_files_free(fc);
		call_rcu(&fc->rcu, delayed_release);
	}
}
EXPORT_SYMBOL_GPL(fuse_conn_put);

struct fuse_conn *fuse_conn_get(struct fuse_conn *fc)
{
	refcount_inc(&fc->count);
	return fc;
}
EXPORT_SYMBOL_GPL(fuse_conn_get);

static struct inode *fuse_get_root_inode(struct super_block *sb,
					 unsigned int mode,
					 struct bpf_prog *root_bpf,
					 struct file *backing_fd)
{
	struct fuse_attr attr;
	struct inode *inode;

	memset(&attr, 0, sizeof(attr));
	attr.mode = mode;
	attr.ino = FUSE_ROOT_ID;
	attr.nlink = 1;
	inode = fuse_iget(sb, FUSE_ROOT_ID, 0, &attr, 0, 0);
	if (!inode)
		return NULL;

#ifdef CONFIG_FUSE_BPF
	get_fuse_inode(inode)->bpf = root_bpf;
	if (root_bpf)
		bpf_prog_inc(root_bpf);

	if (backing_fd) {
		get_fuse_inode(inode)->backing_inode = backing_fd->f_inode;
		ihold(backing_fd->f_inode);
	}
#endif

	return inode;
}

struct fuse_inode_handle {
	u64 nodeid;
	u32 generation;
};

static struct dentry *fuse_get_dentry(struct super_block *sb,
				      struct fuse_inode_handle *handle)
{
	struct fuse_conn *fc = get_fuse_conn_super(sb);
	struct inode *inode;
	struct dentry *entry;
	int err = -ESTALE;
	struct fuse_inode_identifier fii = {
		.nodeid = handle->nodeid,
	};

	if (handle->nodeid == 0)
		goto out_err;

	inode = ilookup5(sb, handle->nodeid, fuse_inode_eq, &fii);
	if (!inode) {
		struct fuse_entry_out outarg;
		const struct qstr name = QSTR_INIT(".", 1);

		if (!fc->export_support)
			goto out_err;

		err = fuse_lookup_name(sb, handle->nodeid, &name, &outarg,
				       NULL, &inode);
		if (err && err != -ENOENT)
			goto out_err;
		if (err || !inode) {
			err = -ESTALE;
			goto out_err;
		}
		err = -EIO;
		if (get_node_id(inode) != handle->nodeid)
			goto out_iput;
	}
	err = -ESTALE;
	if (inode->i_generation != handle->generation)
		goto out_iput;

	entry = d_obtain_alias(inode);
	if (!IS_ERR(entry) && get_node_id(inode) != FUSE_ROOT_ID)
		fuse_invalidate_entry_cache(entry);

	return entry;

out_iput:
	iput(inode);
out_err:
	return ERR_PTR(err);
}

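/*
 * File handle layout: fh[0..1] = nodeid (high/low 32 bits), fh[2] =
 * generation; for a connectable handle the parent follows in fh[3..5].
 */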
static int fuse_encode_fh(struct inode *inode, u32 *fh, int *max_len,
			  struct inode *parent)
{
	int len = parent ? 6 : 3;
	u64 nodeid;
	u32 generation;

	if (*max_len < len) {
		*max_len = len;
		return FILEID_INVALID;
	}

	nodeid = get_fuse_inode(inode)->nodeid;
	generation = inode->i_generation;

	fh[0] = (u32)(nodeid >> 32);
	fh[1] = (u32)(nodeid & 0xffffffff);
	fh[2] = generation;

	if (parent) {
		nodeid = get_fuse_inode(parent)->nodeid;
		generation = parent->i_generation;

		fh[3] = (u32)(nodeid >> 32);
		fh[4] = (u32)(nodeid & 0xffffffff);
		fh[5] = generation;
	}

	*max_len = len;
	return parent ? FILEID_INO64_GEN_PARENT : FILEID_INO64_GEN;
}

static struct dentry *fuse_fh_to_dentry(struct super_block *sb,
					struct fid *fid, int fh_len, int fh_type)
{
	struct fuse_inode_handle handle;

	if ((fh_type != FILEID_INO64_GEN &&
	     fh_type != FILEID_INO64_GEN_PARENT) || fh_len < 3)
		return NULL;

	handle.nodeid = (u64) fid->raw[0] << 32;
	handle.nodeid |= (u64) fid->raw[1];
	handle.generation = fid->raw[2];
	return fuse_get_dentry(sb, &handle);
}

static struct dentry *fuse_fh_to_parent(struct super_block *sb,
					struct fid *fid, int fh_len, int fh_type)
{
	struct fuse_inode_handle parent;

	if (fh_type != FILEID_INO64_GEN_PARENT || fh_len < 6)
		return NULL;

	parent.nodeid = (u64) fid->raw[3] << 32;
	parent.nodeid |= (u64) fid->raw[4];
	parent.generation = fid->raw[5];
	return fuse_get_dentry(sb, &parent);
}

static struct dentry *fuse_get_parent(struct dentry *child)
{
	struct inode *child_inode = d_inode(child);
	struct fuse_conn *fc = get_fuse_conn(child_inode);
	struct inode *inode;
	struct dentry *parent;
	struct fuse_entry_out outarg;
	const struct qstr name = QSTR_INIT("..", 2);
	int err;

	if (!fc->export_support)
		return ERR_PTR(-ESTALE);

	err = fuse_lookup_name(child_inode->i_sb, get_node_id(child_inode),
			       &name, &outarg, NULL, &inode);
	if (err) {
		if (err == -ENOENT)
			return ERR_PTR(-ESTALE);
		return ERR_PTR(err);
	}

	parent = d_obtain_alias(inode);
	if (!IS_ERR(parent) && get_node_id(inode) != FUSE_ROOT_ID)
		fuse_invalidate_entry_cache(parent);

	return parent;
}

/* only for fid encoding; no support for file handle */
static const struct export_operations fuse_export_fid_operations = {
	.encode_fh	= fuse_encode_fh,
};

static const struct export_operations fuse_export_operations = {
	.fh_to_dentry	= fuse_fh_to_dentry,
	.fh_to_parent	= fuse_fh_to_parent,
	.encode_fh	= fuse_encode_fh,
	.get_parent	= fuse_get_parent,
};

static const struct super_operations fuse_super_operations = {
	.alloc_inode	= fuse_alloc_inode,
#ifdef CONFIG_FUSE_BPF
	.destroy_inode	= fuse_destroy_inode,
#endif
	.free_inode	= fuse_free_inode,
	.evict_inode	= fuse_evict_inode,
	.write_inode	= fuse_write_inode,
	.drop_inode	= generic_delete_inode,
	.umount_begin	= fuse_umount_begin,
	.statfs		= fuse_statfs,
	.sync_fs	= fuse_sync_fs,
	.show_options	= fuse_show_options,
};

static void sanitize_global_limit(unsigned *limit)
{
	/*
	 * The default maximum number of async requests is calculated to consume
	 * 1/2^13 of the total memory, assuming 392 bytes per request.
	 */
	if (*limit == 0)
		*limit = ((totalram_pages() << PAGE_SHIFT) >> 13) / 392;

	if (*limit >= 1 << 16)
		*limit = (1 << 16) - 1;
}

static int set_global_limit(const char *val, const struct kernel_param *kp)
{
	int rv;

	rv = param_set_uint(val, kp);
	if (rv)
		return rv;

	sanitize_global_limit((unsigned *)kp->arg);

	return 0;
}

static void process_init_limits(struct fuse_conn *fc, struct fuse_init_out *arg)
{
	int cap_sys_admin = capable(CAP_SYS_ADMIN);

	if (arg->minor < 13)
		return;

	sanitize_global_limit(&max_user_bgreq);
	sanitize_global_limit(&max_user_congthresh);

	spin_lock(&fc->bg_lock);
	if (arg->max_background) {
		fc->max_background = arg->max_background;

		if (!cap_sys_admin && fc->max_background > max_user_bgreq)
			fc->max_background = max_user_bgreq;
	}
	if (arg->congestion_threshold) {
		fc->congestion_threshold = arg->congestion_threshold;

		if (!cap_sys_admin &&
		    fc->congestion_threshold > max_user_congthresh)
			fc->congestion_threshold = max_user_congthresh;
	}
	spin_unlock(&fc->bg_lock);
}

struct fuse_init_args {
	struct fuse_args args;
	struct fuse_init_in in;
	struct fuse_init_out out;
};

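/*
 * Completion handler for the FUSE_INIT request: reconcile the feature
 * flags advertised in fuse_send_init() with what the server replied,
 * then mark the connection initialized and unblock waiting requests.
 */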
static void process_init_reply(struct fuse_mount *fm, struct fuse_args *args,
			       int error)
{
	struct fuse_conn *fc = fm->fc;
	struct fuse_init_args *ia = container_of(args, typeof(*ia), args);
	struct fuse_init_out *arg = &ia->out;
	bool ok = true;

	if (error || arg->major != FUSE_KERNEL_VERSION)
		ok = false;
	else {
		unsigned long ra_pages;

		process_init_limits(fc, arg);

		if (arg->minor >= 6) {
			u64 flags = arg->flags;

			if (flags & FUSE_INIT_EXT)
				flags |= (u64) arg->flags2 << 32;

			ra_pages = arg->max_readahead / PAGE_SIZE;
			if (flags & FUSE_ASYNC_READ)
				fc->async_read = 1;
			if (!(flags & FUSE_POSIX_LOCKS))
				fc->no_lock = 1;
			if (arg->minor >= 17) {
				if (!(flags & FUSE_FLOCK_LOCKS))
					fc->no_flock = 1;
			} else {
				if (!(flags & FUSE_POSIX_LOCKS))
					fc->no_flock = 1;
			}
			if (flags & FUSE_ATOMIC_O_TRUNC)
				fc->atomic_o_trunc = 1;
			if (arg->minor >= 9) {
				/* LOOKUP has dependency on proto version */
				if (flags & FUSE_EXPORT_SUPPORT)
					fc->export_support = 1;
			}
			if (flags & FUSE_BIG_WRITES)
				fc->big_writes = 1;
			if (flags & FUSE_DONT_MASK)
				fc->dont_mask = 1;
			if (flags & FUSE_AUTO_INVAL_DATA)
				fc->auto_inval_data = 1;
			else if (flags & FUSE_EXPLICIT_INVAL_DATA)
				fc->explicit_inval_data = 1;
			if (flags & FUSE_DO_READDIRPLUS) {
				fc->do_readdirplus = 1;
				if (flags & FUSE_READDIRPLUS_AUTO)
					fc->readdirplus_auto = 1;
			}
			if (flags & FUSE_ASYNC_DIO)
				fc->async_dio = 1;
			if (flags & FUSE_WRITEBACK_CACHE)
				fc->writeback_cache = 1;
			if (flags & FUSE_PARALLEL_DIROPS)
				fc->parallel_dirops = 1;
			if (flags & FUSE_HANDLE_KILLPRIV)
				fc->handle_killpriv = 1;
			if (arg->time_gran && arg->time_gran <= 1000000000)
				fm->sb->s_time_gran = arg->time_gran;
			if ((flags & FUSE_POSIX_ACL)) {
				fc->default_permissions = 1;
				fc->posix_acl = 1;
			}
			if (flags & FUSE_CACHE_SYMLINKS)
				fc->cache_symlinks = 1;
			if (flags & FUSE_ABORT_ERROR)
				fc->abort_err = 1;
			if (flags & FUSE_MAX_PAGES) {
				fc->max_pages =
					min_t(unsigned int, fc->max_pages_limit,
					      max_t(unsigned int, arg->max_pages, 1));
			}
			if (IS_ENABLED(CONFIG_FUSE_DAX)) {
				if (flags & FUSE_MAP_ALIGNMENT &&
				    !fuse_dax_check_alignment(fc, arg->map_alignment)) {
					ok = false;
				}
				if (flags & FUSE_HAS_INODE_DAX)
					fc->inode_dax = 1;
			}
			if (flags & FUSE_HANDLE_KILLPRIV_V2) {
				fc->handle_killpriv_v2 = 1;
				fm->sb->s_flags |= SB_NOSEC;
			}
			if (flags & FUSE_SETXATTR_EXT)
				fc->setxattr_ext = 1;
			if (flags & FUSE_SECURITY_CTX)
				fc->init_security = 1;
			if (flags & FUSE_CREATE_SUPP_GROUP)
				fc->create_supp_group = 1;
			if (flags & FUSE_DIRECT_IO_ALLOW_MMAP)
				fc->direct_io_allow_mmap = 1;
			/*
			 * max_stack_depth is the max stack depth of FUSE fs,
			 * so it has to be at least 1 to support passthrough
			 * to backing files.
			 *
			 * With max_stack_depth > 1, the backing files can be
			 * on a stacked fs (e.g. overlayfs) themselves, and
			 * with max_stack_depth == 1, FUSE fs can be stacked
			 * as the underlying fs of a stacked fs (e.g.
			 * overlayfs).
			 */
			if (IS_ENABLED(CONFIG_FUSE_PASSTHROUGH) &&
			    (flags & FUSE_PASSTHROUGH) &&
			    arg->max_stack_depth > 0 &&
			    arg->max_stack_depth <= FILESYSTEM_MAX_STACK_DEPTH) {
				fc->passthrough = 1;
				fc->max_stack_depth = arg->max_stack_depth;
				fm->sb->s_stack_depth = arg->max_stack_depth;
			}
			if (flags & FUSE_NO_EXPORT_SUPPORT)
				fm->sb->s_export_op = &fuse_export_fid_operations;
			if (flags & FUSE_ALLOW_IDMAP) {
				if (fc->default_permissions)
					fm->sb->s_iflags &= ~SB_I_NOIDMAP;
				else
					ok = false;
			}
		} else {
			ra_pages = fc->max_read / PAGE_SIZE;
			fc->no_lock = 1;
			fc->no_flock = 1;
		}

		fm->sb->s_bdi->ra_pages =
			min(fm->sb->s_bdi->ra_pages, ra_pages);
		fc->minor = arg->minor;
		fc->max_write = arg->minor < 5 ? 4096 : arg->max_write;
		fc->max_write = max_t(unsigned, 4096, fc->max_write);
		fc->conn_init = 1;
	}
	kfree(ia);

	if (!ok) {
		fc->conn_init = 0;
		fc->conn_error = 1;
	}

	fuse_set_initialized(fc);
	wake_up_all(&fc->blocked_waitq);
}

void fuse_send_init(struct fuse_mount *fm)
{
	struct fuse_init_args *ia;
	u64 flags;

	ia = kzalloc(sizeof(*ia), GFP_KERNEL | __GFP_NOFAIL);

	ia->in.major = FUSE_KERNEL_VERSION;
	ia->in.minor = FUSE_KERNEL_MINOR_VERSION;
	ia->in.max_readahead = fm->sb->s_bdi->ra_pages * PAGE_SIZE;
	flags =
		FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC |
		FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK |
		FUSE_SPLICE_WRITE | FUSE_SPLICE_MOVE | FUSE_SPLICE_READ |
		FUSE_FLOCK_LOCKS | FUSE_HAS_IOCTL_DIR | FUSE_AUTO_INVAL_DATA |
		FUSE_DO_READDIRPLUS | FUSE_READDIRPLUS_AUTO | FUSE_ASYNC_DIO |
		FUSE_WRITEBACK_CACHE | FUSE_NO_OPEN_SUPPORT |
		FUSE_PARALLEL_DIROPS | FUSE_HANDLE_KILLPRIV | FUSE_POSIX_ACL |
		FUSE_ABORT_ERROR | FUSE_MAX_PAGES | FUSE_CACHE_SYMLINKS |
		FUSE_NO_OPENDIR_SUPPORT | FUSE_EXPLICIT_INVAL_DATA |
		FUSE_HANDLE_KILLPRIV_V2 | FUSE_SETXATTR_EXT | FUSE_INIT_EXT |
		FUSE_SECURITY_CTX | FUSE_CREATE_SUPP_GROUP |
		FUSE_HAS_EXPIRE_ONLY | FUSE_DIRECT_IO_ALLOW_MMAP |
		FUSE_NO_EXPORT_SUPPORT | FUSE_HAS_RESEND | FUSE_ALLOW_IDMAP;
#ifdef CONFIG_FUSE_DAX
	if (fm->fc->dax)
		flags |= FUSE_MAP_ALIGNMENT;
	if (fuse_is_inode_dax_mode(fm->fc->dax_mode))
		flags |= FUSE_HAS_INODE_DAX;
#endif
	if (fm->fc->auto_submounts)
		flags |= FUSE_SUBMOUNTS;
	if (IS_ENABLED(CONFIG_FUSE_PASSTHROUGH))
		flags |= FUSE_PASSTHROUGH;

	ia->in.flags = flags;
	ia->in.flags2 = flags >> 32;

	ia->args.opcode = FUSE_INIT;
	ia->args.in_numargs = 1;
	ia->args.in_args[0].size = sizeof(ia->in);
	ia->args.in_args[0].value = &ia->in;
	ia->args.out_numargs = 1;
	/* Variable length argument used for backward compatibility
	   with interface version < 7.5.  Rest of init_out is zeroed
	   by do_get_request(), so a short reply is not a problem */
	ia->args.out_argvar = true;
	ia->args.out_args[0].size = sizeof(ia->out);
	ia->args.out_args[0].value = &ia->out;
	ia->args.force = true;
	ia->args.nocreds = true;
	ia->args.end = process_init_reply;

	if (unlikely(fm->fc->no_daemon) ||
	    fuse_simple_background(fm, &ia->args, GFP_KERNEL) != 0)
		process_init_reply(fm, &ia->args, -ENOTCONN);
}
EXPORT_SYMBOL_GPL(fuse_send_init);

void fuse_free_conn(struct fuse_conn *fc)
{
	WARN_ON(!list_empty(&fc->devices));
	kfree(fc);
}
EXPORT_SYMBOL_GPL(fuse_free_conn);

static int fuse_bdi_init(struct fuse_conn *fc, struct super_block *sb)
{
	int err;
	char *suffix = "";

	if (sb->s_bdev) {
		suffix = "-fuseblk";
		/*
		 * sb->s_bdi points to blkdev's bdi however we want to redirect
		 * it to our private bdi...
		 */
		bdi_put(sb->s_bdi);
		sb->s_bdi = &noop_backing_dev_info;
	}
	err = super_setup_bdi_name(sb, "%u:%u%s", MAJOR(fc->dev),
				   MINOR(fc->dev), suffix);
	if (err)
		return err;

	/* fuse does its own writeback accounting */
	sb->s_bdi->capabilities &= ~BDI_CAP_WRITEBACK_ACCT;
	sb->s_bdi->capabilities |= BDI_CAP_STRICTLIMIT;

	/*
	 * For a single fuse filesystem use max 1% of dirty +
	 * writeback threshold.
	 *
	 * This gives about 1M of write buffer for memory maps on a
	 * machine with 1G and 10% dirty_ratio, which should be more
	 * than enough.
	 *
	 * Privileged users can raise it by writing to
	 *
	 *    /sys/class/bdi/<bdi>/max_ratio
	 */
	bdi_set_max_ratio(sb->s_bdi, 1);

	return 0;
}

struct fuse_dev *fuse_dev_alloc(void)
{
	struct fuse_dev *fud;
	struct list_head *pq;

	fud = kzalloc(sizeof(struct fuse_dev), GFP_KERNEL);
	if (!fud)
		return NULL;

	pq = kcalloc(FUSE_PQ_HASH_SIZE, sizeof(struct list_head), GFP_KERNEL);
	if (!pq) {
		kfree(fud);
		return NULL;
	}

	fud->pq.processing = pq;
	fuse_pqueue_init(&fud->pq);

	return fud;
}
EXPORT_SYMBOL_GPL(fuse_dev_alloc);

void fuse_dev_install(struct fuse_dev *fud, struct fuse_conn *fc)
{
	fud->fc = fuse_conn_get(fc);
	spin_lock(&fc->lock);
	list_add_tail(&fud->entry, &fc->devices);
	spin_unlock(&fc->lock);
}
EXPORT_SYMBOL_GPL(fuse_dev_install);

struct fuse_dev *fuse_dev_alloc_install(struct fuse_conn *fc)
{
	struct fuse_dev *fud;

	fud = fuse_dev_alloc();
	if (!fud)
		return NULL;

	fuse_dev_install(fud, fc);
	return fud;
}
EXPORT_SYMBOL_GPL(fuse_dev_alloc_install);

void fuse_dev_free(struct fuse_dev *fud)
{
	struct fuse_conn *fc = fud->fc;

	if (fc) {
		spin_lock(&fc->lock);
		list_del(&fud->entry);
		spin_unlock(&fc->lock);

		fuse_conn_put(fc);
	}
	kfree(fud->pq.processing);
	kfree(fud);
}
EXPORT_SYMBOL_GPL(fuse_dev_free);

static void fuse_sb_defaults(struct super_block *sb)
{
	sb->s_magic = FUSE_SUPER_MAGIC;
	sb->s_op = &fuse_super_operations;
	sb->s_xattr = fuse_xattr_handlers;
	sb->s_maxbytes = MAX_LFS_FILESIZE;
	sb->s_time_gran = 1;
	sb->s_export_op = &fuse_export_operations;
	sb->s_iflags |= SB_I_IMA_UNVERIFIABLE_SIGNATURE;
	sb->s_iflags |= SB_I_NOIDMAP;
	if (sb->s_user_ns != &init_user_ns)
		sb->s_iflags |= SB_I_UNTRUSTED_MOUNTER;
	sb->s_flags &= ~(SB_NOSEC | SB_I_VERSION);
}

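/*
 * Set up a superblock for a submount.  Most state is inherited from the
 * parent; the root inode duplicates the parent's mount point, so it shares
 * the parent's nlookup accounting via submount_lookup.
 */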
static int fuse_fill_super_submount(struct super_block *sb,
				    struct fuse_inode *parent_fi)
{
	struct fuse_mount *fm = get_fuse_mount_super(sb);
	struct super_block *parent_sb = parent_fi->inode.i_sb;
	struct fuse_attr root_attr;
	struct inode *root;
	struct fuse_submount_lookup *sl;
	struct fuse_inode *fi;

	fuse_sb_defaults(sb);
	fm->sb = sb;

	WARN_ON(sb->s_bdi != &noop_backing_dev_info);
	sb->s_bdi = bdi_get(parent_sb->s_bdi);

	sb->s_xattr = parent_sb->s_xattr;
	sb->s_export_op = parent_sb->s_export_op;
	sb->s_time_gran = parent_sb->s_time_gran;
	sb->s_blocksize = parent_sb->s_blocksize;
	sb->s_blocksize_bits = parent_sb->s_blocksize_bits;
	sb->s_subtype = kstrdup(parent_sb->s_subtype, GFP_KERNEL);
	if (parent_sb->s_subtype && !sb->s_subtype)
		return -ENOMEM;

	fuse_fill_attr_from_inode(&root_attr, parent_fi);
	root = fuse_iget(sb, parent_fi->nodeid, 0, &root_attr, 0, 0);
	/*
	 * This inode is just a duplicate, so it is not looked up and
	 * its nlookup should not be incremented.  fuse_iget() does
	 * that, though, so undo it here.
	 */
	fi = get_fuse_inode(root);
	fi->nlookup--;

	sb->s_d_op = &fuse_dentry_operations;
	sb->s_root = d_make_root(root);
	if (!sb->s_root)
		return -ENOMEM;

	/*
	 * Grab the parent's submount_lookup pointer and take a
	 * reference on the shared nlookup from the parent.  This is to
	 * prevent the last forget for this nodeid from getting
	 * triggered until all users have finished with it.
	 */
	sl = parent_fi->submount_lookup;
	WARN_ON(!sl);
	if (sl) {
		refcount_inc(&sl->count);
		fi->submount_lookup = sl;
	}

	return 0;
}

/* Filesystem context private data holds the FUSE inode of the mount point */
static int fuse_get_tree_submount(struct fs_context *fsc)
{
	struct fuse_mount *fm;
	struct fuse_inode *mp_fi = fsc->fs_private;
	struct fuse_conn *fc = get_fuse_conn(&mp_fi->inode);
	struct super_block *sb;
	int err;

	fm = kzalloc(sizeof(struct fuse_mount), GFP_KERNEL);
	if (!fm)
		return -ENOMEM;

	fm->fc = fuse_conn_get(fc);
	fsc->s_fs_info = fm;
	sb = sget_fc(fsc, NULL, set_anon_super_fc);
	if (fsc->s_fs_info)
		fuse_mount_destroy(fm);
	if (IS_ERR(sb))
		return PTR_ERR(sb);

	/* Initialize superblock, making @mp_fi its root */
	err = fuse_fill_super_submount(sb, mp_fi);
	if (err) {
		deactivate_locked_super(sb);
		return err;
	}

	down_write(&fc->killsb);
	list_add_tail(&fm->fc_entry, &fc->mounts);
	up_write(&fc->killsb);

	sb->s_flags |= SB_ACTIVE;
	fsc->root = dget(sb->s_root);

	return 0;
}

static const struct fs_context_operations fuse_context_submount_ops = {
	.get_tree = fuse_get_tree_submount,
};

int fuse_init_fs_context_submount(struct fs_context *fsc)
{
	fsc->ops = &fuse_context_submount_ops;
	return 0;
}
EXPORT_SYMBOL_GPL(fuse_init_fs_context_submount);

fuse_fill_super_common(struct super_block * sb,struct fuse_fs_context * ctx)1841 int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx)
1842 {
1843 struct fuse_dev *fud = NULL;
1844 struct fuse_mount *fm = get_fuse_mount_super(sb);
1845 struct fuse_conn *fc = fm->fc;
1846 struct inode *root;
1847 struct dentry *root_dentry;
1848 int err;
1849
1850 err = -EINVAL;
1851 if (sb->s_flags & SB_MANDLOCK)
1852 goto err;
1853
1854 rcu_assign_pointer(fc->curr_bucket, fuse_sync_bucket_alloc());
1855 fuse_sb_defaults(sb);
1856
1857 if (ctx->is_bdev) {
1858 #ifdef CONFIG_BLOCK
1859 err = -EINVAL;
1860 if (!sb_set_blocksize(sb, ctx->blksize))
1861 goto err;
1862 #endif
1863 } else {
1864 sb->s_blocksize = PAGE_SIZE;
1865 sb->s_blocksize_bits = PAGE_SHIFT;
1866 }
1867
1868 sb->s_subtype = ctx->subtype;
1869 ctx->subtype = NULL;
1870 if (IS_ENABLED(CONFIG_FUSE_DAX)) {
1871 err = fuse_dax_conn_alloc(fc, ctx->dax_mode, ctx->dax_dev);
1872 if (err)
1873 goto err;
1874 }
1875
1876 if (ctx->fudptr) {
1877 err = -ENOMEM;
1878 fud = fuse_dev_alloc_install(fc);
1879 if (!fud)
1880 goto err_free_dax;
1881 }
1882
1883 fc->dev = sb->s_dev;
1884 fm->sb = sb;
1885 err = fuse_bdi_init(fc, sb);
1886 if (err)
1887 goto err_dev_free;
1888
1889 /* Handle umasking inside the fuse code */
1890 if (sb->s_flags & SB_POSIXACL)
1891 fc->dont_mask = 1;
1892 sb->s_flags |= SB_POSIXACL;
1893
1894 fc->default_permissions = ctx->default_permissions;
1895 fc->allow_other = ctx->allow_other;
1896 fc->user_id = ctx->user_id;
1897 fc->group_id = ctx->group_id;
1898 fc->legacy_opts_show = ctx->legacy_opts_show;
1899 fc->max_read = max_t(unsigned int, 4096, ctx->max_read);
1900 fc->destroy = ctx->destroy;
1901 fc->no_control = ctx->no_control;
1902 fc->no_force_umount = ctx->no_force_umount;
1903 fc->no_daemon = ctx->no_daemon;
1904
1905 err = -ENOMEM;
1906 root = fuse_get_root_inode(sb, ctx->rootmode, ctx->root_bpf,
1907 ctx->root_dir);
1908 sb->s_d_op = &fuse_root_dentry_operations;
1909 root_dentry = d_make_root(root);
1910 if (!root_dentry)
1911 goto err_dev_free;
1912 fuse_init_dentry_root(root_dentry, ctx->root_dir);
1913 /* Root dentry doesn't have .d_revalidate */
1914 sb->s_d_op = &fuse_dentry_operations;
1915
1916 mutex_lock(&fuse_mutex);
1917 err = -EINVAL;
1918 if (ctx->fudptr && *ctx->fudptr)
1919 goto err_unlock;
1920
1921 err = fuse_ctl_add_conn(fc);
1922 if (err)
1923 goto err_unlock;
1924
1925 list_add_tail(&fc->entry, &fuse_conn_list);
1926 sb->s_root = root_dentry;
1927 if (ctx->fudptr)
1928 *ctx->fudptr = fud;
1929 mutex_unlock(&fuse_mutex);
1930 return 0;
1931
1932 err_unlock:
1933 mutex_unlock(&fuse_mutex);
1934 dput(root_dentry);
1935 err_dev_free:
1936 if (fud)
1937 fuse_dev_free(fud);
1938 err_free_dax:
1939 if (IS_ENABLED(CONFIG_FUSE_DAX))
1940 fuse_dax_conn_free(fc);
1941 err:
1942 return err;
1943 }
1944 EXPORT_SYMBOL_GPL(fuse_fill_super_common);
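
/*
 * A minimal sketch of an external caller of fuse_fill_super_common(), in
 * the style of a virtiofs-like transport. Everything here is illustrative
 * (example_fill_super is an assumed name); only the context fields mirror
 * struct fuse_fs_context as consumed above:
 *
 *	static int example_fill_super(struct super_block *sb,
 *				      struct fs_context *fsc)
 *	{
 *		struct fuse_fs_context ctx = {
 *			.rootmode	= S_IFDIR,
 *			.default_permissions = true,
 *			.allow_other	= true,
 *			.max_read	= UINT_MAX,
 *			.destroy	= true,
 *			// fudptr stays NULL: no /dev/fuse device to install
 *		};
 *		return fuse_fill_super_common(sb, &ctx);
 *	}
 */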

static int fuse_fill_super(struct super_block *sb, struct fs_context *fsc)
{
	struct fuse_fs_context *ctx = fsc->fs_private;
	int err;

	if (!ctx->no_daemon) {
		if (!ctx->file || !ctx->rootmode_present ||
		    !ctx->user_id_present || !ctx->group_id_present)
			return -EINVAL;

		/*
		 * Require mount to happen from the same user namespace which
		 * opened /dev/fuse to prevent potential attacks.
		 */
		if ((ctx->file->f_op != &fuse_dev_operations) ||
		    (ctx->file->f_cred->user_ns != sb->s_user_ns))
			return -EINVAL;
		ctx->fudptr = &ctx->file->private_data;
	}

	err = fuse_fill_super_common(sb, ctx);
	if (err)
		return err;
	/* file->private_data shall be visible on all CPUs after this */
	smp_mb();
	fuse_send_init(get_fuse_mount_super(sb));
	return 0;
}
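
/*
 * For reference, the userspace half of the handshake validated above, as
 * a hedged sketch (error handling omitted; "mydaemon" and the mount point
 * are placeholders):
 *
 *	int fd = open("/dev/fuse", O_RDWR);
 *	char opts[128];
 *
 *	snprintf(opts, sizeof(opts),
 *		 "fd=%d,rootmode=40000,user_id=%u,group_id=%u",
 *		 fd, getuid(), getgid());
 *	mount("mydaemon", "/mnt", "fuse.mydaemon", 0, opts);
 *
 * fuse_fill_super() checks exactly the four options passed here, and
 * fuse_send_init() then queues FUSE_INIT for the daemon to read on fd.
 */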

/*
 * This is the path where user supplied an already initialized fuse dev. In
 * this case never create a new super if the old one is gone.
 */
static int fuse_set_no_super(struct super_block *sb, struct fs_context *fsc)
{
	return -ENOTCONN;
}

static int fuse_test_super(struct super_block *sb, struct fs_context *fsc)
{
	return fsc->sget_key == get_fuse_conn_super(sb);
}

static int fuse_get_tree(struct fs_context *fsc)
{
	struct fuse_fs_context *ctx = fsc->fs_private;
	struct fuse_dev *fud;
	struct fuse_conn *fc;
	struct fuse_mount *fm;
	struct super_block *sb;
	int err;

	fc = kmalloc(sizeof(*fc), GFP_KERNEL);
	if (!fc)
		return -ENOMEM;

	fm = kzalloc(sizeof(*fm), GFP_KERNEL);
	if (!fm) {
		kfree(fc);
		return -ENOMEM;
	}

	fuse_conn_init(fc, fm, fsc->user_ns, &fuse_dev_fiq_ops, NULL);
	fc->release = fuse_free_conn;

	fsc->s_fs_info = fm;

	if (ctx->fd_present)
		ctx->file = fget(ctx->fd);

	if (IS_ENABLED(CONFIG_BLOCK) && ctx->is_bdev) {
		err = get_tree_bdev(fsc, fuse_fill_super);
		goto out;
	}
	/*
	 * While block dev mount can be initialized with a dummy device fd
	 * (found by device name), normal fuse mounts can't
	 */
	err = -EINVAL;
	if (!ctx->file)
		goto out;

	/*
	 * Allow creating a fuse mount with an already initialized fuse
	 * connection
	 */
	fud = READ_ONCE(ctx->file->private_data);
	if (ctx->file->f_op == &fuse_dev_operations && fud) {
		fsc->sget_key = fud->fc;
		sb = sget_fc(fsc, fuse_test_super, fuse_set_no_super);
		err = PTR_ERR_OR_ZERO(sb);
		if (!IS_ERR(sb))
			fsc->root = dget(sb->s_root);
	} else {
		err = get_tree_nodev(fsc, fuse_fill_super);
	}
out:
	if (fsc->s_fs_info)
		fuse_mount_destroy(fm);
	if (ctx->file)
		fput(ctx->file);
	return err;
}
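
/*
 * Example of the "already initialized" branch above: if the same
 * /dev/fuse fd that has already completed device installation is passed
 * to mount(2) a second time, the second mount keys sget_fc() on fud->fc,
 * finds the existing superblock via fuse_test_super() and just takes a
 * reference on its root; if that superblock is already gone,
 * fuse_set_no_super() fails the mount with -ENOTCONN rather than
 * creating a fresh one.
 */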

static const struct fs_context_operations fuse_context_ops = {
	.free		= fuse_free_fsc,
	.parse_param	= fuse_parse_param,
	.reconfigure	= fuse_reconfigure,
	.get_tree	= fuse_get_tree,
};

/*
 * Set up the filesystem mount context.
 */
static int fuse_init_fs_context(struct fs_context *fsc)
{
	struct fuse_fs_context *ctx;

	ctx = kzalloc(sizeof(struct fuse_fs_context), GFP_KERNEL);
	if (!ctx)
		return -ENOMEM;

	ctx->max_read = ~0;
	ctx->blksize = FUSE_DEFAULT_BLKSIZE;
	ctx->legacy_opts_show = true;

#ifdef CONFIG_BLOCK
	if (fsc->fs_type == &fuseblk_fs_type) {
		ctx->is_bdev = true;
		ctx->destroy = true;
	}
#endif

	fsc->fs_private = ctx;
	fsc->ops = &fuse_context_ops;
	return 0;
}
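
/*
 * Usage sketch for the fuseblk variant configured above ("/dev/sdb" and
 * the option values are placeholders):
 *
 *	mount -t fuseblk /dev/sdb /mnt \
 *		-o fd=4,rootmode=40000,user_id=0,group_id=0,blksize=4096
 *
 * is_bdev routes fuse_get_tree() through get_tree_bdev(), and destroy
 * makes unmount send FUSE_DESTROY so the daemon can flush the device.
 */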

bool fuse_mount_remove(struct fuse_mount *fm)
{
	struct fuse_conn *fc = fm->fc;
	bool last = false;

	down_write(&fc->killsb);
	list_del_init(&fm->fc_entry);
	if (list_empty(&fc->mounts))
		last = true;
	up_write(&fc->killsb);

	return last;
}
EXPORT_SYMBOL_GPL(fuse_mount_remove);

void fuse_conn_destroy(struct fuse_mount *fm)
{
	struct fuse_conn *fc = fm->fc;

	if (fc->destroy)
		fuse_send_destroy(fm);

	fuse_abort_conn(fc);
	fuse_wait_aborted(fc);

	if (!list_empty(&fc->entry)) {
		mutex_lock(&fuse_mutex);
		list_del(&fc->entry);
		fuse_ctl_remove_conn(fc);
		mutex_unlock(&fuse_mutex);
	}
}
EXPORT_SYMBOL_GPL(fuse_conn_destroy);

static void fuse_sb_destroy(struct super_block *sb)
{
	struct fuse_mount *fm = get_fuse_mount_super(sb);
	bool last;

	if (sb->s_root) {
		last = fuse_mount_remove(fm);
		if (last)
			fuse_conn_destroy(fm);
	}
}

void fuse_mount_destroy(struct fuse_mount *fm)
{
	fuse_conn_put(fm->fc);
	kfree_rcu(fm, rcu);
}
EXPORT_SYMBOL(fuse_mount_destroy);

static void fuse_kill_sb_anon(struct super_block *sb)
{
	fuse_sb_destroy(sb);
	kill_anon_super(sb);
	fuse_mount_destroy(get_fuse_mount_super(sb));
}
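
/*
 * The order in fuse_kill_sb_anon() is deliberate: fuse_sb_destroy() runs
 * first so the connection is aborted (and FUSE_DESTROY sent, if enabled)
 * while the superblock is still fully alive; kill_anon_super() then
 * evicts inodes against the dead connection; fuse_mount_destroy() comes
 * last because the fuse_mount stays reachable through sb->s_fs_info
 * until superblock teardown has finished.
 */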

static struct file_system_type fuse_fs_type = {
	.owner		= THIS_MODULE,
	.name		= "fuse",
	.fs_flags	= FS_HAS_SUBTYPE | FS_USERNS_MOUNT | FS_ALLOW_IDMAP,
	.init_fs_context = fuse_init_fs_context,
	.parameters	= fuse_fs_parameters,
	.kill_sb	= fuse_kill_sb_anon,
};
MODULE_ALIAS_FS("fuse");

#ifdef CONFIG_BLOCK
static void fuse_kill_sb_blk(struct super_block *sb)
{
	fuse_sb_destroy(sb);
	kill_block_super(sb);
	fuse_mount_destroy(get_fuse_mount_super(sb));
}

static struct file_system_type fuseblk_fs_type = {
	.owner		= THIS_MODULE,
	.name		= "fuseblk",
	.init_fs_context = fuse_init_fs_context,
	.parameters	= fuse_fs_parameters,
	.kill_sb	= fuse_kill_sb_blk,
	.fs_flags	= FS_REQUIRES_DEV | FS_HAS_SUBTYPE | FS_ALLOW_IDMAP,
};
MODULE_ALIAS_FS("fuseblk");

static inline int register_fuseblk(void)
{
	return register_filesystem(&fuseblk_fs_type);
}

static inline void unregister_fuseblk(void)
{
	unregister_filesystem(&fuseblk_fs_type);
}
#else
static inline int register_fuseblk(void)
{
	return 0;
}

static inline void unregister_fuseblk(void)
{
}
#endif

static void fuse_inode_init_once(void *foo)
{
	struct inode *inode = foo;

	inode_init_once(inode);
}
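
/*
 * Note on the constructor above: a slab ctor runs once when an object's
 * backing page is populated, not on every allocation, so only
 * allocation-invariant state (the embedded struct inode) belongs here;
 * anything that must be clean per allocation is reset in the
 * ->alloc_inode path instead, since a recycled object still carries the
 * values it held when last freed.
 */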

static int __init fuse_fs_init(void)
{
	int err;

	fuse_inode_cachep = kmem_cache_create("fuse_inode",
			sizeof(struct fuse_inode), 0,
			SLAB_HWCACHE_ALIGN|SLAB_ACCOUNT|SLAB_RECLAIM_ACCOUNT,
			fuse_inode_init_once);
	err = -ENOMEM;
	if (!fuse_inode_cachep)
		goto out;

	err = register_fuseblk();
	if (err)
		goto out2;

	err = register_filesystem(&fuse_fs_type);
	if (err)
		goto out3;

	return 0;

 out3:
	unregister_fuseblk();
 out2:
	kmem_cache_destroy(fuse_inode_cachep);
 out:
	return err;
}

static void fuse_fs_cleanup(void)
{
	unregister_filesystem(&fuse_fs_type);
	unregister_fuseblk();

	/*
	 * Make sure all delayed rcu free inodes are flushed before we
	 * destroy cache.
	 */
	rcu_barrier();
	kmem_cache_destroy(fuse_inode_cachep);
}

static struct kobject *fuse_kobj;

#ifdef CONFIG_FUSE_PASSTHROUGH
static ssize_t fuse_passthrough_show(struct kobject *kobj,
				     struct kobj_attribute *attr, char *buff)
{
	return sysfs_emit(buff, "supported\n");
}

static struct kobj_attribute fuse_passthrough_attr =
	__ATTR_RO(fuse_passthrough);
#endif
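
/*
 * Userspace can probe for these features before relying on them; each
 * attribute only exists when the capability is compiled in and reads
 * back "supported", so a check as simple as
 *
 *	access("/sys/fs/fuse/features/fuse_passthrough", F_OK) == 0
 *
 * is sufficient for a daemon deciding whether to request passthrough.
 */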

#ifdef CONFIG_FUSE_BPF
static ssize_t fuse_bpf_show(struct kobject *kobj,
			     struct kobj_attribute *attr, char *buff)
{
	return sysfs_emit(buff, "supported\n");
}

static struct kobj_attribute fuse_bpf_attr =
	__ATTR_RO(fuse_bpf);
#endif

static struct attribute *fuse_features[] = {
#ifdef CONFIG_FUSE_BPF
	&fuse_bpf_attr.attr,
#endif
#ifdef CONFIG_FUSE_PASSTHROUGH
	&fuse_passthrough_attr.attr,
#endif
	NULL,
};

static const struct attribute_group fuse_features_group = {
	.name = "features",
	.attrs = fuse_features,
};

/*
 * TODO Remove this once fuse-bpf is upstreamed
 *
 * The bpf_prog_type_fuse attribute exports the BPF_PROG_TYPE_FUSE value,
 * which cannot be a fixed constant until the code is upstreamed
 */
static ssize_t bpf_prog_type_fuse_show(struct kobject *kobj,
				       struct kobj_attribute *attr, char *buff)
{
	return sysfs_emit(buff, "%d\n", BPF_PROG_TYPE_FUSE);
}

static struct kobj_attribute bpf_prog_type_fuse_attr =
	__ATTR_RO(bpf_prog_type_fuse);

static struct attribute *bpf_attributes[] = {
	&bpf_prog_type_fuse_attr.attr,
	NULL,
};

static const struct attribute_group bpf_attr_group = {
	.attrs = bpf_attributes,
};

static const struct attribute_group *attribute_groups[] = {
	&fuse_features_group,
	&bpf_attr_group,
	NULL
};

/* TODO remove to here */

static int fuse_sysfs_init(void)
{
	int err;

	fuse_kobj = kobject_create_and_add("fuse", fs_kobj);
	if (!fuse_kobj) {
		err = -ENOMEM;
		goto out_err;
	}

	err = sysfs_create_mount_point(fuse_kobj, "connections");
	if (err)
		goto out_fuse_unregister;

	/* TODO Remove when BPF_PROG_TYPE_FUSE is upstreamed */
	err = sysfs_create_groups(fuse_kobj, attribute_groups);
	if (err)
		goto out_fuse_remove_mount_point;

	return 0;

out_fuse_remove_mount_point:
	sysfs_remove_mount_point(fuse_kobj, "connections");
out_fuse_unregister:
	kobject_put(fuse_kobj);
out_err:
	return err;
}
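
/*
 * On success the resulting sysfs layout is:
 *
 *	/sys/fs/fuse/connections/		fusectl mount point
 *	/sys/fs/fuse/features/fuse_bpf		(CONFIG_FUSE_BPF only)
 *	/sys/fs/fuse/features/fuse_passthrough	(CONFIG_FUSE_PASSTHROUGH only)
 *	/sys/fs/fuse/bpf_prog_type_fuse		(until fuse-bpf is upstream)
 */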

static void fuse_sysfs_cleanup(void)
{
	sysfs_remove_groups(fuse_kobj, attribute_groups);
	sysfs_remove_mount_point(fuse_kobj, "connections");
	kobject_put(fuse_kobj);
}

static int __init fuse_init(void)
{
	int res;

	pr_info("init (API version %i.%i)\n",
		FUSE_KERNEL_VERSION, FUSE_KERNEL_MINOR_VERSION);

	INIT_LIST_HEAD(&fuse_conn_list);
	res = fuse_fs_init();
	if (res)
		goto err;

	res = fuse_dev_init();
	if (res)
		goto err_fs_cleanup;

	res = fuse_sysfs_init();
	if (res)
		goto err_dev_cleanup;

	res = fuse_ctl_init();
	if (res)
		goto err_sysfs_cleanup;

#ifdef CONFIG_FUSE_BPF
	res = fuse_bpf_init();
	if (res)
		goto err_ctl_cleanup;
#endif

	sanitize_global_limit(&max_user_bgreq);
	sanitize_global_limit(&max_user_congthresh);

	return 0;

#ifdef CONFIG_FUSE_BPF
 err_ctl_cleanup:
	fuse_ctl_cleanup();
#endif
 err_sysfs_cleanup:
	fuse_sysfs_cleanup();
 err_dev_cleanup:
	fuse_dev_cleanup();
 err_fs_cleanup:
	fuse_fs_cleanup();
 err:
	return res;
}
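
/*
 * The unwind above mirrors the init order in reverse; when CONFIG_FUSE_BPF
 * is not set, the err_ctl_cleanup label is compiled out along with the
 * only goto that targets it, which avoids an unused-label warning.
 */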

static void __exit fuse_exit(void)
{
	pr_debug("exit\n");

	fuse_ctl_cleanup();
	fuse_sysfs_cleanup();
	fuse_fs_cleanup();
#ifdef CONFIG_FUSE_BPF
	fuse_bpf_cleanup();
#endif
	fuse_dev_cleanup();
}

module_init(fuse_init);
module_exit(fuse_exit);