1 /*
2 FUSE: Filesystem in Userspace
3 Copyright (C) 2001-2018 Miklos Szeredi <miklos@szeredi.hu>
4
5 This program can be distributed under the terms of the GNU GPL.
6 See the file COPYING.
7 */
8
9
10 #include "fuse_i.h"
11 #include <linux/iversion.h>
12 #include <linux/posix_acl.h>
13 #include <linux/pagemap.h>
14 #include <linux/highmem.h>
15
fuse_use_readdirplus(struct inode * dir,struct dir_context * ctx)16 static bool fuse_use_readdirplus(struct inode *dir, struct dir_context *ctx)
17 {
18 struct fuse_conn *fc = get_fuse_conn(dir);
19 struct fuse_inode *fi = get_fuse_inode(dir);
20
21 if (!fc->do_readdirplus)
22 return false;
23 if (fi->nodeid == 0)
24 return false;
25 if (!fc->readdirplus_auto)
26 return true;
27 if (test_and_clear_bit(FUSE_I_ADVISE_RDPLUS, &fi->state))
28 return true;
29 if (ctx->pos == 0)
30 return true;
31 return false;
32 }
33
/*
 * Append one directory entry to the readdir cache stored in the pages of
 * file->f_mapping.
 *
 * @file:   open directory whose cache is being filled
 * @dirent: entry to copy into the cache
 * @pos:    directory position at which @dirent was read
 *
 * The entry is added only when the cache has not been marked complete and
 * @pos equals the position the cache currently ends at (fi->rdc.pos).
 * A record never straddles a page boundary: when it does not fit into the
 * remainder of the current page it is placed at the start of the next one.
 *
 * fi->rdc.lock is released while the page is looked up or created, so the
 * cache state sampled before is re-validated after retaking the lock; on
 * any mismatch the entry is dropped without being cached.
 */
static void fuse_add_dirent_to_cache(struct file *file,
				     struct fuse_dirent *dirent, loff_t pos)
{
	struct fuse_inode *fi = get_fuse_inode(file_inode(file));
	size_t reclen = FUSE_DIRENT_SIZE(dirent);
	pgoff_t index;
	struct page *page;
	loff_t size;
	u64 version;
	unsigned int offset;
	void *addr;

	spin_lock(&fi->rdc.lock);
	/*
	 * Is the cache already complete, or does this entry not belong at the
	 * end of the cache?  Then there is nothing to add.
	 */
	if (fi->rdc.cached || pos != fi->rdc.pos) {
		spin_unlock(&fi->rdc.lock);
		return;
	}
	/* Snapshot the cache state for re-validation after relocking. */
	version = fi->rdc.version;
	size = fi->rdc.size;
	/* Split the cache size into a page index and a byte offset in it. */
	offset = size & ~PAGE_MASK;
	index = size >> PAGE_SHIFT;
	/* Dirent doesn't fit in current page?  Jump to next page. */
	if (offset + reclen > PAGE_SIZE) {
		index++;
		offset = 0;
	}
	spin_unlock(&fi->rdc.lock);

	/*
	 * A non-zero offset means earlier entries already live in this page,
	 * so it is only looked up; a page that is about to receive its first
	 * entry is created if needed.
	 */
	if (offset) {
		page = find_lock_page(file->f_mapping, index);
	} else {
		page = find_or_create_page(file->f_mapping, index,
					   mapping_gfp_mask(file->f_mapping));
	}
	/* No page available: skip caching this entry. */
	if (!page)
		return;

	spin_lock(&fi->rdc.lock);
	/* Raced with another readdir: the snapshot is stale, drop the entry. */
	if (fi->rdc.version != version || fi->rdc.size != size ||
	    WARN_ON(fi->rdc.pos != pos))
		goto unlock;

	addr = kmap_local_page(page);
	if (!offset) {
		/* First entry in this page: zero it and mark it valid. */
		clear_page(addr);
		SetPageUptodate(page);
	}
	memcpy(addr + offset, dirent, reclen);
	kunmap_local(addr);
	/* New end of cache: just past this record, at the entry's ->off. */
	fi->rdc.size = (index << PAGE_SHIFT) + offset + reclen;
	fi->rdc.pos = dirent->off;
unlock:
	spin_unlock(&fi->rdc.lock);
	unlock_page(page);
	put_page(page);
}
95
/*
 * Mark the readdir cache as complete once end-of-directory was reached.
 *
 * Nothing happens unless @pos equals the position the cache currently ends
 * at.  When it does, the cache is flagged as fully populated and every
 * page-cache page beyond the page-aligned cache size is truncated away.
 */
static void fuse_readdir_cache_end(struct file *file, loff_t pos)
{
	struct fuse_inode *fi = get_fuse_inode(file_inode(file));
	loff_t tail = 0;
	bool complete;

	spin_lock(&fi->rdc.lock);
	/* Does the cache end exactly where the reader stopped? */
	complete = (fi->rdc.pos == pos);
	if (complete) {
		fi->rdc.cached = true;
		tail = ALIGN(fi->rdc.size, PAGE_SIZE);
	}
	spin_unlock(&fi->rdc.lock);

	/* Drop the unused tail of the cache, outside the spinlock. */
	if (complete)
		truncate_inode_pages(file->f_mapping, tail);
}
115
fuse_emit(struct file * file,struct dir_context * ctx,struct fuse_dirent * dirent)116 static bool fuse_emit(struct file *file, struct dir_context *ctx,
117 struct fuse_dirent *dirent)
118 {
119 struct fuse_file *ff = file->private_data;
120
121 if (ff->open_flags & FOPEN_CACHE_DIR)
122 fuse_add_dirent_to_cache(file, dirent, ctx->pos);
123
124 return dir_emit(ctx, dirent->name, dirent->namelen, dirent->ino,
125 dirent->type);
126 }
127
parse_dirfile(char * buf,size_t nbytes,struct file * file,struct dir_context * ctx)128 static int parse_dirfile(char *buf, size_t nbytes, struct file *file,
129 struct dir_context *ctx)
130 {
131 while (nbytes >= FUSE_NAME_OFFSET) {
132 struct fuse_dirent *dirent = (struct fuse_dirent *) buf;
133 size_t reclen = FUSE_DIRENT_SIZE(dirent);
134 if (!dirent->namelen || dirent->namelen > FUSE_NAME_MAX)
135 return -EIO;
136 if (reclen > nbytes)
137 break;
138 if (memchr(dirent->name, '/', dirent->namelen) != NULL)
139 return -EIO;
140
141 if (!fuse_emit(file, ctx, dirent))
142 break;
143
144 buf += reclen;
145 nbytes -= reclen;
146 ctx->pos = dirent->off;
147 }
148
149 return 0;
150 }
151
/*
 * Instantiate or refresh the dentry and inode for one READDIRPLUS entry.
 *
 * @file:         open directory the entry was read from
 * @direntplus:   entry carrying both the dirent and its fuse_entry_out
 * @attr_version: attribute version sampled before the request was sent
 *
 * Returns 0 on success or when the entry is deliberately skipped (zero
 * nodeid, "." or ".."), and a negative errno otherwise.  The caller in
 * this file sends a FORGET for the entry's nodeid on a non-zero return.
 */
static int fuse_direntplus_link(struct file *file,
				struct fuse_direntplus *direntplus,
				u64 attr_version)
{
	struct fuse_entry_out *o = &direntplus->entry_out;
	struct fuse_dirent *dirent = &direntplus->dirent;
	struct dentry *parent = file->f_path.dentry;
	struct qstr name = QSTR_INIT(dirent->name, dirent->namelen);
	struct dentry *dentry;
	struct dentry *alias;
	struct inode *dir = d_inode(parent);
	struct fuse_conn *fc;
	struct inode *inode;
	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);

	if (!o->nodeid) {
		/*
		 * Unlike in the case of fuse_lookup, zero nodeid does not mean
		 * ENOENT. Instead, it only means the userspace filesystem did
		 * not want to return attributes/handle for this entry.
		 *
		 * So do nothing.
		 */
		return 0;
	}

	if (name.name[0] == '.') {
		/*
		 * We could potentially refresh the attributes of the directory
		 * and its parent?
		 */
		if (name.len == 1)
			return 0;
		if (name.name[1] == '.' && name.len == 2)
			return 0;
	}

	/* Reject replies carrying an unusable nodeid or attributes. */
	if (invalid_nodeid(o->nodeid))
		return -EIO;
	if (fuse_invalid_attr(&o->attr))
		return -EIO;

	fc = get_fuse_conn(dir);

	/* Look the name up in the dcache; allocate a new dentry on a miss. */
	name.hash = full_name_hash(parent, name.name, name.len);
	dentry = d_lookup(parent, &name);
	if (!dentry) {
retry:
		dentry = d_alloc_parallel(parent, &name, &wq);
		if (IS_ERR(dentry))
			return PTR_ERR(dentry);
	}
	if (!d_in_lookup(dentry)) {
		/*
		 * The dentry is not in the in-lookup state, so it may already
		 * have an inode attached.
		 */
		struct fuse_inode *fi;
		inode = d_inode(dentry);
		/* An inode with a different nodeid is treated as no match. */
		if (inode && get_node_id(inode) != o->nodeid)
			inode = NULL;
		if (!inode ||
		    fuse_stale_inode(inode, o->generation, &o->attr)) {
			/*
			 * No usable inode: mark a stale one bad, invalidate
			 * the dentry and start over with a fresh one.
			 */
			if (inode)
				fuse_make_bad(inode);
			d_invalidate(dentry);
			dput(dentry);
			goto retry;
		}
		if (fuse_is_bad(inode)) {
			dput(dentry);
			return -EIO;
		}

		/* Count this reply as one more lookup of the inode. */
		fi = get_fuse_inode(inode);
		spin_lock(&fi->lock);
		fi->nlookup++;
		spin_unlock(&fi->lock);

		/* Discard cached ACLs, then apply the attributes just received. */
		forget_all_cached_acls(inode);
		fuse_change_attributes(inode, &o->attr, NULL,
				       ATTR_TIMEOUT(o),
				       attr_version);
		/*
		 * The other branch comes via fuse_iget()
		 * which bumps nlookup inside
		 */
	} else {
		/* Dentry is in-lookup: obtain the inode and attach it. */
		inode = fuse_iget(dir->i_sb, o->nodeid, o->generation,
				  &o->attr, ATTR_TIMEOUT(o),
				  attr_version);
		if (!inode)
			inode = ERR_PTR(-ENOMEM);

		alias = d_splice_alias(inode, dentry);
		d_lookup_done(dentry);
		/* If an alias was returned, continue with it instead. */
		if (alias) {
			dput(dentry);
			dentry = alias;
		}
		if (IS_ERR(dentry)) {
			/* Undo the nlookup increment done for a valid inode. */
			if (!IS_ERR(inode)) {
				struct fuse_inode *fi = get_fuse_inode(inode);

				spin_lock(&fi->lock);
				fi->nlookup--;
				spin_unlock(&fi->lock);
			}
			return PTR_ERR(dentry);
		}
	}
	/* In auto mode, flag the inode as initialised through readdirplus. */
	if (fc->readdirplus_auto)
		set_bit(FUSE_I_INIT_RDPLUS, &get_fuse_inode(inode)->state);
	fuse_change_entry_timeout(dentry, o);

	dput(dentry);
	return 0;
}
266
/*
 * Send a FUSE_FORGET request for @nodeid with an nlookup count of one.
 *
 * The request is issued with the force and noreply flags set; its result
 * is ignored.
 */
static void fuse_force_forget(struct file *file, u64 nodeid)
{
	struct fuse_mount *fm = get_fuse_mount(file_inode(file));
	struct fuse_forget_in forget;
	FUSE_ARGS(args);

	memset(&forget, 0, sizeof(forget));
	forget.nlookup = 1;

	args.opcode = FUSE_FORGET;
	args.nodeid = nodeid;
	args.force = true;
	args.noreply = true;
	args.in_numargs = 1;
	args.in_args[0].value = &forget;
	args.in_args[0].size = sizeof(forget);

	/* Best effort: errors are deliberately ignored. */
	fuse_simple_request(fm, &args);
}
287
/*
 * Walk a READDIRPLUS reply buffer: emit entries to @ctx and link every
 * entry into the dcache/icache.
 *
 * Returns -EIO if an entry has an empty or over-long name or a name
 * containing '/', otherwise 0.  Parsing stops, still returning 0, when the
 * next record is not fully contained in the buffer.
 */
static int parse_dirplusfile(char *buf, size_t nbytes, struct file *file,
			     struct dir_context *ctx, u64 attr_version)
{
	bool full = false;

	while (nbytes >= FUSE_NAME_OFFSET_DIRENTPLUS) {
		struct fuse_direntplus *plus = (struct fuse_direntplus *) buf;
		struct fuse_dirent *ent = &plus->dirent;
		size_t entlen = FUSE_DIRENTPLUS_SIZE(plus);

		if (!ent->namelen || ent->namelen > FUSE_NAME_MAX)
			return -EIO;
		if (entlen > nbytes)
			break;
		if (memchr(ent->name, '/', ent->namelen) != NULL)
			return -EIO;

		/*
		 * Entries are emitted only until the destination refuses
		 * one.  Iteration nevertheless continues over the remaining
		 * entries so they get linked; otherwise a FORGET would be
		 * needed for each entry that was not linked.
		 */
		if (!full) {
			if (fuse_emit(file, ctx, ent))
				ctx->pos = ent->off;
			else
				full = true;
		}

		buf += entlen;
		nbytes -= entlen;

		/* Linking failed: send a FORGET for this entry's nodeid. */
		if (fuse_direntplus_link(file, plus, attr_version))
			fuse_force_forget(file, plus->entry_out.nodeid);
	}

	return 0;
}
331
fuse_readdir_uncached(struct file * file,struct dir_context * ctx)332 static int fuse_readdir_uncached(struct file *file, struct dir_context *ctx)
333 {
334 int plus;
335 ssize_t res;
336 struct page *page;
337 struct inode *inode = file_inode(file);
338 struct fuse_mount *fm = get_fuse_mount(inode);
339 struct fuse_io_args ia = {};
340 struct fuse_args_pages *ap = &ia.ap;
341 struct fuse_page_desc desc = { .length = PAGE_SIZE };
342 u64 attr_version = 0;
343 bool locked;
344
345 page = alloc_page(GFP_KERNEL);
346 if (!page)
347 return -ENOMEM;
348
349 plus = fuse_use_readdirplus(inode, ctx);
350 ap->args.out_pages = true;
351 ap->num_pages = 1;
352 ap->pages = &page;
353 ap->descs = &desc;
354 if (plus) {
355 attr_version = fuse_get_attr_version(fm->fc);
356 fuse_read_args_fill(&ia, file, ctx->pos, PAGE_SIZE,
357 FUSE_READDIRPLUS);
358 } else {
359 fuse_read_args_fill(&ia, file, ctx->pos, PAGE_SIZE,
360 FUSE_READDIR);
361 }
362 locked = fuse_lock_inode(inode);
363 res = fuse_simple_request(fm, &ap->args);
364 fuse_unlock_inode(inode, locked);
365 if (res >= 0) {
366 if (!res) {
367 struct fuse_file *ff = file->private_data;
368
369 if (ff->open_flags & FOPEN_CACHE_DIR)
370 fuse_readdir_cache_end(file, ctx->pos);
371 } else if (plus) {
372 res = parse_dirplusfile(page_address(page), res,
373 file, ctx, attr_version);
374 } else {
375 res = parse_dirfile(page_address(page), res, file,
376 ctx);
377 }
378 }
379
380 __free_page(page);
381 fuse_invalidate_atime(inode);
382 return res;
383 }
384
/* Outcome of scanning one page of the readdir cache. */
enum fuse_parse_result {
	FOUND_ERR  = -1,	/* a cached entry failed a sanity check */
	FOUND_NONE = 0,		/* the wanted position was not reached */
	FOUND_SOME = 1,		/* position reached, page end hit while emitting */
	FOUND_ALL  = 2,		/* dir_emit() refused an entry: stop */
};
391
fuse_parse_cache(struct fuse_file * ff,void * addr,unsigned int size,struct dir_context * ctx)392 static enum fuse_parse_result fuse_parse_cache(struct fuse_file *ff,
393 void *addr, unsigned int size,
394 struct dir_context *ctx)
395 {
396 unsigned int offset = ff->readdir.cache_off & ~PAGE_MASK;
397 enum fuse_parse_result res = FOUND_NONE;
398
399 WARN_ON(offset >= size);
400
401 for (;;) {
402 struct fuse_dirent *dirent = addr + offset;
403 unsigned int nbytes = size - offset;
404 size_t reclen;
405
406 if (nbytes < FUSE_NAME_OFFSET || !dirent->namelen)
407 break;
408
409 reclen = FUSE_DIRENT_SIZE(dirent); /* derefs ->namelen */
410
411 if (WARN_ON(dirent->namelen > FUSE_NAME_MAX))
412 return FOUND_ERR;
413 if (WARN_ON(reclen > nbytes))
414 return FOUND_ERR;
415 if (WARN_ON(memchr(dirent->name, '/', dirent->namelen) != NULL))
416 return FOUND_ERR;
417
418 if (ff->readdir.pos == ctx->pos) {
419 res = FOUND_SOME;
420 if (!dir_emit(ctx, dirent->name, dirent->namelen,
421 dirent->ino, dirent->type))
422 return FOUND_ALL;
423 ctx->pos = dirent->off;
424 }
425 ff->readdir.pos = dirent->off;
426 ff->readdir.cache_off += reclen;
427
428 offset += reclen;
429 }
430
431 return res;
432 }
433
fuse_rdc_reset(struct inode * inode)434 static void fuse_rdc_reset(struct inode *inode)
435 {
436 struct fuse_inode *fi = get_fuse_inode(inode);
437
438 fi->rdc.cached = false;
439 fi->rdc.version++;
440 fi->rdc.size = 0;
441 fi->rdc.pos = 0;
442 }
443
/*
 * Positive, non-errno value returned by fuse_readdir_cached() to tell
 * fuse_readdir() that the request could not be served from the cache and
 * must go through fuse_readdir_uncached() instead.
 */
#define UNCACHED 1
445
/*
 * Try to serve a readdir request from the directory's readdir cache.
 *
 * Returns 0 when entries were emitted from the cache or the end of the
 * cache was reached, a negative errno on failure (-EIO for a corrupt
 * cache page, or the error from fuse_update_attributes()), and UNCACHED
 * when the caller has to fall back to the uncached path: either the cache
 * is not complete, or the wanted position was not found in it.
 *
 * The function loops page by page through the cache.  fi->rdc.lock
 * protects the cache metadata; page contents are protected by the page
 * lock while they are parsed.
 */
static int fuse_readdir_cached(struct file *file, struct dir_context *ctx)
{
	struct fuse_file *ff = file->private_data;
	struct inode *inode = file_inode(file);
	struct fuse_conn *fc = get_fuse_conn(inode);
	struct fuse_inode *fi = get_fuse_inode(inode);
	enum fuse_parse_result res;
	pgoff_t index;
	unsigned int size;
	struct page *page;
	void *addr;

	/* Seeked?  If so, reset the cache stream */
	if (ff->readdir.pos != ctx->pos) {
		ff->readdir.pos = 0;
		ff->readdir.cache_off = 0;
	}

	/*
	 * We're just about to start reading into the cache or reading the
	 * cache; both cases require an up-to-date mtime value.
	 */
	if (!ctx->pos && fc->auto_inval_data) {
		int err = fuse_update_attributes(inode, file, STATX_MTIME);

		if (err)
			return err;
	}

retry:
	spin_lock(&fi->rdc.lock);
retry_locked:
	if (!fi->rdc.cached) {
		/* Starting cache? Set cache mtime. */
		if (!ctx->pos && !fi->rdc.size) {
			fi->rdc.mtime = inode_get_mtime(inode);
			fi->rdc.iversion = inode_query_iversion(inode);
		}
		spin_unlock(&fi->rdc.lock);
		/* Cache is incomplete: the caller must read uncached. */
		return UNCACHED;
	}
	/*
	 * When at the beginning of the directory (i.e. just after opendir(3) or
	 * rewinddir(3)), then need to check whether directory contents have
	 * changed, and reset the cache if so.
	 */
	if (!ctx->pos) {
		struct timespec64 mtime = inode_get_mtime(inode);

		if (inode_peek_iversion(inode) != fi->rdc.iversion ||
		    !timespec64_equal(&fi->rdc.mtime, &mtime)) {
			fuse_rdc_reset(inode);
			goto retry_locked;
		}
	}

	/*
	 * If cache version changed since the last getdents() call, then reset
	 * the cache stream.
	 */
	if (ff->readdir.version != fi->rdc.version) {
		ff->readdir.pos = 0;
		ff->readdir.cache_off = 0;
	}
	/*
	 * If at the beginning of the cache, then reset version to
	 * current.
	 */
	if (ff->readdir.pos == 0)
		ff->readdir.version = fi->rdc.version;

	WARN_ON(fi->rdc.size < ff->readdir.cache_off);

	index = ff->readdir.cache_off >> PAGE_SHIFT;

	/* Only the last cache page is partially filled; all others are full. */
	if (index == (fi->rdc.size >> PAGE_SHIFT))
		size = fi->rdc.size & ~PAGE_MASK;
	else
		size = PAGE_SIZE;
	spin_unlock(&fi->rdc.lock);

	/* EOF? */
	if ((ff->readdir.cache_off & ~PAGE_MASK) == size)
		return 0;

	/* Look up the cache page; FGP_LOCK requests it locked. */
	page = find_get_page_flags(file->f_mapping, index,
				   FGP_ACCESSED | FGP_LOCK);
	/* Page gone missing, then re-added to cache, but not initialized? */
	if (page && !PageUptodate(page)) {
		unlock_page(page);
		put_page(page);
		page = NULL;
	}
	spin_lock(&fi->rdc.lock);
	if (!page) {
		/*
		 * Uh-oh: page gone missing, cache is useless.  Reset it only
		 * if its version is still the one this stream was using.
		 */
		if (fi->rdc.version == ff->readdir.version)
			fuse_rdc_reset(inode);
		goto retry_locked;
	}

	/* Make sure it's still the same version after getting the page. */
	if (ff->readdir.version != fi->rdc.version) {
		spin_unlock(&fi->rdc.lock);
		unlock_page(page);
		put_page(page);
		goto retry;
	}
	spin_unlock(&fi->rdc.lock);

	/*
	 * Contents of the page are now protected against changing by holding
	 * the page lock.
	 */
	addr = kmap_local_page(page);
	res = fuse_parse_cache(ff, addr, size, ctx);
	kunmap_local(addr);
	unlock_page(page);
	put_page(page);

	if (res == FOUND_ERR)
		return -EIO;

	/* dir_emit() refused an entry, so this call is finished. */
	if (res == FOUND_ALL)
		return 0;

	if (size == PAGE_SIZE) {
		/* We hit end of page: skip to next page. */
		ff->readdir.cache_off = ALIGN(ff->readdir.cache_off, PAGE_SIZE);
		goto retry;
	}

	/*
	 * End of cache reached.  If found position, then we are done, otherwise
	 * need to fall back to uncached, since the position we were looking for
	 * wasn't in the cache.
	 */
	return res == FOUND_SOME ? 0 : UNCACHED;
}
587
fuse_readdir(struct file * file,struct dir_context * ctx)588 int fuse_readdir(struct file *file, struct dir_context *ctx)
589 {
590 struct fuse_file *ff = file->private_data;
591 struct inode *inode = file_inode(file);
592 int err;
593
594 #ifdef CONFIG_FUSE_BPF
595 struct fuse_err_ret fer;
596 bool allow_force;
597 bool force_again = false;
598 bool is_continued = false;
599
600 again:
601 fer = fuse_bpf_backing(inode, struct fuse_read_io,
602 fuse_readdir_initialize, fuse_readdir_backing,
603 fuse_readdir_finalize,
604 file, ctx, &force_again, &allow_force, is_continued);
605 if (force_again && !IS_ERR(fer.result)) {
606 is_continued = true;
607 goto again;
608 }
609
610 if (fer.ret)
611 return PTR_ERR(fer.result);
612 #endif
613
614 if (fuse_is_bad(inode))
615 return -EIO;
616
617 err = UNCACHED;
618 if (ff->open_flags & FOPEN_CACHE_DIR)
619 err = fuse_readdir_cached(file, ctx);
620 if (err == UNCACHED)
621 err = fuse_readdir_uncached(file, ctx);
622
623 return err;
624 }
625