1 /*
2 FUSE: Filesystem in Userspace
3 Copyright (C) 2001-2018 Miklos Szeredi <miklos@szeredi.hu>
4
5 This program can be distributed under the terms of the GNU GPL.
6 See the file COPYING.
7 */
8
9
10 #include "fuse_i.h"
11 #include <linux/iversion.h>
12 #include <linux/posix_acl.h>
13 #include <linux/pagemap.h>
14 #include <linux/highmem.h>
15
fuse_use_readdirplus(struct inode * dir,struct dir_context * ctx)16 static bool fuse_use_readdirplus(struct inode *dir, struct dir_context *ctx)
17 {
18 struct fuse_conn *fc = get_fuse_conn(dir);
19 struct fuse_inode *fi = get_fuse_inode(dir);
20
21 if (!fc->do_readdirplus)
22 return false;
23 if (fi->nodeid == 0)
24 return false;
25 if (!fc->readdirplus_auto)
26 return true;
27 if (test_and_clear_bit(FUSE_I_ADVISE_RDPLUS, &fi->state))
28 return true;
29 if (ctx->pos == 0)
30 return true;
31 return false;
32 }
33
/*
 * Append one dirent to the readdir cache kept in the directory's page
 * cache.  Called for every emitted entry while FOPEN_CACHE_DIR is set.
 * The entry is only appended if it lands exactly at the current cache
 * end position; any race with a concurrent readdir is detected via the
 * cache version and the append is silently dropped.
 */
static void fuse_add_dirent_to_cache(struct file *file,
				     struct fuse_dirent *dirent, loff_t pos)
{
	struct fuse_inode *fi = get_fuse_inode(file_inode(file));
	size_t reclen = FUSE_DIRENT_SIZE(dirent);
	pgoff_t index;
	struct page *page;
	loff_t size;
	u64 version;
	unsigned int offset;
	void *addr;

	spin_lock(&fi->rdc.lock);
	/*
	 * Is cache already completed?  Or this entry does not go at the end of
	 * cache?
	 */
	if (fi->rdc.cached || pos != fi->rdc.pos) {
		spin_unlock(&fi->rdc.lock);
		return;
	}
	/* Snapshot state so a concurrent reset/append is detectable below */
	version = fi->rdc.version;
	size = fi->rdc.size;
	offset = size & ~PAGE_MASK;
	index = size >> PAGE_SHIFT;
	/* Dirent doesn't fit in current page?  Jump to next page. */
	if (offset + reclen > PAGE_SIZE) {
		index++;
		offset = 0;
	}
	spin_unlock(&fi->rdc.lock);

	if (offset) {
		/* Appending mid-page: the page must already be present */
		page = find_lock_page(file->f_mapping, index);
	} else {
		/* Starting a new page: allocate it if necessary */
		page = find_or_create_page(file->f_mapping, index,
					   mapping_gfp_mask(file->f_mapping));
	}
	if (!page)
		return;

	spin_lock(&fi->rdc.lock);
	/* Raced with another readdir */
	if (fi->rdc.version != version || fi->rdc.size != size ||
	    WARN_ON(fi->rdc.pos != pos))
		goto unlock;

	addr = kmap_atomic(page);
	if (!offset) {
		/* Fresh page: zero it and mark it valid for readers */
		clear_page(addr);
		SetPageUptodate(page);
	}
	memcpy(addr + offset, dirent, reclen);
	kunmap_atomic(addr);
	/* Advance cache end; the next expected entry follows dirent->off */
	fi->rdc.size = (index << PAGE_SHIFT) + offset + reclen;
	fi->rdc.pos = dirent->off;
unlock:
	spin_unlock(&fi->rdc.lock);
	unlock_page(page);
	put_page(page);
}
95
/*
 * Mark the readdir cache as complete (EOF was reached at @pos) and trim
 * the unused tail pages from the mapping.
 */
static void fuse_readdir_cache_end(struct file *file, loff_t pos)
{
	struct fuse_inode *fi = get_fuse_inode(file_inode(file));
	loff_t used;

	spin_lock(&fi->rdc.lock);
	/* Only complete the cache if it ends exactly at our position */
	if (fi->rdc.pos != pos) {
		spin_unlock(&fi->rdc.lock);
		return;
	}

	fi->rdc.cached = true;
	used = ALIGN(fi->rdc.size, PAGE_SIZE);
	spin_unlock(&fi->rdc.lock);

	/* Drop any pages beyond the data actually cached */
	truncate_inode_pages(file->f_mapping, used);
}
115
fuse_emit(struct file * file,struct dir_context * ctx,struct fuse_dirent * dirent)116 static bool fuse_emit(struct file *file, struct dir_context *ctx,
117 struct fuse_dirent *dirent)
118 {
119 struct fuse_file *ff = file->private_data;
120
121 if (ff->open_flags & FOPEN_CACHE_DIR)
122 fuse_add_dirent_to_cache(file, dirent, ctx->pos);
123
124 return dir_emit(ctx, dirent->name, dirent->namelen, dirent->ino,
125 dirent->type);
126 }
127
/*
 * Walk a buffer of fuse_dirent records returned by FUSE_READDIR and
 * emit each entry.  Stops early on a truncated trailing record or a
 * full destination buffer; returns -EIO on corrupt entries (empty or
 * oversized name, or a '/' embedded in the name).
 */
static int parse_dirfile(char *buf, size_t nbytes, struct file *file,
			 struct dir_context *ctx)
{
	char *cursor = buf;
	size_t remaining = nbytes;

	while (remaining >= FUSE_NAME_OFFSET) {
		struct fuse_dirent *dirent = (struct fuse_dirent *) cursor;
		size_t reclen = FUSE_DIRENT_SIZE(dirent);

		if (!dirent->namelen || dirent->namelen > FUSE_NAME_MAX)
			return -EIO;
		if (reclen > remaining)
			break;
		if (memchr(dirent->name, '/', dirent->namelen) != NULL)
			return -EIO;

		if (!fuse_emit(file, ctx, dirent))
			break;

		cursor += reclen;
		remaining -= reclen;
		ctx->pos = dirent->off;
	}

	return 0;
}
151
/*
 * Instantiate or refresh the dentry/inode for one READDIRPLUS entry.
 *
 * Returns 0 on success (including entries that are deliberately
 * skipped: zero nodeid, "." and "..") and a negative errno on failure.
 * On failure no nlookup reference was taken, so the caller must send a
 * FORGET for the nodeid.
 */
static int fuse_direntplus_link(struct file *file,
				struct fuse_direntplus *direntplus,
				u64 attr_version)
{
	struct fuse_entry_out *o = &direntplus->entry_out;
	struct fuse_dirent *dirent = &direntplus->dirent;
	struct dentry *parent = file->f_path.dentry;
	struct qstr name = QSTR_INIT(dirent->name, dirent->namelen);
	struct dentry *dentry;
	struct dentry *alias;
	struct inode *dir = d_inode(parent);
	struct fuse_conn *fc;
	struct inode *inode;
	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);

	if (!o->nodeid) {
		/*
		 * Unlike in the case of fuse_lookup, zero nodeid does not mean
		 * ENOENT. Instead, it only means the userspace filesystem did
		 * not want to return attributes/handle for this entry.
		 *
		 * So do nothing.
		 */
		return 0;
	}

	if (name.name[0] == '.') {
		/*
		 * We could potentially refresh the attributes of the directory
		 * and its parent?
		 */
		if (name.len == 1)
			return 0;
		if (name.name[1] == '.' && name.len == 2)
			return 0;
	}

	/* Sanity-check the server-supplied nodeid and attributes */
	if (invalid_nodeid(o->nodeid))
		return -EIO;
	if (fuse_invalid_attr(&o->attr))
		return -EIO;

	fc = get_fuse_conn(dir);

	name.hash = full_name_hash(parent, name.name, name.len);
	dentry = d_lookup(parent, &name);
	if (!dentry) {
retry:
		/* Not in dcache: join/start an in-progress parallel lookup */
		dentry = d_alloc_parallel(parent, &name, &wq);
		if (IS_ERR(dentry))
			return PTR_ERR(dentry);
	}
	if (!d_in_lookup(dentry)) {
		/* Existing positive/negative dentry: validate and refresh it */
		struct fuse_inode *fi;
		inode = d_inode(dentry);
		if (inode && get_node_id(inode) != o->nodeid)
			inode = NULL;
		if (!inode ||
		    fuse_stale_inode(inode, o->generation, &o->attr)) {
			/* Wrong or stale inode: drop the dentry and redo */
			if (inode)
				fuse_make_bad(inode);
			d_invalidate(dentry);
			dput(dentry);
			goto retry;
		}
		if (fuse_is_bad(inode)) {
			dput(dentry);
			return -EIO;
		}

		/* Take the nlookup reference the server expects us to hold */
		fi = get_fuse_inode(inode);
		spin_lock(&fi->lock);
		fi->nlookup++;
		spin_unlock(&fi->lock);

		forget_all_cached_acls(inode);
		fuse_change_attributes(inode, &o->attr,
				       entry_attr_timeout(o),
				       attr_version);
		/*
		 * The other branch comes via fuse_iget()
		 * which bumps nlookup inside
		 */
	} else {
		/* We own the lookup: create/find the inode and splice it in */
		inode = fuse_iget(dir->i_sb, o->nodeid, o->generation,
				  &o->attr, entry_attr_timeout(o),
				  attr_version);
		if (!inode)
			inode = ERR_PTR(-ENOMEM);

		alias = d_splice_alias(inode, dentry);
		d_lookup_done(dentry);
		if (alias) {
			dput(dentry);
			dentry = alias;
		}
		if (IS_ERR(dentry)) {
			/* Splice failed: undo the nlookup taken by fuse_iget */
			if (!IS_ERR(inode)) {
				struct fuse_inode *fi = get_fuse_inode(inode);

				spin_lock(&fi->lock);
				fi->nlookup--;
				spin_unlock(&fi->lock);
			}
			return PTR_ERR(dentry);
		}
	}
	if (fc->readdirplus_auto)
		set_bit(FUSE_I_INIT_RDPLUS, &get_fuse_inode(inode)->state);
	fuse_change_entry_timeout(dentry, o);

	dput(dentry);
	return 0;
}
266
/*
 * Send a one-shot FUSE_FORGET (nlookup == 1) for @nodeid.  Used when a
 * READDIRPLUS entry could not be linked, so the reference the server
 * handed us must be returned.  The request is forced and gets no reply;
 * errors are deliberately ignored.
 */
static void fuse_force_forget(struct file *file, u64 nodeid)
{
	struct fuse_mount *fm = get_fuse_mount(file_inode(file));
	struct fuse_forget_in inarg = { .nlookup = 1 };
	FUSE_ARGS(args);

	args.opcode = FUSE_FORGET;
	args.nodeid = nodeid;
	args.in_numargs = 1;
	args.in_args[0].size = sizeof(inarg);
	args.in_args[0].value = &inarg;
	args.force = true;
	args.noreply = true;

	fuse_simple_request(fm, &args);
	/* ignore errors */
}
287
/*
 * Walk a buffer of fuse_direntplus records returned by FUSE_READDIRPLUS.
 * Each entry is emitted (while the destination buffer has room) and then
 * linked into the dcache; entries that fail to link get a FORGET so the
 * server's nlookup reference is returned.  Returns -EIO on corrupt
 * entries, 0 otherwise.
 */
static int parse_dirplusfile(char *buf, size_t nbytes, struct file *file,
			     struct dir_context *ctx, u64 attr_version)
{
	int over = 0;

	while (nbytes >= FUSE_NAME_OFFSET_DIRENTPLUS) {
		struct fuse_direntplus *direntplus =
			(struct fuse_direntplus *) buf;
		struct fuse_dirent *dirent = &direntplus->dirent;
		size_t reclen = FUSE_DIRENTPLUS_SIZE(direntplus);

		if (!dirent->namelen || dirent->namelen > FUSE_NAME_MAX)
			return -EIO;
		if (reclen > nbytes)
			break;
		if (memchr(dirent->name, '/', dirent->namelen) != NULL)
			return -EIO;

		if (!over) {
			/*
			 * We fill entries into dstbuf only as much as it can
			 * hold.  But we still continue iterating over the
			 * remaining entries to link them.  If not, we would
			 * need to send a FORGET for each of those which we
			 * did not link.
			 */
			over = !fuse_emit(file, ctx, dirent);
			if (!over)
				ctx->pos = dirent->off;
		}

		buf += reclen;
		nbytes -= reclen;

		if (fuse_direntplus_link(file, direntplus, attr_version))
			fuse_force_forget(file, direntplus->entry_out.nodeid);
	}

	return 0;
}
331
/*
 * Perform one readdir round-trip to the server (FUSE_READDIR or
 * FUSE_READDIRPLUS, one page worth of entries) and feed the result to
 * the VFS.  A zero-length reply means EOF, at which point the readdir
 * cache is marked complete if caching is enabled.
 */
static int fuse_readdir_uncached(struct file *file, struct dir_context *ctx)
{
	int plus;
	ssize_t res;
	struct page *page;
	struct inode *inode = file_inode(file);
	struct fuse_mount *fm = get_fuse_mount(inode);
	struct fuse_io_args ia = {};
	struct fuse_args_pages *ap = &ia.ap;
	struct fuse_page_desc desc = { .length = PAGE_SIZE };
	u64 attr_version = 0;
	bool locked;

	/* Scratch page receiving the server's reply */
	page = alloc_page(GFP_KERNEL);
	if (!page)
		return -ENOMEM;

	plus = fuse_use_readdirplus(inode, ctx);
	ap->args.out_pages = true;
	ap->num_pages = 1;
	ap->pages = &page;
	ap->descs = &desc;
	if (plus) {
		/* Snapshot attr version before the request for racy updates */
		attr_version = fuse_get_attr_version(fm->fc);
		fuse_read_args_fill(&ia, file, ctx->pos, PAGE_SIZE,
				    FUSE_READDIRPLUS);
	} else {
		fuse_read_args_fill(&ia, file, ctx->pos, PAGE_SIZE,
				    FUSE_READDIR);
	}
	locked = fuse_lock_inode(inode);
	res = fuse_simple_request(fm, &ap->args);
	fuse_unlock_inode(inode, locked);
	if (res >= 0) {
		if (!res) {
			/* Empty reply: end of directory */
			struct fuse_file *ff = file->private_data;

			if (ff->open_flags & FOPEN_CACHE_DIR)
				fuse_readdir_cache_end(file, ctx->pos);
		} else if (plus) {
			res = parse_dirplusfile(page_address(page), res,
						file, ctx, attr_version);
		} else {
			res = parse_dirfile(page_address(page), res, file,
					    ctx);
		}
	}

	__free_page(page);
	fuse_invalidate_atime(inode);
	return res;
}
384
/* Result of scanning one readdir-cache page in fuse_parse_cache() */
enum fuse_parse_result {
	FOUND_ERR = -1,		/* cache contents look corrupt */
	FOUND_NONE = 0,		/* no entry at the requested position */
	FOUND_SOME,		/* emitted at least one entry */
	FOUND_ALL,		/* caller's buffer filled up; stop */
};
391
/*
 * Emit directory entries from one page of the readdir cache, starting
 * at this file's cache offset.  @size is the number of valid bytes on
 * the page.  The per-file stream position (ff->readdir.pos/cache_off)
 * is advanced past every entry scanned, whether or not it was emitted.
 */
static enum fuse_parse_result fuse_parse_cache(struct fuse_file *ff,
					       void *addr, unsigned int size,
					       struct dir_context *ctx)
{
	unsigned int offset = ff->readdir.cache_off & ~PAGE_MASK;
	enum fuse_parse_result res = FOUND_NONE;

	/* Caller guarantees there is data at our offset on this page */
	WARN_ON(offset >= size);

	for (;;) {
		struct fuse_dirent *dirent = addr + offset;
		unsigned int nbytes = size - offset;
		size_t reclen;

		/* Truncated record or zero namelen marks end of this page */
		if (nbytes < FUSE_NAME_OFFSET || !dirent->namelen)
			break;

		reclen = FUSE_DIRENT_SIZE(dirent); /* derefs ->namelen */

		/* We wrote this cache ourselves; corruption is a bug */
		if (WARN_ON(dirent->namelen > FUSE_NAME_MAX))
			return FOUND_ERR;
		if (WARN_ON(reclen > nbytes))
			return FOUND_ERR;
		if (WARN_ON(memchr(dirent->name, '/', dirent->namelen) != NULL))
			return FOUND_ERR;

		/* Only emit once the stream has caught up to ctx->pos */
		if (ff->readdir.pos == ctx->pos) {
			res = FOUND_SOME;
			if (!dir_emit(ctx, dirent->name, dirent->namelen,
				      dirent->ino, dirent->type))
				return FOUND_ALL;
			ctx->pos = dirent->off;
		}
		ff->readdir.pos = dirent->off;
		ff->readdir.cache_off += reclen;

		offset += reclen;
	}

	return res;
}
433
fuse_rdc_reset(struct inode * inode)434 static void fuse_rdc_reset(struct inode *inode)
435 {
436 struct fuse_inode *fi = get_fuse_inode(inode);
437
438 fi->rdc.cached = false;
439 fi->rdc.version++;
440 fi->rdc.size = 0;
441 fi->rdc.pos = 0;
442 }
443
/* Internal return value: fall back to a plain (uncached) readdir */
#define UNCACHED 1
445
/*
 * Serve readdir from the page-cache-backed readdir cache.
 *
 * Returns 0 when entries were emitted (or EOF reached), a negative
 * errno on error, and UNCACHED when the caller must fall back to
 * fuse_readdir_uncached() (cache not complete, reset, or the requested
 * position is beyond the cached data).
 */
static int fuse_readdir_cached(struct file *file, struct dir_context *ctx)
{
	struct fuse_file *ff = file->private_data;
	struct inode *inode = file_inode(file);
	struct fuse_conn *fc = get_fuse_conn(inode);
	struct fuse_inode *fi = get_fuse_inode(inode);
	enum fuse_parse_result res;
	pgoff_t index;
	unsigned int size;
	struct page *page;
	void *addr;

	/* Seeked?  If so, reset the cache stream */
	if (ff->readdir.pos != ctx->pos) {
		ff->readdir.pos = 0;
		ff->readdir.cache_off = 0;
	}

	/*
	 * We're just about to start reading into the cache or reading the
	 * cache; both cases require an up-to-date mtime value.
	 */
	if (!ctx->pos && fc->auto_inval_data) {
		int err = fuse_update_attributes(inode, file);

		if (err)
			return err;
	}

retry:
	spin_lock(&fi->rdc.lock);
retry_locked:
	if (!fi->rdc.cached) {
		/* Starting cache?  Set cache mtime. */
		if (!ctx->pos && !fi->rdc.size) {
			fi->rdc.mtime = inode->i_mtime;
			fi->rdc.iversion = inode_query_iversion(inode);
		}
		spin_unlock(&fi->rdc.lock);
		/* Cache incomplete: let the uncached path fill it */
		return UNCACHED;
	}
	/*
	 * When at the beginning of the directory (i.e. just after opendir(3) or
	 * rewinddir(3)), then need to check whether directory contents have
	 * changed, and reset the cache if so.
	 */
	if (!ctx->pos) {
		if (inode_peek_iversion(inode) != fi->rdc.iversion ||
		    !timespec64_equal(&fi->rdc.mtime, &inode->i_mtime)) {
			fuse_rdc_reset(inode);
			goto retry_locked;
		}
	}

	/*
	 * If cache version changed since the last getdents() call, then reset
	 * the cache stream.
	 */
	if (ff->readdir.version != fi->rdc.version) {
		ff->readdir.pos = 0;
		ff->readdir.cache_off = 0;
	}
	/*
	 * If at the beginning of the cache, than reset version to
	 * current.
	 */
	if (ff->readdir.pos == 0)
		ff->readdir.version = fi->rdc.version;

	WARN_ON(fi->rdc.size < ff->readdir.cache_off);

	index = ff->readdir.cache_off >> PAGE_SHIFT;

	/* Last page of the cache may be partially filled */
	if (index == (fi->rdc.size >> PAGE_SHIFT))
		size = fi->rdc.size & ~PAGE_MASK;
	else
		size = PAGE_SIZE;
	spin_unlock(&fi->rdc.lock);

	/* EOF? */
	if ((ff->readdir.cache_off & ~PAGE_MASK) == size)
		return 0;

	page = find_get_page_flags(file->f_mapping, index,
				   FGP_ACCESSED | FGP_LOCK);
	/* Page gone missing, then re-added to cache, but not initialized? */
	if (page && !PageUptodate(page)) {
		unlock_page(page);
		put_page(page);
		page = NULL;
	}
	spin_lock(&fi->rdc.lock);
	if (!page) {
		/*
		 * Uh-oh: page gone missing, cache is useless
		 */
		if (fi->rdc.version == ff->readdir.version)
			fuse_rdc_reset(inode);
		goto retry_locked;
	}

	/* Make sure it's still the same version after getting the page. */
	if (ff->readdir.version != fi->rdc.version) {
		spin_unlock(&fi->rdc.lock);
		unlock_page(page);
		put_page(page);
		goto retry;
	}
	spin_unlock(&fi->rdc.lock);

	/*
	 * Contents of the page are now protected against changing by holding
	 * the page lock.
	 */
	addr = kmap(page);
	res = fuse_parse_cache(ff, addr, size, ctx);
	kunmap(page);
	unlock_page(page);
	put_page(page);

	if (res == FOUND_ERR)
		return -EIO;

	if (res == FOUND_ALL)
		return 0;

	if (size == PAGE_SIZE) {
		/* We hit end of page: skip to next page. */
		ff->readdir.cache_off = ALIGN(ff->readdir.cache_off, PAGE_SIZE);
		goto retry;
	}

	/*
	 * End of cache reached. If found position, then we are done, otherwise
	 * need to fall back to uncached, since the position we were looking for
	 * wasn't in the cache.
	 */
	return res == FOUND_SOME ? 0 : UNCACHED;
}
585
/*
 * VFS ->iterate_shared() entry point for FUSE directories: try the
 * readdir cache first (when FOPEN_CACHE_DIR is set) and fall back to a
 * server round-trip when the cache can't satisfy the request.
 */
int fuse_readdir(struct file *file, struct dir_context *ctx)
{
	struct fuse_file *ff = file->private_data;
	struct inode *inode = file_inode(file);
	int err;

#ifdef CONFIG_FUSE_BPF
	struct fuse_err_ret fer;
	bool allow_force;
	bool force_again = false;
	bool is_continued = false;

again:
	fer = fuse_bpf_backing(inode, struct fuse_read_io,
			       fuse_readdir_initialize, fuse_readdir_backing,
			       fuse_readdir_finalize,
			       file, ctx, &force_again, &allow_force, is_continued);
	/* Backing fs asked for another pass; retry unless it errored */
	if (force_again && !IS_ERR(fer.result)) {
		is_continued = true;
		goto again;
	}

	/*
	 * NOTE(review): presumably fer.ret is set when the BPF/backing path
	 * fully handled the call, with the outcome (possibly an ERR_PTR) in
	 * fer.result -- confirm against fuse_bpf_backing()'s contract.
	 */
	if (fer.ret)
		return PTR_ERR(fer.result);
#endif

	if (fuse_is_bad(inode))
		return -EIO;

	/* Serializes cached and uncached readers of this open file */
	mutex_lock(&ff->readdir.lock);

	err = UNCACHED;
	if (ff->open_flags & FOPEN_CACHE_DIR)
		err = fuse_readdir_cached(file, ctx);
	if (err == UNCACHED)
		err = fuse_readdir_uncached(file, ctx);

	mutex_unlock(&ff->readdir.lock);

	return err;
}
627