/*
 * Open file cache.
 *
 * (c) 2015 - Jeff Layton <jeff.layton@primarydata.com>
 */

#include <linux/hash.h>
#include <linux/slab.h>
#include <linux/file.h>
#include <linux/sched.h>
#include <linux/list_lru.h>
#include <linux/fsnotify_backend.h>
#include <linux/fsnotify.h>
#include <linux/seq_file.h>

#include "vfs.h"
#include "nfsd.h"
#include "nfsfh.h"
#include "netns.h"
#include "filecache.h"
#include "trace.h"

#define NFSDDBG_FACILITY	NFSDDBG_FH

/* FIXME: dynamically size this for the machine somehow? */
#define NFSD_FILE_HASH_BITS	12
#define NFSD_FILE_HASH_SIZE	(1 << NFSD_FILE_HASH_BITS)
#define NFSD_LAUNDRETTE_DELAY	(2 * HZ)

#define NFSD_FILE_SHUTDOWN	(1)
#define NFSD_FILE_LRU_THRESHOLD (4096UL)
#define NFSD_FILE_LRU_LIMIT	(NFSD_FILE_LRU_THRESHOLD << 2)

/* We only care about NFSD_MAY_READ/WRITE for this cache */
#define NFSD_FILE_MAY_MASK	(NFSD_MAY_READ|NFSD_MAY_WRITE)

struct nfsd_fcache_bucket {
	struct hlist_head	nfb_head;
	spinlock_t		nfb_lock;
	unsigned int		nfb_count;
	unsigned int		nfb_maxcount;
};

static DEFINE_PER_CPU(unsigned long, nfsd_file_cache_hits);

struct nfsd_fcache_disposal {
	struct list_head list;
	struct work_struct work;
	struct net *net;
	spinlock_t lock;
	struct list_head freeme;
	struct rcu_head rcu;
};

static struct workqueue_struct *nfsd_filecache_wq __read_mostly;

static struct kmem_cache		*nfsd_file_slab;
static struct kmem_cache		*nfsd_file_mark_slab;
static struct nfsd_fcache_bucket	*nfsd_file_hashtbl;
static struct list_lru			nfsd_file_lru;
static long				nfsd_file_lru_flags;
static struct fsnotify_group		*nfsd_file_fsnotify_group;
static atomic_long_t			nfsd_filecache_count;
static struct delayed_work		nfsd_filecache_laundrette;
static DEFINE_SPINLOCK(laundrette_lock);
static LIST_HEAD(laundrettes);

static void nfsd_file_gc(void);

static void
nfsd_file_schedule_laundrette(void)
{
	long count = atomic_long_read(&nfsd_filecache_count);

	if (count == 0 || test_bit(NFSD_FILE_SHUTDOWN, &nfsd_file_lru_flags))
		return;

	queue_delayed_work(system_wq, &nfsd_filecache_laundrette,
			   NFSD_LAUNDRETTE_DELAY);
}

static void
nfsd_file_slab_free(struct rcu_head *rcu)
{
	struct nfsd_file *nf = container_of(rcu, struct nfsd_file, nf_rcu);

	put_cred(nf->nf_cred);
	kmem_cache_free(nfsd_file_slab, nf);
}

static void
nfsd_file_mark_free(struct fsnotify_mark *mark)
{
	struct nfsd_file_mark *nfm = container_of(mark, struct nfsd_file_mark,
						  nfm_mark);

	kmem_cache_free(nfsd_file_mark_slab, nfm);
}

static struct nfsd_file_mark *
nfsd_file_mark_get(struct nfsd_file_mark *nfm)
{
	if (!refcount_inc_not_zero(&nfm->nfm_ref))
		return NULL;
	return nfm;
}

static void
nfsd_file_mark_put(struct nfsd_file_mark *nfm)
{
	if (refcount_dec_and_test(&nfm->nfm_ref)) {
		fsnotify_destroy_mark(&nfm->nfm_mark, nfsd_file_fsnotify_group);
		fsnotify_put_mark(&nfm->nfm_mark);
	}
}

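/*
 * Find the fsnotify mark for this inode, or create one if none exists
 * yet. If a mark is found whose refcount has already dropped to zero,
 * help tear it down and retry the lookup; if fsnotify_add_inode_mark()
 * reports that another task raced a mark in (-EEXIST), retry as well.
 * Returns NULL on allocation failure.
 */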
static struct nfsd_file_mark *
nfsd_file_mark_find_or_create(struct nfsd_file *nf)
{
	int			err;
	struct fsnotify_mark	*mark;
	struct nfsd_file_mark	*nfm = NULL, *new;
	struct inode		*inode = nf->nf_inode;

	do {
		mutex_lock(&nfsd_file_fsnotify_group->mark_mutex);
		mark = fsnotify_find_mark(&inode->i_fsnotify_marks,
					  nfsd_file_fsnotify_group);
		if (mark) {
			nfm = nfsd_file_mark_get(container_of(mark,
						 struct nfsd_file_mark,
						 nfm_mark));
			mutex_unlock(&nfsd_file_fsnotify_group->mark_mutex);
			if (nfm) {
				fsnotify_put_mark(mark);
				break;
			}
			/* Avoid soft lockup race with nfsd_file_mark_put() */
			fsnotify_destroy_mark(mark, nfsd_file_fsnotify_group);
			fsnotify_put_mark(mark);
		} else
			mutex_unlock(&nfsd_file_fsnotify_group->mark_mutex);

		/* allocate a new nfm */
		new = kmem_cache_alloc(nfsd_file_mark_slab, GFP_KERNEL);
		if (!new)
			return NULL;
		fsnotify_init_mark(&new->nfm_mark, nfsd_file_fsnotify_group);
		new->nfm_mark.mask = FS_ATTRIB|FS_DELETE_SELF;
		refcount_set(&new->nfm_ref, 1);

		err = fsnotify_add_inode_mark(&new->nfm_mark, inode, 0);

		/*
		 * If the add was successful, then return the object.
		 * Otherwise, we need to put the reference we hold on the
		 * nfm_mark. The fsnotify code will take a reference and put
		 * it on failure, so we can't just free it directly. It's also
		 * not safe to call fsnotify_destroy_mark on it as the
		 * mark->group will be NULL. Thus, we can't let the nfm_ref
		 * counter drive the destruction at this point.
		 */
		if (likely(!err))
			nfm = new;
		else
			fsnotify_put_mark(&new->nfm_mark);
	} while (unlikely(err == -EEXIST));

	return nfm;
}

static struct nfsd_file *
nfsd_file_alloc(struct inode *inode, unsigned int may, unsigned int hashval,
		struct net *net)
{
	struct nfsd_file *nf;

	nf = kmem_cache_alloc(nfsd_file_slab, GFP_KERNEL);
	if (nf) {
		INIT_HLIST_NODE(&nf->nf_node);
		INIT_LIST_HEAD(&nf->nf_lru);
		nf->nf_file = NULL;
		nf->nf_cred = get_current_cred();
		nf->nf_net = net;
		nf->nf_flags = 0;
		nf->nf_inode = inode;
		nf->nf_hashval = hashval;
		refcount_set(&nf->nf_ref, 1);
		nf->nf_may = may & NFSD_FILE_MAY_MASK;
		nf->nf_mark = NULL;
		trace_nfsd_file_alloc(nf);
	}
	return nf;
}

static bool
nfsd_file_free(struct nfsd_file *nf)
{
	bool flush = false;

	trace_nfsd_file_put_final(nf);
	if (nf->nf_mark)
		nfsd_file_mark_put(nf->nf_mark);
	if (nf->nf_file) {
		get_file(nf->nf_file);
		filp_close(nf->nf_file, NULL);
		fput(nf->nf_file);
		flush = true;
	}
	call_rcu(&nf->nf_rcu, nfsd_file_slab_free);
	return flush;
}

static bool
nfsd_file_check_writeback(struct nfsd_file *nf)
{
	struct file *file = nf->nf_file;
	struct address_space *mapping;

	if (!file || !(file->f_mode & FMODE_WRITE))
		return false;
	mapping = file->f_mapping;
	return mapping_tagged(mapping, PAGECACHE_TAG_DIRTY) ||
		mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK);
}

static int
nfsd_file_check_write_error(struct nfsd_file *nf)
{
	struct file *file = nf->nf_file;

	if (!file || !(file->f_mode & FMODE_WRITE))
		return 0;
	return filemap_check_wb_err(file->f_mapping, READ_ONCE(file->f_wb_err));
}

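/*
 * Unhashing is also where unseen writeback errors get acted upon: if
 * filemap_check_wb_err() reports an error that no caller has consumed,
 * reset the boot verifier so that clients know to resend any writes
 * that were not yet committed.
 */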
static void
nfsd_file_do_unhash(struct nfsd_file *nf)
{
	lockdep_assert_held(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);

	trace_nfsd_file_unhash(nf);

	if (nfsd_file_check_write_error(nf))
		nfsd_reset_boot_verifier(net_generic(nf->nf_net, nfsd_net_id));
	--nfsd_file_hashtbl[nf->nf_hashval].nfb_count;
	hlist_del_rcu(&nf->nf_node);
	atomic_long_dec(&nfsd_filecache_count);
}

static bool
nfsd_file_unhash(struct nfsd_file *nf)
{
	if (test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags)) {
		nfsd_file_do_unhash(nf);
		if (!list_empty(&nf->nf_lru))
			list_lru_del(&nfsd_file_lru, &nf->nf_lru);
		return true;
	}
	return false;
}

/*
 * Return true if the file was unhashed.
 */
static bool
nfsd_file_unhash_and_release_locked(struct nfsd_file *nf,
				    struct list_head *dispose)
{
	lockdep_assert_held(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);

	trace_nfsd_file_unhash_and_release_locked(nf);
	if (!nfsd_file_unhash(nf))
		return false;
	/* keep final reference for nfsd_file_lru_dispose */
	if (refcount_dec_not_one(&nf->nf_ref))
		return true;

	list_add(&nf->nf_lru, dispose);
	return true;
}

static void
nfsd_file_put_noref(struct nfsd_file *nf)
{
	trace_nfsd_file_put(nf);

	if (refcount_dec_and_test(&nf->nf_ref)) {
		WARN_ON(test_bit(NFSD_FILE_HASHED, &nf->nf_flags));
		nfsd_file_free(nf);
	}
}

void
nfsd_file_put(struct nfsd_file *nf)
{
	bool is_hashed;

	set_bit(NFSD_FILE_REFERENCED, &nf->nf_flags);
	if (refcount_read(&nf->nf_ref) > 2 || !nf->nf_file) {
		nfsd_file_put_noref(nf);
		return;
	}

	filemap_flush(nf->nf_file->f_mapping);
	is_hashed = test_bit(NFSD_FILE_HASHED, &nf->nf_flags) != 0;
	nfsd_file_put_noref(nf);
	if (is_hashed)
		nfsd_file_schedule_laundrette();
	if (atomic_long_read(&nfsd_filecache_count) >= NFSD_FILE_LRU_LIMIT)
		nfsd_file_gc();
}

struct nfsd_file *
nfsd_file_get(struct nfsd_file *nf)
{
	if (likely(refcount_inc_not_zero(&nf->nf_ref)))
		return nf;
	return NULL;
}

static void
nfsd_file_dispose_list(struct list_head *dispose)
{
	struct nfsd_file *nf;

	while (!list_empty(dispose)) {
		nf = list_first_entry(dispose, struct nfsd_file, nf_lru);
		list_del(&nf->nf_lru);
		nfsd_file_put_noref(nf);
	}
}

static void
nfsd_file_dispose_list_sync(struct list_head *dispose)
{
	bool flush = false;
	struct nfsd_file *nf;

	while (!list_empty(dispose)) {
		nf = list_first_entry(dispose, struct nfsd_file, nf_lru);
		list_del(&nf->nf_lru);
		if (!refcount_dec_and_test(&nf->nf_ref))
			continue;
		if (nfsd_file_free(nf))
			flush = true;
	}
	if (flush)
		flush_delayed_fput();
}

static void
nfsd_file_list_remove_disposal(struct list_head *dst,
			       struct nfsd_fcache_disposal *l)
{
	spin_lock(&l->lock);
	list_splice_init(&l->freeme, dst);
	spin_unlock(&l->lock);
}

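/*
 * Hand a list of nfsd_files over to the laundrette for @net: splice
 * them onto that net's "freeme" list and kick its work item, so the
 * final fputs happen in workqueue context rather than the caller's.
 */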
static void
nfsd_file_list_add_disposal(struct list_head *files, struct net *net)
{
	struct nfsd_fcache_disposal *l;

	rcu_read_lock();
	list_for_each_entry_rcu(l, &laundrettes, list) {
		if (l->net == net) {
			spin_lock(&l->lock);
			list_splice_tail_init(files, &l->freeme);
			spin_unlock(&l->lock);
			queue_work(nfsd_filecache_wq, &l->work);
			break;
		}
	}
	rcu_read_unlock();
}

static void
nfsd_file_list_add_pernet(struct list_head *dst, struct list_head *src,
			  struct net *net)
{
	struct nfsd_file *nf, *tmp;

	list_for_each_entry_safe(nf, tmp, src, nf_lru) {
		if (nf->nf_net == net)
			list_move_tail(&nf->nf_lru, dst);
	}
}

static void
nfsd_file_dispose_list_delayed(struct list_head *dispose)
{
	LIST_HEAD(list);
	struct nfsd_file *nf;

	while (!list_empty(dispose)) {
		nf = list_first_entry(dispose, struct nfsd_file, nf_lru);
		nfsd_file_list_add_pernet(&list, dispose, nf->nf_net);
		nfsd_file_list_add_disposal(&list, nf->nf_net);
	}
}

/*
 * Note this can deadlock with nfsd_file_cache_purge.
 */
static enum lru_status
nfsd_file_lru_cb(struct list_head *item, struct list_lru_one *lru,
		 spinlock_t *lock, void *arg)
	__releases(lock)
	__acquires(lock)
{
	struct list_head *head = arg;
	struct nfsd_file *nf = list_entry(item, struct nfsd_file, nf_lru);

	/*
	 * Do a lockless refcount check. The hashtable holds one reference, so
	 * we look to see if anything else has a reference, or if any have
	 * been put since the shrinker last ran. Those don't get unhashed and
	 * released.
	 *
	 * Note that in the put path, we set the flag and then decrement the
	 * counter. Here we check the counter and then test and clear the flag.
	 * That order is deliberate to ensure that we can do this locklessly.
	 */
	if (refcount_read(&nf->nf_ref) > 1)
		goto out_skip;

	/*
	 * Don't throw out files that are still undergoing I/O or
	 * that have uncleared errors pending.
	 */
	if (nfsd_file_check_writeback(nf))
		goto out_skip;

	if (test_and_clear_bit(NFSD_FILE_REFERENCED, &nf->nf_flags))
		goto out_skip;

	if (!test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags))
		goto out_skip;

	list_lru_isolate_move(lru, &nf->nf_lru, head);
	return LRU_REMOVED;
out_skip:
	return LRU_SKIP;
}

static unsigned long
nfsd_file_lru_walk_list(struct shrink_control *sc)
{
	LIST_HEAD(head);
	struct nfsd_file *nf;
	unsigned long ret;

	if (sc)
		ret = list_lru_shrink_walk(&nfsd_file_lru, sc,
				nfsd_file_lru_cb, &head);
	else
		ret = list_lru_walk(&nfsd_file_lru,
				nfsd_file_lru_cb,
				&head, LONG_MAX);
	list_for_each_entry(nf, &head, nf_lru) {
		spin_lock(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);
		nfsd_file_do_unhash(nf);
		spin_unlock(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);
	}
	nfsd_file_dispose_list_delayed(&head);
	return ret;
}

static void
nfsd_file_gc(void)
{
	nfsd_file_lru_walk_list(NULL);
}

static void
nfsd_file_gc_worker(struct work_struct *work)
{
	nfsd_file_gc();
	nfsd_file_schedule_laundrette();
}

static unsigned long
nfsd_file_lru_count(struct shrinker *s, struct shrink_control *sc)
{
	return list_lru_count(&nfsd_file_lru);
}

static unsigned long
nfsd_file_lru_scan(struct shrinker *s, struct shrink_control *sc)
{
	return nfsd_file_lru_walk_list(sc);
}

static struct shrinker nfsd_file_shrinker = {
	.scan_objects = nfsd_file_lru_scan,
	.count_objects = nfsd_file_lru_count,
	.seeks = 1,
};

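/*
 * Unhash all cached opens of @inode in the bucket given by @hashval,
 * moving each entry whose only remaining reference was the hashtable's
 * onto @dispose for the caller to free.
 */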
static void
__nfsd_file_close_inode(struct inode *inode, unsigned int hashval,
			struct list_head *dispose)
{
	struct nfsd_file	*nf;
	struct hlist_node	*tmp;

	spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock);
	hlist_for_each_entry_safe(nf, tmp, &nfsd_file_hashtbl[hashval].nfb_head, nf_node) {
		if (inode == nf->nf_inode)
			nfsd_file_unhash_and_release_locked(nf, dispose);
	}
	spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
}

/**
 * nfsd_file_close_inode_sync - attempt to forcibly close an nfsd_file
 * @inode: inode of the file to attempt to remove
 *
 * Walk the whole hash bucket, looking for any files that correspond to "inode".
 * If any do, then unhash them and put the hashtable reference to them and
 * destroy any that had their last reference put. Also ensure that any of the
 * fputs also have their final __fput done as well.
 */
void
nfsd_file_close_inode_sync(struct inode *inode)
{
	unsigned int hashval = (unsigned int)hash_long(inode->i_ino,
						NFSD_FILE_HASH_BITS);
	LIST_HEAD(dispose);

	__nfsd_file_close_inode(inode, hashval, &dispose);
	trace_nfsd_file_close_inode_sync(inode, hashval, !list_empty(&dispose));
	nfsd_file_dispose_list_sync(&dispose);
}

/**
 * nfsd_file_close_inode - attempt a delayed close of an nfsd_file
 * @inode: inode of the file to attempt to remove
 *
 * Walk the whole hash bucket, looking for any files that correspond to "inode".
 * If any do, then unhash them and put the hashtable reference to them and
 * destroy any that had their last reference put.
 */
static void
nfsd_file_close_inode(struct inode *inode)
{
	unsigned int hashval = (unsigned int)hash_long(inode->i_ino,
						NFSD_FILE_HASH_BITS);
	LIST_HEAD(dispose);

	__nfsd_file_close_inode(inode, hashval, &dispose);
	trace_nfsd_file_close_inode(inode, hashval, !list_empty(&dispose));
	nfsd_file_dispose_list_delayed(&dispose);
}

/**
 * nfsd_file_delayed_close - close unused nfsd_files
 * @work: dummy
 *
 * Walk the LRU list and close any entries that have not been used since
 * the last scan.
 *
 * Note this can deadlock with nfsd_file_cache_purge.
 */
static void
nfsd_file_delayed_close(struct work_struct *work)
{
	LIST_HEAD(head);
	struct nfsd_fcache_disposal *l = container_of(work,
			struct nfsd_fcache_disposal, work);

	nfsd_file_list_remove_disposal(&head, l);
	nfsd_file_dispose_list(&head);
}

static int
nfsd_file_lease_notifier_call(struct notifier_block *nb, unsigned long arg,
			      void *data)
{
	struct file_lock *fl = data;

	/* Only close files for F_SETLEASE leases */
	if (fl->fl_flags & FL_LEASE)
		nfsd_file_close_inode_sync(file_inode(fl->fl_file));
	return 0;
}

static struct notifier_block nfsd_file_lease_notifier = {
	.notifier_call = nfsd_file_lease_notifier_call,
};

static int
nfsd_file_fsnotify_handle_event(struct fsnotify_mark *mark, u32 mask,
				struct inode *inode, struct inode *dir,
				const struct qstr *name, u32 cookie)
{
	trace_nfsd_file_fsnotify_handle_event(inode, mask);

	/* Should be no marks on non-regular files */
	if (!S_ISREG(inode->i_mode)) {
		WARN_ON_ONCE(1);
		return 0;
	}

	/* don't close files if this was not the last link */
	if (mask & FS_ATTRIB) {
		if (inode->i_nlink)
			return 0;
	}

	nfsd_file_close_inode(inode);
	return 0;
}

static const struct fsnotify_ops nfsd_file_fsnotify_ops = {
	.handle_inode_event = nfsd_file_fsnotify_handle_event,
	.free_mark = nfsd_file_mark_free,
};

int
nfsd_file_cache_init(void)
{
	int ret = -ENOMEM;
	unsigned int i;

	clear_bit(NFSD_FILE_SHUTDOWN, &nfsd_file_lru_flags);

	if (nfsd_file_hashtbl)
		return 0;

	nfsd_filecache_wq = alloc_workqueue("nfsd_filecache", 0, 0);
	if (!nfsd_filecache_wq)
		goto out;

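	/*
	 * One bucket per hash value, NFSD_FILE_HASH_SIZE (4096) in all.
	 * kvcalloc() lets this fall back to vmalloc when that much
	 * contiguous memory isn't available.
	 */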
	nfsd_file_hashtbl = kvcalloc(NFSD_FILE_HASH_SIZE,
				sizeof(*nfsd_file_hashtbl), GFP_KERNEL);
	if (!nfsd_file_hashtbl) {
		pr_err("nfsd: unable to allocate nfsd_file_hashtbl\n");
		goto out_err;
	}

	nfsd_file_slab = kmem_cache_create("nfsd_file",
				sizeof(struct nfsd_file), 0, 0, NULL);
	if (!nfsd_file_slab) {
		pr_err("nfsd: unable to create nfsd_file_slab\n");
		goto out_err;
	}

	nfsd_file_mark_slab = kmem_cache_create("nfsd_file_mark",
				sizeof(struct nfsd_file_mark), 0, 0, NULL);
	if (!nfsd_file_mark_slab) {
		pr_err("nfsd: unable to create nfsd_file_mark_slab\n");
		goto out_err;
	}

	ret = list_lru_init(&nfsd_file_lru);
	if (ret) {
		pr_err("nfsd: failed to init nfsd_file_lru: %d\n", ret);
		goto out_err;
	}

	ret = register_shrinker(&nfsd_file_shrinker);
	if (ret) {
		pr_err("nfsd: failed to register nfsd_file_shrinker: %d\n", ret);
		goto out_lru;
	}

	ret = lease_register_notifier(&nfsd_file_lease_notifier);
	if (ret) {
		pr_err("nfsd: unable to register lease notifier: %d\n", ret);
		goto out_shrinker;
	}

	nfsd_file_fsnotify_group = fsnotify_alloc_group(&nfsd_file_fsnotify_ops);
	if (IS_ERR(nfsd_file_fsnotify_group)) {
		pr_err("nfsd: unable to create fsnotify group: %ld\n",
			PTR_ERR(nfsd_file_fsnotify_group));
		ret = PTR_ERR(nfsd_file_fsnotify_group);
		nfsd_file_fsnotify_group = NULL;
		goto out_notifier;
	}

	for (i = 0; i < NFSD_FILE_HASH_SIZE; i++) {
		INIT_HLIST_HEAD(&nfsd_file_hashtbl[i].nfb_head);
		spin_lock_init(&nfsd_file_hashtbl[i].nfb_lock);
	}

	INIT_DELAYED_WORK(&nfsd_filecache_laundrette, nfsd_file_gc_worker);
out:
	return ret;
out_notifier:
	lease_unregister_notifier(&nfsd_file_lease_notifier);
out_shrinker:
	unregister_shrinker(&nfsd_file_shrinker);
out_lru:
	list_lru_destroy(&nfsd_file_lru);
out_err:
	kmem_cache_destroy(nfsd_file_slab);
	nfsd_file_slab = NULL;
	kmem_cache_destroy(nfsd_file_mark_slab);
	nfsd_file_mark_slab = NULL;
	kvfree(nfsd_file_hashtbl);
	nfsd_file_hashtbl = NULL;
	destroy_workqueue(nfsd_filecache_wq);
	nfsd_filecache_wq = NULL;
	goto out;
}

/*
 * Unhash and release all cached files belonging to @net, or every
 * cached file when @net is NULL.
 *
 * Note this can deadlock with nfsd_file_lru_cb.
 */
void
nfsd_file_cache_purge(struct net *net)
{
	unsigned int		i;
	struct nfsd_file	*nf;
	struct hlist_node	*next;
	LIST_HEAD(dispose);
	bool del;

	if (!nfsd_file_hashtbl)
		return;

	for (i = 0; i < NFSD_FILE_HASH_SIZE; i++) {
		struct nfsd_fcache_bucket *nfb = &nfsd_file_hashtbl[i];

		spin_lock(&nfb->nfb_lock);
		hlist_for_each_entry_safe(nf, next, &nfb->nfb_head, nf_node) {
			if (net && nf->nf_net != net)
				continue;
			del = nfsd_file_unhash_and_release_locked(nf, &dispose);

			/*
			 * Deadlock detected! Something marked this entry as
			 * unhashed, but hasn't removed it from the hash list.
			 */
			WARN_ON_ONCE(!del);
		}
		spin_unlock(&nfb->nfb_lock);
		nfsd_file_dispose_list(&dispose);
	}
}

static struct nfsd_fcache_disposal *
nfsd_alloc_fcache_disposal(struct net *net)
{
	struct nfsd_fcache_disposal *l;

	l = kmalloc(sizeof(*l), GFP_KERNEL);
	if (!l)
		return NULL;
	INIT_WORK(&l->work, nfsd_file_delayed_close);
	l->net = net;
	spin_lock_init(&l->lock);
	INIT_LIST_HEAD(&l->freeme);
	return l;
}

static void
nfsd_free_fcache_disposal(struct nfsd_fcache_disposal *l)
{
	rcu_assign_pointer(l->net, NULL);
	cancel_work_sync(&l->work);
	nfsd_file_dispose_list(&l->freeme);
	kfree_rcu(l, rcu);
}

static void
nfsd_add_fcache_disposal(struct nfsd_fcache_disposal *l)
{
	spin_lock(&laundrette_lock);
	list_add_tail_rcu(&l->list, &laundrettes);
	spin_unlock(&laundrette_lock);
}

static void
nfsd_del_fcache_disposal(struct nfsd_fcache_disposal *l)
{
	spin_lock(&laundrette_lock);
	list_del_rcu(&l->list);
	spin_unlock(&laundrette_lock);
}

static int
nfsd_alloc_fcache_disposal_net(struct net *net)
{
	struct nfsd_fcache_disposal *l;

	l = nfsd_alloc_fcache_disposal(net);
	if (!l)
		return -ENOMEM;
	nfsd_add_fcache_disposal(l);
	return 0;
}

static void
nfsd_free_fcache_disposal_net(struct net *net)
{
	struct nfsd_fcache_disposal *l;

	rcu_read_lock();
	list_for_each_entry_rcu(l, &laundrettes, list) {
		if (l->net != net)
			continue;
		nfsd_del_fcache_disposal(l);
		rcu_read_unlock();
		nfsd_free_fcache_disposal(l);
		return;
	}
	rcu_read_unlock();
}

int
nfsd_file_cache_start_net(struct net *net)
{
	return nfsd_alloc_fcache_disposal_net(net);
}

void
nfsd_file_cache_shutdown_net(struct net *net)
{
	nfsd_file_cache_purge(net);
	nfsd_free_fcache_disposal_net(net);
}

void
nfsd_file_cache_shutdown(void)
{
	set_bit(NFSD_FILE_SHUTDOWN, &nfsd_file_lru_flags);

	lease_unregister_notifier(&nfsd_file_lease_notifier);
	unregister_shrinker(&nfsd_file_shrinker);
	/*
	 * make sure all callers of nfsd_file_lru_cb are done before
	 * calling nfsd_file_cache_purge
	 */
	cancel_delayed_work_sync(&nfsd_filecache_laundrette);
	nfsd_file_cache_purge(NULL);
	list_lru_destroy(&nfsd_file_lru);
	rcu_barrier();
	fsnotify_put_group(nfsd_file_fsnotify_group);
	nfsd_file_fsnotify_group = NULL;
	kmem_cache_destroy(nfsd_file_slab);
	nfsd_file_slab = NULL;
	fsnotify_wait_marks_destroyed();
	kmem_cache_destroy(nfsd_file_mark_slab);
	nfsd_file_mark_slab = NULL;
	kvfree(nfsd_file_hashtbl);
	nfsd_file_hashtbl = NULL;
	destroy_workqueue(nfsd_filecache_wq);
	nfsd_filecache_wq = NULL;
}

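/*
 * Only the credential fields that matter for file access are compared:
 * fsuid, fsgid and the supplementary group list. A cached file is only
 * shared between requests whose creds match on all of them.
 */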
static bool
nfsd_match_cred(const struct cred *c1, const struct cred *c2)
{
	int i;

	if (!uid_eq(c1->fsuid, c2->fsuid))
		return false;
	if (!gid_eq(c1->fsgid, c2->fsgid))
		return false;
	if (c1->group_info == NULL || c2->group_info == NULL)
		return c1->group_info == c2->group_info;
	if (c1->group_info->ngroups != c2->group_info->ngroups)
		return false;
	for (i = 0; i < c1->group_info->ngroups; i++) {
		if (!gid_eq(c1->group_info->gid[i], c2->group_info->gid[i]))
			return false;
	}
	return true;
}

static struct nfsd_file *
nfsd_file_find_locked(struct inode *inode, unsigned int may_flags,
		      unsigned int hashval, struct net *net)
{
	struct nfsd_file *nf;
	unsigned char need = may_flags & NFSD_FILE_MAY_MASK;

	hlist_for_each_entry_rcu(nf, &nfsd_file_hashtbl[hashval].nfb_head,
				 nf_node, lockdep_is_held(&nfsd_file_hashtbl[hashval].nfb_lock)) {
		if (nf->nf_may != need)
			continue;
		if (nf->nf_inode != inode)
			continue;
		if (nf->nf_net != net)
			continue;
		if (!nfsd_match_cred(nf->nf_cred, current_cred()))
			continue;
		if (!test_bit(NFSD_FILE_HASHED, &nf->nf_flags))
			continue;
		if (nfsd_file_get(nf) != NULL)
			return nf;
	}
	return NULL;
}

/**
 * nfsd_file_is_cached - are there any cached open files for this fh?
 * @inode: inode of the file to check
 *
 * Scan the hashtable for open files that match this fh. Returns true if there
 * are any, and false if not.
 */
bool
nfsd_file_is_cached(struct inode *inode)
{
	bool			ret = false;
	struct nfsd_file	*nf;
	unsigned int		hashval;

	hashval = (unsigned int)hash_long(inode->i_ino, NFSD_FILE_HASH_BITS);

	rcu_read_lock();
	hlist_for_each_entry_rcu(nf, &nfsd_file_hashtbl[hashval].nfb_head,
				 nf_node) {
		if (inode == nf->nf_inode) {
			ret = true;
			break;
		}
	}
	rcu_read_unlock();
	trace_nfsd_file_is_cached(inode, hashval, (int)ret);
	return ret;
}

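/**
 * nfsd_file_acquire - find or open a cached file for this request
 * @rqstp: the RPC request being processed
 * @fhp: filehandle of the file to open
 * @may_flags: NFSD_MAY_READ and/or NFSD_MAY_WRITE
 * @pnf: OUT: the referenced nfsd_file
 *
 * Look up a cache entry matching the inode, access mode, net namespace
 * and credentials of this request; insert and open a new entry if none
 * is found. On success, returns nfs_ok and a referenced nfsd_file in
 * @pnf, which the caller must release with nfsd_file_put() once its
 * I/O through nf->nf_file is done. A sketch of the caller pattern (not
 * a verbatim copy of any in-tree user):
 *
 *	struct nfsd_file *nf;
 *	__be32 status;
 *
 *	status = nfsd_file_acquire(rqstp, fhp, NFSD_MAY_READ, &nf);
 *	if (status != nfs_ok)
 *		return status;
 *	... read from nf->nf_file ...
 *	nfsd_file_put(nf);
 */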
__be32
nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
		  unsigned int may_flags, struct nfsd_file **pnf)
{
	__be32	status;
	struct net *net = SVC_NET(rqstp);
	struct nfsd_file *nf, *new;
	struct inode *inode;
	unsigned int hashval;
	bool retry = true;

	/* FIXME: skip this if fh_dentry is already set? */
	status = fh_verify(rqstp, fhp, S_IFREG,
				may_flags|NFSD_MAY_OWNER_OVERRIDE);
	if (status != nfs_ok)
		return status;

	inode = d_inode(fhp->fh_dentry);
	hashval = (unsigned int)hash_long(inode->i_ino, NFSD_FILE_HASH_BITS);
retry:
	rcu_read_lock();
	nf = nfsd_file_find_locked(inode, may_flags, hashval, net);
	rcu_read_unlock();
	if (nf)
		goto wait_for_construction;

	new = nfsd_file_alloc(inode, may_flags, hashval, net);
	if (!new) {
		trace_nfsd_file_acquire(rqstp, hashval, inode, may_flags,
					NULL, nfserr_jukebox);
		return nfserr_jukebox;
	}

	spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock);
	nf = nfsd_file_find_locked(inode, may_flags, hashval, net);
	if (nf == NULL)
		goto open_file;
	spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
	nfsd_file_slab_free(&new->nf_rcu);

wait_for_construction:
	wait_on_bit(&nf->nf_flags, NFSD_FILE_PENDING, TASK_UNINTERRUPTIBLE);

	/* Did construction of this file fail? */
	if (!test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) {
		if (!retry) {
			status = nfserr_jukebox;
			goto out;
		}
		retry = false;
		nfsd_file_put_noref(nf);
		goto retry;
	}

	this_cpu_inc(nfsd_file_cache_hits);

	status = nfserrno(nfsd_open_break_lease(file_inode(nf->nf_file), may_flags));
out:
	if (status == nfs_ok) {
		*pnf = nf;
	} else {
		nfsd_file_put(nf);
		nf = NULL;
	}

	trace_nfsd_file_acquire(rqstp, hashval, inode, may_flags, nf, status);
	return status;
open_file:
	nf = new;
	/* Take reference for the hashtable */
	refcount_inc(&nf->nf_ref);
	__set_bit(NFSD_FILE_HASHED, &nf->nf_flags);
	__set_bit(NFSD_FILE_PENDING, &nf->nf_flags);
	list_lru_add(&nfsd_file_lru, &nf->nf_lru);
	hlist_add_head_rcu(&nf->nf_node, &nfsd_file_hashtbl[hashval].nfb_head);
	++nfsd_file_hashtbl[hashval].nfb_count;
	nfsd_file_hashtbl[hashval].nfb_maxcount = max(nfsd_file_hashtbl[hashval].nfb_maxcount,
			nfsd_file_hashtbl[hashval].nfb_count);
	spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
	if (atomic_long_inc_return(&nfsd_filecache_count) >= NFSD_FILE_LRU_THRESHOLD)
		nfsd_file_gc();

	nf->nf_mark = nfsd_file_mark_find_or_create(nf);
	if (nf->nf_mark)
		status = nfsd_open_verified(rqstp, fhp, S_IFREG,
				may_flags, &nf->nf_file);
	else
		status = nfserr_jukebox;
	/*
	 * If construction failed, or we raced with a call to unlink()
	 * then unhash.
	 */
	if (status != nfs_ok || inode->i_nlink == 0) {
		bool do_free;

		spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock);
		do_free = nfsd_file_unhash(nf);
		spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
		if (do_free)
			nfsd_file_put_noref(nf);
	}
	clear_bit_unlock(NFSD_FILE_PENDING, &nf->nf_flags);
	smp_mb__after_atomic();
	wake_up_bit(&nf->nf_flags, NFSD_FILE_PENDING);
	goto out;
}

/*
 * Note that fields may be added, removed or reordered in the future. Programs
 * scraping this file for info should test the labels to ensure they're
 * getting the correct field.
 */
static int nfsd_file_cache_stats_show(struct seq_file *m, void *v)
{
	unsigned int i, count = 0, longest = 0;
	unsigned long hits = 0;

	/*
	 * No need for spinlocks here since we're not terribly interested in
	 * accuracy. We do take the nfsd_mutex simply to ensure that we
	 * don't end up racing with server shutdown.
	 */
	mutex_lock(&nfsd_mutex);
	if (nfsd_file_hashtbl) {
		for (i = 0; i < NFSD_FILE_HASH_SIZE; i++) {
			count += nfsd_file_hashtbl[i].nfb_count;
			longest = max(longest, nfsd_file_hashtbl[i].nfb_count);
		}
	}
	mutex_unlock(&nfsd_mutex);

	for_each_possible_cpu(i)
		hits += per_cpu(nfsd_file_cache_hits, i);

	seq_printf(m, "total entries: %u\n", count);
	seq_printf(m, "longest chain: %u\n", longest);
	seq_printf(m, "cache hits: %lu\n", hits);
	return 0;
}

int nfsd_file_cache_stats_open(struct inode *inode, struct file *file)
{
	return single_open(file, nfsd_file_cache_stats_show, NULL);
}