/*
 * Open file cache.
 *
 * (c) 2015 - Jeff Layton <jeff.layton@primarydata.com>
 */

#include <linux/hash.h>
#include <linux/slab.h>
#include <linux/file.h>
#include <linux/sched.h>
#include <linux/list_lru.h>
#include <linux/fsnotify_backend.h>
#include <linux/fsnotify.h>
#include <linux/seq_file.h>

#include "vfs.h"
#include "nfsd.h"
#include "nfsfh.h"
#include "netns.h"
#include "filecache.h"
#include "trace.h"

#define NFSDDBG_FACILITY	NFSDDBG_FH

/* FIXME: dynamically size this for the machine somehow? */
#define NFSD_FILE_HASH_BITS                   12
#define NFSD_FILE_HASH_SIZE                  (1 << NFSD_FILE_HASH_BITS)
#define NFSD_LAUNDRETTE_DELAY		     (2 * HZ)

#define NFSD_FILE_SHUTDOWN		     (1)
#define NFSD_FILE_LRU_THRESHOLD		     (4096UL)
#define NFSD_FILE_LRU_LIMIT		     (NFSD_FILE_LRU_THRESHOLD << 2)

/* We only care about NFSD_MAY_READ/WRITE for this cache */
#define NFSD_FILE_MAY_MASK	(NFSD_MAY_READ|NFSD_MAY_WRITE)

struct nfsd_fcache_bucket {
	struct hlist_head	nfb_head;
	spinlock_t		nfb_lock;
	unsigned int		nfb_count;
	unsigned int		nfb_maxcount;
};

static DEFINE_PER_CPU(unsigned long, nfsd_file_cache_hits);

struct nfsd_fcache_disposal {
	struct list_head list;
	struct work_struct work;
	struct net *net;
	spinlock_t lock;
	struct list_head freeme;
	struct rcu_head rcu;
};

static struct workqueue_struct *nfsd_filecache_wq __read_mostly;

static struct kmem_cache		*nfsd_file_slab;
static struct kmem_cache		*nfsd_file_mark_slab;
static struct nfsd_fcache_bucket	*nfsd_file_hashtbl;
static struct list_lru			nfsd_file_lru;
static long				nfsd_file_lru_flags;
static struct fsnotify_group		*nfsd_file_fsnotify_group;
static atomic_long_t			nfsd_filecache_count;
static struct delayed_work		nfsd_filecache_laundrette;
static DEFINE_SPINLOCK(laundrette_lock);
static LIST_HEAD(laundrettes);

static void nfsd_file_gc(void);

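/*
 * Kick off the laundrette: delayed work that garbage-collects unused
 * cache entries. Skipped when the cache is empty or shutting down.
 */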
static void
nfsd_file_schedule_laundrette(void)
{
	long count = atomic_long_read(&nfsd_filecache_count);

	if (count == 0 || test_bit(NFSD_FILE_SHUTDOWN, &nfsd_file_lru_flags))
		return;

	queue_delayed_work(system_wq, &nfsd_filecache_laundrette,
			NFSD_LAUNDRETTE_DELAY);
}

static void
nfsd_file_slab_free(struct rcu_head *rcu)
{
	struct nfsd_file *nf = container_of(rcu, struct nfsd_file, nf_rcu);

	put_cred(nf->nf_cred);
	kmem_cache_free(nfsd_file_slab, nf);
}

static void
nfsd_file_mark_free(struct fsnotify_mark *mark)
{
	struct nfsd_file_mark *nfm = container_of(mark, struct nfsd_file_mark,
						  nfm_mark);

	kmem_cache_free(nfsd_file_mark_slab, nfm);
}

static struct nfsd_file_mark *
nfsd_file_mark_get(struct nfsd_file_mark *nfm)
{
	if (!refcount_inc_not_zero(&nfm->nfm_ref))
		return NULL;
	return nfm;
}

static void
nfsd_file_mark_put(struct nfsd_file_mark *nfm)
{
	if (refcount_dec_and_test(&nfm->nfm_ref)) {
		fsnotify_destroy_mark(&nfm->nfm_mark, nfsd_file_fsnotify_group);
		fsnotify_put_mark(&nfm->nfm_mark);
	}
}

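/*
 * Find the fsnotify mark for this inode, or create one if there isn't
 * one yet. Loops on -EEXIST to handle a race with a concurrent creator
 * whose mark is already on its way out.
 */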
static struct nfsd_file_mark *
nfsd_file_mark_find_or_create(struct nfsd_file *nf)
{
	int			err;
	struct fsnotify_mark	*mark;
	struct nfsd_file_mark	*nfm = NULL, *new;
	struct inode *inode = nf->nf_inode;

	do {
		mutex_lock(&nfsd_file_fsnotify_group->mark_mutex);
		mark = fsnotify_find_mark(&inode->i_fsnotify_marks,
				nfsd_file_fsnotify_group);
		if (mark) {
			nfm = nfsd_file_mark_get(container_of(mark,
						 struct nfsd_file_mark,
						 nfm_mark));
			mutex_unlock(&nfsd_file_fsnotify_group->mark_mutex);
			if (nfm) {
				fsnotify_put_mark(mark);
				break;
			}
			/* Avoid soft lockup race with nfsd_file_mark_put() */
			fsnotify_destroy_mark(mark, nfsd_file_fsnotify_group);
			fsnotify_put_mark(mark);
		} else
			mutex_unlock(&nfsd_file_fsnotify_group->mark_mutex);

		/* allocate a new nfm */
		new = kmem_cache_alloc(nfsd_file_mark_slab, GFP_KERNEL);
		if (!new)
			return NULL;
		fsnotify_init_mark(&new->nfm_mark, nfsd_file_fsnotify_group);
		new->nfm_mark.mask = FS_ATTRIB|FS_DELETE_SELF;
		refcount_set(&new->nfm_ref, 1);

		err = fsnotify_add_inode_mark(&new->nfm_mark, inode, 0);

		/*
		 * If the add was successful, then return the object.
		 * Otherwise, we need to put the reference we hold on the
		 * nfm_mark. The fsnotify code will take a reference and put
		 * it on failure, so we can't just free it directly. It's also
		 * not safe to call fsnotify_destroy_mark on it as the
		 * mark->group will be NULL. Thus, we can't let the nfm_ref
		 * counter drive the destruction at this point.
		 */
		if (likely(!err))
			nfm = new;
		else
			fsnotify_put_mark(&new->nfm_mark);
	} while (unlikely(err == -EEXIST));

	return nfm;
}

static struct nfsd_file *
nfsd_file_alloc(struct inode *inode, unsigned int may, unsigned int hashval,
		struct net *net)
{
	struct nfsd_file *nf;

	nf = kmem_cache_alloc(nfsd_file_slab, GFP_KERNEL);
	if (nf) {
		INIT_HLIST_NODE(&nf->nf_node);
		INIT_LIST_HEAD(&nf->nf_lru);
		nf->nf_file = NULL;
		nf->nf_cred = get_current_cred();
		nf->nf_net = net;
		nf->nf_flags = 0;
		nf->nf_inode = inode;
		nf->nf_hashval = hashval;
		refcount_set(&nf->nf_ref, 1);
		nf->nf_may = may & NFSD_FILE_MAY_MASK;
		nf->nf_mark = NULL;
		trace_nfsd_file_alloc(nf);
	}
	return nf;
}

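/*
 * Free an nfsd_file after its last reference has been put. Returns
 * true if an open struct file was closed here, in which case the
 * caller may want to flush the delayed fput queue.
 */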
static bool
nfsd_file_free(struct nfsd_file *nf)
{
	bool flush = false;

	trace_nfsd_file_put_final(nf);
	if (nf->nf_mark)
		nfsd_file_mark_put(nf->nf_mark);
	if (nf->nf_file) {
		get_file(nf->nf_file);
		filp_close(nf->nf_file, NULL);
		fput(nf->nf_file);
		flush = true;
	}
	call_rcu(&nf->nf_rcu, nfsd_file_slab_free);
	return flush;
}

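/*
 * Report whether this file still has dirty pages or pages under
 * writeback. Such entries should not be discarded by the GC.
 */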
static bool
nfsd_file_check_writeback(struct nfsd_file *nf)
{
	struct file *file = nf->nf_file;
	struct address_space *mapping;

	if (!file || !(file->f_mode & FMODE_WRITE))
		return false;
	mapping = file->f_mapping;
	return mapping_tagged(mapping, PAGECACHE_TAG_DIRTY) ||
		mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK);
}

static int
nfsd_file_check_write_error(struct nfsd_file *nf)
{
	struct file *file = nf->nf_file;

	if (!file || !(file->f_mode & FMODE_WRITE))
		return 0;
	return filemap_check_wb_err(file->f_mapping, READ_ONCE(file->f_wb_err));
}

static void
nfsd_file_do_unhash(struct nfsd_file *nf)
{
	lockdep_assert_held(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);

	trace_nfsd_file_unhash(nf);

	if (nfsd_file_check_write_error(nf))
		nfsd_reset_boot_verifier(net_generic(nf->nf_net, nfsd_net_id));
	--nfsd_file_hashtbl[nf->nf_hashval].nfb_count;
	hlist_del_rcu(&nf->nf_node);
	atomic_long_dec(&nfsd_filecache_count);
}

static bool
nfsd_file_unhash(struct nfsd_file *nf)
{
	if (test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags)) {
		nfsd_file_do_unhash(nf);
		if (!list_empty(&nf->nf_lru))
			list_lru_del(&nfsd_file_lru, &nf->nf_lru);
		return true;
	}
	return false;
}

/*
 * Return true if the file was unhashed.
 */
static bool
nfsd_file_unhash_and_release_locked(struct nfsd_file *nf, struct list_head *dispose)
{
	lockdep_assert_held(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);

	trace_nfsd_file_unhash_and_release_locked(nf);
	if (!nfsd_file_unhash(nf))
		return false;
	/* keep final reference for nfsd_file_lru_dispose */
	if (refcount_dec_not_one(&nf->nf_ref))
		return true;

	list_add(&nf->nf_lru, dispose);
	return true;
}

static void
nfsd_file_put_noref(struct nfsd_file *nf)
{
	trace_nfsd_file_put(nf);

	if (refcount_dec_and_test(&nf->nf_ref)) {
		WARN_ON(test_bit(NFSD_FILE_HASHED, &nf->nf_flags));
		nfsd_file_free(nf);
	}
}

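/*
 * Drop a reference taken via nfsd_file_acquire() or nfsd_file_get().
 * If this looks like the last user besides the hashtable, kick off
 * writeback of any dirty data and schedule the laundrette; force a GC
 * pass if the cache has grown past NFSD_FILE_LRU_LIMIT.
 */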
void
nfsd_file_put(struct nfsd_file *nf)
{
	bool is_hashed;

	set_bit(NFSD_FILE_REFERENCED, &nf->nf_flags);
	if (refcount_read(&nf->nf_ref) > 2 || !nf->nf_file) {
		nfsd_file_put_noref(nf);
		return;
	}

	filemap_flush(nf->nf_file->f_mapping);
	is_hashed = test_bit(NFSD_FILE_HASHED, &nf->nf_flags) != 0;
	nfsd_file_put_noref(nf);
	if (is_hashed)
		nfsd_file_schedule_laundrette();
	if (atomic_long_read(&nfsd_filecache_count) >= NFSD_FILE_LRU_LIMIT)
		nfsd_file_gc();
}

struct nfsd_file *
nfsd_file_get(struct nfsd_file *nf)
{
	if (likely(refcount_inc_not_zero(&nf->nf_ref)))
		return nf;
	return NULL;
}

static void
nfsd_file_dispose_list(struct list_head *dispose)
{
	struct nfsd_file *nf;

	while (!list_empty(dispose)) {
		nf = list_first_entry(dispose, struct nfsd_file, nf_lru);
		list_del(&nf->nf_lru);
		nfsd_file_put_noref(nf);
	}
}

static void
nfsd_file_dispose_list_sync(struct list_head *dispose)
{
	bool flush = false;
	struct nfsd_file *nf;

	while (!list_empty(dispose)) {
		nf = list_first_entry(dispose, struct nfsd_file, nf_lru);
		list_del(&nf->nf_lru);
		if (!refcount_dec_and_test(&nf->nf_ref))
			continue;
		if (nfsd_file_free(nf))
			flush = true;
	}
	if (flush)
		flush_delayed_fput();
}

static void
nfsd_file_list_remove_disposal(struct list_head *dst,
		struct nfsd_fcache_disposal *l)
{
	spin_lock(&l->lock);
	list_splice_init(&l->freeme, dst);
	spin_unlock(&l->lock);
}

static void
nfsd_file_list_add_disposal(struct list_head *files, struct net *net)
{
	struct nfsd_fcache_disposal *l;

	rcu_read_lock();
	list_for_each_entry_rcu(l, &laundrettes, list) {
		if (l->net == net) {
			spin_lock(&l->lock);
			list_splice_tail_init(files, &l->freeme);
			spin_unlock(&l->lock);
			queue_work(nfsd_filecache_wq, &l->work);
			break;
		}
	}
	rcu_read_unlock();
}

static void
nfsd_file_list_add_pernet(struct list_head *dst, struct list_head *src,
		struct net *net)
{
	struct nfsd_file *nf, *tmp;

	list_for_each_entry_safe(nf, tmp, src, nf_lru) {
		if (nf->nf_net == net)
			list_move_tail(&nf->nf_lru, dst);
	}
}

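/*
 * Hand a list of nfsd_files off to the per-net laundrettes. Entries
 * are grouped by network namespace and queued to the matching
 * disposal workqueue so they get freed in process context.
 */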
static void
nfsd_file_dispose_list_delayed(struct list_head *dispose)
{
	LIST_HEAD(list);
	struct nfsd_file *nf;

	while (!list_empty(dispose)) {
		nf = list_first_entry(dispose, struct nfsd_file, nf_lru);
		nfsd_file_list_add_pernet(&list, dispose, nf->nf_net);
		nfsd_file_list_add_disposal(&list, nf->nf_net);
	}
}

/*
 * Note this can deadlock with nfsd_file_cache_purge.
 */
static enum lru_status
nfsd_file_lru_cb(struct list_head *item, struct list_lru_one *lru,
		 spinlock_t *lock, void *arg)
	__releases(lock)
	__acquires(lock)
{
	struct list_head *head = arg;
	struct nfsd_file *nf = list_entry(item, struct nfsd_file, nf_lru);

	/*
	 * Do a lockless refcount check. The hashtable holds one reference, so
	 * we look to see if anything else has a reference, or if any have
	 * been put since the shrinker last ran. Those don't get unhashed and
	 * released.
	 *
	 * Note that in the put path, we set the flag and then decrement the
	 * counter. Here we check the counter and then test and clear the flag.
	 * That order is deliberate to ensure that we can do this locklessly.
	 */
	if (refcount_read(&nf->nf_ref) > 1)
		goto out_skip;

	/*
	 * Don't throw out files that are still undergoing I/O or
	 * that have uncleared errors pending.
	 */
	if (nfsd_file_check_writeback(nf))
		goto out_skip;

	if (test_and_clear_bit(NFSD_FILE_REFERENCED, &nf->nf_flags))
		goto out_skip;

	if (!test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags))
		goto out_skip;

	list_lru_isolate_move(lru, &nf->nf_lru, head);
	return LRU_REMOVED;
out_skip:
	return LRU_SKIP;
}

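/*
 * Walk the LRU, isolating idle entries, then unhash each one under its
 * bucket lock and queue it for delayed disposal. Called with a
 * shrink_control from the shrinker, or with NULL to scan everything.
 */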
static unsigned long
nfsd_file_lru_walk_list(struct shrink_control *sc)
{
	LIST_HEAD(head);
	struct nfsd_file *nf;
	unsigned long ret;

	if (sc)
		ret = list_lru_shrink_walk(&nfsd_file_lru, sc,
				nfsd_file_lru_cb, &head);
	else
		ret = list_lru_walk(&nfsd_file_lru,
				nfsd_file_lru_cb,
				&head, LONG_MAX);
	list_for_each_entry(nf, &head, nf_lru) {
		spin_lock(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);
		nfsd_file_do_unhash(nf);
		spin_unlock(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);
	}
	nfsd_file_dispose_list_delayed(&head);
	return ret;
}

static void
nfsd_file_gc(void)
{
	nfsd_file_lru_walk_list(NULL);
}

static void
nfsd_file_gc_worker(struct work_struct *work)
{
	nfsd_file_gc();
	nfsd_file_schedule_laundrette();
}

static unsigned long
nfsd_file_lru_count(struct shrinker *s, struct shrink_control *sc)
{
	return list_lru_count(&nfsd_file_lru);
}

static unsigned long
nfsd_file_lru_scan(struct shrinker *s, struct shrink_control *sc)
{
	return nfsd_file_lru_walk_list(sc);
}

static struct shrinker	nfsd_file_shrinker = {
	.scan_objects = nfsd_file_lru_scan,
	.count_objects = nfsd_file_lru_count,
	.seeks = 1,
};

static void
__nfsd_file_close_inode(struct inode *inode, unsigned int hashval,
			struct list_head *dispose)
{
	struct nfsd_file	*nf;
	struct hlist_node	*tmp;

	spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock);
	hlist_for_each_entry_safe(nf, tmp, &nfsd_file_hashtbl[hashval].nfb_head, nf_node) {
		if (inode == nf->nf_inode)
			nfsd_file_unhash_and_release_locked(nf, dispose);
	}
	spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
}

/**
 * nfsd_file_close_inode_sync - attempt to forcibly close a nfsd_file
 * @inode: inode of the file to attempt to remove
 *
 * Walk the whole hash bucket, looking for any files that correspond to "inode".
 * If any do, then unhash them and put the hashtable reference to them and
 * destroy any that had their last reference put. Also ensure that any of the
 * fputs also have their final __fput done as well.
 */
void
nfsd_file_close_inode_sync(struct inode *inode)
{
	unsigned int		hashval = (unsigned int)hash_long(inode->i_ino,
						NFSD_FILE_HASH_BITS);
	LIST_HEAD(dispose);

	__nfsd_file_close_inode(inode, hashval, &dispose);
	trace_nfsd_file_close_inode_sync(inode, hashval, !list_empty(&dispose));
	nfsd_file_dispose_list_sync(&dispose);
}

/**
 * nfsd_file_close_inode - attempt a delayed close of a nfsd_file
 * @inode: inode of the file to attempt to remove
 *
 * Walk the whole hash bucket, looking for any files that correspond to "inode".
 * If any do, then unhash them and put the hashtable reference to them and
 * destroy any that had their last reference put.
 */
static void
nfsd_file_close_inode(struct inode *inode)
{
	unsigned int		hashval = (unsigned int)hash_long(inode->i_ino,
						NFSD_FILE_HASH_BITS);
	LIST_HEAD(dispose);

	__nfsd_file_close_inode(inode, hashval, &dispose);
	trace_nfsd_file_close_inode(inode, hashval, !list_empty(&dispose));
	nfsd_file_dispose_list_delayed(&dispose);
}

/**
 * nfsd_file_delayed_close - close unused nfsd_files
 * @work: work item for a per-net nfsd_fcache_disposal
 *
 * Walk the per-net disposal list and release any entries that have been
 * queued up for closing.
 *
 * Note this can deadlock with nfsd_file_cache_purge.
 */
static void
nfsd_file_delayed_close(struct work_struct *work)
{
	LIST_HEAD(head);
	struct nfsd_fcache_disposal *l = container_of(work,
			struct nfsd_fcache_disposal, work);

	nfsd_file_list_remove_disposal(&head, l);
	nfsd_file_dispose_list(&head);
}

static int
nfsd_file_lease_notifier_call(struct notifier_block *nb, unsigned long arg,
			    void *data)
{
	struct file_lock *fl = data;

	/* Only close files for F_SETLEASE leases */
	if (fl->fl_flags & FL_LEASE)
		nfsd_file_close_inode_sync(file_inode(fl->fl_file));
	return 0;
}

static struct notifier_block nfsd_file_lease_notifier = {
	.notifier_call = nfsd_file_lease_notifier_call,
};

static int
nfsd_file_fsnotify_handle_event(struct fsnotify_mark *mark, u32 mask,
				struct inode *inode, struct inode *dir,
				const struct qstr *name, u32 cookie)
{
	trace_nfsd_file_fsnotify_handle_event(inode, mask);

	/* Should be no marks on non-regular files */
	if (!S_ISREG(inode->i_mode)) {
		WARN_ON_ONCE(1);
		return 0;
	}

	/* don't close files if this was not the last link */
	if (mask & FS_ATTRIB) {
		if (inode->i_nlink)
			return 0;
	}

	nfsd_file_close_inode(inode);
	return 0;
}

static const struct fsnotify_ops nfsd_file_fsnotify_ops = {
	.handle_inode_event = nfsd_file_fsnotify_handle_event,
	.free_mark = nfsd_file_mark_free,
};

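/*
 * Set up the global file cache: hashtable, slabs, LRU list, shrinker,
 * lease notifier, and fsnotify group. Returns 0 immediately if the
 * cache has already been initialized.
 */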
int
nfsd_file_cache_init(void)
{
	int		ret = -ENOMEM;
	unsigned int	i;

	clear_bit(NFSD_FILE_SHUTDOWN, &nfsd_file_lru_flags);

	if (nfsd_file_hashtbl)
		return 0;

	nfsd_filecache_wq = alloc_workqueue("nfsd_filecache", 0, 0);
	if (!nfsd_filecache_wq)
		goto out;

	nfsd_file_hashtbl = kvcalloc(NFSD_FILE_HASH_SIZE,
				sizeof(*nfsd_file_hashtbl), GFP_KERNEL);
	if (!nfsd_file_hashtbl) {
		pr_err("nfsd: unable to allocate nfsd_file_hashtbl\n");
		goto out_err;
	}

	nfsd_file_slab = kmem_cache_create("nfsd_file",
				sizeof(struct nfsd_file), 0, 0, NULL);
	if (!nfsd_file_slab) {
		pr_err("nfsd: unable to create nfsd_file_slab\n");
		goto out_err;
	}

	nfsd_file_mark_slab = kmem_cache_create("nfsd_file_mark",
					sizeof(struct nfsd_file_mark), 0, 0, NULL);
	if (!nfsd_file_mark_slab) {
		pr_err("nfsd: unable to create nfsd_file_mark_slab\n");
		goto out_err;
	}

	ret = list_lru_init(&nfsd_file_lru);
	if (ret) {
		pr_err("nfsd: failed to init nfsd_file_lru: %d\n", ret);
		goto out_err;
	}

	ret = register_shrinker(&nfsd_file_shrinker);
	if (ret) {
		pr_err("nfsd: failed to register nfsd_file_shrinker: %d\n", ret);
		goto out_lru;
	}

	ret = lease_register_notifier(&nfsd_file_lease_notifier);
	if (ret) {
		pr_err("nfsd: unable to register lease notifier: %d\n", ret);
		goto out_shrinker;
	}

	nfsd_file_fsnotify_group = fsnotify_alloc_group(&nfsd_file_fsnotify_ops);
	if (IS_ERR(nfsd_file_fsnotify_group)) {
		pr_err("nfsd: unable to create fsnotify group: %ld\n",
			PTR_ERR(nfsd_file_fsnotify_group));
		ret = PTR_ERR(nfsd_file_fsnotify_group);
		nfsd_file_fsnotify_group = NULL;
		goto out_notifier;
	}

	for (i = 0; i < NFSD_FILE_HASH_SIZE; i++) {
		INIT_HLIST_HEAD(&nfsd_file_hashtbl[i].nfb_head);
		spin_lock_init(&nfsd_file_hashtbl[i].nfb_lock);
	}

	INIT_DELAYED_WORK(&nfsd_filecache_laundrette, nfsd_file_gc_worker);
out:
	return ret;
out_notifier:
	lease_unregister_notifier(&nfsd_file_lease_notifier);
out_shrinker:
	unregister_shrinker(&nfsd_file_shrinker);
out_lru:
	list_lru_destroy(&nfsd_file_lru);
out_err:
	kmem_cache_destroy(nfsd_file_slab);
	nfsd_file_slab = NULL;
	kmem_cache_destroy(nfsd_file_mark_slab);
	nfsd_file_mark_slab = NULL;
	kvfree(nfsd_file_hashtbl);
	nfsd_file_hashtbl = NULL;
	destroy_workqueue(nfsd_filecache_wq);
	nfsd_filecache_wq = NULL;
	goto out;
}

/*
 * Note this can deadlock with nfsd_file_lru_cb.
 */
void
nfsd_file_cache_purge(struct net *net)
{
	unsigned int		i;
	struct nfsd_file	*nf;
	struct hlist_node	*next;
	LIST_HEAD(dispose);
	bool del;

	if (!nfsd_file_hashtbl)
		return;

	for (i = 0; i < NFSD_FILE_HASH_SIZE; i++) {
		struct nfsd_fcache_bucket *nfb = &nfsd_file_hashtbl[i];

		spin_lock(&nfb->nfb_lock);
		hlist_for_each_entry_safe(nf, next, &nfb->nfb_head, nf_node) {
			if (net && nf->nf_net != net)
				continue;
			del = nfsd_file_unhash_and_release_locked(nf, &dispose);

			/*
			 * Deadlock detected! Something marked this entry as
			 * unhashed, but hasn't removed it from the hash list.
			 */
			WARN_ON_ONCE(!del);
		}
		spin_unlock(&nfb->nfb_lock);
		nfsd_file_dispose_list(&dispose);
	}
}
746  
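/*
 * Allocate a per-net disposal structure whose work item frees queued
 * nfsd_files for that network namespace.
 */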
static struct nfsd_fcache_disposal *
nfsd_alloc_fcache_disposal(struct net *net)
{
	struct nfsd_fcache_disposal *l;

	l = kmalloc(sizeof(*l), GFP_KERNEL);
	if (!l)
		return NULL;
	INIT_WORK(&l->work, nfsd_file_delayed_close);
	l->net = net;
	spin_lock_init(&l->lock);
	INIT_LIST_HEAD(&l->freeme);
	return l;
}

static void
nfsd_free_fcache_disposal(struct nfsd_fcache_disposal *l)
{
	rcu_assign_pointer(l->net, NULL);
	cancel_work_sync(&l->work);
	nfsd_file_dispose_list(&l->freeme);
	kfree_rcu(l, rcu);
}

static void
nfsd_add_fcache_disposal(struct nfsd_fcache_disposal *l)
{
	spin_lock(&laundrette_lock);
	list_add_tail_rcu(&l->list, &laundrettes);
	spin_unlock(&laundrette_lock);
}

static void
nfsd_del_fcache_disposal(struct nfsd_fcache_disposal *l)
{
	spin_lock(&laundrette_lock);
	list_del_rcu(&l->list);
	spin_unlock(&laundrette_lock);
}

static int
nfsd_alloc_fcache_disposal_net(struct net *net)
{
	struct nfsd_fcache_disposal *l;

	l = nfsd_alloc_fcache_disposal(net);
	if (!l)
		return -ENOMEM;
	nfsd_add_fcache_disposal(l);
	return 0;
}

static void
nfsd_free_fcache_disposal_net(struct net *net)
{
	struct nfsd_fcache_disposal *l;

	rcu_read_lock();
	list_for_each_entry_rcu(l, &laundrettes, list) {
		if (l->net != net)
			continue;
		nfsd_del_fcache_disposal(l);
		rcu_read_unlock();
		nfsd_free_fcache_disposal(l);
		return;
	}
	rcu_read_unlock();
}

int
nfsd_file_cache_start_net(struct net *net)
{
	return nfsd_alloc_fcache_disposal_net(net);
}

void
nfsd_file_cache_shutdown_net(struct net *net)
{
	nfsd_file_cache_purge(net);
	nfsd_free_fcache_disposal_net(net);
}

void
nfsd_file_cache_shutdown(void)
{
	set_bit(NFSD_FILE_SHUTDOWN, &nfsd_file_lru_flags);

	lease_unregister_notifier(&nfsd_file_lease_notifier);
	unregister_shrinker(&nfsd_file_shrinker);
	/*
	 * make sure all callers of nfsd_file_lru_cb are done before
	 * calling nfsd_file_cache_purge
	 */
	cancel_delayed_work_sync(&nfsd_filecache_laundrette);
	nfsd_file_cache_purge(NULL);
	list_lru_destroy(&nfsd_file_lru);
	rcu_barrier();
	fsnotify_put_group(nfsd_file_fsnotify_group);
	nfsd_file_fsnotify_group = NULL;
	kmem_cache_destroy(nfsd_file_slab);
	nfsd_file_slab = NULL;
	fsnotify_wait_marks_destroyed();
	kmem_cache_destroy(nfsd_file_mark_slab);
	nfsd_file_mark_slab = NULL;
	kvfree(nfsd_file_hashtbl);
	nfsd_file_hashtbl = NULL;
	destroy_workqueue(nfsd_filecache_wq);
	nfsd_filecache_wq = NULL;
}

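/*
 * Compare two credentials for cache-matching purposes: same fsuid,
 * fsgid, and supplementary group list.
 */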
static bool
nfsd_match_cred(const struct cred *c1, const struct cred *c2)
{
	int i;

	if (!uid_eq(c1->fsuid, c2->fsuid))
		return false;
	if (!gid_eq(c1->fsgid, c2->fsgid))
		return false;
	if (c1->group_info == NULL || c2->group_info == NULL)
		return c1->group_info == c2->group_info;
	if (c1->group_info->ngroups != c2->group_info->ngroups)
		return false;
	for (i = 0; i < c1->group_info->ngroups; i++) {
		if (!gid_eq(c1->group_info->gid[i], c2->group_info->gid[i]))
			return false;
	}
	return true;
}

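/*
 * Search a hash bucket for an entry matching the inode, net, access
 * mode, and current credentials, and take a reference on any match.
 * The caller must hold either the RCU read lock or the bucket lock.
 */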
static struct nfsd_file *
nfsd_file_find_locked(struct inode *inode, unsigned int may_flags,
			unsigned int hashval, struct net *net)
{
	struct nfsd_file *nf;
	unsigned char need = may_flags & NFSD_FILE_MAY_MASK;

	hlist_for_each_entry_rcu(nf, &nfsd_file_hashtbl[hashval].nfb_head,
				 nf_node, lockdep_is_held(&nfsd_file_hashtbl[hashval].nfb_lock)) {
		if (nf->nf_may != need)
			continue;
		if (nf->nf_inode != inode)
			continue;
		if (nf->nf_net != net)
			continue;
		if (!nfsd_match_cred(nf->nf_cred, current_cred()))
			continue;
		if (!test_bit(NFSD_FILE_HASHED, &nf->nf_flags))
			continue;
		if (nfsd_file_get(nf) != NULL)
			return nf;
	}
	return NULL;
}

/**
 * nfsd_file_is_cached - are there any cached open files for this inode?
 * @inode: inode of the file to check
 *
 * Scan the hashtable for open files that match this inode. Returns true if
 * there are any, and false if not.
 */
bool
nfsd_file_is_cached(struct inode *inode)
{
	bool			ret = false;
	struct nfsd_file	*nf;
	unsigned int		hashval;

	hashval = (unsigned int)hash_long(inode->i_ino, NFSD_FILE_HASH_BITS);

	rcu_read_lock();
	hlist_for_each_entry_rcu(nf, &nfsd_file_hashtbl[hashval].nfb_head,
				 nf_node) {
		if (inode == nf->nf_inode) {
			ret = true;
			break;
		}
	}
	rcu_read_unlock();
	trace_nfsd_file_is_cached(inode, hashval, (int)ret);
	return ret;
}

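/*
 * Look up a cached open file matching this request, or open and hash a
 * new one. On a miss, the winner of the insertion race opens the file
 * while everyone else waits on NFSD_FILE_PENDING; a failed
 * construction is retried once before returning nfserr_jukebox.
 */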
__be32
nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
		  unsigned int may_flags, struct nfsd_file **pnf)
{
	__be32	status;
	struct net *net = SVC_NET(rqstp);
	struct nfsd_file *nf, *new;
	struct inode *inode;
	unsigned int hashval;
	bool retry = true;

	/* FIXME: skip this if fh_dentry is already set? */
	status = fh_verify(rqstp, fhp, S_IFREG,
				may_flags|NFSD_MAY_OWNER_OVERRIDE);
	if (status != nfs_ok)
		return status;

	inode = d_inode(fhp->fh_dentry);
	hashval = (unsigned int)hash_long(inode->i_ino, NFSD_FILE_HASH_BITS);
retry:
	rcu_read_lock();
	nf = nfsd_file_find_locked(inode, may_flags, hashval, net);
	rcu_read_unlock();
	if (nf)
		goto wait_for_construction;

	new = nfsd_file_alloc(inode, may_flags, hashval, net);
	if (!new) {
		trace_nfsd_file_acquire(rqstp, hashval, inode, may_flags,
					NULL, nfserr_jukebox);
		return nfserr_jukebox;
	}

	spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock);
	nf = nfsd_file_find_locked(inode, may_flags, hashval, net);
	if (nf == NULL)
		goto open_file;
	spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
	nfsd_file_slab_free(&new->nf_rcu);

wait_for_construction:
	wait_on_bit(&nf->nf_flags, NFSD_FILE_PENDING, TASK_UNINTERRUPTIBLE);

	/* Did construction of this file fail? */
	if (!test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) {
		if (!retry) {
			status = nfserr_jukebox;
			goto out;
		}
		retry = false;
		nfsd_file_put_noref(nf);
		goto retry;
	}

	this_cpu_inc(nfsd_file_cache_hits);

	status = nfserrno(nfsd_open_break_lease(file_inode(nf->nf_file), may_flags));
out:
	if (status == nfs_ok) {
		*pnf = nf;
	} else {
		nfsd_file_put(nf);
		nf = NULL;
	}

	trace_nfsd_file_acquire(rqstp, hashval, inode, may_flags, nf, status);
	return status;
open_file:
	nf = new;
	/* Take reference for the hashtable */
	refcount_inc(&nf->nf_ref);
	__set_bit(NFSD_FILE_HASHED, &nf->nf_flags);
	__set_bit(NFSD_FILE_PENDING, &nf->nf_flags);
	list_lru_add(&nfsd_file_lru, &nf->nf_lru);
	hlist_add_head_rcu(&nf->nf_node, &nfsd_file_hashtbl[hashval].nfb_head);
	++nfsd_file_hashtbl[hashval].nfb_count;
	nfsd_file_hashtbl[hashval].nfb_maxcount = max(nfsd_file_hashtbl[hashval].nfb_maxcount,
			nfsd_file_hashtbl[hashval].nfb_count);
	spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
	if (atomic_long_inc_return(&nfsd_filecache_count) >= NFSD_FILE_LRU_THRESHOLD)
		nfsd_file_gc();

	nf->nf_mark = nfsd_file_mark_find_or_create(nf);
	if (nf->nf_mark)
		status = nfsd_open_verified(rqstp, fhp, S_IFREG,
				may_flags, &nf->nf_file);
	else
		status = nfserr_jukebox;
	/*
	 * If construction failed, or we raced with a call to unlink()
	 * then unhash.
	 */
	if (status != nfs_ok || inode->i_nlink == 0) {
		bool do_free;

		spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock);
		do_free = nfsd_file_unhash(nf);
		spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
		if (do_free)
			nfsd_file_put_noref(nf);
	}
	clear_bit_unlock(NFSD_FILE_PENDING, &nf->nf_flags);
	smp_mb__after_atomic();
	wake_up_bit(&nf->nf_flags, NFSD_FILE_PENDING);
	goto out;
}

/*
 * Note that fields may be added, removed or reordered in the future. Programs
 * scraping this file for info should test the labels to ensure they're
 * getting the correct field.
 */
static int nfsd_file_cache_stats_show(struct seq_file *m, void *v)
{
	unsigned int i, count = 0, longest = 0;
	unsigned long hits = 0;

	/*
	 * No need for spinlocks here since we're not terribly interested in
	 * accuracy. We do take the nfsd_mutex simply to ensure that we
	 * don't end up racing with server shutdown.
	 */
	mutex_lock(&nfsd_mutex);
	if (nfsd_file_hashtbl) {
		for (i = 0; i < NFSD_FILE_HASH_SIZE; i++) {
			count += nfsd_file_hashtbl[i].nfb_count;
			longest = max(longest, nfsd_file_hashtbl[i].nfb_count);
		}
	}
	mutex_unlock(&nfsd_mutex);

	for_each_possible_cpu(i)
		hits += per_cpu(nfsd_file_cache_hits, i);

	seq_printf(m, "total entries: %u\n", count);
	seq_printf(m, "longest chain: %u\n", longest);
	seq_printf(m, "cache hits:    %lu\n", hits);
	return 0;
}

int nfsd_file_cache_stats_open(struct inode *inode, struct file *file)
{
	return single_open(file, nfsd_file_cache_stats_show, NULL);
}