/*
 * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
 * Copyright (C) 2004-2008 Red Hat, Inc.  All rights reserved.
 *
 * This copyrighted material is made available to anyone wishing to use,
 * modify, copy, or redistribute it subject to the terms and conditions
 * of the GNU General Public License version 2.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/buffer_head.h>
#include <linux/delay.h>
#include <linux/sort.h>
#include <linux/hash.h>
#include <linux/jhash.h>
#include <linux/kallsyms.h>
#include <linux/gfs2_ondisk.h>
#include <linux/list.h>
#include <linux/wait.h>
#include <linux/module.h>
#include <linux/uaccess.h>
#include <linux/seq_file.h>
#include <linux/debugfs.h>
#include <linux/kthread.h>
#include <linux/freezer.h>
#include <linux/workqueue.h>
#include <linux/jiffies.h>
#include <linux/rcupdate.h>
#include <linux/rculist_bl.h>
#include <linux/bit_spinlock.h>
#include <linux/percpu.h>
#include <linux/list_sort.h>
#include <linux/lockref.h>
#include <linux/rhashtable.h>

#include "gfs2.h"
#include "incore.h"
#include "glock.h"
#include "glops.h"
#include "inode.h"
#include "lops.h"
#include "meta_io.h"
#include "quota.h"
#include "super.h"
#include "util.h"
#include "bmap.h"
#define CREATE_TRACE_POINTS
#include "trace_gfs2.h"

struct gfs2_glock_iter {
	struct gfs2_sbd *sdp;		/* incore superblock           */
	struct rhashtable_iter hti;	/* rhashtable iterator         */
	struct gfs2_glock *gl;		/* current glock struct        */
	loff_t last_pos;		/* last position               */
};

typedef void (*glock_examiner) (struct gfs2_glock * gl);

static void do_xmote(struct gfs2_glock *gl, struct gfs2_holder *gh, unsigned int target);

static struct dentry *gfs2_root;
static struct workqueue_struct *glock_workqueue;
struct workqueue_struct *gfs2_delete_workqueue;
static LIST_HEAD(lru_list);
static atomic_t lru_count = ATOMIC_INIT(0);
static DEFINE_SPINLOCK(lru_lock);

#define GFS2_GL_HASH_SHIFT      15
#define GFS2_GL_HASH_SIZE       BIT(GFS2_GL_HASH_SHIFT)

static const struct rhashtable_params ht_parms = {
	.nelem_hint = GFS2_GL_HASH_SIZE * 3 / 4,
	.key_len = offsetofend(struct lm_lockname, ln_type),
	.key_offset = offsetof(struct gfs2_glock, gl_name),
	.head_offset = offsetof(struct gfs2_glock, gl_node),
};

static struct rhashtable gl_hash_table;

#define GLOCK_WAIT_TABLE_BITS 12
#define GLOCK_WAIT_TABLE_SIZE (1 << GLOCK_WAIT_TABLE_BITS)
static wait_queue_head_t glock_wait_table[GLOCK_WAIT_TABLE_SIZE] __cacheline_aligned;

struct wait_glock_queue {
	struct lm_lockname *name;
	wait_queue_entry_t wait;
};

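/*
 * Wake function for the hashed glock wait table.  Entries for different
 * glocks can share a wait queue head, so only wake waiters whose lock
 * name matches the name passed in @key.
 */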
static int glock_wake_function(wait_queue_entry_t *wait, unsigned int mode,
			       int sync, void *key)
{
	struct wait_glock_queue *wait_glock =
		container_of(wait, struct wait_glock_queue, wait);
	struct lm_lockname *wait_name = wait_glock->name;
	struct lm_lockname *wake_name = key;

	if (wake_name->ln_sbd != wait_name->ln_sbd ||
	    wake_name->ln_number != wait_name->ln_number ||
	    wake_name->ln_type != wait_name->ln_type)
		return 0;
	return autoremove_wake_function(wait, mode, sync, key);
}

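/*
 * Map a lock name to its shared wait queue head by hashing the name
 * with the same key length used for the glock hash table.
 */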
static wait_queue_head_t *glock_waitqueue(struct lm_lockname *name)
{
	u32 hash = jhash2((u32 *)name, ht_parms.key_len / 4, 0);

	return glock_wait_table + hash_32(hash, GLOCK_WAIT_TABLE_BITS);
}

/**
 * wake_up_glock  -  Wake up waiters on a glock
 * @gl: the glock
 */
static void wake_up_glock(struct gfs2_glock *gl)
{
	wait_queue_head_t *wq = glock_waitqueue(&gl->gl_name);

	if (waitqueue_active(wq))
		__wake_up(wq, TASK_NORMAL, 1, &gl->gl_name);
}

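/*
 * RCU callback: free the glock itself (and its LVB, if any) once the
 * grace period following gfs2_glock_free() has passed.
 */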
static void gfs2_glock_dealloc(struct rcu_head *rcu)
{
	struct gfs2_glock *gl = container_of(rcu, struct gfs2_glock, gl_rcu);

	if (gl->gl_ops->go_flags & GLOF_ASPACE) {
		kmem_cache_free(gfs2_glock_aspace_cachep, gl);
	} else {
		kfree(gl->gl_lksb.sb_lvbptr);
		kmem_cache_free(gfs2_glock_cachep, gl);
	}
}

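/*
 * Remove a dead glock from the hash table and schedule its RCU free.
 * Also wakes anyone waiting in find_insert_glock() for this lock name,
 * and drops the superblock's glock disposal count.
 */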
void gfs2_glock_free(struct gfs2_glock *gl)
{
	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;

	BUG_ON(atomic_read(&gl->gl_revokes));
	rhashtable_remove_fast(&gl_hash_table, &gl->gl_node, ht_parms);
	smp_mb();
	wake_up_glock(gl);
	call_rcu(&gl->gl_rcu, gfs2_glock_dealloc);
	if (atomic_dec_and_test(&sdp->sd_glock_disposal))
		wake_up(&sdp->sd_glock_wait);
}

/**
 * gfs2_glock_hold() - increment reference count on glock
 * @gl: The glock to hold
 *
 */

void gfs2_glock_hold(struct gfs2_glock *gl)
{
	GLOCK_BUG_ON(gl, __lockref_is_dead(&gl->gl_lockref));
	lockref_get(&gl->gl_lockref);
}

/**
 * demote_ok - Check to see if it's ok to unlock a glock
 * @gl: the glock
 *
 * Returns: 1 if it's ok
 */

static int demote_ok(const struct gfs2_glock *gl)
{
	const struct gfs2_glock_operations *glops = gl->gl_ops;

	if (gl->gl_state == LM_ST_UNLOCKED)
		return 0;
	if (!list_empty(&gl->gl_holders))
		return 0;
	if (glops->go_demote_ok)
		return glops->go_demote_ok(gl);
	return 1;
}

void gfs2_glock_add_to_lru(struct gfs2_glock *gl)
{
	if (!(gl->gl_ops->go_flags & GLOF_LRU))
		return;

	spin_lock(&lru_lock);

	list_del(&gl->gl_lru);
	list_add_tail(&gl->gl_lru, &lru_list);

	if (!test_bit(GLF_LRU, &gl->gl_flags)) {
		set_bit(GLF_LRU, &gl->gl_flags);
		atomic_inc(&lru_count);
	}

	spin_unlock(&lru_lock);
}

static void gfs2_glock_remove_from_lru(struct gfs2_glock *gl)
{
	if (!(gl->gl_ops->go_flags & GLOF_LRU))
		return;

	spin_lock(&lru_lock);
	if (test_bit(GLF_LRU, &gl->gl_flags)) {
		list_del_init(&gl->gl_lru);
		atomic_dec(&lru_count);
		clear_bit(GLF_LRU, &gl->gl_flags);
	}
	spin_unlock(&lru_lock);
}

/*
 * Enqueue the glock on the work queue.  Passes one glock reference on to the
 * work queue.
 */
static void __gfs2_glock_queue_work(struct gfs2_glock *gl, unsigned long delay) {
	if (!queue_delayed_work(glock_workqueue, &gl->gl_work, delay)) {
		/*
		 * We are holding the lockref spinlock, and the work was still
		 * queued above.  The queued work (glock_work_func) takes that
		 * spinlock before dropping its glock reference(s), so it
		 * cannot have dropped them in the meantime.
		 */
		GLOCK_BUG_ON(gl, gl->gl_lockref.count < 2);
		gl->gl_lockref.count--;
	}
}

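/*
 * Like __gfs2_glock_queue_work(), but takes the lockref spinlock itself.
 */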
static void gfs2_glock_queue_work(struct gfs2_glock *gl, unsigned long delay) {
	spin_lock(&gl->gl_lockref.lock);
	__gfs2_glock_queue_work(gl, delay);
	spin_unlock(&gl->gl_lockref.lock);
}

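/*
 * Final put: mark the lockref dead and hand the glock back to the lock
 * module.  Called with the lockref spinlock held; drops that lock.
 */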
static void __gfs2_glock_put(struct gfs2_glock *gl)
{
	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
	struct address_space *mapping = gfs2_glock2aspace(gl);

	lockref_mark_dead(&gl->gl_lockref);

	gfs2_glock_remove_from_lru(gl);
	spin_unlock(&gl->gl_lockref.lock);
	GLOCK_BUG_ON(gl, !list_empty(&gl->gl_holders));
	GLOCK_BUG_ON(gl, mapping && mapping->nrpages);
	trace_gfs2_glock_put(gl);
	sdp->sd_lockstruct.ls_ops->lm_put_lock(gl);
}

/*
 * Cause the glock to be put in work queue context.
 */
void gfs2_glock_queue_put(struct gfs2_glock *gl)
{
	gfs2_glock_queue_work(gl, 0);
}

/**
 * gfs2_glock_put() - Decrement reference count on glock
 * @gl: The glock to put
 *
 */

void gfs2_glock_put(struct gfs2_glock *gl)
{
	if (lockref_put_or_lock(&gl->gl_lockref))
		return;

	__gfs2_glock_put(gl);
}

/**
 * may_grant - check if it's ok to grant a new lock
 * @gl: The glock
 * @gh: The lock request which we wish to grant
 *
 * Returns: true if it's ok to grant the lock
 */

static inline int may_grant(const struct gfs2_glock *gl, const struct gfs2_holder *gh)
{
	const struct gfs2_holder *gh_head = list_entry(gl->gl_holders.next, const struct gfs2_holder, gh_list);
	if ((gh->gh_state == LM_ST_EXCLUSIVE ||
	     gh_head->gh_state == LM_ST_EXCLUSIVE) && gh != gh_head)
		return 0;
	if (gl->gl_state == gh->gh_state)
		return 1;
	if (gh->gh_flags & GL_EXACT)
		return 0;
	if (gl->gl_state == LM_ST_EXCLUSIVE) {
		if (gh->gh_state == LM_ST_SHARED && gh_head->gh_state == LM_ST_SHARED)
			return 1;
		if (gh->gh_state == LM_ST_DEFERRED && gh_head->gh_state == LM_ST_DEFERRED)
			return 1;
	}
	if (gl->gl_state != LM_ST_UNLOCKED && (gh->gh_flags & LM_FLAG_ANY))
		return 1;
	return 0;
}

static void gfs2_holder_wake(struct gfs2_holder *gh)
{
	clear_bit(HIF_WAIT, &gh->gh_iflags);
	smp_mb__after_atomic();
	wake_up_bit(&gh->gh_iflags, HIF_WAIT);
}

/**
 * do_error - Something unexpected has happened during a lock request
 * @gl: The glock
 * @ret: The status from the DLM, or 0 to fail queued "try" locks
 */

static void do_error(struct gfs2_glock *gl, const int ret)
{
	struct gfs2_holder *gh, *tmp;

	list_for_each_entry_safe(gh, tmp, &gl->gl_holders, gh_list) {
		if (test_bit(HIF_HOLDER, &gh->gh_iflags))
			continue;
		if (ret & LM_OUT_ERROR)
			gh->gh_error = -EIO;
		else if (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB))
			gh->gh_error = GLR_TRYFAILED;
		else
			continue;
		list_del_init(&gh->gh_list);
		trace_gfs2_glock_queue(gh, 0);
		gfs2_holder_wake(gh);
	}
}

/**
 * do_promote - promote as many requests as possible on the current queue
 * @gl: The glock
 *
 * Returns: 1 if there is a blocked holder at the head of the list, or 2
 *          if a type specific operation is underway.
 */

static int do_promote(struct gfs2_glock *gl)
__releases(&gl->gl_lockref.lock)
__acquires(&gl->gl_lockref.lock)
{
	const struct gfs2_glock_operations *glops = gl->gl_ops;
	struct gfs2_holder *gh, *tmp;
	int ret;

restart:
	list_for_each_entry_safe(gh, tmp, &gl->gl_holders, gh_list) {
		if (test_bit(HIF_HOLDER, &gh->gh_iflags))
			continue;
		if (may_grant(gl, gh)) {
			if (gh->gh_list.prev == &gl->gl_holders &&
			    glops->go_lock) {
				spin_unlock(&gl->gl_lockref.lock);
				/* FIXME: eliminate this eventually */
				ret = glops->go_lock(gh);
				spin_lock(&gl->gl_lockref.lock);
				if (ret) {
					if (ret == 1)
						return 2;
					gh->gh_error = ret;
					list_del_init(&gh->gh_list);
					trace_gfs2_glock_queue(gh, 0);
					gfs2_holder_wake(gh);
					goto restart;
				}
				set_bit(HIF_HOLDER, &gh->gh_iflags);
				trace_gfs2_promote(gh, 1);
				gfs2_holder_wake(gh);
				goto restart;
			}
			set_bit(HIF_HOLDER, &gh->gh_iflags);
			trace_gfs2_promote(gh, 0);
			gfs2_holder_wake(gh);
			continue;
		}
		if (gh->gh_list.prev == &gl->gl_holders)
			return 1;
		do_error(gl, 0);
		break;
	}
	return 0;
}

/**
 * find_first_waiter - find the first gh that's waiting for the glock
 * @gl: the glock
 */

static inline struct gfs2_holder *find_first_waiter(const struct gfs2_glock *gl)
{
	struct gfs2_holder *gh;

	list_for_each_entry(gh, &gl->gl_holders, gh_list) {
		if (!test_bit(HIF_HOLDER, &gh->gh_iflags))
			return gh;
	}
	return NULL;
}

/**
 * state_change - record that the glock is now in a different state
 * @gl: the glock
 * @new_state: the new state
 *
 */

static void state_change(struct gfs2_glock *gl, unsigned int new_state)
{
	int held1, held2;

	held1 = (gl->gl_state != LM_ST_UNLOCKED);
	held2 = (new_state != LM_ST_UNLOCKED);

	if (held1 != held2) {
		GLOCK_BUG_ON(gl, __lockref_is_dead(&gl->gl_lockref));
		if (held2)
			gl->gl_lockref.count++;
		else
			gl->gl_lockref.count--;
	}
	if (held1 && held2 && list_empty(&gl->gl_holders))
		clear_bit(GLF_QUEUED, &gl->gl_flags);

	if (new_state != gl->gl_target)
		/* shorten our minimum hold time */
		gl->gl_hold_time = max(gl->gl_hold_time - GL_GLOCK_HOLD_DECR,
				       GL_GLOCK_MIN_HOLD);
	gl->gl_state = new_state;
	gl->gl_tchange = jiffies;
}

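/*
 * Clear a pending demote request and wake up anyone waiting for it.
 * LM_ST_EXCLUSIVE in gl_demote_state means "no demote requested".
 */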
static void gfs2_demote_wake(struct gfs2_glock *gl)
{
	gl->gl_demote_state = LM_ST_EXCLUSIVE;
	clear_bit(GLF_DEMOTE, &gl->gl_flags);
	smp_mb__after_atomic();
	wake_up_bit(&gl->gl_flags, GLF_DEMOTE);
}

/**
 * finish_xmote - The DLM has replied to one of our lock requests
 * @gl: The glock
 * @ret: The status from the DLM
 *
 */

static void finish_xmote(struct gfs2_glock *gl, unsigned int ret)
{
	const struct gfs2_glock_operations *glops = gl->gl_ops;
	struct gfs2_holder *gh;
	unsigned state = ret & LM_OUT_ST_MASK;
	int rv;

	spin_lock(&gl->gl_lockref.lock);
	trace_gfs2_glock_state_change(gl, state);
	state_change(gl, state);
	gh = find_first_waiter(gl);

	/* Demote to UN request arrived during demote to SH or DF */
	if (test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags) &&
	    state != LM_ST_UNLOCKED && gl->gl_demote_state == LM_ST_UNLOCKED)
		gl->gl_target = LM_ST_UNLOCKED;

	/* Check for state != intended state */
	if (unlikely(state != gl->gl_target)) {
		if (gh && !test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags)) {
			/* move to back of queue and try next entry */
			if (ret & LM_OUT_CANCELED) {
				if ((gh->gh_flags & LM_FLAG_PRIORITY) == 0)
					list_move_tail(&gh->gh_list, &gl->gl_holders);
				gh = find_first_waiter(gl);
				gl->gl_target = gh->gh_state;
				goto retry;
			}
			/* Some error or failed "try lock" - report it */
			if ((ret & LM_OUT_ERROR) ||
			    (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB))) {
				gl->gl_target = gl->gl_state;
				do_error(gl, ret);
				goto out;
			}
		}
		switch(state) {
		/* Unlocked due to conversion deadlock, try again */
		case LM_ST_UNLOCKED:
retry:
			do_xmote(gl, gh, gl->gl_target);
			break;
		/* Conversion fails, unlock and try again */
		case LM_ST_SHARED:
		case LM_ST_DEFERRED:
			do_xmote(gl, gh, LM_ST_UNLOCKED);
			break;
		default: /* Everything else */
			pr_err("wanted %u got %u\n", gl->gl_target, state);
			GLOCK_BUG_ON(gl, 1);
		}
		spin_unlock(&gl->gl_lockref.lock);
		return;
	}

	/* Fast path - we got what we asked for */
	if (test_and_clear_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags))
		gfs2_demote_wake(gl);
	if (state != LM_ST_UNLOCKED) {
		if (glops->go_xmote_bh) {
			spin_unlock(&gl->gl_lockref.lock);
			rv = glops->go_xmote_bh(gl, gh);
			spin_lock(&gl->gl_lockref.lock);
			if (rv) {
				do_error(gl, rv);
				goto out;
			}
		}
		rv = do_promote(gl);
		if (rv == 2)
			goto out_locked;
	}
out:
	clear_bit(GLF_LOCK, &gl->gl_flags);
out_locked:
	spin_unlock(&gl->gl_lockref.lock);
}

/**
 * do_xmote - Calls the DLM to change the state of a lock
 * @gl: The lock state
 * @gh: The holder (only for promotes)
 * @target: The target lock state
 *
 */

static void do_xmote(struct gfs2_glock *gl, struct gfs2_holder *gh, unsigned int target)
__releases(&gl->gl_lockref.lock)
__acquires(&gl->gl_lockref.lock)
{
	const struct gfs2_glock_operations *glops = gl->gl_ops;
	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
	unsigned int lck_flags = (unsigned int)(gh ? gh->gh_flags : 0);
	int ret;

	if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) &&
	    target != LM_ST_UNLOCKED)
		return;
	lck_flags &= (LM_FLAG_TRY | LM_FLAG_TRY_1CB | LM_FLAG_NOEXP |
		      LM_FLAG_PRIORITY);
	GLOCK_BUG_ON(gl, gl->gl_state == target);
	GLOCK_BUG_ON(gl, gl->gl_state == gl->gl_target);
	if ((target == LM_ST_UNLOCKED || target == LM_ST_DEFERRED) &&
	    glops->go_inval) {
		set_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags);
		do_error(gl, 0); /* Fail queued try locks */
	}
	gl->gl_req = target;
	set_bit(GLF_BLOCKING, &gl->gl_flags);
	if ((gl->gl_req == LM_ST_UNLOCKED) ||
	    (gl->gl_state == LM_ST_EXCLUSIVE) ||
	    (lck_flags & (LM_FLAG_TRY|LM_FLAG_TRY_1CB)))
		clear_bit(GLF_BLOCKING, &gl->gl_flags);
	spin_unlock(&gl->gl_lockref.lock);
	if (glops->go_sync)
		glops->go_sync(gl);
	if (test_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags))
		glops->go_inval(gl, target == LM_ST_DEFERRED ? 0 : DIO_METADATA);
	clear_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags);

	gfs2_glock_hold(gl);
	if (sdp->sd_lockstruct.ls_ops->lm_lock)	{
		/* lock_dlm */
		ret = sdp->sd_lockstruct.ls_ops->lm_lock(gl, target, lck_flags);
		if (ret == -EINVAL && gl->gl_target == LM_ST_UNLOCKED &&
		    target == LM_ST_UNLOCKED &&
		    test_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags)) {
			finish_xmote(gl, target);
			gfs2_glock_queue_work(gl, 0);
		}
		else if (ret) {
			pr_err("lm_lock ret %d\n", ret);
			GLOCK_BUG_ON(gl, !test_bit(SDF_SHUTDOWN,
						   &sdp->sd_flags));
		}
	} else { /* lock_nolock */
		finish_xmote(gl, target);
		gfs2_glock_queue_work(gl, 0);
	}

	spin_lock(&gl->gl_lockref.lock);
}

/**
 * find_first_holder - find the first "holder" gh
 * @gl: the glock
 */

static inline struct gfs2_holder *find_first_holder(const struct gfs2_glock *gl)
{
	struct gfs2_holder *gh;

	if (!list_empty(&gl->gl_holders)) {
		gh = list_entry(gl->gl_holders.next, struct gfs2_holder, gh_list);
		if (test_bit(HIF_HOLDER, &gh->gh_iflags))
			return gh;
	}
	return NULL;
}

/**
 * run_queue - do all outstanding tasks related to a glock
 * @gl: The glock in question
 * @nonblock: True if we must not block in run_queue
 *
 */

static void run_queue(struct gfs2_glock *gl, const int nonblock)
__releases(&gl->gl_lockref.lock)
__acquires(&gl->gl_lockref.lock)
{
	struct gfs2_holder *gh = NULL;
	int ret;

	if (test_and_set_bit(GLF_LOCK, &gl->gl_flags))
		return;

	GLOCK_BUG_ON(gl, test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags));

	if (test_bit(GLF_DEMOTE, &gl->gl_flags) &&
	    gl->gl_demote_state != gl->gl_state) {
		if (find_first_holder(gl))
			goto out_unlock;
		if (nonblock)
			goto out_sched;
		set_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags);
		GLOCK_BUG_ON(gl, gl->gl_demote_state == LM_ST_EXCLUSIVE);
		gl->gl_target = gl->gl_demote_state;
	} else {
		if (test_bit(GLF_DEMOTE, &gl->gl_flags))
			gfs2_demote_wake(gl);
		ret = do_promote(gl);
		if (ret == 0)
			goto out_unlock;
		if (ret == 2)
			goto out;
		gh = find_first_waiter(gl);
		gl->gl_target = gh->gh_state;
		if (!(gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)))
			do_error(gl, 0); /* Fail queued try locks */
	}
	do_xmote(gl, gh, gl->gl_target);
out:
	return;

out_sched:
	clear_bit(GLF_LOCK, &gl->gl_flags);
	smp_mb__after_atomic();
	gl->gl_lockref.count++;
	__gfs2_glock_queue_work(gl, 0);
	return;

out_unlock:
	clear_bit(GLF_LOCK, &gl->gl_flags);
	smp_mb__after_atomic();
	return;
}

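/*
 * Work function for iopen glock callbacks: look up the unlinked inode
 * whose block another node wants to deallocate and prune its dcache
 * aliases so that it can be evicted and the dinode finally freed.
 */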
static void delete_work_func(struct work_struct *work)
{
	struct gfs2_glock *gl = container_of(work, struct gfs2_glock, gl_delete);
	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
	struct inode *inode;
	u64 no_addr = gl->gl_name.ln_number;

	/* If someone's using this glock to create a new dinode, the block must
	   have been freed by another node, then re-used, in which case our
	   iopen callback is too late after the fact. Ignore it. */
	if (test_bit(GLF_INODE_CREATING, &gl->gl_flags))
		goto out;

	inode = gfs2_lookup_by_inum(sdp, no_addr, NULL, GFS2_BLKST_UNLINKED);
	if (inode && !IS_ERR(inode)) {
		d_prune_aliases(inode);
		iput(inode);
	}
out:
	gfs2_glock_put(gl);
}

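/*
 * Main glock state machine work function: process a pending DLM reply,
 * turn a pending demote into an active one once the minimum hold time
 * has expired, run the holder queue, and drop the references that were
 * handed to us along with the queued work.
 */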
static void glock_work_func(struct work_struct *work)
{
	unsigned long delay = 0;
	struct gfs2_glock *gl = container_of(work, struct gfs2_glock, gl_work.work);
	unsigned int drop_refs = 1;

	if (test_and_clear_bit(GLF_REPLY_PENDING, &gl->gl_flags)) {
		finish_xmote(gl, gl->gl_reply);
		drop_refs++;
	}
	spin_lock(&gl->gl_lockref.lock);
	if (test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) &&
	    gl->gl_state != LM_ST_UNLOCKED &&
	    gl->gl_demote_state != LM_ST_EXCLUSIVE) {
		unsigned long holdtime, now = jiffies;

		holdtime = gl->gl_tchange + gl->gl_hold_time;
		if (time_before(now, holdtime))
			delay = holdtime - now;

		if (!delay) {
			clear_bit(GLF_PENDING_DEMOTE, &gl->gl_flags);
			set_bit(GLF_DEMOTE, &gl->gl_flags);
		}
	}
	run_queue(gl, 0);
	if (delay) {
		/* Keep one glock reference for the work we requeue. */
		drop_refs--;
		if (gl->gl_name.ln_type != LM_TYPE_INODE)
			delay = 0;
		__gfs2_glock_queue_work(gl, delay);
	}

	/*
	 * Drop the remaining glock references manually here. (Mind that
	 * __gfs2_glock_queue_work depends on the lockref spinlock being held
	 * here as well.)
	 */
	gl->gl_lockref.count -= drop_refs;
	if (!gl->gl_lockref.count) {
		__gfs2_glock_put(gl);
		return;
	}
	spin_unlock(&gl->gl_lockref.lock);
}

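/*
 * Look up a glock by name, or insert @new if no live glock exists for
 * that name.  If a matching glock is found but is dying, wait on the
 * hashed wait queue until it has been removed, then retry the lookup.
 */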
static struct gfs2_glock *find_insert_glock(struct lm_lockname *name,
					    struct gfs2_glock *new)
{
	struct wait_glock_queue wait;
	wait_queue_head_t *wq = glock_waitqueue(name);
	struct gfs2_glock *gl;

	wait.name = name;
	init_wait(&wait.wait);
	wait.wait.func = glock_wake_function;

again:
	prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE);
	rcu_read_lock();
	if (new) {
		gl = rhashtable_lookup_get_insert_fast(&gl_hash_table,
			&new->gl_node, ht_parms);
		if (IS_ERR(gl))
			goto out;
	} else {
		gl = rhashtable_lookup_fast(&gl_hash_table,
			name, ht_parms);
	}
	if (gl && !lockref_get_not_dead(&gl->gl_lockref)) {
		rcu_read_unlock();
		schedule();
		goto again;
	}
out:
	rcu_read_unlock();
	finish_wait(wq, &wait.wait);
	return gl;
}

/**
 * gfs2_glock_get() - Get a glock, or create one if one doesn't exist
 * @sdp: The GFS2 superblock
 * @number: the lock number
 * @glops: The glock_operations to use
 * @create: If 0, don't create the glock if it doesn't exist
 * @glp: the glock is returned here
 *
 * This does not lock a glock, just finds/creates structures for one.
 *
 * Returns: errno
 */

int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
		   const struct gfs2_glock_operations *glops, int create,
		   struct gfs2_glock **glp)
{
	struct super_block *s = sdp->sd_vfs;
	struct lm_lockname name = { .ln_number = number,
				    .ln_type = glops->go_type,
				    .ln_sbd = sdp };
	struct gfs2_glock *gl, *tmp;
	struct address_space *mapping;
	struct kmem_cache *cachep;
	int ret = 0;

	gl = find_insert_glock(&name, NULL);
	if (gl) {
		*glp = gl;
		return 0;
	}
	if (!create)
		return -ENOENT;

	if (glops->go_flags & GLOF_ASPACE)
		cachep = gfs2_glock_aspace_cachep;
	else
		cachep = gfs2_glock_cachep;
	gl = kmem_cache_alloc(cachep, GFP_NOFS);
	if (!gl)
		return -ENOMEM;

	memset(&gl->gl_lksb, 0, sizeof(struct dlm_lksb));

	if (glops->go_flags & GLOF_LVB) {
		gl->gl_lksb.sb_lvbptr = kzalloc(GFS2_MIN_LVB_SIZE, GFP_NOFS);
		if (!gl->gl_lksb.sb_lvbptr) {
			kmem_cache_free(cachep, gl);
			return -ENOMEM;
		}
	}

	atomic_inc(&sdp->sd_glock_disposal);
	gl->gl_node.next = NULL;
	gl->gl_flags = 0;
	gl->gl_name = name;
	gl->gl_lockref.count = 1;
	gl->gl_state = LM_ST_UNLOCKED;
	gl->gl_target = LM_ST_UNLOCKED;
	gl->gl_demote_state = LM_ST_EXCLUSIVE;
	gl->gl_ops = glops;
	gl->gl_dstamp = 0;
	preempt_disable();
	/* We use the global stats to estimate the initial per-glock stats */
	gl->gl_stats = this_cpu_ptr(sdp->sd_lkstats)->lkstats[glops->go_type];
	preempt_enable();
	gl->gl_stats.stats[GFS2_LKS_DCOUNT] = 0;
	gl->gl_stats.stats[GFS2_LKS_QCOUNT] = 0;
	gl->gl_tchange = jiffies;
	gl->gl_object = NULL;
	gl->gl_hold_time = GL_GLOCK_DFT_HOLD;
	INIT_DELAYED_WORK(&gl->gl_work, glock_work_func);
	INIT_WORK(&gl->gl_delete, delete_work_func);

	mapping = gfs2_glock2aspace(gl);
	if (mapping) {
		mapping->a_ops = &gfs2_meta_aops;
		mapping->host = s->s_bdev->bd_inode;
		mapping->flags = 0;
		mapping_set_gfp_mask(mapping, GFP_NOFS);
		mapping->private_data = NULL;
		mapping->writeback_index = 0;
	}

	tmp = find_insert_glock(&name, gl);
	if (!tmp) {
		*glp = gl;
		goto out;
	}
	if (IS_ERR(tmp)) {
		ret = PTR_ERR(tmp);
		goto out_free;
	}
	*glp = tmp;

out_free:
	kfree(gl->gl_lksb.sb_lvbptr);
	kmem_cache_free(cachep, gl);
	atomic_dec(&sdp->sd_glock_disposal);

out:
	return ret;
}

/**
 * gfs2_holder_init - initialize a struct gfs2_holder in the default way
 * @gl: the glock
 * @state: the state we're requesting
 * @flags: the modifier flags
 * @gh: the holder structure
 *
 */

void gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, u16 flags,
		      struct gfs2_holder *gh)
{
	INIT_LIST_HEAD(&gh->gh_list);
	gh->gh_gl = gl;
	gh->gh_ip = _RET_IP_;
	gh->gh_owner_pid = get_pid(task_pid(current));
	gh->gh_state = state;
	gh->gh_flags = flags;
	gh->gh_error = 0;
	gh->gh_iflags = 0;
	gfs2_glock_hold(gl);
}

/**
 * gfs2_holder_reinit - reinitialize a struct gfs2_holder so we can requeue it
 * @state: the state we're requesting
 * @flags: the modifier flags
 * @gh: the holder structure
 *
 * Don't mess with the glock.
 *
 */

void gfs2_holder_reinit(unsigned int state, u16 flags, struct gfs2_holder *gh)
{
	gh->gh_state = state;
	gh->gh_flags = flags;
	gh->gh_iflags = 0;
	gh->gh_ip = _RET_IP_;
	put_pid(gh->gh_owner_pid);
	gh->gh_owner_pid = get_pid(task_pid(current));
}

/**
 * gfs2_holder_uninit - uninitialize a holder structure (drop glock reference)
 * @gh: the holder structure
 *
 */

void gfs2_holder_uninit(struct gfs2_holder *gh)
{
	put_pid(gh->gh_owner_pid);
	gfs2_glock_put(gh->gh_gl);
	gfs2_holder_mark_uninitialized(gh);
	gh->gh_ip = 0;
}

/**
 * gfs2_glock_wait - wait on a glock acquisition
 * @gh: the glock holder
 *
 * Returns: 0 on success
 */

int gfs2_glock_wait(struct gfs2_holder *gh)
{
	unsigned long time1 = jiffies;

	might_sleep();
	wait_on_bit(&gh->gh_iflags, HIF_WAIT, TASK_UNINTERRUPTIBLE);
	if (time_after(jiffies, time1 + HZ)) /* have we waited > a second? */
		/* Lengthen the minimum hold time. */
		gh->gh_gl->gl_hold_time = min(gh->gh_gl->gl_hold_time +
					      GL_GLOCK_HOLD_INCR,
					      GL_GLOCK_MAX_HOLD);
	return gh->gh_error;
}

/**
 * handle_callback - process a demote request
 * @gl: the glock
 * @state: the state the caller wants us to change to
 * @delay: if nonzero, mark the demote as pending rather than immediate
 * @remote: true if the request came from another cluster node
 *
 * There are only two requests that we are going to see in actual
 * practice: LM_ST_SHARED and LM_ST_UNLOCKED
 */

static void handle_callback(struct gfs2_glock *gl, unsigned int state,
			    unsigned long delay, bool remote)
{
	int bit = delay ? GLF_PENDING_DEMOTE : GLF_DEMOTE;

	set_bit(bit, &gl->gl_flags);
	if (gl->gl_demote_state == LM_ST_EXCLUSIVE) {
		gl->gl_demote_state = state;
		gl->gl_demote_time = jiffies;
	} else if (gl->gl_demote_state != LM_ST_UNLOCKED &&
			gl->gl_demote_state != state) {
		gl->gl_demote_state = LM_ST_UNLOCKED;
	}
	if (gl->gl_ops->go_callback)
		gl->gl_ops->go_callback(gl, remote);
	trace_gfs2_demote_rq(gl, remote);
}

void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...)
{
	struct va_format vaf;
	va_list args;

	va_start(args, fmt);

	if (seq) {
		seq_vprintf(seq, fmt, args);
	} else {
		vaf.fmt = fmt;
		vaf.va = &args;

		pr_err("%pV", &vaf);
	}

	va_end(args);
}

/**
 * add_to_queue - Add a holder to the wait queue (but look for recursion)
 * @gh: the holder structure to add
 *
 * Eventually we should move the recursive locking trap to a
 * debugging option or something like that. This is the fast
 * path and needs to have the minimum number of distractions.
 *
 */

static inline void add_to_queue(struct gfs2_holder *gh)
__releases(&gl->gl_lockref.lock)
__acquires(&gl->gl_lockref.lock)
{
	struct gfs2_glock *gl = gh->gh_gl;
	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
	struct list_head *insert_pt = NULL;
	struct gfs2_holder *gh2;
	int try_futile = 0;

	BUG_ON(gh->gh_owner_pid == NULL);
	if (test_and_set_bit(HIF_WAIT, &gh->gh_iflags))
		BUG();

	if (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)) {
		if (test_bit(GLF_LOCK, &gl->gl_flags))
			try_futile = !may_grant(gl, gh);
		if (test_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags))
			goto fail;
	}

	list_for_each_entry(gh2, &gl->gl_holders, gh_list) {
		if (unlikely(gh2->gh_owner_pid == gh->gh_owner_pid &&
		    (gh->gh_gl->gl_ops->go_type != LM_TYPE_FLOCK)))
			goto trap_recursive;
		if (try_futile &&
		    !(gh2->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB))) {
fail:
			gh->gh_error = GLR_TRYFAILED;
			gfs2_holder_wake(gh);
			return;
		}
		if (test_bit(HIF_HOLDER, &gh2->gh_iflags))
			continue;
		if (unlikely((gh->gh_flags & LM_FLAG_PRIORITY) && !insert_pt))
			insert_pt = &gh2->gh_list;
	}
	set_bit(GLF_QUEUED, &gl->gl_flags);
	trace_gfs2_glock_queue(gh, 1);
	gfs2_glstats_inc(gl, GFS2_LKS_QCOUNT);
	gfs2_sbstats_inc(gl, GFS2_LKS_QCOUNT);
	if (likely(insert_pt == NULL)) {
		list_add_tail(&gh->gh_list, &gl->gl_holders);
		if (unlikely(gh->gh_flags & LM_FLAG_PRIORITY))
			goto do_cancel;
		return;
	}
	list_add_tail(&gh->gh_list, insert_pt);
do_cancel:
	gh = list_entry(gl->gl_holders.next, struct gfs2_holder, gh_list);
	if (!(gh->gh_flags & LM_FLAG_PRIORITY)) {
		spin_unlock(&gl->gl_lockref.lock);
		if (sdp->sd_lockstruct.ls_ops->lm_cancel)
			sdp->sd_lockstruct.ls_ops->lm_cancel(gl);
		spin_lock(&gl->gl_lockref.lock);
	}
	return;

trap_recursive:
	pr_err("original: %pSR\n", (void *)gh2->gh_ip);
	pr_err("pid: %d\n", pid_nr(gh2->gh_owner_pid));
	pr_err("lock type: %d req lock state : %d\n",
	       gh2->gh_gl->gl_name.ln_type, gh2->gh_state);
	pr_err("new: %pSR\n", (void *)gh->gh_ip);
	pr_err("pid: %d\n", pid_nr(gh->gh_owner_pid));
	pr_err("lock type: %d req lock state : %d\n",
	       gh->gh_gl->gl_name.ln_type, gh->gh_state);
	gfs2_dump_glock(NULL, gl);
	BUG();
}

/**
 * gfs2_glock_nq - enqueue a struct gfs2_holder onto a glock (acquire a glock)
 * @gh: the holder structure
 *
 * if (gh->gh_flags & GL_ASYNC), this never returns an error
 *
 * Returns: 0, GLR_TRYFAILED, or errno on failure
 */

int gfs2_glock_nq(struct gfs2_holder *gh)
{
	struct gfs2_glock *gl = gh->gh_gl;
	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
	int error = 0;

	if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
		return -EIO;

	if (test_bit(GLF_LRU, &gl->gl_flags))
		gfs2_glock_remove_from_lru(gl);

	spin_lock(&gl->gl_lockref.lock);
	add_to_queue(gh);
	if (unlikely((LM_FLAG_NOEXP & gh->gh_flags) &&
		     test_and_clear_bit(GLF_FROZEN, &gl->gl_flags))) {
		set_bit(GLF_REPLY_PENDING, &gl->gl_flags);
		gl->gl_lockref.count++;
		__gfs2_glock_queue_work(gl, 0);
	}
	run_queue(gl, 1);
	spin_unlock(&gl->gl_lockref.lock);

	if (!(gh->gh_flags & GL_ASYNC))
		error = gfs2_glock_wait(gh);

	return error;
}

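/*
 * Typical calling sequence for the holder API (illustrative sketch only,
 * not part of this file; error handling elided):
 *
 *	struct gfs2_holder gh;
 *	int error;
 *
 *	gfs2_holder_init(gl, LM_ST_SHARED, 0, &gh);
 *	error = gfs2_glock_nq(&gh);
 *	if (!error) {
 *		// ... access the object protected by the glock ...
 *		gfs2_glock_dq(&gh);
 *	}
 *	gfs2_holder_uninit(&gh);
 *
 * Callers usually use the gfs2_glock_nq_init() wrapper from glock.h,
 * which combines the init and nq steps.
 */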
/**
 * gfs2_glock_poll - poll to see if an async request has been completed
 * @gh: the holder
 *
 * Returns: 1 if the request is ready to be gfs2_glock_wait()ed on
 */

int gfs2_glock_poll(struct gfs2_holder *gh)
{
	return test_bit(HIF_WAIT, &gh->gh_iflags) ? 0 : 1;
}

/**
 * gfs2_glock_dq - dequeue a struct gfs2_holder from a glock (release a glock)
 * @gh: the glock holder
 *
 */

void gfs2_glock_dq(struct gfs2_holder *gh)
{
	struct gfs2_glock *gl = gh->gh_gl;
	const struct gfs2_glock_operations *glops = gl->gl_ops;
	unsigned delay = 0;
	int fast_path = 0;

	spin_lock(&gl->gl_lockref.lock);
	if (gh->gh_flags & GL_NOCACHE)
		handle_callback(gl, LM_ST_UNLOCKED, 0, false);

	list_del_init(&gh->gh_list);
	clear_bit(HIF_HOLDER, &gh->gh_iflags);
	if (find_first_holder(gl) == NULL) {
		if (glops->go_unlock) {
			GLOCK_BUG_ON(gl, test_and_set_bit(GLF_LOCK, &gl->gl_flags));
			spin_unlock(&gl->gl_lockref.lock);
			glops->go_unlock(gh);
			spin_lock(&gl->gl_lockref.lock);
			clear_bit(GLF_LOCK, &gl->gl_flags);
		}
		if (list_empty(&gl->gl_holders) &&
		    !test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) &&
		    !test_bit(GLF_DEMOTE, &gl->gl_flags))
			fast_path = 1;
	}
	if (!test_bit(GLF_LFLUSH, &gl->gl_flags) && demote_ok(gl))
		gfs2_glock_add_to_lru(gl);

	trace_gfs2_glock_queue(gh, 0);
	if (unlikely(!fast_path)) {
		gl->gl_lockref.count++;
		if (test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) &&
		    !test_bit(GLF_DEMOTE, &gl->gl_flags) &&
		    gl->gl_name.ln_type == LM_TYPE_INODE)
			delay = gl->gl_hold_time;
		__gfs2_glock_queue_work(gl, delay);
	}
	spin_unlock(&gl->gl_lockref.lock);
}

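/**
 * gfs2_glock_dq_wait - dequeue a holder and wait for any demote to complete
 * @gh: the glock holder
 */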
void gfs2_glock_dq_wait(struct gfs2_holder *gh)
{
	struct gfs2_glock *gl = gh->gh_gl;
	gfs2_glock_dq(gh);
	might_sleep();
	wait_on_bit(&gl->gl_flags, GLF_DEMOTE, TASK_UNINTERRUPTIBLE);
}

/**
 * gfs2_glock_dq_uninit - dequeue a holder from a glock and uninitialize it
 * @gh: the holder structure
 *
 */

void gfs2_glock_dq_uninit(struct gfs2_holder *gh)
{
	gfs2_glock_dq(gh);
	gfs2_holder_uninit(gh);
}

/**
 * gfs2_glock_nq_num - acquire a glock based on lock number
 * @sdp: the filesystem
 * @number: the lock number
 * @glops: the glock operations for the type of glock
 * @state: the state to acquire the glock in
 * @flags: modifier flags for the acquisition
 * @gh: the struct gfs2_holder
 *
 * Returns: errno
 */

int gfs2_glock_nq_num(struct gfs2_sbd *sdp, u64 number,
		      const struct gfs2_glock_operations *glops,
		      unsigned int state, u16 flags, struct gfs2_holder *gh)
{
	struct gfs2_glock *gl;
	int error;

	error = gfs2_glock_get(sdp, number, glops, CREATE, &gl);
	if (!error) {
		error = gfs2_glock_nq_init(gl, state, flags, gh);
		gfs2_glock_put(gl);
	}

	return error;
}

/**
 * glock_compare - Compare two struct gfs2_glock structures for sorting
 * @arg_a: the first structure
 * @arg_b: the second structure
 *
 */

static int glock_compare(const void *arg_a, const void *arg_b)
{
	const struct gfs2_holder *gh_a = *(const struct gfs2_holder **)arg_a;
	const struct gfs2_holder *gh_b = *(const struct gfs2_holder **)arg_b;
	const struct lm_lockname *a = &gh_a->gh_gl->gl_name;
	const struct lm_lockname *b = &gh_b->gh_gl->gl_name;

	if (a->ln_number > b->ln_number)
		return 1;
	if (a->ln_number < b->ln_number)
		return -1;
	BUG_ON(gh_a->gh_gl->gl_ops->go_type == gh_b->gh_gl->gl_ops->go_type);
	return 0;
}

/**
 * nq_m_sync - synchronously acquire more than one glock in deadlock-free order
 * @num_gh: the number of structures
 * @ghs: an array of struct gfs2_holder structures
 *
 * Returns: 0 on success (all glocks acquired),
 *          errno on failure (no glocks acquired)
 */

static int nq_m_sync(unsigned int num_gh, struct gfs2_holder *ghs,
		     struct gfs2_holder **p)
{
	unsigned int x;
	int error = 0;

	for (x = 0; x < num_gh; x++)
		p[x] = &ghs[x];

	sort(p, num_gh, sizeof(struct gfs2_holder *), glock_compare, NULL);

	for (x = 0; x < num_gh; x++) {
		p[x]->gh_flags &= ~(LM_FLAG_TRY | GL_ASYNC);

		error = gfs2_glock_nq(p[x]);
		if (error) {
			while (x--)
				gfs2_glock_dq(p[x]);
			break;
		}
	}

	return error;
}

/**
 * gfs2_glock_nq_m - acquire multiple glocks
 * @num_gh: the number of structures
 * @ghs: an array of struct gfs2_holder structures
 *
 *
 * Returns: 0 on success (all glocks acquired),
 *          errno on failure (no glocks acquired)
 */

int gfs2_glock_nq_m(unsigned int num_gh, struct gfs2_holder *ghs)
{
	struct gfs2_holder *tmp[4];
	struct gfs2_holder **pph = tmp;
	int error = 0;

	switch(num_gh) {
	case 0:
		return 0;
	case 1:
		ghs->gh_flags &= ~(LM_FLAG_TRY | GL_ASYNC);
		return gfs2_glock_nq(ghs);
	default:
		if (num_gh <= 4)
			break;
		pph = kmalloc_array(num_gh, sizeof(struct gfs2_holder *),
				    GFP_NOFS);
		if (!pph)
			return -ENOMEM;
	}

	error = nq_m_sync(num_gh, ghs, pph);

	if (pph != tmp)
		kfree(pph);

	return error;
}

/**
 * gfs2_glock_dq_m - release multiple glocks
 * @num_gh: the number of structures
 * @ghs: an array of struct gfs2_holder structures
 *
 */

void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs)
{
	while (num_gh--)
		gfs2_glock_dq(&ghs[num_gh]);
}

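/**
 * gfs2_glock_cb - Process a remote demote request from the lock module
 * @gl: the glock
 * @state: the state the remote node wants us to change to
 *
 * For queued inode glocks, delay the demote until the minimum hold time
 * has expired, to avoid bouncing glocks around the cluster too quickly.
 */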
void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state)
{
	unsigned long delay = 0;
	unsigned long holdtime;
	unsigned long now = jiffies;

	gfs2_glock_hold(gl);
	holdtime = gl->gl_tchange + gl->gl_hold_time;
	if (test_bit(GLF_QUEUED, &gl->gl_flags) &&
	    gl->gl_name.ln_type == LM_TYPE_INODE) {
		if (time_before(now, holdtime))
			delay = holdtime - now;
		if (test_bit(GLF_REPLY_PENDING, &gl->gl_flags))
			delay = gl->gl_hold_time;
	}

	spin_lock(&gl->gl_lockref.lock);
	handle_callback(gl, state, delay, true);
	__gfs2_glock_queue_work(gl, delay);
	spin_unlock(&gl->gl_lockref.lock);
}

/**
 * gfs2_should_freeze - Figure out if glock should be frozen
 * @gl: The glock in question
 *
 * Glocks are not frozen if (a) the result of the dlm operation is
 * an error, (b) the locking operation was an unlock operation or
 * (c) if there is a "noexp" flagged request anywhere in the queue
 *
 * Returns: 1 if freezing should occur, 0 otherwise
 */

static int gfs2_should_freeze(const struct gfs2_glock *gl)
{
	const struct gfs2_holder *gh;

	if (gl->gl_reply & ~LM_OUT_ST_MASK)
		return 0;
	if (gl->gl_target == LM_ST_UNLOCKED)
		return 0;

	list_for_each_entry(gh, &gl->gl_holders, gh_list) {
		if (test_bit(HIF_HOLDER, &gh->gh_iflags))
			continue;
		if (LM_FLAG_NOEXP & gh->gh_flags)
			return 0;
	}

	return 1;
}

/**
 * gfs2_glock_complete - Callback used by locking
 * @gl: Pointer to the glock
 * @ret: The return value from the dlm
 *
 * The gl_reply field is under the gl_lockref.lock lock so that it is ok
 * to use a bitfield shared with other glock state fields.
 */

void gfs2_glock_complete(struct gfs2_glock *gl, int ret)
{
	struct lm_lockstruct *ls = &gl->gl_name.ln_sbd->sd_lockstruct;

	spin_lock(&gl->gl_lockref.lock);
	gl->gl_reply = ret;

	if (unlikely(test_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags))) {
		if (gfs2_should_freeze(gl)) {
			set_bit(GLF_FROZEN, &gl->gl_flags);
			spin_unlock(&gl->gl_lockref.lock);
			return;
		}
	}

	gl->gl_lockref.count++;
	set_bit(GLF_REPLY_PENDING, &gl->gl_flags);
	__gfs2_glock_queue_work(gl, 0);
	spin_unlock(&gl->gl_lockref.lock);
}

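/*
 * list_sort() comparison function: order glocks by lock number so that
 * LRU disposal tends to issue any disk I/O in ascending block order.
 */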
static int glock_cmp(void *priv, struct list_head *a, struct list_head *b)
{
	struct gfs2_glock *gla, *glb;

	gla = list_entry(a, struct gfs2_glock, gl_lru);
	glb = list_entry(b, struct gfs2_glock, gl_lru);

	if (gla->gl_name.ln_number > glb->gl_name.ln_number)
		return 1;
	if (gla->gl_name.ln_number < glb->gl_name.ln_number)
		return -1;

	return 0;
}

/**
 * gfs2_dispose_glock_lru - Demote a list of glocks
 * @list: The list to dispose of
 *
 * Disposing of glocks may involve disk accesses, so that here we sort
 * the glocks by number (i.e. disk location of the inodes) so that if
 * there are any such accesses, they'll be sent in order (mostly).
 *
 * Must be called under the lru_lock, but may drop and retake this
 * lock. While the lru_lock is dropped, entries may vanish from the
 * list, but no new entries will appear on the list (since it is
 * private)
 */

static void gfs2_dispose_glock_lru(struct list_head *list)
__releases(&lru_lock)
__acquires(&lru_lock)
{
	struct gfs2_glock *gl;

	list_sort(NULL, list, glock_cmp);

	while(!list_empty(list)) {
		gl = list_entry(list->next, struct gfs2_glock, gl_lru);
		list_del_init(&gl->gl_lru);
		if (!spin_trylock(&gl->gl_lockref.lock)) {
add_back_to_lru:
			list_add(&gl->gl_lru, &lru_list);
			set_bit(GLF_LRU, &gl->gl_flags);
			atomic_inc(&lru_count);
			continue;
		}
		if (test_and_set_bit(GLF_LOCK, &gl->gl_flags)) {
			spin_unlock(&gl->gl_lockref.lock);
			goto add_back_to_lru;
		}
		gl->gl_lockref.count++;
		if (demote_ok(gl))
			handle_callback(gl, LM_ST_UNLOCKED, 0, false);
		WARN_ON(!test_and_clear_bit(GLF_LOCK, &gl->gl_flags));
		__gfs2_glock_queue_work(gl, 0);
		spin_unlock(&gl->gl_lockref.lock);
		cond_resched_lock(&lru_lock);
	}
}

/**
 * gfs2_scan_glock_lru - Scan the LRU looking for locks to demote
 * @nr: The number of entries to scan
 *
 * This function selects the entries on the LRU which are able to
 * be demoted, and then kicks off the process by calling
 * gfs2_dispose_glock_lru() above.
 */

static long gfs2_scan_glock_lru(int nr)
{
	struct gfs2_glock *gl;
	LIST_HEAD(skipped);
	LIST_HEAD(dispose);
	long freed = 0;

	spin_lock(&lru_lock);
	while ((nr-- >= 0) && !list_empty(&lru_list)) {
		gl = list_entry(lru_list.next, struct gfs2_glock, gl_lru);

		/* Test for being demotable */
		if (!test_bit(GLF_LOCK, &gl->gl_flags)) {
			list_move(&gl->gl_lru, &dispose);
			atomic_dec(&lru_count);
			clear_bit(GLF_LRU, &gl->gl_flags);
			freed++;
			continue;
		}

		list_move(&gl->gl_lru, &skipped);
	}
	list_splice(&skipped, &lru_list);
	if (!list_empty(&dispose))
		gfs2_dispose_glock_lru(&dispose);
	spin_unlock(&lru_lock);

	return freed;
}

static unsigned long gfs2_glock_shrink_scan(struct shrinker *shrink,
					    struct shrink_control *sc)
{
	if (!(sc->gfp_mask & __GFP_FS))
		return SHRINK_STOP;
	return gfs2_scan_glock_lru(sc->nr_to_scan);
}

static unsigned long gfs2_glock_shrink_count(struct shrinker *shrink,
					     struct shrink_control *sc)
{
	return vfs_pressure_ratio(atomic_read(&lru_count));
}

static struct shrinker glock_shrinker = {
	.seeks = DEFAULT_SEEKS,
	.count_objects = gfs2_glock_shrink_count,
	.scan_objects = gfs2_glock_shrink_scan,
};

/**
 * glock_hash_walk - Call a function for each glock in the hash table
 * @examiner: the function
 * @sdp: the filesystem
 *
 * Note that the function can be called multiple times on the same
 * object.  So the user must ensure that the function can cope with
 * that.
 */

static void glock_hash_walk(glock_examiner examiner, const struct gfs2_sbd *sdp)
{
	struct gfs2_glock *gl;
	struct rhashtable_iter iter;

	rhashtable_walk_enter(&gl_hash_table, &iter);

	do {
		rhashtable_walk_start(&iter);

		while ((gl = rhashtable_walk_next(&iter)) && !IS_ERR(gl))
			if (gl->gl_name.ln_sbd == sdp &&
			    lockref_get_not_dead(&gl->gl_lockref))
				examiner(gl);

		rhashtable_walk_stop(&iter);
	} while (cond_resched(), gl == ERR_PTR(-EAGAIN));

	rhashtable_walk_exit(&iter);
}

/**
 * thaw_glock - thaw out a glock which has an unprocessed reply waiting
 * @gl: The glock to thaw
 *
 */

static void thaw_glock(struct gfs2_glock *gl)
{
	if (!test_and_clear_bit(GLF_FROZEN, &gl->gl_flags)) {
		gfs2_glock_put(gl);
		return;
	}
	set_bit(GLF_REPLY_PENDING, &gl->gl_flags);
	gfs2_glock_queue_work(gl, 0);
}

/**
 * clear_glock - look at a glock and see if we can free it from glock cache
 * @gl: the glock to look at
 *
 */

static void clear_glock(struct gfs2_glock *gl)
{
	gfs2_glock_remove_from_lru(gl);

	spin_lock(&gl->gl_lockref.lock);
	if (gl->gl_state != LM_ST_UNLOCKED)
		handle_callback(gl, LM_ST_UNLOCKED, 0, false);
	__gfs2_glock_queue_work(gl, 0);
	spin_unlock(&gl->gl_lockref.lock);
}

/**
 * gfs2_glock_thaw - Thaw any frozen glocks
 * @sdp: The super block
 *
 */

void gfs2_glock_thaw(struct gfs2_sbd *sdp)
{
	glock_hash_walk(thaw_glock, sdp);
}

static void dump_glock(struct seq_file *seq, struct gfs2_glock *gl)
{
	spin_lock(&gl->gl_lockref.lock);
	gfs2_dump_glock(seq, gl);
	spin_unlock(&gl->gl_lockref.lock);
}

static void dump_glock_func(struct gfs2_glock *gl)
{
	dump_glock(NULL, gl);
}

/**
 * gfs2_gl_hash_clear - Empty out the glock hash table
 * @sdp: the filesystem
 *
 * Called when unmounting the filesystem.
 */

void gfs2_gl_hash_clear(struct gfs2_sbd *sdp)
{
	set_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags);
	flush_workqueue(glock_workqueue);
	glock_hash_walk(clear_glock, sdp);
	flush_workqueue(glock_workqueue);
	wait_event_timeout(sdp->sd_glock_wait,
			   atomic_read(&sdp->sd_glock_disposal) == 0,
			   HZ * 600);
	glock_hash_walk(dump_glock_func, sdp);
}

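/*
 * Resume a truncate that was interrupted, then let the glock state
 * machine continue processing the holder queue.
 */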
void gfs2_glock_finish_truncate(struct gfs2_inode *ip)
{
	struct gfs2_glock *gl = ip->i_gl;
	int ret;

	ret = gfs2_truncatei_resume(ip);
	gfs2_assert_withdraw(gl->gl_name.ln_sbd, ret == 0);

	spin_lock(&gl->gl_lockref.lock);
	clear_bit(GLF_LOCK, &gl->gl_flags);
	run_queue(gl, 1);
	spin_unlock(&gl->gl_lockref.lock);
}

static const char *state2str(unsigned state)
{
	switch(state) {
	case LM_ST_UNLOCKED:
		return "UN";
	case LM_ST_SHARED:
		return "SH";
	case LM_ST_DEFERRED:
		return "DF";
	case LM_ST_EXCLUSIVE:
		return "EX";
	}
	return "??";
}

static const char *hflags2str(char *buf, u16 flags, unsigned long iflags)
{
	char *p = buf;
	if (flags & LM_FLAG_TRY)
		*p++ = 't';
	if (flags & LM_FLAG_TRY_1CB)
		*p++ = 'T';
	if (flags & LM_FLAG_NOEXP)
		*p++ = 'e';
	if (flags & LM_FLAG_ANY)
		*p++ = 'A';
	if (flags & LM_FLAG_PRIORITY)
		*p++ = 'p';
	if (flags & GL_ASYNC)
		*p++ = 'a';
	if (flags & GL_EXACT)
		*p++ = 'E';
	if (flags & GL_NOCACHE)
		*p++ = 'c';
	if (test_bit(HIF_HOLDER, &iflags))
		*p++ = 'H';
	if (test_bit(HIF_WAIT, &iflags))
		*p++ = 'W';
	if (test_bit(HIF_FIRST, &iflags))
		*p++ = 'F';
	*p = 0;
	return buf;
}

/**
 * dump_holder - print information about a glock holder
 * @seq: the seq_file struct
 * @gh: the glock holder
 *
 */

static void dump_holder(struct seq_file *seq, const struct gfs2_holder *gh)
{
	struct task_struct *gh_owner = NULL;
	char flags_buf[32];

	rcu_read_lock();
	if (gh->gh_owner_pid)
		gh_owner = pid_task(gh->gh_owner_pid, PIDTYPE_PID);
	gfs2_print_dbg(seq, " H: s:%s f:%s e:%d p:%ld [%s] %pS\n",
		       state2str(gh->gh_state),
		       hflags2str(flags_buf, gh->gh_flags, gh->gh_iflags),
		       gh->gh_error,
		       gh->gh_owner_pid ? (long)pid_nr(gh->gh_owner_pid) : -1,
		       gh_owner ? gh_owner->comm : "(ended)",
		       (void *)gh->gh_ip);
	rcu_read_unlock();
}

static const char *gflags2str(char *buf, const struct gfs2_glock *gl)
{
	const unsigned long *gflags = &gl->gl_flags;
	char *p = buf;

	if (test_bit(GLF_LOCK, gflags))
		*p++ = 'l';
	if (test_bit(GLF_DEMOTE, gflags))
		*p++ = 'D';
	if (test_bit(GLF_PENDING_DEMOTE, gflags))
		*p++ = 'd';
	if (test_bit(GLF_DEMOTE_IN_PROGRESS, gflags))
		*p++ = 'p';
	if (test_bit(GLF_DIRTY, gflags))
		*p++ = 'y';
	if (test_bit(GLF_LFLUSH, gflags))
		*p++ = 'f';
	if (test_bit(GLF_INVALIDATE_IN_PROGRESS, gflags))
		*p++ = 'i';
	if (test_bit(GLF_REPLY_PENDING, gflags))
		*p++ = 'r';
	if (test_bit(GLF_INITIAL, gflags))
		*p++ = 'I';
	if (test_bit(GLF_FROZEN, gflags))
		*p++ = 'F';
	if (test_bit(GLF_QUEUED, gflags))
		*p++ = 'q';
	if (test_bit(GLF_LRU, gflags))
		*p++ = 'L';
	if (gl->gl_object)
		*p++ = 'o';
	if (test_bit(GLF_BLOCKING, gflags))
		*p++ = 'b';
	*p = 0;
	return buf;
}

/**
 * gfs2_dump_glock - print information about a glock
 * @seq: The seq_file struct
 * @gl: the glock
 *
 * The file format is as follows:
 * One line per object, capital letters are used to indicate objects
 * G = glock, I = Inode, R = rgrp, H = holder. Glocks are not indented,
 * other objects are indented by a single space and follow the glock to
 * which they are related. Fields are indicated by lower case letters
 * followed by a colon and the field value, except for strings which are in
 * [] so that it's possible to see if they are composed of spaces for
 * example. The fields are: n = number (id of the object), f = flags,
 * t = type, s = state, r = refcount, e = error, p = pid.
 *
 */
1783 
void gfs2_dump_glock(struct seq_file *seq, const struct gfs2_glock *gl)
{
	const struct gfs2_glock_operations *glops = gl->gl_ops;
	unsigned long long dtime;
	const struct gfs2_holder *gh;
	char gflags_buf[32];

	dtime = jiffies - gl->gl_demote_time;
	dtime *= 1000000 / HZ; /* demote time in usec */
	if (!test_bit(GLF_DEMOTE, &gl->gl_flags))
		dtime = 0;
	gfs2_print_dbg(seq, "G:  s:%s n:%u/%llx f:%s t:%s d:%s/%llu a:%d v:%d r:%d m:%ld\n",
		  state2str(gl->gl_state),
		  gl->gl_name.ln_type,
		  (unsigned long long)gl->gl_name.ln_number,
		  gflags2str(gflags_buf, gl),
		  state2str(gl->gl_target),
		  state2str(gl->gl_demote_state), dtime,
		  atomic_read(&gl->gl_ail_count),
		  atomic_read(&gl->gl_revokes),
		  (int)gl->gl_lockref.count, gl->gl_hold_time);

	list_for_each_entry(gh, &gl->gl_holders, gh_list)
		dump_holder(seq, gh);

	if (gl->gl_state != LM_ST_UNLOCKED && glops->go_dump)
		glops->go_dump(seq, gl);
}

static int gfs2_glstats_seq_show(struct seq_file *seq, void *iter_ptr)
{
	struct gfs2_glock *gl = iter_ptr;

	seq_printf(seq, "G: n:%u/%llx rtt:%llu/%llu rttb:%llu/%llu irt:%llu/%llu dcnt: %llu qcnt: %llu\n",
		   gl->gl_name.ln_type,
		   (unsigned long long)gl->gl_name.ln_number,
		   (unsigned long long)gl->gl_stats.stats[GFS2_LKS_SRTT],
		   (unsigned long long)gl->gl_stats.stats[GFS2_LKS_SRTTVAR],
		   (unsigned long long)gl->gl_stats.stats[GFS2_LKS_SRTTB],
		   (unsigned long long)gl->gl_stats.stats[GFS2_LKS_SRTTVARB],
		   (unsigned long long)gl->gl_stats.stats[GFS2_LKS_SIRT],
		   (unsigned long long)gl->gl_stats.stats[GFS2_LKS_SIRTVAR],
		   (unsigned long long)gl->gl_stats.stats[GFS2_LKS_DCOUNT],
		   (unsigned long long)gl->gl_stats.stats[GFS2_LKS_QCOUNT]);
	return 0;
}
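
/*
 * Each glstats line reports, for one glock: the smoothed round trip
 * time and its variance (rtt), the same pair for blocking requests
 * (rttb), the smoothed inter-request time and its variance (irt), the
 * DLM request count (dcnt) and the glock queue count (qcnt).
 */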

static const char *gfs2_gltype[] = {
	"type",
	"reserved",
	"nondisk",
	"inode",
	"rgrp",
	"meta",
	"iopen",
	"flock",
	"plock",
	"quota",
	"journal",
};

static const char *gfs2_stype[] = {
	[GFS2_LKS_SRTT]		= "srtt",
	[GFS2_LKS_SRTTVAR]	= "srttvar",
	[GFS2_LKS_SRTTB]	= "srttb",
	[GFS2_LKS_SRTTVARB]	= "srttvarb",
	[GFS2_LKS_SIRT]		= "sirt",
	[GFS2_LKS_SIRTVAR]	= "sirtvar",
	[GFS2_LKS_DCOUNT]	= "dlm",
	[GFS2_LKS_QCOUNT]	= "queue",
};

#define GFS2_NR_SBSTATS (ARRAY_SIZE(gfs2_gltype) * ARRAY_SIZE(gfs2_stype))
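
/*
 * The sbstats file contains GFS2_NR_SBSTATS rows: one group of eight
 * statistics per glock type above.  The seq_file position encodes both
 * indices: pos >> 3 selects the glock type and pos & 7 the statistic.
 * Group 0 ("type") is special: its single row prints the CPU number
 * heading each column.
 */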

static int gfs2_sbstats_seq_show(struct seq_file *seq, void *iter_ptr)
{
	struct gfs2_sbd *sdp = seq->private;
	loff_t pos = *(loff_t *)iter_ptr;
	unsigned index = pos >> 3;
	unsigned subindex = pos & 0x07;
	int i;

	if (index == 0 && subindex != 0)
		return 0;

	seq_printf(seq, "%-10s %8s:", gfs2_gltype[index],
		   (index == 0) ? "cpu" : gfs2_stype[subindex]);

	for_each_possible_cpu(i) {
		const struct gfs2_pcpu_lkstats *lkstats = per_cpu_ptr(sdp->sd_lkstats, i);

		if (index == 0)
			seq_printf(seq, " %15u", i);
		else
			seq_printf(seq, " %15llu", (unsigned long long)lkstats->
				   lkstats[index - 1].stats[subindex]);
	}
	seq_putc(seq, '\n');
	return 0;
}

int __init gfs2_glock_init(void)
{
	int i, ret;

	ret = rhashtable_init(&gl_hash_table, &ht_parms);
	if (ret < 0)
		return ret;

	glock_workqueue = alloc_workqueue("glock_workqueue", WQ_MEM_RECLAIM |
					  WQ_HIGHPRI | WQ_FREEZABLE, 0);
	if (!glock_workqueue) {
		rhashtable_destroy(&gl_hash_table);
		return -ENOMEM;
	}
	gfs2_delete_workqueue = alloc_workqueue("delete_workqueue",
						WQ_MEM_RECLAIM | WQ_FREEZABLE,
						0);
	if (!gfs2_delete_workqueue) {
		destroy_workqueue(glock_workqueue);
		rhashtable_destroy(&gl_hash_table);
		return -ENOMEM;
	}

	ret = register_shrinker(&glock_shrinker);
	if (ret) {
		destroy_workqueue(gfs2_delete_workqueue);
		destroy_workqueue(glock_workqueue);
		rhashtable_destroy(&gl_hash_table);
		return ret;
	}

	for (i = 0; i < GLOCK_WAIT_TABLE_SIZE; i++)
		init_waitqueue_head(glock_wait_table + i);

	return 0;
}

void gfs2_glock_exit(void)
{
	unregister_shrinker(&glock_shrinker);
	rhashtable_destroy(&gl_hash_table);
	destroy_workqueue(glock_workqueue);
	destroy_workqueue(gfs2_delete_workqueue);
}

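/*
 * Advance the iterator past @n glocks.  With n == 0 the current glock
 * (if any) is kept; otherwise its reference is dropped and the walk
 * moves forward, taking a reference on the glock it stops at.  An
 * -EAGAIN from rhashtable_walk_next() means the table was resized
 * mid-walk, in which case we continue with the next live entry.
 */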
static void gfs2_glock_iter_next(struct gfs2_glock_iter *gi, loff_t n)
{
	struct gfs2_glock *gl = gi->gl;

	if (gl) {
		if (n == 0)
			return;
		if (!lockref_put_not_zero(&gl->gl_lockref))
			gfs2_glock_queue_put(gl);
	}
	for (;;) {
		gl = rhashtable_walk_next(&gi->hti);
		if (IS_ERR_OR_NULL(gl)) {
			if (gl == ERR_PTR(-EAGAIN)) {
				n = 1;
				continue;
			}
			gl = NULL;
			break;
		}
		if (gl->gl_name.ln_sbd != gi->sdp)
			continue;
		if (n <= 1) {
			if (!lockref_get_not_dead(&gl->gl_lockref))
				continue;
			break;
		} else {
			if (__lockref_is_dead(&gl->gl_lockref))
				continue;
			n--;
		}
	}
	gi->gl = gl;
}

static void *gfs2_glock_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(RCU)
{
	struct gfs2_glock_iter *gi = seq->private;
	loff_t n;

	/*
	 * We can either stay where we are, skip to the next hash table
	 * entry, or start from the beginning.
	 */
	if (*pos < gi->last_pos) {
		rhashtable_walk_exit(&gi->hti);
		rhashtable_walk_enter(&gl_hash_table, &gi->hti);
		n = *pos + 1;
	} else {
		n = *pos - gi->last_pos;
	}

	rhashtable_walk_start(&gi->hti);

	gfs2_glock_iter_next(gi, n);
	gi->last_pos = *pos;
	return gi->gl;
}

static void *gfs2_glock_seq_next(struct seq_file *seq, void *iter_ptr,
				 loff_t *pos)
{
	struct gfs2_glock_iter *gi = seq->private;

	(*pos)++;
	gi->last_pos = *pos;
	gfs2_glock_iter_next(gi, 1);
	return gi->gl;
}

static void gfs2_glock_seq_stop(struct seq_file *seq, void *iter_ptr)
	__releases(RCU)
{
	struct gfs2_glock_iter *gi = seq->private;

	rhashtable_walk_stop(&gi->hti);
}

static int gfs2_glock_seq_show(struct seq_file *seq, void *iter_ptr)
{
	dump_glock(seq, iter_ptr);
	return 0;
}

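/*
 * The sbstats sequence iterates over plain loff_t positions rather
 * than glocks; preemption is disabled in ->start and re-enabled in
 * ->stop, so it stays off while the per-CPU counters are read.
 */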
static void *gfs2_sbstats_seq_start(struct seq_file *seq, loff_t *pos)
{
	preempt_disable();
	if (*pos >= GFS2_NR_SBSTATS)
		return NULL;
	return pos;
}

static void *gfs2_sbstats_seq_next(struct seq_file *seq, void *iter_ptr,
				   loff_t *pos)
{
	(*pos)++;
	if (*pos >= GFS2_NR_SBSTATS)
		return NULL;
	return pos;
}

static void gfs2_sbstats_seq_stop(struct seq_file *seq, void *iter_ptr)
{
	preempt_enable();
}

static const struct seq_operations gfs2_glock_seq_ops = {
	.start = gfs2_glock_seq_start,
	.next  = gfs2_glock_seq_next,
	.stop  = gfs2_glock_seq_stop,
	.show  = gfs2_glock_seq_show,
};

static const struct seq_operations gfs2_glstats_seq_ops = {
	.start = gfs2_glock_seq_start,
	.next  = gfs2_glock_seq_next,
	.stop  = gfs2_glock_seq_stop,
	.show  = gfs2_glstats_seq_show,
};

static const struct seq_operations gfs2_sbstats_seq_ops = {
	.start = gfs2_sbstats_seq_start,
	.next  = gfs2_sbstats_seq_next,
	.stop  = gfs2_sbstats_seq_stop,
	.show  = gfs2_sbstats_seq_show,
};

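/*
 * Pre-size the seq_file read buffer (capped at 64K) so that dumping a
 * large number of glocks does not keep overflowing the default
 * single-page buffer; if the kmalloc() below fails, seq_file simply
 * falls back to allocating its usual buffer on the first read.
 */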
#define GFS2_SEQ_GOODSIZE min(PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER, 65536UL)

static int __gfs2_glocks_open(struct inode *inode, struct file *file,
			      const struct seq_operations *ops)
{
	int ret = seq_open_private(file, ops, sizeof(struct gfs2_glock_iter));
	if (ret == 0) {
		struct seq_file *seq = file->private_data;
		struct gfs2_glock_iter *gi = seq->private;

		gi->sdp = inode->i_private;
		seq->buf = kmalloc(GFS2_SEQ_GOODSIZE, GFP_KERNEL | __GFP_NOWARN);
		if (seq->buf)
			seq->size = GFS2_SEQ_GOODSIZE;
		/*
		 * Initially, we are "before" the first hash table entry; the
		 * first call to rhashtable_walk_next gets us the first entry.
		 */
		gi->last_pos = -1;
		gi->gl = NULL;
		rhashtable_walk_enter(&gl_hash_table, &gi->hti);
	}
	return ret;
}

static int gfs2_glocks_open(struct inode *inode, struct file *file)
{
	return __gfs2_glocks_open(inode, file, &gfs2_glock_seq_ops);
}

static int gfs2_glocks_release(struct inode *inode, struct file *file)
{
	struct seq_file *seq = file->private_data;
	struct gfs2_glock_iter *gi = seq->private;

	if (gi->gl)
		gfs2_glock_put(gi->gl);
	rhashtable_walk_exit(&gi->hti);
	return seq_release_private(inode, file);
}

static int gfs2_glstats_open(struct inode *inode, struct file *file)
{
	return __gfs2_glocks_open(inode, file, &gfs2_glstats_seq_ops);
}

static int gfs2_sbstats_open(struct inode *inode, struct file *file)
{
	int ret = seq_open(file, &gfs2_sbstats_seq_ops);
	if (ret == 0) {
		struct seq_file *seq = file->private_data;
		seq->private = inode->i_private;  /* sdp */
	}
	return ret;
}

static const struct file_operations gfs2_glocks_fops = {
	.owner   = THIS_MODULE,
	.open    = gfs2_glocks_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = gfs2_glocks_release,
};

static const struct file_operations gfs2_glstats_fops = {
	.owner   = THIS_MODULE,
	.open    = gfs2_glstats_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = gfs2_glocks_release,
};

static const struct file_operations gfs2_sbstats_fops = {
	.owner   = THIS_MODULE,
	.open    = gfs2_sbstats_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release,
};

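/*
 * Create the per-superblock debugfs directory and its files; with
 * debugfs mounted in the usual place these appear as
 * /sys/kernel/debug/gfs2/<table name>/{glocks,glstats,sbstats}.
 */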
int gfs2_create_debugfs_file(struct gfs2_sbd *sdp)
{
	struct dentry *dent;

	dent = debugfs_create_dir(sdp->sd_table_name, gfs2_root);
	if (IS_ERR_OR_NULL(dent))
		goto fail;
	sdp->debugfs_dir = dent;

	dent = debugfs_create_file("glocks",
				   S_IFREG | S_IRUGO,
				   sdp->debugfs_dir, sdp,
				   &gfs2_glocks_fops);
	if (IS_ERR_OR_NULL(dent))
		goto fail;
	sdp->debugfs_dentry_glocks = dent;

	dent = debugfs_create_file("glstats",
				   S_IFREG | S_IRUGO,
				   sdp->debugfs_dir, sdp,
				   &gfs2_glstats_fops);
	if (IS_ERR_OR_NULL(dent))
		goto fail;
	sdp->debugfs_dentry_glstats = dent;

	dent = debugfs_create_file("sbstats",
				   S_IFREG | S_IRUGO,
				   sdp->debugfs_dir, sdp,
				   &gfs2_sbstats_fops);
	if (IS_ERR_OR_NULL(dent))
		goto fail;
	sdp->debugfs_dentry_sbstats = dent;

	return 0;
fail:
	gfs2_delete_debugfs_file(sdp);
	return dent ? PTR_ERR(dent) : -ENOMEM;
}

void gfs2_delete_debugfs_file(struct gfs2_sbd *sdp)
{
	if (sdp->debugfs_dir) {
		if (sdp->debugfs_dentry_glocks) {
			debugfs_remove(sdp->debugfs_dentry_glocks);
			sdp->debugfs_dentry_glocks = NULL;
		}
		if (sdp->debugfs_dentry_glstats) {
			debugfs_remove(sdp->debugfs_dentry_glstats);
			sdp->debugfs_dentry_glstats = NULL;
		}
		if (sdp->debugfs_dentry_sbstats) {
			debugfs_remove(sdp->debugfs_dentry_sbstats);
			sdp->debugfs_dentry_sbstats = NULL;
		}
		debugfs_remove(sdp->debugfs_dir);
		sdp->debugfs_dir = NULL;
	}
}

int gfs2_register_debugfs(void)
{
	gfs2_root = debugfs_create_dir("gfs2", NULL);
	if (IS_ERR(gfs2_root))
		return PTR_ERR(gfs2_root);
	return gfs2_root ? 0 : -ENOMEM;
}

void gfs2_unregister_debugfs(void)
{
	debugfs_remove(gfs2_root);
	gfs2_root = NULL;
}