// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2005-2008 Red Hat, Inc.  All rights reserved.
 */

#include <linux/fs.h>
#include <linux/miscdevice.h>
#include <linux/poll.h>
#include <linux/dlm.h>
#include <linux/dlm_plock.h>
#include <linux/slab.h>

#include "dlm_internal.h"
#include "lockspace.h"

static DEFINE_SPINLOCK(ops_lock);
static LIST_HEAD(send_list);
static LIST_HEAD(recv_list);
static DECLARE_WAIT_QUEUE_HEAD(send_wq);
static DECLARE_WAIT_QUEUE_HEAD(recv_wq);

struct plock_async_data {
	void *fl;
	void *file;
	struct file_lock flc;
	int (*callback)(struct file_lock *fl, int result);
};

struct plock_op {
	struct list_head list;
	int done;
	struct dlm_plock_info info;
	/* if set indicates async handling */
	struct plock_async_data *data;
};

static inline void set_version(struct dlm_plock_info *info)
{
	info->version[0] = DLM_PLOCK_VERSION_MAJOR;
	info->version[1] = DLM_PLOCK_VERSION_MINOR;
	info->version[2] = DLM_PLOCK_VERSION_PATCH;
}

static int check_version(struct dlm_plock_info *info)
{
	if ((DLM_PLOCK_VERSION_MAJOR != info->version[0]) ||
	    (DLM_PLOCK_VERSION_MINOR < info->version[1])) {
		log_print("plock device version mismatch: "
			  "kernel (%u.%u.%u), user (%u.%u.%u)",
			  DLM_PLOCK_VERSION_MAJOR,
			  DLM_PLOCK_VERSION_MINOR,
			  DLM_PLOCK_VERSION_PATCH,
			  info->version[0],
			  info->version[1],
			  info->version[2]);
		return -EINVAL;
	}
	return 0;
}

static void dlm_release_plock_op(struct plock_op *op)
{
	kfree(op->data);
	kfree(op);
}

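/* queue an op on the send list for userspace to read and wake up any
   pollers on the misc device */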
static void send_op(struct plock_op *op)
{
	set_version(&op->info);
	spin_lock(&ops_lock);
	list_add_tail(&op->list, &send_list);
	spin_unlock(&ops_lock);
	wake_up(&send_wq);
}

/* If a process was killed while waiting for the only plock on a file,
   locks_remove_posix will not see any lock on the file so it won't
   send an unlock-close to us to pass on to userspace to clean up the
   abandoned waiter.  So, we have to insert the unlock-close when the
   lock call is interrupted. */

static void do_unlock_close(const struct dlm_plock_info *info)
{
	struct plock_op *op;

	op = kzalloc(sizeof(*op), GFP_NOFS);
	if (!op)
		return;

	op->info.optype		= DLM_PLOCK_OP_UNLOCK;
	op->info.pid		= info->pid;
	op->info.fsid		= info->fsid;
	op->info.number		= info->number;
	op->info.start		= 0;
	op->info.end		= OFFSET_MAX;
	op->info.owner		= info->owner;

	op->info.flags |= DLM_PLOCK_FL_CLOSE;
	send_op(op);
}

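/* request a cluster-wide posix lock: the op is handed to userspace and this
   call either waits for the result or, for async (lockd) requests, returns
   FILE_LOCK_DEFERRED and completes later through dlm_plock_callback() */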
int dlm_posix_lock(dlm_lockspace_t *lockspace, u64 number, struct file *file,
		   int cmd, struct file_lock *fl)
{
	struct plock_async_data *op_data;
	struct dlm_ls *ls;
	struct plock_op *op;
	int rv;

	ls = dlm_find_lockspace_local(lockspace);
	if (!ls)
		return -EINVAL;

	op = kzalloc(sizeof(*op), GFP_NOFS);
	if (!op) {
		rv = -ENOMEM;
		goto out;
	}

	op->info.optype		= DLM_PLOCK_OP_LOCK;
	op->info.pid		= fl->fl_pid;
	op->info.ex		= (fl->fl_type == F_WRLCK);
	op->info.wait		= IS_SETLKW(cmd);
	op->info.fsid		= ls->ls_global_id;
	op->info.number		= number;
	op->info.start		= fl->fl_start;
	op->info.end		= fl->fl_end;
	/* async handling */
	if (fl->fl_lmops && fl->fl_lmops->lm_grant) {
		op_data = kzalloc(sizeof(*op_data), GFP_NOFS);
		if (!op_data) {
			dlm_release_plock_op(op);
			rv = -ENOMEM;
			goto out;
		}

		/* fl_owner is lockd which doesn't distinguish
		   processes on the nfs client */
		op->info.owner	= (__u64) fl->fl_pid;
		op_data->callback = fl->fl_lmops->lm_grant;
		locks_init_lock(&op_data->flc);
		locks_copy_lock(&op_data->flc, fl);
		op_data->fl	= fl;
		op_data->file	= file;

		op->data = op_data;

		send_op(op);
		rv = FILE_LOCK_DEFERRED;
		goto out;
	} else {
		op->info.owner	= (__u64)(long) fl->fl_owner;
	}

	send_op(op);

	if (op->info.wait) {
		rv = wait_event_killable(recv_wq, (op->done != 0));
		if (rv == -ERESTARTSYS) {
			spin_lock(&ops_lock);
			/* recheck under ops_lock if we got a done != 0,
			 * if so this interrupt case should be ignored
			 */
			if (op->done != 0) {
				spin_unlock(&ops_lock);
				goto do_lock_wait;
			}
			list_del(&op->list);
			spin_unlock(&ops_lock);

			log_debug(ls, "%s: wait interrupted %x %llx pid %d",
				  __func__, ls->ls_global_id,
				  (unsigned long long)number, op->info.pid);
			do_unlock_close(&op->info);
			dlm_release_plock_op(op);
			goto out;
		}
	} else {
		wait_event(recv_wq, (op->done != 0));
	}

do_lock_wait:

	WARN_ON(!list_empty(&op->list));

	rv = op->info.rv;

	if (!rv) {
		if (locks_lock_file_wait(file, fl) < 0)
			log_error(ls, "dlm_posix_lock: vfs lock error %llx",
				  (unsigned long long)number);
	}

	dlm_release_plock_op(op);
out:
	dlm_put_lockspace(ls);
	return rv;
}
EXPORT_SYMBOL_GPL(dlm_posix_lock);

/* Returns failure iff a successful lock operation should be canceled */
static int dlm_plock_callback(struct plock_op *op)
{
	struct plock_async_data *op_data = op->data;
	struct file *file;
	struct file_lock *fl;
	struct file_lock *flc;
	int (*notify)(struct file_lock *fl, int result) = NULL;
	int rv = 0;

	WARN_ON(!list_empty(&op->list));

	/* check if the following 2 are still valid or make a copy */
	file = op_data->file;
	flc = &op_data->flc;
	fl = op_data->fl;
	notify = op_data->callback;

	if (op->info.rv) {
		notify(fl, op->info.rv);
		goto out;
	}

	/* got fs lock; bookkeep locally as well: */
	flc->fl_flags &= ~FL_SLEEP;
	if (posix_lock_file(file, flc, NULL)) {
		/*
		 * This can only happen in the case of kmalloc() failure.
		 * The filesystem's own lock is the authoritative lock,
		 * so a failure to get the lock locally is not a disaster.
		 * As long as the fs cannot reliably cancel locks (especially
		 * in a low-memory situation), we're better off ignoring
		 * this failure than trying to recover.
		 */
		log_print("dlm_plock_callback: vfs lock error %llx file %p fl %p",
			  (unsigned long long)op->info.number, file, fl);
	}

	rv = notify(fl, 0);
	if (rv) {
		/* XXX: We need to cancel the fs lock here: */
		log_print("dlm_plock_callback: lock granted after lock request "
			  "failed; dangling lock!\n");
		goto out;
	}

out:
	dlm_release_plock_op(op);
	return rv;
}

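/* release a posix lock: drop the vfs lock locally, then tell userspace to
   release the cluster-wide lock */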
int dlm_posix_unlock(dlm_lockspace_t *lockspace, u64 number, struct file *file,
		     struct file_lock *fl)
{
	struct dlm_ls *ls;
	struct plock_op *op;
	int rv;
	unsigned char fl_flags = fl->fl_flags;

	ls = dlm_find_lockspace_local(lockspace);
	if (!ls)
		return -EINVAL;

	op = kzalloc(sizeof(*op), GFP_NOFS);
	if (!op) {
		rv = -ENOMEM;
		goto out;
	}

	/* cause the vfs unlock to return ENOENT if lock is not found */
	fl->fl_flags |= FL_EXISTS;

	rv = locks_lock_file_wait(file, fl);
	if (rv == -ENOENT) {
		rv = 0;
		goto out_free;
	}
	if (rv < 0) {
		log_error(ls, "dlm_posix_unlock: vfs unlock error %d %llx",
			  rv, (unsigned long long)number);
	}

	op->info.optype		= DLM_PLOCK_OP_UNLOCK;
	op->info.pid		= fl->fl_pid;
	op->info.fsid		= ls->ls_global_id;
	op->info.number		= number;
	op->info.start		= fl->fl_start;
	op->info.end		= fl->fl_end;
	if (fl->fl_lmops && fl->fl_lmops->lm_grant)
		op->info.owner	= (__u64) fl->fl_pid;
	else
		op->info.owner	= (__u64)(long) fl->fl_owner;

	if (fl->fl_flags & FL_CLOSE) {
		op->info.flags |= DLM_PLOCK_FL_CLOSE;
		send_op(op);
		rv = 0;
		goto out;
	}

	send_op(op);
	wait_event(recv_wq, (op->done != 0));

	WARN_ON(!list_empty(&op->list));

	rv = op->info.rv;

	if (rv == -ENOENT)
		rv = 0;

out_free:
	dlm_release_plock_op(op);
out:
	dlm_put_lockspace(ls);
	fl->fl_flags = fl_flags;
	return rv;
}
EXPORT_SYMBOL_GPL(dlm_posix_unlock);

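/* test for a conflicting posix lock (the F_GETLK path); the result from
   userspace is copied back into *fl */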
int dlm_posix_get(dlm_lockspace_t *lockspace, u64 number, struct file *file,
		  struct file_lock *fl)
{
	struct dlm_ls *ls;
	struct plock_op *op;
	int rv;

	ls = dlm_find_lockspace_local(lockspace);
	if (!ls)
		return -EINVAL;

	op = kzalloc(sizeof(*op), GFP_NOFS);
	if (!op) {
		rv = -ENOMEM;
		goto out;
	}

	op->info.optype		= DLM_PLOCK_OP_GET;
	op->info.pid		= fl->fl_pid;
	op->info.ex		= (fl->fl_type == F_WRLCK);
	op->info.fsid		= ls->ls_global_id;
	op->info.number		= number;
	op->info.start		= fl->fl_start;
	op->info.end		= fl->fl_end;
	if (fl->fl_lmops && fl->fl_lmops->lm_grant)
		op->info.owner	= (__u64) fl->fl_pid;
	else
		op->info.owner	= (__u64)(long) fl->fl_owner;

	send_op(op);
	wait_event(recv_wq, (op->done != 0));

	WARN_ON(!list_empty(&op->list));

	/* info.rv from userspace is 1 for conflict, 0 for no-conflict,
	   -ENOENT if there are no locks on the file */

	rv = op->info.rv;

	fl->fl_type = F_UNLCK;
	if (rv == -ENOENT)
		rv = 0;
	else if (rv > 0) {
		locks_init_lock(fl);
		fl->fl_type = (op->info.ex) ? F_WRLCK : F_RDLCK;
		fl->fl_flags = FL_POSIX;
		fl->fl_pid = op->info.pid;
		if (op->info.nodeid != dlm_our_nodeid())
			fl->fl_pid = -fl->fl_pid;
		fl->fl_start = op->info.start;
		fl->fl_end = op->info.end;
		rv = 0;
	}

	dlm_release_plock_op(op);
out:
	dlm_put_lockspace(ls);
	return rv;
}
EXPORT_SYMBOL_GPL(dlm_posix_get);

/* a read copies out one plock request from the send list */
static ssize_t dev_read(struct file *file, char __user *u, size_t count,
			loff_t *ppos)
{
	struct dlm_plock_info info;
	struct plock_op *op = NULL;

	if (count < sizeof(info))
		return -EINVAL;

	spin_lock(&ops_lock);
	if (!list_empty(&send_list)) {
		op = list_first_entry(&send_list, struct plock_op, list);
		if (op->info.flags & DLM_PLOCK_FL_CLOSE)
			list_del(&op->list);
		else
			list_move_tail(&op->list, &recv_list);
		memcpy(&info, &op->info, sizeof(info));
	}
	spin_unlock(&ops_lock);

	if (!op)
		return -EAGAIN;

	/* there is no need to get a reply from userspace for unlocks
	   that were generated by the vfs cleaning up for a close
	   (the process did not make an unlock call). */

	if (op->info.flags & DLM_PLOCK_FL_CLOSE)
		dlm_release_plock_op(op);

	if (copy_to_user(u, &info, sizeof(info)))
		return -EFAULT;
	return sizeof(info);
}

/* a write copies in one plock result that should match a plock_op
   on the recv list */
static ssize_t dev_write(struct file *file, const char __user *u, size_t count,
			 loff_t *ppos)
{
	struct plock_op *op = NULL, *iter;
	struct dlm_plock_info info;
	int do_callback = 0;

	if (count != sizeof(info))
		return -EINVAL;

	if (copy_from_user(&info, u, sizeof(info)))
		return -EFAULT;

	if (check_version(&info))
		return -EINVAL;

	/*
	 * The results for waiting ops (SETLKW) can be returned in any
	 * order, so match all fields to find the op.  The results for
	 * non-waiting ops are returned in the order that they were sent
	 * to userspace, so match the result with the first non-waiting op.
	 */
	spin_lock(&ops_lock);
	if (info.wait) {
		list_for_each_entry(iter, &recv_list, list) {
			if (iter->info.fsid == info.fsid &&
			    iter->info.number == info.number &&
			    iter->info.owner == info.owner &&
			    iter->info.pid == info.pid &&
			    iter->info.start == info.start &&
			    iter->info.end == info.end &&
			    iter->info.ex == info.ex &&
			    iter->info.wait) {
				op = iter;
				break;
			}
		}
	} else {
		list_for_each_entry(iter, &recv_list, list) {
			if (!iter->info.wait &&
			    iter->info.fsid == info.fsid) {
				op = iter;
				break;
			}
		}
	}

	if (op) {
		/* Sanity check that op and info match. */
		if (info.wait)
			WARN_ON(op->info.optype != DLM_PLOCK_OP_LOCK);
		else
			WARN_ON(op->info.number != info.number ||
				op->info.owner != info.owner ||
				op->info.optype != info.optype);

		list_del_init(&op->list);
		memcpy(&op->info, &info, sizeof(info));
		if (op->data)
			do_callback = 1;
		else
			op->done = 1;
	}
	spin_unlock(&ops_lock);

	if (op) {
		if (do_callback)
			dlm_plock_callback(op);
		else
			wake_up(&recv_wq);
	} else
		pr_debug("%s: no op %x %llx", __func__,
			 info.fsid, (unsigned long long)info.number);
	return count;
}

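/* userspace polls the device; report readable when ops are waiting on the
   send list */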
static __poll_t dev_poll(struct file *file, poll_table *wait)
{
	__poll_t mask = 0;

	poll_wait(file, &send_wq, wait);

	spin_lock(&ops_lock);
	if (!list_empty(&send_list))
		mask = EPOLLIN | EPOLLRDNORM;
	spin_unlock(&ops_lock);

	return mask;
}

static const struct file_operations dev_fops = {
	.read    = dev_read,
	.write   = dev_write,
	.poll    = dev_poll,
	.owner   = THIS_MODULE,
	.llseek  = noop_llseek,
};

static struct miscdevice plock_dev_misc = {
	.minor = MISC_DYNAMIC_MINOR,
	.name = DLM_PLOCK_MISC_NAME,
	.fops = &dev_fops
};

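/* register the misc device that userspace uses to service plock requests */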
int dlm_plock_init(void)
{
	int rv;

	rv = misc_register(&plock_dev_misc);
	if (rv)
		log_print("dlm_plock_init: misc_register failed %d", rv);
	return rv;
}

void dlm_plock_exit(void)
{
	misc_deregister(&plock_dev_misc);
}