1 /*
2 * Copyright (c) 2016, Mellanox Technologies inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33 #include <linux/file.h>
34 #include <linux/anon_inodes.h>
35 #include <linux/sched/mm.h>
36 #include <rdma/ib_verbs.h>
37 #include <rdma/uverbs_types.h>
38 #include <linux/rcupdate.h>
39 #include <rdma/uverbs_ioctl.h>
40 #include <rdma/rdma_user_ioctl.h>
41 #include "uverbs.h"
42 #include "core_priv.h"
43 #include "rdma_core.h"
44
uverbs_uobject_get(struct ib_uobject * uobject)45 void uverbs_uobject_get(struct ib_uobject *uobject)
46 {
47 kref_get(&uobject->ref);
48 }
49
uverbs_uobject_free(struct kref * ref)50 static void uverbs_uobject_free(struct kref *ref)
51 {
52 struct ib_uobject *uobj =
53 container_of(ref, struct ib_uobject, ref);
54
55 if (uobj->uapi_object->type_class->needs_kfree_rcu)
56 kfree_rcu(uobj, rcu);
57 else
58 kfree(uobj);
59 }
60
uverbs_uobject_put(struct ib_uobject * uobject)61 void uverbs_uobject_put(struct ib_uobject *uobject)
62 {
63 kref_put(&uobject->ref, uverbs_uobject_free);
64 }
65
uverbs_try_lock_object(struct ib_uobject * uobj,enum rdma_lookup_mode mode)66 static int uverbs_try_lock_object(struct ib_uobject *uobj,
67 enum rdma_lookup_mode mode)
68 {
69 /*
70 * When a shared access is required, we use a positive counter. Each
71 * shared access request checks that the value != -1 and increment it.
72 * Exclusive access is required for operations like write or destroy.
73 * In exclusive access mode, we check that the counter is zero (nobody
74 * claimed this object) and we set it to -1. Releasing a shared access
75 * lock is done simply by decreasing the counter. As for exclusive
76 * access locks, since only a single one of them is is allowed
77 * concurrently, setting the counter to zero is enough for releasing
78 * this lock.
79 */
80 switch (mode) {
81 case UVERBS_LOOKUP_READ:
82 return atomic_fetch_add_unless(&uobj->usecnt, 1, -1) == -1 ?
83 -EBUSY : 0;
84 case UVERBS_LOOKUP_WRITE:
85 /* lock is exclusive */
86 return atomic_cmpxchg(&uobj->usecnt, 0, -1) == 0 ? 0 : -EBUSY;
87 case UVERBS_LOOKUP_DESTROY:
88 return 0;
89 }
90 return 0;
91 }
92
/*
 * Debug aid, compiled in only with CONFIG_LOCKDEP: warn if usecnt does not
 * match what a holder of @mode should observe (positive for READ, exactly -1
 * for WRITE).  DESTROY mode is not checked.
 */
static void assert_uverbs_usecnt(struct ib_uobject *uobj,
				 enum rdma_lookup_mode mode)
{
#ifdef CONFIG_LOCKDEP
	if (mode == UVERBS_LOOKUP_READ)
		WARN_ON(atomic_read(&uobj->usecnt) <= 0);
	else if (mode == UVERBS_LOOKUP_WRITE)
		WARN_ON(atomic_read(&uobj->usecnt) != -1);
	/* UVERBS_LOOKUP_DESTROY: nothing to verify */
#endif
}
109
110 /*
111 * This must be called with the hw_destroy_rwsem locked for read or write,
112 * also the uobject itself must be locked for write.
113 *
114 * Upon return the HW object is guaranteed to be destroyed.
115 *
116 * For RDMA_REMOVE_ABORT, the hw_destroy_rwsem is not required to be held,
117 * however the type's allocat_commit function cannot have been called and the
118 * uobject cannot be on the uobjects_lists
119 *
120 * For RDMA_REMOVE_DESTROY the caller shold be holding a kref (eg via
121 * rdma_lookup_get_uobject) and the object is left in a state where the caller
122 * needs to call rdma_lookup_put_uobject.
123 *
124 * For all other destroy modes this function internally unlocks the uobject
125 * and consumes the kref on the uobj.
126 */
uverbs_destroy_uobject(struct ib_uobject * uobj,enum rdma_remove_reason reason,struct uverbs_attr_bundle * attrs)127 static int uverbs_destroy_uobject(struct ib_uobject *uobj,
128 enum rdma_remove_reason reason,
129 struct uverbs_attr_bundle *attrs)
130 {
131 struct ib_uverbs_file *ufile = attrs->ufile;
132 unsigned long flags;
133 int ret;
134
135 lockdep_assert_held(&ufile->hw_destroy_rwsem);
136 assert_uverbs_usecnt(uobj, UVERBS_LOOKUP_WRITE);
137
138 if (uobj->object) {
139 ret = uobj->uapi_object->type_class->destroy_hw(uobj, reason,
140 attrs);
141 if (ret) {
142 if (ib_is_destroy_retryable(ret, reason, uobj))
143 return ret;
144
145 /* Nothing to be done, dangle the memory and move on */
146 WARN(true,
147 "ib_uverbs: failed to remove uobject id %d, driver err=%d",
148 uobj->id, ret);
149 }
150
151 uobj->object = NULL;
152 }
153
154 if (reason == RDMA_REMOVE_ABORT) {
155 WARN_ON(!list_empty(&uobj->list));
156 WARN_ON(!uobj->context);
157 uobj->uapi_object->type_class->alloc_abort(uobj);
158 }
159
160 uobj->context = NULL;
161
162 /*
163 * For DESTROY the usecnt is not changed, the caller is expected to
164 * manage it via uobj_put_destroy(). Only DESTROY can remove the IDR
165 * handle.
166 */
167 if (reason != RDMA_REMOVE_DESTROY)
168 atomic_set(&uobj->usecnt, 0);
169 else
170 uobj->uapi_object->type_class->remove_handle(uobj);
171
172 if (!list_empty(&uobj->list)) {
173 spin_lock_irqsave(&ufile->uobjects_lock, flags);
174 list_del_init(&uobj->list);
175 spin_unlock_irqrestore(&ufile->uobjects_lock, flags);
176
177 /*
178 * Pairs with the get in rdma_alloc_commit_uobject(), could
179 * destroy uobj.
180 */
181 uverbs_uobject_put(uobj);
182 }
183
184 /*
185 * When aborting the stack kref remains owned by the core code, and is
186 * not transferred into the type. Pairs with the get in alloc_uobj
187 */
188 if (reason == RDMA_REMOVE_ABORT)
189 uverbs_uobject_put(uobj);
190
191 return 0;
192 }
193
194 /*
195 * This calls uverbs_destroy_uobject() using the RDMA_REMOVE_DESTROY
196 * sequence. It should only be used from command callbacks. On success the
197 * caller must pair this with uobj_put_destroy(). This
198 * version requires the caller to have already obtained an
199 * LOOKUP_DESTROY uobject kref.
200 */
uobj_destroy(struct ib_uobject * uobj,struct uverbs_attr_bundle * attrs)201 int uobj_destroy(struct ib_uobject *uobj, struct uverbs_attr_bundle *attrs)
202 {
203 struct ib_uverbs_file *ufile = attrs->ufile;
204 int ret;
205
206 down_read(&ufile->hw_destroy_rwsem);
207
208 /*
209 * Once the uobject is destroyed by RDMA_REMOVE_DESTROY then it is left
210 * write locked as the callers put it back with UVERBS_LOOKUP_DESTROY.
211 * This is because any other concurrent thread can still see the object
212 * in the xarray due to RCU. Leaving it locked ensures nothing else will
213 * touch it.
214 */
215 ret = uverbs_try_lock_object(uobj, UVERBS_LOOKUP_WRITE);
216 if (ret)
217 goto out_unlock;
218
219 ret = uverbs_destroy_uobject(uobj, RDMA_REMOVE_DESTROY, attrs);
220 if (ret) {
221 atomic_set(&uobj->usecnt, 0);
222 goto out_unlock;
223 }
224
225 out_unlock:
226 up_read(&ufile->hw_destroy_rwsem);
227 return ret;
228 }
229
230 /*
231 * uobj_get_destroy destroys the HW object and returns a handle to the uobj
232 * with a NULL object pointer. The caller must pair this with
233 * uobj_put_destroy().
234 */
__uobj_get_destroy(const struct uverbs_api_object * obj,u32 id,struct uverbs_attr_bundle * attrs)235 struct ib_uobject *__uobj_get_destroy(const struct uverbs_api_object *obj,
236 u32 id, struct uverbs_attr_bundle *attrs)
237 {
238 struct ib_uobject *uobj;
239 int ret;
240
241 uobj = rdma_lookup_get_uobject(obj, attrs->ufile, id,
242 UVERBS_LOOKUP_DESTROY, attrs);
243 if (IS_ERR(uobj))
244 return uobj;
245
246 ret = uobj_destroy(uobj, attrs);
247 if (ret) {
248 rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_DESTROY);
249 return ERR_PTR(ret);
250 }
251
252 return uobj;
253 }
254
255 /*
256 * Does both uobj_get_destroy() and uobj_put_destroy(). Returns 0 on success
257 * (negative errno on failure). For use by callers that do not need the uobj.
258 */
__uobj_perform_destroy(const struct uverbs_api_object * obj,u32 id,struct uverbs_attr_bundle * attrs)259 int __uobj_perform_destroy(const struct uverbs_api_object *obj, u32 id,
260 struct uverbs_attr_bundle *attrs)
261 {
262 struct ib_uobject *uobj;
263
264 uobj = __uobj_get_destroy(obj, id, attrs);
265 if (IS_ERR(uobj))
266 return PTR_ERR(uobj);
267 uobj_put_destroy(uobj);
268 return 0;
269 }
270
271 /* alloc_uobj must be undone by uverbs_destroy_uobject() */
alloc_uobj(struct ib_uverbs_file * ufile,const struct uverbs_api_object * obj)272 static struct ib_uobject *alloc_uobj(struct ib_uverbs_file *ufile,
273 const struct uverbs_api_object *obj)
274 {
275 struct ib_uobject *uobj;
276 struct ib_ucontext *ucontext;
277
278 ucontext = ib_uverbs_get_ucontext_file(ufile);
279 if (IS_ERR(ucontext))
280 return ERR_CAST(ucontext);
281
282 uobj = kzalloc(obj->type_attrs->obj_size, GFP_KERNEL);
283 if (!uobj)
284 return ERR_PTR(-ENOMEM);
285 /*
286 * user_handle should be filled by the handler,
287 * The object is added to the list in the commit stage.
288 */
289 uobj->ufile = ufile;
290 uobj->context = ucontext;
291 INIT_LIST_HEAD(&uobj->list);
292 uobj->uapi_object = obj;
293 /*
294 * Allocated objects start out as write locked to deny any other
295 * syscalls from accessing them until they are committed. See
296 * rdma_alloc_commit_uobject
297 */
298 atomic_set(&uobj->usecnt, -1);
299 kref_init(&uobj->ref);
300
301 return uobj;
302 }
303
idr_add_uobj(struct ib_uobject * uobj)304 static int idr_add_uobj(struct ib_uobject *uobj)
305 {
306 /*
307 * We start with allocating an idr pointing to NULL. This represents an
308 * object which isn't initialized yet. We'll replace it later on with
309 * the real object once we commit.
310 */
311 return xa_alloc(&uobj->ufile->idr, &uobj->id, NULL, xa_limit_32b,
312 GFP_KERNEL);
313 }
314
315 /* Returns the ib_uobject or an error. The caller should check for IS_ERR. */
316 static struct ib_uobject *
lookup_get_idr_uobject(const struct uverbs_api_object * obj,struct ib_uverbs_file * ufile,s64 id,enum rdma_lookup_mode mode)317 lookup_get_idr_uobject(const struct uverbs_api_object *obj,
318 struct ib_uverbs_file *ufile, s64 id,
319 enum rdma_lookup_mode mode)
320 {
321 struct ib_uobject *uobj;
322
323 if (id < 0 || id > ULONG_MAX)
324 return ERR_PTR(-EINVAL);
325
326 rcu_read_lock();
327 /*
328 * The idr_find is guaranteed to return a pointer to something that
329 * isn't freed yet, or NULL, as the free after idr_remove goes through
330 * kfree_rcu(). However the object may still have been released and
331 * kfree() could be called at any time.
332 */
333 uobj = xa_load(&ufile->idr, id);
334 if (!uobj || !kref_get_unless_zero(&uobj->ref))
335 uobj = ERR_PTR(-ENOENT);
336 rcu_read_unlock();
337 return uobj;
338 }
339
340 static struct ib_uobject *
lookup_get_fd_uobject(const struct uverbs_api_object * obj,struct ib_uverbs_file * ufile,s64 id,enum rdma_lookup_mode mode)341 lookup_get_fd_uobject(const struct uverbs_api_object *obj,
342 struct ib_uverbs_file *ufile, s64 id,
343 enum rdma_lookup_mode mode)
344 {
345 const struct uverbs_obj_fd_type *fd_type;
346 struct file *f;
347 struct ib_uobject *uobject;
348 int fdno = id;
349
350 if (fdno != id)
351 return ERR_PTR(-EINVAL);
352
353 if (mode != UVERBS_LOOKUP_READ)
354 return ERR_PTR(-EOPNOTSUPP);
355
356 if (!obj->type_attrs)
357 return ERR_PTR(-EIO);
358 fd_type =
359 container_of(obj->type_attrs, struct uverbs_obj_fd_type, type);
360
361 f = fget(fdno);
362 if (!f)
363 return ERR_PTR(-EBADF);
364
365 uobject = f->private_data;
366 /*
367 * fget(id) ensures we are not currently running uverbs_close_fd,
368 * and the caller is expected to ensure that uverbs_close_fd is never
369 * done while a call top lookup is possible.
370 */
371 if (f->f_op != fd_type->fops || uobject->ufile != ufile) {
372 fput(f);
373 return ERR_PTR(-EBADF);
374 }
375
376 uverbs_uobject_get(uobject);
377 return uobject;
378 }
379
/*
 * Look up uobject @id of type @obj in @ufile and lock it for @mode.
 *
 * @obj may be ERR_PTR(-ENOMSG), in which case the id is looked up in the
 * xarray without a type check; any other error pointer yields -EINVAL.
 * On success the uobject is returned with a kref held and locked according to
 * @mode, and, if @attrs is given, attrs->context is set from the uobject.
 * On failure an ERR_PTR is returned and nothing is held.
 */
struct ib_uobject *rdma_lookup_get_uobject(const struct uverbs_api_object *obj,
					   struct ib_uverbs_file *ufile, s64 id,
					   enum rdma_lookup_mode mode,
					   struct uverbs_attr_bundle *attrs)
{
	struct ib_uobject *uobj;
	int err;

	if (obj != ERR_PTR(-ENOMSG)) {
		if (IS_ERR(obj))
			return ERR_PTR(-EINVAL);

		uobj = obj->type_class->lookup_get(obj, ufile, id, mode);
		if (IS_ERR(uobj))
			return uobj;

		/* The handle exists but refers to a different object type */
		if (uobj->uapi_object != obj) {
			err = -EINVAL;
			goto out_put;
		}
	} else {
		/* must be UVERBS_IDR_ANY_OBJECT, see uapi_get_object() */
		uobj = lookup_get_idr_uobject(NULL, ufile, id, mode);
		if (IS_ERR(uobj))
			return uobj;
	}

	/*
	 * If we have been disassociated block every command except for
	 * DESTROY based commands.
	 */
	if (mode != UVERBS_LOOKUP_DESTROY &&
	    !srcu_dereference(ufile->device->ib_dev,
			      &ufile->device->disassociate_srcu)) {
		err = -EIO;
		goto out_put;
	}

	err = uverbs_try_lock_object(uobj, mode);
	if (err)
		goto out_put;

	if (attrs)
		attrs->context = uobj->context;

	return uobj;

out_put:
	uobj->uapi_object->type_class->lookup_put(uobj, mode);
	uverbs_uobject_put(uobj);
	return ERR_PTR(err);
}
430
431 static struct ib_uobject *
alloc_begin_idr_uobject(const struct uverbs_api_object * obj,struct ib_uverbs_file * ufile)432 alloc_begin_idr_uobject(const struct uverbs_api_object *obj,
433 struct ib_uverbs_file *ufile)
434 {
435 int ret;
436 struct ib_uobject *uobj;
437
438 uobj = alloc_uobj(ufile, obj);
439 if (IS_ERR(uobj))
440 return uobj;
441
442 ret = idr_add_uobj(uobj);
443 if (ret)
444 goto uobj_put;
445
446 ret = ib_rdmacg_try_charge(&uobj->cg_obj, uobj->context->device,
447 RDMACG_RESOURCE_HCA_OBJECT);
448 if (ret)
449 goto remove;
450
451 return uobj;
452
453 remove:
454 xa_erase(&ufile->idr, uobj->id);
455 uobj_put:
456 uverbs_uobject_put(uobj);
457 return ERR_PTR(ret);
458 }
459
460 static struct ib_uobject *
alloc_begin_fd_uobject(const struct uverbs_api_object * obj,struct ib_uverbs_file * ufile)461 alloc_begin_fd_uobject(const struct uverbs_api_object *obj,
462 struct ib_uverbs_file *ufile)
463 {
464 int new_fd;
465 struct ib_uobject *uobj;
466
467 new_fd = get_unused_fd_flags(O_CLOEXEC);
468 if (new_fd < 0)
469 return ERR_PTR(new_fd);
470
471 uobj = alloc_uobj(ufile, obj);
472 if (IS_ERR(uobj)) {
473 put_unused_fd(new_fd);
474 return uobj;
475 }
476
477 uobj->id = new_fd;
478 uobj->ufile = ufile;
479
480 return uobj;
481 }
482
rdma_alloc_begin_uobject(const struct uverbs_api_object * obj,struct ib_uverbs_file * ufile,struct uverbs_attr_bundle * attrs)483 struct ib_uobject *rdma_alloc_begin_uobject(const struct uverbs_api_object *obj,
484 struct ib_uverbs_file *ufile,
485 struct uverbs_attr_bundle *attrs)
486 {
487 struct ib_uobject *ret;
488
489 if (IS_ERR(obj))
490 return ERR_PTR(-EINVAL);
491
492 /*
493 * The hw_destroy_rwsem is held across the entire object creation and
494 * released during rdma_alloc_commit_uobject or
495 * rdma_alloc_abort_uobject
496 */
497 if (!down_read_trylock(&ufile->hw_destroy_rwsem))
498 return ERR_PTR(-EIO);
499
500 ret = obj->type_class->alloc_begin(obj, ufile);
501 if (IS_ERR(ret)) {
502 up_read(&ufile->hw_destroy_rwsem);
503 return ret;
504 }
505 if (attrs)
506 attrs->context = ret->context;
507 return ret;
508 }
509
alloc_abort_idr_uobject(struct ib_uobject * uobj)510 static void alloc_abort_idr_uobject(struct ib_uobject *uobj)
511 {
512 ib_rdmacg_uncharge(&uobj->cg_obj, uobj->context->device,
513 RDMACG_RESOURCE_HCA_OBJECT);
514
515 xa_erase(&uobj->ufile->idr, uobj->id);
516 }
517
destroy_hw_idr_uobject(struct ib_uobject * uobj,enum rdma_remove_reason why,struct uverbs_attr_bundle * attrs)518 static int __must_check destroy_hw_idr_uobject(struct ib_uobject *uobj,
519 enum rdma_remove_reason why,
520 struct uverbs_attr_bundle *attrs)
521 {
522 const struct uverbs_obj_idr_type *idr_type =
523 container_of(uobj->uapi_object->type_attrs,
524 struct uverbs_obj_idr_type, type);
525 int ret = idr_type->destroy_object(uobj, why, attrs);
526
527 /*
528 * We can only fail gracefully if the user requested to destroy the
529 * object or when a retry may be called upon an error.
530 * In the rest of the cases, just remove whatever you can.
531 */
532 if (ib_is_destroy_retryable(ret, why, uobj))
533 return ret;
534
535 if (why == RDMA_REMOVE_ABORT)
536 return 0;
537
538 ib_rdmacg_uncharge(&uobj->cg_obj, uobj->context->device,
539 RDMACG_RESOURCE_HCA_OBJECT);
540
541 return 0;
542 }
543
remove_handle_idr_uobject(struct ib_uobject * uobj)544 static void remove_handle_idr_uobject(struct ib_uobject *uobj)
545 {
546 xa_erase(&uobj->ufile->idr, uobj->id);
547 /* Matches the kref in alloc_commit_idr_uobject */
548 uverbs_uobject_put(uobj);
549 }
550
alloc_abort_fd_uobject(struct ib_uobject * uobj)551 static void alloc_abort_fd_uobject(struct ib_uobject *uobj)
552 {
553 put_unused_fd(uobj->id);
554 }
555
destroy_hw_fd_uobject(struct ib_uobject * uobj,enum rdma_remove_reason why,struct uverbs_attr_bundle * attrs)556 static int __must_check destroy_hw_fd_uobject(struct ib_uobject *uobj,
557 enum rdma_remove_reason why,
558 struct uverbs_attr_bundle *attrs)
559 {
560 const struct uverbs_obj_fd_type *fd_type = container_of(
561 uobj->uapi_object->type_attrs, struct uverbs_obj_fd_type, type);
562 int ret = fd_type->context_closed(uobj, why);
563
564 if (ib_is_destroy_retryable(ret, why, uobj))
565 return ret;
566
567 return 0;
568 }
569
/* remove_handle callback of the FD class: intentionally does nothing. */
static void remove_handle_fd_uobject(struct ib_uobject *uobj)
{
	/* no handle to remove here */
}
573
alloc_commit_idr_uobject(struct ib_uobject * uobj)574 static int alloc_commit_idr_uobject(struct ib_uobject *uobj)
575 {
576 struct ib_uverbs_file *ufile = uobj->ufile;
577 void *old;
578
579 /*
580 * We already allocated this IDR with a NULL object, so
581 * this shouldn't fail.
582 *
583 * NOTE: Storing the uobj transfers our kref on uobj to the XArray.
584 * It will be put by remove_commit_idr_uobject()
585 */
586 old = xa_store(&ufile->idr, uobj->id, uobj, GFP_KERNEL);
587 WARN_ON(old != NULL);
588
589 return 0;
590 }
591
alloc_commit_fd_uobject(struct ib_uobject * uobj)592 static int alloc_commit_fd_uobject(struct ib_uobject *uobj)
593 {
594 const struct uverbs_obj_fd_type *fd_type = container_of(
595 uobj->uapi_object->type_attrs, struct uverbs_obj_fd_type, type);
596 int fd = uobj->id;
597 struct file *filp;
598
599 /*
600 * The kref for uobj is moved into filp->private data and put in
601 * uverbs_close_fd(). Once alloc_commit() succeeds uverbs_close_fd()
602 * must be guaranteed to be called from the provided fops release
603 * callback.
604 */
605 filp = anon_inode_getfile(fd_type->name,
606 fd_type->fops,
607 uobj,
608 fd_type->flags);
609 if (IS_ERR(filp))
610 return PTR_ERR(filp);
611
612 uobj->object = filp;
613
614 /* Matching put will be done in uverbs_close_fd() */
615 kref_get(&uobj->ufile->ref);
616
617 /* This shouldn't be used anymore. Use the file object instead */
618 uobj->id = 0;
619
620 /*
621 * NOTE: Once we install the file we loose ownership of our kref on
622 * uobj. It will be put by uverbs_close_fd()
623 */
624 fd_install(fd, filp);
625
626 return 0;
627 }
628
629 /*
630 * In all cases rdma_alloc_commit_uobject() consumes the kref to uobj and the
631 * caller can no longer assume uobj is valid. If this function fails it
632 * destroys the uboject, including the attached HW object.
633 */
rdma_alloc_commit_uobject(struct ib_uobject * uobj,struct uverbs_attr_bundle * attrs)634 int __must_check rdma_alloc_commit_uobject(struct ib_uobject *uobj,
635 struct uverbs_attr_bundle *attrs)
636 {
637 struct ib_uverbs_file *ufile = attrs->ufile;
638 int ret;
639
640 /* alloc_commit consumes the uobj kref */
641 ret = uobj->uapi_object->type_class->alloc_commit(uobj);
642 if (ret) {
643 uverbs_destroy_uobject(uobj, RDMA_REMOVE_ABORT, attrs);
644 up_read(&ufile->hw_destroy_rwsem);
645 return ret;
646 }
647
648 /* kref is held so long as the uobj is on the uobj list. */
649 uverbs_uobject_get(uobj);
650 spin_lock_irq(&ufile->uobjects_lock);
651 list_add(&uobj->list, &ufile->uobjects);
652 spin_unlock_irq(&ufile->uobjects_lock);
653
654 /* matches atomic_set(-1) in alloc_uobj */
655 atomic_set(&uobj->usecnt, 0);
656
657 /* Matches the down_read in rdma_alloc_begin_uobject */
658 up_read(&ufile->hw_destroy_rwsem);
659
660 return 0;
661 }
662
663 /*
664 * This consumes the kref for uobj. It is up to the caller to unwind the HW
665 * object and anything else connected to uobj before calling this.
666 */
rdma_alloc_abort_uobject(struct ib_uobject * uobj,struct uverbs_attr_bundle * attrs)667 void rdma_alloc_abort_uobject(struct ib_uobject *uobj,
668 struct uverbs_attr_bundle *attrs)
669 {
670 struct ib_uverbs_file *ufile = uobj->ufile;
671
672 uobj->object = NULL;
673 uverbs_destroy_uobject(uobj, RDMA_REMOVE_ABORT, attrs);
674
675 /* Matches the down_read in rdma_alloc_begin_uobject */
676 up_read(&ufile->hw_destroy_rwsem);
677 }
678
/* lookup_put callback of the IDR class: intentionally does nothing. */
static void lookup_put_idr_uobject(struct ib_uobject *uobj,
				   enum rdma_lookup_mode mode)
{
	/* nothing to release here */
}
683
lookup_put_fd_uobject(struct ib_uobject * uobj,enum rdma_lookup_mode mode)684 static void lookup_put_fd_uobject(struct ib_uobject *uobj,
685 enum rdma_lookup_mode mode)
686 {
687 struct file *filp = uobj->object;
688
689 WARN_ON(mode != UVERBS_LOOKUP_READ);
690 /* This indirectly calls uverbs_close_fd and free the object */
691 fput(filp);
692 }
693
rdma_lookup_put_uobject(struct ib_uobject * uobj,enum rdma_lookup_mode mode)694 void rdma_lookup_put_uobject(struct ib_uobject *uobj,
695 enum rdma_lookup_mode mode)
696 {
697 assert_uverbs_usecnt(uobj, mode);
698 /*
699 * In order to unlock an object, either decrease its usecnt for
700 * read access or zero it in case of exclusive access. See
701 * uverbs_try_lock_object for locking schema information.
702 */
703 switch (mode) {
704 case UVERBS_LOOKUP_READ:
705 atomic_dec(&uobj->usecnt);
706 break;
707 case UVERBS_LOOKUP_WRITE:
708 atomic_set(&uobj->usecnt, 0);
709 break;
710 case UVERBS_LOOKUP_DESTROY:
711 break;
712 }
713
714 uobj->uapi_object->type_class->lookup_put(uobj, mode);
715 /* Pairs with the kref obtained by type->lookup_get */
716 uverbs_uobject_put(uobj);
717 }
718
setup_ufile_idr_uobject(struct ib_uverbs_file * ufile)719 void setup_ufile_idr_uobject(struct ib_uverbs_file *ufile)
720 {
721 xa_init_flags(&ufile->idr, XA_FLAGS_ALLOC);
722 }
723
release_ufile_idr_uobject(struct ib_uverbs_file * ufile)724 void release_ufile_idr_uobject(struct ib_uverbs_file *ufile)
725 {
726 struct ib_uobject *entry;
727 unsigned long id;
728
729 /*
730 * At this point uverbs_cleanup_ufile() is guaranteed to have run, and
731 * there are no HW objects left, however the xarray is still populated
732 * with anything that has not been cleaned up by userspace. Since the
733 * kref on ufile is 0, nothing is allowed to call lookup_get.
734 *
735 * This is an optimized equivalent to remove_handle_idr_uobject
736 */
737 xa_for_each(&ufile->idr, id, entry) {
738 WARN_ON(entry->object);
739 uverbs_uobject_put(entry);
740 }
741
742 xa_destroy(&ufile->idr);
743 }
744
745 const struct uverbs_obj_type_class uverbs_idr_class = {
746 .alloc_begin = alloc_begin_idr_uobject,
747 .lookup_get = lookup_get_idr_uobject,
748 .alloc_commit = alloc_commit_idr_uobject,
749 .alloc_abort = alloc_abort_idr_uobject,
750 .lookup_put = lookup_put_idr_uobject,
751 .destroy_hw = destroy_hw_idr_uobject,
752 .remove_handle = remove_handle_idr_uobject,
753 /*
754 * When we destroy an object, we first just lock it for WRITE and
755 * actually DESTROY it in the finalize stage. So, the problematic
756 * scenario is when we just started the finalize stage of the
757 * destruction (nothing was executed yet). Now, the other thread
758 * fetched the object for READ access, but it didn't lock it yet.
759 * The DESTROY thread continues and starts destroying the object.
760 * When the other thread continue - without the RCU, it would
761 * access freed memory. However, the rcu_read_lock delays the free
762 * until the rcu_read_lock of the READ operation quits. Since the
763 * exclusive lock of the object is still taken by the DESTROY flow, the
764 * READ operation will get -EBUSY and it'll just bail out.
765 */
766 .needs_kfree_rcu = true,
767 };
768 EXPORT_SYMBOL(uverbs_idr_class);
769
uverbs_close_fd(struct file * f)770 void uverbs_close_fd(struct file *f)
771 {
772 struct ib_uobject *uobj = f->private_data;
773 struct ib_uverbs_file *ufile = uobj->ufile;
774 struct uverbs_attr_bundle attrs = {
775 .context = uobj->context,
776 .ufile = ufile,
777 };
778
779 if (down_read_trylock(&ufile->hw_destroy_rwsem)) {
780 /*
781 * lookup_get_fd_uobject holds the kref on the struct file any
782 * time a FD uobj is locked, which prevents this release
783 * method from being invoked. Meaning we can always get the
784 * write lock here, or we have a kernel bug.
785 */
786 WARN_ON(uverbs_try_lock_object(uobj, UVERBS_LOOKUP_WRITE));
787 uverbs_destroy_uobject(uobj, RDMA_REMOVE_CLOSE, &attrs);
788 up_read(&ufile->hw_destroy_rwsem);
789 }
790
791 /* Matches the get in alloc_begin_fd_uobject */
792 kref_put(&ufile->ref, ib_uverbs_release_file);
793
794 /* Pairs with filp->private_data in alloc_begin_fd_uobject */
795 uverbs_uobject_put(uobj);
796 }
797 EXPORT_SYMBOL(uverbs_close_fd);
798
799 /*
800 * Drop the ucontext off the ufile and completely disconnect it from the
801 * ib_device
802 */
ufile_destroy_ucontext(struct ib_uverbs_file * ufile,enum rdma_remove_reason reason)803 static void ufile_destroy_ucontext(struct ib_uverbs_file *ufile,
804 enum rdma_remove_reason reason)
805 {
806 struct ib_ucontext *ucontext = ufile->ucontext;
807 struct ib_device *ib_dev = ucontext->device;
808
809 /*
810 * If we are closing the FD then the user mmap VMAs must have
811 * already been destroyed as they hold on to the filep, otherwise
812 * they need to be zap'd.
813 */
814 if (reason == RDMA_REMOVE_DRIVER_REMOVE) {
815 uverbs_user_mmap_disassociate(ufile);
816 if (ib_dev->ops.disassociate_ucontext)
817 ib_dev->ops.disassociate_ucontext(ucontext);
818 }
819
820 ib_rdmacg_uncharge(&ucontext->cg_obj, ib_dev,
821 RDMACG_RESOURCE_HCA_HANDLE);
822
823 rdma_restrack_del(&ucontext->res);
824
825 ib_dev->ops.dealloc_ucontext(ucontext);
826 kfree(ucontext);
827
828 ufile->ucontext = NULL;
829 }
830
__uverbs_cleanup_ufile(struct ib_uverbs_file * ufile,enum rdma_remove_reason reason)831 static int __uverbs_cleanup_ufile(struct ib_uverbs_file *ufile,
832 enum rdma_remove_reason reason)
833 {
834 struct ib_uobject *obj, *next_obj;
835 int ret = -EINVAL;
836 struct uverbs_attr_bundle attrs = { .ufile = ufile };
837
838 /*
839 * This shouldn't run while executing other commands on this
840 * context. Thus, the only thing we should take care of is
841 * releasing a FD while traversing this list. The FD could be
842 * closed and released from the _release fop of this FD.
843 * In order to mitigate this, we add a lock.
844 * We take and release the lock per traversal in order to let
845 * other threads (which might still use the FDs) chance to run.
846 */
847 list_for_each_entry_safe(obj, next_obj, &ufile->uobjects, list) {
848 attrs.context = obj->context;
849 /*
850 * if we hit this WARN_ON, that means we are
851 * racing with a lookup_get.
852 */
853 WARN_ON(uverbs_try_lock_object(obj, UVERBS_LOOKUP_WRITE));
854 if (!uverbs_destroy_uobject(obj, reason, &attrs))
855 ret = 0;
856 else
857 atomic_set(&obj->usecnt, 0);
858 }
859 return ret;
860 }
861
862 /*
863 * Destroy the uncontext and every uobject associated with it. If called with
864 * reason != RDMA_REMOVE_CLOSE this will not return until the destruction has
865 * been completed and ufile->ucontext is NULL.
866 *
867 * This is internally locked and can be called in parallel from multiple
868 * contexts.
869 */
uverbs_destroy_ufile_hw(struct ib_uverbs_file * ufile,enum rdma_remove_reason reason)870 void uverbs_destroy_ufile_hw(struct ib_uverbs_file *ufile,
871 enum rdma_remove_reason reason)
872 {
873 if (reason == RDMA_REMOVE_CLOSE) {
874 /*
875 * During destruction we might trigger something that
876 * synchronously calls release on any file descriptor. For
877 * this reason all paths that come from file_operations
878 * release must use try_lock. They can progress knowing that
879 * there is an ongoing uverbs_destroy_ufile_hw that will clean
880 * up the driver resources.
881 */
882 if (!mutex_trylock(&ufile->ucontext_lock))
883 return;
884
885 } else {
886 mutex_lock(&ufile->ucontext_lock);
887 }
888
889 down_write(&ufile->hw_destroy_rwsem);
890
891 /*
892 * If a ucontext was never created then we can't have any uobjects to
893 * cleanup, nothing to do.
894 */
895 if (!ufile->ucontext)
896 goto done;
897
898 ufile->ucontext->closing = true;
899 ufile->ucontext->cleanup_retryable = true;
900 while (!list_empty(&ufile->uobjects))
901 if (__uverbs_cleanup_ufile(ufile, reason)) {
902 /*
903 * No entry was cleaned-up successfully during this
904 * iteration
905 */
906 break;
907 }
908
909 ufile->ucontext->cleanup_retryable = false;
910 if (!list_empty(&ufile->uobjects))
911 __uverbs_cleanup_ufile(ufile, reason);
912
913 ufile_destroy_ucontext(ufile, reason);
914
915 done:
916 up_write(&ufile->hw_destroy_rwsem);
917 mutex_unlock(&ufile->ucontext_lock);
918 }
919
920 const struct uverbs_obj_type_class uverbs_fd_class = {
921 .alloc_begin = alloc_begin_fd_uobject,
922 .lookup_get = lookup_get_fd_uobject,
923 .alloc_commit = alloc_commit_fd_uobject,
924 .alloc_abort = alloc_abort_fd_uobject,
925 .lookup_put = lookup_put_fd_uobject,
926 .destroy_hw = destroy_hw_fd_uobject,
927 .remove_handle = remove_handle_fd_uobject,
928 .needs_kfree_rcu = false,
929 };
930 EXPORT_SYMBOL(uverbs_fd_class);
931
932 struct ib_uobject *
uverbs_get_uobject_from_file(u16 object_id,enum uverbs_obj_access access,s64 id,struct uverbs_attr_bundle * attrs)933 uverbs_get_uobject_from_file(u16 object_id, enum uverbs_obj_access access,
934 s64 id, struct uverbs_attr_bundle *attrs)
935 {
936 const struct uverbs_api_object *obj =
937 uapi_get_object(attrs->ufile->device->uapi, object_id);
938
939 switch (access) {
940 case UVERBS_ACCESS_READ:
941 return rdma_lookup_get_uobject(obj, attrs->ufile, id,
942 UVERBS_LOOKUP_READ, attrs);
943 case UVERBS_ACCESS_DESTROY:
944 /* Actual destruction is done inside uverbs_handle_method */
945 return rdma_lookup_get_uobject(obj, attrs->ufile, id,
946 UVERBS_LOOKUP_DESTROY, attrs);
947 case UVERBS_ACCESS_WRITE:
948 return rdma_lookup_get_uobject(obj, attrs->ufile, id,
949 UVERBS_LOOKUP_WRITE, attrs);
950 case UVERBS_ACCESS_NEW:
951 return rdma_alloc_begin_uobject(obj, attrs->ufile, attrs);
952 default:
953 WARN_ON(true);
954 return ERR_PTR(-EOPNOTSUPP);
955 }
956 }
957
/*
 * Counterpart of uverbs_get_uobject_from_file(): release a looked up uobject
 * according to @access, or, for UVERBS_ACCESS_NEW, commit (@commit true) or
 * abort (@commit false) the creation.
 *
 * Returns 0, the result of rdma_alloc_commit_uobject() for a committed NEW
 * object, or -EOPNOTSUPP (with a warning) for an unknown @access.
 *
 * Refcounts should be handled at the object level and not at the uobject
 * level. Refcounts of the objects themselves are done in handlers.
 */
int uverbs_finalize_object(struct ib_uobject *uobj,
			   enum uverbs_obj_access access, bool commit,
			   struct uverbs_attr_bundle *attrs)
{
	switch (access) {
	case UVERBS_ACCESS_READ:
		rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_READ);
		return 0;
	case UVERBS_ACCESS_WRITE:
		rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_WRITE);
		return 0;
	case UVERBS_ACCESS_DESTROY:
		/* Only the DESTROY path tolerates a NULL uobj */
		if (uobj)
			rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_DESTROY);
		return 0;
	case UVERBS_ACCESS_NEW:
		if (!commit) {
			rdma_alloc_abort_uobject(uobj, attrs);
			return 0;
		}
		return rdma_alloc_commit_uobject(uobj, attrs);
	default:
		WARN_ON(true);
		return -EOPNOTSUPP;
	}
}
994