// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
 */

#include "rxe.h"
#include "rxe_loc.h"

/* Return a random 8 bit key value that is
 * different from the last_key. Set last_key to -1
 * if this is the first key for an MR or MW.
 */
u8 rxe_get_next_key(u32 last_key)
{
	u8 key;

	do {
		get_random_bytes(&key, 1);
	} while (key == last_key);

	return key;
}

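/* Check that the range [iova, iova + length) falls entirely within
 * the registered region. DMA MRs place no restriction on the range,
 * so any iova/length pair is accepted for them.
 */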
int mr_check_range(struct rxe_mr *mr, u64 iova, size_t length)
{
	switch (mr->type) {
	case RXE_MR_TYPE_DMA:
		return 0;

	case RXE_MR_TYPE_MR:
		if (iova < mr->iova || length > mr->length ||
		    iova > mr->iova + mr->length - length)
			return -EFAULT;
		return 0;

	default:
		return -EFAULT;
	}
}

#define IB_ACCESS_REMOTE	(IB_ACCESS_REMOTE_READ		\
				| IB_ACCESS_REMOTE_WRITE	\
				| IB_ACCESS_REMOTE_ATOMIC)

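/* Common MR setup: build an lkey from the pool index (upper 24 bits)
 * and a random 8 bit key (lower 8 bits), and mirror it into the rkey
 * only when remote access was requested. The MR starts out in the
 * invalid state with no type until a specific init routine fills it in.
 */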
static void rxe_mr_init(int access, struct rxe_mr *mr)
{
	u32 lkey = mr->pelem.index << 8 | rxe_get_next_key(-1);
	u32 rkey = (access & IB_ACCESS_REMOTE) ? lkey : 0;

	/* Set ibmr->l/rkey and also copy into the private l/rkey.
	 * For user MRs these will always be the same; for cases where
	 * the caller 'owns' the key portion they may differ until the
	 * REG_MR WQE is executed.
	 */
	mr->lkey = mr->ibmr.lkey = lkey;
	mr->rkey = mr->ibmr.rkey = rkey;

	mr->state = RXE_MR_STATE_INVALID;
	mr->type = RXE_MR_TYPE_NONE;
	mr->map_shift = ilog2(RXE_BUF_PER_MAP);
}

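/* Allocate the two-level page map: an array of num_map pointers, each
 * to a struct rxe_map holding RXE_BUF_PER_MAP physical buffer entries,
 * enough to describe num_buf buffers in total.
 */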
static int rxe_mr_alloc(struct rxe_mr *mr, int num_buf)
{
	int i;
	int num_map;
	struct rxe_map **map = mr->map;

	num_map = (num_buf + RXE_BUF_PER_MAP - 1) / RXE_BUF_PER_MAP;

	mr->map = kmalloc_array(num_map, sizeof(*map), GFP_KERNEL);
	if (!mr->map)
		goto err1;

	for (i = 0; i < num_map; i++) {
		mr->map[i] = kmalloc(sizeof(**map), GFP_KERNEL);
		if (!mr->map[i])
			goto err2;
	}

	BUILD_BUG_ON(!is_power_of_2(RXE_BUF_PER_MAP));

	mr->map_shift = ilog2(RXE_BUF_PER_MAP);
	mr->map_mask = RXE_BUF_PER_MAP - 1;

	mr->num_buf = num_buf;
	mr->num_map = num_map;
	mr->max_buf = num_map * RXE_BUF_PER_MAP;

	return 0;

err2:
	for (i--; i >= 0; i--)
		kfree(mr->map[i]);

	kfree(mr->map);
err1:
	return -ENOMEM;
}

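/* Initialize an MR for local DMA access. No page map is built; iova
 * values are later used directly as kernel virtual addresses by
 * iova_to_vaddr() and rxe_mr_copy().
 */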
void rxe_mr_init_dma(struct rxe_pd *pd, int access, struct rxe_mr *mr)
{
	rxe_mr_init(access, mr);

	mr->ibmr.pd = &pd->ibpd;
	mr->access = access;
	mr->state = RXE_MR_STATE_VALID;
	mr->type = RXE_MR_TYPE_DMA;
}

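/* Register a user memory region: pin the pages with ib_umem_get(),
 * then record the kernel virtual address and size of each page in
 * the MR's map so that iova lookups can be resolved later.
 */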
int rxe_mr_init_user(struct rxe_pd *pd, u64 start, u64 length, u64 iova,
		     int access, struct rxe_mr *mr)
{
	struct rxe_map		**map;
	struct rxe_phys_buf	*buf = NULL;
	struct ib_umem		*umem;
	struct sg_page_iter	sg_iter;
	int			num_buf;
	void			*vaddr;
	int err;
	int i;

	umem = ib_umem_get(pd->ibpd.device, start, length, access);
	if (IS_ERR(umem)) {
		pr_warn("%s: Unable to pin memory region err = %d\n",
			__func__, (int)PTR_ERR(umem));
		err = PTR_ERR(umem);
		goto err_out;
	}

	num_buf = ib_umem_num_pages(umem);

	rxe_mr_init(access, mr);

	err = rxe_mr_alloc(mr, num_buf);
	if (err) {
		pr_warn("%s: Unable to allocate memory for map\n",
			__func__);
		goto err_release_umem;
	}

	mr->page_shift = PAGE_SHIFT;
	mr->page_mask = PAGE_SIZE - 1;

	num_buf = 0;
	map = mr->map;
	if (length > 0) {
		buf = map[0]->buf;

		for_each_sgtable_page (&umem->sgt_append.sgt, &sg_iter, 0) {
			if (num_buf >= RXE_BUF_PER_MAP) {
				map++;
				buf = map[0]->buf;
				num_buf = 0;
			}

			vaddr = page_address(sg_page_iter_page(&sg_iter));
			if (!vaddr) {
				pr_warn("%s: Unable to get virtual address\n",
					__func__);
				err = -ENOMEM;
				goto err_cleanup_map;
			}

			buf->addr = (uintptr_t)vaddr;
			buf->size = PAGE_SIZE;
			num_buf++;
			buf++;
		}
	}

	mr->ibmr.pd = &pd->ibpd;
	mr->umem = umem;
	mr->access = access;
	mr->length = length;
	mr->iova = iova;
	mr->va = start;
	mr->offset = ib_umem_offset(umem);
	mr->state = RXE_MR_STATE_VALID;
	mr->type = RXE_MR_TYPE_MR;

	return 0;

err_cleanup_map:
	for (i = 0; i < mr->num_map; i++)
		kfree(mr->map[i]);
	kfree(mr->map);
err_release_umem:
	ib_umem_release(umem);
err_out:
	return err;
}

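/* Prepare an MR for fast registration (REG_MR work requests). The map
 * is sized for max_pages but left unpopulated here; the MR stays in
 * the free state until a REG_MR WQE makes it valid.
 */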
int rxe_mr_init_fast(struct rxe_pd *pd, int max_pages, struct rxe_mr *mr)
{
	int err;

	/* always allow remote access for FMRs */
	rxe_mr_init(IB_ACCESS_REMOTE, mr);

	err = rxe_mr_alloc(mr, max_pages);
	if (err)
		goto err1;

	mr->ibmr.pd = &pd->ibpd;
	mr->max_buf = max_pages;
	mr->state = RXE_MR_STATE_FREE;
	mr->type = RXE_MR_TYPE_MR;

	return 0;

err1:
	return err;
}

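/* Translate an iova within the MR into a map index (m_out), a buffer
 * index (n_out) and a byte offset within that buffer. Page-sized
 * buffers are resolved with shifts and masks; otherwise the buffer
 * list is walked until the remaining offset fits in one buffer.
 */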
static void lookup_iova(struct rxe_mr *mr, u64 iova, int *m_out, int *n_out,
			size_t *offset_out)
{
	size_t offset = iova - mr->iova + mr->offset;
	int			map_index;
	int			buf_index;
	u64			length;

	if (likely(mr->page_shift)) {
		*offset_out = offset & mr->page_mask;
		offset >>= mr->page_shift;
		*n_out = offset & mr->map_mask;
		*m_out = offset >> mr->map_shift;
	} else {
		map_index = 0;
		buf_index = 0;

		length = mr->map[map_index]->buf[buf_index].size;

		while (offset >= length) {
			offset -= length;
			buf_index++;

			if (buf_index == RXE_BUF_PER_MAP) {
				map_index++;
				buf_index = 0;
			}
			length = mr->map[map_index]->buf[buf_index].size;
		}

		*m_out = map_index;
		*n_out = buf_index;
		*offset_out = offset;
	}
}

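/* Return the kernel virtual address corresponding to iova, or NULL if
 * the MR is not valid, the range check fails, or the requested length
 * would cross a buffer boundary.
 */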
void *iova_to_vaddr(struct rxe_mr *mr, u64 iova, int length)
{
	size_t offset;
	int m, n;
	void *addr;

	if (mr->state != RXE_MR_STATE_VALID) {
		pr_warn("mr not in valid state\n");
		addr = NULL;
		goto out;
	}

	if (!mr->map) {
		addr = (void *)(uintptr_t)iova;
		goto out;
	}

	if (mr_check_range(mr, iova, length)) {
		pr_warn("range violation\n");
		addr = NULL;
		goto out;
	}

	lookup_iova(mr, iova, &m, &n, &offset);

	if (offset + length > mr->map[m]->buf[n].size) {
		pr_warn("crosses page boundary\n");
		addr = NULL;
		goto out;
	}

	addr = (void *)(uintptr_t)mr->map[m]->buf[n].addr + offset;

out:
	return addr;
}

/* Copy data from a range (vaddr, vaddr+length-1) to or from
 * an MR object starting at iova.
 */
int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length,
		enum rxe_mr_copy_dir dir)
{
	int			err;
	int			bytes;
	u8			*va;
	struct rxe_map		**map;
	struct rxe_phys_buf	*buf;
	int			m;
	int			i;
	size_t			offset;

	if (length == 0)
		return 0;

	if (mr->type == RXE_MR_TYPE_DMA) {
		u8 *src, *dest;

		src = (dir == RXE_TO_MR_OBJ) ? addr : ((void *)(uintptr_t)iova);

		dest = (dir == RXE_TO_MR_OBJ) ? ((void *)(uintptr_t)iova) : addr;

		memcpy(dest, src, length);

		return 0;
	}

	WARN_ON_ONCE(!mr->map);

	err = mr_check_range(mr, iova, length);
	if (err) {
		err = -EFAULT;
		goto err1;
	}

	lookup_iova(mr, iova, &m, &i, &offset);

	map = mr->map + m;
	buf = map[0]->buf + i;

	while (length > 0) {
		u8 *src, *dest;

		va = (u8 *)(uintptr_t)buf->addr + offset;
		src = (dir == RXE_TO_MR_OBJ) ? addr : va;
		dest = (dir == RXE_TO_MR_OBJ) ? va : addr;

		bytes = buf->size - offset;

		if (bytes > length)
			bytes = length;

		memcpy(dest, src, bytes);

		length	-= bytes;
		addr	+= bytes;

		offset = 0;
		buf++;
		i++;

		if (i == RXE_BUF_PER_MAP) {
			i = 0;
			map++;
			buf = map[0]->buf;
		}
	}

	return 0;

err1:
	return err;
}

/* Copy data in or out of a wqe, i.e. its sg list,
 * under the control of a dma descriptor.
 */
int copy_data(struct rxe_pd *pd, int access, struct rxe_dma_info *dma,
	      void *addr, int length, enum rxe_mr_copy_dir dir)
{
	int			bytes;
	struct rxe_sge		*sge	= &dma->sge[dma->cur_sge];
	int			offset	= dma->sge_offset;
	int			resid	= dma->resid;
	struct rxe_mr		*mr	= NULL;
	u64			iova;
	int			err;

	if (length == 0)
		return 0;

	if (length > resid) {
		err = -EINVAL;
		goto err2;
	}

	if (sge->length && (offset < sge->length)) {
		mr = lookup_mr(pd, access, sge->lkey, RXE_LOOKUP_LOCAL);
		if (!mr) {
			err = -EINVAL;
			goto err1;
		}
	}

	while (length > 0) {
		bytes = length;

		if (offset >= sge->length) {
			if (mr) {
				rxe_drop_ref(mr);
				mr = NULL;
			}
			sge++;
			dma->cur_sge++;
			offset = 0;

			if (dma->cur_sge >= dma->num_sge) {
				err = -ENOSPC;
				goto err2;
			}

			if (sge->length) {
				mr = lookup_mr(pd, access, sge->lkey,
					       RXE_LOOKUP_LOCAL);
				if (!mr) {
					err = -EINVAL;
					goto err1;
				}
			} else {
				continue;
			}
		}

		if (bytes > sge->length - offset)
			bytes = sge->length - offset;

		if (bytes > 0) {
			iova = sge->addr + offset;

			err = rxe_mr_copy(mr, iova, addr, bytes, dir);
			if (err)
				goto err2;

			offset	+= bytes;
			resid	-= bytes;
			length	-= bytes;
			addr	+= bytes;
		}
	}

	dma->sge_offset = offset;
	dma->resid	= resid;

	if (mr)
		rxe_drop_ref(mr);

	return 0;

err2:
	if (mr)
		rxe_drop_ref(mr);
err1:
	return err;
}

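/* Skip over length bytes of the sg list described by the dma
 * descriptor without copying any data; only cur_sge, sge_offset
 * and resid are updated.
 */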
int advance_dma_data(struct rxe_dma_info *dma, unsigned int length)
{
	struct rxe_sge		*sge	= &dma->sge[dma->cur_sge];
	int			offset	= dma->sge_offset;
	int			resid	= dma->resid;

	while (length) {
		unsigned int bytes;

		if (offset >= sge->length) {
			sge++;
			dma->cur_sge++;
			offset = 0;
			if (dma->cur_sge >= dma->num_sge)
				return -ENOSPC;
		}

		bytes = length;

		if (bytes > sge->length - offset)
			bytes = sge->length - offset;

		offset	+= bytes;
		resid	-= bytes;
		length	-= bytes;
	}

	dma->sge_offset = offset;
	dma->resid	= resid;

	return 0;
}

/* (1) find the mr corresponding to lkey/rkey
 *     depending on lookup_type
 * (2) verify that the (qp) pd matches the mr pd
 * (3) verify that the mr can support the requested access
 * (4) verify that mr state is valid
 */
struct rxe_mr *lookup_mr(struct rxe_pd *pd, int access, u32 key,
			 enum rxe_mr_lookup_type type)
{
	struct rxe_mr *mr;
	struct rxe_dev *rxe = to_rdev(pd->ibpd.device);
	int index = key >> 8;

	mr = rxe_pool_get_index(&rxe->mr_pool, index);
	if (!mr)
		return NULL;

	if (unlikely((type == RXE_LOOKUP_LOCAL && mr->lkey != key) ||
		     (type == RXE_LOOKUP_REMOTE && mr->rkey != key) ||
		     mr_pd(mr) != pd || (access && !(access & mr->access)) ||
		     mr->state != RXE_MR_STATE_VALID)) {
		rxe_drop_ref(mr);
		mr = NULL;
	}

	return mr;
}

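/* Invalidate the MR identified by key: verify that the key matches
 * the MR and that no memory windows are still bound to it, then move
 * the MR to the free state so it can be registered again.
 */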
int rxe_invalidate_mr(struct rxe_qp *qp, u32 key)
{
	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
	struct rxe_mr *mr;
	int ret;

	mr = rxe_pool_get_index(&rxe->mr_pool, key >> 8);
	if (!mr) {
		pr_err("%s: No MR for key %#x\n", __func__, key);
		ret = -EINVAL;
		goto err;
	}

	if (mr->rkey ? (key != mr->rkey) : (key != mr->lkey)) {
		pr_err("%s: wr key (%#x) doesn't match mr key (%#x)\n",
			__func__, key, (mr->rkey ? mr->rkey : mr->lkey));
		ret = -EINVAL;
		goto err_drop_ref;
	}

	if (atomic_read(&mr->num_mw) > 0) {
		pr_warn("%s: Attempt to invalidate an MR while bound to MWs\n",
			__func__);
		ret = -EINVAL;
		goto err_drop_ref;
	}

	mr->state = RXE_MR_STATE_FREE;
	ret = 0;

err_drop_ref:
	rxe_drop_ref(mr);
err:
	return ret;
}

/* A user can (re)register a fast MR by executing a REG_MR WQE and is
 * expected to hold a reference on the ib mr until the WQE completes.
 * Once a fast MR is created this is the only way to change the
 * private keys. It is the responsibility of the user to keep the
 * ib mr keys in sync with the rxe mr keys.
 */
int rxe_reg_fast_mr(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
{
	struct rxe_mr *mr = to_rmr(wqe->wr.wr.reg.mr);
	u32 key = wqe->wr.wr.reg.key;
	u32 access = wqe->wr.wr.reg.access;

	/* user can only register MR in free state */
	if (unlikely(mr->state != RXE_MR_STATE_FREE)) {
		pr_warn("%s: mr->lkey = 0x%x not free\n",
			__func__, mr->lkey);
		return -EINVAL;
	}

	/* user can only register mr with qp in same protection domain */
	if (unlikely(qp->ibqp.pd != mr->ibmr.pd)) {
		pr_warn("%s: qp->pd and mr->pd don't match\n",
			__func__);
		return -EINVAL;
	}

	/* user is only allowed to change key portion of l/rkey */
	if (unlikely((mr->lkey & ~0xff) != (key & ~0xff))) {
		pr_warn("%s: key = 0x%x has wrong index mr->lkey = 0x%x\n",
			__func__, key, mr->lkey);
		return -EINVAL;
	}

	mr->access = access;
	mr->lkey = key;
	mr->rkey = (access & IB_ACCESS_REMOTE) ? key : 0;
	mr->iova = wqe->wr.wr.reg.mr->iova;
	mr->state = RXE_MR_STATE_VALID;

	return 0;
}

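/* Deregister an MR (the ib_dereg_mr verb). This fails while memory
 * windows are still bound; otherwise the MR is marked as a zombie and
 * its references are dropped so the pool can clean it up.
 */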
int rxe_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
{
	struct rxe_mr *mr = to_rmr(ibmr);

	if (atomic_read(&mr->num_mw) > 0) {
		pr_warn("%s: Attempt to deregister an MR while bound to MWs\n",
			__func__);
		return -EINVAL;
	}

	mr->state = RXE_MR_STATE_ZOMBIE;
	rxe_drop_ref(mr_pd(mr));
	rxe_drop_index(mr);
	rxe_drop_ref(mr);

	return 0;
}

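/* Pool element cleanup, called once the MR's last reference has been
 * dropped: release the pinned umem (if any) and free the page map.
 */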
void rxe_mr_cleanup(struct rxe_pool_entry *arg)
{
	struct rxe_mr *mr = container_of(arg, typeof(*mr), pelem);
	int i;

	ib_umem_release(mr->umem);

	if (mr->map) {
		for (i = 0; i < mr->num_map; i++)
			kfree(mr->map[i]);

		kfree(mr->map);
	}
}