// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
 */

#include "rxe.h"
#include "rxe_loc.h"

/* Return a random 8 bit key value that is
 * different than the last_key. Set last_key to -1
 * if this is the first key for an MR or MW
 */
u8 rxe_get_next_key(u32 last_key)
{
	u8 key;

	do {
		get_random_bytes(&key, 1);
	} while (key == last_key);

	return key;
}

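/* Check that [iova, iova + length) lies within the registered range
 * of the MR. DMA MRs cover all of memory, so they always pass.
 */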
int mr_check_range(struct rxe_mr *mr, u64 iova, size_t length)
{
	switch (mr->type) {
	case RXE_MR_TYPE_DMA:
		return 0;

	case RXE_MR_TYPE_MR:
		if (iova < mr->iova || length > mr->length ||
		    iova > mr->iova + mr->length - length)
			return -EFAULT;
		return 0;

	default:
		return -EFAULT;
	}
}

#define IB_ACCESS_REMOTE	(IB_ACCESS_REMOTE_READ		\
				 | IB_ACCESS_REMOTE_WRITE	\
				 | IB_ACCESS_REMOTE_ATOMIC)

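/* Common MR setup: build the lkey from the pool index in the upper bits
 * and a random 8 bit key in the low byte; the rkey is only non-zero when
 * remote access is requested. The MR starts out invalid and untyped.
 */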
static void rxe_mr_init(int access, struct rxe_mr *mr)
{
	u32 lkey = mr->pelem.index << 8 | rxe_get_next_key(-1);
	u32 rkey = (access & IB_ACCESS_REMOTE) ? lkey : 0;

	/* set ibmr->l/rkey and also copy into private l/rkey.
	 * For user MRs these will always be the same; for cases where
	 * the caller 'owns' the key portion they may be different
	 * until the REG_MR WQE is executed.
	 */
	mr->lkey = mr->ibmr.lkey = lkey;
	mr->rkey = mr->ibmr.rkey = rkey;

	mr->state = RXE_MR_STATE_INVALID;
	mr->type = RXE_MR_TYPE_NONE;
	mr->map_shift = ilog2(RXE_BUF_PER_MAP);
}

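/* Allocate the two-level map table: num_map struct rxe_map entries, each
 * holding RXE_BUF_PER_MAP physical buffer descriptors, enough to cover
 * num_buf buffers.
 */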
static int rxe_mr_alloc(struct rxe_mr *mr, int num_buf)
{
	int i;
	int num_map;
	struct rxe_map **map = mr->map;

	num_map = (num_buf + RXE_BUF_PER_MAP - 1) / RXE_BUF_PER_MAP;

	mr->map = kmalloc_array(num_map, sizeof(*map), GFP_KERNEL);
	if (!mr->map)
		goto err1;

	for (i = 0; i < num_map; i++) {
		mr->map[i] = kmalloc(sizeof(**map), GFP_KERNEL);
		if (!mr->map[i])
			goto err2;
	}

	BUILD_BUG_ON(!is_power_of_2(RXE_BUF_PER_MAP));

	mr->map_shift = ilog2(RXE_BUF_PER_MAP);
	mr->map_mask = RXE_BUF_PER_MAP - 1;

	mr->num_buf = num_buf;
	mr->num_map = num_map;
	mr->max_buf = num_map * RXE_BUF_PER_MAP;

	return 0;

err2:
	for (i--; i >= 0; i--)
		kfree(mr->map[i]);

	kfree(mr->map);
err1:
	return -ENOMEM;
}

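/* Set up an MR that covers all of memory (DMA MR); no map table is
 * allocated since the iova is used directly as a kernel virtual address.
 */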
void rxe_mr_init_dma(struct rxe_pd *pd, int access, struct rxe_mr *mr)
{
	rxe_mr_init(access, mr);

	mr->ibmr.pd = &pd->ibpd;
	mr->access = access;
	mr->state = RXE_MR_STATE_VALID;
	mr->type = RXE_MR_TYPE_DMA;
}

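/* Set up a user MR: pin the user pages with ib_umem_get() and record the
 * kernel virtual address of each page in the map table.
 */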
int rxe_mr_init_user(struct rxe_pd *pd, u64 start, u64 length, u64 iova,
		     int access, struct rxe_mr *mr)
{
	struct rxe_map **map;
	struct rxe_phys_buf *buf = NULL;
	struct ib_umem *umem;
	struct sg_page_iter sg_iter;
	int num_buf;
	void *vaddr;
	int err;
	int i;

	umem = ib_umem_get(pd->ibpd.device, start, length, access);
	if (IS_ERR(umem)) {
		pr_warn("%s: Unable to pin memory region err = %d\n",
			__func__, (int)PTR_ERR(umem));
		err = PTR_ERR(umem);
		goto err_out;
	}

	num_buf = ib_umem_num_pages(umem);

	rxe_mr_init(access, mr);

	err = rxe_mr_alloc(mr, num_buf);
	if (err) {
		pr_warn("%s: Unable to allocate memory for map\n",
			__func__);
		goto err_release_umem;
	}

	mr->page_shift = PAGE_SHIFT;
	mr->page_mask = PAGE_SIZE - 1;

	num_buf = 0;
	map = mr->map;
	if (length > 0) {
		buf = map[0]->buf;

		for_each_sgtable_page (&umem->sgt_append.sgt, &sg_iter, 0) {
			if (num_buf >= RXE_BUF_PER_MAP) {
				map++;
				buf = map[0]->buf;
				num_buf = 0;
			}

			vaddr = page_address(sg_page_iter_page(&sg_iter));
			if (!vaddr) {
				pr_warn("%s: Unable to get virtual address\n",
					__func__);
				err = -ENOMEM;
				goto err_cleanup_map;
			}

			buf->addr = (uintptr_t)vaddr;
			buf->size = PAGE_SIZE;
			num_buf++;
			buf++;
		}
	}

	mr->ibmr.pd = &pd->ibpd;
	mr->umem = umem;
	mr->access = access;
	mr->length = length;
	mr->iova = iova;
	mr->va = start;
	mr->offset = ib_umem_offset(umem);
	mr->state = RXE_MR_STATE_VALID;
	mr->type = RXE_MR_TYPE_MR;

	return 0;

err_cleanup_map:
	for (i = 0; i < mr->num_map; i++)
		kfree(mr->map[i]);
	kfree(mr->map);
err_release_umem:
	ib_umem_release(umem);
err_out:
	return err;
}

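/* Set up a fast-register MR: allocate an empty map table big enough for
 * max_pages buffers; the buffer entries are filled in later when the MR
 * is mapped and then registered by a REG_MR WQE.
 */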
int rxe_mr_init_fast(struct rxe_pd *pd, int max_pages, struct rxe_mr *mr)
{
	int err;

	/* always allow remote access for FMRs */
	rxe_mr_init(IB_ACCESS_REMOTE, mr);

	err = rxe_mr_alloc(mr, max_pages);
	if (err)
		goto err1;

	mr->ibmr.pd = &pd->ibpd;
	mr->max_buf = max_pages;
	mr->state = RXE_MR_STATE_FREE;
	mr->type = RXE_MR_TYPE_MR;

	return 0;

err1:
	return err;
}

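/* Translate an iova inside the MR into a map index (*m_out), a buffer
 * index (*n_out) and the byte offset within that buffer (*offset_out).
 */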
static void lookup_iova(struct rxe_mr *mr, u64 iova, int *m_out, int *n_out,
			size_t *offset_out)
{
	size_t offset = iova - mr->iova + mr->offset;
	int map_index;
	int buf_index;
	u64 length;

	if (likely(mr->page_shift)) {
		*offset_out = offset & mr->page_mask;
		offset >>= mr->page_shift;
		*n_out = offset & mr->map_mask;
		*m_out = offset >> mr->map_shift;
	} else {
		map_index = 0;
		buf_index = 0;

		length = mr->map[map_index]->buf[buf_index].size;

		while (offset >= length) {
			offset -= length;
			buf_index++;

			if (buf_index == RXE_BUF_PER_MAP) {
				map_index++;
				buf_index = 0;
			}
			length = mr->map[map_index]->buf[buf_index].size;
		}

		*m_out = map_index;
		*n_out = buf_index;
		*offset_out = offset;
	}
}

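/* Return the kernel virtual address of the buffer holding
 * [iova, iova + length), or NULL if the MR is not valid, the range falls
 * outside the MR or the range spans more than one buffer.
 */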
void *iova_to_vaddr(struct rxe_mr *mr, u64 iova, int length)
{
	size_t offset;
	int m, n;
	void *addr;

	if (mr->state != RXE_MR_STATE_VALID) {
		pr_warn("mr not in valid state\n");
		addr = NULL;
		goto out;
	}

	if (!mr->map) {
		addr = (void *)(uintptr_t)iova;
		goto out;
	}

	if (mr_check_range(mr, iova, length)) {
		pr_warn("range violation\n");
		addr = NULL;
		goto out;
	}

	lookup_iova(mr, iova, &m, &n, &offset);

	if (offset + length > mr->map[m]->buf[n].size) {
		pr_warn("crosses page boundary\n");
		addr = NULL;
		goto out;
	}

	addr = (void *)(uintptr_t)mr->map[m]->buf[n].addr + offset;

out:
	return addr;
}

/* copy data from a range (vaddr, vaddr+length-1) to or from
 * a mr object starting at iova.
 */
int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length,
		enum rxe_mr_copy_dir dir)
{
	int err;
	int bytes;
	u8 *va;
	struct rxe_map **map;
	struct rxe_phys_buf *buf;
	int m;
	int i;
	size_t offset;

	if (length == 0)
		return 0;

	if (mr->type == RXE_MR_TYPE_DMA) {
		u8 *src, *dest;

		src = (dir == RXE_TO_MR_OBJ) ? addr : ((void *)(uintptr_t)iova);

		dest = (dir == RXE_TO_MR_OBJ) ? ((void *)(uintptr_t)iova) : addr;

		memcpy(dest, src, length);

		return 0;
	}

	WARN_ON_ONCE(!mr->map);

	err = mr_check_range(mr, iova, length);
	if (err) {
		err = -EFAULT;
		goto err1;
	}

	lookup_iova(mr, iova, &m, &i, &offset);

	map = mr->map + m;
	buf = map[0]->buf + i;

	while (length > 0) {
		u8 *src, *dest;

		va = (u8 *)(uintptr_t)buf->addr + offset;
		src = (dir == RXE_TO_MR_OBJ) ? addr : va;
		dest = (dir == RXE_TO_MR_OBJ) ? va : addr;

		bytes = buf->size - offset;

		if (bytes > length)
			bytes = length;

		memcpy(dest, src, bytes);

		length -= bytes;
		addr += bytes;

		offset = 0;
		buf++;
		i++;

		if (i == RXE_BUF_PER_MAP) {
			i = 0;
			map++;
			buf = map[0]->buf;
		}
	}

	return 0;

err1:
	return err;
}

/* copy data in or out of a wqe, i.e. sg list
 * under the control of a dma descriptor
 */
int copy_data(
	struct rxe_pd *pd,
	int access,
	struct rxe_dma_info *dma,
	void *addr,
	int length,
	enum rxe_mr_copy_dir dir)
{
	int bytes;
	struct rxe_sge *sge = &dma->sge[dma->cur_sge];
	int offset = dma->sge_offset;
	int resid = dma->resid;
	struct rxe_mr *mr = NULL;
	u64 iova;
	int err;

	if (length == 0)
		return 0;

	if (length > resid) {
		err = -EINVAL;
		goto err2;
	}

	if (sge->length && (offset < sge->length)) {
		mr = lookup_mr(pd, access, sge->lkey, RXE_LOOKUP_LOCAL);
		if (!mr) {
			err = -EINVAL;
			goto err1;
		}
	}

	while (length > 0) {
		bytes = length;

		if (offset >= sge->length) {
			if (mr) {
				rxe_drop_ref(mr);
				mr = NULL;
			}
			sge++;
			dma->cur_sge++;
			offset = 0;

			if (dma->cur_sge >= dma->num_sge) {
				err = -ENOSPC;
				goto err2;
			}

			if (sge->length) {
				mr = lookup_mr(pd, access, sge->lkey,
					       RXE_LOOKUP_LOCAL);
				if (!mr) {
					err = -EINVAL;
					goto err1;
				}
			} else {
				continue;
			}
		}

		if (bytes > sge->length - offset)
			bytes = sge->length - offset;

		if (bytes > 0) {
			iova = sge->addr + offset;

			err = rxe_mr_copy(mr, iova, addr, bytes, dir);
			if (err)
				goto err2;

			offset += bytes;
			resid -= bytes;
			length -= bytes;
			addr += bytes;
		}
	}

	dma->sge_offset = offset;
	dma->resid = resid;

	if (mr)
		rxe_drop_ref(mr);

	return 0;

err2:
	if (mr)
		rxe_drop_ref(mr);
err1:
	return err;
}

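/* Advance the sg list in a dma descriptor by length bytes without
 * copying any data.
 */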
int advance_dma_data(struct rxe_dma_info *dma, unsigned int length)
{
	struct rxe_sge *sge = &dma->sge[dma->cur_sge];
	int offset = dma->sge_offset;
	int resid = dma->resid;

	while (length) {
		unsigned int bytes;

		if (offset >= sge->length) {
			sge++;
			dma->cur_sge++;
			offset = 0;
			if (dma->cur_sge >= dma->num_sge)
				return -ENOSPC;
		}

		bytes = length;

		if (bytes > sge->length - offset)
			bytes = sge->length - offset;

		offset += bytes;
		resid -= bytes;
		length -= bytes;
	}

	dma->sge_offset = offset;
	dma->resid = resid;

	return 0;
}

/* (1) find the mr corresponding to lkey/rkey
 *     depending on lookup_type
 * (2) verify that the (qp) pd matches the mr pd
 * (3) verify that the mr can support the requested access
 * (4) verify that mr state is valid
 */
struct rxe_mr *lookup_mr(struct rxe_pd *pd, int access, u32 key,
			 enum rxe_mr_lookup_type type)
{
	struct rxe_mr *mr;
	struct rxe_dev *rxe = to_rdev(pd->ibpd.device);
	int index = key >> 8;

	mr = rxe_pool_get_index(&rxe->mr_pool, index);
	if (!mr)
		return NULL;

	if (unlikely((type == RXE_LOOKUP_LOCAL && mr->lkey != key) ||
		     (type == RXE_LOOKUP_REMOTE && mr->rkey != key) ||
		     mr_pd(mr) != pd || (access && !(access & mr->access)) ||
		     mr->state != RXE_MR_STATE_VALID)) {
		rxe_drop_ref(mr);
		mr = NULL;
	}

	return mr;
}

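/* Invalidate the MR matching key, returning it to the free state.
 * Fails if the key does not match the MR or if memory windows are
 * still bound to the MR.
 */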
int rxe_invalidate_mr(struct rxe_qp *qp, u32 key)
{
	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
	struct rxe_mr *mr;
	int ret;

	mr = rxe_pool_get_index(&rxe->mr_pool, key >> 8);
	if (!mr) {
		pr_err("%s: No MR for key %#x\n", __func__, key);
		ret = -EINVAL;
		goto err;
	}

	if (mr->rkey ? (key != mr->rkey) : (key != mr->lkey)) {
		pr_err("%s: wr key (%#x) doesn't match mr key (%#x)\n",
			__func__, key, (mr->rkey ? mr->rkey : mr->lkey));
		ret = -EINVAL;
		goto err_drop_ref;
	}

	if (atomic_read(&mr->num_mw) > 0) {
		pr_warn("%s: Attempt to invalidate an MR while bound to MWs\n",
			__func__);
		ret = -EINVAL;
		goto err_drop_ref;
	}

	mr->state = RXE_MR_STATE_FREE;
	ret = 0;

err_drop_ref:
	rxe_drop_ref(mr);
err:
	return ret;
}

/* user can (re)register fast MR by executing a REG_MR WQE.
 * user is expected to hold a reference on the ib mr until the
 * WQE completes.
 * Once a fast MR is created this is the only way to change the
 * private keys. It is the responsibility of the user to maintain
 * the ib mr keys in sync with rxe mr keys.
 */
int rxe_reg_fast_mr(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
{
	struct rxe_mr *mr = to_rmr(wqe->wr.wr.reg.mr);
	u32 key = wqe->wr.wr.reg.key;
	u32 access = wqe->wr.wr.reg.access;

	/* user can only register MR in free state */
	if (unlikely(mr->state != RXE_MR_STATE_FREE)) {
		pr_warn("%s: mr->lkey = 0x%x not free\n",
			__func__, mr->lkey);
		return -EINVAL;
	}

	/* user can only register mr with qp in same protection domain */
	if (unlikely(qp->ibqp.pd != mr->ibmr.pd)) {
		pr_warn("%s: qp->pd and mr->pd don't match\n",
			__func__);
		return -EINVAL;
	}

	/* user is only allowed to change key portion of l/rkey */
	if (unlikely((mr->lkey & ~0xff) != (key & ~0xff))) {
		pr_warn("%s: key = 0x%x has wrong index mr->lkey = 0x%x\n",
			__func__, key, mr->lkey);
		return -EINVAL;
	}

	mr->access = access;
	mr->lkey = key;
	mr->rkey = (access & IB_ACCESS_REMOTE) ? key : 0;
	mr->iova = wqe->wr.wr.reg.mr->iova;
	mr->state = RXE_MR_STATE_VALID;

	return 0;
}

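/* Deregister an MR (ib_dereg_mr verb); fails if memory windows are
 * still bound to the MR.
 */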
int rxe_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
{
	struct rxe_mr *mr = to_rmr(ibmr);

	if (atomic_read(&mr->num_mw) > 0) {
		pr_warn("%s: Attempt to deregister an MR while bound to MWs\n",
			__func__);
		return -EINVAL;
	}

	mr->state = RXE_MR_STATE_ZOMBIE;
	rxe_drop_ref(mr_pd(mr));
	rxe_drop_index(mr);
	rxe_drop_ref(mr);

	return 0;
}

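/* Pool element cleanup: release the pinned umem, if any, and free the
 * map table when the last reference to the MR is dropped.
 */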
void rxe_mr_cleanup(struct rxe_pool_entry *arg)
{
	struct rxe_mr *mr = container_of(arg, typeof(*mr), pelem);
	int i;

	ib_umem_release(mr->umem);

	if (mr->map) {
		for (i = 0; i < mr->num_map; i++)
			kfree(mr->map[i]);

		kfree(mr->map);
	}
}