// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
 */

#include "rxe.h"
#include "rxe_loc.h"

/*
 * lfsr (linear feedback shift register) with period 255
 */
static u8 rxe_get_key(void)
{
        static u32 key = 1;

        key = key << 1;

        key |= (0 != (key & 0x100)) ^ (0 != (key & 0x10))
                ^ (0 != (key & 0x80)) ^ (0 != (key & 0x40));

        key &= 0xff;

        return key;
}
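
/*
 * A quick sanity check of the taps above: the static seed starts at 1,
 * and successive calls return 0x02, 0x04, 0x08, 0x11, 0x22, 0x45,
 * 0x8b, ... Assuming the feedback really is maximal-length, as the
 * comment above claims, the low byte does not repeat until 255 keys
 * have been handed out. Even if two MRs did draw the same key byte,
 * the pool index folded into the upper bits by rxe_mem_init() keeps
 * the full lkeys distinct.
 */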

int mem_check_range(struct rxe_mem *mem, u64 iova, size_t length)
{
        switch (mem->type) {
        case RXE_MEM_TYPE_DMA:
                return 0;

        case RXE_MEM_TYPE_MR:
        case RXE_MEM_TYPE_FMR:
                if (iova < mem->iova ||
                    length > mem->length ||
                    iova > mem->iova + mem->length - length)
                        return -EFAULT;
                return 0;

        default:
                return -EFAULT;
        }
}
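
/*
 * Worked example of the check above (values are hypothetical): for an
 * MR with mem->iova = 0x1000 and mem->length = 0x2000, a request for
 * iova = 0x1800, length = 0x800 passes all three tests
 * (0x1800 <= 0x1000 + 0x2000 - 0x800 = 0x2800), while iova = 0x2c00,
 * length = 0x800 fails the third and returns -EFAULT. Writing the
 * upper bound as "iova > mem->iova + mem->length - length" avoids
 * computing iova + length, which could wrap for hostile iova values.
 */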

#define IB_ACCESS_REMOTE        (IB_ACCESS_REMOTE_READ          \
                                 | IB_ACCESS_REMOTE_WRITE       \
                                 | IB_ACCESS_REMOTE_ATOMIC)

static void rxe_mem_init(int access, struct rxe_mem *mem)
{
        u32 lkey = mem->pelem.index << 8 | rxe_get_key();
        u32 rkey = (access & IB_ACCESS_REMOTE) ? lkey : 0;

        mem->ibmr.lkey = lkey;
        mem->ibmr.rkey = rkey;
        mem->state = RXE_MEM_STATE_INVALID;
        mem->type = RXE_MEM_TYPE_NONE;
        mem->map_shift = ilog2(RXE_BUF_PER_MAP);
}
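
/*
 * Key layout, for reference: the MR's pool index occupies the upper
 * 24 bits of the lkey and the lfsr output the low 8 bits, so a
 * hypothetical index of 5 combined with a key byte of 0x11 yields
 * lkey 0x511. The rkey is only populated when the caller asked for
 * some form of remote access; lookup_mem() below relies on this
 * split to recover the MR from either key.
 */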

void rxe_mem_cleanup(struct rxe_pool_entry *arg)
{
        struct rxe_mem *mem = container_of(arg, typeof(*mem), pelem);
        int i;

        ib_umem_release(mem->umem);

        if (mem->map) {
                for (i = 0; i < mem->num_map; i++)
                        kfree(mem->map[i]);

                kfree(mem->map);
        }
}

static int rxe_mem_alloc(struct rxe_mem *mem, int num_buf)
{
        int i;
        int num_map;
        struct rxe_map **map = mem->map;

        num_map = (num_buf + RXE_BUF_PER_MAP - 1) / RXE_BUF_PER_MAP;

        mem->map = kmalloc_array(num_map, sizeof(*map), GFP_KERNEL);
        if (!mem->map)
                goto err1;

        for (i = 0; i < num_map; i++) {
                mem->map[i] = kmalloc(sizeof(**map), GFP_KERNEL);
                if (!mem->map[i])
                        goto err2;
        }

        BUILD_BUG_ON(!is_power_of_2(RXE_BUF_PER_MAP));

        mem->map_shift = ilog2(RXE_BUF_PER_MAP);
        mem->map_mask = RXE_BUF_PER_MAP - 1;

        mem->num_buf = num_buf;
        mem->num_map = num_map;
        mem->max_buf = num_map * RXE_BUF_PER_MAP;

        return 0;

err2:
        for (i--; i >= 0; i--)
                kfree(mem->map[i]);

        kfree(mem->map);
        mem->map = NULL;
err1:
        return -ENOMEM;
}
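
/*
 * Sketch of the resulting two-level table, with hypothetical numbers:
 * RXE_BUF_PER_MAP is PAGE_SIZE / sizeof(struct rxe_phys_buf), i.e.
 * typically 256 on a 4 KiB-page system with a 16-byte rxe_phys_buf,
 * so registering 1000 pages allocates DIV_ROUND_UP(1000, 256) = 4
 * rxe_map blocks and leaves max_buf = 1024. Keeping RXE_BUF_PER_MAP a
 * power of two (the BUILD_BUG_ON above) is what lets lookup_iova()
 * use shift/mask instead of divide/modulo. mem->map is reset to NULL
 * on failure so that rxe_mem_cleanup() cannot free the partial table
 * a second time.
 */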

void rxe_mem_init_dma(struct rxe_pd *pd,
                      int access, struct rxe_mem *mem)
{
        rxe_mem_init(access, mem);

        mem->ibmr.pd = &pd->ibpd;
        mem->access = access;
        mem->state = RXE_MEM_STATE_VALID;
        mem->type = RXE_MEM_TYPE_DMA;
}
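
/*
 * A DMA-type mem carries no page map at all: mem_check_range()
 * accepts any range for it, and iova_to_vaddr() returns the iova cast
 * straight to a kernel pointer, so it effectively exposes the
 * kernel's own address space (e.g. through the PD's local DMA lkey).
 */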

int rxe_mem_init_user(struct rxe_pd *pd, u64 start,
                      u64 length, u64 iova, int access, struct ib_udata *udata,
                      struct rxe_mem *mem)
{
        struct rxe_map **map;
        struct rxe_phys_buf *buf = NULL;
        struct ib_umem *umem;
        struct sg_page_iter sg_iter;
        int num_buf;
        void *vaddr;
        int err;

        umem = ib_umem_get(pd->ibpd.device, start, length, access);
        if (IS_ERR(umem)) {
                pr_warn("err %d from ib_umem_get\n",
                        (int)PTR_ERR(umem));
                err = PTR_ERR(umem);
                goto err1;
        }

        num_buf = ib_umem_num_pages(umem);

        rxe_mem_init(access, mem);

        err = rxe_mem_alloc(mem, num_buf);
        if (err) {
                pr_warn("err %d from rxe_mem_alloc\n", err);
                ib_umem_release(umem);
                goto err1;
        }

        mem->page_shift = PAGE_SHIFT;
        mem->page_mask = PAGE_SIZE - 1;

        num_buf = 0;
        map = mem->map;
        if (length > 0) {
                buf = map[0]->buf;

                for_each_sg_page(umem->sg_head.sgl, &sg_iter, umem->nmap, 0) {
                        if (num_buf >= RXE_BUF_PER_MAP) {
                                map++;
                                buf = map[0]->buf;
                                num_buf = 0;
                        }

                        vaddr = page_address(sg_page_iter_page(&sg_iter));
                        if (!vaddr) {
                                pr_warn("null vaddr\n");
                                ib_umem_release(umem);
                                err = -ENOMEM;
                                goto err1;
                        }

                        buf->addr = (uintptr_t)vaddr;
                        buf->size = PAGE_SIZE;
                        num_buf++;
                        buf++;
                }
        }

        mem->ibmr.pd = &pd->ibpd;
        mem->umem = umem;
        mem->access = access;
        mem->length = length;
        mem->iova = iova;
        mem->va = start;
        mem->offset = ib_umem_offset(umem);
        mem->state = RXE_MEM_STATE_VALID;
        mem->type = RXE_MEM_TYPE_MR;

        return 0;

err1:
        return err;
}
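
/*
 * Note on the loop above: each pinned umem page is recorded as one
 * rxe_phys_buf holding its kernel virtual address and PAGE_SIZE. This
 * relies on page_address(), which returns NULL for highmem pages that
 * have no permanent kernel mapping; that is what the "null vaddr"
 * bail-out guards against. mem->umem is only assigned once setup has
 * succeeded, so the error paths that release the umem here do not
 * leave a dangling pointer for rxe_mem_cleanup() to release again.
 */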

int rxe_mem_init_fast(struct rxe_pd *pd,
                      int max_pages, struct rxe_mem *mem)
{
        int err;

        rxe_mem_init(0, mem);

        /* In fastreg, we also set the rkey */
        mem->ibmr.rkey = mem->ibmr.lkey;

        err = rxe_mem_alloc(mem, max_pages);
        if (err)
                goto err1;

        mem->ibmr.pd = &pd->ibpd;
        mem->max_buf = max_pages;
        mem->state = RXE_MEM_STATE_FREE;
        mem->type = RXE_MEM_TYPE_MR;

        return 0;

err1:
        return err;
}
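
/*
 * A fast-reg MR is created empty: it starts in RXE_MEM_STATE_FREE
 * with room for max_pages buffers, and only becomes usable once a
 * later IB_WR_REG_MR work request supplies the page list and iova and
 * moves it to RXE_MEM_STATE_VALID.
 */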

static void lookup_iova(
        struct rxe_mem *mem,
        u64 iova,
        int *m_out,
        int *n_out,
        size_t *offset_out)
{
        size_t offset = iova - mem->iova + mem->offset;
        int map_index;
        int buf_index;
        u64 length;

        if (likely(mem->page_shift)) {
                *offset_out = offset & mem->page_mask;
                offset >>= mem->page_shift;
                *n_out = offset & mem->map_mask;
                *m_out = offset >> mem->map_shift;
        } else {
                map_index = 0;
                buf_index = 0;

                length = mem->map[map_index]->buf[buf_index].size;

                while (offset >= length) {
                        offset -= length;
                        buf_index++;

                        if (buf_index == RXE_BUF_PER_MAP) {
                                map_index++;
                                buf_index = 0;
                        }
                        length = mem->map[map_index]->buf[buf_index].size;
                }

                *m_out = map_index;
                *n_out = buf_index;
                *offset_out = offset;
        }
}
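
/*
 * Fast-path arithmetic, with hypothetical numbers: take page_shift =
 * 12 (4 KiB pages), map_mask = 0xff and map_shift = 8 (256 bufs per
 * map). For a byte offset of 0x345678 into the MR, offset_out becomes
 * 0x345678 & 0xfff = 0x678, the page number is 0x345678 >> 12 =
 * 0x345, so n_out = 0x345 & 0xff = 0x45 and m_out = 0x345 >> 8 = 3:
 * buffer 69 of map 3. The slow path walks the variable-sized buffers
 * one by one and is only needed when page_shift is zero.
 */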

void *iova_to_vaddr(struct rxe_mem *mem, u64 iova, int length)
{
        size_t offset;
        int m, n;
        void *addr;

        if (mem->state != RXE_MEM_STATE_VALID) {
                pr_warn("mem not in valid state\n");
                addr = NULL;
                goto out;
        }

        if (!mem->map) {
                addr = (void *)(uintptr_t)iova;
                goto out;
        }

        if (mem_check_range(mem, iova, length)) {
                pr_warn("range violation\n");
                addr = NULL;
                goto out;
        }

        lookup_iova(mem, iova, &m, &n, &offset);

        if (offset + length > mem->map[m]->buf[n].size) {
                pr_warn("crosses page boundary\n");
                addr = NULL;
                goto out;
        }

        addr = (void *)(uintptr_t)mem->map[m]->buf[n].addr + offset;

out:
        return addr;
}
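
/*
 * The returned pointer is only usable for accesses that stay inside a
 * single rxe_phys_buf, hence the "crosses page boundary" check;
 * callers that need to span buffers (e.g. rxe_mem_copy() below) walk
 * the map themselves instead.
 */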

/* copy data from a range (vaddr, vaddr+length-1) to or from
 * a mem object starting at iova. Compute the incremental value of
 * crc32 if crcp is not NULL. The caller must hold a reference to mem.
 */
int rxe_mem_copy(struct rxe_mem *mem, u64 iova, void *addr, int length,
                 enum copy_direction dir, u32 *crcp)
{
        int err;
        int bytes;
        u8 *va;
        struct rxe_map **map;
        struct rxe_phys_buf *buf;
        int m;
        int i;
        size_t offset;
        u32 crc = crcp ? (*crcp) : 0;

        if (length == 0)
                return 0;

        if (mem->type == RXE_MEM_TYPE_DMA) {
                u8 *src, *dest;

                src = (dir == to_mem_obj) ?
                        addr : ((void *)(uintptr_t)iova);

                dest = (dir == to_mem_obj) ?
                        ((void *)(uintptr_t)iova) : addr;

                memcpy(dest, src, length);

                if (crcp)
                        *crcp = rxe_crc32(to_rdev(mem->ibmr.device),
                                          *crcp, dest, length);

                return 0;
        }

        WARN_ON_ONCE(!mem->map);

        err = mem_check_range(mem, iova, length);
        if (err) {
                err = -EFAULT;
                goto err1;
        }

        lookup_iova(mem, iova, &m, &i, &offset);

        map = mem->map + m;
        buf = map[0]->buf + i;

        while (length > 0) {
                u8 *src, *dest;

                va = (u8 *)(uintptr_t)buf->addr + offset;
                src = (dir == to_mem_obj) ? addr : va;
                dest = (dir == to_mem_obj) ? va : addr;

                bytes = buf->size - offset;

                if (bytes > length)
                        bytes = length;

                memcpy(dest, src, bytes);

                if (crcp)
                        crc = rxe_crc32(to_rdev(mem->ibmr.device),
                                        crc, dest, bytes);

                length -= bytes;
                addr += bytes;

                offset = 0;
                buf++;
                i++;

                if (i == RXE_BUF_PER_MAP) {
                        i = 0;
                        map++;
                        buf = map[0]->buf;
                }
        }

        if (crcp)
                *crcp = crc;

        return 0;

err1:
        return err;
}
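
/*
 * Typical use, sketched (the exact call sites live elsewhere in the
 * driver): the responder copies an incoming RDMA write payload into
 * the target MR with something like
 *
 *      err = rxe_mem_copy(mem, iova, payload_addr(pkt),
 *                         payload_size(pkt), to_mem_obj, &crc);
 *
 * folding the bytes into the running ICRC as it goes, while a read
 * response uses from_mem_obj to pull data out of the MR.
 */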

/* copy data in or out of a wqe, i.e. sg list
 * under the control of a dma descriptor
 */
int copy_data(
        struct rxe_pd *pd,
        int access,
        struct rxe_dma_info *dma,
        void *addr,
        int length,
        enum copy_direction dir,
        u32 *crcp)
{
        int bytes;
        struct rxe_sge *sge = &dma->sge[dma->cur_sge];
        int offset = dma->sge_offset;
        int resid = dma->resid;
        struct rxe_mem *mem = NULL;
        u64 iova;
        int err;

        if (length == 0)
                return 0;

        if (length > resid) {
                err = -EINVAL;
                goto err2;
        }

        if (sge->length && (offset < sge->length)) {
                mem = lookup_mem(pd, access, sge->lkey, lookup_local);
                if (!mem) {
                        err = -EINVAL;
                        goto err1;
                }
        }

        while (length > 0) {
                bytes = length;

                if (offset >= sge->length) {
                        if (mem) {
                                rxe_drop_ref(mem);
                                mem = NULL;
                        }
                        sge++;
                        dma->cur_sge++;
                        offset = 0;

                        if (dma->cur_sge >= dma->num_sge) {
                                err = -ENOSPC;
                                goto err2;
                        }

                        if (sge->length) {
                                mem = lookup_mem(pd, access, sge->lkey,
                                                 lookup_local);
                                if (!mem) {
                                        err = -EINVAL;
                                        goto err1;
                                }
                        } else {
                                continue;
                        }
                }

                if (bytes > sge->length - offset)
                        bytes = sge->length - offset;

                if (bytes > 0) {
                        iova = sge->addr + offset;

                        err = rxe_mem_copy(mem, iova, addr, bytes, dir, crcp);
                        if (err)
                                goto err2;

                        offset += bytes;
                        resid -= bytes;
                        length -= bytes;
                        addr += bytes;
                }
        }

        dma->sge_offset = offset;
        dma->resid = resid;

        if (mem)
                rxe_drop_ref(mem);

        return 0;

err2:
        if (mem)
                rxe_drop_ref(mem);
err1:
        return err;
}
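
/*
 * Because dma->cur_sge, dma->sge_offset and dma->resid are only
 * written back on success, a WQE's sg list can be consumed across
 * several copy_data() calls (one per packet, say) and each call
 * resumes exactly where the previous one stopped.
 */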

int advance_dma_data(struct rxe_dma_info *dma, unsigned int length)
{
        struct rxe_sge *sge = &dma->sge[dma->cur_sge];
        int offset = dma->sge_offset;
        int resid = dma->resid;

        while (length) {
                unsigned int bytes;

                if (offset >= sge->length) {
                        sge++;
                        dma->cur_sge++;
                        offset = 0;
                        if (dma->cur_sge >= dma->num_sge)
                                return -ENOSPC;
                }

                bytes = length;

                if (bytes > sge->length - offset)
                        bytes = sge->length - offset;

                offset += bytes;
                resid -= bytes;
                length -= bytes;
        }

        dma->sge_offset = offset;
        dma->resid = resid;

        return 0;
}
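
/*
 * advance_dma_data() is the no-copy twin of copy_data(): it moves the
 * same cursor state forward without touching memory, which is useful
 * when a stretch of the transfer has already been handled elsewhere
 * and only the bookkeeping needs to catch up.
 */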

/* (1) find the mem (mr or mw) corresponding to lkey/rkey
 *     depending on lookup_type
 * (2) verify that the (qp) pd matches the mem pd
 * (3) verify that the mem can support the requested access
 * (4) verify that mem state is valid
 */
struct rxe_mem *lookup_mem(struct rxe_pd *pd, int access, u32 key,
                           enum lookup_type type)
{
        struct rxe_mem *mem;
        struct rxe_dev *rxe = to_rdev(pd->ibpd.device);
        int index = key >> 8;

        mem = rxe_pool_get_index(&rxe->mr_pool, index);
        if (!mem)
                return NULL;

        if (unlikely((type == lookup_local && mr_lkey(mem) != key) ||
                     (type == lookup_remote && mr_rkey(mem) != key) ||
                     mr_pd(mem) != pd ||
                     (access && !(access & mem->access)) ||
                     mem->state != RXE_MEM_STATE_VALID)) {
                rxe_drop_ref(mem);
                mem = NULL;
        }

        return mem;
}
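
/*
 * Continuing the earlier (hypothetical) example: a request quoting key
 * 0x511 looks up index 0x511 >> 8 = 5 in the mr_pool, after which the
 * full 32-bit value must still match the stored lkey or rkey, so a
 * stale or guessed low byte is rejected even when the index happens
 * to be live.
 */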