1 /*
2 * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33
34 #include <linux/kref.h>
35 #include <linux/random.h>
36 #include <linux/debugfs.h>
37 #include <linux/export.h>
38 #include <linux/delay.h>
39 #include <rdma/ib_umem.h>
40 #include <rdma/ib_umem_odp.h>
41 #include <rdma/ib_verbs.h>
42 #include "mlx5_ib.h"
43
44 enum {
45 MAX_PENDING_REG_MR = 8,
46 };
47
48 #define MLX5_UMR_ALIGN 2048
49
50 static void
51 create_mkey_callback(int status, struct mlx5_async_work *context);
52
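/*
 * Fill the access-rights, PD and start-address fields that are common to
 * every mkey context built in this file. Relaxed ordering is only requested
 * when the device reports the corresponding capability bits.
 */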
53 static void set_mkc_access_pd_addr_fields(void *mkc, int acc, u64 start_addr,
54 struct ib_pd *pd)
55 {
56 struct mlx5_ib_dev *dev = to_mdev(pd->device);
57
58 MLX5_SET(mkc, mkc, a, !!(acc & IB_ACCESS_REMOTE_ATOMIC));
59 MLX5_SET(mkc, mkc, rw, !!(acc & IB_ACCESS_REMOTE_WRITE));
60 MLX5_SET(mkc, mkc, rr, !!(acc & IB_ACCESS_REMOTE_READ));
61 MLX5_SET(mkc, mkc, lw, !!(acc & IB_ACCESS_LOCAL_WRITE));
62 MLX5_SET(mkc, mkc, lr, 1);
63
64 if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write))
65 MLX5_SET(mkc, mkc, relaxed_ordering_write,
66 !!(acc & IB_ACCESS_RELAXED_ORDERING));
67 if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read))
68 MLX5_SET(mkc, mkc, relaxed_ordering_read,
69 !!(acc & IB_ACCESS_RELAXED_ORDERING));
70
71 MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
72 MLX5_SET(mkc, mkc, qpn, 0xffffff);
73 MLX5_SET64(mkc, mkc, start_addr, start_addr);
74 }
75
76 static void
77 assign_mkey_variant(struct mlx5_ib_dev *dev, struct mlx5_core_mkey *mkey,
78 u32 *in)
79 {
80 u8 key = atomic_inc_return(&dev->mkey_var);
81 void *mkc;
82
83 mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
84 MLX5_SET(mkc, mkc, mkey_7_0, key);
85 mkey->key = key;
86 }
87
88 static int
89 mlx5_ib_create_mkey(struct mlx5_ib_dev *dev, struct mlx5_core_mkey *mkey,
90 u32 *in, int inlen)
91 {
92 assign_mkey_variant(dev, mkey, in);
93 return mlx5_core_create_mkey(dev->mdev, mkey, in, inlen);
94 }
95
96 static int
97 mlx5_ib_create_mkey_cb(struct mlx5_ib_dev *dev,
98 struct mlx5_core_mkey *mkey,
99 struct mlx5_async_ctx *async_ctx,
100 u32 *in, int inlen, u32 *out, int outlen,
101 struct mlx5_async_work *context)
102 {
103 MLX5_SET(create_mkey_in, in, opcode, MLX5_CMD_OP_CREATE_MKEY);
104 assign_mkey_variant(dev, mkey, in);
105 return mlx5_cmd_exec_cb(async_ctx, in, inlen, out, outlen,
106 create_mkey_callback, context);
107 }
108
109 static void clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr);
110 static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr);
111 static int mr_cache_max_order(struct mlx5_ib_dev *dev);
112 static void queue_adjust_cache_locked(struct mlx5_cache_ent *ent);
113
114 static bool umr_can_use_indirect_mkey(struct mlx5_ib_dev *dev)
115 {
116 return !MLX5_CAP_GEN(dev->mdev, umr_indirect_mkey_disabled);
117 }
118
119 static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
120 {
121 WARN_ON(xa_load(&dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key)));
122
123 return mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey);
124 }
125
126 static inline bool mlx5_ib_pas_fits_in_mr(struct mlx5_ib_mr *mr, u64 start,
127 u64 length)
128 {
129 return ((u64)1 << mr->order) * MLX5_ADAPTER_PAGE_SIZE >=
130 length + (start & (MLX5_ADAPTER_PAGE_SIZE - 1));
131 }
132
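/*
 * Completion handler for the asynchronous CREATE_MKEY commands issued by
 * add_keys(). On failure the MR is freed and cache refilling is throttled
 * via fill_delay and the delay timer; on success the new MR is added to its
 * cache bucket and the bucket is re-evaluated against its water marks.
 */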
133 static void create_mkey_callback(int status, struct mlx5_async_work *context)
134 {
135 struct mlx5_ib_mr *mr =
136 container_of(context, struct mlx5_ib_mr, cb_work);
137 struct mlx5_ib_dev *dev = mr->dev;
138 struct mlx5_cache_ent *ent = mr->cache_ent;
139 unsigned long flags;
140
141 if (status) {
142 mlx5_ib_warn(dev, "async reg mr failed. status %d\n", status);
143 kfree(mr);
144 spin_lock_irqsave(&ent->lock, flags);
145 ent->pending--;
146 WRITE_ONCE(dev->fill_delay, 1);
147 spin_unlock_irqrestore(&ent->lock, flags);
148 mod_timer(&dev->delay_timer, jiffies + HZ);
149 return;
150 }
151
152 mr->mmkey.type = MLX5_MKEY_MR;
153 mr->mmkey.key |= mlx5_idx_to_mkey(
154 MLX5_GET(create_mkey_out, mr->out, mkey_index));
155
156 WRITE_ONCE(dev->cache.last_add, jiffies);
157
158 spin_lock_irqsave(&ent->lock, flags);
159 list_add_tail(&mr->list, &ent->head);
160 ent->available_mrs++;
161 ent->total_mrs++;
162 /* If we are doing fill_to_high_water then keep going. */
163 queue_adjust_cache_locked(ent);
164 ent->pending--;
165 spin_unlock_irqrestore(&ent->lock, flags);
166 }
167
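/*
 * Allocate an mlx5_ib_mr for a cache bucket and pre-fill the mkey context
 * (free + umr_en, access mode, translation size and page size) so that the
 * cached mkey can later be configured via UMR rather than recreated.
 */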
168 static struct mlx5_ib_mr *alloc_cache_mr(struct mlx5_cache_ent *ent, void *mkc)
169 {
170 struct mlx5_ib_mr *mr;
171
172 mr = kzalloc(sizeof(*mr), GFP_KERNEL);
173 if (!mr)
174 return NULL;
175 mr->order = ent->order;
176 mr->cache_ent = ent;
177 mr->dev = ent->dev;
178
179 set_mkc_access_pd_addr_fields(mkc, 0, 0, ent->dev->umrc.pd);
180 MLX5_SET(mkc, mkc, free, 1);
181 MLX5_SET(mkc, mkc, umr_en, 1);
182 MLX5_SET(mkc, mkc, access_mode_1_0, ent->access_mode & 0x3);
183 MLX5_SET(mkc, mkc, access_mode_4_2, (ent->access_mode >> 2) & 0x7);
184
185 MLX5_SET(mkc, mkc, translations_octword_size, ent->xlt);
186 MLX5_SET(mkc, mkc, log_page_size, ent->page);
187 return mr;
188 }
189
190 /* Asynchronously schedule new MRs to be populated in the cache. */
191 static int add_keys(struct mlx5_cache_ent *ent, unsigned int num)
192 {
193 size_t inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
194 struct mlx5_ib_mr *mr;
195 void *mkc;
196 u32 *in;
197 int err = 0;
198 int i;
199
200 in = kzalloc(inlen, GFP_KERNEL);
201 if (!in)
202 return -ENOMEM;
203
204 mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
205 for (i = 0; i < num; i++) {
206 mr = alloc_cache_mr(ent, mkc);
207 if (!mr) {
208 err = -ENOMEM;
209 break;
210 }
211 spin_lock_irq(&ent->lock);
212 if (ent->pending >= MAX_PENDING_REG_MR) {
213 err = -EAGAIN;
214 spin_unlock_irq(&ent->lock);
215 kfree(mr);
216 break;
217 }
218 ent->pending++;
219 spin_unlock_irq(&ent->lock);
220 err = mlx5_ib_create_mkey_cb(ent->dev, &mr->mmkey,
221 &ent->dev->async_ctx, in, inlen,
222 mr->out, sizeof(mr->out),
223 &mr->cb_work);
224 if (err) {
225 spin_lock_irq(&ent->lock);
226 ent->pending--;
227 spin_unlock_irq(&ent->lock);
228 mlx5_ib_warn(ent->dev, "create mkey failed %d\n", err);
229 kfree(mr);
230 break;
231 }
232 }
233
234 kfree(in);
235 return err;
236 }
237
238 /* Synchronously create an MR in the cache */
239 static struct mlx5_ib_mr *create_cache_mr(struct mlx5_cache_ent *ent)
240 {
241 size_t inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
242 struct mlx5_ib_mr *mr;
243 void *mkc;
244 u32 *in;
245 int err;
246
247 in = kzalloc(inlen, GFP_KERNEL);
248 if (!in)
249 return ERR_PTR(-ENOMEM);
250 mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
251
252 mr = alloc_cache_mr(ent, mkc);
253 if (!mr) {
254 err = -ENOMEM;
255 goto free_in;
256 }
257
258 err = mlx5_core_create_mkey(ent->dev->mdev, &mr->mmkey, in, inlen);
259 if (err)
260 goto free_mr;
261
262 mr->mmkey.type = MLX5_MKEY_MR;
263 WRITE_ONCE(ent->dev->cache.last_add, jiffies);
264 spin_lock_irq(&ent->lock);
265 ent->total_mrs++;
266 spin_unlock_irq(&ent->lock);
267 kfree(in);
268 return mr;
269 free_mr:
270 kfree(mr);
271 free_in:
272 kfree(in);
273 return ERR_PTR(err);
274 }
275
276 static void remove_cache_mr_locked(struct mlx5_cache_ent *ent)
277 {
278 struct mlx5_ib_mr *mr;
279
280 lockdep_assert_held(&ent->lock);
281 if (list_empty(&ent->head))
282 return;
283 mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
284 list_del(&mr->list);
285 ent->available_mrs--;
286 ent->total_mrs--;
287 spin_unlock_irq(&ent->lock);
288 mlx5_core_destroy_mkey(ent->dev->mdev, &mr->mmkey);
289 kfree(mr);
290 spin_lock_irq(&ent->lock);
291 }
292
293 static int resize_available_mrs(struct mlx5_cache_ent *ent, unsigned int target,
294 bool limit_fill)
295 {
296 int err;
297
298 lockdep_assert_held(&ent->lock);
299
300 while (true) {
301 if (limit_fill)
302 target = ent->limit * 2;
303 if (target == ent->available_mrs + ent->pending)
304 return 0;
305 if (target > ent->available_mrs + ent->pending) {
306 u32 todo = target - (ent->available_mrs + ent->pending);
307
308 spin_unlock_irq(&ent->lock);
309 err = add_keys(ent, todo);
310 if (err == -EAGAIN)
311 usleep_range(3000, 5000);
312 spin_lock_irq(&ent->lock);
313 if (err) {
314 if (err != -EAGAIN)
315 return err;
316 } else
317 return 0;
318 } else {
319 remove_cache_mr_locked(ent);
320 }
321 }
322 }
323
324 static ssize_t size_write(struct file *filp, const char __user *buf,
325 size_t count, loff_t *pos)
326 {
327 struct mlx5_cache_ent *ent = filp->private_data;
328 u32 target;
329 int err;
330
331 err = kstrtou32_from_user(buf, count, 0, &target);
332 if (err)
333 return err;
334
335 /*
336 * Target is the new value of total_mrs the user requests; however, we
337 * cannot free MRs that are in use. Compute the target value for
338 * available_mrs.
339 */
340 spin_lock_irq(&ent->lock);
341 if (target < ent->total_mrs - ent->available_mrs) {
342 err = -EINVAL;
343 goto err_unlock;
344 }
345 target = target - (ent->total_mrs - ent->available_mrs);
346 if (target < ent->limit || target > ent->limit*2) {
347 err = -EINVAL;
348 goto err_unlock;
349 }
350 err = resize_available_mrs(ent, target, false);
351 if (err)
352 goto err_unlock;
353 spin_unlock_irq(&ent->lock);
354
355 return count;
356
357 err_unlock:
358 spin_unlock_irq(&ent->lock);
359 return err;
360 }
361
362 static ssize_t size_read(struct file *filp, char __user *buf, size_t count,
363 loff_t *pos)
364 {
365 struct mlx5_cache_ent *ent = filp->private_data;
366 char lbuf[20];
367 int err;
368
369 err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->total_mrs);
370 if (err < 0)
371 return err;
372
373 return simple_read_from_buffer(buf, count, pos, lbuf, err);
374 }
375
376 static const struct file_operations size_fops = {
377 .owner = THIS_MODULE,
378 .open = simple_open,
379 .write = size_write,
380 .read = size_read,
381 };
382
383 static ssize_t limit_write(struct file *filp, const char __user *buf,
384 size_t count, loff_t *pos)
385 {
386 struct mlx5_cache_ent *ent = filp->private_data;
387 u32 var;
388 int err;
389
390 err = kstrtou32_from_user(buf, count, 0, &var);
391 if (err)
392 return err;
393
394 /*
395 * Upon set, we immediately fill the cache to the high water mark
396 * implied by the limit.
397 */
398 spin_lock_irq(&ent->lock);
399 ent->limit = var;
400 err = resize_available_mrs(ent, 0, true);
401 spin_unlock_irq(&ent->lock);
402 if (err)
403 return err;
404 return count;
405 }
406
407 static ssize_t limit_read(struct file *filp, char __user *buf, size_t count,
408 loff_t *pos)
409 {
410 struct mlx5_cache_ent *ent = filp->private_data;
411 char lbuf[20];
412 int err;
413
414 err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->limit);
415 if (err < 0)
416 return err;
417
418 return simple_read_from_buffer(buf, count, pos, lbuf, err);
419 }
420
421 static const struct file_operations limit_fops = {
422 .owner = THIS_MODULE,
423 .open = simple_open,
424 .write = limit_write,
425 .read = limit_read,
426 };
427
428 static bool someone_adding(struct mlx5_mr_cache *cache)
429 {
430 unsigned int i;
431
432 for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
433 struct mlx5_cache_ent *ent = &cache->ent[i];
434 bool ret;
435
436 spin_lock_irq(&ent->lock);
437 ret = ent->available_mrs < ent->limit;
438 spin_unlock_irq(&ent->lock);
439 if (ret)
440 return true;
441 }
442 return false;
443 }
444
445 /*
446 * Check if the bucket is outside the high/low water mark and schedule an async
447 * update. The cache refill has hysteresis: once the low water mark is hit, it
448 * is refilled up to the high mark.
449 */
450 static void queue_adjust_cache_locked(struct mlx5_cache_ent *ent)
451 {
452 lockdep_assert_held(&ent->lock);
453
454 if (ent->disabled || READ_ONCE(ent->dev->fill_delay))
455 return;
456 if (ent->available_mrs < ent->limit) {
457 ent->fill_to_high_water = true;
458 queue_work(ent->dev->cache.wq, &ent->work);
459 } else if (ent->fill_to_high_water &&
460 ent->available_mrs + ent->pending < 2 * ent->limit) {
461 /*
462 * Once we start populating due to hitting a low water mark,
463 * continue until we pass the high water mark.
464 */
465 queue_work(ent->dev->cache.wq, &ent->work);
466 } else if (ent->available_mrs == 2 * ent->limit) {
467 ent->fill_to_high_water = false;
468 } else if (ent->available_mrs > 2 * ent->limit) {
469 /* Queue deletion of excess entries */
470 ent->fill_to_high_water = false;
471 if (ent->pending)
472 queue_delayed_work(ent->dev->cache.wq, &ent->dwork,
473 msecs_to_jiffies(1000));
474 else
475 queue_work(ent->dev->cache.wq, &ent->work);
476 }
477 }
478
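/*
 * Work handler shared by ent->work and ent->dwork: grow the bucket towards
 * 2 * limit while fill_to_high_water is set, or shrink it when it holds
 * more than 2 * limit MRs and the system is otherwise idle.
 */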
479 static void __cache_work_func(struct mlx5_cache_ent *ent)
480 {
481 struct mlx5_ib_dev *dev = ent->dev;
482 struct mlx5_mr_cache *cache = &dev->cache;
483 int err;
484
485 spin_lock_irq(&ent->lock);
486 if (ent->disabled)
487 goto out;
488
489 if (ent->fill_to_high_water &&
490 ent->available_mrs + ent->pending < 2 * ent->limit &&
491 !READ_ONCE(dev->fill_delay)) {
492 spin_unlock_irq(&ent->lock);
493 err = add_keys(ent, 1);
494 spin_lock_irq(&ent->lock);
495 if (ent->disabled)
496 goto out;
497 if (err) {
498 /*
499 * EAGAIN only happens if pending is positive, so we
500 * will be rescheduled from create_mkey_callback(). The only
501 * failure path here is ENOMEM.
502 */
503 if (err != -EAGAIN) {
504 mlx5_ib_warn(
505 dev,
506 "command failed order %d, err %d\n",
507 ent->order, err);
508 queue_delayed_work(cache->wq, &ent->dwork,
509 msecs_to_jiffies(1000));
510 }
511 }
512 } else if (ent->available_mrs > 2 * ent->limit) {
513 bool need_delay;
514
515 /*
516 * The remove_cache_mr() logic is performed as a garbage
517 * collection task. Such a task is intended to run when no
518 * other active processes are running.
519 *
520 * need_resched() will return TRUE if there are user tasks
521 * to be activated in the near future.
522 *
523 * In such a case, we don't execute remove_cache_mr() and postpone
524 * the garbage collection work to the next cycle, in order
525 * to free CPU resources to other tasks.
526 */
527 spin_unlock_irq(&ent->lock);
528 need_delay = need_resched() || someone_adding(cache) ||
529 !time_after(jiffies,
530 READ_ONCE(cache->last_add) + 300 * HZ);
531 spin_lock_irq(&ent->lock);
532 if (ent->disabled)
533 goto out;
534 if (need_delay)
535 queue_delayed_work(cache->wq, &ent->dwork, 300 * HZ);
536 remove_cache_mr_locked(ent);
537 queue_adjust_cache_locked(ent);
538 }
539 out:
540 spin_unlock_irq(&ent->lock);
541 }
542
543 static void delayed_cache_work_func(struct work_struct *work)
544 {
545 struct mlx5_cache_ent *ent;
546
547 ent = container_of(work, struct mlx5_cache_ent, dwork.work);
548 __cache_work_func(ent);
549 }
550
551 static void cache_work_func(struct work_struct *work)
552 {
553 struct mlx5_cache_ent *ent;
554
555 ent = container_of(work, struct mlx5_cache_ent, work);
556 __cache_work_func(ent);
557 }
558
559 /* Allocate a special entry from the cache */
560 struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev,
561 unsigned int entry, int access_flags)
562 {
563 struct mlx5_mr_cache *cache = &dev->cache;
564 struct mlx5_cache_ent *ent;
565 struct mlx5_ib_mr *mr;
566
567 if (WARN_ON(entry <= MR_CACHE_LAST_STD_ENTRY ||
568 entry >= ARRAY_SIZE(cache->ent)))
569 return ERR_PTR(-EINVAL);
570
571 /* Matches access in alloc_cache_mr() */
572 if (!mlx5_ib_can_reconfig_with_umr(dev, 0, access_flags))
573 return ERR_PTR(-EOPNOTSUPP);
574
575 ent = &cache->ent[entry];
576 spin_lock_irq(&ent->lock);
577 if (list_empty(&ent->head)) {
578 spin_unlock_irq(&ent->lock);
579 mr = create_cache_mr(ent);
580 if (IS_ERR(mr))
581 return mr;
582 } else {
583 mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
584 list_del(&mr->list);
585 ent->available_mrs--;
586 queue_adjust_cache_locked(ent);
587 spin_unlock_irq(&ent->lock);
588 }
589 mr->access_flags = access_flags;
590 return mr;
591 }
592
593 /* Return an MR already available in the cache */
594 static struct mlx5_ib_mr *get_cache_mr(struct mlx5_cache_ent *req_ent)
595 {
596 struct mlx5_ib_dev *dev = req_ent->dev;
597 struct mlx5_ib_mr *mr = NULL;
598 struct mlx5_cache_ent *ent = req_ent;
599
600 /* Try larger MR pools from the cache to satisfy the allocation */
601 for (; ent != &dev->cache.ent[MR_CACHE_LAST_STD_ENTRY + 1]; ent++) {
602 mlx5_ib_dbg(dev, "order %u, cache index %zu\n", ent->order,
603 ent - dev->cache.ent);
604
605 spin_lock_irq(&ent->lock);
606 if (!list_empty(&ent->head)) {
607 mr = list_first_entry(&ent->head, struct mlx5_ib_mr,
608 list);
609 list_del(&mr->list);
610 ent->available_mrs--;
611 queue_adjust_cache_locked(ent);
612 spin_unlock_irq(&ent->lock);
613 break;
614 }
615 queue_adjust_cache_locked(ent);
616 spin_unlock_irq(&ent->lock);
617 }
618
619 if (!mr)
620 req_ent->miss++;
621
622 return mr;
623 }
624
625 static void detach_mr_from_cache(struct mlx5_ib_mr *mr)
626 {
627 struct mlx5_cache_ent *ent = mr->cache_ent;
628
629 mr->cache_ent = NULL;
630 spin_lock_irq(&ent->lock);
631 ent->total_mrs--;
632 spin_unlock_irq(&ent->lock);
633 }
634
635 void mlx5_mr_cache_free(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
636 {
637 struct mlx5_cache_ent *ent = mr->cache_ent;
638
639 if (!ent)
640 return;
641
642 if (mlx5_mr_cache_invalidate(mr)) {
643 detach_mr_from_cache(mr);
644 destroy_mkey(dev, mr);
645 kfree(mr);
646 return;
647 }
648
649 spin_lock_irq(&ent->lock);
650 list_add_tail(&mr->list, &ent->head);
651 ent->available_mrs++;
652 queue_adjust_cache_locked(ent);
653 spin_unlock_irq(&ent->lock);
654 }
655
656 static void clean_keys(struct mlx5_ib_dev *dev, int c)
657 {
658 struct mlx5_mr_cache *cache = &dev->cache;
659 struct mlx5_cache_ent *ent = &cache->ent[c];
660 struct mlx5_ib_mr *tmp_mr;
661 struct mlx5_ib_mr *mr;
662 LIST_HEAD(del_list);
663
664 cancel_delayed_work(&ent->dwork);
665 while (1) {
666 spin_lock_irq(&ent->lock);
667 if (list_empty(&ent->head)) {
668 spin_unlock_irq(&ent->lock);
669 break;
670 }
671 mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
672 list_move(&mr->list, &del_list);
673 ent->available_mrs--;
674 ent->total_mrs--;
675 spin_unlock_irq(&ent->lock);
676 mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey);
677 }
678
679 list_for_each_entry_safe(mr, tmp_mr, &del_list, list) {
680 list_del(&mr->list);
681 kfree(mr);
682 }
683 }
684
685 static void mlx5_mr_cache_debugfs_cleanup(struct mlx5_ib_dev *dev)
686 {
687 if (!mlx5_debugfs_root || dev->is_rep)
688 return;
689
690 debugfs_remove_recursive(dev->cache.root);
691 dev->cache.root = NULL;
692 }
693
694 static void mlx5_mr_cache_debugfs_init(struct mlx5_ib_dev *dev)
695 {
696 struct mlx5_mr_cache *cache = &dev->cache;
697 struct mlx5_cache_ent *ent;
698 struct dentry *dir;
699 int i;
700
701 if (!mlx5_debugfs_root || dev->is_rep)
702 return;
703
704 cache->root = debugfs_create_dir("mr_cache", dev->mdev->priv.dbg_root);
705
706 for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
707 ent = &cache->ent[i];
708 sprintf(ent->name, "%d", ent->order);
709 dir = debugfs_create_dir(ent->name, cache->root);
710 debugfs_create_file("size", 0600, dir, ent, &size_fops);
711 debugfs_create_file("limit", 0600, dir, ent, &limit_fops);
712 debugfs_create_u32("cur", 0400, dir, &ent->available_mrs);
713 debugfs_create_u32("miss", 0600, dir, &ent->miss);
714 }
715 }
716
717 static void delay_time_func(struct timer_list *t)
718 {
719 struct mlx5_ib_dev *dev = from_timer(dev, t, delay_timer);
720
721 WRITE_ONCE(dev->fill_delay, 0);
722 }
723
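/*
 * Create the MR cache: an ordered workqueue, the async command context, the
 * fill-delay timer and one bucket per entry. Standard buckets hold MTT MRs
 * of order i + 2; entries above MR_CACHE_LAST_STD_ENTRY are initialized by
 * mlx5_odp_init_mr_cache_entry() instead.
 */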
724 int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
725 {
726 struct mlx5_mr_cache *cache = &dev->cache;
727 struct mlx5_cache_ent *ent;
728 int i;
729
730 mutex_init(&dev->slow_path_mutex);
731 cache->wq = alloc_ordered_workqueue("mkey_cache", WQ_MEM_RECLAIM);
732 if (!cache->wq) {
733 mlx5_ib_warn(dev, "failed to create work queue\n");
734 return -ENOMEM;
735 }
736
737 mlx5_cmd_init_async_ctx(dev->mdev, &dev->async_ctx);
738 timer_setup(&dev->delay_timer, delay_time_func, 0);
739 for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
740 ent = &cache->ent[i];
741 INIT_LIST_HEAD(&ent->head);
742 spin_lock_init(&ent->lock);
743 ent->order = i + 2;
744 ent->dev = dev;
745 ent->limit = 0;
746
747 INIT_WORK(&ent->work, cache_work_func);
748 INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func);
749
750 if (i > MR_CACHE_LAST_STD_ENTRY) {
751 mlx5_odp_init_mr_cache_entry(ent);
752 continue;
753 }
754
755 if (ent->order > mr_cache_max_order(dev))
756 continue;
757
758 ent->page = PAGE_SHIFT;
759 ent->xlt = (1 << ent->order) * sizeof(struct mlx5_mtt) /
760 MLX5_IB_UMR_OCTOWORD;
761 ent->access_mode = MLX5_MKC_ACCESS_MODE_MTT;
762 if ((dev->mdev->profile->mask & MLX5_PROF_MASK_MR_CACHE) &&
763 !dev->is_rep && mlx5_core_is_pf(dev->mdev) &&
764 mlx5_ib_can_load_pas_with_umr(dev, 0))
765 ent->limit = dev->mdev->profile->mr_cache[i].limit;
766 else
767 ent->limit = 0;
768 spin_lock_irq(&ent->lock);
769 queue_adjust_cache_locked(ent);
770 spin_unlock_irq(&ent->lock);
771 }
772
773 mlx5_mr_cache_debugfs_init(dev);
774
775 return 0;
776 }
777
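/*
 * Tear down the MR cache: disable every bucket, flush its work items,
 * destroy the cached mkeys, and finally release the debugfs entries, async
 * context, workqueue and delay timer.
 */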
778 int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev)
779 {
780 unsigned int i;
781
782 if (!dev->cache.wq)
783 return 0;
784
785 for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
786 struct mlx5_cache_ent *ent = &dev->cache.ent[i];
787
788 spin_lock_irq(&ent->lock);
789 ent->disabled = true;
790 spin_unlock_irq(&ent->lock);
791 cancel_work_sync(&ent->work);
792 cancel_delayed_work_sync(&ent->dwork);
793 }
794
795 mlx5_mr_cache_debugfs_cleanup(dev);
796 mlx5_cmd_cleanup_async_ctx(&dev->async_ctx);
797
798 for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++)
799 clean_keys(dev, i);
800
801 destroy_workqueue(dev->cache.wq);
802 del_timer_sync(&dev->delay_timer);
803
804 return 0;
805 }
806
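/*
 * Create a physical-address (PA) mode mkey with length64 set, giving the
 * caller DMA access to the full address space with the requested access
 * rights.
 */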
807 struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc)
808 {
809 struct mlx5_ib_dev *dev = to_mdev(pd->device);
810 int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
811 struct mlx5_ib_mr *mr;
812 void *mkc;
813 u32 *in;
814 int err;
815
816 mr = kzalloc(sizeof(*mr), GFP_KERNEL);
817 if (!mr)
818 return ERR_PTR(-ENOMEM);
819
820 in = kzalloc(inlen, GFP_KERNEL);
821 if (!in) {
822 err = -ENOMEM;
823 goto err_free;
824 }
825
826 mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
827
828 MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_PA);
829 MLX5_SET(mkc, mkc, length64, 1);
830 set_mkc_access_pd_addr_fields(mkc, acc, 0, pd);
831
832 err = mlx5_ib_create_mkey(dev, &mr->mmkey, in, inlen);
833 if (err)
834 goto err_in;
835
836 kfree(in);
837 mr->mmkey.type = MLX5_MKEY_MR;
838 mr->ibmr.lkey = mr->mmkey.key;
839 mr->ibmr.rkey = mr->mmkey.key;
840 mr->umem = NULL;
841
842 return &mr->ibmr;
843
844 err_in:
845 kfree(in);
846
847 err_free:
848 kfree(mr);
849
850 return ERR_PTR(err);
851 }
852
853 static int get_octo_len(u64 addr, u64 len, int page_shift)
854 {
855 u64 page_size = 1ULL << page_shift;
856 u64 offset;
857 int npages;
858
859 offset = addr & (page_size - 1);
860 npages = ALIGN(len + offset, page_size) >> page_shift;
861 return (npages + 1) / 2;
862 }
863
864 static int mr_cache_max_order(struct mlx5_ib_dev *dev)
865 {
866 if (MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset))
867 return MR_CACHE_LAST_STD_ENTRY + 2;
868 return MLX5_MAX_UMR_SHIFT;
869 }
870
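/*
 * Acquire the umem describing [start, start + length) and compute npages,
 * page_shift, ncont and order for it. IB_ACCESS_ON_DEMAND registrations get
 * an ib_umem_odp backed by the mlx5 MMU notifier ops instead of a pinned
 * umem.
 */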
871 static int mr_umem_get(struct mlx5_ib_dev *dev, u64 start, u64 length,
872 int access_flags, struct ib_umem **umem, int *npages,
873 int *page_shift, int *ncont, int *order)
874 {
875 struct ib_umem *u;
876
877 *umem = NULL;
878
879 if (access_flags & IB_ACCESS_ON_DEMAND) {
880 struct ib_umem_odp *odp;
881
882 odp = ib_umem_odp_get(&dev->ib_dev, start, length, access_flags,
883 &mlx5_mn_ops);
884 if (IS_ERR(odp)) {
885 mlx5_ib_dbg(dev, "umem get failed (%ld)\n",
886 PTR_ERR(odp));
887 return PTR_ERR(odp);
888 }
889
890 u = &odp->umem;
891
892 *page_shift = odp->page_shift;
893 *ncont = ib_umem_odp_num_pages(odp);
894 *npages = *ncont << (*page_shift - PAGE_SHIFT);
895 if (order)
896 *order = ilog2(roundup_pow_of_two(*ncont));
897 } else {
898 u = ib_umem_get(&dev->ib_dev, start, length, access_flags);
899 if (IS_ERR(u)) {
900 mlx5_ib_dbg(dev, "umem get failed (%ld)\n", PTR_ERR(u));
901 return PTR_ERR(u);
902 }
903
904 mlx5_ib_cont_pages(u, start, MLX5_MKEY_PAGE_SHIFT_MASK, npages,
905 page_shift, ncont, order);
906 }
907
908 if (!*npages) {
909 mlx5_ib_warn(dev, "avoid zero region\n");
910 ib_umem_release(u);
911 return -EINVAL;
912 }
913
914 *umem = u;
915
916 mlx5_ib_dbg(dev, "npages %d, ncont %d, order %d, page_shift %d\n",
917 *npages, *ncont, *order, *page_shift);
918
919 return 0;
920 }
921
922 static void mlx5_ib_umr_done(struct ib_cq *cq, struct ib_wc *wc)
923 {
924 struct mlx5_ib_umr_context *context =
925 container_of(wc->wr_cqe, struct mlx5_ib_umr_context, cqe);
926
927 context->status = wc->status;
928 complete(&context->done);
929 }
930
931 static inline void mlx5_ib_init_umr_context(struct mlx5_ib_umr_context *context)
932 {
933 context->cqe.done = mlx5_ib_umr_done;
934 context->status = -1;
935 init_completion(&context->done);
936 }
937
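/*
 * Post a UMR work request on the driver's dedicated UMR QP and wait for its
 * completion. Access to the QP is throttled by umrc->sem; a completion
 * status other than IB_WC_SUCCESS is reported to the caller as -EFAULT.
 */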
938 static int mlx5_ib_post_send_wait(struct mlx5_ib_dev *dev,
939 struct mlx5_umr_wr *umrwr)
940 {
941 struct umr_common *umrc = &dev->umrc;
942 const struct ib_send_wr *bad;
943 int err;
944 struct mlx5_ib_umr_context umr_context;
945
946 mlx5_ib_init_umr_context(&umr_context);
947 umrwr->wr.wr_cqe = &umr_context.cqe;
948
949 down(&umrc->sem);
950 err = ib_post_send(umrc->qp, &umrwr->wr, &bad);
951 if (err) {
952 mlx5_ib_warn(dev, "UMR post send failed, err %d\n", err);
953 } else {
954 wait_for_completion(&umr_context.done);
955 if (umr_context.status != IB_WC_SUCCESS) {
956 mlx5_ib_warn(dev, "reg umr failed (%u)\n",
957 umr_context.status);
958 err = -EFAULT;
959 }
960 }
961 up(&umrc->sem);
962 return err;
963 }
964
965 static struct mlx5_cache_ent *mr_cache_ent_from_order(struct mlx5_ib_dev *dev,
966 unsigned int order)
967 {
968 struct mlx5_mr_cache *cache = &dev->cache;
969
970 if (order < cache->ent[0].order)
971 return &cache->ent[0];
972 order = order - cache->ent[0].order;
973 if (order > MR_CACHE_LAST_STD_ENTRY)
974 return NULL;
975 return &cache->ent[order];
976 }
977
978 static struct mlx5_ib_mr *
979 alloc_mr_from_cache(struct ib_pd *pd, struct ib_umem *umem, u64 virt_addr,
980 u64 len, int npages, int page_shift, unsigned int order,
981 int access_flags)
982 {
983 struct mlx5_ib_dev *dev = to_mdev(pd->device);
984 struct mlx5_cache_ent *ent = mr_cache_ent_from_order(dev, order);
985 struct mlx5_ib_mr *mr;
986
987 if (!ent)
988 return ERR_PTR(-E2BIG);
989
990 /* Matches access in alloc_cache_mr() */
991 if (!mlx5_ib_can_reconfig_with_umr(dev, 0, access_flags))
992 return ERR_PTR(-EOPNOTSUPP);
993
994 mr = get_cache_mr(ent);
995 if (!mr) {
996 mr = create_cache_mr(ent);
997 if (IS_ERR(mr))
998 return mr;
999 }
1000
1001 mr->ibmr.pd = pd;
1002 mr->umem = umem;
1003 mr->access_flags = access_flags;
1004 mr->desc_size = sizeof(struct mlx5_mtt);
1005 mr->mmkey.iova = virt_addr;
1006 mr->mmkey.size = len;
1007 mr->mmkey.pd = to_mpd(pd)->pdn;
1008
1009 return mr;
1010 }
1011
1012 #define MLX5_MAX_UMR_CHUNK ((1 << (MLX5_MAX_UMR_SHIFT + 4)) - \
1013 MLX5_UMR_MTT_ALIGNMENT)
1014 #define MLX5_SPARE_UMR_CHUNK 0x10000
1015
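/*
 * Rewrite a window of the mkey's translation table (MTTs, or KLMs when
 * MLX5_IB_UPD_XLT_INDIRECT is set) by building chunks in a DMA-mapped
 * buffer and posting one UMR WQE per chunk. If the large chunk cannot be
 * allocated, the code falls back to MLX5_SPARE_UMR_CHUNK and, as a last
 * resort, to the single emergency page. A typical call, as done by
 * mlx5_ib_reg_user_mr():
 *
 *	mlx5_ib_update_xlt(mr, 0, ncont, page_shift, MLX5_IB_UPD_XLT_ENABLE);
 */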
1016 int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,
1017 int page_shift, int flags)
1018 {
1019 struct mlx5_ib_dev *dev = mr->dev;
1020 struct device *ddev = dev->ib_dev.dev.parent;
1021 int size;
1022 void *xlt;
1023 dma_addr_t dma;
1024 struct mlx5_umr_wr wr;
1025 struct ib_sge sg;
1026 int err = 0;
1027 int desc_size = (flags & MLX5_IB_UPD_XLT_INDIRECT)
1028 ? sizeof(struct mlx5_klm)
1029 : sizeof(struct mlx5_mtt);
1030 const int page_align = MLX5_UMR_MTT_ALIGNMENT / desc_size;
1031 const int page_mask = page_align - 1;
1032 size_t pages_mapped = 0;
1033 size_t pages_to_map = 0;
1034 size_t pages_iter = 0;
1035 size_t size_to_map = 0;
1036 gfp_t gfp;
1037 bool use_emergency_page = false;
1038
1039 if ((flags & MLX5_IB_UPD_XLT_INDIRECT) &&
1040 !umr_can_use_indirect_mkey(dev))
1041 return -EPERM;
1042
1043 /* UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes,
1044 * so we need to align the offset and length accordingly
1045 */
1046 if (idx & page_mask) {
1047 npages += idx & page_mask;
1048 idx &= ~page_mask;
1049 }
1050
1051 gfp = flags & MLX5_IB_UPD_XLT_ATOMIC ? GFP_ATOMIC : GFP_KERNEL;
1052 gfp |= __GFP_ZERO | __GFP_NOWARN;
1053
1054 pages_to_map = ALIGN(npages, page_align);
1055 size = desc_size * pages_to_map;
1056 size = min_t(int, size, MLX5_MAX_UMR_CHUNK);
1057
1058 xlt = (void *)__get_free_pages(gfp, get_order(size));
1059 if (!xlt && size > MLX5_SPARE_UMR_CHUNK) {
1060 mlx5_ib_dbg(dev, "Failed to allocate %d bytes of order %d. fallback to spare UMR allocation of %d bytes\n",
1061 size, get_order(size), MLX5_SPARE_UMR_CHUNK);
1062
1063 size = MLX5_SPARE_UMR_CHUNK;
1064 xlt = (void *)__get_free_pages(gfp, get_order(size));
1065 }
1066
1067 if (!xlt) {
1068 mlx5_ib_warn(dev, "Using XLT emergency buffer\n");
1069 xlt = (void *)mlx5_ib_get_xlt_emergency_page();
1070 size = PAGE_SIZE;
1071 memset(xlt, 0, size);
1072 use_emergency_page = true;
1073 }
1074 pages_iter = size / desc_size;
1075 dma = dma_map_single(ddev, xlt, size, DMA_TO_DEVICE);
1076 if (dma_mapping_error(ddev, dma)) {
1077 mlx5_ib_err(dev, "unable to map DMA during XLT update.\n");
1078 err = -ENOMEM;
1079 goto free_xlt;
1080 }
1081
1082 if (mr->umem->is_odp) {
1083 if (!(flags & MLX5_IB_UPD_XLT_INDIRECT)) {
1084 struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem);
1085 size_t max_pages = ib_umem_odp_num_pages(odp) - idx;
1086
1087 pages_to_map = min_t(size_t, pages_to_map, max_pages);
1088 }
1089 }
1090
1091 sg.addr = dma;
1092 sg.lkey = dev->umrc.pd->local_dma_lkey;
1093
1094 memset(&wr, 0, sizeof(wr));
1095 wr.wr.send_flags = MLX5_IB_SEND_UMR_UPDATE_XLT;
1096 if (!(flags & MLX5_IB_UPD_XLT_ENABLE))
1097 wr.wr.send_flags |= MLX5_IB_SEND_UMR_FAIL_IF_FREE;
1098 wr.wr.sg_list = &sg;
1099 wr.wr.num_sge = 1;
1100 wr.wr.opcode = MLX5_IB_WR_UMR;
1101
1102 wr.pd = mr->ibmr.pd;
1103 wr.mkey = mr->mmkey.key;
1104 wr.length = mr->mmkey.size;
1105 wr.virt_addr = mr->mmkey.iova;
1106 wr.access_flags = mr->access_flags;
1107 wr.page_shift = page_shift;
1108
1109 for (pages_mapped = 0;
1110 pages_mapped < pages_to_map && !err;
1111 pages_mapped += pages_iter, idx += pages_iter) {
1112 npages = min_t(int, pages_iter, pages_to_map - pages_mapped);
1113 size_to_map = npages * desc_size;
1114 dma_sync_single_for_cpu(ddev, dma, size, DMA_TO_DEVICE);
1115 if (mr->umem->is_odp) {
1116 mlx5_odp_populate_xlt(xlt, idx, npages, mr, flags);
1117 } else {
1118 __mlx5_ib_populate_pas(dev, mr->umem, page_shift, idx,
1119 npages, xlt,
1120 MLX5_IB_MTT_PRESENT);
1121 /* Clear padding after the pages
1122 * brought from the umem.
1123 */
1124 memset(xlt + size_to_map, 0, size - size_to_map);
1125 }
1126 dma_sync_single_for_device(ddev, dma, size, DMA_TO_DEVICE);
1127
1128 sg.length = ALIGN(size_to_map, MLX5_UMR_MTT_ALIGNMENT);
1129
1130 if (pages_mapped + pages_iter >= pages_to_map) {
1131 if (flags & MLX5_IB_UPD_XLT_ENABLE)
1132 wr.wr.send_flags |=
1133 MLX5_IB_SEND_UMR_ENABLE_MR |
1134 MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS |
1135 MLX5_IB_SEND_UMR_UPDATE_TRANSLATION;
1136 if (flags & MLX5_IB_UPD_XLT_PD ||
1137 flags & MLX5_IB_UPD_XLT_ACCESS)
1138 wr.wr.send_flags |=
1139 MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS;
1140 if (flags & MLX5_IB_UPD_XLT_ADDR)
1141 wr.wr.send_flags |=
1142 MLX5_IB_SEND_UMR_UPDATE_TRANSLATION;
1143 }
1144
1145 wr.offset = idx * desc_size;
1146 wr.xlt_size = sg.length;
1147
1148 err = mlx5_ib_post_send_wait(dev, &wr);
1149 }
1150 dma_unmap_single(ddev, dma, size, DMA_TO_DEVICE);
1151
1152 free_xlt:
1153 if (use_emergency_page)
1154 mlx5_ib_put_xlt_emergency_page();
1155 else
1156 free_pages((unsigned long)xlt, get_order(size));
1157
1158 return err;
1159 }
1160
1161 /*
1162 * If ibmr is NULL, it will be allocated by reg_create.
1163 * Otherwise, the given ibmr will be used.
1164 */
1165 static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd,
1166 u64 virt_addr, u64 length,
1167 struct ib_umem *umem, int npages,
1168 int page_shift, int access_flags,
1169 bool populate)
1170 {
1171 struct mlx5_ib_dev *dev = to_mdev(pd->device);
1172 struct mlx5_ib_mr *mr;
1173 __be64 *pas;
1174 void *mkc;
1175 int inlen;
1176 u32 *in;
1177 int err;
1178 bool pg_cap = !!(MLX5_CAP_GEN(dev->mdev, pg));
1179
1180 mr = ibmr ? to_mmr(ibmr) : kzalloc(sizeof(*mr), GFP_KERNEL);
1181 if (!mr)
1182 return ERR_PTR(-ENOMEM);
1183
1184 mr->ibmr.pd = pd;
1185 mr->access_flags = access_flags;
1186
1187 inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
1188 if (populate)
1189 inlen += sizeof(*pas) * roundup(npages, 2);
1190 in = kvzalloc(inlen, GFP_KERNEL);
1191 if (!in) {
1192 err = -ENOMEM;
1193 goto err_1;
1194 }
1195 pas = (__be64 *)MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt);
1196 if (populate) {
1197 if (WARN_ON(access_flags & IB_ACCESS_ON_DEMAND)) {
1198 err = -EINVAL;
1199 goto err_2;
1200 }
1201 mlx5_ib_populate_pas(dev, umem, page_shift, pas,
1202 pg_cap ? MLX5_IB_MTT_PRESENT : 0);
1203 }
1204
1205 /* The pg_access bit allows setting the access flags
1206 * in the page list submitted with the command. */
1207 MLX5_SET(create_mkey_in, in, pg_access, !!(pg_cap));
1208
1209 mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
1210 set_mkc_access_pd_addr_fields(mkc, access_flags, virt_addr,
1211 populate ? pd : dev->umrc.pd);
1212 MLX5_SET(mkc, mkc, free, !populate);
1213 MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_MTT);
1214 MLX5_SET(mkc, mkc, umr_en, 1);
1215
1216 MLX5_SET64(mkc, mkc, len, length);
1217 MLX5_SET(mkc, mkc, bsf_octword_size, 0);
1218 MLX5_SET(mkc, mkc, translations_octword_size,
1219 get_octo_len(virt_addr, length, page_shift));
1220 MLX5_SET(mkc, mkc, log_page_size, page_shift);
1221 if (populate) {
1222 MLX5_SET(create_mkey_in, in, translations_octword_actual_size,
1223 get_octo_len(virt_addr, length, page_shift));
1224 }
1225
1226 err = mlx5_ib_create_mkey(dev, &mr->mmkey, in, inlen);
1227 if (err) {
1228 mlx5_ib_warn(dev, "create mkey failed\n");
1229 goto err_2;
1230 }
1231 mr->mmkey.type = MLX5_MKEY_MR;
1232 mr->desc_size = sizeof(struct mlx5_mtt);
1233 mr->dev = dev;
1234 kvfree(in);
1235
1236 mlx5_ib_dbg(dev, "mkey = 0x%x\n", mr->mmkey.key);
1237
1238 return mr;
1239
1240 err_2:
1241 kvfree(in);
1242
1243 err_1:
1244 if (!ibmr)
1245 kfree(mr);
1246
1247 return ERR_PTR(err);
1248 }
1249
1250 static void set_mr_fields(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr,
1251 u64 length, int access_flags)
1252 {
1253 mr->ibmr.lkey = mr->mmkey.key;
1254 mr->ibmr.rkey = mr->mmkey.key;
1255 mr->ibmr.length = length;
1256 mr->access_flags = access_flags;
1257 }
1258
1259 static struct ib_mr *mlx5_ib_get_dm_mr(struct ib_pd *pd, u64 start_addr,
1260 u64 length, int acc, int mode)
1261 {
1262 struct mlx5_ib_dev *dev = to_mdev(pd->device);
1263 int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
1264 struct mlx5_ib_mr *mr;
1265 void *mkc;
1266 u32 *in;
1267 int err;
1268
1269 mr = kzalloc(sizeof(*mr), GFP_KERNEL);
1270 if (!mr)
1271 return ERR_PTR(-ENOMEM);
1272
1273 in = kzalloc(inlen, GFP_KERNEL);
1274 if (!in) {
1275 err = -ENOMEM;
1276 goto err_free;
1277 }
1278
1279 mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
1280
1281 MLX5_SET(mkc, mkc, access_mode_1_0, mode & 0x3);
1282 MLX5_SET(mkc, mkc, access_mode_4_2, (mode >> 2) & 0x7);
1283 MLX5_SET64(mkc, mkc, len, length);
1284 set_mkc_access_pd_addr_fields(mkc, acc, start_addr, pd);
1285
1286 err = mlx5_ib_create_mkey(dev, &mr->mmkey, in, inlen);
1287 if (err)
1288 goto err_in;
1289
1290 kfree(in);
1291
1292 set_mr_fields(dev, mr, length, acc);
1293
1294 return &mr->ibmr;
1295
1296 err_in:
1297 kfree(in);
1298
1299 err_free:
1300 kfree(mr);
1301
1302 return ERR_PTR(err);
1303 }
1304
1305 int mlx5_ib_advise_mr(struct ib_pd *pd,
1306 enum ib_uverbs_advise_mr_advice advice,
1307 u32 flags,
1308 struct ib_sge *sg_list,
1309 u32 num_sge,
1310 struct uverbs_attr_bundle *attrs)
1311 {
1312 if (advice != IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH &&
1313 advice != IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH_WRITE &&
1314 advice != IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH_NO_FAULT)
1315 return -EOPNOTSUPP;
1316
1317 return mlx5_ib_advise_mr_prefetch(pd, advice, flags,
1318 sg_list, num_sge);
1319 }
1320
1321 struct ib_mr *mlx5_ib_reg_dm_mr(struct ib_pd *pd, struct ib_dm *dm,
1322 struct ib_dm_mr_attr *attr,
1323 struct uverbs_attr_bundle *attrs)
1324 {
1325 struct mlx5_ib_dm *mdm = to_mdm(dm);
1326 struct mlx5_core_dev *dev = to_mdev(dm->device)->mdev;
1327 u64 start_addr = mdm->dev_addr + attr->offset;
1328 int mode;
1329
1330 switch (mdm->type) {
1331 case MLX5_IB_UAPI_DM_TYPE_MEMIC:
1332 if (attr->access_flags & ~MLX5_IB_DM_MEMIC_ALLOWED_ACCESS)
1333 return ERR_PTR(-EINVAL);
1334
1335 mode = MLX5_MKC_ACCESS_MODE_MEMIC;
1336 start_addr -= pci_resource_start(dev->pdev, 0);
1337 break;
1338 case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM:
1339 case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM:
1340 if (attr->access_flags & ~MLX5_IB_DM_SW_ICM_ALLOWED_ACCESS)
1341 return ERR_PTR(-EINVAL);
1342
1343 mode = MLX5_MKC_ACCESS_MODE_SW_ICM;
1344 break;
1345 default:
1346 return ERR_PTR(-EINVAL);
1347 }
1348
1349 return mlx5_ib_get_dm_mr(pd, start_addr, attr->length,
1350 attr->access_flags, mode);
1351 }
1352
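/*
 * Main user MR registration entry point. Implicit ODP registrations
 * (start == 0, length == U64_MAX) are handed to mlx5_ib_alloc_implicit_mr();
 * otherwise the MR comes from the UMR cache when possible and from
 * reg_create() on the slow path, and is then enabled/populated via
 * mlx5_ib_update_xlt() or mlx5_ib_init_odp_mr() as appropriate.
 */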
1353 struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
1354 u64 virt_addr, int access_flags,
1355 struct ib_udata *udata)
1356 {
1357 struct mlx5_ib_dev *dev = to_mdev(pd->device);
1358 struct mlx5_ib_mr *mr = NULL;
1359 bool xlt_with_umr;
1360 struct ib_umem *umem;
1361 int page_shift;
1362 int npages;
1363 int ncont;
1364 int order;
1365 int err;
1366
1367 if (!IS_ENABLED(CONFIG_INFINIBAND_USER_MEM))
1368 return ERR_PTR(-EOPNOTSUPP);
1369
1370 mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n",
1371 start, virt_addr, length, access_flags);
1372
1373 xlt_with_umr = mlx5_ib_can_load_pas_with_umr(dev, length);
1374 /* ODP requires xlt update via umr to work. */
1375 if (!xlt_with_umr && (access_flags & IB_ACCESS_ON_DEMAND))
1376 return ERR_PTR(-EINVAL);
1377
1378 if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING) && !start &&
1379 length == U64_MAX) {
1380 if (virt_addr != start)
1381 return ERR_PTR(-EINVAL);
1382 if (!(access_flags & IB_ACCESS_ON_DEMAND) ||
1383 !(dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT))
1384 return ERR_PTR(-EINVAL);
1385
1386 mr = mlx5_ib_alloc_implicit_mr(to_mpd(pd), udata, access_flags);
1387 if (IS_ERR(mr))
1388 return ERR_CAST(mr);
1389 return &mr->ibmr;
1390 }
1391
1392 err = mr_umem_get(dev, start, length, access_flags, &umem,
1393 &npages, &page_shift, &ncont, &order);
1394
1395 if (err < 0)
1396 return ERR_PTR(err);
1397
1398 if (xlt_with_umr) {
1399 mr = alloc_mr_from_cache(pd, umem, virt_addr, length, ncont,
1400 page_shift, order, access_flags);
1401 if (IS_ERR(mr))
1402 mr = NULL;
1403 }
1404
1405 if (!mr) {
1406 mutex_lock(&dev->slow_path_mutex);
1407 mr = reg_create(NULL, pd, virt_addr, length, umem, ncont,
1408 page_shift, access_flags, !xlt_with_umr);
1409 mutex_unlock(&dev->slow_path_mutex);
1410 }
1411
1412 if (IS_ERR(mr)) {
1413 err = PTR_ERR(mr);
1414 goto error;
1415 }
1416
1417 mlx5_ib_dbg(dev, "mkey 0x%x\n", mr->mmkey.key);
1418
1419 mr->umem = umem;
1420 mr->npages = npages;
1421 atomic_add(mr->npages, &dev->mdev->priv.reg_pages);
1422 set_mr_fields(dev, mr, length, access_flags);
1423
1424 if (xlt_with_umr && !(access_flags & IB_ACCESS_ON_DEMAND)) {
1425 /*
1426 * If the MR was created with reg_create then it will be
1427 * configured properly but left disabled. It is safe to go ahead
1428 * and configure it again via UMR while enabling it.
1429 */
1430 int update_xlt_flags = MLX5_IB_UPD_XLT_ENABLE;
1431
1432 err = mlx5_ib_update_xlt(mr, 0, ncont, page_shift,
1433 update_xlt_flags);
1434 if (err) {
1435 dereg_mr(dev, mr);
1436 return ERR_PTR(err);
1437 }
1438 }
1439
1440 if (is_odp_mr(mr)) {
1441 to_ib_umem_odp(mr->umem)->private = mr;
1442 init_waitqueue_head(&mr->q_deferred_work);
1443 atomic_set(&mr->num_deferred_work, 0);
1444 err = xa_err(xa_store(&dev->odp_mkeys,
1445 mlx5_base_mkey(mr->mmkey.key), &mr->mmkey,
1446 GFP_KERNEL));
1447 if (err) {
1448 dereg_mr(dev, mr);
1449 return ERR_PTR(err);
1450 }
1451
1452 err = mlx5_ib_init_odp_mr(mr, xlt_with_umr);
1453 if (err) {
1454 dereg_mr(dev, mr);
1455 return ERR_PTR(err);
1456 }
1457 }
1458
1459 return &mr->ibmr;
1460 error:
1461 ib_umem_release(umem);
1462 return ERR_PTR(err);
1463 }
1464
1465 /**
1466 * mlx5_mr_cache_invalidate - Fence all DMA on the MR
1467 * @mr: The MR to fence
1468 *
1469 * Upon return, the NIC will not be doing any DMA to the pages under the MR,
1470 * and any DMA in progress will be completed. Failure of this function
1471 * indicates the HW has failed catastrophically.
1472 */
1473 int mlx5_mr_cache_invalidate(struct mlx5_ib_mr *mr)
1474 {
1475 struct mlx5_umr_wr umrwr = {};
1476
1477 if (mr->dev->mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
1478 return 0;
1479
1480 umrwr.wr.send_flags = MLX5_IB_SEND_UMR_DISABLE_MR |
1481 MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS;
1482 umrwr.wr.opcode = MLX5_IB_WR_UMR;
1483 umrwr.pd = mr->dev->umrc.pd;
1484 umrwr.mkey = mr->mmkey.key;
1485 umrwr.ignore_free_state = 1;
1486
1487 return mlx5_ib_post_send_wait(mr->dev, &umrwr);
1488 }
1489
1490 static int rereg_umr(struct ib_pd *pd, struct mlx5_ib_mr *mr,
1491 int access_flags, int flags)
1492 {
1493 struct mlx5_ib_dev *dev = to_mdev(pd->device);
1494 struct mlx5_umr_wr umrwr = {};
1495 int err;
1496
1497 umrwr.wr.send_flags = MLX5_IB_SEND_UMR_FAIL_IF_FREE;
1498
1499 umrwr.wr.opcode = MLX5_IB_WR_UMR;
1500 umrwr.mkey = mr->mmkey.key;
1501
1502 if (flags & IB_MR_REREG_PD || flags & IB_MR_REREG_ACCESS) {
1503 umrwr.pd = pd;
1504 umrwr.access_flags = access_flags;
1505 umrwr.wr.send_flags |= MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS;
1506 }
1507
1508 err = mlx5_ib_post_send_wait(dev, &umrwr);
1509
1510 return err;
1511 }
1512
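/*
 * Re-register an existing user MR. Unless only the PD changes, the umem is
 * replaced first; the mkey is then either updated in place with UMR
 * (translation and/or PD/access) or, when UMR cannot express the change,
 * destroyed and recreated through reg_create().
 */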
1513 int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
1514 u64 length, u64 virt_addr, int new_access_flags,
1515 struct ib_pd *new_pd, struct ib_udata *udata)
1516 {
1517 struct mlx5_ib_dev *dev = to_mdev(ib_mr->device);
1518 struct mlx5_ib_mr *mr = to_mmr(ib_mr);
1519 struct ib_pd *pd = (flags & IB_MR_REREG_PD) ? new_pd : ib_mr->pd;
1520 int access_flags = flags & IB_MR_REREG_ACCESS ?
1521 new_access_flags :
1522 mr->access_flags;
1523 int page_shift = 0;
1524 int upd_flags = 0;
1525 int npages = 0;
1526 int ncont = 0;
1527 int order = 0;
1528 u64 addr, len;
1529 int err;
1530
1531 mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n",
1532 start, virt_addr, length, access_flags);
1533
1534 if (!mr->umem)
1535 return -EINVAL;
1536
1537 if (is_odp_mr(mr))
1538 return -EOPNOTSUPP;
1539
1540 if (flags & IB_MR_REREG_TRANS) {
1541 addr = virt_addr;
1542 len = length;
1543 } else {
1544 addr = mr->umem->address;
1545 len = mr->umem->length;
1546 }
1547
1548 if (flags != IB_MR_REREG_PD) {
1549 /*
1550 * Replace umem. This needs to be done whether or not UMR is
1551 * used.
1552 */
1553 flags |= IB_MR_REREG_TRANS;
1554 atomic_sub(mr->npages, &dev->mdev->priv.reg_pages);
1555 mr->npages = 0;
1556 ib_umem_release(mr->umem);
1557 mr->umem = NULL;
1558
1559 err = mr_umem_get(dev, addr, len, access_flags, &mr->umem,
1560 &npages, &page_shift, &ncont, &order);
1561 if (err)
1562 goto err;
1563 mr->npages = ncont;
1564 atomic_add(mr->npages, &dev->mdev->priv.reg_pages);
1565 }
1566
1567 if (!mlx5_ib_can_reconfig_with_umr(dev, mr->access_flags,
1568 access_flags) ||
1569 !mlx5_ib_can_load_pas_with_umr(dev, len) ||
1570 (flags & IB_MR_REREG_TRANS &&
1571 !mlx5_ib_pas_fits_in_mr(mr, addr, len))) {
1572 /*
1573 * UMR can't be used - MKey needs to be replaced.
1574 */
1575 if (mr->cache_ent)
1576 detach_mr_from_cache(mr);
1577 err = destroy_mkey(dev, mr);
1578 if (err)
1579 goto err;
1580
1581 mr = reg_create(ib_mr, pd, addr, len, mr->umem, ncont,
1582 page_shift, access_flags, true);
1583
1584 if (IS_ERR(mr)) {
1585 err = PTR_ERR(mr);
1586 mr = to_mmr(ib_mr);
1587 goto err;
1588 }
1589 } else {
1590 /*
1591 * Send a UMR WQE
1592 */
1593 mr->ibmr.pd = pd;
1594 mr->access_flags = access_flags;
1595 mr->mmkey.iova = addr;
1596 mr->mmkey.size = len;
1597 mr->mmkey.pd = to_mpd(pd)->pdn;
1598
1599 if (flags & IB_MR_REREG_TRANS) {
1600 upd_flags = MLX5_IB_UPD_XLT_ADDR;
1601 if (flags & IB_MR_REREG_PD)
1602 upd_flags |= MLX5_IB_UPD_XLT_PD;
1603 if (flags & IB_MR_REREG_ACCESS)
1604 upd_flags |= MLX5_IB_UPD_XLT_ACCESS;
1605 err = mlx5_ib_update_xlt(mr, 0, npages, page_shift,
1606 upd_flags);
1607 } else {
1608 err = rereg_umr(pd, mr, access_flags, flags);
1609 }
1610
1611 if (err)
1612 goto err;
1613 }
1614
1615 set_mr_fields(dev, mr, len, access_flags);
1616
1617 return 0;
1618
1619 err:
1620 ib_umem_release(mr->umem);
1621 mr->umem = NULL;
1622
1623 clean_mr(dev, mr);
1624 return err;
1625 }
1626
1627 static int
1628 mlx5_alloc_priv_descs(struct ib_device *device,
1629 struct mlx5_ib_mr *mr,
1630 int ndescs,
1631 int desc_size)
1632 {
1633 int size = ndescs * desc_size;
1634 int add_size;
1635 int ret;
1636
1637 add_size = max_t(int, MLX5_UMR_ALIGN - ARCH_KMALLOC_MINALIGN, 0);
1638
1639 mr->descs_alloc = kzalloc(size + add_size, GFP_KERNEL);
1640 if (!mr->descs_alloc)
1641 return -ENOMEM;
1642
1643 mr->descs = PTR_ALIGN(mr->descs_alloc, MLX5_UMR_ALIGN);
1644
1645 mr->desc_map = dma_map_single(device->dev.parent, mr->descs,
1646 size, DMA_TO_DEVICE);
1647 if (dma_mapping_error(device->dev.parent, mr->desc_map)) {
1648 ret = -ENOMEM;
1649 goto err;
1650 }
1651
1652 return 0;
1653 err:
1654 kfree(mr->descs_alloc);
1655
1656 return ret;
1657 }
1658
1659 static void
1660 mlx5_free_priv_descs(struct mlx5_ib_mr *mr)
1661 {
1662 if (mr->descs) {
1663 struct ib_device *device = mr->ibmr.device;
1664 int size = mr->max_descs * mr->desc_size;
1665
1666 dma_unmap_single(device->dev.parent, mr->desc_map,
1667 size, DMA_TO_DEVICE);
1668 kfree(mr->descs_alloc);
1669 mr->descs = NULL;
1670 }
1671 }
1672
1673 static void clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
1674 {
1675 if (mr->sig) {
1676 if (mlx5_core_destroy_psv(dev->mdev,
1677 mr->sig->psv_memory.psv_idx))
1678 mlx5_ib_warn(dev, "failed to destroy mem psv %d\n",
1679 mr->sig->psv_memory.psv_idx);
1680 if (mlx5_core_destroy_psv(dev->mdev,
1681 mr->sig->psv_wire.psv_idx))
1682 mlx5_ib_warn(dev, "failed to destroy wire psv %d\n",
1683 mr->sig->psv_wire.psv_idx);
1684 xa_erase(&dev->sig_mrs, mlx5_base_mkey(mr->mmkey.key));
1685 kfree(mr->sig);
1686 mr->sig = NULL;
1687 }
1688
1689 if (!mr->cache_ent) {
1690 destroy_mkey(dev, mr);
1691 mlx5_free_priv_descs(mr);
1692 }
1693 }
1694
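/*
 * Final teardown of an MR: stop all DMA (ODP fence or clean_mr()), return a
 * cache-backed MR to its bucket via mlx5_mr_cache_free() or free it, then
 * release the umem and update the reg_pages accounting.
 */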
1695 static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
1696 {
1697 int npages = mr->npages;
1698 struct ib_umem *umem = mr->umem;
1699
1700 /* Stop all DMA */
1701 if (is_odp_mr(mr))
1702 mlx5_ib_fence_odp_mr(mr);
1703 else
1704 clean_mr(dev, mr);
1705
1706 if (mr->cache_ent)
1707 mlx5_mr_cache_free(dev, mr);
1708 else
1709 kfree(mr);
1710
1711 ib_umem_release(umem);
1712 atomic_sub(npages, &dev->mdev->priv.reg_pages);
1713
1714 }
1715
1716 int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
1717 {
1718 struct mlx5_ib_mr *mmr = to_mmr(ibmr);
1719
1720 if (ibmr->type == IB_MR_TYPE_INTEGRITY) {
1721 dereg_mr(to_mdev(mmr->mtt_mr->ibmr.device), mmr->mtt_mr);
1722 dereg_mr(to_mdev(mmr->klm_mr->ibmr.device), mmr->klm_mr);
1723 }
1724
1725 if (is_odp_mr(mmr) && to_ib_umem_odp(mmr->umem)->is_implicit_odp) {
1726 mlx5_ib_free_implicit_mr(mmr);
1727 return 0;
1728 }
1729
1730 dereg_mr(to_mdev(ibmr->device), mmr);
1731
1732 return 0;
1733 }
1734
1735 static void mlx5_set_umr_free_mkey(struct ib_pd *pd, u32 *in, int ndescs,
1736 int access_mode, int page_shift)
1737 {
1738 void *mkc;
1739
1740 mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
1741
1742 /* This is only used from the kernel, so setting the PD is OK. */
1743 set_mkc_access_pd_addr_fields(mkc, 0, 0, pd);
1744 MLX5_SET(mkc, mkc, free, 1);
1745 MLX5_SET(mkc, mkc, translations_octword_size, ndescs);
1746 MLX5_SET(mkc, mkc, access_mode_1_0, access_mode & 0x3);
1747 MLX5_SET(mkc, mkc, access_mode_4_2, (access_mode >> 2) & 0x7);
1748 MLX5_SET(mkc, mkc, umr_en, 1);
1749 MLX5_SET(mkc, mkc, log_page_size, page_shift);
1750 }
1751
1752 static int _mlx5_alloc_mkey_descs(struct ib_pd *pd, struct mlx5_ib_mr *mr,
1753 int ndescs, int desc_size, int page_shift,
1754 int access_mode, u32 *in, int inlen)
1755 {
1756 struct mlx5_ib_dev *dev = to_mdev(pd->device);
1757 int err;
1758
1759 mr->access_mode = access_mode;
1760 mr->desc_size = desc_size;
1761 mr->max_descs = ndescs;
1762
1763 err = mlx5_alloc_priv_descs(pd->device, mr, ndescs, desc_size);
1764 if (err)
1765 return err;
1766
1767 mlx5_set_umr_free_mkey(pd, in, ndescs, access_mode, page_shift);
1768
1769 err = mlx5_ib_create_mkey(dev, &mr->mmkey, in, inlen);
1770 if (err)
1771 goto err_free_descs;
1772
1773 mr->mmkey.type = MLX5_MKEY_MR;
1774 mr->ibmr.lkey = mr->mmkey.key;
1775 mr->ibmr.rkey = mr->mmkey.key;
1776
1777 return 0;
1778
1779 err_free_descs:
1780 mlx5_free_priv_descs(mr);
1781 return err;
1782 }
1783
1784 static struct mlx5_ib_mr *mlx5_ib_alloc_pi_mr(struct ib_pd *pd,
1785 u32 max_num_sg, u32 max_num_meta_sg,
1786 int desc_size, int access_mode)
1787 {
1788 int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
1789 int ndescs = ALIGN(max_num_sg + max_num_meta_sg, 4);
1790 int page_shift = 0;
1791 struct mlx5_ib_mr *mr;
1792 u32 *in;
1793 int err;
1794
1795 mr = kzalloc(sizeof(*mr), GFP_KERNEL);
1796 if (!mr)
1797 return ERR_PTR(-ENOMEM);
1798
1799 mr->ibmr.pd = pd;
1800 mr->ibmr.device = pd->device;
1801
1802 in = kzalloc(inlen, GFP_KERNEL);
1803 if (!in) {
1804 err = -ENOMEM;
1805 goto err_free;
1806 }
1807
1808 if (access_mode == MLX5_MKC_ACCESS_MODE_MTT)
1809 page_shift = PAGE_SHIFT;
1810
1811 err = _mlx5_alloc_mkey_descs(pd, mr, ndescs, desc_size, page_shift,
1812 access_mode, in, inlen);
1813 if (err)
1814 goto err_free_in;
1815
1816 mr->umem = NULL;
1817 kfree(in);
1818
1819 return mr;
1820
1821 err_free_in:
1822 kfree(in);
1823 err_free:
1824 kfree(mr);
1825 return ERR_PTR(err);
1826 }
1827
1828 static int mlx5_alloc_mem_reg_descs(struct ib_pd *pd, struct mlx5_ib_mr *mr,
1829 int ndescs, u32 *in, int inlen)
1830 {
1831 return _mlx5_alloc_mkey_descs(pd, mr, ndescs, sizeof(struct mlx5_mtt),
1832 PAGE_SHIFT, MLX5_MKC_ACCESS_MODE_MTT, in,
1833 inlen);
1834 }
1835
1836 static int mlx5_alloc_sg_gaps_descs(struct ib_pd *pd, struct mlx5_ib_mr *mr,
1837 int ndescs, u32 *in, int inlen)
1838 {
1839 return _mlx5_alloc_mkey_descs(pd, mr, ndescs, sizeof(struct mlx5_klm),
1840 0, MLX5_MKC_ACCESS_MODE_KLMS, in, inlen);
1841 }
1842
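/*
 * Build the pieces needed for an IB_MR_TYPE_INTEGRITY MR: the signature
 * context with its memory and wire PSVs, a KLM and an MTT PI MR, and the
 * BSF-enabled KLM mkey that ties them together.
 */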
1843 static int mlx5_alloc_integrity_descs(struct ib_pd *pd, struct mlx5_ib_mr *mr,
1844 int max_num_sg, int max_num_meta_sg,
1845 u32 *in, int inlen)
1846 {
1847 struct mlx5_ib_dev *dev = to_mdev(pd->device);
1848 u32 psv_index[2];
1849 void *mkc;
1850 int err;
1851
1852 mr->sig = kzalloc(sizeof(*mr->sig), GFP_KERNEL);
1853 if (!mr->sig)
1854 return -ENOMEM;
1855
1856 /* create mem & wire PSVs */
1857 err = mlx5_core_create_psv(dev->mdev, to_mpd(pd)->pdn, 2, psv_index);
1858 if (err)
1859 goto err_free_sig;
1860
1861 mr->sig->psv_memory.psv_idx = psv_index[0];
1862 mr->sig->psv_wire.psv_idx = psv_index[1];
1863
1864 mr->sig->sig_status_checked = true;
1865 mr->sig->sig_err_exists = false;
1866 /* Next UMR, Arm SIGERR */
1867 ++mr->sig->sigerr_count;
1868 mr->klm_mr = mlx5_ib_alloc_pi_mr(pd, max_num_sg, max_num_meta_sg,
1869 sizeof(struct mlx5_klm),
1870 MLX5_MKC_ACCESS_MODE_KLMS);
1871 if (IS_ERR(mr->klm_mr)) {
1872 err = PTR_ERR(mr->klm_mr);
1873 goto err_destroy_psv;
1874 }
1875 mr->mtt_mr = mlx5_ib_alloc_pi_mr(pd, max_num_sg, max_num_meta_sg,
1876 sizeof(struct mlx5_mtt),
1877 MLX5_MKC_ACCESS_MODE_MTT);
1878 if (IS_ERR(mr->mtt_mr)) {
1879 err = PTR_ERR(mr->mtt_mr);
1880 goto err_free_klm_mr;
1881 }
1882
1883 /* Set bsf descriptors for mkey */
1884 mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
1885 MLX5_SET(mkc, mkc, bsf_en, 1);
1886 MLX5_SET(mkc, mkc, bsf_octword_size, MLX5_MKEY_BSF_OCTO_SIZE);
1887
1888 err = _mlx5_alloc_mkey_descs(pd, mr, 4, sizeof(struct mlx5_klm), 0,
1889 MLX5_MKC_ACCESS_MODE_KLMS, in, inlen);
1890 if (err)
1891 goto err_free_mtt_mr;
1892
1893 err = xa_err(xa_store(&dev->sig_mrs, mlx5_base_mkey(mr->mmkey.key),
1894 mr->sig, GFP_KERNEL));
1895 if (err)
1896 goto err_free_descs;
1897 return 0;
1898
1899 err_free_descs:
1900 destroy_mkey(dev, mr);
1901 mlx5_free_priv_descs(mr);
1902 err_free_mtt_mr:
1903 dereg_mr(to_mdev(mr->mtt_mr->ibmr.device), mr->mtt_mr);
1904 mr->mtt_mr = NULL;
1905 err_free_klm_mr:
1906 dereg_mr(to_mdev(mr->klm_mr->ibmr.device), mr->klm_mr);
1907 mr->klm_mr = NULL;
1908 err_destroy_psv:
1909 if (mlx5_core_destroy_psv(dev->mdev, mr->sig->psv_memory.psv_idx))
1910 mlx5_ib_warn(dev, "failed to destroy mem psv %d\n",
1911 mr->sig->psv_memory.psv_idx);
1912 if (mlx5_core_destroy_psv(dev->mdev, mr->sig->psv_wire.psv_idx))
1913 mlx5_ib_warn(dev, "failed to destroy wire psv %d\n",
1914 mr->sig->psv_wire.psv_idx);
1915 err_free_sig:
1916 kfree(mr->sig);
1917
1918 return err;
1919 }
1920
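/*
 * Common allocation path shared by ib_alloc_mr() and ib_alloc_mr_integrity();
 * dispatches on mr_type to the helpers above.
 */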
static struct ib_mr *__mlx5_ib_alloc_mr(struct ib_pd *pd,
					enum ib_mr_type mr_type, u32 max_num_sg,
					u32 max_num_meta_sg)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
	int ndescs = ALIGN(max_num_sg, 4);
	struct mlx5_ib_mr *mr;
	u32 *in;
	int err;

	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	in = kzalloc(inlen, GFP_KERNEL);
	if (!in) {
		err = -ENOMEM;
		goto err_free;
	}

	mr->ibmr.device = pd->device;
	mr->umem = NULL;

	switch (mr_type) {
	case IB_MR_TYPE_MEM_REG:
		err = mlx5_alloc_mem_reg_descs(pd, mr, ndescs, in, inlen);
		break;
	case IB_MR_TYPE_SG_GAPS:
		err = mlx5_alloc_sg_gaps_descs(pd, mr, ndescs, in, inlen);
		break;
	case IB_MR_TYPE_INTEGRITY:
		err = mlx5_alloc_integrity_descs(pd, mr, max_num_sg,
						 max_num_meta_sg, in, inlen);
		break;
	default:
		mlx5_ib_warn(dev, "Invalid mr type %d\n", mr_type);
		err = -EINVAL;
	}

	if (err)
		goto err_free_in;

	kfree(in);

	return &mr->ibmr;

err_free_in:
	kfree(in);
err_free:
	kfree(mr);
	return ERR_PTR(err);
}

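/*
 * Entry point behind ib_alloc_mr() for this device.
 *
 * Illustrative caller-side sketch (ULP code, not part of this file; error
 * handling omitted):
 *
 *	mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, 32);
 *	n = ib_map_mr_sg(mr, sg, sg_nents, NULL, PAGE_SIZE);
 *	... post an IB_WR_REG_MR work request, use mr->lkey / mr->rkey ...
 *	ib_dereg_mr(mr);
 */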
struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
			       u32 max_num_sg)
{
	return __mlx5_ib_alloc_mr(pd, mr_type, max_num_sg, 0);
}

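/* Entry point behind ib_alloc_mr_integrity(). */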
struct ib_mr *mlx5_ib_alloc_mr_integrity(struct ib_pd *pd,
					 u32 max_num_sg, u32 max_num_meta_sg)
{
	return __mlx5_ib_alloc_mr(pd, IB_MR_TYPE_INTEGRITY, max_num_sg,
				  max_num_meta_sg);
}

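/*
 * Allocate a memory window: a KLM-based, UMR-enabled mkey for the window.
 * Type-2 windows additionally enable remote invalidation (en_rinval).
 * With ODP enabled the mkey is tracked in dev->odp_mkeys.
 */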
int mlx5_ib_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata)
{
	struct mlx5_ib_dev *dev = to_mdev(ibmw->device);
	int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
	struct mlx5_ib_mw *mw = to_mmw(ibmw);
	u32 *in = NULL;
	void *mkc;
	int ndescs;
	int err;
	struct mlx5_ib_alloc_mw req = {};
	struct {
		__u32 comp_mask;
		__u32 response_length;
	} resp = {};

	err = ib_copy_from_udata(&req, udata, min(udata->inlen, sizeof(req)));
	if (err)
		return err;

	if (req.comp_mask || req.reserved1 || req.reserved2)
		return -EOPNOTSUPP;

	if (udata->inlen > sizeof(req) &&
	    !ib_is_udata_cleared(udata, sizeof(req),
				 udata->inlen - sizeof(req)))
		return -EOPNOTSUPP;

	ndescs = req.num_klms ? roundup(req.num_klms, 4) : roundup(1, 4);

	in = kzalloc(inlen, GFP_KERNEL);
	if (!in) {
		err = -ENOMEM;
		goto free;
	}

	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);

	MLX5_SET(mkc, mkc, free, 1);
	MLX5_SET(mkc, mkc, translations_octword_size, ndescs);
	MLX5_SET(mkc, mkc, pd, to_mpd(ibmw->pd)->pdn);
	MLX5_SET(mkc, mkc, umr_en, 1);
	MLX5_SET(mkc, mkc, lr, 1);
	MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_KLMS);
	MLX5_SET(mkc, mkc, en_rinval, !!(ibmw->type == IB_MW_TYPE_2));
	MLX5_SET(mkc, mkc, qpn, 0xffffff);

	err = mlx5_ib_create_mkey(dev, &mw->mmkey, in, inlen);
	if (err)
		goto free;

	mw->mmkey.type = MLX5_MKEY_MW;
	ibmw->rkey = mw->mmkey.key;
	mw->ndescs = ndescs;

	resp.response_length =
		min(offsetofend(typeof(resp), response_length), udata->outlen);
	if (resp.response_length) {
		err = ib_copy_to_udata(udata, &resp, resp.response_length);
		if (err)
			goto free_mkey;
	}

	if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) {
		err = xa_err(xa_store(&dev->odp_mkeys,
				      mlx5_base_mkey(mw->mmkey.key), &mw->mmkey,
				      GFP_KERNEL));
		if (err)
			goto free_mkey;
	}

	kfree(in);
	return 0;

free_mkey:
	mlx5_core_destroy_mkey(dev->mdev, &mw->mmkey);
free:
	kfree(in);
	return err;
}

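/*
 * Destroy a memory window.  With ODP enabled the mkey is first removed
 * from dev->odp_mkeys and SRCU is synchronized so no page-fault handler
 * can still be using it.
 */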
int mlx5_ib_dealloc_mw(struct ib_mw *mw)
{
	struct mlx5_ib_dev *dev = to_mdev(mw->device);
	struct mlx5_ib_mw *mmw = to_mmw(mw);

	if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) {
		xa_erase(&dev->odp_mkeys, mlx5_base_mkey(mmw->mmkey.key));
		/*
		 * pagefault_single_data_segment() may be accessing mmw under
		 * SRCU if the user bound an ODP MR to this MW.
		 */
		synchronize_srcu(&dev->odp_srcu);
	}

	return mlx5_core_destroy_mkey(dev->mdev, &mmw->mmkey);
}

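/*
 * Report (and clear) the signature error state of an integrity MR; called
 * via ib_check_mr_status().
 */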
int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask,
			    struct ib_mr_status *mr_status)
{
	struct mlx5_ib_mr *mmr = to_mmr(ibmr);
	int ret = 0;

	if (check_mask & ~IB_MR_CHECK_SIG_STATUS) {
		pr_err("Invalid status check mask\n");
		ret = -EINVAL;
		goto done;
	}

	mr_status->fail_status = 0;
	if (check_mask & IB_MR_CHECK_SIG_STATUS) {
		if (!mmr->sig) {
			ret = -EINVAL;
			pr_err("signature status check requested on a non-signature enabled MR\n");
			goto done;
		}

		mmr->sig->sig_status_checked = true;
		if (!mmr->sig->sig_err_exists)
			goto done;

		if (ibmr->lkey == mmr->sig->err_item.key) {
			memcpy(&mr_status->sig_err, &mmr->sig->err_item,
			       sizeof(mr_status->sig_err));
		} else {
			mr_status->sig_err.err_type = IB_SIG_BAD_GUARD;
			mr_status->sig_err.sig_err_offset = 0;
			mr_status->sig_err.key = mmr->sig->err_item.key;
		}

		mmr->sig->sig_err_exists = false;
		mr_status->fail_status |= IB_MR_CHECK_SIG_STATUS;
	}

done:
	return ret;
}

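/*
 * Fast path for single-entry data (and optionally metadata) SG lists:
 * describe the buffers directly, to be accessed through local_dma_lkey,
 * with no UMR needed.  Returns the number of entries mapped (0, 1 or 2).
 */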
static int
mlx5_ib_map_pa_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg,
			int data_sg_nents, unsigned int *data_sg_offset,
			struct scatterlist *meta_sg, int meta_sg_nents,
			unsigned int *meta_sg_offset)
{
	struct mlx5_ib_mr *mr = to_mmr(ibmr);
	unsigned int sg_offset = 0;
	int n = 0;

	mr->meta_length = 0;
	if (data_sg_nents == 1) {
		n++;
		mr->ndescs = 1;
		if (data_sg_offset)
			sg_offset = *data_sg_offset;
		mr->data_length = sg_dma_len(data_sg) - sg_offset;
		mr->data_iova = sg_dma_address(data_sg) + sg_offset;
		if (meta_sg_nents == 1) {
			n++;
			mr->meta_ndescs = 1;
			if (meta_sg_offset)
				sg_offset = *meta_sg_offset;
			else
				sg_offset = 0;
			mr->meta_length = sg_dma_len(meta_sg) - sg_offset;
			mr->pi_iova = sg_dma_address(meta_sg) + sg_offset;
		}
		ibmr->length = mr->data_length + mr->meta_length;
	}

	return n;
}

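/*
 * Translate the data and (optional) metadata SG lists into KLM
 * descriptors, capped at mr->max_descs.  Returns the number of KLM
 * entries written.
 */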
static int
mlx5_ib_sg_to_klms(struct mlx5_ib_mr *mr,
		   struct scatterlist *sgl,
		   unsigned short sg_nents,
		   unsigned int *sg_offset_p,
		   struct scatterlist *meta_sgl,
		   unsigned short meta_sg_nents,
		   unsigned int *meta_sg_offset_p)
{
	struct scatterlist *sg = sgl;
	struct mlx5_klm *klms = mr->descs;
	unsigned int sg_offset = sg_offset_p ? *sg_offset_p : 0;
	u32 lkey = mr->ibmr.pd->local_dma_lkey;
	int i, j = 0;

	mr->ibmr.iova = sg_dma_address(sg) + sg_offset;
	mr->ibmr.length = 0;

	for_each_sg(sgl, sg, sg_nents, i) {
		if (unlikely(i >= mr->max_descs))
			break;
		klms[i].va = cpu_to_be64(sg_dma_address(sg) + sg_offset);
		klms[i].bcount = cpu_to_be32(sg_dma_len(sg) - sg_offset);
		klms[i].key = cpu_to_be32(lkey);
		mr->ibmr.length += sg_dma_len(sg) - sg_offset;

		sg_offset = 0;
	}

	if (sg_offset_p)
		*sg_offset_p = sg_offset;

	mr->ndescs = i;
	mr->data_length = mr->ibmr.length;

	if (meta_sg_nents) {
		sg = meta_sgl;
		sg_offset = meta_sg_offset_p ? *meta_sg_offset_p : 0;
		for_each_sg(meta_sgl, sg, meta_sg_nents, j) {
			if (unlikely(i + j >= mr->max_descs))
				break;
			klms[i + j].va = cpu_to_be64(sg_dma_address(sg) +
						     sg_offset);
			klms[i + j].bcount = cpu_to_be32(sg_dma_len(sg) -
							 sg_offset);
			klms[i + j].key = cpu_to_be32(lkey);
			mr->ibmr.length += sg_dma_len(sg) - sg_offset;

			sg_offset = 0;
		}
		if (meta_sg_offset_p)
			*meta_sg_offset_p = sg_offset;

		mr->meta_ndescs = j;
		mr->meta_length = mr->ibmr.length - mr->data_length;
	}

	return i + j;
}

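/* ib_sg_to_pages() callback: append one page address as an MTT entry. */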
static int mlx5_set_page(struct ib_mr *ibmr, u64 addr)
{
	struct mlx5_ib_mr *mr = to_mmr(ibmr);
	__be64 *descs;

	if (unlikely(mr->ndescs == mr->max_descs))
		return -ENOMEM;

	descs = mr->descs;
	descs[mr->ndescs++] = cpu_to_be64(addr | MLX5_EN_RD | MLX5_EN_WR);

	return 0;
}

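/*
 * Same as mlx5_set_page(), but the entry is placed after the data
 * descriptors so data and protection information share one descriptor
 * array.
 */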
static int mlx5_set_page_pi(struct ib_mr *ibmr, u64 addr)
{
	struct mlx5_ib_mr *mr = to_mmr(ibmr);
	__be64 *descs;

	if (unlikely(mr->ndescs + mr->meta_ndescs == mr->max_descs))
		return -ENOMEM;

	descs = mr->descs;
	descs[mr->ndescs + mr->meta_ndescs++] =
		cpu_to_be64(addr | MLX5_EN_RD | MLX5_EN_WR);

	return 0;
}

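/*
 * Map data + metadata through the internal MTT PI MR.  The metadata pages
 * are appended after the data pages, and pi_iova records the address at
 * which the protection information starts within the mapping.
 */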
static int
mlx5_ib_map_mtt_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg,
			 int data_sg_nents, unsigned int *data_sg_offset,
			 struct scatterlist *meta_sg, int meta_sg_nents,
			 unsigned int *meta_sg_offset)
{
	struct mlx5_ib_mr *mr = to_mmr(ibmr);
	struct mlx5_ib_mr *pi_mr = mr->mtt_mr;
	int n;

	pi_mr->ndescs = 0;
	pi_mr->meta_ndescs = 0;
	pi_mr->meta_length = 0;

	ib_dma_sync_single_for_cpu(ibmr->device, pi_mr->desc_map,
				   pi_mr->desc_size * pi_mr->max_descs,
				   DMA_TO_DEVICE);

	pi_mr->ibmr.page_size = ibmr->page_size;
	n = ib_sg_to_pages(&pi_mr->ibmr, data_sg, data_sg_nents, data_sg_offset,
			   mlx5_set_page);
	if (n != data_sg_nents)
		return n;

	pi_mr->data_iova = pi_mr->ibmr.iova;
	pi_mr->data_length = pi_mr->ibmr.length;
	pi_mr->ibmr.length = pi_mr->data_length;
	ibmr->length = pi_mr->data_length;

	if (meta_sg_nents) {
		u64 page_mask = ~((u64)ibmr->page_size - 1);
		u64 iova = pi_mr->data_iova;

		n += ib_sg_to_pages(&pi_mr->ibmr, meta_sg, meta_sg_nents,
				    meta_sg_offset, mlx5_set_page_pi);

		pi_mr->meta_length = pi_mr->ibmr.length;
		/*
		 * The PI address for the HW is computed relative to the first
		 * data page: first data page address + size of the data pages
		 * + metadata offset within the first metadata page.
		 */
		pi_mr->pi_iova = (iova & page_mask) +
				 pi_mr->ndescs * ibmr->page_size +
				 (pi_mr->ibmr.iova & ~page_mask);
		/*
		 * In order to use one MTT MR for data and metadata, we also
		 * register the gaps between the end of the data and the start
		 * of the metadata (the sig MR will verify that the HW accesses
		 * the right addresses). This mapping is safe because we use an
		 * internal mkey for the registration.
		 */
		pi_mr->ibmr.length = pi_mr->pi_iova + pi_mr->meta_length - iova;
		pi_mr->ibmr.iova = iova;
		ibmr->length += pi_mr->meta_length;
	}

	ib_dma_sync_single_for_device(ibmr->device, pi_mr->desc_map,
				      pi_mr->desc_size * pi_mr->max_descs,
				      DMA_TO_DEVICE);

	return n;
}

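/*
 * Map data + metadata through the internal KLM PI MR; used when the SG
 * layout cannot be expressed with MTT descriptors.
 */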
static int
mlx5_ib_map_klm_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg,
			 int data_sg_nents, unsigned int *data_sg_offset,
			 struct scatterlist *meta_sg, int meta_sg_nents,
			 unsigned int *meta_sg_offset)
{
	struct mlx5_ib_mr *mr = to_mmr(ibmr);
	struct mlx5_ib_mr *pi_mr = mr->klm_mr;
	int n;

	pi_mr->ndescs = 0;
	pi_mr->meta_ndescs = 0;
	pi_mr->meta_length = 0;

	ib_dma_sync_single_for_cpu(ibmr->device, pi_mr->desc_map,
				   pi_mr->desc_size * pi_mr->max_descs,
				   DMA_TO_DEVICE);

	n = mlx5_ib_sg_to_klms(pi_mr, data_sg, data_sg_nents, data_sg_offset,
			       meta_sg, meta_sg_nents, meta_sg_offset);

	ib_dma_sync_single_for_device(ibmr->device, pi_mr->desc_map,
				      pi_mr->desc_size * pi_mr->max_descs,
				      DMA_TO_DEVICE);

	/* This is a zero-based memory region */
	pi_mr->data_iova = 0;
	pi_mr->ibmr.iova = 0;
	pi_mr->pi_iova = pi_mr->data_length;
	ibmr->length = pi_mr->ibmr.length;

	return n;
}

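/*
 * Entry point behind ib_map_mr_sg_pi(): try the cheapest mapping first
 * (direct PA, then MTT) and fall back to KLM only when required.
 */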
int mlx5_ib_map_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg,
			 int data_sg_nents, unsigned int *data_sg_offset,
			 struct scatterlist *meta_sg, int meta_sg_nents,
			 unsigned int *meta_sg_offset)
{
	struct mlx5_ib_mr *mr = to_mmr(ibmr);
	struct mlx5_ib_mr *pi_mr = NULL;
	int n;

	WARN_ON(ibmr->type != IB_MR_TYPE_INTEGRITY);

	mr->ndescs = 0;
	mr->data_length = 0;
	mr->data_iova = 0;
	mr->meta_ndescs = 0;
	mr->pi_iova = 0;
	/*
	 * As a performance optimization, if possible, avoid the UMR operation
	 * needed to register the data/metadata buffers: first try to map the
	 * sg lists to PA descriptors with local_dma_lkey, and fall back to UMR
	 * only if that fails.
	 */
	n = mlx5_ib_map_pa_mr_sg_pi(ibmr, data_sg, data_sg_nents,
				    data_sg_offset, meta_sg, meta_sg_nents,
				    meta_sg_offset);
	if (n == data_sg_nents + meta_sg_nents)
		goto out;
	/*
	 * As a performance optimization, if possible, avoid mapping the sg
	 * lists to KLM descriptors: first try to map them to MTT descriptors
	 * and fall back to KLM only if that fails. MTT descriptors are more
	 * efficient for the HW to work with (especially under high load);
	 * use KLM (indirect access) only when it is mandatory.
	 */
	pi_mr = mr->mtt_mr;
	n = mlx5_ib_map_mtt_mr_sg_pi(ibmr, data_sg, data_sg_nents,
				     data_sg_offset, meta_sg, meta_sg_nents,
				     meta_sg_offset);
	if (n == data_sg_nents + meta_sg_nents)
		goto out;

	pi_mr = mr->klm_mr;
	n = mlx5_ib_map_klm_mr_sg_pi(ibmr, data_sg, data_sg_nents,
				     data_sg_offset, meta_sg, meta_sg_nents,
				     meta_sg_offset);
	if (unlikely(n != data_sg_nents + meta_sg_nents))
		return -ENOMEM;

out:
	/* This is a zero-based memory region */
	ibmr->iova = 0;
	mr->pi_mr = pi_mr;
	if (pi_mr)
		ibmr->sig_attrs->meta_length = pi_mr->meta_length;
	else
		ibmr->sig_attrs->meta_length = mr->meta_length;

	return 0;
}

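/*
 * Entry point behind ib_map_mr_sg() for regular (non-integrity) MRs.
 *
 * Illustrative caller-side sketch (ULP code, not part of this file; 'sgt'
 * is assumed to be an already DMA-mapped sg_table):
 *
 *	n = ib_map_mr_sg(mr, sgt->sgl, sgt->nents, NULL, PAGE_SIZE);
 *	if (n < sgt->nents)
 *		... the SG list did not fit in mr->max_descs entries ...
 */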
int mlx5_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
		      unsigned int *sg_offset)
{
	struct mlx5_ib_mr *mr = to_mmr(ibmr);
	int n;

	mr->ndescs = 0;

	ib_dma_sync_single_for_cpu(ibmr->device, mr->desc_map,
				   mr->desc_size * mr->max_descs,
				   DMA_TO_DEVICE);

	if (mr->access_mode == MLX5_MKC_ACCESS_MODE_KLMS)
		n = mlx5_ib_sg_to_klms(mr, sg, sg_nents, sg_offset, NULL, 0,
				       NULL);
	else
		n = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset,
				   mlx5_set_page);

	ib_dma_sync_single_for_device(ibmr->device, mr->desc_map,
				      mr->desc_size * mr->max_descs,
				      DMA_TO_DEVICE);

	return n;
}
