1 /*
2  * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
3  *
4  * This software is available to you under a choice of one of two
5  * licenses.  You may choose to be licensed under the terms of the GNU
6  * General Public License (GPL) Version 2, available from the file
7  * COPYING in the main directory of this source tree, or the
8  * OpenIB.org BSD license below:
9  *
10  *     Redistribution and use in source and binary forms, with or
11  *     without modification, are permitted provided that the following
12  *     conditions are met:
13  *
14  *      - Redistributions of source code must retain the above
15  *        copyright notice, this list of conditions and the following
16  *        disclaimer.
17  *
18  *      - Redistributions in binary form must reproduce the above
19  *        copyright notice, this list of conditions and the following
20  *        disclaimer in the documentation and/or other materials
21  *        provided with the distribution.
22  *
23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30  * SOFTWARE.
31  */
32 
33 
34 #include <linux/kref.h>
35 #include <linux/random.h>
36 #include <linux/debugfs.h>
37 #include <linux/export.h>
38 #include <linux/delay.h>
39 #include <rdma/ib_umem.h>
40 #include <rdma/ib_umem_odp.h>
41 #include <rdma/ib_verbs.h>
42 #include "mlx5_ib.h"
43 
44 enum {
45 	MAX_PENDING_REG_MR = 8,
46 };
47 
48 #define MLX5_UMR_ALIGN 2048
49 
50 static void
51 create_mkey_callback(int status, struct mlx5_async_work *context);
52 
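/*
 * Fill the common mkey context fields: access rights (including relaxed
 * ordering when the device supports it), the protection domain and the
 * start address.
 */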
53 static void set_mkc_access_pd_addr_fields(void *mkc, int acc, u64 start_addr,
54 					  struct ib_pd *pd)
55 {
56 	struct mlx5_ib_dev *dev = to_mdev(pd->device);
57 
58 	MLX5_SET(mkc, mkc, a, !!(acc & IB_ACCESS_REMOTE_ATOMIC));
59 	MLX5_SET(mkc, mkc, rw, !!(acc & IB_ACCESS_REMOTE_WRITE));
60 	MLX5_SET(mkc, mkc, rr, !!(acc & IB_ACCESS_REMOTE_READ));
61 	MLX5_SET(mkc, mkc, lw, !!(acc & IB_ACCESS_LOCAL_WRITE));
62 	MLX5_SET(mkc, mkc, lr, 1);
63 
64 	if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write))
65 		MLX5_SET(mkc, mkc, relaxed_ordering_write,
66 			 !!(acc & IB_ACCESS_RELAXED_ORDERING));
67 	if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read))
68 		MLX5_SET(mkc, mkc, relaxed_ordering_read,
69 			 !!(acc & IB_ACCESS_RELAXED_ORDERING));
70 
71 	MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
72 	MLX5_SET(mkc, mkc, qpn, 0xffffff);
73 	MLX5_SET64(mkc, mkc, start_addr, start_addr);
74 }
75 
76 static void
77 assign_mkey_variant(struct mlx5_ib_dev *dev, struct mlx5_core_mkey *mkey,
78 		    u32 *in)
79 {
80 	u8 key = atomic_inc_return(&dev->mkey_var);
81 	void *mkc;
82 
83 	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
84 	MLX5_SET(mkc, mkc, mkey_7_0, key);
85 	mkey->key = key;
86 }
87 
88 static int
89 mlx5_ib_create_mkey(struct mlx5_ib_dev *dev, struct mlx5_core_mkey *mkey,
90 		    u32 *in, int inlen)
91 {
92 	assign_mkey_variant(dev, mkey, in);
93 	return mlx5_core_create_mkey(dev->mdev, mkey, in, inlen);
94 }
95 
96 static int
97 mlx5_ib_create_mkey_cb(struct mlx5_ib_dev *dev,
98 		       struct mlx5_core_mkey *mkey,
99 		       struct mlx5_async_ctx *async_ctx,
100 		       u32 *in, int inlen, u32 *out, int outlen,
101 		       struct mlx5_async_work *context)
102 {
103 	MLX5_SET(create_mkey_in, in, opcode, MLX5_CMD_OP_CREATE_MKEY);
104 	assign_mkey_variant(dev, mkey, in);
105 	return mlx5_cmd_exec_cb(async_ctx, in, inlen, out, outlen,
106 				create_mkey_callback, context);
107 }
108 
109 static void clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr);
110 static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr);
111 static int mr_cache_max_order(struct mlx5_ib_dev *dev);
112 static void queue_adjust_cache_locked(struct mlx5_cache_ent *ent);
113 
114 static bool umr_can_use_indirect_mkey(struct mlx5_ib_dev *dev)
115 {
116 	return !MLX5_CAP_GEN(dev->mdev, umr_indirect_mkey_disabled);
117 }
118 
119 static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
120 {
121 	WARN_ON(xa_load(&dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key)));
122 
123 	return mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey);
124 }
125 
126 static inline bool mlx5_ib_pas_fits_in_mr(struct mlx5_ib_mr *mr, u64 start,
127 					  u64 length)
128 {
129 	return ((u64)1 << mr->order) * MLX5_ADAPTER_PAGE_SIZE >=
130 		length + (start & (MLX5_ADAPTER_PAGE_SIZE - 1));
131 }
132 
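/*
 * Completion handler for asynchronous mkey creation. On failure the MR is
 * freed and the fill delay is armed; on success the new MR is added to its
 * cache entry and the entry accounting is updated.
 */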
133 static void create_mkey_callback(int status, struct mlx5_async_work *context)
134 {
135 	struct mlx5_ib_mr *mr =
136 		container_of(context, struct mlx5_ib_mr, cb_work);
137 	struct mlx5_ib_dev *dev = mr->dev;
138 	struct mlx5_cache_ent *ent = mr->cache_ent;
139 	unsigned long flags;
140 
141 	if (status) {
142 		mlx5_ib_warn(dev, "async reg mr failed. status %d\n", status);
143 		kfree(mr);
144 		spin_lock_irqsave(&ent->lock, flags);
145 		ent->pending--;
146 		WRITE_ONCE(dev->fill_delay, 1);
147 		spin_unlock_irqrestore(&ent->lock, flags);
148 		mod_timer(&dev->delay_timer, jiffies + HZ);
149 		return;
150 	}
151 
152 	mr->mmkey.type = MLX5_MKEY_MR;
153 	mr->mmkey.key |= mlx5_idx_to_mkey(
154 		MLX5_GET(create_mkey_out, mr->out, mkey_index));
155 
156 	WRITE_ONCE(dev->cache.last_add, jiffies);
157 
158 	spin_lock_irqsave(&ent->lock, flags);
159 	list_add_tail(&mr->list, &ent->head);
160 	ent->available_mrs++;
161 	ent->total_mrs++;
162 	/* If we are doing fill_to_high_water then keep going. */
163 	queue_adjust_cache_locked(ent);
164 	ent->pending--;
165 	spin_unlock_irqrestore(&ent->lock, flags);
166 }
167 
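/*
 * Allocate an MR for a cache entry and fill the mkey context for a free,
 * UMR-enabled mkey sized according to the entry.
 */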
168 static struct mlx5_ib_mr *alloc_cache_mr(struct mlx5_cache_ent *ent, void *mkc)
169 {
170 	struct mlx5_ib_mr *mr;
171 
172 	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
173 	if (!mr)
174 		return NULL;
175 	mr->order = ent->order;
176 	mr->cache_ent = ent;
177 	mr->dev = ent->dev;
178 
179 	set_mkc_access_pd_addr_fields(mkc, 0, 0, ent->dev->umrc.pd);
180 	MLX5_SET(mkc, mkc, free, 1);
181 	MLX5_SET(mkc, mkc, umr_en, 1);
182 	MLX5_SET(mkc, mkc, access_mode_1_0, ent->access_mode & 0x3);
183 	MLX5_SET(mkc, mkc, access_mode_4_2, (ent->access_mode >> 2) & 0x7);
184 
185 	MLX5_SET(mkc, mkc, translations_octword_size, ent->xlt);
186 	MLX5_SET(mkc, mkc, log_page_size, ent->page);
187 	return mr;
188 }
189 
190 /* Asynchronously schedule new MRs to be populated in the cache. */
191 static int add_keys(struct mlx5_cache_ent *ent, unsigned int num)
192 {
193 	size_t inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
194 	struct mlx5_ib_mr *mr;
195 	void *mkc;
196 	u32 *in;
197 	int err = 0;
198 	int i;
199 
200 	in = kzalloc(inlen, GFP_KERNEL);
201 	if (!in)
202 		return -ENOMEM;
203 
204 	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
205 	for (i = 0; i < num; i++) {
206 		mr = alloc_cache_mr(ent, mkc);
207 		if (!mr) {
208 			err = -ENOMEM;
209 			break;
210 		}
211 		spin_lock_irq(&ent->lock);
212 		if (ent->pending >= MAX_PENDING_REG_MR) {
213 			err = -EAGAIN;
214 			spin_unlock_irq(&ent->lock);
215 			kfree(mr);
216 			break;
217 		}
218 		ent->pending++;
219 		spin_unlock_irq(&ent->lock);
220 		err = mlx5_ib_create_mkey_cb(ent->dev, &mr->mmkey,
221 					     &ent->dev->async_ctx, in, inlen,
222 					     mr->out, sizeof(mr->out),
223 					     &mr->cb_work);
224 		if (err) {
225 			spin_lock_irq(&ent->lock);
226 			ent->pending--;
227 			spin_unlock_irq(&ent->lock);
228 			mlx5_ib_warn(ent->dev, "create mkey failed %d\n", err);
229 			kfree(mr);
230 			break;
231 		}
232 	}
233 
234 	kfree(in);
235 	return err;
236 }
237 
238 /* Synchronously create an MR in the cache */
239 static struct mlx5_ib_mr *create_cache_mr(struct mlx5_cache_ent *ent)
240 {
241 	size_t inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
242 	struct mlx5_ib_mr *mr;
243 	void *mkc;
244 	u32 *in;
245 	int err;
246 
247 	in = kzalloc(inlen, GFP_KERNEL);
248 	if (!in)
249 		return ERR_PTR(-ENOMEM);
250 	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
251 
252 	mr = alloc_cache_mr(ent, mkc);
253 	if (!mr) {
254 		err = -ENOMEM;
255 		goto free_in;
256 	}
257 
258 	err = mlx5_core_create_mkey(ent->dev->mdev, &mr->mmkey, in, inlen);
259 	if (err)
260 		goto free_mr;
261 
262 	mr->mmkey.type = MLX5_MKEY_MR;
263 	WRITE_ONCE(ent->dev->cache.last_add, jiffies);
264 	spin_lock_irq(&ent->lock);
265 	ent->total_mrs++;
266 	spin_unlock_irq(&ent->lock);
267 	kfree(in);
268 	return mr;
269 free_mr:
270 	kfree(mr);
271 free_in:
272 	kfree(in);
273 	return ERR_PTR(err);
274 }
275 
276 static void remove_cache_mr_locked(struct mlx5_cache_ent *ent)
277 {
278 	struct mlx5_ib_mr *mr;
279 
280 	lockdep_assert_held(&ent->lock);
281 	if (list_empty(&ent->head))
282 		return;
283 	mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
284 	list_del(&mr->list);
285 	ent->available_mrs--;
286 	ent->total_mrs--;
287 	spin_unlock_irq(&ent->lock);
288 	mlx5_core_destroy_mkey(ent->dev->mdev, &mr->mmkey);
289 	kfree(mr);
290 	spin_lock_irq(&ent->lock);
291 }
292 
293 static int resize_available_mrs(struct mlx5_cache_ent *ent, unsigned int target,
294 				bool limit_fill)
295 {
296 	int err;
297 
298 	lockdep_assert_held(&ent->lock);
299 
300 	while (true) {
301 		if (limit_fill)
302 			target = ent->limit * 2;
303 		if (target == ent->available_mrs + ent->pending)
304 			return 0;
305 		if (target > ent->available_mrs + ent->pending) {
306 			u32 todo = target - (ent->available_mrs + ent->pending);
307 
308 			spin_unlock_irq(&ent->lock);
309 			err = add_keys(ent, todo);
310 			if (err == -EAGAIN)
311 				usleep_range(3000, 5000);
312 			spin_lock_irq(&ent->lock);
313 			if (err) {
314 				if (err != -EAGAIN)
315 					return err;
316 			} else
317 				return 0;
318 		} else {
319 			remove_cache_mr_locked(ent);
320 		}
321 	}
322 }
323 
324 static ssize_t size_write(struct file *filp, const char __user *buf,
325 			  size_t count, loff_t *pos)
326 {
327 	struct mlx5_cache_ent *ent = filp->private_data;
328 	u32 target;
329 	int err;
330 
331 	err = kstrtou32_from_user(buf, count, 0, &target);
332 	if (err)
333 		return err;
334 
335 	/*
336 	 * Target is the new value of total_mrs the user requests; however, we
337 	 * cannot free MRs that are in use. Compute the target value for
338 	 * available_mrs.
339 	 */
340 	spin_lock_irq(&ent->lock);
341 	if (target < ent->total_mrs - ent->available_mrs) {
342 		err = -EINVAL;
343 		goto err_unlock;
344 	}
345 	target = target - (ent->total_mrs - ent->available_mrs);
346 	if (target < ent->limit || target > ent->limit*2) {
347 		err = -EINVAL;
348 		goto err_unlock;
349 	}
350 	err = resize_available_mrs(ent, target, false);
351 	if (err)
352 		goto err_unlock;
353 	spin_unlock_irq(&ent->lock);
354 
355 	return count;
356 
357 err_unlock:
358 	spin_unlock_irq(&ent->lock);
359 	return err;
360 }
361 
362 static ssize_t size_read(struct file *filp, char __user *buf, size_t count,
363 			 loff_t *pos)
364 {
365 	struct mlx5_cache_ent *ent = filp->private_data;
366 	char lbuf[20];
367 	int err;
368 
369 	err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->total_mrs);
370 	if (err < 0)
371 		return err;
372 
373 	return simple_read_from_buffer(buf, count, pos, lbuf, err);
374 }
375 
376 static const struct file_operations size_fops = {
377 	.owner	= THIS_MODULE,
378 	.open	= simple_open,
379 	.write	= size_write,
380 	.read	= size_read,
381 };
382 
383 static ssize_t limit_write(struct file *filp, const char __user *buf,
384 			   size_t count, loff_t *pos)
385 {
386 	struct mlx5_cache_ent *ent = filp->private_data;
387 	u32 var;
388 	int err;
389 
390 	err = kstrtou32_from_user(buf, count, 0, &var);
391 	if (err)
392 		return err;
393 
394 	/*
395 	 * Upon set we immediately fill the cache to the high water mark implied
396 	 * by the limit.
397 	 */
398 	spin_lock_irq(&ent->lock);
399 	ent->limit = var;
400 	err = resize_available_mrs(ent, 0, true);
401 	spin_unlock_irq(&ent->lock);
402 	if (err)
403 		return err;
404 	return count;
405 }
406 
407 static ssize_t limit_read(struct file *filp, char __user *buf, size_t count,
408 			  loff_t *pos)
409 {
410 	struct mlx5_cache_ent *ent = filp->private_data;
411 	char lbuf[20];
412 	int err;
413 
414 	err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->limit);
415 	if (err < 0)
416 		return err;
417 
418 	return simple_read_from_buffer(buf, count, pos, lbuf, err);
419 }
420 
421 static const struct file_operations limit_fops = {
422 	.owner	= THIS_MODULE,
423 	.open	= simple_open,
424 	.write	= limit_write,
425 	.read	= limit_read,
426 };
427 
428 static bool someone_adding(struct mlx5_mr_cache *cache)
429 {
430 	unsigned int i;
431 
432 	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
433 		struct mlx5_cache_ent *ent = &cache->ent[i];
434 		bool ret;
435 
436 		spin_lock_irq(&ent->lock);
437 		ret = ent->available_mrs < ent->limit;
438 		spin_unlock_irq(&ent->lock);
439 		if (ret)
440 			return true;
441 	}
442 	return false;
443 }
444 
445 /*
446  * Check if the bucket is outside the high/low water mark and schedule an async
447  * update. The cache refill has hysteresis: once the low water mark is hit, it
448  * is refilled up to the high mark.
449  */
450 static void queue_adjust_cache_locked(struct mlx5_cache_ent *ent)
451 {
452 	lockdep_assert_held(&ent->lock);
453 
454 	if (ent->disabled || READ_ONCE(ent->dev->fill_delay))
455 		return;
456 	if (ent->available_mrs < ent->limit) {
457 		ent->fill_to_high_water = true;
458 		queue_work(ent->dev->cache.wq, &ent->work);
459 	} else if (ent->fill_to_high_water &&
460 		   ent->available_mrs + ent->pending < 2 * ent->limit) {
461 		/*
462 		 * Once we start populating due to hitting a low water mark,
463 		 * continue until we pass the high water mark.
464 		 */
465 		queue_work(ent->dev->cache.wq, &ent->work);
466 	} else if (ent->available_mrs == 2 * ent->limit) {
467 		ent->fill_to_high_water = false;
468 	} else if (ent->available_mrs > 2 * ent->limit) {
469 		/* Queue deletion of excess entries */
470 		ent->fill_to_high_water = false;
471 		if (ent->pending)
472 			queue_delayed_work(ent->dev->cache.wq, &ent->dwork,
473 					   msecs_to_jiffies(1000));
474 		else
475 			queue_work(ent->dev->cache.wq, &ent->work);
476 	}
477 }
478 
479 static void __cache_work_func(struct mlx5_cache_ent *ent)
480 {
481 	struct mlx5_ib_dev *dev = ent->dev;
482 	struct mlx5_mr_cache *cache = &dev->cache;
483 	int err;
484 
485 	spin_lock_irq(&ent->lock);
486 	if (ent->disabled)
487 		goto out;
488 
489 	if (ent->fill_to_high_water &&
490 	    ent->available_mrs + ent->pending < 2 * ent->limit &&
491 	    !READ_ONCE(dev->fill_delay)) {
492 		spin_unlock_irq(&ent->lock);
493 		err = add_keys(ent, 1);
494 		spin_lock_irq(&ent->lock);
495 		if (ent->disabled)
496 			goto out;
497 		if (err) {
498 			/*
499 			 * EAGAIN only happens if pending is positive, so we
500 			 * will be rescheduled from reg_mr_callback(). The only
501 			 * failure path here is ENOMEM.
502 			 */
503 			if (err != -EAGAIN) {
504 				mlx5_ib_warn(
505 					dev,
506 					"command failed order %d, err %d\n",
507 					ent->order, err);
508 				queue_delayed_work(cache->wq, &ent->dwork,
509 						   msecs_to_jiffies(1000));
510 			}
511 		}
512 	} else if (ent->available_mrs > 2 * ent->limit) {
513 		bool need_delay;
514 
515 		/*
516 		 * The remove_cache_mr() logic is performed as a garbage
517 		 * collection task. Such a task is intended to be run when no
518 		 * other active processes are running.
519 		 *
520 		 * The need_resched() will return TRUE if there are user tasks
521 		 * to be activated in the near future.
522 		 *
523 		 * In such a case, we don't execute remove_cache_mr() and postpone
524 		 * the garbage collection work, trying to run it in the next cycle,
525 		 * in order to free CPU resources for other tasks.
526 		 */
527 		spin_unlock_irq(&ent->lock);
528 		need_delay = need_resched() || someone_adding(cache) ||
529 			     !time_after(jiffies,
530 					 READ_ONCE(cache->last_add) + 300 * HZ);
531 		spin_lock_irq(&ent->lock);
532 		if (ent->disabled)
533 			goto out;
534 		if (need_delay) {
535 			queue_delayed_work(cache->wq, &ent->dwork, 300 * HZ);
536 			goto out;
537 		}
538 		remove_cache_mr_locked(ent);
539 		queue_adjust_cache_locked(ent);
540 	}
541 out:
542 	spin_unlock_irq(&ent->lock);
543 }
544 
545 static void delayed_cache_work_func(struct work_struct *work)
546 {
547 	struct mlx5_cache_ent *ent;
548 
549 	ent = container_of(work, struct mlx5_cache_ent, dwork.work);
550 	__cache_work_func(ent);
551 }
552 
553 static void cache_work_func(struct work_struct *work)
554 {
555 	struct mlx5_cache_ent *ent;
556 
557 	ent = container_of(work, struct mlx5_cache_ent, work);
558 	__cache_work_func(ent);
559 }
560 
561 /* Allocate a special entry from the cache */
562 struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev,
563 				       unsigned int entry, int access_flags)
564 {
565 	struct mlx5_mr_cache *cache = &dev->cache;
566 	struct mlx5_cache_ent *ent;
567 	struct mlx5_ib_mr *mr;
568 
569 	if (WARN_ON(entry <= MR_CACHE_LAST_STD_ENTRY ||
570 		    entry >= ARRAY_SIZE(cache->ent)))
571 		return ERR_PTR(-EINVAL);
572 
573 	/* Matches access in alloc_cache_mr() */
574 	if (!mlx5_ib_can_reconfig_with_umr(dev, 0, access_flags))
575 		return ERR_PTR(-EOPNOTSUPP);
576 
577 	ent = &cache->ent[entry];
578 	spin_lock_irq(&ent->lock);
579 	if (list_empty(&ent->head)) {
580 		queue_adjust_cache_locked(ent);
581 		ent->miss++;
582 		spin_unlock_irq(&ent->lock);
583 		mr = create_cache_mr(ent);
584 		if (IS_ERR(mr))
585 			return mr;
586 	} else {
587 		mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
588 		list_del(&mr->list);
589 		ent->available_mrs--;
590 		queue_adjust_cache_locked(ent);
591 		spin_unlock_irq(&ent->lock);
592 	}
593 	mr->access_flags = access_flags;
594 	return mr;
595 }
596 
597 /* Return an MR already available in the cache */
598 static struct mlx5_ib_mr *get_cache_mr(struct mlx5_cache_ent *req_ent)
599 {
600 	struct mlx5_ib_dev *dev = req_ent->dev;
601 	struct mlx5_ib_mr *mr = NULL;
602 	struct mlx5_cache_ent *ent = req_ent;
603 
604 	/* Try larger MR pools from the cache to satisfy the allocation */
605 	for (; ent != &dev->cache.ent[MR_CACHE_LAST_STD_ENTRY + 1]; ent++) {
606 		mlx5_ib_dbg(dev, "order %u, cache index %zu\n", ent->order,
607 			    ent - dev->cache.ent);
608 
609 		spin_lock_irq(&ent->lock);
610 		if (!list_empty(&ent->head)) {
611 			mr = list_first_entry(&ent->head, struct mlx5_ib_mr,
612 					      list);
613 			list_del(&mr->list);
614 			ent->available_mrs--;
615 			queue_adjust_cache_locked(ent);
616 			spin_unlock_irq(&ent->lock);
617 			break;
618 		}
619 		queue_adjust_cache_locked(ent);
620 		spin_unlock_irq(&ent->lock);
621 	}
622 
623 	if (!mr)
624 		req_ent->miss++;
625 
626 	return mr;
627 }
628 
629 static void detach_mr_from_cache(struct mlx5_ib_mr *mr)
630 {
631 	struct mlx5_cache_ent *ent = mr->cache_ent;
632 
633 	mr->cache_ent = NULL;
634 	spin_lock_irq(&ent->lock);
635 	ent->total_mrs--;
636 	spin_unlock_irq(&ent->lock);
637 }
638 
639 void mlx5_mr_cache_free(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
640 {
641 	struct mlx5_cache_ent *ent = mr->cache_ent;
642 
643 	if (!ent)
644 		return;
645 
646 	if (mlx5_mr_cache_invalidate(mr)) {
647 		detach_mr_from_cache(mr);
648 		destroy_mkey(dev, mr);
649 		kfree(mr);
650 		return;
651 	}
652 
653 	spin_lock_irq(&ent->lock);
654 	list_add_tail(&mr->list, &ent->head);
655 	ent->available_mrs++;
656 	queue_adjust_cache_locked(ent);
657 	spin_unlock_irq(&ent->lock);
658 }
659 
660 static void clean_keys(struct mlx5_ib_dev *dev, int c)
661 {
662 	struct mlx5_mr_cache *cache = &dev->cache;
663 	struct mlx5_cache_ent *ent = &cache->ent[c];
664 	struct mlx5_ib_mr *tmp_mr;
665 	struct mlx5_ib_mr *mr;
666 	LIST_HEAD(del_list);
667 
668 	cancel_delayed_work(&ent->dwork);
669 	while (1) {
670 		spin_lock_irq(&ent->lock);
671 		if (list_empty(&ent->head)) {
672 			spin_unlock_irq(&ent->lock);
673 			break;
674 		}
675 		mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
676 		list_move(&mr->list, &del_list);
677 		ent->available_mrs--;
678 		ent->total_mrs--;
679 		spin_unlock_irq(&ent->lock);
680 		mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey);
681 	}
682 
683 	list_for_each_entry_safe(mr, tmp_mr, &del_list, list) {
684 		list_del(&mr->list);
685 		kfree(mr);
686 	}
687 }
688 
689 static void mlx5_mr_cache_debugfs_cleanup(struct mlx5_ib_dev *dev)
690 {
691 	if (!mlx5_debugfs_root || dev->is_rep)
692 		return;
693 
694 	debugfs_remove_recursive(dev->cache.root);
695 	dev->cache.root = NULL;
696 }
697 
698 static void mlx5_mr_cache_debugfs_init(struct mlx5_ib_dev *dev)
699 {
700 	struct mlx5_mr_cache *cache = &dev->cache;
701 	struct mlx5_cache_ent *ent;
702 	struct dentry *dir;
703 	int i;
704 
705 	if (!mlx5_debugfs_root || dev->is_rep)
706 		return;
707 
708 	cache->root = debugfs_create_dir("mr_cache", dev->mdev->priv.dbg_root);
709 
710 	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
711 		ent = &cache->ent[i];
712 		sprintf(ent->name, "%d", ent->order);
713 		dir = debugfs_create_dir(ent->name, cache->root);
714 		debugfs_create_file("size", 0600, dir, ent, &size_fops);
715 		debugfs_create_file("limit", 0600, dir, ent, &limit_fops);
716 		debugfs_create_u32("cur", 0400, dir, &ent->available_mrs);
717 		debugfs_create_u32("miss", 0600, dir, &ent->miss);
718 	}
719 }
720 
721 static void delay_time_func(struct timer_list *t)
722 {
723 	struct mlx5_ib_dev *dev = from_timer(dev, t, delay_timer);
724 
725 	WRITE_ONCE(dev->fill_delay, 0);
726 }
727 
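/*
 * Set up the MR cache: create the workqueue, async command context and delay
 * timer, initialize every cache entry and start filling the standard entries
 * that are given a non-zero limit.
 */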
728 int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
729 {
730 	struct mlx5_mr_cache *cache = &dev->cache;
731 	struct mlx5_cache_ent *ent;
732 	int i;
733 
734 	mutex_init(&dev->slow_path_mutex);
735 	cache->wq = alloc_ordered_workqueue("mkey_cache", WQ_MEM_RECLAIM);
736 	if (!cache->wq) {
737 		mlx5_ib_warn(dev, "failed to create work queue\n");
738 		return -ENOMEM;
739 	}
740 
741 	mlx5_cmd_init_async_ctx(dev->mdev, &dev->async_ctx);
742 	timer_setup(&dev->delay_timer, delay_time_func, 0);
743 	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
744 		ent = &cache->ent[i];
745 		INIT_LIST_HEAD(&ent->head);
746 		spin_lock_init(&ent->lock);
747 		ent->order = i + 2;
748 		ent->dev = dev;
749 		ent->limit = 0;
750 
751 		INIT_WORK(&ent->work, cache_work_func);
752 		INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func);
753 
754 		if (i > MR_CACHE_LAST_STD_ENTRY) {
755 			mlx5_odp_init_mr_cache_entry(ent);
756 			continue;
757 		}
758 
759 		if (ent->order > mr_cache_max_order(dev))
760 			continue;
761 
762 		ent->page = PAGE_SHIFT;
763 		ent->xlt = (1 << ent->order) * sizeof(struct mlx5_mtt) /
764 			   MLX5_IB_UMR_OCTOWORD;
765 		ent->access_mode = MLX5_MKC_ACCESS_MODE_MTT;
766 		if ((dev->mdev->profile->mask & MLX5_PROF_MASK_MR_CACHE) &&
767 		    !dev->is_rep && mlx5_core_is_pf(dev->mdev) &&
768 		    mlx5_ib_can_load_pas_with_umr(dev, 0))
769 			ent->limit = dev->mdev->profile->mr_cache[i].limit;
770 		else
771 			ent->limit = 0;
772 		spin_lock_irq(&ent->lock);
773 		queue_adjust_cache_locked(ent);
774 		spin_unlock_irq(&ent->lock);
775 	}
776 
777 	mlx5_mr_cache_debugfs_init(dev);
778 
779 	return 0;
780 }
781 
782 int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev)
783 {
784 	unsigned int i;
785 
786 	if (!dev->cache.wq)
787 		return 0;
788 
789 	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
790 		struct mlx5_cache_ent *ent = &dev->cache.ent[i];
791 
792 		spin_lock_irq(&ent->lock);
793 		ent->disabled = true;
794 		spin_unlock_irq(&ent->lock);
795 		cancel_work_sync(&ent->work);
796 		cancel_delayed_work_sync(&ent->dwork);
797 	}
798 
799 	mlx5_mr_cache_debugfs_cleanup(dev);
800 	mlx5_cmd_cleanup_async_ctx(&dev->async_ctx);
801 
802 	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++)
803 		clean_keys(dev, i);
804 
805 	destroy_workqueue(dev->cache.wq);
806 	del_timer_sync(&dev->delay_timer);
807 
808 	return 0;
809 }
810 
811 struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc)
812 {
813 	struct mlx5_ib_dev *dev = to_mdev(pd->device);
814 	int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
815 	struct mlx5_ib_mr *mr;
816 	void *mkc;
817 	u32 *in;
818 	int err;
819 
820 	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
821 	if (!mr)
822 		return ERR_PTR(-ENOMEM);
823 
824 	in = kzalloc(inlen, GFP_KERNEL);
825 	if (!in) {
826 		err = -ENOMEM;
827 		goto err_free;
828 	}
829 
830 	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
831 
832 	MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_PA);
833 	MLX5_SET(mkc, mkc, length64, 1);
834 	set_mkc_access_pd_addr_fields(mkc, acc, 0, pd);
835 
836 	err = mlx5_ib_create_mkey(dev, &mr->mmkey, in, inlen);
837 	if (err)
838 		goto err_in;
839 
840 	kfree(in);
841 	mr->mmkey.type = MLX5_MKEY_MR;
842 	mr->ibmr.lkey = mr->mmkey.key;
843 	mr->ibmr.rkey = mr->mmkey.key;
844 	mr->umem = NULL;
845 
846 	return &mr->ibmr;
847 
848 err_in:
849 	kfree(in);
850 
851 err_free:
852 	kfree(mr);
853 
854 	return ERR_PTR(err);
855 }
856 
857 static int get_octo_len(u64 addr, u64 len, int page_shift)
858 {
859 	u64 page_size = 1ULL << page_shift;
860 	u64 offset;
861 	int npages;
862 
863 	offset = addr & (page_size - 1);
864 	npages = ALIGN(len + offset, page_size) >> page_shift;
865 	return (npages + 1) / 2;
866 }
867 
868 static int mr_cache_max_order(struct mlx5_ib_dev *dev)
869 {
870 	if (MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset))
871 		return MR_CACHE_LAST_STD_ENTRY + 2;
872 	return MLX5_MAX_UMR_SHIFT;
873 }
874 
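/*
 * Get the umem for the requested range (an ODP umem for on-demand paging)
 * and report the number of pages, contiguous chunks, page shift and order.
 */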
875 static int mr_umem_get(struct mlx5_ib_dev *dev, u64 start, u64 length,
876 		       int access_flags, struct ib_umem **umem, int *npages,
877 		       int *page_shift, int *ncont, int *order)
878 {
879 	struct ib_umem *u;
880 
881 	*umem = NULL;
882 
883 	if (access_flags & IB_ACCESS_ON_DEMAND) {
884 		struct ib_umem_odp *odp;
885 
886 		odp = ib_umem_odp_get(&dev->ib_dev, start, length, access_flags,
887 				      &mlx5_mn_ops);
888 		if (IS_ERR(odp)) {
889 			mlx5_ib_dbg(dev, "umem get failed (%ld)\n",
890 				    PTR_ERR(odp));
891 			return PTR_ERR(odp);
892 		}
893 
894 		u = &odp->umem;
895 
896 		*page_shift = odp->page_shift;
897 		*ncont = ib_umem_odp_num_pages(odp);
898 		*npages = *ncont << (*page_shift - PAGE_SHIFT);
899 		if (order)
900 			*order = ilog2(roundup_pow_of_two(*ncont));
901 	} else {
902 		u = ib_umem_get(&dev->ib_dev, start, length, access_flags);
903 		if (IS_ERR(u)) {
904 			mlx5_ib_dbg(dev, "umem get failed (%ld)\n", PTR_ERR(u));
905 			return PTR_ERR(u);
906 		}
907 
908 		mlx5_ib_cont_pages(u, start, MLX5_MKEY_PAGE_SHIFT_MASK, npages,
909 				   page_shift, ncont, order);
910 	}
911 
912 	if (!*npages) {
913 		mlx5_ib_warn(dev, "avoid zero region\n");
914 		ib_umem_release(u);
915 		return -EINVAL;
916 	}
917 
918 	*umem = u;
919 
920 	mlx5_ib_dbg(dev, "npages %d, ncont %d, order %d, page_shift %d\n",
921 		    *npages, *ncont, *order, *page_shift);
922 
923 	return 0;
924 }
925 
926 static void mlx5_ib_umr_done(struct ib_cq *cq, struct ib_wc *wc)
927 {
928 	struct mlx5_ib_umr_context *context =
929 		container_of(wc->wr_cqe, struct mlx5_ib_umr_context, cqe);
930 
931 	context->status = wc->status;
932 	complete(&context->done);
933 }
934 
935 static inline void mlx5_ib_init_umr_context(struct mlx5_ib_umr_context *context)
936 {
937 	context->cqe.done = mlx5_ib_umr_done;
938 	context->status = -1;
939 	init_completion(&context->done);
940 }
941 
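/*
 * Post a UMR work request on the dedicated UMR QP and wait for its
 * completion. A failed completion status is reported as -EFAULT.
 */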
942 static int mlx5_ib_post_send_wait(struct mlx5_ib_dev *dev,
943 				  struct mlx5_umr_wr *umrwr)
944 {
945 	struct umr_common *umrc = &dev->umrc;
946 	const struct ib_send_wr *bad;
947 	int err;
948 	struct mlx5_ib_umr_context umr_context;
949 
950 	mlx5_ib_init_umr_context(&umr_context);
951 	umrwr->wr.wr_cqe = &umr_context.cqe;
952 
953 	down(&umrc->sem);
954 	err = ib_post_send(umrc->qp, &umrwr->wr, &bad);
955 	if (err) {
956 		mlx5_ib_warn(dev, "UMR post send failed, err %d\n", err);
957 	} else {
958 		wait_for_completion(&umr_context.done);
959 		if (umr_context.status != IB_WC_SUCCESS) {
960 			mlx5_ib_warn(dev, "reg umr failed (%u)\n",
961 				     umr_context.status);
962 			err = -EFAULT;
963 		}
964 	}
965 	up(&umrc->sem);
966 	return err;
967 }
968 
969 static struct mlx5_cache_ent *mr_cache_ent_from_order(struct mlx5_ib_dev *dev,
970 						      unsigned int order)
971 {
972 	struct mlx5_mr_cache *cache = &dev->cache;
973 
974 	if (order < cache->ent[0].order)
975 		return &cache->ent[0];
976 	order = order - cache->ent[0].order;
977 	if (order > MR_CACHE_LAST_STD_ENTRY)
978 		return NULL;
979 	return &cache->ent[order];
980 }
981 
982 static struct mlx5_ib_mr *
983 alloc_mr_from_cache(struct ib_pd *pd, struct ib_umem *umem, u64 virt_addr,
984 		    u64 len, int npages, int page_shift, unsigned int order,
985 		    int access_flags)
986 {
987 	struct mlx5_ib_dev *dev = to_mdev(pd->device);
988 	struct mlx5_cache_ent *ent = mr_cache_ent_from_order(dev, order);
989 	struct mlx5_ib_mr *mr;
990 
991 	if (!ent)
992 		return ERR_PTR(-E2BIG);
993 
994 	/* Matches access in alloc_cache_mr() */
995 	if (!mlx5_ib_can_reconfig_with_umr(dev, 0, access_flags))
996 		return ERR_PTR(-EOPNOTSUPP);
997 
998 	mr = get_cache_mr(ent);
999 	if (!mr) {
1000 		mr = create_cache_mr(ent);
1001 		if (IS_ERR(mr))
1002 			return mr;
1003 	}
1004 
1005 	mr->ibmr.pd = pd;
1006 	mr->umem = umem;
1007 	mr->access_flags = access_flags;
1008 	mr->desc_size = sizeof(struct mlx5_mtt);
1009 	mr->mmkey.iova = virt_addr;
1010 	mr->mmkey.size = len;
1011 	mr->mmkey.pd = to_mpd(pd)->pdn;
1012 
1013 	return mr;
1014 }
1015 
1016 #define MLX5_MAX_UMR_CHUNK ((1 << (MLX5_MAX_UMR_SHIFT + 4)) - \
1017 			    MLX5_UMR_MTT_ALIGNMENT)
1018 #define MLX5_SPARE_UMR_CHUNK 0x10000
1019 
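/*
 * Update the XLT (MTT or KLM entries) of an mkey by posting UMR work
 * requests. The translation buffer is taken from free pages, falling back to
 * the spare chunk size and finally to the emergency page, and is DMA-mapped
 * and posted in chunks.
 */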
1020 int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,
1021 		       int page_shift, int flags)
1022 {
1023 	struct mlx5_ib_dev *dev = mr->dev;
1024 	struct device *ddev = dev->ib_dev.dev.parent;
1025 	int size;
1026 	void *xlt;
1027 	dma_addr_t dma;
1028 	struct mlx5_umr_wr wr;
1029 	struct ib_sge sg;
1030 	int err = 0;
1031 	int desc_size = (flags & MLX5_IB_UPD_XLT_INDIRECT)
1032 			       ? sizeof(struct mlx5_klm)
1033 			       : sizeof(struct mlx5_mtt);
1034 	const int page_align = MLX5_UMR_MTT_ALIGNMENT / desc_size;
1035 	const int page_mask = page_align - 1;
1036 	size_t pages_mapped = 0;
1037 	size_t pages_to_map = 0;
1038 	size_t pages_iter = 0;
1039 	size_t size_to_map = 0;
1040 	gfp_t gfp;
1041 	bool use_emergency_page = false;
1042 
1043 	if ((flags & MLX5_IB_UPD_XLT_INDIRECT) &&
1044 	    !umr_can_use_indirect_mkey(dev))
1045 		return -EPERM;
1046 
1047 	/* UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes,
1048 	 * so we need to align the offset and length accordingly
1049 	 */
1050 	if (idx & page_mask) {
1051 		npages += idx & page_mask;
1052 		idx &= ~page_mask;
1053 	}
1054 
1055 	gfp = flags & MLX5_IB_UPD_XLT_ATOMIC ? GFP_ATOMIC : GFP_KERNEL;
1056 	gfp |= __GFP_ZERO | __GFP_NOWARN;
1057 
1058 	pages_to_map = ALIGN(npages, page_align);
1059 	size = desc_size * pages_to_map;
1060 	size = min_t(int, size, MLX5_MAX_UMR_CHUNK);
1061 
1062 	xlt = (void *)__get_free_pages(gfp, get_order(size));
1063 	if (!xlt && size > MLX5_SPARE_UMR_CHUNK) {
1064 		mlx5_ib_dbg(dev, "Failed to allocate %d bytes of order %d. fallback to spare UMR allocation of %d bytes\n",
1065 			    size, get_order(size), MLX5_SPARE_UMR_CHUNK);
1066 
1067 		size = MLX5_SPARE_UMR_CHUNK;
1068 		xlt = (void *)__get_free_pages(gfp, get_order(size));
1069 	}
1070 
1071 	if (!xlt) {
1072 		mlx5_ib_warn(dev, "Using XLT emergency buffer\n");
1073 		xlt = (void *)mlx5_ib_get_xlt_emergency_page();
1074 		size = PAGE_SIZE;
1075 		memset(xlt, 0, size);
1076 		use_emergency_page = true;
1077 	}
1078 	pages_iter = size / desc_size;
1079 	dma = dma_map_single(ddev, xlt, size, DMA_TO_DEVICE);
1080 	if (dma_mapping_error(ddev, dma)) {
1081 		mlx5_ib_err(dev, "unable to map DMA during XLT update.\n");
1082 		err = -ENOMEM;
1083 		goto free_xlt;
1084 	}
1085 
1086 	if (mr->umem->is_odp) {
1087 		if (!(flags & MLX5_IB_UPD_XLT_INDIRECT)) {
1088 			struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem);
1089 			size_t max_pages = ib_umem_odp_num_pages(odp) - idx;
1090 
1091 			pages_to_map = min_t(size_t, pages_to_map, max_pages);
1092 		}
1093 	}
1094 
1095 	sg.addr = dma;
1096 	sg.lkey = dev->umrc.pd->local_dma_lkey;
1097 
1098 	memset(&wr, 0, sizeof(wr));
1099 	wr.wr.send_flags = MLX5_IB_SEND_UMR_UPDATE_XLT;
1100 	if (!(flags & MLX5_IB_UPD_XLT_ENABLE))
1101 		wr.wr.send_flags |= MLX5_IB_SEND_UMR_FAIL_IF_FREE;
1102 	wr.wr.sg_list = &sg;
1103 	wr.wr.num_sge = 1;
1104 	wr.wr.opcode = MLX5_IB_WR_UMR;
1105 
1106 	wr.pd = mr->ibmr.pd;
1107 	wr.mkey = mr->mmkey.key;
1108 	wr.length = mr->mmkey.size;
1109 	wr.virt_addr = mr->mmkey.iova;
1110 	wr.access_flags = mr->access_flags;
1111 	wr.page_shift = page_shift;
1112 
1113 	for (pages_mapped = 0;
1114 	     pages_mapped < pages_to_map && !err;
1115 	     pages_mapped += pages_iter, idx += pages_iter) {
1116 		npages = min_t(int, pages_iter, pages_to_map - pages_mapped);
1117 		size_to_map = npages * desc_size;
1118 		dma_sync_single_for_cpu(ddev, dma, size, DMA_TO_DEVICE);
1119 		if (mr->umem->is_odp) {
1120 			mlx5_odp_populate_xlt(xlt, idx, npages, mr, flags);
1121 		} else {
1122 			__mlx5_ib_populate_pas(dev, mr->umem, page_shift, idx,
1123 					       npages, xlt,
1124 					       MLX5_IB_MTT_PRESENT);
1125 			/* Clear padding after the pages
1126 			 * brought from the umem.
1127 			 */
1128 			memset(xlt + size_to_map, 0, size - size_to_map);
1129 		}
1130 		dma_sync_single_for_device(ddev, dma, size, DMA_TO_DEVICE);
1131 
1132 		sg.length = ALIGN(size_to_map, MLX5_UMR_MTT_ALIGNMENT);
1133 
1134 		if (pages_mapped + pages_iter >= pages_to_map) {
1135 			if (flags & MLX5_IB_UPD_XLT_ENABLE)
1136 				wr.wr.send_flags |=
1137 					MLX5_IB_SEND_UMR_ENABLE_MR |
1138 					MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS |
1139 					MLX5_IB_SEND_UMR_UPDATE_TRANSLATION;
1140 			if (flags & MLX5_IB_UPD_XLT_PD ||
1141 			    flags & MLX5_IB_UPD_XLT_ACCESS)
1142 				wr.wr.send_flags |=
1143 					MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS;
1144 			if (flags & MLX5_IB_UPD_XLT_ADDR)
1145 				wr.wr.send_flags |=
1146 					MLX5_IB_SEND_UMR_UPDATE_TRANSLATION;
1147 		}
1148 
1149 		wr.offset = idx * desc_size;
1150 		wr.xlt_size = sg.length;
1151 
1152 		err = mlx5_ib_post_send_wait(dev, &wr);
1153 	}
1154 	dma_unmap_single(ddev, dma, size, DMA_TO_DEVICE);
1155 
1156 free_xlt:
1157 	if (use_emergency_page)
1158 		mlx5_ib_put_xlt_emergency_page();
1159 	else
1160 		free_pages((unsigned long)xlt, get_order(size));
1161 
1162 	return err;
1163 }
1164 
1165 /*
1166  * If ibmr is NULL it will be allocated by reg_create.
1167  * Otherwise, the given ibmr will be used.
1168  */
1169 static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd,
1170 				     u64 virt_addr, u64 length,
1171 				     struct ib_umem *umem, int npages,
1172 				     int page_shift, int access_flags,
1173 				     bool populate)
1174 {
1175 	struct mlx5_ib_dev *dev = to_mdev(pd->device);
1176 	struct mlx5_ib_mr *mr;
1177 	__be64 *pas;
1178 	void *mkc;
1179 	int inlen;
1180 	u32 *in;
1181 	int err;
1182 	bool pg_cap = !!(MLX5_CAP_GEN(dev->mdev, pg));
1183 
1184 	mr = ibmr ? to_mmr(ibmr) : kzalloc(sizeof(*mr), GFP_KERNEL);
1185 	if (!mr)
1186 		return ERR_PTR(-ENOMEM);
1187 
1188 	mr->ibmr.pd = pd;
1189 	mr->access_flags = access_flags;
1190 
1191 	inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
1192 	if (populate)
1193 		inlen += sizeof(*pas) * roundup(npages, 2);
1194 	in = kvzalloc(inlen, GFP_KERNEL);
1195 	if (!in) {
1196 		err = -ENOMEM;
1197 		goto err_1;
1198 	}
1199 	pas = (__be64 *)MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt);
1200 	if (populate) {
1201 		if (WARN_ON(access_flags & IB_ACCESS_ON_DEMAND)) {
1202 			err = -EINVAL;
1203 			goto err_2;
1204 		}
1205 		mlx5_ib_populate_pas(dev, umem, page_shift, pas,
1206 				     pg_cap ? MLX5_IB_MTT_PRESENT : 0);
1207 	}
1208 
1209 	/* The pg_access bit allows setting the access flags
1210 	 * in the page list submitted with the command. */
1211 	MLX5_SET(create_mkey_in, in, pg_access, !!(pg_cap));
1212 
1213 	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
1214 	set_mkc_access_pd_addr_fields(mkc, access_flags, virt_addr,
1215 				      populate ? pd : dev->umrc.pd);
1216 	MLX5_SET(mkc, mkc, free, !populate);
1217 	MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_MTT);
1218 	MLX5_SET(mkc, mkc, umr_en, 1);
1219 
1220 	MLX5_SET64(mkc, mkc, len, length);
1221 	MLX5_SET(mkc, mkc, bsf_octword_size, 0);
1222 	MLX5_SET(mkc, mkc, translations_octword_size,
1223 		 get_octo_len(virt_addr, length, page_shift));
1224 	MLX5_SET(mkc, mkc, log_page_size, page_shift);
1225 	if (populate) {
1226 		MLX5_SET(create_mkey_in, in, translations_octword_actual_size,
1227 			 get_octo_len(virt_addr, length, page_shift));
1228 	}
1229 
1230 	err = mlx5_ib_create_mkey(dev, &mr->mmkey, in, inlen);
1231 	if (err) {
1232 		mlx5_ib_warn(dev, "create mkey failed\n");
1233 		goto err_2;
1234 	}
1235 	mr->mmkey.type = MLX5_MKEY_MR;
1236 	mr->desc_size = sizeof(struct mlx5_mtt);
1237 	mr->dev = dev;
1238 	kvfree(in);
1239 
1240 	mlx5_ib_dbg(dev, "mkey = 0x%x\n", mr->mmkey.key);
1241 
1242 	return mr;
1243 
1244 err_2:
1245 	kvfree(in);
1246 
1247 err_1:
1248 	if (!ibmr)
1249 		kfree(mr);
1250 
1251 	return ERR_PTR(err);
1252 }
1253 
1254 static void set_mr_fields(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr,
1255 			  u64 length, int access_flags)
1256 {
1257 	mr->ibmr.lkey = mr->mmkey.key;
1258 	mr->ibmr.rkey = mr->mmkey.key;
1259 	mr->ibmr.length = length;
1260 	mr->access_flags = access_flags;
1261 }
1262 
1263 static struct ib_mr *mlx5_ib_get_dm_mr(struct ib_pd *pd, u64 start_addr,
1264 				       u64 length, int acc, int mode)
1265 {
1266 	struct mlx5_ib_dev *dev = to_mdev(pd->device);
1267 	int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
1268 	struct mlx5_ib_mr *mr;
1269 	void *mkc;
1270 	u32 *in;
1271 	int err;
1272 
1273 	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
1274 	if (!mr)
1275 		return ERR_PTR(-ENOMEM);
1276 
1277 	in = kzalloc(inlen, GFP_KERNEL);
1278 	if (!in) {
1279 		err = -ENOMEM;
1280 		goto err_free;
1281 	}
1282 
1283 	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
1284 
1285 	MLX5_SET(mkc, mkc, access_mode_1_0, mode & 0x3);
1286 	MLX5_SET(mkc, mkc, access_mode_4_2, (mode >> 2) & 0x7);
1287 	MLX5_SET64(mkc, mkc, len, length);
1288 	set_mkc_access_pd_addr_fields(mkc, acc, start_addr, pd);
1289 
1290 	err = mlx5_ib_create_mkey(dev, &mr->mmkey, in, inlen);
1291 	if (err)
1292 		goto err_in;
1293 
1294 	kfree(in);
1295 
1296 	set_mr_fields(dev, mr, length, acc);
1297 
1298 	return &mr->ibmr;
1299 
1300 err_in:
1301 	kfree(in);
1302 
1303 err_free:
1304 	kfree(mr);
1305 
1306 	return ERR_PTR(err);
1307 }
1308 
1309 int mlx5_ib_advise_mr(struct ib_pd *pd,
1310 		      enum ib_uverbs_advise_mr_advice advice,
1311 		      u32 flags,
1312 		      struct ib_sge *sg_list,
1313 		      u32 num_sge,
1314 		      struct uverbs_attr_bundle *attrs)
1315 {
1316 	if (advice != IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH &&
1317 	    advice != IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH_WRITE &&
1318 	    advice != IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH_NO_FAULT)
1319 		return -EOPNOTSUPP;
1320 
1321 	return mlx5_ib_advise_mr_prefetch(pd, advice, flags,
1322 					 sg_list, num_sge);
1323 }
1324 
1325 struct ib_mr *mlx5_ib_reg_dm_mr(struct ib_pd *pd, struct ib_dm *dm,
1326 				struct ib_dm_mr_attr *attr,
1327 				struct uverbs_attr_bundle *attrs)
1328 {
1329 	struct mlx5_ib_dm *mdm = to_mdm(dm);
1330 	struct mlx5_core_dev *dev = to_mdev(dm->device)->mdev;
1331 	u64 start_addr = mdm->dev_addr + attr->offset;
1332 	int mode;
1333 
1334 	switch (mdm->type) {
1335 	case MLX5_IB_UAPI_DM_TYPE_MEMIC:
1336 		if (attr->access_flags & ~MLX5_IB_DM_MEMIC_ALLOWED_ACCESS)
1337 			return ERR_PTR(-EINVAL);
1338 
1339 		mode = MLX5_MKC_ACCESS_MODE_MEMIC;
1340 		start_addr -= pci_resource_start(dev->pdev, 0);
1341 		break;
1342 	case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM:
1343 	case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM:
1344 		if (attr->access_flags & ~MLX5_IB_DM_SW_ICM_ALLOWED_ACCESS)
1345 			return ERR_PTR(-EINVAL);
1346 
1347 		mode = MLX5_MKC_ACCESS_MODE_SW_ICM;
1348 		break;
1349 	default:
1350 		return ERR_PTR(-EINVAL);
1351 	}
1352 
1353 	return mlx5_ib_get_dm_mr(pd, start_addr, attr->length,
1354 				 attr->access_flags, mode);
1355 }
1356 
1357 struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
1358 				  u64 virt_addr, int access_flags,
1359 				  struct ib_udata *udata)
1360 {
1361 	struct mlx5_ib_dev *dev = to_mdev(pd->device);
1362 	struct mlx5_ib_mr *mr = NULL;
1363 	bool xlt_with_umr;
1364 	struct ib_umem *umem;
1365 	int page_shift;
1366 	int npages;
1367 	int ncont;
1368 	int order;
1369 	int err;
1370 
1371 	if (!IS_ENABLED(CONFIG_INFINIBAND_USER_MEM))
1372 		return ERR_PTR(-EOPNOTSUPP);
1373 
1374 	mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n",
1375 		    start, virt_addr, length, access_flags);
1376 
1377 	xlt_with_umr = mlx5_ib_can_load_pas_with_umr(dev, length);
1378 	/* ODP requires xlt update via umr to work. */
1379 	if (!xlt_with_umr && (access_flags & IB_ACCESS_ON_DEMAND))
1380 		return ERR_PTR(-EINVAL);
1381 
1382 	if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING) && !start &&
1383 	    length == U64_MAX) {
1384 		if (virt_addr != start)
1385 			return ERR_PTR(-EINVAL);
1386 		if (!(access_flags & IB_ACCESS_ON_DEMAND) ||
1387 		    !(dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT))
1388 			return ERR_PTR(-EINVAL);
1389 
1390 		mr = mlx5_ib_alloc_implicit_mr(to_mpd(pd), udata, access_flags);
1391 		if (IS_ERR(mr))
1392 			return ERR_CAST(mr);
1393 		return &mr->ibmr;
1394 	}
1395 
1396 	err = mr_umem_get(dev, start, length, access_flags, &umem,
1397 			  &npages, &page_shift, &ncont, &order);
1398 
1399 	if (err < 0)
1400 		return ERR_PTR(err);
1401 
1402 	if (xlt_with_umr) {
1403 		mr = alloc_mr_from_cache(pd, umem, virt_addr, length, ncont,
1404 					 page_shift, order, access_flags);
1405 		if (IS_ERR(mr))
1406 			mr = NULL;
1407 	}
1408 
1409 	if (!mr) {
1410 		mutex_lock(&dev->slow_path_mutex);
1411 		mr = reg_create(NULL, pd, virt_addr, length, umem, ncont,
1412 				page_shift, access_flags, !xlt_with_umr);
1413 		mutex_unlock(&dev->slow_path_mutex);
1414 	}
1415 
1416 	if (IS_ERR(mr)) {
1417 		err = PTR_ERR(mr);
1418 		goto error;
1419 	}
1420 
1421 	mlx5_ib_dbg(dev, "mkey 0x%x\n", mr->mmkey.key);
1422 
1423 	mr->umem = umem;
1424 	mr->npages = npages;
1425 	atomic_add(mr->npages, &dev->mdev->priv.reg_pages);
1426 	set_mr_fields(dev, mr, length, access_flags);
1427 
1428 	if (xlt_with_umr && !(access_flags & IB_ACCESS_ON_DEMAND)) {
1429 		/*
1430 		 * If the MR was created with reg_create then it will be
1431 		 * configured properly but left disabled. It is safe to go ahead
1432 		 * and configure it again via UMR while enabling it.
1433 		 */
1434 		int update_xlt_flags = MLX5_IB_UPD_XLT_ENABLE;
1435 
1436 		err = mlx5_ib_update_xlt(mr, 0, ncont, page_shift,
1437 					 update_xlt_flags);
1438 		if (err) {
1439 			dereg_mr(dev, mr);
1440 			return ERR_PTR(err);
1441 		}
1442 	}
1443 
1444 	if (is_odp_mr(mr)) {
1445 		to_ib_umem_odp(mr->umem)->private = mr;
1446 		init_waitqueue_head(&mr->q_deferred_work);
1447 		atomic_set(&mr->num_deferred_work, 0);
1448 		err = xa_err(xa_store(&dev->odp_mkeys,
1449 				      mlx5_base_mkey(mr->mmkey.key), &mr->mmkey,
1450 				      GFP_KERNEL));
1451 		if (err) {
1452 			dereg_mr(dev, mr);
1453 			return ERR_PTR(err);
1454 		}
1455 
1456 		err = mlx5_ib_init_odp_mr(mr, xlt_with_umr);
1457 		if (err) {
1458 			dereg_mr(dev, mr);
1459 			return ERR_PTR(err);
1460 		}
1461 	}
1462 
1463 	return &mr->ibmr;
1464 error:
1465 	ib_umem_release(umem);
1466 	return ERR_PTR(err);
1467 }
1468 
1469 /**
1470  * mlx5_mr_cache_invalidate - Fence all DMA on the MR
1471  * @mr: The MR to fence
1472  *
1473  * Upon return the NIC will not be doing any DMA to the pages under the MR,
1474  * and any DMA in progress will be completed. Failure of this function
1475  * indicates the HW has failed catastrophically.
1476  */
1477 int mlx5_mr_cache_invalidate(struct mlx5_ib_mr *mr)
1478 {
1479 	struct mlx5_umr_wr umrwr = {};
1480 
1481 	if (mr->dev->mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
1482 		return 0;
1483 
1484 	umrwr.wr.send_flags = MLX5_IB_SEND_UMR_DISABLE_MR |
1485 			      MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS;
1486 	umrwr.wr.opcode = MLX5_IB_WR_UMR;
1487 	umrwr.pd = mr->dev->umrc.pd;
1488 	umrwr.mkey = mr->mmkey.key;
1489 	umrwr.ignore_free_state = 1;
1490 
1491 	return mlx5_ib_post_send_wait(mr->dev, &umrwr);
1492 }
1493 
1494 static int rereg_umr(struct ib_pd *pd, struct mlx5_ib_mr *mr,
1495 		     int access_flags, int flags)
1496 {
1497 	struct mlx5_ib_dev *dev = to_mdev(pd->device);
1498 	struct mlx5_umr_wr umrwr = {};
1499 	int err;
1500 
1501 	umrwr.wr.send_flags = MLX5_IB_SEND_UMR_FAIL_IF_FREE;
1502 
1503 	umrwr.wr.opcode = MLX5_IB_WR_UMR;
1504 	umrwr.mkey = mr->mmkey.key;
1505 
1506 	if (flags & IB_MR_REREG_PD || flags & IB_MR_REREG_ACCESS) {
1507 		umrwr.pd = pd;
1508 		umrwr.access_flags = access_flags;
1509 		umrwr.wr.send_flags |= MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS;
1510 	}
1511 
1512 	err = mlx5_ib_post_send_wait(dev, &umrwr);
1513 
1514 	return err;
1515 }
1516 
1517 int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
1518 			  u64 length, u64 virt_addr, int new_access_flags,
1519 			  struct ib_pd *new_pd, struct ib_udata *udata)
1520 {
1521 	struct mlx5_ib_dev *dev = to_mdev(ib_mr->device);
1522 	struct mlx5_ib_mr *mr = to_mmr(ib_mr);
1523 	struct ib_pd *pd = (flags & IB_MR_REREG_PD) ? new_pd : ib_mr->pd;
1524 	int access_flags = flags & IB_MR_REREG_ACCESS ?
1525 			    new_access_flags :
1526 			    mr->access_flags;
1527 	int page_shift = 0;
1528 	int upd_flags = 0;
1529 	int npages = 0;
1530 	int ncont = 0;
1531 	int order = 0;
1532 	u64 addr, len;
1533 	int err;
1534 
1535 	mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n",
1536 		    start, virt_addr, length, access_flags);
1537 
1538 	if (!mr->umem)
1539 		return -EINVAL;
1540 
1541 	if (is_odp_mr(mr))
1542 		return -EOPNOTSUPP;
1543 
1544 	if (flags & IB_MR_REREG_TRANS) {
1545 		addr = virt_addr;
1546 		len = length;
1547 	} else {
1548 		addr = mr->umem->address;
1549 		len = mr->umem->length;
1550 	}
1551 
1552 	if (flags != IB_MR_REREG_PD) {
1553 		/*
1554 		 * Replace umem. This needs to be done whether or not UMR is
1555 		 * used.
1556 		 */
1557 		flags |= IB_MR_REREG_TRANS;
1558 		atomic_sub(mr->npages, &dev->mdev->priv.reg_pages);
1559 		mr->npages = 0;
1560 		ib_umem_release(mr->umem);
1561 		mr->umem = NULL;
1562 
1563 		err = mr_umem_get(dev, addr, len, access_flags, &mr->umem,
1564 				  &npages, &page_shift, &ncont, &order);
1565 		if (err)
1566 			goto err;
1567 		mr->npages = ncont;
1568 		atomic_add(mr->npages, &dev->mdev->priv.reg_pages);
1569 	}
1570 
1571 	if (!mlx5_ib_can_reconfig_with_umr(dev, mr->access_flags,
1572 					   access_flags) ||
1573 	    !mlx5_ib_can_load_pas_with_umr(dev, len) ||
1574 	    (flags & IB_MR_REREG_TRANS &&
1575 	     !mlx5_ib_pas_fits_in_mr(mr, addr, len))) {
1576 		/*
1577 		 * UMR can't be used - MKey needs to be replaced.
1578 		 */
1579 		if (mr->cache_ent)
1580 			detach_mr_from_cache(mr);
1581 		err = destroy_mkey(dev, mr);
1582 		if (err)
1583 			goto err;
1584 
1585 		mr = reg_create(ib_mr, pd, addr, len, mr->umem, ncont,
1586 				page_shift, access_flags, true);
1587 
1588 		if (IS_ERR(mr)) {
1589 			err = PTR_ERR(mr);
1590 			mr = to_mmr(ib_mr);
1591 			goto err;
1592 		}
1593 	} else {
1594 		/*
1595 		 * Send a UMR WQE
1596 		 */
1597 		mr->ibmr.pd = pd;
1598 		mr->access_flags = access_flags;
1599 		mr->mmkey.iova = addr;
1600 		mr->mmkey.size = len;
1601 		mr->mmkey.pd = to_mpd(pd)->pdn;
1602 
1603 		if (flags & IB_MR_REREG_TRANS) {
1604 			upd_flags = MLX5_IB_UPD_XLT_ADDR;
1605 			if (flags & IB_MR_REREG_PD)
1606 				upd_flags |= MLX5_IB_UPD_XLT_PD;
1607 			if (flags & IB_MR_REREG_ACCESS)
1608 				upd_flags |= MLX5_IB_UPD_XLT_ACCESS;
1609 			err = mlx5_ib_update_xlt(mr, 0, npages, page_shift,
1610 						 upd_flags);
1611 		} else {
1612 			err = rereg_umr(pd, mr, access_flags, flags);
1613 		}
1614 
1615 		if (err)
1616 			goto err;
1617 	}
1618 
1619 	set_mr_fields(dev, mr, len, access_flags);
1620 
1621 	return 0;
1622 
1623 err:
1624 	ib_umem_release(mr->umem);
1625 	mr->umem = NULL;
1626 
1627 	clean_mr(dev, mr);
1628 	return err;
1629 }
1630 
1631 static int
1632 mlx5_alloc_priv_descs(struct ib_device *device,
1633 		      struct mlx5_ib_mr *mr,
1634 		      int ndescs,
1635 		      int desc_size)
1636 {
1637 	int size = ndescs * desc_size;
1638 	int add_size;
1639 	int ret;
1640 
1641 	add_size = max_t(int, MLX5_UMR_ALIGN - ARCH_KMALLOC_MINALIGN, 0);
1642 
1643 	mr->descs_alloc = kzalloc(size + add_size, GFP_KERNEL);
1644 	if (!mr->descs_alloc)
1645 		return -ENOMEM;
1646 
1647 	mr->descs = PTR_ALIGN(mr->descs_alloc, MLX5_UMR_ALIGN);
1648 
1649 	mr->desc_map = dma_map_single(device->dev.parent, mr->descs,
1650 				      size, DMA_TO_DEVICE);
1651 	if (dma_mapping_error(device->dev.parent, mr->desc_map)) {
1652 		ret = -ENOMEM;
1653 		goto err;
1654 	}
1655 
1656 	return 0;
1657 err:
1658 	kfree(mr->descs_alloc);
1659 
1660 	return ret;
1661 }
1662 
1663 static void
1664 mlx5_free_priv_descs(struct mlx5_ib_mr *mr)
1665 {
1666 	if (mr->descs) {
1667 		struct ib_device *device = mr->ibmr.device;
1668 		int size = mr->max_descs * mr->desc_size;
1669 
1670 		dma_unmap_single(device->dev.parent, mr->desc_map,
1671 				 size, DMA_TO_DEVICE);
1672 		kfree(mr->descs_alloc);
1673 		mr->descs = NULL;
1674 	}
1675 }
1676 
1677 static void clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
1678 {
1679 	if (mr->sig) {
1680 		if (mlx5_core_destroy_psv(dev->mdev,
1681 					  mr->sig->psv_memory.psv_idx))
1682 			mlx5_ib_warn(dev, "failed to destroy mem psv %d\n",
1683 				     mr->sig->psv_memory.psv_idx);
1684 		if (mlx5_core_destroy_psv(dev->mdev,
1685 					  mr->sig->psv_wire.psv_idx))
1686 			mlx5_ib_warn(dev, "failed to destroy wire psv %d\n",
1687 				     mr->sig->psv_wire.psv_idx);
1688 		xa_erase(&dev->sig_mrs, mlx5_base_mkey(mr->mmkey.key));
1689 		kfree(mr->sig);
1690 		mr->sig = NULL;
1691 	}
1692 
1693 	if (!mr->cache_ent) {
1694 		destroy_mkey(dev, mr);
1695 		mlx5_free_priv_descs(mr);
1696 	}
1697 }
1698 
1699 static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
1700 {
1701 	int npages = mr->npages;
1702 	struct ib_umem *umem = mr->umem;
1703 
1704 	/* Stop all DMA */
1705 	if (is_odp_mr(mr))
1706 		mlx5_ib_fence_odp_mr(mr);
1707 	else
1708 		clean_mr(dev, mr);
1709 
1710 	if (mr->cache_ent)
1711 		mlx5_mr_cache_free(dev, mr);
1712 	else
1713 		kfree(mr);
1714 
1715 	ib_umem_release(umem);
1716 	atomic_sub(npages, &dev->mdev->priv.reg_pages);
1717 
1718 }
1719 
1720 int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
1721 {
1722 	struct mlx5_ib_mr *mmr = to_mmr(ibmr);
1723 
1724 	if (ibmr->type == IB_MR_TYPE_INTEGRITY) {
1725 		dereg_mr(to_mdev(mmr->mtt_mr->ibmr.device), mmr->mtt_mr);
1726 		dereg_mr(to_mdev(mmr->klm_mr->ibmr.device), mmr->klm_mr);
1727 	}
1728 
1729 	if (is_odp_mr(mmr) && to_ib_umem_odp(mmr->umem)->is_implicit_odp) {
1730 		mlx5_ib_free_implicit_mr(mmr);
1731 		return 0;
1732 	}
1733 
1734 	dereg_mr(to_mdev(ibmr->device), mmr);
1735 
1736 	return 0;
1737 }
1738 
1739 static void mlx5_set_umr_free_mkey(struct ib_pd *pd, u32 *in, int ndescs,
1740 				   int access_mode, int page_shift)
1741 {
1742 	void *mkc;
1743 
1744 	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
1745 
1746 	/* This is only used from the kernel, so setting the PD is OK. */
1747 	set_mkc_access_pd_addr_fields(mkc, 0, 0, pd);
1748 	MLX5_SET(mkc, mkc, free, 1);
1749 	MLX5_SET(mkc, mkc, translations_octword_size, ndescs);
1750 	MLX5_SET(mkc, mkc, access_mode_1_0, access_mode & 0x3);
1751 	MLX5_SET(mkc, mkc, access_mode_4_2, (access_mode >> 2) & 0x7);
1752 	MLX5_SET(mkc, mkc, umr_en, 1);
1753 	MLX5_SET(mkc, mkc, log_page_size, page_shift);
1754 }
1755 
1756 static int _mlx5_alloc_mkey_descs(struct ib_pd *pd, struct mlx5_ib_mr *mr,
1757 				  int ndescs, int desc_size, int page_shift,
1758 				  int access_mode, u32 *in, int inlen)
1759 {
1760 	struct mlx5_ib_dev *dev = to_mdev(pd->device);
1761 	int err;
1762 
1763 	mr->access_mode = access_mode;
1764 	mr->desc_size = desc_size;
1765 	mr->max_descs = ndescs;
1766 
1767 	err = mlx5_alloc_priv_descs(pd->device, mr, ndescs, desc_size);
1768 	if (err)
1769 		return err;
1770 
1771 	mlx5_set_umr_free_mkey(pd, in, ndescs, access_mode, page_shift);
1772 
1773 	err = mlx5_ib_create_mkey(dev, &mr->mmkey, in, inlen);
1774 	if (err)
1775 		goto err_free_descs;
1776 
1777 	mr->mmkey.type = MLX5_MKEY_MR;
1778 	mr->ibmr.lkey = mr->mmkey.key;
1779 	mr->ibmr.rkey = mr->mmkey.key;
1780 
1781 	return 0;
1782 
1783 err_free_descs:
1784 	mlx5_free_priv_descs(mr);
1785 	return err;
1786 }
1787 
1788 static struct mlx5_ib_mr *mlx5_ib_alloc_pi_mr(struct ib_pd *pd,
1789 				u32 max_num_sg, u32 max_num_meta_sg,
1790 				int desc_size, int access_mode)
1791 {
1792 	int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
1793 	int ndescs = ALIGN(max_num_sg + max_num_meta_sg, 4);
1794 	int page_shift = 0;
1795 	struct mlx5_ib_mr *mr;
1796 	u32 *in;
1797 	int err;
1798 
1799 	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
1800 	if (!mr)
1801 		return ERR_PTR(-ENOMEM);
1802 
1803 	mr->ibmr.pd = pd;
1804 	mr->ibmr.device = pd->device;
1805 
1806 	in = kzalloc(inlen, GFP_KERNEL);
1807 	if (!in) {
1808 		err = -ENOMEM;
1809 		goto err_free;
1810 	}
1811 
1812 	if (access_mode == MLX5_MKC_ACCESS_MODE_MTT)
1813 		page_shift = PAGE_SHIFT;
1814 
1815 	err = _mlx5_alloc_mkey_descs(pd, mr, ndescs, desc_size, page_shift,
1816 				     access_mode, in, inlen);
1817 	if (err)
1818 		goto err_free_in;
1819 
1820 	mr->umem = NULL;
1821 	kfree(in);
1822 
1823 	return mr;
1824 
1825 err_free_in:
1826 	kfree(in);
1827 err_free:
1828 	kfree(mr);
1829 	return ERR_PTR(err);
1830 }
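/*
 * Editor's note (illustrative only): ndescs above is rounded up to a
 * multiple of 4, e.g. max_num_sg = 3 and max_num_meta_sg = 1 yield
 * ndescs = 4, while max_num_sg = 5 and max_num_meta_sg = 2 yield
 * ndescs = 8. MTT-mode PI MRs use PAGE_SHIFT as the page size; KLM-mode
 * PI MRs keep page_shift = 0 because KLM entries carry explicit byte
 * counts instead of fixed-size pages.
 */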
1831 
1832 static int mlx5_alloc_mem_reg_descs(struct ib_pd *pd, struct mlx5_ib_mr *mr,
1833 				    int ndescs, u32 *in, int inlen)
1834 {
1835 	return _mlx5_alloc_mkey_descs(pd, mr, ndescs, sizeof(struct mlx5_mtt),
1836 				      PAGE_SHIFT, MLX5_MKC_ACCESS_MODE_MTT, in,
1837 				      inlen);
1838 }
1839 
1840 static int mlx5_alloc_sg_gaps_descs(struct ib_pd *pd, struct mlx5_ib_mr *mr,
1841 				    int ndescs, u32 *in, int inlen)
1842 {
1843 	return _mlx5_alloc_mkey_descs(pd, mr, ndescs, sizeof(struct mlx5_klm),
1844 				      0, MLX5_MKC_ACCESS_MODE_KLMS, in, inlen);
1845 }
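/*
 * Editor's note (illustrative only): IB_MR_TYPE_MEM_REG uses MTT
 * descriptors, which assume page-aligned, page-sized chunks, while
 * IB_MR_TYPE_SG_GAPS uses KLM descriptors, which can describe buffers with
 * arbitrary offsets and lengths at the cost of larger descriptors and
 * indirect access in the HW.
 */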
1846 
1847 static int mlx5_alloc_integrity_descs(struct ib_pd *pd, struct mlx5_ib_mr *mr,
1848 				      int max_num_sg, int max_num_meta_sg,
1849 				      u32 *in, int inlen)
1850 {
1851 	struct mlx5_ib_dev *dev = to_mdev(pd->device);
1852 	u32 psv_index[2];
1853 	void *mkc;
1854 	int err;
1855 
1856 	mr->sig = kzalloc(sizeof(*mr->sig), GFP_KERNEL);
1857 	if (!mr->sig)
1858 		return -ENOMEM;
1859 
1860 	/* create mem & wire PSVs */
1861 	err = mlx5_core_create_psv(dev->mdev, to_mpd(pd)->pdn, 2, psv_index);
1862 	if (err)
1863 		goto err_free_sig;
1864 
1865 	mr->sig->psv_memory.psv_idx = psv_index[0];
1866 	mr->sig->psv_wire.psv_idx = psv_index[1];
1867 
1868 	mr->sig->sig_status_checked = true;
1869 	mr->sig->sig_err_exists = false;
1870 	/* Arm SIGERR on the next UMR */
1871 	++mr->sig->sigerr_count;
1872 	mr->klm_mr = mlx5_ib_alloc_pi_mr(pd, max_num_sg, max_num_meta_sg,
1873 					 sizeof(struct mlx5_klm),
1874 					 MLX5_MKC_ACCESS_MODE_KLMS);
1875 	if (IS_ERR(mr->klm_mr)) {
1876 		err = PTR_ERR(mr->klm_mr);
1877 		goto err_destroy_psv;
1878 	}
1879 	mr->mtt_mr = mlx5_ib_alloc_pi_mr(pd, max_num_sg, max_num_meta_sg,
1880 					 sizeof(struct mlx5_mtt),
1881 					 MLX5_MKC_ACCESS_MODE_MTT);
1882 	if (IS_ERR(mr->mtt_mr)) {
1883 		err = PTR_ERR(mr->mtt_mr);
1884 		goto err_free_klm_mr;
1885 	}
1886 
1887 	/* Set bsf descriptors for mkey */
1888 	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
1889 	MLX5_SET(mkc, mkc, bsf_en, 1);
1890 	MLX5_SET(mkc, mkc, bsf_octword_size, MLX5_MKEY_BSF_OCTO_SIZE);
1891 
1892 	err = _mlx5_alloc_mkey_descs(pd, mr, 4, sizeof(struct mlx5_klm), 0,
1893 				     MLX5_MKC_ACCESS_MODE_KLMS, in, inlen);
1894 	if (err)
1895 		goto err_free_mtt_mr;
1896 
1897 	err = xa_err(xa_store(&dev->sig_mrs, mlx5_base_mkey(mr->mmkey.key),
1898 			      mr->sig, GFP_KERNEL));
1899 	if (err)
1900 		goto err_free_descs;
1901 	return 0;
1902 
1903 err_free_descs:
1904 	destroy_mkey(dev, mr);
1905 	mlx5_free_priv_descs(mr);
1906 err_free_mtt_mr:
1907 	dereg_mr(to_mdev(mr->mtt_mr->ibmr.device), mr->mtt_mr);
1908 	mr->mtt_mr = NULL;
1909 err_free_klm_mr:
1910 	dereg_mr(to_mdev(mr->klm_mr->ibmr.device), mr->klm_mr);
1911 	mr->klm_mr = NULL;
1912 err_destroy_psv:
1913 	if (mlx5_core_destroy_psv(dev->mdev, mr->sig->psv_memory.psv_idx))
1914 		mlx5_ib_warn(dev, "failed to destroy mem psv %d\n",
1915 			     mr->sig->psv_memory.psv_idx);
1916 	if (mlx5_core_destroy_psv(dev->mdev, mr->sig->psv_wire.psv_idx))
1917 		mlx5_ib_warn(dev, "failed to destroy wire psv %d\n",
1918 			     mr->sig->psv_wire.psv_idx);
1919 err_free_sig:
1920 	kfree(mr->sig);
1921 
1922 	return err;
1923 }
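/*
 * Editor's note (illustrative only): an integrity MR is a composite
 * object. The function above creates two PSV objects (memory and wire),
 * two internal PI MRs (mtt_mr and klm_mr) used to map the data/metadata
 * scatterlists, and the top-level KLM mkey with BSF enabled that ties
 * them together. The error path unwinds these resources in the reverse
 * of their creation order.
 */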
1924 
1925 static struct ib_mr *__mlx5_ib_alloc_mr(struct ib_pd *pd,
1926 					enum ib_mr_type mr_type, u32 max_num_sg,
1927 					u32 max_num_meta_sg)
1928 {
1929 	struct mlx5_ib_dev *dev = to_mdev(pd->device);
1930 	int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
1931 	int ndescs = ALIGN(max_num_sg, 4);
1932 	struct mlx5_ib_mr *mr;
1933 	u32 *in;
1934 	int err;
1935 
1936 	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
1937 	if (!mr)
1938 		return ERR_PTR(-ENOMEM);
1939 
1940 	in = kzalloc(inlen, GFP_KERNEL);
1941 	if (!in) {
1942 		err = -ENOMEM;
1943 		goto err_free;
1944 	}
1945 
1946 	mr->ibmr.device = pd->device;
1947 	mr->umem = NULL;
1948 
1949 	switch (mr_type) {
1950 	case IB_MR_TYPE_MEM_REG:
1951 		err = mlx5_alloc_mem_reg_descs(pd, mr, ndescs, in, inlen);
1952 		break;
1953 	case IB_MR_TYPE_SG_GAPS:
1954 		err = mlx5_alloc_sg_gaps_descs(pd, mr, ndescs, in, inlen);
1955 		break;
1956 	case IB_MR_TYPE_INTEGRITY:
1957 		err = mlx5_alloc_integrity_descs(pd, mr, max_num_sg,
1958 						 max_num_meta_sg, in, inlen);
1959 		break;
1960 	default:
1961 		mlx5_ib_warn(dev, "Invalid mr type %d\n", mr_type);
1962 		err = -EINVAL;
1963 	}
1964 
1965 	if (err)
1966 		goto err_free_in;
1967 
1968 	kfree(in);
1969 
1970 	return &mr->ibmr;
1971 
1972 err_free_in:
1973 	kfree(in);
1974 err_free:
1975 	kfree(mr);
1976 	return ERR_PTR(err);
1977 }
1978 
1979 struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
1980 			       u32 max_num_sg)
1981 {
1982 	return __mlx5_ib_alloc_mr(pd, mr_type, max_num_sg, 0);
1983 }
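/*
 * Editor's sketch (not part of the driver): a minimal view of how a kernel
 * ULP typically reaches this entry point through the RDMA core verbs. The
 * function and variable names below are hypothetical; error handling and
 * the IB_WR_REG_MR work request that actually activates the mapping are
 * omitted for brevity.
 */
#if 0
static int example_fastreg(struct ib_pd *pd, struct scatterlist *sg,
			   int sg_nents)
{
	struct ib_mr *mr;
	int n;

	/* Ends up in mlx5_ib_alloc_mr() for an mlx5 device. */
	mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, 16);
	if (IS_ERR(mr))
		return PTR_ERR(mr);

	/* Ends up in mlx5_ib_map_mr_sg(); returns the number of mapped SGEs. */
	n = ib_map_mr_sg(mr, sg, sg_nents, NULL, PAGE_SIZE);
	if (n != sg_nents) {
		ib_dereg_mr(mr);
		return n < 0 ? n : -EINVAL;
	}

	/* ... post an IB_WR_REG_MR work request, then use mr->lkey/rkey ... */

	return ib_dereg_mr(mr);	/* ends up in mlx5_ib_dereg_mr() */
}
#endif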
1984 
1985 struct ib_mr *mlx5_ib_alloc_mr_integrity(struct ib_pd *pd,
1986 					 u32 max_num_sg, u32 max_num_meta_sg)
1987 {
1988 	return __mlx5_ib_alloc_mr(pd, IB_MR_TYPE_INTEGRITY, max_num_sg,
1989 				  max_num_meta_sg);
1990 }
1991 
1992 int mlx5_ib_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata)
1993 {
1994 	struct mlx5_ib_dev *dev = to_mdev(ibmw->device);
1995 	int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
1996 	struct mlx5_ib_mw *mw = to_mmw(ibmw);
1997 	u32 *in = NULL;
1998 	void *mkc;
1999 	int ndescs;
2000 	int err;
2001 	struct mlx5_ib_alloc_mw req = {};
2002 	struct {
2003 		__u32	comp_mask;
2004 		__u32	response_length;
2005 	} resp = {};
2006 
2007 	err = ib_copy_from_udata(&req, udata, min(udata->inlen, sizeof(req)));
2008 	if (err)
2009 		return err;
2010 
2011 	if (req.comp_mask || req.reserved1 || req.reserved2)
2012 		return -EOPNOTSUPP;
2013 
2014 	if (udata->inlen > sizeof(req) &&
2015 	    !ib_is_udata_cleared(udata, sizeof(req),
2016 				 udata->inlen - sizeof(req)))
2017 		return -EOPNOTSUPP;
2018 
2019 	ndescs = req.num_klms ? roundup(req.num_klms, 4) : roundup(1, 4);
2020 
2021 	in = kzalloc(inlen, GFP_KERNEL);
2022 	if (!in) {
2023 		err = -ENOMEM;
2024 		goto free;
2025 	}
2026 
2027 	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
2028 
2029 	MLX5_SET(mkc, mkc, free, 1);
2030 	MLX5_SET(mkc, mkc, translations_octword_size, ndescs);
2031 	MLX5_SET(mkc, mkc, pd, to_mpd(ibmw->pd)->pdn);
2032 	MLX5_SET(mkc, mkc, umr_en, 1);
2033 	MLX5_SET(mkc, mkc, lr, 1);
2034 	MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_KLMS);
2035 	MLX5_SET(mkc, mkc, en_rinval, !!((ibmw->type == IB_MW_TYPE_2)));
2036 	MLX5_SET(mkc, mkc, qpn, 0xffffff);
2037 
2038 	err = mlx5_ib_create_mkey(dev, &mw->mmkey, in, inlen);
2039 	if (err)
2040 		goto free;
2041 
2042 	mw->mmkey.type = MLX5_MKEY_MW;
2043 	ibmw->rkey = mw->mmkey.key;
2044 	mw->ndescs = ndescs;
2045 
2046 	resp.response_length =
2047 		min(offsetofend(typeof(resp), response_length), udata->outlen);
2048 	if (resp.response_length) {
2049 		err = ib_copy_to_udata(udata, &resp, resp.response_length);
2050 		if (err)
2051 			goto free_mkey;
2052 	}
2053 
2054 	if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) {
2055 		err = xa_err(xa_store(&dev->odp_mkeys,
2056 				      mlx5_base_mkey(mw->mmkey.key), &mw->mmkey,
2057 				      GFP_KERNEL));
2058 		if (err)
2059 			goto free_mkey;
2060 	}
2061 
2062 	kfree(in);
2063 	return 0;
2064 
2065 free_mkey:
2066 	mlx5_core_destroy_mkey(dev->mdev, &mw->mmkey);
2067 free:
2068 	kfree(in);
2069 	return err;
2070 }
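/*
 * Editor's note (illustrative only): ndescs for the MW mkey is rounded up
 * to a multiple of 4 KLM slots, e.g. req.num_klms = 5 becomes 8 and
 * req.num_klms = 0 becomes 4. The response_length negotiation above
 * reports back at most as many bytes as the caller's output buffer
 * (udata->outlen) can hold, so older userspace providers keep working.
 */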
2071 
2072 int mlx5_ib_dealloc_mw(struct ib_mw *mw)
2073 {
2074 	struct mlx5_ib_dev *dev = to_mdev(mw->device);
2075 	struct mlx5_ib_mw *mmw = to_mmw(mw);
2076 
2077 	if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) {
2078 		xa_erase(&dev->odp_mkeys, mlx5_base_mkey(mmw->mmkey.key));
2079 		/*
2080 		 * pagefault_single_data_segment() may be accessing mmw under
2081 		 * SRCU if the user bound an ODP MR to this MW.
2082 		 */
2083 		synchronize_srcu(&dev->odp_srcu);
2084 	}
2085 
2086 	return mlx5_core_destroy_mkey(dev->mdev, &mmw->mmkey);
2087 }
2088 
2089 int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask,
2090 			    struct ib_mr_status *mr_status)
2091 {
2092 	struct mlx5_ib_mr *mmr = to_mmr(ibmr);
2093 	int ret = 0;
2094 
2095 	if (check_mask & ~IB_MR_CHECK_SIG_STATUS) {
2096 		pr_err("Invalid status check mask\n");
2097 		ret = -EINVAL;
2098 		goto done;
2099 	}
2100 
2101 	mr_status->fail_status = 0;
2102 	if (check_mask & IB_MR_CHECK_SIG_STATUS) {
2103 		if (!mmr->sig) {
2104 			ret = -EINVAL;
2105 			pr_err("signature status check requested on a non-signature enabled MR\n");
2106 			goto done;
2107 		}
2108 
2109 		mmr->sig->sig_status_checked = true;
2110 		if (!mmr->sig->sig_err_exists)
2111 			goto done;
2112 
2113 		if (ibmr->lkey == mmr->sig->err_item.key)
2114 			memcpy(&mr_status->sig_err, &mmr->sig->err_item,
2115 			       sizeof(mr_status->sig_err));
2116 		else {
2117 			mr_status->sig_err.err_type = IB_SIG_BAD_GUARD;
2118 			mr_status->sig_err.sig_err_offset = 0;
2119 			mr_status->sig_err.key = mmr->sig->err_item.key;
2120 		}
2121 
2122 		mmr->sig->sig_err_exists = false;
2123 		mr_status->fail_status |= IB_MR_CHECK_SIG_STATUS;
2124 	}
2125 
2126 done:
2127 	return ret;
2128 }
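/*
 * Editor's sketch (not part of the driver): how a ULP might query
 * signature status after an integrity operation completes. Names are
 * hypothetical; only the core verb ib_check_mr_status() is assumed.
 */
#if 0
static void example_check_sig(struct ib_mr *mr)
{
	struct ib_mr_status status;

	/* Ends up in mlx5_ib_check_mr_status(). */
	if (ib_check_mr_status(mr, IB_MR_CHECK_SIG_STATUS, &status))
		return;

	if (status.fail_status & IB_MR_CHECK_SIG_STATUS)
		pr_debug("sig error type %d at offset %llu, key 0x%x\n",
			 status.sig_err.err_type,
			 status.sig_err.sig_err_offset,
			 status.sig_err.key);
}
#endif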
2129 
2130 static int
2131 mlx5_ib_map_pa_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg,
2132 			int data_sg_nents, unsigned int *data_sg_offset,
2133 			struct scatterlist *meta_sg, int meta_sg_nents,
2134 			unsigned int *meta_sg_offset)
2135 {
2136 	struct mlx5_ib_mr *mr = to_mmr(ibmr);
2137 	unsigned int sg_offset = 0;
2138 	int n = 0;
2139 
2140 	mr->meta_length = 0;
2141 	if (data_sg_nents == 1) {
2142 		n++;
2143 		mr->ndescs = 1;
2144 		if (data_sg_offset)
2145 			sg_offset = *data_sg_offset;
2146 		mr->data_length = sg_dma_len(data_sg) - sg_offset;
2147 		mr->data_iova = sg_dma_address(data_sg) + sg_offset;
2148 		if (meta_sg_nents == 1) {
2149 			n++;
2150 			mr->meta_ndescs = 1;
2151 			if (meta_sg_offset)
2152 				sg_offset = *meta_sg_offset;
2153 			else
2154 				sg_offset = 0;
2155 			mr->meta_length = sg_dma_len(meta_sg) - sg_offset;
2156 			mr->pi_iova = sg_dma_address(meta_sg) + sg_offset;
2157 		}
2158 		ibmr->length = mr->data_length + mr->meta_length;
2159 	}
2160 
2161 	return n;
2162 }
2163 
2164 static int
2165 mlx5_ib_sg_to_klms(struct mlx5_ib_mr *mr,
2166 		   struct scatterlist *sgl,
2167 		   unsigned short sg_nents,
2168 		   unsigned int *sg_offset_p,
2169 		   struct scatterlist *meta_sgl,
2170 		   unsigned short meta_sg_nents,
2171 		   unsigned int *meta_sg_offset_p)
2172 {
2173 	struct scatterlist *sg = sgl;
2174 	struct mlx5_klm *klms = mr->descs;
2175 	unsigned int sg_offset = sg_offset_p ? *sg_offset_p : 0;
2176 	u32 lkey = mr->ibmr.pd->local_dma_lkey;
2177 	int i, j = 0;
2178 
2179 	mr->ibmr.iova = sg_dma_address(sg) + sg_offset;
2180 	mr->ibmr.length = 0;
2181 
2182 	for_each_sg(sgl, sg, sg_nents, i) {
2183 		if (unlikely(i >= mr->max_descs))
2184 			break;
2185 		klms[i].va = cpu_to_be64(sg_dma_address(sg) + sg_offset);
2186 		klms[i].bcount = cpu_to_be32(sg_dma_len(sg) - sg_offset);
2187 		klms[i].key = cpu_to_be32(lkey);
2188 		mr->ibmr.length += sg_dma_len(sg) - sg_offset;
2189 
2190 		sg_offset = 0;
2191 	}
2192 
2193 	if (sg_offset_p)
2194 		*sg_offset_p = sg_offset;
2195 
2196 	mr->ndescs = i;
2197 	mr->data_length = mr->ibmr.length;
2198 
2199 	if (meta_sg_nents) {
2200 		sg = meta_sgl;
2201 		sg_offset = meta_sg_offset_p ? *meta_sg_offset_p : 0;
2202 		for_each_sg(meta_sgl, sg, meta_sg_nents, j) {
2203 			if (unlikely(i + j >= mr->max_descs))
2204 				break;
2205 			klms[i + j].va = cpu_to_be64(sg_dma_address(sg) +
2206 						     sg_offset);
2207 			klms[i + j].bcount = cpu_to_be32(sg_dma_len(sg) -
2208 							 sg_offset);
2209 			klms[i + j].key = cpu_to_be32(lkey);
2210 			mr->ibmr.length += sg_dma_len(sg) - sg_offset;
2211 
2212 			sg_offset = 0;
2213 		}
2214 		if (meta_sg_offset_p)
2215 			*meta_sg_offset_p = sg_offset;
2216 
2217 		mr->meta_ndescs = j;
2218 		mr->meta_length = mr->ibmr.length - mr->data_length;
2219 	}
2220 
2221 	return i + j;
2222 }
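/*
 * Editor's note (worked example, illustrative only): given two DMA-mapped
 * data SGEs of 8 KiB and 4 KiB and no metadata, the loop above fills
 * klms[0] = {va0, 8192, local_dma_lkey} and klms[1] = {va1, 4096,
 * local_dma_lkey}, sets mr->ndescs = 2, and leaves ibmr.length = 12288
 * with ibmr.iova pointing at the start of the first SGE.
 */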
2223 
2224 static int mlx5_set_page(struct ib_mr *ibmr, u64 addr)
2225 {
2226 	struct mlx5_ib_mr *mr = to_mmr(ibmr);
2227 	__be64 *descs;
2228 
2229 	if (unlikely(mr->ndescs == mr->max_descs))
2230 		return -ENOMEM;
2231 
2232 	descs = mr->descs;
2233 	descs[mr->ndescs++] = cpu_to_be64(addr | MLX5_EN_RD | MLX5_EN_WR);
2234 
2235 	return 0;
2236 }
2237 
2238 static int mlx5_set_page_pi(struct ib_mr *ibmr, u64 addr)
2239 {
2240 	struct mlx5_ib_mr *mr = to_mmr(ibmr);
2241 	__be64 *descs;
2242 
2243 	if (unlikely(mr->ndescs + mr->meta_ndescs == mr->max_descs))
2244 		return -ENOMEM;
2245 
2246 	descs = mr->descs;
2247 	descs[mr->ndescs + mr->meta_ndescs++] =
2248 		cpu_to_be64(addr | MLX5_EN_RD | MLX5_EN_WR);
2249 
2250 	return 0;
2251 }
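/*
 * Editor's note (illustrative only): ib_sg_to_pages() walks the
 * scatterlist and invokes the callback once per page-sized chunk. For a
 * single 8 KiB SGE and a 4 KiB MR page size it calls mlx5_set_page()
 * twice, producing two MTT entries; mlx5_set_page_pi() does the same but
 * appends metadata entries after the mr->ndescs data entries so both
 * regions share one descriptor array.
 */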
2252 
2253 static int
2254 mlx5_ib_map_mtt_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg,
2255 			 int data_sg_nents, unsigned int *data_sg_offset,
2256 			 struct scatterlist *meta_sg, int meta_sg_nents,
2257 			 unsigned int *meta_sg_offset)
2258 {
2259 	struct mlx5_ib_mr *mr = to_mmr(ibmr);
2260 	struct mlx5_ib_mr *pi_mr = mr->mtt_mr;
2261 	int n;
2262 
2263 	pi_mr->ndescs = 0;
2264 	pi_mr->meta_ndescs = 0;
2265 	pi_mr->meta_length = 0;
2266 
2267 	ib_dma_sync_single_for_cpu(ibmr->device, pi_mr->desc_map,
2268 				   pi_mr->desc_size * pi_mr->max_descs,
2269 				   DMA_TO_DEVICE);
2270 
2271 	pi_mr->ibmr.page_size = ibmr->page_size;
2272 	n = ib_sg_to_pages(&pi_mr->ibmr, data_sg, data_sg_nents, data_sg_offset,
2273 			   mlx5_set_page);
2274 	if (n != data_sg_nents)
2275 		return n;
2276 
2277 	pi_mr->data_iova = pi_mr->ibmr.iova;
2278 	pi_mr->data_length = pi_mr->ibmr.length;
2279 	pi_mr->ibmr.length = pi_mr->data_length;
2280 	ibmr->length = pi_mr->data_length;
2281 
2282 	if (meta_sg_nents) {
2283 		u64 page_mask = ~((u64)ibmr->page_size - 1);
2284 		u64 iova = pi_mr->data_iova;
2285 
2286 		n += ib_sg_to_pages(&pi_mr->ibmr, meta_sg, meta_sg_nents,
2287 				    meta_sg_offset, mlx5_set_page_pi);
2288 
2289 		pi_mr->meta_length = pi_mr->ibmr.length;
2290 		/*
2291 		 * The PI address for the HW is the offset of the metadata
2292 		 * address relative to the first data page address.
2293 		 * It equals the first data page address + the size of the data
2294 		 * pages + the metadata offset within the first metadata page.
2295 		 */
2296 		pi_mr->pi_iova = (iova & page_mask) +
2297 				 pi_mr->ndescs * ibmr->page_size +
2298 				 (pi_mr->ibmr.iova & ~page_mask);
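		/*
		 * Editor's note (worked example, illustrative only): with a
		 * 4 KiB page size, data starting at iova 0x10000 and spanning
		 * two pages (ndescs = 2), and metadata whose own iova has an
		 * in-page offset of 0x200, the line above yields
		 * pi_iova = 0x10000 + 2 * 0x1000 + 0x200 = 0x12200.
		 */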
2299 		/*
2300 		 * To use a single MTT MR for both data and metadata, we also
2301 		 * register the gaps between the end of the data and the start
2302 		 * of the metadata (the signature MR guarantees that the HW
2303 		 * accesses only the correct addresses). This mapping is safe
2304 		 * because an internal mkey is used for the registration.
2305 		 */
2306 		pi_mr->ibmr.length = pi_mr->pi_iova + pi_mr->meta_length - iova;
2307 		pi_mr->ibmr.iova = iova;
2308 		ibmr->length += pi_mr->meta_length;
2309 	}
2310 
2311 	ib_dma_sync_single_for_device(ibmr->device, pi_mr->desc_map,
2312 				      pi_mr->desc_size * pi_mr->max_descs,
2313 				      DMA_TO_DEVICE);
2314 
2315 	return n;
2316 }
2317 
2318 static int
2319 mlx5_ib_map_klm_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg,
2320 			 int data_sg_nents, unsigned int *data_sg_offset,
2321 			 struct scatterlist *meta_sg, int meta_sg_nents,
2322 			 unsigned int *meta_sg_offset)
2323 {
2324 	struct mlx5_ib_mr *mr = to_mmr(ibmr);
2325 	struct mlx5_ib_mr *pi_mr = mr->klm_mr;
2326 	int n;
2327 
2328 	pi_mr->ndescs = 0;
2329 	pi_mr->meta_ndescs = 0;
2330 	pi_mr->meta_length = 0;
2331 
2332 	ib_dma_sync_single_for_cpu(ibmr->device, pi_mr->desc_map,
2333 				   pi_mr->desc_size * pi_mr->max_descs,
2334 				   DMA_TO_DEVICE);
2335 
2336 	n = mlx5_ib_sg_to_klms(pi_mr, data_sg, data_sg_nents, data_sg_offset,
2337 			       meta_sg, meta_sg_nents, meta_sg_offset);
2338 
2339 	ib_dma_sync_single_for_device(ibmr->device, pi_mr->desc_map,
2340 				      pi_mr->desc_size * pi_mr->max_descs,
2341 				      DMA_TO_DEVICE);
2342 
2343 	/* This is a zero-based memory region */
2344 	pi_mr->data_iova = 0;
2345 	pi_mr->ibmr.iova = 0;
2346 	pi_mr->pi_iova = pi_mr->data_length;
2347 	ibmr->length = pi_mr->ibmr.length;
2348 
2349 	return n;
2350 }
2351 
2352 int mlx5_ib_map_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg,
2353 			 int data_sg_nents, unsigned int *data_sg_offset,
2354 			 struct scatterlist *meta_sg, int meta_sg_nents,
2355 			 unsigned int *meta_sg_offset)
2356 {
2357 	struct mlx5_ib_mr *mr = to_mmr(ibmr);
2358 	struct mlx5_ib_mr *pi_mr = NULL;
2359 	int n;
2360 
2361 	WARN_ON(ibmr->type != IB_MR_TYPE_INTEGRITY);
2362 
2363 	mr->ndescs = 0;
2364 	mr->data_length = 0;
2365 	mr->data_iova = 0;
2366 	mr->meta_ndescs = 0;
2367 	mr->pi_iova = 0;
2368 	/*
2369 	 * As a performance optimization, avoid a UMR operation to register
2370 	 * the data/metadata buffers when possible.
2371 	 * First try to map the sg lists to PA descriptors with local_dma_lkey,
2372 	 * and fall back to UMR only if that fails.
2373 	 */
2374 	n = mlx5_ib_map_pa_mr_sg_pi(ibmr, data_sg, data_sg_nents,
2375 				    data_sg_offset, meta_sg, meta_sg_nents,
2376 				    meta_sg_offset);
2377 	if (n == data_sg_nents + meta_sg_nents)
2378 		goto out;
2379 	/*
2380 	 * As a performance optimization, avoid mapping the sg lists to KLM
2381 	 * descriptors when possible. First try to map the sg lists to MTT
2382 	 * descriptors and fall back to KLM only if that fails.
2383 	 * MTT descriptors are more efficient for the HW to work with
2384 	 * (especially under high load).
2385 	 * Use KLM (indirect access) only when it is mandatory.
2386 	 */
2387 	pi_mr = mr->mtt_mr;
2388 	n = mlx5_ib_map_mtt_mr_sg_pi(ibmr, data_sg, data_sg_nents,
2389 				     data_sg_offset, meta_sg, meta_sg_nents,
2390 				     meta_sg_offset);
2391 	if (n == data_sg_nents + meta_sg_nents)
2392 		goto out;
2393 
2394 	pi_mr = mr->klm_mr;
2395 	n = mlx5_ib_map_klm_mr_sg_pi(ibmr, data_sg, data_sg_nents,
2396 				     data_sg_offset, meta_sg, meta_sg_nents,
2397 				     meta_sg_offset);
2398 	if (unlikely(n != data_sg_nents + meta_sg_nents))
2399 		return -ENOMEM;
2400 
2401 out:
2402 	/* This is a zero-based memory region */
2403 	ibmr->iova = 0;
2404 	mr->pi_mr = pi_mr;
2405 	if (pi_mr)
2406 		ibmr->sig_attrs->meta_length = pi_mr->meta_length;
2407 	else
2408 		ibmr->sig_attrs->meta_length = mr->meta_length;
2409 
2410 	return 0;
2411 }
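/*
 * Editor's sketch (not part of the driver): the integrity mapping path
 * above is reached through the core verbs roughly as follows. Names are
 * hypothetical; the signature attributes and the IB_WR_REG_MR_INTEGRITY
 * work request that arm the protection checks are omitted.
 */
#if 0
static int example_map_pi(struct ib_pd *pd,
			  struct scatterlist *data_sg, int data_nents,
			  struct scatterlist *meta_sg, int meta_nents)
{
	struct ib_mr *mr;
	int n;

	/* Ends up in mlx5_ib_alloc_mr_integrity(). */
	mr = ib_alloc_mr_integrity(pd, data_nents, meta_nents);
	if (IS_ERR(mr))
		return PTR_ERR(mr);

	/* Ends up in mlx5_ib_map_mr_sg_pi(), which returns 0 on success. */
	n = ib_map_mr_sg_pi(mr, data_sg, data_nents, NULL,
			    meta_sg, meta_nents, NULL, PAGE_SIZE);
	if (n) {
		ib_dereg_mr(mr);
		return n < 0 ? n : -EINVAL;
	}

	/* ... post IB_WR_REG_MR_INTEGRITY, then issue the I/O ... */

	return ib_dereg_mr(mr);
}
#endif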
2412 
2413 int mlx5_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
2414 		      unsigned int *sg_offset)
2415 {
2416 	struct mlx5_ib_mr *mr = to_mmr(ibmr);
2417 	int n;
2418 
2419 	mr->ndescs = 0;
2420 
2421 	ib_dma_sync_single_for_cpu(ibmr->device, mr->desc_map,
2422 				   mr->desc_size * mr->max_descs,
2423 				   DMA_TO_DEVICE);
2424 
2425 	if (mr->access_mode == MLX5_MKC_ACCESS_MODE_KLMS)
2426 		n = mlx5_ib_sg_to_klms(mr, sg, sg_nents, sg_offset, NULL, 0,
2427 				       NULL);
2428 	else
2429 		n = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset,
2430 				mlx5_set_page);
2431 
2432 	ib_dma_sync_single_for_device(ibmr->device, mr->desc_map,
2433 				      mr->desc_size * mr->max_descs,
2434 				      DMA_TO_DEVICE);
2435 
2436 	return n;
2437 }
2438