// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2020 Mellanox Technologies Ltd. */

#include <linux/vdpa.h>
#include <linux/gcd.h>
#include <linux/string.h>
#include <linux/mlx5/qp.h>
#include "mlx5_vdpa.h"

/* DIV_ROUND_UP where the divisor is a power of 2, given by its log base 2 value */
#define MLX5_DIV_ROUND_UP_POW2(_n, _s) \
({ \
	u64 __s = _s; \
	u64 _res; \
	_res = (((_n) + (1 << (__s)) - 1) >> (__s)); \
	_res; \
})
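/* Worked example: MLX5_DIV_ROUND_UP_POW2(4097, 12) == (4097 + 4095) >> 12 == 2,
 * i.e. two 4K pages are needed to cover 4097 bytes.
 */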
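/* An MTT entry is 8 bytes and the device counts translation entries in
 * 16-byte octwords, so each octword holds two page translations; hence
 * the (npages + 1) / 2 below.
 */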
static int get_octo_len(u64 len, int page_shift)
{
	u64 page_size = 1ULL << page_shift;
	int npages;

	npages = ALIGN(len, page_size) >> page_shift;
	return (npages + 1) / 2;
}

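/* The mkey access mode is split across two context fields: bits 1:0 go in
 * access_mode_1_0 and the remaining high bits in access_mode_4_2.
 */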
static void mlx5_set_access_mode(void *mkc, int mode)
{
	MLX5_SET(mkc, mkc, access_mode_1_0, mode & 0x3);
	MLX5_SET(mkc, mkc, access_mode_4_2, mode >> 2);
}

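/* Walk the DMA-mapped scatterlist and emit one MTT entry per 2^log_size
 * block; a single scatterlist entry may span several such blocks.
 */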
static void populate_mtts(struct mlx5_vdpa_direct_mr *mr, __be64 *mtt)
{
	struct scatterlist *sg;
	int nsg = mr->nsg;
	u64 dma_addr;
	u64 dma_len;
	int j = 0;
	int i;

	for_each_sg(mr->sg_head.sgl, sg, mr->nent, i) {
		for (dma_addr = sg_dma_address(sg), dma_len = sg_dma_len(sg);
		     nsg && dma_len;
		     nsg--, dma_addr += BIT(mr->log_size), dma_len -= BIT(mr->log_size))
			mtt[j++] = cpu_to_be64(dma_addr);
	}
}

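/* Create an MTT-mode mkey covering [mr->start, mr->end). The mkey's page
 * list is taken from the DMA addresses already set up in mr->sg_head.
 */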
static int create_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr *mr)
{
	int inlen;
	void *mkc;
	void *in;
	int err;

	inlen = MLX5_ST_SZ_BYTES(create_mkey_in) + roundup(MLX5_ST_SZ_BYTES(mtt) * mr->nsg, 16);
	in = kvzalloc(inlen, GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	MLX5_SET(create_mkey_in, in, uid, mvdev->res.uid);
	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
	MLX5_SET(mkc, mkc, lw, !!(mr->perm & VHOST_MAP_WO));
	MLX5_SET(mkc, mkc, lr, !!(mr->perm & VHOST_MAP_RO));
	mlx5_set_access_mode(mkc, MLX5_MKC_ACCESS_MODE_MTT);
	MLX5_SET(mkc, mkc, qpn, 0xffffff);
	MLX5_SET(mkc, mkc, pd, mvdev->res.pdn);
	MLX5_SET64(mkc, mkc, start_addr, mr->offset);
	MLX5_SET64(mkc, mkc, len, mr->end - mr->start);
	MLX5_SET(mkc, mkc, log_page_size, mr->log_size);
	MLX5_SET(mkc, mkc, translations_octword_size,
		 get_octo_len(mr->end - mr->start, mr->log_size));
	MLX5_SET(create_mkey_in, in, translations_octword_actual_size,
		 get_octo_len(mr->end - mr->start, mr->log_size));
	populate_mtts(mr, MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt));
	err = mlx5_vdpa_create_mkey(mvdev, &mr->mr, in, inlen);
	kvfree(in);
	if (err) {
		mlx5_vdpa_warn(mvdev, "Failed to create direct MR\n");
		return err;
	}

	return 0;
}

static void destroy_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr *mr)
{
	mlx5_vdpa_destroy_mkey(mvdev, &mr->mr);
}

static u64 map_start(struct vhost_iotlb_map *map, struct mlx5_vdpa_direct_mr *mr)
{
	return max_t(u64, map->start, mr->start);
}

static u64 map_end(struct vhost_iotlb_map *map, struct mlx5_vdpa_direct_mr *mr)
{
	return min_t(u64, map->last + 1, mr->end);
}

static u64 maplen(struct vhost_iotlb_map *map, struct mlx5_vdpa_direct_mr *mr)
{
	return map_end(map, mr) - map_start(map, mr);
}

#define MLX5_VDPA_INVALID_START_ADDR ((u64)-1)
#define MLX5_VDPA_INVALID_LEN ((u64)-1)

static u64 indir_start_addr(struct mlx5_vdpa_mr *mkey)
{
	struct mlx5_vdpa_direct_mr *s;

	s = list_first_entry_or_null(&mkey->head, struct mlx5_vdpa_direct_mr, list);
	if (!s)
		return MLX5_VDPA_INVALID_START_ADDR;

	return s->start;
}

static u64 indir_len(struct mlx5_vdpa_mr *mkey)
{
	struct mlx5_vdpa_direct_mr *s;
	struct mlx5_vdpa_direct_mr *e;

	s = list_first_entry_or_null(&mkey->head, struct mlx5_vdpa_direct_mr, list);
	if (!s)
		return MLX5_VDPA_INVALID_LEN;

	e = list_last_entry(&mkey->head, struct mlx5_vdpa_direct_mr, list);

	return e->end - s->start;
}

#define LOG_MAX_KLM_SIZE 30
#define MAX_KLM_SIZE BIT(LOG_MAX_KLM_SIZE)

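/* Each direct MR is capped at MAX_KLM_SIZE (1 GB), so its length always fits
 * in the 32-bit byte count of a single KLM entry; this is what keeps the
 * truncating cast below safe.
 */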
static u32 klm_bcount(u64 size)
{
	return (u32)size;
}

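/* Fill the KLM list of the indirect mkey. Contiguous direct MRs are entered
 * back to back; a hole between two direct MRs is covered by an entry that
 * points at the null mkey, so the list stays contiguous in address space.
 */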
static void fill_indir(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mkey, void *in)
{
	struct mlx5_vdpa_direct_mr *dmr;
	struct mlx5_klm *klmarr;
	struct mlx5_klm *klm;
	bool first = true;
	u64 preve;
	int i;

	klmarr = MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt);
	i = 0;
	list_for_each_entry(dmr, &mkey->head, list) {
again:
		klm = &klmarr[i++];
		if (first) {
			preve = dmr->start;
			first = false;
		}

		if (preve == dmr->start) {
			klm->key = cpu_to_be32(dmr->mr.key);
			klm->bcount = cpu_to_be32(klm_bcount(dmr->end - dmr->start));
			preve = dmr->end;
		} else {
			klm->key = cpu_to_be32(mvdev->res.null_mkey);
			klm->bcount = cpu_to_be32(klm_bcount(dmr->start - preve));
			preve = dmr->start;
			goto again;
		}
	}
}

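/* A KLM entry is 16 bytes; the list is sized up to a multiple of four
 * entries (64 bytes).
 */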
static int klm_byte_size(int nklms)
{
	return 16 * ALIGN(nklms, 4);
}

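/* Create the KLM-mode indirect mkey that stitches the direct mkeys (and any
 * null-mkey covered holes) into one key spanning the whole mapped range.
 */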
static int create_indirect_key(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr)
{
	int inlen;
	void *mkc;
	void *in;
	int err;
	u64 start;
	u64 len;

	start = indir_start_addr(mr);
	len = indir_len(mr);
	if (start == MLX5_VDPA_INVALID_START_ADDR || len == MLX5_VDPA_INVALID_LEN)
		return -EINVAL;

	inlen = MLX5_ST_SZ_BYTES(create_mkey_in) + klm_byte_size(mr->num_klms);
	in = kzalloc(inlen, GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	MLX5_SET(create_mkey_in, in, uid, mvdev->res.uid);
	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
	MLX5_SET(mkc, mkc, lw, 1);
	MLX5_SET(mkc, mkc, lr, 1);
	mlx5_set_access_mode(mkc, MLX5_MKC_ACCESS_MODE_KLMS);
	MLX5_SET(mkc, mkc, qpn, 0xffffff);
	MLX5_SET(mkc, mkc, pd, mvdev->res.pdn);
	MLX5_SET64(mkc, mkc, start_addr, start);
	MLX5_SET64(mkc, mkc, len, len);
	MLX5_SET(mkc, mkc, translations_octword_size, klm_byte_size(mr->num_klms) / 16);
	MLX5_SET(create_mkey_in, in, translations_octword_actual_size, mr->num_klms);
	fill_indir(mvdev, mr, in);
	err = mlx5_vdpa_create_mkey(mvdev, &mr->mkey, in, inlen);
	kfree(in);
	return err;
}

static void destroy_indirect_key(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mkey)
{
	mlx5_vdpa_destroy_mkey(mvdev, &mkey->mkey);
}

static int map_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr *mr,
			 struct vhost_iotlb *iotlb)
{
	struct vhost_iotlb_map *map;
	unsigned long lgcd = 0;
	int log_entity_size;
	unsigned long size;
	u64 start = 0;
	int err;
	struct page *pg;
	unsigned int nsg;
	int sglen;
	u64 pa;
	u64 paend;
	struct scatterlist *sg;
	struct device *dma = mvdev->mdev->device;

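	/* Pick the largest entity size usable for the MTT: take the gcd of
	 * the lengths of all iotlb fragments overlapping this MR (gcd(0, n)
	 * evaluates to n, seeding the loop) and round it down to a power of
	 * two via ilog2().
	 */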
	for (map = vhost_iotlb_itree_first(iotlb, mr->start, mr->end - 1);
	     map; map = vhost_iotlb_itree_next(map, start, mr->end - 1)) {
		size = maplen(map, mr);
		lgcd = gcd(lgcd, size);
		start += size;
	}
	log_entity_size = ilog2(lgcd);

	sglen = 1 << log_entity_size;
	nsg = MLX5_DIV_ROUND_UP_POW2(mr->end - mr->start, log_entity_size);

	err = sg_alloc_table(&mr->sg_head, nsg, GFP_KERNEL);
	if (err)
		return err;

	sg = mr->sg_head.sgl;
	for (map = vhost_iotlb_itree_first(iotlb, mr->start, mr->end - 1);
	     map; map = vhost_iotlb_itree_next(map, mr->start, mr->end - 1)) {
		paend = map->addr + maplen(map, mr);
		for (pa = map->addr; pa < paend; pa += sglen) {
			pg = pfn_to_page(__phys_to_pfn(pa));
			if (!sg) {
				mlx5_vdpa_warn(mvdev, "sg null. start 0x%llx, end 0x%llx\n",
					       map->start, map->last + 1);
				err = -ENOMEM;
				goto err_map;
			}
			sg_set_page(sg, pg, sglen, 0);
			sg = sg_next(sg);
			if (!sg)
				goto done;
		}
	}
done:
	mr->log_size = log_entity_size;
	mr->nsg = nsg;
	mr->nent = dma_map_sg_attrs(dma, mr->sg_head.sgl, mr->nsg, DMA_BIDIRECTIONAL, 0);
	if (!mr->nent) {
		err = -ENOMEM;
		goto err_map;
	}

	err = create_direct_mr(mvdev, mr);
	if (err)
		goto err_direct;

	return 0;

err_direct:
	dma_unmap_sg_attrs(dma, mr->sg_head.sgl, mr->nsg, DMA_BIDIRECTIONAL, 0);
err_map:
	sg_free_table(&mr->sg_head);
	return err;
}

static void unmap_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr *mr)
{
	struct device *dma = mvdev->mdev->device;

	destroy_direct_mr(mvdev, mr);
	dma_unmap_sg_attrs(dma, mr->sg_head.sgl, mr->nsg, DMA_BIDIRECTIONAL, 0);
	sg_free_table(&mr->sg_head);
}

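/* Split a contiguous range with uniform permissions into direct MRs of at
 * most MAX_KLM_SIZE bytes each, so every MR maps onto a single KLM entry.
 */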
static int add_direct_chain(struct mlx5_vdpa_dev *mvdev, u64 start, u64 size, u8 perm,
			    struct vhost_iotlb *iotlb)
{
	struct mlx5_vdpa_mr *mr = &mvdev->mr;
	struct mlx5_vdpa_direct_mr *dmr;
	struct mlx5_vdpa_direct_mr *n;
	LIST_HEAD(tmp);
	u64 st;
	u64 sz;
	int err;
	int i = 0;

	st = start;
	while (size) {
		sz = (u32)min_t(u64, MAX_KLM_SIZE, size);
		dmr = kzalloc(sizeof(*dmr), GFP_KERNEL);
		if (!dmr) {
			err = -ENOMEM;
			goto err_alloc;
		}

		dmr->start = st;
		dmr->end = st + sz;
		dmr->perm = perm;
		err = map_direct_mr(mvdev, dmr, iotlb);
		if (err) {
			kfree(dmr);
			goto err_alloc;
		}

		list_add_tail(&dmr->list, &tmp);
		size -= sz;
		mr->num_directs++;
		mr->num_klms++;
		st += sz;
		i++;
	}
	list_splice_tail(&tmp, &mr->head);
	return 0;

err_alloc:
	list_for_each_entry_safe(dmr, n, &mr->head, list) {
		list_del_init(&dmr->list);
		unmap_direct_mr(mvdev, dmr);
		kfree(dmr);
	}
	return err;
}

/* The iotlb pointer contains a list of maps. Go over the maps, possibly
 * merging mergeable maps, and create direct memory keys that provide the
 * device access to memory. The direct mkeys are then referred to by the
 * indirect memory key that provides access to the entire address space given
 * by iotlb.
 */
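/* For example, two adjacent maps [0x0, 0x1000) and [0x1000, 0x3000) with
 * identical permissions are merged into one direct-MR chain covering
 * [0x0, 0x3000); a gap before the next map is later bridged in fill_indir()
 * with null-mkey KLM entries.
 */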
static int _mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb)
{
	struct mlx5_vdpa_mr *mr = &mvdev->mr;
	struct mlx5_vdpa_direct_mr *dmr;
	struct mlx5_vdpa_direct_mr *n;
	struct vhost_iotlb_map *map;
	u32 pperm = U16_MAX;
	u64 last = U64_MAX;
	u64 ps = U64_MAX;
	u64 pe = U64_MAX;
	u64 start = 0;
	int err = 0;
	int nnuls;

	if (mr->initialized)
		return 0;

	INIT_LIST_HEAD(&mr->head);
	for (map = vhost_iotlb_itree_first(iotlb, start, last); map;
	     map = vhost_iotlb_itree_next(map, start, last)) {
		start = map->start;
		if (pe == map->start && pperm == map->perm) {
			pe = map->last + 1;
		} else {
			if (ps != U64_MAX) {
				if (pe < map->start) {
					/* We have a hole in the map. Check how
					 * many null keys are required to fill it.
					 */
					nnuls = MLX5_DIV_ROUND_UP_POW2(map->start - pe,
								       LOG_MAX_KLM_SIZE);
					mr->num_klms += nnuls;
				}
				err = add_direct_chain(mvdev, ps, pe - ps, pperm, iotlb);
				if (err)
					goto err_chain;
			}
			ps = map->start;
			pe = map->last + 1;
			pperm = map->perm;
		}
	}
	err = add_direct_chain(mvdev, ps, pe - ps, pperm, iotlb);
	if (err)
		goto err_chain;

	/* Create the memory key that defines the guest's address space. This
	 * memory key refers to the direct keys that contain the MTT
	 * translations.
	 */
	err = create_indirect_key(mvdev, mr);
	if (err)
		goto err_chain;

	mr->initialized = true;
	return 0;

err_chain:
	list_for_each_entry_safe_reverse(dmr, n, &mr->head, list) {
		list_del_init(&dmr->list);
		unmap_direct_mr(mvdev, dmr);
		kfree(dmr);
	}
	return err;
}

int mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb)
{
	struct mlx5_vdpa_mr *mr = &mvdev->mr;
	int err;

	mutex_lock(&mr->mkey_mtx);
	err = _mlx5_vdpa_create_mr(mvdev, iotlb);
	mutex_unlock(&mr->mkey_mtx);
	return err;
}

void mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev)
{
	struct mlx5_vdpa_mr *mr = &mvdev->mr;
	struct mlx5_vdpa_direct_mr *dmr;
	struct mlx5_vdpa_direct_mr *n;

	mutex_lock(&mr->mkey_mtx);
	if (!mr->initialized)
		goto out;

	destroy_indirect_key(mvdev, mr);
	list_for_each_entry_safe_reverse(dmr, n, &mr->head, list) {
		list_del_init(&dmr->list);
		unmap_direct_mr(mvdev, dmr);
		kfree(dmr);
	}
	memset(mr, 0, sizeof(*mr));
	mr->initialized = false;
out:
	mutex_unlock(&mr->mkey_mtx);
}

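/* If a memory registration already exists, report back via *change_map that
 * the caller must tear it down and rebuild; otherwise create it here under
 * the mkey mutex.
 */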
int mlx5_vdpa_handle_set_map(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb,
			     bool *change_map)
{
	struct mlx5_vdpa_mr *mr = &mvdev->mr;
	int err = 0;

	*change_map = false;
	mutex_lock(&mr->mkey_mtx);
	if (mr->initialized) {
		mlx5_vdpa_info(mvdev, "memory map update\n");
		*change_map = true;
	}
	if (!*change_map)
		err = _mlx5_vdpa_create_mr(mvdev, iotlb);
	mutex_unlock(&mr->mkey_mtx);

	return err;
}