// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2019 Mellanox Technologies. */

#include <linux/smp.h>
#include "dr_types.h"

#define QUEUE_SIZE 128
#define SIGNAL_PER_DIV_QUEUE 16
#define TH_NUMS_TO_DRAIN 2

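/* Return codes shared by the CQ polling helpers (dr_parse_cqe/dr_poll_cq) */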
enum { CQ_OK = 0, CQ_EMPTY = -1, CQ_POLL_ERR = -2 };

struct dr_data_seg {
        u64 addr;
        u32 length;
        u32 lkey;
        unsigned int send_flags;
};

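/* One unit of work for the send ring: an RDMA WRITE of @write to
 * @remote_addr/@rkey, followed by an RDMA READ described by @read
 * (see dr_post_send() and dr_fill_data_segs()).
 */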
struct postsend_info {
        struct dr_data_seg write;
        struct dr_data_seg read;
        u64 remote_addr;
        u32 rkey;
};

struct dr_qp_rtr_attr {
        struct mlx5dr_cmd_gid_attr dgid_attr;
        enum ib_mtu mtu;
        u32 qp_num;
        u16 port_num;
        u8 min_rnr_timer;
        u8 sgid_index;
        u16 udp_src_port;
};

struct dr_qp_rts_attr {
        u8 timeout;
        u8 retry_cnt;
        u8 rnr_retry;
};

struct dr_qp_init_attr {
        u32 cqn;
        u32 pdn;
        u32 max_send_wr;
        struct mlx5_uars_page *uar;
};

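/* Advance the SQ consumer counter (sq.cc) according to the polled CQE:
 * on a requester completion or requester error, jump past the WQE that
 * was recorded in wqe_head[]; on a responder error just bump the counter.
 * Only a successful requester completion returns CQ_OK.
 */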
static int dr_parse_cqe(struct mlx5dr_cq *dr_cq, struct mlx5_cqe64 *cqe64)
{
        unsigned int idx;
        u8 opcode;

        opcode = get_cqe_opcode(cqe64);
        if (opcode == MLX5_CQE_REQ_ERR) {
                idx = be16_to_cpu(cqe64->wqe_counter) &
                        (dr_cq->qp->sq.wqe_cnt - 1);
                dr_cq->qp->sq.cc = dr_cq->qp->sq.wqe_head[idx] + 1;
        } else if (opcode == MLX5_CQE_RESP_ERR) {
                ++dr_cq->qp->sq.cc;
        } else {
                idx = be16_to_cpu(cqe64->wqe_counter) &
                        (dr_cq->qp->sq.wqe_cnt - 1);
                dr_cq->qp->sq.cc = dr_cq->qp->sq.wqe_head[idx] + 1;

                return CQ_OK;
        }

        return CQ_POLL_ERR;
}

static int dr_cq_poll_one(struct mlx5dr_cq *dr_cq)
{
        struct mlx5_cqe64 *cqe64;
        int err;

        cqe64 = mlx5_cqwq_get_cqe(&dr_cq->wq);
        if (!cqe64)
                return CQ_EMPTY;

        mlx5_cqwq_pop(&dr_cq->wq);
        err = dr_parse_cqe(dr_cq, cqe64);
        mlx5_cqwq_update_db_record(&dr_cq->wq);

        return err;
}

static int dr_poll_cq(struct mlx5dr_cq *dr_cq, int ne)
{
        int npolled;
        int err = 0;

        for (npolled = 0; npolled < ne; ++npolled) {
                err = dr_cq_poll_one(dr_cq);
                if (err != CQ_OK)
                        break;
        }

        return err == CQ_POLL_ERR ? err : npolled;
}

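/* Create the RC QP used by the send ring: a tiny fixed-size RQ (this code
 * never posts to it) and an SQ sized to roundup_pow_of_two(max_send_wr).
 */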
static struct mlx5dr_qp *dr_create_rc_qp(struct mlx5_core_dev *mdev,
                                         struct dr_qp_init_attr *attr)
{
        u32 out[MLX5_ST_SZ_DW(create_qp_out)] = {};
        u32 temp_qpc[MLX5_ST_SZ_DW(qpc)] = {};
        struct mlx5_wq_param wqp;
        struct mlx5dr_qp *dr_qp;
        int inlen;
        void *qpc;
        void *in;
        int err;

        dr_qp = kzalloc(sizeof(*dr_qp), GFP_KERNEL);
        if (!dr_qp)
                return NULL;

        wqp.buf_numa_node = mdev->priv.numa_node;
        wqp.db_numa_node = mdev->priv.numa_node;

        dr_qp->rq.pc = 0;
        dr_qp->rq.cc = 0;
        dr_qp->rq.wqe_cnt = 4;
        dr_qp->sq.pc = 0;
        dr_qp->sq.cc = 0;
        dr_qp->sq.wqe_cnt = roundup_pow_of_two(attr->max_send_wr);

        MLX5_SET(qpc, temp_qpc, log_rq_stride, ilog2(MLX5_SEND_WQE_DS) - 4);
        MLX5_SET(qpc, temp_qpc, log_rq_size, ilog2(dr_qp->rq.wqe_cnt));
        MLX5_SET(qpc, temp_qpc, log_sq_size, ilog2(dr_qp->sq.wqe_cnt));
        err = mlx5_wq_qp_create(mdev, &wqp, temp_qpc, &dr_qp->wq,
                                &dr_qp->wq_ctrl);
        if (err) {
                mlx5_core_warn(mdev, "Can't create QP WQ\n");
                goto err_wq;
        }

        dr_qp->sq.wqe_head = kcalloc(dr_qp->sq.wqe_cnt,
                                     sizeof(dr_qp->sq.wqe_head[0]),
                                     GFP_KERNEL);

        if (!dr_qp->sq.wqe_head) {
                mlx5_core_warn(mdev, "Can't allocate wqe head\n");
                goto err_wqe_head;
        }

        inlen = MLX5_ST_SZ_BYTES(create_qp_in) +
                MLX5_FLD_SZ_BYTES(create_qp_in, pas[0]) *
                dr_qp->wq_ctrl.buf.npages;
        in = kvzalloc(inlen, GFP_KERNEL);
        if (!in) {
                err = -ENOMEM;
                goto err_in;
        }

        qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
        MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC);
        MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
        MLX5_SET(qpc, qpc, pd, attr->pdn);
        MLX5_SET(qpc, qpc, uar_page, attr->uar->index);
        MLX5_SET(qpc, qpc, log_page_size,
                 dr_qp->wq_ctrl.buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
        MLX5_SET(qpc, qpc, fre, 1);
        MLX5_SET(qpc, qpc, rlky, 1);
        MLX5_SET(qpc, qpc, cqn_snd, attr->cqn);
        MLX5_SET(qpc, qpc, cqn_rcv, attr->cqn);
        MLX5_SET(qpc, qpc, log_rq_stride, ilog2(MLX5_SEND_WQE_DS) - 4);
        MLX5_SET(qpc, qpc, log_rq_size, ilog2(dr_qp->rq.wqe_cnt));
        MLX5_SET(qpc, qpc, rq_type, MLX5_NON_ZERO_RQ);
        MLX5_SET(qpc, qpc, log_sq_size, ilog2(dr_qp->sq.wqe_cnt));
        MLX5_SET64(qpc, qpc, dbr_addr, dr_qp->wq_ctrl.db.dma);
        if (MLX5_CAP_GEN(mdev, cqe_version) == 1)
                MLX5_SET(qpc, qpc, user_index, 0xFFFFFF);
        mlx5_fill_page_frag_array(&dr_qp->wq_ctrl.buf,
                                  (__be64 *)MLX5_ADDR_OF(create_qp_in,
                                                         in, pas));

        MLX5_SET(create_qp_in, in, opcode, MLX5_CMD_OP_CREATE_QP);
        err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out));
        dr_qp->qpn = MLX5_GET(create_qp_out, out, qpn);
        kvfree(in);
        if (err)
                goto err_in;
        dr_qp->uar = attr->uar;

        return dr_qp;

err_in:
        kfree(dr_qp->sq.wqe_head);
err_wqe_head:
        mlx5_wq_destroy(&dr_qp->wq_ctrl);
err_wq:
        kfree(dr_qp);
        return NULL;
}

static void dr_destroy_qp(struct mlx5_core_dev *mdev,
                          struct mlx5dr_qp *dr_qp)
{
        u32 in[MLX5_ST_SZ_DW(destroy_qp_in)] = {};

        MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP);
        MLX5_SET(destroy_qp_in, in, qpn, dr_qp->qpn);
        mlx5_cmd_exec_in(mdev, destroy_qp, in);

        kfree(dr_qp->sq.wqe_head);
        mlx5_wq_destroy(&dr_qp->wq_ctrl);
        kfree(dr_qp);
}

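/* Ring the doorbell for new SQ work: update the SQ doorbell record with
 * the current producer counter, then write the WQE control segment to
 * the UAR/BlueFlame register.
 */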
static void dr_cmd_notify_hw(struct mlx5dr_qp *dr_qp, void *ctrl)
{
        dma_wmb();
        *dr_qp->wq.sq.db = cpu_to_be32(dr_qp->sq.pc & 0xfffff);

        /* After the wmb() the HW is aware of the new work */
        wmb();

        mlx5_write64(ctrl, dr_qp->uar->map + MLX5_BF_OFFSET);
}

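/* Build a single RDMA WQE (control + remote address + data segments) at
 * the current SQ producer index, record the producer value in wqe_head[]
 * for completion handling, and optionally notify the HW.
 */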
static void dr_rdma_segments(struct mlx5dr_qp *dr_qp, u64 remote_addr,
                             u32 rkey, struct dr_data_seg *data_seg,
                             u32 opcode, int nreq)
{
        struct mlx5_wqe_raddr_seg *wq_raddr;
        struct mlx5_wqe_ctrl_seg *wq_ctrl;
        struct mlx5_wqe_data_seg *wq_dseg;
        unsigned int size;
        unsigned int idx;

        size = sizeof(*wq_ctrl) / 16 + sizeof(*wq_dseg) / 16 +
                sizeof(*wq_raddr) / 16;

        idx = dr_qp->sq.pc & (dr_qp->sq.wqe_cnt - 1);

        wq_ctrl = mlx5_wq_cyc_get_wqe(&dr_qp->wq.sq, idx);
        wq_ctrl->imm = 0;
        wq_ctrl->fm_ce_se = (data_seg->send_flags) ?
                MLX5_WQE_CTRL_CQ_UPDATE : 0;
        wq_ctrl->opmod_idx_opcode = cpu_to_be32(((dr_qp->sq.pc & 0xffff) << 8) |
                                                opcode);
        wq_ctrl->qpn_ds = cpu_to_be32(size | dr_qp->qpn << 8);
        wq_raddr = (void *)(wq_ctrl + 1);
        wq_raddr->raddr = cpu_to_be64(remote_addr);
        wq_raddr->rkey = cpu_to_be32(rkey);
        wq_raddr->reserved = 0;

        wq_dseg = (void *)(wq_raddr + 1);
        wq_dseg->byte_count = cpu_to_be32(data_seg->length);
        wq_dseg->lkey = cpu_to_be32(data_seg->lkey);
        wq_dseg->addr = cpu_to_be64(data_seg->addr);

        dr_qp->sq.wqe_head[idx] = dr_qp->sq.pc++;

        if (nreq)
                dr_cmd_notify_hw(dr_qp, wq_ctrl);
}

static void dr_post_send(struct mlx5dr_qp *dr_qp, struct postsend_info *send_info)
{
        dr_rdma_segments(dr_qp, send_info->remote_addr, send_info->rkey,
                         &send_info->write, MLX5_OPCODE_RDMA_WRITE, 0);
        dr_rdma_segments(dr_qp, send_info->remote_addr, send_info->rkey,
                         &send_info->read, MLX5_OPCODE_RDMA_READ, 1);
}

/**
 * mlx5dr_send_fill_and_append_ste_send_info: Add data to be sent
 * along with the send_list.
 *
 * @ste:       The STE to which this data is attached
 * @size:      Size of the data to write
 * @offset:    Offset of the data from the start of the hw_ste entry
 * @data:      Data to write
 * @ste_info:  STE info to be sent with send_list
 * @send_list: List to append the STE info to
 * @copy_data: If true, the data is copied and kept here because it is
 *             not backed up anywhere else (e.g. during re-hash).
 *             If false, the data may still be updated after it has
 *             been added to the list.
 */
void mlx5dr_send_fill_and_append_ste_send_info(struct mlx5dr_ste *ste, u16 size,
                                               u16 offset, u8 *data,
                                               struct mlx5dr_ste_send_info *ste_info,
                                               struct list_head *send_list,
                                               bool copy_data)
{
        ste_info->size = size;
        ste_info->ste = ste;
        ste_info->offset = offset;

        if (copy_data) {
                memcpy(ste_info->data_cont, data, size);
                ste_info->data = ste_info->data_cont;
        } else {
                ste_info->data = data;
        }

        list_add_tail(&ste_info->send_list, send_list);
}

/* The function consumes one completion at a time, unless the queue is
 * full, which means the HW has fallen a full queue length behind the SW;
 * in that case it drains the CQ until it is empty.
 */
static int dr_handle_pending_wc(struct mlx5dr_domain *dmn,
                                struct mlx5dr_send_ring *send_ring)
{
        bool is_drain = false;
        int ne;

        if (send_ring->pending_wqe < send_ring->signal_th)
                return 0;

        /* Queue is full, start draining it */
        if (send_ring->pending_wqe >=
            dmn->send_ring->signal_th * TH_NUMS_TO_DRAIN)
                is_drain = true;

        do {
                ne = dr_poll_cq(send_ring->cq, 1);
                if (ne < 0)
                        return ne;
                else if (ne == 1)
                        send_ring->pending_wqe -= send_ring->signal_th;
        } while (is_drain && send_ring->pending_wqe);

        return 0;
}

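/* Each postsend consumes two WQEs (the WRITE and the READ). Only every
 * signal_th-th WQE requests a completion, which is why pending_wqe is
 * decremented by signal_th for each polled completion above.
 */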
static void dr_fill_data_segs(struct mlx5dr_send_ring *send_ring,
                              struct postsend_info *send_info)
{
        send_ring->pending_wqe++;

        if (send_ring->pending_wqe % send_ring->signal_th == 0)
                send_info->write.send_flags |= IB_SEND_SIGNALED;

        send_ring->pending_wqe++;
        send_info->read.length = send_info->write.length;
        /* Read into the same write area */
        send_info->read.addr = (uintptr_t)send_info->write.addr;
        send_info->read.lkey = send_ring->mr->mkey.key;

        if (send_ring->pending_wqe % send_ring->signal_th == 0)
                send_info->read.send_flags = IB_SEND_SIGNALED;
        else
                send_info->read.send_flags = 0;
}

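/* Post one ICM write: reclaim CQ space if needed, stage the payload in
 * the ring buffer (registered MR) when it is larger than the inline
 * limit, then post the WRITE/READ WQE pair.
 */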
static int dr_postsend_icm_data(struct mlx5dr_domain *dmn,
                                struct postsend_info *send_info)
{
        struct mlx5dr_send_ring *send_ring = dmn->send_ring;
        u32 buff_offset;
        int ret;

        spin_lock(&send_ring->lock);

        ret = dr_handle_pending_wc(dmn, send_ring);
        if (ret)
                goto out_unlock;

        if (send_info->write.length > dmn->info.max_inline_size) {
                buff_offset = (send_ring->tx_head &
                               (dmn->send_ring->signal_th - 1)) *
                        send_ring->max_post_send_size;
                /* Copy to ring mr */
                memcpy(send_ring->buf + buff_offset,
                       (void *)(uintptr_t)send_info->write.addr,
                       send_info->write.length);
                send_info->write.addr = (uintptr_t)send_ring->mr->dma_addr + buff_offset;
                send_info->write.lkey = send_ring->mr->mkey.key;
        }

        send_ring->tx_head++;
        dr_fill_data_segs(send_ring, send_info);
        dr_post_send(send_ring->qp, send_info);

out_unlock:
        spin_unlock(&send_ring->lock);
        return ret;
}

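/* Split an htbl chunk copy into iterations that fit the ring's maximum
 * post size and allocate a bounce buffer for one iteration.
 */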
static int dr_get_tbl_copy_details(struct mlx5dr_domain *dmn,
                                   struct mlx5dr_ste_htbl *htbl,
                                   u8 **data,
                                   u32 *byte_size,
                                   int *iterations,
                                   int *num_stes)
{
        int alloc_size;

        if (htbl->chunk->byte_size > dmn->send_ring->max_post_send_size) {
                *iterations = htbl->chunk->byte_size /
                        dmn->send_ring->max_post_send_size;
                *byte_size = dmn->send_ring->max_post_send_size;
                alloc_size = *byte_size;
                *num_stes = *byte_size / DR_STE_SIZE;
        } else {
                *iterations = 1;
                *num_stes = htbl->chunk->num_of_entries;
                alloc_size = *num_stes * DR_STE_SIZE;
        }

        *data = kzalloc(alloc_size, GFP_KERNEL);
        if (!*data)
                return -ENOMEM;

        return 0;
}

/**
 * mlx5dr_send_postsend_ste: write @size bytes at @offset into the STE's
 * HW ICM area.
 *
 * @dmn:    Domain
 * @ste:    The STE struct that contains the data (at
 *          least part of it)
 * @data:   The real data to send
 * @size:   Number of bytes to write
 * @offset: Offset from the ICM-mapped data at which to
 *          start writing; use this to write only part of
 *          the buffer.
 *
 * Return: 0 on success.
 */
int mlx5dr_send_postsend_ste(struct mlx5dr_domain *dmn, struct mlx5dr_ste *ste,
                             u8 *data, u16 size, u16 offset)
{
        struct postsend_info send_info = {};

        send_info.write.addr = (uintptr_t)data;
        send_info.write.length = size;
        send_info.write.lkey = 0;
        send_info.remote_addr = mlx5dr_ste_get_mr_addr(ste) + offset;
        send_info.rkey = ste->htbl->chunk->rkey;

        return dr_postsend_icm_data(dmn, &send_info);
}

int mlx5dr_send_postsend_htbl(struct mlx5dr_domain *dmn,
                              struct mlx5dr_ste_htbl *htbl,
                              u8 *formatted_ste, u8 *mask)
{
        u32 byte_size = htbl->chunk->byte_size;
        int num_stes_per_iter;
        int iterations;
        u8 *data;
        int ret;
        int i;
        int j;

        ret = dr_get_tbl_copy_details(dmn, htbl, &data, &byte_size,
                                      &iterations, &num_stes_per_iter);
        if (ret)
                return ret;

        /* Send the data 'iterations' times */
        for (i = 0; i < iterations; i++) {
                u32 ste_index = i * (byte_size / DR_STE_SIZE);
                struct postsend_info send_info = {};

                /* Copy all the STEs into the data buffer; used entries
                 * also need the bit_mask appended.
                 */
                for (j = 0; j < num_stes_per_iter; j++) {
                        struct mlx5dr_ste *ste = &htbl->ste_arr[ste_index + j];
                        u32 ste_off = j * DR_STE_SIZE;

                        if (mlx5dr_ste_is_not_used(ste)) {
                                memcpy(data + ste_off,
                                       formatted_ste, DR_STE_SIZE);
                        } else {
                                /* Copy data */
                                memcpy(data + ste_off,
                                       htbl->ste_arr[ste_index + j].hw_ste,
                                       DR_STE_SIZE_REDUCED);
                                /* Copy bit_mask */
                                memcpy(data + ste_off + DR_STE_SIZE_REDUCED,
                                       mask, DR_STE_SIZE_MASK);
                        }
                }

                send_info.write.addr = (uintptr_t)data;
                send_info.write.length = byte_size;
                send_info.write.lkey = 0;
                send_info.remote_addr =
                        mlx5dr_ste_get_mr_addr(htbl->ste_arr + ste_index);
                send_info.rkey = htbl->chunk->rkey;

                ret = dr_postsend_icm_data(dmn, &send_info);
                if (ret)
                        goto out_free;
        }

out_free:
        kfree(data);
        return ret;
}

/* Initialize htbl with default STEs */
int mlx5dr_send_postsend_formatted_htbl(struct mlx5dr_domain *dmn,
                                        struct mlx5dr_ste_htbl *htbl,
                                        u8 *ste_init_data,
                                        bool update_hw_ste)
{
        u32 byte_size = htbl->chunk->byte_size;
        int iterations;
        int num_stes;
        u8 *data;
        int ret;
        int i;

        ret = dr_get_tbl_copy_details(dmn, htbl, &data, &byte_size,
                                      &iterations, &num_stes);
        if (ret)
                return ret;

        for (i = 0; i < num_stes; i++) {
                u8 *copy_dst;

                /* Copy the same STE into the data buffer */
                copy_dst = data + i * DR_STE_SIZE;
                memcpy(copy_dst, ste_init_data, DR_STE_SIZE);

                if (update_hw_ste) {
                        /* Copy the reduced STE to the hash table's ste_arr */
                        copy_dst = htbl->hw_ste_arr + i * DR_STE_SIZE_REDUCED;
                        memcpy(copy_dst, ste_init_data, DR_STE_SIZE_REDUCED);
                }
        }

        /* Send the data 'iterations' times */
        for (i = 0; i < iterations; i++) {
                u32 ste_index = i * (byte_size / DR_STE_SIZE);
                struct postsend_info send_info = {};

                send_info.write.addr = (uintptr_t)data;
                send_info.write.length = byte_size;
                send_info.write.lkey = 0;
                send_info.remote_addr =
                        mlx5dr_ste_get_mr_addr(htbl->ste_arr + ste_index);
                send_info.rkey = htbl->chunk->rkey;

                ret = dr_postsend_icm_data(dmn, &send_info);
                if (ret)
                        goto out_free;
        }

out_free:
        kfree(data);
        return ret;
}

int mlx5dr_send_postsend_action(struct mlx5dr_domain *dmn,
                                struct mlx5dr_action *action)
{
        struct postsend_info send_info = {};
        int ret;

        send_info.write.addr = (uintptr_t)action->rewrite.data;
        send_info.write.length = action->rewrite.num_of_actions *
                                 DR_MODIFY_ACTION_SIZE;
        send_info.write.lkey = 0;
        send_info.remote_addr = action->rewrite.chunk->mr_addr;
        send_info.rkey = action->rewrite.chunk->rkey;

        ret = dr_postsend_icm_data(dmn, &send_info);

        return ret;
}

static int dr_modify_qp_rst2init(struct mlx5_core_dev *mdev,
                                 struct mlx5dr_qp *dr_qp,
                                 int port)
{
        u32 in[MLX5_ST_SZ_DW(rst2init_qp_in)] = {};
        void *qpc;

        qpc = MLX5_ADDR_OF(rst2init_qp_in, in, qpc);

        MLX5_SET(qpc, qpc, primary_address_path.vhca_port_num, port);
        MLX5_SET(qpc, qpc, pm_state, MLX5_QPC_PM_STATE_MIGRATED);
        MLX5_SET(qpc, qpc, rre, 1);
        MLX5_SET(qpc, qpc, rwe, 1);

        MLX5_SET(rst2init_qp_in, in, opcode, MLX5_CMD_OP_RST2INIT_QP);
        MLX5_SET(rst2init_qp_in, in, qpn, dr_qp->qpn);

        return mlx5_cmd_exec_in(mdev, rst2init_qp, in);
}

static int dr_cmd_modify_qp_rtr2rts(struct mlx5_core_dev *mdev,
                                    struct mlx5dr_qp *dr_qp,
                                    struct dr_qp_rts_attr *attr)
{
        u32 in[MLX5_ST_SZ_DW(rtr2rts_qp_in)] = {};
        void *qpc;

        qpc = MLX5_ADDR_OF(rtr2rts_qp_in, in, qpc);

        MLX5_SET(rtr2rts_qp_in, in, qpn, dr_qp->qpn);

        MLX5_SET(qpc, qpc, retry_count, attr->retry_cnt);
        MLX5_SET(qpc, qpc, rnr_retry, attr->rnr_retry);
        MLX5_SET(qpc, qpc, primary_address_path.ack_timeout, 0x8); /* ~1ms */

        MLX5_SET(rtr2rts_qp_in, in, opcode, MLX5_CMD_OP_RTR2RTS_QP);
        MLX5_SET(rtr2rts_qp_in, in, qpn, dr_qp->qpn);

        return mlx5_cmd_exec_in(mdev, rtr2rts_qp, in);
}

static int dr_cmd_modify_qp_init2rtr(struct mlx5_core_dev *mdev,
                                     struct mlx5dr_qp *dr_qp,
                                     struct dr_qp_rtr_attr *attr)
{
        u32 in[MLX5_ST_SZ_DW(init2rtr_qp_in)] = {};
        void *qpc;

        qpc = MLX5_ADDR_OF(init2rtr_qp_in, in, qpc);

        MLX5_SET(init2rtr_qp_in, in, qpn, dr_qp->qpn);

        MLX5_SET(qpc, qpc, mtu, attr->mtu);
        MLX5_SET(qpc, qpc, log_msg_max, DR_CHUNK_SIZE_MAX - 1);
        MLX5_SET(qpc, qpc, remote_qpn, attr->qp_num);
        memcpy(MLX5_ADDR_OF(qpc, qpc, primary_address_path.rmac_47_32),
               attr->dgid_attr.mac, sizeof(attr->dgid_attr.mac));
        memcpy(MLX5_ADDR_OF(qpc, qpc, primary_address_path.rgid_rip),
               attr->dgid_attr.gid, sizeof(attr->dgid_attr.gid));
        MLX5_SET(qpc, qpc, primary_address_path.src_addr_index,
                 attr->sgid_index);

        if (attr->dgid_attr.roce_ver == MLX5_ROCE_VERSION_2)
                MLX5_SET(qpc, qpc, primary_address_path.udp_sport,
                         attr->udp_src_port);

        MLX5_SET(qpc, qpc, primary_address_path.vhca_port_num, attr->port_num);
        MLX5_SET(qpc, qpc, min_rnr_nak, 1);

        MLX5_SET(init2rtr_qp_in, in, opcode, MLX5_CMD_OP_INIT2RTR_QP);
        MLX5_SET(init2rtr_qp_in, in, qpn, dr_qp->qpn);

        return mlx5_cmd_exec_in(mdev, init2rtr_qp, in);
}

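/* Drive the send-ring QP through RST -> INIT -> RTR -> RTS. The remote
 * QP number used in RTR is the QP's own number, i.e. the QP is connected
 * to itself, so the RDMA operations target the local device's ICM.
 */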
static int dr_prepare_qp_to_rts(struct mlx5dr_domain *dmn)
{
        struct mlx5dr_qp *dr_qp = dmn->send_ring->qp;
        struct dr_qp_rts_attr rts_attr = {};
        struct dr_qp_rtr_attr rtr_attr = {};
        enum ib_mtu mtu = IB_MTU_1024;
        u16 gid_index = 0;
        int port = 1;
        int ret;

        /* Init */
        ret = dr_modify_qp_rst2init(dmn->mdev, dr_qp, port);
        if (ret) {
                mlx5dr_err(dmn, "Failed modify QP rst2init\n");
                return ret;
        }

        /* RTR */
        ret = mlx5dr_cmd_query_gid(dmn->mdev, port, gid_index, &rtr_attr.dgid_attr);
        if (ret)
                return ret;

        rtr_attr.mtu = mtu;
        rtr_attr.qp_num = dr_qp->qpn;
        rtr_attr.min_rnr_timer = 12;
        rtr_attr.port_num = port;
        rtr_attr.sgid_index = gid_index;
        rtr_attr.udp_src_port = dmn->info.caps.roce_min_src_udp;

        ret = dr_cmd_modify_qp_init2rtr(dmn->mdev, dr_qp, &rtr_attr);
        if (ret) {
                mlx5dr_err(dmn, "Failed modify QP init2rtr\n");
                return ret;
        }

        /* RTS */
        rts_attr.timeout = 14;
        rts_attr.retry_cnt = 7;
        rts_attr.rnr_retry = 7;

        ret = dr_cmd_modify_qp_rtr2rts(dmn->mdev, dr_qp, &rts_attr);
        if (ret) {
                mlx5dr_err(dmn, "Failed modify QP rtr2rts\n");
                return ret;
        }

        return 0;
}

static void dr_cq_complete(struct mlx5_core_cq *mcq,
                           struct mlx5_eqe *eqe)
{
        pr_err("CQ completion CQ: #%u\n", mcq->cqn);
}

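/* Create the send-ring CQ. It is used in polling mode only, so the
 * dr_cq_complete() handler is not expected to run; the arm doorbell is
 * set to a dummy non-zero value below to keep HW doorbell recovery away.
 */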
static struct mlx5dr_cq *dr_create_cq(struct mlx5_core_dev *mdev,
                                      struct mlx5_uars_page *uar,
                                      size_t ncqe)
{
        u32 temp_cqc[MLX5_ST_SZ_DW(cqc)] = {};
        u32 out[MLX5_ST_SZ_DW(create_cq_out)];
        struct mlx5_wq_param wqp;
        struct mlx5_cqe64 *cqe;
        struct mlx5dr_cq *cq;
        int inlen, err, eqn;
        void *cqc, *in;
        __be64 *pas;
        int vector;
        u32 i;

        cq = kzalloc(sizeof(*cq), GFP_KERNEL);
        if (!cq)
                return NULL;

        ncqe = roundup_pow_of_two(ncqe);
        MLX5_SET(cqc, temp_cqc, log_cq_size, ilog2(ncqe));

        wqp.buf_numa_node = mdev->priv.numa_node;
        wqp.db_numa_node = mdev->priv.numa_node;

        err = mlx5_cqwq_create(mdev, &wqp, temp_cqc, &cq->wq,
                               &cq->wq_ctrl);
        if (err)
                goto out;

        for (i = 0; i < mlx5_cqwq_get_size(&cq->wq); i++) {
                cqe = mlx5_cqwq_get_wqe(&cq->wq, i);
                cqe->op_own = MLX5_CQE_INVALID << 4 | MLX5_CQE_OWNER_MASK;
        }

        inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
                sizeof(u64) * cq->wq_ctrl.buf.npages;
        in = kvzalloc(inlen, GFP_KERNEL);
        if (!in)
                goto err_cqwq;

        vector = raw_smp_processor_id() % mlx5_comp_vectors_count(mdev);
        err = mlx5_vector2eqn(mdev, vector, &eqn);
        if (err) {
                kvfree(in);
                goto err_cqwq;
        }

        cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
        MLX5_SET(cqc, cqc, log_cq_size, ilog2(ncqe));
        MLX5_SET(cqc, cqc, c_eqn, eqn);
        MLX5_SET(cqc, cqc, uar_page, uar->index);
        MLX5_SET(cqc, cqc, log_page_size, cq->wq_ctrl.buf.page_shift -
                 MLX5_ADAPTER_PAGE_SHIFT);
        MLX5_SET64(cqc, cqc, dbr_addr, cq->wq_ctrl.db.dma);

        pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas);
        mlx5_fill_page_frag_array(&cq->wq_ctrl.buf, pas);

        cq->mcq.comp = dr_cq_complete;

        err = mlx5_core_create_cq(mdev, &cq->mcq, in, inlen, out, sizeof(out));
        kvfree(in);

        if (err)
                goto err_cqwq;

        cq->mcq.cqe_sz = 64;
        cq->mcq.set_ci_db = cq->wq_ctrl.db.db;
        cq->mcq.arm_db = cq->wq_ctrl.db.db + 1;
        *cq->mcq.set_ci_db = 0;

        /* Set a non-zero value to prevent the HW from running doorbell
         * recovery on a CQ that is used in polling mode.
         */
        *cq->mcq.arm_db = cpu_to_be32(2 << 28);

        cq->mcq.vector = 0;
        cq->mcq.uar = uar;

        return cq;

err_cqwq:
        mlx5_wq_destroy(&cq->wq_ctrl);
out:
        kfree(cq);
        return NULL;
}

static void dr_destroy_cq(struct mlx5_core_dev *mdev, struct mlx5dr_cq *cq)
{
        mlx5_core_destroy_cq(mdev, &cq->mcq);
        mlx5_wq_destroy(&cq->wq_ctrl);
        kfree(cq);
}

static int
dr_create_mkey(struct mlx5_core_dev *mdev, u32 pdn, struct mlx5_core_mkey *mkey)
{
        u32 in[MLX5_ST_SZ_DW(create_mkey_in)] = {};
        void *mkc;

        mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
        MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_PA);
        MLX5_SET(mkc, mkc, a, 1);
        MLX5_SET(mkc, mkc, rw, 1);
        MLX5_SET(mkc, mkc, rr, 1);
        MLX5_SET(mkc, mkc, lw, 1);
        MLX5_SET(mkc, mkc, lr, 1);

        MLX5_SET(mkc, mkc, pd, pdn);
        MLX5_SET(mkc, mkc, length64, 1);
        MLX5_SET(mkc, mkc, qpn, 0xffffff);

        return mlx5_core_create_mkey(mdev, mkey, in, sizeof(in));
}

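/* DMA-map the given buffer and create a physical-address mkey
 * (MLX5_MKC_ACCESS_MODE_PA with length64 set) that the send ring uses
 * as its lkey for staged writes and read-backs.
 */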
static struct mlx5dr_mr *dr_reg_mr(struct mlx5_core_dev *mdev,
                                   u32 pdn, void *buf, size_t size)
{
        struct mlx5dr_mr *mr = kzalloc(sizeof(*mr), GFP_KERNEL);
        struct device *dma_device;
        dma_addr_t dma_addr;
        int err;

        if (!mr)
                return NULL;

        dma_device = mlx5_core_dma_dev(mdev);
        dma_addr = dma_map_single(dma_device, buf, size,
                                  DMA_BIDIRECTIONAL);
        err = dma_mapping_error(dma_device, dma_addr);
        if (err) {
                mlx5_core_warn(mdev, "Can't dma buf\n");
                kfree(mr);
                return NULL;
        }

        err = dr_create_mkey(mdev, pdn, &mr->mkey);
        if (err) {
                mlx5_core_warn(mdev, "Can't create mkey\n");
                dma_unmap_single(dma_device, dma_addr, size,
                                 DMA_BIDIRECTIONAL);
                kfree(mr);
                return NULL;
        }

        mr->dma_addr = dma_addr;
        mr->size = size;
        mr->addr = buf;

        return mr;
}

static void dr_dereg_mr(struct mlx5_core_dev *mdev, struct mlx5dr_mr *mr)
{
        mlx5_core_destroy_mkey(mdev, &mr->mkey);
        dma_unmap_single(mlx5_core_dma_dev(mdev), mr->dma_addr, mr->size,
                         DMA_BIDIRECTIONAL);
        kfree(mr);
}

int mlx5dr_send_ring_alloc(struct mlx5dr_domain *dmn)
{
        struct dr_qp_init_attr init_attr = {};
        int cq_size;
        int size;
        int ret;

        dmn->send_ring = kzalloc(sizeof(*dmn->send_ring), GFP_KERNEL);
        if (!dmn->send_ring)
                return -ENOMEM;

        cq_size = QUEUE_SIZE + 1;
        dmn->send_ring->cq = dr_create_cq(dmn->mdev, dmn->uar, cq_size);
        if (!dmn->send_ring->cq) {
                mlx5dr_err(dmn, "Failed creating CQ\n");
                ret = -ENOMEM;
                goto free_send_ring;
        }

        init_attr.cqn = dmn->send_ring->cq->mcq.cqn;
        init_attr.pdn = dmn->pdn;
        init_attr.uar = dmn->uar;
        init_attr.max_send_wr = QUEUE_SIZE;
        spin_lock_init(&dmn->send_ring->lock);

        dmn->send_ring->qp = dr_create_rc_qp(dmn->mdev, &init_attr);
        if (!dmn->send_ring->qp) {
                mlx5dr_err(dmn, "Failed creating QP\n");
                ret = -ENOMEM;
                goto clean_cq;
        }

        dmn->send_ring->cq->qp = dmn->send_ring->qp;

        dmn->info.max_send_wr = QUEUE_SIZE;
        dmn->info.max_inline_size = min(dmn->send_ring->qp->max_inline_data,
                                        DR_STE_SIZE);

        dmn->send_ring->signal_th = dmn->info.max_send_wr /
                SIGNAL_PER_DIV_QUEUE;

        /* Prepare qp to be used */
        ret = dr_prepare_qp_to_rts(dmn);
        if (ret)
                goto clean_qp;

        dmn->send_ring->max_post_send_size =
                mlx5dr_icm_pool_chunk_size_to_byte(DR_CHUNK_SIZE_1K,
                                                   DR_ICM_TYPE_STE);

        /* Allocating the max size as a buffer for writing */
        size = dmn->send_ring->signal_th * dmn->send_ring->max_post_send_size;
        dmn->send_ring->buf = kzalloc(size, GFP_KERNEL);
        if (!dmn->send_ring->buf) {
                ret = -ENOMEM;
                goto clean_qp;
        }

        dmn->send_ring->buf_size = size;

        dmn->send_ring->mr = dr_reg_mr(dmn->mdev,
                                       dmn->pdn, dmn->send_ring->buf, size);
        if (!dmn->send_ring->mr) {
                ret = -ENOMEM;
                goto free_mem;
        }

        dmn->send_ring->sync_mr = dr_reg_mr(dmn->mdev,
                                            dmn->pdn, dmn->send_ring->sync_buff,
                                            MIN_READ_SYNC);
        if (!dmn->send_ring->sync_mr) {
                ret = -ENOMEM;
                goto clean_mr;
        }

        return 0;

clean_mr:
        dr_dereg_mr(dmn->mdev, dmn->send_ring->mr);
free_mem:
        kfree(dmn->send_ring->buf);
clean_qp:
        dr_destroy_qp(dmn->mdev, dmn->send_ring->qp);
clean_cq:
        dr_destroy_cq(dmn->mdev, dmn->send_ring->cq);
free_send_ring:
        kfree(dmn->send_ring);

        return ret;
}

void mlx5dr_send_ring_free(struct mlx5dr_domain *dmn,
                           struct mlx5dr_send_ring *send_ring)
{
        dr_destroy_qp(dmn->mdev, send_ring->qp);
        dr_destroy_cq(dmn->mdev, send_ring->cq);
        dr_dereg_mr(dmn->mdev, send_ring->sync_mr);
        dr_dereg_mr(dmn->mdev, send_ring->mr);
        kfree(send_ring->buf);
        kfree(send_ring);
}

int mlx5dr_send_ring_force_drain(struct mlx5dr_domain *dmn)
{
        struct mlx5dr_send_ring *send_ring = dmn->send_ring;
        struct postsend_info send_info = {};
        u8 data[DR_STE_SIZE];
        int num_of_sends_req;
        int ret;
        int i;

        /* Sending this many requests guarantees the queue gets drained */
        num_of_sends_req = send_ring->signal_th * TH_NUMS_TO_DRAIN / 2;

        /* Send fake requests forcing the last to be signaled */
        send_info.write.addr = (uintptr_t)data;
        send_info.write.length = DR_STE_SIZE;
        send_info.write.lkey = 0;
        /* Using the sync_mr in order to write/read */
        send_info.remote_addr = (uintptr_t)send_ring->sync_mr->addr;
        send_info.rkey = send_ring->sync_mr->mkey.key;

        for (i = 0; i < num_of_sends_req; i++) {
                ret = dr_postsend_icm_data(dmn, &send_info);
                if (ret)
                        return ret;
        }

        spin_lock(&send_ring->lock);
        ret = dr_handle_pending_wc(dmn, send_ring);
        spin_unlock(&send_ring->lock);

        return ret;
}