1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3 * Huawei HiNIC PCI Express Linux driver
4 * Copyright(c) 2017 Huawei Technologies Co., Ltd
5 */
6
7 #include <linux/kernel.h>
8 #include <linux/types.h>
9 #include <linux/pci.h>
10 #include <linux/device.h>
11 #include <linux/dma-mapping.h>
12 #include <linux/vmalloc.h>
13 #include <linux/errno.h>
14 #include <linux/sizes.h>
15 #include <linux/atomic.h>
16 #include <linux/skbuff.h>
17 #include <linux/io.h>
18 #include <asm/barrier.h>
19 #include <asm/byteorder.h>
20
21 #include "hinic_common.h"
22 #include "hinic_hw_if.h"
23 #include "hinic_hw_wqe.h"
24 #include "hinic_hw_wq.h"
25 #include "hinic_hw_qp_ctxt.h"
26 #include "hinic_hw_qp.h"
27 #include "hinic_hw_io.h"
28
/* Offset of the SQ doorbell area from the doorbell base address */
#define SQ_DB_OFF		SZ_2K

/* The number of cache lines to prefetch until the threshold state */
#define WQ_PREFETCH_MAX		2
/* The number of cache lines to prefetch after the threshold state */
#define WQ_PREFETCH_MIN		1
/* Threshold state */
#define WQ_PREFETCH_THRESHOLD	256

/* Sizes of the SQ/RQ ctxt */
#define Q_CTXT_SIZE		48
#define CTXT_RSVD		240

/* SQ ctxts start right after the reserved area of all the queues */
#define SQ_CTXT_OFFSET(max_sqs, max_rqs, q_id)	\
		(((max_rqs) + (max_sqs)) * CTXT_RSVD + (q_id) * Q_CTXT_SIZE)

/*
 * RQ ctxts follow the max_sqs SQ ctxts. Every argument is parenthesized so
 * that an expression passed as max_sqs expands with the intended precedence.
 */
#define RQ_CTXT_OFFSET(max_sqs, max_rqs, q_id)	\
		(((max_rqs) + (max_sqs)) * CTXT_RSVD + \
		 ((max_sqs) + (q_id)) * Q_CTXT_SIZE)

/* Convert a size in bytes to units of 16/8 bytes, rounding up */
#define SIZE_16BYTES(size)		(ALIGN(size, 16) >> 4)
#define SIZE_8BYTES(size)		(ALIGN(size, 8) >> 3)
/* Convert a section length in units of 8 bytes back to bytes */
#define SECT_SIZE_FROM_8BYTES(size)	((size) << 3)

/* The doorbell data carries the bits of the pi above the low 8 bits */
#define SQ_DB_PI_HI_SHIFT	8
#define SQ_DB_PI_HI(prod_idx)	((prod_idx) >> SQ_DB_PI_HI_SHIFT)

/* The low 8 bits of the pi select the doorbell slot */
#define SQ_DB_PI_LOW_MASK	0xFF
#define SQ_DB_PI_LOW(prod_idx)	((prod_idx) & SQ_DB_PI_LOW_MASK)

/* Doorbell slots are 8 bytes apart, indexed by the low part of the pi */
#define SQ_DB_ADDR(sq, pi)	\
		(((u64 *)((sq)->db_base)) + SQ_DB_PI_LOW(pi))

/* Wrap an index into the range of the work queue */
#define SQ_MASKED_IDX(sq, idx)	((idx) & (sq)->wq->mask)
#define RQ_MASKED_IDX(rq, idx)	((idx) & (rq)->wq->mask)
63
/* Value written to the DATA_FORMAT field of the SQ ctrl section */
enum sq_wqe_type {
	SQ_NORMAL_WQE	= 0,
};
67
/* Value written to the COMPLETE_FORMAT field of the RQ ctrl section */
enum rq_completion_fmt {
	RQ_COMPLETE_SGE	= 1,
};
71
/**
 * hinic_qp_prepare_header - prepare the header of a qp ctxt block
 * @qp_ctxt_hdr: the header to fill
 * @ctxt_type: type of the ctxt, any type other than SQ is handled as RQ
 * @num_queues: number of queues stored in the header
 * @max_queues: max number of queues, used both as max SQs and as max RQs
 *
 * The ctxt offset is stored in units of 16 bytes and the header is
 * converted by hinic_cpu_to_be32() before returning.
 **/
void hinic_qp_prepare_header(struct hinic_qp_ctxt_header *qp_ctxt_hdr,
			     enum hinic_qp_ctxt_type ctxt_type,
			     u16 num_queues, u16 max_queues)
{
	qp_ctxt_hdr->queue_type = ctxt_type;
	qp_ctxt_hdr->num_queues = num_queues;

	/* Offset of the first ctxt (q_id 0) of the requested type */
	qp_ctxt_hdr->addr_offset =
		(ctxt_type == HINIC_QP_CTXT_TYPE_SQ) ?
		SQ_CTXT_OFFSET(max_queues, max_queues, 0) :
		RQ_CTXT_OFFSET(max_queues, max_queues, 0);

	qp_ctxt_hdr->addr_offset = SIZE_16BYTES(qp_ctxt_hdr->addr_offset);

	hinic_cpu_to_be32(qp_ctxt_hdr, sizeof(*qp_ctxt_hdr));
}
91
/**
 * hinic_sq_prepare_ctxt - prepare the ctxt of a send queue
 * @sq_ctxt: the ctxt to fill
 * @sq: the send queue described by the ctxt
 * @global_qid: queue id written to the GLOBAL_SQ_ID field
 *
 * The ctxt is built from the current ci/pi of the wq, the pfn of the first
 * wq page and the pfn of the wq block, and is then converted by
 * hinic_cpu_to_be32().
 **/
void hinic_sq_prepare_ctxt(struct hinic_sq_ctxt *sq_ctxt,
			   struct hinic_sq *sq, u16 global_qid)
{
	struct hinic_wq *wq = sq->wq;
	u32 page_pfn_hi, page_pfn_lo, block_pfn_hi, block_pfn_lo;
	u64 first_page_addr, page_pfn, block_pfn;
	u16 ci, pi;

	ci = atomic_read(&wq->cons_idx);
	pi = atomic_read(&wq->prod_idx);

	/* Read the first page paddr from the WQ page paddr ptrs */
	first_page_addr = be64_to_cpu(*wq->block_vaddr);

	page_pfn = HINIC_WQ_PAGE_PFN(first_page_addr);
	page_pfn_lo = lower_32_bits(page_pfn);
	page_pfn_hi = upper_32_bits(page_pfn);

	block_pfn = HINIC_WQ_BLOCK_PFN(wq->block_paddr);
	block_pfn_lo = lower_32_bits(block_pfn);
	block_pfn_hi = upper_32_bits(block_pfn);

	/* Queue identification, EN field of the ceq attribute is 0 */
	sq_ctxt->ceq_attr =
		HINIC_SQ_CTXT_CEQ_ATTR_SET(global_qid, GLOBAL_SQ_ID) |
		HINIC_SQ_CTXT_CEQ_ATTR_SET(0, EN);

	/* Current indexes and the first wq page */
	sq_ctxt->ci_wrapped = HINIC_SQ_CTXT_CI_SET(ci, IDX) |
			      HINIC_SQ_CTXT_CI_SET(1, WRAPPED);

	sq_ctxt->wq_hi_pfn_pi =
		HINIC_SQ_CTXT_WQ_PAGE_SET(page_pfn_hi, HI_PFN) |
		HINIC_SQ_CTXT_WQ_PAGE_SET(pi, PI);
	sq_ctxt->wq_lo_pfn = page_pfn_lo;

	/* Prefetch settings */
	sq_ctxt->pref_cache =
		HINIC_SQ_CTXT_PREF_SET(WQ_PREFETCH_MIN, CACHE_MIN) |
		HINIC_SQ_CTXT_PREF_SET(WQ_PREFETCH_MAX, CACHE_MAX) |
		HINIC_SQ_CTXT_PREF_SET(WQ_PREFETCH_THRESHOLD, CACHE_THRESHOLD);
	sq_ctxt->pref_wrapped = 1;

	sq_ctxt->pref_wq_hi_pfn_ci =
		HINIC_SQ_CTXT_PREF_SET(ci, CI) |
		HINIC_SQ_CTXT_PREF_SET(page_pfn_hi, WQ_HI_PFN);
	sq_ctxt->pref_wq_lo_pfn = page_pfn_lo;

	/* The wq block */
	sq_ctxt->wq_block_hi_pfn =
		HINIC_SQ_CTXT_WQ_BLOCK_SET(block_pfn_hi, HI_PFN);
	sq_ctxt->wq_block_lo_pfn = block_pfn_lo;

	hinic_cpu_to_be32(sq_ctxt, sizeof(*sq_ctxt));
}
148
/**
 * hinic_rq_prepare_ctxt - prepare the ctxt of a receive queue
 * @rq_ctxt: the ctxt to fill
 * @rq: the receive queue described by the ctxt
 * @global_qid: global queue id (not used when building the RQ ctxt)
 *
 * The ctxt is built from the current ci/pi of the wq, the msix entry of the
 * rq, the dma address of the pi, the pfn of the first wq page and the pfn of
 * the wq block, and is then converted by hinic_cpu_to_be32().
 **/
void hinic_rq_prepare_ctxt(struct hinic_rq_ctxt *rq_ctxt,
			   struct hinic_rq *rq, u16 global_qid)
{
	struct hinic_wq *wq = rq->wq;
	u32 page_pfn_hi, page_pfn_lo, block_pfn_hi, block_pfn_lo;
	u64 first_page_addr, page_pfn, block_pfn;
	u16 ci, pi;

	ci = atomic_read(&wq->cons_idx);
	pi = atomic_read(&wq->prod_idx);

	/* Read the first page paddr from the WQ page paddr ptrs */
	first_page_addr = be64_to_cpu(*wq->block_vaddr);

	page_pfn = HINIC_WQ_PAGE_PFN(first_page_addr);
	page_pfn_lo = lower_32_bits(page_pfn);
	page_pfn_hi = upper_32_bits(page_pfn);

	block_pfn = HINIC_WQ_BLOCK_PFN(wq->block_paddr);
	block_pfn_lo = lower_32_bits(block_pfn);
	block_pfn_hi = upper_32_bits(block_pfn);

	rq_ctxt->ceq_attr = HINIC_RQ_CTXT_CEQ_ATTR_SET(0, EN) |
			    HINIC_RQ_CTXT_CEQ_ATTR_SET(1, WRAPPED);

	/* Current indexes, interrupt and the first wq page */
	rq_ctxt->pi_intr_attr =
		HINIC_RQ_CTXT_PI_SET(pi, IDX) |
		HINIC_RQ_CTXT_PI_SET(rq->msix_entry, INTR);

	rq_ctxt->wq_hi_pfn_ci =
		HINIC_RQ_CTXT_WQ_PAGE_SET(page_pfn_hi, HI_PFN) |
		HINIC_RQ_CTXT_WQ_PAGE_SET(ci, CI);
	rq_ctxt->wq_lo_pfn = page_pfn_lo;

	/* Prefetch settings */
	rq_ctxt->pref_cache =
		HINIC_RQ_CTXT_PREF_SET(WQ_PREFETCH_MIN, CACHE_MIN) |
		HINIC_RQ_CTXT_PREF_SET(WQ_PREFETCH_MAX, CACHE_MAX) |
		HINIC_RQ_CTXT_PREF_SET(WQ_PREFETCH_THRESHOLD, CACHE_THRESHOLD);
	rq_ctxt->pref_wrapped = 1;

	rq_ctxt->pref_wq_hi_pfn_ci =
		HINIC_RQ_CTXT_PREF_SET(page_pfn_hi, WQ_HI_PFN) |
		HINIC_RQ_CTXT_PREF_SET(ci, CI);
	rq_ctxt->pref_wq_lo_pfn = page_pfn_lo;

	/* DMA address of the pi that is updated by hinic_rq_update() */
	rq_ctxt->pi_paddr_lo = lower_32_bits(rq->pi_dma_addr);
	rq_ctxt->pi_paddr_hi = upper_32_bits(rq->pi_dma_addr);

	/* The wq block */
	rq_ctxt->wq_block_hi_pfn =
		HINIC_RQ_CTXT_WQ_BLOCK_SET(block_pfn_hi, HI_PFN);
	rq_ctxt->wq_block_lo_pfn = block_pfn_lo;

	hinic_cpu_to_be32(rq_ctxt, sizeof(*rq_ctxt));
}
207
208 /**
209 * alloc_sq_skb_arr - allocate sq array for saved skb
210 * @sq: HW Send Queue
211 *
212 * Return 0 - Success, negative - Failure
213 **/
alloc_sq_skb_arr(struct hinic_sq * sq)214 static int alloc_sq_skb_arr(struct hinic_sq *sq)
215 {
216 struct hinic_wq *wq = sq->wq;
217 size_t skb_arr_size;
218
219 skb_arr_size = wq->q_depth * sizeof(*sq->saved_skb);
220 sq->saved_skb = vzalloc(skb_arr_size);
221 if (!sq->saved_skb)
222 return -ENOMEM;
223
224 return 0;
225 }
226
227 /**
228 * free_sq_skb_arr - free sq array for saved skb
229 * @sq: HW Send Queue
230 **/
free_sq_skb_arr(struct hinic_sq * sq)231 static void free_sq_skb_arr(struct hinic_sq *sq)
232 {
233 vfree(sq->saved_skb);
234 }
235
236 /**
237 * alloc_rq_skb_arr - allocate rq array for saved skb
238 * @rq: HW Receive Queue
239 *
240 * Return 0 - Success, negative - Failure
241 **/
alloc_rq_skb_arr(struct hinic_rq * rq)242 static int alloc_rq_skb_arr(struct hinic_rq *rq)
243 {
244 struct hinic_wq *wq = rq->wq;
245 size_t skb_arr_size;
246
247 skb_arr_size = wq->q_depth * sizeof(*rq->saved_skb);
248 rq->saved_skb = vzalloc(skb_arr_size);
249 if (!rq->saved_skb)
250 return -ENOMEM;
251
252 return 0;
253 }
254
255 /**
256 * free_rq_skb_arr - free rq array for saved skb
257 * @rq: HW Receive Queue
258 **/
free_rq_skb_arr(struct hinic_rq * rq)259 static void free_rq_skb_arr(struct hinic_rq *rq)
260 {
261 vfree(rq->saved_skb);
262 }
263
264 /**
265 * hinic_init_sq - Initialize HW Send Queue
266 * @sq: HW Send Queue
267 * @hwif: HW Interface for accessing HW
268 * @wq: Work Queue for the data of the SQ
269 * @entry: msix entry for sq
270 * @ci_addr: address for reading the current HW consumer index
271 * @ci_dma_addr: dma address for reading the current HW consumer index
272 * @db_base: doorbell base address
273 *
274 * Return 0 - Success, negative - Failure
275 **/
hinic_init_sq(struct hinic_sq * sq,struct hinic_hwif * hwif,struct hinic_wq * wq,struct msix_entry * entry,void * ci_addr,dma_addr_t ci_dma_addr,void __iomem * db_base)276 int hinic_init_sq(struct hinic_sq *sq, struct hinic_hwif *hwif,
277 struct hinic_wq *wq, struct msix_entry *entry,
278 void *ci_addr, dma_addr_t ci_dma_addr,
279 void __iomem *db_base)
280 {
281 sq->hwif = hwif;
282
283 sq->wq = wq;
284
285 sq->irq = entry->vector;
286 sq->msix_entry = entry->entry;
287
288 sq->hw_ci_addr = ci_addr;
289 sq->hw_ci_dma_addr = ci_dma_addr;
290
291 sq->db_base = db_base + SQ_DB_OFF;
292
293 return alloc_sq_skb_arr(sq);
294 }
295
/**
 * hinic_clean_sq - Clean HW Send Queue's Resources
 * @sq: Send Queue
 *
 * Releases the array of saved skbs, the only resource that was allocated
 * by hinic_init_sq().
 **/
void hinic_clean_sq(struct hinic_sq *sq)
{
	free_sq_skb_arr(sq);
}
304
305 /**
306 * alloc_rq_cqe - allocate rq completion queue elements
307 * @rq: HW Receive Queue
308 *
309 * Return 0 - Success, negative - Failure
310 **/
alloc_rq_cqe(struct hinic_rq * rq)311 static int alloc_rq_cqe(struct hinic_rq *rq)
312 {
313 struct hinic_hwif *hwif = rq->hwif;
314 struct pci_dev *pdev = hwif->pdev;
315 size_t cqe_dma_size, cqe_size;
316 struct hinic_wq *wq = rq->wq;
317 int j, i;
318
319 cqe_size = wq->q_depth * sizeof(*rq->cqe);
320 rq->cqe = vzalloc(cqe_size);
321 if (!rq->cqe)
322 return -ENOMEM;
323
324 cqe_dma_size = wq->q_depth * sizeof(*rq->cqe_dma);
325 rq->cqe_dma = vzalloc(cqe_dma_size);
326 if (!rq->cqe_dma)
327 goto err_cqe_dma_arr_alloc;
328
329 for (i = 0; i < wq->q_depth; i++) {
330 rq->cqe[i] = dma_alloc_coherent(&pdev->dev,
331 sizeof(*rq->cqe[i]),
332 &rq->cqe_dma[i], GFP_KERNEL);
333 if (!rq->cqe[i])
334 goto err_cqe_alloc;
335 }
336
337 return 0;
338
339 err_cqe_alloc:
340 for (j = 0; j < i; j++)
341 dma_free_coherent(&pdev->dev, sizeof(*rq->cqe[j]), rq->cqe[j],
342 rq->cqe_dma[j]);
343
344 vfree(rq->cqe_dma);
345
346 err_cqe_dma_arr_alloc:
347 vfree(rq->cqe);
348 return -ENOMEM;
349 }
350
351 /**
352 * free_rq_cqe - free rq completion queue elements
353 * @rq: HW Receive Queue
354 **/
free_rq_cqe(struct hinic_rq * rq)355 static void free_rq_cqe(struct hinic_rq *rq)
356 {
357 struct hinic_hwif *hwif = rq->hwif;
358 struct pci_dev *pdev = hwif->pdev;
359 struct hinic_wq *wq = rq->wq;
360 int i;
361
362 for (i = 0; i < wq->q_depth; i++)
363 dma_free_coherent(&pdev->dev, sizeof(*rq->cqe[i]), rq->cqe[i],
364 rq->cqe_dma[i]);
365
366 vfree(rq->cqe_dma);
367 vfree(rq->cqe);
368 }
369
370 /**
371 * hinic_init_rq - Initialize HW Receive Queue
372 * @rq: HW Receive Queue
373 * @hwif: HW Interface for accessing HW
374 * @wq: Work Queue for the data of the RQ
375 * @entry: msix entry for rq
376 *
377 * Return 0 - Success, negative - Failure
378 **/
hinic_init_rq(struct hinic_rq * rq,struct hinic_hwif * hwif,struct hinic_wq * wq,struct msix_entry * entry)379 int hinic_init_rq(struct hinic_rq *rq, struct hinic_hwif *hwif,
380 struct hinic_wq *wq, struct msix_entry *entry)
381 {
382 struct pci_dev *pdev = hwif->pdev;
383 size_t pi_size;
384 int err;
385
386 rq->hwif = hwif;
387
388 rq->wq = wq;
389
390 rq->irq = entry->vector;
391 rq->msix_entry = entry->entry;
392
393 rq->buf_sz = HINIC_RX_BUF_SZ;
394
395 err = alloc_rq_skb_arr(rq);
396 if (err) {
397 dev_err(&pdev->dev, "Failed to allocate rq priv data\n");
398 return err;
399 }
400
401 err = alloc_rq_cqe(rq);
402 if (err) {
403 dev_err(&pdev->dev, "Failed to allocate rq cqe\n");
404 goto err_alloc_rq_cqe;
405 }
406
407 /* HW requirements: Must be at least 32 bit */
408 pi_size = ALIGN(sizeof(*rq->pi_virt_addr), sizeof(u32));
409 rq->pi_virt_addr = dma_alloc_coherent(&pdev->dev, pi_size,
410 &rq->pi_dma_addr, GFP_KERNEL);
411 if (!rq->pi_virt_addr) {
412 dev_err(&pdev->dev, "Failed to allocate PI address\n");
413 err = -ENOMEM;
414 goto err_pi_virt;
415 }
416
417 return 0;
418
419 err_pi_virt:
420 free_rq_cqe(rq);
421
422 err_alloc_rq_cqe:
423 free_rq_skb_arr(rq);
424 return err;
425 }
426
427 /**
428 * hinic_clean_rq - Clean HW Receive Queue's Resources
429 * @rq: HW Receive Queue
430 **/
hinic_clean_rq(struct hinic_rq * rq)431 void hinic_clean_rq(struct hinic_rq *rq)
432 {
433 struct hinic_hwif *hwif = rq->hwif;
434 struct pci_dev *pdev = hwif->pdev;
435 size_t pi_size;
436
437 pi_size = ALIGN(sizeof(*rq->pi_virt_addr), sizeof(u32));
438 dma_free_coherent(&pdev->dev, pi_size, rq->pi_virt_addr,
439 rq->pi_dma_addr);
440
441 free_rq_cqe(rq);
442 free_rq_skb_arr(rq);
443 }
444
445 /**
446 * hinic_get_sq_free_wqebbs - return number of free wqebbs for use
447 * @sq: send queue
448 *
449 * Return number of free wqebbs
450 **/
hinic_get_sq_free_wqebbs(struct hinic_sq * sq)451 int hinic_get_sq_free_wqebbs(struct hinic_sq *sq)
452 {
453 struct hinic_wq *wq = sq->wq;
454
455 return atomic_read(&wq->delta) - 1;
456 }
457
458 /**
459 * hinic_get_rq_free_wqebbs - return number of free wqebbs for use
460 * @rq: recv queue
461 *
462 * Return number of free wqebbs
463 **/
hinic_get_rq_free_wqebbs(struct hinic_rq * rq)464 int hinic_get_rq_free_wqebbs(struct hinic_rq *rq)
465 {
466 struct hinic_wq *wq = rq->wq;
467
468 return atomic_read(&wq->delta) - 1;
469 }
470
/**
 * sq_prepare_ctrl - prepare the ctrl section of a sq wqe
 * @ctrl: the ctrl section to fill
 * @prod_idx: pi value (not used by this function)
 * @nr_descs: number of buffer descriptors in the wqe
 *
 * The section lengths are stored in units of 8 bytes. The queue info is
 * initialized with the default MSS and with the UC field set.
 **/
static void sq_prepare_ctrl(struct hinic_sq_ctrl *ctrl, u16 prod_idx,
			    int nr_descs)
{
	u32 ctrl_len, task_len, bufdesc_len;

	bufdesc_len = nr_descs * sizeof(struct hinic_sq_bufdesc);
	bufdesc_len = SIZE_8BYTES(bufdesc_len);
	task_len = SIZE_8BYTES(sizeof(struct hinic_sq_task));
	ctrl_len = SIZE_8BYTES(sizeof(*ctrl));

	ctrl->ctrl_info =
		HINIC_SQ_CTRL_SET(bufdesc_len, BUFDESC_SECT_LEN) |
		HINIC_SQ_CTRL_SET(task_len, TASKSECT_LEN) |
		HINIC_SQ_CTRL_SET(SQ_NORMAL_WQE, DATA_FORMAT) |
		HINIC_SQ_CTRL_SET(ctrl_len, LEN);

	ctrl->queue_info =
		HINIC_SQ_CTRL_SET(HINIC_MSS_DEFAULT, QUEUE_INFO_MSS) |
		HINIC_SQ_CTRL_SET(1, QUEUE_INFO_UC);
}
490
sq_prepare_task(struct hinic_sq_task * task)491 static void sq_prepare_task(struct hinic_sq_task *task)
492 {
493 task->pkt_info0 = 0;
494 task->pkt_info1 = 0;
495 task->pkt_info2 = 0;
496
497 task->ufo_v6_identify = 0;
498
499 task->pkt_info4 = HINIC_SQ_TASK_INFO4_SET(HINIC_L2TYPE_ETH, L2TYPE);
500
501 task->zero_pad = 0;
502 }
503
/**
 * hinic_task_set_l2hdr - set the L2 header length in the task section
 * @task: the task section of the wqe
 * @len: the length of the L2 header
 *
 * The value is ORed into pkt_info0, bits that are already set are kept.
 **/
void hinic_task_set_l2hdr(struct hinic_sq_task *task, u32 len)
{
	u32 l2hdr_len = HINIC_SQ_TASK_INFO0_SET(len, L2HDR_LEN);

	task->pkt_info0 |= l2hdr_len;
}
508
/**
 * hinic_task_set_outter_l3 - set the outer L3 info in the task section
 * @task: the task section of the wqe
 * @l3_type: the outer L3 type
 * @network_len: the outer L3 length
 *
 * The values are ORed into pkt_info2, bits that are already set are kept.
 **/
void hinic_task_set_outter_l3(struct hinic_sq_task *task,
			      enum hinic_l3_offload_type l3_type,
			      u32 network_len)
{
	task->pkt_info2 |= HINIC_SQ_TASK_INFO2_SET(l3_type, OUTER_L3TYPE);
	task->pkt_info2 |= HINIC_SQ_TASK_INFO2_SET(network_len, OUTER_L3LEN);
}
516
/**
 * hinic_task_set_inner_l3 - set the inner L3 info in the task section
 * @task: the task section of the wqe
 * @l3_type: the inner L3 type, ORed into pkt_info0
 * @network_len: the inner L3 length, ORed into pkt_info1
 **/
void hinic_task_set_inner_l3(struct hinic_sq_task *task,
			     enum hinic_l3_offload_type l3_type,
			     u32 network_len)
{
	u32 l3_len_bits = HINIC_SQ_TASK_INFO1_SET(network_len, INNER_L3LEN);
	u32 l3_type_bits = HINIC_SQ_TASK_INFO0_SET(l3_type, INNER_L3TYPE);

	task->pkt_info0 |= l3_type_bits;
	task->pkt_info1 |= l3_len_bits;
}
524
/**
 * hinic_task_set_tunnel_l4 - set the tunnel L4 info in the task section
 * @task: the task section of the wqe
 * @l4_type: the tunnel L4 type
 * @tunnel_len: the tunnel L4 length
 *
 * The values are ORed into pkt_info2, bits that are already set are kept.
 **/
void hinic_task_set_tunnel_l4(struct hinic_sq_task *task,
			      enum hinic_l4_tunnel_type l4_type,
			      u32 tunnel_len)
{
	task->pkt_info2 |= HINIC_SQ_TASK_INFO2_SET(l4_type, TUNNEL_L4TYPE);
	task->pkt_info2 |= HINIC_SQ_TASK_INFO2_SET(tunnel_len, TUNNEL_L4LEN);
}
532
/**
 * hinic_set_cs_inner_l4 - set the inner L4 checksum offload info
 * @task: the task section of the wqe
 * @queue_info: the queue info of the ctrl section, updated in place
 * @l4_offload: the type of the L4 offload
 * @l4_len: the length of the inner L4 header
 * @offset: value for the PLDOFF field of the queue info
 *
 * TCP and UDP offload set the TCPUDP_CS field and SCTP offload sets the SCTP
 * field of the queue info. The MSS field of the queue info is replaced by
 * the default MSS.
 **/
void hinic_set_cs_inner_l4(struct hinic_sq_task *task, u32 *queue_info,
			   enum hinic_l4_offload_type l4_offload,
			   u32 l4_len, u32 offset)
{
	u32 tcp_udp_cs = (l4_offload == TCP_OFFLOAD_ENABLE ||
			  l4_offload == UDP_OFFLOAD_ENABLE);
	u32 sctp = !tcp_udp_cs && l4_offload == SCTP_OFFLOAD_ENABLE;
	u32 info;

	task->pkt_info0 |= HINIC_SQ_TASK_INFO0_SET(l4_offload, L4_OFFLOAD);
	task->pkt_info1 |= HINIC_SQ_TASK_INFO1_SET(l4_len, INNER_L4LEN);

	info = *queue_info;
	info |= HINIC_SQ_CTRL_SET(offset, QUEUE_INFO_PLDOFF) |
		HINIC_SQ_CTRL_SET(tcp_udp_cs, QUEUE_INFO_TCPUDP_CS) |
		HINIC_SQ_CTRL_SET(sctp, QUEUE_INFO_SCTP);

	/* replace the MSS value by the default one */
	info = HINIC_SQ_CTRL_CLEAR(info, QUEUE_INFO_MSS);
	info |= HINIC_SQ_CTRL_SET(HINIC_MSS_DEFAULT, QUEUE_INFO_MSS);

	*queue_info = info;
}
556
/**
 * hinic_set_tso_inner_l4 - set the inner L4 segmentation offload info
 * @task: the task section of the wqe
 * @queue_info: the queue info of the ctrl section, updated in place
 * @l4_offload: the type of the L4 offload
 * @l4_len: the length of the inner L4 header
 * @offset: value for the PLDOFF field of the queue info
 * @ip_ident: value stored in ufo_v6_identify of the task section
 * @mss: the MSS that replaces the MSS field of the queue info
 *
 * TCP offload sets the TSO field and UDP offload sets the UFO field of the
 * queue info. The TCPUDP_CS field is set for every non zero @l4_offload.
 **/
void hinic_set_tso_inner_l4(struct hinic_sq_task *task, u32 *queue_info,
			    enum hinic_l4_offload_type l4_offload,
			    u32 l4_len, u32 offset, u32 ip_ident, u32 mss)
{
	u32 tso = (l4_offload == TCP_OFFLOAD_ENABLE);
	u32 ufo = !tso && l4_offload == UDP_OFFLOAD_ENABLE;
	u32 info;

	task->ufo_v6_identify = ip_ident;

	task->pkt_info0 |= HINIC_SQ_TASK_INFO0_SET(l4_offload, L4_OFFLOAD) |
			   HINIC_SQ_TASK_INFO0_SET(tso || ufo, TSO_FLAG);
	task->pkt_info1 |= HINIC_SQ_TASK_INFO1_SET(l4_len, INNER_L4LEN);

	info = *queue_info;
	info |= HINIC_SQ_CTRL_SET(offset, QUEUE_INFO_PLDOFF) |
		HINIC_SQ_CTRL_SET(tso, QUEUE_INFO_TSO) |
		HINIC_SQ_CTRL_SET(ufo, QUEUE_INFO_UFO) |
		HINIC_SQ_CTRL_SET(!!l4_offload, QUEUE_INFO_TCPUDP_CS);

	/* set MSS value */
	info = HINIC_SQ_CTRL_CLEAR(info, QUEUE_INFO_MSS);
	info |= HINIC_SQ_CTRL_SET(mss, QUEUE_INFO_MSS);

	*queue_info = info;
}
583
584 /**
585 * hinic_sq_prepare_wqe - prepare wqe before insert to the queue
586 * @sq: send queue
587 * @prod_idx: pi value
588 * @sq_wqe: wqe to prepare
589 * @sges: sges for use by the wqe for send for buf addresses
590 * @nr_sges: number of sges
591 **/
hinic_sq_prepare_wqe(struct hinic_sq * sq,u16 prod_idx,struct hinic_sq_wqe * sq_wqe,struct hinic_sge * sges,int nr_sges)592 void hinic_sq_prepare_wqe(struct hinic_sq *sq, u16 prod_idx,
593 struct hinic_sq_wqe *sq_wqe, struct hinic_sge *sges,
594 int nr_sges)
595 {
596 int i;
597
598 sq_prepare_ctrl(&sq_wqe->ctrl, prod_idx, nr_sges);
599
600 sq_prepare_task(&sq_wqe->task);
601
602 for (i = 0; i < nr_sges; i++)
603 sq_wqe->buf_descs[i].sge = sges[i];
604 }
605
606 /**
607 * sq_prepare_db - prepare doorbell to write
608 * @sq: send queue
609 * @prod_idx: pi value for the doorbell
610 * @cos: cos of the doorbell
611 *
612 * Return db value
613 **/
sq_prepare_db(struct hinic_sq * sq,u16 prod_idx,unsigned int cos)614 static u32 sq_prepare_db(struct hinic_sq *sq, u16 prod_idx, unsigned int cos)
615 {
616 struct hinic_qp *qp = container_of(sq, struct hinic_qp, sq);
617 u8 hi_prod_idx = SQ_DB_PI_HI(SQ_MASKED_IDX(sq, prod_idx));
618
619 /* Data should be written to HW in Big Endian Format */
620 return cpu_to_be32(HINIC_SQ_DB_INFO_SET(hi_prod_idx, PI_HI) |
621 HINIC_SQ_DB_INFO_SET(HINIC_DB_SQ_TYPE, TYPE) |
622 HINIC_SQ_DB_INFO_SET(HINIC_DATA_PATH, PATH) |
623 HINIC_SQ_DB_INFO_SET(cos, COS) |
624 HINIC_SQ_DB_INFO_SET(qp->q_id, QID));
625 }
626
627 /**
628 * hinic_sq_write_db- write doorbell
629 * @sq: send queue
630 * @prod_idx: pi value for the doorbell
631 * @wqe_size: wqe size
632 * @cos: cos of the wqe
633 **/
hinic_sq_write_db(struct hinic_sq * sq,u16 prod_idx,unsigned int wqe_size,unsigned int cos)634 void hinic_sq_write_db(struct hinic_sq *sq, u16 prod_idx, unsigned int wqe_size,
635 unsigned int cos)
636 {
637 struct hinic_wq *wq = sq->wq;
638
639 /* increment prod_idx to the next */
640 prod_idx += ALIGN(wqe_size, wq->wqebb_size) / wq->wqebb_size;
641
642 wmb(); /* Write all before the doorbell */
643
644 writel(sq_prepare_db(sq, prod_idx, cos), SQ_DB_ADDR(sq, prod_idx));
645 }
646
647 /**
648 * hinic_sq_get_wqe - get wqe ptr in the current pi and update the pi
649 * @sq: sq to get wqe from
650 * @wqe_size: wqe size
651 * @prod_idx: returned pi
652 *
653 * Return wqe pointer
654 **/
hinic_sq_get_wqe(struct hinic_sq * sq,unsigned int wqe_size,u16 * prod_idx)655 struct hinic_sq_wqe *hinic_sq_get_wqe(struct hinic_sq *sq,
656 unsigned int wqe_size, u16 *prod_idx)
657 {
658 struct hinic_hw_wqe *hw_wqe = hinic_get_wqe(sq->wq, wqe_size,
659 prod_idx);
660
661 if (IS_ERR(hw_wqe))
662 return NULL;
663
664 return &hw_wqe->sq_wqe;
665 }
666
667 /**
668 * hinic_sq_return_wqe - return the wqe to the sq
669 * @sq: send queue
670 * @wqe_size: the size of the wqe
671 **/
hinic_sq_return_wqe(struct hinic_sq * sq,unsigned int wqe_size)672 void hinic_sq_return_wqe(struct hinic_sq *sq, unsigned int wqe_size)
673 {
674 hinic_return_wqe(sq->wq, wqe_size);
675 }
676
677 /**
678 * hinic_sq_write_wqe - write the wqe to the sq
679 * @sq: send queue
680 * @prod_idx: pi of the wqe
681 * @sq_wqe: the wqe to write
682 * @skb: skb to save
683 * @wqe_size: the size of the wqe
684 **/
hinic_sq_write_wqe(struct hinic_sq * sq,u16 prod_idx,struct hinic_sq_wqe * sq_wqe,struct sk_buff * skb,unsigned int wqe_size)685 void hinic_sq_write_wqe(struct hinic_sq *sq, u16 prod_idx,
686 struct hinic_sq_wqe *sq_wqe,
687 struct sk_buff *skb, unsigned int wqe_size)
688 {
689 struct hinic_hw_wqe *hw_wqe = (struct hinic_hw_wqe *)sq_wqe;
690
691 sq->saved_skb[prod_idx] = skb;
692
693 /* The data in the HW should be in Big Endian Format */
694 hinic_cpu_to_be32(sq_wqe, wqe_size);
695
696 hinic_write_wqe(sq->wq, hw_wqe, wqe_size);
697 }
698
699 /**
700 * hinic_sq_read_wqebb - read wqe ptr in the current ci and update the ci, the
701 * wqe only have one wqebb
702 * @sq: send queue
703 * @skb: return skb that was saved
704 * @wqe_size: the wqe size ptr
705 * @cons_idx: consumer index of the wqe
706 *
707 * Return wqe in ci position
708 **/
hinic_sq_read_wqebb(struct hinic_sq * sq,struct sk_buff ** skb,unsigned int * wqe_size,u16 * cons_idx)709 struct hinic_sq_wqe *hinic_sq_read_wqebb(struct hinic_sq *sq,
710 struct sk_buff **skb,
711 unsigned int *wqe_size, u16 *cons_idx)
712 {
713 struct hinic_hw_wqe *hw_wqe;
714 struct hinic_sq_wqe *sq_wqe;
715 struct hinic_sq_ctrl *ctrl;
716 unsigned int buf_sect_len;
717 u32 ctrl_info;
718
719 /* read the ctrl section for getting wqe size */
720 hw_wqe = hinic_read_wqe(sq->wq, sizeof(*ctrl), cons_idx);
721 if (IS_ERR(hw_wqe))
722 return NULL;
723
724 *skb = sq->saved_skb[*cons_idx];
725
726 sq_wqe = &hw_wqe->sq_wqe;
727 ctrl = &sq_wqe->ctrl;
728 ctrl_info = be32_to_cpu(ctrl->ctrl_info);
729 buf_sect_len = HINIC_SQ_CTRL_GET(ctrl_info, BUFDESC_SECT_LEN);
730
731 *wqe_size = sizeof(*ctrl) + sizeof(sq_wqe->task);
732 *wqe_size += SECT_SIZE_FROM_8BYTES(buf_sect_len);
733 *wqe_size = ALIGN(*wqe_size, sq->wq->wqebb_size);
734
735 return &hw_wqe->sq_wqe;
736 }
737
738 /**
739 * hinic_sq_read_wqe - read wqe ptr in the current ci and update the ci
740 * @sq: send queue
741 * @skb: return skb that was saved
742 * @wqe_size: the size of the wqe
743 * @cons_idx: consumer index of the wqe
744 *
745 * Return wqe in ci position
746 **/
hinic_sq_read_wqe(struct hinic_sq * sq,struct sk_buff ** skb,unsigned int wqe_size,u16 * cons_idx)747 struct hinic_sq_wqe *hinic_sq_read_wqe(struct hinic_sq *sq,
748 struct sk_buff **skb,
749 unsigned int wqe_size, u16 *cons_idx)
750 {
751 struct hinic_hw_wqe *hw_wqe;
752
753 hw_wqe = hinic_read_wqe(sq->wq, wqe_size, cons_idx);
754 *skb = sq->saved_skb[*cons_idx];
755
756 return &hw_wqe->sq_wqe;
757 }
758
759 /**
760 * hinic_sq_put_wqe - release the ci for new wqes
761 * @sq: send queue
762 * @wqe_size: the size of the wqe
763 **/
hinic_sq_put_wqe(struct hinic_sq * sq,unsigned int wqe_size)764 void hinic_sq_put_wqe(struct hinic_sq *sq, unsigned int wqe_size)
765 {
766 hinic_put_wqe(sq->wq, wqe_size);
767 }
768
769 /**
770 * hinic_sq_get_sges - get sges from the wqe
771 * @sq_wqe: wqe to get the sges from its buffer addresses
772 * @sges: returned sges
773 * @nr_sges: number sges to return
774 **/
hinic_sq_get_sges(struct hinic_sq_wqe * sq_wqe,struct hinic_sge * sges,int nr_sges)775 void hinic_sq_get_sges(struct hinic_sq_wqe *sq_wqe, struct hinic_sge *sges,
776 int nr_sges)
777 {
778 int i;
779
780 for (i = 0; i < nr_sges && i < HINIC_MAX_SQ_BUFDESCS; i++) {
781 sges[i] = sq_wqe->buf_descs[i].sge;
782 hinic_be32_to_cpu(&sges[i], sizeof(sges[i]));
783 }
784 }
785
786 /**
787 * hinic_rq_get_wqe - get wqe ptr in the current pi and update the pi
788 * @rq: rq to get wqe from
789 * @wqe_size: wqe size
790 * @prod_idx: returned pi
791 *
792 * Return wqe pointer
793 **/
hinic_rq_get_wqe(struct hinic_rq * rq,unsigned int wqe_size,u16 * prod_idx)794 struct hinic_rq_wqe *hinic_rq_get_wqe(struct hinic_rq *rq,
795 unsigned int wqe_size, u16 *prod_idx)
796 {
797 struct hinic_hw_wqe *hw_wqe = hinic_get_wqe(rq->wq, wqe_size,
798 prod_idx);
799
800 if (IS_ERR(hw_wqe))
801 return NULL;
802
803 return &hw_wqe->rq_wqe;
804 }
805
806 /**
807 * hinic_rq_write_wqe - write the wqe to the rq
808 * @rq: recv queue
809 * @prod_idx: pi of the wqe
810 * @rq_wqe: the wqe to write
811 * @skb: skb to save
812 **/
hinic_rq_write_wqe(struct hinic_rq * rq,u16 prod_idx,struct hinic_rq_wqe * rq_wqe,struct sk_buff * skb)813 void hinic_rq_write_wqe(struct hinic_rq *rq, u16 prod_idx,
814 struct hinic_rq_wqe *rq_wqe, struct sk_buff *skb)
815 {
816 struct hinic_hw_wqe *hw_wqe = (struct hinic_hw_wqe *)rq_wqe;
817
818 rq->saved_skb[prod_idx] = skb;
819
820 /* The data in the HW should be in Big Endian Format */
821 hinic_cpu_to_be32(rq_wqe, sizeof(*rq_wqe));
822
823 hinic_write_wqe(rq->wq, hw_wqe, sizeof(*rq_wqe));
824 }
825
826 /**
827 * hinic_rq_read_wqe - read wqe ptr in the current ci and update the ci
828 * @rq: recv queue
829 * @wqe_size: the size of the wqe
830 * @skb: return saved skb
831 * @cons_idx: consumer index of the wqe
832 *
833 * Return wqe in ci position
834 **/
hinic_rq_read_wqe(struct hinic_rq * rq,unsigned int wqe_size,struct sk_buff ** skb,u16 * cons_idx)835 struct hinic_rq_wqe *hinic_rq_read_wqe(struct hinic_rq *rq,
836 unsigned int wqe_size,
837 struct sk_buff **skb, u16 *cons_idx)
838 {
839 struct hinic_hw_wqe *hw_wqe;
840 struct hinic_rq_cqe *cqe;
841 int rx_done;
842 u32 status;
843
844 hw_wqe = hinic_read_wqe(rq->wq, wqe_size, cons_idx);
845 if (IS_ERR(hw_wqe))
846 return NULL;
847
848 cqe = rq->cqe[*cons_idx];
849
850 status = be32_to_cpu(cqe->status);
851
852 rx_done = HINIC_RQ_CQE_STATUS_GET(status, RXDONE);
853 if (!rx_done)
854 return NULL;
855
856 *skb = rq->saved_skb[*cons_idx];
857
858 return &hw_wqe->rq_wqe;
859 }
860
861 /**
862 * hinic_rq_read_next_wqe - increment ci and read the wqe in ci position
863 * @rq: recv queue
864 * @wqe_size: the size of the wqe
865 * @skb: return saved skb
866 * @cons_idx: consumer index in the wq
867 *
868 * Return wqe in incremented ci position
869 **/
hinic_rq_read_next_wqe(struct hinic_rq * rq,unsigned int wqe_size,struct sk_buff ** skb,u16 * cons_idx)870 struct hinic_rq_wqe *hinic_rq_read_next_wqe(struct hinic_rq *rq,
871 unsigned int wqe_size,
872 struct sk_buff **skb,
873 u16 *cons_idx)
874 {
875 struct hinic_wq *wq = rq->wq;
876 struct hinic_hw_wqe *hw_wqe;
877 unsigned int num_wqebbs;
878
879 wqe_size = ALIGN(wqe_size, wq->wqebb_size);
880 num_wqebbs = wqe_size / wq->wqebb_size;
881
882 *cons_idx = RQ_MASKED_IDX(rq, *cons_idx + num_wqebbs);
883
884 *skb = rq->saved_skb[*cons_idx];
885
886 hw_wqe = hinic_read_wqe_direct(wq, *cons_idx);
887
888 return &hw_wqe->rq_wqe;
889 }
890
891 /**
892 * hinic_put_wqe - release the ci for new wqes
893 * @rq: recv queue
894 * @cons_idx: consumer index of the wqe
895 * @wqe_size: the size of the wqe
896 **/
hinic_rq_put_wqe(struct hinic_rq * rq,u16 cons_idx,unsigned int wqe_size)897 void hinic_rq_put_wqe(struct hinic_rq *rq, u16 cons_idx,
898 unsigned int wqe_size)
899 {
900 struct hinic_rq_cqe *cqe = rq->cqe[cons_idx];
901 u32 status = be32_to_cpu(cqe->status);
902
903 status = HINIC_RQ_CQE_STATUS_CLEAR(status, RXDONE);
904
905 /* Rx WQE size is 1 WQEBB, no wq shadow*/
906 cqe->status = cpu_to_be32(status);
907
908 wmb(); /* clear done flag */
909
910 hinic_put_wqe(rq->wq, wqe_size);
911 }
912
913 /**
914 * hinic_rq_get_sge - get sge from the wqe
915 * @rq: recv queue
916 * @rq_wqe: wqe to get the sge from its buf address
917 * @cons_idx: consumer index
918 * @sge: returned sge
919 **/
hinic_rq_get_sge(struct hinic_rq * rq,struct hinic_rq_wqe * rq_wqe,u16 cons_idx,struct hinic_sge * sge)920 void hinic_rq_get_sge(struct hinic_rq *rq, struct hinic_rq_wqe *rq_wqe,
921 u16 cons_idx, struct hinic_sge *sge)
922 {
923 struct hinic_rq_cqe *cqe = rq->cqe[cons_idx];
924 u32 len = be32_to_cpu(cqe->len);
925
926 sge->hi_addr = be32_to_cpu(rq_wqe->buf_desc.hi_addr);
927 sge->lo_addr = be32_to_cpu(rq_wqe->buf_desc.lo_addr);
928 sge->len = HINIC_RQ_CQE_SGE_GET(len, LEN);
929 }
930
931 /**
932 * hinic_rq_prepare_wqe - prepare wqe before insert to the queue
933 * @rq: recv queue
934 * @prod_idx: pi value
935 * @rq_wqe: the wqe
936 * @sge: sge for use by the wqe for recv buf address
937 **/
hinic_rq_prepare_wqe(struct hinic_rq * rq,u16 prod_idx,struct hinic_rq_wqe * rq_wqe,struct hinic_sge * sge)938 void hinic_rq_prepare_wqe(struct hinic_rq *rq, u16 prod_idx,
939 struct hinic_rq_wqe *rq_wqe, struct hinic_sge *sge)
940 {
941 struct hinic_rq_cqe_sect *cqe_sect = &rq_wqe->cqe_sect;
942 struct hinic_rq_bufdesc *buf_desc = &rq_wqe->buf_desc;
943 struct hinic_rq_cqe *cqe = rq->cqe[prod_idx];
944 struct hinic_rq_ctrl *ctrl = &rq_wqe->ctrl;
945 dma_addr_t cqe_dma = rq->cqe_dma[prod_idx];
946
947 ctrl->ctrl_info =
948 HINIC_RQ_CTRL_SET(SIZE_8BYTES(sizeof(*ctrl)), LEN) |
949 HINIC_RQ_CTRL_SET(SIZE_8BYTES(sizeof(*cqe_sect)),
950 COMPLETE_LEN) |
951 HINIC_RQ_CTRL_SET(SIZE_8BYTES(sizeof(*buf_desc)),
952 BUFDESC_SECT_LEN) |
953 HINIC_RQ_CTRL_SET(RQ_COMPLETE_SGE, COMPLETE_FORMAT);
954
955 hinic_set_sge(&cqe_sect->sge, cqe_dma, sizeof(*cqe));
956
957 buf_desc->hi_addr = sge->hi_addr;
958 buf_desc->lo_addr = sge->lo_addr;
959 }
960
961 /**
962 * hinic_rq_update - update pi of the rq
963 * @rq: recv queue
964 * @prod_idx: pi value
965 **/
hinic_rq_update(struct hinic_rq * rq,u16 prod_idx)966 void hinic_rq_update(struct hinic_rq *rq, u16 prod_idx)
967 {
968 *rq->pi_virt_addr = cpu_to_be16(RQ_MASKED_IDX(rq, prod_idx + 1));
969 }
970