1 /*
2 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
3 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 */
33
34 #ifndef RXE_VERBS_H
35 #define RXE_VERBS_H
36
37 #include <linux/interrupt.h>
38 #include <rdma/rdma_user_rxe.h>
39 #include "rxe_pool.h"
40 #include "rxe_task.h"
41
/*
 * pkey_match() - test whether two 16-bit partition keys match.
 * @key1: first key
 * @key2: second key
 *
 * The low 15 bits (mask 0x7fff) are compared as the key value and bit 15
 * (mask 0x8000) is treated as a flag.  The keys match when the 15-bit
 * value is non-zero, is identical in both keys, and at least one of the
 * two keys has bit 15 set.
 *
 * Return: 1 when the keys match, 0 otherwise.
 */
static inline int pkey_match(u16 key1, u16 key2)
{
	return (((key1 & 0x7fff) != 0) &&
		((key1 & 0x7fff) == (key2 & 0x7fff)) &&
		((key1 & 0x8000) || (key2 & 0x8000))) ? 1 : 0;
}
48
/*
 * psn_compare() - order two sequence numbers modulo 2^24.
 * @psn_a: first sequence number
 * @psn_b: second sequence number
 *
 * The unsigned difference is shifted left by 8 so that bit 23 of the
 * difference lands in the sign bit of a 32-bit signed value.  Only the
 * low 24 bits of the difference therefore take part in the comparison,
 * and wrap-around is handled (e.g. 0 compares greater than 0xffffff).
 * The magnitude of the result is the 24-bit difference scaled by 256;
 * callers should rely on the sign only.
 *
 * Return: >0 if psn_a > psn_b
 *	    0 if psn_a == psn_b
 *	   <0 if psn_a < psn_b
 */
static inline int psn_compare(u32 psn_a, u32 psn_b)
{
	s32 diff;

	diff = (psn_a - psn_b) << 8;
	return diff;
}
60
/* rxe user context: an ib_ucontext (ibuc) embedded next to an
 * rxe_pool_entry.  to_ruc() maps a pointer to ibuc back to this struct.
 */
61 struct rxe_ucontext {
62 struct rxe_pool_entry pelem;
63 struct ib_ucontext ibuc;
64 };
65
/* rxe protection domain: an ib_pd (ibpd) embedded next to an
 * rxe_pool_entry.  to_rpd() maps a pointer to ibpd back to this struct.
 */
66 struct rxe_pd {
67 struct rxe_pool_entry pelem;
68 struct ib_pd ibpd;
69 };
70
/* rxe address handle: an ib_ah (ibah) embedded next to an rxe_pool_entry.
 * to_rah() maps a pointer to ibah back to this struct.
 */
71 struct rxe_ah {
72 struct rxe_pool_entry pelem;
73 struct ib_ah ibah;
74 struct rxe_pd *pd; /* presumably the PD this AH belongs to - confirm */
75 struct rxe_av av; /* struct rxe_av is declared outside this header */
76 };
77
/* One completion entry, stored either as an ib_wc or as an ib_uverbs_wc.
 * The two members share storage; which one is valid is decided by code
 * outside this header (presumably by rxe_cq.is_user - confirm).
 */
78 struct rxe_cqe {
79 union {
80 struct ib_wc ibwc;
81 struct ib_uverbs_wc uibwc;
82 };
83 };
84
/* rxe completion queue: an ib_cq (ibcq) embedded next to an rxe_pool_entry.
 * to_rcq() maps a pointer to ibcq back to this struct.
 */
85 struct rxe_cq {
86 struct rxe_pool_entry pelem;
87 struct ib_cq ibcq;
88 struct rxe_queue *queue; /* backing queue; struct rxe_queue is declared elsewhere */
89 spinlock_t cq_lock; /* NOTE(review): protected data not stated here - presumably queue */
90 u8 notify;
91 int is_user; /* presumably non-zero for a user-space CQ - confirm */
92 struct tasklet_struct comp_task; /* tasklet type comes from <linux/interrupt.h> */
93 };
94
/* States of a work queue entry.  Values are assigned implicitly from 0 in
 * declaration order; the transitions between them are implemented outside
 * this header.
 */
95 enum wqe_state {
96 wqe_state_posted,
97 wqe_state_processing,
98 wqe_state_pending,
99 wqe_state_done,
100 wqe_state_error,
101 };
102
/* Send queue: limits plus the backing rxe_queue, guarded by a single
 * spinlock.  The max_* fields are presumably the per-queue caps on work
 * requests, scatter/gather entries and inline data - confirm with users.
 */
103 struct rxe_sq {
104 int max_wr;
105 int max_sge;
106 int max_inline;
107 spinlock_t sq_lock; /* guard queue */
108 struct rxe_queue *queue;
109 };
110
/* Receive queue: limits plus the backing rxe_queue.  Unlike rxe_sq, the
 * producer and consumer sides of the queue each have their own spinlock.
 */
111 struct rxe_rq {
112 int max_wr;
113 int max_sge;
114 spinlock_t producer_lock; /* guard queue producer */
115 spinlock_t consumer_lock; /* guard queue consumer */
116 struct rxe_queue *queue;
117 };
118
/* rxe shared receive queue: an ib_srq (ibsrq) embedded next to an
 * rxe_pool_entry, with its own rxe_rq.  to_rsrq() maps a pointer to ibsrq
 * back to this struct.
 */
119 struct rxe_srq {
120 struct rxe_pool_entry pelem;
121 struct ib_srq ibsrq;
122 struct rxe_pd *pd;
123 struct rxe_rq rq; /* embedded receive queue (by value, not a pointer) */
124 u32 srq_num;
125
126 int limit; /* NOTE(review): semantics not shown here - presumably the SRQ limit watermark */
127 int error;
128 };
129
/* Queue pair state as tracked separately in rxe_req_info.state and
 * rxe_resp_info.state.  Values are assigned implicitly from 0 in
 * declaration order.
 */
130 enum rxe_qp_state {
131 QP_STATE_RESET,
132 QP_STATE_INIT,
133 QP_STATE_READY,
134 QP_STATE_DRAIN, /* req only */
135 QP_STATE_DRAINED, /* req only */
136 QP_STATE_ERROR
137 };
138
/* Defined in a .c file; presumably indexed by enum rxe_qp_state to obtain
 * a printable state name - confirm against the definition.
 */
139 extern char *rxe_qp_state_name[];
140
/* Per-QP state embedded in struct rxe_qp as the 'req' member (presumably
 * the requester side - confirm).  Field meanings below are not defined in
 * this header; consult the code that uses them.
 */
141 struct rxe_req_info {
142 enum rxe_qp_state state;
143 int wqe_index;
144 u32 psn;
145 int opcode;
146 atomic_t rd_atomic;
147 int wait_fence;
148 int need_rd_atomic;
149 int wait_psn;
150 int need_retry;
151 int noack_pkts;
152 struct rxe_task task; /* struct rxe_task comes from "rxe_task.h" */
153 };
154
/* Per-QP state embedded in struct rxe_qp as the 'comp' member (presumably
 * the completer side - confirm).  Field meanings are not defined in this
 * header.
 */
155 struct rxe_comp_info {
156 u32 psn;
157 int opcode;
158 int timeout;
159 int timeout_retry;
160 u32 retry_cnt;
161 u32 rnr_retry;
162 struct rxe_task task; /* struct rxe_task comes from "rxe_task.h" */
163 };
164
/* State stored in resp_res.state.  Values are assigned implicitly from 0
 * in declaration order; their handling lives outside this header.
 */
165 enum rdatm_res_state {
166 rdatm_res_state_next,
167 rdatm_res_state_new,
168 rdatm_res_state_replay,
169 };
170
/* One responder resource, as held in the rxe_resp_info.resources array.
 * The anonymous union carries either the 'atomic' or the 'read' payload;
 * presumably 'type' selects which one is valid - confirm against users.
 */
171 struct resp_res {
172 int type;
173 u32 first_psn;
174 u32 last_psn;
175 u32 cur_psn;
176 enum rdatm_res_state state;
177
178 union {
179 struct {
180 struct sk_buff *skb;
181 } atomic;
182 struct {
183 struct rxe_mem *mr;
184 u64 va_org;
185 u32 rkey;
186 u32 length;
187 u64 va;
188 u32 resid;
189 } read;
190 };
191 };
192
/* Per-QP state embedded in struct rxe_qp as the 'resp' member (presumably
 * the responder side - confirm).  The existing section comments below
 * group the fields by the kind of operation that uses them.
 */
193 struct rxe_resp_info {
194 enum rxe_qp_state state;
195 u32 msn;
196 u32 psn;
197 int opcode;
198 int drop_msg;
199 int goto_error;
200 int sent_psn_nak;
201 enum ib_wc_status status;
202 u8 aeth_syndrome;
203
204 /* Receive only */
205 struct rxe_recv_wqe *wqe;
206
207 /* RDMA read / atomic only */
208 u64 va;
209 struct rxe_mem *mr;
210 u32 resid;
211 u32 rkey;
212 u64 atomic_orig;
213
214 /* SRQ only */
/* A receive WQE held by value, followed by room for RXE_MAX_SGE ib_sge
 * entries.  NOTE(review): the sge array presumably backs a trailing
 * array inside rxe_recv_wqe - confirm against its declaration.
 */
215 struct {
216 struct rxe_recv_wqe wqe;
217 struct ib_sge sge[RXE_MAX_SGE];
218 } srq_wqe;
219
220 /* Responder resources. It's a circular list where the oldest
221 * resource is dropped first.
222 */
223 struct resp_res *resources;
224 unsigned int res_head;
225 unsigned int res_tail;
226 struct resp_res *res;
227 struct rxe_task task;
228 };
229
/* rxe queue pair: an ib_qp (ibqp) embedded next to an rxe_pool_entry,
 * together with its queues, packet lists, per-side state (req/comp/resp)
 * and timers.  to_rqp() maps a pointer to ibqp back to this struct.
 */
230 struct rxe_qp {
231 struct rxe_pool_entry pelem;
232 struct ib_qp ibqp;
233 struct ib_qp_attr attr;
234 unsigned int valid;
235 unsigned int mtu;
236 int is_user; /* presumably non-zero for a user-space QP - confirm */
237
238 struct rxe_pd *pd;
239 struct rxe_srq *srq;
240 struct rxe_cq *scq; /* presumably the send completion queue - confirm */
241 struct rxe_cq *rcq; /* presumably the receive completion queue - confirm */
242
243 enum ib_sig_type sq_sig_type;
244
245 struct rxe_sq sq;
246 struct rxe_rq rq;
247
248 struct socket *sk;
249
250 struct rxe_av pri_av;
251 struct rxe_av alt_av;
252
253 /* list of mcast groups qp has joined (for cleanup) */
254 struct list_head grp_list;
255 spinlock_t grp_lock; /* guard grp_list */
256
257 struct sk_buff_head req_pkts;
258 struct sk_buff_head resp_pkts;
259 struct sk_buff_head send_pkts;
260
261 struct rxe_req_info req;
262 struct rxe_comp_info comp;
263 struct rxe_resp_info resp;
264
265 atomic_t ssn;
266 atomic_t skb_out;
267 int need_req_skb;
268
269 /* Timer for retransmitting packet when ACKs have been lost. RC
270 * only. The requester sets it when it is not already
271 * started. The responder resets it whenever an ack is
272 * received.
273 */
274 struct timer_list retrans_timer;
275 u64 qp_timeout_jiffies;
276
277 /* Timer for handling RNR NAKS. */
278 struct timer_list rnr_nak_timer;
279
280 spinlock_t state_lock; /* guard requester and completer */
281 };
282
/* Lifecycle state stored in rxe_mem.state.  Values are assigned implicitly
 * from 0 in declaration order, so RXE_MEM_STATE_ZOMBIE is 0.
 */
283 enum rxe_mem_state {
284 RXE_MEM_STATE_ZOMBIE,
285 RXE_MEM_STATE_INVALID,
286 RXE_MEM_STATE_FREE,
287 RXE_MEM_STATE_VALID,
288 };
289
/* Kind of memory object stored in rxe_mem.type.  Values are assigned
 * implicitly from 0 in declaration order, so RXE_MEM_TYPE_NONE is 0.
 */
290 enum rxe_mem_type {
291 RXE_MEM_TYPE_NONE,
292 RXE_MEM_TYPE_DMA,
293 RXE_MEM_TYPE_MR,
294 RXE_MEM_TYPE_FMR,
295 RXE_MEM_TYPE_MW,
296 };
297
/* Number of rxe_phys_buf entries that fit in one page (integer division),
 * so a struct rxe_map never exceeds PAGE_SIZE bytes.
 */
298 #define RXE_BUF_PER_MAP (PAGE_SIZE / sizeof(struct rxe_phys_buf))
299
/* One buffer described by a 64-bit address and a 64-bit size. */
300 struct rxe_phys_buf {
301 u64 addr;
302 u64 size;
303 };
304
/* A fixed-size table of RXE_BUF_PER_MAP buffer descriptors. */
305 struct rxe_map {
306 struct rxe_phys_buf buf[RXE_BUF_PER_MAP];
307 };
308
/* rxe memory object.  The anonymous union embeds either an ib_mr or an
 * ib_mw at the same offset; to_rmr() and to_rmw() map a pointer to the
 * respective member back to this struct.
 */
309 struct rxe_mem {
310 struct rxe_pool_entry pelem;
311 union {
312 struct ib_mr ibmr;
313 struct ib_mw ibmw;
314 };
315
316 struct rxe_pd *pd;
317 struct ib_umem *umem;
318
319 u32 lkey;
320 u32 rkey;
321
322 enum rxe_mem_state state;
323 enum rxe_mem_type type;
324 u64 va;
325 u64 iova;
326 size_t length;
327 u32 offset;
328 int access;
329
/* NOTE(review): the shift/mask pairs presumably split an offset into a
 * map index and a buffer index within that map - confirm in the users.
 */
330 int page_shift;
331 int page_mask;
332 int map_shift;
333 int map_mask;
334
335 u32 num_buf;
336 u32 nbuf;
337
338 u32 max_buf;
339 u32 num_map;
340
341 struct rxe_map **map; /* array of pointers to rxe_map; presumably num_map entries - confirm */
342 };
343
/* Multicast group: identified by mgid, owned by an rxe_dev, with a list
 * head (qp_list) and a count (num_qp).  Presumably qp_list links the
 * rxe_mc_elem entries of attached QPs - confirm.
 */
344 struct rxe_mc_grp {
345 struct rxe_pool_entry pelem;
346 spinlock_t mcg_lock; /* guard group */
347 struct rxe_dev *rxe;
348 struct list_head qp_list;
349 union ib_gid mgid;
350 int num_qp;
351 u32 qkey;
352 u16 pkey;
353 };
354
/* Links one QP to one multicast group.  It carries two list nodes, so the
 * same element can sit on two lists at once (presumably the group's
 * qp_list and the QP's grp_list - confirm).
 */
355 struct rxe_mc_elem {
356 struct rxe_pool_entry pelem;
357 struct list_head qp_list;
358 struct list_head grp_list;
359 struct rxe_qp *qp;
360 struct rxe_mc_grp *grp;
361 };
362
/* Port state embedded by value in struct rxe_dev as the 'port' member. */
363 struct rxe_port {
364 struct ib_port_attr attr;
365 u16 *pkey_tbl; /* pointer to u16 keys; table length is not recorded in this struct */
366 __be64 port_guid; /* big-endian, per the __be64 type */
367 __be64 subnet_prefix; /* big-endian, per the __be64 type */
368 spinlock_t port_lock; /* guard port */
369 unsigned int mtu_cap;
370 /* special QPs */
371 u32 qp_smi_index;
372 u32 qp_gsi_index;
373 };
374
375 /* callbacks from rdma_rxe to network interface layer */
/* Every callback except loopback takes the rxe_dev as its first argument.
 * The int-returning callbacks presumably follow the kernel convention of
 * 0 on success and a negative errno on failure - confirm with the
 * implementations.
 */
376 struct rxe_ifc_ops {
377 void (*release)(struct rxe_dev *rxe);
378 __be64 (*node_guid)(struct rxe_dev *rxe);
379 __be64 (*port_guid)(struct rxe_dev *rxe);
380 struct device *(*dma_device)(struct rxe_dev *rxe);
381 int (*mcast_add)(struct rxe_dev *rxe, union ib_gid *mgid);
382 int (*mcast_delete)(struct rxe_dev *rxe, union ib_gid *mgid);
383 int (*prepare)(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
384 struct sk_buff *skb, u32 *crc);
385 int (*send)(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
386 struct sk_buff *skb);
387 int (*loopback)(struct sk_buff *skb);
388 struct sk_buff *(*init_packet)(struct rxe_dev *rxe, struct rxe_av *av,
389 int paylen, struct rxe_pkt_info *pkt);
390 char *(*parent_name)(struct rxe_dev *rxe, unsigned int port_num);
391 enum rdma_link_layer (*link_layer)(struct rxe_dev *rxe,
392 unsigned int port_num);
393 };
394
/* Top-level rxe device: an ib_device (ib_dev) as the first member, plus
 * the interface callbacks, one rxe_pool per object type, mmap bookkeeping
 * and a single embedded port.  to_rdev() maps a pointer to ib_dev back to
 * this struct.
 */
395 struct rxe_dev {
396 struct ib_device ib_dev;
397 struct ib_device_attr attr;
398 int max_ucontext;
399 int max_inline_data;
400 struct kref ref_cnt;
401 struct mutex usdev_lock;
402
403 struct rxe_ifc_ops *ifc_ops;
404
405 struct net_device *ndev;
406
407 int xmit_errors;
408
/* Pools; presumably one per object type named by the prefix
 * (uc = ucontext, pd, ah, srq, qp, cq, mr, mw, mc_grp, mc_elem) - confirm.
 */
409 struct rxe_pool uc_pool;
410 struct rxe_pool pd_pool;
411 struct rxe_pool ah_pool;
412 struct rxe_pool srq_pool;
413 struct rxe_pool qp_pool;
414 struct rxe_pool cq_pool;
415 struct rxe_pool mr_pool;
416 struct rxe_pool mw_pool;
417 struct rxe_pool mc_grp_pool;
418 struct rxe_pool mc_elem_pool;
419
420 spinlock_t pending_lock; /* guard pending_mmaps */
421 struct list_head pending_mmaps;
422
423 spinlock_t mmap_offset_lock; /* guard mmap_offset */
424 int mmap_offset;
425
426 struct rxe_port port;
427 struct list_head list; /* list node; the list it is linked into is not shown in this header */
428 };
429
to_rdev(struct ib_device * dev)430 static inline struct rxe_dev *to_rdev(struct ib_device *dev)
431 {
432 return dev ? container_of(dev, struct rxe_dev, ib_dev) : NULL;
433 }
434
to_ruc(struct ib_ucontext * uc)435 static inline struct rxe_ucontext *to_ruc(struct ib_ucontext *uc)
436 {
437 return uc ? container_of(uc, struct rxe_ucontext, ibuc) : NULL;
438 }
439
to_rpd(struct ib_pd * pd)440 static inline struct rxe_pd *to_rpd(struct ib_pd *pd)
441 {
442 return pd ? container_of(pd, struct rxe_pd, ibpd) : NULL;
443 }
444
to_rah(struct ib_ah * ah)445 static inline struct rxe_ah *to_rah(struct ib_ah *ah)
446 {
447 return ah ? container_of(ah, struct rxe_ah, ibah) : NULL;
448 }
449
to_rsrq(struct ib_srq * srq)450 static inline struct rxe_srq *to_rsrq(struct ib_srq *srq)
451 {
452 return srq ? container_of(srq, struct rxe_srq, ibsrq) : NULL;
453 }
454
to_rqp(struct ib_qp * qp)455 static inline struct rxe_qp *to_rqp(struct ib_qp *qp)
456 {
457 return qp ? container_of(qp, struct rxe_qp, ibqp) : NULL;
458 }
459
to_rcq(struct ib_cq * cq)460 static inline struct rxe_cq *to_rcq(struct ib_cq *cq)
461 {
462 return cq ? container_of(cq, struct rxe_cq, ibcq) : NULL;
463 }
464
to_rmr(struct ib_mr * mr)465 static inline struct rxe_mem *to_rmr(struct ib_mr *mr)
466 {
467 return mr ? container_of(mr, struct rxe_mem, ibmr) : NULL;
468 }
469
to_rmw(struct ib_mw * mw)470 static inline struct rxe_mem *to_rmw(struct ib_mw *mw)
471 {
472 return mw ? container_of(mw, struct rxe_mem, ibmw) : NULL;
473 }
474
/* Device registration entry points, defined in a .c file.  Both return an
 * int; presumably 0 on success and a negative errno on failure - confirm
 * with the definitions.
 */
475 int rxe_register_device(struct rxe_dev *rxe);
476 int rxe_unregister_device(struct rxe_dev *rxe);
477
/* Cleanup hook taking an untyped pointer.  NOTE(review): presumably @arg
 * is a struct rxe_mc_elem or rxe_mc_grp handed over by its pool - confirm.
 */
478 void rxe_mc_cleanup(void *arg);
479
480 #endif /* RXE_VERBS_H */
481