1 /* SPDX-License-Identifier: GPL-2.0-or-later */
2 /*
3 * RDMA Transport Layer
4 *
5 * Copyright (c) 2014 - 2018 ProfitBricks GmbH. All rights reserved.
6 * Copyright (c) 2018 - 2019 1&1 IONOS Cloud GmbH. All rights reserved.
7 * Copyright (c) 2019 - 2020 1&1 IONOS SE. All rights reserved.
8 */
9
10 #ifndef RTRS_PRI_H
11 #define RTRS_PRI_H
12
13 #include <linux/uuid.h>
14 #include <rdma/rdma_cm.h>
15 #include <rdma/ib_verbs.h>
16 #include <rdma/ib.h>
17
18 #include "rtrs.h"
19
/*
 * RTRS protocol version.  The numeric form is folded into RTRS_PROTO_VER
 * as (major << 8) | minor; RTRS_PROTO_VER_STRING expands to the string
 * "<major>.<minor>".
 */
#define RTRS_PROTO_VER_MAJOR 2
#define RTRS_PROTO_VER_MINOR 0

#define RTRS_PROTO_VER_STRING __stringify(RTRS_PROTO_VER_MAJOR) "." \
			       __stringify(RTRS_PROTO_VER_MINOR)
25
/*
 * Layout of the 32-bit immediate value built by rtrs_to_imm(): the type
 * sits in the upper MAX_IMM_TYPE_BITS bits and the payload in the lower
 * MAX_IMM_PAYL_BITS bits.  rtrs_to_imm() checks at build time that the
 * two widths add up to 32.
 */
enum rtrs_imm_const {
	MAX_IMM_TYPE_BITS = 4,
	MAX_IMM_TYPE_MASK = ((1 << MAX_IMM_TYPE_BITS) - 1),
	MAX_IMM_PAYL_BITS = 28,
	MAX_IMM_PAYL_MASK = ((1 << MAX_IMM_PAYL_BITS) - 1),
};
32
/*
 * Type codes stored in the type part of an immediate value (see
 * rtrs_to_imm()).  RTRS_LAST_IMM is one past the highest code in use;
 * rtrs_to_imm() checks at build time that it does not exceed
 * 1 << MAX_IMM_TYPE_BITS.
 */
enum rtrs_imm_type {
	RTRS_IO_REQ_IMM       = 0, /* client to server */
	RTRS_IO_RSP_IMM       = 1, /* server to client */
	RTRS_IO_RSP_W_INV_IMM = 2, /* server to client */

	RTRS_HB_MSG_IMM = 8, /* HB: HeartBeat */
	RTRS_HB_ACK_IMM = 9, /* heartbeat acknowledgement */

	RTRS_LAST_IMM,
};
43
/* Transport-wide limits, heartbeat defaults and protocol identification. */
enum {
	SERVICE_CON_QUEUE_DEPTH = 512,

	MAX_PATHS_NUM = 128,

	/*
	 * Max IB immediate data size is 2^28 (MAX_IMM_PAYL_BITS)
	 * and the minimum chunk size is 4096 (2^12).
	 * So the maximum sess_queue_depth is 65536 (2^16) in theory.
	 * But mempool_create, create_qp and ib_post_send fail with
	 * "cannot allocate memory" error if sess_queue_depth is too big.
	 * Therefore the practical max value of sess_queue_depth is
	 * somewhere between 1 and 65536 and it depends on the system.
	 */
	MAX_SESS_QUEUE_DEPTH = 65536,

	/* Heartbeat defaults: interval in milliseconds and missed-beat limit. */
	RTRS_HB_INTERVAL_MS = 5000,
	RTRS_HB_MISSED_MAX = 5,

	RTRS_MAGIC = 0x1BBD,
	/* Major version in bits 15..8, minor version in bits 7..0. */
	RTRS_PROTO_VER = (RTRS_PROTO_VER_MAJOR << 8) | RTRS_PROTO_VER_MINOR,
};
66
struct rtrs_ib_dev;

/*
 * Callback table attached to a struct rtrs_rdma_dev_pd through its @ops
 * pointer.
 * @alloc:  returns a new struct rtrs_ib_dev, takes no arguments
 * @free:   releases a struct rtrs_ib_dev
 * @init:   per-device setup hook, returns an int status
 * @deinit: counterpart of @init
 *
 * NOTE(review): when each hook is invoked is decided by the pool code in
 * rtrs.c, which is not visible here - confirm there.
 */
struct rtrs_rdma_dev_pd_ops {
	struct rtrs_ib_dev *(*alloc)(void);
	void (*free)(struct rtrs_ib_dev *dev);
	int (*init)(struct rtrs_ib_dev *dev);
	void (*deinit)(struct rtrs_ib_dev *dev);
};
75
/*
 * Pool of struct rtrs_ib_dev objects (each rtrs_ib_dev points back to its
 * pool via ->pool and has a list_head ->entry).
 * @mutex:    presumably serialises access to @list - confirm in rtrs.c
 * @list:     list head; presumably links the devices' ->entry members
 * @pd_flags: ib_pd flags, passed in through rtrs_rdma_dev_pd_init()
 * @ops:      optional/required callbacks, see struct rtrs_rdma_dev_pd_ops
 */
struct rtrs_rdma_dev_pd {
	struct mutex mutex;
	struct list_head list;
	enum ib_pd_flags pd_flags;
	const struct rtrs_rdma_dev_pd_ops *ops;
};
82
/*
 * An IB device together with its protection domain, as handed out by
 * rtrs_ib_dev_find_or_add() and released with rtrs_ib_dev_put().
 * @ib_dev: underlying IB device
 * @ib_pd:  protection domain
 * @ref:    reference counter (kref)
 * @entry:  list linkage
 * @pool:   pool this device belongs to
 */
struct rtrs_ib_dev {
	struct ib_device *ib_dev;
	struct ib_pd *ib_pd;
	struct kref ref;
	struct list_head entry;
	struct rtrs_rdma_dev_pd *pool;
};
90
/*
 * State of a single RTRS connection.
 * @sess:  session the connection belongs to
 * @qp:    IB queue pair
 * @cq:    IB completion queue
 * @cm_id: RDMA connection-manager identifier
 * @cid:   connection id
 */
struct rtrs_con {
	struct rtrs_sess *sess;
	struct ib_qp *qp;
	struct ib_cq *cq;
	struct rdma_cm_id *cm_id;
	unsigned int cid;
};
98
/*
 * State of an RTRS session (the conn_req message documents a session as
 * a "path").
 * @entry:          list linkage
 * @dst_addr:       destination address
 * @src_addr:       source address
 * @sessname:       session name, at most NAME_MAX bytes
 * @uuid:           session UUID
 * @con:            array of pointers to the session's connections
 * @con_num:        number of connections
 * @recon_cnt:      reconnection counter
 * @dev:            IB device / PD used by the session
 * @dev_ref:        NOTE(review): purpose not visible in this header
 * @hb_cqe:         heartbeat completion entry
 * @hb_err_handler: heartbeat error callback, receives a connection
 * @hb_wq:          workqueue for @hb_dwork
 * @hb_dwork:       delayed work item for the heartbeat
 * @hb_interval_ms: heartbeat interval in milliseconds
 * @hb_missed_cnt:  missed-heartbeat counter
 * @hb_missed_max:  missed-heartbeat limit
 *
 * The hb_* members mirror the parameters of rtrs_init_hb().
 */
struct rtrs_sess {
	struct list_head entry;
	struct sockaddr_storage dst_addr;
	struct sockaddr_storage src_addr;
	char sessname[NAME_MAX];
	uuid_t uuid;
	struct rtrs_con **con;
	unsigned int con_num;
	unsigned int recon_cnt;
	struct rtrs_ib_dev *dev;
	int dev_ref;
	struct ib_cqe *hb_cqe;
	void (*hb_err_handler)(struct rtrs_con *con);
	struct workqueue_struct *hb_wq;
	struct delayed_work hb_dwork;
	unsigned int hb_interval_ms;
	unsigned int hb_missed_cnt;
	unsigned int hb_missed_max;
};
118
/*
 * rtrs information unit
 * @cqe:       embedded completion queue entry
 * @dma_addr:  DMA address (presumably of the mapping of @buf - confirm
 *             in rtrs_iu_alloc())
 * @buf:       buffer pointer
 * @size:      size in bytes (presumably of @buf)
 * @direction: DMA data direction
 */
struct rtrs_iu {
	struct ib_cqe cqe;
	dma_addr_t dma_addr;
	void *buf;
	size_t size;
	enum dma_data_direction direction;
};
127
/**
 * enum rtrs_msg_types - RTRS message types, see also rtrs/README
 * @RTRS_MSG_INFO_REQ:		Client additional info request to the server
 * @RTRS_MSG_INFO_RSP:		Server additional info response to the client
 * @RTRS_MSG_WRITE:		Client writes data per RDMA to server
 * @RTRS_MSG_READ:		Client requests data transfer from server
 * @RTRS_MSG_RKEY_RSP:		Server refreshed rkey for rbuf
 *
 * The enumerators take their implicit values (0..4).  These are the values
 * documented for the __le16 @type field of the message structures below,
 * so reordering them would change what goes on the wire.
 */
enum rtrs_msg_types {
	RTRS_MSG_INFO_REQ,
	RTRS_MSG_INFO_RSP,
	RTRS_MSG_WRITE,
	RTRS_MSG_READ,
	RTRS_MSG_RKEY_RSP,
};
143
/**
 * enum rtrs_msg_flags - RTRS message flags.
 * @RTRS_MSG_NEED_INVAL_F:	Send invalidation in response.
 * @RTRS_MSG_NEW_RKEY_F:	Send refreshed rkey in response.
 *
 * Each flag is a distinct bit, so the values can be OR-ed together.
 */
enum rtrs_msg_flags {
	RTRS_MSG_NEED_INVAL_F = 1 << 0,
	RTRS_MSG_NEW_RKEY_F = 1 << 1,
};
153
/**
 * struct rtrs_sg_desc - RDMA-Buffer entry description
 * @addr:	Address of RDMA destination buffer
 * @key:	Authorization rkey to write to the buffer
 * @len:	Size of the buffer
 *
 * All members are little-endian (__le64/__le32); the structure is used as
 * the element type of the @desc arrays in the info-response and RDMA-read
 * messages.
 */
struct rtrs_sg_desc {
	__le64 addr;
	__le32 key;
	__le32 len;
};
165
/**
 * struct rtrs_msg_conn_req - Client connection request to the server
 * @__cma_version: Set to 0 by cma.c in case of AF_IB, must not be touched
 * @__ip_version:  Must be 0 on the sender side, see the comment below
 * @magic:	   RTRS magic
 * @version:	   RTRS protocol version
 * @cid:	   Current connection id
 * @cid_num:	   Number of connections per session
 * @recon_cnt:	   Reconnections counter
 * @sess_uuid:	   UUID of a session (path)
 * @paths_uuid:	   UUID of a group of sessions (paths)
 * @first_conn:	   One-bit flag; presumably marks the first connection of
 *		   a session - confirm against the client/server code
 * @reserved_bits: Remaining 7 bits of the flag byte, reserved
 * @reserved:	   Reserved bytes, pad the message to 56 bytes
 *
 * NOTE: max size 56 bytes, see man rdma_connect().
 */
struct rtrs_msg_conn_req {
	/* Is set to 0 by cma.c in case of AF_IB, do not touch that.
	 * see https://www.spinics.net/lists/linux-rdma/msg22397.html
	 */
	u8 __cma_version;
	/* On sender side that should be set to 0, or cma_save_ip_info()
	 * extract garbage and will fail.
	 */
	u8 __ip_version;
	__le16 magic;
	__le16 version;
	__le16 cid;
	__le16 cid_num;
	__le16 recon_cnt;
	uuid_t sess_uuid;
	uuid_t paths_uuid;
	u8 first_conn : 1;
	u8 reserved_bits : 7;
	u8 reserved[11];
};
198
/**
 * struct rtrs_msg_conn_rsp - Server connection response to the client
 * @magic:	   RTRS magic
 * @version:	   RTRS protocol version
 * @errno:	   If rdma_accept() then 0, if rdma_reject() indicates error
 * @queue_depth:   max inflight messages (queue-depth) in this session
 * @max_io_size:   max io size server supports
 * @max_hdr_size:  max msg header size server supports
 * @flags:	   flag bits; their meaning is not defined in this header
 * @reserved:	   reserved bytes, pad the message to 56 bytes
 *
 * NOTE: size is 56 bytes, max possible is 136 bytes, see man rdma_accept().
 */
struct rtrs_msg_conn_rsp {
	__le16 magic;
	__le16 version;
	__le16 errno;
	__le16 queue_depth;
	__le32 max_io_size;
	__le32 max_hdr_size;
	__le32 flags;
	u8 reserved[36];
};
220
/**
 * struct rtrs_msg_info_req - Client additional info request to the server
 * @type:	@RTRS_MSG_INFO_REQ
 * @sessname:	Session name chosen by client
 * @reserved:	Reserved bytes
 */
struct rtrs_msg_info_req {
	__le16 type;
	u8 sessname[NAME_MAX];
	u8 reserved[15];
};
231
/**
 * struct rtrs_msg_info_rsp - Server additional info response to the client
 * @type:	@RTRS_MSG_INFO_RSP
 * @sg_cnt:	Number of @desc entries
 * @reserved:	Reserved bytes
 * @desc:	RDMA buffers where the client can write to server
 *		(flexible array, @sg_cnt elements)
 */
struct rtrs_msg_info_rsp {
	__le16 type;
	__le16 sg_cnt;
	u8 reserved[4];
	struct rtrs_sg_desc desc[];
};
244
/**
 * struct rtrs_msg_rkey_rsp - Server refreshed rkey for an RDMA buffer
 * @type:	@RTRS_MSG_RKEY_RSP
 * @buf_id:	RDMA buf_id of the new rkey
 * @rkey:	new remote key for RDMA buffers id from server
 */
struct rtrs_msg_rkey_rsp {
	__le16 type;
	__le16 buf_id;
	__le32 rkey;
};
256
/**
 * struct rtrs_msg_rdma_read - RDMA data transfer request from client
 * @type:		always @RTRS_MSG_READ
 * @usr_len:		length of user payload
 * @flags:		flag bits; presumably a mask of enum rtrs_msg_flags
 *			values - confirm against the client/server code
 * @sg_cnt:		number of @desc entries
 * @desc:		RDMA buffers where the server can write the result to
 */
struct rtrs_msg_rdma_read {
	__le16 type;
	__le16 usr_len;
	__le16 flags;
	__le16 sg_cnt;
	struct rtrs_sg_desc desc[];
};
271
/**
 * struct rtrs_msg_rdma_write - Message transferred to server with RDMA-Write
 * @type:		always @RTRS_MSG_WRITE
 * @usr_len:		length of user payload
 */
struct rtrs_msg_rdma_write {
	__le16 type;
	__le16 usr_len;
};
281
/**
 * struct rtrs_msg_rdma_hdr - header for read or write request
 * @type:		@RTRS_MSG_WRITE | @RTRS_MSG_READ
 *
 * Matches the leading @type member of struct rtrs_msg_rdma_read and
 * struct rtrs_msg_rdma_write.
 */
struct rtrs_msg_rdma_hdr {
	__le16 type;
};
289
290 /* rtrs.c */
291
292 struct rtrs_iu *rtrs_iu_alloc(u32 queue_size, size_t size, gfp_t t,
293 struct ib_device *dev, enum dma_data_direction,
294 void (*done)(struct ib_cq *cq, struct ib_wc *wc));
/*
 * Release IUs obtained from rtrs_iu_alloc(); takes the same device and
 * queue_size.
 */
void rtrs_iu_free(struct rtrs_iu *iu, struct ib_device *dev, u32 queue_size);

/*
 * Posting helpers for an IU on a connection.  All return an int status.
 * The @head argument is a send work request supplied by the caller;
 * NOTE(review): how it is chained with the request built for @iu is
 * defined in rtrs.c - confirm there.
 */
int rtrs_iu_post_recv(struct rtrs_con *con, struct rtrs_iu *iu);
int rtrs_iu_post_send(struct rtrs_con *con, struct rtrs_iu *iu, size_t size,
		      struct ib_send_wr *head);
int rtrs_iu_post_rdma_write_imm(struct rtrs_con *con, struct rtrs_iu *iu,
				struct ib_sge *sge, unsigned int num_sge,
				u32 rkey, u64 rdma_addr, u32 imm_data,
				enum ib_send_flags flags,
				struct ib_send_wr *head);
304
/*
 * "Empty" variants of the posting helpers: they take a bare completion
 * entry instead of an IU (and, for the write, only the immediate data),
 * i.e. no data buffer is passed in.  Both return an int status.
 */
int rtrs_post_recv_empty(struct rtrs_con *con, struct ib_cqe *cqe);
int rtrs_post_rdma_write_imm_empty(struct rtrs_con *con, struct ib_cqe *cqe,
				   u32 imm_data, enum ib_send_flags flags,
				   struct ib_send_wr *head);
309
/*
 * Create / destroy the completion queue and queue pair of a connection
 * (struct rtrs_con holds them as ->cq and ->qp).  The create call takes
 * the CQ vector, size and polling context plus the QP send/receive
 * limits, and returns an int status.
 */
int rtrs_cq_qp_create(struct rtrs_sess *rtrs_sess, struct rtrs_con *con,
		      u32 max_send_sge, int cq_vector, int cq_size,
		      u32 max_send_wr, u32 max_recv_wr,
		      enum ib_poll_context poll_ctx);
void rtrs_cq_qp_destroy(struct rtrs_con *con);
315
/*
 * Heartbeat (HB) interface of a session.  The parameters of
 * rtrs_init_hb() correspond by name and type to the hb_* members of
 * struct rtrs_sess; presumably they are stored there (confirm in rtrs.c).
 */
void rtrs_init_hb(struct rtrs_sess *sess, struct ib_cqe *cqe,
		  unsigned int interval_ms, unsigned int missed_max,
		  void (*err_handler)(struct rtrs_con *con),
		  struct workqueue_struct *wq);
void rtrs_start_hb(struct rtrs_sess *sess);
void rtrs_stop_hb(struct rtrs_sess *sess);
void rtrs_send_hb_ack(struct rtrs_sess *sess);
323
/* Set up / tear down a device pool; @pd_flags matches the pool's ->pd_flags. */
void rtrs_rdma_dev_pd_init(enum ib_pd_flags pd_flags,
			   struct rtrs_rdma_dev_pd *pool);
void rtrs_rdma_dev_pd_deinit(struct rtrs_rdma_dev_pd *pool);

/*
 * Look up (or add) the pool entry for @ib_dev, and drop a reference on it.
 * NOTE(review): the meaning of rtrs_ib_dev_put()'s int return value is
 * defined in rtrs.c - confirm there.
 */
struct rtrs_ib_dev *rtrs_ib_dev_find_or_add(struct ib_device *ib_dev,
					    struct rtrs_rdma_dev_pd *pool);
int rtrs_ib_dev_put(struct rtrs_ib_dev *dev);
331
/**
 * rtrs_to_imm() - pack a type and a payload into a 32-bit immediate value
 * @type:	immediate type, only the low MAX_IMM_TYPE_BITS bits are used
 * @payload:	payload, only the low MAX_IMM_PAYL_BITS bits are used
 *
 * Return: the type in the upper bits combined with the masked payload in
 * the lower bits.
 */
static inline u32 rtrs_to_imm(u32 type, u32 payload)
{
	u32 type_part, payl_part;

	/* The two fields must fill the 32-bit immediate exactly ... */
	BUILD_BUG_ON(MAX_IMM_PAYL_BITS + MAX_IMM_TYPE_BITS != 32);
	/* ... and every defined type code must fit into the type field. */
	BUILD_BUG_ON(RTRS_LAST_IMM > (1<<MAX_IMM_TYPE_BITS));

	type_part = (type & MAX_IMM_TYPE_MASK) << MAX_IMM_PAYL_BITS;
	payl_part = payload & MAX_IMM_PAYL_MASK;

	return type_part | payl_part;
}
339
/**
 * rtrs_from_imm() - split an immediate value into type and payload
 * @imm:	32-bit immediate value
 * @type:	receives the bits above the payload field
 * @payload:	receives the low MAX_IMM_PAYL_BITS bits
 */
static inline void rtrs_from_imm(u32 imm, u32 *type, u32 *payload)
{
	const u32 payl_part = imm & MAX_IMM_PAYL_MASK;
	const u32 type_part = imm >> MAX_IMM_PAYL_BITS;

	/* Payload is stored first, type second. */
	*payload = payl_part;
	*type = type_part;
}
345
/**
 * rtrs_to_io_req_imm() - build the immediate value of an IO request
 * @addr:	value placed in the payload field
 *
 * Return: immediate value of type RTRS_IO_REQ_IMM carrying @addr.
 */
static inline u32 rtrs_to_io_req_imm(u32 addr)
{
	const u32 imm = rtrs_to_imm(RTRS_IO_REQ_IMM, addr);

	return imm;
}
350
/**
 * rtrs_to_io_rsp_imm() - build the immediate value of an IO response
 * @msg_id:	message id, low 19 bits are used
 * @errno:	error code; its absolute value is stored, low 9 bits are used
 * @w_inval:	selects RTRS_IO_RSP_W_INV_IMM instead of RTRS_IO_RSP_IMM
 *
 * Return: immediate value whose payload holds the error in bits 27..19
 * and the message id in bits 18..0.
 */
static inline u32 rtrs_to_io_rsp_imm(u32 msg_id, int errno, bool w_inval)
{
	enum rtrs_imm_type type = RTRS_IO_RSP_IMM;
	u32 err_part, id_part;

	if (w_inval)
		type = RTRS_IO_RSP_W_INV_IMM;

	/* 9 bits for errno, 19 bits for msg_id */
	err_part = (abs(errno) & 0x1ff) << 19;
	id_part = msg_id & 0x7ffff;

	return rtrs_to_imm(type, err_part | id_part);
}
362
/**
 * rtrs_from_io_rsp_imm() - decode the payload of an IO response immediate
 * @payload:	payload part of the immediate value
 * @msg_id:	receives the low 19 bits
 * @errno:	receives the negated value of bits 27..19
 */
static inline void rtrs_from_io_rsp_imm(u32 payload, u32 *msg_id, int *errno)
{
	/* 9 bits for errno, 19 bits for msg_id */
	const u32 id_part = payload & 0x7ffff;
	const u32 err_part = (payload >> 19) & 0x1ff;

	*msg_id = id_part;
	*errno = -(int)err_part;
}
369
/*
 * STAT_STORE_FUNC - generate the sysfs store handler <set_value>_store().
 * @type:      structure type that embeds the kobject as member kobj_stats
 * @set_value: prefix of the generated function name
 * @reset:     function called as reset(stats, bool); non-zero means error
 *
 * Writing "1" calls reset(stats, true), writing "0" calls
 * reset(stats, false); any other input yields -EINVAL.  On success the
 * handler returns @count.
 */
#define STAT_STORE_FUNC(type, set_value, reset)				\
static ssize_t set_value##_store(struct kobject *kobj,			\
				 struct kobj_attribute *attr,		\
				 const char *buf, size_t count)		\
{									\
	type *stats = container_of(kobj, type, kobj_stats);		\
	int ret;							\
									\
	if (sysfs_streq(buf, "1"))					\
		ret = reset(stats, true);				\
	else if (sysfs_streq(buf, "0"))					\
		ret = reset(stats, false);				\
	else								\
		ret = -EINVAL;						\
									\
	if (ret)							\
		return ret;						\
									\
	return count;							\
}
387
/*
 * STAT_SHOW_FUNC - generate the sysfs show handler <get_value>_show().
 * @type:      structure type that embeds the kobject as member kobj_stats
 * @get_value: prefix of the generated function name
 * @print:     function called as print(stats, page, PAGE_SIZE); its
 *             return value becomes the handler's return value
 */
#define STAT_SHOW_FUNC(type, get_value, print)				\
static ssize_t get_value##_show(struct kobject *kobj,			\
				struct kobj_attribute *attr,		\
				char *page)				\
{									\
	type *stats;							\
									\
	stats = container_of(kobj, type, kobj_stats);			\
	return print(stats, page, PAGE_SIZE);				\
}
397
/*
 * STAT_ATTR - define a read/write sysfs attribute <stat>_attr together
 * with its <stat>_show() and <stat>_store() handlers.
 */
#define STAT_ATTR(type, stat, print, reset)				\
STAT_SHOW_FUNC(type, stat, print)					\
STAT_STORE_FUNC(type, stat, reset)					\
static struct kobj_attribute stat##_attr = __ATTR_RW(stat)
402
403 #endif /* RTRS_PRI_H */
404