1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * Shared Memory Communications over RDMA (SMC-R) and RoCE
4 *
5 * CLC (connection layer control) handshake over initial TCP socket to
6 * prepare for RDMA traffic
7 *
8 * Copyright IBM Corp. 2016
9 *
10 * Author(s): Ursula Braun <ubraun@linux.vnet.ibm.com>
11 */
12
13 #include <linux/in.h>
14 #include <linux/if_ether.h>
15 #include <linux/sched/signal.h>
16
17 #include <net/sock.h>
18 #include <net/tcp.h>
19
20 #include "smc.h"
21 #include "smc_core.h"
22 #include "smc_clc.h"
23 #include "smc_ib.h"
24
25 /* Wait for data on the tcp-socket, analyze received data
26 * Returns:
27 * 0 if success and it was not a decline that we received.
28 * SMC_CLC_DECL_REPLY if decline received for fallback w/o another decl send.
29 * clcsock error, -EINTR, -ECONNRESET, -EPROTO otherwise.
30 */
smc_clc_wait_msg(struct smc_sock * smc,void * buf,int buflen,u8 expected_type)31 int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
32 u8 expected_type)
33 {
34 struct sock *clc_sk = smc->clcsock->sk;
35 struct smc_clc_msg_hdr *clcm = buf;
36 struct msghdr msg = {NULL, 0};
37 int reason_code = 0;
38 struct kvec vec;
39 int len, datlen;
40 int krflags;
41
42 /* peek the first few bytes to determine length of data to receive
43 * so we don't consume any subsequent CLC message or payload data
44 * in the TCP byte stream
45 */
46 vec.iov_base = buf;
47 vec.iov_len = buflen;
48 krflags = MSG_PEEK | MSG_WAITALL;
49 smc->clcsock->sk->sk_rcvtimeo = CLC_WAIT_TIME;
50 len = kernel_recvmsg(smc->clcsock, &msg, &vec, 1,
51 sizeof(struct smc_clc_msg_hdr), krflags);
52 if (signal_pending(current)) {
53 reason_code = -EINTR;
54 clc_sk->sk_err = EINTR;
55 smc->sk.sk_err = EINTR;
56 goto out;
57 }
58 if (clc_sk->sk_err) {
59 reason_code = -clc_sk->sk_err;
60 smc->sk.sk_err = clc_sk->sk_err;
61 goto out;
62 }
63 if (!len) { /* peer has performed orderly shutdown */
64 smc->sk.sk_err = ECONNRESET;
65 reason_code = -ECONNRESET;
66 goto out;
67 }
68 if (len < 0) {
69 smc->sk.sk_err = -len;
70 reason_code = len;
71 goto out;
72 }
73 datlen = ntohs(clcm->length);
74 if ((len < sizeof(struct smc_clc_msg_hdr)) ||
75 (datlen < sizeof(struct smc_clc_msg_decline)) ||
76 (datlen > sizeof(struct smc_clc_msg_accept_confirm)) ||
77 memcmp(clcm->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)) ||
78 ((clcm->type != SMC_CLC_DECLINE) &&
79 (clcm->type != expected_type))) {
80 smc->sk.sk_err = EPROTO;
81 reason_code = -EPROTO;
82 goto out;
83 }
84
85 /* receive the complete CLC message */
86 vec.iov_base = buf;
87 vec.iov_len = buflen;
88 memset(&msg, 0, sizeof(struct msghdr));
89 krflags = MSG_WAITALL;
90 smc->clcsock->sk->sk_rcvtimeo = CLC_WAIT_TIME;
91 len = kernel_recvmsg(smc->clcsock, &msg, &vec, 1, datlen, krflags);
92 if (len < datlen) {
93 smc->sk.sk_err = EPROTO;
94 reason_code = -EPROTO;
95 goto out;
96 }
97 if (clcm->type == SMC_CLC_DECLINE) {
98 reason_code = SMC_CLC_DECL_REPLY;
99 if (((struct smc_clc_msg_decline *)buf)->hdr.flag) {
100 smc->conn.lgr->sync_err = true;
101 smc_lgr_terminate(smc->conn.lgr);
102 }
103 }
104
105 out:
106 return reason_code;
107 }
108
109 /* send CLC DECLINE message across internal TCP socket */
smc_clc_send_decline(struct smc_sock * smc,u32 peer_diag_info)110 int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info)
111 {
112 struct smc_clc_msg_decline dclc;
113 struct msghdr msg;
114 struct kvec vec;
115 int len;
116
117 memset(&dclc, 0, sizeof(dclc));
118 memcpy(dclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
119 dclc.hdr.type = SMC_CLC_DECLINE;
120 dclc.hdr.length = htons(sizeof(struct smc_clc_msg_decline));
121 dclc.hdr.version = SMC_CLC_V1;
122 dclc.hdr.flag = (peer_diag_info == SMC_CLC_DECL_SYNCERR) ? 1 : 0;
123 memcpy(dclc.id_for_peer, local_systemid, sizeof(local_systemid));
124 dclc.peer_diagnosis = htonl(peer_diag_info);
125 memcpy(dclc.trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
126
127 memset(&msg, 0, sizeof(msg));
128 vec.iov_base = &dclc;
129 vec.iov_len = sizeof(struct smc_clc_msg_decline);
130 len = kernel_sendmsg(smc->clcsock, &msg, &vec, 1,
131 sizeof(struct smc_clc_msg_decline));
132 if (len < sizeof(struct smc_clc_msg_decline))
133 smc->sk.sk_err = EPROTO;
134 if (len < 0)
135 smc->sk.sk_err = -len;
136 return len;
137 }
138
139 /* send CLC PROPOSAL message across internal TCP socket */
smc_clc_send_proposal(struct smc_sock * smc,struct smc_ib_device * smcibdev,u8 ibport)140 int smc_clc_send_proposal(struct smc_sock *smc,
141 struct smc_ib_device *smcibdev,
142 u8 ibport)
143 {
144 struct smc_clc_msg_proposal pclc;
145 int reason_code = 0;
146 struct msghdr msg;
147 struct kvec vec;
148 int len, rc;
149
150 /* send SMC Proposal CLC message */
151 memset(&pclc, 0, sizeof(pclc));
152 memcpy(pclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
153 pclc.hdr.type = SMC_CLC_PROPOSAL;
154 pclc.hdr.length = htons(sizeof(pclc));
155 pclc.hdr.version = SMC_CLC_V1; /* SMC version */
156 memcpy(pclc.lcl.id_for_peer, local_systemid, sizeof(local_systemid));
157 memcpy(&pclc.lcl.gid, &smcibdev->gid[ibport - 1], SMC_GID_SIZE);
158 memcpy(&pclc.lcl.mac, &smcibdev->mac[ibport - 1], ETH_ALEN);
159
160 /* determine subnet and mask from internal TCP socket */
161 rc = smc_netinfo_by_tcpsk(smc->clcsock, &pclc.outgoing_subnet,
162 &pclc.prefix_len);
163 if (rc)
164 return SMC_CLC_DECL_CNFERR; /* configuration error */
165 memcpy(pclc.trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
166 memset(&msg, 0, sizeof(msg));
167 vec.iov_base = &pclc;
168 vec.iov_len = sizeof(pclc);
169 /* due to the few bytes needed for clc-handshake this cannot block */
170 len = kernel_sendmsg(smc->clcsock, &msg, &vec, 1, sizeof(pclc));
171 if (len < sizeof(pclc)) {
172 if (len >= 0) {
173 reason_code = -ENETUNREACH;
174 smc->sk.sk_err = -reason_code;
175 } else {
176 smc->sk.sk_err = smc->clcsock->sk->sk_err;
177 reason_code = -smc->sk.sk_err;
178 }
179 }
180
181 return reason_code;
182 }
183
184 /* send CLC CONFIRM message across internal TCP socket */
smc_clc_send_confirm(struct smc_sock * smc)185 int smc_clc_send_confirm(struct smc_sock *smc)
186 {
187 struct smc_connection *conn = &smc->conn;
188 struct smc_clc_msg_accept_confirm cclc;
189 struct smc_link *link;
190 int reason_code = 0;
191 struct msghdr msg;
192 struct kvec vec;
193 int len;
194
195 link = &conn->lgr->lnk[SMC_SINGLE_LINK];
196 /* send SMC Confirm CLC msg */
197 memset(&cclc, 0, sizeof(cclc));
198 memcpy(cclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
199 cclc.hdr.type = SMC_CLC_CONFIRM;
200 cclc.hdr.length = htons(sizeof(cclc));
201 cclc.hdr.version = SMC_CLC_V1; /* SMC version */
202 memcpy(cclc.lcl.id_for_peer, local_systemid, sizeof(local_systemid));
203 memcpy(&cclc.lcl.gid, &link->smcibdev->gid[link->ibport - 1],
204 SMC_GID_SIZE);
205 memcpy(&cclc.lcl.mac, &link->smcibdev->mac[link->ibport - 1], ETH_ALEN);
206 hton24(cclc.qpn, link->roce_qp->qp_num);
207 cclc.rmb_rkey =
208 htonl(conn->rmb_desc->mr_rx[SMC_SINGLE_LINK]->rkey);
209 cclc.conn_idx = 1; /* for now: 1 RMB = 1 RMBE */
210 cclc.rmbe_alert_token = htonl(conn->alert_token_local);
211 cclc.qp_mtu = min(link->path_mtu, link->peer_mtu);
212 cclc.rmbe_size = conn->rmbe_size_short;
213 cclc.rmb_dma_addr = cpu_to_be64(
214 (u64)sg_dma_address(conn->rmb_desc->sgt[SMC_SINGLE_LINK].sgl));
215 hton24(cclc.psn, link->psn_initial);
216
217 memcpy(cclc.trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
218
219 memset(&msg, 0, sizeof(msg));
220 vec.iov_base = &cclc;
221 vec.iov_len = sizeof(cclc);
222 len = kernel_sendmsg(smc->clcsock, &msg, &vec, 1, sizeof(cclc));
223 if (len < sizeof(cclc)) {
224 if (len >= 0) {
225 reason_code = -ENETUNREACH;
226 smc->sk.sk_err = -reason_code;
227 } else {
228 smc->sk.sk_err = smc->clcsock->sk->sk_err;
229 reason_code = -smc->sk.sk_err;
230 }
231 }
232 return reason_code;
233 }
234
235 /* send CLC ACCEPT message across internal TCP socket */
smc_clc_send_accept(struct smc_sock * new_smc,int srv_first_contact)236 int smc_clc_send_accept(struct smc_sock *new_smc, int srv_first_contact)
237 {
238 struct smc_connection *conn = &new_smc->conn;
239 struct smc_clc_msg_accept_confirm aclc;
240 struct smc_link *link;
241 struct msghdr msg;
242 struct kvec vec;
243 int rc = 0;
244 int len;
245
246 link = &conn->lgr->lnk[SMC_SINGLE_LINK];
247 memset(&aclc, 0, sizeof(aclc));
248 memcpy(aclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
249 aclc.hdr.type = SMC_CLC_ACCEPT;
250 aclc.hdr.length = htons(sizeof(aclc));
251 aclc.hdr.version = SMC_CLC_V1; /* SMC version */
252 if (srv_first_contact)
253 aclc.hdr.flag = 1;
254 memcpy(aclc.lcl.id_for_peer, local_systemid, sizeof(local_systemid));
255 memcpy(&aclc.lcl.gid, &link->smcibdev->gid[link->ibport - 1],
256 SMC_GID_SIZE);
257 memcpy(&aclc.lcl.mac, link->smcibdev->mac[link->ibport - 1], ETH_ALEN);
258 hton24(aclc.qpn, link->roce_qp->qp_num);
259 aclc.rmb_rkey =
260 htonl(conn->rmb_desc->mr_rx[SMC_SINGLE_LINK]->rkey);
261 aclc.conn_idx = 1; /* as long as 1 RMB = 1 RMBE */
262 aclc.rmbe_alert_token = htonl(conn->alert_token_local);
263 aclc.qp_mtu = link->path_mtu;
264 aclc.rmbe_size = conn->rmbe_size_short,
265 aclc.rmb_dma_addr = cpu_to_be64(
266 (u64)sg_dma_address(conn->rmb_desc->sgt[SMC_SINGLE_LINK].sgl));
267 hton24(aclc.psn, link->psn_initial);
268 memcpy(aclc.trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
269
270 memset(&msg, 0, sizeof(msg));
271 vec.iov_base = &aclc;
272 vec.iov_len = sizeof(aclc);
273 len = kernel_sendmsg(new_smc->clcsock, &msg, &vec, 1, sizeof(aclc));
274 if (len < sizeof(aclc)) {
275 if (len >= 0)
276 new_smc->sk.sk_err = EPROTO;
277 else
278 new_smc->sk.sk_err = new_smc->clcsock->sk->sk_err;
279 rc = sock_error(&new_smc->sk);
280 }
281
282 return rc;
283 }
284