• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  *  Shared Memory Communications over RDMA (SMC-R) and RoCE
4  *
5  *  CLC (connection layer control) handshake over initial TCP socket to
6  *  prepare for RDMA traffic
7  *
8  *  Copyright IBM Corp. 2016
9  *
10  *  Author(s):  Ursula Braun <ubraun@linux.vnet.ibm.com>
11  */
12 
13 #include <linux/in.h>
14 #include <linux/if_ether.h>
15 #include <linux/sched/signal.h>
16 
17 #include <net/sock.h>
18 #include <net/tcp.h>
19 
20 #include "smc.h"
21 #include "smc_core.h"
22 #include "smc_clc.h"
23 #include "smc_ib.h"
24 
25 /* Wait for data on the tcp-socket, analyze received data
26  * Returns:
27  * 0 if success and it was not a decline that we received.
28  * SMC_CLC_DECL_REPLY if decline received for fallback w/o another decl send.
29  * clcsock error, -EINTR, -ECONNRESET, -EPROTO otherwise.
30  */
smc_clc_wait_msg(struct smc_sock * smc,void * buf,int buflen,u8 expected_type)31 int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
32 		     u8 expected_type)
33 {
34 	struct sock *clc_sk = smc->clcsock->sk;
35 	struct smc_clc_msg_hdr *clcm = buf;
36 	struct msghdr msg = {NULL, 0};
37 	int reason_code = 0;
38 	struct kvec vec;
39 	int len, datlen;
40 	int krflags;
41 
42 	/* peek the first few bytes to determine length of data to receive
43 	 * so we don't consume any subsequent CLC message or payload data
44 	 * in the TCP byte stream
45 	 */
46 	vec.iov_base = buf;
47 	vec.iov_len = buflen;
48 	krflags = MSG_PEEK | MSG_WAITALL;
49 	smc->clcsock->sk->sk_rcvtimeo = CLC_WAIT_TIME;
50 	len = kernel_recvmsg(smc->clcsock, &msg, &vec, 1,
51 			     sizeof(struct smc_clc_msg_hdr), krflags);
52 	if (signal_pending(current)) {
53 		reason_code = -EINTR;
54 		clc_sk->sk_err = EINTR;
55 		smc->sk.sk_err = EINTR;
56 		goto out;
57 	}
58 	if (clc_sk->sk_err) {
59 		reason_code = -clc_sk->sk_err;
60 		smc->sk.sk_err = clc_sk->sk_err;
61 		goto out;
62 	}
63 	if (!len) { /* peer has performed orderly shutdown */
64 		smc->sk.sk_err = ECONNRESET;
65 		reason_code = -ECONNRESET;
66 		goto out;
67 	}
68 	if (len < 0) {
69 		smc->sk.sk_err = -len;
70 		reason_code = len;
71 		goto out;
72 	}
73 	datlen = ntohs(clcm->length);
74 	if ((len < sizeof(struct smc_clc_msg_hdr)) ||
75 	    (datlen < sizeof(struct smc_clc_msg_decline)) ||
76 	    (datlen > sizeof(struct smc_clc_msg_accept_confirm)) ||
77 	    memcmp(clcm->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)) ||
78 	    ((clcm->type != SMC_CLC_DECLINE) &&
79 	     (clcm->type != expected_type))) {
80 		smc->sk.sk_err = EPROTO;
81 		reason_code = -EPROTO;
82 		goto out;
83 	}
84 
85 	/* receive the complete CLC message */
86 	vec.iov_base = buf;
87 	vec.iov_len = buflen;
88 	memset(&msg, 0, sizeof(struct msghdr));
89 	krflags = MSG_WAITALL;
90 	smc->clcsock->sk->sk_rcvtimeo = CLC_WAIT_TIME;
91 	len = kernel_recvmsg(smc->clcsock, &msg, &vec, 1, datlen, krflags);
92 	if (len < datlen) {
93 		smc->sk.sk_err = EPROTO;
94 		reason_code = -EPROTO;
95 		goto out;
96 	}
97 	if (clcm->type == SMC_CLC_DECLINE) {
98 		reason_code = SMC_CLC_DECL_REPLY;
99 		if (((struct smc_clc_msg_decline *)buf)->hdr.flag) {
100 			smc->conn.lgr->sync_err = true;
101 			smc_lgr_terminate(smc->conn.lgr);
102 		}
103 	}
104 
105 out:
106 	return reason_code;
107 }
108 
109 /* send CLC DECLINE message across internal TCP socket */
smc_clc_send_decline(struct smc_sock * smc,u32 peer_diag_info)110 int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info)
111 {
112 	struct smc_clc_msg_decline dclc;
113 	struct msghdr msg;
114 	struct kvec vec;
115 	int len;
116 
117 	memset(&dclc, 0, sizeof(dclc));
118 	memcpy(dclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
119 	dclc.hdr.type = SMC_CLC_DECLINE;
120 	dclc.hdr.length = htons(sizeof(struct smc_clc_msg_decline));
121 	dclc.hdr.version = SMC_CLC_V1;
122 	dclc.hdr.flag = (peer_diag_info == SMC_CLC_DECL_SYNCERR) ? 1 : 0;
123 	memcpy(dclc.id_for_peer, local_systemid, sizeof(local_systemid));
124 	dclc.peer_diagnosis = htonl(peer_diag_info);
125 	memcpy(dclc.trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
126 
127 	memset(&msg, 0, sizeof(msg));
128 	vec.iov_base = &dclc;
129 	vec.iov_len = sizeof(struct smc_clc_msg_decline);
130 	len = kernel_sendmsg(smc->clcsock, &msg, &vec, 1,
131 			     sizeof(struct smc_clc_msg_decline));
132 	if (len < sizeof(struct smc_clc_msg_decline))
133 		smc->sk.sk_err = EPROTO;
134 	if (len < 0)
135 		smc->sk.sk_err = -len;
136 	return len;
137 }
138 
139 /* send CLC PROPOSAL message across internal TCP socket */
smc_clc_send_proposal(struct smc_sock * smc,struct smc_ib_device * smcibdev,u8 ibport)140 int smc_clc_send_proposal(struct smc_sock *smc,
141 			  struct smc_ib_device *smcibdev,
142 			  u8 ibport)
143 {
144 	struct smc_clc_msg_proposal pclc;
145 	int reason_code = 0;
146 	struct msghdr msg;
147 	struct kvec vec;
148 	int len, rc;
149 
150 	/* send SMC Proposal CLC message */
151 	memset(&pclc, 0, sizeof(pclc));
152 	memcpy(pclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
153 	pclc.hdr.type = SMC_CLC_PROPOSAL;
154 	pclc.hdr.length = htons(sizeof(pclc));
155 	pclc.hdr.version = SMC_CLC_V1;		/* SMC version */
156 	memcpy(pclc.lcl.id_for_peer, local_systemid, sizeof(local_systemid));
157 	memcpy(&pclc.lcl.gid, &smcibdev->gid[ibport - 1], SMC_GID_SIZE);
158 	memcpy(&pclc.lcl.mac, &smcibdev->mac[ibport - 1], ETH_ALEN);
159 
160 	/* determine subnet and mask from internal TCP socket */
161 	rc = smc_netinfo_by_tcpsk(smc->clcsock, &pclc.outgoing_subnet,
162 				  &pclc.prefix_len);
163 	if (rc)
164 		return SMC_CLC_DECL_CNFERR; /* configuration error */
165 	memcpy(pclc.trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
166 	memset(&msg, 0, sizeof(msg));
167 	vec.iov_base = &pclc;
168 	vec.iov_len = sizeof(pclc);
169 	/* due to the few bytes needed for clc-handshake this cannot block */
170 	len = kernel_sendmsg(smc->clcsock, &msg, &vec, 1, sizeof(pclc));
171 	if (len < sizeof(pclc)) {
172 		if (len >= 0) {
173 			reason_code = -ENETUNREACH;
174 			smc->sk.sk_err = -reason_code;
175 		} else {
176 			smc->sk.sk_err = smc->clcsock->sk->sk_err;
177 			reason_code = -smc->sk.sk_err;
178 		}
179 	}
180 
181 	return reason_code;
182 }
183 
184 /* send CLC CONFIRM message across internal TCP socket */
smc_clc_send_confirm(struct smc_sock * smc)185 int smc_clc_send_confirm(struct smc_sock *smc)
186 {
187 	struct smc_connection *conn = &smc->conn;
188 	struct smc_clc_msg_accept_confirm cclc;
189 	struct smc_link *link;
190 	int reason_code = 0;
191 	struct msghdr msg;
192 	struct kvec vec;
193 	int len;
194 
195 	link = &conn->lgr->lnk[SMC_SINGLE_LINK];
196 	/* send SMC Confirm CLC msg */
197 	memset(&cclc, 0, sizeof(cclc));
198 	memcpy(cclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
199 	cclc.hdr.type = SMC_CLC_CONFIRM;
200 	cclc.hdr.length = htons(sizeof(cclc));
201 	cclc.hdr.version = SMC_CLC_V1;		/* SMC version */
202 	memcpy(cclc.lcl.id_for_peer, local_systemid, sizeof(local_systemid));
203 	memcpy(&cclc.lcl.gid, &link->smcibdev->gid[link->ibport - 1],
204 	       SMC_GID_SIZE);
205 	memcpy(&cclc.lcl.mac, &link->smcibdev->mac[link->ibport - 1], ETH_ALEN);
206 	hton24(cclc.qpn, link->roce_qp->qp_num);
207 	cclc.rmb_rkey =
208 		htonl(conn->rmb_desc->mr_rx[SMC_SINGLE_LINK]->rkey);
209 	cclc.conn_idx = 1; /* for now: 1 RMB = 1 RMBE */
210 	cclc.rmbe_alert_token = htonl(conn->alert_token_local);
211 	cclc.qp_mtu = min(link->path_mtu, link->peer_mtu);
212 	cclc.rmbe_size = conn->rmbe_size_short;
213 	cclc.rmb_dma_addr = cpu_to_be64(
214 		(u64)sg_dma_address(conn->rmb_desc->sgt[SMC_SINGLE_LINK].sgl));
215 	hton24(cclc.psn, link->psn_initial);
216 
217 	memcpy(cclc.trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
218 
219 	memset(&msg, 0, sizeof(msg));
220 	vec.iov_base = &cclc;
221 	vec.iov_len = sizeof(cclc);
222 	len = kernel_sendmsg(smc->clcsock, &msg, &vec, 1, sizeof(cclc));
223 	if (len < sizeof(cclc)) {
224 		if (len >= 0) {
225 			reason_code = -ENETUNREACH;
226 			smc->sk.sk_err = -reason_code;
227 		} else {
228 			smc->sk.sk_err = smc->clcsock->sk->sk_err;
229 			reason_code = -smc->sk.sk_err;
230 		}
231 	}
232 	return reason_code;
233 }
234 
235 /* send CLC ACCEPT message across internal TCP socket */
smc_clc_send_accept(struct smc_sock * new_smc,int srv_first_contact)236 int smc_clc_send_accept(struct smc_sock *new_smc, int srv_first_contact)
237 {
238 	struct smc_connection *conn = &new_smc->conn;
239 	struct smc_clc_msg_accept_confirm aclc;
240 	struct smc_link *link;
241 	struct msghdr msg;
242 	struct kvec vec;
243 	int rc = 0;
244 	int len;
245 
246 	link = &conn->lgr->lnk[SMC_SINGLE_LINK];
247 	memset(&aclc, 0, sizeof(aclc));
248 	memcpy(aclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
249 	aclc.hdr.type = SMC_CLC_ACCEPT;
250 	aclc.hdr.length = htons(sizeof(aclc));
251 	aclc.hdr.version = SMC_CLC_V1;		/* SMC version */
252 	if (srv_first_contact)
253 		aclc.hdr.flag = 1;
254 	memcpy(aclc.lcl.id_for_peer, local_systemid, sizeof(local_systemid));
255 	memcpy(&aclc.lcl.gid, &link->smcibdev->gid[link->ibport - 1],
256 	       SMC_GID_SIZE);
257 	memcpy(&aclc.lcl.mac, link->smcibdev->mac[link->ibport - 1], ETH_ALEN);
258 	hton24(aclc.qpn, link->roce_qp->qp_num);
259 	aclc.rmb_rkey =
260 		htonl(conn->rmb_desc->mr_rx[SMC_SINGLE_LINK]->rkey);
261 	aclc.conn_idx = 1;			/* as long as 1 RMB = 1 RMBE */
262 	aclc.rmbe_alert_token = htonl(conn->alert_token_local);
263 	aclc.qp_mtu = link->path_mtu;
264 	aclc.rmbe_size = conn->rmbe_size_short,
265 	aclc.rmb_dma_addr = cpu_to_be64(
266 		(u64)sg_dma_address(conn->rmb_desc->sgt[SMC_SINGLE_LINK].sgl));
267 	hton24(aclc.psn, link->psn_initial);
268 	memcpy(aclc.trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
269 
270 	memset(&msg, 0, sizeof(msg));
271 	vec.iov_base = &aclc;
272 	vec.iov_len = sizeof(aclc);
273 	len = kernel_sendmsg(new_smc->clcsock, &msg, &vec, 1, sizeof(aclc));
274 	if (len < sizeof(aclc)) {
275 		if (len >= 0)
276 			new_smc->sk.sk_err = EPROTO;
277 		else
278 			new_smc->sk.sk_err = new_smc->clcsock->sk->sk_err;
279 		rc = sock_error(&new_smc->sk);
280 	}
281 
282 	return rc;
283 }
284