1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Intel MIC Platform Software Stack (MPSS)
4  *
5  * Copyright(c) 2014 Intel Corporation.
6  *
7  * Intel SCIF driver.
8  */
9 #include <linux/scif.h>
10 #include "scif_main.h"
11 #include "scif_map.h"
12 
13 static const char * const scif_ep_states[] = {
14 	"Unbound",
15 	"Bound",
16 	"Listening",
17 	"Connected",
18 	"Connecting",
19 	"Mapping",
20 	"Closing",
21 	"Close Listening",
22 	"Disconnected",
23 	"Zombie"};
24 
25 enum conn_async_state {
26 	ASYNC_CONN_IDLE = 1,	/* ep setup for async connect */
27 	ASYNC_CONN_INPROGRESS,	/* async connect in progress */
28 	ASYNC_CONN_FLUSH_WORK	/* async work flush in progress  */
29 };
30 
31 /*
32  * File operations for anonymous inode file associated with a SCIF endpoint,
33  * used in kernel mode SCIF poll. Kernel mode SCIF poll calls portions of the
34  * poll API in the kernel and these take in a struct file *. Since a struct
35  * file is not available to kernel mode SCIF, it uses an anonymous file for
36  * this purpose.
37  */
38 const struct file_operations scif_anon_fops = {
39 	.owner = THIS_MODULE,
40 };
41 
42 scif_epd_t scif_open(void)
43 {
44 	struct scif_endpt *ep;
45 	int err;
46 
47 	might_sleep();
48 	ep = kzalloc(sizeof(*ep), GFP_KERNEL);
49 	if (!ep)
50 		goto err_ep_alloc;
51 
52 	ep->qp_info.qp = kzalloc(sizeof(*ep->qp_info.qp), GFP_KERNEL);
53 	if (!ep->qp_info.qp)
54 		goto err_qp_alloc;
55 
56 	err = scif_anon_inode_getfile(ep);
57 	if (err)
58 		goto err_anon_inode;
59 
60 	spin_lock_init(&ep->lock);
61 	mutex_init(&ep->sendlock);
62 	mutex_init(&ep->recvlock);
63 
64 	scif_rma_ep_init(ep);
65 	ep->state = SCIFEP_UNBOUND;
66 	dev_dbg(scif_info.mdev.this_device,
67 		"SCIFAPI open: ep %p success\n", ep);
68 	return ep;
69 
70 err_anon_inode:
71 	kfree(ep->qp_info.qp);
72 err_qp_alloc:
73 	kfree(ep);
74 err_ep_alloc:
75 	return NULL;
76 }
77 EXPORT_SYMBOL_GPL(scif_open);
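
/*
 * Example (illustrative sketch, not part of the driver): a minimal
 * kernel-mode endpoint lifecycle built only on the exported calls in
 * this file. The port number 2000 and the error codes chosen here are
 * assumptions made for the example.
 */
static int __maybe_unused scif_example_open_bind(void)
{
	scif_epd_t epd;
	int ret;

	epd = scif_open();
	if (!epd)
		return -ENOMEM;

	/* A positive return value is the port the endpoint was bound to */
	ret = scif_bind(epd, 2000);
	if (ret < 0) {
		scif_close(epd);
		return ret;
	}

	/* ... use the endpoint (listen/connect/send/recv) ... */
	return scif_close(epd);
}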
78 
79 /*
80  * scif_disconnect_ep - Disconnects the endpoint if found
81  * @ep: The end point returned from scif_open()
82  */
83 static struct scif_endpt *scif_disconnect_ep(struct scif_endpt *ep)
84 {
85 	struct scifmsg msg;
86 	struct scif_endpt *fep = NULL;
87 	struct scif_endpt *tmpep;
88 	struct list_head *pos, *tmpq;
89 	int err;
90 
91 	/*
92 	 * Wake up any threads blocked in send()/recv() before closing
93 	 * out the connection. Grabbing and releasing the send/recv lock
94 	 * will ensure that any blocked senders/receivers have exited for
95 	 * Ring 0 endpoints. It is a Ring 0 bug to call send/recv after
96 	 * close. Ring 3 endpoints are not affected since close will not
97 	 * be called while there are IOCTLs executing.
98 	 */
99 	wake_up_interruptible(&ep->sendwq);
100 	wake_up_interruptible(&ep->recvwq);
101 	mutex_lock(&ep->sendlock);
102 	mutex_unlock(&ep->sendlock);
103 	mutex_lock(&ep->recvlock);
104 	mutex_unlock(&ep->recvlock);
105 
106 	/* Remove from the connected list */
107 	mutex_lock(&scif_info.connlock);
108 	list_for_each_safe(pos, tmpq, &scif_info.connected) {
109 		tmpep = list_entry(pos, struct scif_endpt, list);
110 		if (tmpep == ep) {
111 			list_del(pos);
112 			fep = tmpep;
113 			spin_lock(&ep->lock);
114 			break;
115 		}
116 	}
117 
118 	if (!fep) {
119 		/*
120 		 * The other side has completed the disconnect before
121 		 * the end point could be removed from the list. Therefore
122 		 * the ep lock is not taken; traverse the disconnected
123 		 * list to find the endpoint, then release the conn lock.
124 		 */
125 		list_for_each_safe(pos, tmpq, &scif_info.disconnected) {
126 			tmpep = list_entry(pos, struct scif_endpt, list);
127 			if (tmpep == ep) {
128 				list_del(pos);
129 				break;
130 			}
131 		}
132 		mutex_unlock(&scif_info.connlock);
133 		return NULL;
134 	}
135 
136 	init_completion(&ep->discon);
137 	msg.uop = SCIF_DISCNCT;
138 	msg.src = ep->port;
139 	msg.dst = ep->peer;
140 	msg.payload[0] = (u64)ep;
141 	msg.payload[1] = ep->remote_ep;
142 
143 	err = scif_nodeqp_send(ep->remote_dev, &msg);
144 	spin_unlock(&ep->lock);
145 	mutex_unlock(&scif_info.connlock);
146 
147 	if (!err)
148 		/* Wait for the remote node to respond with SCIF_DISCNT_ACK */
149 		wait_for_completion_timeout(&ep->discon,
150 					    SCIF_NODE_ALIVE_TIMEOUT);
151 	return ep;
152 }
153 
154 int scif_close(scif_epd_t epd)
155 {
156 	struct scif_endpt *ep = (struct scif_endpt *)epd;
157 	struct scif_endpt *tmpep;
158 	struct list_head *pos, *tmpq;
159 	enum scif_epd_state oldstate;
160 	bool flush_conn;
161 
162 	dev_dbg(scif_info.mdev.this_device, "SCIFAPI close: ep %p %s\n",
163 		ep, scif_ep_states[ep->state]);
164 	might_sleep();
165 	spin_lock(&ep->lock);
166 	flush_conn = (ep->conn_async_state == ASYNC_CONN_INPROGRESS);
167 	spin_unlock(&ep->lock);
168 
169 	if (flush_conn)
170 		flush_work(&scif_info.conn_work);
171 
172 	spin_lock(&ep->lock);
173 	oldstate = ep->state;
174 
175 	ep->state = SCIFEP_CLOSING;
176 
177 	switch (oldstate) {
178 	case SCIFEP_ZOMBIE:
179 		dev_err(scif_info.mdev.this_device,
180 			"SCIFAPI close: zombie state unexpected\n");
181 		/* fall through */
182 	case SCIFEP_DISCONNECTED:
183 		spin_unlock(&ep->lock);
184 		scif_unregister_all_windows(epd);
185 		/* Remove from the disconnected list */
186 		mutex_lock(&scif_info.connlock);
187 		list_for_each_safe(pos, tmpq, &scif_info.disconnected) {
188 			tmpep = list_entry(pos, struct scif_endpt, list);
189 			if (tmpep == ep) {
190 				list_del(pos);
191 				break;
192 			}
193 		}
194 		mutex_unlock(&scif_info.connlock);
195 		break;
196 	case SCIFEP_UNBOUND:
197 	case SCIFEP_BOUND:
198 	case SCIFEP_CONNECTING:
199 		spin_unlock(&ep->lock);
200 		break;
201 	case SCIFEP_MAPPING:
202 	case SCIFEP_CONNECTED:
203 	case SCIFEP_CLOSING:
204 	{
205 		spin_unlock(&ep->lock);
206 		scif_unregister_all_windows(epd);
207 		scif_disconnect_ep(ep);
208 		break;
209 	}
210 	case SCIFEP_LISTENING:
211 	case SCIFEP_CLLISTEN:
212 	{
213 		struct scif_conreq *conreq;
214 		struct scifmsg msg;
215 		struct scif_endpt *aep;
216 
217 		spin_unlock(&ep->lock);
218 		mutex_lock(&scif_info.eplock);
219 
220 		/* remove from listen list */
221 		list_for_each_safe(pos, tmpq, &scif_info.listen) {
222 			tmpep = list_entry(pos, struct scif_endpt, list);
223 			if (tmpep == ep)
224 				list_del(pos);
225 		}
226 		/* Remove any dangling accepts */
227 		while (ep->acceptcnt) {
228 			aep = list_first_entry(&ep->li_accept,
229 					       struct scif_endpt, liacceptlist);
230 			list_del(&aep->liacceptlist);
231 			scif_put_port(aep->port.port);
232 			list_for_each_safe(pos, tmpq, &scif_info.uaccept) {
233 				tmpep = list_entry(pos, struct scif_endpt,
234 						   miacceptlist);
235 				if (tmpep == aep) {
236 					list_del(pos);
237 					break;
238 				}
239 			}
240 			mutex_unlock(&scif_info.eplock);
241 			mutex_lock(&scif_info.connlock);
242 			list_for_each_safe(pos, tmpq, &scif_info.connected) {
243 				tmpep = list_entry(pos,
244 						   struct scif_endpt, list);
245 				if (tmpep == aep) {
246 					list_del(pos);
247 					break;
248 				}
249 			}
250 			list_for_each_safe(pos, tmpq, &scif_info.disconnected) {
251 				tmpep = list_entry(pos,
252 						   struct scif_endpt, list);
253 				if (tmpep == aep) {
254 					list_del(pos);
255 					break;
256 				}
257 			}
258 			mutex_unlock(&scif_info.connlock);
259 			scif_teardown_ep(aep);
260 			mutex_lock(&scif_info.eplock);
261 			scif_add_epd_to_zombie_list(aep, SCIF_EPLOCK_HELD);
262 			ep->acceptcnt--;
263 		}
264 
265 		spin_lock(&ep->lock);
266 		mutex_unlock(&scif_info.eplock);
267 
268 		/* Remove and reject any pending connection requests. */
269 		while (ep->conreqcnt) {
270 			conreq = list_first_entry(&ep->conlist,
271 						  struct scif_conreq, list);
272 			list_del(&conreq->list);
273 
274 			msg.uop = SCIF_CNCT_REJ;
275 			msg.dst.node = conreq->msg.src.node;
276 			msg.dst.port = conreq->msg.src.port;
277 			msg.payload[0] = conreq->msg.payload[0];
278 			msg.payload[1] = conreq->msg.payload[1];
279 			/*
280 			 * No Error Handling on purpose for scif_nodeqp_send().
281 			 * If the remote node is lost we still want to free the
282 			 * connection requests on the self node.
283 			 */
284 			scif_nodeqp_send(&scif_dev[conreq->msg.src.node],
285 					 &msg);
286 			ep->conreqcnt--;
287 			kfree(conreq);
288 		}
289 
290 		spin_unlock(&ep->lock);
291 		/* If a kSCIF accept is waiting wake it up */
292 		wake_up_interruptible(&ep->conwq);
293 		break;
294 	}
295 	}
296 	scif_put_port(ep->port.port);
297 	scif_anon_inode_fput(ep);
298 	scif_teardown_ep(ep);
299 	scif_add_epd_to_zombie_list(ep, !SCIF_EPLOCK_HELD);
300 	return 0;
301 }
302 EXPORT_SYMBOL_GPL(scif_close);
303 
304 /**
305  * scif_flush() - Wakes up any blocking accepts. The endpoint will no longer
306  *			accept new connections.
307  * @epd: The end point returned from scif_open()
308  */
309 int __scif_flush(scif_epd_t epd)
310 {
311 	struct scif_endpt *ep = (struct scif_endpt *)epd;
312 
313 	switch (ep->state) {
314 	case SCIFEP_LISTENING:
315 	{
316 		ep->state = SCIFEP_CLLISTEN;
317 
318 		/* If an accept is waiting wake it up */
319 		wake_up_interruptible(&ep->conwq);
320 		break;
321 	}
322 	default:
323 		break;
324 	}
325 	return 0;
326 }
327 
328 int scif_bind(scif_epd_t epd, u16 pn)
329 {
330 	struct scif_endpt *ep = (struct scif_endpt *)epd;
331 	int ret = 0;
332 	int tmp;
333 
334 	dev_dbg(scif_info.mdev.this_device,
335 		"SCIFAPI bind: ep %p %s requested port number %d\n",
336 		ep, scif_ep_states[ep->state], pn);
337 	if (pn) {
338 		/*
339 		 * Similar to IETF RFC 1700, SCIF ports below
340 		 * SCIF_ADMIN_PORT_END can only be bound by system (or root)
341 		 * processes or by processes executed by privileged users.
342 		 */
343 		if (pn < SCIF_ADMIN_PORT_END && !capable(CAP_SYS_ADMIN)) {
344 			ret = -EACCES;
345 			goto scif_bind_admin_exit;
346 		}
347 	}
348 
349 	spin_lock(&ep->lock);
350 	if (ep->state == SCIFEP_BOUND) {
351 		ret = -EINVAL;
352 		goto scif_bind_exit;
353 	} else if (ep->state != SCIFEP_UNBOUND) {
354 		ret = -EISCONN;
355 		goto scif_bind_exit;
356 	}
357 
358 	if (pn) {
359 		tmp = scif_rsrv_port(pn);
360 		if (tmp != pn) {
361 			ret = -EINVAL;
362 			goto scif_bind_exit;
363 		}
364 	} else {
365 		ret = scif_get_new_port();
366 		if (ret < 0)
367 			goto scif_bind_exit;
368 		pn = ret;
369 	}
370 
371 	ep->state = SCIFEP_BOUND;
372 	ep->port.node = scif_info.nodeid;
373 	ep->port.port = pn;
374 	ep->conn_async_state = ASYNC_CONN_IDLE;
375 	ret = pn;
376 	dev_dbg(scif_info.mdev.this_device,
377 		"SCIFAPI bind: bound to port number %d\n", pn);
378 scif_bind_exit:
379 	spin_unlock(&ep->lock);
380 scif_bind_admin_exit:
381 	return ret;
382 }
383 EXPORT_SYMBOL_GPL(scif_bind);
384 
385 int scif_listen(scif_epd_t epd, int backlog)
386 {
387 	struct scif_endpt *ep = (struct scif_endpt *)epd;
388 
389 	dev_dbg(scif_info.mdev.this_device,
390 		"SCIFAPI listen: ep %p %s\n", ep, scif_ep_states[ep->state]);
391 	spin_lock(&ep->lock);
392 	switch (ep->state) {
393 	case SCIFEP_ZOMBIE:
394 	case SCIFEP_CLOSING:
395 	case SCIFEP_CLLISTEN:
396 	case SCIFEP_UNBOUND:
397 	case SCIFEP_DISCONNECTED:
398 		spin_unlock(&ep->lock);
399 		return -EINVAL;
400 	case SCIFEP_LISTENING:
401 	case SCIFEP_CONNECTED:
402 	case SCIFEP_CONNECTING:
403 	case SCIFEP_MAPPING:
404 		spin_unlock(&ep->lock);
405 		return -EISCONN;
406 	case SCIFEP_BOUND:
407 		break;
408 	}
409 
410 	ep->state = SCIFEP_LISTENING;
411 	ep->backlog = backlog;
412 
413 	ep->conreqcnt = 0;
414 	ep->acceptcnt = 0;
415 	INIT_LIST_HEAD(&ep->conlist);
416 	init_waitqueue_head(&ep->conwq);
417 	INIT_LIST_HEAD(&ep->li_accept);
418 	spin_unlock(&ep->lock);
419 
420 	/*
421 	 * The listen state transition is complete, so delete the qp information,
422 	 * which is not needed on a listening ep, before adding it to the listen list
423 	 */
424 	scif_teardown_ep(ep);
425 	ep->qp_info.qp = NULL;
426 
427 	mutex_lock(&scif_info.eplock);
428 	list_add_tail(&ep->list, &scif_info.listen);
429 	mutex_unlock(&scif_info.eplock);
430 	return 0;
431 }
432 EXPORT_SYMBOL_GPL(scif_listen);
433 
434 /*
435  ************************************************************************
436  * SCIF connection flow:
437  *
438  * 1) A SCIF listening endpoint can call scif_accept(..) to wait for SCIF
439  *	connections via a SCIF_CNCT_REQ message
440  * 2) A SCIF endpoint can initiate a SCIF connection by calling
441  *	scif_connect(..) which calls scif_setup_qp_connect(..) which
442  *	allocates the local qp for the endpoint ring buffer and then sends
443  *	a SCIF_CNCT_REQ to the remote node and waits for a SCIF_CNCT_GNT or
444  *	a SCIF_CNCT_REJ message
445  * 3) The peer node handles a SCIF_CNCT_REQ via scif_cnctreq_resp(..) which
446  *	wakes up any threads blocked in step 1 or sends a SCIF_CNCT_REJ
447  *	message otherwise
448  * 4) A thread blocked waiting for incoming connections allocates its local
449  *	endpoint QP and ring buffer following which it sends a SCIF_CNCT_GNT
450  *	and waits for a SCIF_CNCT_GNT(N)ACK. If the allocation fails then
451  *	the node sends a SCIF_CNCT_REJ message
452  * 5) Upon receipt of a SCIF_CNCT_GNT or a SCIF_CNCT_REJ message the
453  *	connecting endpoint is woken up as part of handling
454  *	scif_cnctgnt_resp(..) following which it maps the remote endpoint's
455  *	QP, updates its outbound QP and sends a SCIF_CNCT_GNTACK message on
456  *	success or a SCIF_CNCT_GNTNACK message on failure and completes
457  *	the scif_connect(..) API
458  * 6) Upon receipt of a SCIF_CNCT_GNT(N)ACK the accepting endpoint blocked
459  *	in step 4 is woken up and completes the scif_accept(..) API
460  * 7) The SCIF connection is now established between the two SCIF endpoints.
461  */
462 static int scif_conn_func(struct scif_endpt *ep)
463 {
464 	int err = 0;
465 	struct scifmsg msg;
466 	struct device *spdev;
467 
468 	err = scif_reserve_dma_chan(ep);
469 	if (err) {
470 		dev_err(&ep->remote_dev->sdev->dev,
471 			"%s %d err %d\n", __func__, __LINE__, err);
472 		ep->state = SCIFEP_BOUND;
473 		goto connect_error_simple;
474 	}
475 	/* Initiate the first part of the endpoint QP setup */
476 	err = scif_setup_qp_connect(ep->qp_info.qp, &ep->qp_info.qp_offset,
477 				    SCIF_ENDPT_QP_SIZE, ep->remote_dev);
478 	if (err) {
479 		dev_err(&ep->remote_dev->sdev->dev,
480 			"%s err %d qp_offset 0x%llx\n",
481 			__func__, err, ep->qp_info.qp_offset);
482 		ep->state = SCIFEP_BOUND;
483 		goto connect_error_simple;
484 	}
485 
486 	spdev = scif_get_peer_dev(ep->remote_dev);
487 	if (IS_ERR(spdev)) {
488 		err = PTR_ERR(spdev);
489 		goto cleanup_qp;
490 	}
491 	/* Format connect message and send it */
492 	msg.src = ep->port;
493 	msg.dst = ep->conn_port;
494 	msg.uop = SCIF_CNCT_REQ;
495 	msg.payload[0] = (u64)ep;
496 	msg.payload[1] = ep->qp_info.qp_offset;
497 	err = _scif_nodeqp_send(ep->remote_dev, &msg);
498 	if (err)
499 		goto connect_error_dec;
500 	scif_put_peer_dev(spdev);
501 	/*
502 	 * Wait for the remote node to respond with SCIF_CNCT_GNT or
503 	 * SCIF_CNCT_REJ message.
504 	 */
505 	err = wait_event_timeout(ep->conwq, ep->state != SCIFEP_CONNECTING,
506 				 SCIF_NODE_ALIVE_TIMEOUT);
507 	if (!err) {
508 		dev_err(&ep->remote_dev->sdev->dev,
509 			"%s %d timeout\n", __func__, __LINE__);
510 		ep->state = SCIFEP_BOUND;
511 	}
512 	spdev = scif_get_peer_dev(ep->remote_dev);
513 	if (IS_ERR(spdev)) {
514 		err = PTR_ERR(spdev);
515 		goto cleanup_qp;
516 	}
517 	if (ep->state == SCIFEP_MAPPING) {
518 		err = scif_setup_qp_connect_response(ep->remote_dev,
519 						     ep->qp_info.qp,
520 						     ep->qp_info.gnt_pld);
521 		/*
522 		 * If the resources to map the queue are not available then
523 		 * we need to tell the other side to terminate the accept
524 		 */
525 		if (err) {
526 			dev_err(&ep->remote_dev->sdev->dev,
527 				"%s %d err %d\n", __func__, __LINE__, err);
528 			msg.uop = SCIF_CNCT_GNTNACK;
529 			msg.payload[0] = ep->remote_ep;
530 			_scif_nodeqp_send(ep->remote_dev, &msg);
531 			ep->state = SCIFEP_BOUND;
532 			goto connect_error_dec;
533 		}
534 
535 		msg.uop = SCIF_CNCT_GNTACK;
536 		msg.payload[0] = ep->remote_ep;
537 		err = _scif_nodeqp_send(ep->remote_dev, &msg);
538 		if (err) {
539 			ep->state = SCIFEP_BOUND;
540 			goto connect_error_dec;
541 		}
542 		ep->state = SCIFEP_CONNECTED;
543 		mutex_lock(&scif_info.connlock);
544 		list_add_tail(&ep->list, &scif_info.connected);
545 		mutex_unlock(&scif_info.connlock);
546 		dev_dbg(&ep->remote_dev->sdev->dev,
547 			"SCIFAPI connect: ep %p connected\n", ep);
548 	} else if (ep->state == SCIFEP_BOUND) {
549 		dev_dbg(&ep->remote_dev->sdev->dev,
550 			"SCIFAPI connect: ep %p connection refused\n", ep);
551 		err = -ECONNREFUSED;
552 		goto connect_error_dec;
553 	}
554 	scif_put_peer_dev(spdev);
555 	return err;
556 connect_error_dec:
557 	scif_put_peer_dev(spdev);
558 cleanup_qp:
559 	scif_cleanup_ep_qp(ep);
560 connect_error_simple:
561 	return err;
562 }
563 
564 /*
565  * scif_conn_handler:
566  *
567  * Workqueue handler for servicing non-blocking SCIF connect
568  *
569  */
570 void scif_conn_handler(struct work_struct *work)
571 {
572 	struct scif_endpt *ep;
573 
574 	do {
575 		ep = NULL;
576 		spin_lock(&scif_info.nb_connect_lock);
577 		if (!list_empty(&scif_info.nb_connect_list)) {
578 			ep = list_first_entry(&scif_info.nb_connect_list,
579 					      struct scif_endpt, conn_list);
580 			list_del(&ep->conn_list);
581 		}
582 		spin_unlock(&scif_info.nb_connect_lock);
583 		if (ep) {
584 			ep->conn_err = scif_conn_func(ep);
585 			wake_up_interruptible(&ep->conn_pend_wq);
586 		}
587 	} while (ep);
588 }
589 
590 int __scif_connect(scif_epd_t epd, struct scif_port_id *dst, bool non_block)
591 {
592 	struct scif_endpt *ep = (struct scif_endpt *)epd;
593 	int err = 0;
594 	struct scif_dev *remote_dev;
595 	struct device *spdev;
596 
597 	dev_dbg(scif_info.mdev.this_device, "SCIFAPI connect: ep %p %s\n", ep,
598 		scif_ep_states[ep->state]);
599 
600 	if (!scif_dev || dst->node > scif_info.maxid)
601 		return -ENODEV;
602 
603 	might_sleep();
604 
605 	remote_dev = &scif_dev[dst->node];
606 	spdev = scif_get_peer_dev(remote_dev);
607 	if (IS_ERR(spdev)) {
608 		err = PTR_ERR(spdev);
609 		return err;
610 	}
611 
612 	spin_lock(&ep->lock);
613 	switch (ep->state) {
614 	case SCIFEP_ZOMBIE:
615 	case SCIFEP_CLOSING:
616 		err = -EINVAL;
617 		break;
618 	case SCIFEP_DISCONNECTED:
619 		if (ep->conn_async_state == ASYNC_CONN_INPROGRESS)
620 			ep->conn_async_state = ASYNC_CONN_FLUSH_WORK;
621 		else
622 			err = -EINVAL;
623 		break;
624 	case SCIFEP_LISTENING:
625 	case SCIFEP_CLLISTEN:
626 		err = -EOPNOTSUPP;
627 		break;
628 	case SCIFEP_CONNECTING:
629 	case SCIFEP_MAPPING:
630 		if (ep->conn_async_state == ASYNC_CONN_INPROGRESS)
631 			err = -EINPROGRESS;
632 		else
633 			err = -EISCONN;
634 		break;
635 	case SCIFEP_CONNECTED:
636 		if (ep->conn_async_state == ASYNC_CONN_INPROGRESS)
637 			ep->conn_async_state = ASYNC_CONN_FLUSH_WORK;
638 		else
639 			err = -EISCONN;
640 		break;
641 	case SCIFEP_UNBOUND:
642 		err = scif_get_new_port();
643 		if (err < 0)
644 			break;
645 		ep->port.port = err;
646 		ep->port.node = scif_info.nodeid;
647 		ep->conn_async_state = ASYNC_CONN_IDLE;
648 		/* Fall through */
649 	case SCIFEP_BOUND:
650 		/*
651 		 * If a non-blocking connect has already been initiated
652 		 * (conn_async_state is either ASYNC_CONN_INPROGRESS or
653 		 * ASYNC_CONN_FLUSH_WORK), the end point could end up in
654 		 * SCIFEP_BOUND due to an error in the connection process
655 		 * (e.g., connection refused). If conn_async_state is
656 		 * ASYNC_CONN_INPROGRESS - transition to ASYNC_CONN_FLUSH_WORK
657 		 * so that the error status can be collected. If the state is
658 		 * already ASYNC_CONN_FLUSH_WORK - then set the error to
659 		 * EINPROGRESS since some other thread is waiting to collect
660 		 * error status.
661 		 */
662 		if (ep->conn_async_state == ASYNC_CONN_INPROGRESS) {
663 			ep->conn_async_state = ASYNC_CONN_FLUSH_WORK;
664 		} else if (ep->conn_async_state == ASYNC_CONN_FLUSH_WORK) {
665 			err = -EINPROGRESS;
666 		} else {
667 			ep->conn_port = *dst;
668 			init_waitqueue_head(&ep->sendwq);
669 			init_waitqueue_head(&ep->recvwq);
670 			init_waitqueue_head(&ep->conwq);
671 			ep->conn_async_state = 0;
672 
673 			if (unlikely(non_block))
674 				ep->conn_async_state = ASYNC_CONN_INPROGRESS;
675 		}
676 		break;
677 	}
678 
679 	if (err || ep->conn_async_state == ASYNC_CONN_FLUSH_WORK)
680 		goto connect_simple_unlock1;
681 
682 	ep->state = SCIFEP_CONNECTING;
683 	ep->remote_dev = &scif_dev[dst->node];
684 	ep->qp_info.qp->magic = SCIFEP_MAGIC;
685 	if (ep->conn_async_state == ASYNC_CONN_INPROGRESS) {
686 		init_waitqueue_head(&ep->conn_pend_wq);
687 		spin_lock(&scif_info.nb_connect_lock);
688 		list_add_tail(&ep->conn_list, &scif_info.nb_connect_list);
689 		spin_unlock(&scif_info.nb_connect_lock);
690 		err = -EINPROGRESS;
691 		schedule_work(&scif_info.conn_work);
692 	}
693 connect_simple_unlock1:
694 	spin_unlock(&ep->lock);
695 	scif_put_peer_dev(spdev);
696 	if (err) {
697 		return err;
698 	} else if (ep->conn_async_state == ASYNC_CONN_FLUSH_WORK) {
699 		flush_work(&scif_info.conn_work);
700 		err = ep->conn_err;
701 		spin_lock(&ep->lock);
702 		ep->conn_async_state = ASYNC_CONN_IDLE;
703 		spin_unlock(&ep->lock);
704 	} else {
705 		err = scif_conn_func(ep);
706 	}
707 	return err;
708 }
709 
710 int scif_connect(scif_epd_t epd, struct scif_port_id *dst)
711 {
712 	return __scif_connect(epd, dst, false);
713 }
714 EXPORT_SYMBOL_GPL(scif_connect);
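
/*
 * Example (illustrative sketch, not part of the driver): a blocking
 * connect to a listening endpoint on a peer node, following the
 * connection flow described above. The destination node/port values
 * are assumptions made for the example.
 */
static int __maybe_unused scif_example_connect(void)
{
	struct scif_port_id dst = { .node = 1, .port = 2000 };
	scif_epd_t epd;
	int ret;

	epd = scif_open();
	if (!epd)
		return -ENOMEM;

	/* Passing 0 asks SCIF to pick an available local port */
	ret = scif_bind(epd, 0);
	if (ret < 0)
		goto out_close;

	ret = scif_connect(epd, &dst);
	if (ret < 0)
		goto out_close;

	/* Connected: scif_send()/scif_recv() may now be used on epd */
	ret = 0;
out_close:
	scif_close(epd);
	return ret;
}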
715 
716 /**
717  * scif_accept() - Accept a connection request from the remote node
718  *
719  * The function accepts a connection request from the remote node. Successful
720  * completion is indicated by a new end point being created and passed back
721  * to the caller for future reference.
722  *
723  * Upon successful completion zero is returned and the peer information
724  * is filled in.
725  *
726  * If the end point is not in the listening state, -EINVAL is returned.
727  *
728  * If resource allocation fails during the connection sequence, -ENOMEM
729  * is returned.
730  *
731  * If the function is called without the SCIF_ACCEPT_SYNC flag and no
732  * connection requests are pending, it returns -EAGAIN.
733  *
734  * If the remote side is not sending any connection requests, the caller may
735  * terminate this function with a signal. If so, -EINTR is returned.
736  */
737 int scif_accept(scif_epd_t epd, struct scif_port_id *peer,
738 		scif_epd_t *newepd, int flags)
739 {
740 	struct scif_endpt *lep = (struct scif_endpt *)epd;
741 	struct scif_endpt *cep;
742 	struct scif_conreq *conreq;
743 	struct scifmsg msg;
744 	int err;
745 	struct device *spdev;
746 
747 	dev_dbg(scif_info.mdev.this_device,
748 		"SCIFAPI accept: ep %p %s\n", lep, scif_ep_states[lep->state]);
749 
750 	if (flags & ~SCIF_ACCEPT_SYNC)
751 		return -EINVAL;
752 
753 	if (!peer || !newepd)
754 		return -EINVAL;
755 
756 	might_sleep();
757 	spin_lock(&lep->lock);
758 	if (lep->state != SCIFEP_LISTENING) {
759 		spin_unlock(&lep->lock);
760 		return -EINVAL;
761 	}
762 
763 	if (!lep->conreqcnt && !(flags & SCIF_ACCEPT_SYNC)) {
764 		/* No connection request present and we do not want to wait */
765 		spin_unlock(&lep->lock);
766 		return -EAGAIN;
767 	}
768 
769 	lep->files = current->files;
770 retry_connection:
771 	spin_unlock(&lep->lock);
772 	/* Wait for the remote node to send us a SCIF_CNCT_REQ */
773 	err = wait_event_interruptible(lep->conwq,
774 				       (lep->conreqcnt ||
775 				       (lep->state != SCIFEP_LISTENING)));
776 	if (err)
777 		return err;
778 
779 	if (lep->state != SCIFEP_LISTENING)
780 		return -EINTR;
781 
782 	spin_lock(&lep->lock);
783 
784 	if (!lep->conreqcnt)
785 		goto retry_connection;
786 
787 	/* Get the first connect request off the list */
788 	conreq = list_first_entry(&lep->conlist, struct scif_conreq, list);
789 	list_del(&conreq->list);
790 	lep->conreqcnt--;
791 	spin_unlock(&lep->lock);
792 
793 	/* Fill in the peer information */
794 	peer->node = conreq->msg.src.node;
795 	peer->port = conreq->msg.src.port;
796 
797 	cep = kzalloc(sizeof(*cep), GFP_KERNEL);
798 	if (!cep) {
799 		err = -ENOMEM;
800 		goto scif_accept_error_epalloc;
801 	}
802 	spin_lock_init(&cep->lock);
803 	mutex_init(&cep->sendlock);
804 	mutex_init(&cep->recvlock);
805 	cep->state = SCIFEP_CONNECTING;
806 	cep->remote_dev = &scif_dev[peer->node];
807 	cep->remote_ep = conreq->msg.payload[0];
808 
809 	scif_rma_ep_init(cep);
810 
811 	err = scif_reserve_dma_chan(cep);
812 	if (err) {
813 		dev_err(scif_info.mdev.this_device,
814 			"%s %d err %d\n", __func__, __LINE__, err);
815 		goto scif_accept_error_qpalloc;
816 	}
817 
818 	cep->qp_info.qp = kzalloc(sizeof(*cep->qp_info.qp), GFP_KERNEL);
819 	if (!cep->qp_info.qp) {
820 		err = -ENOMEM;
821 		goto scif_accept_error_qpalloc;
822 	}
823 
824 	err = scif_anon_inode_getfile(cep);
825 	if (err)
826 		goto scif_accept_error_anon_inode;
827 
828 	cep->qp_info.qp->magic = SCIFEP_MAGIC;
829 	spdev = scif_get_peer_dev(cep->remote_dev);
830 	if (IS_ERR(spdev)) {
831 		err = PTR_ERR(spdev);
832 		goto scif_accept_error_map;
833 	}
834 	err = scif_setup_qp_accept(cep->qp_info.qp, &cep->qp_info.qp_offset,
835 				   conreq->msg.payload[1], SCIF_ENDPT_QP_SIZE,
836 				   cep->remote_dev);
837 	if (err) {
838 		dev_dbg(&cep->remote_dev->sdev->dev,
839 			"SCIFAPI accept: ep %p new %p scif_setup_qp_accept %d qp_offset 0x%llx\n",
840 			lep, cep, err, cep->qp_info.qp_offset);
841 		scif_put_peer_dev(spdev);
842 		goto scif_accept_error_map;
843 	}
844 
845 	cep->port.node = lep->port.node;
846 	cep->port.port = lep->port.port;
847 	cep->peer.node = peer->node;
848 	cep->peer.port = peer->port;
849 	init_waitqueue_head(&cep->sendwq);
850 	init_waitqueue_head(&cep->recvwq);
851 	init_waitqueue_head(&cep->conwq);
852 
853 	msg.uop = SCIF_CNCT_GNT;
854 	msg.src = cep->port;
855 	msg.payload[0] = cep->remote_ep;
856 	msg.payload[1] = cep->qp_info.qp_offset;
857 	msg.payload[2] = (u64)cep;
858 
859 	err = _scif_nodeqp_send(cep->remote_dev, &msg);
860 	scif_put_peer_dev(spdev);
861 	if (err)
862 		goto scif_accept_error_map;
863 retry:
864 	/* Wait for the remote node to respond with SCIF_CNCT_GNT(N)ACK */
865 	err = wait_event_timeout(cep->conwq, cep->state != SCIFEP_CONNECTING,
866 				 SCIF_NODE_ACCEPT_TIMEOUT);
867 	if (!err && scifdev_alive(cep))
868 		goto retry;
869 	err = !err ? -ENODEV : 0;
870 	if (err)
871 		goto scif_accept_error_map;
872 	kfree(conreq);
873 
874 	spin_lock(&cep->lock);
875 
876 	if (cep->state == SCIFEP_CLOSING) {
877 		/*
878 		 * Remote failed to allocate resources and NAKed the grant.
879 		 * At this point there is nothing referencing the new end point.
880 		 */
881 		spin_unlock(&cep->lock);
882 		scif_teardown_ep(cep);
883 		kfree(cep);
884 
885 		/* If called with the sync flag then go back and wait. */
886 		if (flags & SCIF_ACCEPT_SYNC) {
887 			spin_lock(&lep->lock);
888 			goto retry_connection;
889 		}
890 		return -EAGAIN;
891 	}
892 
893 	scif_get_port(cep->port.port);
894 	*newepd = (scif_epd_t)cep;
895 	spin_unlock(&cep->lock);
896 	return 0;
897 scif_accept_error_map:
898 	scif_anon_inode_fput(cep);
899 scif_accept_error_anon_inode:
900 	scif_teardown_ep(cep);
901 scif_accept_error_qpalloc:
902 	kfree(cep);
903 scif_accept_error_epalloc:
904 	msg.uop = SCIF_CNCT_REJ;
905 	msg.dst.node = conreq->msg.src.node;
906 	msg.dst.port = conreq->msg.src.port;
907 	msg.payload[0] = conreq->msg.payload[0];
908 	msg.payload[1] = conreq->msg.payload[1];
909 	scif_nodeqp_send(&scif_dev[conreq->msg.src.node], &msg);
910 	kfree(conreq);
911 	return err;
912 }
913 EXPORT_SYMBOL_GPL(scif_accept);
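
/*
 * Example (illustrative sketch, not part of the driver): a blocking
 * accept on a listening endpoint. The port number and backlog are
 * assumptions made for the example.
 */
static int __maybe_unused scif_example_server(void)
{
	struct scif_port_id peer;
	scif_epd_t lepd, cepd;
	int ret;

	lepd = scif_open();
	if (!lepd)
		return -ENOMEM;

	ret = scif_bind(lepd, 2000);
	if (ret < 0)
		goto out_close;

	ret = scif_listen(lepd, 16);
	if (ret)
		goto out_close;

	/* Block until a peer connects, then hand back the new endpoint */
	ret = scif_accept(lepd, &peer, &cepd, SCIF_ACCEPT_SYNC);
	if (ret)
		goto out_close;

	/* ... exchange messages on cepd ... */
	scif_close(cepd);
out_close:
	scif_close(lepd);
	return ret;
}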
914 
915 /*
916  * scif_msg_param_check:
917  * @epd: The end point returned from scif_open()
918  * @len: Length of the message
919  * @flags: blocking or non blocking
920  *
921  * Validate parameters for messaging APIs scif_send(..)/scif_recv(..).
922  */
923 static inline int scif_msg_param_check(scif_epd_t epd, int len, int flags)
924 {
925 	int ret = -EINVAL;
926 
927 	if (len < 0)
928 		goto err_ret;
929 	if (flags && (!(flags & SCIF_RECV_BLOCK)))
930 		goto err_ret;
931 	ret = 0;
932 err_ret:
933 	return ret;
934 }
935 
936 static int _scif_send(scif_epd_t epd, void *msg, int len, int flags)
937 {
938 	struct scif_endpt *ep = (struct scif_endpt *)epd;
939 	struct scifmsg notif_msg;
940 	int curr_xfer_len = 0, sent_len = 0, write_count;
941 	int ret = 0;
942 	struct scif_qp *qp = ep->qp_info.qp;
943 
944 	if (flags & SCIF_SEND_BLOCK)
945 		might_sleep();
946 
947 	spin_lock(&ep->lock);
948 	while (sent_len != len && SCIFEP_CONNECTED == ep->state) {
949 		write_count = scif_rb_space(&qp->outbound_q);
950 		if (write_count) {
951 			/* Best effort to send as much data as possible */
952 			curr_xfer_len = min(len - sent_len, write_count);
953 			ret = scif_rb_write(&qp->outbound_q, msg,
954 					    curr_xfer_len);
955 			if (ret < 0)
956 				break;
957 			/* Success. Update write pointer */
958 			scif_rb_commit(&qp->outbound_q);
959 			/*
960 			 * Send a notification to the peer about the
961 			 * produced data message.
962 			 */
963 			notif_msg.src = ep->port;
964 			notif_msg.uop = SCIF_CLIENT_SENT;
965 			notif_msg.payload[0] = ep->remote_ep;
966 			ret = _scif_nodeqp_send(ep->remote_dev, &notif_msg);
967 			if (ret)
968 				break;
969 			sent_len += curr_xfer_len;
970 			msg = msg + curr_xfer_len;
971 			continue;
972 		}
973 		curr_xfer_len = min(len - sent_len, SCIF_ENDPT_QP_SIZE - 1);
974 		/* Not enough RB space. return for the Non Blocking case */
975 		if (!(flags & SCIF_SEND_BLOCK))
976 			break;
977 
978 		spin_unlock(&ep->lock);
979 		/* Wait for a SCIF_CLIENT_RCVD message in the Blocking case */
980 		ret =
981 		wait_event_interruptible(ep->sendwq,
982 					 (SCIFEP_CONNECTED != ep->state) ||
983 					 (scif_rb_space(&qp->outbound_q) >=
984 					 curr_xfer_len));
985 		spin_lock(&ep->lock);
986 		if (ret)
987 			break;
988 	}
989 	if (sent_len)
990 		ret = sent_len;
991 	else if (!ret && SCIFEP_CONNECTED != ep->state)
992 		ret = SCIFEP_DISCONNECTED == ep->state ?
993 			-ECONNRESET : -ENOTCONN;
994 	spin_unlock(&ep->lock);
995 	return ret;
996 }
997 
998 static int _scif_recv(scif_epd_t epd, void *msg, int len, int flags)
999 {
1000 	int read_size;
1001 	struct scif_endpt *ep = (struct scif_endpt *)epd;
1002 	struct scifmsg notif_msg;
1003 	int curr_recv_len = 0, remaining_len = len, read_count;
1004 	int ret = 0;
1005 	struct scif_qp *qp = ep->qp_info.qp;
1006 
1007 	if (flags & SCIF_RECV_BLOCK)
1008 		might_sleep();
1009 	spin_lock(&ep->lock);
1010 	while (remaining_len && (SCIFEP_CONNECTED == ep->state ||
1011 				 SCIFEP_DISCONNECTED == ep->state)) {
1012 		read_count = scif_rb_count(&qp->inbound_q, remaining_len);
1013 		if (read_count) {
1014 			/*
1015 			 * Best effort to recv as much data as there
1016 			 * are bytes to read in the RB particularly
1017 			 * important for the Non Blocking case.
1018 			 */
1019 			curr_recv_len = min(remaining_len, read_count);
1020 			read_size = scif_rb_get_next(&qp->inbound_q,
1021 						     msg, curr_recv_len);
1022 			if (ep->state == SCIFEP_CONNECTED) {
1023 				/*
1024 				 * Update the read pointer only if the endpoint
1025 				 * is still connected else the read pointer
1026 				 * might no longer exist since the peer has
1027 				 * freed resources!
1028 				 */
1029 				scif_rb_update_read_ptr(&qp->inbound_q);
1030 				/*
1031 				 * Send a notification to the peer about the
1032 				 * consumed data message only if the EP is in
1033 				 * SCIFEP_CONNECTED state.
1034 				 */
1035 				notif_msg.src = ep->port;
1036 				notif_msg.uop = SCIF_CLIENT_RCVD;
1037 				notif_msg.payload[0] = ep->remote_ep;
1038 				ret = _scif_nodeqp_send(ep->remote_dev,
1039 							&notif_msg);
1040 				if (ret)
1041 					break;
1042 			}
1043 			remaining_len -= curr_recv_len;
1044 			msg = msg + curr_recv_len;
1045 			continue;
1046 		}
1047 		/*
1048 		 * Bail out now if the EP is in SCIFEP_DISCONNECTED state else
1049 		 * we will keep looping forever.
1050 		 */
1051 		if (ep->state == SCIFEP_DISCONNECTED)
1052 			break;
1053 		/*
1054 		 * Return in the Non Blocking case if there is no data
1055 		 * to read in this iteration.
1056 		 */
1057 		if (!(flags & SCIF_RECV_BLOCK))
1058 			break;
1059 		curr_recv_len = min(remaining_len, SCIF_ENDPT_QP_SIZE - 1);
1060 		spin_unlock(&ep->lock);
1061 		/*
1062 		 * Wait for a SCIF_CLIENT_SEND message in the blocking case
1063 		 * or until other side disconnects.
1064 		 */
1065 		ret =
1066 		wait_event_interruptible(ep->recvwq,
1067 					 SCIFEP_CONNECTED != ep->state ||
1068 					 scif_rb_count(&qp->inbound_q,
1069 						       curr_recv_len)
1070 					 >= curr_recv_len);
1071 		spin_lock(&ep->lock);
1072 		if (ret)
1073 			break;
1074 	}
1075 	if (len - remaining_len)
1076 		ret = len - remaining_len;
1077 	else if (!ret && ep->state != SCIFEP_CONNECTED)
1078 		ret = ep->state == SCIFEP_DISCONNECTED ?
1079 			-ECONNRESET : -ENOTCONN;
1080 	spin_unlock(&ep->lock);
1081 	return ret;
1082 }
1083 
1084 /**
1085  * scif_user_send() - Send data to connection queue
1086  * @epd: The end point returned from scif_open()
1087  * @msg: Address of the user buffer holding the data to send
1088  * @len: Length to send
1089  * @flags: blocking or non blocking
1090  *
1091  * This function is called from the driver IOCTL entry point
1092  * only and is a wrapper for _scif_send().
1093  */
1094 int scif_user_send(scif_epd_t epd, void __user *msg, int len, int flags)
1095 {
1096 	struct scif_endpt *ep = (struct scif_endpt *)epd;
1097 	int err = 0;
1098 	int sent_len = 0;
1099 	char *tmp;
1100 	int loop_len;
1101 	int chunk_len = min(len, (1 << (MAX_ORDER + PAGE_SHIFT - 1)));
1102 
1103 	dev_dbg(scif_info.mdev.this_device,
1104 		"SCIFAPI send (U): ep %p %s\n", ep, scif_ep_states[ep->state]);
1105 	if (!len)
1106 		return 0;
1107 
1108 	err = scif_msg_param_check(epd, len, flags);
1109 	if (err)
1110 		goto send_err;
1111 
1112 	tmp = kmalloc(chunk_len, GFP_KERNEL);
1113 	if (!tmp) {
1114 		err = -ENOMEM;
1115 		goto send_err;
1116 	}
1117 	/*
1118 	 * Grabbing the lock before breaking up the transfer in
1119 	 * multiple chunks is required to ensure that messages do
1120 	 * not get fragmented and reordered.
1121 	 */
1122 	mutex_lock(&ep->sendlock);
1123 	while (sent_len != len) {
1124 		loop_len = len - sent_len;
1125 		loop_len = min(chunk_len, loop_len);
1126 		if (copy_from_user(tmp, msg, loop_len)) {
1127 			err = -EFAULT;
1128 			goto send_free_err;
1129 		}
1130 		err = _scif_send(epd, tmp, loop_len, flags);
1131 		if (err < 0)
1132 			goto send_free_err;
1133 		sent_len += err;
1134 		msg += err;
1135 		if (err != loop_len)
1136 			goto send_free_err;
1137 	}
1138 send_free_err:
1139 	mutex_unlock(&ep->sendlock);
1140 	kfree(tmp);
1141 send_err:
1142 	return err < 0 ? err : sent_len;
1143 }
1144 
1145 /**
1146  * scif_user_recv() - Receive data from connection queue
1147  * @epd: The end point returned from scif_open()
1148  * @msg: Address to place data
1149  * @len: Length to receive
1150  * @flags: blocking or non blocking
1151  *
1152  * This function is called from the driver IOCTL entry point
1153  * only and is a wrapper for _scif_recv().
1154  */
1155 int scif_user_recv(scif_epd_t epd, void __user *msg, int len, int flags)
1156 {
1157 	struct scif_endpt *ep = (struct scif_endpt *)epd;
1158 	int err = 0;
1159 	int recv_len = 0;
1160 	char *tmp;
1161 	int loop_len;
1162 	int chunk_len = min(len, (1 << (MAX_ORDER + PAGE_SHIFT - 1)));
1163 
1164 	dev_dbg(scif_info.mdev.this_device,
1165 		"SCIFAPI recv (U): ep %p %s\n", ep, scif_ep_states[ep->state]);
1166 	if (!len)
1167 		return 0;
1168 
1169 	err = scif_msg_param_check(epd, len, flags);
1170 	if (err)
1171 		goto recv_err;
1172 
1173 	tmp = kmalloc(chunk_len, GFP_KERNEL);
1174 	if (!tmp) {
1175 		err = -ENOMEM;
1176 		goto recv_err;
1177 	}
1178 	/*
1179 	 * Grabbing the lock before breaking up the transfer in
1180 	 * multiple chunks is required to ensure that messages do
1181 	 * not get fragmented and reordered.
1182 	 */
1183 	mutex_lock(&ep->recvlock);
1184 	while (recv_len != len) {
1185 		loop_len = len - recv_len;
1186 		loop_len = min(chunk_len, loop_len);
1187 		err = _scif_recv(epd, tmp, loop_len, flags);
1188 		if (err < 0)
1189 			goto recv_free_err;
1190 		if (copy_to_user(msg, tmp, err)) {
1191 			err = -EFAULT;
1192 			goto recv_free_err;
1193 		}
1194 		recv_len += err;
1195 		msg += err;
1196 		if (err != loop_len)
1197 			goto recv_free_err;
1198 	}
1199 recv_free_err:
1200 	mutex_unlock(&ep->recvlock);
1201 	kfree(tmp);
1202 recv_err:
1203 	return err < 0 ? err : recv_len;
1204 }
1205 
1206 /**
1207  * scif_send() - Send data to connection queue
1208  * @epd: The end point returned from scif_open()
1209  * @msg: Address of the data to send
1210  * @len: Length to send
1211  * @flags: blocking or non blocking
1212  *
1213  * This function is called from the kernel mode only and is
1214  * a wrapper for _scif_send().
1215  */
1216 int scif_send(scif_epd_t epd, void *msg, int len, int flags)
1217 {
1218 	struct scif_endpt *ep = (struct scif_endpt *)epd;
1219 	int ret;
1220 
1221 	dev_dbg(scif_info.mdev.this_device,
1222 		"SCIFAPI send (K): ep %p %s\n", ep, scif_ep_states[ep->state]);
1223 	if (!len)
1224 		return 0;
1225 
1226 	ret = scif_msg_param_check(epd, len, flags);
1227 	if (ret)
1228 		return ret;
1229 	if (!ep->remote_dev)
1230 		return -ENOTCONN;
1231 	/*
1232 	 * Grab the mutex lock in the blocking case only
1233 	 * to ensure messages do not get fragmented/reordered.
1234 	 * The non blocking mode is protected using spin locks
1235 	 * in _scif_send().
1236 	 */
1237 	if (flags & SCIF_SEND_BLOCK)
1238 		mutex_lock(&ep->sendlock);
1239 
1240 	ret = _scif_send(epd, msg, len, flags);
1241 
1242 	if (flags & SCIF_SEND_BLOCK)
1243 		mutex_unlock(&ep->sendlock);
1244 	return ret;
1245 }
1246 EXPORT_SYMBOL_GPL(scif_send);
1247 
1248 /**
1249  * scif_recv() - Receive data from connection queue
1250  * @epd: The end point returned from scif_open()
1251  * @msg: Address to place data
1252  * @len: Length to receive
1253  * @flags: blocking or non blocking
1254  *
1255  * This function is called from the kernel mode only and is
1256  * a wrapper for _scif_recv().
1257  */
1258 int scif_recv(scif_epd_t epd, void *msg, int len, int flags)
1259 {
1260 	struct scif_endpt *ep = (struct scif_endpt *)epd;
1261 	int ret;
1262 
1263 	dev_dbg(scif_info.mdev.this_device,
1264 		"SCIFAPI recv (K): ep %p %s\n", ep, scif_ep_states[ep->state]);
1265 	if (!len)
1266 		return 0;
1267 
1268 	ret = scif_msg_param_check(epd, len, flags);
1269 	if (ret)
1270 		return ret;
1271 	/*
1272 	 * Grab the mutex lock in the blocking case only
1273 	 * to ensure messages do not get fragmented/reordered.
1274 	 * The non blocking mode is protected using spin locks
1275 	 * in _scif_recv().
1276 	 */
1277 	if (flags & SCIF_RECV_BLOCK)
1278 		mutex_lock(&ep->recvlock);
1279 
1280 	ret = _scif_recv(epd, msg, len, flags);
1281 
1282 	if (flags & SCIF_RECV_BLOCK)
1283 		mutex_unlock(&ep->recvlock);
1284 
1285 	return ret;
1286 }
1287 EXPORT_SYMBOL_GPL(scif_recv);
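
/*
 * Example (illustrative sketch, not part of the driver): a blocking
 * request/response exchange on an already connected endpoint. The
 * buffer contents and size are assumptions made for the example.
 */
static int __maybe_unused scif_example_echo(scif_epd_t epd)
{
	char buf[64] = "ping";
	int ret;

	/* SCIF_SEND_BLOCK: return only once all bytes have been queued */
	ret = scif_send(epd, buf, sizeof(buf), SCIF_SEND_BLOCK);
	if (ret < 0)
		return ret;

	/* SCIF_RECV_BLOCK: return only once the full reply has been read */
	ret = scif_recv(epd, buf, sizeof(buf), SCIF_RECV_BLOCK);
	return ret < 0 ? ret : 0;
}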
1288 
1289 static inline void _scif_poll_wait(struct file *f, wait_queue_head_t *wq,
1290 				   poll_table *p, struct scif_endpt *ep)
1291 {
1292 	/*
1293 	 * Because poll_wait makes a GFP_KERNEL allocation, give up the lock
1294 	 * and regrab it afterwards. Because the endpoint state might have
1295 	 * changed while the lock was given up, the state must be checked
1296 	 * again after re-acquiring the lock. The code in __scif_pollfd(..)
1297 	 * does this.
1298 	 */
1299 	spin_unlock(&ep->lock);
1300 	poll_wait(f, wq, p);
1301 	spin_lock(&ep->lock);
1302 }
1303 
1304 __poll_t
1305 __scif_pollfd(struct file *f, poll_table *wait, struct scif_endpt *ep)
1306 {
1307 	__poll_t mask = 0;
1308 
1309 	dev_dbg(scif_info.mdev.this_device,
1310 		"SCIFAPI pollfd: ep %p %s\n", ep, scif_ep_states[ep->state]);
1311 
1312 	spin_lock(&ep->lock);
1313 
1314 	/* Endpoint is waiting for a non-blocking connect to complete */
1315 	if (ep->conn_async_state == ASYNC_CONN_INPROGRESS) {
1316 		_scif_poll_wait(f, &ep->conn_pend_wq, wait, ep);
1317 		if (ep->conn_async_state == ASYNC_CONN_INPROGRESS) {
1318 			if (ep->state == SCIFEP_CONNECTED ||
1319 			    ep->state == SCIFEP_DISCONNECTED ||
1320 			    ep->conn_err)
1321 				mask |= EPOLLOUT;
1322 			goto exit;
1323 		}
1324 	}
1325 
1326 	/* Endpoint is listening for incoming connection requests */
1327 	if (ep->state == SCIFEP_LISTENING) {
1328 		_scif_poll_wait(f, &ep->conwq, wait, ep);
1329 		if (ep->state == SCIFEP_LISTENING) {
1330 			if (ep->conreqcnt)
1331 				mask |= EPOLLIN;
1332 			goto exit;
1333 		}
1334 	}
1335 
1336 	/* Endpoint is connected or disconnected */
1337 	if (ep->state == SCIFEP_CONNECTED || ep->state == SCIFEP_DISCONNECTED) {
1338 		if (poll_requested_events(wait) & EPOLLIN)
1339 			_scif_poll_wait(f, &ep->recvwq, wait, ep);
1340 		if (poll_requested_events(wait) & EPOLLOUT)
1341 			_scif_poll_wait(f, &ep->sendwq, wait, ep);
1342 		if (ep->state == SCIFEP_CONNECTED ||
1343 		    ep->state == SCIFEP_DISCONNECTED) {
1344 			/* Data can be read without blocking */
1345 			if (scif_rb_count(&ep->qp_info.qp->inbound_q, 1))
1346 				mask |= EPOLLIN;
1347 			/* Data can be written without blocking */
1348 			if (scif_rb_space(&ep->qp_info.qp->outbound_q))
1349 				mask |= EPOLLOUT;
1350 			/* Return EPOLLHUP if endpoint is disconnected */
1351 			if (ep->state == SCIFEP_DISCONNECTED)
1352 				mask |= EPOLLHUP;
1353 			goto exit;
1354 		}
1355 	}
1356 
1357 	/* Return EPOLLERR if the endpoint is in none of the above states */
1358 	mask |= EPOLLERR;
1359 exit:
1360 	spin_unlock(&ep->lock);
1361 	return mask;
1362 }
1363 
1364 /**
1365  * scif_poll() - Kernel mode SCIF poll
1366  * @ufds: Array of scif_pollepd structures containing the end points
1367  *	  and events to poll on
1368  * @nfds: Size of the ufds array
1369  * @timeout_msecs: Timeout in msecs; a negative value implies an infinite timeout
1370  *
1371  * The code flow in this function is based on do_poll(..) in select.c
1372  *
1373  * Returns the number of endpoints which have pending events or 0 in
1374  * the event of a timeout. If a signal is used for wake up, -EINTR is
1375  * returned.
1376  */
1377 int
1378 scif_poll(struct scif_pollepd *ufds, unsigned int nfds, long timeout_msecs)
1379 {
1380 	struct poll_wqueues table;
1381 	poll_table *pt;
1382 	int i, count = 0, timed_out = timeout_msecs == 0;
1383 	__poll_t mask;
1384 	u64 timeout = timeout_msecs < 0 ? MAX_SCHEDULE_TIMEOUT
1385 		: msecs_to_jiffies(timeout_msecs);
1386 
1387 	poll_initwait(&table);
1388 	pt = &table.pt;
1389 	while (1) {
1390 		for (i = 0; i < nfds; i++) {
1391 			pt->_key = ufds[i].events | EPOLLERR | EPOLLHUP;
1392 			mask = __scif_pollfd(ufds[i].epd->anon,
1393 					     pt, ufds[i].epd);
1394 			mask &= ufds[i].events | EPOLLERR | EPOLLHUP;
1395 			if (mask) {
1396 				count++;
1397 				pt->_qproc = NULL;
1398 			}
1399 			ufds[i].revents = mask;
1400 		}
1401 		pt->_qproc = NULL;
1402 		if (!count) {
1403 			count = table.error;
1404 			if (signal_pending(current))
1405 				count = -EINTR;
1406 		}
1407 		if (count || timed_out)
1408 			break;
1409 
1410 		if (!schedule_timeout_interruptible(timeout))
1411 			timed_out = 1;
1412 	}
1413 	poll_freewait(&table);
1414 	return count;
1415 }
1416 EXPORT_SYMBOL_GPL(scif_poll);
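
/*
 * Example (illustrative sketch, not part of the driver): wait up to one
 * second for either of two connected endpoints to become readable. The
 * scif_pollepd field usage mirrors what scif_poll() above expects; the
 * timeout value is an assumption made for the example.
 */
static int __maybe_unused scif_example_poll(scif_epd_t epd0, scif_epd_t epd1)
{
	struct scif_pollepd pfds[2] = {
		{ .epd = epd0, .events = EPOLLIN },
		{ .epd = epd1, .events = EPOLLIN },
	};
	int ret;

	ret = scif_poll(pfds, 2, 1000);
	if (ret <= 0)
		return ret;	/* 0 on timeout, -EINTR on signal */

	if (pfds[0].revents & EPOLLIN)
		pr_debug("scif example: epd0 has data pending\n");
	if (pfds[1].revents & EPOLLIN)
		pr_debug("scif example: epd1 has data pending\n");
	return ret;
}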
1417 
1418 int scif_get_node_ids(u16 *nodes, int len, u16 *self)
1419 {
1420 	int online = 0;
1421 	int offset = 0;
1422 	int node;
1423 
1424 	if (!scif_is_mgmt_node())
1425 		scif_get_node_info();
1426 
1427 	*self = scif_info.nodeid;
1428 	mutex_lock(&scif_info.conflock);
1429 	len = min_t(int, len, scif_info.total);
1430 	for (node = 0; node <= scif_info.maxid; node++) {
1431 		if (_scifdev_alive(&scif_dev[node])) {
1432 			online++;
1433 			if (offset < len)
1434 				nodes[offset++] = node;
1435 		}
1436 	}
1437 	dev_dbg(scif_info.mdev.this_device,
1438 		"SCIFAPI get_node_ids total %d online %d filled in %d nodes\n",
1439 		scif_info.total, online, offset);
1440 	mutex_unlock(&scif_info.conflock);
1441 
1442 	return online;
1443 }
1444 EXPORT_SYMBOL_GPL(scif_get_node_ids);
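
/*
 * Example (illustrative sketch, not part of the driver): enumerate the
 * SCIF nodes that are currently online. The array size of 32 entries
 * is an assumption made for the example.
 */
static void __maybe_unused scif_example_nodes(void)
{
	u16 nodes[32];
	u16 self;
	int online, i;

	online = scif_get_node_ids(nodes, ARRAY_SIZE(nodes), &self);
	pr_debug("scif example: self node %d, %d node(s) online\n",
		 self, online);
	for (i = 0; i < min_t(int, online, ARRAY_SIZE(nodes)); i++)
		pr_debug("scif example: node %d is online\n", nodes[i]);
}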
1445 
1446 static int scif_add_client_dev(struct device *dev, struct subsys_interface *si)
1447 {
1448 	struct scif_client *client =
1449 		container_of(si, struct scif_client, si);
1450 	struct scif_peer_dev *spdev =
1451 		container_of(dev, struct scif_peer_dev, dev);
1452 
1453 	if (client->probe)
1454 		client->probe(spdev);
1455 	return 0;
1456 }
1457 
1458 static void scif_remove_client_dev(struct device *dev,
1459 				   struct subsys_interface *si)
1460 {
1461 	struct scif_client *client =
1462 		container_of(si, struct scif_client, si);
1463 	struct scif_peer_dev *spdev =
1464 		container_of(dev, struct scif_peer_dev, dev);
1465 
1466 	if (client->remove)
1467 		client->remove(spdev);
1468 }
1469 
1470 void scif_client_unregister(struct scif_client *client)
1471 {
1472 	subsys_interface_unregister(&client->si);
1473 }
1474 EXPORT_SYMBOL_GPL(scif_client_unregister);
1475 
1476 int scif_client_register(struct scif_client *client)
1477 {
1478 	struct subsys_interface *si = &client->si;
1479 
1480 	si->name = client->name;
1481 	si->subsys = &scif_peer_bus;
1482 	si->add_dev = scif_add_client_dev;
1483 	si->remove_dev = scif_remove_client_dev;
1484 
1485 	return subsys_interface_register(&client->si);
1486 }
1487 EXPORT_SYMBOL_GPL(scif_client_register);
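
/*
 * Example (illustrative sketch, not part of the driver): registering a
 * SCIF client so that it is notified as peer devices come and go. The
 * client name and the bodies of the callbacks are assumptions made for
 * the example.
 */
static void scif_example_probe(struct scif_peer_dev *spdev)
{
	dev_dbg(&spdev->dev, "scif example: peer node %d added\n",
		spdev->dnode);
}

static void scif_example_remove(struct scif_peer_dev *spdev)
{
	dev_dbg(&spdev->dev, "scif example: peer node %d removed\n",
		spdev->dnode);
}

static struct scif_client scif_example_client = {
	.name = "scif_example",
	.probe = scif_example_probe,
	.remove = scif_example_remove,
};

static int __maybe_unused scif_example_register(void)
{
	return scif_client_register(&scif_example_client);
}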
1488