/*
 * Intel MIC Platform Software Stack (MPSS)
 *
 * Copyright(c) 2014 Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License, version 2, as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * Intel SCIF driver.
 *
 */
#include <linux/scif.h>
#include "scif_main.h"
#include "scif_map.h"

static const char * const scif_ep_states[] = {
	"Unbound",
	"Bound",
	"Listening",
	"Connected",
	"Connecting",
	"Mapping",
	"Closing",
	"Close Listening",
	"Disconnected",
	"Zombie"};

enum conn_async_state {
	ASYNC_CONN_IDLE = 1,	/* ep setup for async connect */
	ASYNC_CONN_INPROGRESS,	/* async connect in progress */
	ASYNC_CONN_FLUSH_WORK	/* async work flush in progress */
};

/*
 * File operations for anonymous inode file associated with a SCIF endpoint,
 * used in kernel mode SCIF poll. Kernel mode SCIF poll calls portions of the
 * poll API in the kernel and these take in a struct file *. Since a struct
 * file is not available to kernel mode SCIF, it uses an anonymous file for
 * this purpose.
 */
const struct file_operations scif_anon_fops = {
	.owner = THIS_MODULE,
};

scif_epd_t scif_open(void)
{
	struct scif_endpt *ep;
	int err;

	might_sleep();
	ep = kzalloc(sizeof(*ep), GFP_KERNEL);
	if (!ep)
		goto err_ep_alloc;

	ep->qp_info.qp = kzalloc(sizeof(*ep->qp_info.qp), GFP_KERNEL);
	if (!ep->qp_info.qp)
		goto err_qp_alloc;

	err = scif_anon_inode_getfile(ep);
	if (err)
		goto err_anon_inode;

	spin_lock_init(&ep->lock);
	mutex_init(&ep->sendlock);
	mutex_init(&ep->recvlock);

	scif_rma_ep_init(ep);
	ep->state = SCIFEP_UNBOUND;
	dev_dbg(scif_info.mdev.this_device,
		"SCIFAPI open: ep %p success\n", ep);
	return ep;

err_anon_inode:
	kfree(ep->qp_info.qp);
err_qp_alloc:
	kfree(ep);
err_ep_alloc:
	return NULL;
}
EXPORT_SYMBOL_GPL(scif_open);
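
/*
 * Usage sketch (illustrative only, not part of this driver): the basic
 * endpoint lifecycle for a kernel mode SCIF client. The port number 2000
 * is an arbitrary assumption for the example.
 */
#if 0
static int example_ep_lifecycle(void)
{
	scif_epd_t epd;
	int port;

	epd = scif_open();		/* allocate an endpoint */
	if (!epd)
		return -ENOMEM;

	port = scif_bind(epd, 2000);	/* bind it to a fixed local port */
	if (port < 0) {
		scif_close(epd);
		return port;
	}

	/* ... listen/connect and exchange messages here ... */

	return scif_close(epd);		/* release the endpoint */
}
#endif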

/*
 * scif_disconnect_ep - Disconnects the endpoint if found
 * @epd: The end point returned from scif_open()
 */
static struct scif_endpt *scif_disconnect_ep(struct scif_endpt *ep)
{
	struct scifmsg msg;
	struct scif_endpt *fep = NULL;
	struct scif_endpt *tmpep;
	struct list_head *pos, *tmpq;
	int err;

	/*
	 * Wake up any threads blocked in send()/recv() before closing
	 * out the connection. Grabbing and releasing the send/recv lock
	 * will ensure that any blocked senders/receivers have exited for
	 * Ring 0 endpoints. It is a Ring 0 bug to call send/recv after
	 * close. Ring 3 endpoints are not affected since close will not
	 * be called while there are IOCTLs executing.
	 */
	wake_up_interruptible(&ep->sendwq);
	wake_up_interruptible(&ep->recvwq);
	mutex_lock(&ep->sendlock);
	mutex_unlock(&ep->sendlock);
	mutex_lock(&ep->recvlock);
	mutex_unlock(&ep->recvlock);

	/* Remove from the connected list */
	mutex_lock(&scif_info.connlock);
	list_for_each_safe(pos, tmpq, &scif_info.connected) {
		tmpep = list_entry(pos, struct scif_endpt, list);
		if (tmpep == ep) {
			list_del(pos);
			fep = tmpep;
			spin_lock(&ep->lock);
			break;
		}
	}

	if (!fep) {
		/*
		 * The other side has completed the disconnect before
		 * the end point could be removed from the list. Therefore
		 * the ep lock is not locked; traverse the disconnected
		 * list to find the endpoint and release the conn lock.
		 */
		list_for_each_safe(pos, tmpq, &scif_info.disconnected) {
			tmpep = list_entry(pos, struct scif_endpt, list);
			if (tmpep == ep) {
				list_del(pos);
				break;
			}
		}
		mutex_unlock(&scif_info.connlock);
		return NULL;
	}

	init_completion(&ep->discon);
	msg.uop = SCIF_DISCNCT;
	msg.src = ep->port;
	msg.dst = ep->peer;
	msg.payload[0] = (u64)ep;
	msg.payload[1] = ep->remote_ep;

	err = scif_nodeqp_send(ep->remote_dev, &msg);
	spin_unlock(&ep->lock);
	mutex_unlock(&scif_info.connlock);

	if (!err)
		/* Wait for the remote node to respond with SCIF_DISCNT_ACK */
		wait_for_completion_timeout(&ep->discon,
					    SCIF_NODE_ALIVE_TIMEOUT);
	return ep;
}

int scif_close(scif_epd_t epd)
{
	struct scif_endpt *ep = (struct scif_endpt *)epd;
	struct scif_endpt *tmpep;
	struct list_head *pos, *tmpq;
	enum scif_epd_state oldstate;
	bool flush_conn;

	dev_dbg(scif_info.mdev.this_device, "SCIFAPI close: ep %p %s\n",
		ep, scif_ep_states[ep->state]);
	might_sleep();
	spin_lock(&ep->lock);
	flush_conn = (ep->conn_async_state == ASYNC_CONN_INPROGRESS);
	spin_unlock(&ep->lock);

	if (flush_conn)
		flush_work(&scif_info.conn_work);

	spin_lock(&ep->lock);
	oldstate = ep->state;

	ep->state = SCIFEP_CLOSING;

	switch (oldstate) {
	case SCIFEP_ZOMBIE:
		dev_err(scif_info.mdev.this_device,
			"SCIFAPI close: zombie state unexpected\n");
		/* fall through */
	case SCIFEP_DISCONNECTED:
		spin_unlock(&ep->lock);
		scif_unregister_all_windows(epd);
		/* Remove from the disconnected list */
		mutex_lock(&scif_info.connlock);
		list_for_each_safe(pos, tmpq, &scif_info.disconnected) {
			tmpep = list_entry(pos, struct scif_endpt, list);
			if (tmpep == ep) {
				list_del(pos);
				break;
			}
		}
		mutex_unlock(&scif_info.connlock);
		break;
	case SCIFEP_UNBOUND:
	case SCIFEP_BOUND:
	case SCIFEP_CONNECTING:
		spin_unlock(&ep->lock);
		break;
	case SCIFEP_MAPPING:
	case SCIFEP_CONNECTED:
	case SCIFEP_CLOSING:
	{
		spin_unlock(&ep->lock);
		scif_unregister_all_windows(epd);
		scif_disconnect_ep(ep);
		break;
	}
	case SCIFEP_LISTENING:
	case SCIFEP_CLLISTEN:
	{
		struct scif_conreq *conreq;
		struct scifmsg msg;
		struct scif_endpt *aep;

		spin_unlock(&ep->lock);
		mutex_lock(&scif_info.eplock);

		/* remove from listen list */
		list_for_each_safe(pos, tmpq, &scif_info.listen) {
			tmpep = list_entry(pos, struct scif_endpt, list);
			if (tmpep == ep)
				list_del(pos);
		}
		/* Remove any dangling accepts */
		while (ep->acceptcnt) {
			aep = list_first_entry(&ep->li_accept,
					       struct scif_endpt, liacceptlist);
			list_del(&aep->liacceptlist);
			scif_put_port(aep->port.port);
			list_for_each_safe(pos, tmpq, &scif_info.uaccept) {
				tmpep = list_entry(pos, struct scif_endpt,
						   miacceptlist);
				if (tmpep == aep) {
					list_del(pos);
					break;
				}
			}
			mutex_unlock(&scif_info.eplock);
			mutex_lock(&scif_info.connlock);
			list_for_each_safe(pos, tmpq, &scif_info.connected) {
				tmpep = list_entry(pos,
						   struct scif_endpt, list);
				if (tmpep == aep) {
					list_del(pos);
					break;
				}
			}
			list_for_each_safe(pos, tmpq, &scif_info.disconnected) {
				tmpep = list_entry(pos,
						   struct scif_endpt, list);
				if (tmpep == aep) {
					list_del(pos);
					break;
				}
			}
			mutex_unlock(&scif_info.connlock);
			scif_teardown_ep(aep);
			mutex_lock(&scif_info.eplock);
			scif_add_epd_to_zombie_list(aep, SCIF_EPLOCK_HELD);
			ep->acceptcnt--;
		}

		spin_lock(&ep->lock);
		mutex_unlock(&scif_info.eplock);

		/* Remove and reject any pending connection requests. */
		while (ep->conreqcnt) {
			conreq = list_first_entry(&ep->conlist,
						  struct scif_conreq, list);
			list_del(&conreq->list);

			msg.uop = SCIF_CNCT_REJ;
			msg.dst.node = conreq->msg.src.node;
			msg.dst.port = conreq->msg.src.port;
			msg.payload[0] = conreq->msg.payload[0];
			msg.payload[1] = conreq->msg.payload[1];
			/*
			 * No error handling on purpose for scif_nodeqp_send().
			 * If the remote node is lost we still want to free the
			 * connection requests on the self node.
			 */
			scif_nodeqp_send(&scif_dev[conreq->msg.src.node],
					 &msg);
			ep->conreqcnt--;
			kfree(conreq);
		}

		spin_unlock(&ep->lock);
		/* If a kSCIF accept is waiting wake it up */
		wake_up_interruptible(&ep->conwq);
		break;
	}
	}
	scif_put_port(ep->port.port);
	scif_anon_inode_fput(ep);
	scif_teardown_ep(ep);
	scif_add_epd_to_zombie_list(ep, !SCIF_EPLOCK_HELD);
	return 0;
}
EXPORT_SYMBOL_GPL(scif_close);

/**
 * scif_flush() - Wakes up any blocking accepts. The endpoint will no longer
 *		  accept new connections.
 * @epd: The end point returned from scif_open()
 */
int __scif_flush(scif_epd_t epd)
{
	struct scif_endpt *ep = (struct scif_endpt *)epd;

	switch (ep->state) {
	case SCIFEP_LISTENING:
	{
		ep->state = SCIFEP_CLLISTEN;

		/* If an accept is waiting wake it up */
		wake_up_interruptible(&ep->conwq);
		break;
	}
	default:
		break;
	}
	return 0;
}

int scif_bind(scif_epd_t epd, u16 pn)
{
	struct scif_endpt *ep = (struct scif_endpt *)epd;
	int ret = 0;
	int tmp;

	dev_dbg(scif_info.mdev.this_device,
		"SCIFAPI bind: ep %p %s requested port number %d\n",
		ep, scif_ep_states[ep->state], pn);
	if (pn) {
		/*
		 * Similar to IETF RFC 1700, SCIF ports below
		 * SCIF_ADMIN_PORT_END can only be bound by system (or root)
		 * processes or by processes executed by privileged users.
		 */
		if (pn < SCIF_ADMIN_PORT_END && !capable(CAP_SYS_ADMIN)) {
			ret = -EACCES;
			goto scif_bind_admin_exit;
		}
	}

	spin_lock(&ep->lock);
	if (ep->state == SCIFEP_BOUND) {
		ret = -EINVAL;
		goto scif_bind_exit;
	} else if (ep->state != SCIFEP_UNBOUND) {
		ret = -EISCONN;
		goto scif_bind_exit;
	}

	if (pn) {
		tmp = scif_rsrv_port(pn);
		if (tmp != pn) {
			ret = -EINVAL;
			goto scif_bind_exit;
		}
	} else {
		ret = scif_get_new_port();
		if (ret < 0)
			goto scif_bind_exit;
		pn = ret;
	}

	ep->state = SCIFEP_BOUND;
	ep->port.node = scif_info.nodeid;
	ep->port.port = pn;
	ep->conn_async_state = ASYNC_CONN_IDLE;
	ret = pn;
	dev_dbg(scif_info.mdev.this_device,
		"SCIFAPI bind: bound to port number %d\n", pn);
scif_bind_exit:
	spin_unlock(&ep->lock);
scif_bind_admin_exit:
	return ret;
}
EXPORT_SYMBOL_GPL(scif_bind);

int scif_listen(scif_epd_t epd, int backlog)
{
	struct scif_endpt *ep = (struct scif_endpt *)epd;

	dev_dbg(scif_info.mdev.this_device,
		"SCIFAPI listen: ep %p %s\n", ep, scif_ep_states[ep->state]);
	spin_lock(&ep->lock);
	switch (ep->state) {
	case SCIFEP_ZOMBIE:
	case SCIFEP_CLOSING:
	case SCIFEP_CLLISTEN:
	case SCIFEP_UNBOUND:
	case SCIFEP_DISCONNECTED:
		spin_unlock(&ep->lock);
		return -EINVAL;
	case SCIFEP_LISTENING:
	case SCIFEP_CONNECTED:
	case SCIFEP_CONNECTING:
	case SCIFEP_MAPPING:
		spin_unlock(&ep->lock);
		return -EISCONN;
	case SCIFEP_BOUND:
		break;
	}

	ep->state = SCIFEP_LISTENING;
	ep->backlog = backlog;

	ep->conreqcnt = 0;
	ep->acceptcnt = 0;
	INIT_LIST_HEAD(&ep->conlist);
	init_waitqueue_head(&ep->conwq);
	INIT_LIST_HEAD(&ep->li_accept);
	spin_unlock(&ep->lock);

	/*
	 * Listen setup is complete, so delete the qp information that is not
	 * needed on a listening endpoint before placing it on the list of
	 * listening eps.
	 */
	scif_teardown_ep(ep);
	ep->qp_info.qp = NULL;

	mutex_lock(&scif_info.eplock);
	list_add_tail(&ep->list, &scif_info.listen);
	mutex_unlock(&scif_info.eplock);
	return 0;
}
EXPORT_SYMBOL_GPL(scif_listen);
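
/*
 * Usage sketch (illustrative only, not part of this driver): setting up
 * the listening side of a kernel mode SCIF connection. The port number
 * and backlog are arbitrary assumptions for the example.
 */
#if 0
static scif_epd_t example_listen_setup(void)
{
	scif_epd_t epd;
	int err;

	epd = scif_open();
	if (!epd)
		return NULL;

	/* Bind to a well known port so that peers can find us */
	err = scif_bind(epd, 2000);
	if (err < 0)
		goto err_close;

	/* Queue up to 16 pending connection requests */
	err = scif_listen(epd, 16);
	if (err)
		goto err_close;
	return epd;

err_close:
	scif_close(epd);
	return NULL;
}
#endif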

/*
 ************************************************************************
 * SCIF connection flow:
 *
 * 1) A SCIF listening endpoint can call scif_accept(..) to wait for SCIF
 *	connections via a SCIF_CNCT_REQ message
 * 2) A SCIF endpoint can initiate a SCIF connection by calling
 *	scif_connect(..) which calls scif_setup_qp_connect(..) which
 *	allocates the local qp for the endpoint ring buffer and then sends
 *	a SCIF_CNCT_REQ to the remote node and waits for a SCIF_CNCT_GNT or
 *	a SCIF_CNCT_REJ message
 * 3) The peer node handles a SCIF_CNCT_REQ via scif_cnctreq_resp(..) which
 *	wakes up any threads blocked in step 1 or sends a SCIF_CNCT_REJ
 *	message otherwise
 * 4) A thread blocked waiting for incoming connections allocates its local
 *	endpoint QP and ring buffer following which it sends a SCIF_CNCT_GNT
 *	and waits for a SCIF_CNCT_GNT(N)ACK. If the allocation fails then
 *	the node sends a SCIF_CNCT_REJ message
 * 5) Upon receipt of a SCIF_CNCT_GNT or a SCIF_CNCT_REJ message the
 *	connecting endpoint is woken up as part of handling
 *	scif_cnctgnt_resp(..) following which it maps the remote endpoint's
 *	QP, updates its outbound QP and sends a SCIF_CNCT_GNTACK message on
 *	success or a SCIF_CNCT_GNTNACK message on failure and completes
 *	the scif_connect(..) API
 * 6) Upon receipt of a SCIF_CNCT_GNT(N)ACK the accepting endpoint blocked
 *	in step 4 is woken up and completes the scif_accept(..) API
 * 7) The SCIF connection is now established between the two SCIF endpoints.
 */
static int scif_conn_func(struct scif_endpt *ep)
{
	int err = 0;
	struct scifmsg msg;
	struct device *spdev;

	err = scif_reserve_dma_chan(ep);
	if (err) {
		dev_err(&ep->remote_dev->sdev->dev,
			"%s %d err %d\n", __func__, __LINE__, err);
		ep->state = SCIFEP_BOUND;
		goto connect_error_simple;
	}
	/* Initiate the first part of the endpoint QP setup */
	err = scif_setup_qp_connect(ep->qp_info.qp, &ep->qp_info.qp_offset,
				    SCIF_ENDPT_QP_SIZE, ep->remote_dev);
	if (err) {
		dev_err(&ep->remote_dev->sdev->dev,
			"%s err %d qp_offset 0x%llx\n",
			__func__, err, ep->qp_info.qp_offset);
		ep->state = SCIFEP_BOUND;
		goto connect_error_simple;
	}

	spdev = scif_get_peer_dev(ep->remote_dev);
	if (IS_ERR(spdev)) {
		err = PTR_ERR(spdev);
		goto cleanup_qp;
	}
	/* Format connect message and send it */
	msg.src = ep->port;
	msg.dst = ep->conn_port;
	msg.uop = SCIF_CNCT_REQ;
	msg.payload[0] = (u64)ep;
	msg.payload[1] = ep->qp_info.qp_offset;
	err = _scif_nodeqp_send(ep->remote_dev, &msg);
	if (err)
		goto connect_error_dec;
	scif_put_peer_dev(spdev);
	/*
	 * Wait for the remote node to respond with SCIF_CNCT_GNT or
	 * SCIF_CNCT_REJ message.
	 */
	err = wait_event_timeout(ep->conwq, ep->state != SCIFEP_CONNECTING,
				 SCIF_NODE_ALIVE_TIMEOUT);
	if (!err) {
		dev_err(&ep->remote_dev->sdev->dev,
			"%s %d timeout\n", __func__, __LINE__);
		ep->state = SCIFEP_BOUND;
	}
	spdev = scif_get_peer_dev(ep->remote_dev);
	if (IS_ERR(spdev)) {
		err = PTR_ERR(spdev);
		goto cleanup_qp;
	}
	if (ep->state == SCIFEP_MAPPING) {
		err = scif_setup_qp_connect_response(ep->remote_dev,
						     ep->qp_info.qp,
						     ep->qp_info.gnt_pld);
		/*
		 * If the resources to map the queue are not available then
		 * we need to tell the other side to terminate the accept.
		 */
		if (err) {
			dev_err(&ep->remote_dev->sdev->dev,
				"%s %d err %d\n", __func__, __LINE__, err);
			msg.uop = SCIF_CNCT_GNTNACK;
			msg.payload[0] = ep->remote_ep;
			_scif_nodeqp_send(ep->remote_dev, &msg);
			ep->state = SCIFEP_BOUND;
			goto connect_error_dec;
		}

		msg.uop = SCIF_CNCT_GNTACK;
		msg.payload[0] = ep->remote_ep;
		err = _scif_nodeqp_send(ep->remote_dev, &msg);
		if (err) {
			ep->state = SCIFEP_BOUND;
			goto connect_error_dec;
		}
		ep->state = SCIFEP_CONNECTED;
		mutex_lock(&scif_info.connlock);
		list_add_tail(&ep->list, &scif_info.connected);
		mutex_unlock(&scif_info.connlock);
		dev_dbg(&ep->remote_dev->sdev->dev,
			"SCIFAPI connect: ep %p connected\n", ep);
	} else if (ep->state == SCIFEP_BOUND) {
		dev_dbg(&ep->remote_dev->sdev->dev,
			"SCIFAPI connect: ep %p connection refused\n", ep);
		err = -ECONNREFUSED;
		goto connect_error_dec;
	}
	scif_put_peer_dev(spdev);
	return err;
connect_error_dec:
	scif_put_peer_dev(spdev);
cleanup_qp:
	scif_cleanup_ep_qp(ep);
connect_error_simple:
	return err;
}

/*
 * scif_conn_handler:
 *
 * Workqueue handler for servicing non-blocking SCIF connect
 */
void scif_conn_handler(struct work_struct *work)
{
	struct scif_endpt *ep;

	do {
		ep = NULL;
		spin_lock(&scif_info.nb_connect_lock);
		if (!list_empty(&scif_info.nb_connect_list)) {
			ep = list_first_entry(&scif_info.nb_connect_list,
					      struct scif_endpt, conn_list);
			list_del(&ep->conn_list);
		}
		spin_unlock(&scif_info.nb_connect_lock);
		if (ep) {
			ep->conn_err = scif_conn_func(ep);
			wake_up_interruptible(&ep->conn_pend_wq);
		}
	} while (ep);
}

int __scif_connect(scif_epd_t epd, struct scif_port_id *dst, bool non_block)
{
	struct scif_endpt *ep = (struct scif_endpt *)epd;
	int err = 0;
	struct scif_dev *remote_dev;
	struct device *spdev;

	dev_dbg(scif_info.mdev.this_device, "SCIFAPI connect: ep %p %s\n", ep,
		scif_ep_states[ep->state]);

	if (!scif_dev || dst->node > scif_info.maxid)
		return -ENODEV;

	might_sleep();

	remote_dev = &scif_dev[dst->node];
	spdev = scif_get_peer_dev(remote_dev);
	if (IS_ERR(spdev)) {
		err = PTR_ERR(spdev);
		return err;
	}

	spin_lock(&ep->lock);
	switch (ep->state) {
	case SCIFEP_ZOMBIE:
	case SCIFEP_CLOSING:
		err = -EINVAL;
		break;
	case SCIFEP_DISCONNECTED:
		if (ep->conn_async_state == ASYNC_CONN_INPROGRESS)
			ep->conn_async_state = ASYNC_CONN_FLUSH_WORK;
		else
			err = -EINVAL;
		break;
	case SCIFEP_LISTENING:
	case SCIFEP_CLLISTEN:
		err = -EOPNOTSUPP;
		break;
	case SCIFEP_CONNECTING:
	case SCIFEP_MAPPING:
		if (ep->conn_async_state == ASYNC_CONN_INPROGRESS)
			err = -EINPROGRESS;
		else
			err = -EISCONN;
		break;
	case SCIFEP_CONNECTED:
		if (ep->conn_async_state == ASYNC_CONN_INPROGRESS)
			ep->conn_async_state = ASYNC_CONN_FLUSH_WORK;
		else
			err = -EISCONN;
		break;
	case SCIFEP_UNBOUND:
		err = scif_get_new_port();
		if (err < 0)
			break;
		ep->port.port = err;
		ep->port.node = scif_info.nodeid;
		ep->conn_async_state = ASYNC_CONN_IDLE;
		/* Fall through */
	case SCIFEP_BOUND:
		/*
		 * If a non-blocking connect has already been initiated
		 * (conn_async_state is either ASYNC_CONN_INPROGRESS or
		 * ASYNC_CONN_FLUSH_WORK), the end point could end up in
		 * SCIF_BOUND due to an error in the connection process
		 * (e.g., connection refused). If conn_async_state is
		 * ASYNC_CONN_INPROGRESS - transition to ASYNC_CONN_FLUSH_WORK
		 * so that the error status can be collected. If the state is
		 * already ASYNC_CONN_FLUSH_WORK - then set the error to
		 * EINPROGRESS since some other thread is waiting to collect
		 * the error status.
		 */
		if (ep->conn_async_state == ASYNC_CONN_INPROGRESS) {
			ep->conn_async_state = ASYNC_CONN_FLUSH_WORK;
		} else if (ep->conn_async_state == ASYNC_CONN_FLUSH_WORK) {
			err = -EINPROGRESS;
		} else {
			ep->conn_port = *dst;
			init_waitqueue_head(&ep->sendwq);
			init_waitqueue_head(&ep->recvwq);
			init_waitqueue_head(&ep->conwq);
			ep->conn_async_state = 0;

			if (unlikely(non_block))
				ep->conn_async_state = ASYNC_CONN_INPROGRESS;
		}
		break;
	}

	if (err || ep->conn_async_state == ASYNC_CONN_FLUSH_WORK)
		goto connect_simple_unlock1;

	ep->state = SCIFEP_CONNECTING;
	ep->remote_dev = &scif_dev[dst->node];
	ep->qp_info.qp->magic = SCIFEP_MAGIC;
	if (ep->conn_async_state == ASYNC_CONN_INPROGRESS) {
		init_waitqueue_head(&ep->conn_pend_wq);
		spin_lock(&scif_info.nb_connect_lock);
		list_add_tail(&ep->conn_list, &scif_info.nb_connect_list);
		spin_unlock(&scif_info.nb_connect_lock);
		err = -EINPROGRESS;
		schedule_work(&scif_info.conn_work);
	}
connect_simple_unlock1:
	spin_unlock(&ep->lock);
	scif_put_peer_dev(spdev);
	if (err) {
		return err;
	} else if (ep->conn_async_state == ASYNC_CONN_FLUSH_WORK) {
		flush_work(&scif_info.conn_work);
		err = ep->conn_err;
		spin_lock(&ep->lock);
		ep->conn_async_state = ASYNC_CONN_IDLE;
		spin_unlock(&ep->lock);
	} else {
		err = scif_conn_func(ep);
	}
	return err;
}

int scif_connect(scif_epd_t epd, struct scif_port_id *dst)
{
	return __scif_connect(epd, dst, false);
}
EXPORT_SYMBOL_GPL(scif_connect);
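
/*
 * Usage sketch (illustrative only, not part of this driver): the
 * connecting side of a kernel mode SCIF connection. The destination node
 * and port numbers are arbitrary assumptions for the example.
 */
#if 0
static scif_epd_t example_connect(void)
{
	struct scif_port_id dst = { .node = 1, .port = 2000 };
	scif_epd_t epd;
	int err;

	epd = scif_open();
	if (!epd)
		return NULL;

	/* scif_bind(.., 0) picks any available local port */
	err = scif_bind(epd, 0);
	if (err < 0)
		goto err_close;

	/* Blocks until the peer accepts or refuses the connection */
	err = scif_connect(epd, &dst);
	if (err < 0)
		goto err_close;
	return epd;

err_close:
	scif_close(epd);
	return NULL;
}
#endif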

/**
 * scif_accept() - Accept a connection request from the remote node
 *
 * The function accepts a connection request from the remote node.  Successful
 * completion is indicated by a new end point being created and passed back
 * to the caller for future reference.
 *
 * Upon successful completion zero will be returned and the peer information
 * will be filled in.
 *
 * If the end point is not in the listening state -EINVAL will be returned.
 *
 * If during the connection sequence resource allocation fails, -ENOMEM
 * will be returned.
 *
 * If the function is called with the ASYNC flag set and no connection requests
 * are pending it will return -EAGAIN.
 *
 * If the remote side is not sending any connection requests the caller may
 * terminate this function with a signal.  If so, -EINTR will be returned.
 */
int scif_accept(scif_epd_t epd, struct scif_port_id *peer,
		scif_epd_t *newepd, int flags)
{
	struct scif_endpt *lep = (struct scif_endpt *)epd;
	struct scif_endpt *cep;
	struct scif_conreq *conreq;
	struct scifmsg msg;
	int err;
	struct device *spdev;

	dev_dbg(scif_info.mdev.this_device,
		"SCIFAPI accept: ep %p %s\n", lep, scif_ep_states[lep->state]);

	if (flags & ~SCIF_ACCEPT_SYNC)
		return -EINVAL;

	if (!peer || !newepd)
		return -EINVAL;

	might_sleep();
	spin_lock(&lep->lock);
	if (lep->state != SCIFEP_LISTENING) {
		spin_unlock(&lep->lock);
		return -EINVAL;
	}

	if (!lep->conreqcnt && !(flags & SCIF_ACCEPT_SYNC)) {
		/* No connection request present and we do not want to wait */
		spin_unlock(&lep->lock);
		return -EAGAIN;
	}

	lep->files = current->files;
retry_connection:
	spin_unlock(&lep->lock);
	/* Wait for the remote node to send us a SCIF_CNCT_REQ */
	err = wait_event_interruptible(lep->conwq,
				       (lep->conreqcnt ||
				       (lep->state != SCIFEP_LISTENING)));
	if (err)
		return err;

	if (lep->state != SCIFEP_LISTENING)
		return -EINTR;

	spin_lock(&lep->lock);

	if (!lep->conreqcnt)
		goto retry_connection;

	/* Get the first connect request off the list */
	conreq = list_first_entry(&lep->conlist, struct scif_conreq, list);
	list_del(&conreq->list);
	lep->conreqcnt--;
	spin_unlock(&lep->lock);

	/* Fill in the peer information */
	peer->node = conreq->msg.src.node;
	peer->port = conreq->msg.src.port;

	cep = kzalloc(sizeof(*cep), GFP_KERNEL);
	if (!cep) {
		err = -ENOMEM;
		goto scif_accept_error_epalloc;
	}
	spin_lock_init(&cep->lock);
	mutex_init(&cep->sendlock);
	mutex_init(&cep->recvlock);
	cep->state = SCIFEP_CONNECTING;
	cep->remote_dev = &scif_dev[peer->node];
	cep->remote_ep = conreq->msg.payload[0];

	scif_rma_ep_init(cep);

	err = scif_reserve_dma_chan(cep);
	if (err) {
		dev_err(scif_info.mdev.this_device,
			"%s %d err %d\n", __func__, __LINE__, err);
		goto scif_accept_error_qpalloc;
	}

	cep->qp_info.qp = kzalloc(sizeof(*cep->qp_info.qp), GFP_KERNEL);
	if (!cep->qp_info.qp) {
		err = -ENOMEM;
		goto scif_accept_error_qpalloc;
	}

	err = scif_anon_inode_getfile(cep);
	if (err)
		goto scif_accept_error_anon_inode;

	cep->qp_info.qp->magic = SCIFEP_MAGIC;
	spdev = scif_get_peer_dev(cep->remote_dev);
	if (IS_ERR(spdev)) {
		err = PTR_ERR(spdev);
		goto scif_accept_error_map;
	}
	err = scif_setup_qp_accept(cep->qp_info.qp, &cep->qp_info.qp_offset,
				   conreq->msg.payload[1], SCIF_ENDPT_QP_SIZE,
				   cep->remote_dev);
	if (err) {
		dev_dbg(&cep->remote_dev->sdev->dev,
			"SCIFAPI accept: ep %p new %p scif_setup_qp_accept %d qp_offset 0x%llx\n",
			lep, cep, err, cep->qp_info.qp_offset);
		scif_put_peer_dev(spdev);
		goto scif_accept_error_map;
	}

	cep->port.node = lep->port.node;
	cep->port.port = lep->port.port;
	cep->peer.node = peer->node;
	cep->peer.port = peer->port;
	init_waitqueue_head(&cep->sendwq);
	init_waitqueue_head(&cep->recvwq);
	init_waitqueue_head(&cep->conwq);

	msg.uop = SCIF_CNCT_GNT;
	msg.src = cep->port;
	msg.payload[0] = cep->remote_ep;
	msg.payload[1] = cep->qp_info.qp_offset;
	msg.payload[2] = (u64)cep;

	err = _scif_nodeqp_send(cep->remote_dev, &msg);
	scif_put_peer_dev(spdev);
	if (err)
		goto scif_accept_error_map;
retry:
	/* Wait for the remote node to respond with SCIF_CNCT_GNT(N)ACK */
	err = wait_event_timeout(cep->conwq, cep->state != SCIFEP_CONNECTING,
				 SCIF_NODE_ACCEPT_TIMEOUT);
	if (!err && scifdev_alive(cep))
		goto retry;
	err = !err ? -ENODEV : 0;
	if (err)
		goto scif_accept_error_map;
	kfree(conreq);

	spin_lock(&cep->lock);

	if (cep->state == SCIFEP_CLOSING) {
		/*
		 * Remote failed to allocate resources and NAKed the grant.
		 * There is at this point nothing referencing the new end point.
		 */
		spin_unlock(&cep->lock);
		scif_teardown_ep(cep);
		kfree(cep);

		/* If called with the sync flag then go back and wait. */
		if (flags & SCIF_ACCEPT_SYNC) {
			spin_lock(&lep->lock);
			goto retry_connection;
		}
		return -EAGAIN;
	}

	scif_get_port(cep->port.port);
	*newepd = (scif_epd_t)cep;
	spin_unlock(&cep->lock);
	return 0;
scif_accept_error_map:
	scif_anon_inode_fput(cep);
scif_accept_error_anon_inode:
	scif_teardown_ep(cep);
scif_accept_error_qpalloc:
	kfree(cep);
scif_accept_error_epalloc:
	msg.uop = SCIF_CNCT_REJ;
	msg.dst.node = conreq->msg.src.node;
	msg.dst.port = conreq->msg.src.port;
	msg.payload[0] = conreq->msg.payload[0];
	msg.payload[1] = conreq->msg.payload[1];
	scif_nodeqp_send(&scif_dev[conreq->msg.src.node], &msg);
	kfree(conreq);
	return err;
}
EXPORT_SYMBOL_GPL(scif_accept);
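
/*
 * Usage sketch (illustrative only, not part of this driver): a blocking
 * accept on a listening endpoint set up as in the listen example above.
 */
#if 0
static int example_accept_one(scif_epd_t lepd)
{
	struct scif_port_id peer;
	scif_epd_t nepd;
	int err;

	/* Block until a peer connects; peer is filled in on success */
	err = scif_accept(lepd, &peer, &nepd, SCIF_ACCEPT_SYNC);
	if (err)
		return err;

	pr_info("accepted connection from node %u port %u\n",
		peer.node, peer.port);
	/* ... exchange messages on nepd, then ... */
	return scif_close(nepd);
}
#endif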

/*
 * scif_msg_param_check:
 * @epd: The end point returned from scif_open()
 * @len: Length of the message
 * @flags: blocking or non blocking
 *
 * Validate parameters for messaging APIs scif_send(..)/scif_recv(..).
 */
static inline int scif_msg_param_check(scif_epd_t epd, int len, int flags)
{
	int ret = -EINVAL;

	if (len < 0)
		goto err_ret;
	if (flags && (!(flags & SCIF_RECV_BLOCK)))
		goto err_ret;
	ret = 0;
err_ret:
	return ret;
}

static int _scif_send(scif_epd_t epd, void *msg, int len, int flags)
{
	struct scif_endpt *ep = (struct scif_endpt *)epd;
	struct scifmsg notif_msg;
	int curr_xfer_len = 0, sent_len = 0, write_count;
	int ret = 0;
	struct scif_qp *qp = ep->qp_info.qp;

	if (flags & SCIF_SEND_BLOCK)
		might_sleep();

	spin_lock(&ep->lock);
	while (sent_len != len && SCIFEP_CONNECTED == ep->state) {
		write_count = scif_rb_space(&qp->outbound_q);
		if (write_count) {
			/* Best effort to send as much data as possible */
			curr_xfer_len = min(len - sent_len, write_count);
			ret = scif_rb_write(&qp->outbound_q, msg,
					    curr_xfer_len);
			if (ret < 0)
				break;
			/* Success. Update write pointer */
			scif_rb_commit(&qp->outbound_q);
			/*
			 * Send a notification to the peer about the
			 * produced data message.
			 */
			notif_msg.src = ep->port;
			notif_msg.uop = SCIF_CLIENT_SENT;
			notif_msg.payload[0] = ep->remote_ep;
			ret = _scif_nodeqp_send(ep->remote_dev, &notif_msg);
			if (ret)
				break;
			sent_len += curr_xfer_len;
			msg = msg + curr_xfer_len;
			continue;
		}
		curr_xfer_len = min(len - sent_len, SCIF_ENDPT_QP_SIZE - 1);
		/* Not enough RB space. Return for the non-blocking case */
		if (!(flags & SCIF_SEND_BLOCK))
			break;

		spin_unlock(&ep->lock);
		/* Wait for a SCIF_CLIENT_RCVD message in the blocking case */
		ret =
		wait_event_interruptible(ep->sendwq,
					 (SCIFEP_CONNECTED != ep->state) ||
					 (scif_rb_space(&qp->outbound_q) >=
					 curr_xfer_len));
		spin_lock(&ep->lock);
		if (ret)
			break;
	}
	if (sent_len)
		ret = sent_len;
	else if (!ret && SCIFEP_CONNECTED != ep->state)
		ret = SCIFEP_DISCONNECTED == ep->state ?
			-ECONNRESET : -ENOTCONN;
	spin_unlock(&ep->lock);
	return ret;
}

static int _scif_recv(scif_epd_t epd, void *msg, int len, int flags)
{
	int read_size;
	struct scif_endpt *ep = (struct scif_endpt *)epd;
	struct scifmsg notif_msg;
	int curr_recv_len = 0, remaining_len = len, read_count;
	int ret = 0;
	struct scif_qp *qp = ep->qp_info.qp;

	if (flags & SCIF_RECV_BLOCK)
		might_sleep();
	spin_lock(&ep->lock);
	while (remaining_len && (SCIFEP_CONNECTED == ep->state ||
				 SCIFEP_DISCONNECTED == ep->state)) {
		read_count = scif_rb_count(&qp->inbound_q, remaining_len);
		if (read_count) {
			/*
			 * Best effort to recv as much data as there
			 * are bytes to read in the RB, particularly
			 * important for the non-blocking case.
			 */
			curr_recv_len = min(remaining_len, read_count);
			read_size = scif_rb_get_next(&qp->inbound_q,
						     msg, curr_recv_len);
			if (ep->state == SCIFEP_CONNECTED) {
				/*
				 * Update the read pointer only if the endpoint
				 * is still connected else the read pointer
				 * might no longer exist since the peer has
				 * freed resources!
				 */
				scif_rb_update_read_ptr(&qp->inbound_q);
				/*
				 * Send a notification to the peer about the
				 * consumed data message only if the EP is in
				 * SCIFEP_CONNECTED state.
				 */
				notif_msg.src = ep->port;
				notif_msg.uop = SCIF_CLIENT_RCVD;
				notif_msg.payload[0] = ep->remote_ep;
				ret = _scif_nodeqp_send(ep->remote_dev,
							&notif_msg);
				if (ret)
					break;
			}
			remaining_len -= curr_recv_len;
			msg = msg + curr_recv_len;
			continue;
		}
		/*
		 * Bail out now if the EP is in SCIFEP_DISCONNECTED state else
		 * we will keep looping forever.
		 */
		if (ep->state == SCIFEP_DISCONNECTED)
			break;
		/*
		 * Return in the non-blocking case if there is no data
		 * to read in this iteration.
		 */
		if (!(flags & SCIF_RECV_BLOCK))
			break;
		curr_recv_len = min(remaining_len, SCIF_ENDPT_QP_SIZE - 1);
		spin_unlock(&ep->lock);
		/*
		 * Wait for a SCIF_CLIENT_SENT message in the blocking case
		 * or until the other side disconnects.
		 */
		ret =
		wait_event_interruptible(ep->recvwq,
					 SCIFEP_CONNECTED != ep->state ||
					 scif_rb_count(&qp->inbound_q,
						       curr_recv_len)
					 >= curr_recv_len);
		spin_lock(&ep->lock);
		if (ret)
			break;
	}
	if (len - remaining_len)
		ret = len - remaining_len;
	else if (!ret && ep->state != SCIFEP_CONNECTED)
		ret = ep->state == SCIFEP_DISCONNECTED ?
			-ECONNRESET : -ENOTCONN;
	spin_unlock(&ep->lock);
	return ret;
}

/**
 * scif_user_send() - Send data to connection queue
 * @epd: The end point returned from scif_open()
 * @msg: Address of data to send
 * @len: Length to send
 * @flags: blocking or non blocking
 *
 * This function is called from the driver IOCTL entry point
 * only and is a wrapper for _scif_send().
 */
int scif_user_send(scif_epd_t epd, void __user *msg, int len, int flags)
{
	struct scif_endpt *ep = (struct scif_endpt *)epd;
	int err = 0;
	int sent_len = 0;
	char *tmp;
	int loop_len;
	int chunk_len = min(len, (1 << (MAX_ORDER + PAGE_SHIFT - 1)));

	dev_dbg(scif_info.mdev.this_device,
		"SCIFAPI send (U): ep %p %s\n", ep, scif_ep_states[ep->state]);
	if (!len)
		return 0;

	err = scif_msg_param_check(epd, len, flags);
	if (err)
		goto send_err;

	tmp = kmalloc(chunk_len, GFP_KERNEL);
	if (!tmp) {
		err = -ENOMEM;
		goto send_err;
	}
	/*
	 * Grabbing the lock before breaking up the transfer in
	 * multiple chunks is required to ensure that messages do
	 * not get fragmented and reordered.
	 */
	mutex_lock(&ep->sendlock);
	while (sent_len != len) {
		loop_len = len - sent_len;
		loop_len = min(chunk_len, loop_len);
		if (copy_from_user(tmp, msg, loop_len)) {
			err = -EFAULT;
			goto send_free_err;
		}
		err = _scif_send(epd, tmp, loop_len, flags);
		if (err < 0)
			goto send_free_err;
		sent_len += err;
		msg += err;
		if (err != loop_len)
			goto send_free_err;
	}
send_free_err:
	mutex_unlock(&ep->sendlock);
	kfree(tmp);
send_err:
	return err < 0 ? err : sent_len;
}

/**
 * scif_user_recv() - Receive data from connection queue
 * @epd: The end point returned from scif_open()
 * @msg: Address to place data
 * @len: Length to receive
 * @flags: blocking or non blocking
 *
 * This function is called from the driver IOCTL entry point
 * only and is a wrapper for _scif_recv().
 */
int scif_user_recv(scif_epd_t epd, void __user *msg, int len, int flags)
{
	struct scif_endpt *ep = (struct scif_endpt *)epd;
	int err = 0;
	int recv_len = 0;
	char *tmp;
	int loop_len;
	int chunk_len = min(len, (1 << (MAX_ORDER + PAGE_SHIFT - 1)));

	dev_dbg(scif_info.mdev.this_device,
		"SCIFAPI recv (U): ep %p %s\n", ep, scif_ep_states[ep->state]);
	if (!len)
		return 0;

	err = scif_msg_param_check(epd, len, flags);
	if (err)
		goto recv_err;

	tmp = kmalloc(chunk_len, GFP_KERNEL);
	if (!tmp) {
		err = -ENOMEM;
		goto recv_err;
	}
	/*
	 * Grabbing the lock before breaking up the transfer in
	 * multiple chunks is required to ensure that messages do
	 * not get fragmented and reordered.
	 */
	mutex_lock(&ep->recvlock);
	while (recv_len != len) {
		loop_len = len - recv_len;
		loop_len = min(chunk_len, loop_len);
		err = _scif_recv(epd, tmp, loop_len, flags);
		if (err < 0)
			goto recv_free_err;
		if (copy_to_user(msg, tmp, err)) {
			err = -EFAULT;
			goto recv_free_err;
		}
		recv_len += err;
		msg += err;
		if (err != loop_len)
			goto recv_free_err;
	}
recv_free_err:
	mutex_unlock(&ep->recvlock);
	kfree(tmp);
recv_err:
	return err < 0 ? err : recv_len;
}

/**
 * scif_send() - Send data to connection queue
 * @epd: The end point returned from scif_open()
 * @msg: Address of data to send
 * @len: Length to send
 * @flags: blocking or non blocking
 *
 * This function is called from the kernel mode only and is
 * a wrapper for _scif_send().
 */
int scif_send(scif_epd_t epd, void *msg, int len, int flags)
{
	struct scif_endpt *ep = (struct scif_endpt *)epd;
	int ret;

	dev_dbg(scif_info.mdev.this_device,
		"SCIFAPI send (K): ep %p %s\n", ep, scif_ep_states[ep->state]);
	if (!len)
		return 0;

	ret = scif_msg_param_check(epd, len, flags);
	if (ret)
		return ret;
	if (!ep->remote_dev)
		return -ENOTCONN;
	/*
	 * Grab the mutex lock in the blocking case only
	 * to ensure messages do not get fragmented/reordered.
	 * The non blocking mode is protected using spin locks
	 * in _scif_send().
	 */
	if (flags & SCIF_SEND_BLOCK)
		mutex_lock(&ep->sendlock);

	ret = _scif_send(epd, msg, len, flags);

	if (flags & SCIF_SEND_BLOCK)
		mutex_unlock(&ep->sendlock);
	return ret;
}
EXPORT_SYMBOL_GPL(scif_send);

/**
 * scif_recv() - Receive data from connection queue
 * @epd: The end point returned from scif_open()
 * @msg: Address to place data
 * @len: Length to receive
 * @flags: blocking or non blocking
 *
 * This function is called from the kernel mode only and is
 * a wrapper for _scif_recv().
 */
int scif_recv(scif_epd_t epd, void *msg, int len, int flags)
{
	struct scif_endpt *ep = (struct scif_endpt *)epd;
	int ret;

	dev_dbg(scif_info.mdev.this_device,
		"SCIFAPI recv (K): ep %p %s\n", ep, scif_ep_states[ep->state]);
	if (!len)
		return 0;

	ret = scif_msg_param_check(epd, len, flags);
	if (ret)
		return ret;
	/*
	 * Grab the mutex lock in the blocking case only
	 * to ensure messages do not get fragmented/reordered.
	 * The non blocking mode is protected using spin locks
	 * in _scif_recv().
	 */
	if (flags & SCIF_RECV_BLOCK)
		mutex_lock(&ep->recvlock);

	ret = _scif_recv(epd, msg, len, flags);

	if (flags & SCIF_RECV_BLOCK)
		mutex_unlock(&ep->recvlock);

	return ret;
}
EXPORT_SYMBOL_GPL(scif_recv);
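
/*
 * Usage sketch (illustrative only, not part of this driver): a blocking
 * kernel mode echo using scif_recv() and scif_send() on a connected
 * endpoint. The 64 byte buffer size is an arbitrary assumption.
 */
#if 0
static int example_echo(scif_epd_t epd)
{
	char buf[64];
	int len;

	/* With SCIF_RECV_BLOCK this blocks until all of buf is received */
	len = scif_recv(epd, buf, sizeof(buf), SCIF_RECV_BLOCK);
	if (len < 0)
		return len;

	/* Block until everything received has been sent back */
	return scif_send(epd, buf, len, SCIF_SEND_BLOCK);
}
#endif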

static inline void _scif_poll_wait(struct file *f, wait_queue_head_t *wq,
				   poll_table *p, struct scif_endpt *ep)
{
	/*
	 * Because poll_wait makes a GFP_KERNEL allocation, give up the lock
	 * and regrab it afterwards. Because the endpoint state might have
	 * changed while the lock was given up, the state must be checked
	 * again after re-acquiring the lock. The code in __scif_pollfd(..)
	 * does this.
	 */
	spin_unlock(&ep->lock);
	poll_wait(f, wq, p);
	spin_lock(&ep->lock);
}

unsigned int
__scif_pollfd(struct file *f, poll_table *wait, struct scif_endpt *ep)
{
	unsigned int mask = 0;

	dev_dbg(scif_info.mdev.this_device,
		"SCIFAPI pollfd: ep %p %s\n", ep, scif_ep_states[ep->state]);

	spin_lock(&ep->lock);

	/* Endpoint is waiting for a non-blocking connect to complete */
	if (ep->conn_async_state == ASYNC_CONN_INPROGRESS) {
		_scif_poll_wait(f, &ep->conn_pend_wq, wait, ep);
		if (ep->conn_async_state == ASYNC_CONN_INPROGRESS) {
			if (ep->state == SCIFEP_CONNECTED ||
			    ep->state == SCIFEP_DISCONNECTED ||
			    ep->conn_err)
				mask |= POLLOUT;
			goto exit;
		}
	}

	/* Endpoint is listening for incoming connection requests */
	if (ep->state == SCIFEP_LISTENING) {
		_scif_poll_wait(f, &ep->conwq, wait, ep);
		if (ep->state == SCIFEP_LISTENING) {
			if (ep->conreqcnt)
				mask |= POLLIN;
			goto exit;
		}
	}

	/* Endpoint is connected or disconnected */
	if (ep->state == SCIFEP_CONNECTED || ep->state == SCIFEP_DISCONNECTED) {
		if (poll_requested_events(wait) & POLLIN)
			_scif_poll_wait(f, &ep->recvwq, wait, ep);
		if (poll_requested_events(wait) & POLLOUT)
			_scif_poll_wait(f, &ep->sendwq, wait, ep);
		if (ep->state == SCIFEP_CONNECTED ||
		    ep->state == SCIFEP_DISCONNECTED) {
			/* Data can be read without blocking */
			if (scif_rb_count(&ep->qp_info.qp->inbound_q, 1))
				mask |= POLLIN;
			/* Data can be written without blocking */
			if (scif_rb_space(&ep->qp_info.qp->outbound_q))
				mask |= POLLOUT;
			/* Return POLLHUP if endpoint is disconnected */
			if (ep->state == SCIFEP_DISCONNECTED)
				mask |= POLLHUP;
			goto exit;
		}
	}

	/* Return POLLERR if the endpoint is in none of the above states */
	mask |= POLLERR;
exit:
	spin_unlock(&ep->lock);
	return mask;
}

/**
 * scif_poll() - Kernel mode SCIF poll
 * @ufds: Array of scif_pollepd structures containing the end points
 *	  and events to poll on
 * @nfds: Size of the ufds array
 * @timeout_msecs: Timeout in msecs, a negative value implies an infinite
 *		   timeout
 *
 * The code flow in this function is based on do_poll(..) in select.c
 *
 * Returns the number of endpoints which have pending events or 0 in
 * the event of a timeout. If a signal is used for wake up, -EINTR is
 * returned.
 */
int
scif_poll(struct scif_pollepd *ufds, unsigned int nfds, long timeout_msecs)
{
	struct poll_wqueues table;
	poll_table *pt;
	int i, mask, count = 0, timed_out = timeout_msecs == 0;
	u64 timeout = timeout_msecs < 0 ? MAX_SCHEDULE_TIMEOUT
		: msecs_to_jiffies(timeout_msecs);

	poll_initwait(&table);
	pt = &table.pt;
	while (1) {
		for (i = 0; i < nfds; i++) {
			pt->_key = ufds[i].events | POLLERR | POLLHUP;
			mask = __scif_pollfd(ufds[i].epd->anon,
					     pt, ufds[i].epd);
			mask &= ufds[i].events | POLLERR | POLLHUP;
			if (mask) {
				count++;
				pt->_qproc = NULL;
			}
			ufds[i].revents = mask;
		}
		pt->_qproc = NULL;
		if (!count) {
			count = table.error;
			if (signal_pending(current))
				count = -EINTR;
		}
		if (count || timed_out)
			break;

		if (!schedule_timeout_interruptible(timeout))
			timed_out = 1;
	}
	poll_freewait(&table);
	return count;
}
EXPORT_SYMBOL_GPL(scif_poll);
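
/*
 * Usage sketch (illustrative only, not part of this driver): waiting up
 * to one second for a connected endpoint to become readable via the
 * kernel mode scif_poll() API.
 */
#if 0
static int example_poll_readable(scif_epd_t epd)
{
	struct scif_pollepd pollepd = {
		.epd = epd,
		.events = POLLIN,
	};
	int ret;

	ret = scif_poll(&pollepd, 1, 1000);	/* 1000 msec timeout */
	if (ret < 0)
		return ret;		/* -EINTR on signal */
	if (!ret)
		return -ETIMEDOUT;	/* no events within the timeout */
	return (pollepd.revents & POLLIN) ? 0 : -EIO;
}
#endif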

int scif_get_node_ids(u16 *nodes, int len, u16 *self)
{
	int online = 0;
	int offset = 0;
	int node;

	if (!scif_is_mgmt_node())
		scif_get_node_info();

	*self = scif_info.nodeid;
	mutex_lock(&scif_info.conflock);
	len = min_t(int, len, scif_info.total);
	for (node = 0; node <= scif_info.maxid; node++) {
		if (_scifdev_alive(&scif_dev[node])) {
			online++;
			if (offset < len)
				nodes[offset++] = node;
		}
	}
	dev_dbg(scif_info.mdev.this_device,
		"SCIFAPI get_node_ids total %d online %d filled in %d nodes\n",
		scif_info.total, online, offset);
	mutex_unlock(&scif_info.conflock);

	return online;
}
EXPORT_SYMBOL_GPL(scif_get_node_ids);
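
/*
 * Usage sketch (illustrative only, not part of this driver): enumerating
 * the online SCIF nodes. The array size of 8 is an arbitrary assumption.
 */
#if 0
static void example_list_nodes(void)
{
	u16 nodes[8], self;
	int online, i;

	online = scif_get_node_ids(nodes, ARRAY_SIZE(nodes), &self);
	pr_info("self is node %u, %d node(s) online\n", self, online);
	for (i = 0; i < min_t(int, online, ARRAY_SIZE(nodes)); i++)
		pr_info("node %u is online\n", nodes[i]);
}
#endif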

static int scif_add_client_dev(struct device *dev, struct subsys_interface *si)
{
	struct scif_client *client =
		container_of(si, struct scif_client, si);
	struct scif_peer_dev *spdev =
		container_of(dev, struct scif_peer_dev, dev);

	if (client->probe)
		client->probe(spdev);
	return 0;
}

static void scif_remove_client_dev(struct device *dev,
				   struct subsys_interface *si)
{
	struct scif_client *client =
		container_of(si, struct scif_client, si);
	struct scif_peer_dev *spdev =
		container_of(dev, struct scif_peer_dev, dev);

	if (client->remove)
		client->remove(spdev);
}

void scif_client_unregister(struct scif_client *client)
{
	subsys_interface_unregister(&client->si);
}
EXPORT_SYMBOL_GPL(scif_client_unregister);

int scif_client_register(struct scif_client *client)
{
	struct subsys_interface *si = &client->si;

	si->name = client->name;
	si->subsys = &scif_peer_bus;
	si->add_dev = scif_add_client_dev;
	si->remove_dev = scif_remove_client_dev;

	return subsys_interface_register(&client->si);
}
EXPORT_SYMBOL_GPL(scif_client_register);
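
/*
 * Usage sketch (illustrative only, not part of this driver): registering
 * a SCIF client so that its probe/remove callbacks run as peer devices
 * come and go. The callback bodies are hypothetical placeholders.
 */
#if 0
static void example_probe(struct scif_peer_dev *spdev)
{
	dev_info(&spdev->dev, "peer node %u online\n", spdev->dnode);
}

static void example_remove(struct scif_peer_dev *spdev)
{
	dev_info(&spdev->dev, "peer node %u offline\n", spdev->dnode);
}

static struct scif_client example_client = {
	.name = "example",
	.probe = example_probe,
	.remove = example_remove,
};

/* Typically called from the client module's init/exit paths */
static int __init example_init(void)
{
	return scif_client_register(&example_client);
}

static void __exit example_exit(void)
{
	scif_client_unregister(&example_client);
}
#endif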