1 /*
2 * Intel MIC Platform Software Stack (MPSS)
3 *
4 * Copyright(c) 2014 Intel Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License, version 2, as
8 * published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License for more details.
14 *
15 * Intel SCIF driver.
16 *
17 */
18 #include <linux/scif.h>
19 #include "scif_main.h"
20 #include "scif_map.h"
21
22 static const char * const scif_ep_states[] = {
23 "Unbound",
24 "Bound",
25 "Listening",
26 "Connected",
27 "Connecting",
28 "Mapping",
29 "Closing",
30 "Close Listening",
31 "Disconnected",
32 "Zombie"};
33
34 enum conn_async_state {
35 ASYNC_CONN_IDLE = 1, /* ep setup for async connect */
36 ASYNC_CONN_INPROGRESS, /* async connect in progress */
37 ASYNC_CONN_FLUSH_WORK /* async work flush in progress */
38 };
39
/*
 * File operations for the anonymous inode file associated with a SCIF
 * endpoint, used for kernel-mode SCIF poll. Kernel-mode SCIF poll calls
 * portions of the in-kernel poll API which take a struct file *. Since a
 * struct file is not available to kernel-mode SCIF, an anonymous file is
 * used for this purpose.
 */
47 const struct file_operations scif_anon_fops = {
48 .owner = THIS_MODULE,
49 };
50
scif_epd_t scif_open(void)
52 {
53 struct scif_endpt *ep;
54 int err;
55
56 might_sleep();
57 ep = kzalloc(sizeof(*ep), GFP_KERNEL);
58 if (!ep)
59 goto err_ep_alloc;
60
61 ep->qp_info.qp = kzalloc(sizeof(*ep->qp_info.qp), GFP_KERNEL);
62 if (!ep->qp_info.qp)
63 goto err_qp_alloc;
64
65 err = scif_anon_inode_getfile(ep);
66 if (err)
67 goto err_anon_inode;
68
69 spin_lock_init(&ep->lock);
70 mutex_init(&ep->sendlock);
71 mutex_init(&ep->recvlock);
72
73 scif_rma_ep_init(ep);
74 ep->state = SCIFEP_UNBOUND;
75 dev_dbg(scif_info.mdev.this_device,
76 "SCIFAPI open: ep %p success\n", ep);
77 return ep;
78
79 err_anon_inode:
80 kfree(ep->qp_info.qp);
81 err_qp_alloc:
82 kfree(ep);
83 err_ep_alloc:
84 return NULL;
85 }
86 EXPORT_SYMBOL_GPL(scif_open);
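/*
 * Example (illustrative sketch only, not part of the driver): a kernel-mode
 * SCIF client would typically pair scif_open() with scif_close():
 *
 *	scif_epd_t epd = scif_open();
 *
 *	if (!epd)
 *		return -ENOMEM;
 *	...
 *	scif_close(epd);
 */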
87
/*
 * scif_disconnect_ep - Disconnects the endpoint if found
 * @ep: The endpoint returned from scif_open()
 */
static struct scif_endpt *scif_disconnect_ep(struct scif_endpt *ep)
93 {
94 struct scifmsg msg;
95 struct scif_endpt *fep = NULL;
96 struct scif_endpt *tmpep;
97 struct list_head *pos, *tmpq;
98 int err;
99
100 /*
101 * Wake up any threads blocked in send()/recv() before closing
102 * out the connection. Grabbing and releasing the send/recv lock
103 * will ensure that any blocked senders/receivers have exited for
104 * Ring 0 endpoints. It is a Ring 0 bug to call send/recv after
105 * close. Ring 3 endpoints are not affected since close will not
106 * be called while there are IOCTLs executing.
107 */
108 wake_up_interruptible(&ep->sendwq);
109 wake_up_interruptible(&ep->recvwq);
110 mutex_lock(&ep->sendlock);
111 mutex_unlock(&ep->sendlock);
112 mutex_lock(&ep->recvlock);
113 mutex_unlock(&ep->recvlock);
114
115 /* Remove from the connected list */
116 mutex_lock(&scif_info.connlock);
117 list_for_each_safe(pos, tmpq, &scif_info.connected) {
118 tmpep = list_entry(pos, struct scif_endpt, list);
119 if (tmpep == ep) {
120 list_del(pos);
121 fep = tmpep;
122 spin_lock(&ep->lock);
123 break;
124 }
125 }
126
127 if (!fep) {
/*
 * The other side completed the disconnect before the endpoint
 * could be removed from the connected list. The ep lock is
 * therefore not held; traverse the disconnected list to find
 * the endpoint, then release the conn lock.
 */
134 list_for_each_safe(pos, tmpq, &scif_info.disconnected) {
135 tmpep = list_entry(pos, struct scif_endpt, list);
136 if (tmpep == ep) {
137 list_del(pos);
138 break;
139 }
140 }
141 mutex_unlock(&scif_info.connlock);
142 return NULL;
143 }
144
145 init_completion(&ep->discon);
146 msg.uop = SCIF_DISCNCT;
147 msg.src = ep->port;
148 msg.dst = ep->peer;
149 msg.payload[0] = (u64)ep;
150 msg.payload[1] = ep->remote_ep;
151
152 err = scif_nodeqp_send(ep->remote_dev, &msg);
153 spin_unlock(&ep->lock);
154 mutex_unlock(&scif_info.connlock);
155
156 if (!err)
157 /* Wait for the remote node to respond with SCIF_DISCNT_ACK */
158 wait_for_completion_timeout(&ep->discon,
159 SCIF_NODE_ALIVE_TIMEOUT);
160 return ep;
161 }
162
int scif_close(scif_epd_t epd)
164 {
165 struct scif_endpt *ep = (struct scif_endpt *)epd;
166 struct scif_endpt *tmpep;
167 struct list_head *pos, *tmpq;
168 enum scif_epd_state oldstate;
169 bool flush_conn;
170
171 dev_dbg(scif_info.mdev.this_device, "SCIFAPI close: ep %p %s\n",
172 ep, scif_ep_states[ep->state]);
173 might_sleep();
174 spin_lock(&ep->lock);
175 flush_conn = (ep->conn_async_state == ASYNC_CONN_INPROGRESS);
176 spin_unlock(&ep->lock);
177
178 if (flush_conn)
179 flush_work(&scif_info.conn_work);
180
181 spin_lock(&ep->lock);
182 oldstate = ep->state;
183
184 ep->state = SCIFEP_CLOSING;
185
186 switch (oldstate) {
187 case SCIFEP_ZOMBIE:
188 dev_err(scif_info.mdev.this_device,
189 "SCIFAPI close: zombie state unexpected\n");
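		/* fall through */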
190 case SCIFEP_DISCONNECTED:
191 spin_unlock(&ep->lock);
192 scif_unregister_all_windows(epd);
193 /* Remove from the disconnected list */
194 mutex_lock(&scif_info.connlock);
195 list_for_each_safe(pos, tmpq, &scif_info.disconnected) {
196 tmpep = list_entry(pos, struct scif_endpt, list);
197 if (tmpep == ep) {
198 list_del(pos);
199 break;
200 }
201 }
202 mutex_unlock(&scif_info.connlock);
203 break;
204 case SCIFEP_UNBOUND:
205 case SCIFEP_BOUND:
206 case SCIFEP_CONNECTING:
207 spin_unlock(&ep->lock);
208 break;
209 case SCIFEP_MAPPING:
210 case SCIFEP_CONNECTED:
211 case SCIFEP_CLOSING:
212 {
213 spin_unlock(&ep->lock);
214 scif_unregister_all_windows(epd);
215 scif_disconnect_ep(ep);
216 break;
217 }
218 case SCIFEP_LISTENING:
219 case SCIFEP_CLLISTEN:
220 {
221 struct scif_conreq *conreq;
222 struct scifmsg msg;
223 struct scif_endpt *aep;
224
225 spin_unlock(&ep->lock);
226 mutex_lock(&scif_info.eplock);
227
228 /* remove from listen list */
229 list_for_each_safe(pos, tmpq, &scif_info.listen) {
230 tmpep = list_entry(pos, struct scif_endpt, list);
231 if (tmpep == ep)
232 list_del(pos);
233 }
234 /* Remove any dangling accepts */
235 while (ep->acceptcnt) {
236 aep = list_first_entry(&ep->li_accept,
237 struct scif_endpt, liacceptlist);
238 list_del(&aep->liacceptlist);
239 scif_put_port(aep->port.port);
240 list_for_each_safe(pos, tmpq, &scif_info.uaccept) {
241 tmpep = list_entry(pos, struct scif_endpt,
242 miacceptlist);
243 if (tmpep == aep) {
244 list_del(pos);
245 break;
246 }
247 }
248 mutex_unlock(&scif_info.eplock);
249 mutex_lock(&scif_info.connlock);
250 list_for_each_safe(pos, tmpq, &scif_info.connected) {
251 tmpep = list_entry(pos,
252 struct scif_endpt, list);
253 if (tmpep == aep) {
254 list_del(pos);
255 break;
256 }
257 }
258 list_for_each_safe(pos, tmpq, &scif_info.disconnected) {
259 tmpep = list_entry(pos,
260 struct scif_endpt, list);
261 if (tmpep == aep) {
262 list_del(pos);
263 break;
264 }
265 }
266 mutex_unlock(&scif_info.connlock);
267 scif_teardown_ep(aep);
268 mutex_lock(&scif_info.eplock);
269 scif_add_epd_to_zombie_list(aep, SCIF_EPLOCK_HELD);
270 ep->acceptcnt--;
271 }
272
273 spin_lock(&ep->lock);
274 mutex_unlock(&scif_info.eplock);
275
276 /* Remove and reject any pending connection requests. */
277 while (ep->conreqcnt) {
278 conreq = list_first_entry(&ep->conlist,
279 struct scif_conreq, list);
280 list_del(&conreq->list);
281
282 msg.uop = SCIF_CNCT_REJ;
283 msg.dst.node = conreq->msg.src.node;
284 msg.dst.port = conreq->msg.src.port;
285 msg.payload[0] = conreq->msg.payload[0];
286 msg.payload[1] = conreq->msg.payload[1];
/*
 * No error handling on purpose for scif_nodeqp_send().
 * If the remote node is lost we still want to free the
 * connection requests on the local node.
 */
292 scif_nodeqp_send(&scif_dev[conreq->msg.src.node],
293 &msg);
294 ep->conreqcnt--;
295 kfree(conreq);
296 }
297
298 spin_unlock(&ep->lock);
299 /* If a kSCIF accept is waiting wake it up */
300 wake_up_interruptible(&ep->conwq);
301 break;
302 }
303 }
304 scif_put_port(ep->port.port);
305 scif_anon_inode_fput(ep);
306 scif_teardown_ep(ep);
307 scif_add_epd_to_zombie_list(ep, !SCIF_EPLOCK_HELD);
308 return 0;
309 }
310 EXPORT_SYMBOL_GPL(scif_close);
311
312 /**
313 * scif_flush() - Wakes up any blocking accepts. The endpoint will no longer
314 * accept new connections.
315 * @epd: The end point returned from scif_open()
316 */
int __scif_flush(scif_epd_t epd)
318 {
319 struct scif_endpt *ep = (struct scif_endpt *)epd;
320
321 switch (ep->state) {
322 case SCIFEP_LISTENING:
323 {
324 ep->state = SCIFEP_CLLISTEN;
325
326 /* If an accept is waiting wake it up */
327 wake_up_interruptible(&ep->conwq);
328 break;
329 }
330 default:
331 break;
332 }
333 return 0;
334 }
335
int scif_bind(scif_epd_t epd, u16 pn)
337 {
338 struct scif_endpt *ep = (struct scif_endpt *)epd;
339 int ret = 0;
340 int tmp;
341
342 dev_dbg(scif_info.mdev.this_device,
343 "SCIFAPI bind: ep %p %s requested port number %d\n",
344 ep, scif_ep_states[ep->state], pn);
345 if (pn) {
346 /*
347 * Similar to IETF RFC 1700, SCIF ports below
348 * SCIF_ADMIN_PORT_END can only be bound by system (or root)
349 * processes or by processes executed by privileged users.
350 */
351 if (pn < SCIF_ADMIN_PORT_END && !capable(CAP_SYS_ADMIN)) {
352 ret = -EACCES;
353 goto scif_bind_admin_exit;
354 }
355 }
356
357 spin_lock(&ep->lock);
358 if (ep->state == SCIFEP_BOUND) {
359 ret = -EINVAL;
360 goto scif_bind_exit;
361 } else if (ep->state != SCIFEP_UNBOUND) {
362 ret = -EISCONN;
363 goto scif_bind_exit;
364 }
365
366 if (pn) {
367 tmp = scif_rsrv_port(pn);
368 if (tmp != pn) {
369 ret = -EINVAL;
370 goto scif_bind_exit;
371 }
372 } else {
373 ret = scif_get_new_port();
374 if (ret < 0)
375 goto scif_bind_exit;
376 pn = ret;
377 }
378
379 ep->state = SCIFEP_BOUND;
380 ep->port.node = scif_info.nodeid;
381 ep->port.port = pn;
382 ep->conn_async_state = ASYNC_CONN_IDLE;
383 ret = pn;
384 dev_dbg(scif_info.mdev.this_device,
385 "SCIFAPI bind: bound to port number %d\n", pn);
386 scif_bind_exit:
387 spin_unlock(&ep->lock);
388 scif_bind_admin_exit:
389 return ret;
390 }
391 EXPORT_SYMBOL_GPL(scif_bind);
392
int scif_listen(scif_epd_t epd, int backlog)
394 {
395 struct scif_endpt *ep = (struct scif_endpt *)epd;
396
397 dev_dbg(scif_info.mdev.this_device,
398 "SCIFAPI listen: ep %p %s\n", ep, scif_ep_states[ep->state]);
399 spin_lock(&ep->lock);
400 switch (ep->state) {
401 case SCIFEP_ZOMBIE:
402 case SCIFEP_CLOSING:
403 case SCIFEP_CLLISTEN:
404 case SCIFEP_UNBOUND:
405 case SCIFEP_DISCONNECTED:
406 spin_unlock(&ep->lock);
407 return -EINVAL;
408 case SCIFEP_LISTENING:
409 case SCIFEP_CONNECTED:
410 case SCIFEP_CONNECTING:
411 case SCIFEP_MAPPING:
412 spin_unlock(&ep->lock);
413 return -EISCONN;
414 case SCIFEP_BOUND:
415 break;
416 }
417
418 ep->state = SCIFEP_LISTENING;
419 ep->backlog = backlog;
420
421 ep->conreqcnt = 0;
422 ep->acceptcnt = 0;
423 INIT_LIST_HEAD(&ep->conlist);
424 init_waitqueue_head(&ep->conwq);
425 INIT_LIST_HEAD(&ep->li_accept);
426 spin_unlock(&ep->lock);
427
/*
 * The listen state is now set up, so delete the QP information, which is
 * not needed on a listening endpoint, before placing the endpoint on the
 * list of listening endpoints.
 */
432 scif_teardown_ep(ep);
433 ep->qp_info.qp = NULL;
434
435 mutex_lock(&scif_info.eplock);
436 list_add_tail(&ep->list, &scif_info.listen);
437 mutex_unlock(&scif_info.eplock);
438 return 0;
439 }
440 EXPORT_SYMBOL_GPL(scif_listen);
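/*
 * Example (illustrative sketch): a kernel-mode server endpoint is set up
 * with open/bind/listen before accepting connections. The port number 2000
 * and backlog of 16 are arbitrary values chosen for the example:
 *
 *	scif_epd_t lepd = scif_open();
 *
 *	if (!lepd)
 *		return -ENOMEM;
 *	if (scif_bind(lepd, 2000) < 0 || scif_listen(lepd, 16) < 0) {
 *		scif_close(lepd);
 *		return -EINVAL;
 *	}
 */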
441
442 /*
443 ************************************************************************
444 * SCIF connection flow:
445 *
446 * 1) A SCIF listening endpoint can call scif_accept(..) to wait for SCIF
447 * connections via a SCIF_CNCT_REQ message
448 * 2) A SCIF endpoint can initiate a SCIF connection by calling
449 * scif_connect(..) which calls scif_setup_qp_connect(..) which
450 * allocates the local qp for the endpoint ring buffer and then sends
451 * a SCIF_CNCT_REQ to the remote node and waits for a SCIF_CNCT_GNT or
452 * a SCIF_CNCT_REJ message
453 * 3) The peer node handles a SCIF_CNCT_REQ via scif_cnctreq_resp(..) which
454 * wakes up any threads blocked in step 1 or sends a SCIF_CNCT_REJ
455 * message otherwise
456 * 4) A thread blocked waiting for incoming connections allocates its local
457 * endpoint QP and ring buffer following which it sends a SCIF_CNCT_GNT
458 * and waits for a SCIF_CNCT_GNT(N)ACK. If the allocation fails then
459 * the node sends a SCIF_CNCT_REJ message
460 * 5) Upon receipt of a SCIF_CNCT_GNT or a SCIF_CNCT_REJ message the
461 * connecting endpoint is woken up as part of handling
462 * scif_cnctgnt_resp(..) following which it maps the remote endpoints'
463 * QP, updates its outbound QP and sends a SCIF_CNCT_GNTACK message on
464 * success or a SCIF_CNCT_GNTNACK message on failure and completes
465 * the scif_connect(..) API
466 * 6) Upon receipt of a SCIF_CNCT_GNT(N)ACK the accepting endpoint blocked
467 * in step 4 is woken up and completes the scif_accept(..) API
468 * 7) The SCIF connection is now established between the two SCIF endpoints.
469 */
static int scif_conn_func(struct scif_endpt *ep)
471 {
472 int err = 0;
473 struct scifmsg msg;
474 struct device *spdev;
475
476 err = scif_reserve_dma_chan(ep);
477 if (err) {
478 dev_err(&ep->remote_dev->sdev->dev,
479 "%s %d err %d\n", __func__, __LINE__, err);
480 ep->state = SCIFEP_BOUND;
481 goto connect_error_simple;
482 }
483 /* Initiate the first part of the endpoint QP setup */
484 err = scif_setup_qp_connect(ep->qp_info.qp, &ep->qp_info.qp_offset,
485 SCIF_ENDPT_QP_SIZE, ep->remote_dev);
486 if (err) {
487 dev_err(&ep->remote_dev->sdev->dev,
488 "%s err %d qp_offset 0x%llx\n",
489 __func__, err, ep->qp_info.qp_offset);
490 ep->state = SCIFEP_BOUND;
491 goto connect_error_simple;
492 }
493
494 spdev = scif_get_peer_dev(ep->remote_dev);
495 if (IS_ERR(spdev)) {
496 err = PTR_ERR(spdev);
497 goto cleanup_qp;
498 }
499 /* Format connect message and send it */
500 msg.src = ep->port;
501 msg.dst = ep->conn_port;
502 msg.uop = SCIF_CNCT_REQ;
503 msg.payload[0] = (u64)ep;
504 msg.payload[1] = ep->qp_info.qp_offset;
505 err = _scif_nodeqp_send(ep->remote_dev, &msg);
506 if (err)
507 goto connect_error_dec;
508 scif_put_peer_dev(spdev);
509 /*
510 * Wait for the remote node to respond with SCIF_CNCT_GNT or
511 * SCIF_CNCT_REJ message.
512 */
513 err = wait_event_timeout(ep->conwq, ep->state != SCIFEP_CONNECTING,
514 SCIF_NODE_ALIVE_TIMEOUT);
515 if (!err) {
516 dev_err(&ep->remote_dev->sdev->dev,
517 "%s %d timeout\n", __func__, __LINE__);
518 ep->state = SCIFEP_BOUND;
519 }
520 spdev = scif_get_peer_dev(ep->remote_dev);
521 if (IS_ERR(spdev)) {
522 err = PTR_ERR(spdev);
523 goto cleanup_qp;
524 }
525 if (ep->state == SCIFEP_MAPPING) {
526 err = scif_setup_qp_connect_response(ep->remote_dev,
527 ep->qp_info.qp,
528 ep->qp_info.gnt_pld);
/*
 * If the resources to map the queue are not available then
 * we need to tell the other side to terminate the accept.
 */
533 if (err) {
534 dev_err(&ep->remote_dev->sdev->dev,
535 "%s %d err %d\n", __func__, __LINE__, err);
536 msg.uop = SCIF_CNCT_GNTNACK;
537 msg.payload[0] = ep->remote_ep;
538 _scif_nodeqp_send(ep->remote_dev, &msg);
539 ep->state = SCIFEP_BOUND;
540 goto connect_error_dec;
541 }
542
543 msg.uop = SCIF_CNCT_GNTACK;
544 msg.payload[0] = ep->remote_ep;
545 err = _scif_nodeqp_send(ep->remote_dev, &msg);
546 if (err) {
547 ep->state = SCIFEP_BOUND;
548 goto connect_error_dec;
549 }
550 ep->state = SCIFEP_CONNECTED;
551 mutex_lock(&scif_info.connlock);
552 list_add_tail(&ep->list, &scif_info.connected);
553 mutex_unlock(&scif_info.connlock);
554 dev_dbg(&ep->remote_dev->sdev->dev,
555 "SCIFAPI connect: ep %p connected\n", ep);
556 } else if (ep->state == SCIFEP_BOUND) {
557 dev_dbg(&ep->remote_dev->sdev->dev,
558 "SCIFAPI connect: ep %p connection refused\n", ep);
559 err = -ECONNREFUSED;
560 goto connect_error_dec;
561 }
562 scif_put_peer_dev(spdev);
563 return err;
564 connect_error_dec:
565 scif_put_peer_dev(spdev);
566 cleanup_qp:
567 scif_cleanup_ep_qp(ep);
568 connect_error_simple:
569 return err;
570 }
571
572 /*
573 * scif_conn_handler:
574 *
575 * Workqueue handler for servicing non-blocking SCIF connect
576 *
577 */
void scif_conn_handler(struct work_struct *work)
579 {
580 struct scif_endpt *ep;
581
582 do {
583 ep = NULL;
584 spin_lock(&scif_info.nb_connect_lock);
585 if (!list_empty(&scif_info.nb_connect_list)) {
586 ep = list_first_entry(&scif_info.nb_connect_list,
587 struct scif_endpt, conn_list);
588 list_del(&ep->conn_list);
589 }
590 spin_unlock(&scif_info.nb_connect_lock);
591 if (ep) {
592 ep->conn_err = scif_conn_func(ep);
593 wake_up_interruptible(&ep->conn_pend_wq);
594 }
595 } while (ep);
596 }
597
int __scif_connect(scif_epd_t epd, struct scif_port_id *dst, bool non_block)
599 {
600 struct scif_endpt *ep = (struct scif_endpt *)epd;
601 int err = 0;
602 struct scif_dev *remote_dev;
603 struct device *spdev;
604
605 dev_dbg(scif_info.mdev.this_device, "SCIFAPI connect: ep %p %s\n", ep,
606 scif_ep_states[ep->state]);
607
608 if (!scif_dev || dst->node > scif_info.maxid)
609 return -ENODEV;
610
611 might_sleep();
612
613 remote_dev = &scif_dev[dst->node];
614 spdev = scif_get_peer_dev(remote_dev);
615 if (IS_ERR(spdev)) {
616 err = PTR_ERR(spdev);
617 return err;
618 }
619
620 spin_lock(&ep->lock);
621 switch (ep->state) {
622 case SCIFEP_ZOMBIE:
623 case SCIFEP_CLOSING:
624 err = -EINVAL;
625 break;
626 case SCIFEP_DISCONNECTED:
627 if (ep->conn_async_state == ASYNC_CONN_INPROGRESS)
628 ep->conn_async_state = ASYNC_CONN_FLUSH_WORK;
629 else
630 err = -EINVAL;
631 break;
632 case SCIFEP_LISTENING:
633 case SCIFEP_CLLISTEN:
634 err = -EOPNOTSUPP;
635 break;
636 case SCIFEP_CONNECTING:
637 case SCIFEP_MAPPING:
638 if (ep->conn_async_state == ASYNC_CONN_INPROGRESS)
639 err = -EINPROGRESS;
640 else
641 err = -EISCONN;
642 break;
643 case SCIFEP_CONNECTED:
644 if (ep->conn_async_state == ASYNC_CONN_INPROGRESS)
645 ep->conn_async_state = ASYNC_CONN_FLUSH_WORK;
646 else
647 err = -EISCONN;
648 break;
649 case SCIFEP_UNBOUND:
650 err = scif_get_new_port();
651 if (err < 0)
652 break;
653 ep->port.port = err;
654 ep->port.node = scif_info.nodeid;
655 ep->conn_async_state = ASYNC_CONN_IDLE;
656 /* Fall through */
657 case SCIFEP_BOUND:
/*
 * If a non-blocking connect has already been initiated
 * (conn_async_state is either ASYNC_CONN_INPROGRESS or
 * ASYNC_CONN_FLUSH_WORK), the endpoint could end up in
 * SCIFEP_BOUND due to an error in the connection process
 * (e.g., connection refused). If conn_async_state is
 * ASYNC_CONN_INPROGRESS, transition to ASYNC_CONN_FLUSH_WORK
 * so that the error status can be collected. If the state is
 * already ASYNC_CONN_FLUSH_WORK, set the error to
 * -EINPROGRESS since some other thread is waiting to collect
 * the error status.
 */
670 if (ep->conn_async_state == ASYNC_CONN_INPROGRESS) {
671 ep->conn_async_state = ASYNC_CONN_FLUSH_WORK;
672 } else if (ep->conn_async_state == ASYNC_CONN_FLUSH_WORK) {
673 err = -EINPROGRESS;
674 } else {
675 ep->conn_port = *dst;
676 init_waitqueue_head(&ep->sendwq);
677 init_waitqueue_head(&ep->recvwq);
678 init_waitqueue_head(&ep->conwq);
679 ep->conn_async_state = 0;
680
681 if (unlikely(non_block))
682 ep->conn_async_state = ASYNC_CONN_INPROGRESS;
683 }
684 break;
685 }
686
687 if (err || ep->conn_async_state == ASYNC_CONN_FLUSH_WORK)
688 goto connect_simple_unlock1;
689
690 ep->state = SCIFEP_CONNECTING;
691 ep->remote_dev = &scif_dev[dst->node];
692 ep->qp_info.qp->magic = SCIFEP_MAGIC;
693 if (ep->conn_async_state == ASYNC_CONN_INPROGRESS) {
694 init_waitqueue_head(&ep->conn_pend_wq);
695 spin_lock(&scif_info.nb_connect_lock);
696 list_add_tail(&ep->conn_list, &scif_info.nb_connect_list);
697 spin_unlock(&scif_info.nb_connect_lock);
698 err = -EINPROGRESS;
699 schedule_work(&scif_info.conn_work);
700 }
701 connect_simple_unlock1:
702 spin_unlock(&ep->lock);
703 scif_put_peer_dev(spdev);
704 if (err) {
705 return err;
706 } else if (ep->conn_async_state == ASYNC_CONN_FLUSH_WORK) {
707 flush_work(&scif_info.conn_work);
708 err = ep->conn_err;
709 spin_lock(&ep->lock);
710 ep->conn_async_state = ASYNC_CONN_IDLE;
711 spin_unlock(&ep->lock);
712 } else {
713 err = scif_conn_func(ep);
714 }
715 return err;
716 }
717
int scif_connect(scif_epd_t epd, struct scif_port_id *dst)
719 {
720 return __scif_connect(epd, dst, false);
721 }
722 EXPORT_SYMBOL_GPL(scif_connect);
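/*
 * Example (illustrative sketch): a blocking kernel-mode connect. The node
 * id 1 and port 2000 are placeholder values; real callers would discover
 * the peer via scif_get_node_ids() or a private convention:
 *
 *	struct scif_port_id dst = { .node = 1, .port = 2000 };
 *	scif_epd_t epd = scif_open();
 *	int err;
 *
 *	if (!epd)
 *		return -ENOMEM;
 *	err = scif_connect(epd, &dst);
 *	if (err < 0) {
 *		scif_close(epd);
 *		return err;
 *	}
 */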
723
/**
 * scif_accept() - Accept a connection request from the remote node
 * @epd: The listening endpoint returned from scif_open()
 * @peer: Filled in with the node and port of the connecting peer
 * @newepd: New connected endpoint returned to the caller
 * @flags: 0 or SCIF_ACCEPT_SYNC
 *
 * The function accepts a connection request from the remote node. Successful
 * completion is indicated by a new endpoint being created and passed back
 * to the caller for future reference.
 *
 * Upon successful completion zero is returned and the peer information
 * is filled in.
 *
 * If the endpoint is not in the listening state -EINVAL is returned.
 *
 * If resource allocation fails during the connection sequence -ENOMEM
 * is returned.
 *
 * If the function is called without the SCIF_ACCEPT_SYNC flag set and no
 * connection requests are pending it returns -EAGAIN.
 *
 * If the remote side is not sending any connection requests the caller may
 * terminate this function with a signal. If so -EINTR is returned.
 */
int scif_accept(scif_epd_t epd, struct scif_port_id *peer,
		scif_epd_t *newepd, int flags)
747 {
748 struct scif_endpt *lep = (struct scif_endpt *)epd;
749 struct scif_endpt *cep;
750 struct scif_conreq *conreq;
751 struct scifmsg msg;
752 int err;
753 struct device *spdev;
754
755 dev_dbg(scif_info.mdev.this_device,
756 "SCIFAPI accept: ep %p %s\n", lep, scif_ep_states[lep->state]);
757
758 if (flags & ~SCIF_ACCEPT_SYNC)
759 return -EINVAL;
760
761 if (!peer || !newepd)
762 return -EINVAL;
763
764 might_sleep();
765 spin_lock(&lep->lock);
766 if (lep->state != SCIFEP_LISTENING) {
767 spin_unlock(&lep->lock);
768 return -EINVAL;
769 }
770
771 if (!lep->conreqcnt && !(flags & SCIF_ACCEPT_SYNC)) {
772 /* No connection request present and we do not want to wait */
773 spin_unlock(&lep->lock);
774 return -EAGAIN;
775 }
776
777 lep->files = current->files;
778 retry_connection:
779 spin_unlock(&lep->lock);
780 /* Wait for the remote node to send us a SCIF_CNCT_REQ */
781 err = wait_event_interruptible(lep->conwq,
782 (lep->conreqcnt ||
783 (lep->state != SCIFEP_LISTENING)));
784 if (err)
785 return err;
786
787 if (lep->state != SCIFEP_LISTENING)
788 return -EINTR;
789
790 spin_lock(&lep->lock);
791
792 if (!lep->conreqcnt)
793 goto retry_connection;
794
795 /* Get the first connect request off the list */
796 conreq = list_first_entry(&lep->conlist, struct scif_conreq, list);
797 list_del(&conreq->list);
798 lep->conreqcnt--;
799 spin_unlock(&lep->lock);
800
801 /* Fill in the peer information */
802 peer->node = conreq->msg.src.node;
803 peer->port = conreq->msg.src.port;
804
805 cep = kzalloc(sizeof(*cep), GFP_KERNEL);
806 if (!cep) {
807 err = -ENOMEM;
808 goto scif_accept_error_epalloc;
809 }
810 spin_lock_init(&cep->lock);
811 mutex_init(&cep->sendlock);
812 mutex_init(&cep->recvlock);
813 cep->state = SCIFEP_CONNECTING;
814 cep->remote_dev = &scif_dev[peer->node];
815 cep->remote_ep = conreq->msg.payload[0];
816
817 scif_rma_ep_init(cep);
818
819 err = scif_reserve_dma_chan(cep);
820 if (err) {
821 dev_err(scif_info.mdev.this_device,
822 "%s %d err %d\n", __func__, __LINE__, err);
823 goto scif_accept_error_qpalloc;
824 }
825
826 cep->qp_info.qp = kzalloc(sizeof(*cep->qp_info.qp), GFP_KERNEL);
827 if (!cep->qp_info.qp) {
828 err = -ENOMEM;
829 goto scif_accept_error_qpalloc;
830 }
831
832 err = scif_anon_inode_getfile(cep);
833 if (err)
834 goto scif_accept_error_anon_inode;
835
836 cep->qp_info.qp->magic = SCIFEP_MAGIC;
837 spdev = scif_get_peer_dev(cep->remote_dev);
838 if (IS_ERR(spdev)) {
839 err = PTR_ERR(spdev);
840 goto scif_accept_error_map;
841 }
842 err = scif_setup_qp_accept(cep->qp_info.qp, &cep->qp_info.qp_offset,
843 conreq->msg.payload[1], SCIF_ENDPT_QP_SIZE,
844 cep->remote_dev);
845 if (err) {
846 dev_dbg(&cep->remote_dev->sdev->dev,
847 "SCIFAPI accept: ep %p new %p scif_setup_qp_accept %d qp_offset 0x%llx\n",
848 lep, cep, err, cep->qp_info.qp_offset);
849 scif_put_peer_dev(spdev);
850 goto scif_accept_error_map;
851 }
852
853 cep->port.node = lep->port.node;
854 cep->port.port = lep->port.port;
855 cep->peer.node = peer->node;
856 cep->peer.port = peer->port;
857 init_waitqueue_head(&cep->sendwq);
858 init_waitqueue_head(&cep->recvwq);
859 init_waitqueue_head(&cep->conwq);
860
861 msg.uop = SCIF_CNCT_GNT;
862 msg.src = cep->port;
863 msg.payload[0] = cep->remote_ep;
864 msg.payload[1] = cep->qp_info.qp_offset;
865 msg.payload[2] = (u64)cep;
866
867 err = _scif_nodeqp_send(cep->remote_dev, &msg);
868 scif_put_peer_dev(spdev);
869 if (err)
870 goto scif_accept_error_map;
871 retry:
872 /* Wait for the remote node to respond with SCIF_CNCT_GNT(N)ACK */
873 err = wait_event_timeout(cep->conwq, cep->state != SCIFEP_CONNECTING,
874 SCIF_NODE_ACCEPT_TIMEOUT);
875 if (!err && scifdev_alive(cep))
876 goto retry;
877 err = !err ? -ENODEV : 0;
878 if (err)
879 goto scif_accept_error_map;
880 kfree(conreq);
881
882 spin_lock(&cep->lock);
883
884 if (cep->state == SCIFEP_CLOSING) {
885 /*
886 * Remote failed to allocate resources and NAKed the grant.
887 * There is at this point nothing referencing the new end point.
888 */
889 spin_unlock(&cep->lock);
890 scif_teardown_ep(cep);
891 kfree(cep);
892
893 /* If call with sync flag then go back and wait. */
894 if (flags & SCIF_ACCEPT_SYNC) {
895 spin_lock(&lep->lock);
896 goto retry_connection;
897 }
898 return -EAGAIN;
899 }
900
901 scif_get_port(cep->port.port);
902 *newepd = (scif_epd_t)cep;
903 spin_unlock(&cep->lock);
904 return 0;
905 scif_accept_error_map:
906 scif_anon_inode_fput(cep);
907 scif_accept_error_anon_inode:
908 scif_teardown_ep(cep);
909 scif_accept_error_qpalloc:
910 kfree(cep);
911 scif_accept_error_epalloc:
912 msg.uop = SCIF_CNCT_REJ;
913 msg.dst.node = conreq->msg.src.node;
914 msg.dst.port = conreq->msg.src.port;
915 msg.payload[0] = conreq->msg.payload[0];
916 msg.payload[1] = conreq->msg.payload[1];
917 scif_nodeqp_send(&scif_dev[conreq->msg.src.node], &msg);
918 kfree(conreq);
919 return err;
920 }
921 EXPORT_SYMBOL_GPL(scif_accept);
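/*
 * Example (illustrative sketch): a blocking accept on a listening endpoint
 * lepd (set up as in the listen sketch above). On success nepd refers to a
 * new connected endpoint and peer holds the remote node and port:
 *
 *	struct scif_port_id peer;
 *	scif_epd_t nepd;
 *	int err;
 *
 *	err = scif_accept(lepd, &peer, &nepd, SCIF_ACCEPT_SYNC);
 *	if (err)
 *		return err;
 */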
922
/*
 * scif_msg_param_check:
 * @epd: The end point returned from scif_open()
 * @len: Message length
 * @flags: Blocking or non-blocking
 *
 * Validate parameters for the messaging APIs scif_send(..)/scif_recv(..).
 */
static inline int scif_msg_param_check(scif_epd_t epd, int len, int flags)
932 {
933 int ret = -EINVAL;
934
935 if (len < 0)
936 goto err_ret;
937 if (flags && (!(flags & SCIF_RECV_BLOCK)))
938 goto err_ret;
939 ret = 0;
940 err_ret:
941 return ret;
942 }
943
static int _scif_send(scif_epd_t epd, void *msg, int len, int flags)
945 {
946 struct scif_endpt *ep = (struct scif_endpt *)epd;
947 struct scifmsg notif_msg;
948 int curr_xfer_len = 0, sent_len = 0, write_count;
949 int ret = 0;
950 struct scif_qp *qp = ep->qp_info.qp;
951
952 if (flags & SCIF_SEND_BLOCK)
953 might_sleep();
954
955 spin_lock(&ep->lock);
956 while (sent_len != len && SCIFEP_CONNECTED == ep->state) {
957 write_count = scif_rb_space(&qp->outbound_q);
958 if (write_count) {
959 /* Best effort to send as much data as possible */
960 curr_xfer_len = min(len - sent_len, write_count);
961 ret = scif_rb_write(&qp->outbound_q, msg,
962 curr_xfer_len);
963 if (ret < 0)
964 break;
965 /* Success. Update write pointer */
966 scif_rb_commit(&qp->outbound_q);
967 /*
968 * Send a notification to the peer about the
969 * produced data message.
970 */
971 notif_msg.src = ep->port;
972 notif_msg.uop = SCIF_CLIENT_SENT;
973 notif_msg.payload[0] = ep->remote_ep;
974 ret = _scif_nodeqp_send(ep->remote_dev, ¬if_msg);
975 if (ret)
976 break;
977 sent_len += curr_xfer_len;
978 msg = msg + curr_xfer_len;
979 continue;
980 }
981 curr_xfer_len = min(len - sent_len, SCIF_ENDPT_QP_SIZE - 1);
982 /* Not enough RB space. return for the Non Blocking case */
983 if (!(flags & SCIF_SEND_BLOCK))
984 break;
985
986 spin_unlock(&ep->lock);
987 /* Wait for a SCIF_CLIENT_RCVD message in the Blocking case */
988 ret =
989 wait_event_interruptible(ep->sendwq,
990 (SCIFEP_CONNECTED != ep->state) ||
991 (scif_rb_space(&qp->outbound_q) >=
992 curr_xfer_len));
993 spin_lock(&ep->lock);
994 if (ret)
995 break;
996 }
997 if (sent_len)
998 ret = sent_len;
999 else if (!ret && SCIFEP_CONNECTED != ep->state)
1000 ret = SCIFEP_DISCONNECTED == ep->state ?
1001 -ECONNRESET : -ENOTCONN;
1002 spin_unlock(&ep->lock);
1003 return ret;
1004 }
1005
static int _scif_recv(scif_epd_t epd, void *msg, int len, int flags)
1007 {
1008 int read_size;
1009 struct scif_endpt *ep = (struct scif_endpt *)epd;
1010 struct scifmsg notif_msg;
1011 int curr_recv_len = 0, remaining_len = len, read_count;
1012 int ret = 0;
1013 struct scif_qp *qp = ep->qp_info.qp;
1014
1015 if (flags & SCIF_RECV_BLOCK)
1016 might_sleep();
1017 spin_lock(&ep->lock);
1018 while (remaining_len && (SCIFEP_CONNECTED == ep->state ||
1019 SCIFEP_DISCONNECTED == ep->state)) {
1020 read_count = scif_rb_count(&qp->inbound_q, remaining_len);
1021 if (read_count) {
/*
 * Best effort to receive as much data as there
 * are bytes to read in the RB; this is particularly
 * important for the non-blocking case.
 */
1027 curr_recv_len = min(remaining_len, read_count);
1028 read_size = scif_rb_get_next(&qp->inbound_q,
1029 msg, curr_recv_len);
1030 if (ep->state == SCIFEP_CONNECTED) {
1031 /*
1032 * Update the read pointer only if the endpoint
1033 * is still connected else the read pointer
1034 * might no longer exist since the peer has
1035 * freed resources!
1036 */
1037 scif_rb_update_read_ptr(&qp->inbound_q);
1038 /*
1039 * Send a notification to the peer about the
1040 * consumed data message only if the EP is in
1041 * SCIFEP_CONNECTED state.
1042 */
1043 notif_msg.src = ep->port;
1044 notif_msg.uop = SCIF_CLIENT_RCVD;
1045 notif_msg.payload[0] = ep->remote_ep;
1046 ret = _scif_nodeqp_send(ep->remote_dev,
1047 ¬if_msg);
1048 if (ret)
1049 break;
1050 }
1051 remaining_len -= curr_recv_len;
1052 msg = msg + curr_recv_len;
1053 continue;
1054 }
1055 /*
1056 * Bail out now if the EP is in SCIFEP_DISCONNECTED state else
1057 * we will keep looping forever.
1058 */
1059 if (ep->state == SCIFEP_DISCONNECTED)
1060 break;
1061 /*
1062 * Return in the Non Blocking case if there is no data
1063 * to read in this iteration.
1064 */
1065 if (!(flags & SCIF_RECV_BLOCK))
1066 break;
1067 curr_recv_len = min(remaining_len, SCIF_ENDPT_QP_SIZE - 1);
1068 spin_unlock(&ep->lock);
1069 /*
1070 * Wait for a SCIF_CLIENT_SEND message in the blocking case
1071 * or until other side disconnects.
1072 */
1073 ret =
1074 wait_event_interruptible(ep->recvwq,
1075 SCIFEP_CONNECTED != ep->state ||
1076 scif_rb_count(&qp->inbound_q,
1077 curr_recv_len)
1078 >= curr_recv_len);
1079 spin_lock(&ep->lock);
1080 if (ret)
1081 break;
1082 }
1083 if (len - remaining_len)
1084 ret = len - remaining_len;
1085 else if (!ret && ep->state != SCIFEP_CONNECTED)
1086 ret = ep->state == SCIFEP_DISCONNECTED ?
1087 -ECONNRESET : -ENOTCONN;
1088 spin_unlock(&ep->lock);
1089 return ret;
1090 }
1091
/**
 * scif_user_send() - Send data to connection queue
 * @epd: The end point returned from scif_open()
 * @msg: Address of the user-space data to send
 * @len: Length to send
 * @flags: blocking or non blocking
 *
 * This function is called from the driver IOCTL entry point
 * only and is a wrapper for _scif_send().
 */
int scif_user_send(scif_epd_t epd, void __user *msg, int len, int flags)
1103 {
1104 struct scif_endpt *ep = (struct scif_endpt *)epd;
1105 int err = 0;
1106 int sent_len = 0;
1107 char *tmp;
1108 int loop_len;
1109 int chunk_len = min(len, (1 << (MAX_ORDER + PAGE_SHIFT - 1)));
1110
1111 dev_dbg(scif_info.mdev.this_device,
1112 "SCIFAPI send (U): ep %p %s\n", ep, scif_ep_states[ep->state]);
1113 if (!len)
1114 return 0;
1115
1116 err = scif_msg_param_check(epd, len, flags);
1117 if (err)
1118 goto send_err;
1119
1120 tmp = kmalloc(chunk_len, GFP_KERNEL);
1121 if (!tmp) {
1122 err = -ENOMEM;
1123 goto send_err;
1124 }
/*
 * Grabbing the lock before breaking up the transfer into
 * multiple chunks is required to ensure that messages do
 * not get fragmented and reordered.
 */
1130 mutex_lock(&ep->sendlock);
1131 while (sent_len != len) {
1132 loop_len = len - sent_len;
1133 loop_len = min(chunk_len, loop_len);
1134 if (copy_from_user(tmp, msg, loop_len)) {
1135 err = -EFAULT;
1136 goto send_free_err;
1137 }
1138 err = _scif_send(epd, tmp, loop_len, flags);
1139 if (err < 0)
1140 goto send_free_err;
1141 sent_len += err;
1142 msg += err;
1143 if (err != loop_len)
1144 goto send_free_err;
1145 }
1146 send_free_err:
1147 mutex_unlock(&ep->sendlock);
1148 kfree(tmp);
1149 send_err:
1150 return err < 0 ? err : sent_len;
1151 }
1152
1153 /**
1154 * scif_user_recv() - Receive data from connection queue
1155 * @epd: The end point returned from scif_open()
1156 * @msg: Address to place data
1157 * @len: Length to receive
1158 * @flags: blocking or non blocking
1159 *
1160 * This function is called from the driver IOCTL entry point
1161 * only and is a wrapper for _scif_recv().
1162 */
int scif_user_recv(scif_epd_t epd, void __user *msg, int len, int flags)
1164 {
1165 struct scif_endpt *ep = (struct scif_endpt *)epd;
1166 int err = 0;
1167 int recv_len = 0;
1168 char *tmp;
1169 int loop_len;
1170 int chunk_len = min(len, (1 << (MAX_ORDER + PAGE_SHIFT - 1)));
1171
1172 dev_dbg(scif_info.mdev.this_device,
1173 "SCIFAPI recv (U): ep %p %s\n", ep, scif_ep_states[ep->state]);
1174 if (!len)
1175 return 0;
1176
1177 err = scif_msg_param_check(epd, len, flags);
1178 if (err)
1179 goto recv_err;
1180
1181 tmp = kmalloc(chunk_len, GFP_KERNEL);
1182 if (!tmp) {
1183 err = -ENOMEM;
1184 goto recv_err;
1185 }
/*
 * Grabbing the lock before breaking up the transfer into
 * multiple chunks is required to ensure that messages do
 * not get fragmented and reordered.
 */
1191 mutex_lock(&ep->recvlock);
1192 while (recv_len != len) {
1193 loop_len = len - recv_len;
1194 loop_len = min(chunk_len, loop_len);
1195 err = _scif_recv(epd, tmp, loop_len, flags);
1196 if (err < 0)
1197 goto recv_free_err;
1198 if (copy_to_user(msg, tmp, err)) {
1199 err = -EFAULT;
1200 goto recv_free_err;
1201 }
1202 recv_len += err;
1203 msg += err;
1204 if (err != loop_len)
1205 goto recv_free_err;
1206 }
1207 recv_free_err:
1208 mutex_unlock(&ep->recvlock);
1209 kfree(tmp);
1210 recv_err:
1211 return err < 0 ? err : recv_len;
1212 }
1213
/**
 * scif_send() - Send data to connection queue
 * @epd: The end point returned from scif_open()
 * @msg: Address of the data to send
 * @len: Length to send
 * @flags: blocking or non blocking
 *
 * This function is called from the kernel mode only and is
 * a wrapper for _scif_send().
 */
int scif_send(scif_epd_t epd, void *msg, int len, int flags)
1225 {
1226 struct scif_endpt *ep = (struct scif_endpt *)epd;
1227 int ret;
1228
1229 dev_dbg(scif_info.mdev.this_device,
1230 "SCIFAPI send (K): ep %p %s\n", ep, scif_ep_states[ep->state]);
1231 if (!len)
1232 return 0;
1233
1234 ret = scif_msg_param_check(epd, len, flags);
1235 if (ret)
1236 return ret;
1237 if (!ep->remote_dev)
1238 return -ENOTCONN;
1239 /*
1240 * Grab the mutex lock in the blocking case only
1241 * to ensure messages do not get fragmented/reordered.
1242 * The non blocking mode is protected using spin locks
1243 * in _scif_send().
1244 */
1245 if (flags & SCIF_SEND_BLOCK)
1246 mutex_lock(&ep->sendlock);
1247
1248 ret = _scif_send(epd, msg, len, flags);
1249
1250 if (flags & SCIF_SEND_BLOCK)
1251 mutex_unlock(&ep->sendlock);
1252 return ret;
1253 }
1254 EXPORT_SYMBOL_GPL(scif_send);
1255
1256 /**
1257 * scif_recv() - Receive data from connection queue
1258 * @epd: The end point returned from scif_open()
1259 * @msg: Address to place data
1260 * @len: Length to receive
1261 * @flags: blocking or non blocking
1262 *
1263 * This function is called from the kernel mode only and is
1264 * a wrapper for _scif_recv().
1265 */
int scif_recv(scif_epd_t epd, void *msg, int len, int flags)
1267 {
1268 struct scif_endpt *ep = (struct scif_endpt *)epd;
1269 int ret;
1270
1271 dev_dbg(scif_info.mdev.this_device,
1272 "SCIFAPI recv (K): ep %p %s\n", ep, scif_ep_states[ep->state]);
1273 if (!len)
1274 return 0;
1275
1276 ret = scif_msg_param_check(epd, len, flags);
1277 if (ret)
1278 return ret;
/*
 * Grab the mutex lock in the blocking case only
 * to ensure messages do not get fragmented/reordered.
 * The non blocking mode is protected using spin locks
 * in _scif_recv().
 */
1285 if (flags & SCIF_RECV_BLOCK)
1286 mutex_lock(&ep->recvlock);
1287
1288 ret = _scif_recv(epd, msg, len, flags);
1289
1290 if (flags & SCIF_RECV_BLOCK)
1291 mutex_unlock(&ep->recvlock);
1292
1293 return ret;
1294 }
1295 EXPORT_SYMBOL_GPL(scif_recv);
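/*
 * Example (illustrative sketch): a blocking send followed by a blocking
 * receive on a connected endpoint epd, using a caller-provided buffer buf
 * of size len:
 *
 *	int ret = scif_send(epd, buf, len, SCIF_SEND_BLOCK);
 *
 *	if (ret < 0)
 *		return ret;
 *	ret = scif_recv(epd, buf, len, SCIF_RECV_BLOCK);
 *	if (ret < 0)
 *		return ret;
 */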
1296
static inline void _scif_poll_wait(struct file *f, wait_queue_head_t *wq,
				   poll_table *p, struct scif_endpt *ep)
1299 {
1300 /*
1301 * Because poll_wait makes a GFP_KERNEL allocation, give up the lock
1302 * and regrab it afterwards. Because the endpoint state might have
1303 * changed while the lock was given up, the state must be checked
1304 * again after re-acquiring the lock. The code in __scif_pollfd(..)
1305 * does this.
1306 */
1307 spin_unlock(&ep->lock);
1308 poll_wait(f, wq, p);
1309 spin_lock(&ep->lock);
1310 }
1311
unsigned int
__scif_pollfd(struct file *f, poll_table *wait, struct scif_endpt *ep)
1314 {
1315 unsigned int mask = 0;
1316
1317 dev_dbg(scif_info.mdev.this_device,
1318 "SCIFAPI pollfd: ep %p %s\n", ep, scif_ep_states[ep->state]);
1319
1320 spin_lock(&ep->lock);
1321
1322 /* Endpoint is waiting for a non-blocking connect to complete */
1323 if (ep->conn_async_state == ASYNC_CONN_INPROGRESS) {
1324 _scif_poll_wait(f, &ep->conn_pend_wq, wait, ep);
1325 if (ep->conn_async_state == ASYNC_CONN_INPROGRESS) {
1326 if (ep->state == SCIFEP_CONNECTED ||
1327 ep->state == SCIFEP_DISCONNECTED ||
1328 ep->conn_err)
1329 mask |= POLLOUT;
1330 goto exit;
1331 }
1332 }
1333
1334 /* Endpoint is listening for incoming connection requests */
1335 if (ep->state == SCIFEP_LISTENING) {
1336 _scif_poll_wait(f, &ep->conwq, wait, ep);
1337 if (ep->state == SCIFEP_LISTENING) {
1338 if (ep->conreqcnt)
1339 mask |= POLLIN;
1340 goto exit;
1341 }
1342 }
1343
1344 /* Endpoint is connected or disconnected */
1345 if (ep->state == SCIFEP_CONNECTED || ep->state == SCIFEP_DISCONNECTED) {
1346 if (poll_requested_events(wait) & POLLIN)
1347 _scif_poll_wait(f, &ep->recvwq, wait, ep);
1348 if (poll_requested_events(wait) & POLLOUT)
1349 _scif_poll_wait(f, &ep->sendwq, wait, ep);
1350 if (ep->state == SCIFEP_CONNECTED ||
1351 ep->state == SCIFEP_DISCONNECTED) {
1352 /* Data can be read without blocking */
1353 if (scif_rb_count(&ep->qp_info.qp->inbound_q, 1))
1354 mask |= POLLIN;
1355 /* Data can be written without blocking */
1356 if (scif_rb_space(&ep->qp_info.qp->outbound_q))
1357 mask |= POLLOUT;
1358 /* Return POLLHUP if endpoint is disconnected */
1359 if (ep->state == SCIFEP_DISCONNECTED)
1360 mask |= POLLHUP;
1361 goto exit;
1362 }
1363 }
1364
1365 /* Return POLLERR if the endpoint is in none of the above states */
1366 mask |= POLLERR;
1367 exit:
1368 spin_unlock(&ep->lock);
1369 return mask;
1370 }
1371
/**
 * scif_poll() - Kernel mode SCIF poll
 * @ufds: Array of scif_pollepd structures containing the end points
 *	  and events to poll on
 * @nfds: Size of the ufds array
 * @timeout_msecs: Timeout in msecs; a negative value implies an infinite
 *		   timeout
 *
 * The code flow in this function is based on do_poll(..) in select.c
 *
 * Returns the number of endpoints which have pending events or 0 in
 * the event of a timeout. If a signal is used for wake up, -EINTR is
 * returned.
 */
int
scif_poll(struct scif_pollepd *ufds, unsigned int nfds, long timeout_msecs)
1387 {
1388 struct poll_wqueues table;
1389 poll_table *pt;
1390 int i, mask, count = 0, timed_out = timeout_msecs == 0;
1391 u64 timeout = timeout_msecs < 0 ? MAX_SCHEDULE_TIMEOUT
1392 : msecs_to_jiffies(timeout_msecs);
1393
1394 poll_initwait(&table);
1395 pt = &table.pt;
1396 while (1) {
1397 for (i = 0; i < nfds; i++) {
1398 pt->_key = ufds[i].events | POLLERR | POLLHUP;
1399 mask = __scif_pollfd(ufds[i].epd->anon,
1400 pt, ufds[i].epd);
1401 mask &= ufds[i].events | POLLERR | POLLHUP;
1402 if (mask) {
1403 count++;
1404 pt->_qproc = NULL;
1405 }
1406 ufds[i].revents = mask;
1407 }
1408 pt->_qproc = NULL;
1409 if (!count) {
1410 count = table.error;
1411 if (signal_pending(current))
1412 count = -EINTR;
1413 }
1414 if (count || timed_out)
1415 break;
1416
1417 if (!schedule_timeout_interruptible(timeout))
1418 timed_out = 1;
1419 }
1420 poll_freewait(&table);
1421 return count;
1422 }
1423 EXPORT_SYMBOL_GPL(scif_poll);
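/*
 * Example (illustrative sketch): polling two open endpoints epd0 and epd1
 * for readability with a one second timeout; on return revents indicates
 * which endpoints are ready:
 *
 *	struct scif_pollepd ufds[2] = {
 *		{ .epd = epd0, .events = POLLIN },
 *		{ .epd = epd1, .events = POLLIN },
 *	};
 *	int ready = scif_poll(ufds, 2, 1000);
 *
 *	if (ready < 0)
 *		return ready;
 */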
1424
int scif_get_node_ids(u16 *nodes, int len, u16 *self)
1426 {
1427 int online = 0;
1428 int offset = 0;
1429 int node;
1430
1431 if (!scif_is_mgmt_node())
1432 scif_get_node_info();
1433
1434 *self = scif_info.nodeid;
1435 mutex_lock(&scif_info.conflock);
1436 len = min_t(int, len, scif_info.total);
1437 for (node = 0; node <= scif_info.maxid; node++) {
1438 if (_scifdev_alive(&scif_dev[node])) {
1439 online++;
1440 if (offset < len)
1441 nodes[offset++] = node;
1442 }
1443 }
1444 dev_dbg(scif_info.mdev.this_device,
1445 "SCIFAPI get_node_ids total %d online %d filled in %d nodes\n",
1446 scif_info.total, online, offset);
1447 mutex_unlock(&scif_info.conflock);
1448
1449 return online;
1450 }
1451 EXPORT_SYMBOL_GPL(scif_get_node_ids);
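/*
 * Example (illustrative sketch): discovering the online nodes and this
 * node's own id; the array size of 32 is an arbitrary upper bound chosen
 * for the example:
 *
 *	u16 nodes[32], self;
 *	int online = scif_get_node_ids(nodes, ARRAY_SIZE(nodes), &self);
 *
 * online then holds the number of nodes online while nodes[] holds the ids
 * of up to ARRAY_SIZE(nodes) of them.
 */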
1452
static int scif_add_client_dev(struct device *dev, struct subsys_interface *si)
1454 {
1455 struct scif_client *client =
1456 container_of(si, struct scif_client, si);
1457 struct scif_peer_dev *spdev =
1458 container_of(dev, struct scif_peer_dev, dev);
1459
1460 if (client->probe)
1461 client->probe(spdev);
1462 return 0;
1463 }
1464
static void scif_remove_client_dev(struct device *dev,
				   struct subsys_interface *si)
1467 {
1468 struct scif_client *client =
1469 container_of(si, struct scif_client, si);
1470 struct scif_peer_dev *spdev =
1471 container_of(dev, struct scif_peer_dev, dev);
1472
1473 if (client->remove)
1474 client->remove(spdev);
1475 }
1476
void scif_client_unregister(struct scif_client *client)
1478 {
1479 subsys_interface_unregister(&client->si);
1480 }
1481 EXPORT_SYMBOL_GPL(scif_client_unregister);
1482
int scif_client_register(struct scif_client *client)
1484 {
1485 struct subsys_interface *si = &client->si;
1486
1487 si->name = client->name;
1488 si->subsys = &scif_peer_bus;
1489 si->add_dev = scif_add_client_dev;
1490 si->remove_dev = scif_remove_client_dev;
1491
1492 return subsys_interface_register(&client->si);
1493 }
1494 EXPORT_SYMBOL_GPL(scif_client_register);
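/*
 * Example (illustrative sketch): registering a SCIF client so that its
 * probe/remove callbacks run as peer devices come and go; my_probe and
 * my_remove are placeholder callbacks supplied by the caller:
 *
 *	static struct scif_client my_client = {
 *		.name	= "my_scif_client",
 *		.probe	= my_probe,
 *		.remove	= my_remove,
 *	};
 *
 *	err = scif_client_register(&my_client);
 *	...
 *	scif_client_unregister(&my_client);
 */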
1495