1 /*
2 * Copyright (C) 2007 Michael Brown <mbrown@fensystems.co.uk>.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License as
6 * published by the Free Software Foundation; either version 2 of the
7 * License, or any later version.
8 *
9 * This program is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
17 */
18
19 FILE_LICENCE ( GPL2_OR_LATER );
20
21 #include <stdint.h>
22 #include <stdlib.h>
23 #include <stdio.h>
24 #include <string.h>
25 #include <unistd.h>
26 #include <byteswap.h>
27 #include <errno.h>
28 #include <assert.h>
29 #include <gpxe/list.h>
30 #include <gpxe/errortab.h>
31 #include <gpxe/if_arp.h>
32 #include <gpxe/netdevice.h>
33 #include <gpxe/iobuf.h>
34 #include <gpxe/ipoib.h>
35 #include <gpxe/process.h>
36 #include <gpxe/infiniband.h>
37 #include <gpxe/ib_mi.h>
38 #include <gpxe/ib_sma.h>
39
40 /** @file
41 *
42 * Infiniband protocol
43 *
44 */
45
/** List of all registered Infiniband devices */
struct list_head ib_devices = LIST_HEAD_INIT ( ib_devices );

/** List of open Infiniband devices, in reverse order of opening */
static struct list_head open_ib_devices = LIST_HEAD_INIT ( open_ib_devices );

/* Disambiguate the various possible EINPROGRESSes, so that the two
 * intermediate link states reported by ib_link_rc() each get a
 * distinct, human-readable error string.
 */
#define EINPROGRESS_INIT ( EINPROGRESS | EUNIQ_01 )
#define EINPROGRESS_ARMED ( EINPROGRESS | EUNIQ_02 )

/** Human-readable messages for the link statuses */
struct errortab infiniband_errors[] __errortab = {
	{ EINPROGRESS_INIT, "Initialising" },
	{ EINPROGRESS_ARMED, "Armed" },
};
61
62 /***************************************************************************
63 *
64 * Completion queues
65 *
66 ***************************************************************************
67 */
68
69 /**
70 * Create completion queue
71 *
72 * @v ibdev Infiniband device
73 * @v num_cqes Number of completion queue entries
74 * @v op Completion queue operations
75 * @ret cq New completion queue
76 */
77 struct ib_completion_queue *
ib_create_cq(struct ib_device * ibdev,unsigned int num_cqes,struct ib_completion_queue_operations * op)78 ib_create_cq ( struct ib_device *ibdev, unsigned int num_cqes,
79 struct ib_completion_queue_operations *op ) {
80 struct ib_completion_queue *cq;
81 int rc;
82
83 DBGC ( ibdev, "IBDEV %p creating completion queue\n", ibdev );
84
85 /* Allocate and initialise data structure */
86 cq = zalloc ( sizeof ( *cq ) );
87 if ( ! cq )
88 goto err_alloc_cq;
89 cq->ibdev = ibdev;
90 list_add ( &cq->list, &ibdev->cqs );
91 cq->num_cqes = num_cqes;
92 INIT_LIST_HEAD ( &cq->work_queues );
93 cq->op = op;
94
95 /* Perform device-specific initialisation and get CQN */
96 if ( ( rc = ibdev->op->create_cq ( ibdev, cq ) ) != 0 ) {
97 DBGC ( ibdev, "IBDEV %p could not initialise completion "
98 "queue: %s\n", ibdev, strerror ( rc ) );
99 goto err_dev_create_cq;
100 }
101
102 DBGC ( ibdev, "IBDEV %p created %d-entry completion queue %p (%p) "
103 "with CQN %#lx\n", ibdev, num_cqes, cq,
104 ib_cq_get_drvdata ( cq ), cq->cqn );
105 return cq;
106
107 ibdev->op->destroy_cq ( ibdev, cq );
108 err_dev_create_cq:
109 list_del ( &cq->list );
110 free ( cq );
111 err_alloc_cq:
112 return NULL;
113 }
114
115 /**
116 * Destroy completion queue
117 *
118 * @v ibdev Infiniband device
119 * @v cq Completion queue
120 */
ib_destroy_cq(struct ib_device * ibdev,struct ib_completion_queue * cq)121 void ib_destroy_cq ( struct ib_device *ibdev,
122 struct ib_completion_queue *cq ) {
123 DBGC ( ibdev, "IBDEV %p destroying completion queue %#lx\n",
124 ibdev, cq->cqn );
125 assert ( list_empty ( &cq->work_queues ) );
126 ibdev->op->destroy_cq ( ibdev, cq );
127 list_del ( &cq->list );
128 free ( cq );
129 }
130
131 /**
132 * Poll completion queue
133 *
134 * @v ibdev Infiniband device
135 * @v cq Completion queue
136 */
ib_poll_cq(struct ib_device * ibdev,struct ib_completion_queue * cq)137 void ib_poll_cq ( struct ib_device *ibdev,
138 struct ib_completion_queue *cq ) {
139 struct ib_work_queue *wq;
140
141 /* Poll completion queue */
142 ibdev->op->poll_cq ( ibdev, cq );
143
144 /* Refill receive work queues */
145 list_for_each_entry ( wq, &cq->work_queues, list ) {
146 if ( ! wq->is_send )
147 ib_refill_recv ( ibdev, wq->qp );
148 }
149 }
150
151 /***************************************************************************
152 *
153 * Work queues
154 *
155 ***************************************************************************
156 */
157
/**
 * Create queue pair
 *
 * @v ibdev		Infiniband device
 * @v type		Queue pair type
 * @v num_send_wqes	Number of send work queue entries
 * @v send_cq		Send completion queue
 * @v num_recv_wqes	Number of receive work queue entries
 * @v recv_cq		Receive completion queue
 * @ret qp		Queue pair, or NULL on failure
 *
 * The queue pair will be left in the INIT state; you must call
 * ib_modify_qp() before it is ready to use for sending and receiving.
 */
struct ib_queue_pair * ib_create_qp ( struct ib_device *ibdev,
				      enum ib_queue_pair_type type,
				      unsigned int num_send_wqes,
				      struct ib_completion_queue *send_cq,
				      unsigned int num_recv_wqes,
				      struct ib_completion_queue *recv_cq ) {
	struct ib_queue_pair *qp;
	size_t total_size;
	int rc;

	DBGC ( ibdev, "IBDEV %p creating queue pair\n", ibdev );

	/* Allocate and initialise data structure.  The two I/O buffer
	 * pointer arrays are carved out of a single allocation,
	 * immediately after the qp structure: send array first, then
	 * receive array.
	 */
	total_size = ( sizeof ( *qp ) +
		       ( num_send_wqes * sizeof ( qp->send.iobufs[0] ) ) +
		       ( num_recv_wqes * sizeof ( qp->recv.iobufs[0] ) ) );
	qp = zalloc ( total_size );
	if ( ! qp )
		goto err_alloc_qp;
	qp->ibdev = ibdev;
	list_add ( &qp->list, &ibdev->qps );
	qp->type = type;
	/* Each work queue keeps a back-pointer to its QP and is linked
	 * onto its completion queue's list, so completions can be
	 * routed back (see ib_find_wq()).  Initial PSNs are random per
	 * the usual IB convention (24-bit field).
	 */
	qp->send.qp = qp;
	qp->send.is_send = 1;
	qp->send.cq = send_cq;
	list_add ( &qp->send.list, &send_cq->work_queues );
	qp->send.psn = ( random() & 0xffffffUL );
	qp->send.num_wqes = num_send_wqes;
	qp->send.iobufs = ( ( ( void * ) qp ) + sizeof ( *qp ) );
	qp->recv.qp = qp;
	qp->recv.cq = recv_cq;
	list_add ( &qp->recv.list, &recv_cq->work_queues );
	qp->recv.psn = ( random() & 0xffffffUL );
	qp->recv.num_wqes = num_recv_wqes;
	qp->recv.iobufs = ( ( ( void * ) qp ) + sizeof ( *qp ) +
			    ( num_send_wqes * sizeof ( qp->send.iobufs[0] ) ));
	INIT_LIST_HEAD ( &qp->mgids );

	/* Perform device-specific initialisation and get QPN */
	if ( ( rc = ibdev->op->create_qp ( ibdev, qp ) ) != 0 ) {
		DBGC ( ibdev, "IBDEV %p could not initialise queue pair: "
		       "%s\n", ibdev, strerror ( rc ) );
		goto err_dev_create_qp;
	}
	DBGC ( ibdev, "IBDEV %p created queue pair %p (%p) with QPN %#lx\n",
	       ibdev, qp, ib_qp_get_drvdata ( qp ), qp->qpn );
	DBGC ( ibdev, "IBDEV %p QPN %#lx has %d send entries at [%p,%p)\n",
	       ibdev, qp->qpn, num_send_wqes, qp->send.iobufs,
	       qp->recv.iobufs );
	DBGC ( ibdev, "IBDEV %p QPN %#lx has %d receive entries at [%p,%p)\n",
	       ibdev, qp->qpn, num_recv_wqes, qp->recv.iobufs,
	       ( ( ( void * ) qp ) + total_size ) );

	/* Calculate externally-visible QPN: the well-known QP0/QP1
	 * numbers for SMI/GSI queue pairs, the real QPN otherwise.
	 */
	switch ( type ) {
	case IB_QPT_SMI:
		qp->ext_qpn = IB_QPN_SMI;
		break;
	case IB_QPT_GSI:
		qp->ext_qpn = IB_QPN_GSI;
		break;
	default:
		qp->ext_qpn = qp->qpn;
		break;
	}
	if ( qp->ext_qpn != qp->qpn ) {
		DBGC ( ibdev, "IBDEV %p QPN %#lx has external QPN %#lx\n",
		       ibdev, qp->qpn, qp->ext_qpn );
	}

	return qp;

	/* Unreachable; documents teardown order for future error paths */
	ibdev->op->destroy_qp ( ibdev, qp );
 err_dev_create_qp:
	list_del ( &qp->send.list );
	list_del ( &qp->recv.list );
	list_del ( &qp->list );
	free ( qp );
 err_alloc_qp:
	return NULL;
}
253
254 /**
255 * Modify queue pair
256 *
257 * @v ibdev Infiniband device
258 * @v qp Queue pair
259 * @v av New address vector, if applicable
260 * @ret rc Return status code
261 */
ib_modify_qp(struct ib_device * ibdev,struct ib_queue_pair * qp)262 int ib_modify_qp ( struct ib_device *ibdev, struct ib_queue_pair *qp ) {
263 int rc;
264
265 DBGC ( ibdev, "IBDEV %p modifying QPN %#lx\n", ibdev, qp->qpn );
266
267 if ( ( rc = ibdev->op->modify_qp ( ibdev, qp ) ) != 0 ) {
268 DBGC ( ibdev, "IBDEV %p could not modify QPN %#lx: %s\n",
269 ibdev, qp->qpn, strerror ( rc ) );
270 return rc;
271 }
272
273 return 0;
274 }
275
276 /**
277 * Destroy queue pair
278 *
279 * @v ibdev Infiniband device
280 * @v qp Queue pair
281 */
ib_destroy_qp(struct ib_device * ibdev,struct ib_queue_pair * qp)282 void ib_destroy_qp ( struct ib_device *ibdev, struct ib_queue_pair *qp ) {
283 struct io_buffer *iobuf;
284 unsigned int i;
285
286 DBGC ( ibdev, "IBDEV %p destroying QPN %#lx\n",
287 ibdev, qp->qpn );
288
289 assert ( list_empty ( &qp->mgids ) );
290
291 /* Perform device-specific destruction */
292 ibdev->op->destroy_qp ( ibdev, qp );
293
294 /* Complete any remaining I/O buffers with errors */
295 for ( i = 0 ; i < qp->send.num_wqes ; i++ ) {
296 if ( ( iobuf = qp->send.iobufs[i] ) != NULL )
297 ib_complete_send ( ibdev, qp, iobuf, -ECANCELED );
298 }
299 for ( i = 0 ; i < qp->recv.num_wqes ; i++ ) {
300 if ( ( iobuf = qp->recv.iobufs[i] ) != NULL ) {
301 ib_complete_recv ( ibdev, qp, NULL, iobuf,
302 -ECANCELED );
303 }
304 }
305
306 /* Remove work queues from completion queue */
307 list_del ( &qp->send.list );
308 list_del ( &qp->recv.list );
309
310 /* Free QP */
311 list_del ( &qp->list );
312 free ( qp );
313 }
314
315 /**
316 * Find queue pair by QPN
317 *
318 * @v ibdev Infiniband device
319 * @v qpn Queue pair number
320 * @ret qp Queue pair, or NULL
321 */
ib_find_qp_qpn(struct ib_device * ibdev,unsigned long qpn)322 struct ib_queue_pair * ib_find_qp_qpn ( struct ib_device *ibdev,
323 unsigned long qpn ) {
324 struct ib_queue_pair *qp;
325
326 list_for_each_entry ( qp, &ibdev->qps, list ) {
327 if ( ( qpn == qp->qpn ) || ( qpn == qp->ext_qpn ) )
328 return qp;
329 }
330 return NULL;
331 }
332
333 /**
334 * Find queue pair by multicast GID
335 *
336 * @v ibdev Infiniband device
337 * @v gid Multicast GID
338 * @ret qp Queue pair, or NULL
339 */
ib_find_qp_mgid(struct ib_device * ibdev,struct ib_gid * gid)340 struct ib_queue_pair * ib_find_qp_mgid ( struct ib_device *ibdev,
341 struct ib_gid *gid ) {
342 struct ib_queue_pair *qp;
343 struct ib_multicast_gid *mgid;
344
345 list_for_each_entry ( qp, &ibdev->qps, list ) {
346 list_for_each_entry ( mgid, &qp->mgids, list ) {
347 if ( memcmp ( &mgid->gid, gid,
348 sizeof ( mgid->gid ) ) == 0 ) {
349 return qp;
350 }
351 }
352 }
353 return NULL;
354 }
355
356 /**
357 * Find work queue belonging to completion queue
358 *
359 * @v cq Completion queue
360 * @v qpn Queue pair number
361 * @v is_send Find send work queue (rather than receive)
362 * @ret wq Work queue, or NULL if not found
363 */
ib_find_wq(struct ib_completion_queue * cq,unsigned long qpn,int is_send)364 struct ib_work_queue * ib_find_wq ( struct ib_completion_queue *cq,
365 unsigned long qpn, int is_send ) {
366 struct ib_work_queue *wq;
367
368 list_for_each_entry ( wq, &cq->work_queues, list ) {
369 if ( ( wq->qp->qpn == qpn ) && ( wq->is_send == is_send ) )
370 return wq;
371 }
372 return NULL;
373 }
374
375 /**
376 * Post send work queue entry
377 *
378 * @v ibdev Infiniband device
379 * @v qp Queue pair
380 * @v av Address vector
381 * @v iobuf I/O buffer
382 * @ret rc Return status code
383 */
ib_post_send(struct ib_device * ibdev,struct ib_queue_pair * qp,struct ib_address_vector * av,struct io_buffer * iobuf)384 int ib_post_send ( struct ib_device *ibdev, struct ib_queue_pair *qp,
385 struct ib_address_vector *av,
386 struct io_buffer *iobuf ) {
387 struct ib_address_vector av_copy;
388 int rc;
389
390 /* Check queue fill level */
391 if ( qp->send.fill >= qp->send.num_wqes ) {
392 DBGC ( ibdev, "IBDEV %p QPN %#lx send queue full\n",
393 ibdev, qp->qpn );
394 return -ENOBUFS;
395 }
396
397 /* Use default address vector if none specified */
398 if ( ! av )
399 av = &qp->av;
400
401 /* Make modifiable copy of address vector */
402 memcpy ( &av_copy, av, sizeof ( av_copy ) );
403 av = &av_copy;
404
405 /* Fill in optional parameters in address vector */
406 if ( ! av->qkey )
407 av->qkey = qp->qkey;
408 if ( ! av->rate )
409 av->rate = IB_RATE_2_5;
410
411 /* Post to hardware */
412 if ( ( rc = ibdev->op->post_send ( ibdev, qp, av, iobuf ) ) != 0 ) {
413 DBGC ( ibdev, "IBDEV %p QPN %#lx could not post send WQE: "
414 "%s\n", ibdev, qp->qpn, strerror ( rc ) );
415 return rc;
416 }
417
418 qp->send.fill++;
419 return 0;
420 }
421
422 /**
423 * Post receive work queue entry
424 *
425 * @v ibdev Infiniband device
426 * @v qp Queue pair
427 * @v iobuf I/O buffer
428 * @ret rc Return status code
429 */
ib_post_recv(struct ib_device * ibdev,struct ib_queue_pair * qp,struct io_buffer * iobuf)430 int ib_post_recv ( struct ib_device *ibdev, struct ib_queue_pair *qp,
431 struct io_buffer *iobuf ) {
432 int rc;
433
434 /* Check packet length */
435 if ( iob_tailroom ( iobuf ) < IB_MAX_PAYLOAD_SIZE ) {
436 DBGC ( ibdev, "IBDEV %p QPN %#lx wrong RX buffer size (%zd)\n",
437 ibdev, qp->qpn, iob_tailroom ( iobuf ) );
438 return -EINVAL;
439 }
440
441 /* Check queue fill level */
442 if ( qp->recv.fill >= qp->recv.num_wqes ) {
443 DBGC ( ibdev, "IBDEV %p QPN %#lx receive queue full\n",
444 ibdev, qp->qpn );
445 return -ENOBUFS;
446 }
447
448 /* Post to hardware */
449 if ( ( rc = ibdev->op->post_recv ( ibdev, qp, iobuf ) ) != 0 ) {
450 DBGC ( ibdev, "IBDEV %p QPN %#lx could not post receive WQE: "
451 "%s\n", ibdev, qp->qpn, strerror ( rc ) );
452 return rc;
453 }
454
455 qp->recv.fill++;
456 return 0;
457 }
458
459 /**
460 * Complete send work queue entry
461 *
462 * @v ibdev Infiniband device
463 * @v qp Queue pair
464 * @v iobuf I/O buffer
465 * @v rc Completion status code
466 */
ib_complete_send(struct ib_device * ibdev,struct ib_queue_pair * qp,struct io_buffer * iobuf,int rc)467 void ib_complete_send ( struct ib_device *ibdev, struct ib_queue_pair *qp,
468 struct io_buffer *iobuf, int rc ) {
469
470 if ( qp->send.cq->op->complete_send ) {
471 qp->send.cq->op->complete_send ( ibdev, qp, iobuf, rc );
472 } else {
473 free_iob ( iobuf );
474 }
475 qp->send.fill--;
476 }
477
478 /**
479 * Complete receive work queue entry
480 *
481 * @v ibdev Infiniband device
482 * @v qp Queue pair
483 * @v av Address vector
484 * @v iobuf I/O buffer
485 * @v rc Completion status code
486 */
ib_complete_recv(struct ib_device * ibdev,struct ib_queue_pair * qp,struct ib_address_vector * av,struct io_buffer * iobuf,int rc)487 void ib_complete_recv ( struct ib_device *ibdev, struct ib_queue_pair *qp,
488 struct ib_address_vector *av,
489 struct io_buffer *iobuf, int rc ) {
490
491 if ( qp->recv.cq->op->complete_recv ) {
492 qp->recv.cq->op->complete_recv ( ibdev, qp, av, iobuf, rc );
493 } else {
494 free_iob ( iobuf );
495 }
496 qp->recv.fill--;
497 }
498
499 /**
500 * Refill receive work queue
501 *
502 * @v ibdev Infiniband device
503 * @v qp Queue pair
504 */
ib_refill_recv(struct ib_device * ibdev,struct ib_queue_pair * qp)505 void ib_refill_recv ( struct ib_device *ibdev, struct ib_queue_pair *qp ) {
506 struct io_buffer *iobuf;
507 int rc;
508
509 /* Keep filling while unfilled entries remain */
510 while ( qp->recv.fill < qp->recv.num_wqes ) {
511
512 /* Allocate I/O buffer */
513 iobuf = alloc_iob ( IB_MAX_PAYLOAD_SIZE );
514 if ( ! iobuf ) {
515 /* Non-fatal; we will refill on next attempt */
516 return;
517 }
518
519 /* Post I/O buffer */
520 if ( ( rc = ib_post_recv ( ibdev, qp, iobuf ) ) != 0 ) {
521 DBGC ( ibdev, "IBDEV %p could not refill: %s\n",
522 ibdev, strerror ( rc ) );
523 free_iob ( iobuf );
524 /* Give up */
525 return;
526 }
527 }
528 }
529
530 /***************************************************************************
531 *
532 * Link control
533 *
534 ***************************************************************************
535 */
536
/**
 * Open port
 *
 * @v ibdev		Infiniband device
 * @ret rc		Return status code
 *
 * Openings are counted; only the first open actually creates the
 * management interfaces and opens the hardware.
 */
int ib_open ( struct ib_device *ibdev ) {
	int rc;

	/* Increment device open request counter */
	if ( ibdev->open_count++ > 0 ) {
		/* Device was already open; do nothing */
		return 0;
	}

	/* Create subnet management interface */
	ibdev->smi = ib_create_mi ( ibdev, IB_QPT_SMI );
	if ( ! ibdev->smi ) {
		DBGC ( ibdev, "IBDEV %p could not create SMI\n", ibdev );
		rc = -ENOMEM;
		goto err_create_smi;
	}

	/* Create subnet management agent */
	if ( ( rc = ib_create_sma ( ibdev, ibdev->smi ) ) != 0 ) {
		DBGC ( ibdev, "IBDEV %p could not create SMA: %s\n",
		       ibdev, strerror ( rc ) );
		goto err_create_sma;
	}

	/* Create general services interface */
	ibdev->gsi = ib_create_mi ( ibdev, IB_QPT_GSI );
	if ( ! ibdev->gsi ) {
		DBGC ( ibdev, "IBDEV %p could not create GSI\n", ibdev );
		rc = -ENOMEM;
		goto err_create_gsi;
	}

	/* Open device */
	if ( ( rc = ibdev->op->open ( ibdev ) ) != 0 ) {
		DBGC ( ibdev, "IBDEV %p could not open: %s\n",
		       ibdev, strerror ( rc ) );
		goto err_open;
	}

	/* Add to head of open devices list */
	list_add ( &ibdev->open_list, &open_ib_devices );

	assert ( ibdev->open_count == 1 );
	return 0;

	/* Unreachable; documents teardown order for future error paths */
	ibdev->op->close ( ibdev );
 err_open:
	ib_destroy_mi ( ibdev, ibdev->gsi );
 err_create_gsi:
	ib_destroy_sma ( ibdev, ibdev->smi );
 err_create_sma:
	ib_destroy_mi ( ibdev, ibdev->smi );
 err_create_smi:
	/* Roll back the open count incremented above */
	assert ( ibdev->open_count == 1 );
	ibdev->open_count = 0;
	return rc;
}
600
601 /**
602 * Close port
603 *
604 * @v ibdev Infiniband device
605 */
ib_close(struct ib_device * ibdev)606 void ib_close ( struct ib_device *ibdev ) {
607
608 /* Decrement device open request counter */
609 ibdev->open_count--;
610
611 /* Close device if this was the last remaining requested opening */
612 if ( ibdev->open_count == 0 ) {
613 list_del ( &ibdev->open_list );
614 ib_destroy_mi ( ibdev, ibdev->gsi );
615 ib_destroy_sma ( ibdev, ibdev->smi );
616 ib_destroy_mi ( ibdev, ibdev->smi );
617 ibdev->op->close ( ibdev );
618 }
619 }
620
621 /**
622 * Get link state
623 *
624 * @v ibdev Infiniband device
625 * @ret rc Link status code
626 */
ib_link_rc(struct ib_device * ibdev)627 int ib_link_rc ( struct ib_device *ibdev ) {
628 switch ( ibdev->port_state ) {
629 case IB_PORT_STATE_DOWN: return -ENOTCONN;
630 case IB_PORT_STATE_INIT: return -EINPROGRESS_INIT;
631 case IB_PORT_STATE_ARMED: return -EINPROGRESS_ARMED;
632 case IB_PORT_STATE_ACTIVE: return 0;
633 default: return -EINVAL;
634 }
635 }
636
637 /***************************************************************************
638 *
639 * Multicast
640 *
641 ***************************************************************************
642 */
643
644 /**
645 * Attach to multicast group
646 *
647 * @v ibdev Infiniband device
648 * @v qp Queue pair
649 * @v gid Multicast GID
650 * @ret rc Return status code
651 *
652 * Note that this function handles only the local device's attachment
653 * to the multicast GID; it does not issue the relevant MADs to join
654 * the multicast group on the subnet.
655 */
ib_mcast_attach(struct ib_device * ibdev,struct ib_queue_pair * qp,struct ib_gid * gid)656 int ib_mcast_attach ( struct ib_device *ibdev, struct ib_queue_pair *qp,
657 struct ib_gid *gid ) {
658 struct ib_multicast_gid *mgid;
659 int rc;
660
661 /* Add to software multicast GID list */
662 mgid = zalloc ( sizeof ( *mgid ) );
663 if ( ! mgid ) {
664 rc = -ENOMEM;
665 goto err_alloc_mgid;
666 }
667 memcpy ( &mgid->gid, gid, sizeof ( mgid->gid ) );
668 list_add ( &mgid->list, &qp->mgids );
669
670 /* Add to hardware multicast GID list */
671 if ( ( rc = ibdev->op->mcast_attach ( ibdev, qp, gid ) ) != 0 )
672 goto err_dev_mcast_attach;
673
674 return 0;
675
676 err_dev_mcast_attach:
677 list_del ( &mgid->list );
678 free ( mgid );
679 err_alloc_mgid:
680 return rc;
681 }
682
683 /**
684 * Detach from multicast group
685 *
686 * @v ibdev Infiniband device
687 * @v qp Queue pair
688 * @v gid Multicast GID
689 */
ib_mcast_detach(struct ib_device * ibdev,struct ib_queue_pair * qp,struct ib_gid * gid)690 void ib_mcast_detach ( struct ib_device *ibdev, struct ib_queue_pair *qp,
691 struct ib_gid *gid ) {
692 struct ib_multicast_gid *mgid;
693
694 /* Remove from hardware multicast GID list */
695 ibdev->op->mcast_detach ( ibdev, qp, gid );
696
697 /* Remove from software multicast GID list */
698 list_for_each_entry ( mgid, &qp->mgids, list ) {
699 if ( memcmp ( &mgid->gid, gid, sizeof ( mgid->gid ) ) == 0 ) {
700 list_del ( &mgid->list );
701 free ( mgid );
702 break;
703 }
704 }
705 }
706
707 /***************************************************************************
708 *
709 * Miscellaneous
710 *
711 ***************************************************************************
712 */
713
714 /**
715 * Get Infiniband HCA information
716 *
717 * @v ibdev Infiniband device
718 * @ret hca_guid HCA GUID
719 * @ret num_ports Number of ports
720 */
ib_get_hca_info(struct ib_device * ibdev,struct ib_gid_half * hca_guid)721 int ib_get_hca_info ( struct ib_device *ibdev,
722 struct ib_gid_half *hca_guid ) {
723 struct ib_device *tmp;
724 int num_ports = 0;
725
726 /* Search for IB devices with the same physical device to
727 * identify port count and a suitable Node GUID.
728 */
729 for_each_ibdev ( tmp ) {
730 if ( tmp->dev != ibdev->dev )
731 continue;
732 if ( num_ports == 0 ) {
733 memcpy ( hca_guid, &tmp->gid.u.half[1],
734 sizeof ( *hca_guid ) );
735 }
736 num_ports++;
737 }
738 return num_ports;
739 }
740
741 /**
742 * Set port information
743 *
744 * @v ibdev Infiniband device
745 * @v mad Set port information MAD
746 */
ib_set_port_info(struct ib_device * ibdev,union ib_mad * mad)747 int ib_set_port_info ( struct ib_device *ibdev, union ib_mad *mad ) {
748 int rc;
749
750 /* Adapters with embedded SMAs do not need to support this method */
751 if ( ! ibdev->op->set_port_info ) {
752 DBGC ( ibdev, "IBDEV %p does not support setting port "
753 "information\n", ibdev );
754 return -ENOTSUP;
755 }
756
757 if ( ( rc = ibdev->op->set_port_info ( ibdev, mad ) ) != 0 ) {
758 DBGC ( ibdev, "IBDEV %p could not set port information: %s\n",
759 ibdev, strerror ( rc ) );
760 return rc;
761 }
762
763 return 0;
764 };
765
766 /**
767 * Set partition key table
768 *
769 * @v ibdev Infiniband device
770 * @v mad Set partition key table MAD
771 */
ib_set_pkey_table(struct ib_device * ibdev,union ib_mad * mad)772 int ib_set_pkey_table ( struct ib_device *ibdev, union ib_mad *mad ) {
773 int rc;
774
775 /* Adapters with embedded SMAs do not need to support this method */
776 if ( ! ibdev->op->set_pkey_table ) {
777 DBGC ( ibdev, "IBDEV %p does not support setting partition "
778 "key table\n", ibdev );
779 return -ENOTSUP;
780 }
781
782 if ( ( rc = ibdev->op->set_pkey_table ( ibdev, mad ) ) != 0 ) {
783 DBGC ( ibdev, "IBDEV %p could not set partition key table: "
784 "%s\n", ibdev, strerror ( rc ) );
785 return rc;
786 }
787
788 return 0;
789 };
790
791 /***************************************************************************
792 *
793 * Event queues
794 *
795 ***************************************************************************
796 */
797
/**
 * Handle Infiniband link state change
 *
 * @v ibdev		Infiniband device
 */
void ib_link_state_changed ( struct ib_device *ibdev ) {

	/* IPoIB is currently the only upper-layer consumer to notify */
	ipoib_link_state_changed ( ibdev );
}
808
809 /**
810 * Poll event queue
811 *
812 * @v ibdev Infiniband device
813 */
ib_poll_eq(struct ib_device * ibdev)814 void ib_poll_eq ( struct ib_device *ibdev ) {
815 struct ib_completion_queue *cq;
816
817 /* Poll device's event queue */
818 ibdev->op->poll_eq ( ibdev );
819
820 /* Poll all completion queues */
821 list_for_each_entry ( cq, &ibdev->cqs, list )
822 ib_poll_cq ( ibdev, cq );
823 }
824
825 /**
826 * Single-step the Infiniband event queue
827 *
828 * @v process Infiniband event queue process
829 */
ib_step(struct process * process __unused)830 static void ib_step ( struct process *process __unused ) {
831 struct ib_device *ibdev;
832
833 for_each_ibdev ( ibdev )
834 ib_poll_eq ( ibdev );
835 }
836
/** Infiniband event queue process
 *
 * Stepped by the gPXE process scheduler; each step (ib_step) polls
 * every registered device's event queue.
 */
struct process ib_process __permanent_process = {
	.list = LIST_HEAD_INIT ( ib_process.list ),
	.step = ib_step,
};
842
843 /***************************************************************************
844 *
845 * Infiniband device creation/destruction
846 *
847 ***************************************************************************
848 */
849
850 /**
851 * Allocate Infiniband device
852 *
853 * @v priv_size Size of driver private data area
854 * @ret ibdev Infiniband device, or NULL
855 */
alloc_ibdev(size_t priv_size)856 struct ib_device * alloc_ibdev ( size_t priv_size ) {
857 struct ib_device *ibdev;
858 void *drv_priv;
859 size_t total_len;
860
861 total_len = ( sizeof ( *ibdev ) + priv_size );
862 ibdev = zalloc ( total_len );
863 if ( ibdev ) {
864 drv_priv = ( ( ( void * ) ibdev ) + sizeof ( *ibdev ) );
865 ib_set_drvdata ( ibdev, drv_priv );
866 INIT_LIST_HEAD ( &ibdev->cqs );
867 INIT_LIST_HEAD ( &ibdev->qps );
868 ibdev->port_state = IB_PORT_STATE_DOWN;
869 ibdev->lid = IB_LID_NONE;
870 ibdev->pkey = IB_PKEY_DEFAULT;
871 }
872 return ibdev;
873 }
874
875 /**
876 * Register Infiniband device
877 *
878 * @v ibdev Infiniband device
879 * @ret rc Return status code
880 */
register_ibdev(struct ib_device * ibdev)881 int register_ibdev ( struct ib_device *ibdev ) {
882 int rc;
883
884 /* Add to device list */
885 ibdev_get ( ibdev );
886 list_add_tail ( &ibdev->list, &ib_devices );
887
888 /* Add IPoIB device */
889 if ( ( rc = ipoib_probe ( ibdev ) ) != 0 ) {
890 DBGC ( ibdev, "IBDEV %p could not add IPoIB device: %s\n",
891 ibdev, strerror ( rc ) );
892 goto err_ipoib_probe;
893 }
894
895 DBGC ( ibdev, "IBDEV %p registered (phys %s)\n", ibdev,
896 ibdev->dev->name );
897 return 0;
898
899 err_ipoib_probe:
900 list_del ( &ibdev->list );
901 ibdev_put ( ibdev );
902 return rc;
903 }
904
905 /**
906 * Unregister Infiniband device
907 *
908 * @v ibdev Infiniband device
909 */
unregister_ibdev(struct ib_device * ibdev)910 void unregister_ibdev ( struct ib_device *ibdev ) {
911
912 /* Close device */
913 ipoib_remove ( ibdev );
914
915 /* Remove from device list */
916 list_del ( &ibdev->list );
917 ibdev_put ( ibdev );
918 DBGC ( ibdev, "IBDEV %p unregistered\n", ibdev );
919 }
920
921 /**
922 * Find Infiniband device by GID
923 *
924 * @v gid GID
925 * @ret ibdev Infiniband device, or NULL
926 */
find_ibdev(struct ib_gid * gid)927 struct ib_device * find_ibdev ( struct ib_gid *gid ) {
928 struct ib_device *ibdev;
929
930 for_each_ibdev ( ibdev ) {
931 if ( memcmp ( gid, &ibdev->gid, sizeof ( *gid ) ) == 0 )
932 return ibdev;
933 }
934 return NULL;
935 }
936
937 /**
938 * Get most recently opened Infiniband device
939 *
940 * @ret ibdev Most recently opened Infiniband device, or NULL
941 */
last_opened_ibdev(void)942 struct ib_device * last_opened_ibdev ( void ) {
943 struct ib_device *ibdev;
944
945 list_for_each_entry ( ibdev, &open_ib_devices, open_list ) {
946 assert ( ibdev->open_count != 0 );
947 return ibdev;
948 }
949
950 return NULL;
951 }
952