• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /************************************************* -*- linux-c -*-
2  * Myricom 10Gb Network Interface Card Software
3  * Copyright 2009, Myricom, Inc.
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License,
7  * version 2, as published by the Free Software Foundation.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write to the Free Software
16  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
17  ****************************************************************/
18 
19 FILE_LICENCE ( GPL2_ONLY );
20 
21 /*
22  * Author: Glenn Brown <glenn@myri.com>
23  */
24 
25 /*
26  * General Theory of Operation
27  *
28  * This is a minimal Myricom 10 gigabit Ethernet driver for network
29  * boot.
30  *
31  * Initialization
32  *
33  * myri10ge_pci_probe() is called by gPXE during initialization.
34  * Minimal NIC initialization is performed to minimize resources
35  * consumed when the driver is resident but unused.
36  *
37  * Network Boot
38  *
39  * myri10ge_net_open() is called by gPXE before attempting to network
40  * boot from the card.  Packet buffers are allocated and the NIC
41  * interface is initialized.
42  *
43  * Transmit
44  *
45  * myri10ge_net_transmit() enqueues frames for transmission by writing
46  * descriptors to the NIC's tx ring.  For simplicity and to avoid
47  * copies, we always have the NIC DMA up the packet.  The sent I/O
48  * buffer is released once the NIC signals myri10ge_interrupt_handler()
49  * that the send has completed.
50  *
51  * Receive
52  *
53  * Receives are posted to the NIC's receive ring.  The NIC fills a
54  * DMAable receive_completion ring with completion notifications.
55  * myri10ge_net_poll() polls for these receive notifications, posts
56  * replacement receive buffers to the NIC, and passes received frames
57  * to netdev_rx().
58  */
59 
60 /*
61  * Debugging levels:
62  *	- DBG() is for any errors, i.e. failed alloc_iob(), malloc_dma(),
63  *	  TX overflow, corrupted packets, ...
64  *	- DBG2() is for successful events, like packet received,
65  *	  packet transmitted, and other general notifications.
66  *	- DBGP() prints the name of each called function on entry
67  */
68 
69 #include <stdint.h>
70 
71 #include <byteswap.h>
72 #include <errno.h>
73 #include <gpxe/ethernet.h>
74 #include <gpxe/if_ether.h>
75 #include <gpxe/iobuf.h>
76 #include <gpxe/malloc.h>
77 #include <gpxe/netdevice.h>
78 #include <gpxe/pci.h>
79 #include <gpxe/timer.h>
80 
81 #include "myri10ge_mcp.h"
82 
83 /****************************************************************
84  * Forward declarations
85  ****************************************************************/
86 
/*
 * PCI driver entry points, referenced by the myri10ge_driver
 * structure at the end of this file.
 */

static int myri10ge_pci_probe ( struct pci_device *pci,
				const struct pci_device_id *id );
static void myri10ge_pci_remove ( struct pci_device *pci );

/*
 * Network device operations, listed in the same order as the
 * operations table that myri10ge_pci_probe() installs.
 */

static int myri10ge_net_open ( struct net_device *netdev );
static void myri10ge_net_close ( struct net_device *netdev );
static int myri10ge_net_transmit ( struct net_device *netdev,
				   struct io_buffer *iobuf );
static void myri10ge_net_poll ( struct net_device *netdev );
static void myri10ge_net_irq ( struct net_device *netdev, int enable );
100 
101 /****************************************************************
102  * Constants
103  ****************************************************************/
104 
/*
 * Highest valid index of each host-side ring.  Ring positions are
 * free-running counters that are masked with these values, so each
 * constant must have the form 2**N-1.
 */

#define MYRI10GE_TRANSMIT_WRAP			0x01U	/*  2 entries */
#define MYRI10GE_RECEIVE_WRAP			0x07U	/*  8 entries */
#define MYRI10GE_RECEIVE_COMPLETION_WRAP	0x1FU	/* 32 entries */
110 
111 /****************************************************************
112  * Driver internal data types.
113  ****************************************************************/
114 
115 /* Structure holding all DMA buffers for a NIC, which we will
116    allocated as contiguous read/write DMAable memory when the NIC is
117    initialized. */
118 
119 struct myri10ge_dma_buffers
120 {
121 	/* The NIC DMAs receive completion notifications into this ring */
122 
123 	mcp_slot_t receive_completion[1+MYRI10GE_RECEIVE_COMPLETION_WRAP];
124 
125 	/* Interrupt details are DMAd here before interrupting. */
126 
127 	mcp_irq_data_t irq_data; /* 64B */
128 
129 	/* NIC command completion status is DMAd here. */
130 
131 	mcp_cmd_response_t command_response; /* 8B */
132 };
133 
134 struct myri10ge_private
135 {
136 	/* Interrupt support */
137 
138 	uint32	*irq_claim;	/* in NIC SRAM */
139 	uint32	*irq_deassert;	/* in NIC SRAM */
140 
141 	/* DMA buffers. */
142 
143 	struct myri10ge_dma_buffers	*dma;
144 
145 	/*
146 	 * Transmit state.
147 	 *
148 	 * The counts here are uint32 for easy comparison with
149 	 * priv->dma->irq_data.send_done_count and with each other.
150 	 */
151 
152 	mcp_kreq_ether_send_t	*transmit_ring;	/* in NIC SRAM */
153 	uint32                   transmit_ring_wrap;
154 	uint32                   transmits_posted;
155 	uint32                   transmits_done;
156 	struct io_buffer	*transmit_iob[1 + MYRI10GE_TRANSMIT_WRAP];
157 
158 	/*
159 	 * Receive state.
160 	 */
161 
162 	mcp_kreq_ether_recv_t	*receive_post_ring;	/* in NIC SRAM */
163 	unsigned int             receive_post_ring_wrap;
164 	unsigned int             receives_posted;
165 	unsigned int             receives_done;
166 	struct io_buffer	*receive_iob[1 + MYRI10GE_RECEIVE_WRAP];
167 
168 	/* Address for writing commands to the firmware.
169 	   BEWARE: the value must be written 32 bits at a time. */
170 
171 	mcp_cmd_t	*command;
172 };
173 
174 /****************************************************************
175  * Driver internal functions.
176  ****************************************************************/
177 
/* Debug helper: print the done/posted counters of the transmit and
   receive rings together with the calling function's name.  Use it
   only after one of the printed values has changed. */

#define DBG2_RINGS( priv )						\
	DBG2 ( "tx %x/%x rx %x/%x in %s() \n",				\
	       ( priv )->transmits_done,				\
	       ( priv )->transmits_posted,				\
	       ( priv )->receives_done,					\
	       ( priv )->receives_posted,				\
	       __FUNCTION__ )
186 
187 /*
188  * Return a pointer to the driver private data for a network device.
189  *
190  * @v netdev	Network device created by this driver.
191  * @ret priv	The corresponding driver private data.
192  */
myri10ge_priv(struct net_device * nd)193 static inline struct myri10ge_private *myri10ge_priv ( struct net_device *nd )
194 {
195 	/* Our private data always follows the network device in memory,
196 	   since we use alloc_netdev() to allocate the storage. */
197 
198 	return ( struct myri10ge_private * ) ( nd + 1 );
199 }
200 
201 /*
202  * Pass a receive buffer to the NIC to be filled.
203  *
204  * @v priv	The network device to receive the buffer.
205  * @v iob	The I/O buffer to fill.
206  *
207  * Receive buffers are filled in FIFO order.
208  */
myri10ge_post_receive(struct myri10ge_private * priv,struct io_buffer * iob)209 static void myri10ge_post_receive ( struct myri10ge_private *priv,
210 				    struct io_buffer *iob )
211 {
212 	unsigned int		 receives_posted;
213 	mcp_kreq_ether_recv_t	*request;
214 
215 	/* Record the posted I/O buffer, to be passed to netdev_rx() on
216 	   receive. */
217 
218 	receives_posted = priv->receives_posted;
219 	priv->receive_iob[receives_posted & MYRI10GE_RECEIVE_WRAP] = iob;
220 
221 	/* Post the receive. */
222 
223 	request = &priv->receive_post_ring[receives_posted
224 					   & priv->receive_post_ring_wrap];
225 	request->addr_high = 0;
226 	wmb();
227 	request->addr_low = htonl ( virt_to_bus ( iob->data ) );
228 	priv->receives_posted = ++receives_posted;
229 }
230 
231 /*
232  * Execute a command on the NIC.
233  *
234  * @v priv	NIC to perform the command.
235  * @v cmd	The command to perform.
236  * @v data	I/O copy buffer for parameters/results
237  * @ret rc	0 on success, else an error code.
238  */
myri10ge_command(struct myri10ge_private * priv,uint32 cmd,uint32 data[3])239 static int myri10ge_command ( struct myri10ge_private *priv,
240 			      uint32 cmd,
241 			      uint32 data[3] )
242 {
243 	int				 i;
244 	mcp_cmd_t			*command;
245 	uint32				 result;
246 	unsigned int			 slept_ms;
247 	volatile mcp_cmd_response_t	*response;
248 
249 	DBGP ( "myri10ge_command ( ,%d, ) \n", cmd );
250 	command = priv->command;
251 	response = &priv->dma->command_response;
252 
253 	/* Mark the command as incomplete. */
254 
255 	response->result = 0xFFFFFFFF;
256 
257 	/* Pass the command to the NIC. */
258 
259 	command->cmd		    = htonl ( cmd );
260 	command->data0		    = htonl ( data[0] );
261 	command->data1		    = htonl ( data[1] );
262 	command->data2		    = htonl ( data[2] );
263 	command->response_addr.high = 0;
264 	command->response_addr.low
265 		= htonl ( virt_to_bus ( &priv->dma->command_response ) );
266 	for ( i=0; i<36; i+=4 )
267 		* ( uint32 * ) &command->pad[i] = 0;
268 	wmb();
269 	* ( uint32 * ) &command->pad[36] = 0;
270 
271 	/* Wait up to 2 seconds for a response. */
272 
273 	for ( slept_ms=0; slept_ms<2000; slept_ms++ ) {
274 		result = response->result;
275 		if ( result == 0 ) {
276 			data[0] = ntohl ( response->data );
277 			return 0;
278 		} else if ( result != 0xFFFFFFFF ) {
279 			DBG ( "cmd%d:0x%x\n",
280 			      cmd,
281 			      ntohl ( response->result ) );
282 			return -EIO;
283 		}
284 		udelay ( 1000 );
285 		rmb();
286 	}
287 	DBG ( "cmd%d:timed out\n", cmd );
288 	return -ETIMEDOUT;
289 }
290 
291 /*
292  * Handle any pending interrupt.
293  *
294  * @v netdev		Device being polled for interrupts.
295  *
296  * This is called periodically to let the driver check for interrupts.
297  */
myri10ge_interrupt_handler(struct net_device * netdev)298 static void myri10ge_interrupt_handler ( struct net_device *netdev )
299 {
300 	struct myri10ge_private *priv;
301 	mcp_irq_data_t		*irq_data;
302 	uint8			 valid;
303 
304 	priv = myri10ge_priv ( netdev );
305 	irq_data = &priv->dma->irq_data;
306 
307 	/* Return if there was no interrupt. */
308 
309 	rmb();
310 	valid = irq_data->valid;
311 	if ( !valid )
312 		return;
313 	DBG2 ( "irq " );
314 
315 	/* Tell the NIC to deassert the interrupt and clear
316 	   irq_data->valid.*/
317 
318 	*priv->irq_deassert = 0;	/* any value is OK. */
319 	mb();
320 
321 	/* Handle any new receives. */
322 
323 	if ( valid & 1 ) {
324 
325 		/* Pass the receive interrupt token back to the NIC. */
326 
327 		DBG2 ( "rx " );
328 		*priv->irq_claim = htonl ( 3 );
329 		wmb();
330 	}
331 
332 	/* Handle any sent packet by freeing its I/O buffer, now that
333 	   we know it has been DMAd. */
334 
335 	if ( valid & 2 ) {
336 		unsigned int nic_done_count;
337 
338 		DBG2 ( "snt " );
339 		nic_done_count = ntohl ( priv->dma->irq_data.send_done_count );
340 		while ( priv->transmits_done != nic_done_count ) {
341 			struct io_buffer *iob;
342 
343 			iob = priv->transmit_iob [priv->transmits_done
344 						  & MYRI10GE_TRANSMIT_WRAP];
345 			DBG2 ( "%p ", iob );
346 			netdev_tx_complete ( netdev, iob );
347 			++priv->transmits_done;
348 		}
349 	}
350 
351 	/* Record any statistics update. */
352 
353 	if ( irq_data->stats_updated ) {
354 
355 		/* Update the link status. */
356 
357 		DBG2 ( "stats " );
358 		if ( ntohl ( irq_data->link_up ) == MXGEFW_LINK_UP )
359 			netdev_link_up ( netdev );
360 		else
361 			netdev_link_down ( netdev );
362 
363 		/* Ignore all error counters from the NIC. */
364 	}
365 
366 	/* Wait for the interrupt to be deasserted, as indicated by
367 	   irq_data->valid, which is set by the NIC after the deassert. */
368 
369 	DBG2 ( "wait " );
370 	do {
371 		mb();
372 	} while ( irq_data->valid );
373 
374 	/* Claim the interrupt to enable future interrupt generation. */
375 
376 	DBG2 ( "claim\n" );
377 	* ( priv->irq_claim + 1 ) = htonl ( 3 );
378 	mb();
379 }
380 
/* Register offsets and values for reading the STRING_SPECS through
   the Myricom vendor-specific PCI configuration space capability.
   The register macros expect a local variable `vs' holding the
   capability's offset in configuration space. */

#define VS_MODE ( vs + 0x10 )
#define 	VS_MODE_READ32 0x3
#define 	VS_MODE_LOCATE 0x8
#define 		VS_LOCATE_STRING_SPECS 0x3
#define VS_DATA ( vs + 0x14 )
#define VS_ADDR ( vs + 0x18 )
390 
391 /*
392  * Read MAC address from its 'string specs' via the vendor-specific
393  * capability.  (This capability allows NIC SRAM and ROM to be read
394  * before it is mapped.)
395  *
396  * @v pci		The device.
397  * @v mac		Buffer to store the MAC address.
398  * @ret rc		Returns 0 on success, else an error code.
399  */
mac_address_from_string_specs(struct pci_device * pci,uint8 mac[ETH_ALEN])400 static int mac_address_from_string_specs ( struct pci_device *pci,
401 						   uint8 mac[ETH_ALEN] )
402 {
403 	char string_specs[256];
404 	char *ptr, *limit;
405 	char *to = string_specs;
406 	uint32 addr;
407 	uint32 len;
408 	unsigned int vs;
409 	int mac_set = 0;
410 
411 	/* Find the "vendor specific" capability. */
412 
413 	vs = pci_find_capability ( pci, 9 );
414 	if ( vs == 0 ) {
415 		DBG ( "no VS\n" );
416 		return -ENOTSUP;
417 	}
418 
419 	/* Locate the String specs in LANai SRAM. */
420 
421 	pci_write_config_byte ( pci, VS_MODE, VS_MODE_LOCATE );
422 	pci_write_config_dword ( pci, VS_ADDR, VS_LOCATE_STRING_SPECS );
423 	pci_read_config_dword ( pci, VS_ADDR, &addr );
424 	pci_read_config_dword ( pci, VS_DATA, &len );
425 	DBG2 ( "ss@%x,%x\n", addr, len );
426 
427 	/* Copy in the string specs.  Use 32-bit reads for performance. */
428 
429 	if ( len > sizeof ( string_specs ) || ( len & 3 ) ) {
430 		DBG ( "SS too big\n" );
431 		return -ENOTSUP;
432 	}
433 
434 	pci_write_config_byte ( pci, VS_MODE, VS_MODE_READ32 );
435 	while ( len >= 4 ) {
436 		uint32 tmp;
437 
438 		pci_write_config_byte ( pci, VS_ADDR, addr );
439 		pci_read_config_dword ( pci, VS_DATA, &tmp );
440 		tmp = ntohl ( tmp );
441 		memcpy ( to, &tmp, 4 );
442 		to += 4;
443 		addr += 4;
444 		len -= 4;
445 	}
446 	pci_write_config_byte ( pci, VS_MODE, 0 );
447 
448 	/* Parse the string specs. */
449 
450 	DBG2 ( "STRING_SPECS:\n" );
451 	ptr = string_specs;
452 	limit = string_specs + sizeof ( string_specs );
453 	while ( *ptr != '\0' && ptr < limit ) {
454 		DBG2 ( "%s\n", ptr );
455 		if ( memcmp ( ptr, "MAC=", 4 ) == 0 ) {
456 			unsigned int i;
457 
458 			ptr += 4;
459 			for ( i=0; i<6; i++ ) {
460 				if ( ( ptr + 2 ) > limit ) {
461 					DBG ( "bad MAC addr\n" );
462 					return -ENOTSUP;
463 				}
464 				mac[i] = strtoul ( ptr, &ptr, 16 );
465 				ptr += 1;
466 			}
467 			mac_set = 1;
468 		}
469 		else
470 			while ( ptr < limit && *ptr++ );
471 	}
472 
473 	/* Verify we parsed all we need. */
474 
475 	if ( !mac_set ) {
476 		DBG ( "no MAC addr\n" );
477 		return -ENOTSUP;
478 	}
479 
480 	DBG2 ( "MAC %02x:%02x:%02x:%02x:%02x:%02x\n",
481 	       mac[0], mac[1], mac[2], mac[3], mac[4], mac[5] );
482 
483 	return 0;
484 }
485 
486 /****************************************************************
487  * gPXE PCI Device Driver API functions
488  ****************************************************************/
489 
490 /*
491  * Initialize the PCI device.
492  *
493  * @v pci 		The device's associated pci_device structure.
494  * @v id  		The PCI device + vendor id.
495  * @ret rc		Returns zero if successfully initialized.
496  *
497  * This function is called very early on, while gPXE is initializing.
498  * This is a gPXE PCI Device Driver API function.
499  */
myri10ge_pci_probe(struct pci_device * pci,const struct pci_device_id * id __unused)500 static int myri10ge_pci_probe ( struct pci_device *pci,
501 				const struct pci_device_id *id __unused )
502 {
503 	static struct net_device_operations myri10ge_operations = {
504 		.open     = myri10ge_net_open,
505 		.close    = myri10ge_net_close,
506 		.transmit = myri10ge_net_transmit,
507 		.poll     = myri10ge_net_poll,
508 		.irq      = myri10ge_net_irq
509 	};
510 
511 	const char *dbg;
512 	int rc;
513 	struct net_device *netdev;
514 	struct myri10ge_private *priv;
515 
516 	DBGP ( "myri10ge_pci_probe: " );
517 
518 	netdev = alloc_etherdev ( sizeof ( *priv ) );
519 	if ( !netdev ) {
520 		rc = -ENOMEM;
521 		dbg = "alloc_etherdev";
522 		goto abort_with_nothing;
523 	}
524 
525 	netdev_init ( netdev, &myri10ge_operations );
526 	priv = myri10ge_priv ( netdev );
527 
528 	pci_set_drvdata ( pci, netdev );
529 	netdev->dev = &pci->dev;
530 
531 	/* Make sure interrupts are disabled. */
532 
533 	myri10ge_net_irq ( netdev, 0 );
534 
535 	/* Read the NIC HW address. */
536 
537 	rc = mac_address_from_string_specs ( pci, netdev->hw_addr );
538 	if ( rc ) {
539 		dbg = "mac_from_ss";
540 		goto abort_with_netdev_init;
541 	}
542 	DBGP ( "mac " );
543 
544 	/* Enable bus master, etc. */
545 
546 	adjust_pci_device ( pci );
547 	DBGP ( "pci " );
548 
549 	/* Register the initialized network device. */
550 
551 	rc = register_netdev ( netdev );
552 	if ( rc ) {
553 		dbg = "register_netdev";
554 		goto abort_with_netdev_init;
555 	}
556 
557 	DBGP ( "done\n" );
558 
559 	return 0;
560 
561 abort_with_netdev_init:
562 	netdev_nullify ( netdev );
563 	netdev_put ( netdev );
564 abort_with_nothing:
565 	DBG ( "%s:%s\n", dbg, strerror ( rc ) );
566 	return rc;
567 }
568 
/*
 * Tear down the network device that was created for a PCI device.
 *
 * @v pci		PCI device to remove.
 *
 * This is a PCI Device Driver API function.
 */
static void myri10ge_pci_remove ( struct pci_device *pci )
{
	struct net_device *doomed;

	DBGP ( "myri10ge_pci_remove\n" );

	/* Recover the net_device stored by myri10ge_pci_probe(), then
	   unregister it and drop our reference. */

	doomed = pci_get_drvdata ( pci );
	unregister_netdev ( doomed );
	netdev_nullify ( doomed );
	netdev_put ( doomed );
}
587 
588 /****************************************************************
589  * gPXE Network Device Driver Operations
590  ****************************************************************/
591 
592 /*
593  * Close a network device.
594  *
595  * @v netdev		Device to close.
596  *
597  * This is a gPXE Network Device Driver API function.
598  */
myri10ge_net_close(struct net_device * netdev)599 static void myri10ge_net_close ( struct net_device *netdev )
600 {
601 	struct myri10ge_private *priv;
602 	uint32			 data[3];
603 
604 	DBGP ( "myri10ge_net_close\n" );
605 	priv = myri10ge_priv ( netdev );
606 
607 	/* disable interrupts */
608 
609 	myri10ge_net_irq ( netdev, 0 );
610 
611 	/* Reset the NIC interface, so we won't get any more events from
612 	   the NIC. */
613 
614 	myri10ge_command ( priv, MXGEFW_CMD_RESET, data );
615 
616 	/* Free receive buffers that were never filled. */
617 
618 	while ( priv->receives_done != priv->receives_posted ) {
619 		free_iob ( priv->receive_iob[priv->receives_done
620 					     & MYRI10GE_RECEIVE_WRAP] );
621 		++priv->receives_done;
622 	}
623 
624 	/* Release DMAable memory. */
625 
626 	free_dma ( priv->dma, sizeof ( *priv->dma ) );
627 
628 	/* Erase all state from the open. */
629 
630 	memset ( priv, 0, sizeof ( *priv ) );
631 
632 	DBG2_RINGS ( priv );
633 }
634 
635 /*
636  * Enable or disable IRQ masking.
637  *
638  * @v netdev		Device to control.
639  * @v enable		Zero to mask off IRQ, non-zero to enable IRQ.
640  *
641  * This is a gPXE Network Driver API function.
642  */
myri10ge_net_irq(struct net_device * netdev,int enable)643 static void myri10ge_net_irq ( struct net_device *netdev, int enable )
644 {
645 	struct pci_device	*pci_dev;
646 	uint16			 val;
647 
648 	DBGP ( "myri10ge_net_irq\n" );
649 	pci_dev = ( struct pci_device * ) netdev->dev;
650 
651 	/* Adjust the Interrupt Disable bit in the Command register of the
652 	   PCI Device. */
653 
654 	pci_read_config_word ( pci_dev, PCI_COMMAND, &val );
655 	if ( enable )
656 		val &= ~PCI_COMMAND_INTX_DISABLE;
657 	else
658 		val |= PCI_COMMAND_INTX_DISABLE;
659 	pci_write_config_word ( pci_dev, PCI_COMMAND, val );
660 }
661 
662 /*
663  * Opens a network device.
664  *
665  * @v netdev		Device to be opened.
666  * @ret rc  		Non-zero if failed to open.
667  *
668  * This enables tx and rx on the device.
669  * This is a gPXE Network Device Driver API function.
670  */
myri10ge_net_open(struct net_device * netdev)671 static int myri10ge_net_open ( struct net_device *netdev )
672 {
673 	const char		*dbg;	/* printed upon error return */
674 	int			 rc;
675 	struct io_buffer	*iob;
676 	struct myri10ge_private *priv;
677 	uint32			 data[3];
678 	struct pci_device	*pci_dev;
679 	void			*membase;
680 
681 	DBGP ( "myri10ge_net_open\n" );
682 	priv	= myri10ge_priv ( netdev );
683 	pci_dev = ( struct pci_device * ) netdev->dev;
684 	membase = phys_to_virt ( pci_dev->membase );
685 
686 	/* Compute address for passing commands to the firmware. */
687 
688 	priv->command = membase + MXGEFW_ETH_CMD;
689 
690 	/* Ensure interrupts are disabled. */
691 
692 	myri10ge_net_irq ( netdev, 0 );
693 
694 	/* Allocate cleared DMAable buffers. */
695 
696 	priv->dma = malloc_dma ( sizeof ( *priv->dma ) , 128 );
697 	if ( !priv->dma ) {
698 		rc = -ENOMEM;
699 		dbg = "DMA";
700 		goto abort_with_nothing;
701 	}
702 	memset ( priv->dma, 0, sizeof ( *priv->dma ) );
703 
704 	/* Simplify following code. */
705 
706 #define TRY( prefix, base, suffix ) do {		\
707 		rc = myri10ge_command ( priv,		\
708 					MXGEFW_		\
709 					## prefix	\
710 					## base		\
711 					## suffix,	\
712 					data );		\
713 		if ( rc ) {				\
714 			dbg = #base;			\
715 			goto abort_with_dma;		\
716 		}					\
717 	} while ( 0 )
718 
719 	/* Send a reset command to the card to see if it is alive,
720 	   and to reset its queue state. */
721 
722 	TRY ( CMD_, RESET , );
723 
724 	/* Set the interrupt queue size. */
725 
726 	data[0] = ( sizeof ( priv->dma->receive_completion )
727 		    | MXGEFW_CMD_SET_INTRQ_SIZE_FLAG_NO_STRICT_SIZE_CHECK );
728 	TRY ( CMD_SET_ , INTRQ_SIZE , );
729 
730 	/* Set the interrupt queue DMA address. */
731 
732 	data[0] = virt_to_bus ( &priv->dma->receive_completion );
733 	data[1] = 0;
734 	TRY ( CMD_SET_, INTRQ_DMA, );
735 
736 	/* Get the NIC interrupt claim address. */
737 
738 	TRY ( CMD_GET_, IRQ_ACK, _OFFSET );
739 	priv->irq_claim = membase + data[0];
740 
741 	/* Get the NIC interrupt assert address. */
742 
743 	TRY ( CMD_GET_, IRQ_DEASSERT, _OFFSET );
744 	priv->irq_deassert = membase + data[0];
745 
746 	/* Disable interrupt coalescing, which is inappropriate for the
747 	   minimal buffering we provide. */
748 
749 	TRY ( CMD_GET_, INTR_COAL, _DELAY_OFFSET );
750 	* ( ( uint32 * ) ( membase + data[0] ) ) = 0;
751 
752 	/* Set the NIC mac address. */
753 
754 	data[0] = ( netdev->ll_addr[0] << 24
755 		    | netdev->ll_addr[1] << 16
756 		    | netdev->ll_addr[2] << 8
757 		    | netdev->ll_addr[3] );
758 	data[1] = ( ( netdev->ll_addr[4] << 8 )
759 		     | netdev->ll_addr[5] );
760 	TRY ( SET_ , MAC_ADDRESS , );
761 
762 	/* Enable multicast receives, because some gPXE clients don't work
763 	   without multicast. . */
764 
765 	TRY ( ENABLE_ , ALLMULTI , );
766 
767 	/* Disable Ethernet flow control, so the NIC cannot deadlock the
768 	   network under any circumstances. */
769 
770 	TRY ( DISABLE_ , FLOW , _CONTROL );
771 
772 	/* Compute transmit ring sizes. */
773 
774 	data[0] = 0;		/* slice 0 */
775 	TRY ( CMD_GET_, SEND_RING, _SIZE );
776 	priv->transmit_ring_wrap
777 		= data[0] / sizeof ( mcp_kreq_ether_send_t ) - 1;
778 	if ( priv->transmit_ring_wrap
779 	     & ( priv->transmit_ring_wrap + 1 ) ) {
780 		rc = -EPROTO;
781 		dbg = "TX_RING";
782 		goto abort_with_dma;
783 	}
784 
785 	/* Compute receive ring sizes. */
786 
787 	data[0] = 0;		/* slice 0 */
788 	TRY ( CMD_GET_ , RX_RING , _SIZE );
789 	priv->receive_post_ring_wrap = data[0] / sizeof ( mcp_dma_addr_t ) - 1;
790 	if ( priv->receive_post_ring_wrap
791 	     & ( priv->receive_post_ring_wrap + 1 ) ) {
792 		rc = -EPROTO;
793 		dbg = "RX_RING";
794 		goto abort_with_dma;
795 	}
796 
797 	/* Get NIC transmit ring address. */
798 
799 	data[0] = 0;		/* slice 0. */
800 	TRY ( CMD_GET_, SEND, _OFFSET );
801 	priv->transmit_ring = membase + data[0];
802 
803 	/* Get the NIC receive ring address. */
804 
805 	data[0] = 0;		/* slice 0. */
806 	TRY ( CMD_GET_, SMALL_RX, _OFFSET );
807 	priv->receive_post_ring = membase + data[0];
808 
809 	/* Set the Nic MTU. */
810 
811 	data[0] = ETH_FRAME_LEN;
812 	TRY ( CMD_SET_, MTU, );
813 
814 	/* Tell the NIC our buffer sizes. ( We use only small buffers, so we
815 	   set both buffer sizes to the same value, which will force all
816 	   received frames to use small buffers. ) */
817 
818 	data[0] = MXGEFW_PAD + ETH_FRAME_LEN;
819 	TRY ( CMD_SET_, SMALL_BUFFER, _SIZE );
820 	data[0] = MXGEFW_PAD + ETH_FRAME_LEN;
821 	TRY ( CMD_SET_, BIG_BUFFER, _SIZE );
822 
823         /* Tell firmware where to DMA IRQ data */
824 
825 	data[0] = virt_to_bus ( &priv->dma->irq_data );
826 	data[1] = 0;
827 	data[2] = sizeof ( priv->dma->irq_data );
828 	TRY ( CMD_SET_, STATS_DMA_V2, );
829 
830 	/* Post receives. */
831 
832 	while ( priv->receives_posted <= MYRI10GE_RECEIVE_WRAP ) {
833 
834 		/* Reserve 2 extra bytes at the start of packets, since
835 		   the firmware always skips the first 2 bytes of the buffer
836 		   so TCP headers will be aligned. */
837 
838 		iob = alloc_iob ( MXGEFW_PAD + ETH_FRAME_LEN );
839 		if ( !iob ) {
840 			rc = -ENOMEM;
841 			dbg = "alloc_iob";
842 			goto abort_with_receives_posted;
843 		}
844 		iob_reserve ( iob, MXGEFW_PAD );
845 		myri10ge_post_receive ( priv, iob );
846 	}
847 
848 	/* Bring up the link. */
849 
850 	TRY ( CMD_, ETHERNET_UP, );
851 
852 	DBG2_RINGS ( priv );
853 	return 0;
854 
855 abort_with_receives_posted:
856 	while ( priv->receives_posted-- )
857 		free_iob ( priv->receive_iob[priv->receives_posted] );
858 abort_with_dma:
859 	/* Because the link is not up, we don't have to reset the NIC here. */
860 	free_dma ( priv->dma, sizeof ( *priv->dma ) );
861 abort_with_nothing:
862 	/* Erase all signs of the failed open. */
863 	memset ( priv, 0, sizeof ( *priv ) );
864 	DBG ( "%s: %s\n", dbg, strerror ( rc ) );
865 	return ( rc );
866 }
867 
868 /*
869  * This function allows a driver to process events during operation.
870  *
871  * @v netdev		Device being polled.
872  *
873  * This is called periodically by gPXE to let the driver check the status of
874  * transmitted packets and to allow the driver to check for received packets.
875  * This is a gPXE Network Device Driver API function.
876  */
myri10ge_net_poll(struct net_device * netdev)877 static void myri10ge_net_poll ( struct net_device *netdev )
878 {
879 	struct io_buffer		*iob;
880 	struct io_buffer		*replacement;
881 	struct myri10ge_dma_buffers	*dma;
882 	struct myri10ge_private		*priv;
883 	unsigned int			 length;
884 	unsigned int			 orig_receives_posted;
885 
886 	DBGP ( "myri10ge_net_poll\n" );
887 	priv = myri10ge_priv ( netdev );
888 	dma  = priv->dma;
889 
890 	/* Process any pending interrupt. */
891 
892 	myri10ge_interrupt_handler ( netdev );
893 
894 	/* Pass up received frames, but limit ourselves to receives posted
895 	   before this function was called, so we cannot livelock if
896 	   receives are arriving faster than we process them. */
897 
898 	orig_receives_posted = priv->receives_posted;
899 	while ( priv->receives_done != orig_receives_posted ) {
900 
901 		/* Stop if there is no pending receive. */
902 
903 		length = ntohs ( dma->receive_completion
904 				 [priv->receives_done
905 				  & MYRI10GE_RECEIVE_COMPLETION_WRAP]
906 				 .length );
907 		if ( length == 0 )
908 			break;
909 
910 		/* Allocate a replacement buffer.  If none is available,
911 		   stop passing up packets until a buffer is available.
912 
913 		   Reserve 2 extra bytes at the start of packets, since
914 		   the firmware always skips the first 2 bytes of the buffer
915 		   so TCP headers will be aligned. */
916 
917 		replacement = alloc_iob ( MXGEFW_PAD + ETH_FRAME_LEN );
918 		if ( !replacement ) {
919 			DBG ( "NO RX BUF\n" );
920 			break;
921 		}
922 		iob_reserve ( replacement, MXGEFW_PAD );
923 
924 		/* Pass up the received frame. */
925 
926 		iob = priv->receive_iob[priv->receives_done
927 					& MYRI10GE_RECEIVE_WRAP];
928 		iob_put ( iob, length );
929 		netdev_rx ( netdev, iob );
930 
931 		/* We have consumed the packet, so clear the receive
932 		   notification. */
933 
934 		dma->receive_completion [priv->receives_done
935 					 & MYRI10GE_RECEIVE_COMPLETION_WRAP]
936 			.length = 0;
937 		wmb();
938 
939 		/* Replace the passed-up I/O buffer. */
940 
941 		myri10ge_post_receive ( priv, replacement );
942 		++priv->receives_done;
943 		DBG2_RINGS ( priv );
944 	}
945 }
946 
947 /*
948  * This transmits a packet.
949  *
950  * @v netdev		Device to transmit from.
951  * @v iobuf 		Data to transmit.
952  * @ret rc  		Non-zero if failed to transmit.
953  *
954  * This is a gPXE Network Driver API function.
955  */
myri10ge_net_transmit(struct net_device * netdev,struct io_buffer * iobuf)956 static int myri10ge_net_transmit ( struct net_device *netdev,
957 				   struct io_buffer *iobuf )
958 {
959 	mcp_kreq_ether_send_t	*kreq;
960 	size_t			 len;
961 	struct myri10ge_private *priv;
962 	uint32			 transmits_posted;
963 
964 	DBGP ( "myri10ge_net_transmit\n" );
965 	priv = myri10ge_priv ( netdev );
966 
967 	/* Confirm space in the send ring. */
968 
969 	transmits_posted = priv->transmits_posted;
970 	if ( transmits_posted - priv->transmits_done
971 	     > MYRI10GE_TRANSMIT_WRAP ) {
972 		DBG ( "TX ring full\n" );
973 		return -ENOBUFS;
974 	}
975 
976 	DBG2 ( "TX %p+%d ", iobuf->data, iob_len ( iobuf ) );
977 	DBG2_HD ( iobuf->data, 14 );
978 
979 	/* Record the packet being transmitted, so we can later report
980 	   send completion. */
981 
982 	priv->transmit_iob[transmits_posted & MYRI10GE_TRANSMIT_WRAP] = iobuf;
983 
984 	/* Copy and pad undersized frames, because the NIC does not pad,
985 	   and we would rather copy small frames than do a gather. */
986 
987 	len = iob_len ( iobuf );
988 	if ( len < ETH_ZLEN ) {
989 		iob_pad ( iobuf, ETH_ZLEN );
990 		len = ETH_ZLEN;
991 	}
992 
993 	/* Enqueue the packet by writing a descriptor to the NIC.
994 	   This is a bit tricky because the HW requires 32-bit writes,
995 	   but the structure has smaller fields. */
996 
997 	kreq = &priv->transmit_ring[transmits_posted
998 				    & priv->transmit_ring_wrap];
999 	kreq->addr_high = 0;
1000 	kreq->addr_low = htonl ( virt_to_bus ( iobuf->data ) );
1001 	( ( uint32 * ) kreq ) [2] = htonl (
1002 		0x0000 << 16	 /* pseudo_header_offset */
1003 		| ( len & 0xFFFF ) /* length */
1004 		);
1005 	wmb();
1006 	( ( uint32 * ) kreq ) [3] = htonl (
1007 		0x00 << 24	/* pad */
1008 		| 0x01 << 16	/* rdma_count */
1009 		| 0x00 << 8	/* cksum_offset */
1010 		| ( MXGEFW_FLAGS_SMALL
1011 		    | MXGEFW_FLAGS_FIRST
1012 		    | MXGEFW_FLAGS_NO_TSO ) /* flags */
1013 		);
1014 	wmb();
1015 
1016 	/* Mark the slot as consumed and return. */
1017 
1018 	priv->transmits_posted = ++transmits_posted;
1019 	DBG2_RINGS ( priv );
1020 	return 0;
1021 }
1022 
1023 static struct pci_device_id myri10ge_nics[] = {
1024 	/* Each of these macros must be a single line to satisfy a script. */
1025 	PCI_ROM ( 0x14c1, 0x0008, "myri10ge", "Myricom 10Gb Ethernet Adapter", 0 ) ,
1026 };
1027 
1028 struct pci_driver myri10ge_driver __pci_driver = {
1029 	.ids      = myri10ge_nics,
1030 	.id_count = ( sizeof ( myri10ge_nics ) / sizeof ( myri10ge_nics[0] ) ) ,
1031 	.probe    = myri10ge_pci_probe,
1032 	.remove   = myri10ge_pci_remove
1033 };
1034 
1035 /*
1036  * Local variables:
1037  *  c-basic-offset: 8
1038  *  c-indent-level: 8
1039  *  tab-width: 8
1040  * End:
1041  */
1042