diff mbox

seastar - SeaStar Ethernet driver

Message ID 20100202205845.GE5246@hawkeye.sandia.gov
State Changes Requested, archived
Delegated to: David Miller
Headers show

Commit Message

Kevin Pedretti Feb. 2, 2010, 8:58 p.m. UTC
[PATCH] seastar - SeaStar Ethernet driver

The following patch introduces the seastar driver for the
SeaStar network interface in Cray XT3/XT4/XT5 systems. The
driver is called 'seastar'. This patch is against 2.6.32.7.

The driver uses a simple datagram interface exported by the
SeaStar network interface to encapsulate Ethernet frames
on the Cray XT high speed network. The driver has been tested
to function correctly and is in use on Cray XT4 development
systems at Sandia. 

Signed-off-by: Kevin Pedretti <ktpedre@sandia.gov>



--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Comments

David Miller Feb. 2, 2010, 9:42 p.m. UTC | #1
From: "Kevin Pedretti" <ktpedre@sandia.gov>
Date: Tue, 2 Feb 2010 13:58:45 -0700

> +void seastar_setup_htb_bi(uint32_t idr)

Please use the in-kernel sized types "u32", "u16", etc.
instead of "uint32_t" et al.

> +extern void
> +seastar_ip_tx_cmd(
> +	struct ss_priv		*ssp,
> +	uint16_t		nid,
> +	uint16_t		length,
> +	uint64_t		address,
> +	uint16_t		pending_index
> +);
> +
> +
> +void
> +seastar_setup_htb_bi(
> +	uint32_t		idr
> +);
> +
> +
> +extern int
> +seastar_hw_init(
> +	struct ss_priv		*ssp
> +);

Please fix the formatting of these function declarations,
something like:

extern void seastar_ip_tx_cmd(struct ss_priv *ssp,
			      uint16_t nid,
			      uint16_t length,
			      uint64_t address,
			      uint16_t pending_index);

extern void seastar_setup_htb_bi(uint32_t idr);

extern int seastar_hw_init(struct ss_priv *ssp);

And again use "u16" instead of "uint16_t" etc.

There are many bad code formatting cases like this in your
driver, please fix them all up.

> +static int ss_open(struct net_device *netdev)
> +{
> +	struct ss_priv *ssp = netdev_priv(netdev);
> +	int i;
> +
> +	netif_start_queue(netdev);
> +
> +	for (i = 0; i < NUM_SKBS; i++) {
> +		ssp->skb_table_phys[i] = 0;
> +		ssp->skb_table_virt[i] = 0;
> +		refill_skb(netdev, i);
> +	}
> +
> +	return 0;
> +}

You shouldn't call netif_start_queue() until you are completely
done initializing the chip.  Packets can start being transmitted
to the driver the exact moment that function returns.

> +static int eth2ss(struct ss_priv *ssp, struct sk_buff *skb)
 ...
> +static int ss2eth(struct sk_buff *skb)

This device can only transmit IPv4 packets and can only receive IPv4
packets?

> +#ifdef CONFIG_PM
> +static int ss_suspend(struct pci_dev *pdev, pm_message_t state)
> +{
> +	return -ENOSYS;
> +}
> +
> +
> +static int ss_resume(struct pci_dev *pdev)
> +{
> +	return -ENOSYS;
> +}
> +#endif

If you don't support suspend and resume, simply leave the
method pointers unassigned, there is no need to provide
NOP routines like this.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Randy.Dunlap Feb. 2, 2010, 9:59 p.m. UTC | #2
On 02/02/10 12:58, Kevin Pedretti wrote:
> [PATCH] seastar - SeaStar Ethernet driver
>
> The following patch introduces the seastar driver for the
> SeaStar network interface in Cray XT3/XT4/XT5 systems. The
> driver is called 'seastar'. This patch is against 2.6.32.7.
>
> The driver uses a simple datagram interface exported by the
> SeaStar network interface to encapsulate Ethernet frames
> on the Cray XT high speed network. The driver has been tested
> to function correctly and is in use on Cray XT4 development
> systems at Sandia.
>
> Signed-off-by: Kevin Pedretti<ktpedre@sandia.gov>

>   obj-$(CONFIG_PPP_ASYNC) += ppp_async.o
> diff -uprN -X linux-2.6.32.7-vanilla/Documentation/dontdiff linux-2.6.32.7-vanilla/drivers/net/seastar/firmware.c linux-2.6.32.7/drivers/net/seastar/firmware.c
> --- linux-2.6.32.7-vanilla/drivers/net/seastar/firmware.c	1969-12-31 17:00:00.000000000 -0700
> +++ linux-2.6.32.7/drivers/net/seastar/firmware.c	2010-02-02 09:13:44.000000000 -0700
> @@ -0,0 +1,236 @@

> +
> +/**
> + * Maps a region of host memory into the SeaStar.
> + */

Please note that "/**" in Linux kernel sources means "beginning of a kernel-doc comment",
and none of these is in kernel-doc notation (format), so please change all of them
to a simple "/*".
(throughout all .c/.h files)

> +static void seastar_map_host_region(struct ss_priv *ssp, const void *addr)
> +{
> +	/* Round addr to the nearest 128 MB */
> +	unsigned long raw_paddr = __pa(addr);
> +	unsigned long paddr = raw_paddr&  ~((1<<  28) - 1);
> +
> +	htb_map[8] = 0x8000 | ((paddr>>  28) + 0);
> +	htb_map[9] = 0x8000 | ((paddr>>  28) + 1);

space before <<, &, and >>

> +
> +	ssp->host_region_phys = paddr;
> +}
> +
> +
> +/**
> + * Converts a kernel virtual address to a SeaStar address.
> + */
> +static uint32_t virt_to_fw(struct ss_priv *ssp, void *addr)
> +{
> +	unsigned long saddr;
> +
> +	saddr = __pa(addr) - ssp->host_region_phys;
> +	saddr&= (2<<  28) - 1;
> +	saddr += (8<<  28);

space before << (2x)

> +
> +	return saddr;
> +}
> +
> +
> +/**
> + * Send a command to the Seastar.
> + */
> +static uint32_t seastar_cmd(struct ss_priv *ssp, const struct command *cmd,
> +			    int wait_for_result)
> +{
> +	struct mailbox *mbox = ssp->mailbox;
> +	unsigned int next_write;
> +	uint32_t tail, result;
> +
> +	/* Copy the command into the mailbox */
> +	mbox->commandq[ssp->mailbox_cached_write] = *cmd;
> +	next_write = ssp->mailbox_cached_write + 1;
> +	if (next_write == COMMAND_Q_LENGTH)
> +		next_write = 0;
> +
> +	/* Wait until it is safe to advance the write pointer */
> +	while (next_write == ssp->mailbox_cached_read)
> +		ssp->mailbox_cached_read = mbox->commandq_read;
> +
> +	/* Advance the write pointer */
> +	mbox->commandq_write       = next_write;
> +	ssp->mailbox_cached_write = next_write;
> +
> +	if (!wait_for_result)
> +		return 0;
> +
> +	/* Wait for the result to arrive */
> +	tail = mbox->resultq_read;
> +	while (tail == mbox->resultq_write)
> +		;

I would limit that while loop somehow (not allowing it to continue forever).

> +
> +	/* Read the result */
> +	result = mbox->resultq[tail];
> +	mbox->resultq_read = (tail>= RESULT_Q_LENGTH - 1) ? 0 : tail + 1;
> +
> +	return result;
> +}
> +
> +
> +/**
> + * Sends a datagram transmit command to the SeaStar.
> + */
> +void seastar_ip_tx_cmd(struct ss_priv *ssp, uint16_t nid, uint16_t length,
> +		       uint64_t address, uint16_t pending_index)
> +{
> +	struct command_ip_tx tx_cmd = {
> +		.op		= COMMAND_IP_TX,
> +		.nid		= nid,
> +		.length		= length,
> +		.address	= address,
> +		.pending_index	= pending_index,
> +	};
> +
> +	seastar_cmd(ssp, (struct command *)&tx_cmd, 0);
> +}
> +
> +
> +/**
> + * Programs the SeaStar's HTB_BI register.
> + */
> +void seastar_setup_htb_bi(uint32_t idr)
> +{
> +	/* Mask the APIC dest setup by Linux, causes problems with SeaStar */
> +	idr&= 0xFFFF0000;

space after '&'

> +
> +	*htb_bi = 0xFD000000 | (idr>>  8);

space before >>

> +}
> +
> +
> +/**
> + * Brings up the low-level Seastar hardware.
> + */
> +int seastar_hw_init(struct ss_priv *ssp)
> +{
> +	uint32_t lower_memory = SEASTAR_HOST_BASE;
> +	const int num_eq = 1;
> +	uint32_t lower_pending;
> +	uint32_t lower_eqcb;
> +	uint32_t result;
> +	struct command_init init_cmd;
> +	struct command_init_eqcb eqcb_cmd;
> +	struct command_mark_alive alive_cmd;
> +
> +	/* Read our NID from SeaStar and write it to the NIC control block */
> +	niccb->local_nid = *tx_source;
> +
> +	printk(KERN_INFO "%s: nid %d (0x%x) version %x built %x\n",
> +		__func__,
> +		niccb->local_nid,
> +		niccb->local_nid,
> +		niccb->version,
> +		niccb->build_time
> +	);
> +
> +	/* Allocate the PPC memory */
> +	lower_pending = lower_memory;
> +	lower_memory += NUM_PENDINGS * FW_PENDING_SIZE;
> +
> +	lower_eqcb = lower_memory;
> +	lower_memory = num_eq * FW_EQCB_SIZE;
> +
> +	/* Initialize the HTB map so that the Seastar can see our memory.
> +	 * Since we are only doing upper pendings, we just use the
> +	 * upper_pending_phys instead of the host_phys area. */
> +	seastar_map_host_region(ssp, ssp);
> +
> +	ssp->mailbox			=&seastar_mailbox[0];

space before '&'

> +	ssp->mailbox_cached_read	= ssp->mailbox->commandq_read;
> +	ssp->mailbox_cached_write	= ssp->mailbox->commandq_write;
> +
> +	/* Attempt to send a setup command to the NIC */
> +	init_cmd.op			= COMMAND_INIT;
> +	init_cmd.process_index		= 1;
> +	init_cmd.uid			= 0;
> +	init_cmd.jid			= 0;
> +
> +	init_cmd.num_pendings		= NUM_PENDINGS;
> +	init_cmd.pending_tx_limit	= NUM_TX_PENDINGS;
> +	init_cmd.pending_table_addr	= lower_pending;
> +	init_cmd.up_pending_table_addr	= virt_to_fw(ssp, ssp->pending_table);
> +	init_cmd.up_pending_table_ht_addr = 0;
> +
> +	init_cmd.num_memds		= 0;
> +	init_cmd.memd_table_addr	= 0;
> +
> +	init_cmd.num_eqcbs		= num_eq;
> +	init_cmd.eqcb_table_addr	= lower_eqcb;
> +	init_cmd.eqheap_addr		= virt_to_fw(ssp, ssp->eq);
> +	init_cmd.eqheap_length		= NUM_EQ_ENTRIES * sizeof(ssp->eq[0]);
> +
> +	init_cmd.shdr_table_ht_addr	= 0;
> +	init_cmd.result_block_addr	= 0;
> +	init_cmd.smb_table_addr		= 0;
> +
> +	result = seastar_cmd(ssp, (struct command *)&init_cmd, 1);
> +	if (result != 0) {
> +		dev_err(&ssp->pdev->dev,
> +			"init command failed, result=%d.\n", result);
> +		return -1;
> +	}
> +
> +	eqcb_cmd.op			= COMMAND_INIT_EQCB;
> +	eqcb_cmd.eqcb_index		= 0;
> +	eqcb_cmd.base			= virt_to_fw(ssp, ssp->eq);
> +	eqcb_cmd.count			= NUM_EQ_ENTRIES;
> +
> +	result = seastar_cmd(ssp, (struct command *)&eqcb_cmd, 1);
> +	if (result != 1) {
> +		dev_err(&ssp->pdev->dev,
> +			"init_eqcb command failed, result=%d.\n", result);
> +		return -1;
> +	}
> +
> +	alive_cmd.op			= COMMAND_MARK_ALIVE;
> +	alive_cmd.index			= 1;
> +
> +	result = seastar_cmd(ssp, (struct command *)&alive_cmd, 1);
> +	if (result != 0) {
> +		dev_err(&ssp->pdev->dev,
> +			"mark_alive command failed, result=%d\n", result);
> +		return -1;
> +	}
> +
> +	return 0;
> +}
> diff -uprN -X linux-2.6.32.7-vanilla/Documentation/dontdiff linux-2.6.32.7-vanilla/drivers/net/seastar/firmware.h linux-2.6.32.7/drivers/net/seastar/firmware.h
> --- linux-2.6.32.7-vanilla/drivers/net/seastar/firmware.h	1969-12-31 17:00:00.000000000 -0700
> +++ linux-2.6.32.7/drivers/net/seastar/firmware.h	2010-02-02 09:15:02.000000000 -0700
> @@ -0,0 +1,329 @@

> +
> +#ifndef _SEASTAR_FIRMWARE_H
> +#define _SEASTAR_FIRMWARE_H
> +
> +
> +/**
> + * Number of entries in Host ->  SeaStar command queue.
> + *
> + * WARNING: This must match the definition used by the
> + *          closed-source SeaStar firmware.
> + */
> +#define COMMAND_Q_LENGTH		63
> +
> +
> +/**
> + * Number of entries in SeaStar ->  Host result queue.
> + *
> + * WARNING: This must match the definition used by the
> + *          closed-source SeaStar firmware.
> + */
> +#define RESULT_Q_LENGTH			2
> +
> +
> +/**
> + * SeaStar ->  Host event types.
> + *
> + * WARNING: These must match the definitions used by the
> + *          closed-source SeaStar firmware.
> + */
> +#define EVENT_TX_END			125
> +#define EVENT_RX			126
> +#define EVENT_RX_EMPTY			127
> +
> +
> +/**
> + * Host ->  SeaStar command types.
> + *
> + * WARNING: These must match the definitions used by the
> + *          closed-source SeaStar firmware.
> + */
> +#define COMMAND_INIT			0
> +#define COMMAND_MARK_ALIVE		1
> +#define COMMAND_INIT_EQCB		2
> +#define COMMAND_IP_TX			13
> +
> +
> +/**
> + * Number of entries in the incoming datagram buffer table.
> + *
> + * WARNING: This must match the definition used by the
> + *          closed-source SeaStar firmware.
> + */
> +#define NUM_SKBS			64
> +
> +
> +/**
> + * Size of the pending structure used by the SeaStar firmware.
> + *
> + * WARNING: This must match the definition used by the
> + *          closed-source SeaStar firmware.
> + */
> +#define FW_PENDING_SIZE			32
> +
> +
> +/**
> + * Size of the event queue control block structure used by the SeaStar firmware.
> + *
> + * WARNING: This must match the definition used by the
> + *          closed-source SeaStar firmware.
> + */
> +#define FW_EQCB_SIZE			32
> +
> +
> +/**
> + * SeaStar addresses of important structures in SeaStar memory.
> + *
> + * WARNING: These must match the definitions used by the
> + *          closed-source SeaStar firmware.
> + */
> +#define SEASTAR_SCRATCH_BASE		0xFFFA0000
> +#define SEASTAR_TX_SOURCE		0xFFE00108
> +#define SEASTAR_MAILBOX_BASE		0xFFFA0000
> +#define SEASTAR_SKB_BASE		0xFFFA4000
> +#define SEASTAR_HOST_BASE		0xFFFA5000
> +#define SEASTAR_HTB_BASE		0xFFE20000
> +#define SEASTAR_HTB_BI			0xFFE20048
> +#define SEASTAR_NICCB_BASE		0xFFFFE000
> +
> +
> +/**
> + * Kernel virtual address where the SeaStar memory is mapped.
> + */
> +#define SEASTAR_VIRT_BASE		(0xFFFFFFFFull<<  32)
> +
> +
> +/**
> + * Kernel virtual address of the SeaStar's NIC control block.
> + */
> +static volatile struct niccb * const niccb
> +	= (void *)(SEASTAR_VIRT_BASE + SEASTAR_NICCB_BASE);
> +
> +
> +/**
> + * Kernel virtual address of the SeaStar's HTB_BI register.
> + */
> +static volatile uint32_t * const htb_bi
> +	= (void *)(SEASTAR_VIRT_BASE + SEASTAR_HTB_BI);
> +
> +
> +/**
> + * Kernel virtual address of the SeaStar's HyperTransport map.
> + */
> +static volatile uint32_t * const htb_map
> +	= (void *)(SEASTAR_VIRT_BASE + SEASTAR_HTB_BASE);
> +
> +
> +/**
> + * Kernel virtual address of the Host<->  SeaStar mailbox.
> + */
> +static struct mailbox * const seastar_mailbox
> +	= (void *)(SEASTAR_VIRT_BASE + SEASTAR_MAILBOX_BASE);
> +
> +
> +/**
> + * Kernel virtual address of the incoming datagram buffer table.
> + */
> +static volatile uint64_t * const seastar_skb
> +	= (void *)(SEASTAR_VIRT_BASE + SEASTAR_SKB_BASE);
> +
> +
> +/**
> + * Kernel virtual address of the SeaStar TX Source register.
> + */
> +static volatile uint16_t * const tx_source
> +	= (void *)(SEASTAR_VIRT_BASE + SEASTAR_TX_SOURCE);
> +
> +
> +/**
> + * The SeaStar NIC Control Block.
> + *
> + * WARNING: This must match the definition used by the
> + *          closed-source SeaStar firmware.
> + */
> +struct niccb {
> +	uint32_t	version;			/* 0   */
> +	uint8_t		pad[24];
> +	uint32_t	build_time;			/* 28  */
> +	uint8_t		pad2[68];
> +	uint32_t	ip_tx;				/* 100 */
> +	uint32_t	ip_tx_drop;			/* 104 */
> +	uint32_t	ip_rx;				/* 108 */
> +	uint32_t	ip_rx_drop;			/* 112 */
> +	uint8_t		pad3[52];
> +	uint16_t	local_nid;			/* 168 */
> +} __attribute__((packed, aligned));
> +
> +
> +/**
> + * SeaStar datagram packet wire header.
> + *
> + * WARNING: This must match the definition used by the
> + *          closed-source SeaStar firmware.
> + */
> +struct sshdr {
> +	uint16_t	length;				/* 0 */
> +	uint8_t		lo_macs;			/* 2 */
> +	uint8_t		hdr_type;			/* 3 */
> +} __attribute__((packed));
> +
> +
> +/**
> + * Generic Host ->  SeaStar command structure.
> + *
> + * WARNING: This must match the definition used by the
> + *          closed-source SeaStar firmware.
> + */
> +struct command {
> +	uint8_t		op;				/* 0      */
> +	uint8_t		pad[63];			/* [1,63] */
> +} __attribute__((packed));
> +
> +
> +/**
> + * Initialize firmware command.
> + *
> + * WARNING: This must match the definition used by the
> + *          closed-source SeaStar firmware.
> + */
> +struct command_init {

> +
> +/**
> + * Start firmware running command.
> + *
> + * WARNING: This must match the definition used by the
> + *          closed-source SeaStar firmware.
> + */
> +struct command_mark_alive {
> +	uint8_t		op;				/* 0 */
> +	uint8_t		index;				/* 1 */
> +} __attribute__((packed));
> +
> +
> +/**
> + * Initialize event queue command.
> + *
> + * WARNING: This must match the definition used by the
> + *          closed-source SeaStar firmware.
> + */
> +struct command_init_eqcb {
> +	uint8_t		op;				/* 0 */
> +	uint8_t 	pad;				/* 1 */
> +	uint16_t	eqcb_index;			/* 2 */
> +	uint32_t	base;				/* 4 */
> +	uint32_t	count;				/* 8 */
> +} __attribute__((packed));
> +
> +
> +/**
> + * Send datagram command.
> + *
> + * WARNING: This must match the definition used by the
> + *          closed-source SeaStar firmware.
> + */
> +struct command_ip_tx {
> +	uint8_t		op;				/* 0  */
> +	uint8_t		pad;				/* 1  */
> +	uint16_t	nid;				/* 2  */
> +	uint16_t	length;				/* 4  */
> +	uint16_t	pad2;				/* 6  */
> +	uint64_t	address;			/* 8  */
> +	uint16_t	pending_index;			/* 16 */
> +} __attribute__((packed));
> +
> +
> +/**
> + * Host<->  SeaStar Mailbox structure.
> + *
> + * WARNING: This must match the definition used by the
> + *          closed-source SeaStar firmware.
> + */
> +struct mailbox {

> diff -uprN -X linux-2.6.32.7-vanilla/Documentation/dontdiff linux-2.6.32.7-vanilla/drivers/net/seastar/main.c linux-2.6.32.7/drivers/net/seastar/main.c
> --- linux-2.6.32.7-vanilla/drivers/net/seastar/main.c	1969-12-31 17:00:00.000000000 -0700
> +++ linux-2.6.32.7/drivers/net/seastar/main.c	2010-02-02 12:49:20.000000000 -0700
> @@ -0,0 +1,584 @@

> +
> +static void refill_skb(struct net_device *netdev, int i)
> +{
> +	struct ss_priv *ssp = netdev_priv(netdev);
> +	struct sk_buff *skb;
> +
> +	skb = dev_alloc_skb(netdev->mtu + SKB_PAD);
> +	if (!skb) {
> +		dev_err(&ssp->pdev->dev, "dev_alloc_skb() failed.\n");
> +		return;
> +	}
> +
> +	skb->dev = netdev;
> +	skb_reserve(skb, SKB_PAD);
> +
> +	/* Push it down to the PPC as a quadbyte address */
> +	ssp->skb_table_phys[i] = virt_to_phys(skb->data)>>  2;

space before >>

> +	ssp->skb_table_virt[i] = skb;
> +}
> +
> +
> +static int ss_open(struct net_device *netdev)
> +{
> +	struct ss_priv *ssp = netdev_priv(netdev);
> +	int i;
> +
> +	netif_start_queue(netdev);
> +
> +	for (i = 0; i<  NUM_SKBS; i++) {

space before '<'

> +		ssp->skb_table_phys[i] = 0;
> +		ssp->skb_table_virt[i] = 0;
> +		refill_skb(netdev, i);
> +	}
> +
> +	return 0;
> +}
> +
> +
> +static int eth2ss(struct ss_priv *ssp, struct sk_buff *skb)
> +{
> +	struct ethhdr *ethhdr;
> +	struct sshdr *sshdr;
> +	uint8_t source_lo_mac, dest_lo_mac;
> +	uint32_t qb_len;
> +
> +	/* Read the "low" bytes of the source and destination MAC addresses */
> +	ethhdr = (struct ethhdr *)skb->data;
> +	source_lo_mac = ethhdr->h_source[5];
> +	dest_lo_mac   = ethhdr->h_dest[5];
> +
> +	/* Drop anything not IPv4 */
> +	if (ethhdr->h_proto != ntohs(ETH_P_IP)) {
> +		dev_err(&ssp->pdev->dev, "squashing non-IPv4 packet.");
> +		return -1;
> +	}
> +
> +	/* Squash broadcast packets, SeaStar doesn't support broadcast */
> +	if (dest_lo_mac == 0xFF) {
> +		dev_err(&ssp->pdev->dev, "squashing broadcast packet.");
> +		return -1;
> +	}
> +
> +	/* We only support 4 bits of virtual hosts per physical node */
> +	if ((source_lo_mac&  ~0xF) || (dest_lo_mac&  ~0xF)) {

space before '&' (2x)

> +		dev_err(&ssp->pdev->dev, "lo_mac out of range.");
> +		return -1;
> +	}
> +
> +	/* Move ahead to allow sshdr to be filled in overtop of the ethhdr */
> +	sshdr = (struct sshdr *)
> +		skb_pull(skb, (unsigned int)(ETH_HLEN - sizeof(struct sshdr)));
> +
> +	/* The length in quad bytes, rounded up to the nearest quad byte.
> +	 * SS header is already counted in skb->len as per skb_pull() above */

Is that second line supposed to explain the "- 1" below?

> +	qb_len = (ROUNDUP4(skb->len)>>  2) - 1;

space before >>

> +
> +	/* Build the SeaStar header */
> +	sshdr->length   = qb_len;
> +	sshdr->lo_macs  = (source_lo_mac<<  4) | dest_lo_mac;
> +	sshdr->hdr_type = (2<<  5); /* Datagram 2, type 0 == IP */

	                  (2 << 5); ...

> +
> +	return 0;
> +}
> +

> +
> +
> +static int ss_tx(struct sk_buff *skb, struct net_device *netdev)
> +{
> +	unsigned long flags;
> +	struct ss_priv *ssp = netdev_priv(netdev);
> +	struct ethhdr *eh = (struct ethhdr *)skb->data;
> +	struct sshdr *sshdr;
> +	uint32_t dest_nid = ntohl(*(uint32_t *)eh->h_dest);
> +	struct pending *pending = NULL;
> +	void *msg;
> +
> +	spin_lock_irqsave(&ssp->lock, flags);
> +
> +	if (netif_queue_stopped(netdev)) {
> +		spin_unlock_irqrestore(&ssp->lock, flags);
> +		return NETDEV_TX_BUSY;
> +	}
> +
> +	/* Convert the SKB from an ethernet frame to a seastar frame */
> +	if (eth2ss(ssp, skb)) {
> +		netdev->stats.tx_errors++;
> +		goto drop;
> +	}
> +
> +	sshdr = (struct sshdr *)skb->data;
> +
> +	/* Get a tx_pending so that we can track the completion of this SKB */
> +	pending = alloc_tx_pending(ssp);
> +	if (!pending) {
> +		netif_stop_queue(netdev);
> +		spin_unlock_irqrestore(&ssp->lock, flags);
> +		return NETDEV_TX_BUSY;
> +	}
> +
> +	/* Stash skb away in the pending, will be needed in ss_tx_end() */
> +	pending->skb = skb;
> +
> +	/* Make sure buffer we pass to SeaStar is quad-byte aligned */
> +	if (((unsigned long)skb->data&  0x3) == 0) {
> +		pending->bounce = NULL;
> +		msg = skb->data;
> +	} else {
> +		/* Need to use bounce buffer to get quad-byte alignment */
> +		pending->bounce = kmalloc(skb->len, GFP_KERNEL);
> +		if (!pending->bounce) {
> +			dev_err(&ssp->pdev->dev, "dev_alloc_skb() failed.\n");
> +			goto drop;
> +		}
> +		memcpy(pending->bounce, skb->data, skb->len);
> +		msg = pending->bounce;
> +	}
> +
> +	seastar_ip_tx_cmd(
> +		ssp,
> +		dest_nid,
> +		sshdr->length,
> +		virt_to_phys(msg)>>  2,

		            (msg) >> 2,

> +		pending_to_index(ssp, pending)
> +	);
> +
> +	netdev->stats.tx_packets++;
> +	netdev->stats.tx_bytes += skb->len;
> +
> +	spin_unlock_irqrestore(&ssp->lock, flags);
> +	return 0;
> +
> +drop:
> +	dev_kfree_skb_any(skb);
> +	if (pending)
> +		free_tx_pending(ssp, pending);
> +	spin_unlock_irqrestore(&ssp->lock, flags);
> +	return 0;
> +}
> +

> +
> +
> +static irqreturn_t ss_interrupt(int irq, void *dev)
> +{
> +	struct net_device *netdev = (struct net_device *)dev;
> +	struct ss_priv *ssp = netdev_priv(netdev);
> +	uint32_t ev;
> +	unsigned int type, index;
> +
> +	while (1) {
> +		ev = next_event(ssp);
> +		if (!ev)
> +			break;

There is usually some condition that limits how long a while (1) can continue.
Does this environment not need that?

> +
> +		type  = (ev>>  16)&  0xFFFF;
> +		index = (ev>>   0)&  0xFFFF;
> +
> +		switch (type) {
> +
> +		case EVENT_TX_END:
> +			ss_tx_end(netdev, index);
> +			break;
> +
> +		case EVENT_RX:
> +			ss_rx(netdev, index);
> +			break;
> +
> +		case EVENT_RX_EMPTY:
> +			ss_rx_refill(netdev);
> +			break;
> +
> +		default:
> +			dev_err(&ssp->pdev->dev,
> +				"unknown event type (type=%u, index=%u).\n",
> +				type, index);
> +		}
> +	}
> +
> +	return IRQ_HANDLED;
> +}
> +

> +
> +static int __devinit ss_probe(struct pci_dev *pdev,
> +			      const struct pci_device_id *id)
> +{
> +	struct net_device *netdev;
> +	struct ss_priv *ssp;
> +	int i, irq, err = 0;
> +
> +	err = pci_enable_device(pdev);
> +	if (err != 0) {
> +		dev_err(&pdev->dev, "Could not enable PCI device.\n");
> +		return -ENODEV;
> +	}
> +
> +	netdev = alloc_etherdev(sizeof(*ssp));
> +	if (netdev == NULL) {
> +		dev_err(&pdev->dev, "Could not allocate ethernet device.\n");
> +		return -ENOMEM;
> +	}
> +
> +	SET_NETDEV_DEV(netdev,&pdev->dev);
> +
> +	strcpy(netdev->name, "ss");
> +	netdev->netdev_ops	=&ss_netdev_ops;
> +	netdev->header_ops	=&ss_header_ops;

				= &ss...

> +	netdev->mtu		= 16000;
> +	netdev->flags		= IFF_NOARP;
> +
> +	/* Setup private state */
> +	ssp = netdev_priv(netdev);
> +	memset(ssp, 0, sizeof(*ssp));
> +
> +	spin_lock_init(&ssp->lock);
> +	ssp->skb_table_phys	= seastar_skb;
> +	ssp->eq_read		= 0;
> +	ssp->pdev		= pdev;
> +
> +	/* Build the TX pending free list */
> +	ssp->tx_pending_free_list = 0;
> +	for (i = 0; i<  NUM_TX_PENDINGS; i++)

	          ; i < NUM_TX...

> +		free_tx_pending(ssp, index_to_pending(ssp, i));
> +
> +	irq = __ht_create_irq(pdev, 0, ss_ht_irq_update);
> +	if (irq<  0) {

	if (irq < 0) {

> +		dev_err(&pdev->dev, "__ht_create_irq() failed, err=%d.\n", err);
> +		goto err_out;
> +	}
> +
> +	err = request_irq(irq, ss_interrupt, IRQF_NOBALANCING,
> +			  "seastar", netdev);
> +	if (err != 0) {
> +		dev_err(&pdev->dev, "request_irq() failed, err=%d.\n", err);
> +		goto err_out;
> +	}
> +
> +	err = seastar_hw_init(netdev_priv(netdev));
> +	if (err != 0) {
> +		dev_err(&pdev->dev, "seastar_hw_init() failed, err=%d.\n", err);
> +		goto err_out;
> +	}
> +
> +	err = register_netdev(netdev);
> +	if (err != 0) {
> +		dev_err(&pdev->dev, "register_netdev() failed, err=%d.\n", err);
> +		goto err_out;
> +	}
> +
> +	return 0;
> +
> +err_out:
> +	free_netdev(netdev);
> +	return err;
> +}
> +
> +
> +static void __devexit ss_remove(struct pci_dev *pdev)
> +{
> +	struct net_device *netdev = pci_get_drvdata(pdev);
> +
> +	unregister_netdev(netdev);
> +	free_netdev(netdev);
> +	pci_disable_device(pdev);
> +}
> +
> +
> +#define PCI_VENDOR_ID_CRAY		0x17DB
> +#define PCI_DEVICE_ID_SEASTAR		0x0101
> +
> +
> +static struct pci_device_id ss_pci_tbl[] = {
> +	{PCI_DEVICE(PCI_VENDOR_ID_CRAY, PCI_DEVICE_ID_SEASTAR)},
> +	{0},
> +};
> +
> +

> diff -uprN -X linux-2.6.32.7-vanilla/Documentation/dontdiff linux-2.6.32.7-vanilla/drivers/net/seastar/seastar.h linux-2.6.32.7/drivers/net/seastar/seastar.h
> --- linux-2.6.32.7-vanilla/drivers/net/seastar/seastar.h	1969-12-31 17:00:00.000000000 -0700
> +++ linux-2.6.32.7/drivers/net/seastar/seastar.h	2010-02-02 09:14:33.000000000 -0700
> @@ -0,0 +1,104 @@
> +
> +#ifndef _SEASTAR_H
> +#define _SEASTAR_H
> +
> +
> +/**
> + * Rounds up to the nearest quadbyte.
> + */
> +#define ROUNDUP4(val)		((val + (4-1))&  ~(4-1))
> +
> +
> +/**
> + * SeaStar datagram packet maximum transfer unit size in bytes.
> + */
> +#define SEASTAR_MTU		8192
> +
> +
> +/**
> + * Number of transmit and receive pending structures.
> + */
> +#define NUM_TX_PENDINGS		64
> +#define NUM_RX_PENDINGS		64
> +#define NUM_PENDINGS		(NUM_TX_PENDINGS + NUM_RX_PENDINGS)
> +
> +
> +/**
> + * Number of entries in the SeaStar ->  Host event queue.
> + */
> +#define NUM_EQ_ENTRIES		1024
> +
> +
> +/**
> + * When allocating an SKB, allocate this many bytes extra.
> + */
> +#define SKB_PAD			(16 - sizeof(struct sshdr))
> +
> +
> +/**
> + * Pending structure.
> + * One of these is used to track each in progress transmit.
> + */
> +struct pending {
> +	struct sk_buff		*skb;
> +	struct pending		*next;
> +	void			*bounce;
> +};
> +
> +
> +/**
> + * SeaStar driver private data.
> + */
Ben Hutchings Feb. 2, 2010, 10:12 p.m. UTC | #3
On Tue, 2010-02-02 at 13:59 -0800, Randy Dunlap wrote:
> On 02/02/10 12:58, Kevin Pedretti wrote:
[...]
> > +static void seastar_map_host_region(struct ss_priv *ssp, const void *addr)
> > +{
> > +	/* Round addr to the nearest 128 MB */
> > +	unsigned long raw_paddr = __pa(addr);
> > +	unsigned long paddr = raw_paddr&  ~((1<<  28) - 1);
> > +
> > +	htb_map[8] = 0x8000 | ((paddr>>  28) + 0);
> > +	htb_map[9] = 0x8000 | ((paddr>>  28) + 1);
> 
> space before <<, &, and >>
[...]

The spacing looked correct here.

Ben.
stephen hemminger Feb. 2, 2010, 11:38 p.m. UTC | #4
On Tue, 2 Feb 2010 13:58:45 -0700
"Kevin Pedretti" <ktpedre@sandia.gov> wrote:

> +
> +static const struct net_device_ops ss_netdev_ops = {
> +	.ndo_open		= ss_open,
> +	.ndo_start_xmit		= ss_tx,
> +	.ndo_set_mac_address	= eth_mac_addr,
> +};

You should have a validate_address as well.
> +
> +
> +static const struct header_ops ss_header_ops = {
> +	.create			= ss_header_create,
> +};
> +
> +
> +static void ss_ht_irq_update(struct pci_dev *dev, int irq,
> +			     struct ht_irq_msg *msg)
> +{
> +	seastar_setup_htb_bi(msg->address_lo);
> +}
> +
> +
> +static int __devinit ss_probe(struct pci_dev *pdev,
> +			      const struct pci_device_id *id)
> +{
> +	struct net_device *netdev;
> +	struct ss_priv *ssp;
> +	int i, irq, err = 0;
> +
> +	err = pci_enable_device(pdev);
> +	if (err != 0) {
> +		dev_err(&pdev->dev, "Could not enable PCI device.\n");
> +		return -ENODEV;
> +	}
> +
> +	netdev = alloc_etherdev(sizeof(*ssp));
> +	if (netdev == NULL) {
> +		dev_err(&pdev->dev, "Could not allocate ethernet device.\n");
> +		return -ENOMEM;
> +	}

You may want to use alloc_netdev() since this starts to look
like a non-ethernet device.
> +
> +	SET_NETDEV_DEV(netdev, &pdev->dev);
> +
> +	strcpy(netdev->name, "ss");
> +	netdev->netdev_ops	= &ss_netdev_ops;
> +	netdev->header_ops	= &ss_header_ops;
> +	netdev->mtu		= 16000;
> +	netdev->flags		= IFF_NOARP;
> +
> +	/* Setup private state */
> +	ssp = netdev_priv(netdev);
> +	memset(ssp, 0, sizeof(*ssp));

memset is unnecessary, since alloc_netdev/alloc_etherdev zero that area
already.
Kevin Pedretti Feb. 3, 2010, 1:24 a.m. UTC | #5
Thank you all for the review comments.  I believe most of the issues
have been addressed in the patch just posted.  I apologize if there are
still issues, and certainly appreciate further comments.

David Miller's comments:
1. Use u32, u16, etc.  -> Done.
2. Bad code formatting -> Fixed, I believe. Went through everything.
3. Call netif_start_queue() after hw init -> Done.
4. Device only supports IPv4? -> Yes, that's correct. No IPv6 support.
The driver squashes everything but IPv4 in eth2ss().
5. No need for suspend/resume NOPs -> Done.  functions removed.

Randy Dunlap's comments:
1. Remove /** comments -> Done.
2. Odd spacing -> I'm not seeing this. Spacing looks correct to me.
3. Limit while (1) loops somehow -> Done. 
4. Limit while (1) in intr handler -> In practice we've never seen more
than a few packets processed per interrupt.

Ben Hutchings' comments:
1. Spacing looks correct -> Thanks. 

Stephen Hemminger's comments:
1. Add ndo_validate_address -> Done
2. May want to use alloc_netdev() -> Didn't do this. Would there be a
substantial advantage to doing this?
3. memset() unnecessary -> removed

Kevin


--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
David Miller Feb. 3, 2010, 1:40 a.m. UTC | #6
From: "Kevin Pedretti" <ktpedre@sandia.gov>
Date: Tue, 2 Feb 2010 18:24:02 -0700

> 4. Device only supports IPv4? -> Yes, that's correct. No IPv6 support.
> The driver squashes everything but IPv4 in eth2ss().

Not just IPV6, what about other ethernet protocols?

What about ARP?  How does IPV4 work if you only accept ETH_P_IP?  You
need to accept at least ETH_P_ARP for things to work.

> 2. May want to use alloc_netdev() -> Didn't do this. Would there be a
> substantial advantage to doing this?

I think you're going to end up having to make this change.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
David Miller Feb. 3, 2010, 1:41 a.m. UTC | #7
From: David Miller <davem@davemloft.net>
Date: Tue, 02 Feb 2010 17:40:45 -0800 (PST)

> From: "Kevin Pedretti" <ktpedre@sandia.gov>
> Date: Tue, 2 Feb 2010 18:24:02 -0700
> 
>> 2. May want to use alloc_netdev() -> Didn't do this. Would there be a
>> substantial advantage to doing this?
> 
> I think you're going to end up having to make this change.

Ignore this, using alloc_etherdev() should be just fine.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Randy.Dunlap Feb. 3, 2010, 4:12 p.m. UTC | #8
On 02/02/10 17:24, Kevin Pedretti wrote:
> Thank you all for the review comments.  I believe most of the issues
> have been addressed in the patch just posted.  I apologize if there are
> still issues, and certainly appreciate further comments.
>
>
> Randy Dunlap's comments:
> 2. Odd spacing ->  I'm not seeing this. Spacing looks correct to me.

Thanks.  Seems to be something that Thunderbird 3.0 is doing for me.  :(
Kevin Pedretti Feb. 4, 2010, 11:01 p.m. UTC | #9
On Tue, 2010-02-02 at 18:40 -0700, David Miller wrote:
> From: "Kevin Pedretti" <ktpedre@sandia.gov>
> Date: Tue, 2 Feb 2010 18:24:02 -0700
> 
> > 4. Device only supports IPv4? -> Yes, that's correct. No IPv6 support.
> > The driver squashes everything but IPv4 in eth2ss().
> 
> Not just IPV6, what about other ethernet protocols?
> 
> What about ARP?  How does IPV4 work if you only accept ETH_P_IP?  You
> need to accept at least ETH_P_ARP for things to work.


The only thing the driver supports currently is point-to-point IPv4,
nothing else.  The limitation is that the header format for datagram
messages is fixed, and it isn't really set up for Ethernet encapsulation:

Ethernet Frame:     [6 bytes h_dest][6 bytes h_source][2 bytes h_proto][data...]
SeaStar DG Message: [2 bytes length][1 byte MBZ][1 byte msg type (2 << 5) for IP)][data...]

I think it would be possible to re-factor it so that the Ethernet frame
is encapsulated in its entirety within a seastar message, rather than
the current scheme of jamming the critical info from the Ethernet header
into the seastar datagram header.  I will pursue that if you want... the
drawback is that it would break compatibility with Cray's existing
proprietary IP over SeaStar driver, making this driver pretty much
useless for the kinds of things us and others would like to do (e.g.,
leave service nodes booted with Cray's proprietary software stack and
talking to compute nodes running this driver).

As far as ARP goes, it isn't supported since the underlying network is
point-to-point only with no hardware broadcast.  At bootup, each node's
ARP table is pre-populated with entries for every node in the system,
and each node's MAC address encodes its node ID on the mesh.  This
driver uses the NID in the target MAC address to know who to send the
skb to.

I think it would be possible to emulate ARP in software using
point-to-point messages (send this packet to my 6 nearest neighbors,
neighbors send to their neighbors, etc.) but that would be quite a bit
more complicated compared to the static ARP table solution.  Again, it
would also break compatibility with Cray's proprietary driver.

Please let me know if these issues are show-stoppers as far as inclusion
goes.  We would like to get this open-source driver into the kernel so
us and others with Cray XT systems can start to benefit from it, and
continue to automatically track kernel API changes. 

Kevin


--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
David Miller Feb. 4, 2010, 11:13 p.m. UTC | #10
From: "Kevin Pedretti" <ktpedre@sandia.gov>
Date: Thu, 4 Feb 2010 16:01:04 -0700

> Please let me know if these issues are show-stoppers as far as inclusion
> goes.  We would like to get this open-source driver into the kernel so
> us and others with Cray XT systems can start to benefit from it, and
> continue to automatically track kernel API changes. 

I may let it in as-is, thanks for the explanations.

Could you at least consider perhaps that since it isn't
really ethernet, not pretending that it is may be something
to look at?
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Alan Cox Feb. 4, 2010, 11:16 p.m. UTC | #11
> Ethernet Frame:     [6 bytes h_dest][6 bytes h_source][2 bytes h_proto][data...]
> SeaStar DG Message: [2 bytes length][1 byte MBZ][1 byte msg type (2 << 5) for IP)][data...]
> 
> I think it would be possible to re-factor it so that the Ethernet frame
> is encapsulated in its entirety within a seastar message, rather than
> the current scheme of jamming the critical info from the Ethernet header
> into the seastar datagram header.  I will pursue that if you want... the
> drawback is that it would break compatibility with Cray's existing
> proprietary IP over SeaStar driver, making this driver pretty much
> useless for the kinds of things us and others would like to do (e.g.,
> leave service nodes booted with Cray's proprietary software stack and
> talking to compute nodes running this driver).

Perhaps it shouldn't be pretending to be an ethernet driver - that is
sort of the root cause of all the confusion and the fact things like the
bridging layer will try and grab it etc ? If it claimed to be a new
hardware type you'd take a brief hit on getting the new hw type into the
tools but it would mean

- tcpdump etc once coaxed would display seastar frames not fake ethernet
- the config tools would actually report what it really was
- non IP layers and userspace won't keep trying to do things you don't
  want (what does it do right now with vlans I wonder 8))
- there will be no ARP confusion

If it wants to stay compatible and pretend to be ethernet you probably
need a message type for "encapsulated ethernet", you can then encapsulate
anything not IP and stay compatible by keeping IP sent the way it is
now ? Thats if it wants to in the first place.

Alan
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Kevin Pedretti Feb. 5, 2010, 12:36 a.m. UTC | #12
On Thu, 2010-02-04 at 16:13 -0700, David Miller wrote:
> From: "Kevin Pedretti" <ktpedre@sandia.gov>
> Date: Thu, 4 Feb 2010 16:01:04 -0700
> 
> > Please let me know if these issues are show-stoppers as far as inclusion
> > goes.  We would like to get this open-source driver into the kernel so
> > us and others with Cray XT systems can start to benefit from it, and
> > continue to automatically track kernel API changes. 
> 
> I may let it in as-is, thanks for the explanations.
> 
> Could you at least consider perhaps that since it isn't
> really ethernet, not pretending that it is may be something
> to look at?

Sure, I'm open to doing this and would appreciate any example drivers
you could point me at.  We do need to keep the capability though of
talking to Cray's existing proprietary "ethernet" over seastar driver,
which the existing driver does.  If we go this route of being a
non-Ethernet device, do you think it would still be possible to support
this?

Kevin


--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Kevin Pedretti Feb. 5, 2010, 12:59 a.m. UTC | #13
On Thu, 2010-02-04 at 16:16 -0700, Alan Cox wrote:
> - tcpdump etc once coaxed would display seastar frames not fake ethernet
> - the config tools would actually report what it really was
> - non IP layers and userspace won't keep trying to do things you don't
>   want (what does it do right now with vlans I wonder 8))
> - there will be no ARP confusion
> 
> If it wants to stay compatible and pretend to be ethernet you probably
> need a message type for "encapsulated ethernet", you can then encapsulate
> anything not IP and stay compatible by keeping IP sent the way it is
> now ? Thats if it wants to in the first place.

I agree the current situation is rather confusing, and many if not most
of the standard tools will break.  I have used tcpdump with some success,
but I'm sure lots of other things are broken.

We do need to stay compatible with the existing proprietary driver.  The
usage model we're after is leaving all of the service nodes (nodes users
login to, serve I/O, etc.) booted with the existing proprietary software
and rebooting the compute nodes (99% of the nodes) with a modern Linux
kernel running this open-source seastar driver.  The compute nodes need
to be able to communicate with each other and with the service nodes
using IP (point-to-point).  Users login to the service nodes and then,
for example, can ssh to compute nodes, scp files to compute nodes,
etc.  

Probably critical background: each service node and each compute node
has a seastar NIC that directly connects to nearest neighbors, forming a
3-D torus.  The only way for the nodes to communicate is via this
network. 

I like the idea of encapsulating whole ethernet frames in a new message
type.  But won't the lack of broadcast (and multi-cast) still be an
issue for most protocols?  I also don't have much control over the
seastar header, but can probably find an ignored bit somewhere to mark
the new message type.

Kevin


--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
David Miller Feb. 5, 2010, 1:04 a.m. UTC | #14
From: "Kevin Pedretti" <ktpedre@sandia.gov>
Date: Thu, 4 Feb 2010 17:36:54 -0700

> Sure, I'm open to doing this and would appreciate any example drivers
> you could point me at.

I can't think of any, sorry.

> We do need to keep the capability though of talking to Cray's
> existing proprietary "ethernet" over seastar driver, which the
> existing driver does.  If we go this route of being a non-Ethernet
> device, do you think it would still be possible to support this?

I don't think it will break communication with Cray's stuff.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff -uprN -X linux-2.6.32.7-vanilla/Documentation/dontdiff linux-2.6.32.7-vanilla/drivers/net/Kconfig linux-2.6.32.7/drivers/net/Kconfig
--- linux-2.6.32.7-vanilla/drivers/net/Kconfig	2010-02-02 09:10:55.000000000 -0700
+++ linux-2.6.32.7/drivers/net/Kconfig	2010-02-02 09:12:04.000000000 -0700
@@ -2760,6 +2760,17 @@  config QLGE
 	  To compile this driver as a module, choose M here: the module
 	  will be called qlge.
 
+config SEASTAR
+	tristate "Cray XT SeaStar Ethernet driver"
+	depends on PCI
+	depends on HT_IRQ
+	---help---
+	  This driver supports the Cray XT SeaStar network interface in
+	  Ethernet mode.
+
+	  To compile this driver as a module, choose M here. The module
+	  will be called seastar.
+
 source "drivers/net/sfc/Kconfig"
 
 source "drivers/net/benet/Kconfig"
diff -uprN -X linux-2.6.32.7-vanilla/Documentation/dontdiff linux-2.6.32.7-vanilla/drivers/net/Makefile linux-2.6.32.7/drivers/net/Makefile
--- linux-2.6.32.7-vanilla/drivers/net/Makefile	2010-02-02 09:10:55.000000000 -0700
+++ linux-2.6.32.7/drivers/net/Makefile	2010-02-02 09:12:04.000000000 -0700
@@ -149,6 +149,7 @@  obj-$(CONFIG_XILINX_LL_TEMAC) += ll_tema
 obj-$(CONFIG_XILINX_EMACLITE) += xilinx_emaclite.o
 obj-$(CONFIG_QLA3XXX) += qla3xxx.o
 obj-$(CONFIG_QLGE) += qlge/
+obj-$(CONFIG_SEASTAR) += seastar/
 
 obj-$(CONFIG_PPP) += ppp_generic.o
 obj-$(CONFIG_PPP_ASYNC) += ppp_async.o
diff -uprN -X linux-2.6.32.7-vanilla/Documentation/dontdiff linux-2.6.32.7-vanilla/drivers/net/seastar/firmware.c linux-2.6.32.7/drivers/net/seastar/firmware.c
--- linux-2.6.32.7-vanilla/drivers/net/seastar/firmware.c	1969-12-31 17:00:00.000000000 -0700
+++ linux-2.6.32.7/drivers/net/seastar/firmware.c	2010-02-02 09:13:44.000000000 -0700
@@ -0,0 +1,236 @@ 
+/*******************************************************************************
+    SeaStar NIC Linux Driver
+
+    Copyright 2009-2010 Sandia Corporation. Under the terms of Contract
+    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government
+    retains certain rights in this software.
+
+    Copyright (c) 2009-2010 Cray Inc.
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+
+    Contact Information:
+    Kevin Pedretti <ktpedre@sandia.gov>
+    Sandia National Laboratories
+    P.O. Box 5800
+    Albuquerque, NM 87185-1319
+
+*******************************************************************************/
+
+#include <linux/netdevice.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include "firmware.h"
+#include "seastar.h"
+
+
+/**
+ * Maps host memory around addr into the SeaStar through HTB map entries 8
+ * and 9, and records the 256 MB-aligned base of that region in ssp.
+ */
+static void seastar_map_host_region(struct ss_priv *ssp, const void *addr)
+{
+	/* Round the physical address down to a 256 MB (1 << 28) boundary */
+	const unsigned long base = __pa(addr) & ~((1UL << 28) - 1);
+	const unsigned long slot = base >> 28;
+
+	htb_map[8] = 0x8000 | slot;		/* 256 MB unit holding addr */
+	htb_map[9] = 0x8000 | (slot + 1);	/* the unit right after it */
+	ssp->host_region_phys = base;
+}
+
+
+/**
+ * Converts a kernel virtual address to a SeaStar address: the offset from
+ * ssp->host_region_phys, wrapped to 512 MB, placed at SeaStar base 8 << 28.
+ */
+static uint32_t virt_to_fw(struct ss_priv *ssp, void *addr)
+{
+	unsigned long saddr = __pa(addr) - ssp->host_region_phys;
+
+	saddr &= (2UL << 28) - 1;	/* keep the offset within 512 MB */
+	saddr += 8UL << 28;		/* unsigned: 8 << 28 overflows int */
+
+	return saddr;
+}
+
+
+/**
+ * Queues *cmd in the mailbox; if wait_for_result, spins for and returns result.
+ */
+static uint32_t seastar_cmd(struct ss_priv *ssp, const struct command *cmd,
+			    int wait_for_result)
+{
+	struct mailbox *mbox = ssp->mailbox;
+	unsigned int next_write;
+	uint32_t tail, result;
+
+	/* Copy a whole struct command from cmd into the current write slot */
+	mbox->commandq[ssp->mailbox_cached_write] = *cmd;
+	next_write = ssp->mailbox_cached_write + 1;
+	if (next_write == COMMAND_Q_LENGTH)
+		next_write = 0;		/* wrap around the ring */
+
+	/* Spin (no timeout) while advancing would reach the read index */
+	while (next_write == ssp->mailbox_cached_read)
+		ssp->mailbox_cached_read = mbox->commandq_read;
+
+	/* Advance the shared write index, then the driver's cached copy */
+	mbox->commandq_write       = next_write;
+	ssp->mailbox_cached_write = next_write;
+
+	if (!wait_for_result)
+		return 0;	/* nothing is read from the result queue */
+
+	/* Spin (no timeout) until resultq_write differs from resultq_read */
+	tail = mbox->resultq_read;
+	while (tail == mbox->resultq_write)
+		;
+
+	/* Take one result and advance resultq_read, wrapping at the end */
+	result = mbox->resultq[tail];
+	mbox->resultq_read = (tail >= RESULT_Q_LENGTH - 1) ? 0 : tail + 1;
+
+	return result;
+}
+
+
+/**
+ * Sends a datagram transmit command to the SeaStar without waiting for a
+ * result.  Built in a zeroed struct command: seastar_cmd() copies all of it.
+ */
+void seastar_ip_tx_cmd(struct ss_priv *ssp, uint16_t nid, uint16_t length,
+		       uint64_t address, uint16_t pending_index)
+{
+	struct command cmd = { .op = COMMAND_IP_TX };
+	struct command_ip_tx *tx = (struct command_ip_tx *) &cmd;
+
+	tx->nid = nid;
+	tx->length = length;
+	tx->address = address;
+	tx->pending_index = pending_index;
+	seastar_cmd(ssp, &cmd, 0);
+}
+
+
+/**
+ * Writes 0xFD000000 OR-ed with (bits 31:16 of idr) >> 8 to the HTB_BI register.
+ */
+void seastar_setup_htb_bi(uint32_t idr)
+{
+	/* Drop the low 16 bits: APIC dest set up by Linux, upsets SeaStar */
+	const uint32_t upper = idr & 0xFFFF0000;
+
+	*htb_bi = 0xFD000000 | (upper >> 8);
+}
+
+
+/**
+ * Brings up the low-level SeaStar hardware: records the local NID, maps host
+ * memory into the SeaStar, then sends the INIT, INIT_EQCB and MARK_ALIVE
+ * commands in that order.  Returns 0 on success, -1 if a command fails.
+ */
+int seastar_hw_init(struct ss_priv *ssp)
+{
+	uint32_t lower_memory = SEASTAR_HOST_BASE;
+	const int num_eq = 1;
+	uint32_t lower_pending;
+	uint32_t lower_eqcb;
+	uint32_t result;
+	/* Full-size buffer: seastar_cmd() copies a whole struct command */
+	struct command cmd;
+	struct command_init *init_cmd = (void *) &cmd;
+	struct command_init_eqcb *eqcb_cmd = (void *) &cmd;
+	struct command_mark_alive *alive_cmd = (void *) &cmd;
+
+	/* Read our NID from SeaStar and write it to the NIC control block */
+	niccb->local_nid = *tx_source;
+
+	printk(KERN_INFO "%s: nid %d (0x%x) version %x built %x\n",
+		__func__, niccb->local_nid, niccb->local_nid,
+		niccb->version, niccb->build_time);
+
+	/* Allocate the PPC memory */
+	lower_pending = lower_memory;
+	lower_memory += NUM_PENDINGS * FW_PENDING_SIZE;
+
+	lower_eqcb = lower_memory;
+	lower_memory += num_eq * FW_EQCB_SIZE;
+
+	/* Initialize the HTB map so that the Seastar can see our memory.
+	 * The window starts at the 256 MB boundary at or below ssp; buffers
+	 * passed to virt_to_fw() below are assumed to fall inside it. */
+	seastar_map_host_region(ssp, ssp);
+
+	ssp->mailbox			= &seastar_mailbox[0];
+	ssp->mailbox_cached_read	= ssp->mailbox->commandq_read;
+	ssp->mailbox_cached_write	= ssp->mailbox->commandq_write;
+
+	/* Attempt to send a setup command to the NIC */
+	cmd = (struct command) { .op = COMMAND_INIT };
+	init_cmd->process_index		= 1;
+	init_cmd->uid			= 0;
+	init_cmd->jid			= 0;
+
+	init_cmd->num_pendings		= NUM_PENDINGS;
+	init_cmd->pending_tx_limit	= NUM_TX_PENDINGS;
+	init_cmd->pending_table_addr	= lower_pending;
+	init_cmd->up_pending_table_addr	= virt_to_fw(ssp, ssp->pending_table);
+	init_cmd->up_pending_table_ht_addr = 0;
+
+	init_cmd->num_memds		= 0;
+	init_cmd->memd_table_addr	= 0;
+
+	init_cmd->num_eqcbs		= num_eq;
+	init_cmd->eqcb_table_addr	= lower_eqcb;
+	init_cmd->eqheap_addr		= virt_to_fw(ssp, ssp->eq);
+	init_cmd->eqheap_length		= NUM_EQ_ENTRIES * sizeof(ssp->eq[0]);
+
+	init_cmd->shdr_table_ht_addr	= 0;
+	init_cmd->result_block_addr	= 0;
+	init_cmd->smb_table_addr	= 0;
+
+	result = seastar_cmd(ssp, &cmd, 1);
+	if (result != 0) {
+		dev_err(&ssp->pdev->dev,
+			"init command failed, result=%d.\n", result);
+		return -1;
+	}
+
+	cmd = (struct command) { .op = COMMAND_INIT_EQCB };
+	eqcb_cmd->eqcb_index		= 0;
+	eqcb_cmd->base			= virt_to_fw(ssp, ssp->eq);
+	eqcb_cmd->count			= NUM_EQ_ENTRIES;
+
+	result = seastar_cmd(ssp, &cmd, 1);
+	if (result != 1) {	/* NOTE(review): expects 1, others 0 - confirm */
+		dev_err(&ssp->pdev->dev,
+			"init_eqcb command failed, result=%d.\n", result);
+		return -1;
+	}
+
+	cmd = (struct command) { .op = COMMAND_MARK_ALIVE };
+	alive_cmd->index		= 1;
+
+	result = seastar_cmd(ssp, &cmd, 1);
+	if (result != 0) {
+		dev_err(&ssp->pdev->dev,
+			"mark_alive command failed, result=%d\n", result);
+		return -1;
+	}
+
+	return 0;
+}
diff -uprN -X linux-2.6.32.7-vanilla/Documentation/dontdiff linux-2.6.32.7-vanilla/drivers/net/seastar/firmware.h linux-2.6.32.7/drivers/net/seastar/firmware.h
--- linux-2.6.32.7-vanilla/drivers/net/seastar/firmware.h	1969-12-31 17:00:00.000000000 -0700
+++ linux-2.6.32.7/drivers/net/seastar/firmware.h	2010-02-02 09:15:02.000000000 -0700
@@ -0,0 +1,329 @@ 
+/*******************************************************************************
+    SeaStar NIC Linux Driver
+
+    Copyright 2009-2010 Sandia Corporation. Under the terms of Contract
+    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government
+    retains certain rights in this software.
+
+    Copyright (c) 2009-2010 Cray Inc.
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+
+    Contact Information:
+    Kevin Pedretti <ktpedre@sandia.gov>
+    Sandia National Laboratories
+    P.O. Box 5800
+    Albuquerque, NM 87185-1319
+
+*******************************************************************************/
+
+#ifndef _SEASTAR_FIRMWARE_H
+#define _SEASTAR_FIRMWARE_H
+
+
+/**
+ * Number of struct command slots in the Host -> SeaStar command queue
+ * (struct mailbox.commandq); 63 slots of 64 bytes end at offset 4032.
+ * WARNING: This must match the definition used by the
+ *          closed-source SeaStar firmware.
+ */
+#define COMMAND_Q_LENGTH		63
+
+
+/**
+ * Number of uint32_t entries in the SeaStar -> Host result queue
+ * (struct mailbox.resultq).
+ * WARNING: This must match the definition used by the
+ *          closed-source SeaStar firmware.
+ */
+#define RESULT_Q_LENGTH			2
+
+
+/**
+ * SeaStar -> Host event types.
+ *
+ * WARNING: These must match the definitions used by the
+ *          closed-source SeaStar firmware.
+ */
+#define EVENT_TX_END			125
+#define EVENT_RX			126
+#define EVENT_RX_EMPTY			127
+
+
+/**
+ * Host -> SeaStar command types, stored in the op byte of each command.
+ *
+ * WARNING: These must match the definitions used by the
+ *          closed-source SeaStar firmware.
+ */
+#define COMMAND_INIT			0
+#define COMMAND_MARK_ALIVE		1
+#define COMMAND_INIT_EQCB		2
+#define COMMAND_IP_TX			13
+
+
+/**
+ * Number of entries in the incoming datagram buffer table.
+ *
+ * WARNING: This must match the definition used by the
+ *          closed-source SeaStar firmware.
+ */
+#define NUM_SKBS			64
+
+
+/**
+ * Size of the pending structure used by the SeaStar firmware (presumably
+ * bytes: seastar_hw_init() adds NUM_PENDINGS * this to a SeaStar address).
+ * WARNING: This must match the definition used by the
+ *          closed-source SeaStar firmware.
+ */
+#define FW_PENDING_SIZE			32
+
+
+/**
+ * Size of the event queue control block structure used by the SeaStar firmware.
+ * Presumably bytes, like FW_PENDING_SIZE - TODO confirm.
+ * WARNING: This must match the definition used by the
+ *          closed-source SeaStar firmware.
+ */
+#define FW_EQCB_SIZE			32
+
+
+/**
+ * SeaStar addresses of important structures in SeaStar memory.
+ * Note that SCRATCH_BASE and MAILBOX_BASE are the same address.
+ * WARNING: These must match the definitions used by the
+ *          closed-source SeaStar firmware.
+ */
+#define SEASTAR_SCRATCH_BASE		0xFFFA0000
+#define SEASTAR_TX_SOURCE		0xFFE00108
+#define SEASTAR_MAILBOX_BASE		0xFFFA0000
+#define SEASTAR_SKB_BASE		0xFFFA4000
+#define SEASTAR_HOST_BASE		0xFFFA5000
+#define SEASTAR_HTB_BASE		0xFFE20000
+#define SEASTAR_HTB_BI			0xFFE20048
+#define SEASTAR_NICCB_BASE		0xFFFFE000
+
+
+/**
+ * Kernel virtual base of the SeaStar mapping (added to the SEASTAR_* values).
+ */
+#define SEASTAR_VIRT_BASE		(0xFFFFFFFFull << 32)
+
+
+/**
+ * Kernel virtual address of the SeaStar's NIC control block (struct niccb).
+ */
+static volatile struct niccb * const niccb
+	= (void *)(SEASTAR_VIRT_BASE + SEASTAR_NICCB_BASE);
+
+
+/**
+ * Kernel virtual address of the SeaStar's HTB_BI register (32 bits wide).
+ */
+static volatile uint32_t * const htb_bi
+	= (void *)(SEASTAR_VIRT_BASE + SEASTAR_HTB_BI);
+
+
+/**
+ * Kernel virtual address of the SeaStar's HyperTransport map (32-bit entries).
+ */
+static volatile uint32_t * const htb_map
+	= (void *)(SEASTAR_VIRT_BASE + SEASTAR_HTB_BASE);
+
+
+/**
+ * Kernel virtual address of the Host <-> SeaStar mailbox (struct mailbox).
+ */
+static struct mailbox * const seastar_mailbox
+	= (void *)(SEASTAR_VIRT_BASE + SEASTAR_MAILBOX_BASE);
+
+
+/**
+ * Kernel virtual address of the incoming datagram buffer table (uint64_t[]).
+ */
+static volatile uint64_t * const seastar_skb
+	= (void *)(SEASTAR_VIRT_BASE + SEASTAR_SKB_BASE);
+
+
+/**
+ * Kernel virtual address of the SeaStar TX Source register (holds local NID).
+ */
+static volatile uint16_t * const tx_source
+	= (void *)(SEASTAR_VIRT_BASE + SEASTAR_TX_SOURCE);
+
+
+/**
+ * The SeaStar NIC Control Block.
+ * Trailing comments give each named field's byte offset in the packed layout.
+ * WARNING: This must match the definition used by the
+ *          closed-source SeaStar firmware.
+ */
+struct niccb {
+	uint32_t	version;			/* 0   */
+	uint8_t		pad[24];
+	uint32_t	build_time;			/* 28  */
+	uint8_t		pad2[68];
+	uint32_t	ip_tx;				/* 100 */
+	uint32_t	ip_tx_drop;			/* 104 */
+	uint32_t	ip_rx;				/* 108 */
+	uint32_t	ip_rx_drop;			/* 112 */
+	uint8_t		pad3[52];
+	uint16_t	local_nid;			/* 168 */
+} __attribute__((packed, aligned));
+
+
+/**
+ * SeaStar datagram packet wire header (4 bytes, packed).
+ * Trailing comments give each field's byte offset.
+ * WARNING: This must match the definition used by the
+ *          closed-source SeaStar firmware.
+ */
+struct sshdr {
+	uint16_t	length;				/* 0 */
+	uint8_t		lo_macs;			/* 2 */
+	uint8_t		hdr_type;			/* 3 */
+} __attribute__((packed));
+
+
+/**
+ * Generic Host -> SeaStar command structure: one opcode byte plus padding,
+ * 64 bytes in total.  This is the element type of struct mailbox.commandq.
+ * WARNING: This must match the definition used by the
+ *          closed-source SeaStar firmware.
+ */
+struct command {
+	uint8_t		op;				/* 0      */
+	uint8_t		pad[63];			/* [1,63] */
+} __attribute__((packed));
+
+
+/**
+ * Initialize firmware command (op = COMMAND_INIT); 60 bytes, packed.
+ * Trailing comments give each field's byte offset.
+ * WARNING: This must match the definition used by the
+ *          closed-source SeaStar firmware.
+ */
+struct command_init {
+	uint8_t		op;				/* 0  */
+	uint8_t		process_index;			/* 1  */
+	uint16_t	pad;				/* 2  */
+	uint16_t	pid;				/* 4  */
+	uint16_t	jid;				/* 6  */
+	uint16_t	num_pendings;			/* 8  */
+	uint16_t	num_memds;			/* 10 */
+	uint16_t	num_eqcbs;			/* 12 */
+	uint16_t	pending_tx_limit;		/* 14 */
+	uint32_t	pending_table_addr;		/* 16 */
+	uint32_t	up_pending_table_addr;		/* 20 */
+	uint32_t	up_pending_table_ht_addr;	/* 24 */
+	uint32_t	memd_table_addr;		/* 28 */
+	uint32_t	eqcb_table_addr;		/* 32 */
+	uint32_t	shdr_table_ht_addr;		/* 36 */
+	uint32_t	result_block_addr;		/* 40 */
+	uint32_t	eqheap_addr;			/* 44 */
+	uint32_t	eqheap_length;			/* 48 */
+	uint32_t	smb_table_addr;			/* 52 */
+	uint32_t	uid;				/* 56 */
+} __attribute__((packed));
+
+
+/**
+ * Start firmware running command (op = COMMAND_MARK_ALIVE); 2 bytes, packed.
+ * Trailing comments give each field's byte offset.
+ * WARNING: This must match the definition used by the
+ *          closed-source SeaStar firmware.
+ */
+struct command_mark_alive {
+	uint8_t		op;				/* 0 */
+	uint8_t		index;				/* 1 */
+} __attribute__((packed));
+
+
+/**
+ * Initialize event queue command (op = COMMAND_INIT_EQCB); 12 bytes, packed.
+ * Trailing comments give each field's byte offset.
+ * WARNING: This must match the definition used by the
+ *          closed-source SeaStar firmware.
+ */
+struct command_init_eqcb {
+	uint8_t		op;				/* 0 */
+	uint8_t 	pad;				/* 1 */
+	uint16_t	eqcb_index;			/* 2 */
+	uint32_t	base;				/* 4 */
+	uint32_t	count;				/* 8 */
+} __attribute__((packed));
+
+
+/**
+ * Send datagram command (op = COMMAND_IP_TX); 18 bytes, packed.
+ * Trailing comments give each field's byte offset.
+ * WARNING: This must match the definition used by the
+ *          closed-source SeaStar firmware.
+ */
+struct command_ip_tx {
+	uint8_t		op;				/* 0  */
+	uint8_t		pad;				/* 1  */
+	uint16_t	nid;				/* 2  */
+	uint16_t	length;				/* 4  */
+	uint16_t	pad2;				/* 6  */
+	uint64_t	address;			/* 8  */
+	uint16_t	pending_index;			/* 16 */
+} __attribute__((packed));
+
+
+/**
+ * Host <-> SeaStar Mailbox structure: a command ring, a result ring and the
+ * four read/write indices.  Trailing comments give each member's byte offset.
+ * WARNING: This must match the definition used by the
+ *          closed-source SeaStar firmware.
+ */
+struct mailbox {
+	volatile struct command		commandq[COMMAND_Q_LENGTH]; /* 0    */
+	volatile uint32_t		resultq[RESULT_Q_LENGTH];   /* 4032 */
+
+	volatile uint32_t		resultq_read;		    /* 4040 */
+	volatile uint32_t		resultq_write;		    /* 4044 */
+	volatile uint32_t		commandq_write;		    /* 4048 */
+	volatile uint32_t		commandq_read;		    /* 4052 */
+} __attribute__((packed, aligned(PAGE_SIZE)));
+
+
+struct ss_priv;
+
+
+/**
+ * Sends a datagram transmit command to the SeaStar (see firmware.c).
+ */
+extern void seastar_ip_tx_cmd(struct ss_priv *ssp,
+			      uint16_t nid,
+			      uint16_t length,
+			      uint64_t address,
+			      uint16_t pending_index);
+
+
+/**
+ * Programs the SeaStar's HTB_BI register from idr.
+ */
+extern void seastar_setup_htb_bi(uint32_t idr);
+
+
+/**
+ * Brings up the low-level SeaStar hardware; returns 0 on success, else -1.
+ */
+extern int seastar_hw_init(struct ss_priv *ssp);
+
+
+#endif
diff -uprN -X linux-2.6.32.7-vanilla/Documentation/dontdiff linux-2.6.32.7-vanilla/drivers/net/seastar/main.c linux-2.6.32.7/drivers/net/seastar/main.c
--- linux-2.6.32.7-vanilla/drivers/net/seastar/main.c	1969-12-31 17:00:00.000000000 -0700
+++ linux-2.6.32.7/drivers/net/seastar/main.c	2010-02-02 12:49:20.000000000 -0700
@@ -0,0 +1,584 @@ 
+/*******************************************************************************
+    SeaStar NIC Linux Driver
+
+    Copyright 2009-2010 Sandia Corporation. Under the terms of Contract
+    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government
+    retains certain rights in this software.
+
+    Copyright (c) 2009-2010 Cray Inc.
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+
+    Contact Information:
+    Kevin Pedretti <ktpedre@sandia.gov>
+    Sandia National Laboratories
+    P.O. Box 5800
+    Albuquerque, NM 87185-1319
+
+*******************************************************************************/
+
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/if_arp.h>
+#include <linux/ip.h>
+#include <linux/htirq.h>
+#include <linux/io.h>
+#include <linux/uaccess.h>
+#include <net/arp.h>
+#include "firmware.h"
+#include "seastar.h"
+
+
+#define SEASTAR_VERSION_STR "1.0"
+
+
+MODULE_DESCRIPTION("Cray SeaStar Native IP driver");
+MODULE_AUTHOR("Maintainer: Kevin Pedretti <ktpedre@sandia.gov>");
+MODULE_VERSION(SEASTAR_VERSION_STR);
+MODULE_LICENSE("GPL");
+
+
+/*
+ * Pop the first entry off the TX pending free list.
+ * Returns the detached entry, or NULL when the list is empty.
+ */
+static struct pending *alloc_tx_pending(struct ss_priv *ssp)
+{
+	struct pending *head = ssp->tx_pending_free_list;
+
+	if (head) {
+		ssp->tx_pending_free_list = head->next;
+		head->next = NULL;
+	}
+
+	return head;
+}
+
+
+/* Push @pending onto the head of the TX pending free list. */
+static void free_tx_pending(struct ss_priv *ssp, struct pending *pending)
+{
+	struct pending *old_head = ssp->tx_pending_free_list;
+
+	ssp->tx_pending_free_list = pending;
+	pending->next = old_head;
+}
+
+
+/* Translate a pointer into ssp->pending_table into its array index. */
+static uint16_t pending_to_index(struct ss_priv *ssp, struct pending *pending)
+{
+	return pending - &ssp->pending_table[0];
+}
+
+
+/* Translate an array index into a pointer into ssp->pending_table. */
+static struct pending *index_to_pending(struct ss_priv *ssp, unsigned int index)
+{
+	return ssp->pending_table + index;
+}
+
+
+/*
+ * Allocate a fresh receive skb for slot @i and publish it in both the
+ * physical-address table and the driver's skb pointer table.
+ *
+ * If the allocation fails the slot is marked empty in BOTH tables (the
+ * same all-zero state ss_open() starts every slot in), so that a stale
+ * physical address of a buffer already handed to the stack is never left
+ * behind.  ss_rx_refill() retries slots whose skb pointer is NULL.
+ */
+static void refill_skb(struct net_device *netdev, int i)
+{
+	struct ss_priv *ssp = netdev_priv(netdev);
+	struct sk_buff *skb;
+
+	skb = dev_alloc_skb(netdev->mtu + SKB_PAD);
+	if (!skb) {
+		dev_err(&ssp->pdev->dev, "dev_alloc_skb() failed.\n");
+		ssp->skb_table_phys[i] = 0;
+		ssp->skb_table_virt[i] = NULL;
+		return;
+	}
+
+	skb->dev = netdev;
+	skb_reserve(skb, SKB_PAD);
+
+	/* Push it down to the PPC as a quadbyte address */
+	ssp->skb_table_phys[i] = virt_to_phys(skb->data) >> 2;
+	ssp->skb_table_virt[i] = skb;
+}
+
+
+/*
+ * ndo_open hook: start the transmit queue, then reset every receive slot
+ * to the empty state and try to populate it with a fresh skb.
+ * Always returns 0.
+ */
+static int ss_open(struct net_device *netdev)
+{
+	struct ss_priv *ssp = netdev_priv(netdev);
+	int slot = 0;
+
+	netif_start_queue(netdev);
+
+	while (slot < NUM_SKBS) {
+		ssp->skb_table_phys[slot] = 0;
+		ssp->skb_table_virt[slot] = NULL;
+		refill_skb(netdev, slot);
+		slot++;
+	}
+
+	return 0;
+}
+
+
+/*
+ * Convert the Ethernet frame in @skb into a SeaStar frame in place.
+ *
+ * The SeaStar header is written over the tail end of the Ethernet header,
+ * and skb->data is advanced so that it points at the SeaStar header.
+ *
+ * Returns 0 on success.  Returns -1, leaving @skb untouched, when the
+ * frame is not IPv4, is addressed to a MAC ending in 0xFF, or has a
+ * source/destination low MAC byte that does not fit in 4 bits.
+ */
+static int eth2ss(struct ss_priv *ssp, struct sk_buff *skb)
+{
+	struct ethhdr *ethhdr;
+	struct sshdr *sshdr;
+	uint8_t source_lo_mac, dest_lo_mac;
+	uint32_t qb_len;
+
+	/* Read the "low" bytes of the source and destination MAC addresses */
+	ethhdr = (struct ethhdr *)skb->data;
+	source_lo_mac = ethhdr->h_source[5];
+	dest_lo_mac   = ethhdr->h_dest[5];
+
+	/* Drop anything not IPv4 (h_proto is in network byte order) */
+	if (ethhdr->h_proto != htons(ETH_P_IP)) {
+		dev_err(&ssp->pdev->dev, "squashing non-IPv4 packet.\n");
+		return -1;
+	}
+
+	/* Squash broadcast packets, SeaStar doesn't support broadcast */
+	if (dest_lo_mac == 0xFF) {
+		dev_err(&ssp->pdev->dev, "squashing broadcast packet.\n");
+		return -1;
+	}
+
+	/* We only support 4 bits of virtual hosts per physical node */
+	if ((source_lo_mac & ~0xF) || (dest_lo_mac & ~0xF)) {
+		dev_err(&ssp->pdev->dev, "lo_mac out of range.\n");
+		return -1;
+	}
+
+	/* Move ahead so the sshdr can be filled in on top of the ethhdr */
+	sshdr = (struct sshdr *)
+		skb_pull(skb, (unsigned int)(ETH_HLEN - sizeof(struct sshdr)));
+
+	/* The length in quad bytes, rounded up to the nearest quad byte.
+	 * SS header is already counted in skb->len as per skb_pull() above */
+	qb_len = (ROUNDUP4(skb->len) >> 2) - 1;
+
+	/* Build the SeaStar header */
+	sshdr->length   = qb_len;
+	sshdr->lo_macs  = (source_lo_mac << 4) | dest_lo_mac;
+	sshdr->hdr_type = (2 << 5); /* Datagram 2, type 0 == IP */
+
+	return 0;
+}
+
+
+/*
+ * Convert the SeaStar frame in @skb into an Ethernet frame in place.
+ *
+ * skb->data must point at the SeaStar header on entry; on return it
+ * points at a freshly built Ethernet header.  Always returns 0.
+ */
+static int ss2eth(struct sk_buff *skb)
+{
+	struct sshdr *sshdr;
+	struct ethhdr *ethhdr;
+	uint8_t source_lo_mac, dest_lo_mac;
+
+	/* Read the "low" bytes of the source and destination MAC addresses.
+	 * This must happen before the memset() below, which overwrites the
+	 * SeaStar header. */
+	sshdr = (struct sshdr *)skb->data;
+	source_lo_mac = (sshdr->lo_macs >> 4);
+	dest_lo_mac    = sshdr->lo_macs & 0xF;
+
+	/* Make room for the rest of the ethernet header and zero it */
+	ethhdr = (struct ethhdr *)
+	     skb_push(skb, (unsigned int)(ETH_HLEN - sizeof(struct sshdr)));
+	memset(ethhdr, 0x00, ETH_HLEN);
+
+	/* Only IPv4 is carried over SeaStar */
+	ethhdr->h_proto = htons(ETH_P_IP);
+
+	/* We're assuming the source MAC is the same as the local
+	 * host's MAC in order to support loopback in promiscuous mode.
+	 * dev_addr is passed directly (not by address-of): it designates
+	 * the MAC bytes themselves, and taking its address would copy the
+	 * bytes of a pointer when dev_addr is a pointer member. */
+	memcpy(ethhdr->h_source, skb->dev->dev_addr, ETH_ALEN);
+	memcpy(ethhdr->h_dest, skb->dev->dev_addr, ETH_ALEN);
+	ethhdr->h_source[5] = source_lo_mac;
+	ethhdr->h_dest[5]   = dest_lo_mac;
+
+	return 0;
+}
+
+
+/*
+ * ndo_start_xmit hook.
+ *
+ * Returns NETDEV_TX_BUSY (skb NOT consumed and NOT modified) when the
+ * queue is stopped or no pending entry is available.  In every other
+ * case the skb is consumed and NETDEV_TX_OK is returned: either the frame
+ * was handed to the SeaStar, or it was dropped and freed.
+ */
+static int ss_tx(struct sk_buff *skb, struct net_device *netdev)
+{
+	unsigned long flags;
+	struct ss_priv *ssp = netdev_priv(netdev);
+	struct ethhdr *eh = (struct ethhdr *)skb->data;
+	struct sshdr *sshdr;
+	/* Must be read before eth2ss() rewrites the header */
+	uint32_t dest_nid = ntohl(*(uint32_t *)eh->h_dest);
+	struct pending *pending = NULL;
+	void *msg;
+
+	spin_lock_irqsave(&ssp->lock, flags);
+
+	if (netif_queue_stopped(netdev)) {
+		spin_unlock_irqrestore(&ssp->lock, flags);
+		return NETDEV_TX_BUSY;
+	}
+
+	/* Get a tx_pending so that we can track the completion of this SKB.
+	 * This is done BEFORE eth2ss(): returning NETDEV_TX_BUSY makes the
+	 * stack resubmit the same skb, so it must still be an untouched
+	 * ethernet frame at that point. */
+	pending = alloc_tx_pending(ssp);
+	if (!pending) {
+		netif_stop_queue(netdev);
+		spin_unlock_irqrestore(&ssp->lock, flags);
+		return NETDEV_TX_BUSY;
+	}
+
+	/* Convert the SKB from an ethernet frame to a seastar frame */
+	if (eth2ss(ssp, skb)) {
+		netdev->stats.tx_errors++;
+		goto drop;
+	}
+
+	sshdr = (struct sshdr *)skb->data;
+
+	/* Stash skb away in the pending, will be needed in ss_tx_end() */
+	pending->skb = skb;
+
+	/* Make sure buffer we pass to SeaStar is quad-byte aligned */
+	if (((unsigned long)skb->data & 0x3) == 0) {
+		pending->bounce = NULL;
+		msg = skb->data;
+	} else {
+		/* Need to use bounce buffer to get quad-byte alignment.
+		 * We hold a spinlock with interrupts disabled, so the
+		 * allocation must not sleep. */
+		pending->bounce = kmalloc(skb->len, GFP_ATOMIC);
+		if (!pending->bounce) {
+			dev_err(&ssp->pdev->dev,
+				"kmalloc() of bounce buffer failed.\n");
+			netdev->stats.tx_dropped++;
+			goto drop;
+		}
+		memcpy(pending->bounce, skb->data, skb->len);
+		msg = pending->bounce;
+	}
+
+	seastar_ip_tx_cmd(ssp,
+			  dest_nid,
+			  sshdr->length,
+			  virt_to_phys(msg) >> 2,
+			  pending_to_index(ssp, pending));
+
+	netdev->stats.tx_packets++;
+	netdev->stats.tx_bytes += skb->len;
+
+	spin_unlock_irqrestore(&ssp->lock, flags);
+	return NETDEV_TX_OK;
+
+drop:
+	dev_kfree_skb_any(skb);
+	/* Do not leave a pointer to the freed skb in the recycled entry */
+	pending->skb = NULL;
+	pending->bounce = NULL;
+	free_tx_pending(ssp, pending);
+	spin_unlock_irqrestore(&ssp->lock, flags);
+	return NETDEV_TX_OK;
+}
+
+
+/*
+ * Handle a transmit-complete event: release the skb and bounce buffer
+ * tracked by pending entry @pending_index, return the entry to the free
+ * list and wake the transmit queue if it was stopped.
+ *
+ * @pending_index comes from the event queue, so it is range-checked
+ * before use.  Only the first NUM_TX_PENDINGS entries of the pending
+ * table are ever placed on the TX free list, so no valid transmit can
+ * carry a larger index.
+ */
+static void ss_tx_end(struct net_device *netdev, unsigned int pending_index)
+{
+	unsigned long flags;
+	struct ss_priv *ssp = netdev_priv(netdev);
+	struct pending *pending;
+
+	if (pending_index >= NUM_TX_PENDINGS) {
+		dev_err(&ssp->pdev->dev,
+			"TX end event with invalid pending index %u.\n",
+			pending_index);
+		return;
+	}
+
+	pending = index_to_pending(ssp, pending_index);
+
+	spin_lock_irqsave(&ssp->lock, flags);
+
+	if (pending->skb) {
+		dev_kfree_skb_any(pending->skb);
+		pending->skb = NULL;
+	}
+
+	/* kfree(NULL) is a no-op, so no guard is needed */
+	kfree(pending->bounce);
+	pending->bounce = NULL;
+
+	free_tx_pending(ssp, pending);
+
+	if (netif_queue_stopped(netdev))
+		netif_wake_queue(netdev);
+
+	spin_unlock_irqrestore(&ssp->lock, flags);
+}
+
+
+/*
+ * Hand a received SeaStar frame to the network stack.
+ *
+ * The frame has been written at the tail of @skb.  Its length is taken
+ * from the SeaStar header, checked against the room actually available
+ * in the skb, and the frame is then converted to an ethernet frame and
+ * passed to netif_rx().  Frames whose claimed length does not fit are
+ * counted as receive errors and freed.
+ */
+static void ss_rx_skb(struct net_device *netdev, struct sk_buff *skb)
+{
+	struct sshdr *sshdr = (struct sshdr *)skb_tail_pointer(skb);
+
+	const uint32_t qb_len = sshdr->length;
+	const uint32_t len    = (qb_len + 1) << 2;
+
+	/* skb_put() panics when asked for more than the tailroom, so never
+	 * trust the length field blindly */
+	if (len > (uint32_t)skb_tailroom(skb)) {
+		dev_err(&netdev->dev,
+			"dropping RX frame, length %u exceeds buffer.\n", len);
+		netdev->stats.rx_errors++;
+		netdev->stats.rx_length_errors++;
+		dev_kfree_skb_any(skb);
+		return;
+	}
+
+	skb_put(skb, len);
+	ss2eth(skb);
+
+	skb->protocol  = htons(ETH_P_IP);
+	skb->ip_summed = CHECKSUM_UNNECESSARY;
+	skb_set_mac_header(skb, 0);
+
+	/* Skip past the ethernet header we just built */
+	skb_pull(skb, ETH_HLEN);
+
+	netdev->stats.rx_packets++;
+	netdev->stats.rx_bytes += len;
+
+	netif_rx(skb);
+}
+
+
+/*
+ * Handle a receive event for slot @skb_index: detach the skb from the
+ * slot, pass it up the stack and put a fresh skb in its place.
+ *
+ * @skb_index comes from the event queue, so it is range-checked, and a
+ * slot that holds no skb (for example because an earlier refill failed)
+ * is only refilled instead of being dereferenced.
+ */
+static void ss_rx(struct net_device *netdev, unsigned int skb_index)
+{
+	struct ss_priv *ssp = netdev_priv(netdev);
+	struct sk_buff *skb;
+
+	if (skb_index >= NUM_SKBS) {
+		dev_err(&ssp->pdev->dev,
+			"RX event with invalid skb index %u.\n", skb_index);
+		return;
+	}
+
+	skb = ssp->skb_table_virt[skb_index];
+	ssp->skb_table_virt[skb_index] = NULL;
+
+	if (skb)
+		ss_rx_skb(netdev, skb);
+	else
+		dev_err(&ssp->pdev->dev,
+			"RX event for empty skb slot %u.\n", skb_index);
+
+	refill_skb(netdev, skb_index);
+}
+
+
+/*
+ * header_ops.create hook: prepend a zeroed ethernet header to @skb and
+ * fill in the protocol, source and destination fields.
+ *
+ * The source address defaults to the device address when @saddr is NULL.
+ * Returns ETH_HLEN when a destination address was supplied and -ETH_HLEN
+ * when @daddr is NULL (destination left as zeros).
+ */
+static int ss_header_create(struct sk_buff *skb, struct net_device *netdev,
+			    unsigned short type, const void *daddr,
+			    const void *saddr, unsigned int length)
+{
+	struct ethhdr *hdr = (struct ethhdr *)skb_push(skb, ETH_HLEN);
+	const void *src = saddr ? saddr : netdev->dev_addr;
+
+	memset(hdr, 0, ETH_HLEN);
+
+	/* Non-IPv4 frames are still built correctly here; they are rejected
+	 * later in the ndo_start_xmit hook so that their existence stays
+	 * visible. */
+	hdr->h_proto = htons(type);
+	memcpy(hdr->h_source, src, ETH_ALEN);
+
+	/* No destination address supplied !?! */
+	if (!daddr)
+		return -ETH_HLEN;
+
+	memcpy(hdr->h_dest, daddr, ETH_ALEN);
+	return ETH_HLEN;
+}
+
+
+/*
+ * Fetch the next event from the event queue.
+ *
+ * A zero entry means "no event": 0 is returned and the read position is
+ * left alone.  Otherwise the entry is cleared, the read position moves on
+ * (wrapping at NUM_EQ_ENTRIES) and the event word is returned.
+ */
+static uint32_t next_event(struct ss_priv *ssp)
+{
+	unsigned int slot = ssp->eq_read;
+	uint32_t ev = ssp->eq[slot];
+
+	if (ev != 0) {
+		ssp->eq[slot] = 0;
+		ssp->eq_read = (slot + 1) % NUM_EQ_ENTRIES;
+	}
+
+	return ev;
+}
+
+
+/* Try to allocate an skb for every receive slot that currently has none. */
+static void ss_rx_refill(struct net_device *netdev)
+{
+	struct ss_priv *ssp = netdev_priv(netdev);
+	int slot;
+
+	for (slot = 0; slot < NUM_SKBS; slot++) {
+		if (ssp->skb_table_virt[slot])
+			continue;
+
+		refill_skb(netdev, slot);
+	}
+}
+
+
+/*
+ * Interrupt handler: drain the event queue and dispatch each event.
+ *
+ * An event word carries its type in the upper 16 bits and an index in
+ * the lower 16 bits.  Always returns IRQ_HANDLED.
+ */
+static irqreturn_t ss_interrupt(int irq, void *dev)
+{
+	struct net_device *netdev = dev;
+	struct ss_priv *ssp = netdev_priv(netdev);
+	uint32_t ev;
+
+	while ((ev = next_event(ssp)) != 0) {
+		const unsigned int type  = (ev >> 16) & 0xFFFF;
+		const unsigned int index = ev & 0xFFFF;
+
+		if (type == EVENT_TX_END)
+			ss_tx_end(netdev, index);
+		else if (type == EVENT_RX)
+			ss_rx(netdev, index);
+		else if (type == EVENT_RX_EMPTY)
+			ss_rx_refill(netdev);
+		else
+			dev_err(&ssp->pdev->dev,
+				"unknown event type (type=%u, index=%u).\n",
+				type, index);
+	}
+
+	return IRQ_HANDLED;
+}
+
+
+/* Net device operations: open, transmit and the generic MAC setter. */
+static const struct net_device_ops ss_netdev_ops = {
+	.ndo_open = ss_open,
+	.ndo_start_xmit = ss_tx,
+	.ndo_set_mac_address = eth_mac_addr,
+};
+
+
+/* Link-layer header operations: only header creation is provided. */
+static const struct header_ops ss_header_ops = {
+	.create = ss_header_create,
+};
+
+
+/*
+ * HyperTransport IRQ update callback: forward the low half of the
+ * interrupt message address to seastar_setup_htb_bi().
+ */
+static void ss_ht_irq_update(struct pci_dev *dev, int irq,
+			     struct ht_irq_msg *msg)
+{
+	const uint32_t addr_lo = msg->address_lo;
+
+	seastar_setup_htb_bi(addr_lo);
+}
+
+
+/*
+ * PCI probe: enable the device, allocate and set up the net device,
+ * create and request the HyperTransport interrupt, initialise the
+ * hardware and register the net device.
+ *
+ * Returns 0 on success or a negative errno.  On failure everything
+ * acquired so far is released in reverse order.
+ */
+static int __devinit ss_probe(struct pci_dev *pdev,
+			      const struct pci_device_id *id)
+{
+	struct net_device *netdev;
+	struct ss_priv *ssp;
+	int i, irq, err;
+
+	err = pci_enable_device(pdev);
+	if (err != 0) {
+		dev_err(&pdev->dev, "Could not enable PCI device.\n");
+		return err;
+	}
+
+	netdev = alloc_etherdev(sizeof(*ssp));
+	if (netdev == NULL) {
+		dev_err(&pdev->dev, "Could not allocate ethernet device.\n");
+		err = -ENOMEM;
+		goto err_disable;
+	}
+
+	SET_NETDEV_DEV(netdev, &pdev->dev);
+
+	strcpy(netdev->name, "ss");
+	netdev->netdev_ops	= &ss_netdev_ops;
+	netdev->header_ops	= &ss_header_ops;
+	/* NOTE(review): seastar.h defines SEASTAR_MTU as 8192 but it is not
+	 * used here -- confirm which value is intended. */
+	netdev->mtu		= 16000;
+	netdev->flags		= IFF_NOARP;
+
+	/* Setup private state */
+	ssp = netdev_priv(netdev);
+	memset(ssp, 0, sizeof(*ssp));
+
+	spin_lock_init(&ssp->lock);
+	ssp->skb_table_phys	= seastar_skb;
+	ssp->eq_read		= 0;
+	ssp->pdev		= pdev;
+
+	/* Build the TX pending free list */
+	ssp->tx_pending_free_list = NULL;
+	for (i = 0; i < NUM_TX_PENDINGS; i++)
+		free_tx_pending(ssp, index_to_pending(ssp, i));
+
+	irq = __ht_create_irq(pdev, 0, ss_ht_irq_update);
+	if (irq < 0) {
+		/* The error is in irq, not err: propagate it so that the
+		 * probe does not report success. */
+		err = irq;
+		dev_err(&pdev->dev, "__ht_create_irq() failed, err=%d.\n", err);
+		goto err_free_netdev;
+	}
+
+	err = request_irq(irq, ss_interrupt, IRQF_NOBALANCING,
+			  "seastar", netdev);
+	if (err != 0) {
+		dev_err(&pdev->dev, "request_irq() failed, err=%d.\n", err);
+		goto err_destroy_irq;
+	}
+
+	/* Remembered so that ss_remove() can release the interrupt */
+	netdev->irq = irq;
+
+	err = seastar_hw_init(ssp);
+	if (err != 0) {
+		dev_err(&pdev->dev, "seastar_hw_init() failed, err=%d.\n", err);
+		goto err_free_irq;
+	}
+
+	/* ss_remove() looks the net device up through the PCI drvdata */
+	pci_set_drvdata(pdev, netdev);
+
+	err = register_netdev(netdev);
+	if (err != 0) {
+		dev_err(&pdev->dev, "register_netdev() failed, err=%d.\n", err);
+		goto err_clear_drvdata;
+	}
+
+	return 0;
+
+err_clear_drvdata:
+	pci_set_drvdata(pdev, NULL);
+err_free_irq:
+	free_irq(irq, netdev);
+err_destroy_irq:
+	ht_destroy_irq(irq);
+err_free_netdev:
+	free_netdev(netdev);
+err_disable:
+	pci_disable_device(pdev);
+	return err;
+}
+
+
+/*
+ * PCI remove: undo ss_probe() in reverse order.
+ *
+ * NOTE(review): receive skbs still held in skb_table_virt[] and any
+ * in-flight transmit skbs are not released here; the driver has no
+ * ndo_stop hook either -- confirm how the firmware must be quiesced
+ * before adding that cleanup.
+ */
+static void __devexit ss_remove(struct pci_dev *pdev)
+{
+	struct net_device *netdev = pci_get_drvdata(pdev);
+
+	if (!netdev)
+		return;
+
+	unregister_netdev(netdev);
+	free_irq(netdev->irq, netdev);
+	ht_destroy_irq(netdev->irq);
+	pci_set_drvdata(pdev, NULL);
+	free_netdev(netdev);
+	pci_disable_device(pdev);
+}
+
+
+#define PCI_VENDOR_ID_CRAY		0x17DB
+#define PCI_DEVICE_ID_SEASTAR		0x0101
+
+
+/* PCI IDs handled by this driver; terminated by an all-zero entry. */
+static struct pci_device_id ss_pci_tbl[] = {
+	{PCI_DEVICE(PCI_VENDOR_ID_CRAY, PCI_DEVICE_ID_SEASTAR)},
+	{0},
+};
+
+/* Export the ID table so the module can be auto-loaded for the device */
+MODULE_DEVICE_TABLE(pci, ss_pci_tbl);
+
+
+#ifdef CONFIG_PM
+/* Power management is not implemented: suspend always fails with -ENOSYS. */
+static int ss_suspend(struct pci_dev *pdev, pm_message_t state)
+{
+	const int not_implemented = -ENOSYS;
+
+	return not_implemented;
+}
+
+
+/* Power management is not implemented: resume always fails with -ENOSYS. */
+static int ss_resume(struct pci_dev *pdev)
+{
+	const int not_implemented = -ENOSYS;
+
+	return not_implemented;
+}
+#endif
+
+
+/* PCI driver glue; the PM hooks exist only when CONFIG_PM is enabled. */
+static struct pci_driver ss_driver = {
+	.name		= "seastar",
+	.id_table	= ss_pci_tbl,
+	.probe		= ss_probe,
+	.remove		= __devexit_p(ss_remove),
+#ifdef CONFIG_PM
+	.suspend	= ss_suspend,
+	.resume		= ss_resume,
+#endif
+};
+
+
+/*
+ * Module entry point: log the driver version and register the PCI
+ * driver.  Returns the result of pci_register_driver().
+ */
+static __init int ss_init_module(void)
+{
+	int rc;
+
+	printk(KERN_INFO "%s: module loaded (version %s)\n",
+	       ss_driver.name, SEASTAR_VERSION_STR);
+
+	rc = pci_register_driver(&ss_driver);
+	return rc;
+}
+
+
+/* Module exit point: unregister the PCI driver. */
+static __exit void ss_cleanup_module(void)
+{
+	struct pci_driver *drv = &ss_driver;
+
+	pci_unregister_driver(drv);
+}
+
+
+module_init(ss_init_module);
+module_exit(ss_cleanup_module);
diff -uprN -X linux-2.6.32.7-vanilla/Documentation/dontdiff linux-2.6.32.7-vanilla/drivers/net/seastar/Makefile linux-2.6.32.7/drivers/net/seastar/Makefile
--- linux-2.6.32.7-vanilla/drivers/net/seastar/Makefile	1969-12-31 17:00:00.000000000 -0700
+++ linux-2.6.32.7/drivers/net/seastar/Makefile	2010-02-02 09:12:04.000000000 -0700
@@ -0,0 +1,3 @@ 
+obj-$(CONFIG_SEASTAR) += seastar.o
+
+seastar-y := main.o firmware.o
diff -uprN -X linux-2.6.32.7-vanilla/Documentation/dontdiff linux-2.6.32.7-vanilla/drivers/net/seastar/seastar.h linux-2.6.32.7/drivers/net/seastar/seastar.h
--- linux-2.6.32.7-vanilla/drivers/net/seastar/seastar.h	1969-12-31 17:00:00.000000000 -0700
+++ linux-2.6.32.7/drivers/net/seastar/seastar.h	2010-02-02 09:14:33.000000000 -0700
@@ -0,0 +1,104 @@ 
+/*******************************************************************************
+    SeaStar NIC Linux Driver
+
+    Copyright 2009-2010 Sandia Corporation. Under the terms of Contract
+    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government
+    retains certain rights in this software.
+
+    Copyright (c) 2009-2010 Cray Inc.
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+
+    Contact Information:
+    Kevin Pedretti <ktpedre@sandia.gov>
+    Sandia National Laboratories
+    P.O. Box 5800
+    Albuquerque, NM 87185-1319
+
+*******************************************************************************/
+
+#ifndef _SEASTAR_H
+#define _SEASTAR_H
+
+
+/**
+ * Rounds up to the nearest quadbyte.
+ *
+ * The argument is parenthesized so that expressions built from operators
+ * with lower precedence than '+' (for example "a | b") expand correctly.
+ */
+#define ROUNDUP4(val)		(((val) + (4-1)) & ~(4-1))
+
+
+/**
+ * SeaStar datagram packet maximum transfer unit size in bytes.
+ */
+#define SEASTAR_MTU		8192
+
+
+/**
+ * Number of transmit and receive pending structures.
+ */
+#define NUM_TX_PENDINGS		64
+#define NUM_RX_PENDINGS		64
+#define NUM_PENDINGS		(NUM_TX_PENDINGS + NUM_RX_PENDINGS)
+
+
+/**
+ * Number of entries in the SeaStar -> Host event queue.
+ */
+#define NUM_EQ_ENTRIES		1024
+
+
+/**
+ * When allocating an SKB, allocate this many bytes extra.
+ */
+#define SKB_PAD			(16 - sizeof(struct sshdr))
+
+
+/**
+ * Pending structure.
+ * One of these is used to track each in progress transmit.
+ *
+ * skb    - the skb being transmitted; freed when the transmit completes.
+ * next   - link used while the entry sits on the TX pending free list.
+ * bounce - kmalloc'ed quad-byte aligned copy of the frame, used when
+ *          skb->data itself is not quad-byte aligned; NULL otherwise.
+ *          Freed when the transmit completes.
+ */
+struct pending {
+	struct sk_buff		*skb;
+	struct pending		*next;
+	void			*bounce;
+};
+
+
+/**
+ * SeaStar driver private data.
+ *
+ * lock                 - taken (irqsave) by the transmit and transmit
+ *                        completion paths around the pending free list.
+ * skb_table_phys       - per receive slot, the physical address of the
+ *                        skb data shifted right by two; 0 when empty.
+ * skb_table_virt       - per receive slot, the skb whose address is in
+ *                        skb_table_phys; NULL when the slot is empty.
+ * pending_table        - backing storage for the pending entries.
+ * tx_pending_free_list - head of the singly linked list of free
+ *                        transmit pending entries.
+ * eq, eq_read          - event queue and the index of the next entry to
+ *                        consume; a zero entry means "no event".
+ * pdev                 - the PCI device this instance is bound to.
+ *
+ * NOTE(review): host_region_phys, mailbox and the mailbox_cached_*
+ * fields are presumably used by the firmware interface code -- confirm
+ * there.
+ */
+struct ss_priv {
+	spinlock_t		lock;
+
+	unsigned long		host_region_phys;
+
+	volatile uint64_t	*skb_table_phys;
+	struct sk_buff		*skb_table_virt[NUM_SKBS];
+
+	struct pending		pending_table[NUM_PENDINGS];
+	struct pending		*tx_pending_free_list;
+
+	uint32_t		eq[NUM_EQ_ENTRIES];
+	unsigned int		eq_read;
+
+	struct mailbox		*mailbox;
+	unsigned int		mailbox_cached_read;
+	unsigned int		mailbox_cached_write;
+
+	struct pci_dev		*pdev;
+};
+
+
+#endif