Patchwork sky2 panic in 2.6.32.1 under load (new oops)

login
register
mail settings
Submitter stephen hemminger
Date Dec. 29, 2009, 7:49 p.m.
Message ID <20091229114938.1f4d1c5c@nehalam>
Download mbox | patch
Permalink /patch/41889/
State RFC
Delegated to: David Miller
Headers show

Comments

stephen hemminger - Dec. 29, 2009, 7:49 p.m.
This patch will dump more info in case of hardware error.
Michael Breuer - Dec. 29, 2009, 8:41 p.m.
The error is coming from sky2_err_intr, not sky2_hw_intr:

sky2 0000:06:00.0: error interrupt status=0x40000008
sky2 0000:06:00.0: error interrupt status=0x8

Also, fwiw, when running with vt-d disabled, I don't get the 
dhcpinform/ack messages, but continue to get the interrupt status 
messages. That said, I am not certain that the dhcpinform/ack is related 
at all to the sky2 issue.
On 12/29/2009 2:49 PM, Stephen Hemminger wrote:
> This patch will dump more info in case of hardware error.
>
> --- a/drivers/net/sky2.c	2009-12-29 11:30:06.441137685 -0800
> +++ b/drivers/net/sky2.c	2009-12-29 11:46:37.389262922 -0800
> @@ -2629,12 +2625,71 @@ static void sky2_hw_error(struct sky2_hw
>   	}
>   }
>
> +static void dump_txring(struct sky2_hw *hw, unsigned port)
> +{
> +	struct sky2_port *sky2 = netdev_priv(hw->dev[port]);
> +	int sop = 1;
> +	unsigned idx;
> +
> +	printk(KERN_INFO PFX "Tx ring pending=%u...%u report=%d done=%d\n",
> +		   sky2->tx_cons, sky2->tx_prod,
> +		   sky2_read16(hw, port == 0 ? STAT_TXA1_RIDX : STAT_TXA2_RIDX),
> +		   sky2_read16(hw, Q_ADDR(txqaddr[port], Q_DONE)));
> +
> +	/* Dump contents of tx ring */
> +	for (idx = sky2->tx_next;
> +	     idx != sky2->tx_prod && idx < sky2->tx_ring_size;
> +	     idx = RING_NEXT(idx, sky2->tx_ring_size)) {
> +		const struct sky2_tx_le *le = sky2->tx_le + idx;
> +		u32 a = le32_to_cpu(le->addr);
> +
> +		if (sop)
> +			printk(KERN_DEBUG "%u:", idx);
> +		sop = 0;
> +
> +		switch(le->opcode & ~HW_OWNER) {
> +		case OP_ADDR64:
> +			printk(" %#x:", a);
> +			break;
> +		case OP_LRGLEN:
> +			printk(" mtu=%d", a);
> +			break;
> +		case OP_VLAN:
> +			printk(" vlan=%d", be16_to_cpu(le->length));
> +			break;
> +		case OP_TCPLISW:
> +			printk(" csum=%#x", a);
> +			break;
> +		case OP_LARGESEND:
> +			printk(" tso=%#x(%d)", a, le16_to_cpu(le->length));
> +			break;
> +		case OP_PACKET:
> +			printk(" %#x(%d)", a, le16_to_cpu(le->length));
> +			break;
> +		case OP_BUFFER:
> +			printk(" frag=%#x(%d)", a, le16_to_cpu(le->length));
> +			break;
> +		default:
> +			printk(" op=%#x,%#x(%d)", le->opcode, a,
> +			       le16_to_cpu(le->length));
> +		}
> +
> +		if (le->ctrl & EOP) {
> +			printk("\n");
> +			sop = 1;
> +		}
> +	}
> +}
> +
>   static void sky2_hw_intr(struct sky2_hw *hw)
>   {
>   	struct pci_dev *pdev = hw->pdev;
>   	u32 status = sky2_read32(hw, B0_HWE_ISRC);
>   	u32 hwmsk = sky2_read32(hw, B0_HWE_IMSK);
>
> +	printk(KERN_DEBUG PFX "hardware interrupt status %#x\n", status);
> +	dump_txring(hw, 0);
> +
>   	status &= hwmsk;
>
>   	if (status & Y2_IS_TIST_OV)
>
>    

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Patch

--- a/drivers/net/sky2.c	2009-12-29 11:30:06.441137685 -0800
+++ b/drivers/net/sky2.c	2009-12-29 11:46:37.389262922 -0800
@@ -2629,12 +2625,71 @@  static void sky2_hw_error(struct sky2_hw
 	}
 }
 
+static void dump_txring(struct sky2_hw *hw, unsigned port)
+{
+	struct sky2_port *sky2 = netdev_priv(hw->dev[port]);
+	int sop = 1;
+	unsigned idx;
+
+	printk(KERN_INFO PFX "Tx ring pending=%u...%u report=%d done=%d\n",
+		   sky2->tx_cons, sky2->tx_prod,
+		   sky2_read16(hw, port == 0 ? STAT_TXA1_RIDX : STAT_TXA2_RIDX),
+		   sky2_read16(hw, Q_ADDR(txqaddr[port], Q_DONE)));
+
+	/* Dump contents of tx ring */
+	for (idx = sky2->tx_next;
+	     idx != sky2->tx_prod && idx < sky2->tx_ring_size;
+	     idx = RING_NEXT(idx, sky2->tx_ring_size)) {
+		const struct sky2_tx_le *le = sky2->tx_le + idx;
+		u32 a = le32_to_cpu(le->addr);
+
+		if (sop)
+			printk(KERN_DEBUG "%u:", idx);
+		sop = 0;
+
+		switch(le->opcode & ~HW_OWNER) {
+		case OP_ADDR64:
+			printk(" %#x:", a);
+			break;
+		case OP_LRGLEN:
+			printk(" mtu=%d", a);
+			break;
+		case OP_VLAN:
+			printk(" vlan=%d", be16_to_cpu(le->length));
+			break;
+		case OP_TCPLISW:
+			printk(" csum=%#x", a);
+			break;
+		case OP_LARGESEND:
+			printk(" tso=%#x(%d)", a, le16_to_cpu(le->length));
+			break;
+		case OP_PACKET:
+			printk(" %#x(%d)", a, le16_to_cpu(le->length));
+			break;
+		case OP_BUFFER:
+			printk(" frag=%#x(%d)", a, le16_to_cpu(le->length));
+			break;
+		default:
+			printk(" op=%#x,%#x(%d)", le->opcode, a,
+			       le16_to_cpu(le->length));
+		}
+
+		if (le->ctrl & EOP) {
+			printk("\n");
+			sop = 1;
+		}
+	}
+}
+
 static void sky2_hw_intr(struct sky2_hw *hw)
 {
 	struct pci_dev *pdev = hw->pdev;
 	u32 status = sky2_read32(hw, B0_HWE_ISRC);
 	u32 hwmsk = sky2_read32(hw, B0_HWE_IMSK);
 
+	printk(KERN_DEBUG PFX "hardware interrupt status %#x\n", status);
+	dump_txring(hw, 0);
+
 	status &= hwmsk;
 
 	if (status & Y2_IS_TIST_OV)