diff mbox

[Bugme-new,Bug,16626] New: Machine hangs with EIP at skb_copy_and_csum_dev

Message ID 20100823124736.GA16966@ff.dom.local
State RFC, archived
Delegated to: David Miller
Headers show

Commit Message

Jarek Poplawski Aug. 23, 2010, 12:47 p.m. UTC
On Mon, Aug 23, 2010 at 02:47:23PM +0300, Plamen Petrov wrote:
> На 21.8.2010 г. 11:07, Jarek Poplawski написа:
>> On Sat, Aug 21, 2010 at 09:50:58AM +0200, Eric Dumazet wrote:
>>> Le samedi 21 août 2010 à 09:47 +0200, Jarek Poplawski a écrit :
>>>> On Fri, Aug 20, 2010 at 09:38:35PM +0200, Jarek Poplawski wrote:
>>>>> Plamen Petrov wrote, On 20.08.2010 12:53:
>>>>>> So, I guess its David and Herbert's turn?...
>>>>>
>>>>> If you're bored in the meantime I'd suggest to do check the realtek
>>>>> driver eg:
>>>>> - for locking with the patch below,
>>>>> - to turn off with ethtool its tx-checksumming and/or scatter-gather,
...
> Yeah, 3 days and counting, right until I decided to try the freshly
> announced 2.6.36-rc2.
>
> So I upgraded the kernel, but left the scripts that turn GRO off for
> the tg3 card still run at system startup. This way the system ran for
> 2 and a half hours, when I decided its time to try turning GRO on.
>
> I first tried to turn GRO on for the tg3 nic, and the system oopsed
> immediately (if the panic screen is necessary - please, ask for it).
>
> After the system came back, I tried turning GRO on for the 2 RealTek
> 8139 nics, too, but ethtool only accepted turning GRO off.
>
> And unfortunately, I can't test if other nics will fail the same way
> as the motherboard integrated tg3 I have does, so for now, this is
> only a tg3 + GRO on problem; I don't have any other hardware to test
> with available.

A little misunderstanding: I was interested in turning off some
features on realteks to change the packet path from tg3 with gro
to realtek without gro and without tx-checksumming etc.

But maybe you could try the patch below instead (so the patched
kernel, tg3 with gro on, and realteks without any change).

Thanks,
Jarek P.

--- (for debugging only)

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Comments

Eric Dumazet Aug. 23, 2010, 1 p.m. UTC | #1
Le lundi 23 août 2010 à 12:47 +0000, Jarek Poplawski a écrit :
> On Mon, Aug 23, 2010 at 02:47:23PM +0300, Plamen Petrov wrote:
> > На 21.8.2010 г. 11:07, Jarek Poplawski написа:
> >> On Sat, Aug 21, 2010 at 09:50:58AM +0200, Eric Dumazet wrote:
> >>> Le samedi 21 août 2010 à 09:47 +0200, Jarek Poplawski a écrit :
> >>>> On Fri, Aug 20, 2010 at 09:38:35PM +0200, Jarek Poplawski wrote:
> >>>>> Plamen Petrov wrote, On 20.08.2010 12:53:
> >>>>>> So, I guess its David and Herbert's turn?...
> >>>>>
> >>>>> If you're bored in the meantime I'd suggest to do check the realtek
> >>>>> driver eg:
> >>>>> - for locking with the patch below,
> >>>>> - to turn off with ethtool its tx-checksumming and/or scatter-gather,
> ...
> > Yeah, 3 days and counting, right until I decided to try the freshly
> > announced 2.6.36-rc2.
> >
> > So I upgraded the kernel, but left the scripts that turn GRO off for
> > the tg3 card still run at system startup. This way the system ran for
> > 2 and a half hours, when I decided its time to try turning GRO on.
> >
> > I first tried to turn GRO on for the tg3 nic, and the system oopsed
> > immediately (if the panic screen is necessary - please, ask for it).
> >
> > After the system came back, I tried turning GRO on for the 2 RealTek
> > 8139 nics, too, but ethtool only accepted turning GRO off.
> >
> > And unfortunately, I can't test if other nics will fail the same way
> > as the motherboard integrated tg3 I have does, so for now, this is
> > only a tg3 + GRO on problem; I don't have any other hardware to test
> > with available.
> 
> A little misunderstanding: I was interested in turning off some
> features on realteks to change the packet path from tg3 with gro
> to realtek without gro and without tx-checksumming etc.
> 
> But maybe you could try the patch below instead (so the patched
> kernel, tg3 with gro on, and realteks without any change).
> 
> Thanks,
> Jarek P.
> 
> --- (for debugging only)
> 
> diff --git a/net/core/dev.c b/net/core/dev.c
> index 3721fbb..51823cd 100644
> --- a/net/core/dev.c
> +++ b/net/core/dev.c
> @@ -1935,6 +1935,23 @@ static inline int skb_needs_linearize(struct sk_buff *skb,
>  					      illegal_highdma(dev, skb))));
>  }
>  
> +static int skb_csum_start_bug(struct sk_buff *skb)
> +{
> +
> +	if (skb->ip_summed == CHECKSUM_PARTIAL) {
> +		long csstart;
> +
> +		csstart = skb->csum_start - skb_headroom(skb);
> +		if (WARN_ON(csstart > skb_headlen(skb))) {
> +			pr_warning("csum_start %d, headroom %d, headlen %d\n",
> +				   skb->csum_start, skb_headroom(skb),
> +				   skb_headlen(skb));

I was about to suggest a similar patch ;)

Also print skb->csum_offset and skb->len if possible

pr_err("csum_start %u, offset %u, headroom %d, headlen %d, len %d\n",
        skb->csum_start,
	skb->csum_offset,
	skb_headroom(skb),
        skb_headlen(skb),
	skb->len);


> +			return 1;
> +		}
> +	}
> +	return 0;
> +}
> +
>  int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
>  			struct netdev_queue *txq)
>  {
> @@ -1955,11 +1972,13 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
>  		skb_orphan_try(skb);
>  
>  		if (netif_needs_gso(dev, skb)) {
> +			skb_csum_start_bug(skb);
>  			if (unlikely(dev_gso_segment(skb)))
>  				goto out_kfree_skb;
>  			if (skb->next)
>  				goto gso;
>  		} else {
> +			skb_csum_start_bug(skb);
>  			if (skb_needs_linearize(skb, dev) &&
>  			    __skb_linearize(skb))
>  				goto out_kfree_skb;
> @@ -1997,7 +2016,12 @@ gso:
>  		if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
>  			skb_dst_drop(nskb);
>  
> -		rc = ops->ndo_start_xmit(nskb, dev);
> +		if (skb_csum_start_bug(skb)) {
> +			kfree_skb(skb);
> +			rc = NETDEV_TX_OK;
> +		} else
> +			rc = ops->ndo_start_xmit(nskb, dev);
> +
>  		if (unlikely(rc != NETDEV_TX_OK)) {
>  			if (rc & ~NETDEV_TX_MASK)
>  				goto out_kfree_gso_skb;


--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jarek Poplawski Aug. 23, 2010, 1:10 p.m. UTC | #2
On Mon, Aug 23, 2010 at 03:00:43PM +0200, Eric Dumazet wrote:
...
> I was about to suggest a similar patch ;)
> 
> Also print skb->csum_offset and skb->len if possible

Feel free to send it: I'm a bit in hurry now...

Jarek P.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/net/core/dev.c b/net/core/dev.c
index 3721fbb..51823cd 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1935,6 +1935,23 @@  static inline int skb_needs_linearize(struct sk_buff *skb,
 					      illegal_highdma(dev, skb))));
 }
 
+static int skb_csum_start_bug(struct sk_buff *skb)
+{
+
+	if (skb->ip_summed == CHECKSUM_PARTIAL) {
+		long csstart;
+
+		csstart = skb->csum_start - skb_headroom(skb);
+		if (WARN_ON(csstart > skb_headlen(skb))) {
+			pr_warning("csum_start %d, headroom %d, headlen %d\n",
+				   skb->csum_start, skb_headroom(skb),
+				   skb_headlen(skb));
+			return 1;
+		}
+	}
+	return 0;
+}
+
 int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
 			struct netdev_queue *txq)
 {
@@ -1955,11 +1972,13 @@  int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
 		skb_orphan_try(skb);
 
 		if (netif_needs_gso(dev, skb)) {
+			skb_csum_start_bug(skb);
 			if (unlikely(dev_gso_segment(skb)))
 				goto out_kfree_skb;
 			if (skb->next)
 				goto gso;
 		} else {
+			skb_csum_start_bug(skb);
 			if (skb_needs_linearize(skb, dev) &&
 			    __skb_linearize(skb))
 				goto out_kfree_skb;
@@ -1997,7 +2016,12 @@  gso:
 		if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
 			skb_dst_drop(nskb);
 
-		rc = ops->ndo_start_xmit(nskb, dev);
+		if (skb_csum_start_bug(skb)) {
+			kfree_skb(skb);
+			rc = NETDEV_TX_OK;
+		} else
+			rc = ops->ndo_start_xmit(nskb, dev);
+
 		if (unlikely(rc != NETDEV_TX_OK)) {
 			if (rc & ~NETDEV_TX_MASK)
 				goto out_kfree_gso_skb;