diff mbox

TCP-MD5 checksum failure on x86_64 SMP

Message ID 1273219222.2261.11.camel@edumazet-laptop
State Accepted, archived
Delegated to: David Miller
Headers show

Commit Message

Eric Dumazet May 7, 2010, 8 a.m. UTC
Le vendredi 07 mai 2010 à 07:39 +0200, Eric Dumazet a écrit :
> Le jeudi 06 mai 2010 à 17:25 +0530, Bhaskar Dutta a écrit :
> > On Thu, May 6, 2010 at 12:23 AM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> 
> > > I am not familiar with this code, but I suspect same per_cpu data can be
> > > used at both time by a sender (process context) and by a receiver
> > > (softirq context).
> > >
> > > To trigger this, you need at least two active md5 sockets.
> > >
> > > tcp_get_md5sig_pool() should probably disable bh to make sure current
> > > cpu wont be preempted by softirq processing
> > >
> > >
> > > Something like :
> > >
> > > diff --git a/include/net/tcp.h b/include/net/tcp.h
> > > index fb5c66b..e232123 100644
> > > --- a/include/net/tcp.h
> > > +++ b/include/net/tcp.h
> > > @@ -1221,12 +1221,15 @@ struct tcp_md5sig_pool          *tcp_get_md5sig_pool(void)
> > >        struct tcp_md5sig_pool *ret = __tcp_get_md5sig_pool(cpu);
> > >        if (!ret)
> > >                put_cpu();
> > > +       else
> > > +               local_bh_disable();
> > >        return ret;
> > >  }
> > >
> > >  static inline void             tcp_put_md5sig_pool(void)
> > >  {
> > >        __tcp_put_md5sig_pool();
> > > +       local_bh_enable();
> > >        put_cpu();
> > >  }
> > >
> > >
> > >
> > 
> > I put in the above change and ran some load tests with around 50
> > active TCP connections doing MD5.
> > I could see only 1 bad packet in 30 min (earlier the problem used to
> > occur instantaneously and repeatedly).
> > 
> 
> 
> > I think there is another possibility of being preempted when calling
> > tcp_alloc_md5sig_pool()
> > this function releases the spinlock when calling __tcp_alloc_md5sig_pool().
> > 
> > I will run some more tests after changing the  tcp_alloc_md5sig_pool
> > and see if the problem is completely resolved.

Here is my official patch submission, could you please test it ?

Thanks

[PATCH] tcp: fix MD5 (RFC2385) support

TCP MD5 support uses percpu data for temporary storage. It currently
disables preemption so that same storage cannot be reclaimed by another
thread on same cpu.

We also have to make sure a softirq handler won't try to use the same
context. Various bug reports demonstrated corruptions.

Fix is to disable preemption and BH.

Reported-by: Bhaskar Dutta <bhaskie@gmail.com>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
---
 include/net/tcp.h |   21 +++------------------
 net/ipv4/tcp.c    |   34 ++++++++++++++++++++++++----------
 2 files changed, 27 insertions(+), 28 deletions(-)



--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Comments

Bhaskar Dutta May 7, 2010, 8:59 a.m. UTC | #1
On Fri, May 7, 2010 at 1:30 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> Le vendredi 07 mai 2010 à 07:39 +0200, Eric Dumazet a écrit :
>> Le jeudi 06 mai 2010 à 17:25 +0530, Bhaskar Dutta a écrit :
>> > On Thu, May 6, 2010 at 12:23 AM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
>>
>> > > I am not familiar with this code, but I suspect same per_cpu data can be
>> > > used at both time by a sender (process context) and by a receiver
>> > > (softirq context).
>> > >
>> > > To trigger this, you need at least two active md5 sockets.
>> > >
>> > > tcp_get_md5sig_pool() should probably disable bh to make sure current
>> > > cpu wont be preempted by softirq processing
>> > >
>> > >
>> > > Something like :
>> > >
>> > > diff --git a/include/net/tcp.h b/include/net/tcp.h
>> > > index fb5c66b..e232123 100644
>> > > --- a/include/net/tcp.h
>> > > +++ b/include/net/tcp.h
>> > > @@ -1221,12 +1221,15 @@ struct tcp_md5sig_pool          *tcp_get_md5sig_pool(void)
>> > >        struct tcp_md5sig_pool *ret = __tcp_get_md5sig_pool(cpu);
>> > >        if (!ret)
>> > >                put_cpu();
>> > > +       else
>> > > +               local_bh_disable();
>> > >        return ret;
>> > >  }
>> > >
>> > >  static inline void             tcp_put_md5sig_pool(void)
>> > >  {
>> > >        __tcp_put_md5sig_pool();
>> > > +       local_bh_enable();
>> > >        put_cpu();
>> > >  }
>> > >
>> > >
>> > >
>> >
>> > I put in the above change and ran some load tests with around 50
>> > active TCP connections doing MD5.
>> > I could see only 1 bad packet in 30 min (earlier the problem used to
>> > occur instantaneously and repeatedly).
>> >
>>
>>
>> > I think there is another possibility of being preempted when calling
>> > tcp_alloc_md5sig_pool()
>> > this function releases the spinlock when calling __tcp_alloc_md5sig_pool().
>> >
>> > I will run some more tests after changing the  tcp_alloc_md5sig_pool
>> > and see if the problem is completely resolved.
>
> Here is my official patch submission, could you please test it ?
>


Eric,

Thanks a lot! I will test it out and let you know.
BTW this patch seems to essentially do the same as the earlier fix you
had posted (where you just do bh disable/enable).
Am I missing something?

With the earlier fix, I ran load tests with 80 TCP connections for
over 6 hrs and found 5 bad checksum packets.
So there is still a problem. Without the fix I see a bad packet every
minute or so.

Bhaskar
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Eric Dumazet May 7, 2010, 9:37 a.m. UTC | #2
Le vendredi 07 mai 2010 à 14:29 +0530, Bhaskar Dutta a écrit :

> Eric,
> 
> Thanks a lot! I will test it out and let you know.
> BTW this patch seems to essentially do the same as the earlier fix you
> had posted (where you just do bh disable/enable).
> Am I missing something?
> 
> With the earlier fix, I ran load tests with 80 TCP connections for
> over 6 hrs and found 5 bad checksum packets.
> So there is still a problem. Without the fix I see a bad packet every
> minute or so.

My second patch is cleaner, using only out-of-line code (inline was not
necessary and made the include file bigger than necessary). Inline is fine
if we can avoid a function call, but that was not the case here.

If you notice another corruption, it may be because of another problem,
yet to be discovered.

Do you have a userland suite to test/stress tcp md5 connections?



--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Bhaskar Dutta May 7, 2010, 10:50 a.m. UTC | #3
On Fri, May 7, 2010 at 3:07 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> Le vendredi 07 mai 2010 à 14:29 +0530, Bhaskar Dutta a écrit :
>
>> Eric,
>>
>> Thanks a lot! I will test it out and let you know.
>> BTW this patch seems to essentially do the same as the earlier fix you
>> had posted (where you just do bh disable/enable).
>> Am I missing something?
>>
>> With the earlier fix, I ran load tests with 80 TCP connections for
>> over 6 hrs and found 5 bad checksum packets.
>> So there is still a problem. Without the fix I see a bad packet every
>> minute or so.
>
> My second patch is cleaner, using only out of line code (inline was not
> necessary and made include file bigger than necessary). Inline is fine
> if we can avoid a function call, but it was not the case.
>
> If you notice another corruption, it may be because of another problem,
> yet to be discovered.
>
> Do you have a userland suite to test/stress tcp md5 connections?
>
>

We are still trying to find the other corruption.

I will send you the tarball of a userland client/server suite to
stress test the TCP-MD5 that we've been using to reproduce the issue.

Thanks,
Bhaskar
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Eric Dumazet May 7, 2010, 3:18 p.m. UTC | #4
OK, I found the second problem.

if/when IP route cache is invalidated, ip_queue_xmit() has to refetch a
route and calls sk_setup_caps(sk, &rt->u.dst), destroying the 

sk->sk_route_caps &= ~NETIF_F_GSO_MASK

that MD5 desperately tries to set all over its way (from
tcp_transmit_skb() for example)

So we send a few bad packets, and everything is fine when
tcp_transmit_skb() is called again.

You get many errors on remote peer if you do

ip route flush cache



--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
David Miller May 16, 2010, 7:35 a.m. UTC | #5
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Fri, 07 May 2010 10:00:22 +0200

> [PATCH] tcp: fix MD5 (RFC2385) support
> 
> TCP MD5 support uses percpu data for temporary storage. It currently
> disables preemption so that same storage cannot be reclaimed by another
> thread on same cpu.
> 
> We also have to make sure a softirq handler won't try to use the same
> context. Various bug reports demonstrated corruptions.
> 
> Fix is to disable preemption and BH.
> 
> Reported-by: Bhaskar Dutta <bhaskie@gmail.com>
> Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>

Applied.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 75be5a2..aa04b9a 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1197,30 +1197,15 @@  extern int			tcp_v4_md5_do_del(struct sock *sk,
 extern struct tcp_md5sig_pool * __percpu *tcp_alloc_md5sig_pool(struct sock *);
 extern void			tcp_free_md5sig_pool(void);
 
-extern struct tcp_md5sig_pool	*__tcp_get_md5sig_pool(int cpu);
-extern void			__tcp_put_md5sig_pool(void);
+extern struct tcp_md5sig_pool	*tcp_get_md5sig_pool(void);
+extern void			tcp_put_md5sig_pool(void);
+
 extern int tcp_md5_hash_header(struct tcp_md5sig_pool *, struct tcphdr *);
 extern int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *, struct sk_buff *,
 				 unsigned header_len);
 extern int tcp_md5_hash_key(struct tcp_md5sig_pool *hp,
 			    struct tcp_md5sig_key *key);
 
-static inline
-struct tcp_md5sig_pool		*tcp_get_md5sig_pool(void)
-{
-	int cpu = get_cpu();
-	struct tcp_md5sig_pool *ret = __tcp_get_md5sig_pool(cpu);
-	if (!ret)
-		put_cpu();
-	return ret;
-}
-
-static inline void		tcp_put_md5sig_pool(void)
-{
-	__tcp_put_md5sig_pool();
-	put_cpu();
-}
-
 /* write queue abstraction */
 static inline void tcp_write_queue_purge(struct sock *sk)
 {
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 0f8caf6..296150b 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2839,7 +2839,6 @@  static void __tcp_free_md5sig_pool(struct tcp_md5sig_pool * __percpu *pool)
 			if (p->md5_desc.tfm)
 				crypto_free_hash(p->md5_desc.tfm);
 			kfree(p);
-			p = NULL;
 		}
 	}
 	free_percpu(pool);
@@ -2937,25 +2936,40 @@  retry:
 
 EXPORT_SYMBOL(tcp_alloc_md5sig_pool);
 
-struct tcp_md5sig_pool *__tcp_get_md5sig_pool(int cpu)
+
+/**
+ *	tcp_get_md5sig_pool - get md5sig_pool for this user
+ *
+ *	We use percpu structure, so if we succeed, we exit with preemption
+ *	and BH disabled, to make sure another thread or softirq handling
+ *	wont try to get same context.
+ */
+struct tcp_md5sig_pool *tcp_get_md5sig_pool(void)
 {
 	struct tcp_md5sig_pool * __percpu *p;
-	spin_lock_bh(&tcp_md5sig_pool_lock);
+
+	local_bh_disable();
+
+	spin_lock(&tcp_md5sig_pool_lock);
 	p = tcp_md5sig_pool;
 	if (p)
 		tcp_md5sig_users++;
-	spin_unlock_bh(&tcp_md5sig_pool_lock);
-	return (p ? *per_cpu_ptr(p, cpu) : NULL);
-}
+	spin_unlock(&tcp_md5sig_pool_lock);
+
+	if (p)
+		return *per_cpu_ptr(p, smp_processor_id());
 
-EXPORT_SYMBOL(__tcp_get_md5sig_pool);
+	local_bh_enable();
+	return NULL;
+}
+EXPORT_SYMBOL(tcp_get_md5sig_pool);
 
-void __tcp_put_md5sig_pool(void)
+void tcp_put_md5sig_pool(void)
 {
+	local_bh_enable();
 	tcp_free_md5sig_pool();
 }
-
-EXPORT_SYMBOL(__tcp_put_md5sig_pool);
+EXPORT_SYMBOL(tcp_put_md5sig_pool);
 
 int tcp_md5_hash_header(struct tcp_md5sig_pool *hp,
 			struct tcphdr *th)