diff mbox

TCP-MD5 checksum failure on x86_64 SMP

Message ID 1273267137.2325.31.camel@edumazet-laptop
State Accepted, archived
Delegated to: David Miller
Headers show

Commit Message

Eric Dumazet May 7, 2010, 9:18 p.m. UTC
Le vendredi 07 mai 2010 à 17:44 +0200, Eric Dumazet a écrit :
> Le vendredi 07 mai 2010 à 17:18 +0200, Eric Dumazet a écrit :
> > OK, I found the second problem.
> > 
> > if/when IP route cache is invalidated, ip_queue_xmit() has to refetch a
> > route and calls sk_setup_caps(sk, &rt->u.dst), destroying the 
> > 
> > sk->sk_route_caps &= ~NETIF_F_GSO_MASK
> > 
> > that MD5 desesperatly try to make all over its way (from
> > tcp_transmit_skb() for example)
> > 
> > So we send few bad packets, and everything is fine when
> > tcp_transmit_skb() is called again.
> > 
> > You get many errors on remote peer if you do
> > 
> > ip route flush cache
> > 


Patch solves the problem for me. I tested it with 200 MD5 sockets
established between two 16 cpus machine, with a multiqueue NIC. Trafic
of 100.000 pps per second, and "ip route flush cache" every minute on
both machines. After five hours, not a single frame had a bad hash
value.

Here is the official submission.

[PATCH] net: Introduce sk_route_nocaps

TCP-MD5 sessions have intermittent failures, when route cache is
invalidated. ip_queue_xmit() has to find a new route, calls
sk_setup_caps(sk, &rt->u.dst), destroying the 

sk->sk_route_caps &= ~NETIF_F_GSO_MASK

that MD5 desperately try to make all over its way (from
tcp_transmit_skb() for example)

So we send few bad packets, and everything is fine when
tcp_transmit_skb() is called again for this socket.

Since ip_queue_xmit() is at a lower level than TCP-MD5, I chose to use a
socket field, sk_route_nocaps, containing bits to mask on sk_route_caps.

Reported-by: Bhaskar Dutta <bhaskie@gmail.com>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
---
 include/net/sock.h    |    8 ++++++++
 net/core/sock.c       |    1 +
 net/ipv4/tcp_ipv4.c   |    6 +++---
 net/ipv4/tcp_output.c |    2 +-
 net/ipv6/tcp_ipv6.c   |    4 ++--
 5 files changed, 15 insertions(+), 6 deletions(-)



--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Comments

David Miller May 16, 2010, 7:37 a.m. UTC | #1
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Fri, 07 May 2010 23:18:57 +0200

> [PATCH] net: Introduce sk_route_nocaps
> 
> TCP-MD5 sessions have intermittent failures, when route cache is
> invalidated. ip_queue_xmit() has to find a new route, calls
> sk_setup_caps(sk, &rt->u.dst), destroying the 
> 
> sk->sk_route_caps &= ~NETIF_F_GSO_MASK
> 
> that MD5 desperately try to make all over its way (from
> tcp_transmit_skb() for example)
> 
> So we send few bad packets, and everything is fine when
> tcp_transmit_skb() is called again for this socket.
> 
> Since ip_queue_xmit() is at a lower level than TCP-MD5, I chose to use a
> socket field, sk_route_nocaps, containing bits to mask on sk_route_caps.
> 
> Reported-by: Bhaskar Dutta <bhaskie@gmail.com>
> Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>

Since the connection does recover eventually, I'm stuffing this into
net-next-2.6 into net-2.6

After some time in net-next-2.6, we can submit it to -stable.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/include/net/sock.h b/include/net/sock.h
index 1ad6435..abfadfe 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -177,6 +177,7 @@  struct sock_common {
   *		   %SO_OOBINLINE settings, %SO_TIMESTAMPING settings
   *	@sk_no_check: %SO_NO_CHECK setting, wether or not checkup packets
   *	@sk_route_caps: route capabilities (e.g. %NETIF_F_TSO)
+  *	@sk_route_nocaps: forbidden route capabilities (e.g NETIF_F_GSO_MASK)
   *	@sk_gso_type: GSO type (e.g. %SKB_GSO_TCPV4)
   *	@sk_gso_max_size: Maximum GSO segment size to build
   *	@sk_lingertime: %SO_LINGER l_linger setting
@@ -276,6 +277,7 @@  struct sock {
 	int			sk_forward_alloc;
 	gfp_t			sk_allocation;
 	int			sk_route_caps;
+	int			sk_route_nocaps;
 	int			sk_gso_type;
 	unsigned int		sk_gso_max_size;
 	int			sk_rcvlowat;
@@ -1257,6 +1259,12 @@  static inline int sk_can_gso(const struct sock *sk)
 
 extern void sk_setup_caps(struct sock *sk, struct dst_entry *dst);
 
+static inline void sk_nocaps_add(struct sock *sk, int flags)
+{
+	sk->sk_route_nocaps |= flags;
+	sk->sk_route_caps &= ~flags;
+}
+
 static inline int skb_copy_to_page(struct sock *sk, char __user *from,
 				   struct sk_buff *skb, struct page *page,
 				   int off, int copy)
diff --git a/net/core/sock.c b/net/core/sock.c
index c5812bb..5056a6a 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1227,6 +1227,7 @@  void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
 	sk->sk_route_caps = dst->dev->features;
 	if (sk->sk_route_caps & NETIF_F_GSO)
 		sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE;
+	sk->sk_route_caps &= ~sk->sk_route_nocaps;
 	if (sk_can_gso(sk)) {
 		if (dst->header_len) {
 			sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 3c23e70..f1a1dd9 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -894,7 +894,7 @@  int tcp_v4_md5_do_add(struct sock *sk, __be32 addr,
 				kfree(newkey);
 				return -ENOMEM;
 			}
-			sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
+			sk_nocaps_add(sk, NETIF_F_GSO_MASK);
 		}
 		if (tcp_alloc_md5sig_pool(sk) == NULL) {
 			kfree(newkey);
@@ -1024,7 +1024,7 @@  static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval,
 			return -EINVAL;
 
 		tp->md5sig_info = p;
-		sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
+		sk_nocaps_add(sk, NETIF_F_GSO_MASK);
 	}
 
 	newkey = kmemdup(cmd.tcpm_key, cmd.tcpm_keylen, sk->sk_allocation);
@@ -1465,7 +1465,7 @@  struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 		if (newkey != NULL)
 			tcp_v4_md5_do_add(newsk, newinet->inet_daddr,
 					  newkey, key->keylen);
-		newsk->sk_route_caps &= ~NETIF_F_GSO_MASK;
+		sk_nocaps_add(newsk, NETIF_F_GSO_MASK);
 	}
 #endif
 
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 0dda86e..0193a39 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -872,7 +872,7 @@  static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 #ifdef CONFIG_TCP_MD5SIG
 	/* Calculate the MD5 hash, as we have all we need now */
 	if (md5) {
-		sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
+		sk_nocaps_add(sk, NETIF_F_GSO_MASK);
 		tp->af_specific->calc_md5_hash(opts.hash_location,
 					       md5, sk, NULL, skb);
 	}
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 075f540..bf34893 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -600,7 +600,7 @@  static int tcp_v6_md5_do_add(struct sock *sk, struct in6_addr *peer,
 				kfree(newkey);
 				return -ENOMEM;
 			}
-			sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
+			sk_nocaps_add(sk, NETIF_F_GSO_MASK);
 		}
 		if (tcp_alloc_md5sig_pool(sk) == NULL) {
 			kfree(newkey);
@@ -737,7 +737,7 @@  static int tcp_v6_parse_md5_keys (struct sock *sk, char __user *optval,
 			return -ENOMEM;
 
 		tp->md5sig_info = p;
-		sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
+		sk_nocaps_add(sk, NETIF_F_GSO_MASK);
 	}
 
 	newkey = kmemdup(cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);