diff mbox

[Xenial,SRU] Fix performance regression in tunneled connections

Message ID 1469457000-27124-1-git-send-email-stefan.bader@canonical.com
State New
Headers show

Commit Message

Stefan Bader July 25, 2016, 2:30 p.m. UTC
It was mentioned that we had this fix for yakkety (though I guess that
means in the 4.6+ kernel which may come soon). This is the backport
for Xenial which was tested by the patch author.

-Stefan


From b311341e0bd9aca6d1a7303b5a8da9bebee64338 Mon Sep 17 00:00:00 2001
From: Jesse Gross <jesse@kernel.org>
Date: Sat, 19 Mar 2016 09:32:02 -0700
Subject: [PATCH] tunnels: Remove encapsulation offloads on decap.

If a packet is either locally encapsulated or processed through GRO
it is marked with the offloads that it requires. However, when it is
decapsulated these tunnel offload indications are not removed. This
means that if we receive an encapsulated TCP packet, aggregate it with
GRO, decapsulate, and retransmit the resulting frame on a NIC that does
not support encapsulation, we won't be able to take advantage of hardware
offloads even though it is just a simple TCP packet at this point.

This fixes the problem by stripping off encapsulation offload indications
when packets are decapsulated.

The performance impacts of this bug are significant. In a test where a
Geneve encapsulated TCP stream is sent to a hypervisor, GRO'ed, decapsulated,
and bridged to a VM performance is improved by 60% (5Gbps->8Gbps) as a
result of avoiding unnecessary segmentation at the VM tap interface.

Reported-by: Ramu Ramamurthy <sramamur@linux.vnet.ibm.com>
Fixes: 68c33163 ("v4 GRE: Add TCP segmentation offload for GRE")
Signed-off-by: Jesse Gross <jesse@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

BugLink: http://bugs.launchpad.net/bugs/1602755

(backported from commit a09a4c8dd1ec7f830e1fb9e59eb72bddc965d168)
[adapt iptunnel_pull_header arguments, avoid 7f290c9]
Signed-off-by: Stefan Bader <stefan.bader@canonical.com>
---
 include/net/ip_tunnels.h  | 16 ++++++++++++++++
 net/ipv4/fou.c            | 13 +++++++++++--
 net/ipv4/ip_tunnel_core.c |  3 ++-
 net/ipv6/sit.c            |  5 +++--
 4 files changed, 32 insertions(+), 5 deletions(-)

Comments

Kamal Mostafa July 25, 2016, 4:37 p.m. UTC | #1

Andy Whitcroft July 29, 2016, 3:57 p.m. UTC | #2
On Mon, Jul 25, 2016 at 04:30:00PM +0200, Stefan Bader wrote:
> It was mentioned that we had this fix for yakkety (though I guess that
> means in the 4.6+ kernel which may come soon). This is the backport
> for Xenial which was tested by the patch author.
> 
> -Stefan
> 
> 
> From b311341e0bd9aca6d1a7303b5a8da9bebee64338 Mon Sep 17 00:00:00 2001
> From: Jesse Gross <jesse@kernel.org>
> Date: Sat, 19 Mar 2016 09:32:02 -0700
> Subject: [PATCH] tunnels: Remove encapsulation offloads on decap.
> 
> If a packet is either locally encapsulated or processed through GRO
> it is marked with the offloads that it requires. However, when it is
> decapsulated these tunnel offload indications are not removed. This
> means that if we receive an encapsulated TCP packet, aggregate it with
> GRO, decapsulate, and retransmit the resulting frame on a NIC that does
> not support encapsulation, we won't be able to take advantage of hardware
> offloads even though it is just a simple TCP packet at this point.
> 
> This fixes the problem by stripping off encapsulation offload indications
> when packets are decapsulated.
> 
> The performance impacts of this bug are significant. In a test where a
> Geneve encapsulated TCP stream is sent to a hypervisor, GRO'ed, decapsulated,
> and bridged to a VM performance is improved by 60% (5Gbps->8Gbps) as a
> result of avoiding unnecessary segmentation at the VM tap interface.
> 
> Reported-by: Ramu Ramamurthy <sramamur@linux.vnet.ibm.com>
> Fixes: 68c33163 ("v4 GRE: Add TCP segmentation offload for GRE")
> Signed-off-by: Jesse Gross <jesse@kernel.org>
> Signed-off-by: David S. Miller <davem@davemloft.net>
> 
> BugLink: http://bugs.launchpad.net/bugs/1602755
> 
> (backported from commit a09a4c8dd1ec7f830e1fb9e59eb72bddc965d168)
> [adapt iptunnel_pull_header arguments, avoid 7f290c9]
> Signed-off-by: Stefan Bader <stefan.bader@canonical.com>
> ---
>  include/net/ip_tunnels.h  | 16 ++++++++++++++++
>  net/ipv4/fou.c            | 13 +++++++++++--
>  net/ipv4/ip_tunnel_core.c |  3 ++-
>  net/ipv6/sit.c            |  5 +++--
>  4 files changed, 32 insertions(+), 5 deletions(-)
> 
> diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
> index 28a38e5..9e55c15 100644
> --- a/include/net/ip_tunnels.h
> +++ b/include/net/ip_tunnels.h
> @@ -310,6 +310,22 @@ struct metadata_dst *iptunnel_metadata_reply(struct metadata_dst *md,
>  struct sk_buff *iptunnel_handle_offloads(struct sk_buff *skb, bool gre_csum,
>  					 int gso_type_mask);
>  
> +static inline int iptunnel_pull_offloads(struct sk_buff *skb)
> +{
> +	if (skb_is_gso(skb)) {
> +		int err;
> +
> +		err = skb_unclone(skb, GFP_ATOMIC);
> +		if (unlikely(err))
> +			return err;
> +		skb_shinfo(skb)->gso_type &= ~(NETIF_F_GSO_ENCAP_ALL >>
> +					       NETIF_F_GSO_SHIFT);
> +	}
> +
> +	skb->encapsulation = 0;
> +	return 0;
> +}
> +
>  static inline void iptunnel_xmit_stats(int err,
>  				       struct net_device_stats *err_stats,
>  				       struct pcpu_sw_netstats __percpu *stats)
> diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c
> index bd903fe..08d7de5 100644
> --- a/net/ipv4/fou.c
> +++ b/net/ipv4/fou.c
> @@ -48,7 +48,7 @@ static inline struct fou *fou_from_sock(struct sock *sk)
>  	return sk->sk_user_data;
>  }
>  
> -static void fou_recv_pull(struct sk_buff *skb, size_t len)
> +static int fou_recv_pull(struct sk_buff *skb, size_t len)
>  {
>  	struct iphdr *iph = ip_hdr(skb);
>  
> @@ -59,6 +59,7 @@ static void fou_recv_pull(struct sk_buff *skb, size_t len)
>  	__skb_pull(skb, len);
>  	skb_postpull_rcsum(skb, udp_hdr(skb), len);
>  	skb_reset_transport_header(skb);
> +	return iptunnel_pull_offloads(skb);
>  }
>  
>  static int fou_udp_recv(struct sock *sk, struct sk_buff *skb)
> @@ -68,9 +69,14 @@ static int fou_udp_recv(struct sock *sk, struct sk_buff *skb)
>  	if (!fou)
>  		return 1;
>  
> -	fou_recv_pull(skb, sizeof(struct udphdr));
> +	if (fou_recv_pull(skb, sizeof(struct udphdr)))
> +		goto drop;
>  
>  	return -fou->protocol;
> +
> +drop:
> +	kfree_skb(skb);
> +	return 0;
>  }
>  
>  static struct guehdr *gue_remcsum(struct sk_buff *skb, struct guehdr *guehdr,
> @@ -170,6 +176,9 @@ static int gue_udp_recv(struct sock *sk, struct sk_buff *skb)
>  	__skb_pull(skb, sizeof(struct udphdr) + hdrlen);
>  	skb_reset_transport_header(skb);
>  
> +	if (iptunnel_pull_offloads(skb))
> +		goto drop;
> +
>  	return -guehdr->proto_ctype;
>  
>  drop:
> diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
> index 6cb9009..dbda056 100644
> --- a/net/ipv4/ip_tunnel_core.c
> +++ b/net/ipv4/ip_tunnel_core.c
> @@ -116,7 +116,8 @@ int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto)
>  	skb->vlan_tci = 0;
>  	skb_set_queue_mapping(skb, 0);
>  	skb->pkt_type = PACKET_HOST;
> -	return 0;
> +
> +	return iptunnel_pull_offloads(skb);
>  }
>  EXPORT_SYMBOL_GPL(iptunnel_pull_header);
>  
> diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
> index dcccae8..e088f0e 100644
> --- a/net/ipv6/sit.c
> +++ b/net/ipv6/sit.c
> @@ -681,14 +681,15 @@ static int ipip6_rcv(struct sk_buff *skb)
>  		skb->mac_header = skb->network_header;
>  		skb_reset_network_header(skb);
>  		IPCB(skb)->flags = 0;
> -		skb->protocol = htons(ETH_P_IPV6);
> +		skb->dev = tunnel->dev;
>  
>  		if (packet_is_spoofed(skb, iph, tunnel)) {
>  			tunnel->dev->stats.rx_errors++;
>  			goto out;
>  		}
>  
> -		__skb_tunnel_rx(skb, tunnel->dev, tunnel->net);
> +		if (iptunnel_pull_header(skb, 0, htons(ETH_P_IPV6)))
> +			goto out;
>  
>  		err = IP_ECN_decapsulate(iph, skb);
>  		if (unlikely(err)) {
> -- 

Seems to do what is claimed.

Acked-by: Andy Whitcroft <apw@canonical.com>

-apw
Kamal Mostafa July 29, 2016, 4:02 p.m. UTC | #3

diff mbox

Patch

diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index 28a38e5..9e55c15 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -310,6 +310,22 @@  struct metadata_dst *iptunnel_metadata_reply(struct metadata_dst *md,
 struct sk_buff *iptunnel_handle_offloads(struct sk_buff *skb, bool gre_csum,
 					 int gso_type_mask);
 
+static inline int iptunnel_pull_offloads(struct sk_buff *skb)
+{
+	if (skb_is_gso(skb)) {
+		int err;
+
+		err = skb_unclone(skb, GFP_ATOMIC);
+		if (unlikely(err))
+			return err;
+		skb_shinfo(skb)->gso_type &= ~(NETIF_F_GSO_ENCAP_ALL >>
+					       NETIF_F_GSO_SHIFT);
+	}
+
+	skb->encapsulation = 0;
+	return 0;
+}
+
 static inline void iptunnel_xmit_stats(int err,
 				       struct net_device_stats *err_stats,
 				       struct pcpu_sw_netstats __percpu *stats)
diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c
index bd903fe..08d7de5 100644
--- a/net/ipv4/fou.c
+++ b/net/ipv4/fou.c
@@ -48,7 +48,7 @@  static inline struct fou *fou_from_sock(struct sock *sk)
 	return sk->sk_user_data;
 }
 
-static void fou_recv_pull(struct sk_buff *skb, size_t len)
+static int fou_recv_pull(struct sk_buff *skb, size_t len)
 {
 	struct iphdr *iph = ip_hdr(skb);
 
@@ -59,6 +59,7 @@  static void fou_recv_pull(struct sk_buff *skb, size_t len)
 	__skb_pull(skb, len);
 	skb_postpull_rcsum(skb, udp_hdr(skb), len);
 	skb_reset_transport_header(skb);
+	return iptunnel_pull_offloads(skb);
 }
 
 static int fou_udp_recv(struct sock *sk, struct sk_buff *skb)
@@ -68,9 +69,14 @@  static int fou_udp_recv(struct sock *sk, struct sk_buff *skb)
 	if (!fou)
 		return 1;
 
-	fou_recv_pull(skb, sizeof(struct udphdr));
+	if (fou_recv_pull(skb, sizeof(struct udphdr)))
+		goto drop;
 
 	return -fou->protocol;
+
+drop:
+	kfree_skb(skb);
+	return 0;
 }
 
 static struct guehdr *gue_remcsum(struct sk_buff *skb, struct guehdr *guehdr,
@@ -170,6 +176,9 @@  static int gue_udp_recv(struct sock *sk, struct sk_buff *skb)
 	__skb_pull(skb, sizeof(struct udphdr) + hdrlen);
 	skb_reset_transport_header(skb);
 
+	if (iptunnel_pull_offloads(skb))
+		goto drop;
+
 	return -guehdr->proto_ctype;
 
 drop:
diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
index 6cb9009..dbda056 100644
--- a/net/ipv4/ip_tunnel_core.c
+++ b/net/ipv4/ip_tunnel_core.c
@@ -116,7 +116,8 @@  int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto)
 	skb->vlan_tci = 0;
 	skb_set_queue_mapping(skb, 0);
 	skb->pkt_type = PACKET_HOST;
-	return 0;
+
+	return iptunnel_pull_offloads(skb);
 }
 EXPORT_SYMBOL_GPL(iptunnel_pull_header);
 
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index dcccae8..e088f0e 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -681,14 +681,15 @@  static int ipip6_rcv(struct sk_buff *skb)
 		skb->mac_header = skb->network_header;
 		skb_reset_network_header(skb);
 		IPCB(skb)->flags = 0;
-		skb->protocol = htons(ETH_P_IPV6);
+		skb->dev = tunnel->dev;
 
 		if (packet_is_spoofed(skb, iph, tunnel)) {
 			tunnel->dev->stats.rx_errors++;
 			goto out;
 		}
 
-		__skb_tunnel_rx(skb, tunnel->dev, tunnel->net);
+		if (iptunnel_pull_header(skb, 0, htons(ETH_P_IPV6)))
+			goto out;
 
 		err = IP_ECN_decapsulate(iph, skb);
 		if (unlikely(err)) {