diff mbox

[net-next,2/2] VXLAN: Support outer IPv4 Tx checksums by default

Message ID 20160219192631.15687.78165.stgit@localhost.localdomain
State Accepted, archived
Delegated to: David Miller
Headers show

Commit Message

Alexander Duyck Feb. 19, 2016, 7:26 p.m. UTC
This change enables the outer UDP checksum by default when UDP CSUM is not
specified.  The main motivation is that using the outer checksum can
greatly improve the performance of VXLAN tunnels on devices that don't
know how to parse tunnel headers.

Signed-off-by: Alexander Duyck <aduyck@mirantis.com>
---
 drivers/net/vxlan.c |   19 +++++++++----------
 include/net/vxlan.h |    2 +-
 2 files changed, 10 insertions(+), 11 deletions(-)

Comments

Tom Herbert Feb. 19, 2016, 8:27 p.m. UTC | #1
On Fri, Feb 19, 2016 at 11:26 AM, Alexander Duyck <aduyck@mirantis.com> wrote:
> This change makes it so that if UDP CSUM is not specified we will default
> to enabling it.  The main motivation behind this is the fact that with the
> use of outer checksum we can greatly improve the performance for VXLAN
> tunnels on devices that don't know how to parse tunnel headers.
>
> Signed-off-by: Alexander Duyck <aduyck@mirantis.com>
> ---
>  drivers/net/vxlan.c |   19 +++++++++----------
>  include/net/vxlan.h |    2 +-
>  2 files changed, 10 insertions(+), 11 deletions(-)
>
> diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
> index 766e6114a37f..909f7931c297 100644
> --- a/drivers/net/vxlan.c
> +++ b/drivers/net/vxlan.c
> @@ -1957,13 +1957,6 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
>                         goto drop;
>                 sk = vxlan->vn4_sock->sock->sk;
>
> -               if (info) {
> -                       if (info->key.tun_flags & TUNNEL_DONT_FRAGMENT)
> -                               df = htons(IP_DF);
> -               } else {
> -                       udp_sum = !!(flags & VXLAN_F_UDP_CSUM);
> -               }
> -
>                 rt = vxlan_get_route(vxlan, skb,
>                                      rdst ? rdst->remote_ifindex : 0, tos,
>                                      dst->sin.sin_addr.s_addr, &saddr,
> @@ -1997,6 +1990,11 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
>                         return;
>                 }
>
> +               if (!info)
> +                       udp_sum = !(flags & VXLAN_F_UDP_ZERO_CSUM_TX);
> +               else if (info->key.tun_flags & TUNNEL_DONT_FRAGMENT)
> +                       df = htons(IP_DF);
> +
>                 tos = ip_tunnel_ecn_encap(tos, old_iph, skb);
>                 ttl = ttl ? : ip4_dst_hoplimit(&rt->dst);
>                 err = vxlan_build_skb(skb, &rt->dst, sizeof(struct iphdr),
> @@ -2920,8 +2918,9 @@ static int vxlan_newlink(struct net *src_net, struct net_device *dev,
>         if (data[IFLA_VXLAN_PORT])
>                 conf.dst_port = nla_get_be16(data[IFLA_VXLAN_PORT]);
>
> -       if (data[IFLA_VXLAN_UDP_CSUM] && nla_get_u8(data[IFLA_VXLAN_UDP_CSUM]))
> -               conf.flags |= VXLAN_F_UDP_CSUM;
> +       if (data[IFLA_VXLAN_UDP_CSUM] &&
> +           !nla_get_u8(data[IFLA_VXLAN_UDP_CSUM]))
> +               conf.flags |= VXLAN_F_UDP_ZERO_CSUM_TX;
>
>         if (data[IFLA_VXLAN_UDP_ZERO_CSUM6_TX] &&
>             nla_get_u8(data[IFLA_VXLAN_UDP_ZERO_CSUM6_TX]))
> @@ -3065,7 +3064,7 @@ static int vxlan_fill_info(struct sk_buff *skb, const struct net_device *dev)
>             nla_put_u32(skb, IFLA_VXLAN_LIMIT, vxlan->cfg.addrmax) ||
>             nla_put_be16(skb, IFLA_VXLAN_PORT, vxlan->cfg.dst_port) ||
>             nla_put_u8(skb, IFLA_VXLAN_UDP_CSUM,
> -                       !!(vxlan->flags & VXLAN_F_UDP_CSUM)) ||
> +                       !(vxlan->flags & VXLAN_F_UDP_ZERO_CSUM_TX)) ||
>             nla_put_u8(skb, IFLA_VXLAN_UDP_ZERO_CSUM6_TX,
>                         !!(vxlan->flags & VXLAN_F_UDP_ZERO_CSUM6_TX)) ||
>             nla_put_u8(skb, IFLA_VXLAN_UDP_ZERO_CSUM6_RX,
> diff --git a/include/net/vxlan.h b/include/net/vxlan.h
> index 748083de367a..6eda4ed4d78b 100644
> --- a/include/net/vxlan.h
> +++ b/include/net/vxlan.h
> @@ -197,7 +197,7 @@ struct vxlan_dev {
>  #define VXLAN_F_L2MISS                 0x08
>  #define VXLAN_F_L3MISS                 0x10
>  #define VXLAN_F_IPV6                   0x20
> -#define VXLAN_F_UDP_CSUM               0x40
> +#define VXLAN_F_UDP_ZERO_CSUM_TX       0x40
>  #define VXLAN_F_UDP_ZERO_CSUM6_TX      0x80
>  #define VXLAN_F_UDP_ZERO_CSUM6_RX      0x100
>  #define VXLAN_F_REMCSUM_TX             0x200
>

Acked-by: Tom Herbert <tom@herbertland.com>

I would also note RFC7348 specifies:

UDP Checksum: It SHOULD be transmitted as zero. ...

The RFC doesn't provide any rationale as to why this is a SHOULD
(nor is there any discussion of whether this pertains to IPv6,
which has stronger requirements for non-zero UDP checksums). I think
there are two possibilities for the intent: 1) The authors assumed that
computing UDP checksums is a significant performance hit, which is
disproven by this patch. 2) They were worried about devices that are
unable to compute receive checksums; however, this would be addressed
by the allowance that devices can ignore non-zero UDP checksums for
VXLAN ("When a decapsulating end point receives a packet with a
non-zero checksum, it MAY choose to verify the checksum value.").


.
Jesse Gross Feb. 19, 2016, 9:36 p.m. UTC | #2
On Fri, Feb 19, 2016 at 12:27 PM, Tom Herbert <tom@herbertland.com> wrote:
> I would also note RFC7348 specifies:
>
> UDP Checksum: It SHOULD be transmitted as zero. ...
>
> The RFC doesn't provide any rationale as to why this is a SHOULD
> (neither is there any discussion as to whether this pertains to IPv6
> which has stronger requirements for non-zero UDP checksum). I think
> there are two possibilities in the intent: 1) The authors assume that
> computing UDP checksums is a significant performance hit which is
> dis-proven by this patch 2) They are worried about devices that are
> unable to compute receive checksums, however this would be addressed
> by an allowance that devices can ignore non-zero UDP checksums for
> VXLAN ("When a decapsulating end point receives a packet with a
> non-zero checksum, it MAY choose to verify the checksum value.")

It's #2.

All of the performance concerns around checksums and tunneling stem
from devices implemented using switching ASICs. In those devices,
computing/verifying checksums is so slow (software path) that they are
effectively unable to do it.
diff mbox

Patch

diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index 766e6114a37f..909f7931c297 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -1957,13 +1957,6 @@  static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
 			goto drop;
 		sk = vxlan->vn4_sock->sock->sk;
 
-		if (info) {
-			if (info->key.tun_flags & TUNNEL_DONT_FRAGMENT)
-				df = htons(IP_DF);
-		} else {
-			udp_sum = !!(flags & VXLAN_F_UDP_CSUM);
-		}
-
 		rt = vxlan_get_route(vxlan, skb,
 				     rdst ? rdst->remote_ifindex : 0, tos,
 				     dst->sin.sin_addr.s_addr, &saddr,
@@ -1997,6 +1990,11 @@  static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
 			return;
 		}
 
+		if (!info)
+			udp_sum = !(flags & VXLAN_F_UDP_ZERO_CSUM_TX);
+		else if (info->key.tun_flags & TUNNEL_DONT_FRAGMENT)
+			df = htons(IP_DF);
+
 		tos = ip_tunnel_ecn_encap(tos, old_iph, skb);
 		ttl = ttl ? : ip4_dst_hoplimit(&rt->dst);
 		err = vxlan_build_skb(skb, &rt->dst, sizeof(struct iphdr),
@@ -2920,8 +2918,9 @@  static int vxlan_newlink(struct net *src_net, struct net_device *dev,
 	if (data[IFLA_VXLAN_PORT])
 		conf.dst_port = nla_get_be16(data[IFLA_VXLAN_PORT]);
 
-	if (data[IFLA_VXLAN_UDP_CSUM] && nla_get_u8(data[IFLA_VXLAN_UDP_CSUM]))
-		conf.flags |= VXLAN_F_UDP_CSUM;
+	if (data[IFLA_VXLAN_UDP_CSUM] &&
+	    !nla_get_u8(data[IFLA_VXLAN_UDP_CSUM]))
+		conf.flags |= VXLAN_F_UDP_ZERO_CSUM_TX;
 
 	if (data[IFLA_VXLAN_UDP_ZERO_CSUM6_TX] &&
 	    nla_get_u8(data[IFLA_VXLAN_UDP_ZERO_CSUM6_TX]))
@@ -3065,7 +3064,7 @@  static int vxlan_fill_info(struct sk_buff *skb, const struct net_device *dev)
 	    nla_put_u32(skb, IFLA_VXLAN_LIMIT, vxlan->cfg.addrmax) ||
 	    nla_put_be16(skb, IFLA_VXLAN_PORT, vxlan->cfg.dst_port) ||
 	    nla_put_u8(skb, IFLA_VXLAN_UDP_CSUM,
-			!!(vxlan->flags & VXLAN_F_UDP_CSUM)) ||
+			!(vxlan->flags & VXLAN_F_UDP_ZERO_CSUM_TX)) ||
 	    nla_put_u8(skb, IFLA_VXLAN_UDP_ZERO_CSUM6_TX,
 			!!(vxlan->flags & VXLAN_F_UDP_ZERO_CSUM6_TX)) ||
 	    nla_put_u8(skb, IFLA_VXLAN_UDP_ZERO_CSUM6_RX,
diff --git a/include/net/vxlan.h b/include/net/vxlan.h
index 748083de367a..6eda4ed4d78b 100644
--- a/include/net/vxlan.h
+++ b/include/net/vxlan.h
@@ -197,7 +197,7 @@  struct vxlan_dev {
 #define VXLAN_F_L2MISS			0x08
 #define VXLAN_F_L3MISS			0x10
 #define VXLAN_F_IPV6			0x20
-#define VXLAN_F_UDP_CSUM		0x40
+#define VXLAN_F_UDP_ZERO_CSUM_TX	0x40
 #define VXLAN_F_UDP_ZERO_CSUM6_TX	0x80
 #define VXLAN_F_UDP_ZERO_CSUM6_RX	0x100
 #define VXLAN_F_REMCSUM_TX		0x200