diff mbox

[net-next,8/8] tou: Support for GSO

Message ID 1466099522-690741-9-git-send-email-tom@herbertland.com
State Changes Requested, archived
Delegated to: David Miller
Headers show

Commit Message

Tom Herbert June 16, 2016, 5:52 p.m. UTC
Add SKB_GSO_TOU. In udp[46]_ufo_fragment check for SKB_GSO_TOU. If this
is set call skb_udp_tou_segment. skb_udp_tou_segment is very similar
to skb_udp_tunnel_segment except that we only need to deal with the
L4 headers.

Signed-off-by: Tom Herbert <tom@herbertland.com>
---
 include/linux/netdev_features.h  |   3 +-
 include/linux/netdevice.h        |   1 +
 include/linux/skbuff.h           |   2 +-
 include/net/udp.h                |   2 +
 net/ipv4/fou.c                   |   2 +
 net/ipv4/ip_output.c             |   2 +
 net/ipv4/udp_offload.c           | 163 +++++++++++++++++++++++++++++++++++++--
 net/ipv6/inet6_connection_sock.c |   3 +
 net/ipv6/udp_offload.c           | 128 +++++++++++++++---------------
 9 files changed, 237 insertions(+), 69 deletions(-)
diff mbox

Patch

diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h
index ab15c6a..ffc4e0a 100644
--- a/include/linux/netdev_features.h
+++ b/include/linux/netdev_features.h
@@ -37,7 +37,7 @@  enum {
 	NETIF_F_TSO_BIT			/* ... TCPv4 segmentation */
 		= NETIF_F_GSO_SHIFT,
 	NETIF_F_UFO_BIT,		/* ... UDPv4 fragmentation */
-	NETIF_F_GSO_RSVD,		/* ... Reserved */
+	NETIF_F_GSO_TOU_BIT,		/* ... Transports over UDP */
 	NETIF_F_TSO_ECN_BIT,		/* ... TCP ECN support */
 	NETIF_F_TSO_MANGLEID_BIT,	/* ... IPV4 ID mangling allowed */
 	NETIF_F_TSO6_BIT,		/* ... TCPv6 segmentation */
@@ -131,6 +131,7 @@  enum {
 #define NETIF_F_GSO_PARTIAL	 __NETIF_F(GSO_PARTIAL)
 #define NETIF_F_GSO_TUNNEL_REMCSUM __NETIF_F(GSO_TUNNEL_REMCSUM)
 #define NETIF_F_GSO_SCTP	__NETIF_F(GSO_SCTP)
+#define NETIF_F_GSO_TOU		__NETIF_F(GSO_TOU)
 #define NETIF_F_HW_VLAN_STAG_FILTER __NETIF_F(HW_VLAN_STAG_FILTER)
 #define NETIF_F_HW_VLAN_STAG_RX	__NETIF_F(HW_VLAN_STAG_RX)
 #define NETIF_F_HW_VLAN_STAG_TX	__NETIF_F(HW_VLAN_STAG_TX)
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 5969028..624d169 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -4023,6 +4023,7 @@  static inline bool net_gso_ok(netdev_features_t features, int gso_type)
 	/* check flags correspondence */
 	BUILD_BUG_ON(SKB_GSO_TCPV4   != (NETIF_F_TSO >> NETIF_F_GSO_SHIFT));
 	BUILD_BUG_ON(SKB_GSO_UDP     != (NETIF_F_UFO >> NETIF_F_GSO_SHIFT));
+	BUILD_BUG_ON(SKB_GSO_TOU     != (NETIF_F_GSO_TOU >> NETIF_F_GSO_SHIFT));
 	BUILD_BUG_ON(SKB_GSO_TCP_ECN != (NETIF_F_TSO_ECN >> NETIF_F_GSO_SHIFT));
 	BUILD_BUG_ON(SKB_GSO_TCP_FIXEDID != (NETIF_F_TSO_MANGLEID >> NETIF_F_GSO_SHIFT));
 	BUILD_BUG_ON(SKB_GSO_TCPV6   != (NETIF_F_TSO6 >> NETIF_F_GSO_SHIFT));
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index be34e06..9f85a7d 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -463,7 +463,7 @@  enum {
 	SKB_GSO_TCPV4 = 1 << 0,
 	SKB_GSO_UDP = 1 << 1,
 
-	SKB_GSO_RSVD = 1 << 2,
+	SKB_GSO_TOU = 1 << 2,
 
 	/* This indicates the tcp segment has CWR set. */
 	SKB_GSO_TCP_ECN = 1 << 3,
diff --git a/include/net/udp.h b/include/net/udp.h
index 8894d71..48b767f 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -262,6 +262,8 @@  unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait);
 struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb,
 				       netdev_features_t features,
 				       bool is_ipv6);
+struct sk_buff *skb_udp_tou_segment(struct sk_buff *skb,
+				    netdev_features_t features, bool is_ipv6);
 int udp_lib_getsockopt(struct sock *sk, int level, int optname,
 		       char __user *optval, int __user *optlen);
 int udp_lib_setsockopt(struct sock *sk, int level, int optname,
diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c
index 9cd9168..3cdc060 100644
--- a/net/ipv4/fou.c
+++ b/net/ipv4/fou.c
@@ -435,6 +435,8 @@  next_proto:
 	/* Flag this frame as already having an outer encap header */
 	NAPI_GRO_CB(skb)->is_fou = 1;
 
+	skb_set_transport_header(skb, skb_gro_offset(skb));
+
 	rcu_read_lock();
 	offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads;
 	ops = rcu_dereference(offloads[proto]);
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 11cf4de..090cede 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -410,6 +410,8 @@  int ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl)
 			res = -EINVAL;
 			goto fail;
 		}
+		skb_shinfo(skb)->gso_type |= SKB_GSO_TOU;
+		skb_set_inner_ipproto(skb, sk->sk_protocol);
 	} else {
 		dport = inet->inet_dport;
 		sport = inet->inet_sport;
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 81f253b..8e56a21 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -184,6 +184,155 @@  out_unlock:
 }
 EXPORT_SYMBOL(skb_udp_tunnel_segment);
 
+/* __skb_udp_tou_segment
+ *
+ * Handle segmentation of TOU (Transport Protocols over UDP). Note that this
+ * is very similar to __skb_udp_tunnel_segment, however here we don't need to
+ * deal with MAC or network layers. Everything is done based on transport
+ * headers only.
+ */
+static struct sk_buff *__skb_udp_tou_segment(struct sk_buff *skb,
+	netdev_features_t features,
+	struct sk_buff *(*gso_inner_segment)(struct sk_buff *skb,
+					     netdev_features_t features),
+	bool is_ipv6)
+{
+	int tnl_hlen = skb_inner_transport_header(skb) -
+		       skb_transport_header(skb);
+	bool remcsum, need_csum, offload_csum, ufo;
+	struct sk_buff *segs = ERR_PTR(-EINVAL);
+	struct udphdr *uh = udp_hdr(skb);
+	__wsum partial;
+
+	if (unlikely(!pskb_may_pull(skb, tnl_hlen)))
+		goto out;
+
+	/* Adjust partial header checksum to negate old length.
+	 * We cannot rely on the value contained in uh->len as it is
+	 * possible that the actual value exceeds the boundaries of the
+	 * 16 bit length field due to the header being added outside of an
+	 * IP or IPv6 frame that was already limited to 64K - 1.
+	 */
+	if (skb_shinfo(skb)->gso_type & SKB_GSO_PARTIAL)
+		partial = (__force __wsum)uh->len;
+	else
+		partial = (__force __wsum)htonl(skb->len);
+	partial = csum_sub(csum_unfold(uh->check), partial);
+
+	/* Setup inner skb. Only the transport header is relevant */
+	skb->encapsulation = 0;
+	SKB_GSO_CB(skb)->encap_level = 0;
+	__skb_pull(skb, tnl_hlen);
+	skb_reset_transport_header(skb);
+
+	need_csum = !!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM);
+	skb->encap_hdr_csum = need_csum;
+
+	remcsum = !!(skb_shinfo(skb)->gso_type & SKB_GSO_TUNNEL_REMCSUM);
+	skb->remcsum_offload = remcsum;
+
+	ufo = !!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP);
+
+	/* Try to offload checksum if possible */
+	offload_csum = !!(need_csum &&
+			  (skb->dev->features &
+			   (is_ipv6 ? (NETIF_F_HW_CSUM | NETIF_F_IPV6_CSUM) :
+				      (NETIF_F_HW_CSUM | NETIF_F_IP_CSUM))));
+
+	features &= skb->dev->hw_enc_features;
+
+	/* The only checksum offload we care about from here on out is the
+	 * outer one so strip the existing checksum feature flags and
+	 * instead set the flag based on our outer checksum offload value.
+	 */
+	if (remcsum || ufo) {
+		features &= ~NETIF_F_CSUM_MASK;
+		if (!need_csum || offload_csum)
+			features |= NETIF_F_HW_CSUM;
+	}
+
+	/* segment inner packet. */
+	segs = gso_inner_segment(skb, features);
+	if (IS_ERR_OR_NULL(segs)) {
+		skb->encapsulation = 1;
+		skb_push(skb, tnl_hlen);
+		skb_reset_transport_header(skb);
+
+		goto out;
+	}
+
+	skb = segs;
+	do {
+		unsigned int len;
+
+		if (remcsum)
+			skb->ip_summed = CHECKSUM_NONE;
+
+		/* Adjust transport header back to UDP header */
+
+		skb->transport_header -= tnl_hlen;
+		uh = udp_hdr(skb);
+		len = skb->len - ((unsigned char *)uh - skb->data);
+
+		/* If we are only performing partial GSO the inner header
+		 * will be using a length value equal to only one MSS sized
+		 * segment instead of the entire frame.
+		 */
+		if (skb_is_gso(skb)) {
+			uh->len = htons(skb_shinfo(skb)->gso_size +
+					SKB_GSO_CB(skb)->data_offset +
+					skb->head - (unsigned char *)uh);
+		} else {
+			uh->len = htons(len);
+		}
+
+		if (!need_csum)
+			continue;
+
+		uh->check = ~csum_fold(csum_add(partial,
+				       (__force __wsum)htonl(len)));
+
+		if (skb->encapsulation || !offload_csum) {
+			uh->check = gso_make_checksum(skb, ~uh->check);
+			if (uh->check == 0)
+				uh->check = CSUM_MANGLED_0;
+		} else {
+			skb->ip_summed = CHECKSUM_PARTIAL;
+			skb->csum_start = skb_transport_header(skb) - skb->head;
+			skb->csum_offset = offsetof(struct udphdr, check);
+		}
+	} while ((skb = skb->next));
+out:
+	return segs;
+}
+
+struct sk_buff *skb_udp_tou_segment(struct sk_buff *skb,
+				    netdev_features_t features,
+				    bool is_ipv6)
+{
+	const struct net_offload **offloads;
+	const struct net_offload *ops;
+	struct sk_buff *segs = ERR_PTR(-EINVAL);
+	struct sk_buff *(*gso_inner_segment)(struct sk_buff *skb,
+					     netdev_features_t features);
+
+	rcu_read_lock();
+
+	offloads = is_ipv6 ? inet6_offloads : inet_offloads;
+	ops = rcu_dereference(offloads[skb->inner_ipproto]);
+	if (!ops || !ops->callbacks.gso_segment)
+		goto out_unlock;
+	gso_inner_segment = ops->callbacks.gso_segment;
+
+	segs = __skb_udp_tou_segment(skb, features, gso_inner_segment, is_ipv6);
+
+out_unlock:
+	rcu_read_unlock();
+
+	return segs;
+}
+EXPORT_SYMBOL(skb_udp_tou_segment);
+
 static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
 					 netdev_features_t features)
 {
@@ -193,11 +342,15 @@  static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
 	struct udphdr *uh;
 	struct iphdr *iph;
 
-	if (skb->encapsulation &&
-	    (skb_shinfo(skb)->gso_type &
-	     (SKB_GSO_UDP_TUNNEL|SKB_GSO_UDP_TUNNEL_CSUM))) {
-		segs = skb_udp_tunnel_segment(skb, features, false);
-		goto out;
+	if (skb->encapsulation) {
+		if (skb_shinfo(skb)->gso_type & SKB_GSO_TOU) {
+			segs = skb_udp_tou_segment(skb, features, false);
+			goto out;
+		} else if ((skb_shinfo(skb)->gso_type &
+		    (SKB_GSO_UDP_TUNNEL | SKB_GSO_UDP_TUNNEL_CSUM))) {
+			segs = skb_udp_tunnel_segment(skb, features, false);
+			goto out;
+		}
 	}
 
 	if (!pskb_may_pull(skb, sizeof(struct udphdr)))
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index 6c971bc..7b3978a 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -189,6 +189,9 @@  int inet6_csk_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl_unused
 			goto fail;
 		}
 
+		skb_shinfo(skb)->gso_type |= SKB_GSO_TOU;
+		skb_set_inner_ipproto(skb, sk->sk_protocol);
+
 		/* Changing ports and protocol to be routed */
 		fl6.fl6_sport = e->sport;
 		fl6.fl6_dport = e->dport;
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
index ac858c4..b53486b 100644
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -29,6 +29,8 @@  static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
 	u8 frag_hdr_sz = sizeof(struct frag_hdr);
 	__wsum csum;
 	int tnl_hlen;
+	const struct ipv6hdr *ipv6h;
+	struct udphdr *uh;
 
 	mss = skb_shinfo(skb)->gso_size;
 	if (unlikely(skb->len <= mss))
@@ -47,74 +49,76 @@  static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
 		goto out;
 	}
 
-	if (skb->encapsulation && skb_shinfo(skb)->gso_type &
-	    (SKB_GSO_UDP_TUNNEL|SKB_GSO_UDP_TUNNEL_CSUM))
-		segs = skb_udp_tunnel_segment(skb, features, true);
-	else {
-		const struct ipv6hdr *ipv6h;
-		struct udphdr *uh;
-
-		if (!pskb_may_pull(skb, sizeof(struct udphdr)))
+	if (skb->encapsulation) {
+		if (skb_shinfo(skb)->gso_type & SKB_GSO_TOU) {
+			segs = skb_udp_tou_segment(skb, features, true);
+			goto out;
+		} else if (skb_shinfo(skb)->gso_type &
+			   (SKB_GSO_UDP_TUNNEL | SKB_GSO_UDP_TUNNEL_CSUM)) {
+			segs = skb_udp_tunnel_segment(skb, features, true);
 			goto out;
-
-		/* Do software UFO. Complete and fill in the UDP checksum as HW cannot
-		 * do checksum of UDP packets sent as multiple IP fragments.
-		 */
-
-		uh = udp_hdr(skb);
-		ipv6h = ipv6_hdr(skb);
-
-		uh->check = 0;
-		csum = skb_checksum(skb, 0, skb->len, 0);
-		uh->check = udp_v6_check(skb->len, &ipv6h->saddr,
-					  &ipv6h->daddr, csum);
-		if (uh->check == 0)
-			uh->check = CSUM_MANGLED_0;
-
-		skb->ip_summed = CHECKSUM_NONE;
-
-		/* If there is no outer header we can fake a checksum offload
-		 * due to the fact that we have already done the checksum in
-		 * software prior to segmenting the frame.
-		 */
-		if (!skb->encap_hdr_csum)
-			features |= NETIF_F_HW_CSUM;
-
-		/* Check if there is enough headroom to insert fragment header. */
-		tnl_hlen = skb_tnl_header_len(skb);
-		if (skb->mac_header < (tnl_hlen + frag_hdr_sz)) {
-			if (gso_pskb_expand_head(skb, tnl_hlen + frag_hdr_sz))
-				goto out;
 		}
+	}
 
-		/* Find the unfragmentable header and shift it left by frag_hdr_sz
-		 * bytes to insert fragment header.
-		 */
-		unfrag_ip6hlen = ip6_find_1stfragopt(skb, &prevhdr);
-		nexthdr = *prevhdr;
-		*prevhdr = NEXTHDR_FRAGMENT;
-		unfrag_len = (skb_network_header(skb) - skb_mac_header(skb)) +
-			     unfrag_ip6hlen + tnl_hlen;
-		packet_start = (u8 *) skb->head + SKB_GSO_CB(skb)->mac_offset;
-		memmove(packet_start-frag_hdr_sz, packet_start, unfrag_len);
-
-		SKB_GSO_CB(skb)->mac_offset -= frag_hdr_sz;
-		skb->mac_header -= frag_hdr_sz;
-		skb->network_header -= frag_hdr_sz;
-
-		fptr = (struct frag_hdr *)(skb_network_header(skb) + unfrag_ip6hlen);
-		fptr->nexthdr = nexthdr;
-		fptr->reserved = 0;
-		if (!skb_shinfo(skb)->ip6_frag_id)
-			ipv6_proxy_select_ident(dev_net(skb->dev), skb);
-		fptr->identification = skb_shinfo(skb)->ip6_frag_id;
+	if (!pskb_may_pull(skb, sizeof(struct udphdr)))
+		goto out;
 
-		/* Fragment the skb. ipv6 header and the remaining fields of the
-		 * fragment header are updated in ipv6_gso_segment()
-		 */
-		segs = skb_segment(skb, features);
+	/* Do software UFO. Complete and fill in the UDP checksum as HW cannot
+	 * do checksum of UDP packets sent as multiple IP fragments.
+	 */
+
+	uh = udp_hdr(skb);
+	ipv6h = ipv6_hdr(skb);
+
+	uh->check = 0;
+	csum = skb_checksum(skb, 0, skb->len, 0);
+	uh->check = udp_v6_check(skb->len, &ipv6h->saddr,
+				  &ipv6h->daddr, csum);
+	if (uh->check == 0)
+		uh->check = CSUM_MANGLED_0;
+
+	skb->ip_summed = CHECKSUM_NONE;
+
+	/* If there is no outer header we can fake a checksum offload
+	 * due to the fact that we have already done the checksum in
+	 * software prior to segmenting the frame.
+	 */
+	if (!skb->encap_hdr_csum)
+		features |= NETIF_F_HW_CSUM;
+
+	/* Check if there is enough headroom to insert fragment header. */
+	tnl_hlen = skb_tnl_header_len(skb);
+	if (skb->mac_header < (tnl_hlen + frag_hdr_sz)) {
+		if (gso_pskb_expand_head(skb, tnl_hlen + frag_hdr_sz))
+			goto out;
 	}
 
+	/* Find the unfragmentable header and shift it left by frag_hdr_sz
+	 * bytes to insert fragment header.
+	 */
+	unfrag_ip6hlen = ip6_find_1stfragopt(skb, &prevhdr);
+	nexthdr = *prevhdr;
+	*prevhdr = NEXTHDR_FRAGMENT;
+	unfrag_len = (skb_network_header(skb) - skb_mac_header(skb)) +
+		     unfrag_ip6hlen + tnl_hlen;
+	packet_start = (u8 *)skb->head + SKB_GSO_CB(skb)->mac_offset;
+	memmove(packet_start - frag_hdr_sz, packet_start, unfrag_len);
+
+	SKB_GSO_CB(skb)->mac_offset -= frag_hdr_sz;
+	skb->mac_header -= frag_hdr_sz;
+	skb->network_header -= frag_hdr_sz;
+
+	fptr = (struct frag_hdr *)(skb_network_header(skb) + unfrag_ip6hlen);
+	fptr->nexthdr = nexthdr;
+	fptr->reserved = 0;
+	if (!skb_shinfo(skb)->ip6_frag_id)
+		ipv6_proxy_select_ident(dev_net(skb->dev), skb);
+	fptr->identification = skb_shinfo(skb)->ip6_frag_id;
+
+	/* Fragment the skb. ipv6 header and the remaining fields of the
+	 * fragment header are updated in ipv6_gso_segment()
+	 */
+	segs = skb_segment(skb, features);
 out:
 	return segs;
 }