Patchwork [2/2] v2 GRE: Add segmentation offload for GRE

login
register
mail settings
Submitter Pravin B Shelar
Date Jan. 24, 2013, 10:16 p.m.
Message ID <1359065793-1796-1-git-send-email-pshelar@nicira.com>
Download mbox | patch
Permalink /patch/215504/
State Changes Requested
Delegated to: David Miller
Headers show

Comments

Pravin B Shelar - Jan. 24, 2013, 10:16 p.m.
Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
---
Fixed according to comments from Jesse and Eric.
 - Factored a MAC layer handler out of skb_gso_segment().
 - Eliminated copy operation from gre_gso_segment().
 - Refresh header pointer after pskb_may_pull().
---
 include/linux/netdevice.h |    4 +-
 include/linux/skbuff.h    |    7 +++
 net/core/dev.c            |   58 ++++++++++++++----------
 net/core/skbuff.c         |    7 ++-
 net/ipv4/af_inet.c        |    1 +
 net/ipv4/gre.c            |  109 +++++++++++++++++++++++++++++++++++++++++++++
 net/ipv4/ip_gre.c         |   94 ++++++++++++++++++++++++++++++--------
 net/ipv4/tcp.c            |    1 +
 net/ipv4/udp.c            |    3 +-
 net/ipv6/ip6_offload.c    |    1 +
 net/ipv6/udp_offload.c    |    3 +-
 11 files changed, 243 insertions(+), 45 deletions(-)
Eric Dumazet - Jan. 24, 2013, 11:29 p.m.
On Thu, 2013-01-24 at 14:16 -0800, Pravin B Shelar wrote:
> Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
> ---
> Fixed according to comments from Jesse and Eric.
>  - Factored a MAC layer handler out of skb_gso_segment().
>  - Eliminated copy operation from gre_gso_segment().
>  - Refresh header pointer after pskb_may_pull().

Seems nice !

> +	if (skb_is_gso(skb)) {
> +		err = skb_unclone(skb, GFP_ATOMIC);
> +		if (unlikely(err))
> +			goto error;
> +		skb_shinfo(skb)->gso_type |= SKB_GSO_GRE;
> +		return skb;
> +	} else if (skb->ip_summed == CHECKSUM_PARTIAL) {
> +		/* Pages aren't locked and could change at any time.
> +		 * If this happens after we compute the checksum, the
> +		 * checksum will be wrong.  We linearize now to avoid
> +		 * this problem.
> +		 */
> +		if (skb_is_nonlinear(skb)) {
> +			err = __skb_linearize(skb);
> +			if (unlikely(err))
> +				goto error;
> +		}
> +
> +		err = skb_checksum_help(skb);
> +		if (unlikely(err))
> +			goto error;
> +	}
> +

I really don't understand why you put this chunk in this patch.

Packet being GSO or not, the underlying problem still remains.

This must be addressed separately and at a different layer.

(in skb_checksum_help() most probably)

If the packet is GSO and we compute checksum in software,
then we also have to copy all frags that could potentially
be overwritten.


> +	skb->ip_summed = CHECKSUM_NONE;
> +
> +	return skb;
> +
> +error:
> +	kfree_skb(skb);
> +	return ERR_PTR(err);
> +}
> +
>  static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
>  {
>  	struct ip_tunnel *tunnel = netdev_priv(dev);
> @@ -751,10 +787,9 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
>  	__be32 dst;
>  	int    mtu;
>  	u8     ttl;
> -
> -	if (skb->ip_summed == CHECKSUM_PARTIAL &&
> -	    skb_checksum_help(skb))
> -		goto tx_error;
> +	int pkt_len;
> +	struct pcpu_tstats *tstats;
> +	int err;
>  
>  	if (dev->type == ARPHRD_ETHER)
>  		IPCB(skb)->flags = 0;
> @@ -852,13 +887,6 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
>  
>  	if (skb->protocol == htons(ETH_P_IP)) {
>  		df |= (old_iph->frag_off&htons(IP_DF));
> -
> -		if ((old_iph->frag_off&htons(IP_DF)) &&
> -		    mtu < ntohs(old_iph->tot_len)) {
> -			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
> -			ip_rt_put(rt);
> -			goto tx_error;
> -		}

Not clear why this chunk can be safely removed, even for non GSO
packet ?



--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Pravin B Shelar - Jan. 25, 2013, 12:14 a.m.
On Thu, Jan 24, 2013 at 3:29 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> On Thu, 2013-01-24 at 14:16 -0800, Pravin B Shelar wrote:
>> Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
>> ---
>> Fixed according to comments from Jesse and Eric.
>>  - Factored a MAC layer handler out of skb_gso_segment().
>>  - Eliminated copy operation from gre_gso_segment().
>>  - Refresh header pointer after pskb_may_pull().
>
> Seems nice !
>
>> +     if (skb_is_gso(skb)) {
>> +             err = skb_unclone(skb, GFP_ATOMIC);
>> +             if (unlikely(err))
>> +                     goto error;
>> +             skb_shinfo(skb)->gso_type |= SKB_GSO_GRE;
>> +             return skb;
>> +     } else if (skb->ip_summed == CHECKSUM_PARTIAL) {
>> +             /* Pages aren't locked and could change at any time.
>> +              * If this happens after we compute the checksum, the
>> +              * checksum will be wrong.  We linearize now to avoid
>> +              * this problem.
>> +              */
>> +             if (skb_is_nonlinear(skb)) {
>> +                     err = __skb_linearize(skb);
>> +                     if (unlikely(err))
>> +                             goto error;
>> +             }
>> +
>> +             err = skb_checksum_help(skb);
>> +             if (unlikely(err))
>> +                     goto error;
>> +     }
>> +
>
> I really don't understand why you put chunk this in this patch.
>
> Packet being GSO or not, the underlying problem still remains.
>
> This must be addressed separately and at a different layer.
>
> (in skb_checksum_help() most probably)
>
> If the packet is GSO and we compute checksum in software,
> then we also have to copy all frags that could potentially
> be overwritten.
>

I think this patch does fix the csum issue without causing any performance
regression, so this patch should be enough to solve the GRE-GSO issue. Once
you have your fix, this code can be optimized even more.

>
>> +     skb->ip_summed = CHECKSUM_NONE;
>> +
>> +     return skb;
>> +
>> +error:
>> +     kfree_skb(skb);
>> +     return ERR_PTR(err);
>> +}
>> +
>>  static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
>>  {
>>       struct ip_tunnel *tunnel = netdev_priv(dev);
>> @@ -751,10 +787,9 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
>>       __be32 dst;
>>       int    mtu;
>>       u8     ttl;
>> -
>> -     if (skb->ip_summed == CHECKSUM_PARTIAL &&
>> -         skb_checksum_help(skb))
>> -             goto tx_error;
>> +     int pkt_len;
>> +     struct pcpu_tstats *tstats;
>> +     int err;
>>
>>       if (dev->type == ARPHRD_ETHER)
>>               IPCB(skb)->flags = 0;
>> @@ -852,13 +887,6 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
>>
>>       if (skb->protocol == htons(ETH_P_IP)) {
>>               df |= (old_iph->frag_off&htons(IP_DF));
>> -
>> -             if ((old_iph->frag_off&htons(IP_DF)) &&
>> -                 mtu < ntohs(old_iph->tot_len)) {
>> -                     icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
>> -                     ip_rt_put(rt);
>> -                     goto tx_error;
>> -             }
>
> Not clear why this chunk can be safely removed, even for non GSO
> packet ?
>
This actually does not work, especially for TAP devices, since the ICMP
response won't be delivered: the tunnel endpoint is not part of that IP
network.
This was discussed in VXLAN patch thread.
(http://markmail.org/message/xmqmvdh4noljfq2n).

But I agree we should keep it for non-TAP GRE and non-GSO packets.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Michał Mirosław - Jan. 25, 2013, 1:14 a.m.
2013/1/24 Pravin B Shelar <pshelar@nicira.com>:
[...]
> @@ -1374,6 +1421,10 @@ static int ipgre_tunnel_init(struct net_device *dev)
>                 return err;
>         }
>
> +       if (!(tunnel->parms.o_flags & GRE_SEQ)) {
> +               dev->features |= NETIF_F_GSO_SOFTWARE;
> +               dev->hw_features |= NETIF_F_GSO_SOFTWARE;
> +       }
>         return 0;
>  }
>

Can o_flags change after tunnel creation? If so, NETIF_F_GSO_SOFTWARE
should be set in dev->hw_features always, and it should be forced zero
in ndo_fix_features when GRE_SEQ is set. ipgre_netlink_parms() should
call netdev_update_features() when o_flags changes.

> @@ -1564,6 +1615,10 @@ static int ipgre_tap_init(struct net_device *dev)
>         if (!dev->tstats)
>                 return -ENOMEM;
>
> +       if (!(tunnel->parms.o_flags & GRE_SEQ)) {
> +               dev->features |= NETIF_F_GSO_SOFTWARE;
> +               dev->hw_features |= NETIF_F_GSO_SOFTWARE;
> +       }
>         return 0;
>  }
>

Same here.

Best Regards,
Michał Mirosław
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Eric Dumazet - Jan. 25, 2013, 1:34 a.m.
On Thu, 2013-01-24 at 16:14 -0800, Pravin Shelar wrote:

> I think this patch does fix csum issue without causing any performance
> regression. So this patch shld be enough to solve GRE-GSO issue. Once
> you have fix, this code can be optimized even more.

It adds the extra copy, since you assume no SG capability so
skb_segment() _does_ a copy.

As the checksum is needed, its true the copy is almost not noticed,
but one day NIC will be able to perform the checksum for us.
(Maybe its already the case for some of them)

I would first fix the checksum issue in a generic way, then
apply this patch on top of the fix, so that we can use SG and avoid
the extra copy for the typical tcp_sendmsg()

It seems you focus on the TAP use case only, seeing you removed
code that doesn't work for TAP but do work for regular locally
terminated flows.

You did a lot of implementation choices and none of them
are described in a changelog, making future work a bit hard.



--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Pravin B Shelar - Jan. 25, 2013, 3:38 a.m.
On Thu, Jan 24, 2013 at 5:34 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> On Thu, 2013-01-24 at 16:14 -0800, Pravin Shelar wrote:
>
>> I think this patch does fix csum issue without causing any performance
>> regression. So this patch shld be enough to solve GRE-GSO issue. Once
>> you have fix, this code can be optimized even more.
>
> It adds the extra copy, since you assume no SG capability so
> skb_segment() _does_ a copy.
>
OK, I will use device features.

> As the checksum is needed, its true the copy is almost not noticed,
> but one day NIC will be able to perform the checksum for us.
> (Maybe its already the case for some of them)
>
> I would first fix the checksum issue in a generic way, then
> apply this patch on top of the fix, so that we can use SG and avoid
> the extra copy for the typical tcp_sendmsg()

I thought you were working on the fix. If not I will post patch.
>
> It seems you focus on the TAP use case only, seeing you removed
> code that doesn't work for TAP but do work for regular locally
> terminated flows.
>
I have tested patch with GRE TAP and non TAP devices.

> You did a lot of implementation choices and none of them
> are described in a changelog, making future work a bit hard.
>

ok. I will update changelog.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Eric Dumazet - Jan. 25, 2013, 7:52 p.m.
On Thu, 2013-01-24 at 19:38 -0800, Pravin Shelar wrote:

> I thought you were working on the fix. If not I will post patch.

I am working on a patch, will send it shortly.

As it is a stable candidate, I did a short one.



--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Ben Hutchings - Jan. 28, 2013, 6:28 p.m.
On Thu, 2013-01-24 at 14:16 -0800, Pravin B Shelar wrote:
> Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
> ---
> Fixed according to comments from Jesse and Eric.
>  - Factored a MAC layer handler out of skb_gso_segment().
>  - Eliminated copy operation from gre_gso_segment().
>  - Refresh header pointer after pskb_may_pull().
[...]
> --- a/include/linux/skbuff.h
> +++ b/include/linux/skbuff.h
> @@ -307,6 +307,8 @@ enum {
>  	SKB_GSO_TCPV6 = 1 << 4,
>  
>  	SKB_GSO_FCOE = 1 << 5,
> +
> +	SKB_GSO_GRE = 1 << 6,
>  };
>  
>  #if BITS_PER_LONG > 32
[...]

I think each new GSO flag must have a corresponding net device feature
flag.  In that case you'll need to replace NETIF_F_GSO_RESERVED1 in
include/linux/netdev_features.h and add a name for the feature in
net/core/ethtool.c.

Ben.

Patch

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 549f5ad..109d27b 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2662,8 +2662,10 @@  extern int netdev_master_upper_dev_link(struct net_device *dev,
 extern void netdev_upper_dev_unlink(struct net_device *dev,
 				    struct net_device *upper_dev);
 extern int skb_checksum_help(struct sk_buff *skb);
+extern struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb,
+					  netdev_features_t features);
 extern struct sk_buff *skb_gso_segment(struct sk_buff *skb,
-	netdev_features_t features);
+				       netdev_features_t features);
 #ifdef CONFIG_BUG
 extern void netdev_rx_csum_fault(struct net_device *dev);
 #else
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 7c00664..7b98ee6 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -307,6 +307,8 @@  enum {
 	SKB_GSO_TCPV6 = 1 << 4,
 
 	SKB_GSO_FCOE = 1 << 5,
+
+	SKB_GSO_GRE = 1 << 6,
 };
 
 #if BITS_PER_LONG > 32
@@ -2711,6 +2713,11 @@  static inline struct sec_path *skb_sec_path(struct sk_buff *skb)
 }
 #endif
 
+struct skb_gso_cb {
+	int tnl_hoffset;
+};
+#define NAPI_GSO_CB(skb) ((struct skb_gso_cb *)(skb)->cb)
+
 static inline bool skb_is_gso(const struct sk_buff *skb)
 {
 	return skb_shinfo(skb)->gso_size;
diff --git a/net/core/dev.c b/net/core/dev.c
index c69cd87..208eb8b 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2293,18 +2293,8 @@  out:
 }
 EXPORT_SYMBOL(skb_checksum_help);
 
-/**
- *	skb_gso_segment - Perform segmentation on skb.
- *	@skb: buffer to segment
- *	@features: features for the output path (see dev->features)
- *
- *	This function segments the given skb and returns a list of segments.
- *
- *	It may return NULL if the skb requires no segmentation.  This is
- *	only possible when GSO is used for verifying header integrity.
- */
-struct sk_buff *skb_gso_segment(struct sk_buff *skb,
-	netdev_features_t features)
+struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb,
+				   netdev_features_t features)
 {
 	struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
 	struct packet_offload *ptype;
@@ -2323,18 +2313,7 @@  struct sk_buff *skb_gso_segment(struct sk_buff *skb,
 		vlan_depth += VLAN_HLEN;
 	}
 
-	skb_reset_mac_header(skb);
-	skb->mac_len = skb->network_header - skb->mac_header;
 	__skb_pull(skb, skb->mac_len);
-
-	if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
-		skb_warn_bad_offload(skb);
-
-		if (skb_header_cloned(skb) &&
-		    (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
-			return ERR_PTR(err);
-	}
-
 	rcu_read_lock();
 	list_for_each_entry_rcu(ptype, &offload_base, list) {
 		if (ptype->type == type && ptype->callbacks.gso_segment) {
@@ -2356,6 +2335,39 @@  struct sk_buff *skb_gso_segment(struct sk_buff *skb,
 
 	return segs;
 }
+EXPORT_SYMBOL(skb_mac_gso_segment);
+
+/**
+ *	skb_gso_segment - Perform segmentation on skb.
+ *	@skb: buffer to segment
+ *	@features: features for the output path (see dev->features)
+ *
+ *	This function segments the given skb and returns a list of segments.
+ *
+ *	It may return NULL if the skb requires no segmentation.  This is
+ *	only possible when GSO is used for verifying header integrity.
+ */
+struct sk_buff *skb_gso_segment(struct sk_buff *skb,
+				netdev_features_t features)
+{
+	int err;
+
+	if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
+		skb_warn_bad_offload(skb);
+
+		if (skb_header_cloned(skb)) {
+			err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
+			if (err)
+				return ERR_PTR(err);
+		}
+	}
+	NAPI_GSO_CB(skb)->tnl_hoffset = skb_headroom(skb);
+
+	skb_reset_mac_header(skb);
+	skb->mac_len = skb->network_header - skb->mac_header;
+
+	return skb_mac_gso_segment(skb, features);
+}
 EXPORT_SYMBOL(skb_gso_segment);
 
 /* Take action when hardware reception checksum errors are detected. */
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 2568c44..c12b6f3 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -2752,6 +2752,8 @@  struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features)
 	unsigned int doffset = skb->data - skb_mac_header(skb);
 	unsigned int offset = doffset;
 	unsigned int headroom;
+	unsigned int tnl_hlen = (skb_mac_header(skb) - skb->head) -
+				 NAPI_GSO_CB(skb)->tnl_hoffset;
 	unsigned int len;
 	int sg = !!(features & NETIF_F_SG);
 	int nfrags = skb_shinfo(skb)->nr_frags;
@@ -2827,7 +2829,10 @@  struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features)
 		skb_set_network_header(nskb, skb->mac_len);
 		nskb->transport_header = (nskb->network_header +
 					  skb_network_header_len(skb));
-		skb_copy_from_linear_data(skb, nskb->data, doffset);
+
+		skb_copy_from_linear_data_offset(skb, -tnl_hlen,
+						 nskb->data - tnl_hlen,
+						 doffset + tnl_hlen);
 
 		if (fskb != skb_shinfo(skb)->frag_list)
 			continue;
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 4b70539..2bd9998 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1306,6 +1306,7 @@  static struct sk_buff *inet_gso_segment(struct sk_buff *skb,
 		       SKB_GSO_UDP |
 		       SKB_GSO_DODGY |
 		       SKB_GSO_TCP_ECN |
+		       SKB_GSO_GRE |
 		       0)))
 		goto out;
 
diff --git a/net/ipv4/gre.c b/net/ipv4/gre.c
index 42a4910..1f86421 100644
--- a/net/ipv4/gre.c
+++ b/net/ipv4/gre.c
@@ -19,6 +19,7 @@ 
 #include <linux/in.h>
 #include <linux/ip.h>
 #include <linux/netdevice.h>
+#include <linux/if_tunnel.h>
 #include <linux/spinlock.h>
 #include <net/protocol.h>
 #include <net/gre.h>
@@ -26,6 +27,11 @@ 
 
 static const struct gre_protocol __rcu *gre_proto[GREPROTO_MAX] __read_mostly;
 static DEFINE_SPINLOCK(gre_proto_lock);
+struct gre_base_hdr {
+	__be16 flags;
+	__be16 protocol;
+};
+#define GRE_HEADER_SECTION 4
 
 int gre_add_protocol(const struct gre_protocol *proto, u8 version)
 {
@@ -112,12 +118,108 @@  static void gre_err(struct sk_buff *skb, u32 info)
 	rcu_read_unlock();
 }
 
+static struct sk_buff *gre_gso_segment(struct sk_buff *skb,
+				       netdev_features_t features)
+{
+	struct sk_buff *segs = ERR_PTR(-EINVAL);
+	int ghl = GRE_HEADER_SECTION;
+	struct gre_base_hdr *greh;
+	int mac_len = skb->mac_len;
+	int hlen;
+	bool csum;
+
+	if (unlikely(skb_shinfo(skb)->gso_type &
+				~(SKB_GSO_TCPV4 |
+				  SKB_GSO_TCPV6 |
+				  SKB_GSO_UDP |
+				  SKB_GSO_DODGY |
+				  SKB_GSO_TCP_ECN |
+				  SKB_GSO_GRE)))
+		goto out;
+
+	if (unlikely(!pskb_may_pull(skb, sizeof(*greh))))
+		goto out;
+
+	greh = (struct gre_base_hdr *)skb_transport_header(skb);
+
+	if (greh->flags & GRE_KEY)
+		ghl += GRE_HEADER_SECTION;
+	if (greh->flags & GRE_CSUM) {
+		ghl += GRE_HEADER_SECTION;
+		csum = true;
+	} else
+		csum = false;
+
+	/* setup inner skb. */
+	if (greh->protocol == htons(ETH_P_TEB)) {
+		struct ethhdr *eth = eth_hdr(skb);
+		skb->protocol = eth->h_proto;
+	} else {
+		skb->protocol = greh->protocol;
+	}
+
+	hlen = mac_len + sizeof(struct iphdr);
+	skb->encapsulation = 0;
+
+	if (unlikely(!pskb_may_pull(skb, ghl)))
+		goto out;
+	__skb_pull(skb, ghl);
+	skb_reset_mac_header(skb);
+	skb_set_network_header(skb, skb_inner_network_offset(skb));
+	skb->mac_len = skb_inner_network_offset(skb);
+
+	/* segment inner packet. */
+	segs = skb_mac_gso_segment(skb, 0);
+	if (!segs || IS_ERR(segs))
+		goto out;
+
+	skb = segs;
+	do {
+		__skb_push(skb, ghl + hlen);
+		if (csum) {
+			__be32 *pcsum;
+
+			greh = (struct gre_base_hdr *)(skb->data + hlen);
+			pcsum = (__be32 *)(greh + 1);
+			*pcsum = 0;
+			*(__sum16 *)pcsum = csum_fold(skb_checksum(skb, hlen,
+								   skb->len - hlen, 0));
+		}
+
+		skb_reset_mac_header(skb);
+		skb_set_network_header(skb, mac_len);
+		skb->mac_len = mac_len;
+	} while ((skb = skb->next));
+out:
+	return segs;
+}
+
+static int gre_gso_send_check(struct sk_buff *skb)
+{
+	struct gre_base_hdr *greh;
+
+	if (!skb->encapsulation)
+		return -EINVAL;
+
+	greh = (struct gre_base_hdr *)skb_transport_header(skb);
+	if (greh->flags & GRE_SEQ)
+		return -EINVAL;
+	return 0;
+}
+
 static const struct net_protocol net_gre_protocol = {
 	.handler     = gre_rcv,
 	.err_handler = gre_err,
 	.netns_ok    = 1,
 };
 
+static const struct net_offload gre_offload = {
+	.callbacks = {
+		.gso_send_check =	gre_gso_send_check,
+		.gso_segment    =	gre_gso_segment,
+	},
+};
+
 static int __init gre_init(void)
 {
 	pr_info("GRE over IPv4 demultiplexor driver\n");
@@ -127,11 +229,18 @@  static int __init gre_init(void)
 		return -EAGAIN;
 	}
 
+	if (inet_add_offload(&gre_offload, IPPROTO_GRE)) {
+		pr_err("can't add protocol offload\n");
+		inet_del_protocol(&net_gre_protocol, IPPROTO_GRE);
+		return -EAGAIN;
+	}
+
 	return 0;
 }
 
 static void __exit gre_exit(void)
 {
+	inet_del_offload(&gre_offload, IPPROTO_GRE);
 	inet_del_protocol(&net_gre_protocol, IPPROTO_GRE);
 }
 
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 303012a..ac8cb5e 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -735,6 +735,42 @@  drop:
 	return 0;
 }
 
+static struct sk_buff *handle_offloads(struct sk_buff *skb)
+{
+	int err;
+
+	if (skb_is_gso(skb)) {
+		err = skb_unclone(skb, GFP_ATOMIC);
+		if (unlikely(err))
+			goto error;
+		skb_shinfo(skb)->gso_type |= SKB_GSO_GRE;
+		return skb;
+	} else if (skb->ip_summed == CHECKSUM_PARTIAL) {
+		/* Pages aren't locked and could change at any time.
+		 * If this happens after we compute the checksum, the
+		 * checksum will be wrong.  We linearize now to avoid
+		 * this problem.
+		 */
+		if (skb_is_nonlinear(skb)) {
+			err = __skb_linearize(skb);
+			if (unlikely(err))
+				goto error;
+		}
+
+		err = skb_checksum_help(skb);
+		if (unlikely(err))
+			goto error;
+	}
+
+	skb->ip_summed = CHECKSUM_NONE;
+
+	return skb;
+
+error:
+	kfree_skb(skb);
+	return ERR_PTR(err);
+}
+
 static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct ip_tunnel *tunnel = netdev_priv(dev);
@@ -751,10 +787,9 @@  static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
 	__be32 dst;
 	int    mtu;
 	u8     ttl;
-
-	if (skb->ip_summed == CHECKSUM_PARTIAL &&
-	    skb_checksum_help(skb))
-		goto tx_error;
+	int pkt_len;
+	struct pcpu_tstats *tstats;
+	int err;
 
 	if (dev->type == ARPHRD_ETHER)
 		IPCB(skb)->flags = 0;
@@ -852,13 +887,6 @@  static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
 
 	if (skb->protocol == htons(ETH_P_IP)) {
 		df |= (old_iph->frag_off&htons(IP_DF));
-
-		if ((old_iph->frag_off&htons(IP_DF)) &&
-		    mtu < ntohs(old_iph->tot_len)) {
-			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
-			ip_rt_put(rt);
-			goto tx_error;
-		}
 	}
 #if IS_ENABLED(CONFIG_IPV6)
 	else if (skb->protocol == htons(ETH_P_IPV6)) {
@@ -873,11 +901,6 @@  static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
 			}
 		}
 
-		if (mtu >= IPV6_MIN_MTU && mtu < skb->len - tunnel->hlen + gre_hlen) {
-			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
-			ip_rt_put(rt);
-			goto tx_error;
-		}
 	}
 #endif
 
@@ -908,10 +931,19 @@  static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
 			skb_set_owner_w(new_skb, skb->sk);
 		dev_kfree_skb(skb);
 		skb = new_skb;
-		old_iph = ip_hdr(skb);
 		/* Warning : tiph value might point to freed memory */
 	}
 
+	if (!skb->encapsulation) {
+		skb_reset_inner_headers(skb);
+		skb->encapsulation = 1;
+	}
+
+	skb = handle_offloads(skb);
+	if (IS_ERR(skb))
+		return NETDEV_TX_OK;
+
+	old_iph = ip_hdr(skb);
 	skb_push(skb, gre_hlen);
 	skb_reset_network_header(skb);
 	skb_set_transport_header(skb, sizeof(*iph));
@@ -967,8 +999,23 @@  static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
 			*(__sum16 *)ptr = ip_compute_csum((void *)(iph+1), skb->len - sizeof(struct iphdr));
 		}
 	}
+	pkt_len = skb->len - skb_transport_offset(skb);
+	tstats = this_cpu_ptr(dev->tstats);
+
+	nf_reset(skb);
+	ip_select_ident(iph, skb_dst(skb), NULL);
+
+	err = ip_local_out(skb);
+	if (likely(net_xmit_eval(err) == 0)) {
+		u64_stats_update_begin(&tstats->syncp);
+		tstats->tx_bytes += pkt_len;
+		tstats->tx_packets++;
+		u64_stats_update_end(&tstats->syncp);
+	} else {
+		dev->stats.tx_errors++;
+		dev->stats.tx_aborted_errors++;
+	}
 
-	iptunnel_xmit(skb, dev);
 	return NETDEV_TX_OK;
 
 #if IS_ENABLED(CONFIG_IPV6)
@@ -1374,6 +1421,10 @@  static int ipgre_tunnel_init(struct net_device *dev)
 		return err;
 	}
 
+	if (!(tunnel->parms.o_flags & GRE_SEQ)) {
+		dev->features |= NETIF_F_GSO_SOFTWARE;
+		dev->hw_features |= NETIF_F_GSO_SOFTWARE;
+	}
 	return 0;
 }
 
@@ -1564,6 +1615,10 @@  static int ipgre_tap_init(struct net_device *dev)
 	if (!dev->tstats)
 		return -ENOMEM;
 
+	if (!(tunnel->parms.o_flags & GRE_SEQ)) {
+		dev->features |= NETIF_F_GSO_SOFTWARE;
+		dev->hw_features |= NETIF_F_GSO_SOFTWARE;
+	}
 	return 0;
 }
 
@@ -1587,6 +1642,9 @@  static void ipgre_tap_setup(struct net_device *dev)
 
 	dev->iflink		= 0;
 	dev->features		|= NETIF_F_NETNS_LOCAL;
+
+	dev->features		|= GRE_FEATURES;
+	dev->hw_features	|= GRE_FEATURES;
 }
 
 static int ipgre_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[],
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 5227194..8c47b7d 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -3032,6 +3032,7 @@  struct sk_buff *tcp_tso_segment(struct sk_buff *skb,
 			       SKB_GSO_DODGY |
 			       SKB_GSO_TCP_ECN |
 			       SKB_GSO_TCPV6 |
+			       SKB_GSO_GRE |
 			       0) ||
 			     !(type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))))
 			goto out;
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index e0610e4..6824272 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -2304,7 +2304,8 @@  struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
 		/* Packet is from an untrusted source, reset gso_segs. */
 		int type = skb_shinfo(skb)->gso_type;
 
-		if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY) ||
+		if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY |
+				      SKB_GSO_GRE) ||
 			     !(type & (SKB_GSO_UDP))))
 			goto out;
 
diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index f26f0da..8234c1d 100644
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -99,6 +99,7 @@  static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
 		     ~(SKB_GSO_UDP |
 		       SKB_GSO_DODGY |
 		       SKB_GSO_TCP_ECN |
+		       SKB_GSO_GRE |
 		       SKB_GSO_TCPV6 |
 		       0)))
 		goto out;
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
index 0c8934a..cf05cf0 100644
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -56,7 +56,8 @@  static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
 		/* Packet is from an untrusted source, reset gso_segs. */
 		int type = skb_shinfo(skb)->gso_type;
 
-		if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY) ||
+		if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY |
+				      SKB_GSO_GRE) ||
 			     !(type & (SKB_GSO_UDP))))
 			goto out;