diff mbox series

[bpf-next] bpf: fix memory leak in bpf_lwt_xmit_reroute

Message ID 20190214060939.101851-1-posk@google.com
State Changes Requested
Delegated to: BPF Maintainers
Headers show
Series [bpf-next] bpf: fix memory leak in bpf_lwt_xmit_reroute | expand

Commit Message

Peter Oskolkov Feb. 14, 2019, 6:09 a.m. UTC
On error the skb should be freed. Tested with diff/steps
provided by David Ahern.

Reported-by: David Ahern <dsahern@gmail.com>
Fixes: 3bd0b15281af ("bpf: add handling of BPF_LWT_REROUTE to lwt_bpf.c")
Signed-off-by: Peter Oskolkov <posk@google.com>
---
 net/core/lwt_bpf.c | 24 ++++++++++++++++--------
 1 file changed, 16 insertions(+), 8 deletions(-)

Comments

David Ahern Feb. 14, 2019, 6:11 p.m. UTC | #1
On 2/13/19 11:09 PM, Peter Oskolkov wrote:
> On error the skb should be freed. Tested with diff/steps
> provided by David Ahern.
> 
> Reported-by: David Ahern <dsahern@gmail.com>
> Fixes: 3bd0b15281af ("bpf: add handling of BPF_LWT_REROUTE to lwt_bpf.c")
> Signed-off-by: Peter Oskolkov <posk@google.com>
> ---
>  net/core/lwt_bpf.c | 24 ++++++++++++++++--------
>  1 file changed, 16 insertions(+), 8 deletions(-)
> 
> diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c
> index 32251f3fcda0..f3273cbb6b22 100644
> --- a/net/core/lwt_bpf.c
> +++ b/net/core/lwt_bpf.c
> @@ -179,18 +179,19 @@ static int bpf_lwt_xmit_reroute(struct sk_buff *skb)
>  	struct net_device *l3mdev = l3mdev_master_dev_rcu(skb_dst(skb)->dev);
>  	int oif = l3mdev ? l3mdev->ifindex : 0;
>  	struct dst_entry *dst = NULL;
> +	int err = -EAFNOSUPPORT;
>  	struct sock *sk;
>  	struct net *net;
>  	bool ipv4;
> -	int err;
>  
>  	if (skb->protocol == htons(ETH_P_IP))
>  		ipv4 = true;
>  	else if (skb->protocol == htons(ETH_P_IPV6))
>  		ipv4 = false;
>  	else
> -		return -EAFNOSUPPORT;
> +		goto err;
>  
> +	err = -EINVAL;
>  	sk = sk_to_full_sk(skb->sk);
>  	if (sk) {
>  		if (sk->sk_bound_dev_if)
> @@ -216,7 +217,7 @@ static int bpf_lwt_xmit_reroute(struct sk_buff *skb)
>  
>  		rt = ip_route_output_key(net, &fl4);
>  		if (IS_ERR(rt))
> -			return -EINVAL;
> +			goto err;
>  		dst = &rt->dst;
>  	} else {
>  		struct ipv6hdr *iph6 = ipv6_hdr(skb);
> @@ -231,12 +232,15 @@ static int bpf_lwt_xmit_reroute(struct sk_buff *skb)
>  		fl6.saddr = iph6->saddr;
>  
>  		err = ipv6_stub->ipv6_dst_lookup(net, skb->sk, &dst, &fl6);
> -		if (err || IS_ERR(dst))
> -			return -EINVAL;
> +		if (err || IS_ERR(dst)) {
> +			err = -EINVAL;
> +			goto err;
> +		}
>  	}
>  	if (unlikely(dst->error)) {
>  		dst_release(dst);
> -		return -EINVAL;
> +		err = -EINVAL;
> +		goto err;
>  	}
>  
>  	/* Although skb header was reserved in bpf_lwt_push_ip_encap(), it

EINVAL is a confusing return code; it is not an EINVAL problem, it is a
routing problem:

...
starting egress IPv4 encap test
ping: sendmsg: Invalid argument
FAIL: test_ping: 1


Versus returning the error from the lookup:
...
starting egress IPv4 encap test
ping: sendmsg: No route to host
FAIL: test_ping: 1


diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c
index f3273cbb6b22..a1901ba319fc 100644
--- a/net/core/lwt_bpf.c
+++ b/net/core/lwt_bpf.c
@@ -191,7 +191,6 @@ static int bpf_lwt_xmit_reroute(struct sk_buff *skb)
        else
                goto err;

-       err = -EINVAL;
        sk = sk_to_full_sk(skb->sk);
        if (sk) {
                if (sk->sk_bound_dev_if)
@@ -216,8 +215,10 @@ static int bpf_lwt_xmit_reroute(struct sk_buff *skb)
                fl4.saddr = iph->saddr;

                rt = ip_route_output_key(net, &fl4);
-               if (IS_ERR(rt))
+               if (IS_ERR(rt)) {
+                       err = PTR_ERR(rt);
                        goto err;
+               }
                dst = &rt->dst;
        } else {
                struct ipv6hdr *iph6 = ipv6_hdr(skb);
@@ -232,14 +233,12 @@ static int bpf_lwt_xmit_reroute(struct sk_buff *skb)
                fl6.saddr = iph6->saddr;

                err = ipv6_stub->ipv6_dst_lookup(net, skb->sk, &dst, &fl6);
-               if (err || IS_ERR(dst)) {
-                       err = -EINVAL;
+               if (err || IS_ERR(dst))
                        goto err;
-               }
        }
        if (unlikely(dst->error)) {
                dst_release(dst);
-               err = -EINVAL;
+               err = dst->error;
                goto err;
        }




> @@ -246,17 +250,21 @@ static int bpf_lwt_xmit_reroute(struct sk_buff *skb)
>  	 */
>  	err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
>  	if (unlikely(err))
> -		return err;
> +		goto err;
>  
>  	skb_dst_drop(skb);
>  	skb_dst_set(skb, dst);
>  
>  	err = dst_output(dev_net(skb_dst(skb)->dev), skb->sk, skb);
>  	if (unlikely(err))
> -		return err;
> +		goto err;
>  
>  	/* ip[6]_finish_output2 understand LWTUNNEL_XMIT_DONE */
>  	return LWTUNNEL_XMIT_DONE;
> +
> +err:
> +	kfree_skb(skb);
> +	return err;
>  }
>  
>  static int bpf_xmit(struct sk_buff *skb)
> 

I figured it was a leaked skb.

Also, the test script needs to be updated as well with the negative
tests -- ie., toggle the route from a dev/gateway to a reject
(e.g.,unreachable) and back.

Also, don't exit on the first failure - run all of them.

Having the result line up is more user friendly. e.g.,

# ./fib_tests.sh

Single path route test
    Start point
    TEST: IPv4 fibmatch                                     [ OK ]
    TEST: IPv6 fibmatch                                     [ OK ]
    Nexthop device deleted
    TEST: IPv4 fibmatch - no route                          [ OK ]
    TEST: IPv6 fibmatch - no route                          [ OK ]
...
Peter Oskolkov Feb. 14, 2019, 6:42 p.m. UTC | #2
On Thu, Feb 14, 2019 at 10:11 AM David Ahern <dsahern@gmail.com> wrote:
>
> On 2/13/19 11:09 PM, Peter Oskolkov wrote:
> > On error the skb should be freed. Tested with diff/steps
> > provided by David Ahern.
> >
> > Reported-by: David Ahern <dsahern@gmail.com>
> > Fixes: 3bd0b15281af ("bpf: add handling of BPF_LWT_REROUTE to lwt_bpf.c")
> > Signed-off-by: Peter Oskolkov <posk@google.com>
> > ---
> >  net/core/lwt_bpf.c | 24 ++++++++++++++++--------
> >  1 file changed, 16 insertions(+), 8 deletions(-)
> >
> > diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c
> > index 32251f3fcda0..f3273cbb6b22 100644
> > --- a/net/core/lwt_bpf.c
> > +++ b/net/core/lwt_bpf.c
> > @@ -179,18 +179,19 @@ static int bpf_lwt_xmit_reroute(struct sk_buff *skb)
> >       struct net_device *l3mdev = l3mdev_master_dev_rcu(skb_dst(skb)->dev);
> >       int oif = l3mdev ? l3mdev->ifindex : 0;
> >       struct dst_entry *dst = NULL;
> > +     int err = -EAFNOSUPPORT;
> >       struct sock *sk;
> >       struct net *net;
> >       bool ipv4;
> > -     int err;
> >
> >       if (skb->protocol == htons(ETH_P_IP))
> >               ipv4 = true;
> >       else if (skb->protocol == htons(ETH_P_IPV6))
> >               ipv4 = false;
> >       else
> > -             return -EAFNOSUPPORT;
> > +             goto err;
> >
> > +     err = -EINVAL;
> >       sk = sk_to_full_sk(skb->sk);
> >       if (sk) {
> >               if (sk->sk_bound_dev_if)
> > @@ -216,7 +217,7 @@ static int bpf_lwt_xmit_reroute(struct sk_buff *skb)
> >
> >               rt = ip_route_output_key(net, &fl4);
> >               if (IS_ERR(rt))
> > -                     return -EINVAL;
> > +                     goto err;
> >               dst = &rt->dst;
> >       } else {
> >               struct ipv6hdr *iph6 = ipv6_hdr(skb);
> > @@ -231,12 +232,15 @@ static int bpf_lwt_xmit_reroute(struct sk_buff *skb)
> >               fl6.saddr = iph6->saddr;
> >
> >               err = ipv6_stub->ipv6_dst_lookup(net, skb->sk, &dst, &fl6);
> > -             if (err || IS_ERR(dst))
> > -                     return -EINVAL;
> > +             if (err || IS_ERR(dst)) {
> > +                     err = -EINVAL;
> > +                     goto err;
> > +             }
> >       }
> >       if (unlikely(dst->error)) {
> >               dst_release(dst);
> > -             return -EINVAL;
> > +             err = -EINVAL;
> > +             goto err;
> >       }
> >
> >       /* Although skb header was reserved in bpf_lwt_push_ip_encap(), it
>
> EINVAL is a confusing return code; it is not an EINVAL problem, it is a
> routing problem:

Thanks, David! Sent a v2 of the patch.

>
> ...
> starting egress IPv4 encap test
> ping: sendmsg: Invalid argument
> FAIL: test_ping: 1
>
>
> Versus returning the error from the lookup:
> ...
> starting egress IPv4 encap test
> ping: sendmsg: No route to host
> FAIL: test_ping: 1
>
>
> diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c
> index f3273cbb6b22..a1901ba319fc 100644
> --- a/net/core/lwt_bpf.c
> +++ b/net/core/lwt_bpf.c
> @@ -191,7 +191,6 @@ static int bpf_lwt_xmit_reroute(struct sk_buff *skb)
>         else
>                 goto err;
>
> -       err = -EINVAL;
>         sk = sk_to_full_sk(skb->sk);
>         if (sk) {
>                 if (sk->sk_bound_dev_if)
> @@ -216,8 +215,10 @@ static int bpf_lwt_xmit_reroute(struct sk_buff *skb)
>                 fl4.saddr = iph->saddr;
>
>                 rt = ip_route_output_key(net, &fl4);
> -               if (IS_ERR(rt))
> +               if (IS_ERR(rt)) {
> +                       err = PTR_ERR(rt);
>                         goto err;
> +               }
>                 dst = &rt->dst;
>         } else {
>                 struct ipv6hdr *iph6 = ipv6_hdr(skb);
> @@ -232,14 +233,12 @@ static int bpf_lwt_xmit_reroute(struct sk_buff *skb)
>                 fl6.saddr = iph6->saddr;
>
>                 err = ipv6_stub->ipv6_dst_lookup(net, skb->sk, &dst, &fl6);
> -               if (err || IS_ERR(dst)) {
> -                       err = -EINVAL;
> +               if (err || IS_ERR(dst))
>                         goto err;
> -               }
>         }
>         if (unlikely(dst->error)) {
>                 dst_release(dst);
> -               err = -EINVAL;
> +               err = dst->error;
>                 goto err;
>         }
>
>
>
>
> > @@ -246,17 +250,21 @@ static int bpf_lwt_xmit_reroute(struct sk_buff *skb)
> >        */
> >       err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
> >       if (unlikely(err))
> > -             return err;
> > +             goto err;
> >
> >       skb_dst_drop(skb);
> >       skb_dst_set(skb, dst);
> >
> >       err = dst_output(dev_net(skb_dst(skb)->dev), skb->sk, skb);
> >       if (unlikely(err))
> > -             return err;
> > +             goto err;
> >
> >       /* ip[6]_finish_output2 understand LWTUNNEL_XMIT_DONE */
> >       return LWTUNNEL_XMIT_DONE;
> > +
> > +err:
> > +     kfree_skb(skb);
> > +     return err;
> >  }
> >
> >  static int bpf_xmit(struct sk_buff *skb)
> >
>
> I figured it was a leaked skb.
>
> Also, the test script needs to be updated as well with the negative
> tests -- ie., toggle the route from a dev/gateway to a reject
> (e.g.,unreachable) and back.
>
> Also, don't exit on the first failure - run all of them.

I'll refactor the test as you suggest here
when I add VRF and GRO tests in a couple of weeks, if this is OK.

>
> Having the result line up is more user friendly. e.g.,
>
> # ./fib_tests.sh
>
> Single path route test
>     Start point
>     TEST: IPv4 fibmatch                                     [ OK ]
>     TEST: IPv6 fibmatch                                     [ OK ]
>     Nexthop device deleted
>     TEST: IPv4 fibmatch - no route                          [ OK ]
>     TEST: IPv6 fibmatch - no route                          [ OK ]
> ...
David Ahern Feb. 14, 2019, 7:10 p.m. UTC | #3
On 2/14/19 11:42 AM, Peter Oskolkov wrote:
> I'll refactor the test as you suggest here
> when I add VRF and GRO tests in a couple of weeks, if this is OK.

IMO, the tests should go in with the feature, not a release later. If we
are at -rc6 this week then you might get next week as well.

The unreachable toggle is a fairly quick integration. GRO really should
also get in the same cycle as the feature. Preferably VRF tests as well
since you have the commands.

The pretty printing cleanup can be done later.
Peter Oskolkov Feb. 14, 2019, 7:28 p.m. UTC | #4
On Thu, Feb 14, 2019 at 11:10 AM David Ahern <dsahern@gmail.com> wrote:
>
> On 2/14/19 11:42 AM, Peter Oskolkov wrote:
> > I'll refactor the test as you suggest here
> > when I add VRF and GRO tests in a couple of weeks, if this is OK.
>
> IMO, the tests should go in with the feature, not a release later. If we
> are at -rc6 this week then you might get next week as well.
>
> The unreachable toggle is a fairly quick integration. GRO really should
> also get in the same cycle as the feature. Preferably VRF tests as well
> since you have the commands.

OK, I'll work on the negative tests and GRO first.

>
> The pretty printing cleanup can be done later.
David Ahern March 2, 2019, 2:27 a.m. UTC | #5
On 2/28/19 10:57 AM, Peter Oskolkov wrote:
> David: I'm not sure how to test GSO (I assume we are talking about GSO
> here) in
> the selftest: the encapping code sets SKB_GSO_DODGY flag, and veth does
> not support
> dodginess: "tx-gso-robust: off [fixed]".
> 
> If the "dodgy" flag is not set, then gso validation in dev.c passes, and
> large GSO packets
> happily go through; if the "dodgy" flag is set, "dodgy" GSO packets are
> rejected, TCP does
> segmentation, and non-GSO packets happily go through (with an mtu tweak
> to the LWT tunnel).
> 
> So I see three options:
> - add a sysctl to _not_ set SKB_GSO_DODGY flag in lwt_bpf.c =>
> handle_gso_type();
> - change veth to accept "dodgy" GSO packets
> - test the code "as is", meaning that GSO will be tried and disabled by
> TCP stack
> 
> Which approach would you prefer?
> 

definitely not a sysctl.

After that, I don't have a suggestion for GSO at the moment.
Willem de Bruijn March 4, 2019, 2:54 a.m. UTC | #6
On Fri, Mar 1, 2019 at 9:27 PM David Ahern <dsahern@gmail.com> wrote:
>
> On 2/28/19 10:57 AM, Peter Oskolkov wrote:
> > David: I'm not sure how to test GSO (I assume we are talking about GSO
> > here) in
> > the selftest: the encapping code sets SKB_GSO_DODGY flag, and veth does
> > not support
> > dodginess: "tx-gso-robust: off [fixed]".
> >
> > If the "dodgy" flag is not set, then gso validation in dev.c passes, and
> > large GSO packets
> > happily go through; if the "dodgy" flag is set, "dodgy" GSO packets are
> > rejected, TCP does
> > segmentation, and non-GSO packets happily go through (with an mtu tweak
> > to the LWT tunnel).

Very few devices unconditionally accept dodgy packets (only veth?).

A device that lacks the robust gso feature will cause a gso packet
with dodgy flag to enter software gso instead of passing to device
segmentation offload.

That should be perfect for checking that the packets can be segmented
correctly with the new header.

If the gso layer drops the packets, that is not due to dropping all
dodgy sources. It will be dropped somewhere else inside gso,
indication that something is not as expected with the packet.

> > So I see three options:
> > - add a sysctl to _not_ set SKB_GSO_DODGY flag in lwt_bpf.c =>
> > handle_gso_type();
> > - change veth to accept "dodgy" GSO packets

Neither, as these would bypass segmentation offload and pass the large
packet to the receive path. It is more interesting to validate the
packet in gso.

> > - test the code "as is", meaning that GSO will be tried and disabled by
> > TCP stack
> >
> > Which approach would you prefer?
> >
>
> definitely not a sysctl.
>
> After that, I don't have a suggestion for GSO at the moment.
Willem de Bruijn March 4, 2019, 4:05 a.m. UTC | #7
On Sun, Mar 3, 2019 at 9:55 PM Willem de Bruijn
<willemdebruijn.kernel@gmail.com> wrote:
>
> On Fri, Mar 1, 2019 at 9:27 PM David Ahern <dsahern@gmail.com> wrote:
> >
> > On 2/28/19 10:57 AM, Peter Oskolkov wrote:
> > > David: I'm not sure how to test GSO (I assume we are talking about GSO
> > > here) in
> > > the selftest: the encapping code sets SKB_GSO_DODGY flag, and veth does
> > > not support
> > > dodginess: "tx-gso-robust: off [fixed]".
> > >
> > > If the "dodgy" flag is not set, then gso validation in dev.c passes, and
> > > large GSO packets
> > > happily go through; if the "dodgy" flag is set, "dodgy" GSO packets are
> > > rejected, TCP does
> > > segmentation, and non-GSO packets happily go through (with an mtu tweak
> > > to the LWT tunnel).
>
> Very few devices unconditionally accept dodgy packets (only veth?).

virtio-net, I meant. But there are a few other virtual devices, like
macvlan and xen-netfront

> A device that lacks the robust gso feature will cause a gso packet
> with dodgy flag to enter software gso instead of passing to device
> segmentation offload.
>
> That should be perfect for checking that the packets can be segmented
> correctly with the new header.
>
> If the gso layer drops the packets, that is not due to dropping all
> dodgy sources. It will be dropped somewhere else inside gso,
> indication that something is not as expected with the packet.
>
> > > So I see three options:
> > > - add a sysctl to _not_ set SKB_GSO_DODGY flag in lwt_bpf.c =>
> > > handle_gso_type();
> > > - change veth to accept "dodgy" GSO packets
>
> Neither, as these would bypass segmentation offload and pass the large
> packet to the receive path. It is more interesting to validate the
> packet in gso.

That said, one solution would be to make veth gso_robust configurable
through ethtool, by advertising it in hw_features.



> > > - test the code "as is", meaning that GSO will be tried and disabled by
> > > TCP stack
> > >
> > > Which approach would you prefer?
> > >
> >
> > definitely not a sysctl.
> >
> > After that, I don't have a suggestion for GSO at the moment.
Peter Oskolkov March 4, 2019, 8:39 p.m. UTC | #8
On Sun, Mar 3, 2019 at 6:55 PM Willem de Bruijn
<willemdebruijn.kernel@gmail.com> wrote:
>
> On Fri, Mar 1, 2019 at 9:27 PM David Ahern <dsahern@gmail.com> wrote:
> >
> > On 2/28/19 10:57 AM, Peter Oskolkov wrote:
> > > David: I'm not sure how to test GSO (I assume we are talking about GSO
> > > here) in
> > > the selftest: the encapping code sets SKB_GSO_DODGY flag, and veth does
> > > not support
> > > dodginess: "tx-gso-robust: off [fixed]".
> > >
> > > If the "dodgy" flag is not set, then gso validation in dev.c passes, and
> > > large GSO packets
> > > happily go through; if the "dodgy" flag is set, "dodgy" GSO packets are
> > > rejected, TCP does
> > > segmentation, and non-GSO packets happily go through (with an mtu tweak
> > > to the LWT tunnel).
>
> Very few devices unconditionally accept dodgy packets (only veth?).
>
> A device that lacks the robust gso feature will cause a gso packet
> with dodgy flag to enter software gso instead of passing to device
> segmentation offload.
>
> That should be perfect for checking that the packets can be segmented
> correctly with the new header.
>
> If the gso layer drops the packets, that is not due to dropping all
> dodgy sources. It will be dropped somewhere else inside gso,
> indication that something is not as expected with the packet.
>
> > > So I see three options:
> > > - add a sysctl to _not_ set SKB_GSO_DODGY flag in lwt_bpf.c =>
> > > handle_gso_type();
> > > - change veth to accept "dodgy" GSO packets
>
> Neither, as these would bypass segmentation offload and pass the large
> packet to the receive path. It is more interesting to validate the
> packet in gso.

I found the problem: skb->inner_protocol was not set, so software GSO
fallback failed.  I have a patch that fixes the issue: IPIP+GRE+TCP
gso works! net-next is closed though... Will have to wait for net-next
to reopen.


>
> > > - test the code "as is", meaning that GSO will be tried and disabled by
> > > TCP stack
> > >
> > > Which approach would you prefer?
> > >
> >
> > definitely not a sysctl.
> >
> > After that, I don't have a suggestion for GSO at the moment.
David Ahern March 4, 2019, 9:03 p.m. UTC | #9
On 3/4/19 1:39 PM, Peter Oskolkov wrote:
> I found the problem: skb->inner_protocol was not set, so software GSO
> fallback failed.  I have a patch that fixes the issue: IPIP+GRE+TCP
> gso works! net-next is closed though... Will have to wait for net-next
> to reopen.

That's a bug fix. I suggest sending now.
Peter Oskolkov March 4, 2019, 10:37 p.m. UTC | #10
On Mon, Mar 4, 2019 at 1:03 PM David Ahern <dsahern@gmail.com> wrote:
>
> On 3/4/19 1:39 PM, Peter Oskolkov wrote:
> > I found the problem: skb->inner_protocol was not set, so software GSO
> > fallback failed.  I have a patch that fixes the issue: IPIP+GRE+TCP
> > gso works! net-next is closed though... Will have to wait for net-next
> > to reopen.
>
> That's a bug fix. I suggest sending now.

I see the encap patches neither in net nor in bpf trees, only in
net-next and bpf-next. And *-next trees are closed, so there is
nowhere to send the fix. Am I missing something?
Eric Dumazet March 4, 2019, 11:28 p.m. UTC | #11
On 03/04/2019 02:37 PM, Peter Oskolkov wrote:
> On Mon, Mar 4, 2019 at 1:03 PM David Ahern <dsahern@gmail.com> wrote:
>>
>> On 3/4/19 1:39 PM, Peter Oskolkov wrote:
>>> I found the problem: skb->inner_protocol was not set, so software GSO
>>> fallback failed.  I have a patch that fixes the issue: IPIP+GRE+TCP
>>> gso works! net-next is closed though... Will have to wait for net-next
>>> to reopen.
>>
>> That's a bug fix. I suggest sending now.
> 
> I see the encap patches neither in net nor in bpf trees, only in
> net-next and bpf-next. And *-next trees are closed, so there is
> nowhere to send the fix. Am I missing something?
> 

David has not yet sent his pull request to Linus.

This means your fix needs to target net-next

net-next is closed for patches that are meant for linux-5.2,
but 'open' for fixes (targeting 5.1)
diff mbox series

Patch

diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c
index 32251f3fcda0..f3273cbb6b22 100644
--- a/net/core/lwt_bpf.c
+++ b/net/core/lwt_bpf.c
@@ -179,18 +179,19 @@  static int bpf_lwt_xmit_reroute(struct sk_buff *skb)
 	struct net_device *l3mdev = l3mdev_master_dev_rcu(skb_dst(skb)->dev);
 	int oif = l3mdev ? l3mdev->ifindex : 0;
 	struct dst_entry *dst = NULL;
+	int err = -EAFNOSUPPORT;
 	struct sock *sk;
 	struct net *net;
 	bool ipv4;
-	int err;
 
 	if (skb->protocol == htons(ETH_P_IP))
 		ipv4 = true;
 	else if (skb->protocol == htons(ETH_P_IPV6))
 		ipv4 = false;
 	else
-		return -EAFNOSUPPORT;
+		goto err;
 
+	err = -EINVAL;
 	sk = sk_to_full_sk(skb->sk);
 	if (sk) {
 		if (sk->sk_bound_dev_if)
@@ -216,7 +217,7 @@  static int bpf_lwt_xmit_reroute(struct sk_buff *skb)
 
 		rt = ip_route_output_key(net, &fl4);
 		if (IS_ERR(rt))
-			return -EINVAL;
+			goto err;
 		dst = &rt->dst;
 	} else {
 		struct ipv6hdr *iph6 = ipv6_hdr(skb);
@@ -231,12 +232,15 @@  static int bpf_lwt_xmit_reroute(struct sk_buff *skb)
 		fl6.saddr = iph6->saddr;
 
 		err = ipv6_stub->ipv6_dst_lookup(net, skb->sk, &dst, &fl6);
-		if (err || IS_ERR(dst))
-			return -EINVAL;
+		if (err || IS_ERR(dst)) {
+			err = -EINVAL;
+			goto err;
+		}
 	}
 	if (unlikely(dst->error)) {
 		dst_release(dst);
-		return -EINVAL;
+		err = -EINVAL;
+		goto err;
 	}
 
 	/* Although skb header was reserved in bpf_lwt_push_ip_encap(), it
@@ -246,17 +250,21 @@  static int bpf_lwt_xmit_reroute(struct sk_buff *skb)
 	 */
 	err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
 	if (unlikely(err))
-		return err;
+		goto err;
 
 	skb_dst_drop(skb);
 	skb_dst_set(skb, dst);
 
 	err = dst_output(dev_net(skb_dst(skb)->dev), skb->sk, skb);
 	if (unlikely(err))
-		return err;
+		goto err;
 
 	/* ip[6]_finish_output2 understand LWTUNNEL_XMIT_DONE */
 	return LWTUNNEL_XMIT_DONE;
+
+err:
+	kfree_skb(skb);
+	return err;
 }
 
 static int bpf_xmit(struct sk_buff *skb)