diff mbox series

[net] net: fix use-after-free in __netif_receive_skb_core

Message ID e909b8fe24b9eac71de52c4f80f7f3f6e5770199.1562766613.git.sd@queasysnail.net
State Changes Requested
Delegated to: David Miller
Headers show
Series [net] net: fix use-after-free in __netif_receive_skb_core | expand

Commit Message

Sabrina Dubroca July 10, 2019, 1:52 p.m. UTC
When __netif_receive_skb_core handles a shared skb, it can be
reallocated in a few different places:
 - the device's rx_handler
 - vlan_do_receive
 - skb_vlan_untag

To deal with that, rx_handlers and vlan_do_receive get passed a
reference to the skb, and skb_vlan_untag just returns the new
skb. This was not a problem until commit 88eb1944e18c ("net: core:
propagate SKB lists through packet_type lookup"), which moved the
final handling of the skb via pt_prev out of
__netif_receive_skb_core. After this commit, when the skb is
reallocated by __netif_receive_skb_core, KASAN reports a
use-after-free on the old skb:

BUG: KASAN: use-after-free in __netif_receive_skb_one_core+0x15c/0x180
Call Trace:
 <IRQ>
 __netif_receive_skb_one_core+0x15c/0x180
 process_backlog+0x1b5/0x630
 ? net_rx_action+0x247/0xd00
 net_rx_action+0x3fa/0xd00
 ? napi_complete_done+0x360/0x360
 __do_softirq+0x257/0xa0b
 do_softirq_own_stack+0x2a/0x40
 </IRQ>
 ? __dev_queue_xmit+0x12ba/0x3120
 do_softirq+0x5d/0x60
 [...]

Allocated by task 505:
 __kasan_kmalloc.constprop.0+0xd6/0x140
 kmem_cache_alloc+0xd4/0x2e0
 skb_clone+0x106/0x300
 deliver_clone+0x3f/0xa0
 maybe_deliver+0x1c0/0x2b0
 br_flood+0xd4/0x320
 br_dev_xmit+0xbc0/0x1080
 dev_hard_start_xmit+0x139/0x750
 __dev_queue_xmit+0x24eb/0x3120
 packet_sendmsg+0x1bfa/0x50e0
 [...]

Freed by task 505:
 __kasan_slab_free+0x138/0x1e0
 kmem_cache_free+0xa2/0x2e0
 macsec_handle_frame+0xa24/0x2e60
 __netif_receive_skb_core+0xe2a/0x2c90
 __netif_receive_skb_one_core+0x96/0x180
 process_backlog+0x1b5/0x630
 net_rx_action+0x3fa/0xd00
 __do_softirq+0x257/0xa0b

The solution is to pass a reference to the skb to
__netif_receive_skb_core, as we already do with the rx_handlers, so
that its callers use the new skb.

Fixes: 88eb1944e18c ("net: core: propagate SKB lists through packet_type lookup")
Reported-by: Andreas Steinmetz <ast@domdv.de>
Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
---
 net/core/dev.c | 26 ++++++++++++++++++++------
 1 file changed, 20 insertions(+), 6 deletions(-)

Comments

Edward Cree July 10, 2019, 3:07 p.m. UTC | #1
On 10/07/2019 14:52, Sabrina Dubroca wrote:
> When __netif_receive_skb_core handles a shared skb, it can be
> reallocated in a few different places:
>  - the device's rx_handler
>  - vlan_do_receive
>  - skb_vlan_untag
>
> To deal with that, rx_handlers and vlan_do_receive get passed a
> reference to the skb, and skb_vlan_untag just returns the new
> skb. This was not a problem until commit 88eb1944e18c ("net: core:
> propagate SKB lists through packet_type lookup"), which moved the
> final handling of the skb via pt_prev out of
> __netif_receive_skb_core. After this commit, when the skb is
> reallocated by __netif_receive_skb_core, KASAN reports a
> use-after-free on the old skb:
>
> BUG: KASAN: use-after-free in __netif_receive_skb_one_core+0x15c/0x180
> Call Trace:
>  <IRQ>
>  __netif_receive_skb_one_core+0x15c/0x180
>  process_backlog+0x1b5/0x630
>  ? net_rx_action+0x247/0xd00
>  net_rx_action+0x3fa/0xd00
>  ? napi_complete_done+0x360/0x360
>  __do_softirq+0x257/0xa0b
>  do_softirq_own_stack+0x2a/0x40
>  </IRQ>
>  ? __dev_queue_xmit+0x12ba/0x3120
>  do_softirq+0x5d/0x60
>  [...]
>
> Allocated by task 505:
>  __kasan_kmalloc.constprop.0+0xd6/0x140
>  kmem_cache_alloc+0xd4/0x2e0
>  skb_clone+0x106/0x300
>  deliver_clone+0x3f/0xa0
>  maybe_deliver+0x1c0/0x2b0
>  br_flood+0xd4/0x320
>  br_dev_xmit+0xbc0/0x1080
>  dev_hard_start_xmit+0x139/0x750
>  __dev_queue_xmit+0x24eb/0x3120
>  packet_sendmsg+0x1bfa/0x50e0
>  [...]
>
> Freed by task 505:
>  __kasan_slab_free+0x138/0x1e0
>  kmem_cache_free+0xa2/0x2e0
>  macsec_handle_frame+0xa24/0x2e60
>  __netif_receive_skb_core+0xe2a/0x2c90
>  __netif_receive_skb_one_core+0x96/0x180
>  process_backlog+0x1b5/0x630
>  net_rx_action+0x3fa/0xd00
>  __do_softirq+0x257/0xa0b
>
> The solution is to pass a reference to the skb to
> __netif_receive_skb_core, as we already do with the rx_handlers, so
> that its callers use the new skb.
>
> Fixes: 88eb1944e18c ("net: core: propagate SKB lists through packet_type lookup")
> Reported-by: Andreas Steinmetz <ast@domdv.de>
> Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
> ---
>  net/core/dev.c | 26 ++++++++++++++++++++------
>  1 file changed, 20 insertions(+), 6 deletions(-)
>
> diff --git a/net/core/dev.c b/net/core/dev.c
> index d6edd218babd..0bbf6d2a9c32 100644
> --- a/net/core/dev.c
> +++ b/net/core/dev.c
> @@ -4809,11 +4809,12 @@ static inline int nf_ingress(struct sk_buff *skb, struct packet_type **pt_prev,
>  	return 0;
>  }
>  
> -static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc,
> +static int __netif_receive_skb_core(struct sk_buff **pskb, bool pfmemalloc,
>  				    struct packet_type **ppt_prev)
>  {
>  	struct packet_type *ptype, *pt_prev;
>  	rx_handler_func_t *rx_handler;
> +	struct sk_buff *skb = *pskb;
Would it not be simpler just to change all users of skb to *pskb?
Then you avoid having to keep doing "*pskb = skb;" whenever skb changes
 (with concomitant risk of bugs if one gets missed).

-Ed
Sabrina Dubroca July 10, 2019, 10:47 p.m. UTC | #2
2019-07-10, 16:07:43 +0100, Edward Cree wrote:
> On 10/07/2019 14:52, Sabrina Dubroca wrote:
> > -static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc,
> > +static int __netif_receive_skb_core(struct sk_buff **pskb, bool pfmemalloc,
> >  				    struct packet_type **ppt_prev)
> >  {
> >  	struct packet_type *ptype, *pt_prev;
> >  	rx_handler_func_t *rx_handler;
> > +	struct sk_buff *skb = *pskb;
> Would it not be simpler just to change all users of skb to *pskb?
> Then you avoid having to keep doing "*pskb = skb;" whenever skb changes
>  (with concomitant risk of bugs if one gets missed).

Yes, that would be less risky. I wrote a version of the patch that did
exactly that, but found it really too ugly (both the patch and the
resulting code). We have more than 50 occurences of skb, including
things like:

    atomic_long_inc(&skb->dev->rx_dropped);
Edward Cree July 12, 2019, 3:29 p.m. UTC | #3
On 10/07/2019 23:47, Sabrina Dubroca wrote:
> 2019-07-10, 16:07:43 +0100, Edward Cree wrote:
>> On 10/07/2019 14:52, Sabrina Dubroca wrote:
>>> -static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc,
>>> +static int __netif_receive_skb_core(struct sk_buff **pskb, bool pfmemalloc,
>>>  				    struct packet_type **ppt_prev)
>>>  {
>>>  	struct packet_type *ptype, *pt_prev;
>>>  	rx_handler_func_t *rx_handler;
>>> +	struct sk_buff *skb = *pskb;
>> Would it not be simpler just to change all users of skb to *pskb?
>> Then you avoid having to keep doing "*pskb = skb;" whenever skb changes
>>  (with concomitant risk of bugs if one gets missed).
> Yes, that would be less risky. I wrote a version of the patch that did
> exactly that, but found it really too ugly (both the patch and the
> resulting code).
If you've still got that version (or can dig it out of your reflog), can
 you post it so we can see just how ugly it turns out?

>  We have more than 50 occurences of skb, including
> things like:
>
>     atomic_long_inc(&skb->dev->rx_dropped);
Ooh, yes, I can see why that ends up looking funny...

Here's a thought, how about switching round the return-vs-pass-by-pointer
 and writing:

static struct sk_buff *__netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc,
                                                struct packet_type **ppt_prev, int *ret)
?
(Although, you might want to rename 'ret' in that case.)

Does that make things any less ugly?
-Ed
Jonathan Lemon July 12, 2019, 5:06 p.m. UTC | #4
On 12 Jul 2019, at 8:29, Edward Cree wrote:

> On 10/07/2019 23:47, Sabrina Dubroca wrote:
>> 2019-07-10, 16:07:43 +0100, Edward Cree wrote:
>>> On 10/07/2019 14:52, Sabrina Dubroca wrote:
>>>> -static int __netif_receive_skb_core(struct sk_buff *skb, bool 
>>>> pfmemalloc,
>>>> +static int __netif_receive_skb_core(struct sk_buff **pskb, bool 
>>>> pfmemalloc,
>>>>  				    struct packet_type **ppt_prev)
>>>>  {
>>>>  	struct packet_type *ptype, *pt_prev;
>>>>  	rx_handler_func_t *rx_handler;
>>>> +	struct sk_buff *skb = *pskb;
>>> Would it not be simpler just to change all users of skb to *pskb?
>>> Then you avoid having to keep doing "*pskb = skb;" whenever skb 
>>> changes
>>>  (with concomitant risk of bugs if one gets missed).
>> Yes, that would be less risky. I wrote a version of the patch that 
>> did
>> exactly that, but found it really too ugly (both the patch and the
>> resulting code).
> If you've still got that version (or can dig it out of your reflog), 
> can
>  you post it so we can see just how ugly it turns out?
>
>>  We have more than 50 occurences of skb, including
>> things like:
>>
>>     atomic_long_inc(&skb->dev->rx_dropped);
> Ooh, yes, I can see why that ends up looking funny...
>
> Here's a thought, how about switching round the 
> return-vs-pass-by-pointer
>  and writing:
>
> static struct sk_buff *__netif_receive_skb_core(struct sk_buff *skb, 
> bool pfmemalloc,
>                                                 struct packet_type 
> **ppt_prev, int *ret)
> ?
> (Although, you might want to rename 'ret' in that case.)
>
> Does that make things any less ugly?

That was actually my first thought as well - this seems to fit well with 
the
other APIS which can return a different skb.
Sabrina Dubroca July 16, 2019, 3:26 p.m. UTC | #5
2019-07-12, 16:29:48 +0100, Edward Cree wrote:
> On 10/07/2019 23:47, Sabrina Dubroca wrote:
> > 2019-07-10, 16:07:43 +0100, Edward Cree wrote:
> >> On 10/07/2019 14:52, Sabrina Dubroca wrote:
> >>> -static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc,
> >>> +static int __netif_receive_skb_core(struct sk_buff **pskb, bool pfmemalloc,
> >>>  				    struct packet_type **ppt_prev)
> >>>  {
> >>>  	struct packet_type *ptype, *pt_prev;
> >>>  	rx_handler_func_t *rx_handler;
> >>> +	struct sk_buff *skb = *pskb;
> >> Would it not be simpler just to change all users of skb to *pskb?
> >> Then you avoid having to keep doing "*pskb = skb;" whenever skb changes
> >>  (with concomitant risk of bugs if one gets missed).
> > Yes, that would be less risky. I wrote a version of the patch that did
> > exactly that, but found it really too ugly (both the patch and the
> > resulting code).
> If you've still got that version (or can dig it out of your reflog), can
>  you post it so we can see just how ugly it turns out?

No, I didn't even commit it. Rewrote it now, it's rewriting over 1/4
of the lines. Considering that the patch would need to go in stable, I
don't think that's appropriate.

(This has been only compiled, not actually tested)

diff --git a/net/core/dev.c b/net/core/dev.c
index fc676b2610e3..5fb2a15d42e1 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4799,7 +4799,7 @@ static inline int nf_ingress(struct sk_buff *skb, struct packet_type **pt_prev,
 	return 0;
 }
 
-static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc,
+static int __netif_receive_skb_core(struct sk_buff **pskb, bool pfmemalloc,
 				    struct packet_type **ppt_prev)
 {
 	struct packet_type *ptype, *pt_prev;
@@ -4809,21 +4809,21 @@ static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc,
 	int ret = NET_RX_DROP;
 	__be16 type;
 
-	net_timestamp_check(!netdev_tstamp_prequeue, skb);
+	net_timestamp_check(!netdev_tstamp_prequeue, *pskb);
 
-	trace_netif_receive_skb(skb);
+	trace_netif_receive_skb(*pskb);
 
-	orig_dev = skb->dev;
+	orig_dev = (*pskb)->dev;
 
-	skb_reset_network_header(skb);
-	if (!skb_transport_header_was_set(skb))
-		skb_reset_transport_header(skb);
-	skb_reset_mac_len(skb);
+	skb_reset_network_header(*pskb);
+	if (!skb_transport_header_was_set(*pskb))
+		skb_reset_transport_header(*pskb);
+	skb_reset_mac_len(*pskb);
 
 	pt_prev = NULL;
 
 another_round:
-	skb->skb_iif = skb->dev->ifindex;
+	(*pskb)->skb_iif = (*pskb)->dev->ifindex;
 
 	__this_cpu_inc(softnet_data.processed);
 
@@ -4831,22 +4831,22 @@ static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc,
 		int ret2;
 
 		preempt_disable();
-		ret2 = do_xdp_generic(rcu_dereference(skb->dev->xdp_prog), skb);
+		ret2 = do_xdp_generic(rcu_dereference((*pskb)->dev->xdp_prog), *pskb);
 		preempt_enable();
 
 		if (ret2 != XDP_PASS)
 			return NET_RX_DROP;
-		skb_reset_mac_len(skb);
+		skb_reset_mac_len(*pskb);
 	}
 
-	if (skb->protocol == cpu_to_be16(ETH_P_8021Q) ||
-	    skb->protocol == cpu_to_be16(ETH_P_8021AD)) {
-		skb = skb_vlan_untag(skb);
-		if (unlikely(!skb))
+	if ((*pskb)->protocol == cpu_to_be16(ETH_P_8021Q) ||
+	    (*pskb)->protocol == cpu_to_be16(ETH_P_8021AD)) {
+		*pskb = skb_vlan_untag(*pskb);
+		if (unlikely(!*pskb))
 			goto out;
 	}
 
-	if (skb_skip_tc_classify(skb))
+	if (skb_skip_tc_classify(*pskb))
 		goto skip_classify;
 
 	if (pfmemalloc)
@@ -4854,50 +4854,50 @@ static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc,
 
 	list_for_each_entry_rcu(ptype, &ptype_all, list) {
 		if (pt_prev)
-			ret = deliver_skb(skb, pt_prev, orig_dev);
+			ret = deliver_skb(*pskb, pt_prev, orig_dev);
 		pt_prev = ptype;
 	}
 
-	list_for_each_entry_rcu(ptype, &skb->dev->ptype_all, list) {
+	list_for_each_entry_rcu(ptype, &(*pskb)->dev->ptype_all, list) {
 		if (pt_prev)
-			ret = deliver_skb(skb, pt_prev, orig_dev);
+			ret = deliver_skb(*pskb, pt_prev, orig_dev);
 		pt_prev = ptype;
 	}
 
 skip_taps:
 #ifdef CONFIG_NET_INGRESS
 	if (static_branch_unlikely(&ingress_needed_key)) {
-		skb = sch_handle_ingress(skb, &pt_prev, &ret, orig_dev);
-		if (!skb)
+		*pskb = sch_handle_ingress(*pskb, &pt_prev, &ret, orig_dev);
+		if (!*pskb)
 			goto out;
 
-		if (nf_ingress(skb, &pt_prev, &ret, orig_dev) < 0)
+		if (nf_ingress(*pskb, &pt_prev, &ret, orig_dev) < 0)
 			goto out;
 	}
 #endif
-	skb_reset_tc(skb);
+	skb_reset_tc(*pskb);
 skip_classify:
-	if (pfmemalloc && !skb_pfmemalloc_protocol(skb))
+	if (pfmemalloc && !skb_pfmemalloc_protocol(*pskb))
 		goto drop;
 
-	if (skb_vlan_tag_present(skb)) {
+	if (skb_vlan_tag_present(*pskb)) {
 		if (pt_prev) {
-			ret = deliver_skb(skb, pt_prev, orig_dev);
+			ret = deliver_skb(*pskb, pt_prev, orig_dev);
 			pt_prev = NULL;
 		}
-		if (vlan_do_receive(&skb))
+		if (vlan_do_receive(pskb))
 			goto another_round;
-		else if (unlikely(!skb))
+		else if (unlikely(!*pskb))
 			goto out;
 	}
 
-	rx_handler = rcu_dereference(skb->dev->rx_handler);
+	rx_handler = rcu_dereference((*pskb)->dev->rx_handler);
 	if (rx_handler) {
 		if (pt_prev) {
-			ret = deliver_skb(skb, pt_prev, orig_dev);
+			ret = deliver_skb(*pskb, pt_prev, orig_dev);
 			pt_prev = NULL;
 		}
-		switch (rx_handler(&skb)) {
+		switch (rx_handler(pskb)) {
 		case RX_HANDLER_CONSUMED:
 			ret = NET_RX_SUCCESS;
 			goto out;
@@ -4912,29 +4912,29 @@ static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc,
 		}
 	}
 
-	if (unlikely(skb_vlan_tag_present(skb))) {
+	if (unlikely(skb_vlan_tag_present(*pskb))) {
 check_vlan_id:
-		if (skb_vlan_tag_get_id(skb)) {
+		if (skb_vlan_tag_get_id(*pskb)) {
 			/* Vlan id is non 0 and vlan_do_receive() above couldn't
 			 * find vlan device.
 			 */
-			skb->pkt_type = PACKET_OTHERHOST;
-		} else if (skb->protocol == cpu_to_be16(ETH_P_8021Q) ||
-			   skb->protocol == cpu_to_be16(ETH_P_8021AD)) {
+			(*pskb)->pkt_type = PACKET_OTHERHOST;
+		} else if ((*pskb)->protocol == cpu_to_be16(ETH_P_8021Q) ||
+			   (*pskb)->protocol == cpu_to_be16(ETH_P_8021AD)) {
 			/* Outer header is 802.1P with vlan 0, inner header is
 			 * 802.1Q or 802.1AD and vlan_do_receive() above could
 			 * not find vlan dev for vlan id 0.
 			 */
-			__vlan_hwaccel_clear_tag(skb);
-			skb = skb_vlan_untag(skb);
-			if (unlikely(!skb))
+			__vlan_hwaccel_clear_tag(*pskb);
+			*pskb = skb_vlan_untag(*pskb);
+			if (unlikely(!*pskb))
 				goto out;
-			if (vlan_do_receive(&skb))
+			if (vlan_do_receive(pskb))
 				/* After stripping off 802.1P header with vlan 0
 				 * vlan dev is found for inner header.
 				 */
 				goto another_round;
-			else if (unlikely(!skb))
+			else if (unlikely(!*pskb))
 				goto out;
 			else
 				/* We have stripped outer 802.1P vlan 0 header.
@@ -4944,40 +4944,40 @@ static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc,
 				goto check_vlan_id;
 		}
 		/* Note: we might in the future use prio bits
-		 * and set skb->priority like in vlan_do_receive()
+		 * and set (*pskb)->priority like in vlan_do_receive()
 		 * For the time being, just ignore Priority Code Point
 		 */
-		__vlan_hwaccel_clear_tag(skb);
+		__vlan_hwaccel_clear_tag(*pskb);
 	}
 
-	type = skb->protocol;
+	type = (*pskb)->protocol;
 
 	/* deliver only exact match when indicated */
 	if (likely(!deliver_exact)) {
-		deliver_ptype_list_skb(skb, &pt_prev, orig_dev, type,
+		deliver_ptype_list_skb(*pskb, &pt_prev, orig_dev, type,
 				       &ptype_base[ntohs(type) &
 						   PTYPE_HASH_MASK]);
 	}
 
-	deliver_ptype_list_skb(skb, &pt_prev, orig_dev, type,
+	deliver_ptype_list_skb(*pskb, &pt_prev, orig_dev, type,
 			       &orig_dev->ptype_specific);
 
-	if (unlikely(skb->dev != orig_dev)) {
-		deliver_ptype_list_skb(skb, &pt_prev, orig_dev, type,
-				       &skb->dev->ptype_specific);
+	if (unlikely((*pskb)->dev != orig_dev)) {
+		deliver_ptype_list_skb(*pskb, &pt_prev, orig_dev, type,
+				       &(*pskb)->dev->ptype_specific);
 	}
 
 	if (pt_prev) {
-		if (unlikely(skb_orphan_frags_rx(skb, GFP_ATOMIC)))
+		if (unlikely(skb_orphan_frags_rx(*pskb, GFP_ATOMIC)))
 			goto drop;
 		*ppt_prev = pt_prev;
 	} else {
 drop:
 		if (!deliver_exact)
-			atomic_long_inc(&skb->dev->rx_dropped);
+			atomic_long_inc(&(*pskb)->dev->rx_dropped);
 		else
-			atomic_long_inc(&skb->dev->rx_nohandler);
-		kfree_skb(skb);
+			atomic_long_inc(&(*pskb)->dev->rx_nohandler);
+		kfree_skb(*pskb);
 		/* Jamal, now you will not able to escape explaining
 		 * me how you were going to use this. :-)
 		 */
@@ -4994,7 +4994,7 @@ static int __netif_receive_skb_one_core(struct sk_buff *skb, bool pfmemalloc)
 	struct packet_type *pt_prev = NULL;
 	int ret;
 
-	ret = __netif_receive_skb_core(skb, pfmemalloc, &pt_prev);
+	ret = __netif_receive_skb_core(&skb, pfmemalloc, &pt_prev);
 	if (pt_prev)
 		ret = INDIRECT_CALL_INET(pt_prev->func, ipv6_rcv, ip_rcv, skb,
 					 skb->dev, pt_prev, orig_dev);
@@ -5072,7 +5072,7 @@ static void __netif_receive_skb_list_core(struct list_head *head, bool pfmemallo
 		struct packet_type *pt_prev = NULL;
 
 		skb_list_del_init(skb);
-		__netif_receive_skb_core(skb, pfmemalloc, &pt_prev);
+		__netif_receive_skb_core(&skb, pfmemalloc, &pt_prev);
 		if (!pt_prev)
 			continue;
 		if (pt_curr != pt_prev || od_curr != orig_dev) {


> Here's a thought, how about switching round the return-vs-pass-by-pointer
>  and writing:
> 
> static struct sk_buff *__netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc,
>                                                 struct packet_type **ppt_prev, int *ret)
> ?
> (Although, you might want to rename 'ret' in that case.)
> 
> Does that make things any less ugly?

That seems more reasonable (also only compiled so far):

diff --git a/net/core/dev.c b/net/core/dev.c
index fc676b2610e3..e09b6a14851c 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4799,8 +4799,8 @@ static inline int nf_ingress(struct sk_buff *skb, struct packet_type **pt_prev,
 	return 0;
 }
 
-static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc,
-				    struct packet_type **ppt_prev)
+static struct sk_buff *__netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc,
+						struct packet_type **ppt_prev, int *retp)
 {
 	struct packet_type *ptype, *pt_prev;
 	rx_handler_func_t *rx_handler;
@@ -4834,8 +4834,10 @@ static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc,
 		ret2 = do_xdp_generic(rcu_dereference(skb->dev->xdp_prog), skb);
 		preempt_enable();
 
-		if (ret2 != XDP_PASS)
-			return NET_RX_DROP;
+		if (ret2 != XDP_PASS) {
+			*retp = NET_RX_DROP;
+			return NULL;
+		}
 		skb_reset_mac_len(skb);
 	}
 
@@ -4985,7 +4987,8 @@ static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc,
 	}
 
 out:
-	return ret;
+	*retp = ret;
+	return skb;
 }
 
 static int __netif_receive_skb_one_core(struct sk_buff *skb, bool pfmemalloc)
@@ -4994,7 +4997,7 @@ static int __netif_receive_skb_one_core(struct sk_buff *skb, bool pfmemalloc)
 	struct packet_type *pt_prev = NULL;
 	int ret;
 
-	ret = __netif_receive_skb_core(skb, pfmemalloc, &pt_prev);
+	skb = __netif_receive_skb_core(skb, pfmemalloc, &pt_prev, &ret);
 	if (pt_prev)
 		ret = INDIRECT_CALL_INET(pt_prev->func, ipv6_rcv, ip_rcv, skb,
 					 skb->dev, pt_prev, orig_dev);
@@ -5070,9 +5073,10 @@ static void __netif_receive_skb_list_core(struct list_head *head, bool pfmemallo
 	list_for_each_entry_safe(skb, next, head, list) {
 		struct net_device *orig_dev = skb->dev;
 		struct packet_type *pt_prev = NULL;
+		int ret;
 
 		skb_list_del_init(skb);
-		__netif_receive_skb_core(skb, pfmemalloc, &pt_prev);
+		skb = __netif_receive_skb_core(skb, pfmemalloc, &pt_prev, &ret);
 		if (!pt_prev)
 			continue;
 		if (pt_curr != pt_prev || od_curr != orig_dev) {
diff mbox series

Patch

diff --git a/net/core/dev.c b/net/core/dev.c
index d6edd218babd..0bbf6d2a9c32 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4809,11 +4809,12 @@  static inline int nf_ingress(struct sk_buff *skb, struct packet_type **pt_prev,
 	return 0;
 }
 
-static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc,
+static int __netif_receive_skb_core(struct sk_buff **pskb, bool pfmemalloc,
 				    struct packet_type **ppt_prev)
 {
 	struct packet_type *ptype, *pt_prev;
 	rx_handler_func_t *rx_handler;
+	struct sk_buff *skb = *pskb;
 	struct net_device *orig_dev;
 	bool deliver_exact = false;
 	int ret = NET_RX_DROP;
@@ -4852,6 +4853,7 @@  static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc,
 	if (skb->protocol == cpu_to_be16(ETH_P_8021Q) ||
 	    skb->protocol == cpu_to_be16(ETH_P_8021AD)) {
 		skb = skb_vlan_untag(skb);
+		*pskb = skb;
 		if (unlikely(!skb))
 			goto out;
 	}
@@ -4878,6 +4880,7 @@  static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc,
 #ifdef CONFIG_NET_INGRESS
 	if (static_branch_unlikely(&ingress_needed_key)) {
 		skb = sch_handle_ingress(skb, &pt_prev, &ret, orig_dev);
+		*pskb = skb;
 		if (!skb)
 			goto out;
 
@@ -4891,11 +4894,14 @@  static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc,
 		goto drop;
 
 	if (skb_vlan_tag_present(skb)) {
+		bool ret2;
 		if (pt_prev) {
 			ret = deliver_skb(skb, pt_prev, orig_dev);
 			pt_prev = NULL;
 		}
-		if (vlan_do_receive(&skb))
+		ret2 = vlan_do_receive(pskb);
+		skb = *pskb;
+		if (ret2)
 			goto another_round;
 		else if (unlikely(!skb))
 			goto out;
@@ -4903,11 +4909,14 @@  static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc,
 
 	rx_handler = rcu_dereference(skb->dev->rx_handler);
 	if (rx_handler) {
+		rx_handler_result_t res;
 		if (pt_prev) {
 			ret = deliver_skb(skb, pt_prev, orig_dev);
 			pt_prev = NULL;
 		}
-		switch (rx_handler(&skb)) {
+		res = rx_handler(pskb);
+		skb = *pskb;
+		switch (res) {
 		case RX_HANDLER_CONSUMED:
 			ret = NET_RX_SUCCESS;
 			goto out;
@@ -4931,15 +4940,20 @@  static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc,
 			skb->pkt_type = PACKET_OTHERHOST;
 		} else if (skb->protocol == cpu_to_be16(ETH_P_8021Q) ||
 			   skb->protocol == cpu_to_be16(ETH_P_8021AD)) {
+			bool ret2;
+
 			/* Outer header is 802.1P with vlan 0, inner header is
 			 * 802.1Q or 802.1AD and vlan_do_receive() above could
 			 * not find vlan dev for vlan id 0.
 			 */
 			__vlan_hwaccel_clear_tag(skb);
 			skb = skb_vlan_untag(skb);
+			*pskb = skb;
 			if (unlikely(!skb))
 				goto out;
-			if (vlan_do_receive(&skb))
+			ret2 = vlan_do_receive(pskb);
+			skb = *pskb;
+			if (ret2)
 				/* After stripping off 802.1P header with vlan 0
 				 * vlan dev is found for inner header.
 				 */
@@ -5004,7 +5018,7 @@  static int __netif_receive_skb_one_core(struct sk_buff *skb, bool pfmemalloc)
 	struct packet_type *pt_prev = NULL;
 	int ret;
 
-	ret = __netif_receive_skb_core(skb, pfmemalloc, &pt_prev);
+	ret = __netif_receive_skb_core(&skb, pfmemalloc, &pt_prev);
 	if (pt_prev)
 		ret = INDIRECT_CALL_INET(pt_prev->func, ipv6_rcv, ip_rcv, skb,
 					 skb->dev, pt_prev, orig_dev);
@@ -5082,7 +5096,7 @@  static void __netif_receive_skb_list_core(struct list_head *head, bool pfmemallo
 		struct packet_type *pt_prev = NULL;
 
 		skb_list_del_init(skb);
-		__netif_receive_skb_core(skb, pfmemalloc, &pt_prev);
+		__netif_receive_skb_core(&skb, pfmemalloc, &pt_prev);
 		if (!pt_prev)
 			continue;
 		if (pt_curr != pt_prev || od_curr != orig_dev) {