diff mbox series

[nf-next,v1,1/3] netfilter: nf_dup{4, 6}: Move duplication check to task_struct

Message ID 20250512102846.234111-2-bigeasy@linutronix.de
State Accepted
Headers show
Series netfilter: Cover more per-CPU storage with local nested BH locking. | expand

Commit Message

Sebastian Andrzej Siewior May 12, 2025, 10:28 a.m. UTC
nf_skb_duplicated is a per-CPU variable and relies on disabled BH for its
locking. Without per-CPU locking in local_bh_disable() on PREEMPT_RT
this data structure requires explicit locking.

Due to the recursion involved, the simplest change is to make it a
per-task variable.

Move the per-CPU variable nf_skb_duplicated to task_struct and name it
in_nf_duplicate. Add it to the existing bitfield so it doesn't use
additional memory.

Cc: Ingo Molnar <mingo@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Juri Lelli <juri.lelli@redhat.com>
Cc: Vincent Guittot <vincent.guittot@linaro.org>
Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Ben Segall <bsegall@google.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Valentin Schneider <vschneid@redhat.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
 include/linux/netfilter.h        | 11 -----------
 include/linux/sched.h            |  1 +
 net/ipv4/netfilter/ip_tables.c   |  2 +-
 net/ipv4/netfilter/nf_dup_ipv4.c |  6 +++---
 net/ipv6/netfilter/ip6_tables.c  |  2 +-
 net/ipv6/netfilter/nf_dup_ipv6.c |  6 +++---
 net/netfilter/core.c             |  3 ---
 7 files changed, 9 insertions(+), 22 deletions(-)

Comments

Pablo Neira Ayuso May 21, 2025, 2:24 p.m. UTC | #1
Hi Sebastian,

On Mon, May 12, 2025 at 12:28:44PM +0200, Sebastian Andrzej Siewior wrote:
[...]
> diff --git a/net/ipv6/netfilter/nf_dup_ipv6.c b/net/ipv6/netfilter/nf_dup_ipv6.c
> index 0c39c77fe8a8a..b903c62c00c9e 100644
> --- a/net/ipv6/netfilter/nf_dup_ipv6.c
> +++ b/net/ipv6/netfilter/nf_dup_ipv6.c
> @@ -48,7 +48,7 @@ void nf_dup_ipv6(struct net *net, struct sk_buff *skb, unsigned int hooknum,
>  		 const struct in6_addr *gw, int oif)
>  {
>  	local_bh_disable();
> -	if (this_cpu_read(nf_skb_duplicated))
> +	if (current->in_nf_duplicate)

Netfilter runs from the forwarding path too, where no current process
is available.

>  		goto out;
>  	skb = pskb_copy(skb, GFP_ATOMIC);
>  	if (skb == NULL)
> @@ -64,9 +64,9 @@ void nf_dup_ipv6(struct net *net, struct sk_buff *skb, unsigned int hooknum,
>  		--iph->hop_limit;
>  	}
>  	if (nf_dup_ipv6_route(net, skb, gw, oif)) {
> -		__this_cpu_write(nf_skb_duplicated, true);
> +		current->in_nf_duplicate = true;
>  		ip6_local_out(net, skb->sk, skb);
> -		__this_cpu_write(nf_skb_duplicated, false);
> +		current->in_nf_duplicate = false;
>  	} else {
>  		kfree_skb(skb);
>  	}
Sebastian Andrzej Siewior May 21, 2025, 2:40 p.m. UTC | #2
On 2025-05-21 16:24:59 [+0200], Pablo Neira Ayuso wrote:
> Hi Sebastian,
Hi Pablo,

> On Mon, May 12, 2025 at 12:28:44PM +0200, Sebastian Andrzej Siewior wrote:
> [...]
> > diff --git a/net/ipv6/netfilter/nf_dup_ipv6.c b/net/ipv6/netfilter/nf_dup_ipv6.c
> > index 0c39c77fe8a8a..b903c62c00c9e 100644
> > --- a/net/ipv6/netfilter/nf_dup_ipv6.c
> > +++ b/net/ipv6/netfilter/nf_dup_ipv6.c
> > @@ -48,7 +48,7 @@ void nf_dup_ipv6(struct net *net, struct sk_buff *skb, unsigned int hooknum,
> >  		 const struct in6_addr *gw, int oif)
> >  {
> >  	local_bh_disable();
> > -	if (this_cpu_read(nf_skb_duplicated))
> > +	if (current->in_nf_duplicate)
> 
> Netfilter runs from the forwarding path too, where no current process
> is available.

If you refer to in-softirq with no task running then there is the idle
task/ swapper which is pointed to by current in this case. There is one
idle task for each CPU, they don't migrate.

> >  		goto out;
> >  	skb = pskb_copy(skb, GFP_ATOMIC);
> >  	if (skb == NULL)
> > @@ -64,9 +64,9 @@ void nf_dup_ipv6(struct net *net, struct sk_buff *skb, unsigned int hooknum,
> >  		--iph->hop_limit;
> >  	}
> >  	if (nf_dup_ipv6_route(net, skb, gw, oif)) {
> > -		__this_cpu_write(nf_skb_duplicated, true);
> > +		current->in_nf_duplicate = true;
> >  		ip6_local_out(net, skb->sk, skb);
> > -		__this_cpu_write(nf_skb_duplicated, false);
> > +		current->in_nf_duplicate = false;
> >  	} else {
> >  		kfree_skb(skb);
> >  	}

Sebastian
Pablo Neira Ayuso May 21, 2025, 3:56 p.m. UTC | #3
On Wed, May 21, 2025 at 04:40:43PM +0200, Sebastian Andrzej Siewior wrote:
> On 2025-05-21 16:24:59 [+0200], Pablo Neira Ayuso wrote:
> > Hi Sebastian,
> Hi Pablo,
> 
> > On Mon, May 12, 2025 at 12:28:44PM +0200, Sebastian Andrzej Siewior wrote:
> > [...]
> > > diff --git a/net/ipv6/netfilter/nf_dup_ipv6.c b/net/ipv6/netfilter/nf_dup_ipv6.c
> > > index 0c39c77fe8a8a..b903c62c00c9e 100644
> > > --- a/net/ipv6/netfilter/nf_dup_ipv6.c
> > > +++ b/net/ipv6/netfilter/nf_dup_ipv6.c
> > > @@ -48,7 +48,7 @@ void nf_dup_ipv6(struct net *net, struct sk_buff *skb, unsigned int hooknum,
> > >  		 const struct in6_addr *gw, int oif)
> > >  {
> > >  	local_bh_disable();
> > > -	if (this_cpu_read(nf_skb_duplicated))
> > > +	if (current->in_nf_duplicate)
> > 
> > Netfilter runs from the forwarding path too, where no current process
> > is available.
> 
> If you refer to in-softirq with no task running then there is the idle
> task/ swapper which is pointed to by current in this case. There is one
> idle task for each CPU, they don't migrate.

Thanks for explaining.

I am going to place this series in nf-next.
diff mbox series

Patch

diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
index 2b8aac2c70ada..892d12823ed4b 100644
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -497,17 +497,6 @@  struct nf_defrag_hook {
 extern const struct nf_defrag_hook __rcu *nf_defrag_v4_hook;
 extern const struct nf_defrag_hook __rcu *nf_defrag_v6_hook;
 
-/*
- * nf_skb_duplicated - TEE target has sent a packet
- *
- * When a xtables target sends a packet, the OUTPUT and POSTROUTING
- * hooks are traversed again, i.e. nft and xtables are invoked recursively.
- *
- * This is used by xtables TEE target to prevent the duplicated skb from
- * being duplicated again.
- */
-DECLARE_PER_CPU(bool, nf_skb_duplicated);
-
 /*
  * Contains bitmask of ctnetlink event subscribers, if any.
  * Can't be pernet due to NETLINK_LISTEN_ALL_NSID setsockopt flag.
diff --git a/include/linux/sched.h b/include/linux/sched.h
index f96ac19828934..52d9c52dc8f27 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1044,6 +1044,7 @@  struct task_struct {
 	/* delay due to memory thrashing */
 	unsigned                        in_thrashing:1;
 #endif
+	unsigned			in_nf_duplicate:1;
 #ifdef CONFIG_PREEMPT_RT
 	struct netdev_xmit		net_xmit;
 #endif
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 3d101613f27fa..23c8deff8095a 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -270,7 +270,7 @@  ipt_do_table(void *priv,
 	 * but it is no problem since absolute verdict is issued by these.
 	 */
 	if (static_key_false(&xt_tee_enabled))
-		jumpstack += private->stacksize * __this_cpu_read(nf_skb_duplicated);
+		jumpstack += private->stacksize * current->in_nf_duplicate;
 
 	e = get_entry(table_base, private->hook_entry[hook]);
 
diff --git a/net/ipv4/netfilter/nf_dup_ipv4.c b/net/ipv4/netfilter/nf_dup_ipv4.c
index 25e1e8eb18dd5..ed08fb78cfa8c 100644
--- a/net/ipv4/netfilter/nf_dup_ipv4.c
+++ b/net/ipv4/netfilter/nf_dup_ipv4.c
@@ -54,7 +54,7 @@  void nf_dup_ipv4(struct net *net, struct sk_buff *skb, unsigned int hooknum,
 	struct iphdr *iph;
 
 	local_bh_disable();
-	if (this_cpu_read(nf_skb_duplicated))
+	if (current->in_nf_duplicate)
 		goto out;
 	/*
 	 * Copy the skb, and route the copy. Will later return %XT_CONTINUE for
@@ -86,9 +86,9 @@  void nf_dup_ipv4(struct net *net, struct sk_buff *skb, unsigned int hooknum,
 		--iph->ttl;
 
 	if (nf_dup_ipv4_route(net, skb, gw, oif)) {
-		__this_cpu_write(nf_skb_duplicated, true);
+		current->in_nf_duplicate = true;
 		ip_local_out(net, skb->sk, skb);
-		__this_cpu_write(nf_skb_duplicated, false);
+		current->in_nf_duplicate = false;
 	} else {
 		kfree_skb(skb);
 	}
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 7d5602950ae72..d585ac3c11133 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -292,7 +292,7 @@  ip6t_do_table(void *priv, struct sk_buff *skb,
 	 * but it is no problem since absolute verdict is issued by these.
 	 */
 	if (static_key_false(&xt_tee_enabled))
-		jumpstack += private->stacksize * __this_cpu_read(nf_skb_duplicated);
+		jumpstack += private->stacksize * current->in_nf_duplicate;
 
 	e = get_entry(table_base, private->hook_entry[hook]);
 
diff --git a/net/ipv6/netfilter/nf_dup_ipv6.c b/net/ipv6/netfilter/nf_dup_ipv6.c
index 0c39c77fe8a8a..b903c62c00c9e 100644
--- a/net/ipv6/netfilter/nf_dup_ipv6.c
+++ b/net/ipv6/netfilter/nf_dup_ipv6.c
@@ -48,7 +48,7 @@  void nf_dup_ipv6(struct net *net, struct sk_buff *skb, unsigned int hooknum,
 		 const struct in6_addr *gw, int oif)
 {
 	local_bh_disable();
-	if (this_cpu_read(nf_skb_duplicated))
+	if (current->in_nf_duplicate)
 		goto out;
 	skb = pskb_copy(skb, GFP_ATOMIC);
 	if (skb == NULL)
@@ -64,9 +64,9 @@  void nf_dup_ipv6(struct net *net, struct sk_buff *skb, unsigned int hooknum,
 		--iph->hop_limit;
 	}
 	if (nf_dup_ipv6_route(net, skb, gw, oif)) {
-		__this_cpu_write(nf_skb_duplicated, true);
+		current->in_nf_duplicate = true;
 		ip6_local_out(net, skb->sk, skb);
-		__this_cpu_write(nf_skb_duplicated, false);
+		current->in_nf_duplicate = false;
 	} else {
 		kfree_skb(skb);
 	}
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index b9f551f02c813..11a702065bab5 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -31,9 +31,6 @@ 
 const struct nf_ipv6_ops __rcu *nf_ipv6_ops __read_mostly;
 EXPORT_SYMBOL_GPL(nf_ipv6_ops);
 
-DEFINE_PER_CPU(bool, nf_skb_duplicated);
-EXPORT_SYMBOL_GPL(nf_skb_duplicated);
-
 #ifdef CONFIG_JUMP_LABEL
 struct static_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS];
 EXPORT_SYMBOL(nf_hooks_needed);