diff mbox

[nf] netfilter: invoke synchronize_rcu after set the _hook_ to NULL

Message ID 1490015030-16403-1-git-send-email-zlpnobody@163.com
State Changes Requested
Delegated to: Pablo Neira
Headers show

Commit Message

Liping Zhang March 20, 2017, 1:03 p.m. UTC
From: Liping Zhang <zlpnobody@gmail.com>

Otherwise, another CPU may access the invalid pointer. For example:
    CPU0                CPU1
     -              rcu_read_lock();
     -              pfunc = _hook_;
  _hook_ = NULL;          -
  mod unload              -
     -                 pfunc(); // invalid, panic
     -             rcu_read_unlock();

So we must call synchronize_rcu() to wait the rcu reader to finish.

Also note, in nf_nat_snmp_basic_fini, synchronize_rcu() will be invoked
by later nf_conntrack_helper_unregister, but I'm inclined to add a
explicit synchronize_rcu after set the nf_nat_snmp_hook to NULL. Depend
on such obscure assumptions is not a good idea.

Last, in nfnetlink_cttimeout, we use kfree_rcu to free the time object,
so in cttimeout_exit, invoking rcu_barrier() is not necessary at all,
remove it now.

Signed-off-by: Liping Zhang <zlpnobody@gmail.com>
---
 net/ipv4/netfilter/nf_nat_snmp_basic.c | 1 +
 net/netfilter/nf_conntrack_ecache.c    | 2 ++
 net/netfilter/nf_conntrack_netlink.c   | 1 +
 net/netfilter/nf_nat_core.c            | 2 ++
 net/netfilter/nfnetlink_cttimeout.c    | 2 +-
 5 files changed, 7 insertions(+), 1 deletion(-)

Comments

Pablo Neira Ayuso March 24, 2017, 12:17 p.m. UTC | #1
On Mon, Mar 20, 2017 at 09:03:50PM +0800, Liping Zhang wrote:
> From: Liping Zhang <zlpnobody@gmail.com>
> 
> Otherwise, another CPU may access the invalid pointer. For example:
>     CPU0                CPU1
>      -              rcu_read_lock();
>      -              pfunc = _hook_;
>   _hook_ = NULL;          -
>   mod unload              -
>      -                 pfunc(); // invalid, panic
>      -             rcu_read_unlock();
> 
> So we must call synchronize_rcu() to wait the rcu reader to finish.
> 
> Also note, in nf_nat_snmp_basic_fini, synchronize_rcu() will be invoked
> by later nf_conntrack_helper_unregister, but I'm inclined to add a
> explicit synchronize_rcu after set the nf_nat_snmp_hook to NULL. Depend
> on such obscure assumptions is not a good idea.
> 
> Last, in nfnetlink_cttimeout, we use kfree_rcu to free the time object,
> so in cttimeout_exit, invoking rcu_barrier() is not necessary at all,
> remove it now.
> 
> Signed-off-by: Liping Zhang <zlpnobody@gmail.com>
> ---
>  net/ipv4/netfilter/nf_nat_snmp_basic.c | 1 +
>  net/netfilter/nf_conntrack_ecache.c    | 2 ++
>  net/netfilter/nf_conntrack_netlink.c   | 1 +
>  net/netfilter/nf_nat_core.c            | 2 ++
>  net/netfilter/nfnetlink_cttimeout.c    | 2 +-
>  5 files changed, 7 insertions(+), 1 deletion(-)
> 
> diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c
> index c9b52c3..5a8f7c3 100644
> --- a/net/ipv4/netfilter/nf_nat_snmp_basic.c
> +++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c
> @@ -1304,6 +1304,7 @@ static int __init nf_nat_snmp_basic_init(void)
>  static void __exit nf_nat_snmp_basic_fini(void)
>  {
>  	RCU_INIT_POINTER(nf_nat_snmp_hook, NULL);
> +	synchronize_rcu();
>  	nf_conntrack_helper_unregister(&snmp_trap_helper);
>  }
>  
> diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c
> index da9df2d..12cc98f 100644
> --- a/net/netfilter/nf_conntrack_ecache.c
> +++ b/net/netfilter/nf_conntrack_ecache.c
> @@ -290,6 +290,7 @@ void nf_conntrack_unregister_notifier(struct net *net,
>  	BUG_ON(notify != new);
>  	RCU_INIT_POINTER(net->ct.nf_conntrack_event_cb, NULL);
>  	mutex_unlock(&nf_ct_ecache_mutex);
> +	synchronize_rcu();
>  }
>  EXPORT_SYMBOL_GPL(nf_conntrack_unregister_notifier);
>  
> @@ -326,6 +327,7 @@ void nf_ct_expect_unregister_notifier(struct net *net,
>  	BUG_ON(notify != new);
>  	RCU_INIT_POINTER(net->ct.nf_expect_event_cb, NULL);
>  	mutex_unlock(&nf_ct_ecache_mutex);
> +	synchronize_rcu();
>  }
>  EXPORT_SYMBOL_GPL(nf_ct_expect_unregister_notifier);
>  
> diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
> index 6806b5e..455c2c2 100644
> --- a/net/netfilter/nf_conntrack_netlink.c
> +++ b/net/netfilter/nf_conntrack_netlink.c
> @@ -3441,6 +3441,7 @@ static void __exit ctnetlink_exit(void)
>  	nfnetlink_subsys_unregister(&ctnl_subsys);
>  #ifdef CONFIG_NETFILTER_NETLINK_GLUE_CT
>  	RCU_INIT_POINTER(nfnl_ct_hook, NULL);
> +	synchronize_rcu();
>  #endif
>  }
>  
> diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
> index 94b14c5..82802e4 100644
> --- a/net/netfilter/nf_nat_core.c
> +++ b/net/netfilter/nf_nat_core.c
> @@ -903,6 +903,8 @@ static void __exit nf_nat_cleanup(void)
>  #ifdef CONFIG_XFRM
>  	RCU_INIT_POINTER(nf_nat_decode_session_hook, NULL);
>  #endif
> +	synchronize_rcu();
> +
>  	for (i = 0; i < NFPROTO_NUMPROTO; i++)
>  		kfree(nf_nat_l4protos[i]);
>  
> diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c
> index 139e086..47d6656 100644
> --- a/net/netfilter/nfnetlink_cttimeout.c
> +++ b/net/netfilter/nfnetlink_cttimeout.c
> @@ -646,8 +646,8 @@ static void __exit cttimeout_exit(void)
>  #ifdef CONFIG_NF_CONNTRACK_TIMEOUT
>  	RCU_INIT_POINTER(nf_ct_timeout_find_get_hook, NULL);
>  	RCU_INIT_POINTER(nf_ct_timeout_put_hook, NULL);
> +	synchronize_rcu();
>  #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
> -	rcu_barrier();

cttimeout relies on kfree_rcu().

Are you sure we don't need this?

According to:

https://www.kernel.org/doc/Documentation/RCU/rcubarrier.txt

"We could try placing a synchronize_rcu() in the module-exit code path,
but this is not sufficient."

Then:

"Please note that rcu_barrier() does -not- imply synchronize_rcu(), in
particular, if there are no RCU callbacks queued anywhere,
rcu_barrier() is within its rights to return immediately, without
waiting for a grace period to elapse."
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Liping Zhang March 24, 2017, 1:01 p.m. UTC | #2
Hi Pablo,

2017-03-24 20:17 GMT+08:00 Pablo Neira Ayuso <pablo@netfilter.org>:
>> --- a/net/netfilter/nfnetlink_cttimeout.c
>> +++ b/net/netfilter/nfnetlink_cttimeout.c
>> @@ -646,8 +646,8 @@ static void __exit cttimeout_exit(void)
>>  #ifdef CONFIG_NF_CONNTRACK_TIMEOUT
>>       RCU_INIT_POINTER(nf_ct_timeout_find_get_hook, NULL);
>>       RCU_INIT_POINTER(nf_ct_timeout_put_hook, NULL);
>> +     synchronize_rcu();
>>  #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
>> -     rcu_barrier();
>
> cttimeout relies on kfree_rcu().
>
> Are you sure we don't need this?
>
> According to:
>
> https://www.kernel.org/doc/Documentation/RCU/rcubarrier.txt
>
> "We could try placing a synchronize_rcu() in the module-exit code path,
> but this is not sufficient."
>
> Then:
>
> "Please note that rcu_barrier() does -not- imply synchronize_rcu(), in
> particular, if there are no RCU callbacks queued anywhere,
> rcu_barrier() is within its rights to return immediately, without
> waiting for a grace period to elapse."

This is because we use kfree_rcu to free the cttimeout objects. So I think
rcu_barrier() is not needed anymore.

Quoted from https://lwn.net/Articles/433493/ :
"And kfree_rcu() is also help for unloadable modules, kfree_rcu() does not
queue any function which belong to the module, so a rcu_barrier() can
be avoid when module exit."

Also from commit 9ab1544eb419 ("rcu: introduce kfree_rcu()"):
"Many rcu callbacks functions just call kfree() on the base structure.
These functions are trivial, but their size adds up, and furthermore
when they are used in a kernel module, that module must invoke the
high-latency rcu_barrier() function at module-unload time."
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Pablo Neira Ayuso March 24, 2017, 6:45 p.m. UTC | #3
On Fri, Mar 24, 2017 at 09:01:17PM +0800, Liping Zhang wrote:
> Hi Pablo,
> 
> 2017-03-24 20:17 GMT+08:00 Pablo Neira Ayuso <pablo@netfilter.org>:
> >> --- a/net/netfilter/nfnetlink_cttimeout.c
> >> +++ b/net/netfilter/nfnetlink_cttimeout.c
> >> @@ -646,8 +646,8 @@ static void __exit cttimeout_exit(void)
> >>  #ifdef CONFIG_NF_CONNTRACK_TIMEOUT
> >>       RCU_INIT_POINTER(nf_ct_timeout_find_get_hook, NULL);
> >>       RCU_INIT_POINTER(nf_ct_timeout_put_hook, NULL);
> >> +     synchronize_rcu();
> >>  #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
> >> -     rcu_barrier();
> >
> > cttimeout relies on kfree_rcu().
> >
> > Are you sure we don't need this?
> >
> > According to:
> >
> > https://www.kernel.org/doc/Documentation/RCU/rcubarrier.txt
> >
> > "We could try placing a synchronize_rcu() in the module-exit code path,
> > but this is not sufficient."
> >
> > Then:
> >
> > "Please note that rcu_barrier() does -not- imply synchronize_rcu(), in
> > particular, if there are no RCU callbacks queued anywhere,
> > rcu_barrier() is within its rights to return immediately, without
> > waiting for a grace period to elapse."
> 
> This is because we use kfree_rcu to free the cttimeout objects. So I think
> rcu_barrier() is not needed anymore.
> 
> Quoted from https://lwn.net/Articles/433493/ :
> "And kfree_rcu() is also help for unloadable modules, kfree_rcu() does not
> queue any function which belong to the module, so a rcu_barrier() can
> be avoid when module exit."
> 
> Also from commit 9ab1544eb419 ("rcu: introduce kfree_rcu()"):
> "Many rcu callbacks functions just call kfree() on the base structure.
> These functions are trivial, but their size adds up, and furthermore
> when they are used in a kernel module, that module must invoke the
> high-latency rcu_barrier() function at module-unload time."

Right, thanks for explaining.

I think we can get this smaller: it should be possible to avoid this
synchronize_rcu() call from nf_conntrack_{register,unregister}_notifier().
These two are called from ctnetlink netns path, this patch already
adds a synchronize_rcu() spot to ctnetlink module removal which is
where the event callback can vanish.

You can just add a comment there so we don't forget about this, eg.

@@ -290,6 +290,7 @@  void nf_conntrack_unregister_notifier(struct net
*net,
        BUG_ON(notify != new);
        RCU_INIT_POINTER(net->ct.nf_conntrack_event_cb, NULL);
        mutex_unlock(&nf_ct_ecache_mutex);
+       /* synchronize_rcu() is called from ctnetlink. */
 }

What do you think?
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Liping Zhang March 24, 2017, 11:55 p.m. UTC | #4
Hi Pablo,

2017-03-25 2:45 GMT+08:00 Pablo Neira Ayuso <pablo@netfilter.org>:
[...]
> I think we can get this smaller: it should be possible to avoid this
> synchronize_rcu() call from nf_conntrack_{register,unregister}_notifier().
> These two are called from ctnetlink netns path, this patch already
> adds a synchronize_rcu() spot to ctnetlink module removal which is
> where the event callback can vanish.
>
> You can just add a comment there so we don't forget about this, eg.
>
> @@ -290,6 +290,7 @@  void nf_conntrack_unregister_notifier(struct net
> *net,
>         BUG_ON(notify != new);
>         RCU_INIT_POINTER(net->ct.nf_conntrack_event_cb, NULL);
>         mutex_unlock(&nf_ct_ecache_mutex);
> +       /* synchronize_rcu() is called from ctnetlink. */
>  }
>
> What do you think?

Right, this can avoid two useless synchronize_rcu() invocations when
deleting netns, I will send V2.

Thanks Pablo.
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c
index c9b52c3..5a8f7c3 100644
--- a/net/ipv4/netfilter/nf_nat_snmp_basic.c
+++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c
@@ -1304,6 +1304,7 @@  static int __init nf_nat_snmp_basic_init(void)
 static void __exit nf_nat_snmp_basic_fini(void)
 {
 	RCU_INIT_POINTER(nf_nat_snmp_hook, NULL);
+	synchronize_rcu();
 	nf_conntrack_helper_unregister(&snmp_trap_helper);
 }
 
diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c
index da9df2d..12cc98f 100644
--- a/net/netfilter/nf_conntrack_ecache.c
+++ b/net/netfilter/nf_conntrack_ecache.c
@@ -290,6 +290,7 @@  void nf_conntrack_unregister_notifier(struct net *net,
 	BUG_ON(notify != new);
 	RCU_INIT_POINTER(net->ct.nf_conntrack_event_cb, NULL);
 	mutex_unlock(&nf_ct_ecache_mutex);
+	synchronize_rcu();
 }
 EXPORT_SYMBOL_GPL(nf_conntrack_unregister_notifier);
 
@@ -326,6 +327,7 @@  void nf_ct_expect_unregister_notifier(struct net *net,
 	BUG_ON(notify != new);
 	RCU_INIT_POINTER(net->ct.nf_expect_event_cb, NULL);
 	mutex_unlock(&nf_ct_ecache_mutex);
+	synchronize_rcu();
 }
 EXPORT_SYMBOL_GPL(nf_ct_expect_unregister_notifier);
 
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 6806b5e..455c2c2 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -3441,6 +3441,7 @@  static void __exit ctnetlink_exit(void)
 	nfnetlink_subsys_unregister(&ctnl_subsys);
 #ifdef CONFIG_NETFILTER_NETLINK_GLUE_CT
 	RCU_INIT_POINTER(nfnl_ct_hook, NULL);
+	synchronize_rcu();
 #endif
 }
 
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index 94b14c5..82802e4 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -903,6 +903,8 @@  static void __exit nf_nat_cleanup(void)
 #ifdef CONFIG_XFRM
 	RCU_INIT_POINTER(nf_nat_decode_session_hook, NULL);
 #endif
+	synchronize_rcu();
+
 	for (i = 0; i < NFPROTO_NUMPROTO; i++)
 		kfree(nf_nat_l4protos[i]);
 
diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c
index 139e086..47d6656 100644
--- a/net/netfilter/nfnetlink_cttimeout.c
+++ b/net/netfilter/nfnetlink_cttimeout.c
@@ -646,8 +646,8 @@  static void __exit cttimeout_exit(void)
 #ifdef CONFIG_NF_CONNTRACK_TIMEOUT
 	RCU_INIT_POINTER(nf_ct_timeout_find_get_hook, NULL);
 	RCU_INIT_POINTER(nf_ct_timeout_put_hook, NULL);
+	synchronize_rcu();
 #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
-	rcu_barrier();
 }
 
 module_init(cttimeout_init);