diff mbox series

[v2] net: ipmr: fix suspicious RCU warning

Message ID 20191120152255.18928-1-anders.roxell@linaro.org
State Changes Requested
Delegated to: David Miller
Headers show
Series [v2] net: ipmr: fix suspicious RCU warning | expand

Commit Message

Anders Roxell Nov. 20, 2019, 3:22 p.m. UTC
When booting an arm64 allmodconfig kernel on linux-next next-20191115
The following "suspicious RCU usage" warning shows up.  This bug seems
to have been introduced by commit f0ad0860d01e ("ipv4: ipmr: support
multiple tables") in 2010, but the warning was added only in this past
year by commit 28875945ba98 ("rcu: Add support for consolidated-RCU
reader checking").

[   32.496021][    T1] =============================
[   32.497616][    T1] WARNING: suspicious RCU usage
[   32.499614][    T1] 5.4.0-rc6-next-20191108-00003-gf74bac957b5c-dirty #2 Not tainted
[   32.502018][    T1] -----------------------------
[   32.503976][    T1] net/ipv4/ipmr.c:136 RCU-list traversed in non-reader section!!
[   32.506746][    T1]
[   32.506746][    T1] other info that might help us debug this:
[   32.506746][    T1]
[   32.509794][    T1]
[   32.509794][    T1] rcu_scheduler_active = 2, debug_locks = 1
[   32.512661][    T1] 1 lock held by swapper/0/1:
[   32.514169][    T1]  #0: ffffa000150dd678 (pernet_ops_rwsem){+.+.}, at: register_pernet_subsys+0x24/0x50
[   32.517621][    T1]
[   32.517621][    T1] stack backtrace:
[   32.519930][    T1] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.4.0-rc6-next-20191108-00003-gf74bac957b5c-dirty #2
[   32.523063][    T1] Hardware name: linux,dummy-virt (DT)
[   32.524787][    T1] Call trace:
[   32.525946][    T1]  dump_backtrace+0x0/0x2d0
[   32.527433][    T1]  show_stack+0x20/0x30
[   32.528811][    T1]  dump_stack+0x204/0x2ac
[   32.530258][    T1]  lockdep_rcu_suspicious+0xf4/0x108
[   32.531993][    T1]  ipmr_get_table+0xc8/0x170
[   32.533496][    T1]  ipmr_new_table+0x48/0xa0
[   32.535002][    T1]  ipmr_net_init+0xe8/0x258
[   32.536465][    T1]  ops_init+0x280/0x2d8
[   32.537876][    T1]  register_pernet_operations+0x210/0x420
[   32.539707][    T1]  register_pernet_subsys+0x30/0x50
[   32.541372][    T1]  ip_mr_init+0x54/0x180
[   32.542785][    T1]  inet_init+0x25c/0x3e8
[   32.544186][    T1]  do_one_initcall+0x4c0/0xad8
[   32.545757][    T1]  kernel_init_freeable+0x3e0/0x500
[   32.547443][    T1]  kernel_init+0x14/0x1f0
[   32.548875][    T1]  ret_from_fork+0x10/0x18

This commit therefore holds RTNL mutex around the problematic code path,
which is function ipmr_rules_init() in ipmr_net_init().  This commit
also adds a lockdep_rtnl_is_held() check to the ipmr_for_each_table()
macro.

Suggested-by: David Miller <davem@davemloft.net>
Reviewed-by: Paul E. McKenney <paulmck@kernel.org>
Signed-off-by: Anders Roxell <anders.roxell@linaro.org>
---
 net/ipv4/ipmr.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

Comments

Eric Dumazet Nov. 20, 2019, 5:45 p.m. UTC | #1
On 11/20/19 7:22 AM, Anders Roxell wrote:
> When booting an arm64 allmodconfig kernel on linux-next next-20191115
> The following "suspicious RCU usage" warning shows up.  This bug seems
> to have been introduced by commit f0ad0860d01e ("ipv4: ipmr: support
> multiple tables") in 2010, but the warning was added only in this past
> year by commit 28875945ba98 ("rcu: Add support for consolidated-RCU
> reader checking").
> 
> [   32.496021][    T1] =============================
> [   32.497616][    T1] WARNING: suspicious RCU usage
> [   32.499614][    T1] 5.4.0-rc6-next-20191108-00003-gf74bac957b5c-dirty #2 Not tainted
> [   32.502018][    T1] -----------------------------
> [   32.503976][    T1] net/ipv4/ipmr.c:136 RCU-list traversed in non-reader section!!
> [   32.506746][    T1]
> [   32.506746][    T1] other info that might help us debug this:
> [   32.506746][    T1]
> [   32.509794][    T1]
> [   32.509794][    T1] rcu_scheduler_active = 2, debug_locks = 1
> [   32.512661][    T1] 1 lock held by swapper/0/1:
> [   32.514169][    T1]  #0: ffffa000150dd678 (pernet_ops_rwsem){+.+.}, at: register_pernet_subsys+0x24/0x50
> [   32.517621][    T1]
> [   32.517621][    T1] stack backtrace:
> [   32.519930][    T1] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.4.0-rc6-next-20191108-00003-gf74bac957b5c-dirty #2
> [   32.523063][    T1] Hardware name: linux,dummy-virt (DT)
> [   32.524787][    T1] Call trace:
> [   32.525946][    T1]  dump_backtrace+0x0/0x2d0
> [   32.527433][    T1]  show_stack+0x20/0x30
> [   32.528811][    T1]  dump_stack+0x204/0x2ac
> [   32.530258][    T1]  lockdep_rcu_suspicious+0xf4/0x108
> [   32.531993][    T1]  ipmr_get_table+0xc8/0x170
> [   32.533496][    T1]  ipmr_new_table+0x48/0xa0
> [   32.535002][    T1]  ipmr_net_init+0xe8/0x258
> [   32.536465][    T1]  ops_init+0x280/0x2d8
> [   32.537876][    T1]  register_pernet_operations+0x210/0x420
> [   32.539707][    T1]  register_pernet_subsys+0x30/0x50
> [   32.541372][    T1]  ip_mr_init+0x54/0x180
> [   32.542785][    T1]  inet_init+0x25c/0x3e8
> [   32.544186][    T1]  do_one_initcall+0x4c0/0xad8
> [   32.545757][    T1]  kernel_init_freeable+0x3e0/0x500
> [   32.547443][    T1]  kernel_init+0x14/0x1f0
> [   32.548875][    T1]  ret_from_fork+0x10/0x18
> 
> This commit therefore holds RTNL mutex around the problematic code path,
> which is function ipmr_rules_init() in ipmr_net_init().  This commit
> also adds a lockdep_rtnl_is_held() check to the ipmr_for_each_table()
> macro.
> 
> Suggested-by: David Miller <davem@davemloft.net>
> Reviewed-by: Paul E. McKenney <paulmck@kernel.org>
> Signed-off-by: Anders Roxell <anders.roxell@linaro.org>
> ---
>  net/ipv4/ipmr.c | 5 ++++-
>  1 file changed, 4 insertions(+), 1 deletion(-)
> 
> diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
> index 6e68def66822..53dff9a0e60a 100644
> --- a/net/ipv4/ipmr.c
> +++ b/net/ipv4/ipmr.c
> @@ -110,7 +110,8 @@ static void ipmr_expire_process(struct timer_list *t);
>  
>  #ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
>  #define ipmr_for_each_table(mrt, net) \
> -	list_for_each_entry_rcu(mrt, &net->ipv4.mr_tables, list)
> +	list_for_each_entry_rcu(mrt, &net->ipv4.mr_tables, list, \
> +				lockdep_rtnl_is_held())
>  
>  static struct mr_table *ipmr_mr_table_iter(struct net *net,
>  					   struct mr_table *mrt)
> @@ -3086,7 +3087,9 @@ static int __net_init ipmr_net_init(struct net *net)
>  	if (err)
>  		goto ipmr_notifier_fail;
>  
> +	rtnl_lock();
>  	err = ipmr_rules_init(net);
> +	rtnl_unlock();
>  	if (err < 0)
>  		goto ipmr_rules_fail;

Hmmm... this might have performance impact for creation of a new netns

Since the 'struct net' is not yet fully initialized (thus published/visible),
should we really have to grab RTNL (again) only to silence a warning ?

What about the following alternative ?

diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 6e68def66822f47fc08d94eddd32a4bd4f9fdfb0..b6dcdce08f1d82c83756a319623e24ae0174092c 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -94,7 +94,7 @@ static DEFINE_SPINLOCK(mfc_unres_lock);
 
 static struct kmem_cache *mrt_cachep __ro_after_init;
 
-static struct mr_table *ipmr_new_table(struct net *net, u32 id);
+static struct mr_table *ipmr_new_table(struct net *net, u32 id, bool init);
 static void ipmr_free_table(struct mr_table *mrt);
 
 static void ip_mr_forward(struct net *net, struct mr_table *mrt,
@@ -245,7 +245,7 @@ static int __net_init ipmr_rules_init(struct net *net)
 
        INIT_LIST_HEAD(&net->ipv4.mr_tables);
 
-       mrt = ipmr_new_table(net, RT_TABLE_DEFAULT);
+       mrt = ipmr_new_table(net, RT_TABLE_DEFAULT, true);
        if (IS_ERR(mrt)) {
                err = PTR_ERR(mrt);
                goto err1;
@@ -322,7 +322,7 @@ static int __net_init ipmr_rules_init(struct net *net)
 {
        struct mr_table *mrt;
 
-       mrt = ipmr_new_table(net, RT_TABLE_DEFAULT);
+       mrt = ipmr_new_table(net, RT_TABLE_DEFAULT, true);
        if (IS_ERR(mrt))
                return PTR_ERR(mrt);
        net->ipv4.mrt = mrt;
@@ -392,7 +392,7 @@ static struct mr_table_ops ipmr_mr_table_ops = {
        .cmparg_any = &ipmr_mr_table_ops_cmparg_any,
 };
 
-static struct mr_table *ipmr_new_table(struct net *net, u32 id)
+static struct mr_table *ipmr_new_table(struct net *net, u32 id, bool init)
 {
        struct mr_table *mrt;
 
@@ -400,9 +400,11 @@ static struct mr_table *ipmr_new_table(struct net *net, u32 id)
        if (id != RT_TABLE_DEFAULT && id >= 1000000000)
                return ERR_PTR(-EINVAL);
 
-       mrt = ipmr_get_table(net, id);
-       if (mrt)
-               return mrt;
+       if (!init) {
+               mrt = ipmr_get_table(net, id);
+               if (mrt)
+                       return mrt;
+       }
 
        return mr_table_alloc(net, id, &ipmr_mr_table_ops,
                              ipmr_expire_process, ipmr_new_table_set);
@@ -1547,7 +1549,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval,
                if (sk == rtnl_dereference(mrt->mroute_sk)) {
                        ret = -EBUSY;
                } else {
-                       mrt = ipmr_new_table(net, uval);
+                       mrt = ipmr_new_table(net, uval, false);
                        if (IS_ERR(mrt))
                                ret = PTR_ERR(mrt);
                        else
Anders Roxell Nov. 21, 2019, 7:15 a.m. UTC | #2
On Wed, 20 Nov 2019 at 18:45, Eric Dumazet <eric.dumazet@gmail.com> wrote:
>
>
>
> On 11/20/19 7:22 AM, Anders Roxell wrote:
> > When booting an arm64 allmodconfig kernel on linux-next next-20191115
> > The following "suspicious RCU usage" warning shows up.  This bug seems
> > to have been introduced by commit f0ad0860d01e ("ipv4: ipmr: support
> > multiple tables") in 2010, but the warning was added only in this past
> > year by commit 28875945ba98 ("rcu: Add support for consolidated-RCU
> > reader checking").
> >
> > [   32.496021][    T1] =============================
> > [   32.497616][    T1] WARNING: suspicious RCU usage
> > [   32.499614][    T1] 5.4.0-rc6-next-20191108-00003-gf74bac957b5c-dirty #2 Not tainted
> > [   32.502018][    T1] -----------------------------
> > [   32.503976][    T1] net/ipv4/ipmr.c:136 RCU-list traversed in non-reader section!!
> > [   32.506746][    T1]
> > [   32.506746][    T1] other info that might help us debug this:
> > [   32.506746][    T1]
> > [   32.509794][    T1]
> > [   32.509794][    T1] rcu_scheduler_active = 2, debug_locks = 1
> > [   32.512661][    T1] 1 lock held by swapper/0/1:
> > [   32.514169][    T1]  #0: ffffa000150dd678 (pernet_ops_rwsem){+.+.}, at: register_pernet_subsys+0x24/0x50
> > [   32.517621][    T1]
> > [   32.517621][    T1] stack backtrace:
> > [   32.519930][    T1] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.4.0-rc6-next-20191108-00003-gf74bac957b5c-dirty #2
> > [   32.523063][    T1] Hardware name: linux,dummy-virt (DT)
> > [   32.524787][    T1] Call trace:
> > [   32.525946][    T1]  dump_backtrace+0x0/0x2d0
> > [   32.527433][    T1]  show_stack+0x20/0x30
> > [   32.528811][    T1]  dump_stack+0x204/0x2ac
> > [   32.530258][    T1]  lockdep_rcu_suspicious+0xf4/0x108
> > [   32.531993][    T1]  ipmr_get_table+0xc8/0x170
> > [   32.533496][    T1]  ipmr_new_table+0x48/0xa0
> > [   32.535002][    T1]  ipmr_net_init+0xe8/0x258
> > [   32.536465][    T1]  ops_init+0x280/0x2d8
> > [   32.537876][    T1]  register_pernet_operations+0x210/0x420
> > [   32.539707][    T1]  register_pernet_subsys+0x30/0x50
> > [   32.541372][    T1]  ip_mr_init+0x54/0x180
> > [   32.542785][    T1]  inet_init+0x25c/0x3e8
> > [   32.544186][    T1]  do_one_initcall+0x4c0/0xad8
> > [   32.545757][    T1]  kernel_init_freeable+0x3e0/0x500
> > [   32.547443][    T1]  kernel_init+0x14/0x1f0
> > [   32.548875][    T1]  ret_from_fork+0x10/0x18
> >
> > This commit therefore holds RTNL mutex around the problematic code path,
> > which is function ipmr_rules_init() in ipmr_net_init().  This commit
> > also adds a lockdep_rtnl_is_held() check to the ipmr_for_each_table()
> > macro.
> >
> > Suggested-by: David Miller <davem@davemloft.net>
> > Reviewed-by: Paul E. McKenney <paulmck@kernel.org>
> > Signed-off-by: Anders Roxell <anders.roxell@linaro.org>
> > ---
> >  net/ipv4/ipmr.c | 5 ++++-
> >  1 file changed, 4 insertions(+), 1 deletion(-)
> >
> > diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
> > index 6e68def66822..53dff9a0e60a 100644
> > --- a/net/ipv4/ipmr.c
> > +++ b/net/ipv4/ipmr.c
> > @@ -110,7 +110,8 @@ static void ipmr_expire_process(struct timer_list *t);
> >
> >  #ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
> >  #define ipmr_for_each_table(mrt, net) \
> > -     list_for_each_entry_rcu(mrt, &net->ipv4.mr_tables, list)
> > +     list_for_each_entry_rcu(mrt, &net->ipv4.mr_tables, list, \
> > +                             lockdep_rtnl_is_held())
> >
> >  static struct mr_table *ipmr_mr_table_iter(struct net *net,
> >                                          struct mr_table *mrt)
> > @@ -3086,7 +3087,9 @@ static int __net_init ipmr_net_init(struct net *net)
> >       if (err)
> >               goto ipmr_notifier_fail;
> >
> > +     rtnl_lock();
> >       err = ipmr_rules_init(net);
> > +     rtnl_unlock();
> >       if (err < 0)
> >               goto ipmr_rules_fail;
>
> Hmmm... this might have performance impact for creation of a new netns
>
> Since the 'struct net' is not yet fully initialized (thus published/visible),
> should we really have to grab RTNL (again) only to silence a warning ?
>
> What about the following alternative ?

I tried what you suggested, unfortunately, I still got the warning.


[   43.253850][    T1] =============================
[   43.255473][    T1] WARNING: suspicious RCU usage
[   43.259068][    T1]
5.4.0-rc8-next-20191120-00003-g3aa7c2a8649e-dirty #6 Not tainted
[   43.263078][    T1] -----------------------------
[   43.265134][    T1] net/ipv4/ipmr.c:1759 RCU-list traversed in
non-reader section!!
[   43.267587][    T1]
[   43.267587][    T1] other info that might help us debug this:
[   43.267587][    T1]
[   43.271129][    T1]
[   43.271129][    T1] rcu_scheduler_active = 2, debug_locks = 1
[   43.274021][    T1] 2 locks held by swapper/0/1:
[   43.275532][    T1]  #0: ffff000065abeaa0 (&dev->mutex){....}, at:
__device_driver_lock+0xa0/0xb0
[   43.278930][    T1]  #1: ffffa000153017f0 (rtnl_mutex){+.+.}, at:
rtnl_lock+0x1c/0x28
[   43.282023][    T1]
[   43.282023][    T1] stack backtrace:
[   43.283921][    T1] CPU: 0 PID: 1 Comm: swapper/0 Not tainted
5.4.0-rc8-next-20191120-00003-g3aa7c2a8649e-dirty #6
[   43.287152][    T1] Hardware name: linux,dummy-virt (DT)
[   43.288920][    T1] Call trace:
[   43.290057][    T1]  dump_backtrace+0x0/0x2d0
[   43.291535][    T1]  show_stack+0x20/0x30
[   43.292967][    T1]  dump_stack+0x204/0x2ac
[   43.294423][    T1]  lockdep_rcu_suspicious+0xf4/0x108
[   43.296163][    T1]  ipmr_device_event+0x100/0x1e8
[   43.297812][    T1]  notifier_call_chain+0x100/0x1a8
[   43.299486][    T1]  raw_notifier_call_chain+0x38/0x48
[   43.301248][    T1]  call_netdevice_notifiers_info+0x128/0x148
[   43.303158][    T1]  rollback_registered_many+0x684/0xb48
[   43.304963][    T1]  rollback_registered+0xd8/0x150
[   43.306595][    T1]  unregister_netdevice_queue+0x194/0x1b8
[   43.308406][    T1]  unregister_netdev+0x24/0x38
[   43.310012][    T1]  virtnet_remove+0x44/0x78
[   43.311519][    T1]  virtio_dev_remove+0x5c/0xc0
[   43.313114][    T1]  really_probe+0x508/0x920
[   43.314635][    T1]  driver_probe_device+0x164/0x230
[   43.316337][    T1]  device_driver_attach+0x8c/0xc0
[   43.318024][    T1]  __driver_attach+0x1e0/0x1f8
[   43.319584][    T1]  bus_for_each_dev+0xf0/0x188
[   43.321169][    T1]  driver_attach+0x34/0x40
[   43.322645][    T1]  bus_add_driver+0x204/0x3c8
[   43.324202][    T1]  driver_register+0x160/0x1f8
[   43.325788][    T1]  register_virtio_driver+0x7c/0x88
[   43.327480][    T1]  virtio_net_driver_init+0xa8/0xf4
[   43.329196][    T1]  do_one_initcall+0x4c0/0xad8
[   43.330767][    T1]  kernel_init_freeable+0x3e0/0x500
[   43.332444][    T1]  kernel_init+0x14/0x1f0
[   43.333901][    T1]  ret_from_fork+0x10/0x18


Cheers,
Anders

>
> diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
> index 6e68def66822f47fc08d94eddd32a4bd4f9fdfb0..b6dcdce08f1d82c83756a319623e24ae0174092c 100644
> --- a/net/ipv4/ipmr.c
> +++ b/net/ipv4/ipmr.c
> @@ -94,7 +94,7 @@ static DEFINE_SPINLOCK(mfc_unres_lock);
>
>  static struct kmem_cache *mrt_cachep __ro_after_init;
>
> -static struct mr_table *ipmr_new_table(struct net *net, u32 id);
> +static struct mr_table *ipmr_new_table(struct net *net, u32 id, bool init);
>  static void ipmr_free_table(struct mr_table *mrt);
>
>  static void ip_mr_forward(struct net *net, struct mr_table *mrt,
> @@ -245,7 +245,7 @@ static int __net_init ipmr_rules_init(struct net *net)
>
>         INIT_LIST_HEAD(&net->ipv4.mr_tables);
>
> -       mrt = ipmr_new_table(net, RT_TABLE_DEFAULT);
> +       mrt = ipmr_new_table(net, RT_TABLE_DEFAULT, true);
>         if (IS_ERR(mrt)) {
>                 err = PTR_ERR(mrt);
>                 goto err1;
> @@ -322,7 +322,7 @@ static int __net_init ipmr_rules_init(struct net *net)
>  {
>         struct mr_table *mrt;
>
> -       mrt = ipmr_new_table(net, RT_TABLE_DEFAULT);
> +       mrt = ipmr_new_table(net, RT_TABLE_DEFAULT, true);
>         if (IS_ERR(mrt))
>                 return PTR_ERR(mrt);
>         net->ipv4.mrt = mrt;
> @@ -392,7 +392,7 @@ static struct mr_table_ops ipmr_mr_table_ops = {
>         .cmparg_any = &ipmr_mr_table_ops_cmparg_any,
>  };
>
> -static struct mr_table *ipmr_new_table(struct net *net, u32 id)
> +static struct mr_table *ipmr_new_table(struct net *net, u32 id, bool init)
>  {
>         struct mr_table *mrt;
>
> @@ -400,9 +400,11 @@ static struct mr_table *ipmr_new_table(struct net *net, u32 id)
>         if (id != RT_TABLE_DEFAULT && id >= 1000000000)
>                 return ERR_PTR(-EINVAL);
>
> -       mrt = ipmr_get_table(net, id);
> -       if (mrt)
> -               return mrt;
> +       if (!init) {
> +               mrt = ipmr_get_table(net, id);
> +               if (mrt)
> +                       return mrt;
> +       }
>
>         return mr_table_alloc(net, id, &ipmr_mr_table_ops,
>                               ipmr_expire_process, ipmr_new_table_set);
> @@ -1547,7 +1549,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval,
>                 if (sk == rtnl_dereference(mrt->mroute_sk)) {
>                         ret = -EBUSY;
>                 } else {
> -                       mrt = ipmr_new_table(net, uval);
> +                       mrt = ipmr_new_table(net, uval, false);
>                         if (IS_ERR(mrt))
>                                 ret = PTR_ERR(mrt);
>                         else
>
>
Anders Roxell Nov. 21, 2019, 10:17 a.m. UTC | #3
On Thu, 21 Nov 2019 at 08:15, Anders Roxell <anders.roxell@linaro.org> wrote:
>
> On Wed, 20 Nov 2019 at 18:45, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> >
> >
> >
> > On 11/20/19 7:22 AM, Anders Roxell wrote:

[snippet]

> > > +     rtnl_lock();
> > >       err = ipmr_rules_init(net);
> > > +     rtnl_unlock();
> > >       if (err < 0)
> > >               goto ipmr_rules_fail;
> >
> > Hmmm... this might have performance impact for creation of a new netns
> >
> > Since the 'struct net' is not yet fully initialized (thus published/visible),
> > should we really have to grab RTNL (again) only to silence a warning ?
> >
> > What about the following alternative ?
>
> I tried what you suggested, unfortunately, I still got the warning.

I was wrong, so if I also add "lockdep_rtnl_is_held()" to the
"ipmr_for_each_table()" macro it works.

>
>
> [   43.253850][    T1] =============================
> [   43.255473][    T1] WARNING: suspicious RCU usage
> [   43.259068][    T1]
> 5.4.0-rc8-next-20191120-00003-g3aa7c2a8649e-dirty #6 Not tainted
> [   43.263078][    T1] -----------------------------
> [   43.265134][    T1] net/ipv4/ipmr.c:1759 RCU-list traversed in
> non-reader section!!
> [   43.267587][    T1]
> [   43.267587][    T1] other info that might help us debug this:
> [   43.267587][    T1]
> [   43.271129][    T1]
> [   43.271129][    T1] rcu_scheduler_active = 2, debug_locks = 1
> [   43.274021][    T1] 2 locks held by swapper/0/1:
> [   43.275532][    T1]  #0: ffff000065abeaa0 (&dev->mutex){....}, at:
> __device_driver_lock+0xa0/0xb0
> [   43.278930][    T1]  #1: ffffa000153017f0 (rtnl_mutex){+.+.}, at:
> rtnl_lock+0x1c/0x28
> [   43.282023][    T1]
> [   43.282023][    T1] stack backtrace:
> [   43.283921][    T1] CPU: 0 PID: 1 Comm: swapper/0 Not tainted
> 5.4.0-rc8-next-20191120-00003-g3aa7c2a8649e-dirty #6
> [   43.287152][    T1] Hardware name: linux,dummy-virt (DT)
> [   43.288920][    T1] Call trace:
> [   43.290057][    T1]  dump_backtrace+0x0/0x2d0
> [   43.291535][    T1]  show_stack+0x20/0x30
> [   43.292967][    T1]  dump_stack+0x204/0x2ac
> [   43.294423][    T1]  lockdep_rcu_suspicious+0xf4/0x108
> [   43.296163][    T1]  ipmr_device_event+0x100/0x1e8
> [   43.297812][    T1]  notifier_call_chain+0x100/0x1a8
> [   43.299486][    T1]  raw_notifier_call_chain+0x38/0x48
> [   43.301248][    T1]  call_netdevice_notifiers_info+0x128/0x148
> [   43.303158][    T1]  rollback_registered_many+0x684/0xb48
> [   43.304963][    T1]  rollback_registered+0xd8/0x150
> [   43.306595][    T1]  unregister_netdevice_queue+0x194/0x1b8
> [   43.308406][    T1]  unregister_netdev+0x24/0x38
> [   43.310012][    T1]  virtnet_remove+0x44/0x78
> [   43.311519][    T1]  virtio_dev_remove+0x5c/0xc0
> [   43.313114][    T1]  really_probe+0x508/0x920
> [   43.314635][    T1]  driver_probe_device+0x164/0x230
> [   43.316337][    T1]  device_driver_attach+0x8c/0xc0
> [   43.318024][    T1]  __driver_attach+0x1e0/0x1f8
> [   43.319584][    T1]  bus_for_each_dev+0xf0/0x188
> [   43.321169][    T1]  driver_attach+0x34/0x40
> [   43.322645][    T1]  bus_add_driver+0x204/0x3c8
> [   43.324202][    T1]  driver_register+0x160/0x1f8
> [   43.325788][    T1]  register_virtio_driver+0x7c/0x88
> [   43.327480][    T1]  virtio_net_driver_init+0xa8/0xf4
> [   43.329196][    T1]  do_one_initcall+0x4c0/0xad8
> [   43.330767][    T1]  kernel_init_freeable+0x3e0/0x500
> [   43.332444][    T1]  kernel_init+0x14/0x1f0
> [   43.333901][    T1]  ret_from_fork+0x10/0x18
>
>
> Cheers,
> Anders
>
> >
> > diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
> > index 6e68def66822f47fc08d94eddd32a4bd4f9fdfb0..b6dcdce08f1d82c83756a319623e24ae0174092c 100644
> > --- a/net/ipv4/ipmr.c
> > +++ b/net/ipv4/ipmr.c
> > @@ -94,7 +94,7 @@ static DEFINE_SPINLOCK(mfc_unres_lock);
> >
> >  static struct kmem_cache *mrt_cachep __ro_after_init;
> >
> > -static struct mr_table *ipmr_new_table(struct net *net, u32 id);
> > +static struct mr_table *ipmr_new_table(struct net *net, u32 id, bool init);
> >  static void ipmr_free_table(struct mr_table *mrt);
> >

 static void ip_mr_forward(struct net *net, struct mr_table *mrt,
@@ -110,7 +110,8 @@ static void ipmr_expire_process(struct timer_list *t);

 #ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
 #define ipmr_for_each_table(mrt, net) \
-       list_for_each_entry_rcu(mrt, &net->ipv4.mr_tables, list)
+       list_for_each_entry_rcu(mrt, &net->ipv4.mr_tables, list, \
+                               lockdep_rtnl_is_held())
 static struct mr_table *ipmr_mr_table_iter(struct net *net,
                                           struct mr_table *mrt)


Cheers,
Anders

> >  static void ip_mr_forward(struct net *net, struct mr_table *mrt,
> > @@ -245,7 +245,7 @@ static int __net_init ipmr_rules_init(struct net *net)
> >
> >         INIT_LIST_HEAD(&net->ipv4.mr_tables);
> >
> > -       mrt = ipmr_new_table(net, RT_TABLE_DEFAULT);
> > +       mrt = ipmr_new_table(net, RT_TABLE_DEFAULT, true);
> >         if (IS_ERR(mrt)) {
> >                 err = PTR_ERR(mrt);
> >                 goto err1;
> > @@ -322,7 +322,7 @@ static int __net_init ipmr_rules_init(struct net *net)
> >  {
> >         struct mr_table *mrt;
> >
> > -       mrt = ipmr_new_table(net, RT_TABLE_DEFAULT);
> > +       mrt = ipmr_new_table(net, RT_TABLE_DEFAULT, true);
> >         if (IS_ERR(mrt))
> >                 return PTR_ERR(mrt);
> >         net->ipv4.mrt = mrt;
> > @@ -392,7 +392,7 @@ static struct mr_table_ops ipmr_mr_table_ops = {
> >         .cmparg_any = &ipmr_mr_table_ops_cmparg_any,
> >  };
> >
> > -static struct mr_table *ipmr_new_table(struct net *net, u32 id)
> > +static struct mr_table *ipmr_new_table(struct net *net, u32 id, bool init)
> >  {
> >         struct mr_table *mrt;
> >
> > @@ -400,9 +400,11 @@ static struct mr_table *ipmr_new_table(struct net *net, u32 id)
> >         if (id != RT_TABLE_DEFAULT && id >= 1000000000)
> >                 return ERR_PTR(-EINVAL);
> >
> > -       mrt = ipmr_get_table(net, id);
> > -       if (mrt)
> > -               return mrt;
> > +       if (!init) {
> > +               mrt = ipmr_get_table(net, id);
> > +               if (mrt)
> > +                       return mrt;
> > +       }
> >
> >         return mr_table_alloc(net, id, &ipmr_mr_table_ops,
> >                               ipmr_expire_process, ipmr_new_table_set);
> > @@ -1547,7 +1549,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval,
> >                 if (sk == rtnl_dereference(mrt->mroute_sk)) {
> >                         ret = -EBUSY;
> >                 } else {
> > -                       mrt = ipmr_new_table(net, uval);
> > +                       mrt = ipmr_new_table(net, uval, false);
> >                         if (IS_ERR(mrt))
> >                                 ret = PTR_ERR(mrt);
> >                         else
> >
> >
Eric Dumazet Nov. 21, 2019, 5:41 p.m. UTC | #4
On 11/21/19 2:17 AM, Anders Roxell wrote:
> On Thu, 21 Nov 2019 at 08:15, Anders Roxell <anders.roxell@linaro.org> wrote:
>>
>> On Wed, 20 Nov 2019 at 18:45, Eric Dumazet <eric.dumazet@gmail.com> wrote:
>>>
>>>
>>>
>>> On 11/20/19 7:22 AM, Anders Roxell wrote:
> 
> [snippet]
> 
>>>> +     rtnl_lock();
>>>>       err = ipmr_rules_init(net);
>>>> +     rtnl_unlock();
>>>>       if (err < 0)
>>>>               goto ipmr_rules_fail;
>>>
>>> Hmmm... this might have performance impact for creation of a new netns
>>>
>>> Since the 'struct net' is not yet fully initialized (thus published/visible),
>>> should we really have to grab RTNL (again) only to silence a warning ?
>>>
>>> What about the following alternative ?
>>
>> I tried what you suggested, unfortunately, I still got the warning.
> 
> I was wrong, so if I also add "lockdep_rtnl_is_held()" to the
> "ipmr_for_each_table()" macro it works.
> 
>>
>>
>> [   43.253850][    T1] =============================
>> [   43.255473][    T1] WARNING: suspicious RCU usage
>> [   43.259068][    T1]
>> 5.4.0-rc8-next-20191120-00003-g3aa7c2a8649e-dirty #6 Not tainted
>> [   43.263078][    T1] -----------------------------
>> [   43.265134][    T1] net/ipv4/ipmr.c:1759 RCU-list traversed in
>> non-reader section!!
>> [   43.267587][    T1]
>> [   43.267587][    T1] other info that might help us debug this:
>> [   43.267587][    T1]
>> [   43.271129][    T1]
>> [   43.271129][    T1] rcu_scheduler_active = 2, debug_locks = 1
>> [   43.274021][    T1] 2 locks held by swapper/0/1:
>> [   43.275532][    T1]  #0: ffff000065abeaa0 (&dev->mutex){....}, at:
>> __device_driver_lock+0xa0/0xb0
>> [   43.278930][    T1]  #1: ffffa000153017f0 (rtnl_mutex){+.+.}, at:
>> rtnl_lock+0x1c/0x28
>> [   43.282023][    T1]
>> [   43.282023][    T1] stack backtrace:
>> [   43.283921][    T1] CPU: 0 PID: 1 Comm: swapper/0 Not tainted
>> 5.4.0-rc8-next-20191120-00003-g3aa7c2a8649e-dirty #6
>> [   43.287152][    T1] Hardware name: linux,dummy-virt (DT)
>> [   43.288920][    T1] Call trace:
>> [   43.290057][    T1]  dump_backtrace+0x0/0x2d0
>> [   43.291535][    T1]  show_stack+0x20/0x30
>> [   43.292967][    T1]  dump_stack+0x204/0x2ac
>> [   43.294423][    T1]  lockdep_rcu_suspicious+0xf4/0x108
>> [   43.296163][    T1]  ipmr_device_event+0x100/0x1e8
>> [   43.297812][    T1]  notifier_call_chain+0x100/0x1a8
>> [   43.299486][    T1]  raw_notifier_call_chain+0x38/0x48
>> [   43.301248][    T1]  call_netdevice_notifiers_info+0x128/0x148
>> [   43.303158][    T1]  rollback_registered_many+0x684/0xb48
>> [   43.304963][    T1]  rollback_registered+0xd8/0x150
>> [   43.306595][    T1]  unregister_netdevice_queue+0x194/0x1b8
>> [   43.308406][    T1]  unregister_netdev+0x24/0x38
>> [   43.310012][    T1]  virtnet_remove+0x44/0x78
>> [   43.311519][    T1]  virtio_dev_remove+0x5c/0xc0
>> [   43.313114][    T1]  really_probe+0x508/0x920
>> [   43.314635][    T1]  driver_probe_device+0x164/0x230
>> [   43.316337][    T1]  device_driver_attach+0x8c/0xc0
>> [   43.318024][    T1]  __driver_attach+0x1e0/0x1f8
>> [   43.319584][    T1]  bus_for_each_dev+0xf0/0x188
>> [   43.321169][    T1]  driver_attach+0x34/0x40
>> [   43.322645][    T1]  bus_add_driver+0x204/0x3c8
>> [   43.324202][    T1]  driver_register+0x160/0x1f8
>> [   43.325788][    T1]  register_virtio_driver+0x7c/0x88
>> [   43.327480][    T1]  virtio_net_driver_init+0xa8/0xf4
>> [   43.329196][    T1]  do_one_initcall+0x4c0/0xad8
>> [   43.330767][    T1]  kernel_init_freeable+0x3e0/0x500
>> [   43.332444][    T1]  kernel_init+0x14/0x1f0
>> [   43.333901][    T1]  ret_from_fork+0x10/0x18
>>
>>
>> Cheers,
>> Anders
>>
>>>
>>> diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
>>> index 6e68def66822f47fc08d94eddd32a4bd4f9fdfb0..b6dcdce08f1d82c83756a319623e24ae0174092c 100644
>>> --- a/net/ipv4/ipmr.c
>>> +++ b/net/ipv4/ipmr.c
>>> @@ -94,7 +94,7 @@ static DEFINE_SPINLOCK(mfc_unres_lock);
>>>
>>>  static struct kmem_cache *mrt_cachep __ro_after_init;
>>>
>>> -static struct mr_table *ipmr_new_table(struct net *net, u32 id);
>>> +static struct mr_table *ipmr_new_table(struct net *net, u32 id, bool init);
>>>  static void ipmr_free_table(struct mr_table *mrt);
>>>
> 
>  static void ip_mr_forward(struct net *net, struct mr_table *mrt,
> @@ -110,7 +110,8 @@ static void ipmr_expire_process(struct timer_list *t);
> 
>  #ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
>  #define ipmr_for_each_table(mrt, net) \
> -       list_for_each_entry_rcu(mrt, &net->ipv4.mr_tables, list)
> +       list_for_each_entry_rcu(mrt, &net->ipv4.mr_tables, list, \
> +                               lockdep_rtnl_is_held())
>  static struct mr_table *ipmr_mr_table_iter(struct net *net,
>                                            struct mr_table *mrt)
> 
> 
> Cheers,
> Anders

That is great, I guess we can submit the two patches in a series.
diff mbox series

Patch

diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 6e68def66822..53dff9a0e60a 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -110,7 +110,8 @@  static void ipmr_expire_process(struct timer_list *t);
 
 #ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
 #define ipmr_for_each_table(mrt, net) \
-	list_for_each_entry_rcu(mrt, &net->ipv4.mr_tables, list)
+	list_for_each_entry_rcu(mrt, &net->ipv4.mr_tables, list, \
+				lockdep_rtnl_is_held())
 
 static struct mr_table *ipmr_mr_table_iter(struct net *net,
 					   struct mr_table *mrt)
@@ -3086,7 +3087,9 @@  static int __net_init ipmr_net_init(struct net *net)
 	if (err)
 		goto ipmr_notifier_fail;
 
+	rtnl_lock();
 	err = ipmr_rules_init(net);
+	rtnl_unlock();
 	if (err < 0)
 		goto ipmr_rules_fail;