diff mbox series

[net-next] net: flow_offload: convert block_ing_cb_list to regular list type

Message ID 20190816150654.22106-1-vladbu@mellanox.com
State Accepted
Delegated to: David Miller
Headers show
Series [net-next] net: flow_offload: convert block_ing_cb_list to regular list type | expand

Commit Message

Vlad Buslov Aug. 16, 2019, 3:06 p.m. UTC
RCU list block_ing_cb_list is protected by rcu read lock in
flow_block_ing_cmd() and with flow_indr_block_ing_cb_lock mutex in all
functions that use it. However, flow_block_ing_cmd() needs to call blocking
functions while iterating block_ing_cb_list which leads to following
suspicious RCU usage warning:

[  401.510948] =============================
[  401.510952] WARNING: suspicious RCU usage
[  401.510993] 5.3.0-rc3+ #589 Not tainted
[  401.510996] -----------------------------
[  401.511001] include/linux/rcupdate.h:265 Illegal context switch in RCU read-side critical section!
[  401.511004]
               other info that might help us debug this:

[  401.511008]
               rcu_scheduler_active = 2, debug_locks = 1
[  401.511012] 7 locks held by test-ecmp-add-v/7576:
[  401.511015]  #0: 00000000081d71a5 (sb_writers#4){.+.+}, at: vfs_write+0x166/0x1d0
[  401.511037]  #1: 000000002bd338c3 (&of->mutex){+.+.}, at: kernfs_fop_write+0xef/0x1b0
[  401.511051]  #2: 00000000c921c634 (kn->count#317){.+.+}, at: kernfs_fop_write+0xf7/0x1b0
[  401.511062]  #3: 00000000a19cdd56 (&dev->mutex){....}, at: sriov_numvfs_store+0x6b/0x130
[  401.511079]  #4: 000000005425fa52 (pernet_ops_rwsem){++++}, at: unregister_netdevice_notifier+0x30/0x140
[  401.511092]  #5: 00000000c5822793 (rtnl_mutex){+.+.}, at: unregister_netdevice_notifier+0x35/0x140
[  401.511101]  #6: 00000000c2f3507e (rcu_read_lock){....}, at: flow_block_ing_cmd+0x5/0x130
[  401.511115]
               stack backtrace:
[  401.511121] CPU: 21 PID: 7576 Comm: test-ecmp-add-v Not tainted 5.3.0-rc3+ #589
[  401.511124] Hardware name: Supermicro SYS-2028TP-DECR/X10DRT-P, BIOS 2.0b 03/30/2017
[  401.511127] Call Trace:
[  401.511138]  dump_stack+0x85/0xc0
[  401.511146]  ___might_sleep+0x100/0x180
[  401.511154]  __mutex_lock+0x5b/0x960
[  401.511162]  ? find_held_lock+0x2b/0x80
[  401.511173]  ? __tcf_get_next_chain+0x1d/0xb0
[  401.511179]  ? mark_held_locks+0x49/0x70
[  401.511194]  ? __tcf_get_next_chain+0x1d/0xb0
[  401.511198]  __tcf_get_next_chain+0x1d/0xb0
[  401.511251]  ? uplink_rep_async_event+0x70/0x70 [mlx5_core]
[  401.511261]  tcf_block_playback_offloads+0x39/0x160
[  401.511276]  tcf_block_setup+0x1b0/0x240
[  401.511312]  ? mlx5e_rep_indr_setup_tc_cb+0xca/0x290 [mlx5_core]
[  401.511347]  ? mlx5e_rep_indr_tc_block_unbind+0x50/0x50 [mlx5_core]
[  401.511359]  tc_indr_block_get_and_ing_cmd+0x11b/0x1e0
[  401.511404]  ? mlx5e_rep_indr_tc_block_unbind+0x50/0x50 [mlx5_core]
[  401.511414]  flow_block_ing_cmd+0x7e/0x130
[  401.511453]  ? mlx5e_rep_indr_tc_block_unbind+0x50/0x50 [mlx5_core]
[  401.511462]  __flow_indr_block_cb_unregister+0x7f/0xf0
[  401.511502]  mlx5e_nic_rep_netdevice_event+0x75/0xb0 [mlx5_core]
[  401.511513]  unregister_netdevice_notifier+0xe9/0x140
[  401.511554]  mlx5e_cleanup_rep_tx+0x6f/0xe0 [mlx5_core]
[  401.511597]  mlx5e_detach_netdev+0x4b/0x60 [mlx5_core]
[  401.511637]  mlx5e_vport_rep_unload+0x71/0xc0 [mlx5_core]
[  401.511679]  esw_offloads_disable+0x5b/0x90 [mlx5_core]
[  401.511724]  mlx5_eswitch_disable.cold+0xdf/0x176 [mlx5_core]
[  401.511759]  mlx5_device_disable_sriov+0xab/0xb0 [mlx5_core]
[  401.511794]  mlx5_core_sriov_configure+0xaf/0xd0 [mlx5_core]
[  401.511805]  sriov_numvfs_store+0xf8/0x130
[  401.511817]  kernfs_fop_write+0x122/0x1b0
[  401.511826]  vfs_write+0xdb/0x1d0
[  401.511835]  ksys_write+0x65/0xe0
[  401.511847]  do_syscall_64+0x5c/0xb0
[  401.511857]  entry_SYSCALL_64_after_hwframe+0x49/0xbe
[  401.511862] RIP: 0033:0x7fad892d30f8
[  401.511868] Code: 89 02 48 c7 c0 ff ff ff ff eb bb 0f 1f 80 00 00 00 00 f3 0f 1e fa 48 8d 05 25 96 0d 00 8b 00 85 c0 75 17 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 60 c3 0f 1f 80 00 00 00 00 48 83
 ec 28 48 89
[  401.511871] RSP: 002b:00007ffca2a9fad8 EFLAGS: 00000246 ORIG_RAX: 0000000000000001
[  401.511875] RAX: ffffffffffffffda RBX: 0000000000000002 RCX: 00007fad892d30f8
[  401.511878] RDX: 0000000000000002 RSI: 000055afeb072a90 RDI: 0000000000000001
[  401.511881] RBP: 000055afeb072a90 R08: 00000000ffffffff R09: 000000000000000a
[  401.511884] R10: 000055afeb058710 R11: 0000000000000246 R12: 0000000000000002
[  401.511887] R13: 00007fad893a8780 R14: 0000000000000002 R15: 00007fad893a3740

To fix the described incorrect RCU usage, convert block_ing_cb_list from
RCU list to regular list and protect it with flow_indr_block_ing_cb_lock
mutex in flow_block_ing_cmd().

Fixes: 1150ab0f1b33 ("flow_offload: support get multi-subsystem block")
Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
---
 net/core/flow_offload.c | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

Comments

Jakub Kicinski Aug. 16, 2019, 7:58 p.m. UTC | #1
On Fri, 16 Aug 2019 18:06:54 +0300, Vlad Buslov wrote:
> diff --git a/net/core/flow_offload.c b/net/core/flow_offload.c
> index 64c3d4d72b9c..cf52d9c422fa 100644
> --- a/net/core/flow_offload.c
> +++ b/net/core/flow_offload.c
> @@ -391,6 +391,8 @@ static void flow_indr_block_cb_del(struct flow_indr_block_cb *indr_block_cb)
>  	kfree(indr_block_cb);
>  }
>  
> +static DEFINE_MUTEX(flow_indr_block_ing_cb_lock);

I'd be tempted to place this definition next to:

static LIST_HEAD(block_ing_cb_list);

as it seems this is the list it protects. The reason for the name
discrepancy between the two is not immediately obvious to me :S 
but you're not changing that.

Otherwise makes sense, so FWIW:

Acked-by: Jakub Kicinski <jakub.kicinski@netronome.com>

>  static void flow_block_ing_cmd(struct net_device *dev,
>  			       flow_indr_block_bind_cb_t *cb,
>  			       void *cb_priv,
> @@ -398,11 +400,11 @@ static void flow_block_ing_cmd(struct net_device *dev,
>  {
>  	struct flow_indr_block_ing_entry *entry;
>  
> -	rcu_read_lock();
> -	list_for_each_entry_rcu(entry, &block_ing_cb_list, list) {
> +	mutex_lock(&flow_indr_block_ing_cb_lock);
> +	list_for_each_entry(entry, &block_ing_cb_list, list) {
>  		entry->cb(dev, cb, cb_priv, command);
>  	}
> -	rcu_read_unlock();
> +	mutex_unlock(&flow_indr_block_ing_cb_lock);
>  }
Jiri Pirko Aug. 17, 2019, 5:27 a.m. UTC | #2
Fri, Aug 16, 2019 at 05:06:54PM CEST, vladbu@mellanox.com wrote:
>RCU list block_ing_cb_list is protected by rcu read lock in
>flow_block_ing_cmd() and with flow_indr_block_ing_cb_lock mutex in all
>functions that use it. However, flow_block_ing_cmd() needs to call blocking
>functions while iterating block_ing_cb_list which leads to following
>suspicious RCU usage warning:
>
>[  401.510948] =============================
>[  401.510952] WARNING: suspicious RCU usage
>[  401.510993] 5.3.0-rc3+ #589 Not tainted
>[  401.510996] -----------------------------
>[  401.511001] include/linux/rcupdate.h:265 Illegal context switch in RCU read-side critical section!
>[  401.511004]
>               other info that might help us debug this:
>
>[  401.511008]
>               rcu_scheduler_active = 2, debug_locks = 1
>[  401.511012] 7 locks held by test-ecmp-add-v/7576:
>[  401.511015]  #0: 00000000081d71a5 (sb_writers#4){.+.+}, at: vfs_write+0x166/0x1d0
>[  401.511037]  #1: 000000002bd338c3 (&of->mutex){+.+.}, at: kernfs_fop_write+0xef/0x1b0
>[  401.511051]  #2: 00000000c921c634 (kn->count#317){.+.+}, at: kernfs_fop_write+0xf7/0x1b0
>[  401.511062]  #3: 00000000a19cdd56 (&dev->mutex){....}, at: sriov_numvfs_store+0x6b/0x130
>[  401.511079]  #4: 000000005425fa52 (pernet_ops_rwsem){++++}, at: unregister_netdevice_notifier+0x30/0x140
>[  401.511092]  #5: 00000000c5822793 (rtnl_mutex){+.+.}, at: unregister_netdevice_notifier+0x35/0x140
>[  401.511101]  #6: 00000000c2f3507e (rcu_read_lock){....}, at: flow_block_ing_cmd+0x5/0x130
>[  401.511115]
>               stack backtrace:
>[  401.511121] CPU: 21 PID: 7576 Comm: test-ecmp-add-v Not tainted 5.3.0-rc3+ #589
>[  401.511124] Hardware name: Supermicro SYS-2028TP-DECR/X10DRT-P, BIOS 2.0b 03/30/2017
>[  401.511127] Call Trace:
>[  401.511138]  dump_stack+0x85/0xc0
>[  401.511146]  ___might_sleep+0x100/0x180
>[  401.511154]  __mutex_lock+0x5b/0x960
>[  401.511162]  ? find_held_lock+0x2b/0x80
>[  401.511173]  ? __tcf_get_next_chain+0x1d/0xb0
>[  401.511179]  ? mark_held_locks+0x49/0x70
>[  401.511194]  ? __tcf_get_next_chain+0x1d/0xb0
>[  401.511198]  __tcf_get_next_chain+0x1d/0xb0
>[  401.511251]  ? uplink_rep_async_event+0x70/0x70 [mlx5_core]
>[  401.511261]  tcf_block_playback_offloads+0x39/0x160
>[  401.511276]  tcf_block_setup+0x1b0/0x240
>[  401.511312]  ? mlx5e_rep_indr_setup_tc_cb+0xca/0x290 [mlx5_core]
>[  401.511347]  ? mlx5e_rep_indr_tc_block_unbind+0x50/0x50 [mlx5_core]
>[  401.511359]  tc_indr_block_get_and_ing_cmd+0x11b/0x1e0
>[  401.511404]  ? mlx5e_rep_indr_tc_block_unbind+0x50/0x50 [mlx5_core]
>[  401.511414]  flow_block_ing_cmd+0x7e/0x130
>[  401.511453]  ? mlx5e_rep_indr_tc_block_unbind+0x50/0x50 [mlx5_core]
>[  401.511462]  __flow_indr_block_cb_unregister+0x7f/0xf0
>[  401.511502]  mlx5e_nic_rep_netdevice_event+0x75/0xb0 [mlx5_core]
>[  401.511513]  unregister_netdevice_notifier+0xe9/0x140
>[  401.511554]  mlx5e_cleanup_rep_tx+0x6f/0xe0 [mlx5_core]
>[  401.511597]  mlx5e_detach_netdev+0x4b/0x60 [mlx5_core]
>[  401.511637]  mlx5e_vport_rep_unload+0x71/0xc0 [mlx5_core]
>[  401.511679]  esw_offloads_disable+0x5b/0x90 [mlx5_core]
>[  401.511724]  mlx5_eswitch_disable.cold+0xdf/0x176 [mlx5_core]
>[  401.511759]  mlx5_device_disable_sriov+0xab/0xb0 [mlx5_core]
>[  401.511794]  mlx5_core_sriov_configure+0xaf/0xd0 [mlx5_core]
>[  401.511805]  sriov_numvfs_store+0xf8/0x130
>[  401.511817]  kernfs_fop_write+0x122/0x1b0
>[  401.511826]  vfs_write+0xdb/0x1d0
>[  401.511835]  ksys_write+0x65/0xe0
>[  401.511847]  do_syscall_64+0x5c/0xb0
>[  401.511857]  entry_SYSCALL_64_after_hwframe+0x49/0xbe
>[  401.511862] RIP: 0033:0x7fad892d30f8
>[  401.511868] Code: 89 02 48 c7 c0 ff ff ff ff eb bb 0f 1f 80 00 00 00 00 f3 0f 1e fa 48 8d 05 25 96 0d 00 8b 00 85 c0 75 17 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 60 c3 0f 1f 80 00 00 00 00 48 83
> ec 28 48 89
>[  401.511871] RSP: 002b:00007ffca2a9fad8 EFLAGS: 00000246 ORIG_RAX: 0000000000000001
>[  401.511875] RAX: ffffffffffffffda RBX: 0000000000000002 RCX: 00007fad892d30f8
>[  401.511878] RDX: 0000000000000002 RSI: 000055afeb072a90 RDI: 0000000000000001
>[  401.511881] RBP: 000055afeb072a90 R08: 00000000ffffffff R09: 000000000000000a
>[  401.511884] R10: 000055afeb058710 R11: 0000000000000246 R12: 0000000000000002
>[  401.511887] R13: 00007fad893a8780 R14: 0000000000000002 R15: 00007fad893a3740
>
>To fix the described incorrect RCU usage, convert block_ing_cb_list from
>RCU list to regular list and protect it with flow_indr_block_ing_cb_lock
>mutex in flow_block_ing_cmd().
>
>Fixes: 1150ab0f1b33 ("flow_offload: support get multi-subsystem block")
>Signed-off-by: Vlad Buslov <vladbu@mellanox.com>

Acked-by: Jiri Pirko <jiri@mellanox.com>
David Miller Aug. 19, 2019, 8:03 p.m. UTC | #3
From: Vlad Buslov <vladbu@mellanox.com>
Date: Fri, 16 Aug 2019 18:06:54 +0300

> RCU list block_ing_cb_list is protected by rcu read lock in
> flow_block_ing_cmd() and with flow_indr_block_ing_cb_lock mutex in all
> functions that use it. However, flow_block_ing_cmd() needs to call blocking
> functions while iterating block_ing_cb_list which leads to following
> suspicious RCU usage warning:
 ...
> To fix the described incorrect RCU usage, convert block_ing_cb_list from
> RCU list to regular list and protect it with flow_indr_block_ing_cb_lock
> mutex in flow_block_ing_cmd().
> 
> Fixes: 1150ab0f1b33 ("flow_offload: support get multi-subsystem block")
> Signed-off-by: Vlad Buslov <vladbu@mellanox.com>

Applied.
diff mbox series

Patch

diff --git a/net/core/flow_offload.c b/net/core/flow_offload.c
index 64c3d4d72b9c..cf52d9c422fa 100644
--- a/net/core/flow_offload.c
+++ b/net/core/flow_offload.c
@@ -391,6 +391,8 @@  static void flow_indr_block_cb_del(struct flow_indr_block_cb *indr_block_cb)
 	kfree(indr_block_cb);
 }
 
+static DEFINE_MUTEX(flow_indr_block_ing_cb_lock);
+
 static void flow_block_ing_cmd(struct net_device *dev,
 			       flow_indr_block_bind_cb_t *cb,
 			       void *cb_priv,
@@ -398,11 +400,11 @@  static void flow_block_ing_cmd(struct net_device *dev,
 {
 	struct flow_indr_block_ing_entry *entry;
 
-	rcu_read_lock();
-	list_for_each_entry_rcu(entry, &block_ing_cb_list, list) {
+	mutex_lock(&flow_indr_block_ing_cb_lock);
+	list_for_each_entry(entry, &block_ing_cb_list, list) {
 		entry->cb(dev, cb, cb_priv, command);
 	}
-	rcu_read_unlock();
+	mutex_unlock(&flow_indr_block_ing_cb_lock);
 }
 
 int __flow_indr_block_cb_register(struct net_device *dev, void *cb_priv,
@@ -497,11 +499,10 @@  void flow_indr_block_call(struct net_device *dev,
 }
 EXPORT_SYMBOL_GPL(flow_indr_block_call);
 
-static DEFINE_MUTEX(flow_indr_block_ing_cb_lock);
 void flow_indr_add_block_ing_cb(struct flow_indr_block_ing_entry *entry)
 {
 	mutex_lock(&flow_indr_block_ing_cb_lock);
-	list_add_tail_rcu(&entry->list, &block_ing_cb_list);
+	list_add_tail(&entry->list, &block_ing_cb_list);
 	mutex_unlock(&flow_indr_block_ing_cb_lock);
 }
 EXPORT_SYMBOL_GPL(flow_indr_add_block_ing_cb);
@@ -509,7 +510,7 @@  EXPORT_SYMBOL_GPL(flow_indr_add_block_ing_cb);
 void flow_indr_del_block_ing_cb(struct flow_indr_block_ing_entry *entry)
 {
 	mutex_lock(&flow_indr_block_ing_cb_lock);
-	list_del_rcu(&entry->list);
+	list_del(&entry->list);
 	mutex_unlock(&flow_indr_block_ing_cb_lock);
 }
 EXPORT_SYMBOL_GPL(flow_indr_del_block_ing_cb);