diff mbox series

ipv4: fix a RCU-list lock in fib_triestat_seq_show

Message ID 20200325155532.7238-1-cai@lca.pw
State Superseded
Delegated to: David Miller
Headers show
Series ipv4: fix a RCU-list lock in fib_triestat_seq_show | expand

Commit Message

Qian Cai March 25, 2020, 3:55 p.m. UTC
fib_triestat_seq_show() calls hlist_for_each_entry_rcu(tb, head,
tb_hlist) without rcu_read_lock() will trigger a warning,

 net/ipv4/fib_trie.c:2579 RCU-list traversed in non-reader section!!

 other info that might help us debug this:

 rcu_scheduler_active = 2, debug_locks = 1
 1 lock held by proc01/115277:
  #0: c0000014507acf00 (&p->lock){+.+.}-{3:3}, at: seq_read+0x58/0x670

 Call Trace:
  dump_stack+0xf4/0x164 (unreliable)
  lockdep_rcu_suspicious+0x140/0x164
  fib_triestat_seq_show+0x750/0x880
  seq_read+0x1a0/0x670
  proc_reg_read+0x10c/0x1b0
  __vfs_read+0x3c/0x70
  vfs_read+0xac/0x170
  ksys_read+0x7c/0x140
  system_call+0x5c/0x68

Signed-off-by: Qian Cai <cai@lca.pw>
---
 net/ipv4/fib_trie.c | 2 ++
 1 file changed, 2 insertions(+)

Comments

Eric Dumazet March 25, 2020, 4:13 p.m. UTC | #1
On 3/25/20 8:55 AM, Qian Cai wrote:
> fib_triestat_seq_show() calls hlist_for_each_entry_rcu(tb, head,
> tb_hlist) without rcu_read_lock() will trigger a warning,
> 
>  net/ipv4/fib_trie.c:2579 RCU-list traversed in non-reader section!!
> 
>  other info that might help us debug this:
> 
>  rcu_scheduler_active = 2, debug_locks = 1
>  1 lock held by proc01/115277:
>   #0: c0000014507acf00 (&p->lock){+.+.}-{3:3}, at: seq_read+0x58/0x670
> 
>  Call Trace:
>   dump_stack+0xf4/0x164 (unreliable)
>   lockdep_rcu_suspicious+0x140/0x164
>   fib_triestat_seq_show+0x750/0x880
>   seq_read+0x1a0/0x670
>   proc_reg_read+0x10c/0x1b0
>   __vfs_read+0x3c/0x70
>   vfs_read+0xac/0x170
>   ksys_read+0x7c/0x140
>   system_call+0x5c/0x68
> 
> Signed-off-by: Qian Cai <cai@lca.pw>
> ---
>  net/ipv4/fib_trie.c | 2 ++
>  1 file changed, 2 insertions(+)
> 
> diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
> index ff0c24371e33..73fa37476f03 100644
> --- a/net/ipv4/fib_trie.c
> +++ b/net/ipv4/fib_trie.c
> @@ -2577,6 +2577,7 @@ static int fib_triestat_seq_show(struct seq_file *seq, void *v)
>  		   " %zd bytes, size of tnode: %zd bytes.\n",
>  		   LEAF_SIZE, TNODE_SIZE(0));
>  
> +	rcu_read_lock();
>  	for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
>  		struct hlist_head *head = &net->ipv4.fib_table_hash[h];
>  		struct fib_table *tb;
> @@ -2597,6 +2598,7 @@ static int fib_triestat_seq_show(struct seq_file *seq, void *v)
>  #endif
>  		}
>  	}
> +	rcu_read_unlock();
>  
>  	return 0;
>  }
> 

We probably want to be able to reschedule in the loop (adding a cond_resched() in net-next)

I would prefer :

diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index ff0c24371e3309b3068980f46d1ed743337d2a3e..4b98ffb27136d3b43f179d6b1b42fe84586acc06 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -2581,6 +2581,7 @@ static int fib_triestat_seq_show(struct seq_file *seq, void *v)
                struct hlist_head *head = &net->ipv4.fib_table_hash[h];
                struct fib_table *tb;
 
+               rcu_read_lock();
                hlist_for_each_entry_rcu(tb, head, tb_hlist) {
                        struct trie *t = (struct trie *) tb->tb_data;
                        struct trie_stat stat;
@@ -2596,6 +2597,7 @@ static int fib_triestat_seq_show(struct seq_file *seq, void *v)
                        trie_show_usage(seq, t->stats);
 #endif
                }
+               rcu_read_unlock();
        }
 
        return 0;
Qian Cai March 25, 2020, 5:34 p.m. UTC | #2
> On Mar 25, 2020, at 12:13 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> 
> I would prefer :
> 
> diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
> index ff0c24371e3309b3068980f46d1ed743337d2a3e..4b98ffb27136d3b43f179d6b1b42fe84586acc06 100644
> --- a/net/ipv4/fib_trie.c
> +++ b/net/ipv4/fib_trie.c
> @@ -2581,6 +2581,7 @@ static int fib_triestat_seq_show(struct seq_file *seq, void *v)
>                struct hlist_head *head = &net->ipv4.fib_table_hash[h];
>                struct fib_table *tb;
> 
> +               rcu_read_lock();
>                hlist_for_each_entry_rcu(tb, head, tb_hlist) {
>                        struct trie *t = (struct trie *) tb->tb_data;
>                        struct trie_stat stat;
> @@ -2596,6 +2597,7 @@ static int fib_triestat_seq_show(struct seq_file *seq, void *v)
>                        trie_show_usage(seq, t->stats);
> #endif
>                }
> +               rcu_read_unlock();
>        }
> 
>        return 0;

I have no strong opinion either way. My initial thought was to save 255 extra lock/unlock with a single lock/unlock, but I am not sure how time-consuming for each iteration of the outer loop could be. If it could take a bit too long, it does make a lot of sense to reduce the critical section.
Eric Dumazet March 25, 2020, 6:31 p.m. UTC | #3
On 3/25/20 10:34 AM, Qian Cai wrote:
> 
> 
>> On Mar 25, 2020, at 12:13 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
>>
>> I would prefer :
>>
>> diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
>> index ff0c24371e3309b3068980f46d1ed743337d2a3e..4b98ffb27136d3b43f179d6b1b42fe84586acc06 100644
>> --- a/net/ipv4/fib_trie.c
>> +++ b/net/ipv4/fib_trie.c
>> @@ -2581,6 +2581,7 @@ static int fib_triestat_seq_show(struct seq_file *seq, void *v)
>>                struct hlist_head *head = &net->ipv4.fib_table_hash[h];
>>                struct fib_table *tb;
>>
>> +               rcu_read_lock();
>>                hlist_for_each_entry_rcu(tb, head, tb_hlist) {
>>                        struct trie *t = (struct trie *) tb->tb_data;
>>                        struct trie_stat stat;
>> @@ -2596,6 +2597,7 @@ static int fib_triestat_seq_show(struct seq_file *seq, void *v)
>>                        trie_show_usage(seq, t->stats);
>> #endif
>>                }
>> +               rcu_read_unlock();
>>        }
>>
>>        return 0;
> 
> I have no strong opinion either way. My initial thought was to save 255 extra lock/unlock with a single lock/unlock, but I am not sure how time-consuming for each iteration of the outer loop could be. If it could take a bit too long, it does make a lot of sense to reduce the critical section.
> 


This file could be quite big in some setups.

Alternatively you could use cond_resched_rcu()
diff mbox series

Patch

diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index ff0c24371e33..73fa37476f03 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -2577,6 +2577,7 @@  static int fib_triestat_seq_show(struct seq_file *seq, void *v)
 		   " %zd bytes, size of tnode: %zd bytes.\n",
 		   LEAF_SIZE, TNODE_SIZE(0));
 
+	rcu_read_lock();
 	for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
 		struct hlist_head *head = &net->ipv4.fib_table_hash[h];
 		struct fib_table *tb;
@@ -2597,6 +2598,7 @@  static int fib_triestat_seq_show(struct seq_file *seq, void *v)
 #endif
 		}
 	}
+	rcu_read_unlock();
 
 	return 0;
 }