diff mbox

[2/3] netlink: Mark dumps as inconsistent which have been interrupted by a resize

Message ID cce004895ab6bcef267e2133a1c7ecab0fac403b.1421759056.git.tgraf@suug.ch
State Awaiting Upstream
Delegated to: Pablo Neira
Headers show

Commit Message

Thomas Graf Jan. 20, 2015, 1:20 p.m. UTC
A deferred resize of nl_table causes the offsets that Netlink diag keeps
to become inaccurate. Mark the dump as inconsistent and have user space
request a new dump.

Signed-off-by: Thomas Graf <tgraf@suug.ch>
---
 net/netlink/af_netlink.c | 10 ++++++++++
 net/netlink/af_netlink.h |  1 +
 net/netlink/diag.c       |  1 +
 3 files changed, 12 insertions(+)

Comments

Ying Xue Jan. 21, 2015, 8:13 a.m. UTC | #1
On 01/20/2015 09:20 PM, Thomas Graf wrote:
> A deferred resize of nl_table causes the offsets that Netlink diag keeps
> to become inaccurate. Mark the dump as inconsistent and have user space
> request a new dump.
> 
> Signed-off-by: Thomas Graf <tgraf@suug.ch>
> ---
>  net/netlink/af_netlink.c | 10 ++++++++++
>  net/netlink/af_netlink.h |  1 +
>  net/netlink/diag.c       |  1 +
>  3 files changed, 12 insertions(+)
> 
> diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
> index 7a94185..e214557 100644
> --- a/net/netlink/af_netlink.c
> +++ b/net/netlink/af_netlink.c
> @@ -91,6 +91,9 @@ static inline int netlink_is_kernel(struct sock *sk)
>  struct netlink_table *nl_table;
>  EXPORT_SYMBOL_GPL(nl_table);
>  
> +atomic_t nl_table_seq;

It sounds like the atomic variable is not initialized.

Regards,
Ying

> +EXPORT_SYMBOL_GPL(nl_table_seq);
> +
>  static DECLARE_WAIT_QUEUE_HEAD(nl_table_wait);
>  
>  static int netlink_dump(struct sock *sk);
> @@ -3071,6 +3074,12 @@ static const struct net_proto_family netlink_family_ops = {
>  	.owner	= THIS_MODULE,	/* for consistency 8) */
>  };
>  
> +static void nl_table_resize_notify(const struct rhashtable *ht,
> +				   enum rht_resize_op op)
> +{
> +	atomic_inc(&nl_table_seq);
> +}
> +
>  static int __net_init netlink_net_init(struct net *net)
>  {
>  #ifdef CONFIG_PROC_FS
> @@ -3124,6 +3133,7 @@ static int __init netlink_proto_init(void)
>  		.max_shift = 16, /* 64K */
>  		.grow_decision = rht_grow_above_75,
>  		.shrink_decision = rht_shrink_below_30,
> +		.resize_notify = nl_table_resize_notify,
>  	};
>  
>  	if (err != 0)
> diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h
> index 7518375..51c23c0 100644
> --- a/net/netlink/af_netlink.h
> +++ b/net/netlink/af_netlink.h
> @@ -74,5 +74,6 @@ struct netlink_table {
>  
>  extern struct netlink_table *nl_table;
>  extern rwlock_t nl_table_lock;
> +extern atomic_t nl_table_seq;
>  
>  #endif
> diff --git a/net/netlink/diag.c b/net/netlink/diag.c
> index 3ee63a3..50aa385 100644
> --- a/net/netlink/diag.c
> +++ b/net/netlink/diag.c
> @@ -112,6 +112,7 @@ static int __netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
>  	int ret = 0, num = 0, i;
>  
>  	req = nlmsg_data(cb->nlh);
> +	cb->seq = atomic_read(&nl_table_seq);
>  
>  	for (i = 0; i < htbl->size; i++) {
>  		struct rhash_head *pos;
> 

--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Thomas Graf Jan. 21, 2015, 12:17 p.m. UTC | #2
On 01/21/15 at 04:13pm, Ying Xue wrote:
> On 01/20/2015 09:20 PM, Thomas Graf wrote:
> > A deferred resize of nl_table causes the offsets that Netlink diag keeps
> > to become inaccurate. Mark the dump as inconsistent and have user space
> > request a new dump.
> > 
> > Signed-off-by: Thomas Graf <tgraf@suug.ch>
> > ---
> >  net/netlink/af_netlink.c | 10 ++++++++++
> >  net/netlink/af_netlink.h |  1 +
> >  net/netlink/diag.c       |  1 +
> >  3 files changed, 12 insertions(+)
> > 
> > diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
> > index 7a94185..e214557 100644
> > --- a/net/netlink/af_netlink.c
> > +++ b/net/netlink/af_netlink.c
> > @@ -91,6 +91,9 @@ static inline int netlink_is_kernel(struct sock *sk)
> >  struct netlink_table *nl_table;
> >  EXPORT_SYMBOL_GPL(nl_table);
> >  
> > +atomic_t nl_table_seq;
> 
> It sounds like the atomic variable is not initialized.

Thanks for the review. We also need to avoid hitting 0 when we overflow
on a seq increment.  The netfilter code is doing this correctly but several
other users are suffering from this as well.

I'll address this in v2 together with the other discussed changes.
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Herbert Xu Jan. 22, 2015, 8:49 a.m. UTC | #3
On Wed, Jan 21, 2015 at 12:17:48PM +0000, Thomas Graf wrote:
>
> Thanks for the review. We also need to avoid hitting 0 when we overflow
> on a seq increment.  The netfilter code is doing this correctly but several
> other users are suffering from this as well.
> 
> I'll address this in v2 together with the other discussed changes.

Could you hold off for a bit? I've got some changes that touch
this area that I'd like to push out.  Basically I'm trying to
eliminate direct access of rhashtable internals from the existing
users.

Thanks,
Herbert Xu Jan. 25, 2015, 11:20 p.m. UTC | #4
On Thu, Jan 22, 2015 at 07:49:24PM +1100, Herbert Xu wrote:
>
> Could you hold off for a bit? I've got some changes that touch
> this area that I'd like to push out.  Basically I'm trying to
> eliminate direct access of rhashtable internals from the existing
> users.

Here are the first two patches, one to add the primitives and one
to demonstrate its use in netlink.  In fact while testing this I
found that the existing netlink walking code is totally broken.

Cheers,
Herbert Xu Jan. 27, 2015, 11:19 p.m. UTC | #5
On Mon, Jan 26, 2015 at 10:20:40AM +1100, Herbert Xu wrote:
> 
> Here are the first two patches, one to add the primitives and one
> to demonstrate its use in netlink.  In fact while testing this I
> found that the existing netlink walking code is totally broken.

So this is a repost of the same thing.  These lockless iterators
won't be usable by netfilter as it stands because it wants to do
nested walks and also isn't currently lockless.

However, I'll wait for Patrick to post his pending patches to see
what exactly he needs before implementing helpers for that case.

Cheers,
David Miller Jan. 29, 2015, 10:42 p.m. UTC | #6
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Wed, 28 Jan 2015 10:19:50 +1100

> On Mon, Jan 26, 2015 at 10:20:40AM +1100, Herbert Xu wrote:
>> 
>> Here are the first two patches, one to add the primitives and one
>> to demonstrate its use in netlink.  In fact while testing this I
>> found that the existing netlink walking code is totally broken.
> 
> So this is a repost of the same thing.  These lockless iterators
> won't be usable by netfilter as it stands because it wants to do
> nested walks and also isn't currently lockless.
> 
> However, I'll wait for Patrick to post his pending patches to see
> what exactly he needs before implementing helpers for that case.

I'm holding off on this series for now.

Let me know if you want me to do something different.

Thanks.
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Herbert Xu Jan. 31, 2015, 3:13 a.m. UTC | #7
On Thu, Jan 29, 2015 at 02:42:46PM -0800, David Miller wrote:
> 
> I'm holding off on this series for now.
> 
> Let me know if you want me to do something different.

No problems.  Here is a new version of these two patches which
hopefully should work on netfilter as well.

Note that the major functional difference compared to the previous
one is that resizes are now handled properly and will cause the
iterator to restart from scratch.

However, as with all existing hash table walker implementations,
if you remove an element from the chain that we're walking over
while we're waiting for user-space to give us a new buffer, then
the walk may miss elements that are still on that chain.

Thanks,
David Miller Feb. 3, 2015, 3:19 a.m. UTC | #8
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sat, 31 Jan 2015 14:13:56 +1100

> On Thu, Jan 29, 2015 at 02:42:46PM -0800, David Miller wrote:
>> 
>> I'm holding off on this series for now.
>> 
>> Let me know if you want me to do something different.
> 
> No problems.  Here is a new version of these two patches which
> hopefully should work on netfilter as well.
> 
> Note that the major functional difference compared to the previous
> one is that resizes are now handled properly and will cause the
> iterator to restart from scratch.
> 
> However, as with all existing hash table walker implementations,
> if you remove an element from the chain that we're walking over
> while we're waiting for user-space to give us a new buffer, then
> the walk may miss elements that are still on that chain.

Series applied to net-next.
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 7a94185..e214557 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -91,6 +91,9 @@  static inline int netlink_is_kernel(struct sock *sk)
 struct netlink_table *nl_table;
 EXPORT_SYMBOL_GPL(nl_table);
 
+atomic_t nl_table_seq;
+EXPORT_SYMBOL_GPL(nl_table_seq);
+
 static DECLARE_WAIT_QUEUE_HEAD(nl_table_wait);
 
 static int netlink_dump(struct sock *sk);
@@ -3071,6 +3074,12 @@  static const struct net_proto_family netlink_family_ops = {
 	.owner	= THIS_MODULE,	/* for consistency 8) */
 };
 
+static void nl_table_resize_notify(const struct rhashtable *ht,
+				   enum rht_resize_op op)
+{
+	atomic_inc(&nl_table_seq);
+}
+
 static int __net_init netlink_net_init(struct net *net)
 {
 #ifdef CONFIG_PROC_FS
@@ -3124,6 +3133,7 @@  static int __init netlink_proto_init(void)
 		.max_shift = 16, /* 64K */
 		.grow_decision = rht_grow_above_75,
 		.shrink_decision = rht_shrink_below_30,
+		.resize_notify = nl_table_resize_notify,
 	};
 
 	if (err != 0)
diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h
index 7518375..51c23c0 100644
--- a/net/netlink/af_netlink.h
+++ b/net/netlink/af_netlink.h
@@ -74,5 +74,6 @@  struct netlink_table {
 
 extern struct netlink_table *nl_table;
 extern rwlock_t nl_table_lock;
+extern atomic_t nl_table_seq;
 
 #endif
diff --git a/net/netlink/diag.c b/net/netlink/diag.c
index 3ee63a3..50aa385 100644
--- a/net/netlink/diag.c
+++ b/net/netlink/diag.c
@@ -112,6 +112,7 @@  static int __netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
 	int ret = 0, num = 0, i;
 
 	req = nlmsg_data(cb->nlh);
+	cb->seq = atomic_read(&nl_table_seq);
 
 	for (i = 0; i < htbl->size; i++) {
 		struct rhash_head *pos;