diff mbox series

netfilter: ipset: ipset list may return wrong member count on bitmap types

Message ID 20220629212109.3045794-1-vpai@akamai.com
State Changes Requested
Delegated to: Jozsef Kadlecsik
Headers show
Series netfilter: ipset: ipset list may return wrong member count on bitmap types | expand

Commit Message

Vishwanath Pai June 29, 2022, 9:21 p.m. UTC
We fixed a similar problem before in commit 7f4f7dd4417d ("netfilter:
ipset: ipset list may return wrong member count for set with timeout").
The same issue exists in ip_set_bitmap_gen.h as well.

Test case:

$ ipset create test bitmap:ip range 10.0.0.0/8 netmask 24 timeout 5
$ ipset add test 10.0.0.0
$ ipset add test 10.255.255.255
$ sleep 5s

$ ipset list test
Name: test
Type: bitmap:ip
Revision: 3
Header: range 10.0.0.0-10.255.255.255 netmask 24 timeout 5
Size in memory: 532568
References: 0
Number of entries: 2
Members:

We return "Number of entries: 2" but no members are listed. That is
because when we run mtype_head the garbage collector hasn't run yet, but
mtype_list checks and cleans up members with expired timeout. To avoid
this we can run mtype_expire before printing the number of elements in
mytype_head().

Reviewed-by: Joshua Hunt <johunt@akamai.com>
Signed-off-by: Vishwanath Pai <vpai@akamai.com>
---
 net/netfilter/ipset/ip_set_bitmap_gen.h | 46 ++++++++++++++++++-------
 1 file changed, 34 insertions(+), 12 deletions(-)

Comments

Jozsef Kadlecsik July 12, 2022, 9:06 p.m. UTC | #1
Hi Vishwanath,

On Wed, 29 Jun 2022, Vishwanath Pai wrote:

> We fixed a similar problem before in commit 7f4f7dd4417d ("netfilter:
> ipset: ipset list may return wrong member count for set with timeout").
> The same issue exists in ip_set_bitmap_gen.h as well.

Could you rework the patch to solve the issue the same way as for the hash 
types (i.e. scanning the set without locking) like in the commit 
33f08da28324 (netfilter: ipset: Fix "INFO: rcu detected stall in hash_xxx" 
reports)? I know the bitmap types have got a limited size but it'd be 
great if the general method would be the same across the different types.

Best regards,
Jozsef
 
> Test case:
> 
> $ ipset create test bitmap:ip range 10.0.0.0/8 netmask 24 timeout 5
> $ ipset add test 10.0.0.0
> $ ipset add test 10.255.255.255
> $ sleep 5s
> 
> $ ipset list test
> Name: test
> Type: bitmap:ip
> Revision: 3
> Header: range 10.0.0.0-10.255.255.255 netmask 24 timeout 5
> Size in memory: 532568
> References: 0
> Number of entries: 2
> Members:
> 
> We return "Number of entries: 2" but no members are listed. That is
> because when we run mtype_head the garbage collector hasn't run yet, but
> mtype_list checks and cleans up members with expired timeout. To avoid
> this we can run mtype_expire before printing the number of elements in
> mytype_head().
> 
> Reviewed-by: Joshua Hunt <johunt@akamai.com>
> Signed-off-by: Vishwanath Pai <vpai@akamai.com>
> ---
>  net/netfilter/ipset/ip_set_bitmap_gen.h | 46 ++++++++++++++++++-------
>  1 file changed, 34 insertions(+), 12 deletions(-)
> 
> diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h
> index 26ab0e9612d8..dd871305bd6e 100644
> --- a/net/netfilter/ipset/ip_set_bitmap_gen.h
> +++ b/net/netfilter/ipset/ip_set_bitmap_gen.h
> @@ -28,6 +28,7 @@
>  #define mtype_del		IPSET_TOKEN(MTYPE, _del)
>  #define mtype_list		IPSET_TOKEN(MTYPE, _list)
>  #define mtype_gc		IPSET_TOKEN(MTYPE, _gc)
> +#define mtype_expire		IPSET_TOKEN(MTYPE, _expire)
>  #define mtype			MTYPE
>  
>  #define get_ext(set, map, id)	((map)->extensions + ((set)->dsize * (id)))
> @@ -88,13 +89,44 @@ mtype_memsize(const struct mtype *map, size_t dsize)
>  	       map->elements * dsize;
>  }
>  
> +/* We should grab set->lock before calling this function */
> +static void
> +mtype_expire(struct ip_set *set)
> +{
> +	struct mtype *map = set->data;
> +	void *x;
> +	u32 id;
> +
> +	for (id = 0; id < map->elements; id++)
> +		if (mtype_gc_test(id, map, set->dsize)) {
> +			x = get_ext(set, map, id);
> +			if (ip_set_timeout_expired(ext_timeout(x, set))) {
> +				clear_bit(id, map->members);
> +				ip_set_ext_destroy(set, x);
> +				set->elements--;
> +			}
> +		}
> +}
> +
>  static int
>  mtype_head(struct ip_set *set, struct sk_buff *skb)
>  {
>  	const struct mtype *map = set->data;
>  	struct nlattr *nested;
> -	size_t memsize = mtype_memsize(map, set->dsize) + set->ext_size;
> +	size_t memsize;
> +
> +	/* If any members have expired, set->elements will be wrong,
> +	 * mytype_expire function will update it with the right count.
> +	 * set->elements can still be incorrect in the case of a huge set
> +	 * because elements can timeout during set->list().
> +	 */
> +	if (SET_WITH_TIMEOUT(set)) {
> +		spin_lock_bh(&set->lock);
> +		mtype_expire(set);
> +		spin_unlock_bh(&set->lock);
> +	}
>  
> +	memsize = mtype_memsize(map, set->dsize) + set->ext_size;
>  	nested = nla_nest_start(skb, IPSET_ATTR_DATA);
>  	if (!nested)
>  		goto nla_put_failure;
> @@ -266,22 +298,12 @@ mtype_gc(struct timer_list *t)
>  {
>  	struct mtype *map = from_timer(map, t, gc);
>  	struct ip_set *set = map->set;
> -	void *x;
> -	u32 id;
>  
>  	/* We run parallel with other readers (test element)
>  	 * but adding/deleting new entries is locked out
>  	 */
>  	spin_lock_bh(&set->lock);
> -	for (id = 0; id < map->elements; id++)
> -		if (mtype_gc_test(id, map, set->dsize)) {
> -			x = get_ext(set, map, id);
> -			if (ip_set_timeout_expired(ext_timeout(x, set))) {
> -				clear_bit(id, map->members);
> -				ip_set_ext_destroy(set, x);
> -				set->elements--;
> -			}
> -		}
> +	mtype_expire(set);
>  	spin_unlock_bh(&set->lock);
>  
>  	map->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ;
> -- 
> 2.25.1
> 
> 

-
E-mail  : kadlec@blackhole.kfki.hu, kadlecsik.jozsef@wigner.hu
PGP key : https://wigner.hu/~kadlec/pgp_public_key.txt
Address : Wigner Research Centre for Physics
          H-1525 Budapest 114, POB. 49, Hungary
Vishwanath Pai July 14, 2022, 12:08 a.m. UTC | #2
On 7/12/22 17:06, Jozsef Kadlecsik wrote:
> Hi Vishwanath,
> 
> On Wed, 29 Jun 2022, Vishwanath Pai wrote:
> 
>> We fixed a similar problem before in commit 7f4f7dd4417d ("netfilter:
>> ipset: ipset list may return wrong member count for set with timeout").
>> The same issue exists in ip_set_bitmap_gen.h as well.
> 
> Could you rework the patch to solve the issue the same way as for the hash 
> types (i.e. scanning the set without locking) like in the commit 
> 33f08da28324 (netfilter: ipset: Fix "INFO: rcu detected stall in hash_xxx" 
> reports)? I know the bitmap types have got a limited size but it'd be 
> great if the general method would be the same across the different types.
> 
> Best regards,
> Jozsef
>  
>> Test case:
>>
>> $ ipset create test bitmap:ip range 10.0.0.0/8 netmask 24 timeout 5
>> $ ipset add test 10.0.0.0
>> $ ipset add test 10.255.255.255
>> $ sleep 5s
>>
>> $ ipset list test
>> Name: test
>> Type: bitmap:ip
>> Revision: 3
>> Header: range 10.0.0.0-10.255.255.255 netmask 24 timeout 5
>> Size in memory: 532568
>> References: 0
>> Number of entries: 2
>> Members:
>>
>> We return "Number of entries: 2" but no members are listed. That is
>> because when we run mtype_head the garbage collector hasn't run yet, but
>> mtype_list checks and cleans up members with expired timeout. To avoid
>> this we can run mtype_expire before printing the number of elements in
>> mytype_head().
>>
>> Reviewed-by: Joshua Hunt <johunt@akamai.com>
>> Signed-off-by: Vishwanath Pai <vpai@akamai.com>
>> ---
>>  net/netfilter/ipset/ip_set_bitmap_gen.h | 46 ++++++++++++++++++-------
>>  1 file changed, 34 insertions(+), 12 deletions(-)
>>
>> diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h
>> index 26ab0e9612d8..dd871305bd6e 100644
>> --- a/net/netfilter/ipset/ip_set_bitmap_gen.h
>> +++ b/net/netfilter/ipset/ip_set_bitmap_gen.h
>> @@ -28,6 +28,7 @@
>>  #define mtype_del		IPSET_TOKEN(MTYPE, _del)
>>  #define mtype_list		IPSET_TOKEN(MTYPE, _list)
>>  #define mtype_gc		IPSET_TOKEN(MTYPE, _gc)
>> +#define mtype_expire		IPSET_TOKEN(MTYPE, _expire)
>>  #define mtype			MTYPE
>>  
>>  #define get_ext(set, map, id)	((map)->extensions + ((set)->dsize * (id)))
>> @@ -88,13 +89,44 @@ mtype_memsize(const struct mtype *map, size_t dsize)
>>  	       map->elements * dsize;
>>  }
>>  
>> +/* We should grab set->lock before calling this function */
>> +static void
>> +mtype_expire(struct ip_set *set)
>> +{
>> +	struct mtype *map = set->data;
>> +	void *x;
>> +	u32 id;
>> +
>> +	for (id = 0; id < map->elements; id++)
>> +		if (mtype_gc_test(id, map, set->dsize)) {
>> +			x = get_ext(set, map, id);
>> +			if (ip_set_timeout_expired(ext_timeout(x, set))) {
>> +				clear_bit(id, map->members);
>> +				ip_set_ext_destroy(set, x);
>> +				set->elements--;
>> +			}
>> +		}
>> +}
>> +
>>  static int
>>  mtype_head(struct ip_set *set, struct sk_buff *skb)
>>  {
>>  	const struct mtype *map = set->data;
>>  	struct nlattr *nested;
>> -	size_t memsize = mtype_memsize(map, set->dsize) + set->ext_size;
>> +	size_t memsize;
>> +
>> +	/* If any members have expired, set->elements will be wrong,
>> +	 * mytype_expire function will update it with the right count.
>> +	 * set->elements can still be incorrect in the case of a huge set
>> +	 * because elements can timeout during set->list().
>> +	 */
>> +	if (SET_WITH_TIMEOUT(set)) {
>> +		spin_lock_bh(&set->lock);
>> +		mtype_expire(set);
>> +		spin_unlock_bh(&set->lock);
>> +	}
>>  
>> +	memsize = mtype_memsize(map, set->dsize) + set->ext_size;
>>  	nested = nla_nest_start(skb, IPSET_ATTR_DATA);
>>  	if (!nested)
>>  		goto nla_put_failure;
>> @@ -266,22 +298,12 @@ mtype_gc(struct timer_list *t)
>>  {
>>  	struct mtype *map = from_timer(map, t, gc);
>>  	struct ip_set *set = map->set;
>> -	void *x;
>> -	u32 id;
>>  
>>  	/* We run parallel with other readers (test element)
>>  	 * but adding/deleting new entries is locked out
>>  	 */
>>  	spin_lock_bh(&set->lock);
>> -	for (id = 0; id < map->elements; id++)
>> -		if (mtype_gc_test(id, map, set->dsize)) {
>> -			x = get_ext(set, map, id);
>> -			if (ip_set_timeout_expired(ext_timeout(x, set))) {
>> -				clear_bit(id, map->members);
>> -				ip_set_ext_destroy(set, x);
>> -				set->elements--;
>> -			}
>> -		}
>> +	mtype_expire(set);
>>  	spin_unlock_bh(&set->lock);
>>  
>>  	map->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ;
>> -- 
>> 2.25.1
>>
>>
> 
> -
> E-mail  : kadlec@blackhole.kfki.hu, kadlecsik.jozsef@wigner.hu
> PGP key : https://urldefense.com/v3/__https://wigner.hu/*kadlec/pgp_public_key.txt__;fg!!GjvTz_vk!WmVWx08Jyxg7z7R4rXMUupSXptToGLCMfZ7GhOy1yP_Ty0YQZFSbPbvlocRcFRBnQqy787ijhdI-ig$ 
> Address : Wigner Research Centre for Physics
>           H-1525 Budapest 114, POB. 49, Hungary

Hi Jozsef,

Sure, I'll give it a try. It might take me a bit longer cause it looks like a
complex set of changes.

Thanks,
Vishwanath
Jozsef Kadlecsik July 14, 2022, 6:08 a.m. UTC | #3
On Wed, 13 Jul 2022, Vishwanath Pai wrote:

> On 7/12/22 17:06, Jozsef Kadlecsik wrote:
> > 
> > On Wed, 29 Jun 2022, Vishwanath Pai wrote:
> > 
> >> We fixed a similar problem before in commit 7f4f7dd4417d ("netfilter:
> >> ipset: ipset list may return wrong member count for set with timeout").
> >> The same issue exists in ip_set_bitmap_gen.h as well.
> > 
> > Could you rework the patch to solve the issue the same way as for the hash 
> > types (i.e. scanning the set without locking) like in the commit 
> > 33f08da28324 (netfilter: ipset: Fix "INFO: rcu detected stall in hash_xxx" 
> > reports)? I know the bitmap types have got a limited size but it'd be 
> > great if the general method would be the same across the different types.
>
> Sure, I'll give it a try. It might take me a bit longer cause it looks like a
> complex set of changes.

I meant not the whole commit 33f08da28324, of course there is no need for 
region locking at the bitmap types. Just to count non-expired elements 
(and extension size) so that it's a reader function and doesn't need 
locking.

Best regards,
Jozsef
-
E-mail  : kadlec@blackhole.kfki.hu, kadlecsik.jozsef@wigner.hu
PGP key : https://wigner.hu/~kadlec/pgp_public_key.txt
Address : Wigner Research Centre for Physics
          H-1525 Budapest 114, POB. 49, Hungary
diff mbox series

Patch

diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h
index 26ab0e9612d8..dd871305bd6e 100644
--- a/net/netfilter/ipset/ip_set_bitmap_gen.h
+++ b/net/netfilter/ipset/ip_set_bitmap_gen.h
@@ -28,6 +28,7 @@ 
 #define mtype_del		IPSET_TOKEN(MTYPE, _del)
 #define mtype_list		IPSET_TOKEN(MTYPE, _list)
 #define mtype_gc		IPSET_TOKEN(MTYPE, _gc)
+#define mtype_expire		IPSET_TOKEN(MTYPE, _expire)
 #define mtype			MTYPE
 
 #define get_ext(set, map, id)	((map)->extensions + ((set)->dsize * (id)))
@@ -88,13 +89,44 @@  mtype_memsize(const struct mtype *map, size_t dsize)
 	       map->elements * dsize;
 }
 
+/* We should grab set->lock before calling this function */
+static void
+mtype_expire(struct ip_set *set)
+{
+	struct mtype *map = set->data;
+	void *x;
+	u32 id;
+
+	for (id = 0; id < map->elements; id++)
+		if (mtype_gc_test(id, map, set->dsize)) {
+			x = get_ext(set, map, id);
+			if (ip_set_timeout_expired(ext_timeout(x, set))) {
+				clear_bit(id, map->members);
+				ip_set_ext_destroy(set, x);
+				set->elements--;
+			}
+		}
+}
+
 static int
 mtype_head(struct ip_set *set, struct sk_buff *skb)
 {
 	const struct mtype *map = set->data;
 	struct nlattr *nested;
-	size_t memsize = mtype_memsize(map, set->dsize) + set->ext_size;
+	size_t memsize;
+
+	/* If any members have expired, set->elements will be wrong,
+	 * mytype_expire function will update it with the right count.
+	 * set->elements can still be incorrect in the case of a huge set
+	 * because elements can timeout during set->list().
+	 */
+	if (SET_WITH_TIMEOUT(set)) {
+		spin_lock_bh(&set->lock);
+		mtype_expire(set);
+		spin_unlock_bh(&set->lock);
+	}
 
+	memsize = mtype_memsize(map, set->dsize) + set->ext_size;
 	nested = nla_nest_start(skb, IPSET_ATTR_DATA);
 	if (!nested)
 		goto nla_put_failure;
@@ -266,22 +298,12 @@  mtype_gc(struct timer_list *t)
 {
 	struct mtype *map = from_timer(map, t, gc);
 	struct ip_set *set = map->set;
-	void *x;
-	u32 id;
 
 	/* We run parallel with other readers (test element)
 	 * but adding/deleting new entries is locked out
 	 */
 	spin_lock_bh(&set->lock);
-	for (id = 0; id < map->elements; id++)
-		if (mtype_gc_test(id, map, set->dsize)) {
-			x = get_ext(set, map, id);
-			if (ip_set_timeout_expired(ext_timeout(x, set))) {
-				clear_bit(id, map->members);
-				ip_set_ext_destroy(set, x);
-				set->elements--;
-			}
-		}
+	mtype_expire(set);
 	spin_unlock_bh(&set->lock);
 
 	map->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ;