[v2] ipset: list:set: Decrease refcount synchronously on deletion and replace

Message ID dc95106369592147aac026d39212fe960cc22fe1.1530895281.git.sbrivio@redhat.com
State Changes Requested
Delegated to: Jozsef Kadlecsik
Headers show
Series
  • [v2] ipset: list:set: Decrease refcount synchronously on deletion and replace
Related show

Commit Message

Stefano Brivio July 8, 2018, 9:46 a.m.
Commit 45040978c899 ("netfilter: ipset: Fix set:list type crash
when flush/dump set in parallel") postponed decreasing set
reference counters to the RCU callback.

An 'ipset del' command can terminate before the RCU grace period
is elapsed, and if sets are listed before then, the reference
counter shown in userspace will be wrong:

 # ipset create h hash:ip; ipset create l list:set; ipset add l
 # ipset del l h; ipset list h
 Name: h
 Type: hash:ip
 Revision: 4
 Header: family inet hashsize 1024 maxelem 65536
 Size in memory: 88
 References: 1
 Number of entries: 0
 Members:
 # sleep 1; ipset list h
 Name: h
 Type: hash:ip
 Revision: 4
 Header: family inet hashsize 1024 maxelem 65536
 Size in memory: 88
 References: 0
 Number of entries: 0
 Members:

Fix this by making the reference count update synchronous again.

As a result, when sets are listed, ip_set_name_byindex() might
now fetch a set whose reference count is already zero. Instead
of relying on the reference count to protect against concurrent
set renaming, grab the netlink refcount while copying the name,
and avoid renaming if the netlink refcount is taken, in a
similar way as it's already done for swap and delete operations.

This also reverts commit db268d4dfdb9 ("ipset: remove unused
function __ip_set_get_netlink") and restores that function
since we now need it.

Reported-by: Li Shuang <shuali@redhat.com>
Fixes: 45040978c899 ("netfilter: ipset: Fix set:list type crash when flush/dump set in parallel")
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
---
v2: As pointed out by Joszef, we can't assume that the nfnl mutex
    is taken while dumping, so we need some explicit locking
    to protect names of list:set members against changes

 include/linux/netfilter/ipset/ip_set.h |  2 +-
 net/netfilter/ipset/ip_set_core.c      | 31 +++++++++++++++++++++----------
 net/netfilter/ipset/ip_set_list_set.c  | 17 +++++++++++------
 3 files changed, 33 insertions(+), 17 deletions(-)

Comments

Jozsef Kadlecsik July 9, 2018, 8:36 p.m. | #1
Hi Stefano,

On Sun, 8 Jul 2018, Stefano Brivio wrote:

> Commit 45040978c899 ("netfilter: ipset: Fix set:list type crash
> when flush/dump set in parallel") postponed decreasing set
> reference counters to the RCU callback.
> 
> An 'ipset del' command can terminate before the RCU grace period
> is elapsed, and if sets are listed before then, the reference
> counter shown in userspace will be wrong:
> 
>  # ipset create h hash:ip; ipset create l list:set; ipset add l
>  # ipset del l h; ipset list h
>  Name: h
>  Type: hash:ip
>  Revision: 4
>  Header: family inet hashsize 1024 maxelem 65536
>  Size in memory: 88
>  References: 1
>  Number of entries: 0
>  Members:
>  # sleep 1; ipset list h
>  Name: h
>  Type: hash:ip
>  Revision: 4
>  Header: family inet hashsize 1024 maxelem 65536
>  Size in memory: 88
>  References: 0
>  Number of entries: 0
>  Members:
> 
> Fix this by making the reference count update synchronous again.
> 
> As a result, when sets are listed, ip_set_name_byindex() might
> now fetch a set whose reference count is already zero. Instead
> of relying on the reference count to protect against concurrent
> set renaming, grab the netlink refcount while copying the name,
> and avoid renaming if the netlink refcount is taken, in a
> similar way as it's already done for swap and delete operations.
> 
> This also reverts commit db268d4dfdb9 ("ipset: remove unused
> function __ip_set_get_netlink") and restores that function
> since we now need it.
> 
> Reported-by: Li Shuang <shuali@redhat.com>
> Fixes: 45040978c899 ("netfilter: ipset: Fix set:list type crash when flush/dump set in parallel")
> Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
> ---
> v2: As pointed out by Joszef, we can't assume that the nfnl mutex
>     is taken while dumping, so we need some explicit locking
>     to protect names of list:set members against changes
> 
>  include/linux/netfilter/ipset/ip_set.h |  2 +-
>  net/netfilter/ipset/ip_set_core.c      | 31 +++++++++++++++++++++----------
>  net/netfilter/ipset/ip_set_list_set.c  | 17 +++++++++++------
>  3 files changed, 33 insertions(+), 17 deletions(-)
> 
> diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h
> index 34fc80f3eb90..1d100efe74ec 100644
> --- a/include/linux/netfilter/ipset/ip_set.h
> +++ b/include/linux/netfilter/ipset/ip_set.h
> @@ -314,7 +314,7 @@ enum {
>  extern ip_set_id_t ip_set_get_byname(struct net *net,
>  				     const char *name, struct ip_set **set);
>  extern void ip_set_put_byindex(struct net *net, ip_set_id_t index);
> -extern const char *ip_set_name_byindex(struct net *net, ip_set_id_t index);
> +extern void ip_set_name_byindex(struct net *net, ip_set_id_t index, char *name);
>  extern ip_set_id_t ip_set_nfnl_get_byindex(struct net *net, ip_set_id_t index);
>  extern void ip_set_nfnl_put(struct net *net, ip_set_id_t index);
>  
> diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
> index bc4bd247bb7d..4062c0396a02 100644
> --- a/net/netfilter/ipset/ip_set_core.c
> +++ b/net/netfilter/ipset/ip_set_core.c
> @@ -527,6 +527,15 @@ __ip_set_put(struct ip_set *set)
>  /* set->ref can be swapped out by ip_set_swap, netlink events (like dump) need
>   * a separate reference counter
>   */
> +static inline void
> +__ip_set_get_netlink(struct ip_set *set)
> +{
> +	write_lock_bh(&ip_set_ref_lock);
> +	BUG_ON(set->ref_netlink != 0);
> +	set->ref_netlink++;
> +	write_unlock_bh(&ip_set_ref_lock);
> +}
> +
>  static inline void
>  __ip_set_put_netlink(struct ip_set *set)
>  {
> @@ -693,21 +702,19 @@ ip_set_put_byindex(struct net *net, ip_set_id_t index)
>  EXPORT_SYMBOL_GPL(ip_set_put_byindex);
>  
>  /* Get the name of a set behind a set index.
> - * We assume the set is referenced, so it does exist and
> - * can't be destroyed. The set cannot be renamed due to
> - * the referencing either.
> - *
> + * Set itself is protected by RCU, but its name isn't: to protect against
> + * renaming, grab its netlink refcount (see ip_set_rename()) and copy it.
>   */
> -const char *
> -ip_set_name_byindex(struct net *net, ip_set_id_t index)
> +void
> +ip_set_name_byindex(struct net *net, ip_set_id_t index, char *name)
>  {
> -	const struct ip_set *set = ip_set_rcu_get(net, index);
> +	struct ip_set *set = ip_set_rcu_get(net, index);
>  
>  	BUG_ON(!set);
> -	BUG_ON(set->ref == 0);
>  
> -	/* Referenced, so it's safe */
> -	return set->name;
> +	__ip_set_get_netlink(set);
> +	strncpy(name, set->name, IPSET_MAXNAMELEN);
> +	__ip_set_put_netlink(set);
>  }
>  EXPORT_SYMBOL_GPL(ip_set_name_byindex);

These are not required: when we call ip_set_name_byindex(), the set is 
still referenced (set->ref cannot be zero), because it's the member of the
list type of set.
  
> @@ -1158,6 +1165,10 @@ static int ip_set_rename(struct net *net, struct sock *ctnl,
>  		ret = -IPSET_ERR_REFERENCED;
>  		goto out;
>  	}
> +	if (set->ref_netlink != 0) {
> +		ret = -EBUSY;
> +		goto out;
> +	}

This is also not required: the set is protected by the normal reference 
counter, so it cannot be renamed. When the set is just deleted from the 
list type of set, it can again be renamed :-).
  
>  	name2 = nla_data(attr[IPSET_ATTR_SETNAME2]);
>  	for (i = 0; i < inst->ip_set_max; i++) {
> diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c
> index 072a658fde04..3a39f20ba6fd 100644
> --- a/net/netfilter/ipset/ip_set_list_set.c
> +++ b/net/netfilter/ipset/ip_set_list_set.c
> @@ -148,9 +148,7 @@ __list_set_del_rcu(struct rcu_head * rcu)
>  {
>  	struct set_elem *e = container_of(rcu, struct set_elem, rcu);
>  	struct ip_set *set = e->set;
> -	struct list_set *map = set->data;
>  
> -	ip_set_put_byindex(map->net, e->id);
>  	ip_set_ext_destroy(set, e);
>  	kfree(e);
>  }
> @@ -158,14 +156,20 @@ __list_set_del_rcu(struct rcu_head * rcu)
>  static inline void
>  list_set_del(struct ip_set *set, struct set_elem *e)
>  {
> +	struct list_set *map = set->data;
> +
>  	set->elements--;
> +	ip_set_put_byindex(map->net, e->id);
>  	list_del_rcu(&e->list);
>  	call_rcu(&e->rcu, __list_set_del_rcu);
>  }
>  
>  static inline void
> -list_set_replace(struct set_elem *e, struct set_elem *old)
> +list_set_replace(struct ip_set *set, struct set_elem *e, struct set_elem *old)
>  {
> +	struct list_set *map = set->data;
> +
> +	ip_set_put_byindex(map->net, old->id);
>  	list_replace_rcu(&old->list, &e->list);
>  	call_rcu(&old->rcu, __list_set_del_rcu);
>  }
> @@ -298,7 +302,7 @@ list_set_uadd(struct ip_set *set, void *value, const struct ip_set_ext *ext,
>  	INIT_LIST_HEAD(&e->list);
>  	list_set_init_extensions(set, ext, e);
>  	if (n)
> -		list_set_replace(e, n);
> +		list_set_replace(set, e, n);
>  	else if (next)
>  		list_add_tail_rcu(&e->list, &next->list);
>  	else if (prev)

I believe the only modifications which are required are up till this point 
in ip_set_list_set.c: ip_set_put_byindex(), moved outside of the 
call_rcu() call. However, the ip_set_put_byindex() calls should be placed 
after list_del_rcu() and list_replace_rcu().

> @@ -486,6 +490,7 @@ list_set_list(const struct ip_set *set,
>  	const struct list_set *map = set->data;
>  	struct nlattr *atd, *nested;
>  	u32 i = 0, first = cb->args[IPSET_CB_ARG0];
> +	char name[IPSET_MAXNAMELEN];
>  	struct set_elem *e;
>  	int ret = 0;
>  
> @@ -504,8 +509,8 @@ list_set_list(const struct ip_set *set,
>  		nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
>  		if (!nested)
>  			goto nla_put_failure;
> -		if (nla_put_string(skb, IPSET_ATTR_NAME,
> -				   ip_set_name_byindex(map->net, e->id)))
> +		ip_set_name_byindex(map->net, e->id, name);
> +		if (nla_put_string(skb, IPSET_ATTR_NAME, name))
>  			goto nla_put_failure;
>  		if (ip_set_put_extensions(skb, set, e, true))
>  			goto nla_put_failure;

And these lines are not required either.

Best regards,
Jozsef
-
E-mail  : kadlec@blackhole.kfki.hu, kadlecsik.jozsef@wigner.mta.hu
PGP key : http://www.kfki.hu/~kadlec/pgp_public_key.txt
Address : Wigner Research Centre for Physics, Hungarian Academy of Sciences
          H-1525 Budapest 114, POB. 49, Hungary
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Stefano Brivio July 9, 2018, 9:37 p.m. | #2
Hi Jozsef,

On Mon, 9 Jul 2018 22:36:51 +0200 (CEST)
Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> wrote:

> I believe the only modifications which are required are up till this point 
> in ip_set_list_set.c: ip_set_put_byindex(), moved outside of the 
> call_rcu() call. However, the ip_set_put_byindex() calls should be placed 
> after list_del_rcu() and list_replace_rcu().

This was also the first approach I took. With the changes you
suggested, I guess this should look like the diff at [1]. Is this what
you meant?

However, list, flush and destroy are not serialised, so, with that
patch, I do actually hit the BUG_ON(set->ref == 0) in
ip_set_name_byindex(). That's why I think I can't rely on set->ref
anymore with those changes:

# while true; do ipset create h hash:ip; ipset create l list:set; ipset add l h; ipset flush; ipset destroy; done &
# while true; do ipset list >/dev/null; done

[wait a few minutes]

[  743.473640] ------------[ cut here ]------------
[  743.477645] kernel BUG at net/netfilter/ipset/ip_set_core.c:707!
[  743.478954] invalid opcode: 0000 [#1] SMP NOPTI
[  743.479919] CPU: 1 PID: 22243 Comm: ipset Not tainted 4.18.0-rc3+ #94
[  743.481273] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1 04/01/2014
[  743.482677] RIP: 0010:ip_set_name_byindex+0x30/0x40 [ip_set]
[  743.483456] Code: 15 7d 69 00 00 48 8b 87 d0 13 00 00 0f b7 f6 48 8b 04 d0 48 8b 00 48 8b 04 f0 48 85 c0 74 09 8b 50 24 85 d2 74 04 f3 c3 0f 0b <0f> 0b 0f 1f 40 00 66 2e 0f 1f 84 00 00 00 00 00 66 66 66 66 90 53 
[  743.486075] RSP: 0018:ffffb5acc2f03b50 EFLAGS: 00010246
[  743.486799] RAX: ffff92fbaf93e900 RBX: ffff92fbba1edf00 RCX: 0000000000000000
[  743.487748] RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffffffffa85daf80
[  743.488691] RBP: ffff92fb8bf11b00 R08: 0000000000000004 R09: ffff92fbafa18f30
[  743.489649] R10: 0000000000000607 R11: ffff92fbaa86f000 R12: ffff92fbafa18f2c
[  743.490591] R13: ffff92fbaf93e240 R14: ffff92fbad8e9a80 R15: 0000000000000000
[  743.491535] FS:  00007f3d6a460740(0000) GS:ffff92fbbfc80000(0000) knlGS:0000000000000000
[  743.492601] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  743.493378] CR2: 00007ffd9e70b0a8 CR3: 0000000107322000 CR4: 00000000000006e0
[  743.494339] Call Trace:
[  743.494677]  list_set_list+0xfe/0x242 [ip_set_list_set]
[  743.495370]  ip_set_dump_start+0x54c/0x6c0 [ip_set]
[  743.496017]  ? __kmalloc_reserve.isra.46+0x2e/0x80
[  743.496649]  ? __alloc_skb+0x96/0x1e0
[  743.497152]  netlink_dump+0x106/0x2b0
[  743.497640]  netlink_recvmsg+0x332/0x430
[  743.498163]  ___sys_recvmsg+0xf5/0x240
[  743.498662]  ? netlink_sendmsg+0x120/0x3a0
[  743.499212]  ? __sys_sendto+0x10e/0x140
[  743.499720]  ? __sys_recvmsg+0x5b/0xa0
[  743.500224]  __sys_recvmsg+0x5b/0xa0
[  743.500704]  do_syscall_64+0x5b/0x180
[  743.501210]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
[  743.501876] RIP: 0033:0x7f3d69b52df0
[  743.502350] Code: 48 3d 01 f0 ff ff 73 01 c3 48 8b 0d 9a 70 2c 00 f7 d8 64 89 01 48 83 c8 ff c3 83 3d ad d1 2c 00 00 75 10 b8 2f 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 31 c3 48 83 ec 08 e8 be cd 00 00 48 89 04 24 
[  743.504825] RSP: 002b:00007ffd9e64b888 EFLAGS: 00000246 ORIG_RAX: 000000000000002f
[  743.505825] RAX: ffffffffffffffda RBX: ffffffffffffffff RCX: 00007f3d69b52df0
[  743.506769] RDX: 0000000000000000 RSI: 00007ffd9e64b8b0 RDI: 0000000000000003
[  743.507709] RBP: 0000000001aff4b8 R08: 00007f3d69851080 R09: 000000000000000c
[  743.508648] R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000001000
[  743.509601] R13: 0000000001b004c0 R14: 0000000000000000 R15: 0000000000000037
[  743.510544] Modules linked in: ip_set_list_set ip_set_hash_ip ip_set nfnetlink sunrpc qxl ttm drm_kms_helper snd_hda_codec_generic syscopyarea sysfillrect snd_hda_intel sysimgblt fb_sys_fops snd_hda_codec drm edac_mce_amd snd_hda_core snd_hwdep joydev snd_pcm snd_timer snd soundcore pcspkr virtio_balloon i2c_piix4 ip_tables xfs libcrc32c ata_generic pata_acpi virtio_net net_failover virtio_blk failover virtio_console ata_piix serio_raw libata virtio_pci floppy virtio_ring virtio
[  743.516760] ---[ end trace ae334fe5735f7b30 ]---
[  743.518014] RIP: 0010:ip_set_name_byindex+0x30/0x40 [ip_set]
[  743.519397] Code: 15 7d 69 00 00 48 8b 87 d0 13 00 00 0f b7 f6 48 8b 04 d0 48 8b 00 48 8b 04 f0 48 85 c0 74 09 8b 50 24 85 d2 74 04 f3 c3 0f 0b <0f> 0b 0f 1f 40 00 66 2e 0f 1f 84 00 00 00 00 00 66 66 66 66 90 53 
[  743.523177] RSP: 0018:ffffb5acc2f03b50 EFLAGS: 00010246
[  743.524519] RAX: ffff92fbaf93e900 RBX: ffff92fbba1edf00 RCX: 0000000000000000
[  743.526114] RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffffffffa85daf80
[  743.527701] RBP: ffff92fb8bf11b00 R08: 0000000000000004 R09: ffff92fbafa18f30
[  743.529292] R10: 0000000000000607 R11: ffff92fbaa86f000 R12: ffff92fbafa18f2c
[  743.530867] R13: ffff92fbaf93e240 R14: ffff92fbad8e9a80 R15: 0000000000000000
[  743.532438] FS:  00007f3d6a460740(0000) GS:ffff92fbbfc80000(0000) knlGS:0000000000000000
[  743.534141] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  743.535512] CR2: 00007ffd9e70b0a8 CR3: 0000000107322000 CR4: 00000000000006e0
[  743.537097] Kernel panic - not syncing: Fatal exception

Message from syslogd@localhost at Jul  9 17:26:29 ...
 kernel:Kernel panic - not syncing: Fatal exception
[  743.545671] Kernel Offset: 0x26400000 from 0xffffffff81000000 (relocation range: 0xffffffff80000000-0xffffffffbfffffff)
[  743.547672] ---[ end Kernel panic - not syncing: Fatal exception ]---

[1]
diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c
index 072a658fde04..9a2392695d34 100644
--- a/net/netfilter/ipset/ip_set_list_set.c
+++ b/net/netfilter/ipset/ip_set_list_set.c
@@ -148,9 +148,7 @@ __list_set_del_rcu(struct rcu_head * rcu)
 {
        struct set_elem *e = container_of(rcu, struct set_elem, rcu);
        struct ip_set *set = e->set;
-       struct list_set *map = set->data;
 
-       ip_set_put_byindex(map->net, e->id);
        ip_set_ext_destroy(set, e);
        kfree(e);
 }
@@ -158,15 +156,21 @@ __list_set_del_rcu(struct rcu_head * rcu)
 static inline void
 list_set_del(struct ip_set *set, struct set_elem *e)
 {
+       struct list_set *map = set->data;
+
        set->elements--;
        list_del_rcu(&e->list);
+       ip_set_put_byindex(map->net, e->id);
        call_rcu(&e->rcu, __list_set_del_rcu);
 }
 
 static inline void
-list_set_replace(struct set_elem *e, struct set_elem *old)
+list_set_replace(struct ip_set *set, struct set_elem *e, struct set_elem *old)
 {
+       struct list_set *map = set->data;
+
        list_replace_rcu(&old->list, &e->list);
+       ip_set_put_byindex(map->net, old->id);
        call_rcu(&old->rcu, __list_set_del_rcu);
 }
 
@@ -298,7 +302,7 @@ list_set_uadd(struct ip_set *set, void *value, const struct ip_set_ext *ext,
        INIT_LIST_HEAD(&e->list);
        list_set_init_extensions(set, ext, e);
        if (n)
-               list_set_replace(e, n);
+               list_set_replace(set, e, n);
        else if (next)
                list_add_tail_rcu(&e->list, &next->list);
        else if (prev)
Jozsef Kadlecsik July 14, 2018, 12:12 p.m. | #3
Hi Stefano,

On Mon, 9 Jul 2018, Stefano Brivio wrote:

> On Mon, 9 Jul 2018 22:36:51 +0200 (CEST)
> Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> wrote:
> 
> > I believe the only modifications which are required are up till this point 
> > in ip_set_list_set.c: ip_set_put_byindex(), moved outside of the 
> > call_rcu() call. However, the ip_set_put_byindex() calls should be placed 
> > after list_del_rcu() and list_replace_rcu().
> 
> This was also the first approach I took. With the changes you
> suggested, I guess this should look like the diff at [1]. Is this what
> you meant?
> 
> However, list, flush and destroy are not serialised, so, with that
> patch, I do actually hit the BUG_ON(set->ref == 0) in
> ip_set_name_byindex(). That's why I think I can't rely on set->ref
> anymore with those changes:

Yes, you're right. I had some concerns with the second version of your 
patch and therefore looked for another way to solve the issue, badly.

So, back to the second version, see my comments in my next mail.

Best regards,
Jozsef
-
E-mail  : kadlec@blackhole.kfki.hu, kadlecsik.jozsef@wigner.mta.hu
PGP key : http://www.kfki.hu/~kadlec/pgp_public_key.txt
Address : Wigner Research Centre for Physics, Hungarian Academy of Sciences
          H-1525 Budapest 114, POB. 49, Hungary
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jozsef Kadlecsik July 14, 2018, 12:23 p.m. | #4
Hi Stefano,

On Sun, 8 Jul 2018, Stefano Brivio wrote:

> Commit 45040978c899 ("netfilter: ipset: Fix set:list type crash
> when flush/dump set in parallel") postponed decreasing set
> reference counters to the RCU callback.
> 
> An 'ipset del' command can terminate before the RCU grace period
> is elapsed, and if sets are listed before then, the reference
> counter shown in userspace will be wrong:
> 
>  # ipset create h hash:ip; ipset create l list:set; ipset add l
>  # ipset del l h; ipset list h
>  Name: h
>  Type: hash:ip
>  Revision: 4
>  Header: family inet hashsize 1024 maxelem 65536
>  Size in memory: 88
>  References: 1
>  Number of entries: 0
>  Members:
>  # sleep 1; ipset list h
>  Name: h
>  Type: hash:ip
>  Revision: 4
>  Header: family inet hashsize 1024 maxelem 65536
>  Size in memory: 88
>  References: 0
>  Number of entries: 0
>  Members:
> 
> Fix this by making the reference count update synchronous again.
> 
> As a result, when sets are listed, ip_set_name_byindex() might
> now fetch a set whose reference count is already zero. Instead
> of relying on the reference count to protect against concurrent
> set renaming, grab the netlink refcount while copying the name,
> and avoid renaming if the netlink refcount is taken, in a
> similar way as it's already done for swap and delete operations.
> 
> This also reverts commit db268d4dfdb9 ("ipset: remove unused
> function __ip_set_get_netlink") and restores that function
> since we now need it.
> 
> Reported-by: Li Shuang <shuali@redhat.com>
> Fixes: 45040978c899 ("netfilter: ipset: Fix set:list type crash when flush/dump set in parallel")
> Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
> ---
> v2: As pointed out by Joszef, we can't assume that the nfnl mutex
>     is taken while dumping, so we need some explicit locking
>     to protect names of list:set members against changes
> 
>  include/linux/netfilter/ipset/ip_set.h |  2 +-
>  net/netfilter/ipset/ip_set_core.c      | 31 +++++++++++++++++++++----------
>  net/netfilter/ipset/ip_set_list_set.c  | 17 +++++++++++------
>  3 files changed, 33 insertions(+), 17 deletions(-)
> 
> diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h
> index 34fc80f3eb90..1d100efe74ec 100644
> --- a/include/linux/netfilter/ipset/ip_set.h
> +++ b/include/linux/netfilter/ipset/ip_set.h
> @@ -314,7 +314,7 @@ enum {
>  extern ip_set_id_t ip_set_get_byname(struct net *net,
>  				     const char *name, struct ip_set **set);
>  extern void ip_set_put_byindex(struct net *net, ip_set_id_t index);
> -extern const char *ip_set_name_byindex(struct net *net, ip_set_id_t index);
> +extern void ip_set_name_byindex(struct net *net, ip_set_id_t index, char *name);
>  extern ip_set_id_t ip_set_nfnl_get_byindex(struct net *net, ip_set_id_t index);
>  extern void ip_set_nfnl_put(struct net *net, ip_set_id_t index);
>  
> diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
> index bc4bd247bb7d..4062c0396a02 100644
> --- a/net/netfilter/ipset/ip_set_core.c
> +++ b/net/netfilter/ipset/ip_set_core.c
> @@ -527,6 +527,15 @@ __ip_set_put(struct ip_set *set)
>  /* set->ref can be swapped out by ip_set_swap, netlink events (like dump) need
>   * a separate reference counter
>   */
> +static inline void
> +__ip_set_get_netlink(struct ip_set *set)
> +{
> +	write_lock_bh(&ip_set_ref_lock);
> +	BUG_ON(set->ref_netlink != 0);
> +	set->ref_netlink++;
> +	write_unlock_bh(&ip_set_ref_lock);
> +}
> +
>  static inline void
>  __ip_set_put_netlink(struct ip_set *set)
>  {
> @@ -693,21 +702,19 @@ ip_set_put_byindex(struct net *net, ip_set_id_t index)
>  EXPORT_SYMBOL_GPL(ip_set_put_byindex);
>  
>  /* Get the name of a set behind a set index.
> - * We assume the set is referenced, so it does exist and
> - * can't be destroyed. The set cannot be renamed due to
> - * the referencing either.
> - *
> + * Set itself is protected by RCU, but its name isn't: to protect against
> + * renaming, grab its netlink refcount (see ip_set_rename()) and copy it.
>   */
> -const char *
> -ip_set_name_byindex(struct net *net, ip_set_id_t index)
> +void
> +ip_set_name_byindex(struct net *net, ip_set_id_t index, char *name)
>  {
> -	const struct ip_set *set = ip_set_rcu_get(net, index);
> +	struct ip_set *set = ip_set_rcu_get(net, index);
>  
>  	BUG_ON(!set);
> -	BUG_ON(set->ref == 0);
>  
> -	/* Referenced, so it's safe */
> -	return set->name;
> +	__ip_set_get_netlink(set);
> +	strncpy(name, set->name, IPSET_MAXNAMELEN);
> +	__ip_set_put_netlink(set);
>  }
>  EXPORT_SYMBOL_GPL(ip_set_name_byindex);

This is my main concern about the patch: it boils down to *four* locking 
operations, for every single list members in a list type of set. It costs 
too much!

ip_set_name_byindex() is used in ip_set_list_set.c only, so please use 
write_lock_bh()/write_unlock_bh() directly, that should be sufficient.
 
> @@ -1158,6 +1165,10 @@ static int ip_set_rename(struct net *net, struct sock *ctnl,
>  		ret = -IPSET_ERR_REFERENCED;
>  		goto out;
>  	}
> +	if (set->ref_netlink != 0) {
> +		ret = -EBUSY;
> +		goto out;
> +	}

Please return -IPSET_ERR_REFERENCED instead, which is then translated into 
a proper textual error message to the users by ipset.
  
>  	name2 = nla_data(attr[IPSET_ATTR_SETNAME2]);
>  	for (i = 0; i < inst->ip_set_max; i++) {
> diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c
> index 072a658fde04..3a39f20ba6fd 100644
> --- a/net/netfilter/ipset/ip_set_list_set.c
> +++ b/net/netfilter/ipset/ip_set_list_set.c
> @@ -148,9 +148,7 @@ __list_set_del_rcu(struct rcu_head * rcu)
>  {
>  	struct set_elem *e = container_of(rcu, struct set_elem, rcu);
>  	struct ip_set *set = e->set;
> -	struct list_set *map = set->data;
>  
> -	ip_set_put_byindex(map->net, e->id);
>  	ip_set_ext_destroy(set, e);
>  	kfree(e);
>  }
>
> @@ -158,14 +156,20 @@ __list_set_del_rcu(struct rcu_head * rcu)
>  static inline void
>  list_set_del(struct ip_set *set, struct set_elem *e)
>  {
> +	struct list_set *map = set->data;
> +
>  	set->elements--;
> +	ip_set_put_byindex(map->net, e->id);
>  	list_del_rcu(&e->list);
>  	call_rcu(&e->rcu, __list_set_del_rcu);
>  }

Above and below please move ip_set_put_byindex() just before call_rcu().
  
>  static inline void
> -list_set_replace(struct set_elem *e, struct set_elem *old)
> +list_set_replace(struct ip_set *set, struct set_elem *e, struct set_elem *old)
>  {
> +	struct list_set *map = set->data;
> +
> +	ip_set_put_byindex(map->net, old->id);
>  	list_replace_rcu(&old->list, &e->list);
>  	call_rcu(&old->rcu, __list_set_del_rcu);
>  }
> @@ -298,7 +302,7 @@ list_set_uadd(struct ip_set *set, void *value, const struct ip_set_ext *ext,
>  	INIT_LIST_HEAD(&e->list);
>  	list_set_init_extensions(set, ext, e);
>  	if (n)
> -		list_set_replace(e, n);
> +		list_set_replace(set, e, n);
>  	else if (next)
>  		list_add_tail_rcu(&e->list, &next->list);
>  	else if (prev)
> @@ -486,6 +490,7 @@ list_set_list(const struct ip_set *set,
>  	const struct list_set *map = set->data;
>  	struct nlattr *atd, *nested;
>  	u32 i = 0, first = cb->args[IPSET_CB_ARG0];
> +	char name[IPSET_MAXNAMELEN];
>  	struct set_elem *e;
>  	int ret = 0;
>  
> @@ -504,8 +509,8 @@ list_set_list(const struct ip_set *set,
>  		nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
>  		if (!nested)
>  			goto nla_put_failure;
> -		if (nla_put_string(skb, IPSET_ATTR_NAME,
> -				   ip_set_name_byindex(map->net, e->id)))
> +		ip_set_name_byindex(map->net, e->id, name);
> +		if (nla_put_string(skb, IPSET_ATTR_NAME, name))
>  			goto nla_put_failure;
>  		if (ip_set_put_extensions(skb, set, e, true))
>  			goto nla_put_failure;

Thanks!

Best regards,
Jozsef
-
E-mail  : kadlec@blackhole.kfki.hu, kadlecsik.jozsef@wigner.mta.hu
PGP key : http://www.kfki.hu/~kadlec/pgp_public_key.txt
Address : Wigner Research Centre for Physics, Hungarian Academy of Sciences
          H-1525 Budapest 114, POB. 49, Hungary
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jozsef Kadlecsik July 14, 2018, 1:09 p.m. | #5
On Sat, 14 Jul 2018, Jozsef Kadlecsik wrote:

> Hi Stefano,
> 
> On Sun, 8 Jul 2018, Stefano Brivio wrote:
> 
> > Commit 45040978c899 ("netfilter: ipset: Fix set:list type crash
> > when flush/dump set in parallel") postponed decreasing set
> > reference counters to the RCU callback.
> > 
> > An 'ipset del' command can terminate before the RCU grace period
> > is elapsed, and if sets are listed before then, the reference
> > counter shown in userspace will be wrong:
> > 
> >  # ipset create h hash:ip; ipset create l list:set; ipset add l
> >  # ipset del l h; ipset list h
> >  Name: h
> >  Type: hash:ip
> >  Revision: 4
> >  Header: family inet hashsize 1024 maxelem 65536
> >  Size in memory: 88
> >  References: 1
> >  Number of entries: 0
> >  Members:
> >  # sleep 1; ipset list h
> >  Name: h
> >  Type: hash:ip
> >  Revision: 4
> >  Header: family inet hashsize 1024 maxelem 65536
> >  Size in memory: 88
> >  References: 0
> >  Number of entries: 0
> >  Members:
> > 
> > Fix this by making the reference count update synchronous again.
> > 
> > As a result, when sets are listed, ip_set_name_byindex() might
> > now fetch a set whose reference count is already zero. Instead
> > of relying on the reference count to protect against concurrent
> > set renaming, grab the netlink refcount while copying the name,
> > and avoid renaming if the netlink refcount is taken, in a
> > similar way as it's already done for swap and delete operations.
> > 
> > This also reverts commit db268d4dfdb9 ("ipset: remove unused
> > function __ip_set_get_netlink") and restores that function
> > since we now need it.
> > 
> > Reported-by: Li Shuang <shuali@redhat.com>
> > Fixes: 45040978c899 ("netfilter: ipset: Fix set:list type crash when flush/dump set in parallel")
> > Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
> > ---
> > v2: As pointed out by Joszef, we can't assume that the nfnl mutex
> >     is taken while dumping, so we need some explicit locking
> >     to protect names of list:set members against changes
> > 
> >  include/linux/netfilter/ipset/ip_set.h |  2 +-
> >  net/netfilter/ipset/ip_set_core.c      | 31 +++++++++++++++++++++----------
> >  net/netfilter/ipset/ip_set_list_set.c  | 17 +++++++++++------
> >  3 files changed, 33 insertions(+), 17 deletions(-)
> > 
> > diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h
> > index 34fc80f3eb90..1d100efe74ec 100644
> > --- a/include/linux/netfilter/ipset/ip_set.h
> > +++ b/include/linux/netfilter/ipset/ip_set.h
> > @@ -314,7 +314,7 @@ enum {
> >  extern ip_set_id_t ip_set_get_byname(struct net *net,
> >  				     const char *name, struct ip_set **set);
> >  extern void ip_set_put_byindex(struct net *net, ip_set_id_t index);
> > -extern const char *ip_set_name_byindex(struct net *net, ip_set_id_t index);
> > +extern void ip_set_name_byindex(struct net *net, ip_set_id_t index, char *name);
> >  extern ip_set_id_t ip_set_nfnl_get_byindex(struct net *net, ip_set_id_t index);
> >  extern void ip_set_nfnl_put(struct net *net, ip_set_id_t index);
> >  
> > diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
> > index bc4bd247bb7d..4062c0396a02 100644
> > --- a/net/netfilter/ipset/ip_set_core.c
> > +++ b/net/netfilter/ipset/ip_set_core.c
> > @@ -527,6 +527,15 @@ __ip_set_put(struct ip_set *set)
> >  /* set->ref can be swapped out by ip_set_swap, netlink events (like dump) need
> >   * a separate reference counter
> >   */
> > +static inline void
> > +__ip_set_get_netlink(struct ip_set *set)
> > +{
> > +	write_lock_bh(&ip_set_ref_lock);
> > +	BUG_ON(set->ref_netlink != 0);
> > +	set->ref_netlink++;
> > +	write_unlock_bh(&ip_set_ref_lock);
> > +}
> > +
> >  static inline void
> >  __ip_set_put_netlink(struct ip_set *set)
> >  {
> > @@ -693,21 +702,19 @@ ip_set_put_byindex(struct net *net, ip_set_id_t index)
> >  EXPORT_SYMBOL_GPL(ip_set_put_byindex);
> >  
> >  /* Get the name of a set behind a set index.
> > - * We assume the set is referenced, so it does exist and
> > - * can't be destroyed. The set cannot be renamed due to
> > - * the referencing either.
> > - *
> > + * Set itself is protected by RCU, but its name isn't: to protect against
> > + * renaming, grab its netlink refcount (see ip_set_rename()) and copy it.
> >   */
> > -const char *
> > -ip_set_name_byindex(struct net *net, ip_set_id_t index)
> > +void
> > +ip_set_name_byindex(struct net *net, ip_set_id_t index, char *name)
> >  {
> > -	const struct ip_set *set = ip_set_rcu_get(net, index);
> > +	struct ip_set *set = ip_set_rcu_get(net, index);
> >  
> >  	BUG_ON(!set);
> > -	BUG_ON(set->ref == 0);
> >  
> > -	/* Referenced, so it's safe */
> > -	return set->name;
> > +	__ip_set_get_netlink(set);
> > +	strncpy(name, set->name, IPSET_MAXNAMELEN);
> > +	__ip_set_put_netlink(set);
> >  }
> >  EXPORT_SYMBOL_GPL(ip_set_name_byindex);
> 
> This is my main concern about the patch: it boils down to *four* locking 
> operations, for every single list members in a list type of set. It costs 
> too much!
> 
> ip_set_name_byindex() is used in ip_set_list_set.c only, so please use 
> write_lock_bh()/write_unlock_bh() directly, that should be sufficient.

No, better use read_lock_bh()/read_unlock_bh() here and...

> > @@ -1158,6 +1165,10 @@ static int ip_set_rename(struct net *net, struct sock *ctnl,
> >  		ret = -IPSET_ERR_REFERENCED;
> >  		goto out;
> >  	}
> > +	if (set->ref_netlink != 0) {
> > +		ret = -EBUSY;
> > +		goto out;
> > +	}

... drop this part from ip_set_rename(), but change the read lock to write 
lock in the function. That's much better.

Best regards,
Jozsef 
> >  	name2 = nla_data(attr[IPSET_ATTR_SETNAME2]);
> >  	for (i = 0; i < inst->ip_set_max; i++) {
> > diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c
> > index 072a658fde04..3a39f20ba6fd 100644
> > --- a/net/netfilter/ipset/ip_set_list_set.c
> > +++ b/net/netfilter/ipset/ip_set_list_set.c
> > @@ -148,9 +148,7 @@ __list_set_del_rcu(struct rcu_head * rcu)
> >  {
> >  	struct set_elem *e = container_of(rcu, struct set_elem, rcu);
> >  	struct ip_set *set = e->set;
> > -	struct list_set *map = set->data;
> >  
> > -	ip_set_put_byindex(map->net, e->id);
> >  	ip_set_ext_destroy(set, e);
> >  	kfree(e);
> >  }
> >
> > @@ -158,14 +156,20 @@ __list_set_del_rcu(struct rcu_head * rcu)
> >  static inline void
> >  list_set_del(struct ip_set *set, struct set_elem *e)
> >  {
> > +	struct list_set *map = set->data;
> > +
> >  	set->elements--;
> > +	ip_set_put_byindex(map->net, e->id);
> >  	list_del_rcu(&e->list);
> >  	call_rcu(&e->rcu, __list_set_del_rcu);
> >  }
> 
> Above and below please move ip_set_put_byindex() just before call_rcu().
>   
> >  static inline void
> > -list_set_replace(struct set_elem *e, struct set_elem *old)
> > +list_set_replace(struct ip_set *set, struct set_elem *e, struct set_elem *old)
> >  {
> > +	struct list_set *map = set->data;
> > +
> > +	ip_set_put_byindex(map->net, old->id);
> >  	list_replace_rcu(&old->list, &e->list);
> >  	call_rcu(&old->rcu, __list_set_del_rcu);
> >  }
> > @@ -298,7 +302,7 @@ list_set_uadd(struct ip_set *set, void *value, const struct ip_set_ext *ext,
> >  	INIT_LIST_HEAD(&e->list);
> >  	list_set_init_extensions(set, ext, e);
> >  	if (n)
> > -		list_set_replace(e, n);
> > +		list_set_replace(set, e, n);
> >  	else if (next)
> >  		list_add_tail_rcu(&e->list, &next->list);
> >  	else if (prev)
> > @@ -486,6 +490,7 @@ list_set_list(const struct ip_set *set,
> >  	const struct list_set *map = set->data;
> >  	struct nlattr *atd, *nested;
> >  	u32 i = 0, first = cb->args[IPSET_CB_ARG0];
> > +	char name[IPSET_MAXNAMELEN];
> >  	struct set_elem *e;
> >  	int ret = 0;
> >  
> > @@ -504,8 +509,8 @@ list_set_list(const struct ip_set *set,
> >  		nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
> >  		if (!nested)
> >  			goto nla_put_failure;
> > -		if (nla_put_string(skb, IPSET_ATTR_NAME,
> > -				   ip_set_name_byindex(map->net, e->id)))
> > +		ip_set_name_byindex(map->net, e->id, name);
> > +		if (nla_put_string(skb, IPSET_ATTR_NAME, name))
> >  			goto nla_put_failure;
> >  		if (ip_set_put_extensions(skb, set, e, true))
> >  			goto nla_put_failure;
> 
> Thanks!
> 
> Best regards,
> Jozsef
> -
> E-mail  : kadlec@blackhole.kfki.hu, kadlecsik.jozsef@wigner.mta.hu
> PGP key : http://www.kfki.hu/~kadlec/pgp_public_key.txt
> Address : Wigner Research Centre for Physics, Hungarian Academy of Sciences
>           H-1525 Budapest 114, POB. 49, Hungary
> --
> To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 

-
E-mail  : kadlec@blackhole.kfki.hu, kadlecsik.jozsef@wigner.mta.hu
PGP key : http://www.kfki.hu/~kadlec/pgp_public_key.txt
Address : Wigner Research Centre for Physics, Hungarian Academy of Sciences
          H-1525 Budapest 114, POB. 49, Hungary
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Stefano Brivio July 14, 2018, 7:58 p.m. | #6
Hi Jozsef,

On Sat, 14 Jul 2018 14:23:54 +0200 (CEST)
Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> wrote:

> > -const char *
> > -ip_set_name_byindex(struct net *net, ip_set_id_t index)
> > +void
> > +ip_set_name_byindex(struct net *net, ip_set_id_t index, char *name)
> >  {
> > -	const struct ip_set *set = ip_set_rcu_get(net, index);
> > +	struct ip_set *set = ip_set_rcu_get(net, index);
> >  
> >  	BUG_ON(!set);
> > -	BUG_ON(set->ref == 0);
> >  
> > -	/* Referenced, so it's safe */
> > -	return set->name;
> > +	__ip_set_get_netlink(set);
> > +	strncpy(name, set->name, IPSET_MAXNAMELEN);
> > +	__ip_set_put_netlink(set);
> >  }
> >  EXPORT_SYMBOL_GPL(ip_set_name_byindex);  
> 
> This is my main concern about the patch: it boils down to *four* locking 
> operations, for every single list members in a list type of set. It costs 
> too much!
> 
> ip_set_name_byindex() is used in ip_set_list_set.c only, so please use 
> write_lock_bh()/write_unlock_bh() directly, that should be sufficient.

I see. I thought it would be "cleaner" this way, but I didn't consider
it's unnecessarily expensive to do that.

I'll send v3 in a moment (also addressing the rest of your comments).
Thanks for the review and all the pointers!

Patch

diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h
index 34fc80f3eb90..1d100efe74ec 100644
--- a/include/linux/netfilter/ipset/ip_set.h
+++ b/include/linux/netfilter/ipset/ip_set.h
@@ -314,7 +314,7 @@  enum {
 extern ip_set_id_t ip_set_get_byname(struct net *net,
 				     const char *name, struct ip_set **set);
 extern void ip_set_put_byindex(struct net *net, ip_set_id_t index);
-extern const char *ip_set_name_byindex(struct net *net, ip_set_id_t index);
+extern void ip_set_name_byindex(struct net *net, ip_set_id_t index, char *name);
 extern ip_set_id_t ip_set_nfnl_get_byindex(struct net *net, ip_set_id_t index);
 extern void ip_set_nfnl_put(struct net *net, ip_set_id_t index);
 
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index bc4bd247bb7d..4062c0396a02 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -527,6 +527,15 @@  __ip_set_put(struct ip_set *set)
 /* set->ref can be swapped out by ip_set_swap, netlink events (like dump) need
  * a separate reference counter
  */
+static inline void
+__ip_set_get_netlink(struct ip_set *set)
+{
+	write_lock_bh(&ip_set_ref_lock);
+	BUG_ON(set->ref_netlink != 0);
+	set->ref_netlink++;
+	write_unlock_bh(&ip_set_ref_lock);
+}
+
 static inline void
 __ip_set_put_netlink(struct ip_set *set)
 {
@@ -693,21 +702,19 @@  ip_set_put_byindex(struct net *net, ip_set_id_t index)
 EXPORT_SYMBOL_GPL(ip_set_put_byindex);
 
 /* Get the name of a set behind a set index.
- * We assume the set is referenced, so it does exist and
- * can't be destroyed. The set cannot be renamed due to
- * the referencing either.
- *
+ * Set itself is protected by RCU, but its name isn't: to protect against
+ * renaming, grab its netlink refcount (see ip_set_rename()) and copy it.
  */
-const char *
-ip_set_name_byindex(struct net *net, ip_set_id_t index)
+void
+ip_set_name_byindex(struct net *net, ip_set_id_t index, char *name)
 {
-	const struct ip_set *set = ip_set_rcu_get(net, index);
+	struct ip_set *set = ip_set_rcu_get(net, index);
 
 	BUG_ON(!set);
-	BUG_ON(set->ref == 0);
 
-	/* Referenced, so it's safe */
-	return set->name;
+	__ip_set_get_netlink(set);
+	strncpy(name, set->name, IPSET_MAXNAMELEN);
+	__ip_set_put_netlink(set);
 }
 EXPORT_SYMBOL_GPL(ip_set_name_byindex);
 
@@ -1158,6 +1165,10 @@  static int ip_set_rename(struct net *net, struct sock *ctnl,
 		ret = -IPSET_ERR_REFERENCED;
 		goto out;
 	}
+	if (set->ref_netlink != 0) {
+		ret = -EBUSY;
+		goto out;
+	}
 
 	name2 = nla_data(attr[IPSET_ATTR_SETNAME2]);
 	for (i = 0; i < inst->ip_set_max; i++) {
diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c
index 072a658fde04..3a39f20ba6fd 100644
--- a/net/netfilter/ipset/ip_set_list_set.c
+++ b/net/netfilter/ipset/ip_set_list_set.c
@@ -148,9 +148,7 @@  __list_set_del_rcu(struct rcu_head * rcu)
 {
 	struct set_elem *e = container_of(rcu, struct set_elem, rcu);
 	struct ip_set *set = e->set;
-	struct list_set *map = set->data;
 
-	ip_set_put_byindex(map->net, e->id);
 	ip_set_ext_destroy(set, e);
 	kfree(e);
 }
@@ -158,14 +156,20 @@  __list_set_del_rcu(struct rcu_head * rcu)
 static inline void
 list_set_del(struct ip_set *set, struct set_elem *e)
 {
+	struct list_set *map = set->data;
+
 	set->elements--;
+	ip_set_put_byindex(map->net, e->id);
 	list_del_rcu(&e->list);
 	call_rcu(&e->rcu, __list_set_del_rcu);
 }
 
 static inline void
-list_set_replace(struct set_elem *e, struct set_elem *old)
+list_set_replace(struct ip_set *set, struct set_elem *e, struct set_elem *old)
 {
+	struct list_set *map = set->data;
+
+	ip_set_put_byindex(map->net, old->id);
 	list_replace_rcu(&old->list, &e->list);
 	call_rcu(&old->rcu, __list_set_del_rcu);
 }
@@ -298,7 +302,7 @@  list_set_uadd(struct ip_set *set, void *value, const struct ip_set_ext *ext,
 	INIT_LIST_HEAD(&e->list);
 	list_set_init_extensions(set, ext, e);
 	if (n)
-		list_set_replace(e, n);
+		list_set_replace(set, e, n);
 	else if (next)
 		list_add_tail_rcu(&e->list, &next->list);
 	else if (prev)
@@ -486,6 +490,7 @@  list_set_list(const struct ip_set *set,
 	const struct list_set *map = set->data;
 	struct nlattr *atd, *nested;
 	u32 i = 0, first = cb->args[IPSET_CB_ARG0];
+	char name[IPSET_MAXNAMELEN];
 	struct set_elem *e;
 	int ret = 0;
 
@@ -504,8 +509,8 @@  list_set_list(const struct ip_set *set,
 		nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
 		if (!nested)
 			goto nla_put_failure;
-		if (nla_put_string(skb, IPSET_ATTR_NAME,
-				   ip_set_name_byindex(map->net, e->id)))
+		ip_set_name_byindex(map->net, e->id, name);
+		if (nla_put_string(skb, IPSET_ATTR_NAME, name))
 			goto nla_put_failure;
 		if (ip_set_put_extensions(skb, set, e, true))
 			goto nla_put_failure;