@@ -11,6 +11,7 @@
#include <linux/ipv6.h>
#include <linux/netlink.h>
#include <linux/netfilter.h>
+#include <linux/refcount.h>
#include <linux/netfilter/x_tables.h>
#include <linux/stringify.h>
#include <linux/vmalloc.h>
@@ -97,6 +98,7 @@ struct ip_set_counter {
};
struct ip_set_comment_rcu {
+ refcount_t ref;
struct rcu_head rcu;
char str[];
};
@@ -336,6 +338,7 @@ extern size_t ip_set_elem_len(struct ip_set *set, struct nlattr *tb[],
size_t len, size_t align);
extern int ip_set_get_extensions(struct ip_set *set, struct nlattr *tb[],
struct ip_set_ext *ext);
+extern void ip_set_ext_get(struct ip_set *set, void *dst, const void *src);
extern int ip_set_put_extensions(struct sk_buff *skb, const struct ip_set *set,
const void *e, bool active);
extern bool ip_set_match_extensions(struct ip_set *set,
@@ -350,8 +350,10 @@ ip_set_init_comment(struct ip_set *set, struct ip_set_comment *comment,
size_t len = ext->comment ? strlen(ext->comment) : 0;
if (unlikely(c)) {
- set->ext_size -= sizeof(*c) + strlen(c->str) + 1;
- kfree_rcu(c, rcu);
+ if (refcount_dec_and_test(&c->ref)) {
+ set->ext_size -= sizeof(*c) + strlen(c->str) + 1;
+ kfree_rcu(c, rcu);
+ }
rcu_assign_pointer(comment->c, NULL);
}
if (!len)
@@ -361,12 +363,37 @@ ip_set_init_comment(struct ip_set *set, struct ip_set_comment *comment,
c = kmalloc(sizeof(*c) + len + 1, GFP_ATOMIC);
if (unlikely(!c))
return;
+ refcount_set(&c->ref, 1);
strscpy(c->str, ext->comment, len + 1);
set->ext_size += sizeof(*c) + strlen(c->str) + 1;
rcu_assign_pointer(comment->c, c);
}
EXPORT_SYMBOL_GPL(ip_set_init_comment);
+void
+ip_set_ext_get(struct ip_set *set, void *dst, const void *src)
+{
+ struct ip_set_comment_rcu *c;
+ struct ip_set_comment *dst_comment;
+ const struct ip_set_comment *src_comment;
+
+ if (!SET_WITH_COMMENT(set))
+ return;
+
+ dst_comment = ext_comment(dst, set);
+ src_comment = ext_comment(src, set);
+ RCU_INIT_POINTER(dst_comment->c, NULL);
+
+ c = rcu_dereference_bh(src_comment->c);
+ if (!c)
+ return;
+
+ if (!refcount_inc_not_zero(&c->ref))
+ return;
+ rcu_assign_pointer(dst_comment->c, c);
+}
+EXPORT_SYMBOL_GPL(ip_set_ext_get);
+
/* Used only when dumping a set, protected by rcu_read_lock() */
static int
ip_set_put_comment(struct sk_buff *skb, const struct ip_set_comment *comment)
@@ -392,9 +419,11 @@ ip_set_comment_free(struct ip_set *set, void *ptr)
c = rcu_dereference_protected(comment->c, 1);
if (unlikely(!c))
return;
- set->ext_size -= sizeof(*c) + strlen(c->str) + 1;
- kfree_rcu(c, rcu);
rcu_assign_pointer(comment->c, NULL);
+ if (refcount_dec_and_test(&c->ref)) {
+ set->ext_size -= sizeof(*c) + strlen(c->str) + 1;
+ kfree_rcu(c, rcu);
+ }
}
typedef void (*destroyer)(struct ip_set *, void *);
@@ -485,13 +485,14 @@ mtype_gc_do(struct ip_set *set, struct htype *h, struct htable *t, u32 r)
{
struct hbucket *n, *tmp;
struct mtype_elem *data;
+ struct mtype_resize_ad *x;
+ LIST_HEAD(list);
u32 i, j, d;
size_t dsize = set->dsize;
#ifdef IP_SET_HASH_WITH_NETS
u8 k;
#endif
u8 htable_bits = t->htable_bits;
-
spin_lock_bh(&t->hregion[r].lock);
for (i = ahash_bucket_start(r, htable_bits);
i < ahash_bucket_end(r, htable_bits); i++) {
@@ -516,6 +517,16 @@ mtype_gc_do(struct ip_set *set, struct htype *h, struct htable *t, u32 r)
k);
#endif
t->hregion[r].elements--;
+ if (atomic_read(&t->ref)) {
+ x = kzalloc_obj(struct mtype_resize_ad,
+ GFP_ATOMIC);
+ if (x) {
+ x->ad = IPSET_DEL;
+ memcpy(&x->d, data,
+ sizeof(struct mtype_elem));
+ list_add_tail(&x->list, &list);
+ }
+ }
ip_set_ext_destroy(set, data);
d++;
}
@@ -551,6 +562,11 @@ mtype_gc_do(struct ip_set *set, struct htype *h, struct htable *t, u32 r)
}
}
spin_unlock_bh(&t->hregion[r].lock);
+ if (!list_empty(&list)) {
+ spin_lock_bh(&set->lock);
+ list_splice_tail_init(&list, &h->ad);
+ spin_unlock_bh(&set->lock);
+ }
}
static void
@@ -584,7 +600,7 @@ mtype_gc(struct work_struct *work)
if (atomic_dec_and_test(&t->uref) && atomic_read(&t->ref)) {
pr_debug("Table destroy after resize by expire: %p\n", t);
- mtype_ahash_destroy(set, t, false);
+ mtype_ahash_destroy(set, t, true);
}
queue_delayed_work(system_power_efficient_wq, &gc->dwork, next_run);
@@ -743,6 +759,7 @@ mtype_resize(struct ip_set *set, bool retried)
}
d = ahash_data(m, m->pos, dsize);
memcpy(d, data, dsize);
+ ip_set_ext_get(set, d, data);
set_bit(m->pos++, m->used);
t->hregion[nr].elements++;
#ifdef IP_SET_HASH_WITH_NETS
@@ -775,10 +792,9 @@ mtype_resize(struct ip_set *set, bool retried)
list_del(l);
kfree(l);
}
- /* If there's nobody else using the table, destroy it */
if (atomic_dec_and_test(&orig->uref)) {
pr_debug("Table destroy by resize %p\n", orig);
- mtype_ahash_destroy(set, orig, false);
+ mtype_ahash_destroy(set, orig, true);
}
out:
@@ -791,7 +807,7 @@ mtype_resize(struct ip_set *set, bool retried)
rcu_read_unlock_bh();
atomic_set(&orig->ref, 0);
atomic_dec(&orig->uref);
- mtype_ahash_destroy(set, t, false);
+ mtype_ahash_destroy(set, t, true);
if (ret == -EAGAIN)
goto retry;
goto out;
@@ -1023,7 +1039,7 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
out:
if (atomic_dec_and_test(&t->uref) && atomic_read(&t->ref)) {
pr_debug("Table destroy after resize by add: %p\n", t);
- mtype_ahash_destroy(set, t, false);
+ mtype_ahash_destroy(set, t, true);
}
return ret;
}
@@ -1040,6 +1056,7 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext,
struct mtype_elem *data;
struct hbucket *n;
struct mtype_resize_ad *x = NULL;
+ bool resize_in_progress;
int i, j, k, r, ret = -IPSET_ERR_EXIST;
u32 key, multi = 0;
size_t dsize = set->dsize;
@@ -1066,7 +1083,7 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext,
data = ahash_data(n, i, dsize);
if (!mtype_data_equal(data, d, &multi))
continue;
- if (SET_ELEM_EXPIRED(set, data))
+ if (SET_ELEM_EXPIRED(set, data) && ext)
goto out;
ret = 0;
@@ -1075,6 +1092,7 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext,
if (i + 1 == n->pos)
n->pos--;
t->hregion[r].elements--;
+ resize_in_progress = atomic_read(&t->ref) && ext && ext->target;
#ifdef IP_SET_HASH_WITH_NETS
for (j = 0; j < IPSET_NET_COUNT; j++)
mtype_del_cidr(set, h,
@@ -1082,9 +1100,11 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext,
#endif
ip_set_ext_destroy(set, data);
- if (atomic_read(&t->ref) && ext->target) {
+ if (resize_in_progress) {
/* Resize is in process and kernel side del,
- * save values
+ * save values. The old-table ref is dropped above;
+ * the delete will be replayed on the resized table
+ * to drop the copied ref there too.
*/
x = kzalloc_obj(struct mtype_resize_ad, GFP_ATOMIC);
if (x) {
@@ -1135,7 +1155,7 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext,
}
if (atomic_dec_and_test(&t->uref) && atomic_read(&t->ref)) {
pr_debug("Table destroy after resize by del: %p\n", t);
- mtype_ahash_destroy(set, t, false);
+ mtype_ahash_destroy(set, t, true);
}
return ret;
}
@@ -1341,7 +1361,7 @@ mtype_uref(struct ip_set *set, struct netlink_callback *cb, bool start)
if (atomic_dec_and_test(&t->uref) && atomic_read(&t->ref)) {
pr_debug("Table destroy after resize "
" by dump: %p\n", t);
- mtype_ahash_destroy(set, t, false);
+ mtype_ahash_destroy(set, t, true);
}
cb->args[IPSET_CB_PRIVATE] = 0;
}