diff mbox series

[xtables-nft,5/6] xtables: handle concurrent ruleset modifications

Message ID 20190423131625.23377-6-fw@strlen.de
State Accepted
Delegated to: Pablo Neira
Headers show
Series handle parallel restore case | expand

Commit Message

Florian Westphal April 23, 2019, 1:16 p.m. UTC
We currently race when several xtables-nft-restore processes attempt to
handle rules in parallel.  For instance, when no rules are present at
all, then

iptables-nft-restore < X & iptables-nft-restore < X

... can cause rules to be restored twice.

Reason is that both processes might detect 'no rules exist', so
neither issues a flush operation.

We can't unconditionally issue the flush, because it would
cause kernel to fail with -ENOENT unless the to-be-flushed table
exists.

This change passes the generation id that was used to build
the transaction to the kernel.

In case another process changed *any* rule somewhere, the transaction
will now fail with -ERESTART.

We then flush the cache, re-fetch the ruleset and refresh
our transaction.

For example, in the above 'parallel restore' case, the iptables-restore
instance that lost the race would detect that the table has been created
already, and would add the needed flush.

In a similar vein, in case --noflush is used, we will add the flush
op for user-defined chains that were created in the mean-time.

Signed-off-by: Florian Westphal <fw@strlen.de>
---
 iptables/nft.c | 131 +++++++++++++++++++++++++++++++++++++++++++++++--
 iptables/nft.h |   1 +
 2 files changed, 128 insertions(+), 4 deletions(-)
diff mbox series

Patch

diff --git a/iptables/nft.c b/iptables/nft.c
index da73e87011db..6354b7e8e72f 100644
--- a/iptables/nft.c
+++ b/iptables/nft.c
@@ -41,6 +41,7 @@ 
 #include <linux/netfilter/xt_limit.h>
 
 #include <libmnl/libmnl.h>
+#include <libnftnl/gen.h>
 #include <libnftnl/table.h>
 #include <libnftnl/chain.h>
 #include <libnftnl/rule.h>
@@ -60,6 +61,36 @@ 
 
 static void *nft_fn;
 
+static int genid_cb(const struct nlmsghdr *nlh, void *data)
+{
+	struct nft_handle *h = data;
+	struct nftnl_gen *gen;
+
+	gen = nftnl_gen_alloc();
+	if (!gen)
+		return MNL_CB_ERROR;
+
+	if (nftnl_gen_nlmsg_parse(nlh, gen) < 0)
+		goto out;
+
+	h->nft_genid = nftnl_gen_get_u32(gen, NFTNL_GEN_ID);
+
+	nftnl_gen_free(gen);
+	return MNL_CB_STOP;
+out:
+	nftnl_gen_free(gen);
+	return MNL_CB_ERROR;
+}
+
+static int mnl_genid_get(struct nft_handle *h)
+{
+	char buf[MNL_SOCKET_BUFFER_SIZE];
+	struct nlmsghdr *nlh;
+
+	nlh = nftnl_nlmsg_build_hdr(buf, NFT_MSG_GETGEN, 0, 0, h->seq);
+	return mnl_talk(h, nlh, genid_cb, h);
+}
+
 int mnl_talk(struct nft_handle *h, struct nlmsghdr *nlh,
 	     int (*cb)(const struct nlmsghdr *nlh, void *data),
 	     void *data)
@@ -109,9 +140,14 @@  static void mnl_nft_batch_continue(struct nftnl_batch *batch)
 	assert(nftnl_batch_update(batch) >= 0);
 }
 
-static uint32_t mnl_batch_begin(struct nftnl_batch *batch, uint32_t seqnum)
+static uint32_t mnl_batch_begin(struct nftnl_batch *batch, uint32_t genid, uint32_t seqnum)
 {
-	nftnl_batch_begin(nftnl_batch_buffer(batch), seqnum);
+	struct nlmsghdr *nlh;
+
+	nlh = nftnl_batch_begin(nftnl_batch_buffer(batch), seqnum);
+
+	mnl_attr_put_u32(nlh, NFTA_GEN_ID, htonl(genid));
+
 	mnl_nft_batch_continue(batch);
 
 	return seqnum;
@@ -1490,6 +1526,7 @@  static int fetch_rule_cache(struct nft_handle *h)
 
 static void __nft_build_cache(struct nft_handle *h)
 {
+	mnl_genid_get(h);
 	fetch_chain_cache(h);
 	fetch_rule_cache(h);
 	h->have_cache = true;
@@ -2678,6 +2715,76 @@  static void batch_obj_del(struct nft_handle *h, struct obj_update *o)
 	free(o);
 }
 
+static void nft_refresh_transaction(struct nft_handle *h)
+{
+	const char *tablename, *chainname;
+	const struct nftnl_chain *c;
+	struct obj_update *n, *tmp;
+	bool exists;
+
+	h->error.lineno = 0;
+
+	list_for_each_entry_safe(n, tmp, &h->obj_list, head) {
+		if (n->implicit) {
+			batch_obj_del(h, n);
+			continue;
+		}
+
+		switch (n->type) {
+		case NFT_COMPAT_TABLE_FLUSH:
+			tablename = nftnl_table_get_str(n->table, NFTNL_TABLE_NAME);
+			if (!tablename)
+				continue;
+			exists = nft_table_find(h, tablename);
+			if (n->skip && exists)
+				n->skip = 0;
+			else if (!n->skip && !exists)
+				n->skip = 1;
+			break;
+		case NFT_COMPAT_TABLE_ADD:
+			tablename = nftnl_table_get_str(n->table, NFTNL_TABLE_NAME);
+			if (!tablename)
+				continue;
+
+			exists = nft_table_find(h, tablename);
+			if (n->skip && !exists)
+				n->skip = 0;
+			break;
+		case NFT_COMPAT_CHAIN_USER_ADD:
+			tablename = nftnl_chain_get_str(n->chain, NFTNL_CHAIN_TABLE);
+			if (!tablename)
+				continue;
+
+			chainname = nftnl_chain_get_str(n->chain, NFTNL_CHAIN_NAME);
+			if (!chainname)
+				continue;
+
+			c = nft_chain_find(h, tablename, chainname);
+			if (c && !n->skip) {
+				/* -restore -n flushes existing rules from redefined user-chain */
+				if (h->noflush)
+					__nft_rule_flush(h, tablename,
+							 chainname, false, true);
+			} else if (!c && n->skip) {
+				n->skip = 0;
+			}
+			break;
+		case NFT_COMPAT_CHAIN_ADD:
+		case NFT_COMPAT_CHAIN_ZERO:
+		case NFT_COMPAT_CHAIN_USER_DEL:
+		case NFT_COMPAT_CHAIN_USER_FLUSH:
+		case NFT_COMPAT_CHAIN_UPDATE:
+		case NFT_COMPAT_CHAIN_RENAME:
+		case NFT_COMPAT_RULE_APPEND:
+		case NFT_COMPAT_RULE_INSERT:
+		case NFT_COMPAT_RULE_REPLACE:
+		case NFT_COMPAT_RULE_DELETE:
+		case NFT_COMPAT_RULE_FLUSH:
+			break;
+		}
+	}
+}
+
 static int nft_action(struct nft_handle *h, int action)
 {
 	struct obj_update *n, *tmp;
@@ -2685,12 +2792,15 @@  static int nft_action(struct nft_handle *h, int action)
 	unsigned int buflen, i, len;
 	bool show_errors = true;
 	char errmsg[1024];
-	uint32_t seq = 1;
+	uint32_t seq;
 	int ret = 0;
 
+retry:
+	seq = 1;
 	h->batch = mnl_batch_init();
 
-	mnl_batch_begin(h->batch, seq++);
+	mnl_batch_begin(h->batch, h->nft_genid, seq++);
+	h->nft_genid++;
 
 	list_for_each_entry(n, &h->obj_list, head) {
 
@@ -2773,7 +2883,20 @@  static int nft_action(struct nft_handle *h, int action)
 		break;
 	}
 
+	errno = 0;
 	ret = mnl_batch_talk(h->nl, h->batch, &h->err_list);
+	if (ret && errno == ERESTART) {
+		nft_rebuild_cache(h);
+
+		nft_refresh_transaction(h);
+
+		i=0;
+		list_for_each_entry_safe(err, ne, &h->err_list, head)
+			mnl_err_list_free(err);
+
+		mnl_batch_reset(h->batch);
+		goto retry;
+	}
 
 	i = 0;
 	buflen = sizeof(errmsg);
diff --git a/iptables/nft.h b/iptables/nft.h
index 97c28b356a46..23bd2b79884c 100644
--- a/iptables/nft.h
+++ b/iptables/nft.h
@@ -32,6 +32,7 @@  struct nft_handle {
 	struct mnl_socket	*nl;
 	uint32_t		portid;
 	uint32_t		seq;
+	uint32_t		nft_genid;
 	uint32_t		rule_id;
 	struct list_head	obj_list;
 	int			obj_list_num;