[nf-next,v2,3/3] netfilter: nfnetlink_queue: resolve clash for unconfirmed conntracks

Message ID 20180515105212.2111-3-pablo@netfilter.org
State Changes Requested
Delegated to: Pablo Neira
Headers show
Series
  • [nf-next,v2,1/3] netfilter: add struct nf_ct_hook and use it
Related show

Commit Message

Pablo Neira Ayuso May 15, 2018, 10:52 a.m.
In nfqueue, two consecutive skbuffs may race to create the conntrack
entry. Hence, the one that loses the race gets dropped due to clash in
the insertion into the hashes from the nf_conntrack_confirm() path.

This patch adds a new nf_conntrack_update() function which searches for
possible clashes and resolve them. NAT mangling for the packet losing
race is corrected by using the conntrack information that won race.

In order to avoid direct module dependencies with conntrack and NAT, the
nf_ct_hook and nf_nat_hook structures are used for this purpose.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
v2: check for verdict == NF_ACCEPT || NF_STOP.

 include/linux/netfilter.h           |  2 +
 include/net/netfilter/nf_nat_core.h |  3 ++
 net/netfilter/nf_conntrack_core.c   | 73 +++++++++++++++++++++++++++++++++++++
 net/netfilter/nf_nat_core.c         | 41 +++++++++++++--------
 net/netfilter/nfnetlink_queue.c     | 33 +++++++++++++++--
 5 files changed, 133 insertions(+), 19 deletions(-)

Comments

kbuild test robot May 17, 2018, 2:25 a.m. | #1
Hi Pablo,

I love your patch! Yet something to improve:

[auto build test ERROR on nf-next/master]
[also build test ERROR on v4.17-rc5]
[cannot apply to nf/master next-20180516]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/Pablo-Neira-Ayuso/netfilter-add-struct-nf_ct_hook-and-use-it/20180515-215248
base:   https://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next.git master
config: x86_64-acpi-redef (attached as .config)
compiler: gcc-7 (Debian 7.3.0-16) 7.3.0
reproduce:
        # save the attached .config to linux build tree
        make ARCH=x86_64 

All error/warnings (new ones prefixed by >>):

   net/netfilter/nfnetlink_queue.c: In function 'nfqnl_reinject':
>> net/netfilter/nfnetlink_queue.c:237:7: error: implicit declaration of function 'nf_ct_get'; did you mean 'sk_dst_get'? [-Werror=implicit-function-declaration]
     ct = nf_ct_get(entry->skb, &ctinfo);
          ^~~~~~~~~
          sk_dst_get
>> net/netfilter/nfnetlink_queue.c:237:5: warning: assignment makes pointer from integer without a cast [-Wint-conversion]
     ct = nf_ct_get(entry->skb, &ctinfo);
        ^
>> net/netfilter/nfnetlink_queue.c:238:13: error: implicit declaration of function 'nf_ct_is_confirmed'; did you mean 'sk_dst_confirm'? [-Werror=implicit-function-declaration]
     if (ct && !nf_ct_is_confirmed(ct) &&
                ^~~~~~~~~~~~~~~~~~
                sk_dst_confirm
   cc1: some warnings being treated as errors

vim +237 net/netfilter/nfnetlink_queue.c

   229	
   230	static void nfqnl_reinject(struct nf_queue_entry *entry, unsigned int verdict)
   231	{
   232		enum ip_conntrack_info ctinfo;
   233		struct nf_ct_hook *ct_hook;
   234		struct nf_conn *ct;
   235		int err;
   236	
 > 237		ct = nf_ct_get(entry->skb, &ctinfo);
 > 238		if (ct && !nf_ct_is_confirmed(ct) &&
   239		    (verdict == NF_ACCEPT || verdict == NF_STOP)) {
   240			rcu_read_lock();
   241			ct_hook = rcu_dereference(nf_ct_hook);
   242			if (ct_hook) {
   243				err = ct_hook->update(entry->state.net, entry->skb,
   244						      ct, ctinfo);
   245				if (err < 0)
   246					verdict = NF_DROP;
   247			}
   248			rcu_read_unlock();
   249		}
   250	
   251		nf_reinject(entry, verdict);
   252	}
   253	

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation
kbuild test robot May 17, 2018, 4:10 a.m. | #2
Hi Pablo,

I love your patch! Yet something to improve:

[auto build test ERROR on nf-next/master]
[also build test ERROR on v4.17-rc5]
[cannot apply to nf/master next-20180516]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/Pablo-Neira-Ayuso/netfilter-add-struct-nf_ct_hook-and-use-it/20180515-215248
base:   https://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next.git master
config: x86_64-randconfig-u0-05170929 (attached as .config)
compiler: gcc-5 (Debian 5.5.0-3) 5.4.1 20171010
reproduce:
        # save the attached .config to linux build tree
        make ARCH=x86_64 

All errors (new ones prefixed by >>):

   net/netfilter/nfnetlink_queue.c: In function 'nfqnl_reinject':
>> net/netfilter/nfnetlink_queue.c:237:7: error: implicit declaration of function 'nf_ct_get' [-Werror=implicit-function-declaration]
     ct = nf_ct_get(entry->skb, &ctinfo);
          ^
   net/netfilter/nfnetlink_queue.c:237:5: warning: assignment makes pointer from integer without a cast [-Wint-conversion]
     ct = nf_ct_get(entry->skb, &ctinfo);
        ^
>> net/netfilter/nfnetlink_queue.c:238:13: error: implicit declaration of function 'nf_ct_is_confirmed' [-Werror=implicit-function-declaration]
     if (ct && !nf_ct_is_confirmed(ct) &&
                ^
   cc1: some warnings being treated as errors

vim +/nf_ct_get +237 net/netfilter/nfnetlink_queue.c

   229	
   230	static void nfqnl_reinject(struct nf_queue_entry *entry, unsigned int verdict)
   231	{
   232		enum ip_conntrack_info ctinfo;
   233		struct nf_ct_hook *ct_hook;
   234		struct nf_conn *ct;
   235		int err;
   236	
 > 237		ct = nf_ct_get(entry->skb, &ctinfo);
 > 238		if (ct && !nf_ct_is_confirmed(ct) &&
   239		    (verdict == NF_ACCEPT || verdict == NF_STOP)) {
   240			rcu_read_lock();
   241			ct_hook = rcu_dereference(nf_ct_hook);
   242			if (ct_hook) {
   243				err = ct_hook->update(entry->state.net, entry->skb,
   244						      ct, ctinfo);
   245				if (err < 0)
   246					verdict = NF_DROP;
   247			}
   248			rcu_read_unlock();
   249		}
   250	
   251		nf_reinject(entry, verdict);
   252	}
   253	

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation

Patch

diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
index 3baa48a4bb90..8e264d0f6ea7 100644
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -360,6 +360,8 @@  struct nf_conn;
 enum ip_conntrack_info;
 
 struct nf_ct_hook {
+	int (*update)(struct net *net, struct sk_buff *skb, struct nf_conn *ct,
+		      enum ip_conntrack_info ctinfo);
 	void (*destroy)(struct nf_conntrack *);
 };
 extern struct nf_ct_hook __rcu *nf_ct_hook;
diff --git a/include/net/netfilter/nf_nat_core.h b/include/net/netfilter/nf_nat_core.h
index 25b037cfe248..0cf4dc80872f 100644
--- a/include/net/netfilter/nf_nat_core.h
+++ b/include/net/netfilter/nf_nat_core.h
@@ -30,6 +30,9 @@  struct nf_nat_hook {
 	int (*parse_nat_setup)(struct nf_conn *ct, enum nf_nat_manip_type manip,
 			       const struct nlattr *attr);
 	void (*decode_session)(struct sk_buff *skb, struct flowi *fl);
+	unsigned int (*manip_pkt)(struct sk_buff *skb, struct nf_conn *ct,
+				  enum nf_nat_manip_type mtype,
+				  enum ip_conntrack_dir dir);
 };
 
 extern struct nf_nat_hook __rcu *nf_nat_hook;
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 8d109d750073..904aaf5826ff 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1607,6 +1607,78 @@  static void nf_conntrack_attach(struct sk_buff *nskb, const struct sk_buff *skb)
 	nf_conntrack_get(skb_nfct(nskb));
 }
 
+static int nf_conntrack_update(struct net *net, struct sk_buff *skb,
+			       struct nf_conn *ct,
+			       enum ip_conntrack_info ctinfo)
+{
+	const struct nf_conntrack_l3proto *l3proto;
+	const struct nf_conntrack_l4proto *l4proto;
+	struct nf_conntrack_tuple_hash *h;
+	struct nf_conntrack_tuple tuple;
+	struct nf_nat_hook *nat_hook;
+	unsigned int dataoff, status;
+	u16 l3num;
+	u8 l4num;
+
+	l3num = nf_ct_l3num(ct);
+	l3proto = nf_ct_l3proto_find_get(l3num);
+
+	if (l3proto->get_l4proto(skb, skb_network_offset(skb), &dataoff,
+				 &l4num) <= 0)
+		return 0;
+
+	l4proto = nf_ct_l4proto_find_get(l3num, l4num);
+
+	if (!nf_ct_get_tuple(skb, skb_network_offset(skb), dataoff, l3num,
+			     l4num, net, &tuple, l3proto, l4proto))
+		return 0;
+
+	if (ct->status & IPS_SRC_NAT) {
+		memcpy(tuple.src.u3.all,
+		       ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.all,
+		       sizeof(tuple.src.u3.all));
+		tuple.src.u.all =
+			ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.all;
+	}
+
+	if (ct->status & IPS_DST_NAT) {
+		memcpy(tuple.dst.u3.all,
+		       ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.all,
+		       sizeof(tuple.dst.u3.all));
+		tuple.dst.u.all =
+			ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u.all;
+	}
+
+	h = nf_conntrack_find_get(net, &ct->zone, &tuple);
+	if (!h)
+		return 0;
+
+	/* Store status bits of the conntrack that is clashing to re-do NAT
+	 * mangling according to what it has been done already to this packet.
+	 */
+	status = ct->status;
+
+	nf_ct_put(ct);
+	ct = nf_ct_tuplehash_to_ctrack(h);
+	nf_ct_set(skb, ct, ctinfo);
+
+	nat_hook = rcu_dereference(nf_nat_hook);
+	if (!nat_hook)
+		return 0;
+
+	if (status & IPS_SRC_NAT &&
+	    nat_hook->manip_pkt(skb, ct, NF_NAT_MANIP_SRC,
+				IP_CT_DIR_ORIGINAL) == NF_DROP)
+		return -1;
+
+	if (status & IPS_DST_NAT &&
+	    nat_hook->manip_pkt(skb, ct, NF_NAT_MANIP_DST,
+				IP_CT_DIR_ORIGINAL) == NF_DROP)
+		return -1;
+
+	return 0;
+}
+
 /* Bring out ya dead! */
 static struct nf_conn *
 get_next_corpse(int (*iter)(struct nf_conn *i, void *data),
@@ -2126,6 +2198,7 @@  int nf_conntrack_init_start(void)
 }
 
 static struct nf_ct_hook nf_conntrack_hook = {
+	.update		= nf_conntrack_update,
 	.destroy	= destroy_conntrack,
 };
 
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index ae58088d00d4..e5603143a514 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -475,17 +475,36 @@  nf_nat_alloc_null_binding(struct nf_conn *ct, unsigned int hooknum)
 }
 EXPORT_SYMBOL_GPL(nf_nat_alloc_null_binding);
 
+static unsigned int nf_nat_manip_pkt(struct sk_buff *skb, struct nf_conn *ct,
+				     enum nf_nat_manip_type mtype,
+				     enum ip_conntrack_dir dir)
+{
+	const struct nf_nat_l3proto *l3proto;
+	const struct nf_nat_l4proto *l4proto;
+	struct nf_conntrack_tuple target;
+
+	/* We are aiming to look like inverse of other direction. */
+	nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple);
+
+	l3proto = __nf_nat_l3proto_find(target.src.l3num);
+	l4proto = __nf_nat_l4proto_find(target.src.l3num,
+					target.dst.protonum);
+	if (!l3proto->manip_pkt(skb, 0, l4proto, &target, mtype))
+		return NF_DROP;
+
+	return NF_ACCEPT;
+}
+
 /* Do packet manipulations according to nf_nat_setup_info. */
 unsigned int nf_nat_packet(struct nf_conn *ct,
 			   enum ip_conntrack_info ctinfo,
 			   unsigned int hooknum,
 			   struct sk_buff *skb)
 {
-	const struct nf_nat_l3proto *l3proto;
-	const struct nf_nat_l4proto *l4proto;
+	enum nf_nat_manip_type mtype = HOOK2MANIP(hooknum);
 	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+	unsigned int verdict = NF_ACCEPT;
 	unsigned long statusbit;
-	enum nf_nat_manip_type mtype = HOOK2MANIP(hooknum);
 
 	if (mtype == NF_NAT_MANIP_SRC)
 		statusbit = IPS_SRC_NAT;
@@ -497,19 +516,10 @@  unsigned int nf_nat_packet(struct nf_conn *ct,
 		statusbit ^= IPS_NAT_MASK;
 
 	/* Non-atomic: these bits don't change. */
-	if (ct->status & statusbit) {
-		struct nf_conntrack_tuple target;
-
-		/* We are aiming to look like inverse of other direction. */
-		nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple);
+	if (ct->status & statusbit)
+		verdict = nf_nat_manip_pkt(skb, ct, mtype, dir);
 
-		l3proto = __nf_nat_l3proto_find(target.src.l3num);
-		l4proto = __nf_nat_l4proto_find(target.src.l3num,
-						target.dst.protonum);
-		if (!l3proto->manip_pkt(skb, 0, l4proto, &target, mtype))
-			return NF_DROP;
-	}
-	return NF_ACCEPT;
+	return verdict;
 }
 EXPORT_SYMBOL_GPL(nf_nat_packet);
 
@@ -806,6 +816,7 @@  struct nf_nat_hook nat_hook = {
 #ifdef CONFIG_XFRM
 	.decode_session		= __nf_nat_decode_session,
 #endif
+	.manip_pkt		= nf_nat_manip_pkt,
 };
 
 static int __init nf_nat_init(void)
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 74a04638ef03..d9c3f5e39926 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -227,6 +227,30 @@  find_dequeue_entry(struct nfqnl_instance *queue, unsigned int id)
 	return entry;
 }
 
+static void nfqnl_reinject(struct nf_queue_entry *entry, unsigned int verdict)
+{
+	enum ip_conntrack_info ctinfo;
+	struct nf_ct_hook *ct_hook;
+	struct nf_conn *ct;
+	int err;
+
+	ct = nf_ct_get(entry->skb, &ctinfo);
+	if (ct && !nf_ct_is_confirmed(ct) &&
+	    (verdict == NF_ACCEPT || verdict == NF_STOP)) {
+		rcu_read_lock();
+		ct_hook = rcu_dereference(nf_ct_hook);
+		if (ct_hook) {
+			err = ct_hook->update(entry->state.net, entry->skb,
+					      ct, ctinfo);
+			if (err < 0)
+				verdict = NF_DROP;
+		}
+		rcu_read_unlock();
+	}
+
+	nf_reinject(entry, verdict);
+}
+
 static void
 nfqnl_flush(struct nfqnl_instance *queue, nfqnl_cmpfn cmpfn, unsigned long data)
 {
@@ -237,7 +261,7 @@  nfqnl_flush(struct nfqnl_instance *queue, nfqnl_cmpfn cmpfn, unsigned long data)
 		if (!cmpfn || cmpfn(entry, data)) {
 			list_del(&entry->list);
 			queue->queue_total--;
-			nf_reinject(entry, NF_DROP);
+			nfqnl_reinject(entry, NF_DROP);
 		}
 	}
 	spin_unlock_bh(&queue->lock);
@@ -686,7 +710,7 @@  __nfqnl_enqueue_packet(struct net *net, struct nfqnl_instance *queue,
 err_out_unlock:
 	spin_unlock_bh(&queue->lock);
 	if (failopen)
-		nf_reinject(entry, NF_ACCEPT);
+		nfqnl_reinject(entry, NF_ACCEPT);
 err_out:
 	return err;
 }
@@ -1085,7 +1109,8 @@  static int nfqnl_recv_verdict_batch(struct net *net, struct sock *ctnl,
 	list_for_each_entry_safe(entry, tmp, &batch_list, list) {
 		if (nfqa[NFQA_MARK])
 			entry->skb->mark = ntohl(nla_get_be32(nfqa[NFQA_MARK]));
-		nf_reinject(entry, verdict);
+
+		nfqnl_reinject(entry, verdict);
 	}
 	return 0;
 }
@@ -1208,7 +1233,7 @@  static int nfqnl_recv_verdict(struct net *net, struct sock *ctnl,
 	if (nfqa[NFQA_MARK])
 		entry->skb->mark = ntohl(nla_get_be32(nfqa[NFQA_MARK]));
 
-	nf_reinject(entry, verdict);
+	nfqnl_reinject(entry, verdict);
 	return 0;
 }