diff mbox series

[net-next,2/2] net: sched: add em_ipt ematch for calling xtables matches

Message ID 1516699052-19259-3-git-send-email-eyal.birger@gmail.com
State Changes Requested, archived
Delegated to: David Miller
Headers show
Series net: sched: introduce em_ipt ematch | expand

Commit Message

Eyal Birger Jan. 23, 2018, 9:17 a.m. UTC
From: Eyal Birger <eyal@metanetworks.com>

This module allows performing tc classification based on data structures
and implementations provided by netfilter extensions.

Example use case is classification based on the incoming IPSec policy used
during decpsulation using the 'policy' iptables extension (xt_policy).

Signed-off-by: Eyal Birger <eyal@metanetworks.com>

---

This ematch tries its best to provide matches with a similar
environment to the one they are used to; Due to the wide range of criteria
supported by matches, I am not able to test every combination of match and
traffic.
---
 include/uapi/linux/pkt_cls.h             |   3 +-
 include/uapi/linux/tc_ematch/tc_em_ipt.h |  19 +++
 net/sched/Kconfig                        |  10 ++
 net/sched/Makefile                       |   1 +
 net/sched/em_ipt.c                       | 244 +++++++++++++++++++++++++++++++
 5 files changed, 276 insertions(+), 1 deletion(-)
 create mode 100644 include/uapi/linux/tc_ematch/tc_em_ipt.h
 create mode 100644 net/sched/em_ipt.c

Comments

Pablo Neira Ayuso Jan. 23, 2018, 12:15 p.m. UTC | #1
On Tue, Jan 23, 2018 at 11:17:32AM +0200, Eyal Birger wrote:
> From: Eyal Birger <eyal@metanetworks.com>
> 
> This module allows performing tc classification based on data structures
> and implementations provided by netfilter extensions.
> 
> Example use case is classification based on the incoming IPSec policy used
> during decpsulation using the 'policy' iptables extension (xt_policy).
> 
> Signed-off-by: Eyal Birger <eyal@metanetworks.com>
> 
> ---
> 
> This ematch tries its best to provide matches with a similar
> environment to the one they are used to; Due to the wide range of criteria
> supported by matches, I am not able to test every combination of match and
> traffic.

Thanks! No objection to the Netfilter bits.

So I leave it up to the tc maintainers to decide what to do with this.

Acked-by: Pablo Neira Ayuso <pablo@netfilter.org>
David Miller Jan. 24, 2018, 9:37 p.m. UTC | #2
From: Eyal Birger <eyal.birger@gmail.com>
Date: Tue, 23 Jan 2018 11:17:32 +0200

> +	network_offset = skb_network_offset(skb);
> +	skb_pull(skb, network_offset);
> +
> +	rcu_read_lock();
> +
> +	if (skb->skb_iif)
> +		indev = dev_get_by_index_rcu(em->net, skb->skb_iif);
> +
> +	nf_hook_state_init(&state, im->hook, im->nfproto, indev ?: skb->dev,
> +			   skb->dev, NULL, em->net, NULL);
> +
> +	acpar.match = im->match;
> +	acpar.matchinfo = im->match_data;
> +	acpar.state = &state;
> +
> +	ret = im->match->match(skb, &acpar);
> +
> +	rcu_read_unlock();
> +
> +	skb_push(skb, network_offset);

If the SKB is shared in any way, this pull/push around the NF hook
invocation is illegal.
Pablo Neira Ayuso Jan. 25, 2018, midnight UTC | #3
On Wed, Jan 24, 2018 at 04:37:16PM -0500, David Miller wrote:
> From: Eyal Birger <eyal.birger@gmail.com>
> Date: Tue, 23 Jan 2018 11:17:32 +0200
> 
> > +	network_offset = skb_network_offset(skb);
> > +	skb_pull(skb, network_offset);
> > +
> > +	rcu_read_lock();
> > +
> > +	if (skb->skb_iif)
> > +		indev = dev_get_by_index_rcu(em->net, skb->skb_iif);
> > +
> > +	nf_hook_state_init(&state, im->hook, im->nfproto, indev ?: skb->dev,
> > +			   skb->dev, NULL, em->net, NULL);
> > +
> > +	acpar.match = im->match;
> > +	acpar.matchinfo = im->match_data;
> > +	acpar.state = &state;
> > +
> > +	ret = im->match->match(skb, &acpar);
> > +
> > +	rcu_read_unlock();
> > +
> > +	skb_push(skb, network_offset);
> 
> If the SKB is shared in any way, this pull/push around the NF hook
> invocation is illegal.

At ingress, skb->data points to the network header, which is what the
xtables matches expect, so these are actually noops, therefore,
skb_pull() and skb_push() can be removed.
Eyal Birger Jan. 26, 2018, 4:24 a.m. UTC | #4
On Thu, Jan 25, 2018 at 2:00 AM, Pablo Neira Ayuso <pablo@netfilter.org> wrote:
> On Wed, Jan 24, 2018 at 04:37:16PM -0500, David Miller wrote:
>> From: Eyal Birger <eyal.birger@gmail.com>
>> Date: Tue, 23 Jan 2018 11:17:32 +0200
>>
>> > +   network_offset = skb_network_offset(skb);
>> > +   skb_pull(skb, network_offset);
>> > +
>> > +   rcu_read_lock();
>> > +
>> > +   if (skb->skb_iif)
>> > +           indev = dev_get_by_index_rcu(em->net, skb->skb_iif);
>> > +
>> > +   nf_hook_state_init(&state, im->hook, im->nfproto, indev ?: skb->dev,
>> > +                      skb->dev, NULL, em->net, NULL);
>> > +
>> > +   acpar.match = im->match;
>> > +   acpar.matchinfo = im->match_data;
>> > +   acpar.state = &state;
>> > +
>> > +   ret = im->match->match(skb, &acpar);
>> > +
>> > +   rcu_read_unlock();
>> > +
>> > +   skb_push(skb, network_offset);
>>
>> If the SKB is shared in any way, this pull/push around the NF hook
>> invocation is illegal.
>
> At ingress, skb->data points to the network header, which is what the
> xtables matches expect, so these are actually noops, therefore,
> skb_pull() and skb_push() can be removed.

Right. I added those for completeness in supporting the xmit path.
In the xt_policy use-case it is irrelevant as tc is invoked after
encapsulation.

I will submit a v2 without these, asserting the ingress direction.

Note I have followed the example in em_ipset.c, so that might be a
problem there too...

Thanks!
Eyal.
diff mbox series

Patch

diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h
index 46c5066..7cafb26d 100644
--- a/include/uapi/linux/pkt_cls.h
+++ b/include/uapi/linux/pkt_cls.h
@@ -555,7 +555,8 @@  enum {
 #define	TCF_EM_VLAN		6
 #define	TCF_EM_CANID		7
 #define	TCF_EM_IPSET		8
-#define	TCF_EM_MAX		8
+#define	TCF_EM_IPT		9
+#define	TCF_EM_MAX		9
 
 enum {
 	TCF_EM_PROG_TC
diff --git a/include/uapi/linux/tc_ematch/tc_em_ipt.h b/include/uapi/linux/tc_ematch/tc_em_ipt.h
new file mode 100644
index 0000000..aecd252
--- /dev/null
+++ b/include/uapi/linux/tc_ematch/tc_em_ipt.h
@@ -0,0 +1,19 @@ 
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef __LINUX_TC_EM_IPT_H
+#define __LINUX_TC_EM_IPT_H
+
+#include <linux/types.h>
+#include <linux/pkt_cls.h>
+
+enum {
+	TCA_EM_IPT_UNSPEC,
+	TCA_EM_IPT_HOOK,
+	TCA_EM_IPT_MATCH_NAME,
+	TCA_EM_IPT_MATCH_REVISION,
+	TCA_EM_IPT_MATCH_DATA,
+	__TCA_EM_IPT_MAX
+};
+
+#define TCA_EM_IPT_MAX (__TCA_EM_IPT_MAX - 1)
+
+#endif
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index c03d86a..203e7f7 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -658,6 +658,16 @@  config NET_EMATCH_IPSET
 	  To compile this code as a module, choose M here: the
 	  module will be called em_ipset.
 
+config NET_EMATCH_IPT
+	tristate "IPtables Matches"
+	depends on NET_EMATCH && NETFILTER && NETFILTER_XTABLES
+	---help---
+	  Say Y here to be able to classify packets based on iptables
+	  matches.
+
+	  To compile this code as a module, choose M here: the
+	  module will be called em_ipt.
+
 config NET_CLS_ACT
 	bool "Actions"
 	select NET_CLS
diff --git a/net/sched/Makefile b/net/sched/Makefile
index 5b63544..8811d38 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -75,3 +75,4 @@  obj-$(CONFIG_NET_EMATCH_META)	+= em_meta.o
 obj-$(CONFIG_NET_EMATCH_TEXT)	+= em_text.o
 obj-$(CONFIG_NET_EMATCH_CANID)	+= em_canid.o
 obj-$(CONFIG_NET_EMATCH_IPSET)	+= em_ipset.o
+obj-$(CONFIG_NET_EMATCH_IPT)	+= em_ipt.o
diff --git a/net/sched/em_ipt.c b/net/sched/em_ipt.c
new file mode 100644
index 0000000..9629031
--- /dev/null
+++ b/net/sched/em_ipt.c
@@ -0,0 +1,244 @@ 
+/*
+ * net/sched/em_ipt.c IPtables matches Ematch
+ *
+ * (c) 2018 Eyal Birger <eyal.birger@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/gfp.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+#include <linux/tc_ematch/tc_em_ipt.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv6/ip6_tables.h>
+#include <net/pkt_cls.h>
+#include <net/ip.h>
+
+struct em_ipt_match {
+	const struct xt_match *match;
+	u32 hook;
+	u8 nfproto;
+	u8 match_data[0] __aligned(8);
+};
+
+static int check_match(struct net *net, struct em_ipt_match *im, int mdata_len)
+{
+	struct xt_mtchk_param mtpar = {};
+	union {
+		struct ipt_entry e4;
+		struct ip6t_entry e6;
+	} e = {};
+
+	mtpar.net	= net;
+	mtpar.table	= "filter";
+	mtpar.hook_mask	= 1 << im->hook;
+	mtpar.family	= im->nfproto;
+	mtpar.match	= im->match;
+	mtpar.entryinfo = &e;
+	mtpar.matchinfo	= (void *)im->match_data;
+	return xt_check_match(&mtpar, mdata_len, 0, 0);
+}
+
+static const struct nla_policy em_ipt_policy[TCA_EM_IPT_MAX + 1] = {
+	[TCA_EM_IPT_HOOK]		= { .type = NLA_U32 },
+	[TCA_EM_IPT_MATCH_NAME]		= { .type = NLA_STRING,
+					    .len = XT_EXTENSION_MAXNAMELEN },
+	[TCA_EM_IPT_MATCH_REVISION]	= { .type = NLA_U8 },
+	[TCA_EM_IPT_MATCH_DATA]		= { .type = NLA_UNSPEC },
+};
+
+static int em_ipt_change(struct net *net, __be16 protocol, void *data,
+			 int data_len, struct tcf_ematch *em)
+{
+	struct nlattr *tb[TCA_EM_IPT_MAX + 1];
+	struct em_ipt_match *im = NULL;
+	struct xt_match *match;
+	u8 nfproto, mrev = 0;
+	int mdata_len, ret;
+	const char *mname;
+
+	switch (protocol) {
+	case htons(ETH_P_IP):
+		nfproto = NFPROTO_IPV4;
+		break;
+	case htons(ETH_P_IPV6):
+		nfproto = NFPROTO_IPV6;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	ret = nla_parse(tb, TCA_EM_IPT_MAX, data, data_len, em_ipt_policy,
+			NULL);
+	if (ret < 0)
+		return ret;
+
+	if (!tb[TCA_EM_IPT_HOOK] || !tb[TCA_EM_IPT_MATCH_NAME] ||
+	    !tb[TCA_EM_IPT_MATCH_DATA])
+		return -EINVAL;
+
+	mname = nla_data(tb[TCA_EM_IPT_MATCH_NAME]);
+	if (tb[TCA_EM_IPT_MATCH_REVISION])
+		mrev = nla_get_u8(tb[TCA_EM_IPT_MATCH_REVISION]);
+
+	match = xt_request_find_match(nfproto, mname, mrev);
+	if (IS_ERR(match)) {
+		pr_err("unable to find match %s:%d\n", mname, mrev);
+		return PTR_ERR(match);
+	}
+
+	mdata_len = XT_ALIGN(nla_len(tb[TCA_EM_IPT_MATCH_DATA]));
+	im = kzalloc(sizeof(*im) + mdata_len, GFP_KERNEL);
+	if (!im) {
+		ret = -ENOMEM;
+		goto err;
+	}
+
+	im->match = match;
+	im->hook = nla_get_u32(tb[TCA_EM_IPT_HOOK]);
+	im->nfproto = nfproto;
+	nla_memcpy(im->match_data, tb[TCA_EM_IPT_MATCH_DATA], mdata_len);
+
+	ret = check_match(net, im, mdata_len);
+	if (ret)
+		goto err;
+
+	em->datalen = sizeof(*im) + mdata_len;
+	em->data = (unsigned long)im;
+	return 0;
+
+err:
+	kfree(im);
+	module_put(match->me);
+	return ret;
+}
+
+static void em_ipt_destroy(struct tcf_ematch *em)
+{
+	struct em_ipt_match *im = (void *)em->data;
+
+	if (!im)
+		return;
+
+	if (im->match->destroy) {
+		struct xt_mtdtor_param par = {
+			.net = em->net,
+			.match = im->match,
+			.matchinfo = im->match_data,
+			.family = im->nfproto
+		};
+		im->match->destroy(&par);
+	}
+	module_put(im->match->me);
+	kfree((void *)im);
+}
+
+static int em_ipt_match(struct sk_buff *skb, struct tcf_ematch *em,
+			struct tcf_pkt_info *info)
+{
+	const struct em_ipt_match *im = (const void *)em->data;
+	struct xt_action_param acpar = {};
+	struct net_device *indev = NULL;
+	struct nf_hook_state state;
+	int ret, network_offset;
+
+	switch (tc_skb_protocol(skb)) {
+	case htons(ETH_P_IP):
+		if (im->nfproto != NFPROTO_IPV4)
+			return 0;
+		if (!pskb_network_may_pull(skb, sizeof(struct iphdr)))
+			return 0;
+		acpar.thoff = ip_hdrlen(skb);
+		break;
+	case htons(ETH_P_IPV6):
+		if (im->nfproto != NFPROTO_IPV6)
+			return 0;
+		if (!pskb_network_may_pull(skb, sizeof(struct ipv6hdr)))
+			return 0;
+		if (ipv6_find_hdr(skb, &acpar.thoff, -1,
+				  (unsigned short *)&acpar.fragoff, NULL) < 0)
+			return 0;
+		break;
+	default:
+		return 0;
+	}
+
+	network_offset = skb_network_offset(skb);
+	skb_pull(skb, network_offset);
+
+	rcu_read_lock();
+
+	if (skb->skb_iif)
+		indev = dev_get_by_index_rcu(em->net, skb->skb_iif);
+
+	nf_hook_state_init(&state, im->hook, im->nfproto, indev ?: skb->dev,
+			   skb->dev, NULL, em->net, NULL);
+
+	acpar.match = im->match;
+	acpar.matchinfo = im->match_data;
+	acpar.state = &state;
+
+	ret = im->match->match(skb, &acpar);
+
+	rcu_read_unlock();
+
+	skb_push(skb, network_offset);
+	return ret;
+}
+
+static int em_ipt_dump(struct sk_buff *skb, struct tcf_ematch *em)
+{
+	struct em_ipt_match *im = (void *)em->data;
+
+	if (nla_put_u32(skb, TCA_EM_IPT_HOOK, im->hook) < 0)
+		return -EMSGSIZE;
+	if (nla_put_string(skb, TCA_EM_IPT_MATCH_NAME, im->match->name) < 0)
+		return -EMSGSIZE;
+	if (nla_put_u8(skb, TCA_EM_IPT_MATCH_REVISION, im->match->revision) < 0)
+		return -EMSGSIZE;
+	if (nla_put(skb, TCA_EM_IPT_MATCH_DATA,
+		    im->match->usersize ?: im->match->matchsize,
+		    im->match_data) < 0)
+		return -EMSGSIZE;
+
+	return 0;
+}
+
+static struct tcf_ematch_ops em_ipt_ops = {
+	.kind	  = TCF_EM_IPT,
+	.change	  = em_ipt_change,
+	.destroy  = em_ipt_destroy,
+	.match	  = em_ipt_match,
+	.dump	  = em_ipt_dump,
+	.owner	  = THIS_MODULE,
+	.link	  = LIST_HEAD_INIT(em_ipt_ops.link)
+};
+
+static int __init init_em_ipt(void)
+{
+	return tcf_em_register(&em_ipt_ops);
+}
+
+static void __exit exit_em_ipt(void)
+{
+	tcf_em_unregister(&em_ipt_ops);
+}
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Eyal Birger <eyal.birger@gmail.com>");
+MODULE_DESCRIPTION("TC extended match for IPtables matches");
+
+module_init(init_em_ipt);
+module_exit(exit_em_ipt);
+
+MODULE_ALIAS_TCF_EMATCH(TCF_EM_IPT);