Message ID | 20fdc704558880831cbbaa8bba5e4855591cd4ba.1427209409.git.daniel@iogearbox.net |
---|---|
State | Not Applicable |
Delegated to: | Pablo Neira |
Headers | show |
On Tue, Mar 24, 2015 at 04:30:29PM +0100, Daniel Borkmann wrote: > diff --git a/net/netfilter/xt_cgroup.c b/net/netfilter/xt_cgroup.c > index 7198d66..cd2468d 100644 > --- a/net/netfilter/xt_cgroup.c > +++ b/net/netfilter/xt_cgroup.c > @@ -16,8 +16,11 @@ > #include <linux/module.h> > #include <linux/netfilter/x_tables.h> > #include <linux/netfilter/xt_cgroup.h> > + > #include <net/sock.h> > > +#include "xt_sk_helper.h" > + > MODULE_LICENSE("GPL"); > MODULE_AUTHOR("Daniel Borkmann <dborkman@redhat.com>"); > MODULE_DESCRIPTION("Xtables: process control group matching"); > @@ -34,38 +37,85 @@ static int cgroup_mt_check(const struct xt_mtchk_param *par) > return 0; > } > > -static bool > -cgroup_mt(const struct sk_buff *skb, struct xt_action_param *par) > +static bool cgroup_mt(const struct sk_buff *skb, > + const struct xt_action_param *par, > + struct sock *(*cgroup_mt_slow)(const struct sk_buff *skb, > + const struct net_device *indev)) > { > const struct xt_cgroup_info *info = par->matchinfo; > + struct sock *sk = skb->sk; > + u32 sk_classid; > + > + if (sk) { > + sk_classid = sk->sk_classid; > + } else { > + if (par->in != NULL) > + sk = cgroup_mt_slow(skb, par->in); Is this working with timewait sock? > + if (sk == NULL) > + return false; > + > + sk_classid = sk->sk_classid; > + sock_gen_put(sk); > + } > + > + return (info->id == sk_classid) ^ info->invert; > +} -- To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 03/25/2015 05:03 PM, Pablo Neira Ayuso wrote: > On Tue, Mar 24, 2015 at 04:30:29PM +0100, Daniel Borkmann wrote: >> diff --git a/net/netfilter/xt_cgroup.c b/net/netfilter/xt_cgroup.c >> index 7198d66..cd2468d 100644 >> --- a/net/netfilter/xt_cgroup.c >> +++ b/net/netfilter/xt_cgroup.c >> @@ -16,8 +16,11 @@ >> #include <linux/module.h> >> #include <linux/netfilter/x_tables.h> >> #include <linux/netfilter/xt_cgroup.h> >> + >> #include <net/sock.h> >> >> +#include "xt_sk_helper.h" >> + >> MODULE_LICENSE("GPL"); >> MODULE_AUTHOR("Daniel Borkmann <dborkman@redhat.com>"); >> MODULE_DESCRIPTION("Xtables: process control group matching"); >> @@ -34,38 +37,85 @@ static int cgroup_mt_check(const struct xt_mtchk_param *par) >> return 0; >> } >> >> -static bool >> -cgroup_mt(const struct sk_buff *skb, struct xt_action_param *par) >> +static bool cgroup_mt(const struct sk_buff *skb, >> + const struct xt_action_param *par, >> + struct sock *(*cgroup_mt_slow)(const struct sk_buff *skb, >> + const struct net_device *indev)) >> { >> const struct xt_cgroup_info *info = par->matchinfo; >> + struct sock *sk = skb->sk; >> + u32 sk_classid; >> + >> + if (sk) { >> + sk_classid = sk->sk_classid; >> + } else { >> + if (par->in != NULL) >> + sk = cgroup_mt_slow(skb, par->in); > > Is this working with timewait sock? Yes, all socket objects that are allocated (sk_alloc()) get a sk_classid of the current task. Given that both share the same lookup handler, we don't ignore them here as some xt_socket flags could after the lookup optionally do. >> + if (sk == NULL) >> + return false; >> + >> + sk_classid = sk->sk_classid; >> + sock_gen_put(sk); >> + } >> + >> + return (info->id == sk_classid) ^ info->invert; >> +} -- To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Wed, Mar 25, 2015 at 05:39:04PM +0100, Daniel Borkmann wrote: > On 03/25/2015 05:03 PM, Pablo Neira Ayuso wrote: > >On Tue, Mar 24, 2015 at 04:30:29PM +0100, Daniel Borkmann wrote: > >>diff --git a/net/netfilter/xt_cgroup.c b/net/netfilter/xt_cgroup.c > >>index 7198d66..cd2468d 100644 > >>--- a/net/netfilter/xt_cgroup.c > >>+++ b/net/netfilter/xt_cgroup.c > >>@@ -16,8 +16,11 @@ > >> #include <linux/module.h> > >> #include <linux/netfilter/x_tables.h> > >> #include <linux/netfilter/xt_cgroup.h> > >>+ > >> #include <net/sock.h> > >> > >>+#include "xt_sk_helper.h" > >>+ > >> MODULE_LICENSE("GPL"); > >> MODULE_AUTHOR("Daniel Borkmann <dborkman@redhat.com>"); > >> MODULE_DESCRIPTION("Xtables: process control group matching"); > >>@@ -34,38 +37,85 @@ static int cgroup_mt_check(const struct xt_mtchk_param *par) > >> return 0; > >> } > >> > >>-static bool > >>-cgroup_mt(const struct sk_buff *skb, struct xt_action_param *par) > >>+static bool cgroup_mt(const struct sk_buff *skb, > >>+ const struct xt_action_param *par, > >>+ struct sock *(*cgroup_mt_slow)(const struct sk_buff *skb, > >>+ const struct net_device *indev)) > >> { > >> const struct xt_cgroup_info *info = par->matchinfo; > >>+ struct sock *sk = skb->sk; > >>+ u32 sk_classid; > >>+ > >>+ if (sk) { > >>+ sk_classid = sk->sk_classid; > >>+ } else { > >>+ if (par->in != NULL) > >>+ sk = cgroup_mt_slow(skb, par->in); > > > >Is this working with timewait sock? > > Yes, all socket objects that are allocated (sk_alloc()) get a > sk_classid of the current task. Given that both share the same > lookup handler, we don't ignore them here as some xt_socket > flags could after the lookup optionally do. I mean, we may get a packet from the input path while in TIME_WAIT, and sk will be actually a inet_timewait_sock, which has a different layout (no sk_classid). > >>+ if (sk == NULL) > >>+ return false; > >>+ > >>+ sk_classid = sk->sk_classid; > >>+ sock_gen_put(sk); > >>+ } > >>+ > >>+ return (info->id == sk_classid) ^ info->invert; > >>+} -- To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 03/25/2015 06:17 PM, Pablo Neira Ayuso wrote: ... > I mean, we may get a packet from the input path while in TIME_WAIT, and > sk will be actually a inet_timewait_sock, which has a different > layout (no sk_classid). Sorry, you are correct, thanks for the hint. We would actually need to test if we deal with a full socket, iow sk_fullsock(sk). -- To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Tue, Mar 24, 2015 at 04:30:29PM +0100, Daniel Borkmann wrote: > While originally only being intended for outgoing traffic, commit > a00e76349f35 ("netfilter: x_tables: allow to use cgroup match for > LOCAL_IN nf hooks") enabled xt_cgroups for the NF_INET_LOCAL_IN hook > as well, in order to allow for nfacct accounting. > > This basically was under the assumption that socket early demux will > resolve it. It's correct that demux happens after PRE_ROUTING, but > before LOCAL_IN. > > However, that as-is only partially works, i.e. it works for the case > of established TCP and connected UDP sockets when early demux is > enabled, but not for various other ingress scenarios e.g. unconnected > UDP, request sockets, etc. > > Instead of reverting commit a00e76349f35, I think it's worth to fix > it up as there are applications requiring xt_cgroup to match on > ingress and egress side. In order to do so, we need to perform a > full lookup on skb->sk (ingress) miss, similarly as being done in > xt_socket. > > Therefore, we need to make use of shared helpers xt_sk_lookup() and > xt_sk_lookup6(). Thanks to Daniel for the report and also additional > testing. So this is basically needed when early demux is disabled? This is a rather large rework, I would like to know what scenarios we're not currently catching with the existing code. -- To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 03/25/2015 09:26 PM, Pablo Neira Ayuso wrote: > On Tue, Mar 24, 2015 at 04:30:29PM +0100, Daniel Borkmann wrote: >> While originally only being intended for outgoing traffic, commit >> a00e76349f35 ("netfilter: x_tables: allow to use cgroup match for >> LOCAL_IN nf hooks") enabled xt_cgroups for the NF_INET_LOCAL_IN hook >> as well, in order to allow for nfacct accounting. >> >> This basically was under the assumption that socket early demux will >> resolve it. It's correct that demux happens after PRE_ROUTING, but >> before LOCAL_IN. >> >> However, that as-is only partially works, i.e. it works for the case >> of established TCP and connected UDP sockets when early demux is >> enabled, but not for various other ingress scenarios e.g. unconnected >> UDP, request sockets, etc. >> >> Instead of reverting commit a00e76349f35, I think it's worth to fix >> it up as there are applications requiring xt_cgroup to match on >> ingress and egress side. In order to do so, we need to perform a >> full lookup on skb->sk (ingress) miss, similarly as being done in >> xt_socket. >> >> Therefore, we need to make use of shared helpers xt_sk_lookup() and >> xt_sk_lookup6(). Thanks to Daniel for the report and also additional >> testing. > > So this is basically needed when early demux is disabled? > > This is a rather large rework, I would like to know what scenarios > we're not currently catching with the existing code. Hm, perhaps Daniel can elaborate better, what I have seen in my testing when xt_cgroup fails to match the cgroup on ingress traffic is i) early demux sysctl disabled, ii) udp on unconnected sockets (which I understand is the majority of udp traffic), iii) tcp and udp (any kind) on localhost communications. Daniel's original report can be found here [1]. Thanks, Daniel [1] http://thread.gmane.org/gmane.linux.network/355527 -- To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 03/25/2015 10:34 PM, Daniel Borkmann wrote: > On 03/25/2015 09:26 PM, Pablo Neira Ayuso wrote: >> So this is basically needed when early demux is disabled? >> >> This is a rather large rework, I would like to know what scenarios >> we're not currently catching with the existing code. > > Hm, perhaps Daniel can elaborate better, what I have seen in my > testing when xt_cgroup fails to match the cgroup on ingress traffic > is i) early demux sysctl disabled, ii) udp on unconnected sockets > (which I understand is the majority of udp traffic), iii) tcp and > udp (any kind) on localhost communications. Daniel's original report > can be found here [1]. Currently, ingress matching fails if the xt_cgroup module's match callback is called with skb->sk == NULL, which is the case in the scenarios described above. Also, according to Cong, this is as well always the case if the ingress network device is 'lo'. We want to use xt_cgroup to realize a per-application firewall for both filtering and accounting. For this, being able to catch every network packet that is destined for or originated by a task that is assigned to a certain net_cls CGroup controller is essential. Also, the match has to be effective regardless of the network interface in use. In my tests, Daniel's patches work perfectly fine. Thanks, Daniel -- To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 971cd75..044bd22 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -960,8 +960,13 @@ config NETFILTER_XT_MATCH_BPF config NETFILTER_XT_MATCH_CGROUP tristate '"control group" match support' + depends on NETFILTER_XTABLES depends on NETFILTER_ADVANCED + depends on !NF_CONNTRACK || NF_CONNTRACK + depends on (IPV6 || IPV6=n) depends on CGROUPS + select NF_DEFRAG_IPV4 + select NF_DEFRAG_IPV6 if IP6_NF_IPTABLES select CGROUP_NET_CLASSID ---help--- Socket/process control group matching allows you to match locally diff --git a/net/netfilter/xt_cgroup.c b/net/netfilter/xt_cgroup.c index 7198d66..cd2468d 100644 --- a/net/netfilter/xt_cgroup.c +++ b/net/netfilter/xt_cgroup.c @@ -16,8 +16,11 @@ #include <linux/module.h> #include <linux/netfilter/x_tables.h> #include <linux/netfilter/xt_cgroup.h> + #include <net/sock.h> +#include "xt_sk_helper.h" + MODULE_LICENSE("GPL"); MODULE_AUTHOR("Daniel Borkmann <dborkman@redhat.com>"); MODULE_DESCRIPTION("Xtables: process control group matching"); @@ -34,38 +37,85 @@ static int cgroup_mt_check(const struct xt_mtchk_param *par) return 0; } -static bool -cgroup_mt(const struct sk_buff *skb, struct xt_action_param *par) +static bool cgroup_mt(const struct sk_buff *skb, + const struct xt_action_param *par, + struct sock *(*cgroup_mt_slow)(const struct sk_buff *skb, + const struct net_device *indev)) { const struct xt_cgroup_info *info = par->matchinfo; + struct sock *sk = skb->sk; + u32 sk_classid; + + if (sk) { + sk_classid = sk->sk_classid; + } else { + if (par->in != NULL) + sk = cgroup_mt_slow(skb, par->in); + if (sk == NULL) + return false; + + sk_classid = sk->sk_classid; + sock_gen_put(sk); + } + + return (info->id == sk_classid) ^ info->invert; +} - if (skb->sk == NULL) - return false; +static bool +cgroup_mt_v4(const struct sk_buff *skb, struct xt_action_param *par) +{ + return cgroup_mt(skb, par, xt_sk_lookup); +} - return (info->id == skb->sk->sk_classid) ^ info->invert; +#ifdef XT_HAVE_IPV6 +static bool +cgroup_mt_v6(const struct sk_buff *skb, struct xt_action_param *par) +{ + return cgroup_mt(skb, par, xt_sk_lookup6); } +#endif -static struct xt_match cgroup_mt_reg __read_mostly = { - .name = "cgroup", - .revision = 0, - .family = NFPROTO_UNSPEC, - .checkentry = cgroup_mt_check, - .match = cgroup_mt, - .matchsize = sizeof(struct xt_cgroup_info), - .me = THIS_MODULE, - .hooks = (1 << NF_INET_LOCAL_OUT) | - (1 << NF_INET_POST_ROUTING) | - (1 << NF_INET_LOCAL_IN), +static struct xt_match cgroup_mt_reg[] __read_mostly = { + { + .name = "cgroup", + .revision = 0, + .family = NFPROTO_IPV4, + .checkentry = cgroup_mt_check, + .match = cgroup_mt_v4, + .matchsize = sizeof(struct xt_cgroup_info), + .me = THIS_MODULE, + .hooks = (1 << NF_INET_LOCAL_OUT) | + (1 << NF_INET_POST_ROUTING) | + (1 << NF_INET_LOCAL_IN), + }, +#ifdef XT_HAVE_IPV6 + { + .name = "cgroup", + .revision = 0, + .family = NFPROTO_IPV6, + .checkentry = cgroup_mt_check, + .match = cgroup_mt_v6, + .matchsize = sizeof(struct xt_cgroup_info), + .me = THIS_MODULE, + .hooks = (1 << NF_INET_LOCAL_OUT) | + (1 << NF_INET_POST_ROUTING) | + (1 << NF_INET_LOCAL_IN), + } +#endif }; static int __init cgroup_mt_init(void) { - return xt_register_match(&cgroup_mt_reg); + nf_defrag_ipv4_enable(); +#ifdef XT_HAVE_IPV6 + nf_defrag_ipv6_enable(); +#endif + return xt_register_matches(cgroup_mt_reg, ARRAY_SIZE(cgroup_mt_reg)); } static void __exit cgroup_mt_exit(void) { - xt_unregister_match(&cgroup_mt_reg); + xt_unregister_matches(cgroup_mt_reg, ARRAY_SIZE(cgroup_mt_reg)); } module_init(cgroup_mt_init);