Message ID | 213b822a711fb7af77f6ecbdfbe41a079b27ddcb.1427394874.git.daniel@iogearbox.net |
---|---|
State | Changes Requested |
Delegated to: | Pablo Neira |
Headers | show |
On Thu, Mar 26, 2015 at 08:14:48PM +0100, Daniel Borkmann wrote: [...] > However, that as-is only partially works, i.e. it works for the case > of established TCP and connected UDP sockets when early demux is > enabled, but not for various other ingress scenarios: i) early demux > disabled (sysctl), ii) udp on unconnected sockets, iii) tcp and udp > (any kind) on localhost communications. This extension has been around since Dec 2013, I'd rather see a new revision that includes an option --lookup-sock. More comments below. > net/netfilter/Kconfig | 5 +++ > net/netfilter/xt_cgroup.c | 92 +++++++++++++++++++++++++++++++++++++---------- > 2 files changed, 79 insertions(+), 18 deletions(-) > > diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig > index 971cd75..044bd22 100644 > --- a/net/netfilter/Kconfig > +++ b/net/netfilter/Kconfig > @@ -960,8 +960,13 @@ config NETFILTER_XT_MATCH_BPF > > config NETFILTER_XT_MATCH_CGROUP > tristate '"control group" match support' > + depends on NETFILTER_XTABLES why this? I think NETFILTER_ADVANCED is sufficient. > depends on NETFILTER_ADVANCED > + depends on !NF_CONNTRACK || NF_CONNTRACK why conntrack? > + depends on (IPV6 || IPV6=n) Do we depend on any ipv6 symbol? > depends on CGROUPS > + select NF_DEFRAG_IPV4 > + select NF_DEFRAG_IPV6 if IP6_NF_IPTABLES No need for defrag either. Please, revisit the Kconfig trickery. 
> select CGROUP_NET_CLASSID > ---help--- > Socket/process control group matching allows you to match locally > diff --git a/net/netfilter/xt_cgroup.c b/net/netfilter/xt_cgroup.c > index 7198d66..17f5a98 100644 > --- a/net/netfilter/xt_cgroup.c > +++ b/net/netfilter/xt_cgroup.c > @@ -16,14 +16,20 @@ > #include <linux/module.h> > #include <linux/netfilter/x_tables.h> > #include <linux/netfilter/xt_cgroup.h> > + > #include <net/sock.h> > > +#include "xt_sk_helper.h" > + > MODULE_LICENSE("GPL"); > MODULE_AUTHOR("Daniel Borkmann <dborkman@redhat.com>"); > MODULE_DESCRIPTION("Xtables: process control group matching"); > MODULE_ALIAS("ipt_cgroup"); > MODULE_ALIAS("ip6t_cgroup"); > > +typedef struct sock *(*cgroup_lookup_t)(const struct sk_buff *skb, > + const struct net_device *indev); > + > static int cgroup_mt_check(const struct xt_mtchk_param *par) > { > struct xt_cgroup_info *info = par->matchinfo; > @@ -34,38 +40,88 @@ static int cgroup_mt_check(const struct xt_mtchk_param *par) > return 0; > } > > -static bool > -cgroup_mt(const struct sk_buff *skb, struct xt_action_param *par) > +static bool cgroup_mt(const struct sk_buff *skb, > + const struct xt_action_param *par, > + cgroup_lookup_t cgroup_mt_slow) > { > const struct xt_cgroup_info *info = par->matchinfo; > + struct sock *sk = skb->sk; > + u32 sk_classid; > > - if (skb->sk == NULL) > - return false; > + if (sk) { > + sk_classid = sk->sk_classid; > + } else { > + if (par->in != NULL) > + sk = cgroup_mt_slow(skb, par->in); > + if (sk == NULL) > + return false; > + if (!sk_fullsock(sk)) { > + sock_gen_put(sk); > + return false; > + } > + > + sk_classid = sk->sk_classid; > + sock_gen_put(sk); > + } > + > + return (info->id == sk_classid) ^ info->invert; > +} > > - return (info->id == skb->sk->sk_classid) ^ info->invert; > +static bool > +cgroup_mt_v4(const struct sk_buff *skb, struct xt_action_param *par) > +{ > + return cgroup_mt(skb, par, xt_sk_lookup); > +} > + > +#ifdef XT_HAVE_IPV6 Please, kill this custom 
XT_HAVE_IPV6 and now use IS_ENABLED(NF_SOCK_IPV6) > +static bool > +cgroup_mt_v6(const struct sk_buff *skb, struct xt_action_param *par) > +{ > + return cgroup_mt(skb, par, xt_sk_lookup6); > } > +#endif > > -static struct xt_match cgroup_mt_reg __read_mostly = { > - .name = "cgroup", > - .revision = 0, > - .family = NFPROTO_UNSPEC, > - .checkentry = cgroup_mt_check, > - .match = cgroup_mt, > - .matchsize = sizeof(struct xt_cgroup_info), > - .me = THIS_MODULE, > - .hooks = (1 << NF_INET_LOCAL_OUT) | > - (1 << NF_INET_POST_ROUTING) | > - (1 << NF_INET_LOCAL_IN), > +static struct xt_match cgroup_mt_reg[] __read_mostly = { > + { > + .name = "cgroup", > + .revision = 0, > + .family = NFPROTO_IPV4, > + .checkentry = cgroup_mt_check, > + .match = cgroup_mt_v4, > + .matchsize = sizeof(struct xt_cgroup_info), > + .me = THIS_MODULE, > + .hooks = (1 << NF_INET_LOCAL_OUT) | > + (1 << NF_INET_POST_ROUTING) | > + (1 << NF_INET_LOCAL_IN), > + }, > +#ifdef XT_HAVE_IPV6 > + { > + .name = "cgroup", > + .revision = 0, > + .family = NFPROTO_IPV6, > + .checkentry = cgroup_mt_check, > + .match = cgroup_mt_v6, > + .matchsize = sizeof(struct xt_cgroup_info), > + .me = THIS_MODULE, > + .hooks = (1 << NF_INET_LOCAL_OUT) | > + (1 << NF_INET_POST_ROUTING) | > + (1 << NF_INET_LOCAL_IN), > + } > +#endif > }; > > static int __init cgroup_mt_init(void) > { > - return xt_register_match(&cgroup_mt_reg); > + nf_defrag_ipv4_enable(); Why did you add this? 
> +#ifdef XT_HAVE_IPV6 > + nf_defrag_ipv6_enable(); > +#endif > + return xt_register_matches(cgroup_mt_reg, ARRAY_SIZE(cgroup_mt_reg)); > } > > static void __exit cgroup_mt_exit(void) > { > - xt_unregister_match(&cgroup_mt_reg); > + xt_unregister_matches(cgroup_mt_reg, ARRAY_SIZE(cgroup_mt_reg)); > } > > module_init(cgroup_mt_init); > -- > 1.9.3 > > -- > To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Fri, Mar 27, 2015 at 01:14:08AM +0100, Pablo Neira Ayuso wrote: > > diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig > > index 971cd75..044bd22 100644 > > --- a/net/netfilter/Kconfig > > +++ b/net/netfilter/Kconfig > > @@ -960,8 +960,13 @@ config NETFILTER_XT_MATCH_BPF > > > > config NETFILTER_XT_MATCH_CGROUP > > tristate '"control group" match support' > > + depends on NETFILTER_XTABLES > > why this? I think NETFILTER_ADVANCED is sufficient. > > > depends on NETFILTER_ADVANCED > > + depends on !NF_CONNTRACK || NF_CONNTRACK > > why conntrack? > > > + depends on (IPV6 || IPV6=n) > > Do we depend on any ipv6 symbol? > > > depends on CGROUPS > > + select NF_DEFRAG_IPV4 > > + select NF_DEFRAG_IPV6 if IP6_NF_IPTABLES > > No need for defrag either. Wait, now I see why you need this. What started as a simple cgroup match extension is turning into a more complicated thing. And you want to do firewalling with this, which doesn't work for other socket families than TCP and UDP. -- To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 03/27/2015 01:14 AM, Pablo Neira Ayuso wrote: > On Thu, Mar 26, 2015 at 08:14:48PM +0100, Daniel Borkmann wrote: > [...] >> However, that as-is only partially works, i.e. it works for the case >> of established TCP and connected UDP sockets when early demux is >> enabled, but not for various other ingress scenarios: i) early demux >> disabled (sysctl), ii) udp on unconnected sockets, iii) tcp and udp >> (any kind) on localhost communications. > > This extension has been around since Dec 2013, I'd rather see a new > revision that includes an option --lookup-sock. Okay, I'm totally fine with that. Please note, the commit I'm trying to fix is _not_ the original xt_cgroup inclusion, but rather a00e76349f35 ("netfilter: x_tables: allow to use cgroup match for LOCAL_IN nf hooks"), which is March 2014, fwiw. > More comments below. ... >> +#ifdef XT_HAVE_IPV6 > > Please, kill this custom XT_HAVE_IPV6 and now use IS_ENABLED(NF_SOCK_IPV6) Will do, thanks. -- To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 03/27/2015 03:10 AM, Pablo Neira Ayuso wrote: > On Fri, Mar 27, 2015 at 01:14:08AM +0100, Pablo Neira Ayuso wrote: >>> diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig >>> index 971cd75..044bd22 100644 >>> --- a/net/netfilter/Kconfig >>> +++ b/net/netfilter/Kconfig >>> @@ -960,8 +960,13 @@ config NETFILTER_XT_MATCH_BPF >>> >>> config NETFILTER_XT_MATCH_CGROUP >>> tristate '"control group" match support' >>> + depends on NETFILTER_XTABLES >> >> why this? I think NETFILTER_ADVANCED is sufficient. >> >>> depends on NETFILTER_ADVANCED >>> + depends on !NF_CONNTRACK || NF_CONNTRACK >> >> why conntrack? >> >>> + depends on (IPV6 || IPV6=n) >> >> Do we depend on any ipv6 symbol? >> >>> depends on CGROUPS >>> + select NF_DEFRAG_IPV4 >>> + select NF_DEFRAG_IPV6 if IP6_NF_IPTABLES >> >> No need for defrag either. > > Wait, now I see why you need this. > > What started a simple cgroup match extension is turning into a more > complicated thing. And you want to do firewalling with this, which > doesn't work for other socket families than TCP and UDP. Right, so for me it started out as a simple outgoing match extension for skb->sk and this should be protocol agnostic, for example, SCTP sets the skb owner in its output path, so the cgroup id would work there, too. (That should be the case for every protocol that's doing proper socket accounting.) People have since then seen a use case for accounting, so support was added for local-in (which we try to fix), where it's being used in Tizen OS apparently, but the idea for realizing a per-application, per-container, ... firewall for both filtering and accounting sounds appealing to me. So, I'd like to get this right for iptables and am also eager to help out fixing this in nft. 
I was thinking that if we add --lookup-sock in a second revision, the man-page would _clearly_ need to describe that when being used w/o the lookup option, it only works for protocols making use of early demuxes on ingress, and when being used with the lookup option, we would have TCP/UDP covered on ingress. Would that be fine as a start to have this documented? Or, would nft also require niche protocols like SCTP/DCCP to be supported for the lookup up-front? What I've seen so far is, that besides the basic xt_sctp matching, the perhaps biggest request SCTP users might have, is that association tracking currently is missing for the conntracker and ipvs to make their multi-homed use-cases work, but I guess I'm starting to get off-topic. :) Thanks, Daniel -- To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Fri, Mar 27, 2015 at 10:48:51AM +0100, Daniel Borkmann wrote: > On 03/27/2015 03:10 AM, Pablo Neira Ayuso wrote: > > > > What started a simple cgroup match extension is turning into a more > > complicated thing. And you want to do firewalling with this, which > > doesn't work for other socket families than TCP and UDP. > > Right, so for me it started out as a simple outgoing match extension > for skb->sk and this should be protocol agnostic, for example, SCTP > sets the skb owner in its output path, so the cgroup id would work > there, too. (That should be the case for every protocol that's doing > proper socket accounting.) > > People have since then seen a use case for accounting, so support > was added for local-in (which we try to fix), where it's being used > in Tizen OS apparently, but the idea for realizing a per-application, > per-container, ... firewall for both filtering and accounting sounds > appealing to me. Yes, but the more I look into this the more I'm convinced that the nf socket infrastructure was not designed for generic socket-based firewalling. This is basically there for TPROXY and very simple socket filtering scenarios. This really needs more thinking. > So, I'd like to get this right for iptables and am also eager to help > out fixing this in nft. I'm just going to send two-liner patch for nft to get this working at least in the limited supported scenarios that we already have. > I was thinking that if we add --lookup-sock in a second revision, > the man-page would _clearly_ need to describe that when being used > w/o the lookup option, it only works for protocols making use of > early demuxes on ingress, and when being being used with the lookup > option, we would have TCP/UDP covered on ingress. Not even that, it seems to me this will not work for UDP multicast either. > Would that be fine as a start to have this documented? 
I think this is not going to work the way users expect, so I would either schedule INPUT cgroup filtering for removal (to get this aligned with the owner match) or document how limited this is. > Or, would nft also require niche protocols like SCTP/DCCP to be > supported for the lookup up-front? > > What I've seen so far is, that besides the basic xt_sctp matching, > the perhaps biggest request SCTP users might have, is that association > tracking currently is missing for the conntracker and ipvs to make > their multi-homed use-cases work, but I guess I'm starting to get > off-topic. :) Yes, that's a different front ;-). -- To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 03/27/2015 11:47 AM, Pablo Neira Ayuso wrote: > On Fri, Mar 27, 2015 at 10:48:51AM +0100, Daniel Borkmann wrote: >> On 03/27/2015 03:10 AM, Pablo Neira Ayuso wrote: >>> >>> What started a simple cgroup match extension is turning into a more >>> complicated thing. And you want to do firewalling with this, which >>> doesn't work for other socket families than TCP and UDP. >> >> Right, so for me it started out as a simple outgoing match extension >> for skb->sk and this should be protocol agnostic, for example, SCTP >> sets the skb owner in its output path, so the cgroup id would work >> there, too. (That should be the case for every protocol that's doing >> proper socket accounting.) >> >> People have since then seen a use case for accounting, so support >> was added for local-in (which we try to fix), where it's being used >> in Tizen OS apparently, but the idea for realizing a per-application, >> per-container, ... firewall for both filtering and accounting sounds >> appealing to me. > > Yes, but the more I look into this the more I'm convinced that the nf > socket infrastructure was not designed for generic socket-based > firewalling. > > This is basically there for TPROXY and very simple socket filtering > scenarios. This really needs more thinking. Okay, I understand, if we can come up with a better, more generic solution to this use-case, I'm all for it. >> So, I'd like to get this right for iptables and am also eager to help >> out fixing this in nft. > > I'm just going to send two-liner patch for nft to get this working at > least in the limited supported scenarios that we already have. Okay. Thanks again, Daniel -- To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 971cd75..044bd22 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -960,8 +960,13 @@ config NETFILTER_XT_MATCH_BPF config NETFILTER_XT_MATCH_CGROUP tristate '"control group" match support' + depends on NETFILTER_XTABLES depends on NETFILTER_ADVANCED + depends on !NF_CONNTRACK || NF_CONNTRACK + depends on (IPV6 || IPV6=n) depends on CGROUPS + select NF_DEFRAG_IPV4 + select NF_DEFRAG_IPV6 if IP6_NF_IPTABLES select CGROUP_NET_CLASSID ---help--- Socket/process control group matching allows you to match locally diff --git a/net/netfilter/xt_cgroup.c b/net/netfilter/xt_cgroup.c index 7198d66..17f5a98 100644 --- a/net/netfilter/xt_cgroup.c +++ b/net/netfilter/xt_cgroup.c @@ -16,14 +16,20 @@ #include <linux/module.h> #include <linux/netfilter/x_tables.h> #include <linux/netfilter/xt_cgroup.h> + #include <net/sock.h> +#include "xt_sk_helper.h" + MODULE_LICENSE("GPL"); MODULE_AUTHOR("Daniel Borkmann <dborkman@redhat.com>"); MODULE_DESCRIPTION("Xtables: process control group matching"); MODULE_ALIAS("ipt_cgroup"); MODULE_ALIAS("ip6t_cgroup"); +typedef struct sock *(*cgroup_lookup_t)(const struct sk_buff *skb, + const struct net_device *indev); + static int cgroup_mt_check(const struct xt_mtchk_param *par) { struct xt_cgroup_info *info = par->matchinfo; @@ -34,38 +40,88 @@ static int cgroup_mt_check(const struct xt_mtchk_param *par) return 0; } -static bool -cgroup_mt(const struct sk_buff *skb, struct xt_action_param *par) +static bool cgroup_mt(const struct sk_buff *skb, + const struct xt_action_param *par, + cgroup_lookup_t cgroup_mt_slow) { const struct xt_cgroup_info *info = par->matchinfo; + struct sock *sk = skb->sk; + u32 sk_classid; - if (skb->sk == NULL) - return false; + if (sk) { + sk_classid = sk->sk_classid; + } else { + if (par->in != NULL) + sk = cgroup_mt_slow(skb, par->in); + if (sk == NULL) + return false; + if (!sk_fullsock(sk)) { + sock_gen_put(sk); + return false; + } + + 
sk_classid = sk->sk_classid; + sock_gen_put(sk); + } + + return (info->id == sk_classid) ^ info->invert; +} - return (info->id == skb->sk->sk_classid) ^ info->invert; +static bool +cgroup_mt_v4(const struct sk_buff *skb, struct xt_action_param *par) +{ + return cgroup_mt(skb, par, xt_sk_lookup); +} + +#ifdef XT_HAVE_IPV6 +static bool +cgroup_mt_v6(const struct sk_buff *skb, struct xt_action_param *par) +{ + return cgroup_mt(skb, par, xt_sk_lookup6); } +#endif -static struct xt_match cgroup_mt_reg __read_mostly = { - .name = "cgroup", - .revision = 0, - .family = NFPROTO_UNSPEC, - .checkentry = cgroup_mt_check, - .match = cgroup_mt, - .matchsize = sizeof(struct xt_cgroup_info), - .me = THIS_MODULE, - .hooks = (1 << NF_INET_LOCAL_OUT) | - (1 << NF_INET_POST_ROUTING) | - (1 << NF_INET_LOCAL_IN), +static struct xt_match cgroup_mt_reg[] __read_mostly = { + { + .name = "cgroup", + .revision = 0, + .family = NFPROTO_IPV4, + .checkentry = cgroup_mt_check, + .match = cgroup_mt_v4, + .matchsize = sizeof(struct xt_cgroup_info), + .me = THIS_MODULE, + .hooks = (1 << NF_INET_LOCAL_OUT) | + (1 << NF_INET_POST_ROUTING) | + (1 << NF_INET_LOCAL_IN), + }, +#ifdef XT_HAVE_IPV6 + { + .name = "cgroup", + .revision = 0, + .family = NFPROTO_IPV6, + .checkentry = cgroup_mt_check, + .match = cgroup_mt_v6, + .matchsize = sizeof(struct xt_cgroup_info), + .me = THIS_MODULE, + .hooks = (1 << NF_INET_LOCAL_OUT) | + (1 << NF_INET_POST_ROUTING) | + (1 << NF_INET_LOCAL_IN), + } +#endif }; static int __init cgroup_mt_init(void) { - return xt_register_match(&cgroup_mt_reg); + nf_defrag_ipv4_enable(); +#ifdef XT_HAVE_IPV6 + nf_defrag_ipv6_enable(); +#endif + return xt_register_matches(cgroup_mt_reg, ARRAY_SIZE(cgroup_mt_reg)); } static void __exit cgroup_mt_exit(void) { - xt_unregister_match(&cgroup_mt_reg); + xt_unregister_matches(cgroup_mt_reg, ARRAY_SIZE(cgroup_mt_reg)); } module_init(cgroup_mt_init);