diff mbox

[9/9] netfilter: implement xt_cgroup cgroup2 path match

Message ID 1448122441-9335-10-git-send-email-tj@kernel.org
State Changes Requested
Delegated to: Pablo Neira
Headers show

Commit Message

Tejun Heo Nov. 21, 2015, 4:14 p.m. UTC
This patch implements xt_cgroup path match which matches cgroup2
membership of the associated socket.  The match is recursive and
invertible.

For rationales on introducing another cgroup based match, please refer
to a preceding commit "sock, cgroup: add sock->sk_cgroup".

v3: Folded into xt_cgroup as a new revision interface as suggested by
    Pablo.

v2: Included linux/limits.h from xt_cgroup2.h for PATH_MAX.  Added
    explicit alignment to the priv field.  Both suggested by Jan.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Daniel Wagner <daniel.wagner@bmw-carit.de>
CC: Neil Horman <nhorman@tuxdriver.com>
Cc: Jan Engelhardt <jengelh@inai.de>
Cc: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/uapi/linux/netfilter/xt_cgroup.h | 13 ++++++
 net/netfilter/xt_cgroup.c                | 69 ++++++++++++++++++++++++++++++++
 2 files changed, 82 insertions(+)

Comments

Florian Westphal Nov. 21, 2015, 4:56 p.m. UTC | #1
Tejun Heo <tj@kernel.org> wrote:
> This patch implements xt_cgroup path match which matches cgroup2
> membership of the associated socket.  The match is recursive and
> invertible.
> 
> For rationales on introducing another cgroup based match, please refer
> to a preceding commit "sock, cgroup: add sock->sk_cgroup".
> 
> v3: Folded into xt_cgroup as a new revision interface as suggested by
>     Pablo.
> 
> v2: Included linux/limits.h from xt_cgroup2.h for PATH_MAX.  Added
>     explicit alignment to the priv field.  Both suggested by Jan.
> 
> Signed-off-by: Tejun Heo <tj@kernel.org>
> Cc: Daniel Borkmann <daniel@iogearbox.net>
> Cc: Daniel Wagner <daniel.wagner@bmw-carit.de>
> CC: Neil Horman <nhorman@tuxdriver.com>
> Cc: Jan Engelhardt <jengelh@inai.de>
> Cc: Pablo Neira Ayuso <pablo@netfilter.org>
> ---
>  include/uapi/linux/netfilter/xt_cgroup.h | 13 ++++++
>  net/netfilter/xt_cgroup.c                | 69 ++++++++++++++++++++++++++++++++
>  2 files changed, 82 insertions(+)
> 
> diff --git a/include/uapi/linux/netfilter/xt_cgroup.h b/include/uapi/linux/netfilter/xt_cgroup.h
> index 577c9e0..1e4b37b 100644
> --- a/include/uapi/linux/netfilter/xt_cgroup.h
> +++ b/include/uapi/linux/netfilter/xt_cgroup.h
> @@ -2,10 +2,23 @@
>  #define _UAPI_XT_CGROUP_H
>  
>  #include <linux/types.h>
> +#include <linux/limits.h>
>  
>  struct xt_cgroup_info_v0 {
>  	__u32 id;
>  	__u32 invert;
>  };
>  
> +struct xt_cgroup_info_v1 {
> +	__u8		has_path;
> +	__u8		has_classid;
> +	__u8		invert_path;
> +	__u8		invert_classid;
> +	char		path[PATH_MAX];
> +	__u32		classid;
> +
> +	/* kernel internal data */
> +	void		*priv __attribute__((aligned(8)));
> +};

Ahem.  Am I reading this right? This struct is > 4k in size?
If so -- Ugh.  Does sizeof(path) really have to be PATH_MAX?

Thanks!
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Tejun Heo Nov. 21, 2015, 5:04 p.m. UTC | #2
Hello,

On Sat, Nov 21, 2015 at 05:56:06PM +0100, Florian Westphal wrote:
> > +struct xt_cgroup_info_v1 {
> > +	__u8		has_path;
> > +	__u8		has_classid;
> > +	__u8		invert_path;
> > +	__u8		invert_classid;
> > +	char		path[PATH_MAX];
> > +	__u32		classid;
> > +
> > +	/* kernel internal data */
> > +	void		*priv __attribute__((aligned(8)));
> > +};
> 
> Ahem.  Am I reading this right? This struct is > 4k in size?
> If so -- Ugh.  Does sizeof(path) really have to be PATH_MAX?

Hmmm... yeap but would this be an acutual problem?  We can try to make
it shorter but idk it ultimately is a path.  Another solution would be
trying to pass inode around but that is problematic with showing and
printing rules as the only way to reverse-map inode to path is walking
the tree and the cgroup may already be gone at that point.  While >4k
struct isn't pretty, this looks like the path of least resistance.

Thanks.
Florian Westphal Nov. 21, 2015, 6:54 p.m. UTC | #3
Tejun Heo <tj@kernel.org> wrote:
> On Sat, Nov 21, 2015 at 05:56:06PM +0100, Florian Westphal wrote:
> > > +struct xt_cgroup_info_v1 {
> > > +	__u8		has_path;
> > > +	__u8		has_classid;
> > > +	__u8		invert_path;
> > > +	__u8		invert_classid;
> > > +	char		path[PATH_MAX];
> > > +	__u32		classid;
> > > +
> > > +	/* kernel internal data */
> > > +	void		*priv __attribute__((aligned(8)));
> > > +};
> > 
> > Ahem.  Am I reading this right? This struct is > 4k in size?
> > If so -- Ugh.  Does sizeof(path) really have to be PATH_MAX?
> 
> Hmmm... yeap but would this be an acutual problem?

Since rule blob can be allocated via vmalloc i guess "no", its not
really a problem unless someone needs realy insane amount of such rules.

I don't have any better suggestion, so I guess its necessary evil.

The only other question I have is wheter PATH_MAX might be a possible
ABI breaker in future.  It would have to be guaranteed that this is the
same size forever, else you'd get strange errors on rule insertion if
the sizes of the kernel and userspace version differs.
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jan Engelhardt Nov. 21, 2015, 8:26 p.m. UTC | #4
On Saturday 2015-11-21 19:54, Florian Westphal wrote:
>
>The only other question I have is wheter PATH_MAX might be a possible
>ABI breaker in future.  It would have to be guaranteed that this is the
>same size forever, else you'd get strange errors on rule insertion if
>the sizes of the kernel and userspace version differs.
>
The same goes for IFNAMSIZ. But, so far, nobody changed it in the kernel,
even though there are voices that 15 characters + '\0' were a tight choice.
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Daniel Wagner Nov. 23, 2015, 12:43 p.m. UTC | #5
Hi Tejun,

On 11/21/2015 05:14 PM, Tejun Heo wrote:> +static int
> cgroup_mt_check_v1(const struct xt_mtchk_param *par)
> +{
> +	struct xt_cgroup_info_v1 *info = par->matchinfo;
> +	struct cgroup *cgrp;
> +
> +	if ((info->invert_path & ~1) || (info->invert_classid & ~1))
> +		return -EINVAL;

The checks below use pr_info() in case the configuration is not valid.
Is this missing here on purpose?

I have tested it slightly and it seems to work (also on an older
kernel). I don't know if that qualifies it for a Tested-by but at least
Acked-by should do the trick:

Tested-by: Daniel Wagner <daniel.wagner@bmw-carit.de>
Acked-by: Daniel Wagner <daniel.wagner@bmw-carit.de>

cheers,
daniel
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Daniel Borkmann Nov. 23, 2015, 1:43 p.m. UTC | #6
On 11/21/2015 07:54 PM, Florian Westphal wrote:
> Tejun Heo <tj@kernel.org> wrote:
>> On Sat, Nov 21, 2015 at 05:56:06PM +0100, Florian Westphal wrote:
>>>> +struct xt_cgroup_info_v1 {
>>>> +	__u8		has_path;
>>>> +	__u8		has_classid;
>>>> +	__u8		invert_path;
>>>> +	__u8		invert_classid;
>>>> +	char		path[PATH_MAX];
>>>> +	__u32		classid;
>>>> +
>>>> +	/* kernel internal data */
>>>> +	void		*priv __attribute__((aligned(8)));
>>>> +};
>>>
>>> Ahem.  Am I reading this right? This struct is > 4k in size?
>>> If so -- Ugh.  Does sizeof(path) really have to be PATH_MAX?
>>
>> Hmmm... yeap but would this be an acutual problem?
>
> Since rule blob can be allocated via vmalloc i guess "no", its not
> really a problem unless someone needs realy insane amount of such rules.
>
> I don't have any better suggestion, so I guess its necessary evil.
>
> The only other question I have is wheter PATH_MAX might be a possible
> ABI breaker in future.  It would have to be guaranteed that this is the
> same size forever, else you'd get strange errors on rule insertion if
> the sizes of the kernel and userspace version differs.

Haven't looked deeply into kernfs, but if it's possible to get the object
from the struct file eventually, you could let iptables frontend open that
path and just pass the fd down. Would be sizeof(int) vs PATH_MAX then, i.e.
when you have a large number of rules to load.
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Daniel Borkmann Nov. 23, 2015, 1:51 p.m. UTC | #7
On 11/23/2015 02:43 PM, Daniel Borkmann wrote:
> On 11/21/2015 07:54 PM, Florian Westphal wrote:
>> Tejun Heo <tj@kernel.org> wrote:
>>> On Sat, Nov 21, 2015 at 05:56:06PM +0100, Florian Westphal wrote:
>>>>> +struct xt_cgroup_info_v1 {
>>>>> +    __u8        has_path;
>>>>> +    __u8        has_classid;
>>>>> +    __u8        invert_path;
>>>>> +    __u8        invert_classid;
>>>>> +    char        path[PATH_MAX];
>>>>> +    __u32        classid;
>>>>> +
>>>>> +    /* kernel internal data */
>>>>> +    void        *priv __attribute__((aligned(8)));
>>>>> +};
>>>>
>>>> Ahem.  Am I reading this right? This struct is > 4k in size?
>>>> If so -- Ugh.  Does sizeof(path) really have to be PATH_MAX?
>>>
>>> Hmmm... yeap but would this be an acutual problem?
>>
>> Since rule blob can be allocated via vmalloc i guess "no", its not
>> really a problem unless someone needs realy insane amount of such rules.
>>
>> I don't have any better suggestion, so I guess its necessary evil.
>>
>> The only other question I have is wheter PATH_MAX might be a possible
>> ABI breaker in future.  It would have to be guaranteed that this is the
>> same size forever, else you'd get strange errors on rule insertion if
>> the sizes of the kernel and userspace version differs.
>
> Haven't looked deeply into kernfs, but if it's possible to get the object
> from the struct file eventually, you could let iptables frontend open that
> path and just pass the fd down. Would be sizeof(int) vs PATH_MAX then, i.e.
> when you have a large number of rules to load.

( ... but with the downside that things like save/restore wouldn't work. )
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Tejun Heo Nov. 23, 2015, 3:40 p.m. UTC | #8
Hello, Daniel.

On Mon, Nov 23, 2015 at 02:43:12PM +0100, Daniel Borkmann wrote:
...
> Haven't looked deeply into kernfs, but if it's possible to get the object
> from the struct file eventually, you could let iptables frontend open that
> path and just pass the fd down. Would be sizeof(int) vs PATH_MAX then, i.e.
> when you have a large number of rules to load.

That works in one direction but not in the other.  ie. You can tell
the kernel the path that way but can't retrieve.  If using path string
is unacceptable, the best alternative would be an inode number rather
than a fd; however, using an inode number would be quite cumbersome
and painful too.

Thanks.
Tejun Heo Nov. 23, 2015, 3:41 p.m. UTC | #9
Hello,

On Mon, Nov 23, 2015 at 01:43:01PM +0100, Daniel Wagner wrote:
> Hi Tejun,
> 
> On 11/21/2015 05:14 PM, Tejun Heo wrote:> +static int
> > cgroup_mt_check_v1(const struct xt_mtchk_param *par)
> > +{
> > +	struct xt_cgroup_info_v1 *info = par->matchinfo;
> > +	struct cgroup *cgrp;
> > +
> > +	if ((info->invert_path & ~1) || (info->invert_classid & ~1))
> > +		return -EINVAL;
> 
> The checks below use pr_info() in case the configuration is not valid.
> Is this missing here on purpose?

It's mostly copied from v0 function but I think it makes sense.  The
other errors can be caused by incorrect user input but the above one
can't happen unless iptables extension itself is broken.

> I have tested it slightly and it seems to work (also on an older
> kernel). I don't know if that qualifies it for a Tested-by but at least
> Acked-by should do the trick:

Will answer that there.

> Tested-by: Daniel Wagner <daniel.wagner@bmw-carit.de>
> Acked-by: Daniel Wagner <daniel.wagner@bmw-carit.de>

Thanks.
David Laight Nov. 23, 2015, 5:35 p.m. UTC | #10
From: Florian Westphal
> Sent: 21 November 2015 16:56
> > +struct xt_cgroup_info_v1 {
> > +	__u8		has_path;
> > +	__u8		has_classid;
> > +	__u8		invert_path;
> > +	__u8		invert_classid;
> > +	char		path[PATH_MAX];
> > +	__u32		classid;
> > +
> > +	/* kernel internal data */
> > +	void		*priv __attribute__((aligned(8)));
> > +};
> 
> Ahem.  Am I reading this right? This struct is > 4k in size?
> If so -- Ugh.  Does sizeof(path) really have to be PATH_MAX?

I've not looked at the use, but could you put 'char path[];'
as the last member an require any allocations to be long enough
to contain the actual path?

	David

--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jan Engelhardt Nov. 23, 2015, 5:55 p.m. UTC | #11
On Monday 2015-11-23 18:35, David Laight wrote:
>From: Florian Westphal
>> Sent: 21 November 2015 16:56
>> > +struct xt_cgroup_info_v1 {
>> > +	char		path[PATH_MAX];
>> > +	__u32		classid;
>> > +
>> > +	/* kernel internal data */
>> > +	void		*priv __attribute__((aligned(8)));
>> > +};
>> 
>> Ahem.  Am I reading this right? This struct is > 4k in size?
>> If so -- Ugh.  Does sizeof(path) really have to be PATH_MAX?
>
>I've not looked at the use, but could you put 'char path[];'
>as the last member an require any allocations to be long enough
>to contain the actual path?

Oh, smart :)  Yeah, ebt_among does something like that.
(.matchsize = -1, hint)

Except that the "priv" pointer seems to be ruining the fun here -
kernel vars have to be last, which collides with the requirements
for []-type members.
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/include/uapi/linux/netfilter/xt_cgroup.h b/include/uapi/linux/netfilter/xt_cgroup.h
index 577c9e0..1e4b37b 100644
--- a/include/uapi/linux/netfilter/xt_cgroup.h
+++ b/include/uapi/linux/netfilter/xt_cgroup.h
@@ -2,10 +2,23 @@ 
 #define _UAPI_XT_CGROUP_H
 
 #include <linux/types.h>
+#include <linux/limits.h>
 
 struct xt_cgroup_info_v0 {
 	__u32 id;
 	__u32 invert;
 };
 
+struct xt_cgroup_info_v1 {
+	__u8		has_path;
+	__u8		has_classid;
+	__u8		invert_path;
+	__u8		invert_classid;
+	char		path[PATH_MAX];
+	__u32		classid;
+
+	/* kernel internal data */
+	void		*priv __attribute__((aligned(8)));
+};
+
 #endif /* _UAPI_XT_CGROUP_H */
diff --git a/net/netfilter/xt_cgroup.c b/net/netfilter/xt_cgroup.c
index 1730025..a086a91 100644
--- a/net/netfilter/xt_cgroup.c
+++ b/net/netfilter/xt_cgroup.c
@@ -34,6 +34,37 @@  static int cgroup_mt_check_v0(const struct xt_mtchk_param *par)
 	return 0;
 }
 
+static int cgroup_mt_check_v1(const struct xt_mtchk_param *par)
+{
+	struct xt_cgroup_info_v1 *info = par->matchinfo;
+	struct cgroup *cgrp;
+
+	if ((info->invert_path & ~1) || (info->invert_classid & ~1))
+		return -EINVAL;
+
+	if (!info->has_path && !info->has_classid) {
+		pr_info("xt_cgroup: no path or classid specified\n");
+		return -EINVAL;
+	}
+
+	if (info->has_path && info->has_classid) {
+		pr_info("xt_cgroup: both path and classid specified\n");
+		return -EINVAL;
+	}
+
+	if (info->has_path) {
+		cgrp = cgroup_get_from_path(info->path);
+		if (IS_ERR(cgrp)) {
+			pr_info("xt_cgroup: invalid path, errno=%ld\n",
+				PTR_ERR(cgrp));
+			return -EINVAL;
+		}
+		info->priv = cgrp;
+	}
+
+	return 0;
+}
+
 static bool
 cgroup_mt_v0(const struct sk_buff *skb, struct xt_action_param *par)
 {
@@ -46,6 +77,31 @@  cgroup_mt_v0(const struct sk_buff *skb, struct xt_action_param *par)
 		info->invert;
 }
 
+static bool cgroup_mt_v1(const struct sk_buff *skb, struct xt_action_param *par)
+{
+	const struct xt_cgroup_info_v1 *info = par->matchinfo;
+	struct sock_cgroup_data *skcd = &skb->sk->sk_cgrp_data;
+	struct cgroup *ancestor = info->priv;
+
+	if (!skb->sk || !sk_fullsock(skb->sk))
+		return false;
+
+	if (ancestor)
+		return cgroup_is_descendant(sock_cgroup_ptr(skcd), ancestor) ^
+			info->invert_path;
+	else
+		return (info->classid == sock_cgroup_classid(skcd)) ^
+			info->invert_classid;
+}
+
+static void cgroup_mt_destroy_v1(const struct xt_mtdtor_param *par)
+{
+	struct xt_cgroup_info_v1 *info = par->matchinfo;
+
+	if (info->priv)
+		cgroup_put(info->priv);
+}
+
 static struct xt_match cgroup_mt_reg[] __read_mostly = {
 	{
 		.name		= "cgroup",
@@ -59,6 +115,19 @@  static struct xt_match cgroup_mt_reg[] __read_mostly = {
 				  (1 << NF_INET_POST_ROUTING) |
 				  (1 << NF_INET_LOCAL_IN),
 	},
+	{
+		.name		= "cgroup",
+		.revision	= 1,
+		.family		= NFPROTO_UNSPEC,
+		.checkentry	= cgroup_mt_check_v1,
+		.match		= cgroup_mt_v1,
+		.matchsize	= sizeof(struct xt_cgroup_info_v1),
+		.destroy	= cgroup_mt_destroy_v1,
+		.me		= THIS_MODULE,
+		.hooks		= (1 << NF_INET_LOCAL_OUT) |
+				  (1 << NF_INET_POST_ROUTING) |
+				  (1 << NF_INET_LOCAL_IN),
+	},
 };
 
 static int __init cgroup_mt_init(void)