netfilter: ipset: export indexes via netlink

Message ID 1530794872-32485-1-git-send-email-florent.fourcot@wifirst.fr
State Under Review
Delegated to: Pablo Neira
Headers show
Series
  • netfilter: ipset: export indexes via netlink
Related show

Commit Message

Florent Fourcot July 5, 2018, 12:47 p.m.
Indexes are exported through getsockopt calls (IP_SET_OP_GET_BYNAME)
and are mandatory for external subsystem using ipset:
  * ipset module of tc-ematch (configured by netlink, but using
    getsockopt before to get index)
  * SET netfilter module

The goal of this patch is to allow one user to use only netlink to get
ipset indexes. However, since `ipset` userspace command does not accept
new/unknow nla (structure didn't change since years), a new flag is
introduced to ask for more data. Currently it adds only indexes, but
application setting the flag should be ready to accept new nla in
future.

Signed-off-by: Florent Fourcot <florent.fourcot@wifirst.fr>
Signed-off-by: Victorien Molle <victorien.molle@wifirst.fr>
---
 include/uapi/linux/netfilter/ipset/ip_set.h |  4 ++++
 net/netfilter/ipset/ip_set_core.c           | 18 +++++++++++++++++-
 2 files changed, 21 insertions(+), 1 deletion(-)

Comments

Jozsef Kadlecsik July 6, 2018, 6:48 a.m. | #1
Hi,

On Thu, 5 Jul 2018, Florent Fourcot wrote:

> Indexes are exported through getsockopt calls (IP_SET_OP_GET_BYNAME)
> and are mandatory for external subsystem using ipset:
>   * ipset module of tc-ematch (configured by netlink, but using
>     getsockopt before to get index)
>   * SET netfilter module
> 
> The goal of this patch is to allow one user to use only netlink to get
> ipset indexes. However, since `ipset` userspace command does not accept
> new/unknow nla (structure didn't change since years), a new flag is
> introduced to ask for more data. Currently it adds only indexes, but
> application setting the flag should be ready to accept new nla in
> future.

Technically I have no problem with your patch. However, it means a 
non-versioned protocol change. I'd like to think about it and check how 
would be best to introduce a version change.

Best regards,
Jozsef
 
> Signed-off-by: Florent Fourcot <florent.fourcot@wifirst.fr>
> Signed-off-by: Victorien Molle <victorien.molle@wifirst.fr>
> ---
>  include/uapi/linux/netfilter/ipset/ip_set.h |  4 ++++
>  net/netfilter/ipset/ip_set_core.c           | 18 +++++++++++++++++-
>  2 files changed, 21 insertions(+), 1 deletion(-)
> 
> diff --git a/include/uapi/linux/netfilter/ipset/ip_set.h b/include/uapi/linux/netfilter/ipset/ip_set.h
> index 60236f694143..8ef2560ff69e 100644
> --- a/include/uapi/linux/netfilter/ipset/ip_set.h
> +++ b/include/uapi/linux/netfilter/ipset/ip_set.h
> @@ -66,6 +66,8 @@ enum {
>  	IPSET_ATTR_LINENO,	/* 9: Restore lineno */
>  	IPSET_ATTR_PROTOCOL_MIN, /* 10: Minimal supported version number */
>  	IPSET_ATTR_REVISION_MIN	= IPSET_ATTR_PROTOCOL_MIN, /* type rev min */
> +	/* attributes not sent by default (see IPSET_FLAG_EXTRA_DATA) */
> +	IPSET_ATTR_INDEX,   /* 11: Index of the set */
>  	__IPSET_ATTR_CMD_MAX,
>  };
>  #define IPSET_ATTR_CMD_MAX	(__IPSET_ATTR_CMD_MAX - 1)
> @@ -182,6 +184,8 @@ enum ipset_cmd_flags {
>  	IPSET_FLAG_MAP_SKBPRIO = (1 << IPSET_FLAG_BIT_MAP_SKBPRIO),
>  	IPSET_FLAG_BIT_MAP_SKBQUEUE = 10,
>  	IPSET_FLAG_MAP_SKBQUEUE = (1 << IPSET_FLAG_BIT_MAP_SKBQUEUE),
> +	IPSET_FLAG_BIT_EXTRA_DATA = 11,
> +	IPSET_FLAG_EXTRA_DATA = (1 << IPSET_FLAG_BIT_EXTRA_DATA),
>  	IPSET_FLAG_CMD_MAX = 15,
>  };
>  
> diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
> index bc4bd247bb7d..370b79368ddb 100644
> --- a/net/netfilter/ipset/ip_set_core.c
> +++ b/net/netfilter/ipset/ip_set_core.c
> @@ -1409,6 +1409,11 @@ ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb)
>  				goto release_refcount;
>  			if (dump_flags & IPSET_FLAG_LIST_HEADER)
>  				goto next_set;
> +			if (dump_flags & IPSET_FLAG_EXTRA_DATA) {
> +				if (nla_put_u16(skb, IPSET_ATTR_INDEX,
> +						index))
> +					goto nla_put_failure;
> +			}
>  			if (set->variant->uref)
>  				set->variant->uref(set, cb, true);
>  			/* fall through */
> @@ -1695,6 +1700,7 @@ static int ip_set_header(struct net *net, struct sock *ctnl,
>  			 const struct nlattr * const attr[],
>  			 struct netlink_ext_ack *extack)
>  {
> +	ip_set_id_t index;
>  	struct ip_set_net *inst = ip_set_pernet(net);
>  	const struct ip_set *set;
>  	struct sk_buff *skb2;
> @@ -1705,7 +1711,7 @@ static int ip_set_header(struct net *net, struct sock *ctnl,
>  		     !attr[IPSET_ATTR_SETNAME]))
>  		return -IPSET_ERR_PROTOCOL;
>  
> -	set = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME]));
> +	set = find_set_and_id(inst, nla_data(attr[IPSET_ATTR_SETNAME]), &index);
>  	if (!set)
>  		return -ENOENT;
>  
> @@ -1723,6 +1729,16 @@ static int ip_set_header(struct net *net, struct sock *ctnl,
>  	    nla_put_u8(skb2, IPSET_ATTR_FAMILY, set->family) ||
>  	    nla_put_u8(skb2, IPSET_ATTR_REVISION, set->revision))
>  		goto nla_put_failure;
> +
> +	if (attr[IPSET_ATTR_FLAGS]) {
> +		u32 flags = ip_set_get_h32(attr[IPSET_ATTR_FLAGS]);
> +
> +		if (flags & IPSET_FLAG_EXTRA_DATA) {
> +			if (nla_put_u16(skb2, IPSET_ATTR_INDEX, index))
> +				goto nla_put_failure;
> +		}
> +	}
> +
>  	nlmsg_end(skb2, nlh2);
>  
>  	ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT);
> -- 
> 2.11.0
> 
> --
> To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 

-
E-mail  : kadlec@blackhole.kfki.hu, kadlecsik.jozsef@wigner.mta.hu
PGP key : http://www.kfki.hu/~kadlec/pgp_public_key.txt
Address : Wigner Research Centre for Physics, Hungarian Academy of Sciences
          H-1525 Budapest 114, POB. 49, Hungary
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Florent Fourcot July 16, 2018, 9:34 a.m. | #2
Hello Jozsef,

> 
> Technically I have no problem with your patch. However, it means a
> non-versioned protocol change. I'd like to think about it and check how
> would be best to introduce a version change.
> 

Do you have any update on this? In my opinion, there are already some 
flags to control list output (LIST_SETNAME and LIST_HEADER), and adding 
one more is not really a breaking change.

If you have any hints/idea to improve this patch, I can try to provide a 
new version.

Best regards,

Florent.
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jozsef Kadlecsik July 16, 2018, 7:48 p.m. | #3
Hi Florent,

On Mon, 16 Jul 2018, Florent Fourcot wrote:

> > Technically I have no problem with your patch. However, it means a 
> > non-versioned protocol change. I'd like to think about it and check 
> > how would be best to introduce a version change.
> 
> Do you have any update on this? In my opinion, there are already some 
> flags to control list output (LIST_SETNAME and LIST_HEADER), and adding 
> one more is not really a breaking change.

Controlling list output is not the same: kernel versions which do not know 
the flags simply list the whole sets and userspace handles it fine. The 
only drawback is the unnecessary data transfer.

However with your proposed flag, additional data is returned. I'm 
concerned about backward compatilibity. What happens when a new userspace 
tool communicates with an old kernel? The only way to detect the 
incompatility is to check that the anticipated attributes are missing. But 
ipset strictly checks the attributes and reports protocol violations. With 
this solution there's no way to tell the difference between an old kernel 
or broken protocol.

> If you have any hints/idea to improve this patch, I can try to provide a 
> new version.

I want to introduce a new protocol version. Both the kernel and userspace 
have got the very basic parts to support multiple protocols, however have 
never been tested, obviously. Also, I'm thinking on adding two new 
commands to get the set by name and index and not a single one for both 
operations. The whole thing needs time and I'm busy these weeks with a 
cluster migration.

With a new protocol version, new userspace tools can "negotiate" the old 
protocol version with old kernels and can easily fall back to the 
getsockopt solution to get the required data from kernel.

Best regards,
Jozsef
-
E-mail  : kadlec@blackhole.kfki.hu, kadlecsik.jozsef@wigner.mta.hu
PGP key : http://www.kfki.hu/~kadlec/pgp_public_key.txt
Address : Wigner Research Centre for Physics, Hungarian Academy of Sciences
          H-1525 Budapest 114, POB. 49, Hungary
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Patch

diff --git a/include/uapi/linux/netfilter/ipset/ip_set.h b/include/uapi/linux/netfilter/ipset/ip_set.h
index 60236f694143..8ef2560ff69e 100644
--- a/include/uapi/linux/netfilter/ipset/ip_set.h
+++ b/include/uapi/linux/netfilter/ipset/ip_set.h
@@ -66,6 +66,8 @@  enum {
 	IPSET_ATTR_LINENO,	/* 9: Restore lineno */
 	IPSET_ATTR_PROTOCOL_MIN, /* 10: Minimal supported version number */
 	IPSET_ATTR_REVISION_MIN	= IPSET_ATTR_PROTOCOL_MIN, /* type rev min */
+	/* attributes not sent by default (see IPSET_FLAG_EXTRA_DATA) */
+	IPSET_ATTR_INDEX,   /* 11: Index of the set */
 	__IPSET_ATTR_CMD_MAX,
 };
 #define IPSET_ATTR_CMD_MAX	(__IPSET_ATTR_CMD_MAX - 1)
@@ -182,6 +184,8 @@  enum ipset_cmd_flags {
 	IPSET_FLAG_MAP_SKBPRIO = (1 << IPSET_FLAG_BIT_MAP_SKBPRIO),
 	IPSET_FLAG_BIT_MAP_SKBQUEUE = 10,
 	IPSET_FLAG_MAP_SKBQUEUE = (1 << IPSET_FLAG_BIT_MAP_SKBQUEUE),
+	IPSET_FLAG_BIT_EXTRA_DATA = 11,
+	IPSET_FLAG_EXTRA_DATA = (1 << IPSET_FLAG_BIT_EXTRA_DATA),
 	IPSET_FLAG_CMD_MAX = 15,
 };
 
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index bc4bd247bb7d..370b79368ddb 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -1409,6 +1409,11 @@  ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb)
 				goto release_refcount;
 			if (dump_flags & IPSET_FLAG_LIST_HEADER)
 				goto next_set;
+			if (dump_flags & IPSET_FLAG_EXTRA_DATA) {
+				if (nla_put_u16(skb, IPSET_ATTR_INDEX,
+						index))
+					goto nla_put_failure;
+			}
 			if (set->variant->uref)
 				set->variant->uref(set, cb, true);
 			/* fall through */
@@ -1695,6 +1700,7 @@  static int ip_set_header(struct net *net, struct sock *ctnl,
 			 const struct nlattr * const attr[],
 			 struct netlink_ext_ack *extack)
 {
+	ip_set_id_t index;
 	struct ip_set_net *inst = ip_set_pernet(net);
 	const struct ip_set *set;
 	struct sk_buff *skb2;
@@ -1705,7 +1711,7 @@  static int ip_set_header(struct net *net, struct sock *ctnl,
 		     !attr[IPSET_ATTR_SETNAME]))
 		return -IPSET_ERR_PROTOCOL;
 
-	set = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME]));
+	set = find_set_and_id(inst, nla_data(attr[IPSET_ATTR_SETNAME]), &index);
 	if (!set)
 		return -ENOENT;
 
@@ -1723,6 +1729,16 @@  static int ip_set_header(struct net *net, struct sock *ctnl,
 	    nla_put_u8(skb2, IPSET_ATTR_FAMILY, set->family) ||
 	    nla_put_u8(skb2, IPSET_ATTR_REVISION, set->revision))
 		goto nla_put_failure;
+
+	if (attr[IPSET_ATTR_FLAGS]) {
+		u32 flags = ip_set_get_h32(attr[IPSET_ATTR_FLAGS]);
+
+		if (flags & IPSET_FLAG_EXTRA_DATA) {
+			if (nla_put_u16(skb2, IPSET_ATTR_INDEX, index))
+				goto nla_put_failure;
+		}
+	}
+
 	nlmsg_end(skb2, nlh2);
 
 	ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT);