diff mbox series

netfilter: ipset: export indexes via netlink

Message ID 1530794872-32485-1-git-send-email-florent.fourcot@wifirst.fr
State Changes Requested
Delegated to: Jozsef Kadlecsik
Headers show
Series netfilter: ipset: export indexes via netlink | expand

Commit Message

Florent Fourcot July 5, 2018, 12:47 p.m. UTC
Indexes are exported through getsockopt calls (IP_SET_OP_GET_BYNAME)
and are mandatory for external subsystem using ipset:
  * ipset module of tc-ematch (configured by netlink, but using
    getsockopt before to get index)
  * SET netfilter module

The goal of this patch is to allow one user to use only netlink to get
ipset indexes. However, since `ipset` userspace command does not accept
new/unknow nla (structure didn't change since years), a new flag is
introduced to ask for more data. Currently it adds only indexes, but
application setting the flag should be ready to accept new nla in
future.

Signed-off-by: Florent Fourcot <florent.fourcot@wifirst.fr>
Signed-off-by: Victorien Molle <victorien.molle@wifirst.fr>
---
 include/uapi/linux/netfilter/ipset/ip_set.h |  4 ++++
 net/netfilter/ipset/ip_set_core.c           | 18 +++++++++++++++++-
 2 files changed, 21 insertions(+), 1 deletion(-)

Comments

Jozsef Kadlecsik July 6, 2018, 6:48 a.m. UTC | #1
Hi,

On Thu, 5 Jul 2018, Florent Fourcot wrote:

> Indexes are exported through getsockopt calls (IP_SET_OP_GET_BYNAME)
> and are mandatory for external subsystem using ipset:
>   * ipset module of tc-ematch (configured by netlink, but using
>     getsockopt before to get index)
>   * SET netfilter module
> 
> The goal of this patch is to allow one user to use only netlink to get
> ipset indexes. However, since `ipset` userspace command does not accept
> new/unknow nla (structure didn't change since years), a new flag is
> introduced to ask for more data. Currently it adds only indexes, but
> application setting the flag should be ready to accept new nla in
> future.

Technically I have no problem with your patch. However, it means a 
non-versioned protocol change. I'd like to think about it and check how 
would be best to introduce a version change.

Best regards,
Jozsef
 
> Signed-off-by: Florent Fourcot <florent.fourcot@wifirst.fr>
> Signed-off-by: Victorien Molle <victorien.molle@wifirst.fr>
> ---
>  include/uapi/linux/netfilter/ipset/ip_set.h |  4 ++++
>  net/netfilter/ipset/ip_set_core.c           | 18 +++++++++++++++++-
>  2 files changed, 21 insertions(+), 1 deletion(-)
> 
> diff --git a/include/uapi/linux/netfilter/ipset/ip_set.h b/include/uapi/linux/netfilter/ipset/ip_set.h
> index 60236f694143..8ef2560ff69e 100644
> --- a/include/uapi/linux/netfilter/ipset/ip_set.h
> +++ b/include/uapi/linux/netfilter/ipset/ip_set.h
> @@ -66,6 +66,8 @@ enum {
>  	IPSET_ATTR_LINENO,	/* 9: Restore lineno */
>  	IPSET_ATTR_PROTOCOL_MIN, /* 10: Minimal supported version number */
>  	IPSET_ATTR_REVISION_MIN	= IPSET_ATTR_PROTOCOL_MIN, /* type rev min */
> +	/* attributes not sent by default (see IPSET_FLAG_EXTRA_DATA) */
> +	IPSET_ATTR_INDEX,   /* 11: Index of the set */
>  	__IPSET_ATTR_CMD_MAX,
>  };
>  #define IPSET_ATTR_CMD_MAX	(__IPSET_ATTR_CMD_MAX - 1)
> @@ -182,6 +184,8 @@ enum ipset_cmd_flags {
>  	IPSET_FLAG_MAP_SKBPRIO = (1 << IPSET_FLAG_BIT_MAP_SKBPRIO),
>  	IPSET_FLAG_BIT_MAP_SKBQUEUE = 10,
>  	IPSET_FLAG_MAP_SKBQUEUE = (1 << IPSET_FLAG_BIT_MAP_SKBQUEUE),
> +	IPSET_FLAG_BIT_EXTRA_DATA = 11,
> +	IPSET_FLAG_EXTRA_DATA = (1 << IPSET_FLAG_BIT_EXTRA_DATA),
>  	IPSET_FLAG_CMD_MAX = 15,
>  };
>  
> diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
> index bc4bd247bb7d..370b79368ddb 100644
> --- a/net/netfilter/ipset/ip_set_core.c
> +++ b/net/netfilter/ipset/ip_set_core.c
> @@ -1409,6 +1409,11 @@ ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb)
>  				goto release_refcount;
>  			if (dump_flags & IPSET_FLAG_LIST_HEADER)
>  				goto next_set;
> +			if (dump_flags & IPSET_FLAG_EXTRA_DATA) {
> +				if (nla_put_u16(skb, IPSET_ATTR_INDEX,
> +						index))
> +					goto nla_put_failure;
> +			}
>  			if (set->variant->uref)
>  				set->variant->uref(set, cb, true);
>  			/* fall through */
> @@ -1695,6 +1700,7 @@ static int ip_set_header(struct net *net, struct sock *ctnl,
>  			 const struct nlattr * const attr[],
>  			 struct netlink_ext_ack *extack)
>  {
> +	ip_set_id_t index;
>  	struct ip_set_net *inst = ip_set_pernet(net);
>  	const struct ip_set *set;
>  	struct sk_buff *skb2;
> @@ -1705,7 +1711,7 @@ static int ip_set_header(struct net *net, struct sock *ctnl,
>  		     !attr[IPSET_ATTR_SETNAME]))
>  		return -IPSET_ERR_PROTOCOL;
>  
> -	set = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME]));
> +	set = find_set_and_id(inst, nla_data(attr[IPSET_ATTR_SETNAME]), &index);
>  	if (!set)
>  		return -ENOENT;
>  
> @@ -1723,6 +1729,16 @@ static int ip_set_header(struct net *net, struct sock *ctnl,
>  	    nla_put_u8(skb2, IPSET_ATTR_FAMILY, set->family) ||
>  	    nla_put_u8(skb2, IPSET_ATTR_REVISION, set->revision))
>  		goto nla_put_failure;
> +
> +	if (attr[IPSET_ATTR_FLAGS]) {
> +		u32 flags = ip_set_get_h32(attr[IPSET_ATTR_FLAGS]);
> +
> +		if (flags & IPSET_FLAG_EXTRA_DATA) {
> +			if (nla_put_u16(skb2, IPSET_ATTR_INDEX, index))
> +				goto nla_put_failure;
> +		}
> +	}
> +
>  	nlmsg_end(skb2, nlh2);
>  
>  	ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT);
> -- 
> 2.11.0
> 
> --
> To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 

-
E-mail  : kadlec@blackhole.kfki.hu, kadlecsik.jozsef@wigner.mta.hu
PGP key : http://www.kfki.hu/~kadlec/pgp_public_key.txt
Address : Wigner Research Centre for Physics, Hungarian Academy of Sciences
          H-1525 Budapest 114, POB. 49, Hungary
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Florent Fourcot July 16, 2018, 9:34 a.m. UTC | #2
Hello Jozsef,

> 
> Technically I have no problem with your patch. However, it means a
> non-versioned protocol change. I'd like to think about it and check how
> would be best to introduce a version change.
> 

Do you have any update on this? In my opinion, there are already some 
flags to control list output (LIST_SETNAME and LIST_HEADER), and adding 
one more is not really a breaking change.

If you have any hints/idea to improve this patch, I can try to provide a 
new version.

Best regards,

Florent.
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jozsef Kadlecsik July 16, 2018, 7:48 p.m. UTC | #3
Hi Florent,

On Mon, 16 Jul 2018, Florent Fourcot wrote:

> > Technically I have no problem with your patch. However, it means a 
> > non-versioned protocol change. I'd like to think about it and check 
> > how would be best to introduce a version change.
> 
> Do you have any update on this? In my opinion, there are already some 
> flags to control list output (LIST_SETNAME and LIST_HEADER), and adding 
> one more is not really a breaking change.

Controlling list output is not the same: kernel versions which do not know 
the flags simply list the whole sets and userspace handles it fine. The 
only drawback is the unnecessary data transfer.

However with your proposed flag, additional data is returned. I'm 
concerned about backward compatilibity. What happens when a new userspace 
tool communicates with an old kernel? The only way to detect the 
incompatility is to check that the anticipated attributes are missing. But 
ipset strictly checks the attributes and reports protocol violations. With 
this solution there's no way to tell the difference between an old kernel 
or broken protocol.

> If you have any hints/idea to improve this patch, I can try to provide a 
> new version.

I want to introduce a new protocol version. Both the kernel and userspace 
have got the very basic parts to support multiple protocols, however have 
never been tested, obviously. Also, I'm thinking on adding two new 
commands to get the set by name and index and not a single one for both 
operations. The whole thing needs time and I'm busy these weeks with a 
cluster migration.

With a new protocol version, new userspace tools can "negotiate" the old 
protocol version with old kernels and can easily fall back to the 
getsockopt solution to get the required data from kernel.

Best regards,
Jozsef
-
E-mail  : kadlec@blackhole.kfki.hu, kadlecsik.jozsef@wigner.mta.hu
PGP key : http://www.kfki.hu/~kadlec/pgp_public_key.txt
Address : Wigner Research Centre for Physics, Hungarian Academy of Sciences
          H-1525 Budapest 114, POB. 49, Hungary
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jozsef Kadlecsik July 24, 2018, 7:41 a.m. UTC | #4
Hi Florent,

On Mon, 16 Jul 2018, Jozsef Kadlecsik wrote:

> > Do you have any update on this? In my opinion, there are already some 
> > flags to control list output (LIST_SETNAME and LIST_HEADER), and 
> > adding one more is not really a breaking change.
> 
> Controlling list output is not the same: kernel versions which do not 
> know the flags simply list the whole sets and userspace handles it fine. 
> The only drawback is the unnecessary data transfer.
> 
> However with your proposed flag, additional data is returned. I'm 
> concerned about backward compatilibity. What happens when a new userspace 
> tool communicates with an old kernel? The only way to detect the 
> incompatility is to check that the anticipated attributes are missing. But 
> ipset strictly checks the attributes and reports protocol violations. With 
> this solution there's no way to tell the difference between an old kernel 
> or broken protocol.
> 
> > If you have any hints/idea to improve this patch, I can try to provide a 
> > new version.
> 
> I want to introduce a new protocol version. Both the kernel and 
> userspace have got the very basic parts to support multiple protocols, 
> however have never been tested, obviously. Also, I'm thinking on adding 
> two new commands to get the set by name and index and not a single one 
> for both operations. The whole thing needs time and I'm busy these weeks 
> with a cluster migration.
> 
> With a new protocol version, new userspace tools can "negotiate" the old 
> protocol version with old kernels and can easily fall back to the 
> getsockopt solution to get the required data from kernel.

Please have a look at the next patch: it introduces the two new commands 
to get the set by name or index. (The index number is transferred in 
network byte order, as every similar data in the ipset protocol.) 

According to my tests, the old userspace tool works just fine with the new 
kernel. 

diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h
index 34fc80f..c4ce074 100644
--- a/include/linux/netfilter/ipset/ip_set.h
+++ b/include/linux/netfilter/ipset/ip_set.h
@@ -303,11 +303,11 @@ ip_set_put_flags(struct sk_buff *skb, struct ip_set *set)
 /* Netlink CB args */
 enum {
 	IPSET_CB_NET = 0,	/* net namespace */
+	IPSET_CB_PROTO,		/* ipset protocol */
 	IPSET_CB_DUMP,		/* dump single set/all sets */
 	IPSET_CB_INDEX,		/* set index */
 	IPSET_CB_PRIVATE,	/* set private data */
 	IPSET_CB_ARG0,		/* type specific */
-	IPSET_CB_ARG1,
 };
 
 /* register and unregister set references */
diff --git a/include/uapi/linux/netfilter/ipset/ip_set.h b/include/uapi/linux/netfilter/ipset/ip_set.h
index 60236f6..ea69ca2 100644
--- a/include/uapi/linux/netfilter/ipset/ip_set.h
+++ b/include/uapi/linux/netfilter/ipset/ip_set.h
@@ -13,8 +13,9 @@
 
 #include <linux/types.h>
 
-/* The protocol version */
-#define IPSET_PROTOCOL		6
+/* The protocol versions */
+#define IPSET_PROTOCOL		7
+#define IPSET_PROTOCOL_MIN	6
 
 /* The max length of strings including NUL: set and type identifiers */
 #define IPSET_MAXNAMELEN	32
@@ -38,17 +39,19 @@ enum ipset_cmd {
 	IPSET_CMD_TEST,		/* 11: Test an element in a set */
 	IPSET_CMD_HEADER,	/* 12: Get set header data only */
 	IPSET_CMD_TYPE,		/* 13: Get set type */
+	IPSET_CMD_GET_BYNAME,	/* 14: Get set index by name */
+	IPSET_CMD_GET_BYINDEX,	/* 15: Get set name by index */
 	IPSET_MSG_MAX,		/* Netlink message commands */
 
 	/* Commands in userspace: */
-	IPSET_CMD_RESTORE = IPSET_MSG_MAX, /* 14: Enter restore mode */
-	IPSET_CMD_HELP,		/* 15: Get help */
-	IPSET_CMD_VERSION,	/* 16: Get program version */
-	IPSET_CMD_QUIT,		/* 17: Quit from interactive mode */
+	IPSET_CMD_RESTORE = IPSET_MSG_MAX, /* 16: Enter restore mode */
+	IPSET_CMD_HELP,		/* 17: Get help */
+	IPSET_CMD_VERSION,	/* 18: Get program version */
+	IPSET_CMD_QUIT,		/* 19: Quit from interactive mode */
 
 	IPSET_CMD_MAX,
 
-	IPSET_CMD_COMMIT = IPSET_CMD_MAX, /* 18: Commit buffered commands */
+	IPSET_CMD_COMMIT = IPSET_CMD_MAX, /* 20: Commit buffered commands */
 };
 
 /* Attributes at command level */
@@ -66,6 +69,7 @@ enum {
 	IPSET_ATTR_LINENO,	/* 9: Restore lineno */
 	IPSET_ATTR_PROTOCOL_MIN, /* 10: Minimal supported version number */
 	IPSET_ATTR_REVISION_MIN	= IPSET_ATTR_PROTOCOL_MIN, /* type rev min */
+	IPSET_ATTR_INDEX,	/* 11: Kernel index of set */
 	__IPSET_ATTR_CMD_MAX,
 };
 #define IPSET_ATTR_CMD_MAX	(__IPSET_ATTR_CMD_MAX - 1)
@@ -223,6 +227,7 @@ enum ipset_adt {
 
 /* Sets are identified by an index in kernel space. Tweak with ip_set_id_t
  * and IPSET_INVALID_ID if you want to increase the max number of sets.
+ * Also, IPSET_ATTR_INDEX must be changed.
  */
 typedef __u16 ip_set_id_t;
 
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index bc4bd247..cc75d51 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -768,11 +768,21 @@ EXPORT_SYMBOL_GPL(ip_set_nfnl_put);
  * The commands are serialized by the nfnl mutex.
  */
 
+static inline u8 protocol(const struct nlattr * const tb[])
+{
+	return nla_get_u8(tb[IPSET_ATTR_PROTOCOL]);
+}
+
 static inline bool
 protocol_failed(const struct nlattr * const tb[])
 {
-	return !tb[IPSET_ATTR_PROTOCOL] ||
-	       nla_get_u8(tb[IPSET_ATTR_PROTOCOL]) != IPSET_PROTOCOL;
+	return !tb[IPSET_ATTR_PROTOCOL] || protocol(tb) != IPSET_PROTOCOL;
+}
+
+static inline bool
+protocol_min_failed(const struct nlattr * const tb[])
+{
+	return !tb[IPSET_ATTR_PROTOCOL] || protocol(tb) < IPSET_PROTOCOL_MIN;
 }
 
 static inline u32
@@ -886,7 +896,7 @@ static int ip_set_create(struct net *net, struct sock *ctnl,
 	u32 flags = flag_exist(nlh);
 	int ret = 0;
 
-	if (unlikely(protocol_failed(attr) ||
+	if (unlikely(protocol_min_failed(attr) ||
 		     !attr[IPSET_ATTR_SETNAME] ||
 		     !attr[IPSET_ATTR_TYPENAME] ||
 		     !attr[IPSET_ATTR_REVISION] ||
@@ -1024,7 +1034,7 @@ static int ip_set_destroy(struct net *net, struct sock *ctnl,
 	ip_set_id_t i;
 	int ret = 0;
 
-	if (unlikely(protocol_failed(attr)))
+	if (unlikely(protocol_min_failed(attr)))
 		return -IPSET_ERR_PROTOCOL;
 
 	/* Must wait for flush to be really finished in list:set */
@@ -1102,7 +1112,7 @@ static int ip_set_flush(struct net *net, struct sock *ctnl, struct sk_buff *skb,
 	struct ip_set *s;
 	ip_set_id_t i;
 
-	if (unlikely(protocol_failed(attr)))
+	if (unlikely(protocol_min_failed(attr)))
 		return -IPSET_ERR_PROTOCOL;
 
 	if (!attr[IPSET_ATTR_SETNAME]) {
@@ -1144,7 +1154,7 @@ static int ip_set_rename(struct net *net, struct sock *ctnl,
 	ip_set_id_t i;
 	int ret = 0;
 
-	if (unlikely(protocol_failed(attr) ||
+	if (unlikely(protocol_min_failed(attr) ||
 		     !attr[IPSET_ATTR_SETNAME] ||
 		     !attr[IPSET_ATTR_SETNAME2]))
 		return -IPSET_ERR_PROTOCOL;
@@ -1193,7 +1203,7 @@ static int ip_set_swap(struct net *net, struct sock *ctnl, struct sk_buff *skb,
 	ip_set_id_t from_id, to_id;
 	char from_name[IPSET_MAXNAMELEN];
 
-	if (unlikely(protocol_failed(attr) ||
+	if (unlikely(protocol_min_failed(attr) ||
 		     !attr[IPSET_ATTR_SETNAME] ||
 		     !attr[IPSET_ATTR_SETNAME2]))
 		return -IPSET_ERR_PROTOCOL;
@@ -1288,6 +1298,7 @@ dump_init(struct netlink_callback *cb, struct ip_set_net *inst)
 	nla_parse(cda, IPSET_ATTR_CMD_MAX, attr, nlh->nlmsg_len - min_len,
 		  ip_set_setname_policy, NULL);
 
+	cb->args[IPSET_CB_PROTO] = nla_get_u8(cda[IPSET_ATTR_PROTOCOL]);
 	if (cda[IPSET_ATTR_SETNAME]) {
 		struct ip_set *set;
 
@@ -1389,7 +1400,8 @@ ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb)
 			ret = -EMSGSIZE;
 			goto release_refcount;
 		}
-		if (nla_put_u8(skb, IPSET_ATTR_PROTOCOL, IPSET_PROTOCOL) ||
+		if (nla_put_u8(skb, IPSET_ATTR_PROTOCOL,
+			       cb->args[IPSET_CB_PROTO]) ||
 		    nla_put_string(skb, IPSET_ATTR_SETNAME, set->name))
 			goto nla_put_failure;
 		if (dump_flags & IPSET_FLAG_LIST_SETNAME)
@@ -1463,7 +1475,7 @@ static int ip_set_dump(struct net *net, struct sock *ctnl, struct sk_buff *skb,
 		       const struct nlattr * const attr[],
 		       struct netlink_ext_ack *extack)
 {
-	if (unlikely(protocol_failed(attr)))
+	if (unlikely(protocol_min_failed(attr)))
 		return -IPSET_ERR_PROTOCOL;
 
 	{
@@ -1557,7 +1569,7 @@ static int ip_set_uadd(struct net *net, struct sock *ctnl, struct sk_buff *skb,
 	bool use_lineno;
 	int ret = 0;
 
-	if (unlikely(protocol_failed(attr) ||
+	if (unlikely(protocol_min_failed(attr) ||
 		     !attr[IPSET_ATTR_SETNAME] ||
 		     !((attr[IPSET_ATTR_DATA] != NULL) ^
 		       (attr[IPSET_ATTR_ADT] != NULL)) ||
@@ -1612,7 +1624,7 @@ static int ip_set_udel(struct net *net, struct sock *ctnl, struct sk_buff *skb,
 	bool use_lineno;
 	int ret = 0;
 
-	if (unlikely(protocol_failed(attr) ||
+	if (unlikely(protocol_min_failed(attr) ||
 		     !attr[IPSET_ATTR_SETNAME] ||
 		     !((attr[IPSET_ATTR_DATA] != NULL) ^
 		       (attr[IPSET_ATTR_ADT] != NULL)) ||
@@ -1664,7 +1676,7 @@ static int ip_set_utest(struct net *net, struct sock *ctnl, struct sk_buff *skb,
 	struct nlattr *tb[IPSET_ATTR_ADT_MAX + 1] = {};
 	int ret = 0;
 
-	if (unlikely(protocol_failed(attr) ||
+	if (unlikely(protocol_min_failed(attr) ||
 		     !attr[IPSET_ATTR_SETNAME] ||
 		     !attr[IPSET_ATTR_DATA] ||
 		     !flag_nested(attr[IPSET_ATTR_DATA])))
@@ -1701,7 +1713,7 @@ static int ip_set_header(struct net *net, struct sock *ctnl,
 	struct nlmsghdr *nlh2;
 	int ret = 0;
 
-	if (unlikely(protocol_failed(attr) ||
+	if (unlikely(protocol_min_failed(attr) ||
 		     !attr[IPSET_ATTR_SETNAME]))
 		return -IPSET_ERR_PROTOCOL;
 
@@ -1717,7 +1729,7 @@ static int ip_set_header(struct net *net, struct sock *ctnl,
 			 IPSET_CMD_HEADER);
 	if (!nlh2)
 		goto nlmsg_failure;
-	if (nla_put_u8(skb2, IPSET_ATTR_PROTOCOL, IPSET_PROTOCOL) ||
+	if (nla_put_u8(skb2, IPSET_ATTR_PROTOCOL, protocol(attr)) ||
 	    nla_put_string(skb2, IPSET_ATTR_SETNAME, set->name) ||
 	    nla_put_string(skb2, IPSET_ATTR_TYPENAME, set->type->name) ||
 	    nla_put_u8(skb2, IPSET_ATTR_FAMILY, set->family) ||
@@ -1758,7 +1770,7 @@ static int ip_set_type(struct net *net, struct sock *ctnl, struct sk_buff *skb,
 	const char *typename;
 	int ret = 0;
 
-	if (unlikely(protocol_failed(attr) ||
+	if (unlikely(protocol_min_failed(attr) ||
 		     !attr[IPSET_ATTR_TYPENAME] ||
 		     !attr[IPSET_ATTR_FAMILY]))
 		return -IPSET_ERR_PROTOCOL;
@@ -1777,7 +1789,7 @@ static int ip_set_type(struct net *net, struct sock *ctnl, struct sk_buff *skb,
 			 IPSET_CMD_TYPE);
 	if (!nlh2)
 		goto nlmsg_failure;
-	if (nla_put_u8(skb2, IPSET_ATTR_PROTOCOL, IPSET_PROTOCOL) ||
+	if (nla_put_u8(skb2, IPSET_ATTR_PROTOCOL, protocol(attr)) ||
 	    nla_put_string(skb2, IPSET_ATTR_TYPENAME, typename) ||
 	    nla_put_u8(skb2, IPSET_ATTR_FAMILY, family) ||
 	    nla_put_u8(skb2, IPSET_ATTR_REVISION, max) ||
@@ -1828,6 +1840,113 @@ static int ip_set_protocol(struct net *net, struct sock *ctnl,
 		goto nlmsg_failure;
 	if (nla_put_u8(skb2, IPSET_ATTR_PROTOCOL, IPSET_PROTOCOL))
 		goto nla_put_failure;
+	if (nla_put_u8(skb2, IPSET_ATTR_PROTOCOL_MIN, IPSET_PROTOCOL_MIN))
+		goto nla_put_failure;
+	nlmsg_end(skb2, nlh2);
+
+	ret = netlink_unicast(ctnl, skb2, NETLINK_PORTID(skb), MSG_DONTWAIT);
+	if (ret < 0)
+		return ret;
+
+	return 0;
+
+nla_put_failure:
+	nlmsg_cancel(skb2, nlh2);
+nlmsg_failure:
+	kfree_skb(skb2);
+	return -EMSGSIZE;
+}
+
+/* Get set by name or index, from userspace */
+
+static int
+ip_set_byname(struct net *net, struct sock *ctnl,
+	      struct sk_buff *skb, const struct nlmsghdr *nlh,
+	      const struct nlattr * const attr[],
+	      struct netlink_ext_ack *extack)
+{
+	struct ip_set_net *inst = ip_set_pernet(IPSET_SOCK_NET(net, ctnl));
+	struct sk_buff *skb2;
+	struct nlmsghdr *nlh2;
+	ip_set_id_t id = IPSET_INVALID_ID;
+	const struct ip_set *set;
+	int ret = 0;
+
+	if (unlikely(protocol_failed(attr) ||
+		     !attr[IPSET_ATTR_SETNAME]))
+		return -IPSET_ERR_PROTOCOL;
+
+	set = find_set_and_id(inst, nla_data(attr[IPSET_ATTR_SETNAME]), &id);
+	if (id == IPSET_INVALID_ID)
+		return -ENOENT;
+
+	skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!skb2)
+		return -ENOMEM;
+
+	nlh2 = start_msg(skb2, NETLINK_PORTID(skb), nlh->nlmsg_seq, 0,
+			 IPSET_CMD_GET_BYNAME);
+	if (!nlh2)
+		goto nlmsg_failure;
+	if (nla_put_u8(skb2, IPSET_ATTR_PROTOCOL, protocol(attr)) ||
+	    nla_put_u8(skb2, IPSET_ATTR_FAMILY, set->family) ||
+	    nla_put_net16(skb2, IPSET_ATTR_INDEX, id))
+		goto nla_put_failure;
+	nlmsg_end(skb2, nlh2);
+
+	ret = netlink_unicast(ctnl, skb2, NETLINK_PORTID(skb), MSG_DONTWAIT);
+	if (ret < 0)
+		return ret;
+
+	return 0;
+
+nla_put_failure:
+	nlmsg_cancel(skb2, nlh2);
+nlmsg_failure:
+	kfree_skb(skb2);
+	return -EMSGSIZE;
+}
+
+static const struct nla_policy ip_set_index_policy[IPSET_ATTR_CMD_MAX + 1] = {
+	[IPSET_ATTR_PROTOCOL]	= { .type = NLA_U8 },
+	[IPSET_ATTR_INDEX]	= { .type = NLA_U16 },
+};
+
+static int
+ip_set_byindex(struct net *net, struct sock *ctnl,
+	       struct sk_buff *skb, const struct nlmsghdr *nlh,
+	       const struct nlattr * const attr[],
+	       struct netlink_ext_ack *extack)
+{
+	struct ip_set_net *inst = ip_set_pernet(IPSET_SOCK_NET(net, ctnl));
+	struct sk_buff *skb2;
+	struct nlmsghdr *nlh2;
+	ip_set_id_t id = IPSET_INVALID_ID;
+	const struct ip_set *set;
+	int ret = 0;
+
+	if (unlikely(protocol_failed(attr) ||
+		     !attr[IPSET_ATTR_INDEX]))
+		return -IPSET_ERR_PROTOCOL;
+
+	id = ip_set_get_h16(attr[IPSET_ATTR_INDEX]);
+	if (id >= inst->ip_set_max)
+		return -ENOENT;
+	set = ip_set(inst, id);
+	if (set == NULL)
+		return -ENOENT;
+
+	skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!skb2)
+		return -ENOMEM;
+
+	nlh2 = start_msg(skb2, NETLINK_PORTID(skb), nlh->nlmsg_seq, 0,
+			 IPSET_CMD_GET_BYINDEX);
+	if (!nlh2)
+		goto nlmsg_failure;
+	if (nla_put_u8(skb2, IPSET_ATTR_PROTOCOL, protocol(attr)) ||
+	    nla_put_string(skb, IPSET_ATTR_SETNAME, set->name))
+		goto nla_put_failure;
 	nlmsg_end(skb2, nlh2);
 
 	ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT);
@@ -1913,6 +2032,16 @@ static const struct nfnl_callback ip_set_netlink_subsys_cb[IPSET_MSG_MAX] = {
 		.attr_count	= IPSET_ATTR_CMD_MAX,
 		.policy		= ip_set_protocol_policy,
 	},
+	[IPSET_CMD_GET_BYNAME]	= {
+		.call		= ip_set_byname,
+		.attr_count	= IPSET_ATTR_CMD_MAX,
+		.policy		= ip_set_setname_policy,
+	},
+	[IPSET_CMD_GET_BYINDEX]	= {
+		.call		= ip_set_byindex,
+		.attr_count	= IPSET_ATTR_CMD_MAX,
+		.policy		= ip_set_index_policy,
+	},
 };
 
 static struct nfnetlink_subsystem ip_set_netlink_subsys __read_mostly = {
@@ -1958,7 +2087,7 @@ ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len)
 			goto done;
 		}
 
-		if (req_version->version != IPSET_PROTOCOL) {
+		if (req_version->version < IPSET_PROTOCOL_MIN) {
 			ret = -EPROTO;
 			goto done;
 		}

Best regards,
Jozsef
-
E-mail  : kadlec@blackhole.kfki.hu, kadlecsik.jozsef@wigner.mta.hu
PGP key : http://www.kfki.hu/~kadlec/pgp_public_key.txt
Address : Wigner Research Centre for Physics, Hungarian Academy of Sciences
          H-1525 Budapest 114, POB. 49, Hungary
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Florent Fourcot July 25, 2018, 11:56 a.m. UTC | #5
Hi Jozsef,

Thanks a lot, I will test it on my side this week.

Just a small comment after a short code review: what about adding 
IPSET_ATTR_INDEX in list command when proto is greater than 6? I agree 
that specific commands are a good idea, but i still think that adding it 
in list is a good idea (it's probably less relevant in ip_set_header 
with your patch).

Best regards,

Florent
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jozsef Kadlecsik July 26, 2018, 6:05 p.m. UTC | #6
Hi Florent,

On Wed, 25 Jul 2018, Florent Fourcot wrote:

> Just a small comment after a short code review: what about adding 
> IPSET_ATTR_INDEX in list command when proto is greater than 6? I agree 
> that specific commands are a good idea, but i still think that adding it 
> in list is a good idea (it's probably less relevant in ip_set_header 
> with your patch).

But what is the usercase for IPSET_ATTR_INDEX in a list command?

Best regards,
Jozsef
-
E-mail  : kadlec@blackhole.kfki.hu, kadlecsik.jozsef@wigner.mta.hu
PGP key : http://www.kfki.hu/~kadlec/pgp_public_key.txt
Address : Wigner Research Centre for Physics, Hungarian Academy of Sciences
          H-1525 Budapest 114, POB. 49, Hungary
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Florent Fourcot July 27, 2018, 3:32 p.m. UTC | #7
Hi Jozsef,

On pyroute2 library, a method can build a python object based on netlink 
messages:

https://github.com/svinota/pyroute2/blob/master/pyroute2/wiset.py#L174

We could of course fill index attribute with your new command, but that 
could be nice as well to read it in the same way (list) than other 
attributes.

Best regards,

Florent
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jozsef Kadlecsik July 28, 2018, 3:13 p.m. UTC | #8
Hi Florent,

On Fri, 27 Jul 2018, Florent Fourcot wrote:

> On pyroute2 library, a method can build a python object based on netlink 
> messages:
> 
> https://github.com/svinota/pyroute2/blob/master/pyroute2/wiset.py#L174
> 
> We could of course fill index attribute with your new command, but that 
> could be nice as well to read it in the same way (list) than other 
> attributes.

Don't get me wrong, I'm just trying to understand how it'd be used.

The rules of set indices are quite straightforward: counted successively 
from zero. When a set is deleted there'll be a hole in the numbering which 
will be reused next when a new set is created. Renaming doesn't change the 
index, however swapping does.

So if one could guarantee that your library alone communicates to the 
ip_set module in the kernel, then it makes sense to pass the indices at 
listing and cache them. However that cannot be guaranteed. So you should 
recheck an index before you use it - and even in that case it's not 
mutable. Therefore I'm reluctant to add it to listing. Do you really gain 
something with it? Wouldn't make sense to add it to the header-only 
listing (as you proposed), to make possible a quick check for all indices?

The two new proposed commands are for direct, single-shot usage.

Best regards,
Jozsef
-
E-mail  : kadlec@blackhole.kfki.hu, kadlecsik.jozsef@wigner.mta.hu
PGP key : http://www.kfki.hu/~kadlec/pgp_public_key.txt
Address : Wigner Research Centre for Physics, Hungarian Academy of Sciences
          H-1525 Budapest 114, POB. 49, Hungary
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Florent Fourcot Aug. 20, 2018, 1:51 p.m. UTC | #9
Hi Jozsef,

Sorry for the slow answer.

> 
> So if one could guarantee that your library alone communicates to the
> ip_set module in the kernel, then it makes sense to pass the indices at
> listing and cache them. However that cannot be guaranteed. 

It's indeed the main use case of this library. You are right, if 
something change ipset stored in kernel the cache is useless and dangerous.


> Wouldn't make sense to add it to the header-only
> listing (as you proposed), to make possible a quick check for all indices?
> 

This solution is fine for me

> The two new proposed commands are for direct, single-shot usage.
> 

Yes, and they are perfect for that. Thanks for your time on this topic.

Best regards,

Florent Fourcot.
Florent Fourcot Oct. 1, 2018, 9:48 a.m. UTC | #10
Hi Jozsef,

Do you have any news on this topic? Can I help you to move forward for 
inclusion?

Best regards,
Jozsef Kadlecsik Oct. 1, 2018, 6:39 p.m. UTC | #11
Hi,

On Mon, 1 Oct 2018, Florent Fourcot wrote:

> Do you have any news on this topic? Can I help you to move forward for 
> inclusion?

Sorry for the extremely long delay: I have been working on the userspace 
library and it still needs a couple of days. There'll be a new release in 
the first half of this month :-)

Best regards,
Jozsef
-
E-mail  : kadlec@blackhole.kfki.hu, kadlecsik.jozsef@wigner.mta.hu
PGP key : http://www.kfki.hu/~kadlec/pgp_public_key.txt
Address : Wigner Research Centre for Physics, Hungarian Academy of Sciences
          H-1525 Budapest 114, POB. 49, Hungary
diff mbox series

Patch

diff --git a/include/uapi/linux/netfilter/ipset/ip_set.h b/include/uapi/linux/netfilter/ipset/ip_set.h
index 60236f694143..8ef2560ff69e 100644
--- a/include/uapi/linux/netfilter/ipset/ip_set.h
+++ b/include/uapi/linux/netfilter/ipset/ip_set.h
@@ -66,6 +66,8 @@  enum {
 	IPSET_ATTR_LINENO,	/* 9: Restore lineno */
 	IPSET_ATTR_PROTOCOL_MIN, /* 10: Minimal supported version number */
 	IPSET_ATTR_REVISION_MIN	= IPSET_ATTR_PROTOCOL_MIN, /* type rev min */
+	/* attributes not sent by default (see IPSET_FLAG_EXTRA_DATA) */
+	IPSET_ATTR_INDEX,   /* 11: Index of the set */
 	__IPSET_ATTR_CMD_MAX,
 };
 #define IPSET_ATTR_CMD_MAX	(__IPSET_ATTR_CMD_MAX - 1)
@@ -182,6 +184,8 @@  enum ipset_cmd_flags {
 	IPSET_FLAG_MAP_SKBPRIO = (1 << IPSET_FLAG_BIT_MAP_SKBPRIO),
 	IPSET_FLAG_BIT_MAP_SKBQUEUE = 10,
 	IPSET_FLAG_MAP_SKBQUEUE = (1 << IPSET_FLAG_BIT_MAP_SKBQUEUE),
+	IPSET_FLAG_BIT_EXTRA_DATA = 11,
+	IPSET_FLAG_EXTRA_DATA = (1 << IPSET_FLAG_BIT_EXTRA_DATA),
 	IPSET_FLAG_CMD_MAX = 15,
 };
 
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index bc4bd247bb7d..370b79368ddb 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -1409,6 +1409,11 @@  ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb)
 				goto release_refcount;
 			if (dump_flags & IPSET_FLAG_LIST_HEADER)
 				goto next_set;
+			if (dump_flags & IPSET_FLAG_EXTRA_DATA) {
+				if (nla_put_u16(skb, IPSET_ATTR_INDEX,
+						index))
+					goto nla_put_failure;
+			}
 			if (set->variant->uref)
 				set->variant->uref(set, cb, true);
 			/* fall through */
@@ -1695,6 +1700,7 @@  static int ip_set_header(struct net *net, struct sock *ctnl,
 			 const struct nlattr * const attr[],
 			 struct netlink_ext_ack *extack)
 {
+	ip_set_id_t index;
 	struct ip_set_net *inst = ip_set_pernet(net);
 	const struct ip_set *set;
 	struct sk_buff *skb2;
@@ -1705,7 +1711,7 @@  static int ip_set_header(struct net *net, struct sock *ctnl,
 		     !attr[IPSET_ATTR_SETNAME]))
 		return -IPSET_ERR_PROTOCOL;
 
-	set = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME]));
+	set = find_set_and_id(inst, nla_data(attr[IPSET_ATTR_SETNAME]), &index);
 	if (!set)
 		return -ENOENT;
 
@@ -1723,6 +1729,16 @@  static int ip_set_header(struct net *net, struct sock *ctnl,
 	    nla_put_u8(skb2, IPSET_ATTR_FAMILY, set->family) ||
 	    nla_put_u8(skb2, IPSET_ATTR_REVISION, set->revision))
 		goto nla_put_failure;
+
+	if (attr[IPSET_ATTR_FLAGS]) {
+		u32 flags = ip_set_get_h32(attr[IPSET_ATTR_FLAGS]);
+
+		if (flags & IPSET_FLAG_EXTRA_DATA) {
+			if (nla_put_u16(skb2, IPSET_ATTR_INDEX, index))
+				goto nla_put_failure;
+		}
+	}
+
 	nlmsg_end(skb2, nlh2);
 
 	ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT);