ipvs: generic netlink multicast event group

Message ID 20240205192828.187494-1-terin@cloudflare.com
State Changes Requested
Series ipvs: generic netlink multicast event group

Commit Message

Terin Stock Feb. 5, 2024, 7:28 p.m. UTC
The IPVS subsystem allows for configuration from userspace programs
using the generic netlink interface. However, userspace programs are
currently unable to react to changes in the configuration of services
or destinations made on the system without polling the entire IPVS
configuration.

Add an "events" multicast group to the IPVS generic netlink family,
allowing userspace programs to monitor changes made by any other
netlink client (or by legacy tools using the IPVS socket options). As
service and destination statistics are separate from configuration,
those netlink attributes are excluded from events.

Signed-off-by: Terin Stock <terin@cloudflare.com>
---
 include/uapi/linux/ip_vs.h     |   2 +
 net/netfilter/ipvs/ip_vs_ctl.c | 107 +++++++++++++++++++++++++++++----
 2 files changed, 96 insertions(+), 13 deletions(-)
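
For context, a userspace consumer would subscribe to the new multicast
group roughly as follows. This is a minimal illustrative libnl sketch
(not part of the patch); it assumes the updated <linux/ip_vs.h> from
this patch is installed, and the callback body is a placeholder:

/* Build with: cc listener.c $(pkg-config --cflags --libs libnl-genl-3.0) */
#include <linux/ip_vs.h>
#include <netlink/netlink.h>
#include <netlink/genl/genl.h>
#include <netlink/genl/ctrl.h>

static int on_event(struct nl_msg *msg, void *arg)
{
	/* Parse the IPVS_CMD_* command and nested attributes here. */
	return NL_OK;
}

int main(void)
{
	struct nl_sock *sock = nl_socket_alloc();
	int grp;

	genl_connect(sock);

	/* Resolve the "events" group of the "IPVS" genl family. */
	grp = genl_ctrl_resolve_grp(sock, IPVS_GENL_NAME,
				    IPVS_GENL_MCAST_EVENT_NAME);

	/* Multicast events are unsolicited, so disable sequence checks. */
	nl_socket_disable_seq_check(sock);
	nl_socket_modify_cb(sock, NL_CB_VALID, NL_CB_CUSTOM, on_event, NULL);

	/* Join the group and block handling events. */
	nl_socket_add_membership(sock, grp);
	for (;;)
		nl_recvmsgs_default(sock);
}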

Comments

Julian Anastasov Feb. 7, 2024, 4:44 p.m. UTC | #1
Hello,

On Mon, 5 Feb 2024, Terin Stock wrote:

> The IPVS subsystem allows for configuration from userspace programs
> using the generic netlink interface. However, userspace programs are
> currently unable to react to changes in the configuration of services
> or destinations made on the system without polling the entire IPVS
> configuration.
> 
> Add an "events" multicast group to the IPVS generic netlink family,
> allowing userspace programs to monitor changes made by any other
> netlink client (or by legacy tools using the IPVS socket options). As
> service and destination statistics are separate from configuration,
> those netlink attributes are excluded from events.
> 
> Signed-off-by: Terin Stock <terin@cloudflare.com>
> ---
>  include/uapi/linux/ip_vs.h     |   2 +
>  net/netfilter/ipvs/ip_vs_ctl.c | 107 +++++++++++++++++++++++++++++----
>  2 files changed, 96 insertions(+), 13 deletions(-)
> 
> diff --git a/include/uapi/linux/ip_vs.h b/include/uapi/linux/ip_vs.h
> index 1ed234e7f251..0aa119ebaf85 100644
> --- a/include/uapi/linux/ip_vs.h
> +++ b/include/uapi/linux/ip_vs.h
> @@ -299,6 +299,8 @@ struct ip_vs_daemon_user {
>  #define IPVS_GENL_NAME		"IPVS"
>  #define IPVS_GENL_VERSION	0x1
>  
> +#define IPVS_GENL_MCAST_EVENT_NAME "events"
> +
>  struct ip_vs_flags {
>  	__u32 flags;
>  	__u32 mask;
> diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
> index 143a341bbc0a..ced232361d02 100644
> --- a/net/netfilter/ipvs/ip_vs_ctl.c
> +++ b/net/netfilter/ipvs/ip_vs_ctl.c
> @@ -64,6 +64,11 @@ int ip_vs_get_debug_level(void)
>  
>  
>  /*  Protos */
> +static struct genl_family ip_vs_genl_family;
> +static int ip_vs_genl_fill_service(struct sk_buff *skb,
> +				   struct ip_vs_service *svc, bool stats);
> +static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest,
> +				bool stats);
>  static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup);
>  
>  
> @@ -960,6 +965,62 @@ void ip_vs_stats_free(struct ip_vs_stats *stats)
>  	}
>  }
>  
> +static int ip_vs_genl_service_event(u8 event, struct ip_vs_service *svc)
> +{
> +	struct sk_buff *msg;
> +	void *hdr;
> +
> +	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
> +	if (!msg)
> +		return -ENOMEM;
> +
> +	hdr = genlmsg_put(msg, 0, 0, &ip_vs_genl_family, 0, event);
> +	if (!hdr)
> +		goto free_msg;
> +
> +	if (ip_vs_genl_fill_service(msg, svc, false))
> +		goto free_msg;
> +
> +	genlmsg_end(msg, hdr);
> +	genlmsg_multicast(&ip_vs_genl_family, msg, 0, 0, GFP_ATOMIC);

	Maybe genlmsg_multicast_netns() because the rules are per netns?
As a result, maybe such events should not be generated on net cleanup,
see 'bool cleanup'.

	And use GFP_KERNEL, because all configuration happens in process
context? On module removal, ip_vs_unregister_nl_ioctl() is called before
the rules are flushed; I am not sure if we should add some check, or
whether it is not fatal to post events for an unregistered genl family.
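
	For the first two points, something like this perhaps (untested;
assuming the netns is reachable via svc->ipvs->net at the call sites):

	genlmsg_multicast_netns(&ip_vs_genl_family, svc->ipvs->net, msg,
				0, 0, GFP_KERNEL);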

	Also, IP_VS_SO_SET_FLUSH can flush rules, and currently that will
not be reported.

	I also worry that such events slow down the configuration
process for setups with many rules which do not use listeners.
Should we enable it with some sysctl var? Currently, many CPU cycles
are spent before we notice that there are no listeners.

Regards

--
Julian Anastasov <ja@ssi.bg>
Pablo Neira Ayuso Feb. 29, 2024, 4:37 p.m. UTC | #2
On Wed, Feb 07, 2024 at 06:44:48PM +0200, Julian Anastasov wrote:
[...]
> 	I also worry that such events slow down the configuration
> process for setups with many rules which do not use listeners.
> Should we enable it with some sysctl var? Currently, many CPU cycles
> are spent before we notice that there are no listeners.

There is netlink_has_listeners(), IIRC there was a bit of missing work
in genetlink to make this work?
Julian Anastasov Feb. 29, 2024, 5:56 p.m. UTC | #3
Hello,

On Thu, 29 Feb 2024, Pablo Neira Ayuso wrote:

> On Wed, Feb 07, 2024 at 06:44:48PM +0200, Julian Anastasov wrote:
> [...]
> > 	I also worry that such events slow down the configuration
> > process for setups with many rules which do not use listeners.
> > Should we enable it with some sysctl var? Currently, many CPU cycles
> > are spent before we notice that there are no listeners.
> 
> There is netlink_has_listeners(), IIRC there was a bit of missing work
> in genetlink to make this work?

	Looks like genl_has_listeners() should be sufficient...
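
	For example, an early bail-out before allocating the message
(untested; group index 0 assumed, as "events" is the only group):

	if (!genl_has_listeners(&ip_vs_genl_family, svc->ipvs->net, 0))
		return 0;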

Regards

--
Julian Anastasov <ja@ssi.bg>
Terin Stock Feb. 29, 2024, 6:38 p.m. UTC | #4
Thanks, I'll work on implementing these suggestions in a v2 patch.
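
For reference, the reworked service-event helper might end up roughly
like the untested sketch below, folding in the netns, GFP_KERNEL and
listener-check suggestions (the dest event would change the same way):

static int ip_vs_genl_service_event(u8 event, struct ip_vs_service *svc)
{
	struct net *net = svc->ipvs->net;
	struct sk_buff *msg;
	void *hdr;

	/* Skip building the message when nobody has joined the group. */
	if (!genl_has_listeners(&ip_vs_genl_family, net, 0))
		return 0;

	/* All configuration happens in process context, so GFP_KERNEL. */
	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!msg)
		return -ENOMEM;

	hdr = genlmsg_put(msg, 0, 0, &ip_vs_genl_family, 0, event);
	if (!hdr)
		goto free_msg;

	if (ip_vs_genl_fill_service(msg, svc, false))
		goto free_msg;

	genlmsg_end(msg, hdr);
	/* Deliver only within the netns that owns the rule. */
	genlmsg_multicast_netns(&ip_vs_genl_family, net, msg, 0, 0,
				GFP_KERNEL);

	return 0;

free_msg:
	nlmsg_free(msg);
	return -EMSGSIZE;
}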

Patch

diff --git a/include/uapi/linux/ip_vs.h b/include/uapi/linux/ip_vs.h
index 1ed234e7f251..0aa119ebaf85 100644
--- a/include/uapi/linux/ip_vs.h
+++ b/include/uapi/linux/ip_vs.h
@@ -299,6 +299,8 @@ struct ip_vs_daemon_user {
 #define IPVS_GENL_NAME		"IPVS"
 #define IPVS_GENL_VERSION	0x1
 
+#define IPVS_GENL_MCAST_EVENT_NAME "events"
+
 struct ip_vs_flags {
 	__u32 flags;
 	__u32 mask;
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 143a341bbc0a..ced232361d02 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -64,6 +64,11 @@ int ip_vs_get_debug_level(void)
 
 
 /*  Protos */
+static struct genl_family ip_vs_genl_family;
+static int ip_vs_genl_fill_service(struct sk_buff *skb,
+				   struct ip_vs_service *svc, bool stats);
+static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest,
+				bool stats);
 static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup);
 
 
@@ -960,6 +965,62 @@ void ip_vs_stats_free(struct ip_vs_stats *stats)
 	}
 }
 
+static int ip_vs_genl_service_event(u8 event, struct ip_vs_service *svc)
+{
+	struct sk_buff *msg;
+	void *hdr;
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
+	if (!msg)
+		return -ENOMEM;
+
+	hdr = genlmsg_put(msg, 0, 0, &ip_vs_genl_family, 0, event);
+	if (!hdr)
+		goto free_msg;
+
+	if (ip_vs_genl_fill_service(msg, svc, false))
+		goto free_msg;
+
+	genlmsg_end(msg, hdr);
+	genlmsg_multicast(&ip_vs_genl_family, msg, 0, 0, GFP_ATOMIC);
+
+	return 0;
+
+free_msg:
+	nlmsg_free(msg);
+	return -EMSGSIZE;
+}
+
+static int ip_vs_genl_dest_event(u8 event, struct ip_vs_service *svc,
+				 struct ip_vs_dest *dest)
+{
+	struct sk_buff *msg;
+	void *hdr;
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
+	if (!msg)
+		return -ENOMEM;
+
+	hdr = genlmsg_put(msg, 0, 0, &ip_vs_genl_family, 0, event);
+	if (!hdr)
+		goto free_msg;
+
+	if (ip_vs_genl_fill_service(msg, svc, false))
+		goto free_msg;
+
+	if (ip_vs_genl_fill_dest(msg, dest, false))
+		goto free_msg;
+
+	genlmsg_end(msg, hdr);
+	genlmsg_multicast(&ip_vs_genl_family, msg, 0, 0, GFP_ATOMIC);
+
+	return 0;
+
+free_msg:
+	nlmsg_free(msg);
+	return -EMSGSIZE;
+}
+
 /*
  *	Update a destination in the given service
  */
@@ -1043,10 +1104,12 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
 		sched = rcu_dereference_protected(svc->scheduler, 1);
 		if (sched && sched->add_dest)
 			sched->add_dest(svc, dest);
+		ip_vs_genl_dest_event(IPVS_CMD_NEW_DEST, svc, dest);
 	} else {
 		sched = rcu_dereference_protected(svc->scheduler, 1);
 		if (sched && sched->upd_dest)
 			sched->upd_dest(svc, dest);
+		ip_vs_genl_dest_event(IPVS_CMD_SET_DEST, svc, dest);
 	}
 }
 
@@ -1316,6 +1379,7 @@ ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
 		return -ENOENT;
 	}
 
+	ip_vs_genl_dest_event(IPVS_CMD_DEL_DEST, svc, dest);
 	/*
 	 *	Unlink dest from the service
 	 */
@@ -1480,6 +1544,8 @@ ip_vs_add_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *u,
 	/* Hash the service into the service table */
 	ip_vs_svc_hash(svc);
 
+	ip_vs_genl_service_event(IPVS_CMD_NEW_SERVICE, svc);
+
 	*svc_p = svc;
 
 	if (!ipvs->enable) {
@@ -1593,6 +1659,8 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
 			atomic_dec(&svc->ipvs->conn_out_counter);
 	}
 
+	ip_vs_genl_service_event(IPVS_CMD_SET_SERVICE, svc);
+
 out:
 	ip_vs_scheduler_put(old_sched);
 	ip_vs_pe_put(old_pe);
@@ -1667,6 +1735,8 @@ static void ip_vs_unlink_service(struct ip_vs_service *svc, bool cleanup)
 	ip_vs_unregister_conntrack(svc);
 	/* Hold svc to avoid double release from dest_trash */
 	atomic_inc(&svc->refcnt);
+
+	ip_vs_genl_service_event(IPVS_CMD_DEL_SERVICE, svc);
 	/*
 	 * Unhash it from the service table
 	 */
@@ -3313,7 +3383,7 @@ static int ip_vs_genl_fill_stats64(struct sk_buff *skb, int container_type,
 }
 
 static int ip_vs_genl_fill_service(struct sk_buff *skb,
-				   struct ip_vs_service *svc)
+				   struct ip_vs_service *svc, bool stats)
 {
 	struct ip_vs_scheduler *sched;
 	struct ip_vs_pe *pe;
@@ -3349,10 +3419,12 @@ static int ip_vs_genl_fill_service(struct sk_buff *skb,
 	    nla_put_be32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask))
 		goto nla_put_failure;
 	ip_vs_copy_stats(&kstats, &svc->stats);
-	if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &kstats))
-		goto nla_put_failure;
-	if (ip_vs_genl_fill_stats64(skb, IPVS_SVC_ATTR_STATS64, &kstats))
-		goto nla_put_failure;
+	if (stats) {
+		if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &kstats))
+			goto nla_put_failure;
+		if (ip_vs_genl_fill_stats64(skb, IPVS_SVC_ATTR_STATS64, &kstats))
+			goto nla_put_failure;
+	}
 
 	nla_nest_end(skb, nl_service);
 
@@ -3375,7 +3447,7 @@ static int ip_vs_genl_dump_service(struct sk_buff *skb,
 	if (!hdr)
 		return -EMSGSIZE;
 
-	if (ip_vs_genl_fill_service(skb, svc) < 0)
+	if (ip_vs_genl_fill_service(skb, svc, true) < 0)
 		goto nla_put_failure;
 
 	genlmsg_end(skb, hdr);
@@ -3528,7 +3600,8 @@ static struct ip_vs_service *ip_vs_genl_find_service(struct netns_ipvs *ipvs,
 	return ret ? ERR_PTR(ret) : svc;
 }
 
-static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
+static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest,
+				bool stats)
 {
 	struct nlattr *nl_dest;
 	struct ip_vs_kstats kstats;
@@ -3561,10 +3634,12 @@ static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
 	    nla_put_u16(skb, IPVS_DEST_ATTR_ADDR_FAMILY, dest->af))
 		goto nla_put_failure;
 	ip_vs_copy_stats(&kstats, &dest->stats);
-	if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &kstats))
-		goto nla_put_failure;
-	if (ip_vs_genl_fill_stats64(skb, IPVS_DEST_ATTR_STATS64, &kstats))
-		goto nla_put_failure;
+	if (stats) {
+		if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &kstats))
+			goto nla_put_failure;
+		if (ip_vs_genl_fill_stats64(skb, IPVS_DEST_ATTR_STATS64, &kstats))
+			goto nla_put_failure;
+	}
 
 	nla_nest_end(skb, nl_dest);
 
@@ -3586,7 +3661,7 @@ static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest,
 	if (!hdr)
 		return -EMSGSIZE;
 
-	if (ip_vs_genl_fill_dest(skb, dest) < 0)
+	if (ip_vs_genl_fill_dest(skb, dest, true) < 0)
 		goto nla_put_failure;
 
 	genlmsg_end(skb, hdr);
@@ -4078,7 +4153,7 @@ static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
 			ret = PTR_ERR(svc);
 			goto out_err;
 		} else if (svc) {
-			ret = ip_vs_genl_fill_service(msg, svc);
+			ret = ip_vs_genl_fill_service(msg, svc, true);
 			if (ret)
 				goto nla_put_failure;
 		} else {
@@ -4235,6 +4310,10 @@ static const struct genl_small_ops ip_vs_genl_ops[] = {
 	},
 };
 
+static const struct genl_multicast_group ip_vs_genl_mcgrps[] = {
+	{ .name = IPVS_GENL_MCAST_EVENT_NAME },
+};
+
 static struct genl_family ip_vs_genl_family __ro_after_init = {
 	.hdrsize	= 0,
 	.name		= IPVS_GENL_NAME,
@@ -4246,6 +4325,8 @@ static struct genl_family ip_vs_genl_family __ro_after_init = {
 	.small_ops	= ip_vs_genl_ops,
 	.n_small_ops	= ARRAY_SIZE(ip_vs_genl_ops),
 	.resv_start_op	= IPVS_CMD_FLUSH + 1,
+	.mcgrps = ip_vs_genl_mcgrps,
+	.n_mcgrps = ARRAY_SIZE(ip_vs_genl_mcgrps),
 };
 
 static int __init ip_vs_genl_register(void)