diff mbox

[net-next,1/2] rtnetlink: add new RTM_GETSTATS message to dump link stats

Message ID 1457834186-45727-2-git-send-email-roopa@cumulusnetworks.com
State Changes Requested, archived
Delegated to: David Miller
Headers show

Commit Message

Roopa Prabhu March 13, 2016, 1:56 a.m. UTC
From: Roopa Prabhu <roopa@cumulusnetworks.com>

This patch adds a new RTM_GETSTATS message to query link stats via netlink
from the kernel. RTM_NEWLINK also dumps stats today, but RTM_NEWLINK
returns a lot more than just stats and is expensive in some cases when
frequent polling for stats from userspace is a common operation.

RTM_GETSTATS is an attempt to provide a light weight netlink message
to explicity query only link stats from the kernel on an interface.
The idea is to also keep it extensible so that new kinds of stats can be
added to it in the future.

This patch adds the following attribute for NETDEV stats:
struct nla_policy ifla_stats_policy[IFLA_STATS_MAX + 1] = {
        [IFLA_STATS_LINK64]  = { .len = sizeof(struct rtnl_link_stats64) },
};

This patch also allows for af family stats (an example af stats for IPV6
is available with the second patch in the series).

Like any other rtnetlink message, RTM_GETSTATS can be used to get stats of
a single interface or all interfaces with NLM_F_DUMP.

Future possible new types of stat attributes:
- IFLA_MPLS_STATS  (nested. for mpls/mdev stats)
- IFLA_EXTENDED_STATS (nested. extended software netdev stats like bridge,
  vlan, vxlan etc)
- IFLA_EXTENDED_HW_STATS (nested. extended hardware stats which are
  available via ethtool today)

This patch also declares a filter mask for all stat attributes.
User has to provide a mask of stats attributes to query. This will be
specified in a new hdr 'struct if_stats_msg' for stats messages.

Without any attributes in the filter_mask, no stats will be returned.

This patch has been tested with modified iproute2 ifstat.

Suggested-by: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com>
---
 include/net/rtnetlink.h        |   5 ++
 include/uapi/linux/if_link.h   |  19 ++++
 include/uapi/linux/rtnetlink.h |   7 ++
 net/core/rtnetlink.c           | 200 +++++++++++++++++++++++++++++++++++++++++
 4 files changed, 231 insertions(+)

Comments

Jiri Pirko March 14, 2016, 2:51 p.m. UTC | #1
Sun, Mar 13, 2016 at 02:56:25AM CET, roopa@cumulusnetworks.com wrote:
>From: Roopa Prabhu <roopa@cumulusnetworks.com>
>
>This patch adds a new RTM_GETSTATS message to query link stats via netlink
>from the kernel. RTM_NEWLINK also dumps stats today, but RTM_NEWLINK
>returns a lot more than just stats and is expensive in some cases when
>frequent polling for stats from userspace is a common operation.
>
>RTM_GETSTATS is an attempt to provide a light weight netlink message
>to explicity query only link stats from the kernel on an interface.
>The idea is to also keep it extensible so that new kinds of stats can be
>added to it in the future.
>
>This patch adds the following attribute for NETDEV stats:
>struct nla_policy ifla_stats_policy[IFLA_STATS_MAX + 1] = {
>        [IFLA_STATS_LINK64]  = { .len = sizeof(struct rtnl_link_stats64) },
>};
>
>This patch also allows for af family stats (an example af stats for IPV6
>is available with the second patch in the series).
>
>Like any other rtnetlink message, RTM_GETSTATS can be used to get stats of
>a single interface or all interfaces with NLM_F_DUMP.
>
>Future possible new types of stat attributes:
>- IFLA_MPLS_STATS  (nested. for mpls/mdev stats)
>- IFLA_EXTENDED_STATS (nested. extended software netdev stats like bridge,
>  vlan, vxlan etc)
>- IFLA_EXTENDED_HW_STATS (nested. extended hardware stats which are
>  available via ethtool today)
>
>This patch also declares a filter mask for all stat attributes.
>User has to provide a mask of stats attributes to query. This will be
>specified in a new hdr 'struct if_stats_msg' for stats messages.
>
>Without any attributes in the filter_mask, no stats will be returned.
>
>This patch has been tested with modified iproute2 ifstat.
>
>Suggested-by: Jamal Hadi Salim <jhs@mojatatu.com>
>Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com>
>---
> include/net/rtnetlink.h        |   5 ++
> include/uapi/linux/if_link.h   |  19 ++++
> include/uapi/linux/rtnetlink.h |   7 ++
> net/core/rtnetlink.c           | 200 +++++++++++++++++++++++++++++++++++++++++
> 4 files changed, 231 insertions(+)
>
>diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h
>index 2f87c1b..fa68158 100644
>--- a/include/net/rtnetlink.h
>+++ b/include/net/rtnetlink.h
>@@ -131,6 +131,11 @@ struct rtnl_af_ops {
> 						    const struct nlattr *attr);
> 	int			(*set_link_af)(struct net_device *dev,
> 					       const struct nlattr *attr);
>+	size_t			(*get_link_af_stats_size)(const struct net_device *dev,
>+							  u32 filter_mask);
>+	int			(*fill_link_af_stats)(struct sk_buff *skb,
>+						      const struct net_device *dev,
>+						      u32 filter_mask);
> };
> 
> void __rtnl_af_unregister(struct rtnl_af_ops *ops);
>diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
>index 249eef9..0840f3e 100644
>--- a/include/uapi/linux/if_link.h
>+++ b/include/uapi/linux/if_link.h
>@@ -741,4 +741,23 @@ enum {
> 
> #define IFLA_HSR_MAX (__IFLA_HSR_MAX - 1)
> 
>+/* STATS section */
>+
>+struct if_stats_msg {
>+	__u8  family;
>+	__u32 ifindex;
>+	__u32 filter_mask;

This limit future extension to only 32 groups of stats. I can imagine
that more than that can be added, easily. Why don't you use nested
attribute IFLA_STATS_FILTER with flag attributes for every type? That
would be easily extendable.

Using netlink header struct for this does not look correct to me.
In past, this was done lot of times and turned out to be a problem later.



>+};
>+
>+enum {
>+	IFLA_STATS_UNSPEC,
>+	IFLA_STATS_LINK64,
>+	IFLA_STATS_INET6,
>+	__IFLA_STATS_MAX,
>+};
>+
>+#define IFLA_STATS_MAX (__IFLA_STATS_MAX - 1)
>+
>+#define IFLA_STATS_FILTER_BIT(ATTR)	(1 << (ATTR))
>+
Nicolas Dichtel March 14, 2016, 3 p.m. UTC | #2
Le 13/03/2016 02:56, Roopa Prabhu a écrit :
> From: Roopa Prabhu <roopa@cumulusnetworks.com>
>
> This patch adds a new RTM_GETSTATS message to query link stats via netlink
> from the kernel. RTM_NEWLINK also dumps stats today, but RTM_NEWLINK
> returns a lot more than just stats and is expensive in some cases when
> frequent polling for stats from userspace is a common operation.
>
> RTM_GETSTATS is an attempt to provide a light weight netlink message
> to explicity query only link stats from the kernel on an interface.
> The idea is to also keep it extensible so that new kinds of stats can be
> added to it in the future.
>
> This patch adds the following attribute for NETDEV stats:
> struct nla_policy ifla_stats_policy[IFLA_STATS_MAX + 1] = {
>          [IFLA_STATS_LINK64]  = { .len = sizeof(struct rtnl_link_stats64) },
> };
>
> This patch also allows for af family stats (an example af stats for IPV6
> is available with the second patch in the series).
>
> Like any other rtnetlink message, RTM_GETSTATS can be used to get stats of
> a single interface or all interfaces with NLM_F_DUMP.
>
> Future possible new types of stat attributes:
> - IFLA_MPLS_STATS  (nested. for mpls/mdev stats)
> - IFLA_EXTENDED_STATS (nested. extended software netdev stats like bridge,
>    vlan, vxlan etc)
> - IFLA_EXTENDED_HW_STATS (nested. extended hardware stats which are
>    available via ethtool today)
>
> This patch also declares a filter mask for all stat attributes.
> User has to provide a mask of stats attributes to query. This will be
> specified in a new hdr 'struct if_stats_msg' for stats messages.
>
> Without any attributes in the filter_mask, no stats will be returned.
>
> This patch has been tested with modified iproute2 ifstat.
>
> Suggested-by: Jamal Hadi Salim <jhs@mojatatu.com>
> Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com>
[snip]
> diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
> index 249eef9..0840f3e 100644
> --- a/include/uapi/linux/if_link.h
> +++ b/include/uapi/linux/if_link.h
[snip]
> +enum {
> +	IFLA_STATS_UNSPEC,
> +	IFLA_STATS_LINK64,
> +	IFLA_STATS_INET6,
IFLA_STATS_INET6 is part on patch #2, it's not used in this patch.

> +	__IFLA_STATS_MAX,
> +};
> +
> +#define IFLA_STATS_MAX (__IFLA_STATS_MAX - 1)
> +
> +#define IFLA_STATS_FILTER_BIT(ATTR)	(1 << (ATTR))
> +
>   #endif /* _UAPI_LINUX_IF_LINK_H */
> diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h
> index ca764b5..2bbb300 100644
> --- a/include/uapi/linux/rtnetlink.h
> +++ b/include/uapi/linux/rtnetlink.h
> @@ -139,6 +139,13 @@ enum {
>   	RTM_GETNSID = 90,
>   #define RTM_GETNSID RTM_GETNSID
>
> +	RTM_NEWSTATS = 92,
> +#define RTM_NEWSTATS RTM_NEWSTATS
> +	RTM_DELSTATS = 93,
> +#define RTM_DELSTATS RTM_DELSTATS
RTM_DELSTATS is never used.

> +	RTM_GETSTATS = 94,
> +#define RTM_GETSTATS RTM_GETSTATS
> +
>   	__RTM_MAX,
>   #define RTM_MAX		(((__RTM_MAX + 3) & ~3) - 1)
>   };
> diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
> index d2d9e5e..d1e3d17 100644
> --- a/net/core/rtnetlink.c
> +++ b/net/core/rtnetlink.c
[snip]
> +static noinline size_t if_nlmsg_stats_size(const struct net_device *dev,
> +					   u32 filter_mask)
Why are you using the 'noinline' attribute?

> +{
> +	size_t size = 0;
> +
> +	if (filter_mask & IFLA_STATS_FILTER_BIT(IFLA_STATS_LINK64))
> +		size += nla_total_size(sizeof(struct rtnl_link_stats64));
> +
> +	size += rtnl_link_get_af_stats_size(dev, filter_mask);
> +
> +	return size;
> +}
Elad Raz March 14, 2016, 3:11 p.m. UTC | #3
> -----Original Message-----
> From: netdev-owner@vger.kernel.org [mailto:netdev-owner@vger.kernel.org]
> On Behalf Of Roopa Prabhu
> Sent: Sunday, March 13, 2016 3:56 AM
> To: netdev@vger.kernel.org
> Cc: jhs@mojatatu.com; davem@davemloft.net
> Subject: [PATCH net-next 1/2] rtnetlink: add new RTM_GETSTATS message to
> dump link stats
> 
> From: Roopa Prabhu <roopa@cumulusnetworks.com>
> 
> This patch adds a new RTM_GETSTATS message to query link stats via
> netlink from the kernel. RTM_NEWLINK also dumps stats today, but
> RTM_NEWLINK returns a lot more than just stats and is expensive in some
> cases when frequent polling for stats from userspace is a common
> operation.
> 
> RTM_GETSTATS is an attempt to provide a light weight netlink message to
> explicity query only link stats from the kernel on an interface.
> The idea is to also keep it extensible so that new kinds of stats can be
> added to it in the future.
> 
> This patch adds the following attribute for NETDEV stats:
> struct nla_policy ifla_stats_policy[IFLA_STATS_MAX + 1] = {
>         [IFLA_STATS_LINK64]  = { .len = sizeof(struct rtnl_link_stats64)
> }, };
> 
> This patch also allows for af family stats (an example af stats for IPV6
> is available with the second patch in the series).
> 
> Like any other rtnetlink message, RTM_GETSTATS can be used to get stats
> of a single interface or all interfaces with NLM_F_DUMP.
> 
> Future possible new types of stat attributes:
> - IFLA_MPLS_STATS  (nested. for mpls/mdev stats)
> - IFLA_EXTENDED_STATS (nested. extended software netdev stats like
> bridge,
>   vlan, vxlan etc)
> - IFLA_EXTENDED_HW_STATS (nested. extended hardware stats which are
>   available via ethtool today)
> 
> This patch also declares a filter mask for all stat attributes.
> User has to provide a mask of stats attributes to query. This will be
> specified in a new hdr 'struct if_stats_msg' for stats messages.
> 
> Without any attributes in the filter_mask, no stats will be returned.
> 
> This patch has been tested with modified iproute2 ifstat.
> 
> Suggested-by: Jamal Hadi Salim <jhs@mojatatu.com>
> Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com>
> ---
>  include/net/rtnetlink.h        |   5 ++
>  include/uapi/linux/if_link.h   |  19 ++++
>  include/uapi/linux/rtnetlink.h |   7 ++
>  net/core/rtnetlink.c           | 200
> +++++++++++++++++++++++++++++++++++++++++
>  4 files changed, 231 insertions(+)
> 
> diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h index
> 2f87c1b..fa68158 100644
> --- a/include/net/rtnetlink.h
> +++ b/include/net/rtnetlink.h
> @@ -131,6 +131,11 @@ struct rtnl_af_ops {
>  						    const struct nlattr *attr);
>  	int			(*set_link_af)(struct net_device *dev,
>  					       const struct nlattr *attr);
> +	size_t			(*get_link_af_stats_size)(const struct
> net_device *dev,
> +							  u32 filter_mask);
> +	int			(*fill_link_af_stats)(struct sk_buff *skb,
> +						      const struct net_device *dev,
> +						      u32 filter_mask);
>  };
> 
>  void __rtnl_af_unregister(struct rtnl_af_ops *ops); diff --git
> a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index
> 249eef9..0840f3e 100644
> --- a/include/uapi/linux/if_link.h
> +++ b/include/uapi/linux/if_link.h
> @@ -741,4 +741,23 @@ enum {
> 
>  #define IFLA_HSR_MAX (__IFLA_HSR_MAX - 1)
> 
> +/* STATS section */
> +
> +struct if_stats_msg {
> +	__u8  family;
> +	__u32 ifindex;
> +	__u32 filter_mask;
> +};
> +
> +enum {
> +	IFLA_STATS_UNSPEC,
> +	IFLA_STATS_LINK64,
> +	IFLA_STATS_INET6,
> +	__IFLA_STATS_MAX,
> +};
> +
> +#define IFLA_STATS_MAX (__IFLA_STATS_MAX - 1)
> +
> +#define IFLA_STATS_FILTER_BIT(ATTR)	(1 << (ATTR))
> +
>  #endif /* _UAPI_LINUX_IF_LINK_H */
> diff --git a/include/uapi/linux/rtnetlink.h
> b/include/uapi/linux/rtnetlink.h index ca764b5..2bbb300 100644
> --- a/include/uapi/linux/rtnetlink.h
> +++ b/include/uapi/linux/rtnetlink.h
> @@ -139,6 +139,13 @@ enum {
>  	RTM_GETNSID = 90,
>  #define RTM_GETNSID RTM_GETNSID
> 
> +	RTM_NEWSTATS = 92,
> +#define RTM_NEWSTATS RTM_NEWSTATS

I think that RTM_NEWSTATS and RTM_DELSTATS aren't good names, since user doesn't add/del statistics but only query.
Maybe just stay with RTM_GETSTATS and the message back to user will be RTM_GETSTATS as well?

> +	RTM_DELSTATS = 93,
> +#define RTM_DELSTATS RTM_DELSTATS

This is not in used

> +	RTM_GETSTATS = 94,
> +#define RTM_GETSTATS RTM_GETSTATS
> +
>  	__RTM_MAX,
>  #define RTM_MAX		(((__RTM_MAX + 3) & ~3) - 1)
>  };
> diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index
> d2d9e5e..d1e3d17 100644
> --- a/net/core/rtnetlink.c
> +++ b/net/core/rtnetlink.c
> @@ -3410,6 +3410,203 @@ out:
>  	return err;
>  }
> 
> +static int rtnl_fill_statsinfo(struct sk_buff *skb, struct net_device
> *dev,
> +			       int type, u32 pid, u32 seq, u32 change,
> +			       unsigned int flags, unsigned int filter_mask) {
> +	const struct rtnl_link_stats64 *stats;
> +	struct rtnl_link_stats64 temp;
> +	struct if_stats_msg *ifsm;
> +	struct nlmsghdr *nlh;
> +	struct rtnl_af_ops *af_ops;
> +	struct nlattr *attr;
> +
> +	ASSERT_RTNL();
> +
> +	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ifsm), flags);
> +	if (!nlh)
> +		return -EMSGSIZE;
> +
> +	ifsm = nlmsg_data(nlh);
> +	ifsm->ifindex = dev->ifindex;
> +	ifsm->filter_mask = filter_mask;
> +
> +	if (filter_mask & IFLA_STATS_FILTER_BIT(IFLA_STATS_LINK64)) {
> +		attr = nla_reserve(skb, IFLA_STATS_LINK64,
> +				   sizeof(struct rtnl_link_stats64));
> +		if (!attr)
> +			return -EMSGSIZE;
> +
> +		stats = dev_get_stats(dev, &temp);
> +
> +		copy_rtnl_link_stats64(nla_data(attr), stats);
> +	}
> +
> +	list_for_each_entry(af_ops, &rtnl_af_ops, list) {
> +		if (af_ops->fill_link_af_stats) {
> +			int err;
> +
> +			err = af_ops->fill_link_af_stats(skb, dev, filter_mask);
> +			if (err < 0)
> +				goto nla_put_failure;
> +		}
> +	}
> +
> +	nlmsg_end(skb, nlh);
> +
> +	return 0;
> +
> +nla_put_failure:
> +	nlmsg_cancel(skb, nlh);
> +
> +	return -EMSGSIZE;
> +}
> +
> +static const struct nla_policy ifla_stats_policy[IFLA_STATS_MAX + 1] =
> {
> +	[IFLA_STATS_LINK64]	= { .len = sizeof(struct rtnl_link_stats64)
> },
> +};
> +
> +static size_t rtnl_link_get_af_stats_size(const struct net_device *dev,
> +					  u32 filter_mask)
> +{
> +	struct rtnl_af_ops *af_ops;
> +	size_t size = 0;
> +
> +	list_for_each_entry(af_ops, &rtnl_af_ops, list) {
> +		if (af_ops->get_link_af_stats_size)
> +			size += af_ops->get_link_af_stats_size(dev,
> +							       filter_mask);
> +	}
> +
> +	return size;
> +}
> +
> +static noinline size_t if_nlmsg_stats_size(const struct net_device
> *dev,
> +					   u32 filter_mask)
> +{
> +	size_t size = 0;
> +
> +	if (filter_mask & IFLA_STATS_FILTER_BIT(IFLA_STATS_LINK64))
> +		size += nla_total_size(sizeof(struct rtnl_link_stats64));
> +
> +	size += rtnl_link_get_af_stats_size(dev, filter_mask);
> +
> +	return size;
> +}
> +
> +static int rtnl_stats_get(struct sk_buff *skb, struct nlmsghdr *nlh) {
> +	struct net *net = sock_net(skb->sk);
> +	struct if_stats_msg *ifsm;
> +	struct net_device *dev = NULL;
> +	struct sk_buff *nskb;
> +	u32 filter_mask;
> +	int err;
> +
> +	ifsm = nlmsg_data(nlh);
> +	if (ifsm->ifindex > 0)
> +		dev = __dev_get_by_index(net, ifsm->ifindex);
> +	else
> +		return -EINVAL;
> +
> +	if (!dev)
> +		return -ENODEV;
> +
> +	filter_mask = ifsm->filter_mask;
> +	if (!filter_mask)
> +		return -EINVAL;
> +
> +	nskb = nlmsg_new(if_nlmsg_stats_size(dev, filter_mask),
> GFP_KERNEL);
> +	if (!nskb)
> +		return -ENOBUFS;
> +
> +	err = rtnl_fill_statsinfo(nskb, dev, RTM_NEWSTATS,
> +				  NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0,
> +				  0, filter_mask);
> +	if (err < 0) {
> +		/* -EMSGSIZE implies BUG in if_nlmsg_stats_size */
> +		WARN_ON(err == -EMSGSIZE);
> +		kfree_skb(nskb);
> +	} else {
> +		err = rtnl_unicast(nskb, net, NETLINK_CB(skb).portid);
> +	}
> +
> +	return err;
> +}
> +
> +static u16 rtnl_stats_calcit(struct sk_buff *skb, struct nlmsghdr *nlh)
> +{
> +	struct net *net = sock_net(skb->sk);
> +	struct net_device *dev;
> +	u16 min_ifinfo_dump_size = 0;
> +	struct if_stats_msg *ifsm;
> +	u32 filter_mask;
> +
> +	ifsm = nlmsg_data(nlh);
> +	filter_mask = ifsm->filter_mask;
> +
> +	/* traverse the list of net devices and compute the minimum
> +	 * buffer size based upon the filter mask.
> +	 */
> +	list_for_each_entry(dev, &net->dev_base_head, dev_list) {
> +		min_ifinfo_dump_size = max_t(u16, min_ifinfo_dump_size,
> +					     if_nlmsg_stats_size(dev,
> +								 filter_mask));
> +	}
> +
> +	return min_ifinfo_dump_size;
> +}
> +
> +static int rtnl_stats_dump(struct sk_buff *skb, struct netlink_callback
> +*cb) {
> +	struct net *net = sock_net(skb->sk);
> +	struct if_stats_msg *ifsm;
> +	int h, s_h;
> +	int idx = 0, s_idx;
> +	struct net_device *dev;
> +	struct hlist_head *head;
> +	unsigned int flags = NLM_F_MULTI;
> +	u32 filter_mask = 0;
> +	int err;
> +
> +	s_h = cb->args[0];
> +	s_idx = cb->args[1];
> +
> +	cb->seq = net->dev_base_seq;
> +
> +	ifsm = nlmsg_data(cb->nlh);
> +	filter_mask = ifsm->filter_mask;
> +
> +	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
> +		idx = 0;
> +		head = &net->dev_index_head[h];
> +		hlist_for_each_entry(dev, head, index_hlist) {
> +			if (idx < s_idx)
> +				goto cont;
> +			err = rtnl_fill_statsinfo(skb, dev, RTM_NEWSTATS,
> +						  NETLINK_CB(cb->skb).portid,
> +						  cb->nlh->nlmsg_seq, 0,
> +						  flags, filter_mask);
> +			/* If we ran out of room on the first message,
> +			 * we're in trouble
> +			 */
> +			WARN_ON((err == -EMSGSIZE) && (skb->len == 0));
> +
> +			if (err < 0)
> +				goto out;
> +
> +			nl_dump_check_consistent(cb, nlmsg_hdr(skb));
> +cont:
> +			idx++;
> +		}
> +	}
> +out:
> +	cb->args[1] = idx;
> +	cb->args[0] = h;
> +
> +	return skb->len;
> +}
> +
>  /* Process one rtnetlink message. */
> 
>  static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
> @@ -3559,4 +3756,7 @@ void __init rtnetlink_init(void)
>  	rtnl_register(PF_BRIDGE, RTM_GETLINK, NULL, rtnl_bridge_getlink,
> NULL);
>  	rtnl_register(PF_BRIDGE, RTM_DELLINK, rtnl_bridge_dellink, NULL,
> NULL);
>  	rtnl_register(PF_BRIDGE, RTM_SETLINK, rtnl_bridge_setlink, NULL,
> NULL);
> +
> +	rtnl_register(PF_UNSPEC, RTM_GETSTATS, rtnl_stats_get,
> rtnl_stats_dump,
> +		      rtnl_stats_calcit);
>  }
> --
> 1.9.1
Roopa Prabhu March 14, 2016, 6:45 p.m. UTC | #4
On 3/14/16, 7:51 AM, Jiri Pirko wrote:
> Sun, Mar 13, 2016 at 02:56:25AM CET, roopa@cumulusnetworks.com wrote:
>> From: Roopa Prabhu <roopa@cumulusnetworks.com>
>>
>> This patch adds a new RTM_GETSTATS message to query link stats via netlink
> >from the kernel. RTM_NEWLINK also dumps stats today, but RTM_NEWLINK
>> returns a lot more than just stats and is expensive in some cases when
>> frequent polling for stats from userspace is a common operation.
>>
>> RTM_GETSTATS is an attempt to provide a light weight netlink message
>> to explicity query only link stats from the kernel on an interface.
>> The idea is to also keep it extensible so that new kinds of stats can be
>> added to it in the future.
>>
>> This patch adds the following attribute for NETDEV stats:
>> struct nla_policy ifla_stats_policy[IFLA_STATS_MAX + 1] = {
>>        [IFLA_STATS_LINK64]  = { .len = sizeof(struct rtnl_link_stats64) },
>> };
>>
>> This patch also allows for af family stats (an example af stats for IPV6
>> is available with the second patch in the series).
>>
>> Like any other rtnetlink message, RTM_GETSTATS can be used to get stats of
>> a single interface or all interfaces with NLM_F_DUMP.
>>
>> Future possible new types of stat attributes:
>> - IFLA_MPLS_STATS  (nested. for mpls/mdev stats)
>> - IFLA_EXTENDED_STATS (nested. extended software netdev stats like bridge,
>>  vlan, vxlan etc)
>> - IFLA_EXTENDED_HW_STATS (nested. extended hardware stats which are
>>  available via ethtool today)
>>
>> This patch also declares a filter mask for all stat attributes.
>> User has to provide a mask of stats attributes to query. This will be
>> specified in a new hdr 'struct if_stats_msg' for stats messages.
>>
>> Without any attributes in the filter_mask, no stats will be returned.
>>
>> This patch has been tested with modified iproute2 ifstat.
>>
>> Suggested-by: Jamal Hadi Salim <jhs@mojatatu.com>
>> Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com>
>> ---
>> include/net/rtnetlink.h        |   5 ++
>> include/uapi/linux/if_link.h   |  19 ++++
>> include/uapi/linux/rtnetlink.h |   7 ++
>> net/core/rtnetlink.c           | 200 +++++++++++++++++++++++++++++++++++++++++
>> 4 files changed, 231 insertions(+)
>>
>> diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h
>> index 2f87c1b..fa68158 100644
>> --- a/include/net/rtnetlink.h
>> +++ b/include/net/rtnetlink.h
>> @@ -131,6 +131,11 @@ struct rtnl_af_ops {
>> 						    const struct nlattr *attr);
>> 	int			(*set_link_af)(struct net_device *dev,
>> 					       const struct nlattr *attr);
>> +	size_t			(*get_link_af_stats_size)(const struct net_device *dev,
>> +							  u32 filter_mask);
>> +	int			(*fill_link_af_stats)(struct sk_buff *skb,
>> +						      const struct net_device *dev,
>> +						      u32 filter_mask);
>> };
>>
>> void __rtnl_af_unregister(struct rtnl_af_ops *ops);
>> diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
>> index 249eef9..0840f3e 100644
>> --- a/include/uapi/linux/if_link.h
>> +++ b/include/uapi/linux/if_link.h
>> @@ -741,4 +741,23 @@ enum {
>>
>> #define IFLA_HSR_MAX (__IFLA_HSR_MAX - 1)
>>
>> +/* STATS section */
>> +
>> +struct if_stats_msg {
>> +	__u8  family;
>> +	__u32 ifindex;
>> +	__u32 filter_mask;
> This limit future extension to only 32 groups of stats. I can imagine
> that more than that can be added, easily.
I thought about that, but it is going to be a while before we run out of the u32.
Most of the other stats will be nested like per logical interface stats or
per hw stats. If we do run out of them, in the future we could add a netlink
attribute for extended filter mask to carry more bits (similar to IFLA_EXT_MASK).
I did also start with just having a IFLA_STATS_EXT_MASK like attribute
to begin with, but since no stats are dumped by default, having a way to easily specify
mask in the hdr will be easier on apps. And this will again be a u32 anyways.


>  Why don't you use nested
> attribute IFLA_STATS_FILTER with flag attributes for every type?
>  That
> would be easily extendable.
a u8 for each stats selector seems like an overkill.
> Using netlink header struct for this does not look correct to me.
> In past, this was done lot of times and turned out to be a problem later.
>
>
I started with not adding it, but rtnetlink rcv handler looks for family
in the hdr. And hence all of the messages have a struct header
with family as the first field (you can correct me if you find that it is not necessary.)
Jiri Pirko March 14, 2016, 7:04 p.m. UTC | #5
Mon, Mar 14, 2016 at 07:45:23PM CET, roopa@cumulusnetworks.com wrote:
>On 3/14/16, 7:51 AM, Jiri Pirko wrote:
>> Sun, Mar 13, 2016 at 02:56:25AM CET, roopa@cumulusnetworks.com wrote:
>>> From: Roopa Prabhu <roopa@cumulusnetworks.com>
>>>
>>> This patch adds a new RTM_GETSTATS message to query link stats via netlink
>> >from the kernel. RTM_NEWLINK also dumps stats today, but RTM_NEWLINK
>>> returns a lot more than just stats and is expensive in some cases when
>>> frequent polling for stats from userspace is a common operation.
>>>
>>> RTM_GETSTATS is an attempt to provide a light weight netlink message
>>> to explicity query only link stats from the kernel on an interface.
>>> The idea is to also keep it extensible so that new kinds of stats can be
>>> added to it in the future.
>>>
>>> This patch adds the following attribute for NETDEV stats:
>>> struct nla_policy ifla_stats_policy[IFLA_STATS_MAX + 1] = {
>>>        [IFLA_STATS_LINK64]  = { .len = sizeof(struct rtnl_link_stats64) },
>>> };
>>>
>>> This patch also allows for af family stats (an example af stats for IPV6
>>> is available with the second patch in the series).
>>>
>>> Like any other rtnetlink message, RTM_GETSTATS can be used to get stats of
>>> a single interface or all interfaces with NLM_F_DUMP.
>>>
>>> Future possible new types of stat attributes:
>>> - IFLA_MPLS_STATS  (nested. for mpls/mdev stats)
>>> - IFLA_EXTENDED_STATS (nested. extended software netdev stats like bridge,
>>>  vlan, vxlan etc)
>>> - IFLA_EXTENDED_HW_STATS (nested. extended hardware stats which are
>>>  available via ethtool today)
>>>
>>> This patch also declares a filter mask for all stat attributes.
>>> User has to provide a mask of stats attributes to query. This will be
>>> specified in a new hdr 'struct if_stats_msg' for stats messages.
>>>
>>> Without any attributes in the filter_mask, no stats will be returned.
>>>
>>> This patch has been tested with modified iproute2 ifstat.
>>>
>>> Suggested-by: Jamal Hadi Salim <jhs@mojatatu.com>
>>> Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com>
>>> ---
>>> include/net/rtnetlink.h        |   5 ++
>>> include/uapi/linux/if_link.h   |  19 ++++
>>> include/uapi/linux/rtnetlink.h |   7 ++
>>> net/core/rtnetlink.c           | 200 +++++++++++++++++++++++++++++++++++++++++
>>> 4 files changed, 231 insertions(+)
>>>
>>> diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h
>>> index 2f87c1b..fa68158 100644
>>> --- a/include/net/rtnetlink.h
>>> +++ b/include/net/rtnetlink.h
>>> @@ -131,6 +131,11 @@ struct rtnl_af_ops {
>>> 						    const struct nlattr *attr);
>>> 	int			(*set_link_af)(struct net_device *dev,
>>> 					       const struct nlattr *attr);
>>> +	size_t			(*get_link_af_stats_size)(const struct net_device *dev,
>>> +							  u32 filter_mask);
>>> +	int			(*fill_link_af_stats)(struct sk_buff *skb,
>>> +						      const struct net_device *dev,
>>> +						      u32 filter_mask);
>>> };
>>>
>>> void __rtnl_af_unregister(struct rtnl_af_ops *ops);
>>> diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
>>> index 249eef9..0840f3e 100644
>>> --- a/include/uapi/linux/if_link.h
>>> +++ b/include/uapi/linux/if_link.h
>>> @@ -741,4 +741,23 @@ enum {
>>>
>>> #define IFLA_HSR_MAX (__IFLA_HSR_MAX - 1)
>>>
>>> +/* STATS section */
>>> +
>>> +struct if_stats_msg {
>>> +	__u8  family;
>>> +	__u32 ifindex;
>>> +	__u32 filter_mask;
>> This limit future extension to only 32 groups of stats. I can imagine
>> that more than that can be added, easily.
>I thought about that, but it is going to be a while before we run out of the u32.
>Most of the other stats will be nested like per logical interface stats or
>per hw stats. If we do run out of them, in the future we could add a netlink
>attribute for extended filter mask to carry more bits (similar to IFLA_EXT_MASK).
>I did also start with just having a IFLA_STATS_EXT_MASK like attribute
>to begin with, but since no stats are dumped by default, having a way to easily specify
>mask in the hdr will be easier on apps. And this will again be a u32 anyways.

I believe that using *any* structs to send over netlink is a mistake.
Netlink is capable to transfer everything using attrs. Easy to compose,
easy to parse. easy to extend. Couple of more bytes in the message? So what?
For newly introduced things, I suggest to do this properly.


>
>
>>  Why don't you use nested
>> attribute IFLA_STATS_FILTER with flag attributes for every type?
>>  That
>> would be easily extendable.
>a u8 for each stats selector seems like an overkill.
>> Using netlink header struct for this does not look correct to me.
>> In past, this was done lot of times and turned out to be a problem later.
>>
>>
>I started with not adding it, but rtnetlink rcv handler looks for family
>in the hdr. And hence all of the messages have a struct header
>with family as the first field (you can correct me if you find that it is not necessary.)
>
>
David Miller March 14, 2016, 7:56 p.m. UTC | #6
From: Jiri Pirko <jiri@resnulli.us>
Date: Mon, 14 Mar 2016 20:04:35 +0100

> I believe that using *any* structs to send over netlink is a mistake.
> Netlink is capable to transfer everything using attrs. Easy to compose,
> easy to parse. easy to extend. Couple of more bytes in the message? So what?
> For newly introduced things, I suggest to do this properly.

It is not so straight-forward.

What to put into the header is a tradeoff.

The most basic use cases should be as efficient as possible, and if we
can put reasonable knobs into the base commend header we should do that
as avoiding attribute processing makes things faster.

And I think in this case it is reasonable to put the mask in there.

The only problem I see with this series is the naming of the netlink
command (it isn't a "new" operation, and the "del" is unused).

Maybe the suggestion to use just "GET" as the name is ok.
Jiri Pirko March 14, 2016, 8:22 p.m. UTC | #7
Mon, Mar 14, 2016 at 08:56:40PM CET, davem@davemloft.net wrote:
>From: Jiri Pirko <jiri@resnulli.us>
>Date: Mon, 14 Mar 2016 20:04:35 +0100
>
>> I believe that using *any* structs to send over netlink is a mistake.
>> Netlink is capable to transfer everything using attrs. Easy to compose,
>> easy to parse. easy to extend. Couple of more bytes in the message? So what?
>> For newly introduced things, I suggest to do this properly.
>
>It is not so straight-forward.
>
>What to put into the header is a tradeoff.
>
>The most basic use cases should be as efficient as possible, and if we
>can put reasonable knobs into the base commend header we should do that
>as avoiding attribute processing makes things faster.

Faster in which matter? Regarding the user app complexicity, I think that
processing attrs is very simple and straightforward. I might be missing
something very obvious, but I don't think that processing header struct
is that much easier that it advocates for the unclean approach.

I personally believe that introducing possibility to pass Netlink
headers was a mistake from the very beginning. If we have clean Netlink
interface, why to pollute that with ioclt-like struct approach. Okay,
the mistake was done. But as I said, for the future usage, I believe
that it should be avoided.


>
>And I think in this case it is reasonable to put the mask in there.
>
>The only problem I see with this series is the naming of the netlink
>command (it isn't a "new" operation, and the "del" is unused).
>
>Maybe the suggestion to use just "GET" as the name is ok.

+1
Roopa Prabhu March 15, 2016, 6:24 a.m. UTC | #8
On 3/14/16, 12:04 PM, Jiri Pirko wrote:
> Mon, Mar 14, 2016 at 07:45:23PM CET, roopa@cumulusnetworks.com wrote:
>> On 3/14/16, 7:51 AM, Jiri Pirko wrote:
>>> Sun, Mar 13, 2016 at 02:56:25AM CET, roopa@cumulusnetworks.com wrote:
>>>> From: Roopa Prabhu <roopa@cumulusnetworks.com>
>>>>
>>>> This patch adds a new RTM_GETSTATS message to query link stats via netlink
>>> >from the kernel. RTM_NEWLINK also dumps stats today, but RTM_NEWLINK
>>>> returns a lot more than just stats and is expensive in some cases when
>>>> frequent polling for stats from userspace is a common operation.
>>>>
>>>> RTM_GETSTATS is an attempt to provide a light weight netlink message
>>>> to explicity query only link stats from the kernel on an interface.
>>>> The idea is to also keep it extensible so that new kinds of stats can be
>>>> added to it in the future.
>>>>
>>>> This patch adds the following attribute for NETDEV stats:
>>>> struct nla_policy ifla_stats_policy[IFLA_STATS_MAX + 1] = {
>>>>        [IFLA_STATS_LINK64]  = { .len = sizeof(struct rtnl_link_stats64) },
>>>> };
>>>>
>>>> This patch also allows for af family stats (an example af stats for IPV6
>>>> is available with the second patch in the series).
>>>>
>>>> Like any other rtnetlink message, RTM_GETSTATS can be used to get stats of
>>>> a single interface or all interfaces with NLM_F_DUMP.
>>>>
>>>> Future possible new types of stat attributes:
>>>> - IFLA_MPLS_STATS  (nested. for mpls/mdev stats)
>>>> - IFLA_EXTENDED_STATS (nested. extended software netdev stats like bridge,
>>>>  vlan, vxlan etc)
>>>> - IFLA_EXTENDED_HW_STATS (nested. extended hardware stats which are
>>>>  available via ethtool today)
>>>>
>>>> This patch also declares a filter mask for all stat attributes.
>>>> User has to provide a mask of stats attributes to query. This will be
>>>> specified in a new hdr 'struct if_stats_msg' for stats messages.
>>>>
>>>> Without any attributes in the filter_mask, no stats will be returned.
>>>>
>>>> This patch has been tested with modified iproute2 ifstat.
>>>>
>>>> Suggested-by: Jamal Hadi Salim <jhs@mojatatu.com>
>>>> Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com>
>>>> ---
[snip]
>>>> +
>>>> +struct if_stats_msg {
>>>> +	__u8  family;
>>>> +	__u32 ifindex;
>>>> +	__u32 filter_mask;
>>> This limit future extension to only 32 groups of stats. I can imagine
>>> that more than that can be added, easily.
>> I thought about that, but it is going to be a while before we run out of the u32.
>> Most of the other stats will be nested like per logical interface stats or
>> per hw stats. If we do run out of them, in the future we could add a netlink
>> attribute for extended filter mask to carry more bits (similar to IFLA_EXT_MASK).
>> I did also start with just having a IFLA_STATS_EXT_MASK like attribute
>> to begin with, but since no stats are dumped by default, having a way to easily specify
>> mask in the hdr will be easier on apps. And this will again be a u32 anyways.
> I believe that using *any* structs to send over netlink is a mistake.
> Netlink is capable to transfer everything using attrs. Easy to compose,
> easy to parse. easy to extend. Couple of more bytes in the message? So what?
> For newly introduced things, I suggest to do this properly.

Jiri, I hear you. I don't prefer structs for netlink attributes either.
But in this case, the struct is for the msg hdr which immediately follows the netlink
header. Its not an attribute value. see my last reply below. rtnetlink_rcv_msg does assume
a struct and family right after the netlink header.
All messages define this struct (see struct ndmsg, or ifinfomsg, rtmsg, br_port_msg etc).
so it is required.

And I do think this struct simplifies a minimum request message and
I have also realized that it really helps if this struct contains basic minimum required
attributes. Ifindex as a filter really helps with RTM_GETSTATS when not used with NLM_F_DUMP
and filter_mask is important for RTM_GETSTATS with NLM_F_DUMP because without
a filter no stats are reported. so, making it part of the base message simplifies the stats
request message from app perspective.

yes the struct cannot be extended, but further extensions can be done as netlink attributes.




>
>
>>
>>>  Why don't you use nested
>>> attribute IFLA_STATS_FILTER with flag attributes for every type?
>>>  That
>>> would be easily extendable.
>> a u8 for each stats selector seems like an overkill.
>>> Using netlink header struct for this does not look correct to me.
>>> In past, this was done lot of times and turned out to be a problem later.
>>>
>>>
>> I started with not adding it, but rtnetlink rcv handler looks for family
>> in the hdr. And hence all of the messages have a struct header
>> with family as the first field (you can correct me if you find that it is not necessary.)
>>
>>
Roopa Prabhu March 15, 2016, 6:30 a.m. UTC | #9
On 3/14/16, 8:00 AM, Nicolas Dichtel wrote:
> Le 13/03/2016 02:56, Roopa Prabhu a écrit :
>> From: Roopa Prabhu <roopa@cumulusnetworks.com>
>>
>> This patch adds a new RTM_GETSTATS message to query link stats via netlink
>> from the kernel. RTM_NEWLINK also dumps stats today, but RTM_NEWLINK
>> returns a lot more than just stats and is expensive in some cases when
>> frequent polling for stats from userspace is a common operation.
>>
>> RTM_GETSTATS is an attempt to provide a light weight netlink message
>> to explicity query only link stats from the kernel on an interface.
>> The idea is to also keep it extensible so that new kinds of stats can be
>> added to it in the future.
>>
>> This patch adds the following attribute for NETDEV stats:
>> struct nla_policy ifla_stats_policy[IFLA_STATS_MAX + 1] = {
>>          [IFLA_STATS_LINK64]  = { .len = sizeof(struct rtnl_link_stats64) },
>> };
>>
>> This patch also allows for af family stats (an example af stats for IPV6
>> is available with the second patch in the series).
>>
>> Like any other rtnetlink message, RTM_GETSTATS can be used to get stats of
>> a single interface or all interfaces with NLM_F_DUMP.
>>
>> Future possible new types of stat attributes:
>> - IFLA_MPLS_STATS  (nested. for mpls/mdev stats)
>> - IFLA_EXTENDED_STATS (nested. extended software netdev stats like bridge,
>>    vlan, vxlan etc)
>> - IFLA_EXTENDED_HW_STATS (nested. extended hardware stats which are
>>    available via ethtool today)
>>
>> This patch also declares a filter mask for all stat attributes.
>> User has to provide a mask of stats attributes to query. This will be
>> specified in a new hdr 'struct if_stats_msg' for stats messages.
>>
>> Without any attributes in the filter_mask, no stats will be returned.
>>
>> This patch has been tested with modified iproute2 ifstat.
>>
>> Suggested-by: Jamal Hadi Salim <jhs@mojatatu.com>
>> Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com>
> [snip]
>> diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
>> index 249eef9..0840f3e 100644
>> --- a/include/uapi/linux/if_link.h
>> +++ b/include/uapi/linux/if_link.h
> [snip]
>> +enum {
>> +    IFLA_STATS_UNSPEC,
>> +    IFLA_STATS_LINK64,
>> +    IFLA_STATS_INET6,
> IFLA_STATS_INET6 is part on patch #2, it's not used in this patch.
yep, will fix it
>
>> +    __IFLA_STATS_MAX,
>> +};
>> +
>> +#define IFLA_STATS_MAX (__IFLA_STATS_MAX - 1)
>> +
>> +#define IFLA_STATS_FILTER_BIT(ATTR)    (1 << (ATTR))
>> +
>>   #endif /* _UAPI_LINUX_IF_LINK_H */
>> diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h
>> index ca764b5..2bbb300 100644
>> --- a/include/uapi/linux/rtnetlink.h
>> +++ b/include/uapi/linux/rtnetlink.h
>> @@ -139,6 +139,13 @@ enum {
>>       RTM_GETNSID = 90,
>>   #define RTM_GETNSID RTM_GETNSID
>>
>> +    RTM_NEWSTATS = 92,
>> +#define RTM_NEWSTATS RTM_NEWSTATS
>> +    RTM_DELSTATS = 93,
>> +#define RTM_DELSTATS RTM_DELSTATS
> RTM_DELSTATS is never used.

yeah, i had to define it just because rtnetlink_rcv_msg seems to expect all three to be
there when it tries to check if it is a get msg. But, i could sure not declare this
but make rtnetlink_rcv_msg happy by keeping the GET msg at the right offset.
>
>> +    RTM_GETSTATS = 94,
>> +#define RTM_GETSTATS RTM_GETSTATS
>> +
>>       __RTM_MAX,
>>   #define RTM_MAX        (((__RTM_MAX + 3) & ~3) - 1)
>>   };
>> diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
>> index d2d9e5e..d1e3d17 100644
>> --- a/net/core/rtnetlink.c
>> +++ b/net/core/rtnetlink.c
> [snip]
>> +static noinline size_t if_nlmsg_stats_size(const struct net_device *dev,
>> +                       u32 filter_mask)
> Why are you using the 'noinline' attribute?

I actually picked it up from if_nlmsg_size ...

>
>> +{
>> +    size_t size = 0;
>> +
>> +    if (filter_mask & IFLA_STATS_FILTER_BIT(IFLA_STATS_LINK64))
>> +        size += nla_total_size(sizeof(struct rtnl_link_stats64));
>> +
>> +    size += rtnl_link_get_af_stats_size(dev, filter_mask);
>> +
>> +    return size;
>> +}
>
Roopa Prabhu March 15, 2016, 6:37 a.m. UTC | #10
On 3/14/16, 8:11 AM, Elad Raz wrote:
>
>> -----Original Message-----
>> From: netdev-owner@vger.kernel.org [mailto:netdev-owner@vger.kernel.org]
>> On Behalf Of Roopa Prabhu
>> Sent: Sunday, March 13, 2016 3:56 AM
>> To: netdev@vger.kernel.org
>> Cc: jhs@mojatatu.com; davem@davemloft.net
>> Subject: [PATCH net-next 1/2] rtnetlink: add new RTM_GETSTATS message to
>> dump link stats
>>
>> From: Roopa Prabhu <roopa@cumulusnetworks.com>
>>
[snip]
>> Future possible new types of stat attributes:
>> - IFLA_MPLS_STATS  (nested. for mpls/mdev stats)
>> - IFLA_EXTENDED_STATS (nested. extended software netdev stats like
>> bridge,
>>   vlan, vxlan etc)
>> - IFLA_EXTENDED_HW_STATS (nested. extended hardware stats which are
>>   available via ethtool today)
>>
>> This patch also declares a filter mask for all stat attributes.
>> User has to provide a mask of stats attributes to query. This will be
>> specified in a new hdr 'struct if_stats_msg' for stats messages.
>>
>> Without any attributes in the filter_mask, no stats will be returned.
>>
>> This patch has been tested with modified iproute2 ifstat.
>>
>> Suggested-by: Jamal Hadi Salim <jhs@mojatatu.com>
>> Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com>
>> ---
>>  
[snip]
>> diff --git a/include/uapi/linux/rtnetlink.h
>> b/include/uapi/linux/rtnetlink.h index ca764b5..2bbb300 100644
>> --- a/include/uapi/linux/rtnetlink.h
>> +++ b/include/uapi/linux/rtnetlink.h
>> @@ -139,6 +139,13 @@ enum {
>>  	RTM_GETNSID = 90,
>>  #define RTM_GETNSID RTM_GETNSID
>>
>> +	RTM_NEWSTATS = 92,
>> +#define RTM_NEWSTATS RTM_NEWSTATS
> I think that RTM_NEWSTATS and RTM_DELSTATS aren't good names, since user doesn't add/del statistics but only query.
> Maybe just stay with RTM_GETSTATS and the message back to user will be RTM_GETSTATS as well?

yeah, i defined all of these because rtnetlink_rcv_msg seems to expect all three to be
there when it tries to check if it is a get msg. But, i could sure not declare this
but make rtnetlink_rcv_msg happy by keeping the GET msg at the right offset.

The RTM_NEWSTATS i thought i should leave it in there because the original thought from
jamal also had periodic stats notification to user-space...which should be RTM_NEWSTATS.
 But, then again that can be added  when we implement that function.


>
>> +	RTM_DELSTATS = 93,
>> +#define RTM_DELSTATS RTM_DELSTATS
> This is not in used
>
>
same response as above. will drop some of them in v2.
Roopa Prabhu March 15, 2016, 7:02 a.m. UTC | #11
On 3/14/16, 12:56 PM, David Miller wrote:
> From: Jiri Pirko <jiri@resnulli.us>
> Date: Mon, 14 Mar 2016 20:04:35 +0100
>
>> I believe that using *any* structs to send over netlink is a mistake.
>> Netlink is capable to transfer everything using attrs. Easy to compose,
>> easy to parse. easy to extend. Couple of more bytes in the message? So what?
>> For newly introduced things, I suggest to do this properly.
> It is not so straight-forward.
>
> What to put into the header is a tradeoff.
>
> The most basic use cases should be as efficient as possible, and if we
> can put reasonable knobs into the base commend header we should do that
> as avoiding attribute processing makes things faster.
yes, i have recently realized this after looking at all other message
types and the userspace part of it. It does make the default message much simpler.
>
> And I think in this case it is reasonable to put the mask in there.
>
> The only problem I see with this series is the naming of the netlink
> command (it isn't a "new" operation, and the "del" is unused).
I just replied to the other responses on this: I did declare all three because
rtnetlink_rcv_msg seems to expect the get message at a particular offset
 (when it derives kind from nlmsg_type). But, i can fix it accordingly.
>
> Maybe the suggestion to use just "GET" as the name is ok.
I am thinking RTM_NEWSTATS is ok here. Because from userspace you are looking at message per interface
as a separate stats object. It also adheres to existing convention.

Besides, jamals original request/suggestion also had periodic stats notification to user-space...,
in which case it would be more appropriate to use RTM_NEWSTATS (if we implement it in the future ofcourse).

And from userspace perspective, dumps and notifications should come in with the same msg type.
userspace sees it as a stats message and does not care if it came as part of a dump or a notification.
also, user space netlink caches expect this (i work with libnl a lot and it is based on this assumption).
so, RTM_NEWSTATS seems more appropriate here.

But, if you have stronger reasons for RTM_GETSTATS, sure, pls let me know.

Thanks,
Roopa
Jiri Pirko March 15, 2016, 7:28 a.m. UTC | #12
Tue, Mar 15, 2016 at 07:24:22AM CET, roopa@cumulusnetworks.com wrote:
>On 3/14/16, 12:04 PM, Jiri Pirko wrote:
>> Mon, Mar 14, 2016 at 07:45:23PM CET, roopa@cumulusnetworks.com wrote:
>>> On 3/14/16, 7:51 AM, Jiri Pirko wrote:
>>>> Sun, Mar 13, 2016 at 02:56:25AM CET, roopa@cumulusnetworks.com wrote:
>>>>> From: Roopa Prabhu <roopa@cumulusnetworks.com>
>>>>>
>>>>> This patch adds a new RTM_GETSTATS message to query link stats via netlink
>>>> >from the kernel. RTM_NEWLINK also dumps stats today, but RTM_NEWLINK
>>>>> returns a lot more than just stats and is expensive in some cases when
>>>>> frequent polling for stats from userspace is a common operation.
>>>>>
>>>>> RTM_GETSTATS is an attempt to provide a light weight netlink message
>>>>> to explicity query only link stats from the kernel on an interface.
>>>>> The idea is to also keep it extensible so that new kinds of stats can be
>>>>> added to it in the future.
>>>>>
>>>>> This patch adds the following attribute for NETDEV stats:
>>>>> struct nla_policy ifla_stats_policy[IFLA_STATS_MAX + 1] = {
>>>>>        [IFLA_STATS_LINK64]  = { .len = sizeof(struct rtnl_link_stats64) },
>>>>> };
>>>>>
>>>>> This patch also allows for af family stats (an example af stats for IPV6
>>>>> is available with the second patch in the series).
>>>>>
>>>>> Like any other rtnetlink message, RTM_GETSTATS can be used to get stats of
>>>>> a single interface or all interfaces with NLM_F_DUMP.
>>>>>
>>>>> Future possible new types of stat attributes:
>>>>> - IFLA_MPLS_STATS  (nested. for mpls/mdev stats)
>>>>> - IFLA_EXTENDED_STATS (nested. extended software netdev stats like bridge,
>>>>>  vlan, vxlan etc)
>>>>> - IFLA_EXTENDED_HW_STATS (nested. extended hardware stats which are
>>>>>  available via ethtool today)
>>>>>
>>>>> This patch also declares a filter mask for all stat attributes.
>>>>> User has to provide a mask of stats attributes to query. This will be
>>>>> specified in a new hdr 'struct if_stats_msg' for stats messages.
>>>>>
>>>>> Without any attributes in the filter_mask, no stats will be returned.
>>>>>
>>>>> This patch has been tested with modified iproute2 ifstat.
>>>>>
>>>>> Suggested-by: Jamal Hadi Salim <jhs@mojatatu.com>
>>>>> Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com>
>>>>> ---
>[snip]
>>>>> +
>>>>> +struct if_stats_msg {
>>>>> +	__u8  family;
>>>>> +	__u32 ifindex;
>>>>> +	__u32 filter_mask;
>>>> This limit future extension to only 32 groups of stats. I can imagine
>>>> that more than that can be added, easily.
>>> I thought about that, but it is going to be a while before we run out of the u32.
>>> Most of the other stats will be nested like per logical interface stats or
>>> per hw stats. If we do run out of them, in the future we could add a netlink
>>> attribute for extended filter mask to carry more bits (similar to IFLA_EXT_MASK).
>>> I did also start with just having a IFLA_STATS_EXT_MASK like attribute
>>> to begin with, but since no stats are dumped by default, having a way to easily specify
>>> mask in the hdr will be easier on apps. And this will again be a u32 anyways.
>> I believe that using *any* structs to send over netlink is a mistake.
>> Netlink is capable to transfer everything using attrs. Easy to compose,
>> easy to parse. easy to extend. Couple of more bytes in the message? So what?
>> For newly introduced things, I suggest to do this properly.
>
>Jiri, I hear you. I don't prefer structs for netlink attributes either.

Looks like you clearly prefer structs, otherwise we wouldn't be having
this discussion.


>But in this case, the struct is for the msg hdr which immediately follows the netlink
>header. Its not an attribute value. see my last reply below. rtnetlink_rcv_msg does assume
>a struct and family right after the netlink header.
>All messages define this struct (see struct ndmsg, or ifinfomsg, rtmsg, br_port_msg etc).
>so it is required.

Okay. So let's kee[ that struct as small as possible. Containing only
family and ifindex. That should be enough. 


>
>And I do think this struct simplifies a minimum request message and
>I have also realized that it really helps if this struct contains basic minimum required
>attributes. Ifindex as a filter really helps with RTM_GETSTATS when not used with NLM_F_DUMP
>and filter_mask is important for RTM_GETSTATS with NLM_F_DUMP because without
>a filter no stats are reported. so, making it part of the base message simplifies the stats
>request message from app perspective.

I don't understand this argument. As I wrote earlier, user app can
easily specify filter mask by flag attrs. It is very easy.


>
>yes the struct cannot be extended, but further extensions can be done as netlink attributes.

Exactly, we now now that this is not extendable, we know that if will
likely get extended, yet you still argue for the non-extendable
approach. I don't get it, sorry :(


>
>
>
>
>>
>>
>>>
>>>>  Why don't you use nested
>>>> attribute IFLA_STATS_FILTER with flag attributes for every type?
>>>>  That
>>>> would be easily extendable.
>>> a u8 for each stats selector seems like an overkill.
>>>> Using netlink header struct for this does not look correct to me.
>>>> In past, this was done lot of times and turned out to be a problem later.
>>>>
>>>>
>>> I started with not adding it, but rtnetlink rcv handler looks for family
>>> in the hdr. And hence all of the messages have a struct header
>>> with family as the first field (you can correct me if you find that it is not necessary.)
>>>
>>>
>
Roopa Prabhu March 15, 2016, 7:38 a.m. UTC | #13
On 3/15/16, 12:28 AM, Jiri Pirko wrote:
> Tue, Mar 15, 2016 at 07:24:22AM CET, roopa@cumulusnetworks.com wrote:
>> On 3/14/16, 12:04 PM, Jiri Pirko wrote:
>>> Mon, Mar 14, 2016 at 07:45:23PM CET, roopa@cumulusnetworks.com wrote:
>>>> On 3/14/16, 7:51 AM, Jiri Pirko wrote:
>>>>> Sun, Mar 13, 2016 at 02:56:25AM CET, roopa@cumulusnetworks.com wrote:
>>>>>> From: Roopa Prabhu <roopa@cumulusnetworks.com>
>>>>>>
>>>>>> This patch adds a new RTM_GETSTATS message to query link stats via netlink
>>>>> >from the kernel. RTM_NEWLINK also dumps stats today, but RTM_NEWLINK
>>>>>> returns a lot more than just stats and is expensive in some cases when
>>>>>> frequent polling for stats from userspace is a common operation.
>>>>>>
>>>>>> RTM_GETSTATS is an attempt to provide a light weight netlink message
>>>>>> to explicity query only link stats from the kernel on an interface.
>>>>>> The idea is to also keep it extensible so that new kinds of stats can be
>>>>>> added to it in the future.
>>>>>>
>>>>>> This patch adds the following attribute for NETDEV stats:
>>>>>> struct nla_policy ifla_stats_policy[IFLA_STATS_MAX + 1] = {
>>>>>>        [IFLA_STATS_LINK64]  = { .len = sizeof(struct rtnl_link_stats64) },
>>>>>> };
>>>>>>
>>>>>> This patch also allows for af family stats (an example af stats for IPV6
>>>>>> is available with the second patch in the series).
>>>>>>
>>>>>> Like any other rtnetlink message, RTM_GETSTATS can be used to get stats of
>>>>>> a single interface or all interfaces with NLM_F_DUMP.
>>>>>>
>>>>>> Future possible new types of stat attributes:
>>>>>> - IFLA_MPLS_STATS  (nested. for mpls/mdev stats)
>>>>>> - IFLA_EXTENDED_STATS (nested. extended software netdev stats like bridge,
>>>>>>  vlan, vxlan etc)
>>>>>> - IFLA_EXTENDED_HW_STATS (nested. extended hardware stats which are
>>>>>>  available via ethtool today)
>>>>>>
>>>>>> This patch also declares a filter mask for all stat attributes.
>>>>>> User has to provide a mask of stats attributes to query. This will be
>>>>>> specified in a new hdr 'struct if_stats_msg' for stats messages.
>>>>>>
>>>>>> Without any attributes in the filter_mask, no stats will be returned.
>>>>>>
>>>>>> This patch has been tested with modified iproute2 ifstat.
>>>>>>
>>>>>> Suggested-by: Jamal Hadi Salim <jhs@mojatatu.com>
>>>>>> Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com>
>>>>>> ---
>> [snip]
>>>>>> +
>>>>>> +struct if_stats_msg {
>>>>>> +	__u8  family;
>>>>>> +	__u32 ifindex;
>>>>>> +	__u32 filter_mask;
>>>>> This limit future extension to only 32 groups of stats. I can imagine
>>>>> that more than that can be added, easily.
>>>> I thought about that, but it is going to be a while before we run out of the u32.
>>>> Most of the other stats will be nested like per logical interface stats or
>>>> per hw stats. If we do run out of them, in the future we could add a netlink
>>>> attribute for extended filter mask to carry more bits (similar to IFLA_EXT_MASK).
>>>> I did also start with just having a IFLA_STATS_EXT_MASK like attribute
>>>> to begin with, but since no stats are dumped by default, having a way to easily specify
>>>> mask in the hdr will be easier on apps. And this will again be a u32 anyways.
>>> I believe that using *any* structs to send over netlink is a mistake.
>>> Netlink is capable to transfer everything using attrs. Easy to compose,
>>> easy to parse. easy to extend. Couple of more bytes in the message? So what?
>>> For newly introduced things, I suggest to do this properly.
>> Jiri, I hear you. I don't prefer structs for netlink attributes either.
> Looks like you clearly prefer structs, otherwise we wouldn't be having
> this discussion.
>
>
>> But in this case, the struct is for the msg hdr which immediately follows the netlink
>> header. Its not an attribute value. see my last reply below. rtnetlink_rcv_msg does assume
>> a struct and family right after the netlink header.
>> All messages define this struct (see struct ndmsg, or ifinfomsg, rtmsg, br_port_msg etc).
>> so it is required.
> Okay. So let's kee[ that struct as small as possible. Containing only
> family and ifindex. That should be enough. 

how does it matter if we have reached an agreement that the struct is required ?.
unlike other messages, a filter_mask is an important and must attribute for
stats. If you are worried about us running out of bits in u32, the netlink attribute you will
 define for the filter_mask will also be u32 to begin with.
So, i don't understand what we gain from making filter_mask a separate attribute right now.
I would have agreed with your argument if filter_mask was optional.


>> And I do think this struct simplifies a minimum request message and
>> I have also realized that it really helps if this struct contains basic minimum required
>> attributes. Ifindex as a filter really helps with RTM_GETSTATS when not used with NLM_F_DUMP
>> and filter_mask is important for RTM_GETSTATS with NLM_F_DUMP because without
>> a filter no stats are reported. so, making it part of the base message simplifies the stats
>> request message from app perspective.
> I don't understand this argument. As I wrote earlier, user app can
> easily specify filter mask by flag attrs. It is very easy.
>
>
>> yes the struct cannot be extended, but further extensions can be done as netlink attributes.
> Exactly, we now now that this is not extendable, we know that if will
> likely get extended, yet you still argue for the non-extendable
> approach. I don't get it, sorry :(
>
Jiri Pirko March 15, 2016, 7:52 a.m. UTC | #14
Tue, Mar 15, 2016 at 08:38:51AM CET, roopa@cumulusnetworks.com wrote:
>On 3/15/16, 12:28 AM, Jiri Pirko wrote:
>> Tue, Mar 15, 2016 at 07:24:22AM CET, roopa@cumulusnetworks.com wrote:
>>> On 3/14/16, 12:04 PM, Jiri Pirko wrote:
>>>> Mon, Mar 14, 2016 at 07:45:23PM CET, roopa@cumulusnetworks.com wrote:
>>>>> On 3/14/16, 7:51 AM, Jiri Pirko wrote:
>>>>>> Sun, Mar 13, 2016 at 02:56:25AM CET, roopa@cumulusnetworks.com wrote:
>>>>>>> From: Roopa Prabhu <roopa@cumulusnetworks.com>
>>>>>>>
>>>>>>> This patch adds a new RTM_GETSTATS message to query link stats via netlink
>>>>>> >from the kernel. RTM_NEWLINK also dumps stats today, but RTM_NEWLINK
>>>>>>> returns a lot more than just stats and is expensive in some cases when
>>>>>>> frequent polling for stats from userspace is a common operation.
>>>>>>>
>>>>>>> RTM_GETSTATS is an attempt to provide a light weight netlink message
>>>>>>> to explicity query only link stats from the kernel on an interface.
>>>>>>> The idea is to also keep it extensible so that new kinds of stats can be
>>>>>>> added to it in the future.
>>>>>>>
>>>>>>> This patch adds the following attribute for NETDEV stats:
>>>>>>> struct nla_policy ifla_stats_policy[IFLA_STATS_MAX + 1] = {
>>>>>>>        [IFLA_STATS_LINK64]  = { .len = sizeof(struct rtnl_link_stats64) },
>>>>>>> };
>>>>>>>
>>>>>>> This patch also allows for af family stats (an example af stats for IPV6
>>>>>>> is available with the second patch in the series).
>>>>>>>
>>>>>>> Like any other rtnetlink message, RTM_GETSTATS can be used to get stats of
>>>>>>> a single interface or all interfaces with NLM_F_DUMP.
>>>>>>>
>>>>>>> Future possible new types of stat attributes:
>>>>>>> - IFLA_MPLS_STATS  (nested. for mpls/mdev stats)
>>>>>>> - IFLA_EXTENDED_STATS (nested. extended software netdev stats like bridge,
>>>>>>>  vlan, vxlan etc)
>>>>>>> - IFLA_EXTENDED_HW_STATS (nested. extended hardware stats which are
>>>>>>>  available via ethtool today)
>>>>>>>
>>>>>>> This patch also declares a filter mask for all stat attributes.
>>>>>>> User has to provide a mask of stats attributes to query. This will be
>>>>>>> specified in a new hdr 'struct if_stats_msg' for stats messages.
>>>>>>>
>>>>>>> Without any attributes in the filter_mask, no stats will be returned.
>>>>>>>
>>>>>>> This patch has been tested with modified iproute2 ifstat.
>>>>>>>
>>>>>>> Suggested-by: Jamal Hadi Salim <jhs@mojatatu.com>
>>>>>>> Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com>
>>>>>>> ---
>>> [snip]
>>>>>>> +
>>>>>>> +struct if_stats_msg {
>>>>>>> +	__u8  family;
>>>>>>> +	__u32 ifindex;
>>>>>>> +	__u32 filter_mask;
>>>>>> This limit future extension to only 32 groups of stats. I can imagine
>>>>>> that more than that can be added, easily.
>>>>> I thought about that, but it is going to be a while before we run out of the u32.
>>>>> Most of the other stats will be nested like per logical interface stats or
>>>>> per hw stats. If we do run out of them, in the future we could add a netlink
>>>>> attribute for extended filter mask to carry more bits (similar to IFLA_EXT_MASK).
>>>>> I did also start with just having a IFLA_STATS_EXT_MASK like attribute
>>>>> to begin with, but since no stats are dumped by default, having a way to easily specify
>>>>> mask in the hdr will be easier on apps. And this will again be a u32 anyways.
>>>> I believe that using *any* structs to send over netlink is a mistake.
>>>> Netlink is capable to transfer everything using attrs. Easy to compose,
>>>> easy to parse. easy to extend. Couple of more bytes in the message? So what?
>>>> For newly introduced things, I suggest to do this properly.
>>> Jiri, I hear you. I don't prefer structs for netlink attributes either.
>> Looks like you clearly prefer structs, otherwise we wouldn't be having
>> this discussion.
>>
>>
>>> But in this case, the struct is for the msg hdr which immediately follows the netlink
>>> header. Its not an attribute value. see my last reply below. rtnetlink_rcv_msg does assume
>>> a struct and family right after the netlink header.
>>> All messages define this struct (see struct ndmsg, or ifinfomsg, rtmsg, br_port_msg etc).
>>> so it is required.
>> Okay. So let's kee[ that struct as small as possible. Containing only
>> family and ifindex. That should be enough. 
>
>how does it matter if we have reached an agreement that the struct is required ?.
>unlike other messages, a filter_mask is an important and must attribute for
>stats. If you are worried about us running out of bits in u32, the netlink attribute you will
> define for the filter_mask will also be u32 to begin with.

No, you are missing the point:

enum {
	IFLA_STATS_UNSPEC,
	IFLA_STATS_FILTER, /* nest */
	IFLA_STATS_LINK64,
	IFLA_STATS_INET6,
	__IFLA_STATS_MAX,
};

#define IFLA_STATS_MAX (__IFLA_STATS_MAX - 1)

enum {
	IFLA_STATS_FILTER_UNSPEC,
	IFLA_STATS_FILTER_LINK64, /* flag */
	IFLA_STATS_FILTER_INET6, /* flag */
	...
	IFLA_STATS_FILTER_WHATEVER, /* flag */
	__IFLA_STATS_FILTER_MAX,
};

#define IFLA_STATS_FILTERMAX (__IFLA_STATS_FILTER_MAX - 1)


>So, i don't understand what we gain from making filter_mask a separate attribute right now.
>I would have agreed with your argument if filter_mask was optional.

Optional or not, that does not matter.


>
>
>>> And I do think this struct simplifies a minimum request message and
>>> I have also realized that it really helps if this struct contains basic minimum required
>>> attributes. Ifindex as a filter really helps with RTM_GETSTATS when not used with NLM_F_DUMP
>>> and filter_mask is important for RTM_GETSTATS with NLM_F_DUMP because without
>>> a filter no stats are reported. so, making it part of the base message simplifies the stats
>>> request message from app perspective.
>> I don't understand this argument. As I wrote earlier, user app can
>> easily specify filter mask by flag attrs. It is very easy.
>>
>>
>>> yes the struct cannot be extended, but further extensions can be done as netlink attributes.
>> Exactly, we now now that this is not extendable, we know that if will
>> likely get extended, yet you still argue for the non-extendable
>> approach. I don't get it, sorry :(
>>
>
Roopa Prabhu March 15, 2016, 8:08 a.m. UTC | #15
On 3/15/16, 12:52 AM, Jiri Pirko wrote:
> Tue, Mar 15, 2016 at 08:38:51AM CET, roopa@cumulusnetworks.com wrote:
>>
[snip]
>> how does it matter if we have reached an agreement that the struct is required ?.
>> unlike other messages, a filter_mask is an important and must attribute for
>> stats. If you are worried about us running out of bits in u32, the netlink attribute you will
>> define for the filter_mask will also be u32 to begin with.
> No, you are missing the point:

I understood what you meant the first time...(to which i did comment if you really wanted a u8 for every filter attribute).

>
> enum {
> 	IFLA_STATS_UNSPEC,
> 	IFLA_STATS_FILTER, /* nest */
> 	IFLA_STATS_LINK64,
> 	IFLA_STATS_INET6,
> 	__IFLA_STATS_MAX,
> };
>
> #define IFLA_STATS_MAX (__IFLA_STATS_MAX - 1)
>
> enum {
> 	IFLA_STATS_FILTER_UNSPEC,
> 	IFLA_STATS_FILTER_LINK64, /* flag */
> 	IFLA_STATS_FILTER_INET6, /* flag */
> 	...
> 	IFLA_STATS_FILTER_WHATEVER, /* flag */
> 	__IFLA_STATS_FILTER_MAX,
> };
>
> #define IFLA_STATS_FILTERMAX (__IFLA_STATS_FILTER_MAX - 1)

Apart from the usability concern i have described earlier, this just seems an overkill ...having to redefine every attribute.
Nicolas Dichtel March 15, 2016, 8:20 a.m. UTC | #16
Le 15/03/2016 07:30, roopa a écrit :
> On 3/14/16, 8:00 AM, Nicolas Dichtel wrote:
>> Le 13/03/2016 02:56, Roopa Prabhu a écrit :
[snip]
>>> diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h
>>> index ca764b5..2bbb300 100644
>>> --- a/include/uapi/linux/rtnetlink.h
>>> +++ b/include/uapi/linux/rtnetlink.h
>>> @@ -139,6 +139,13 @@ enum {
>>>        RTM_GETNSID = 90,
>>>    #define RTM_GETNSID RTM_GETNSID
>>>
>>> +    RTM_NEWSTATS = 92,
>>> +#define RTM_NEWSTATS RTM_NEWSTATS
>>> +    RTM_DELSTATS = 93,
>>> +#define RTM_DELSTATS RTM_DELSTATS
>> RTM_DELSTATS is never used.
>
> yeah, i had to define it just because rtnetlink_rcv_msg seems to expect all three to be
> there when it tries to check if it is a get msg. But, i could sure not declare this
> but make rtnetlink_rcv_msg happy by keeping the GET msg at the right offset.
Not sure to understand what is the problem.
Look at RTM_NEWNETCONF/RTM_GETNETCONF, there is no DEL command.

>>
>>> +    RTM_GETSTATS = 94,
>>> +#define RTM_GETSTATS RTM_GETSTATS
>>> +
>>>        __RTM_MAX,
>>>    #define RTM_MAX        (((__RTM_MAX + 3) & ~3) - 1)
>>>    };
>>> diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
>>> index d2d9e5e..d1e3d17 100644
>>> --- a/net/core/rtnetlink.c
>>> +++ b/net/core/rtnetlink.c
>> [snip]
>>> +static noinline size_t if_nlmsg_stats_size(const struct net_device *dev,
>>> +                       u32 filter_mask)
>> Why are you using the 'noinline' attribute?
>
> I actually picked it up from if_nlmsg_size ...
If there is no better reason, I suggest to remove it ;-)
It was introduced by Eric (commit 9e34a5b51684 ("net/core: EXPORT_SYMBOL
cleanups").
Eric, do you remember why you add this 'noinline' attribute?
Jiri Pirko March 15, 2016, 8:24 a.m. UTC | #17
Tue, Mar 15, 2016 at 09:08:44AM CET, roopa@cumulusnetworks.com wrote:
>On 3/15/16, 12:52 AM, Jiri Pirko wrote:
>> Tue, Mar 15, 2016 at 08:38:51AM CET, roopa@cumulusnetworks.com wrote:
>>>
>[snip]
>>> how does it matter if we have reached an agreement that the struct is required ?.
>>> unlike other messages, a filter_mask is an important and must attribute for
>>> stats. If you are worried about us running out of bits in u32, the netlink attribute you will
>>> define for the filter_mask will also be u32 to begin with.
>> No, you are missing the point:
>
>I understood what you meant the first time...(to which i did comment if you really wanted a u8 for every filter attribute).

You mentioned "u32 attr" in last reply and that is clearly something I
didn't suggested and wasn't talking about at all, so I had to make
things clear.


>
>>
>> enum {
>> 	IFLA_STATS_UNSPEC,
>> 	IFLA_STATS_FILTER, /* nest */
>> 	IFLA_STATS_LINK64,
>> 	IFLA_STATS_INET6,
>> 	__IFLA_STATS_MAX,
>> };
>>
>> #define IFLA_STATS_MAX (__IFLA_STATS_MAX - 1)
>>
>> enum {
>> 	IFLA_STATS_FILTER_UNSPEC,
>> 	IFLA_STATS_FILTER_LINK64, /* flag */
>> 	IFLA_STATS_FILTER_INET6, /* flag */
>> 	...
>> 	IFLA_STATS_FILTER_WHATEVER, /* flag */
>> 	__IFLA_STATS_FILTER_MAX,
>> };
>>
>> #define IFLA_STATS_FILTERMAX (__IFLA_STATS_FILTER_MAX - 1)
>
>Apart from the usability concern i have described earlier, this just seems an overkill ...having to redefine every attribute.

I don't hear the usability concern. User knows how to compose/parse
attrs, no problem there.

You just add flag attr as if you add bit in u32 mask. The same thing.

I don't understand why you say this is an "overkill". I believe that
this is the correct way to do.
Roopa Prabhu March 16, 2016, 6:44 a.m. UTC | #18
On 3/15/16, 1:20 AM, Nicolas Dichtel wrote:
> Le 15/03/2016 07:30, roopa a écrit :
>> On 3/14/16, 8:00 AM, Nicolas Dichtel wrote:
>>> Le 13/03/2016 02:56, Roopa Prabhu a écrit :
> [snip]
>>>> diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h
>>>> index ca764b5..2bbb300 100644
>>>> --- a/include/uapi/linux/rtnetlink.h
>>>> +++ b/include/uapi/linux/rtnetlink.h
>>>> @@ -139,6 +139,13 @@ enum {
>>>>        RTM_GETNSID = 90,
>>>>    #define RTM_GETNSID RTM_GETNSID
>>>>
>>>> +    RTM_NEWSTATS = 92,
>>>> +#define RTM_NEWSTATS RTM_NEWSTATS
>>>> +    RTM_DELSTATS = 93,
>>>> +#define RTM_DELSTATS RTM_DELSTATS
>>> RTM_DELSTATS is never used.
>>
>> yeah, i had to define it just because rtnetlink_rcv_msg seems to expect all three to be
>> there when it tries to check if it is a get msg. But, i could sure not declare this
>> but make rtnetlink_rcv_msg happy by keeping the GET msg at the right offset.
> Not sure to understand what is the problem.
> Look at RTM_NEWNETCONF/RTM_GETNETCONF, there is no DEL command.

yep, same thing as RTM_NEWNETCONF, there is no DEL but GET becomes +2.
I was trying to say that I can do the same also. will drop DEL and make GET
RTM_NEWSTATS + 2.  will take care of it in v2. thanks.

>
>>>
>>>> +    RTM_GETSTATS = 94,
>>>> +#define RTM_GETSTATS RTM_GETSTATS
>>>> +
>>>>        __RTM_MAX,
>>>>    #define RTM_MAX        (((__RTM_MAX + 3) & ~3) - 1)
>>>>    };
>>>> diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
>>>> index d2d9e5e..d1e3d17 100644
>>>> --- a/net/core/rtnetlink.c
>>>> +++ b/net/core/rtnetlink.c
>>> [snip]
>>>> +static noinline size_t if_nlmsg_stats_size(const struct net_device *dev,
>>>> +                       u32 filter_mask)
>>> Why are you using the 'noinline' attribute?
>>
>> I actually picked it up from if_nlmsg_size ...
> If there is no better reason, I suggest to remove it ;-)
> It was introduced by Eric (commit 9e34a5b51684 ("net/core: EXPORT_SYMBOL
> cleanups").
> Eric, do you remember why you add this 'noinline' attribute?
>
David Miller March 21, 2016, 7:04 p.m. UTC | #19
From: Jiri Pirko <jiri@resnulli.us>
Date: Tue, 15 Mar 2016 09:24:17 +0100

> Tue, Mar 15, 2016 at 09:08:44AM CET, roopa@cumulusnetworks.com wrote:
>>Apart from the usability concern i have described earlier, this just seems an overkill ...having to redefine every attribute.
> 
> I don't hear the usability concern. User knows how to compose/parse
> attrs, no problem there.

But if we compartmentalize the most common use case into the header
itself, then the user doesn't even have to deal with attributes at
all.

This is what we've (intelligently, not stupidly) done in the past.

When we build a new facility, we put the most fundamental elements
into the message header structure.

There is nothing wrong with this at all.

If we run out of bits for the filter mask, we can (intelligently, not
stupidly) add a new attribute for that.

This is nothing in the realm of "good netlink design" that says that
we should use attributes for everything.  In fact I would say that
netlink attribute diahrrea should be avoided.

Therefore I am more than happy to see the filter mask stay in the
message header, and I will apply changes from Roopa which implement
it this way.

I've read your arguments, I understand your position and your
reasoning, but I simply disagree with you.

Thanks.
diff mbox

Patch

diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h
index 2f87c1b..fa68158 100644
--- a/include/net/rtnetlink.h
+++ b/include/net/rtnetlink.h
@@ -131,6 +131,11 @@  struct rtnl_af_ops {
 						    const struct nlattr *attr);
 	int			(*set_link_af)(struct net_device *dev,
 					       const struct nlattr *attr);
+	size_t			(*get_link_af_stats_size)(const struct net_device *dev,
+							  u32 filter_mask);
+	int			(*fill_link_af_stats)(struct sk_buff *skb,
+						      const struct net_device *dev,
+						      u32 filter_mask);
 };
 
 void __rtnl_af_unregister(struct rtnl_af_ops *ops);
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index 249eef9..0840f3e 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -741,4 +741,23 @@  enum {
 
 #define IFLA_HSR_MAX (__IFLA_HSR_MAX - 1)
 
+/* STATS section */
+
+struct if_stats_msg {
+	__u8  family;
+	__u32 ifindex;
+	__u32 filter_mask;
+};
+
+enum {
+	IFLA_STATS_UNSPEC,
+	IFLA_STATS_LINK64,
+	IFLA_STATS_INET6,
+	__IFLA_STATS_MAX,
+};
+
+#define IFLA_STATS_MAX (__IFLA_STATS_MAX - 1)
+
+#define IFLA_STATS_FILTER_BIT(ATTR)	(1 << (ATTR))
+
 #endif /* _UAPI_LINUX_IF_LINK_H */
diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h
index ca764b5..2bbb300 100644
--- a/include/uapi/linux/rtnetlink.h
+++ b/include/uapi/linux/rtnetlink.h
@@ -139,6 +139,13 @@  enum {
 	RTM_GETNSID = 90,
 #define RTM_GETNSID RTM_GETNSID
 
+	RTM_NEWSTATS = 92,
+#define RTM_NEWSTATS RTM_NEWSTATS
+	RTM_DELSTATS = 93,
+#define RTM_DELSTATS RTM_DELSTATS
+	RTM_GETSTATS = 94,
+#define RTM_GETSTATS RTM_GETSTATS
+
 	__RTM_MAX,
 #define RTM_MAX		(((__RTM_MAX + 3) & ~3) - 1)
 };
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index d2d9e5e..d1e3d17 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -3410,6 +3410,203 @@  out:
 	return err;
 }
 
+static int rtnl_fill_statsinfo(struct sk_buff *skb, struct net_device *dev,
+			       int type, u32 pid, u32 seq, u32 change,
+			       unsigned int flags, unsigned int filter_mask)
+{
+	const struct rtnl_link_stats64 *stats;
+	struct rtnl_link_stats64 temp;
+	struct if_stats_msg *ifsm;
+	struct nlmsghdr *nlh;
+	struct rtnl_af_ops *af_ops;
+	struct nlattr *attr;
+
+	ASSERT_RTNL();
+
+	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ifsm), flags);
+	if (!nlh)
+		return -EMSGSIZE;
+
+	ifsm = nlmsg_data(nlh);
+	ifsm->ifindex = dev->ifindex;
+	ifsm->filter_mask = filter_mask;
+
+	if (filter_mask & IFLA_STATS_FILTER_BIT(IFLA_STATS_LINK64)) {
+		attr = nla_reserve(skb, IFLA_STATS_LINK64,
+				   sizeof(struct rtnl_link_stats64));
+		if (!attr)
+			return -EMSGSIZE;
+
+		stats = dev_get_stats(dev, &temp);
+
+		copy_rtnl_link_stats64(nla_data(attr), stats);
+	}
+
+	list_for_each_entry(af_ops, &rtnl_af_ops, list) {
+		if (af_ops->fill_link_af_stats) {
+			int err;
+
+			err = af_ops->fill_link_af_stats(skb, dev, filter_mask);
+			if (err < 0)
+				goto nla_put_failure;
+		}
+	}
+
+	nlmsg_end(skb, nlh);
+
+	return 0;
+
+nla_put_failure:
+	nlmsg_cancel(skb, nlh);
+
+	return -EMSGSIZE;
+}
+
+static const struct nla_policy ifla_stats_policy[IFLA_STATS_MAX + 1] = {
+	[IFLA_STATS_LINK64]	= { .len = sizeof(struct rtnl_link_stats64) },
+};
+
+static size_t rtnl_link_get_af_stats_size(const struct net_device *dev,
+					  u32 filter_mask)
+{
+	struct rtnl_af_ops *af_ops;
+	size_t size = 0;
+
+	list_for_each_entry(af_ops, &rtnl_af_ops, list) {
+		if (af_ops->get_link_af_stats_size)
+			size += af_ops->get_link_af_stats_size(dev,
+							       filter_mask);
+	}
+
+	return size;
+}
+
+static noinline size_t if_nlmsg_stats_size(const struct net_device *dev,
+					   u32 filter_mask)
+{
+	size_t size = 0;
+
+	if (filter_mask & IFLA_STATS_FILTER_BIT(IFLA_STATS_LINK64))
+		size += nla_total_size(sizeof(struct rtnl_link_stats64));
+
+	size += rtnl_link_get_af_stats_size(dev, filter_mask);
+
+	return size;
+}
+
+static int rtnl_stats_get(struct sk_buff *skb, struct nlmsghdr *nlh)
+{
+	struct net *net = sock_net(skb->sk);
+	struct if_stats_msg *ifsm;
+	struct net_device *dev = NULL;
+	struct sk_buff *nskb;
+	u32 filter_mask;
+	int err;
+
+	ifsm = nlmsg_data(nlh);
+	if (ifsm->ifindex > 0)
+		dev = __dev_get_by_index(net, ifsm->ifindex);
+	else
+		return -EINVAL;
+
+	if (!dev)
+		return -ENODEV;
+
+	filter_mask = ifsm->filter_mask;
+	if (!filter_mask)
+		return -EINVAL;
+
+	nskb = nlmsg_new(if_nlmsg_stats_size(dev, filter_mask), GFP_KERNEL);
+	if (!nskb)
+		return -ENOBUFS;
+
+	err = rtnl_fill_statsinfo(nskb, dev, RTM_NEWSTATS,
+				  NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0,
+				  0, filter_mask);
+	if (err < 0) {
+		/* -EMSGSIZE implies BUG in if_nlmsg_stats_size */
+		WARN_ON(err == -EMSGSIZE);
+		kfree_skb(nskb);
+	} else {
+		err = rtnl_unicast(nskb, net, NETLINK_CB(skb).portid);
+	}
+
+	return err;
+}
+
+static u16 rtnl_stats_calcit(struct sk_buff *skb, struct nlmsghdr *nlh)
+{
+	struct net *net = sock_net(skb->sk);
+	struct net_device *dev;
+	u16 min_ifinfo_dump_size = 0;
+	struct if_stats_msg *ifsm;
+	u32 filter_mask;
+
+	ifsm = nlmsg_data(nlh);
+	filter_mask = ifsm->filter_mask;
+
+	/* traverse the list of net devices and compute the minimum
+	 * buffer size based upon the filter mask.
+	 */
+	list_for_each_entry(dev, &net->dev_base_head, dev_list) {
+		min_ifinfo_dump_size = max_t(u16, min_ifinfo_dump_size,
+					     if_nlmsg_stats_size(dev,
+								 filter_mask));
+	}
+
+	return min_ifinfo_dump_size;
+}
+
+static int rtnl_stats_dump(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	struct net *net = sock_net(skb->sk);
+	struct if_stats_msg *ifsm;
+	int h, s_h;
+	int idx = 0, s_idx;
+	struct net_device *dev;
+	struct hlist_head *head;
+	unsigned int flags = NLM_F_MULTI;
+	u32 filter_mask = 0;
+	int err;
+
+	s_h = cb->args[0];
+	s_idx = cb->args[1];
+
+	cb->seq = net->dev_base_seq;
+
+	ifsm = nlmsg_data(cb->nlh);
+	filter_mask = ifsm->filter_mask;
+
+	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
+		idx = 0;
+		head = &net->dev_index_head[h];
+		hlist_for_each_entry(dev, head, index_hlist) {
+			if (idx < s_idx)
+				goto cont;
+			err = rtnl_fill_statsinfo(skb, dev, RTM_NEWSTATS,
+						  NETLINK_CB(cb->skb).portid,
+						  cb->nlh->nlmsg_seq, 0,
+						  flags, filter_mask);
+			/* If we ran out of room on the first message,
+			 * we're in trouble
+			 */
+			WARN_ON((err == -EMSGSIZE) && (skb->len == 0));
+
+			if (err < 0)
+				goto out;
+
+			nl_dump_check_consistent(cb, nlmsg_hdr(skb));
+cont:
+			idx++;
+		}
+	}
+out:
+	cb->args[1] = idx;
+	cb->args[0] = h;
+
+	return skb->len;
+}
+
 /* Process one rtnetlink message. */
 
 static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
@@ -3559,4 +3756,7 @@  void __init rtnetlink_init(void)
 	rtnl_register(PF_BRIDGE, RTM_GETLINK, NULL, rtnl_bridge_getlink, NULL);
 	rtnl_register(PF_BRIDGE, RTM_DELLINK, rtnl_bridge_dellink, NULL, NULL);
 	rtnl_register(PF_BRIDGE, RTM_SETLINK, rtnl_bridge_setlink, NULL, NULL);
+
+	rtnl_register(PF_UNSPEC, RTM_GETSTATS, rtnl_stats_get, rtnl_stats_dump,
+		      rtnl_stats_calcit);
 }