[v3] hv_netvsc: Add per-cpu ethtool stats for netvsc

Message ID: 20180724012622.26873-1-yidren@linuxonhyperv.com
State: Changes Requested, archived
Delegated to: David Miller

Commit Message

Yidong Ren July 24, 2018, 1:26 a.m. UTC
From: Yidong Ren <yidren@microsoft.com>

This patch implements the following ethtool stats fields for netvsc:
cpu<n>_tx/rx_packets/bytes
cpu<n>_vf_tx/rx_packets/bytes

The corresponding per-cpu counters already exist in the current code.
Exposing them will help with troubleshooting performance issues.
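
For example, the new fields appear in "ethtool -S" output roughly like
this (illustrative excerpt; values and CPU count are hypothetical):

  $ ethtool -S eth0 | grep cpu0
       cpu0_rx_packets: 18354
       cpu0_rx_bytes: 2541924
       cpu0_tx_packets: 15706
       cpu0_tx_bytes: 1864342
       cpu0_vf_rx_packets: 0
       cpu0_vf_rx_bytes: 0
       cpu0_vf_tx_packets: 0
       cpu0_vf_tx_bytes: 0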

Signed-off-by: Yidong Ren <yidren@microsoft.com>
---
Changes since v2:
 * Reimplemented with kvmalloc instead of alloc_percpu
 drivers/net/hyperv/hyperv_net.h |  11 ++++
 drivers/net/hyperv/netvsc_drv.c | 103 +++++++++++++++++++++++++++++++-
 2 files changed, 112 insertions(+), 2 deletions(-)

Comments

Yidong Ren July 24, 2018, 1:42 a.m. UTC | #1
> From: Yidong Ren <yidren@linuxonhyperv.com>
> Sent: Monday, July 23, 2018 6:26 PM
> +	pcpu_sum = kvmalloc(sizeof(struct netvsc_ethtool_pcpu_stats) *
> +			num_present_cpus(), GFP_KERNEL);

Since there is no plan for CPU hotplug in Hyper-V in the short term, it is
fine to use num_present_cpus() for now. We can move to debugfs later if
necessary.
Vitaly Kuznetsov July 24, 2018, 11 a.m. UTC | #2
Yidong Ren <Yidong.Ren@microsoft.com> writes:

>> From: Yidong Ren <yidren@linuxonhyperv.com>
>> Sent: Monday, July 23, 2018 6:26 PM
>> +	pcpu_sum = kvmalloc(sizeof(struct netvsc_ethtool_pcpu_stats) *
>> +			num_present_cpus(), GFP_KERNEL);
>
> Since there is no plan for CPU hotplug in Hyper-V in the short term, it is
> fine to use num_present_cpus() for now. We can move to debugfs later if
> necessary.

While you do for_each_present_cpu() in netvsc_get_ethtool_stats(),
netvsc_get_pcpu_stats() does for_each_possible_cpu(). This looks
inconsistent.

The allocation you're doing here is short-lived, so I would suggest using
possible_cpus everywhere. Even though there's no CPU hotplug on Hyper-V at
this moment, it could appear later and we'd get a hard-to-find issue.
Moreover, we may consider using the netvsc driver on e.g. KVM with Hyper-V
enlightenments, and KVM has CPU hotplug already.
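
To make the hazard concrete, here is a hypothetical sketch (not code from
the patch): a buffer sized for present CPUs but indexed by possible CPU ids
can be written past its end.

	struct netvsc_ethtool_pcpu_stats *sum;
	int i;

	sum = kvmalloc_array(num_present_cpus(), sizeof(*sum), GFP_KERNEL);
	if (!sum)
		return;
	/* possible CPU ids are not bounded by num_present_cpus() */
	for_each_possible_cpu(i)
		sum[i].rx_packets = 0;	/* may write out of bounds */
	kvfree(sum);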
Yidong Ren July 25, 2018, 10:54 p.m. UTC | #3
> From: Vitaly Kuznetsov <vkuznets@redhat.com>
> While you do for_each_present_cpu() in netvsc_get_ethtool_stats(),
> netvsc_get_pcpu_stats() does for_each_possible_cpu(). This looks
> inconsistent.

I made a mistake there. Thanks for catching it.
 
> The allocation you're doing here is short-lived, so I would suggest using
> possible_cpus everywhere. Even though there's no CPU hotplug on Hyper-V at
> this moment, it could appear later and we'd get a hard-to-find issue.
> Moreover, we may consider using the netvsc driver on e.g. KVM with Hyper-V
> enlightenments, and KVM has CPU hotplug already.

That would make the output very long and mostly useless. I will submit a
revision that allocates for num_possible_cpus() but only copies out stats
for each present CPU, roughly as sketched below.
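
A minimal sketch of that planned change (hypothetical; the actual v4 may
differ, and note the sketch also checks the allocation for failure, which
the posted hunk below does not):

	pcpu_sum = kvmalloc_array(num_possible_cpus(),
				  sizeof(struct netvsc_ethtool_pcpu_stats),
				  GFP_KERNEL);
	if (!pcpu_sum)
		return;

	/* netvsc_get_pcpu_stats() fills an entry for each possible CPU, */
	netvsc_get_pcpu_stats(dev, pcpu_sum);

	/* ...but only present CPUs are copied into the ethtool output. */
	for_each_present_cpu(cpu) {
		struct netvsc_ethtool_pcpu_stats *this_sum = &pcpu_sum[cpu];

		for (j = 0; j < ARRAY_SIZE(pcpu_stats); j++)
			data[i++] = *(u64 *)((void *)this_sum
					     + pcpu_stats[j].offset);
	}
	kvfree(pcpu_sum);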

-Yidong

Patch

diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
index 4b6e308199d2..a32ded5b4f41 100644
--- a/drivers/net/hyperv/hyperv_net.h
+++ b/drivers/net/hyperv/hyperv_net.h
@@ -873,6 +873,17 @@ struct netvsc_ethtool_stats {
 	unsigned long wake_queue;
 };
 
+struct netvsc_ethtool_pcpu_stats {
+	u64     rx_packets;
+	u64     rx_bytes;
+	u64     tx_packets;
+	u64     tx_bytes;
+	u64     vf_rx_packets;
+	u64     vf_rx_bytes;
+	u64     vf_tx_packets;
+	u64     vf_tx_bytes;
+};
+
 struct netvsc_vf_pcpu_stats {
 	u64     rx_packets;
 	u64     rx_bytes;
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index dd1d6e115145..51f9cab2da5b 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -1118,6 +1118,64 @@ static void netvsc_get_vf_stats(struct net_device *net,
 	}
 }
 
+static void netvsc_get_pcpu_stats(struct net_device *net,
+				  struct netvsc_ethtool_pcpu_stats *pcpu_tot)
+{
+	struct net_device_context *ndev_ctx = netdev_priv(net);
+	struct netvsc_device *nvdev = rcu_dereference_rtnl(ndev_ctx->nvdev);
+	int i;
+
+	/* fetch percpu stats of vf */
+	for_each_possible_cpu(i) {
+		const struct netvsc_vf_pcpu_stats *stats =
+			per_cpu_ptr(ndev_ctx->vf_stats, i);
+		struct netvsc_ethtool_pcpu_stats *this_tot = &pcpu_tot[i];
+		unsigned int start;
+
+		do {
+			start = u64_stats_fetch_begin_irq(&stats->syncp);
+			this_tot->vf_rx_packets = stats->rx_packets;
+			this_tot->vf_tx_packets = stats->tx_packets;
+			this_tot->vf_rx_bytes = stats->rx_bytes;
+			this_tot->vf_tx_bytes = stats->tx_bytes;
+		} while (u64_stats_fetch_retry_irq(&stats->syncp, start));
+		this_tot->rx_packets = this_tot->vf_rx_packets;
+		this_tot->tx_packets = this_tot->vf_tx_packets;
+		this_tot->rx_bytes   = this_tot->vf_rx_bytes;
+		this_tot->tx_bytes   = this_tot->vf_tx_bytes;
+	}
+
+	/* fetch percpu stats of netvsc */
+	for (i = 0; i < nvdev->num_chn; i++) {
+		const struct netvsc_channel *nvchan = &nvdev->chan_table[i];
+		const struct netvsc_stats *stats;
+		struct netvsc_ethtool_pcpu_stats *this_tot =
+			&pcpu_tot[nvchan->channel->target_cpu];
+		u64 packets, bytes;
+		unsigned int start;
+
+		stats = &nvchan->tx_stats;
+		do {
+			start = u64_stats_fetch_begin_irq(&stats->syncp);
+			packets = stats->packets;
+			bytes = stats->bytes;
+		} while (u64_stats_fetch_retry_irq(&stats->syncp, start));
+
+		this_tot->tx_bytes	+= bytes;
+		this_tot->tx_packets	+= packets;
+
+		stats = &nvchan->rx_stats;
+		do {
+			start = u64_stats_fetch_begin_irq(&stats->syncp);
+			packets = stats->packets;
+			bytes = stats->bytes;
+		} while (u64_stats_fetch_retry_irq(&stats->syncp, start));
+
+		this_tot->rx_bytes	+= bytes;
+		this_tot->rx_packets	+= packets;
+	}
+}
+
 static void netvsc_get_stats64(struct net_device *net,
 			       struct rtnl_link_stats64 *t)
 {
@@ -1215,6 +1273,23 @@ static const struct {
 	{ "rx_no_memory", offsetof(struct netvsc_ethtool_stats, rx_no_memory) },
 	{ "stop_queue", offsetof(struct netvsc_ethtool_stats, stop_queue) },
 	{ "wake_queue", offsetof(struct netvsc_ethtool_stats, wake_queue) },
+}, pcpu_stats[] = {
+	{ "cpu%u_rx_packets",
+		offsetof(struct netvsc_ethtool_pcpu_stats, rx_packets) },
+	{ "cpu%u_rx_bytes",
+		offsetof(struct netvsc_ethtool_pcpu_stats, rx_bytes) },
+	{ "cpu%u_tx_packets",
+		offsetof(struct netvsc_ethtool_pcpu_stats, tx_packets) },
+	{ "cpu%u_tx_bytes",
+		offsetof(struct netvsc_ethtool_pcpu_stats, tx_bytes) },
+	{ "cpu%u_vf_rx_packets",
+		offsetof(struct netvsc_ethtool_pcpu_stats, vf_rx_packets) },
+	{ "cpu%u_vf_rx_bytes",
+		offsetof(struct netvsc_ethtool_pcpu_stats, vf_rx_bytes) },
+	{ "cpu%u_vf_tx_packets",
+		offsetof(struct netvsc_ethtool_pcpu_stats, vf_tx_packets) },
+	{ "cpu%u_vf_tx_bytes",
+		offsetof(struct netvsc_ethtool_pcpu_stats, vf_tx_bytes) },
 }, vf_stats[] = {
 	{ "vf_rx_packets", offsetof(struct netvsc_vf_pcpu_stats, rx_packets) },
 	{ "vf_rx_bytes",   offsetof(struct netvsc_vf_pcpu_stats, rx_bytes) },
@@ -1226,6 +1301,9 @@ static const struct {
 #define NETVSC_GLOBAL_STATS_LEN	ARRAY_SIZE(netvsc_stats)
 #define NETVSC_VF_STATS_LEN	ARRAY_SIZE(vf_stats)
 
+/* statistics per CPU (rx/tx packets/bytes, vf rx/tx packets/bytes) */
+#define NETVSC_PCPU_STATS_LEN (num_present_cpus() * ARRAY_SIZE(pcpu_stats))
+
 /* 4 statistics per queue (rx/tx packets/bytes) */
 #define NETVSC_QUEUE_STATS_LEN(dev) ((dev)->num_chn * 4)
 
@@ -1241,6 +1319,7 @@ static int netvsc_get_sset_count(struct net_device *dev, int string_set)
 	case ETH_SS_STATS:
 		return NETVSC_GLOBAL_STATS_LEN
 			+ NETVSC_VF_STATS_LEN
+			+ NETVSC_PCPU_STATS_LEN
 			+ NETVSC_QUEUE_STATS_LEN(nvdev);
 	default:
 		return -EINVAL;
@@ -1255,9 +1334,10 @@ static void netvsc_get_ethtool_stats(struct net_device *dev,
 	const void *nds = &ndc->eth_stats;
 	const struct netvsc_stats *qstats;
 	struct netvsc_vf_pcpu_stats sum;
+	struct netvsc_ethtool_pcpu_stats *pcpu_sum;
 	unsigned int start;
 	u64 packets, bytes;
-	int i, j;
+	int i, j, cpu;
 
 	if (!nvdev)
 		return;
@@ -1269,6 +1349,18 @@ static void netvsc_get_ethtool_stats(struct net_device *dev,
 	for (j = 0; j < NETVSC_VF_STATS_LEN; j++)
 		data[i++] = *(u64 *)((void *)&sum + vf_stats[j].offset);
 
+	pcpu_sum = kvmalloc(sizeof(struct netvsc_ethtool_pcpu_stats) *
+			num_present_cpus(), GFP_KERNEL);
+	netvsc_get_pcpu_stats(dev, pcpu_sum);
+	for_each_present_cpu(cpu) {
+		struct netvsc_ethtool_pcpu_stats *this_sum = &pcpu_sum[cpu];
+
+		for (j = 0; j < ARRAY_SIZE(pcpu_stats); j++)
+			data[i++] = *(u64 *)((void *)this_sum
+					     + pcpu_stats[j].offset);
+	}
+	kvfree(pcpu_sum);
+
 	for (j = 0; j < nvdev->num_chn; j++) {
 		qstats = &nvdev->chan_table[j].tx_stats;
 
@@ -1296,7 +1388,7 @@ static void netvsc_get_strings(struct net_device *dev, u32 stringset, u8 *data)
 	struct net_device_context *ndc = netdev_priv(dev);
 	struct netvsc_device *nvdev = rtnl_dereference(ndc->nvdev);
 	u8 *p = data;
-	int i;
+	int i, cpu;
 
 	if (!nvdev)
 		return;
@@ -1313,6 +1405,13 @@ static void netvsc_get_strings(struct net_device *dev, u32 stringset, u8 *data)
 			p += ETH_GSTRING_LEN;
 		}
 
+		for_each_present_cpu(cpu) {
+			for (i = 0; i < ARRAY_SIZE(pcpu_stats); i++) {
+				sprintf(p, pcpu_stats[i].name, cpu);
+				p += ETH_GSTRING_LEN;
+			}
+		}
+
 		for (i = 0; i < nvdev->num_chn; i++) {
 			sprintf(p, "tx_queue_%u_packets", i);
 			p += ETH_GSTRING_LEN;