diff mbox series

[net-next,v2] net: mvpp2: add ethtool GOP statistics

Message ID 20171103110425.16097-1-miquel.raynal@free-electrons.com
State Changes Requested, archived
Delegated to: David Miller
Headers show
Series [net-next,v2] net: mvpp2: add ethtool GOP statistics | expand

Commit Message

Miquel Raynal Nov. 3, 2017, 11:04 a.m. UTC
Add ethtool statistics support by reading the GOP statistics from the
hardware counters. Also implement a workqueue to gather the statistics
every second or some 32-bit counters could overflow.

Suggested-by: Stefan Chulski <stefanc@marvell.com>
Signed-off-by: Miquel Raynal <miquel.raynal@free-electrons.com>
---
 drivers/net/ethernet/marvell/mvpp2.c | 237 ++++++++++++++++++++++++++++++++++-
 1 file changed, 231 insertions(+), 6 deletions(-)

Comments

Miquel Raynal Nov. 3, 2017, 11:26 a.m. UTC | #1
On Fri,  3 Nov 2017 12:04:25 +0100
Miquel Raynal <miquel.raynal@free-electrons.com> wrote:

> Add ethtool statistics support by reading the GOP statistics from the
> hardware counters. Also implement a workqueue to gather the statistics
> every second or some 32-bit counters could overflow.
> 
> Suggested-by: Stefan Chulski <stefanc@marvell.com>
> Signed-off-by: Miquel Raynal <miquel.raynal@free-electrons.com>
> ---

Changes since v1:
- Constified the ethtool array (with strings and offset) and added a
  third parameter to the repeated structure: if the register is 64-bit
  wide.
- Added locking inside the mvpp2_gather_stats() to avoid starting
  reading the registers while a read is already pending.
- Used devm_* allocators.
- Switched to cancel_delayed_work_sync().
- Fixed the size allocated to the statistics array.
- Added an index to the workqueue name, derived with IDA because the
  name shown in 'ps' is only 15 chars wide (so must be short) and there
  was no existing way to retrieve a unique number per engine instance.
- Start gathering the statistics only in ndo_start(), stop in
  ndo_stop().

Best regards,
Miquèl
Andrew Lunn Nov. 3, 2017, 3:19 p.m. UTC | #2
> @@ -817,6 +856,12 @@ struct mvpp2 {
>  
>  	/* Maximum number of RXQs per port */
>  	unsigned int max_port_rxqs;
> +
> +	/* Workqueue to gather hardware statistics with its lock */
> +	struct mutex gather_stats_lock;
> +	struct delayed_work stats_work;
> +	char queue_name[20];
> +	struct workqueue_struct *stats_queue;
>  };
  
> +static u64 mvpp2_read_count(struct mvpp2_port *port,
> +			    const struct mvpp2_ethtool_counter *counter)
> +{
> +	void __iomem *base;
> +	u64 val;
> +
> +	if (port->priv->hw_version == MVPP21)
> +		base = port->priv->lms_base + MVPP21_MIB_COUNTERS_OFFSET +
> +		       port->gop_id * MVPP21_MIB_COUNTERS_PORT_SZ;
> +	else
> +		base = port->priv->iface_base + MVPP22_MIB_COUNTERS_OFFSET +
> +		       port->gop_id * MVPP22_MIB_COUNTERS_PORT_SZ;

This seems like something which could be calculated once and then
stored away, e.g. next to stats_queue.

> +static void mvpp2_ethtool_get_stats(struct net_device *dev,
> +				    struct ethtool_stats *stats, u64 *data)
> +{
> +	struct mvpp2_port *port = netdev_priv(dev);
> +
> +	/* Update statistics for all ports, copy only those actually needed */
> +	mvpp2_gather_hw_statistics(&port->priv->stats_work.work);
> +
> +	memcpy(data, port->ethtool_stats,
> +	       sizeof(u64) * ARRAY_SIZE(mvpp2_ethtool_regs));

Thanks for adding the mutex in mvpp2_gather_hw_statistics(). However,
should we be holding the mutex while performing this copy? There is no
snapshot support, so the statistics are not guaranteed to be
consistent. However, since a statistics is a u64, it is possible the
copy will get the new lower 32 bits and the old 32 bits if the copy
happens while mvpp2_gather_hw_statistics() is running.

> +	port->ethtool_stats = devm_kcalloc(&pdev->dev,
> +					   ARRAY_SIZE(mvpp2_ethtool_regs),
> +					   sizeof(u64), GFP_KERNEL);
> +	if (!port->ethtool_stats) {
> +		err = -ENOMEM;
> +		goto err_free_stats;
> +	}
> +
>  	mvpp2_port_copy_mac_addr(dev, priv, port_node, &mac_from);
>  
>  	port->tx_ring_size = MVPP2_MAX_TXD;
> @@ -7707,6 +7904,7 @@ static void mvpp2_port_remove(struct mvpp2_port *port)
>  	of_node_put(port->phy_node);
>  	free_percpu(port->pcpu);
>  	free_percpu(port->stats);
> +	kfree(port->ethtool_stats);

You allocate the memory using devm_. You should not use plain kfree()
on it. You might want to spend some time reading about devm_

> +	mutex_init(&priv->gather_stats_lock);
> +	index = ida_simple_get(&engine_index_ida, 0, 0, GFP_KERNEL);
> +	if (index < 0)
> +		goto err_mg_clk;
> +
> +	snprintf(priv->queue_name, sizeof(priv->queue_name),
> +		 "mvpp2_stats_%d", index);

I know Florian asked for unique names, which IDA will give you. But
could you derive the name from device tree? It then becomes a name you
can actually map back to the hardware, rather than being semi-random.

    Thanks
	Andrew
Miquel Raynal Nov. 6, 2017, 10:06 a.m. UTC | #3
Hi Andrew,

On Fri, 3 Nov 2017 16:19:06 +0100
Andrew Lunn <andrew@lunn.ch> wrote:

> > @@ -817,6 +856,12 @@ struct mvpp2 {
> >  
> >  	/* Maximum number of RXQs per port */
> >  	unsigned int max_port_rxqs;
> > +
> > +	/* Workqueue to gather hardware statistics with its lock */
> > +	struct mutex gather_stats_lock;
> > +	struct delayed_work stats_work;
> > +	char queue_name[20];
> > +	struct workqueue_struct *stats_queue;
> >  };  
>   
> > +static u64 mvpp2_read_count(struct mvpp2_port *port,
> > +			    const struct mvpp2_ethtool_counter
> > *counter) +{
> > +	void __iomem *base;
> > +	u64 val;
> > +
> > +	if (port->priv->hw_version == MVPP21)
> > +		base = port->priv->lms_base +
> > MVPP21_MIB_COUNTERS_OFFSET +
> > +		       port->gop_id * MVPP21_MIB_COUNTERS_PORT_SZ;
> > +	else
> > +		base = port->priv->iface_base +
> > MVPP22_MIB_COUNTERS_OFFSET +
> > +		       port->gop_id *
> > MVPP22_MIB_COUNTERS_PORT_SZ;  
> 
> This seems like something which could be calculated once and then
> stored away, e.g. next to stats_queue.

Sure, it is even more important now that there is a workqueue calling
quite often this function. I actually had to move the new field into
the "mvpp2_port" structure otherwise the offset still should have been
calculated each time.

> 
> > +static void mvpp2_ethtool_get_stats(struct net_device *dev,
> > +				    struct ethtool_stats *stats,
> > u64 *data) +{
> > +	struct mvpp2_port *port = netdev_priv(dev);
> > +
> > +	/* Update statistics for all ports, copy only those
> > actually needed */
> > +	mvpp2_gather_hw_statistics(&port->priv->stats_work.work);
> > +
> > +	memcpy(data, port->ethtool_stats,
> > +	       sizeof(u64) * ARRAY_SIZE(mvpp2_ethtool_regs));  
> 
> Thanks for adding the mutex in mvpp2_gather_hw_statistics(). However,
> should we be holding the mutex while performing this copy? There is no
> snapshot support, so the statistics are not guaranteed to be
> consistent. However, since a statistics is a u64, it is possible the
> copy will get the new lower 32 bits and the old 32 bits if the copy
> happens while mvpp2_gather_hw_statistics() is running.

I thought the concurrency problem was only when reading from the
hardware registers so I did not thought about the situation you
describe here, I updated (see the coming v3) by surrounding the memcpy
call by acquiring/releasing the lock.

> 
> > +	port->ethtool_stats = devm_kcalloc(&pdev->dev,
> > +
> > ARRAY_SIZE(mvpp2_ethtool_regs),
> > +					   sizeof(u64),
> > GFP_KERNEL);
> > +	if (!port->ethtool_stats) {
> > +		err = -ENOMEM;
> > +		goto err_free_stats;
> > +	}
> > +
> >  	mvpp2_port_copy_mac_addr(dev, priv, port_node, &mac_from);
> >  
> >  	port->tx_ring_size = MVPP2_MAX_TXD;
> > @@ -7707,6 +7904,7 @@ static void mvpp2_port_remove(struct
> > mvpp2_port *port) of_node_put(port->phy_node);
> >  	free_percpu(port->pcpu);
> >  	free_percpu(port->stats);
> > +	kfree(port->ethtool_stats);  
> 
> You allocate the memory using devm_. You should not use plain kfree()
> on it. You might want to spend some time reading about devm_

Rha, right, was too hurry to resend, I obviously forgot to update the
*_remove() function, sorry about that. 

> 
> > +	mutex_init(&priv->gather_stats_lock);
> > +	index = ida_simple_get(&engine_index_ida, 0, 0,
> > GFP_KERNEL);
> > +	if (index < 0)
> > +		goto err_mg_clk;
> > +
> > +	snprintf(priv->queue_name, sizeof(priv->queue_name),
> > +		 "mvpp2_stats_%d", index);  
> 
> I know Florian asked for unique names, which IDA will give you. But
> could you derive the name from device tree? It then becomes a name you
> can actually map back to the hardware, rather than being semi-random.

Here I do have a problem: I choose the IDA solution because it was
quite straightforward but I agree it would be better to use an unique
name. Unfortunately, on the Armada-8040-DB that instantiate this driver
twice, the node name is not unique. There are two CP (master and
slave) and both names are "ethernet@0". Otherwise, if I use the full
path, I get something like
"/cp110-master/config-space@f2000000/ethernet@0" and
"/cp110-slave/config-space@f4000000/ethernet@0" but the problem is that
workqueue names are truncated to 24 characters and only 15 appears in
ps, so it would not solve the issue and choosing the "parent
parent node name" would work here but does not scale very well. Do you
have any idea to get this right?

Thanks for the review,
Miquèl
Andrew Lunn Nov. 6, 2017, 2:25 p.m. UTC | #4
> Here I do have a problem: I choose the IDA solution because it was
> quite straightforward but I agree it would be better to use an unique
> name. Unfortunately, on the Armada-8040-DB that instantiate this driver
> twice, the node name is not unique. There are two CP (master and
> slave) and both names are "ethernet@0". Otherwise, if I use the full
> path, I get something like
> "/cp110-master/config-space@f2000000/ethernet@0" and
> "/cp110-slave/config-space@f4000000/ethernet@0" but the problem is that
> workqueue names are truncated to 24 characters and only 15 appears in
> ps, so it would not solve the issue and choosing the "parent
> parent node name" would work here but does not scale very well. Do you
> have any idea to get this right?

Hi Miquel

You could move the starting of the thread into mvpp2_port_probe(). If
you do it after register_netdev(dev), you can use netdev_name(dev).

    Andrew
Miquel Raynal Nov. 6, 2017, 9:02 p.m. UTC | #5
Hi Andrew,

> > Here I do have a problem: I choose the IDA solution because it was
> > quite straightforward but I agree it would be better to use an
> > unique name. Unfortunately, on the Armada-8040-DB that instantiate
> > this driver twice, the node name is not unique. There are two CP
> > (master and slave) and both names are "ethernet@0". Otherwise, if I
> > use the full path, I get something like
> > "/cp110-master/config-space@f2000000/ethernet@0" and
> > "/cp110-slave/config-space@f4000000/ethernet@0" but the problem is
> > that workqueue names are truncated to 24 characters and only 15
> > appears in ps, so it would not solve the issue and choosing the
> > "parent parent node name" would work here but does not scale very
> > well. Do you have any idea to get this right?  
> 
> Hi Miquel
> 
> You could move the starting of the thread into mvpp2_port_probe(). If
> you do it after register_netdev(dev), you can use netdev_name(dev).
> 
>     Andrew

There is one workqueue per instance of the driver, not per port.
Hence, I choose to use the netdev_name() (short) with a '+' after it
if there are other ports involved, ie. "stats-wq-eth0+" and
"stats-wq-eth2+".


Thanks for your help,
Miquèl
Miquel Raynal Nov. 6, 2017, 10:45 p.m. UTC | #6
Hi Stefan,

+David Miller/Net ML

> > @@ -6844,6 +7023,10 @@ static int mvpp2_open(struct net_device
> > *dev)
> > 
> >  	mvpp2_start_dev(port);
> > 
> > +	/* Start hardware statistics gathering */
> > +	queue_delayed_work(priv->stats_queue, &priv->stats_work,
> > +			   MVPP2_MIB_COUNTERS_STATS_DELAY);
> > +
> >  	return 0;
> > 
> >  err_free_link_irq:
> > @@ -6888,6 +7071,9 @@ static int mvpp2_stop(struct net_device *dev)
> >  	mvpp2_cleanup_rxqs(port);
> >  	mvpp2_cleanup_txqs(port);
> > 
> > +	cancel_delayed_work_sync(&priv->stats_work);
> > +	flush_workqueue(priv->stats_queue);
> > +  
> 
> Hi Miquel,
> 
> I think there are bug here.
> priv is common for all ports on same CPN and they have same
> priv->stats_work.
> 
> For example on A7K board with 3 Ports. queue_delayed_work and
> cancel_delayed_work_sync called for each port stop and start
> procedure. For example:
> If Port0 and Port1 were started, then if only Port0 stopped, delayed
> work would be canceled for both ports.


Thanks for spotting it, you are right this is a bug since I moved
starting/stopping the queues in the opening and close procedure of the
ports (to avoid using CPU time while no interface is actually up).

Maybe I should have a work per port, it would be easier to
handle.

Thank you,
Miquèl
Stefan Chulski Nov. 7, 2017, 8:22 a.m. UTC | #7
> -----Original Message-----

> From: Miquel RAYNAL [mailto:miquel.raynal@free-electrons.com]

> Sent: Tuesday, November 07, 2017 12:45 AM

> To: Stefan Chulski <stefanc@marvell.com>

> Cc: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>; Antoine Tenart

> <antoine.tenart@free-electrons.com>; David S. Miller

> <davem@davemloft.net>; netdev@vger.kernel.org

> Subject: [EXT] Re: [PATCH net-next v2] net: mvpp2: add ethtool GOP statistics

> 

> External Email

> 

> ----------------------------------------------------------------------

> Hi Stefan,

> 

> +David Miller/Net ML

> 

> > > @@ -6844,6 +7023,10 @@ static int mvpp2_open(struct net_device

> > > *dev)

> > >

> > >  	mvpp2_start_dev(port);

> > >

> > > +	/* Start hardware statistics gathering */

> > > +	queue_delayed_work(priv->stats_queue, &priv->stats_work,

> > > +			   MVPP2_MIB_COUNTERS_STATS_DELAY);

> > > +

> > >  	return 0;

> > >

> > >  err_free_link_irq:

> > > @@ -6888,6 +7071,9 @@ static int mvpp2_stop(struct net_device *dev)

> > >  	mvpp2_cleanup_rxqs(port);

> > >  	mvpp2_cleanup_txqs(port);

> > >

> > > +	cancel_delayed_work_sync(&priv->stats_work);

> > > +	flush_workqueue(priv->stats_queue);

> > > +

> >

> > Hi Miquel,

> >

> > I think there are bug here.

> > priv is common for all ports on same CPN and they have same

> > priv->stats_work.

> >

> > For example on A7K board with 3 Ports. queue_delayed_work and

> > cancel_delayed_work_sync called for each port stop and start

> > procedure. For example:

> > If Port0 and Port1 were started, then if only Port0 stopped, delayed

> > work would be canceled for both ports.

> 

> 

> Thanks for spotting it, you are right this is a bug since I moved starting/stopping

> the queues in the opening and close procedure of the ports (to avoid using CPU

> time while no interface is actually up).

> 

> Maybe I should have a work per port, it would be easier to handle.

> 

> Thank you,

> Miquèl


I agree with you.
If delayed_work enable/disabled upon port start/stop procedure, it should be
per port and gather MIB statistics for single port.

Stefan.
diff mbox series

Patch

diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c
index 97efe4733661..ca46eca27981 100644
--- a/drivers/net/ethernet/marvell/mvpp2.c
+++ b/drivers/net/ethernet/marvell/mvpp2.c
@@ -38,6 +38,8 @@ 
 #include <net/ipv6.h>
 #include <net/tso.h>
 
+static DEFINE_IDA(engine_index_ida);
+
 /* RX Fifo Registers */
 #define MVPP2_RX_DATA_FIFO_SIZE_REG(port)	(0x00 + 4 * (port))
 #define MVPP2_RX_ATTR_FIFO_SIZE_REG(port)	(0x20 + 4 * (port))
@@ -769,6 +771,42 @@  enum mvpp2_bm_type {
 	MVPP2_BM_SWF_SHORT
 };
 
+/* GMAC MIB Counters register definitions */
+#define MVPP21_MIB_COUNTERS_OFFSET		0x1000
+#define MVPP21_MIB_COUNTERS_PORT_SZ		0x400
+#define MVPP22_MIB_COUNTERS_OFFSET		0x0
+#define MVPP22_MIB_COUNTERS_PORT_SZ		0x100
+
+#define MVPP2_MIB_GOOD_OCTETS_RCVD		0x0
+#define MVPP2_MIB_BAD_OCTETS_RCVD		0x8
+#define MVPP2_MIB_CRC_ERRORS_SENT		0xc
+#define MVPP2_MIB_UNICAST_FRAMES_RCVD		0x10
+#define MVPP2_MIB_BROADCAST_FRAMES_RCVD		0x18
+#define MVPP2_MIB_MULTICAST_FRAMES_RCVD		0x1c
+#define MVPP2_MIB_FRAMES_64_OCTETS		0x20
+#define MVPP2_MIB_FRAMES_65_TO_127_OCTETS	0x24
+#define MVPP2_MIB_FRAMES_128_TO_255_OCTETS	0x28
+#define MVPP2_MIB_FRAMES_256_TO_511_OCTETS	0x2c
+#define MVPP2_MIB_FRAMES_512_TO_1023_OCTETS	0x30
+#define MVPP2_MIB_FRAMES_1024_TO_MAX_OCTETS	0x34
+#define MVPP2_MIB_GOOD_OCTETS_SENT		0x38
+#define MVPP2_MIB_UNICAST_FRAMES_SENT		0x40
+#define MVPP2_MIB_MULTICAST_FRAMES_SENT		0x48
+#define MVPP2_MIB_BROADCAST_FRAMES_SENT		0x4c
+#define MVPP2_MIB_FC_SENT			0x54
+#define MVPP2_MIB_FC_RCVD			0x58
+#define MVPP2_MIB_RX_FIFO_OVERRUN		0x5c
+#define MVPP2_MIB_UNDERSIZE_RCVD		0x60
+#define MVPP2_MIB_FRAGMENTS_RCVD		0x64
+#define MVPP2_MIB_OVERSIZE_RCVD			0x68
+#define MVPP2_MIB_JABBER_RCVD			0x6c
+#define MVPP2_MIB_MAC_RCV_ERROR			0x70
+#define MVPP2_MIB_BAD_CRC_EVENT			0x74
+#define MVPP2_MIB_COLLISION			0x78
+#define MVPP2_MIB_LATE_COLLISION		0x7c
+
+#define MVPP2_MIB_COUNTERS_STATS_DELAY		(1 * HZ)
+
 /* Definitions */
 
 /* Shared Packet Processor resources */
@@ -796,6 +834,7 @@  struct mvpp2 {
 	struct clk *axi_clk;
 
 	/* List of pointers to port structures */
+	int port_count;
 	struct mvpp2_port **port_list;
 
 	/* Aggregated TXQs */
@@ -817,6 +856,12 @@  struct mvpp2 {
 
 	/* Maximum number of RXQs per port */
 	unsigned int max_port_rxqs;
+
+	/* Workqueue to gather hardware statistics with its lock */
+	struct mutex gather_stats_lock;
+	struct delayed_work stats_work;
+	char queue_name[20];
+	struct workqueue_struct *stats_queue;
 };
 
 struct mvpp2_pcpu_stats {
@@ -879,6 +924,7 @@  struct mvpp2_port {
 	u16 tx_ring_size;
 	u16 rx_ring_size;
 	struct mvpp2_pcpu_stats __percpu *stats;
+	u64 *ethtool_stats;
 
 	phy_interface_t phy_interface;
 	struct device_node *phy_node;
@@ -4743,9 +4789,142 @@  static void mvpp2_port_loopback_set(struct mvpp2_port *port)
 	writel(val, port->base + MVPP2_GMAC_CTRL_1_REG);
 }
 
+struct mvpp2_ethtool_counter {
+	unsigned int offset;
+	const char string[ETH_GSTRING_LEN];
+	bool reg_is_64b;
+};
+
+static u64 mvpp2_read_count(struct mvpp2_port *port,
+			    const struct mvpp2_ethtool_counter *counter)
+{
+	void __iomem *base;
+	u64 val;
+
+	if (port->priv->hw_version == MVPP21)
+		base = port->priv->lms_base + MVPP21_MIB_COUNTERS_OFFSET +
+		       port->gop_id * MVPP21_MIB_COUNTERS_PORT_SZ;
+	else
+		base = port->priv->iface_base + MVPP22_MIB_COUNTERS_OFFSET +
+		       port->gop_id * MVPP22_MIB_COUNTERS_PORT_SZ;
+
+	val = readl(base + counter->offset);
+	if (counter->reg_is_64b)
+		val += (u64)readl(base + counter->offset + 4) << 32;
+
+	return val;
+}
+
+/* Due to the fact that software statistics and hardware statistics are, by
+ * design, incremented at different moments in the chain of packet processing,
+ * it is very likely that incoming packets could have been dropped after being
+ * counted by hardware but before reaching software statistics (most probably
+ * multicast packets), and in the oppposite way, during transmission, FCS bytes
+ * are added in between as well as TSO skb will be split and header bytes added.
+ * Hence, statistics gathered from userspace with ifconfig (software) and
+ * ethtool (hardware) cannot be compared.
+ */
+static const struct mvpp2_ethtool_counter mvpp2_ethtool_regs[] = {
+	{ MVPP2_MIB_GOOD_OCTETS_RCVD, "good_octets_received", true },
+	{ MVPP2_MIB_BAD_OCTETS_RCVD, "bad_octets_received" },
+	{ MVPP2_MIB_CRC_ERRORS_SENT, "crc_errors_sent" },
+	{ MVPP2_MIB_UNICAST_FRAMES_RCVD, "unicast_frames_received" },
+	{ MVPP2_MIB_BROADCAST_FRAMES_RCVD, "broadcast_frames_received" },
+	{ MVPP2_MIB_MULTICAST_FRAMES_RCVD, "multicast_frames_received" },
+	{ MVPP2_MIB_FRAMES_64_OCTETS, "frames_64_octets" },
+	{ MVPP2_MIB_FRAMES_65_TO_127_OCTETS, "frames_65_to_127_octet" },
+	{ MVPP2_MIB_FRAMES_128_TO_255_OCTETS, "frames_128_to_255_octet" },
+	{ MVPP2_MIB_FRAMES_256_TO_511_OCTETS, "frames_256_to_511_octet" },
+	{ MVPP2_MIB_FRAMES_512_TO_1023_OCTETS, "frames_512_to_1023_octet" },
+	{ MVPP2_MIB_FRAMES_1024_TO_MAX_OCTETS, "frames_1024_to_max_octet" },
+	{ MVPP2_MIB_GOOD_OCTETS_SENT, "good_octets_sent", true },
+	{ MVPP2_MIB_UNICAST_FRAMES_SENT, "unicast_frames_sent" },
+	{ MVPP2_MIB_MULTICAST_FRAMES_SENT, "multicast_frames_sent" },
+	{ MVPP2_MIB_BROADCAST_FRAMES_SENT, "broadcast_frames_sent" },
+	{ MVPP2_MIB_FC_SENT, "fc_sent" },
+	{ MVPP2_MIB_FC_RCVD, "fc_received" },
+	{ MVPP2_MIB_RX_FIFO_OVERRUN, "rx_fifo_overrun" },
+	{ MVPP2_MIB_UNDERSIZE_RCVD, "undersize_received" },
+	{ MVPP2_MIB_FRAGMENTS_RCVD, "fragments_received" },
+	{ MVPP2_MIB_OVERSIZE_RCVD, "oversize_received" },
+	{ MVPP2_MIB_JABBER_RCVD, "jabber_received" },
+	{ MVPP2_MIB_MAC_RCV_ERROR, "mac_receive_error" },
+	{ MVPP2_MIB_BAD_CRC_EVENT, "bad_crc_event" },
+	{ MVPP2_MIB_COLLISION, "collision" },
+	{ MVPP2_MIB_LATE_COLLISION, "late_collision" },
+};
+
+static void mvpp2_ethtool_get_strings(struct net_device *netdev, u32 sset,
+				      u8 *data)
+{
+	if (sset == ETH_SS_STATS) {
+		int i;
+
+		for (i = 0; i < ARRAY_SIZE(mvpp2_ethtool_regs); i++)
+			memcpy(data + i * ETH_GSTRING_LEN,
+			       &mvpp2_ethtool_regs[i].string, ETH_GSTRING_LEN);
+	}
+}
+
+static void mvpp2_gather_hw_statistics(struct work_struct *work)
+{
+	struct delayed_work *del_work = to_delayed_work(work);
+	struct mvpp2 *priv = container_of(del_work, struct mvpp2, stats_work);
+	struct mvpp2_port *port;
+	u64 *pstats;
+	int i, j;
+
+	mutex_lock(&priv->gather_stats_lock);
+
+	for (i = 0; i < priv->port_count; i++) {
+		if (!priv->port_list[i])
+			continue;
+
+		port = priv->port_list[i];
+		pstats = port->ethtool_stats;
+		for (j = 0; j < ARRAY_SIZE(mvpp2_ethtool_regs); j++)
+			*pstats++ += mvpp2_read_count(port,
+						      &mvpp2_ethtool_regs[j]);
+	}
+
+	/* No need to read again the counters right after this function if it
+	 * was called asynchronously by the user (ie. use of ethtool).
+	 */
+	cancel_delayed_work(&priv->stats_work);
+	queue_delayed_work(priv->stats_queue, &priv->stats_work,
+			   MVPP2_MIB_COUNTERS_STATS_DELAY);
+
+	mutex_unlock(&priv->gather_stats_lock);
+}
+
+static void mvpp2_ethtool_get_stats(struct net_device *dev,
+				    struct ethtool_stats *stats, u64 *data)
+{
+	struct mvpp2_port *port = netdev_priv(dev);
+
+	/* Update statistics for all ports, copy only those actually needed */
+	mvpp2_gather_hw_statistics(&port->priv->stats_work.work);
+
+	memcpy(data, port->ethtool_stats,
+	       sizeof(u64) * ARRAY_SIZE(mvpp2_ethtool_regs));
+}
+
+static int mvpp2_ethtool_get_sset_count(struct net_device *dev, int sset)
+{
+	if (sset == ETH_SS_STATS)
+		return ARRAY_SIZE(mvpp2_ethtool_regs);
+
+	return -EOPNOTSUPP;
+}
+
 static void mvpp2_port_reset(struct mvpp2_port *port)
 {
 	u32 val;
+	unsigned int i;
+
+	/* Read the GOP statistics to reset the hardware counters */
+	for (i = 0; i < ARRAY_SIZE(mvpp2_ethtool_regs); i++)
+		mvpp2_read_count(port, &mvpp2_ethtool_regs[i]);
 
 	val = readl(port->base + MVPP2_GMAC_CTRL_2_REG) &
 		    ~MVPP2_GMAC_PORT_RESET_MASK;
@@ -6844,6 +7023,10 @@  static int mvpp2_open(struct net_device *dev)
 
 	mvpp2_start_dev(port);
 
+	/* Start hardware statistics gathering */
+	queue_delayed_work(priv->stats_queue, &priv->stats_work,
+			   MVPP2_MIB_COUNTERS_STATS_DELAY);
+
 	return 0;
 
 err_free_link_irq:
@@ -6888,6 +7071,9 @@  static int mvpp2_stop(struct net_device *dev)
 	mvpp2_cleanup_rxqs(port);
 	mvpp2_cleanup_txqs(port);
 
+	cancel_delayed_work_sync(&priv->stats_work);
+	flush_workqueue(priv->stats_queue);
+
 	return 0;
 }
 
@@ -7199,6 +7385,9 @@  static const struct ethtool_ops mvpp2_eth_tool_ops = {
 	.get_drvinfo	= mvpp2_ethtool_get_drvinfo,
 	.get_ringparam	= mvpp2_ethtool_get_ringparam,
 	.set_ringparam	= mvpp2_ethtool_set_ringparam,
+	.get_strings	= mvpp2_ethtool_get_strings,
+	.get_ethtool_stats = mvpp2_ethtool_get_stats,
+	.get_sset_count	= mvpp2_ethtool_get_sset_count,
 	.get_link_ksettings = phy_ethtool_get_link_ksettings,
 	.set_link_ksettings = phy_ethtool_set_link_ksettings,
 };
@@ -7613,13 +7802,21 @@  static int mvpp2_port_probe(struct platform_device *pdev,
 		port->base = priv->iface_base + MVPP22_GMAC_BASE(port->gop_id);
 	}
 
-	/* Alloc per-cpu stats */
+	/* Alloc per-cpu and ethtool stats */
 	port->stats = netdev_alloc_pcpu_stats(struct mvpp2_pcpu_stats);
 	if (!port->stats) {
 		err = -ENOMEM;
 		goto err_free_irq;
 	}
 
+	port->ethtool_stats = devm_kcalloc(&pdev->dev,
+					   ARRAY_SIZE(mvpp2_ethtool_regs),
+					   sizeof(u64), GFP_KERNEL);
+	if (!port->ethtool_stats) {
+		err = -ENOMEM;
+		goto err_free_stats;
+	}
+
 	mvpp2_port_copy_mac_addr(dev, priv, port_node, &mac_from);
 
 	port->tx_ring_size = MVPP2_MAX_TXD;
@@ -7707,6 +7904,7 @@  static void mvpp2_port_remove(struct mvpp2_port *port)
 	of_node_put(port->phy_node);
 	free_percpu(port->pcpu);
 	free_percpu(port->stats);
+	kfree(port->ethtool_stats);
 	for (i = 0; i < port->ntxqs; i++)
 		free_percpu(port->txqs[i]->pcpu);
 	mvpp2_queue_vectors_deinit(port);
@@ -7892,8 +8090,9 @@  static int mvpp2_probe(struct platform_device *pdev)
 	struct device_node *port_node;
 	struct mvpp2 *priv;
 	struct resource *res;
+	unsigned int index;
 	void __iomem *base;
-	int port_count, i;
+	int i;
 	int err;
 
 	priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
@@ -8008,14 +8207,14 @@  static int mvpp2_probe(struct platform_device *pdev)
 		goto err_mg_clk;
 	}
 
-	port_count = of_get_available_child_count(dn);
-	if (port_count == 0) {
+	priv->port_count = of_get_available_child_count(dn);
+	if (priv->port_count == 0) {
 		dev_err(&pdev->dev, "no ports enabled\n");
 		err = -ENODEV;
 		goto err_mg_clk;
 	}
 
-	priv->port_list = devm_kcalloc(&pdev->dev, port_count,
+	priv->port_list = devm_kcalloc(&pdev->dev, priv->port_count,
 				       sizeof(*priv->port_list),
 				       GFP_KERNEL);
 	if (!priv->port_list) {
@@ -8023,18 +8222,41 @@  static int mvpp2_probe(struct platform_device *pdev)
 		goto err_mg_clk;
 	}
 
+	/* Statistics must be gathered regularly because some of them (like
+	 * packets counters) are 32-bit registers and could overflow quite
+	 * quickly. For instance, a 10Gb link used at full bandwidth with the
+	 * smallest packets (64B) will overflow a 32-bit counter in less than
+	 * 30 seconds. Then, use a workqueue to fill 64-bit counters.
+	 */
+	mutex_init(&priv->gather_stats_lock);
+	index = ida_simple_get(&engine_index_ida, 0, 0, GFP_KERNEL);
+	if (index < 0)
+		goto err_mg_clk;
+
+	snprintf(priv->queue_name, sizeof(priv->queue_name),
+		 "mvpp2_stats_%d", index);
+	priv->stats_queue = create_singlethread_workqueue(priv->queue_name);
+	if (!priv->stats_queue) {
+		err = -ENOMEM;
+		goto err_ida;
+	}
+
+	INIT_DELAYED_WORK(&priv->stats_work, mvpp2_gather_hw_statistics);
+
 	/* Initialize ports */
 	i = 0;
 	for_each_available_child_of_node(dn, port_node) {
 		err = mvpp2_port_probe(pdev, port_node, priv, i);
 		if (err < 0)
-			goto err_mg_clk;
+			goto err_ida;
 		i++;
 	}
 
 	platform_set_drvdata(pdev, priv);
 	return 0;
 
+err_ida:
+	ida_simple_remove(&engine_index_ida, index);
 err_mg_clk:
 	clk_disable_unprepare(priv->axi_clk);
 	if (priv->hw_version == MVPP22)
@@ -8053,6 +8275,9 @@  static int mvpp2_remove(struct platform_device *pdev)
 	struct device_node *port_node;
 	int i = 0;
 
+	destroy_workqueue(priv->stats_queue);
+	mutex_destroy(&priv->gather_stats_lock);
+
 	for_each_available_child_of_node(dn, port_node) {
 		if (priv->port_list[i])
 			mvpp2_port_remove(priv->port_list[i]);