Patchwork [net-next] r8169: Add 64bit statistics

login
register
mail settings
Submitter Junchang Wang
Date Nov. 17, 2011, 6:48 a.m.
Message ID <20111117064826.GA4429@Desktop-Junchang>
Download mbox | patch
Permalink /patch/126117/
State Changes Requested
Delegated to: David Miller
Headers show

Comments

Junchang Wang - Nov. 17, 2011, 6:48 a.m.
Switch to use ndo_get_stats64 to get 64bit statistics.
Per cpu data is used to avoid lock operations.


Signed-off-by: Junchang Wang <junchangwang@gmail.com>
---
 drivers/net/ethernet/realtek/r8169.c |  113 ++++++++++++++++++++++++++++------
 1 files changed, 93 insertions(+), 20 deletions(-)

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Stephen Hemminger - Nov. 17, 2011, 7:03 a.m.
> Switch to use ndo_get_stats64 to get 64bit statistics.
> Per cpu data is used to avoid lock operations.
> 
> 
> Signed-off-by: Junchang Wang <junchangwang@gmail.com>

This was recently brought up in the proposed forcedeth patch.
You dont need per-cpu since Tx is locked by dev->xmit_lock and
rx is implicitly single threaded by NAPI. You do need to have
two u64_stat_sync entries (one for Tx and one for Rx).  
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Eric Dumazet - Nov. 17, 2011, 7:21 a.m.
Le jeudi 17 novembre 2011 à 14:48 +0800, Junchang Wang a écrit :
> Switch to use ndo_get_stats64 to get 64bit statistics.
> Per cpu data is used to avoid lock operations.
> 
> 
> Signed-off-by: Junchang Wang <junchangwang@gmail.com>
> ---
>  drivers/net/ethernet/realtek/r8169.c |  113 ++++++++++++++++++++++++++++------
>  1 files changed, 93 insertions(+), 20 deletions(-)
> 
> diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c
> index cdf66d6..0165646 100644
> --- a/drivers/net/ethernet/realtek/r8169.c
> +++ b/drivers/net/ethernet/realtek/r8169.c
> @@ -670,11 +670,31 @@ struct rtl8169_counters {
>  	__le16	tx_underun;
>  };
>  
> +struct rtl8169_pcpu_stats {
> +	u64			rx_packets;
> +	u64			rx_bytes;
> +	u64			tx_packets;
> +	u64			tx_bytes;
> +	struct u64_stats_sync	syncp;
> +	/*
> +	 * The following variables are updated
> +	 * without syncp protection.
> +	 */
> +	unsigned long		rx_dropped;
> +	unsigned long		tx_dropped;
> +	unsigned long		rx_length_errors;
> +	unsigned long		rx_errors;
> +	unsigned long		rx_crc_errors;
> +	unsigned long		rx_fifo_errors;
> +	unsigned long		rx_missed_errors;
> +};
> +

Thats overkill. Dont copy what have been done for virtual devices
(loopback, tunnels, ...)

RX and TX path are serialized (only one cpu can fly at one moment)



--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Junchang Wang - Nov. 17, 2011, 7:39 a.m.
On Thu, Nov 17, 2011 at 3:21 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> Le jeudi 17 novembre 2011 à 14:48 +0800, Junchang Wang a écrit :
>> Switch to use ndo_get_stats64 to get 64bit statistics.
>> Per cpu data is used to avoid lock operations.
>>
>>
>> Signed-off-by: Junchang Wang <junchangwang@gmail.com>
>> ---
>>  drivers/net/ethernet/realtek/r8169.c |  113 ++++++++++++++++++++++++++++------
>>  1 files changed, 93 insertions(+), 20 deletions(-)
>>
>> diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c
>> index cdf66d6..0165646 100644
>> --- a/drivers/net/ethernet/realtek/r8169.c
>> +++ b/drivers/net/ethernet/realtek/r8169.c
>> @@ -670,11 +670,31 @@ struct rtl8169_counters {
>>       __le16  tx_underun;
>>  };
>>
>> +struct rtl8169_pcpu_stats {
>> +     u64                     rx_packets;
>> +     u64                     rx_bytes;
>> +     u64                     tx_packets;
>> +     u64                     tx_bytes;
>> +     struct u64_stats_sync   syncp;
>> +     /*
>> +      * The following variables are updated
>> +      * without syncp protection.
>> +      */
>> +     unsigned long           rx_dropped;
>> +     unsigned long           tx_dropped;
>> +     unsigned long           rx_length_errors;
>> +     unsigned long           rx_errors;
>> +     unsigned long           rx_crc_errors;
>> +     unsigned long           rx_fifo_errors;
>> +     unsigned long           rx_missed_errors;
>> +};
>> +
>
> Thats overkill. Dont copy what have been done for virtual devices
> (loopback, tunnels, ...)
>
> RX and TX path are serialized (only one cpu can fly at one moment)
>
Thanks. I'll submit a new version.
Junchang Wang - Nov. 17, 2011, 7:46 a.m.
> You dont need per-cpu since Tx is locked by dev->xmit_lock and
> rx is implicitly single threaded by NAPI.

Thanks.

>You do need to have
> two u64_stat_sync entries (one for Tx and one for Rx).

You mean Rx and Tx will perform on different cores at one moment.
So I need a sync for Tx to protect tx_xxx, and another for Rx to
protect rx_xxx. Is that right?

Thanks.
Eric Dumazet - Nov. 17, 2011, 8:11 a.m.
Le jeudi 17 novembre 2011 à 15:46 +0800, Junchang Wang a écrit :
> > You dont need per-cpu since Tx is locked by dev->xmit_lock and
> > rx is implicitly single threaded by NAPI.
> 
> Thanks.
> 
> >You do need to have
> > two u64_stat_sync entries (one for Tx and one for Rx).
> 
> You mean Rx and Tx will perform on different cores at one moment.
> So I need a sync for Tx to protect tx_xxx, and another for Rx to
> protect rx_xxx. Is that right?
> 

Yes, look at sky2.c for a template

drivers/net/ethernet/marvell/sky2.c contains code like that
(different syncp for rx/tx)

TX path:
                        u64_stats_update_begin(&sky2->tx_stats.syncp);
                        ++sky2->tx_stats.packets;
                        sky2->tx_stats.bytes += skb->len;
                        u64_stats_update_end(&sky2->tx_stats.syncp);


RX path:

        u64_stats_update_begin(&sky2->rx_stats.syncp);
        sky2->rx_stats.packets += packets;
        sky2->rx_stats.bytes += bytes;
        u64_stats_update_end(&sky2->rx_stats.syncp);




--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
françois romieu - Nov. 17, 2011, 9:36 a.m.
Junchang Wang <junchangwang@gmail.com> :
> 
> Switch to use ndo_get_stats64 to get 64bit statistics.
> Per cpu data is used to avoid lock operations.

The 816x chipsets have hardware stats registers. The driver already
use them. Please use them more.
Eric Dumazet - Nov. 17, 2011, 10:51 a.m.
Le jeudi 17 novembre 2011 à 10:36 +0100, Francois Romieu a écrit :
> Junchang Wang <junchangwang@gmail.com> :
> > 
> > Switch to use ndo_get_stats64 to get 64bit statistics.
> > Per cpu data is used to avoid lock operations.
> 
> The 816x chipsets have hardware stats registers. The driver already
> use them. Please use them more.
> 

I would like to mention a possible bias.

I know for that tg3 includes in RX counters frames/bytes that were
dropped (because napi handler couldnot keep up with the load)

They also include FCS in the byte count.

When using software counters, we can compare "ethtool -S" and "ifconfig"
ones.

But generaly speaking, if hardware stats are available we should use
them and save few instructions per packet ;)



--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Junchang Wang - Nov. 17, 2011, 10:59 a.m.
> Yes, look at sky2.c for a template
>
> drivers/net/ethernet/marvell/sky2.c contains code like that
> (different syncp for rx/tx)
>
> TX path:
>                        u64_stats_update_begin(&sky2->tx_stats.syncp);
>                        ++sky2->tx_stats.packets;
>                        sky2->tx_stats.bytes += skb->len;
>                        u64_stats_update_end(&sky2->tx_stats.syncp);
>
>
> RX path:
>
>        u64_stats_update_begin(&sky2->rx_stats.syncp);
>        sky2->rx_stats.packets += packets;
>        sky2->rx_stats.bytes += bytes;
>        u64_stats_update_end(&sky2->rx_stats.syncp);
>

Thanks, Eric.

I'm still confused about why we need two sync entries. Please correct
me if I'm wrong.

Take r8169 for example, All statistic entries are updated in
rtl8169_rx_interrupt() or rtl8169_tx_interrupt(). Those two functions
are called in rtl8169_poll().
As far as I know, rtl8169_poll() is protected by NAPI_STATE_SCHED bit
to run on a single core at one moment. So there is not compulsory to
use two sync entries.

One benefit from two sync is that readers can avoid many retries.

Thanks.
David Laight - Nov. 17, 2011, 11:13 a.m.
The 64bit stats update sequence used to get valid
counts on 32bit systems (that can't do locked 64bit
memory access) seems to be:

>     u64_stats_update_begin(&sky2->tx_stats.syncp);
>     ++sky2->tx_stats.packets;
>     sky2->tx_stats.bytes += skb->len;
>     u64_stats_update_end(&sky2->tx_stats.syncp);

I'm not sure what the begin/end markers do, but
they need to hold off the readers during updates
and the writers during reads - this is probably
expensive on the update path.

A thought that might work is for the writer to
write the middle bits of the 64 bit walue to
another location, eg:
       count = sky2->tx_stats.bytes + skb->len;
       sky2->tx_stats.bytes = count;
       sky2->tx_stats.bytes_check = count >> 16;
The reader then loops until the two value are
consistent.

I think this doesn't even require a memory barrier
in the ISR since the order of the reads an writes
doesn't matter at all.

	David


--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Eric Dumazet - Nov. 17, 2011, 12:58 p.m.
Le jeudi 17 novembre 2011 à 11:13 +0000, David Laight a écrit :
> The 64bit stats update sequence used to get valid
> counts on 32bit systems (that can't do locked 64bit
> memory access) seems to be:
> 
> >     u64_stats_update_begin(&sky2->tx_stats.syncp);
> >     ++sky2->tx_stats.packets;
> >     sky2->tx_stats.bytes += skb->len;
> >     u64_stats_update_end(&sky2->tx_stats.syncp);
> 
> I'm not sure what the begin/end markers do, but
> they need to hold off the readers during updates
> and the writers during reads - this is probably
> expensive on the update path.
> 
> A thought that might work is for the writer to
> write the middle bits of the 64 bit walue to
> another location, eg:
>        count = sky2->tx_stats.bytes + skb->len;
>        sky2->tx_stats.bytes = count;
>        sky2->tx_stats.bytes_check = count >> 16;
> The reader then loops until the two value are
> consistent.
> 
> I think this doesn't even require a memory barrier
> in the ISR since the order of the reads an writes
> doesn't matter at all.
> 
> 	David
> 
> 

Oh well...

Before claiming all this, you really should read 
include/linux/u64_stats_sync.h

This should answer all your questions.



--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Eric Dumazet - Nov. 17, 2011, 1:02 p.m.
Le jeudi 17 novembre 2011 à 18:59 +0800, Junchang Wang a écrit :
> > Yes, look at sky2.c for a template
> >
> > drivers/net/ethernet/marvell/sky2.c contains code like that
> > (different syncp for rx/tx)
> >
> > TX path:
> >                        u64_stats_update_begin(&sky2->tx_stats.syncp);
> >                        ++sky2->tx_stats.packets;
> >                        sky2->tx_stats.bytes += skb->len;
> >                        u64_stats_update_end(&sky2->tx_stats.syncp);
> >
> >
> > RX path:
> >
> >        u64_stats_update_begin(&sky2->rx_stats.syncp);
> >        sky2->rx_stats.packets += packets;
> >        sky2->rx_stats.bytes += bytes;
> >        u64_stats_update_end(&sky2->rx_stats.syncp);
> >
> 
> Thanks, Eric.
> 
> I'm still confused about why we need two sync entries. Please correct
> me if I'm wrong.
> 
> Take r8169 for example, All statistic entries are updated in
> rtl8169_rx_interrupt() or rtl8169_tx_interrupt(). Those two functions
> are called in rtl8169_poll().
> As far as I know, rtl8169_poll() is protected by NAPI_STATE_SCHED bit
> to run on a single core at one moment. So there is not compulsory to
> use two sync entries.
> 
> One benefit from two sync is that readers can avoid many retries.
> 

Oh well...

Dont try to optimize all this stuff, I spent some time on it already,
just use the infrastructure and forget about races and bugs.

The short answer is :

CPU1                    CPU2                  
TX path                 RX path               


since CPU1 & CPU2 will do the update_begin(), and the memory increment
is not atomic, we might lose one increment, and block a stat reader
forever.




--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
David Laight - Nov. 17, 2011, 2:01 p.m.
> From: Eric Dumazet [mailto:eric.dumazet@gmail.com] 
> Le jeudi 17 novembre 2011 à 11:13 +0000, David Laight a écrit :
> > The 64bit stats update sequence used to get valid
> > counts on 32bit systems (that can't do locked 64bit
> > memory access) seems to be:
> > 
> > >     u64_stats_update_begin(&sky2->tx_stats.syncp);
> > >     ++sky2->tx_stats.packets;
> > >     sky2->tx_stats.bytes += skb->len;
> > >     u64_stats_update_end(&sky2->tx_stats.syncp);
> > 
> > I'm not sure what the begin/end markers do, but
> > they need to hold off the readers during updates
> > and the writers during reads - this is probably
> > expensive on the update path.
> > 
> > A thought that might work is for the writer to
> > write the middle bits of the 64 bit walue to
> > another location, eg:
> >        count = sky2->tx_stats.bytes + skb->len;
> >        sky2->tx_stats.bytes = count;
> >        sky2->tx_stats.bytes_check = count >> 16;
> > The reader then loops until the two value are
> > consistent.
> > 
> > I think this doesn't even require a memory barrier
> > in the ISR since the order of the reads an writes
> > doesn't matter at all.
> > 
> > 	David
> > 
> > 
> 
> Oh well...
> 
> Before claiming all this, you really should read 
> include/linux/u64_stats_sync.h
> 
> This should answer all your questions.

Ah yes ...

Both u64_stats_update_begin & _end increment a numeric field
with an appropriate memory barrier. So the 'update' path
has two extra read-modify-write sequences (possibly the
2nd read can be optimised out), and two smp_wmb() that may
introduce bus delays.

Would be fine if it were guaranteed to work!
Consider the following sequence of events:
       u64_stats_update_begin()
       calculate 'count+1'
                                read_seqcount_begin()
                                read count_hi
       write count_lo
                                read count_lo
       write count_hi
                                read_seqcount_retry()
       ... update other counters ...
       u64_stats_update_end()
The reader gets an invalid value since it reads the same
'sequence' both times.

Could be fixed by using '|= 1' in update_begin and
looping on odd values in read_seqcount_begin().

	David

                      


--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Eric Dumazet - Nov. 17, 2011, 2:27 p.m.
Le jeudi 17 novembre 2011 à 14:01 +0000, David Laight a écrit :

> Ah yes ...
> 
> Both u64_stats_update_begin & _end increment a numeric field
> with an appropriate memory barrier. So the 'update' path
> has two extra read-modify-write sequences (possibly the
> 2nd read can be optimised out), and two smp_wmb() that may
> introduce bus delays.
> 
> Would be fine if it were guaranteed to work!
> Consider the following sequence of events:
>        u64_stats_update_begin()
>        calculate 'count+1'
>                                 read_seqcount_begin()
>                                 read count_hi
>        write count_lo
>                                 read count_lo
>        write count_hi
>                                 read_seqcount_retry()
>        ... update other counters ...
>        u64_stats_update_end()
> The reader gets an invalid value since it reads the same
> 'sequence' both times.
> 
> Could be fixed by using '|= 1' in update_begin and
> looping on odd values in read_seqcount_begin().
> 

I am a bit tired of this discussion.

You obviously dont understand how the thing is working.

Spend some time on it before claiming there are bugs.



--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Junchang Wang - Nov. 18, 2011, 2:24 a.m.
> The 816x chipsets have hardware stats registers. The driver already
> use them. Please use them more.

Hi Francois,

I'm not sure which hardware stats registers are accurate. Besides, it
seem r8169.c are also designed for other chipsets (8129?).

I would like other people who are quite familiar with those chipsets
to do this job.

Is that OK? Thanks.
françois romieu - Nov. 18, 2011, 9:18 a.m.
Junchang Wang <junchangwang@gmail.com> :
[...]
> I'm not sure which hardware stats registers are accurate.

See rtl8169_gstrings.

> Besides, it seem r8169.c are also designed for other chipsets (8129?).

Afaik the basic statistics are consistent through the 816x and
810{2, 3} family.

> I would like other people who are quite familiar with those chipsets
> to do this job.
> 
> Is that OK ? Thanks.

Fair enough.

Eric's "ethtool -S = hardware, ip -s = software" remark makes sense
anyway. So there is no reason to be retentive.

Patch

diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c
index cdf66d6..0165646 100644
--- a/drivers/net/ethernet/realtek/r8169.c
+++ b/drivers/net/ethernet/realtek/r8169.c
@@ -670,11 +670,31 @@  struct rtl8169_counters {
 	__le16	tx_underun;
 };
 
+struct rtl8169_pcpu_stats {
+	u64			rx_packets;
+	u64			rx_bytes;
+	u64			tx_packets;
+	u64			tx_bytes;
+	struct u64_stats_sync	syncp;
+	/*
+	 * The following variables are updated
+	 * without syncp protection.
+	 */
+	unsigned long		rx_dropped;
+	unsigned long		tx_dropped;
+	unsigned long		rx_length_errors;
+	unsigned long		rx_errors;
+	unsigned long		rx_crc_errors;
+	unsigned long		rx_fifo_errors;
+	unsigned long		rx_missed_errors;
+};
+
 struct rtl8169_private {
 	void __iomem *mmio_addr;	/* memory map physical address */
 	struct pci_dev *pci_dev;
 	struct net_device *dev;
 	struct napi_struct napi;
+	struct rtl8169_pcpu_stats __percpu *pcpu_stats;
 	spinlock_t lock;
 	u32 msg_enable;
 	u16 txd_version;
@@ -766,7 +786,9 @@  static void rtl_hw_start(struct net_device *dev);
 static int rtl8169_close(struct net_device *dev);
 static void rtl_set_rx_mode(struct net_device *dev);
 static void rtl8169_tx_timeout(struct net_device *dev);
-static struct net_device_stats *rtl8169_get_stats(struct net_device *dev);
+static struct rtnl_link_stats64 *rtl8169_get_stats64(struct net_device *dev,
+						     struct rtnl_link_stats64
+						     *stats);
 static int rtl8169_rx_interrupt(struct net_device *, struct rtl8169_private *,
 				void __iomem *, u32 budget);
 static int rtl8169_change_mtu(struct net_device *dev, int new_mtu);
@@ -3454,7 +3476,7 @@  static void rtl_disable_msi(struct pci_dev *pdev, struct rtl8169_private *tp)
 static const struct net_device_ops rtl8169_netdev_ops = {
 	.ndo_open		= rtl8169_open,
 	.ndo_stop		= rtl8169_close,
-	.ndo_get_stats		= rtl8169_get_stats,
+	.ndo_get_stats64	= rtl8169_get_stats64,
 	.ndo_start_xmit		= rtl8169_start_xmit,
 	.ndo_tx_timeout		= rtl8169_tx_timeout,
 	.ndo_validate_addr	= eth_validate_addr,
@@ -4138,6 +4160,7 @@  rtl8169_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 
 	tp->rtl_fw = RTL_FIRMWARE_UNKNOWN;
 
+	tp->pcpu_stats = alloc_percpu(struct rtl8169_pcpu_stats);
 	rc = register_netdev(dev);
 	if (rc < 0)
 		goto err_out_msi_4;
@@ -4196,6 +4219,7 @@  static void __devexit rtl8169_remove_one(struct pci_dev *pdev)
 
 	cancel_delayed_work_sync(&tp->task);
 
+	free_percpu(tp->pcpu_stats);
 	unregister_netdev(dev);
 
 	rtl_release_firmware(tp);
@@ -5310,7 +5334,7 @@  static void rtl8169_tx_clear_range(struct rtl8169_private *tp, u32 start,
 			rtl8169_unmap_tx_skb(&tp->pci_dev->dev, tx_skb,
 					     tp->TxDescArray + entry);
 			if (skb) {
-				tp->dev->stats.tx_dropped++;
+				this_cpu_inc(tp->pcpu_stats->tx_dropped);
 				dev_kfree_skb(skb);
 				tx_skb->skb = NULL;
 			}
@@ -5562,12 +5586,12 @@  err_dma_1:
 	rtl8169_unmap_tx_skb(d, tp->tx_skb + entry, txd);
 err_dma_0:
 	dev_kfree_skb(skb);
-	dev->stats.tx_dropped++;
+	this_cpu_inc(tp->pcpu_stats->tx_dropped);
 	return NETDEV_TX_OK;
 
 err_stop_0:
 	netif_stop_queue(dev);
-	dev->stats.tx_dropped++;
+	this_cpu_inc(tp->pcpu_stats->tx_dropped);
 	return NETDEV_TX_BUSY;
 }
 
@@ -5641,8 +5665,13 @@  static void rtl8169_tx_interrupt(struct net_device *dev,
 		rtl8169_unmap_tx_skb(&tp->pci_dev->dev, tx_skb,
 				     tp->TxDescArray + entry);
 		if (status & LastFrag) {
-			dev->stats.tx_packets++;
-			dev->stats.tx_bytes += tx_skb->skb->len;
+			struct rtl8169_pcpu_stats *pcpu_stats;
+
+			pcpu_stats = this_cpu_ptr(tp->pcpu_stats);
+			u64_stats_update_begin(&pcpu_stats->syncp);
+			pcpu_stats->tx_packets++;
+			pcpu_stats->tx_bytes += tx_skb->skb->len;
+			u64_stats_update_end(&pcpu_stats->syncp);
 			dev_kfree_skb(tx_skb->skb);
 			tx_skb->skb = NULL;
 		}
@@ -5728,20 +5757,21 @@  static int rtl8169_rx_interrupt(struct net_device *dev,
 		if (unlikely(status & RxRES)) {
 			netif_info(tp, rx_err, dev, "Rx ERROR. status = %08x\n",
 				   status);
-			dev->stats.rx_errors++;
+			this_cpu_inc(tp->pcpu_stats->rx_errors);
 			if (status & (RxRWT | RxRUNT))
-				dev->stats.rx_length_errors++;
+				this_cpu_inc(tp->pcpu_stats->rx_length_errors);
 			if (status & RxCRC)
-				dev->stats.rx_crc_errors++;
+				this_cpu_inc(tp->pcpu_stats->rx_crc_errors);
 			if (status & RxFOVF) {
 				rtl8169_schedule_work(dev, rtl8169_reset_task);
-				dev->stats.rx_fifo_errors++;
+				this_cpu_inc(tp->pcpu_stats->rx_fifo_errors);
 			}
 			rtl8169_mark_to_asic(desc, rx_buf_sz);
 		} else {
 			struct sk_buff *skb;
 			dma_addr_t addr = le64_to_cpu(desc->addr);
 			int pkt_size = (status & 0x00003fff) - 4;
+			struct rtl8169_pcpu_stats *pcpu_stats;
 
 			/*
 			 * The driver does not support incoming fragmented
@@ -5749,8 +5779,8 @@  static int rtl8169_rx_interrupt(struct net_device *dev,
 			 * sized frames.
 			 */
 			if (unlikely(rtl8169_fragmented_frame(status))) {
-				dev->stats.rx_dropped++;
-				dev->stats.rx_length_errors++;
+				this_cpu_inc(tp->pcpu_stats->rx_dropped);
+				this_cpu_inc(tp->pcpu_stats->rx_length_errors);
 				rtl8169_mark_to_asic(desc, rx_buf_sz);
 				continue;
 			}
@@ -5759,7 +5789,7 @@  static int rtl8169_rx_interrupt(struct net_device *dev,
 						  tp, pkt_size, addr);
 			rtl8169_mark_to_asic(desc, rx_buf_sz);
 			if (!skb) {
-				dev->stats.rx_dropped++;
+				this_cpu_inc(tp->pcpu_stats->rx_dropped);
 				continue;
 			}
 
@@ -5771,8 +5801,11 @@  static int rtl8169_rx_interrupt(struct net_device *dev,
 
 			napi_gro_receive(&tp->napi, skb);
 
-			dev->stats.rx_bytes += pkt_size;
-			dev->stats.rx_packets++;
+			pcpu_stats = this_cpu_ptr(tp->pcpu_stats);
+			u64_stats_update_begin(&pcpu_stats->syncp);
+			pcpu_stats->rx_bytes += pkt_size;
+			pcpu_stats->rx_packets++;
+			u64_stats_update_end(&pcpu_stats->syncp);
 		}
 
 		/* Work around for AMD plateform. */
@@ -5916,7 +5949,8 @@  static void rtl8169_rx_missed(struct net_device *dev, void __iomem *ioaddr)
 	if (tp->mac_version > RTL_GIGA_MAC_VER_06)
 		return;
 
-	dev->stats.rx_missed_errors += (RTL_R32(RxMissed) & 0xffffff);
+	this_cpu_add(tp->pcpu_stats->rx_missed_errors,
+		     (RTL_R32(RxMissed) & 0xffffff));
 	RTL_W32(RxMissed, 0);
 }
 
@@ -6034,16 +6068,24 @@  static void rtl_set_rx_mode(struct net_device *dev)
 }
 
 /**
- *  rtl8169_get_stats - Get rtl8169 read/write statistics
+ *  rtl8169_get_stats64 - Get rtl8169 read/write statistics
  *  @dev: The Ethernet Device to get statistics for
  *
  *  Get TX/RX statistics for rtl8169
  */
-static struct net_device_stats *rtl8169_get_stats(struct net_device *dev)
+static struct rtnl_link_stats64 *
+rtl8169_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats)
 {
 	struct rtl8169_private *tp = netdev_priv(dev);
 	void __iomem *ioaddr = tp->mmio_addr;
+	struct rtl8169_pcpu_stats *pcpu_stats;
+	u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
+	unsigned long rx_dropped = 0, tx_dropped = 0, rx_length_errors = 0;
+	unsigned long rx_errors = 0, rx_crc_errors = 0, rx_fifo_errors = 0;
+	unsigned long rx_missed_errors = 0;
 	unsigned long flags;
+	unsigned int start;
+	int i;
 
 	if (netif_running(dev)) {
 		spin_lock_irqsave(&tp->lock, flags);
@@ -6051,7 +6093,38 @@  static struct net_device_stats *rtl8169_get_stats(struct net_device *dev)
 		spin_unlock_irqrestore(&tp->lock, flags);
 	}
 
-	return &dev->stats;
+	for_each_possible_cpu(i) {
+		pcpu_stats = per_cpu_ptr(tp->pcpu_stats, i);
+		do {
+			start = u64_stats_fetch_begin_bh(&pcpu_stats->syncp);
+			rx_packets	= pcpu_stats->rx_packets;
+			rx_bytes	= pcpu_stats->rx_bytes;
+			tx_packets	= pcpu_stats->tx_packets;
+			tx_bytes	= pcpu_stats->tx_bytes;
+		} while (u64_stats_fetch_retry_bh(&pcpu_stats->syncp, start));
+
+		stats->rx_packets	+= rx_packets;
+		stats->rx_bytes		+= rx_bytes;
+		stats->tx_packets	+= tx_packets;
+		stats->tx_bytes		+= tx_bytes;
+
+		rx_dropped		+= pcpu_stats->rx_dropped;
+		tx_dropped		+= pcpu_stats->tx_dropped;
+		rx_length_errors	+= pcpu_stats->rx_length_errors;
+		rx_errors		+= pcpu_stats->rx_errors;
+		rx_crc_errors		+= pcpu_stats->rx_crc_errors;
+		rx_fifo_errors		+= pcpu_stats->rx_fifo_errors;
+		rx_missed_errors	+= pcpu_stats->rx_missed_errors;
+	}
+	stats->rx_dropped	= rx_dropped;
+	stats->tx_dropped	= tx_dropped;
+	stats->rx_length_errors = rx_length_errors;
+	stats->rx_errors	= rx_errors;
+	stats->rx_crc_errors	= rx_crc_errors;
+	stats->rx_fifo_errors	= rx_fifo_errors;
+	stats->rx_missed_errors = rx_missed_errors;
+
+	return stats;
 }
 
 static void rtl8169_net_suspend(struct net_device *dev)