diff mbox

[net-next,v4,5/8] forcedeth: implement ndo_get_stats64() API

Message ID 6c785722f068deef5ff546f53b8011ecff43a4c1.1321384662.git.david.decotigny@google.com
State Deferred, archived
Delegated to: David Miller
Headers show

Commit Message

david decotigny Nov. 15, 2011, 7:25 p.m. UTC
This commit implements the ndo_get_stats64() API for forcedeth. Since
hardware stats are being updated from different contexts (process and
timer), this commit adds protection (a dedicated spinlock, hwstats_lock).
For software stats, it relies on the u64_stats_sync.h API.

Tested:
  - 16-way SMP x86_64 ->
    RX bytes:7244556582 (7.2 GB)  TX bytes:181904254 (181.9 MB)
  - pktgen + loopback: identical rx_bytes/tx_bytes and rx_packets/tx_packets



Signed-off-by: David Decotigny <david.decotigny@google.com>
---
 drivers/net/ethernet/nvidia/forcedeth.c |  195 +++++++++++++++++++++++--------
 1 files changed, 144 insertions(+), 51 deletions(-)

Comments

david decotigny Nov. 15, 2011, 10:01 p.m. UTC | #1
Hi all,


I'm afraid this version (http://patchwork.ozlabs.org/patch/125861/) is wrong.

Each software stat field is updated by a single writer. But these
different stats are guarded by a single seqcount, so effectively
different writers are updating the same seqcount. The question is: is
it OK for the seqcount to be updated concurrently without protection?
Is the seqcount guaranteed to be correctly updated from the readers'
perspective? Or should I serialize the sections that update the
seqcount?

If I should protect it, then I need to revisit that patch: I'd
prefer not to lock in the fast paths just because of the stats. I
could, for example, revert to v3 (using atomic_t stats). Would you have
any recommendations or suggestions?

Thanks! Regards,

--
David Decotigny



On Tue, Nov 15, 2011 at 11:25 AM, David Decotigny
<david.decotigny@google.com> wrote:
> This commit implements the ndo_get_stats64() API for forcedeth. Since
> hardware stats are being updated from different contexts (process and
> timer), this commit adds protection (a dedicated spinlock, hwstats_lock).
> For software stats, it relies on the u64_stats_sync.h API.
>
> Tested:
>  - 16-way SMP x86_64 ->
>    RX bytes:7244556582 (7.2 GB)  TX bytes:181904254 (181.9 MB)
>  - pktgen + loopback: identical rx_bytes/tx_bytes and rx_packets/tx_packets
>
>
>
> Signed-off-by: David Decotigny <david.decotigny@google.com>
> ---
>  drivers/net/ethernet/nvidia/forcedeth.c |  195 +++++++++++++++++++++++--------
>  1 files changed, 144 insertions(+), 51 deletions(-)
>
> diff --git a/drivers/net/ethernet/nvidia/forcedeth.c b/drivers/net/ethernet/nvidia/forcedeth.c
> index ee8cce5..ff01d5e 100644
> --- a/drivers/net/ethernet/nvidia/forcedeth.c
> +++ b/drivers/net/ethernet/nvidia/forcedeth.c
> @@ -65,7 +65,8 @@
>  #include <linux/slab.h>
>  #include <linux/uaccess.h>
>  #include <linux/prefetch.h>
> -#include  <linux/io.h>
> +#include <linux/u64_stats_sync.h>
> +#include <linux/io.h>
>
>  #include <asm/irq.h>
>  #include <asm/system.h>
> @@ -736,6 +737,18 @@ struct nv_skb_map {
>  * - tx setup is lockless: it relies on netif_tx_lock. Actual submission
>  *     needs netdev_priv(dev)->lock :-(
>  * - set_multicast_list: preparation lockless, relies on netif_tx_lock.
> + *
> + * Hardware stats updates are protected by hwstats_lock:
> + * - updated by nv_do_stats_poll (timer). This is meant to avoid
> + *   integer wraparound in the NIC stats registers, at low frequency
> + *   (0.1 Hz)
> + * - updated by nv_get_ethtool_stats + nv_get_stats64
> + *
> + * Software stats are accessed only through a 64b synchronization
> + * point and are not subject to other synchronization techniques (one
> + * unique updating thread for each stat [single queue RX/TX fast
> + * paths], or callers already synchronized [for tx_dropped, except from
> + * nv_open/nv_close]).
>  */
>
>  /* in dev: base, irq */
> @@ -745,9 +758,13 @@ struct fe_priv {
>        struct net_device *dev;
>        struct napi_struct napi;
>
> -       /* General data:
> -        * Locking: spin_lock(&np->lock); */
> +       /* hardware stats are updated in syscall and timer */
> +       spinlock_t hwstats_lock;
>        struct nv_ethtool_stats estats;
> +
> +       /* software stats are accessed through a 64b synchronization point */
> +       struct u64_stats_sync swstats_syncp;
> +
>        int in_shutdown;
>        u32 linkspeed;
>        int duplex;
> @@ -798,6 +815,11 @@ struct fe_priv {
>        u32 nic_poll_irq;
>        int rx_ring_size;
>
> +       /* RX software stats */
> +       u64 stat_rx_packets;
> +       u64 stat_rx_bytes; /* not always available in HW */
> +       u64 stat_rx_missed_errors;
> +
>        /* media detection workaround.
>         * Locking: Within irq hander or disable_irq+spin_lock(&np->lock);
>         */
> @@ -820,6 +842,11 @@ struct fe_priv {
>        struct nv_skb_map *tx_end_flip;
>        int tx_stop;
>
> +       /* TX software stats */
> +       u64 stat_tx_packets; /* not always available in HW */
> +       u64 stat_tx_bytes;
> +       u64 stat_tx_dropped;
> +
>        /* msi/msi-x fields */
>        u32 msi_flags;
>        struct msix_entry msi_x_entry[NV_MSI_X_MAX_VECTORS];
> @@ -1635,11 +1662,19 @@ static void nv_mac_reset(struct net_device *dev)
>        pci_push(base);
>  }
>
> -static void nv_get_hw_stats(struct net_device *dev)
> +/* Caller must appropriately lock netdev_priv(dev)->hwstats_lock */
> +static void nv_update_stats(struct net_device *dev)
>  {
>        struct fe_priv *np = netdev_priv(dev);
>        u8 __iomem *base = get_hwbase(dev);
>
> +       /* If it happens that this is run in top-half context, then
> +        * replace the spin_lock of hwstats_lock with
> +        * spin_lock_irqsave() in calling functions. */
> +       WARN_ONCE(in_irq(), "forcedeth: estats spin_lock(_bh) from top-half");
> +       assert_spin_locked(&np->hwstats_lock);
> +
> +       /* query hardware */
>        np->estats.tx_bytes += readl(base + NvRegTxCnt);
>        np->estats.tx_zero_rexmt += readl(base + NvRegTxZeroReXmt);
>        np->estats.tx_one_rexmt += readl(base + NvRegTxOneReXmt);
> @@ -1698,40 +1733,67 @@ static void nv_get_hw_stats(struct net_device *dev)
>  }
>
>  /*
> - * nv_get_stats: dev->get_stats function
> + * nv_get_stats64: dev->ndo_get_stats64 function
>  * Get latest stats value from the nic.
>  * Called with read_lock(&dev_base_lock) held for read -
>  * only synchronized against unregister_netdevice.
>  */
> -static struct net_device_stats *nv_get_stats(struct net_device *dev)
> +static struct rtnl_link_stats64*
> +nv_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *storage)
> +       __acquires(&netdev_priv(dev)->hwstats_lock)
> +       __releases(&netdev_priv(dev)->hwstats_lock)
>  {
>        struct fe_priv *np = netdev_priv(dev);
> +       unsigned int syncp_start;
> +
> +       /*
> +        * Note: because HW stats are not always available and for
> +        * consistency reasons, the following ifconfig stats are
> +        * managed by software: rx_bytes, tx_bytes, rx_packets and
> +        * tx_packets. The related hardware stats reported by ethtool
> +        * should be equivalent to these ifconfig stats, with 4
> +        * additional bytes per packet (Ethernet FCS CRC).
> +        */
> +
> +       /* software stats */
> +       do {
> +               syncp_start = u64_stats_fetch_begin(&np->swstats_syncp);
> +               storage->rx_packets       = np->stat_rx_packets;
> +               storage->tx_packets       = np->stat_tx_packets;
> +               storage->rx_bytes         = np->stat_rx_bytes;
> +               storage->tx_bytes         = np->stat_tx_bytes;
> +               storage->tx_dropped       = np->stat_tx_dropped;
> +               storage->rx_missed_errors = np->stat_rx_missed_errors;
> +       } while (u64_stats_fetch_retry(&np->swstats_syncp, syncp_start));
>
>        /* If the nic supports hw counters then retrieve latest values */
> -       if (np->driver_data & (DEV_HAS_STATISTICS_V1|DEV_HAS_STATISTICS_V2|DEV_HAS_STATISTICS_V3)) {
> -               nv_get_hw_stats(dev);
> +       if (np->driver_data & DEV_HAS_STATISTICS_V123) {
> +               spin_lock_bh(&np->hwstats_lock);
>
> -               /*
> -                * Note: because HW stats are not always available and
> -                * for consistency reasons, the following ifconfig
> -                * stats are managed by software: rx_bytes, tx_bytes,
> -                * rx_packets and tx_packets. The related hardware
> -                * stats reported by ethtool should be equivalent to
> -                * these ifconfig stats, with 4 additional bytes per
> -                * packet (Ethernet FCS CRC).
> -                */
> +               nv_update_stats(dev);
> +
> +               /* generic stats */
> +               storage->rx_errors = np->estats.rx_errors_total;
> +               storage->tx_errors = np->estats.tx_errors_total;
> +
> +               /* meaningful only when NIC supports stats v3 */
> +               storage->multicast = np->estats.rx_multicast;
> +
> +               /* detailed rx_errors */
> +               storage->rx_length_errors = np->estats.rx_length_error;
> +               storage->rx_over_errors   = np->estats.rx_over_errors;
> +               storage->rx_crc_errors    = np->estats.rx_crc_errors;
> +               storage->rx_frame_errors  = np->estats.rx_frame_align_error;
> +               storage->rx_fifo_errors   = np->estats.rx_drop_frame;
>
> -               /* copy to net_device stats */
> -               dev->stats.tx_fifo_errors = np->estats.tx_fifo_errors;
> -               dev->stats.tx_carrier_errors = np->estats.tx_carrier_errors;
> -               dev->stats.rx_crc_errors = np->estats.rx_crc_errors;
> -               dev->stats.rx_over_errors = np->estats.rx_over_errors;
> -               dev->stats.rx_fifo_errors = np->estats.rx_drop_frame;
> -               dev->stats.rx_errors = np->estats.rx_errors_total;
> -               dev->stats.tx_errors = np->estats.tx_errors_total;
> +               /* detailed tx_errors */
> +               storage->tx_carrier_errors = np->estats.tx_carrier_errors;
> +               storage->tx_fifo_errors    = np->estats.tx_fifo_errors;
> +
> +               spin_unlock_bh(&np->hwstats_lock);
>        }
>
> -       return &dev->stats;
> +       return storage;
>  }
>
>  /*
> @@ -1932,8 +1994,11 @@ static void nv_drain_tx(struct net_device *dev)
>                        np->tx_ring.ex[i].bufhigh = 0;
>                        np->tx_ring.ex[i].buflow = 0;
>                }
> -               if (nv_release_txskb(np, &np->tx_skb[i]))
> -                       dev->stats.tx_dropped++;
> +               if (nv_release_txskb(np, &np->tx_skb[i])) {
> +                       u64_stats_update_begin(&np->swstats_syncp);
> +                       np->stat_tx_dropped++;
> +                       u64_stats_update_end(&np->swstats_syncp);
> +               }
>                np->tx_skb[i].dma = 0;
>                np->tx_skb[i].dma_len = 0;
>                np->tx_skb[i].dma_single = 0;
> @@ -2390,11 +2455,14 @@ static int nv_tx_done(struct net_device *dev, int limit)
>                if (np->desc_ver == DESC_VER_1) {
>                        if (flags & NV_TX_LASTPACKET) {
>                                if (flags & NV_TX_ERROR) {
> -                                       if ((flags & NV_TX_RETRYERROR) && !(flags & NV_TX_RETRYCOUNT_MASK))
> +                                       if ((flags & NV_TX_RETRYERROR)
> +                                           && !(flags & NV_TX_RETRYCOUNT_MASK))
>                                                nv_legacybackoff_reseed(dev);
>                                } else {
> -                                       dev->stats.tx_packets++;
> -                                       dev->stats.tx_bytes += np->get_tx_ctx->skb->len;
> +                                       u64_stats_update_begin(&np->swstats_syncp);
> +                                       np->stat_tx_packets++;
> +                                       np->stat_tx_bytes += np->get_tx_ctx->skb->len;
> +                                       u64_stats_update_end(&np->swstats_syncp);
>                                }
>                                dev_kfree_skb_any(np->get_tx_ctx->skb);
>                                np->get_tx_ctx->skb = NULL;
> @@ -2403,11 +2471,14 @@ static int nv_tx_done(struct net_device *dev, int limit)
>                } else {
>                        if (flags & NV_TX2_LASTPACKET) {
>                                if (flags & NV_TX2_ERROR) {
> -                                       if ((flags & NV_TX2_RETRYERROR) && !(flags & NV_TX2_RETRYCOUNT_MASK))
> +                                       if ((flags & NV_TX2_RETRYERROR)
> +                                           && !(flags & NV_TX2_RETRYCOUNT_MASK))
>                                                nv_legacybackoff_reseed(dev);
>                                } else {
> -                                       dev->stats.tx_packets++;
> -                                       dev->stats.tx_bytes += np->get_tx_ctx->skb->len;
> +                                       u64_stats_update_begin(&np->swstats_syncp);
> +                                       np->stat_tx_packets++;
> +                                       np->stat_tx_bytes += np->get_tx_ctx->skb->len;
> +                                       u64_stats_update_end(&np->swstats_syncp);
>                                }
>                                dev_kfree_skb_any(np->get_tx_ctx->skb);
>                                np->get_tx_ctx->skb = NULL;
> @@ -2441,15 +2512,18 @@ static int nv_tx_done_optimized(struct net_device *dev, int limit)
>
>                if (flags & NV_TX2_LASTPACKET) {
>                        if (flags & NV_TX2_ERROR) {
> -                               if ((flags & NV_TX2_RETRYERROR) && !(flags & NV_TX2_RETRYCOUNT_MASK)) {
> +                               if ((flags & NV_TX2_RETRYERROR)
> +                                   && !(flags & NV_TX2_RETRYCOUNT_MASK)) {
>                                        if (np->driver_data & DEV_HAS_GEAR_MODE)
>                                                nv_gear_backoff_reseed(dev);
>                                        else
>                                                nv_legacybackoff_reseed(dev);
>                                }
>                        } else {
> -                               dev->stats.tx_packets++;
> -                               dev->stats.tx_bytes += np->get_tx_ctx->skb->len;
> +                                       u64_stats_update_begin(&np->swstats_syncp);
> +                                       np->stat_tx_packets++;
> +                                       np->stat_tx_bytes += np->get_tx_ctx->skb->len;
> +                                       u64_stats_update_end(&np->swstats_syncp);
>                        }
>
>                        dev_kfree_skb_any(np->get_tx_ctx->skb);
> @@ -2662,8 +2736,11 @@ static int nv_rx_process(struct net_device *dev, int limit)
>                                        }
>                                        /* the rest are hard errors */
>                                        else {
> -                                               if (flags & NV_RX_MISSEDFRAME)
> -                                                       dev->stats.rx_missed_errors++;
> +                                               if (flags & NV_RX_MISSEDFRAME) {
> +                                                       u64_stats_update_begin(&np->swstats_syncp);
> +                                                       np->stat_rx_missed_errors++;
> +                                                       u64_stats_update_end(&np->swstats_syncp);
> +                                               }
>                                                dev_kfree_skb(skb);
>                                                goto next_pkt;
>                                        }
> @@ -2706,8 +2783,10 @@ static int nv_rx_process(struct net_device *dev, int limit)
>                skb_put(skb, len);
>                skb->protocol = eth_type_trans(skb, dev);
>                napi_gro_receive(&np->napi, skb);
> -               dev->stats.rx_packets++;
> -               dev->stats.rx_bytes += len;
> +               u64_stats_update_begin(&np->swstats_syncp);
> +               np->stat_rx_packets++;
> +               np->stat_rx_bytes += len;
> +               u64_stats_update_end(&np->swstats_syncp);
>  next_pkt:
>                if (unlikely(np->get_rx.orig++ == np->last_rx.orig))
>                        np->get_rx.orig = np->first_rx.orig;
> @@ -2790,8 +2869,10 @@ static int nv_rx_process_optimized(struct net_device *dev, int limit)
>                                __vlan_hwaccel_put_tag(skb, vid);
>                        }
>                        napi_gro_receive(&np->napi, skb);
> -                       dev->stats.rx_packets++;
> -                       dev->stats.rx_bytes += len;
> +                       u64_stats_update_begin(&np->swstats_syncp);
> +                       np->stat_rx_packets++;
> +                       np->stat_rx_bytes += len;
> +                       u64_stats_update_end(&np->swstats_syncp);
>                } else {
>                        dev_kfree_skb(skb);
>                }
> @@ -4000,11 +4081,18 @@ static void nv_poll_controller(struct net_device *dev)
>  #endif
>
>  static void nv_do_stats_poll(unsigned long data)
> +       __acquires(&netdev_priv(dev)->hwstats_lock)
> +       __releases(&netdev_priv(dev)->hwstats_lock)
>  {
>        struct net_device *dev = (struct net_device *) data;
>        struct fe_priv *np = netdev_priv(dev);
>
> -       nv_get_hw_stats(dev);
> +       /* If lock is currently taken, the stats are being refreshed
> +        * and hence fresh enough */
> +       if (spin_trylock(&np->hwstats_lock)) {
> +               nv_update_stats(dev);
> +               spin_unlock(&np->hwstats_lock);
> +       }
>
>        if (!np->in_shutdown)
>                mod_timer(&np->stats_poll,
> @@ -4711,14 +4799,18 @@ static int nv_get_sset_count(struct net_device *dev, int sset)
>        }
>  }
>
> -static void nv_get_ethtool_stats(struct net_device *dev, struct ethtool_stats *estats, u64 *buffer)
> +static void nv_get_ethtool_stats(struct net_device *dev,
> +                                struct ethtool_stats *estats, u64 *buffer)
> +       __acquires(&netdev_priv(dev)->hwstats_lock)
> +       __releases(&netdev_priv(dev)->hwstats_lock)
>  {
>        struct fe_priv *np = netdev_priv(dev);
>
> -       /* update stats */
> -       nv_get_hw_stats(dev);
> -
> -       memcpy(buffer, &np->estats, nv_get_sset_count(dev, ETH_SS_STATS)*sizeof(u64));
> +       spin_lock_bh(&np->hwstats_lock);
> +       nv_update_stats(dev);
> +       memcpy(buffer, &np->estats,
> +              nv_get_sset_count(dev, ETH_SS_STATS)*sizeof(u64));
> +       spin_unlock_bh(&np->hwstats_lock);
>  }
>
>  static int nv_link_test(struct net_device *dev)
> @@ -5362,7 +5454,7 @@ static int nv_close(struct net_device *dev)
>  static const struct net_device_ops nv_netdev_ops = {
>        .ndo_open               = nv_open,
>        .ndo_stop               = nv_close,
> -       .ndo_get_stats          = nv_get_stats,
> +       .ndo_get_stats64        = nv_get_stats64,
>        .ndo_start_xmit         = nv_start_xmit,
>        .ndo_tx_timeout         = nv_tx_timeout,
>        .ndo_change_mtu         = nv_change_mtu,
> @@ -5379,7 +5471,7 @@ static const struct net_device_ops nv_netdev_ops = {
>  static const struct net_device_ops nv_netdev_ops_optimized = {
>        .ndo_open               = nv_open,
>        .ndo_stop               = nv_close,
> -       .ndo_get_stats          = nv_get_stats,
> +       .ndo_get_stats64        = nv_get_stats64,
>        .ndo_start_xmit         = nv_start_xmit_optimized,
>        .ndo_tx_timeout         = nv_tx_timeout,
>        .ndo_change_mtu         = nv_change_mtu,
> @@ -5418,6 +5510,7 @@ static int __devinit nv_probe(struct pci_dev *pci_dev, const struct pci_device_i
>        np->dev = dev;
>        np->pci_dev = pci_dev;
>        spin_lock_init(&np->lock);
> +       spin_lock_init(&np->hwstats_lock);
>        SET_NETDEV_DEV(dev, &pci_dev->dev);
>
>        init_timer(&np->oom_kick);
> --
> 1.7.3.1
>
>
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/drivers/net/ethernet/nvidia/forcedeth.c b/drivers/net/ethernet/nvidia/forcedeth.c
index ee8cce5..ff01d5e 100644
--- a/drivers/net/ethernet/nvidia/forcedeth.c
+++ b/drivers/net/ethernet/nvidia/forcedeth.c
@@ -65,7 +65,8 @@ 
 #include <linux/slab.h>
 #include <linux/uaccess.h>
 #include <linux/prefetch.h>
-#include  <linux/io.h>
+#include <linux/u64_stats_sync.h>
+#include <linux/io.h>
 
 #include <asm/irq.h>
 #include <asm/system.h>
@@ -736,6 +737,18 @@  struct nv_skb_map {
  * - tx setup is lockless: it relies on netif_tx_lock. Actual submission
  *	needs netdev_priv(dev)->lock :-(
  * - set_multicast_list: preparation lockless, relies on netif_tx_lock.
+ *
+ * Hardware stats updates are protected by hwstats_lock:
+ * - updated by nv_do_stats_poll (timer). This is meant to avoid
+ *   integer wraparound in the NIC stats registers, at low frequency
+ *   (0.1 Hz)
+ * - updated by nv_get_ethtool_stats + nv_get_stats64
+ *
+ * Software stats are accessed only through a 64b synchronization
+ * point and are not subject to other synchronization techniques (one
+ * unique updating thread for each stat [single queue RX/TX fast
+ * paths], or callers already synchronized [for tx_dropped, except from
+ * nv_open/nv_close]).
  */
 
 /* in dev: base, irq */
@@ -745,9 +758,13 @@  struct fe_priv {
 	struct net_device *dev;
 	struct napi_struct napi;
 
-	/* General data:
-	 * Locking: spin_lock(&np->lock); */
+	/* hardware stats are updated in syscall and timer */
+	spinlock_t hwstats_lock;
 	struct nv_ethtool_stats estats;
+
+	/* software stats are accessed through a 64b synchronization point */
+	struct u64_stats_sync swstats_syncp;
+
 	int in_shutdown;
 	u32 linkspeed;
 	int duplex;
@@ -798,6 +815,11 @@  struct fe_priv {
 	u32 nic_poll_irq;
 	int rx_ring_size;
 
+	/* RX software stats */
+	u64 stat_rx_packets;
+	u64 stat_rx_bytes; /* not always available in HW */
+	u64 stat_rx_missed_errors;
+
 	/* media detection workaround.
 	 * Locking: Within irq hander or disable_irq+spin_lock(&np->lock);
 	 */
@@ -820,6 +842,11 @@  struct fe_priv {
 	struct nv_skb_map *tx_end_flip;
 	int tx_stop;
 
+	/* TX software stats */
+	u64 stat_tx_packets; /* not always available in HW */
+	u64 stat_tx_bytes;
+	u64 stat_tx_dropped;
+
 	/* msi/msi-x fields */
 	u32 msi_flags;
 	struct msix_entry msi_x_entry[NV_MSI_X_MAX_VECTORS];
@@ -1635,11 +1662,19 @@  static void nv_mac_reset(struct net_device *dev)
 	pci_push(base);
 }
 
-static void nv_get_hw_stats(struct net_device *dev)
+/* Caller must appropriately lock netdev_priv(dev)->hwstats_lock */
+static void nv_update_stats(struct net_device *dev)
 {
 	struct fe_priv *np = netdev_priv(dev);
 	u8 __iomem *base = get_hwbase(dev);
 
+	/* If it happens that this is run in top-half context, then
+	 * replace the spin_lock of hwstats_lock with
+	 * spin_lock_irqsave() in calling functions. */
+	WARN_ONCE(in_irq(), "forcedeth: estats spin_lock(_bh) from top-half");
+	assert_spin_locked(&np->hwstats_lock);
+
+	/* query hardware */
 	np->estats.tx_bytes += readl(base + NvRegTxCnt);
 	np->estats.tx_zero_rexmt += readl(base + NvRegTxZeroReXmt);
 	np->estats.tx_one_rexmt += readl(base + NvRegTxOneReXmt);
@@ -1698,40 +1733,67 @@  static void nv_get_hw_stats(struct net_device *dev)
 }
 
 /*
- * nv_get_stats: dev->get_stats function
+ * nv_get_stats64: dev->ndo_get_stats64 function
  * Get latest stats value from the nic.
  * Called with read_lock(&dev_base_lock) held for read -
  * only synchronized against unregister_netdevice.
  */
-static struct net_device_stats *nv_get_stats(struct net_device *dev)
+static struct rtnl_link_stats64*
+nv_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *storage)
+	__acquires(&netdev_priv(dev)->hwstats_lock)
+	__releases(&netdev_priv(dev)->hwstats_lock)
 {
 	struct fe_priv *np = netdev_priv(dev);
+	unsigned int syncp_start;
+
+	/*
+	 * Note: because HW stats are not always available and for
+	 * consistency reasons, the following ifconfig stats are
+	 * managed by software: rx_bytes, tx_bytes, rx_packets and
+	 * tx_packets. The related hardware stats reported by ethtool
+	 * should be equivalent to these ifconfig stats, with 4
+	 * additional bytes per packet (Ethernet FCS CRC).
+	 */
+
+	/* software stats */
+	do {
+		syncp_start = u64_stats_fetch_begin(&np->swstats_syncp);
+		storage->rx_packets       = np->stat_rx_packets;
+		storage->tx_packets       = np->stat_tx_packets;
+		storage->rx_bytes         = np->stat_rx_bytes;
+		storage->tx_bytes         = np->stat_tx_bytes;
+		storage->tx_dropped       = np->stat_tx_dropped;
+		storage->rx_missed_errors = np->stat_rx_missed_errors;
+	} while (u64_stats_fetch_retry(&np->swstats_syncp, syncp_start));
 
 	/* If the nic supports hw counters then retrieve latest values */
-	if (np->driver_data & (DEV_HAS_STATISTICS_V1|DEV_HAS_STATISTICS_V2|DEV_HAS_STATISTICS_V3)) {
-		nv_get_hw_stats(dev);
+	if (np->driver_data & DEV_HAS_STATISTICS_V123) {
+		spin_lock_bh(&np->hwstats_lock);
 
-		/*
-		 * Note: because HW stats are not always available and
-		 * for consistency reasons, the following ifconfig
-		 * stats are managed by software: rx_bytes, tx_bytes,
-		 * rx_packets and tx_packets. The related hardware
-		 * stats reported by ethtool should be equivalent to
-		 * these ifconfig stats, with 4 additional bytes per
-		 * packet (Ethernet FCS CRC).
-		 */
+		nv_update_stats(dev);
+
+		/* generic stats */
+		storage->rx_errors = np->estats.rx_errors_total;
+		storage->tx_errors = np->estats.tx_errors_total;
+
+		/* meaningful only when NIC supports stats v3 */
+		storage->multicast = np->estats.rx_multicast;
+
+		/* detailed rx_errors */
+		storage->rx_length_errors = np->estats.rx_length_error;
+		storage->rx_over_errors   = np->estats.rx_over_errors;
+		storage->rx_crc_errors    = np->estats.rx_crc_errors;
+		storage->rx_frame_errors  = np->estats.rx_frame_align_error;
+		storage->rx_fifo_errors   = np->estats.rx_drop_frame;
 
-		/* copy to net_device stats */
-		dev->stats.tx_fifo_errors = np->estats.tx_fifo_errors;
-		dev->stats.tx_carrier_errors = np->estats.tx_carrier_errors;
-		dev->stats.rx_crc_errors = np->estats.rx_crc_errors;
-		dev->stats.rx_over_errors = np->estats.rx_over_errors;
-		dev->stats.rx_fifo_errors = np->estats.rx_drop_frame;
-		dev->stats.rx_errors = np->estats.rx_errors_total;
-		dev->stats.tx_errors = np->estats.tx_errors_total;
+		/* detailed tx_errors */
+		storage->tx_carrier_errors = np->estats.tx_carrier_errors;
+		storage->tx_fifo_errors    = np->estats.tx_fifo_errors;
+
+		spin_unlock_bh(&np->hwstats_lock);
 	}
 
-	return &dev->stats;
+	return storage;
 }
 
 /*
@@ -1932,8 +1994,11 @@  static void nv_drain_tx(struct net_device *dev)
 			np->tx_ring.ex[i].bufhigh = 0;
 			np->tx_ring.ex[i].buflow = 0;
 		}
-		if (nv_release_txskb(np, &np->tx_skb[i]))
-			dev->stats.tx_dropped++;
+		if (nv_release_txskb(np, &np->tx_skb[i])) {
+			u64_stats_update_begin(&np->swstats_syncp);
+			np->stat_tx_dropped++;
+			u64_stats_update_end(&np->swstats_syncp);
+		}
 		np->tx_skb[i].dma = 0;
 		np->tx_skb[i].dma_len = 0;
 		np->tx_skb[i].dma_single = 0;
@@ -2390,11 +2455,14 @@  static int nv_tx_done(struct net_device *dev, int limit)
 		if (np->desc_ver == DESC_VER_1) {
 			if (flags & NV_TX_LASTPACKET) {
 				if (flags & NV_TX_ERROR) {
-					if ((flags & NV_TX_RETRYERROR) && !(flags & NV_TX_RETRYCOUNT_MASK))
+					if ((flags & NV_TX_RETRYERROR)
+					    && !(flags & NV_TX_RETRYCOUNT_MASK))
 						nv_legacybackoff_reseed(dev);
 				} else {
-					dev->stats.tx_packets++;
-					dev->stats.tx_bytes += np->get_tx_ctx->skb->len;
+					u64_stats_update_begin(&np->swstats_syncp);
+					np->stat_tx_packets++;
+					np->stat_tx_bytes += np->get_tx_ctx->skb->len;
+					u64_stats_update_end(&np->swstats_syncp);
 				}
 				dev_kfree_skb_any(np->get_tx_ctx->skb);
 				np->get_tx_ctx->skb = NULL;
@@ -2403,11 +2471,14 @@  static int nv_tx_done(struct net_device *dev, int limit)
 		} else {
 			if (flags & NV_TX2_LASTPACKET) {
 				if (flags & NV_TX2_ERROR) {
-					if ((flags & NV_TX2_RETRYERROR) && !(flags & NV_TX2_RETRYCOUNT_MASK))
+					if ((flags & NV_TX2_RETRYERROR)
+					    && !(flags & NV_TX2_RETRYCOUNT_MASK))
 						nv_legacybackoff_reseed(dev);
 				} else {
-					dev->stats.tx_packets++;
-					dev->stats.tx_bytes += np->get_tx_ctx->skb->len;
+					u64_stats_update_begin(&np->swstats_syncp);
+					np->stat_tx_packets++;
+					np->stat_tx_bytes += np->get_tx_ctx->skb->len;
+					u64_stats_update_end(&np->swstats_syncp);
 				}
 				dev_kfree_skb_any(np->get_tx_ctx->skb);
 				np->get_tx_ctx->skb = NULL;
@@ -2441,15 +2512,18 @@  static int nv_tx_done_optimized(struct net_device *dev, int limit)
 
 		if (flags & NV_TX2_LASTPACKET) {
 			if (flags & NV_TX2_ERROR) {
-				if ((flags & NV_TX2_RETRYERROR) && !(flags & NV_TX2_RETRYCOUNT_MASK)) {
+				if ((flags & NV_TX2_RETRYERROR)
+				    && !(flags & NV_TX2_RETRYCOUNT_MASK)) {
 					if (np->driver_data & DEV_HAS_GEAR_MODE)
 						nv_gear_backoff_reseed(dev);
 					else
 						nv_legacybackoff_reseed(dev);
 				}
 			} else {
-				dev->stats.tx_packets++;
-				dev->stats.tx_bytes += np->get_tx_ctx->skb->len;
+					u64_stats_update_begin(&np->swstats_syncp);
+					np->stat_tx_packets++;
+					np->stat_tx_bytes += np->get_tx_ctx->skb->len;
+					u64_stats_update_end(&np->swstats_syncp);
 			}
 
 			dev_kfree_skb_any(np->get_tx_ctx->skb);
@@ -2662,8 +2736,11 @@  static int nv_rx_process(struct net_device *dev, int limit)
 					}
 					/* the rest are hard errors */
 					else {
-						if (flags & NV_RX_MISSEDFRAME)
-							dev->stats.rx_missed_errors++;
+						if (flags & NV_RX_MISSEDFRAME) {
+							u64_stats_update_begin(&np->swstats_syncp);
+							np->stat_rx_missed_errors++;
+							u64_stats_update_end(&np->swstats_syncp);
+						}
 						dev_kfree_skb(skb);
 						goto next_pkt;
 					}
@@ -2706,8 +2783,10 @@  static int nv_rx_process(struct net_device *dev, int limit)
 		skb_put(skb, len);
 		skb->protocol = eth_type_trans(skb, dev);
 		napi_gro_receive(&np->napi, skb);
-		dev->stats.rx_packets++;
-		dev->stats.rx_bytes += len;
+		u64_stats_update_begin(&np->swstats_syncp);
+		np->stat_rx_packets++;
+		np->stat_rx_bytes += len;
+		u64_stats_update_end(&np->swstats_syncp);
 next_pkt:
 		if (unlikely(np->get_rx.orig++ == np->last_rx.orig))
 			np->get_rx.orig = np->first_rx.orig;
@@ -2790,8 +2869,10 @@  static int nv_rx_process_optimized(struct net_device *dev, int limit)
 				__vlan_hwaccel_put_tag(skb, vid);
 			}
 			napi_gro_receive(&np->napi, skb);
-			dev->stats.rx_packets++;
-			dev->stats.rx_bytes += len;
+			u64_stats_update_begin(&np->swstats_syncp);
+			np->stat_rx_packets++;
+			np->stat_rx_bytes += len;
+			u64_stats_update_end(&np->swstats_syncp);
 		} else {
 			dev_kfree_skb(skb);
 		}
@@ -4000,11 +4081,18 @@  static void nv_poll_controller(struct net_device *dev)
 #endif
 
 static void nv_do_stats_poll(unsigned long data)
+	__acquires(&netdev_priv(dev)->hwstats_lock)
+	__releases(&netdev_priv(dev)->hwstats_lock)
 {
 	struct net_device *dev = (struct net_device *) data;
 	struct fe_priv *np = netdev_priv(dev);
 
-	nv_get_hw_stats(dev);
+	/* If lock is currently taken, the stats are being refreshed
+	 * and hence fresh enough */
+	if (spin_trylock(&np->hwstats_lock)) {
+		nv_update_stats(dev);
+		spin_unlock(&np->hwstats_lock);
+	}
 
 	if (!np->in_shutdown)
 		mod_timer(&np->stats_poll,
@@ -4711,14 +4799,18 @@  static int nv_get_sset_count(struct net_device *dev, int sset)
 	}
 }
 
-static void nv_get_ethtool_stats(struct net_device *dev, struct ethtool_stats *estats, u64 *buffer)
+static void nv_get_ethtool_stats(struct net_device *dev,
+				 struct ethtool_stats *estats, u64 *buffer)
+	__acquires(&netdev_priv(dev)->hwstats_lock)
+	__releases(&netdev_priv(dev)->hwstats_lock)
 {
 	struct fe_priv *np = netdev_priv(dev);
 
-	/* update stats */
-	nv_get_hw_stats(dev);
-
-	memcpy(buffer, &np->estats, nv_get_sset_count(dev, ETH_SS_STATS)*sizeof(u64));
+	spin_lock_bh(&np->hwstats_lock);
+	nv_update_stats(dev);
+	memcpy(buffer, &np->estats,
+	       nv_get_sset_count(dev, ETH_SS_STATS)*sizeof(u64));
+	spin_unlock_bh(&np->hwstats_lock);
 }
 
 static int nv_link_test(struct net_device *dev)
@@ -5362,7 +5454,7 @@  static int nv_close(struct net_device *dev)
 static const struct net_device_ops nv_netdev_ops = {
 	.ndo_open		= nv_open,
 	.ndo_stop		= nv_close,
-	.ndo_get_stats		= nv_get_stats,
+	.ndo_get_stats64	= nv_get_stats64,
 	.ndo_start_xmit		= nv_start_xmit,
 	.ndo_tx_timeout		= nv_tx_timeout,
 	.ndo_change_mtu		= nv_change_mtu,
@@ -5379,7 +5471,7 @@  static const struct net_device_ops nv_netdev_ops = {
 static const struct net_device_ops nv_netdev_ops_optimized = {
 	.ndo_open		= nv_open,
 	.ndo_stop		= nv_close,
-	.ndo_get_stats		= nv_get_stats,
+	.ndo_get_stats64	= nv_get_stats64,
 	.ndo_start_xmit		= nv_start_xmit_optimized,
 	.ndo_tx_timeout		= nv_tx_timeout,
 	.ndo_change_mtu		= nv_change_mtu,
@@ -5418,6 +5510,7 @@  static int __devinit nv_probe(struct pci_dev *pci_dev, const struct pci_device_i
 	np->dev = dev;
 	np->pci_dev = pci_dev;
 	spin_lock_init(&np->lock);
+	spin_lock_init(&np->hwstats_lock);
 	SET_NETDEV_DEV(dev, &pci_dev->dev);
 
 	init_timer(&np->oom_kick);