diff mbox series

[v6,1/7] i40e/i40evf: Eliminate duplicate barriers on weakly-ordered archs

Message ID 1521829277-9398-2-git-send-email-okaya@codeaurora.org
State Changes Requested, archived
Delegated to: David Miller
Headers show
Series netdev: intel: Eliminate duplicate barriers on weakly-ordered archs | expand

Commit Message

Sinan Kaya March 23, 2018, 6:21 p.m. UTC
Code includes wmb() followed by writel(). writel() already has a barrier
on some architectures like arm64.

This ends up CPU observing two barriers back to back before executing the
register write.

Since code already has an explicit barrier call, changing writel() to
writel_relaxed().

Signed-off-by: Sinan Kaya <okaya@codeaurora.org>
---
 drivers/net/ethernet/intel/i40e/i40e_txrx.c   | 24 ++++++++++++++++++++----
 drivers/net/ethernet/intel/i40evf/i40e_txrx.c |  9 +++++++--
 2 files changed, 27 insertions(+), 6 deletions(-)

Comments

Alexander H Duyck March 23, 2018, 6:30 p.m. UTC | #1
On Fri, Mar 23, 2018 at 11:21 AM, Sinan Kaya <okaya@codeaurora.org> wrote:
> Code includes wmb() followed by writel(). writel() already has a barrier
> on some architectures like arm64.
>
> This ends up CPU observing two barriers back to back before executing the
> register write.
>
> Since code already has an explicit barrier call, changing writel() to
> writel_relaxed().
>
> Signed-off-by: Sinan Kaya <okaya@codeaurora.org>
> ---
>  drivers/net/ethernet/intel/i40e/i40e_txrx.c   | 24 ++++++++++++++++++++----
>  drivers/net/ethernet/intel/i40evf/i40e_txrx.c |  9 +++++++--
>  2 files changed, 27 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
> index c6972bd..fc10cc0 100644
> --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
> +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
> @@ -186,7 +186,13 @@ static int i40e_program_fdir_filter(struct i40e_fdir_filter *fdir_data,
>         /* Mark the data descriptor to be watched */
>         first->next_to_watch = tx_desc;
>
> -       writel(tx_ring->next_to_use, tx_ring->tail);
> +       writel_relaxed(tx_ring->next_to_use, tx_ring->tail);
> +
> +       /* We need this if more than one processor can write to our tail
> +        * at a time, it synchronizes IO on IA64/Altix systems
> +        */
> +       mmiowb();
> +
>         return 0;
>

The addition of mmiowb here is valid. All of the others in this patch
are invalid.

>  dma_fail:
> @@ -1529,7 +1535,12 @@ static inline void i40e_release_rx_desc(struct i40e_ring *rx_ring, u32 val)
>          * such as IA-64).
>          */
>         wmb();
> -       writel(val, rx_ring->tail);
> +       writel_relaxed(val, rx_ring->tail);
> +
> +       /* We need this if more than one processor can write to our tail
> +        * at a time, it synchronizes IO on IA64/Altix systems
> +        */
> +       mmiowb();
>  }
>
>  /**
> @@ -2412,7 +2423,12 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget)
>                  */
>                 wmb();
>
> -               writel(xdp_ring->next_to_use, xdp_ring->tail);
> +               writel_relaxed(xdp_ring->next_to_use, xdp_ring->tail);
> +
> +               /* We need this if more than one processor can write to our tail
> +                * at a time, it synchronizes IO on IA64/Altix systems
> +                */
> +               mmiowb();
>         }
>
>         rx_ring->skb = skb;
> @@ -3437,7 +3453,7 @@ static inline int i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
>
>         /* notify HW of packet */
>         if (netif_xmit_stopped(txring_txq(tx_ring)) || !skb->xmit_more) {
> -               writel(i, tx_ring->tail);
> +               writel_relaxed(i, tx_ring->tail);
>
>                 /* we need this if more than one processor can write to our tail
>                  * at a time, it synchronizes IO on IA64/Altix systems
> diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
> index 1ae112f..ca02762 100644
> --- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
> +++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
> @@ -810,7 +810,12 @@ static inline void i40e_release_rx_desc(struct i40e_ring *rx_ring, u32 val)
>          * such as IA-64).
>          */
>         wmb();
> -       writel(val, rx_ring->tail);
> +       writel_relaxed(val, rx_ring->tail);
> +
> +       /* We need this if more than one processor can write to our tail
> +        * at a time, it synchronizes IO on IA64/Altix systems
> +        */
> +       mmiowb();
>  }
>
>  /**
> @@ -2379,7 +2384,7 @@ static inline void i40evf_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
>
>         /* notify HW of packet */
>         if (netif_xmit_stopped(txring_txq(tx_ring)) || !skb->xmit_more) {
> -               writel(i, tx_ring->tail);
> +               writel_relaxed(i, tx_ring->tail);
>
>                 /* we need this if more than one processor can write to our tail
>                  * at a time, it synchronizes IO on IA64/Altix systems
> --
> 2.7.4
>
> _______________________________________________
> Intel-wired-lan mailing list
> Intel-wired-lan@osuosl.org
> https://lists.osuosl.org/mailman/listinfo/intel-wired-lan
diff mbox series

Patch

diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index c6972bd..fc10cc0 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -186,7 +186,13 @@  static int i40e_program_fdir_filter(struct i40e_fdir_filter *fdir_data,
 	/* Mark the data descriptor to be watched */
 	first->next_to_watch = tx_desc;
 
-	writel(tx_ring->next_to_use, tx_ring->tail);
+	writel_relaxed(tx_ring->next_to_use, tx_ring->tail);
+
+	/* We need this if more than one processor can write to our tail
+	 * at a time, it synchronizes IO on IA64/Altix systems
+	 */
+	mmiowb();
+
 	return 0;
 
 dma_fail:
@@ -1529,7 +1535,12 @@  static inline void i40e_release_rx_desc(struct i40e_ring *rx_ring, u32 val)
 	 * such as IA-64).
 	 */
 	wmb();
-	writel(val, rx_ring->tail);
+	writel_relaxed(val, rx_ring->tail);
+
+	/* We need this if more than one processor can write to our tail
+	 * at a time, it synchronizes IO on IA64/Altix systems
+	 */
+	mmiowb();
 }
 
 /**
@@ -2412,7 +2423,12 @@  static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget)
 		 */
 		wmb();
 
-		writel(xdp_ring->next_to_use, xdp_ring->tail);
+		writel_relaxed(xdp_ring->next_to_use, xdp_ring->tail);
+
+		/* We need this if more than one processor can write to our tail
+		 * at a time, it synchronizes IO on IA64/Altix systems
+		 */
+		mmiowb();
 	}
 
 	rx_ring->skb = skb;
@@ -3437,7 +3453,7 @@  static inline int i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
 
 	/* notify HW of packet */
 	if (netif_xmit_stopped(txring_txq(tx_ring)) || !skb->xmit_more) {
-		writel(i, tx_ring->tail);
+		writel_relaxed(i, tx_ring->tail);
 
 		/* we need this if more than one processor can write to our tail
 		 * at a time, it synchronizes IO on IA64/Altix systems
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
index 1ae112f..ca02762 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
@@ -810,7 +810,12 @@  static inline void i40e_release_rx_desc(struct i40e_ring *rx_ring, u32 val)
 	 * such as IA-64).
 	 */
 	wmb();
-	writel(val, rx_ring->tail);
+	writel_relaxed(val, rx_ring->tail);
+
+	/* We need this if more than one processor can write to our tail
+	 * at a time, it synchronizes IO on IA64/Altix systems
+	 */
+	mmiowb();
 }
 
 /**
@@ -2379,7 +2384,7 @@  static inline void i40evf_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
 
 	/* notify HW of packet */
 	if (netif_xmit_stopped(txring_txq(tx_ring)) || !skb->xmit_more) {
-		writel(i, tx_ring->tail);
+		writel_relaxed(i, tx_ring->tail);
 
 		/* we need this if more than one processor can write to our tail
 		 * at a time, it synchronizes IO on IA64/Altix systems