diff mbox series

[S19,10/15] ice: Reorganize tx_buf and ring structs

Message ID 20190416172439.27908-11-anirudh.venkataramanan@intel.com
State Accepted
Delegated to: Jeff Kirsher
Headers show
Series Fixes and feature updates for ice | expand

Commit Message

Venkataramanan, Anirudh April 16, 2019, 5:24 p.m. UTC
From: Jesse Brandeburg <jesse.brandeburg@intel.com>

Use more efficient structure ordering by using the pahole tool
and a lot of code inspection to get hot cache lines to have
packed data (no holes if possible) and adjacent warm data.

ice_ring prior to this change:
  /* size: 192, cachelines: 3, members: 23 */
  /* sum members: 158, holes: 4, sum holes: 12 */
  /* padding: 22 */

ice_ring after this change:
  /* size: 192, cachelines: 3, members: 25 */
  /* sum members: 162, holes: 1, sum holes: 1 */
  /* padding: 29 */

ice_tx_buf prior to this change:
  /* size: 48, cachelines: 1, members: 7 */
  /* sum members: 38, holes: 2, sum holes: 6 */
  /* padding: 4 */
  /* last cacheline: 48 bytes */

ice_tx_buf after this change:
  /* size: 40, cachelines: 1, members: 7 */
  /* sum members: 38, holes: 1, sum holes: 2 */
  /* last cacheline: 40 bytes */

Signed-off-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
Signed-off-by: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
---
[Anirudh Venkataramanan <anirudh.venkataramanan@intel.com> cleaned up commit message]
---
 drivers/net/ethernet/intel/ice/ice_txrx.h | 35 ++++++++++++++---------
 1 file changed, 21 insertions(+), 14 deletions(-)

Comments

Bowers, AndrewX April 26, 2019, 10 p.m. UTC | #1
> -----Original Message-----
> From: Intel-wired-lan [mailto:intel-wired-lan-bounces@osuosl.org] On
> Behalf Of Anirudh Venkataramanan
> Sent: Tuesday, April 16, 2019 10:25 AM
> To: intel-wired-lan@lists.osuosl.org
> Subject: [Intel-wired-lan] [PATCH S19 10/15] ice: Reorganize tx_buf and ring
> structs
> 
> From: Jesse Brandeburg <jesse.brandeburg@intel.com>
> 
> Use more efficient structure ordering by using the pahole tool and a lot of
> code inspection to get hot cache lines to have packed data (no holes if
> possible) and adjacent warm data.
> 
> ice_ring prior to this change:
>   /* size: 192, cachelines: 3, members: 23 */
>   /* sum members: 158, holes: 4, sum holes: 12 */
>   /* padding: 22 */
> 
> ice_ring after this change:
>   /* size: 192, cachelines: 3, members: 25 */
>   /* sum members: 162, holes: 1, sum holes: 1 */
>   /* padding: 29 */
> 
> ice_tx_buf prior to this change:
>   /* size: 48, cachelines: 1, members: 7 */
>   /* sum members: 38, holes: 2, sum holes: 6 */
>   /* padding: 4 */
>   /* last cacheline: 48 bytes */
> 
> ice_tx_buf after this change:
>   /* size: 40, cachelines: 1, members: 7 */
>   /* sum members: 38, holes: 1, sum holes: 2 */
>   /* last cacheline: 40 bytes */
> 
> Signed-off-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
> Signed-off-by: Anirudh Venkataramanan
> <anirudh.venkataramanan@intel.com>
> ---
> [Anirudh Venkataramanan <anirudh.venkataramanan@intel.com> cleaned
> up commit message]
> ---
>  drivers/net/ethernet/intel/ice/ice_txrx.h | 35 ++++++++++++++---------
>  1 file changed, 21 insertions(+), 14 deletions(-)

Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
diff mbox series

Patch

diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h
index 66e05032ee56..4cff52ffefe0 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.h
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.h
@@ -58,19 +58,19 @@  struct ice_tx_buf {
 	unsigned int bytecount;
 	unsigned short gso_segs;
 	u32 tx_flags;
-	DEFINE_DMA_UNMAP_ADDR(dma);
 	DEFINE_DMA_UNMAP_LEN(len);
+	DEFINE_DMA_UNMAP_ADDR(dma);
 };
 
 struct ice_tx_offload_params {
-	u8 header_len;
+	u64 cd_qw1;
+	struct ice_ring *tx_ring;
 	u32 td_cmd;
 	u32 td_offset;
 	u32 td_l2tag1;
-	u16 cd_l2tag2;
 	u32 cd_tunnel_params;
-	u64 cd_qw1;
-	struct ice_ring *tx_ring;
+	u16 cd_l2tag2;
+	u8 header_len;
 };
 
 struct ice_rx_buf {
@@ -150,6 +150,7 @@  enum ice_rx_dtype {
 
 /* descriptor ring, associated with a VSI */
 struct ice_ring {
+	/* CL1 - 1st cacheline starts here */
 	struct ice_ring *next;		/* pointer to next ring in q_vector */
 	void *desc;			/* Descriptor ring memory */
 	struct device *dev;		/* Used for DMA mapping */
@@ -161,11 +162,11 @@  struct ice_ring {
 		struct ice_tx_buf *tx_buf;
 		struct ice_rx_buf *rx_buf;
 	};
+	/* CL2 - 2nd cacheline starts here */
 	u16 q_index;			/* Queue number of ring */
-	u32 txq_teid;			/* Added Tx queue TEID */
-#ifdef CONFIG_DCB
-	u8 dcb_tc;		/* Traffic class of ring */
-#endif /* CONFIG_DCB */
+	u16 q_handle;			/* Queue handle per TC */
+
+	u8 ring_active;			/* is ring online or not */
 
 	u16 count;			/* Number of descriptors */
 	u16 reg_idx;			/* HW register index of the ring */
@@ -173,8 +174,7 @@  struct ice_ring {
 	/* used in interrupt processing */
 	u16 next_to_use;
 	u16 next_to_clean;
-
-	u8 ring_active;			/* is ring online or not */
+	u16 next_to_alloc;
 
 	/* stats structs */
 	struct ice_q_stats	stats;
@@ -184,10 +184,17 @@  struct ice_ring {
 		struct ice_rxq_stats rx_stats;
 	};
 
-	unsigned int size;		/* length of descriptor ring in bytes */
-	dma_addr_t dma;			/* physical address of ring */
 	struct rcu_head rcu;		/* to avoid race on free */
-	u16 next_to_alloc;
+	/* CLX - the below items are only accessed infrequently and should be
+	 * in their own cache line if possible
+	 */
+	dma_addr_t dma;			/* physical address of ring */
+	unsigned int size;		/* length of descriptor ring in bytes */
+	u32 txq_teid;			/* Added Tx queue TEID */
+	u16 rx_buf_len;
+#ifdef CONFIG_DCB
+	u8 dcb_tc;			/* Traffic class of ring */
+#endif /* CONFIG_DCB */
 } ____cacheline_internodealigned_in_smp;
 
 struct ice_ring_container {