Patchwork [net-next.git,3/7] stmmac: review private structure fields

login
register
mail settings
Submitter Giuseppe CAVALLARO
Date April 3, 2013, 5:41 a.m.
Message ID <1364967689-11155-3-git-send-email-peppe.cavallaro@st.com>
Download mbox | patch
Permalink /patch/233256/
State Changes Requested
Delegated to: David Miller
Headers show

Comments

Giuseppe CAVALLARO - April 3, 2013, 5:41 a.m.
recently many new supports have been added in the stmmac driver w/o taking care
about where each new field had to be placed inside the private structure for
guaranteeing the best cache usage.
This is what I wanted in the beginning, so this patch reorganizes all the fields
in order to keep adjacent fields for cache effect.
I have also tried to optimize them by using pahole.

Signed-off-by: Giuseppe Cavallaro <peppe.cavallaro@st.com>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac.h |   70 +++++++++++++-------------
 1 files changed, 35 insertions(+), 35 deletions(-)
Eric Dumazet - April 3, 2013, 7:21 a.m.
On Wed, 2013-04-03 at 07:41 +0200, Giuseppe CAVALLARO wrote:
> recently many new supports have been added in the stmmac driver w/o taking care
> about where each new field had to be placed inside the private structure for
> guaranteeing the best cache usage.
> This is what I wanted in the beginning, so this patch reorganizes all the fields
> in order to keep adjacent fields for cache effect.
> I have also tried to optimize them by using pahole.
> 
> Signed-off-by: Giuseppe Cavallaro <peppe.cavallaro@st.com>
> ---
>  drivers/net/ethernet/stmicro/stmmac/stmmac.h |   70 +++++++++++++-------------
>  1 files changed, 35 insertions(+), 35 deletions(-)
> 
> diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
> index 75f997b..8aa28c5 100644
> --- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h
> +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
> @@ -35,36 +35,45 @@
>  
>  struct stmmac_priv {
>  	/* Frequently used values are kept adjacent for cache effect */
> -	struct dma_desc *dma_tx ____cacheline_aligned;	/* Basic TX desc */
> -	struct dma_extended_desc *dma_etx;	/* Extended TX descriptor */
> -	dma_addr_t dma_tx_phy;
> -	struct sk_buff **tx_skbuff;
> -	dma_addr_t *tx_skbuff_dma;
> +	struct dma_extended_desc *dma_etx;
> +	struct dma_desc *dma_tx ____cacheline_aligned_in_smp;
> +	struct sk_buff **tx_skbuff ____cacheline_aligned_in_smp;

dma_tx & tx_skbuff are readonly, why put them in separate cache lines ?

It seems there is an abuse of ____cacheline_aligned_in_smp in this
driver (especially if this driver only runs on UP arch)



--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Giuseppe CAVALLARO - April 3, 2013, 7:33 a.m.
On 4/3/2013 9:21 AM, Eric Dumazet wrote:
> On Wed, 2013-04-03 at 07:41 +0200, Giuseppe CAVALLARO wrote:
>> recently many new supports have been added in the stmmac driver w/o taking care
>> about where each new field had to be placed inside the private structure for
>> guaranteeing the best cache usage.
>> This is what I wanted in the beginning, so this patch reorganizes all the fields
>> in order to keep adjacent fields for cache effect.
>> I have also tried to optimize them by using pahole.
>>
>> Signed-off-by: Giuseppe Cavallaro <peppe.cavallaro@st.com>
>> ---
>>   drivers/net/ethernet/stmicro/stmmac/stmmac.h |   70 +++++++++++++-------------
>>   1 files changed, 35 insertions(+), 35 deletions(-)
>>
>> diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
>> index 75f997b..8aa28c5 100644
>> --- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h
>> +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
>> @@ -35,36 +35,45 @@
>>
>>   struct stmmac_priv {
>>   	/* Frequently used values are kept adjacent for cache effect */
>> -	struct dma_desc *dma_tx ____cacheline_aligned;	/* Basic TX desc */
>> -	struct dma_extended_desc *dma_etx;	/* Extended TX descriptor */
>> -	dma_addr_t dma_tx_phy;
>> -	struct sk_buff **tx_skbuff;
>> -	dma_addr_t *tx_skbuff_dma;
>> +	struct dma_extended_desc *dma_etx;
>> +	struct dma_desc *dma_tx ____cacheline_aligned_in_smp;
>> +	struct sk_buff **tx_skbuff ____cacheline_aligned_in_smp;
>
> dma_tx & tx_skbuff are readonly, why put them in separate cache lines ?

I put tx_skbuff in a separate cache line because, when we use extended
descriptors, the driver works with dma_etx instead of dma_tx.
So my idea was to have both dma_etx, dma_tx and tx_skbuff aligned in
any case.

>
> It seems there is an abuse of ____cacheline_aligned_in_smp in this
> driver (especially if this driver only runs on UP arch)

Yes I know that there is this abuse but why do you see an abuse for UP?
In that case we should fall through ____cacheline_aligned (e.g. it is ok 
for SH4).

peppe

>
>
>
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Ben Hutchings - April 3, 2013, 4:09 p.m.
On Wed, 2013-04-03 at 09:33 +0200, Giuseppe CAVALLARO wrote:
> On 4/3/2013 9:21 AM, Eric Dumazet wrote:
> > On Wed, 2013-04-03 at 07:41 +0200, Giuseppe CAVALLARO wrote:
> >> recently many new supports have been added in the stmmac driver w/o taking care
> >> about where each new field had to be placed inside the private structure for
> >> guaranteeing the best cache usage.
> >> This is what I wanted in the beginning, so this patch reorganizes all the fields
> >> in order to keep adjacent fields for cache effect.
> >> I have also tried to optimize them by using pahole.
> >>
> >> Signed-off-by: Giuseppe Cavallaro <peppe.cavallaro@st.com>
> >> ---
> >>   drivers/net/ethernet/stmicro/stmmac/stmmac.h |   70 +++++++++++++-------------
> >>   1 files changed, 35 insertions(+), 35 deletions(-)
> >>
> >> diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
> >> index 75f997b..8aa28c5 100644
> >> --- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h
> >> +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
> >> @@ -35,36 +35,45 @@
> >>
> >>   struct stmmac_priv {
> >>   	/* Frequently used values are kept adjacent for cache effect */
> >> -	struct dma_desc *dma_tx ____cacheline_aligned;	/* Basic TX desc */
> >> -	struct dma_extended_desc *dma_etx;	/* Extended TX descriptor */
> >> -	dma_addr_t dma_tx_phy;
> >> -	struct sk_buff **tx_skbuff;
> >> -	dma_addr_t *tx_skbuff_dma;
> >> +	struct dma_extended_desc *dma_etx;
> >> +	struct dma_desc *dma_tx ____cacheline_aligned_in_smp;
> >> +	struct sk_buff **tx_skbuff ____cacheline_aligned_in_smp;
> >
> > dma_tx & tx_skbuff are readonly, why put them in separate cache lines ?
> 
> I put tx_skbuff in a separate cache line because, when we use extended
> descriptors, the driver works with dma_etx instead of dma_tx.
> So my idea was to have both dma_etx, dma_tx and tx_skbuff aligned in
> any case.
[...]

It's generally not that important to put fields at the beginning of a
cache line.  (If you've measured with typical systems using stmmac and
found an advantage, then I accept that you have a good reason to do
this.  But that advantage is unlikely to be specific to SMP systems.)

I would use ____cacheline_aligned_in_smp to separate fields that are
likely to be changed on different CPUs, so that the cache line doesn't
bounce between their caches.  Fields that are rarely modified (such as
these pointers), or are used together on the same CPU should be packed
together into a small number of cache lines.

Ben.
Giuseppe CAVALLARO - April 4, 2013, 6:11 a.m.
On 4/3/2013 6:09 PM, Ben Hutchings wrote:
> On Wed, 2013-04-03 at 09:33 +0200, Giuseppe CAVALLARO wrote:
>> On 4/3/2013 9:21 AM, Eric Dumazet wrote:
>>> On Wed, 2013-04-03 at 07:41 +0200, Giuseppe CAVALLARO wrote:
>>>> recently many new supports have been added in the stmmac driver w/o taking care
>>>> about where each new field had to be placed inside the private structure for
>>>> guaranteeing the best cache usage.
>>>> This is what I wanted in the beginning, so this patch reorganizes all the fields
>>>> in order to keep adjacent fields for cache effect.
>>>> I have also tried to optimize them by using pahole.
>>>>
>>>> Signed-off-by: Giuseppe Cavallaro <peppe.cavallaro@st.com>
>>>> ---
>>>>    drivers/net/ethernet/stmicro/stmmac/stmmac.h |   70 +++++++++++++-------------
>>>>    1 files changed, 35 insertions(+), 35 deletions(-)
>>>>
>>>> diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
>>>> index 75f997b..8aa28c5 100644
>>>> --- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h
>>>> +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
>>>> @@ -35,36 +35,45 @@
>>>>
>>>>    struct stmmac_priv {
>>>>    	/* Frequently used values are kept adjacent for cache effect */
>>>> -	struct dma_desc *dma_tx ____cacheline_aligned;	/* Basic TX desc */
>>>> -	struct dma_extended_desc *dma_etx;	/* Extended TX descriptor */
>>>> -	dma_addr_t dma_tx_phy;
>>>> -	struct sk_buff **tx_skbuff;
>>>> -	dma_addr_t *tx_skbuff_dma;
>>>> +	struct dma_extended_desc *dma_etx;
>>>> +	struct dma_desc *dma_tx ____cacheline_aligned_in_smp;
>>>> +	struct sk_buff **tx_skbuff ____cacheline_aligned_in_smp;
>>>
>>> dma_tx & tx_skbuff are readonly, why put them in separate cache lines ?
>>
>> I put tx_skbuff in a separate cache line because, when we use extended
>> descriptors, the driver works with dma_etx instead of dma_tx.
>> So my idea was to have both dma_etx, dma_tx and tx_skbuff aligned in
>> any case.
> [...]
>
> It's generally not that important to put fields at the beginning of a
> cache line.  (If you've measured with typical systems using stmmac and
> found an advantage, then I accept that you have a good reason to do
> this.  But that advantage is unlikely to be specific to SMP systems.)

That is the point. I had seen an improvement when testing on SH4
platforms if these pointers were at the beginning of a cache line.

> I would use ____cacheline_aligned_in_smp to separate fields that are
> likely to be changed on different CPUs, so that the cache line doesn't
> bounce between their caches.  Fields that are rarely modified (such as
> these pointers), or are used together on the same CPU should be packed
> together into a small number of cache lines.

Thx Ben for this explanation. Let me do some other tests on SMP ARM.
I'll rework this patch trying to balance the "abuse" of
____cacheline_aligned_in_smp and the best performances (I can re-test
on ARM and SH4).

peppe
>
> Ben.
>

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Patch

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
index 75f997b..8aa28c5 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
@@ -35,36 +35,45 @@ 
 
 struct stmmac_priv {
 	/* Frequently used values are kept adjacent for cache effect */
-	struct dma_desc *dma_tx ____cacheline_aligned;	/* Basic TX desc */
-	struct dma_extended_desc *dma_etx;	/* Extended TX descriptor */
-	dma_addr_t dma_tx_phy;
-	struct sk_buff **tx_skbuff;
-	dma_addr_t *tx_skbuff_dma;
+	struct dma_extended_desc *dma_etx;
+	struct dma_desc *dma_tx ____cacheline_aligned_in_smp;
+	struct sk_buff **tx_skbuff ____cacheline_aligned_in_smp;
 	unsigned int cur_tx;
 	unsigned int dirty_tx;
 	unsigned int dma_tx_size;
+	u32 tx_count_frames;
+	u32 tx_coal_frames;
+	u32 tx_coal_timer;
+	dma_addr_t *tx_skbuff_dma;
+	dma_addr_t dma_tx_phy;
 	int tx_coalesce;
+	int hwts_tx_en;
+	spinlock_t tx_lock;
+	bool tx_path_in_lpi_mode;
+	struct timer_list txtimer;
 
-	struct dma_desc *dma_rx;		/* Basic RX descriptor */
-	struct dma_extended_desc *dma_erx;	/* Extended RX descriptor */
+	struct dma_desc *dma_rx	____cacheline_aligned_in_smp;
+	struct dma_extended_desc *dma_erx ____cacheline_aligned_in_smp;
+	struct sk_buff **rx_skbuff ____cacheline_aligned_in_smp;
 	unsigned int cur_rx;
 	unsigned int dirty_rx;
-	struct sk_buff **rx_skbuff;
+	unsigned int dma_rx_size;
+	unsigned int dma_buf_sz;
+	u32 rx_riwt;
+	int hwts_rx_en;
 	dma_addr_t *rx_skbuff_dma;
+	dma_addr_t dma_rx_phy;
 
+	struct napi_struct napi ____cacheline_aligned_in_smp;
+
+	void __iomem *ioaddr ____cacheline_aligned_in_smp;
 	struct net_device *dev;
-	dma_addr_t dma_rx_phy;
-	unsigned int dma_rx_size;
-	unsigned int dma_buf_sz;
 	struct device *device;
 	struct mac_device_info *hw;
-	void __iomem *ioaddr;
-
-	struct stmmac_extra_stats xstats;
-	struct napi_struct napi;
 	int no_csum_insertion;
+	spinlock_t lock;
 
-	struct phy_device *phydev;
+	struct phy_device *phydev ____cacheline_aligned_in_smp;
 	int oldlink;
 	int speed;
 	int oldduplex;
@@ -73,39 +82,30 @@  struct stmmac_priv {
 	struct mii_bus *mii;
 	int mii_irq[PHY_MAX_ADDR];
 
-	u32 msg_enable;
-	spinlock_t lock;
-	spinlock_t tx_lock;
-	int wolopts;
-	int wol_irq;
+	struct stmmac_extra_stats xstats ____cacheline_aligned_in_smp;
 	struct plat_stmmacenet_data *plat;
-	struct stmmac_counters mmc;
 	struct dma_features dma_cap;
+	struct stmmac_counters mmc;
 	int hw_cap_support;
+	int synopsys_id;
+	u32 msg_enable;
+	int wolopts;
+	int wol_irq;
 	struct clk *stmmac_clk;
 	int clk_csr;
-	int synopsys_id;
 	struct timer_list eee_ctrl_timer;
-	bool tx_path_in_lpi_mode;
 	int lpi_irq;
 	int eee_enabled;
 	int eee_active;
 	int tx_lpi_timer;
-	struct timer_list txtimer;
-	u32 tx_count_frames;
-	u32 tx_coal_frames;
-	u32 tx_coal_timer;
-	int use_riwt;
-	u32 rx_riwt;
+	int pcs;
 	unsigned int mode;
 	int extend_desc;
-	int pcs;
-	int hwts_tx_en;
-	int hwts_rx_en;
-	unsigned int default_addend;
-	u32 adv_ts;
 	struct ptp_clock *ptp_clock;
 	struct ptp_clock_info ptp_clock_ops;
+	unsigned int default_addend;
+	u32 adv_ts;
+	int use_riwt;
 	spinlock_t ptp_lock;
 };