[net-next] net: hns: optimize XGE capability by reducing cpu usage

Message ID 1449300749-50275-1-git-send-email-yankejian@huawei.com
State Changes Requested, archived
Delegated to: David Miller

Commit Message

yankejian Dec. 5, 2015, 7:32 a.m. UTC
This patch raises XGE performance by:
1) changing the page management method for enet memory,
2) reducing the number of rmb() calls, and
3) adding memory prefetching.

Signed-off-by: yankejian <yankejian@huawei.com>
---
 drivers/net/ethernet/hisilicon/hns/hnae.h         |  5 +-
 drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c |  1 -
 drivers/net/ethernet/hisilicon/hns/hns_enet.c     | 79 +++++++++++++++--------
 3 files changed, 55 insertions(+), 30 deletions(-)
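
Change 1) is essentially the half-page reuse scheme known from Intel's igb/ixgbe
drivers: with 2048-byte RX buffers, each 4 KB page holds two buffers, and while
the driver is the sole owner of a page it flips its offset between the two
halves instead of allocating a new page. A minimal sketch of the idea follows
(struct rx_buf and the helper name are hypothetical, for illustration only):

#include <linux/mm.h>
#include <linux/topology.h>

#define HNS_BUFFER_SIZE_2048 2048

struct rx_buf {				/* hypothetical, for illustration */
	struct page *page;
	unsigned int page_offset;	/* 0 or 2048 within the page */
	int reuse_flag;
};

static void try_reuse_half_page(struct rx_buf *buf)
{
	buf->reuse_flag = 0;

	/* avoid re-using pages allocated on a remote NUMA node */
	if (page_to_nid(buf->page) != numa_node_id())
		return;

	/* if we are the only owner of the page we can reuse it */
	if (page_count(buf->page) == 1) {
		/* flip the offset to the other 2 KB half of the page */
		buf->page_offset ^= HNS_BUFFER_SIZE_2048;
		buf->reuse_flag = 1;
		/* take an extra reference before this half is handed off */
		get_page(buf->page);
	}
}

In steady state the RX ring then recycles the same pages indefinitely, keeping
the page allocator out of the hot path, which is presumably where much of the
CPU saving comes from.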

Comments

David Miller Dec. 7, 2015, 3:29 a.m. UTC | #1
From: yankejian <yankejian@huawei.com>
Date: Sat, 5 Dec 2015 15:32:29 +0800

> +#if (PAGE_SIZE < 8192)
> +	if (hnae_buf_size(ring) == HNS_BUFFER_SIZE_2048) {
> +		truesize = hnae_buf_size(ring);
> +	} else {
> +		truesize = ALIGN(size, L1_CACHE_BYTES);
> +		last_offset = hnae_page_size(ring) - hnae_buf_size(ring);
> +	}
> +
> +#else
> +		truesize = ALIGN(size, L1_CACHE_BYTES);
> +		last_offset = hnae_page_size(ring) - hnae_buf_size(ring);
> +#endif

This is not indented properly, and it looks terrible.
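
For reference, the same block indented to kernel style (content unchanged; the
statements under #else take a single tab, like the rest of the function body)
would read:

#if (PAGE_SIZE < 8192)
	if (hnae_buf_size(ring) == HNS_BUFFER_SIZE_2048) {
		truesize = hnae_buf_size(ring);
	} else {
		truesize = ALIGN(size, L1_CACHE_BYTES);
		last_offset = hnae_page_size(ring) - hnae_buf_size(ring);
	}
#else
	truesize = ALIGN(size, L1_CACHE_BYTES);
	last_offset = hnae_page_size(ring) - hnae_buf_size(ring);
#endif
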
Joe Perches Dec. 7, 2015, 3:32 a.m. UTC | #2
On Sun, 2015-12-06 at 22:29 -0500, David Miller wrote:
> From: yankejian <yankejian@huawei.com>
> Date: Sat, 5 Dec 2015 15:32:29 +0800
> 
> > +#if (PAGE_SIZE < 8192)
> > +     if (hnae_buf_size(ring) == HNS_BUFFER_SIZE_2048) {
> > +             truesize = hnae_buf_size(ring);
> > +     } else {
> > +             truesize = ALIGN(size, L1_CACHE_BYTES);
> > +             last_offset = hnae_page_size(ring) - hnae_buf_size(ring);
> > +     }
> > +
> > +#else
> > +             truesize = ALIGN(size, L1_CACHE_BYTES);
> > +             last_offset = hnae_page_size(ring) - hnae_buf_size(ring);
> > +#endif
> 
> This is not indented properly, and it looks terrible.

And it makes one curious as to why last_offset isn't set
in the first block.

yankejian Dec. 7, 2015, 8:37 a.m. UTC | #3
On 2015/12/7 11:29, David Miller wrote:
> From: yankejian <yankejian@huawei.com>
> Date: Sat, 5 Dec 2015 15:32:29 +0800
>
>> +#if (PAGE_SIZE < 8192)
>> +	if (hnae_buf_size(ring) == HNS_BUFFER_SIZE_2048) {
>> +		truesize = hnae_buf_size(ring);
>> +	} else {
>> +		truesize = ALIGN(size, L1_CACHE_BYTES);
>> +		last_offset = hnae_page_size(ring) - hnae_buf_size(ring);
>> +	}
>> +
>> +#else
>> +		truesize = ALIGN(size, L1_CACHE_BYTES);
>> +		last_offset = hnae_page_size(ring) - hnae_buf_size(ring);
>> +#endif
> This is not indented properly, and it looks terrible.
>
Hi David,
Thanks for pointing it out. I will pay more attention next time.

Fan Du Dec. 7, 2015, 8:58 a.m. UTC | #4
On 2015/12/5 15:32, yankejian wrote:
> This patch raises XGE performance by:
> 1) changing the page management method for enet memory,
> 2) reducing the number of rmb() calls, and
> 3) adding memory prefetching.

Any numbers on how much it boosts performance?

yankejian Dec. 7, 2015, 8:58 a.m. UTC | #5
On 2015/12/7 11:32, Joe Perches wrote:
> On Sun, 2015-12-06 at 22:29 -0500, David Miller wrote:
>> > From: yankejian <yankejian@huawei.com>
>> > Date: Sat, 5 Dec 2015 15:32:29 +0800
>> > 
>>> > > +#if (PAGE_SIZE < 8192)
>>> > > +     if (hnae_buf_size(ring) == HNS_BUFFER_SIZE_2048) {
>>> > > +             truesize = hnae_buf_size(ring);
>>> > > +     } else {
>>> > > +             truesize = ALIGN(size, L1_CACHE_BYTES);
>>> > > +             last_offset = hnae_page_size(ring) - hnae_buf_size(ring);
>>> > > +     }
>>> > > +
>>> > > +#else
>>> > > +             truesize = ALIGN(size, L1_CACHE_BYTES);
>>> > > +             last_offset = hnae_page_size(ring) - hnae_buf_size(ring);
>>> > > +#endif
>> > 
>> > This is not indented properly, and it looks terrible.
> And it makes one curious as to why last_offset isn't set
> in the first block.

Hi Joe,
If hnae_buf_size(ring) is equal to HNS_BUFFER_SIZE_2048, last_offset is not used anywhere in this function,
so it is ignored in the first block. Thanks for your suggestion.

Best regards,
yankejian


Joe Perches Dec. 7, 2015, 9:05 a.m. UTC | #6
On Mon, 2015-12-07 at 16:58 +0800, Yankejian (Hackim Yim) wrote:
> On 2015/12/7 11:32, Joe Perches wrote:
> > On Sun, 2015-12-06 at 22:29 -0500, David Miller wrote:
> > > > From: yankejian <yankejian@huawei.com>
> > > > Date: Sat, 5 Dec 2015 15:32:29 +0800
> > > > 
> > > > > > +#if (PAGE_SIZE < 8192)
> > > > > > +     if (hnae_buf_size(ring) == HNS_BUFFER_SIZE_2048) {
> > > > > > +             truesize = hnae_buf_size(ring);
> > > > > > +     } else {
> > > > > > +             truesize = ALIGN(size, L1_CACHE_BYTES);
> > > > > > +             last_offset = hnae_page_size(ring) - hnae_buf_size(ring);
> > > > > > +     }
> > > > > > +
> > > > > > +#else
> > > > > > +             truesize = ALIGN(size, L1_CACHE_BYTES);
> > > > > > +             last_offset = hnae_page_size(ring) - hnae_buf_size(ring);
> > > > > > +#endif
> > > > 
> > > > This is not indented properly, and it looks terrible.
> > And it makes one curious as to why last_offset isn't set
> > in the first block.
> 
> Hi Joe,

Hello.

> If hnae_buf_size(ring) is equal to HNS_BUFFER_SIZE_2048, last_offset is not used anywhere in this function,
> so it is ignored in the first block. Thanks for your suggestion.

More to the point, last_offset is initialized to 0.

It'd be clearer not to initialize it at all and
set it to 0 in the first block and not overwrite
the initialization in each subsequent block.
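
A sketch of Joe's suggestion against the same block (assuming the surrounding
code from the patch), so that every path assigns last_offset exactly once:

	int truesize, size;
	int last_offset;		/* no initializer needed */

#if (PAGE_SIZE < 8192)
	if (hnae_buf_size(ring) == HNS_BUFFER_SIZE_2048) {
		truesize = hnae_buf_size(ring);
		last_offset = 0;	/* set explicitly in this branch */
	} else {
		truesize = ALIGN(size, L1_CACHE_BYTES);
		last_offset = hnae_page_size(ring) - hnae_buf_size(ring);
	}
#else
	truesize = ALIGN(size, L1_CACHE_BYTES);
	last_offset = hnae_page_size(ring) - hnae_buf_size(ring);
#endif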

yankejian Dec. 7, 2015, 9:26 a.m. UTC | #7
On 2015/12/7 17:05, Joe Perches wrote:
> On Mon, 2015-12-07 at 16:58 +0800, Yankejian (Hackim Yim) wrote:
>> On 2015/12/7 11:32, Joe Perches wrote:
>>> On Sun, 2015-12-06 at 22:29 -0500, David Miller wrote:
>>>>> From: yankejian <yankejian@huawei.com>
>>>>> Date: Sat, 5 Dec 2015 15:32:29 +0800
>>>>>
>>>>>>> +#if (PAGE_SIZE < 8192)
>>>>>>> +     if (hnae_buf_size(ring) == HNS_BUFFER_SIZE_2048) {
>>>>>>> +             truesize = hnae_buf_size(ring);
>>>>>>> +     } else {
>>>>>>> +             truesize = ALIGN(size, L1_CACHE_BYTES);
>>>>>>> +             last_offset = hnae_page_size(ring) - hnae_buf_size(ring);
>>>>>>> +     }
>>>>>>> +
>>>>>>> +#else
>>>>>>> +             truesize = ALIGN(size, L1_CACHE_BYTES);
>>>>>>> +             last_offset = hnae_page_size(ring) - hnae_buf_size(ring);
>>>>>>> +#endif
>>>>> This is not indented properly, and it looks terrible.
>>> And it makes one curious as to why last_offset isn't set
>>> in the first block.
>> Hi Joe,
> Hello.
>
>> If hnae_buf_size(ring) is equal to HNS_BUFFER_SIZE_2048, last_offset is not used anywhere in this function,
>> so it is ignored in the first block. Thanks for your suggestion.
> More to the point, last_offset is initialized to 0.
>
> It'd be clearer not to initialize it at all and
Thanks, that is a good idea.

> set it to 0 in the first block and not overwrite
> the initialization in each subsequent block.
Because it is unused, I think we'd better leave it out.

yankejian Dec. 8, 2015, 6:22 a.m. UTC | #8
On 2015/12/7 16:58, Du, Fan wrote:
>
>
> On 2015/12/5 15:32, yankejian wrote:
>> This patch raises XGE performance by:
>> 1) changing the page management method for enet memory,
>> 2) reducing the number of rmb() calls, and
>> 3) adding memory prefetching.
>
> Any numbers on how much it boosts performance?
>

it is almost the same as 82599.

Fan Du Dec. 8, 2015, 6:30 a.m. UTC | #9
On 2015/12/8 14:22, Yankejian (Hackim Yim) wrote:
>
> On 2015/12/7 16:58, Du, Fan wrote:
>> >
>> >
>> >On 2015/12/5 15:32, yankejian wrote:
>>> >>This patch raises XGE performance by:
>>> >>1) changing the page management method for enet memory,
>>> >>2) reducing the number of rmb() calls, and
>>> >>3) adding memory prefetching.
>> >
>> >Any numbers on how much it boosts performance?
>> >
> it is almost the same as 82599.

I mean how much does this patch improve performance, *BEFORE* and *AFTER*,
for the Huawei XGE chip: the commit log states that it is "raising the
performance", but does not give any numbers from the testing.
yankejian Dec. 8, 2015, 6:58 a.m. UTC | #10
On 2015/12/8 14:30, Du, Fan wrote:
>
>
> On 2015/12/8 14:22, Yankejian (Hackim Yim) wrote:
>>
>> On 2015/12/7 16:58, Du, Fan wrote:
>>> >
>>> >
>>> >On 2015/12/5 15:32, yankejian wrote:
>>>> >>This patch raises XGE performance by:
>>>> >>1) changing the page management method for enet memory,
>>>> >>2) reducing the number of rmb() calls, and
>>>> >>3) adding memory prefetching.
>>> >
>>> >Any numbers on how much it boosts performance?
>>> >
>> it is almost the same as 82599.
>
> I mean how much does this patch improve performance, *BEFORE* and *AFTER*,
> for the Huawei XGE chip: the commit log states that it is "raising the
> performance", but does not give any numbers from the testing.
>
Hi Du Fan,
The bandwidth gain is not obvious, but the CPU usage decreases by up to 50%.


Patch

diff --git a/drivers/net/ethernet/hisilicon/hns/hnae.h b/drivers/net/ethernet/hisilicon/hns/hnae.h
index d1f3316..6ca94dc 100644
--- a/drivers/net/ethernet/hisilicon/hns/hnae.h
+++ b/drivers/net/ethernet/hisilicon/hns/hnae.h
@@ -341,7 +341,8 @@  struct hnae_queue {
 	void __iomem *io_base;
 	phys_addr_t phy_base;
 	struct hnae_ae_dev *dev;	/* the device who use this queue */
-	struct hnae_ring rx_ring, tx_ring;
+	struct hnae_ring rx_ring ____cacheline_internodealigned_in_smp;
+	struct hnae_ring tx_ring ____cacheline_internodealigned_in_smp;
 	struct hnae_handle *handle;
 };
 
@@ -597,11 +598,9 @@  static inline void hnae_replace_buffer(struct hnae_ring *ring, int i,
 				       struct hnae_desc_cb *res_cb)
 {
 	struct hnae_buf_ops *bops = ring->q->handle->bops;
-	struct hnae_desc_cb tmp_cb = ring->desc_cb[i];
 
 	bops->unmap_buffer(ring, &ring->desc_cb[i]);
 	ring->desc_cb[i] = *res_cb;
-	*res_cb = tmp_cb;
 	ring->desc[i].addr = (__le64)ring->desc_cb[i].dma;
 	ring->desc[i].rx.ipoff_bnum_pid_flag = 0;
 }
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c b/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c
index 77c6edb..522b264 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c
@@ -341,7 +341,6 @@  void hns_ae_toggle_ring_irq(struct hnae_ring *ring, u32 mask)
 	else
 		flag = RCB_INT_FLAG_RX;
 
-	hns_rcb_int_clr_hw(ring->q, flag);
 	hns_rcb_int_ctrl_hw(ring->q, flag, mask);
 }
 
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c
index cad2663..e2be510 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c
@@ -33,6 +33,7 @@ 
 
 #define RCB_IRQ_NOT_INITED 0
 #define RCB_IRQ_INITED 1
+#define HNS_BUFFER_SIZE_2048 2048
 
 #define BD_MAX_SEND_SIZE 8191
 #define SKB_TMP_LEN(SKB) \
@@ -491,13 +492,51 @@  static unsigned int hns_nic_get_headlen(unsigned char *data, u32 flag,
 		return max_size;
 }
 
-static void
-hns_nic_reuse_page(struct hnae_desc_cb *desc_cb, int tsize, int last_offset)
+static void hns_nic_reuse_page(struct sk_buff *skb, int i,
+			       struct hnae_ring *ring, int pull_len,
+			       struct hnae_desc_cb *desc_cb)
 {
+	struct hnae_desc *desc;
+	int truesize, size;
+	int last_offset = 0;
+
+	desc = &ring->desc[ring->next_to_clean];
+	size = le16_to_cpu(desc->rx.size);
+
+#if (PAGE_SIZE < 8192)
+	if (hnae_buf_size(ring) == HNS_BUFFER_SIZE_2048) {
+		truesize = hnae_buf_size(ring);
+	} else {
+		truesize = ALIGN(size, L1_CACHE_BYTES);
+		last_offset = hnae_page_size(ring) - hnae_buf_size(ring);
+	}
+
+#else
+		truesize = ALIGN(size, L1_CACHE_BYTES);
+		last_offset = hnae_page_size(ring) - hnae_buf_size(ring);
+#endif
+
+	skb_add_rx_frag(skb, i, desc_cb->priv, desc_cb->page_offset + pull_len,
+			size - pull_len, truesize - pull_len);
+
 	 /* avoid re-using remote pages,flag default unreuse */
 	if (likely(page_to_nid(desc_cb->priv) == numa_node_id())) {
+#if (PAGE_SIZE < 8192)
+		if (hnae_buf_size(ring) == HNS_BUFFER_SIZE_2048) {
+			/* if we are only owner of page we can reuse it */
+			if (likely(page_count(desc_cb->priv) == 1)) {
+				/* flip page offset to other buffer */
+				desc_cb->page_offset ^= truesize;
+
+				desc_cb->reuse_flag = 1;
+				/* bump ref count on page before it is given*/
+				get_page(desc_cb->priv);
+			}
+			return;
+		}
+#endif
 		/* move offset up to the next cache line */
-		desc_cb->page_offset += tsize;
+		desc_cb->page_offset += truesize;
 
 		if (desc_cb->page_offset <= last_offset) {
 			desc_cb->reuse_flag = 1;
@@ -529,11 +568,10 @@  static int hns_nic_poll_rx_skb(struct hns_nic_ring_data *ring_data,
 	struct hnae_desc *desc;
 	struct hnae_desc_cb *desc_cb;
 	unsigned char *va;
-	int bnum, length, size, i, truesize, last_offset;
+	int bnum, length, i;
 	int pull_len;
 	u32 bnum_flag;
 
-	last_offset = hnae_page_size(ring) - hnae_buf_size(ring);
 	desc = &ring->desc[ring->next_to_clean];
 	desc_cb = &ring->desc_cb[ring->next_to_clean];
 
@@ -555,17 +593,12 @@  static int hns_nic_poll_rx_skb(struct hns_nic_ring_data *ring_data,
 		return -ENOMEM;
 	}
 
+	prefetchw(skb->data);
 	length = le16_to_cpu(desc->rx.pkt_len);
 	bnum_flag = le32_to_cpu(desc->rx.ipoff_bnum_pid_flag);
 	priv->ops.get_rxd_bnum(bnum_flag, &bnum);
 	*out_bnum = bnum;
 
-	/* we will be copying header into skb->data in
-	 * pskb_may_pull so it is in our interest to prefetch
-	 * it now to avoid a possible cache miss
-	 */
-	prefetchw(skb->data);
-
 	if (length <= HNS_RX_HEAD_SIZE) {
 		memcpy(__skb_put(skb, length), va, ALIGN(length, sizeof(long)));
 
@@ -588,13 +621,7 @@  static int hns_nic_poll_rx_skb(struct hns_nic_ring_data *ring_data,
 		memcpy(__skb_put(skb, pull_len), va,
 		       ALIGN(pull_len, sizeof(long)));
 
-		size = le16_to_cpu(desc->rx.size);
-		truesize = ALIGN(size, L1_CACHE_BYTES);
-		skb_add_rx_frag(skb, 0, desc_cb->priv,
-				desc_cb->page_offset + pull_len,
-				size - pull_len, truesize - pull_len);
-
-		hns_nic_reuse_page(desc_cb, truesize, last_offset);
+		hns_nic_reuse_page(skb, 0, ring, pull_len, desc_cb);
 		ring_ptr_move_fw(ring, next_to_clean);
 
 		if (unlikely(bnum >= (int)MAX_SKB_FRAGS)) { /* check err*/
@@ -604,13 +631,8 @@  static int hns_nic_poll_rx_skb(struct hns_nic_ring_data *ring_data,
 		for (i = 1; i < bnum; i++) {
 			desc = &ring->desc[ring->next_to_clean];
 			desc_cb = &ring->desc_cb[ring->next_to_clean];
-			size = le16_to_cpu(desc->rx.size);
-			truesize = ALIGN(size, L1_CACHE_BYTES);
-			skb_add_rx_frag(skb, i, desc_cb->priv,
-					desc_cb->page_offset,
-					size, truesize);
 
-			hns_nic_reuse_page(desc_cb, truesize, last_offset);
+			hns_nic_reuse_page(skb, i, ring, 0, desc_cb);
 			ring_ptr_move_fw(ring, next_to_clean);
 		}
 	}
@@ -750,9 +772,10 @@  recv:
 	/* make all data has been write before submit */
 	if (recv_pkts < budget) {
 		ex_num = readl_relaxed(ring->io_base + RCB_REG_FBDNUM);
-		rmb(); /*complete read rx ring bd number*/
+
 		if (ex_num > clean_count) {
 			num += ex_num - clean_count;
+			rmb(); /*complete read rx ring bd number*/
 			goto recv;
 		}
 	}
@@ -849,8 +872,11 @@  static int hns_nic_tx_poll_one(struct hns_nic_ring_data *ring_data,
 
 	bytes = 0;
 	pkts = 0;
-	while (head != ring->next_to_clean)
+	while (head != ring->next_to_clean) {
 		hns_nic_reclaim_one_desc(ring, &bytes, &pkts);
+		/* issue prefetch for next Tx descriptor */
+		prefetch(&ring->desc_cb[ring->next_to_clean]);
+	}
 
 	NETIF_TX_UNLOCK(ndev);
 
@@ -926,6 +952,7 @@  static int hns_nic_common_poll(struct napi_struct *napi, int budget)
 			ring_data->ring, 0);
 
 		ring_data->fini_process(ring_data);
+		return 0;
 	}
 
 	return clean_complete;