diff mbox series

iavf: use kvzalloc instead of kzalloc for rx/tx_bi buffer

Message ID 1598514788-31039-1-git-send-email-lirongqing@baidu.com
State Awaiting Upstream
Delegated to: David Miller
Headers show
Series iavf: use kvzalloc instead of kzalloc for rx/tx_bi buffer | expand

Commit Message

Li RongQing Aug. 27, 2020, 7:53 a.m. UTC
When changing the rx/tx ring size to 4096, kzalloc may fail due to
a temporary shortage of slab entries.

kvmalloc is used to allocate this memory instead, as there is no need
for this memory area to be physically contiguous.

Signed-off-by: Li RongQing <lirongqing@baidu.com>
---
 drivers/net/ethernet/intel/iavf/iavf_txrx.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

Comments

Eric Dumazet Aug. 27, 2020, 8:25 a.m. UTC | #1
On 8/27/20 12:53 AM, Li RongQing wrote:
> when changes the rx/tx ring to 4096, kzalloc may fail due to
> a temporary shortage on slab entries.
> 
> kvmalloc is used to allocate this memory as there is no need
> to have this memory area physical continuously.
> 
> Signed-off-by: Li RongQing <lirongqing@baidu.com>
> ---


Well, fallback to vmalloc() overhead because order-1 pages are not readily available
when the NIC is setup (usually one time per boot)
is adding TLB cost at run time, for billions of packets to come, maybe for months.

Surely trying a bit harder to get order-1 pages is desirable.

 __GFP_RETRY_MAYFAIL is supposed to help here.
Li RongQing Aug. 27, 2020, 8:53 a.m. UTC | #2
> -----Original Message-----
> From: Eric Dumazet [mailto:eric.dumazet@gmail.com]
> Sent: Thursday, August 27, 2020 4:26 PM
> To: Li,Rongqing <lirongqing@baidu.com>; netdev@vger.kernel.org;
> intel-wired-lan@lists.osuosl.org
> Subject: Re: [PATCH] iavf: use kvzalloc instead of kzalloc for rx/tx_bi buffer
> 
> 
> 
> On 8/27/20 12:53 AM, Li RongQing wrote:
> > when changes the rx/tx ring to 4096, kzalloc may fail due to a
> > temporary shortage on slab entries.
> >
> > kvmalloc is used to allocate this memory as there is no need to have
> > this memory area physical continuously.
> >
> > Signed-off-by: Li RongQing <lirongqing@baidu.com>
> > ---
> 
> 
> Well, fallback to vmalloc() overhead because order-1 pages are not readily
> available when the NIC is setup (usually one time per boot) is adding TLB cost
> at run time, for billions of packets to come, maybe for months.
> 
> Surely trying a bit harder to get order-1 pages is desirable.
> 
>  __GFP_RETRY_MAYFAIL is supposed to help here.

Could we add __GFP_RETRY_MAYFAIL to kvmalloc, to ensure the allocation success ?

> 

I see that lots of drivers are using vmalloc for this buffer; should we change them to kmalloc?

grep "buffer_info =" drivers/net/ethernet/intel/ -rI|grep alloc

drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c:      tx_ring->tx_buffer_info = vmalloc(size);
drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c:      rx_ring->rx_buffer_info = vmalloc(size);
drivers/net/ethernet/intel/ixgbe/ixgbe_main.c:  tx_ring->tx_buffer_info = vmalloc_node(size, ring_node);
drivers/net/ethernet/intel/ixgbe/ixgbe_main.c:          tx_ring->tx_buffer_info = vmalloc(size);
drivers/net/ethernet/intel/ixgbe/ixgbe_main.c:  rx_ring->rx_buffer_info = vmalloc_node(size, ring_node);
drivers/net/ethernet/intel/ixgbe/ixgbe_main.c:          rx_ring->rx_buffer_info = vmalloc(size);
drivers/net/ethernet/intel/ixgb/ixgb_main.c:    txdr->buffer_info = vzalloc(size);
drivers/net/ethernet/intel/ixgb/ixgb_main.c:    rxdr->buffer_info = vzalloc(size);
drivers/net/ethernet/intel/e1000e/ethtool.c:    tx_ring->buffer_info = kcalloc(tx_ring->count,
drivers/net/ethernet/intel/e1000e/ethtool.c:    rx_ring->buffer_info = kcalloc(rx_ring->count,
drivers/net/ethernet/intel/e1000e/netdev.c:     tx_ring->buffer_info = vzalloc(size);
drivers/net/ethernet/intel/e1000e/netdev.c:     rx_ring->buffer_info = vzalloc(size);
drivers/net/ethernet/intel/igb/igb_main.c:      tx_ring->tx_buffer_info = vmalloc(size);
drivers/net/ethernet/intel/igb/igb_main.c:      rx_ring->rx_buffer_info = vmalloc(size);
drivers/net/ethernet/intel/e1000/e1000_ethtool.c:       txdr->buffer_info = kcalloc(txdr->count, sizeof(struct e1000_tx_buffer),
drivers/net/ethernet/intel/e1000/e1000_ethtool.c:       rxdr->buffer_info = kcalloc(rxdr->count, sizeof(struct e1000_rx_buffer),
drivers/net/ethernet/intel/e1000/e1000_main.c:  txdr->buffer_info = vzalloc(size);
drivers/net/ethernet/intel/e1000/e1000_main.c:  rxdr->buffer_info = vzalloc(size);
drivers/net/ethernet/intel/igc/igc_main.c:      tx_ring->tx_buffer_info = vzalloc(size);
drivers/net/ethernet/intel/igc/igc_main.c:      rx_ring->rx_buffer_info = vzalloc(size);
drivers/net/ethernet/intel/igbvf/netdev.c:      tx_ring->buffer_info = vzalloc(size);
drivers/net/ethernet/intel/igbvf/netdev.c:      rx_ring->buffer_info = vzalloc(size);


-Li
Eric Dumazet Aug. 27, 2020, 9:55 a.m. UTC | #3
On 8/27/20 1:53 AM, Li,Rongqing wrote:
> 
> 
>> -----Original Message-----
>> From: Eric Dumazet [mailto:eric.dumazet@gmail.com]
>> Sent: Thursday, August 27, 2020 4:26 PM
>> To: Li,Rongqing <lirongqing@baidu.com>; netdev@vger.kernel.org;
>> intel-wired-lan@lists.osuosl.org
>> Subject: Re: [PATCH] iavf: use kvzalloc instead of kzalloc for rx/tx_bi buffer
>>
>>
>>
>> On 8/27/20 12:53 AM, Li RongQing wrote:
>>> when changes the rx/tx ring to 4096, kzalloc may fail due to a
>>> temporary shortage on slab entries.
>>>
>>> kvmalloc is used to allocate this memory as there is no need to have
>>> this memory area physical continuously.
>>>
>>> Signed-off-by: Li RongQing <lirongqing@baidu.com>
>>> ---
>>
>>
>> Well, fallback to vmalloc() overhead because order-1 pages are not readily
>> available when the NIC is setup (usually one time per boot) is adding TLB cost
>> at run time, for billions of packets to come, maybe for months.
>>
>> Surely trying a bit harder to get order-1 pages is desirable.
>>
>>  __GFP_RETRY_MAYFAIL is supposed to help here.
> 
> Could we add __GFP_RETRY_MAYFAIL to kvmalloc, to ensure the allocation success ?

__GFP_RETRY_MAYFAIL does not _ensure_ the allocation success.

The idea here is that for large allocations (bigger than PAGE_SIZE),
kvmalloc_node() will not force __GFP_NORETRY, meaning that page allocator
will not bailout immediately in case of memory pressure.

This gives a chance for page reclaims to happen, and eventually the high order page
allocation will succeed under normal circumstances.

It is a trade-off, and only worth it for long living allocations.
Li RongQing Aug. 27, 2020, 10:54 a.m. UTC | #4
> -----Original Message-----
> From: Eric Dumazet [mailto:eric.dumazet@gmail.com]
> Sent: Thursday, August 27, 2020 5:55 PM
> To: Li,Rongqing <lirongqing@baidu.com>; Eric Dumazet
> <eric.dumazet@gmail.com>; netdev@vger.kernel.org;
> intel-wired-lan@lists.osuosl.org
> Subject: Re: [PATCH] iavf: use kvzalloc instead of kzalloc for rx/tx_bi buffer
> 
> 
> 
> On 8/27/20 1:53 AM, Li,Rongqing wrote:
> >
> >
> >> -----Original Message-----
> >> From: Eric Dumazet [mailto:eric.dumazet@gmail.com]
> >> Sent: Thursday, August 27, 2020 4:26 PM
> >> To: Li,Rongqing <lirongqing@baidu.com>; netdev@vger.kernel.org;
> >> intel-wired-lan@lists.osuosl.org
> >> Subject: Re: [PATCH] iavf: use kvzalloc instead of kzalloc for
> >> rx/tx_bi buffer
> >>
> >>
> >>
> >> On 8/27/20 12:53 AM, Li RongQing wrote:
> >>> when changes the rx/tx ring to 4096, kzalloc may fail due to a
> >>> temporary shortage on slab entries.
> >>>
> >>> kvmalloc is used to allocate this memory as there is no need to have
> >>> this memory area physical continuously.
> >>>
> >>> Signed-off-by: Li RongQing <lirongqing@baidu.com>
> >>> ---
> >>
> >>
> >> Well, fallback to vmalloc() overhead because order-1 pages are not
> >> readily available when the NIC is setup (usually one time per boot)
> >> is adding TLB cost at run time, for billions of packets to come, maybe for
> months.
> >>
> >> Surely trying a bit harder to get order-1 pages is desirable.
> >>
> >>  __GFP_RETRY_MAYFAIL is supposed to help here.
> >
> > Could we add __GFP_RETRY_MAYFAIL to kvmalloc, to ensure the allocation
> success ?
> 
> __GFP_RETRY_MAYFAIL does not _ensure_ the allocation success.
> 
> The idea here is that for large allocations (bigger than PAGE_SIZE),
> kvmalloc_node() will not force __GFP_NORETRY, meaning that page allocator
> will not bailout immediately in case of memory pressure.
> 
> This gives a chance for page reclaims to happen, and eventually the high order
> page allocation will succeed under normal circumstances.
> 
> It is a trade-off, and only worth it for long living allocations.

Thanks, Eric; 
I will change it as below, so that kmalloc will be used most of the time, with a fallback to vmalloc to keep the allocation succeeding if memory reclaim fails under memory pressure.

@@ -622,7 +622,7 @@ int iavf_setup_tx_descriptors(struct iavf_ring *tx_ring)
        /* warn if we are about to overwrite the pointer */
        WARN_ON(tx_ring->tx_bi);
        bi_size = sizeof(struct iavf_tx_buffer) * tx_ring->count;
-       tx_ring->tx_bi = kzalloc(bi_size, GFP_KERNEL);
+       tx_ring->tx_bi = kvzalloc(bi_size, GFP_KERNEL|__GFP_RETRY_MAYFAIL);
        if (!tx_ring->tx_bi)
                goto err;
 
@@ -643,7 +643,7 @@ int iavf_setup_tx_descriptors(struct iavf_ring *tx_ring)


-Li
diff mbox series

Patch

diff --git a/drivers/net/ethernet/intel/iavf/iavf_txrx.c b/drivers/net/ethernet/intel/iavf/iavf_txrx.c
index 256fa07d54d5..f5a3e195ec54 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_txrx.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_txrx.c
@@ -92,7 +92,7 @@  void iavf_clean_tx_ring(struct iavf_ring *tx_ring)
 void iavf_free_tx_resources(struct iavf_ring *tx_ring)
 {
 	iavf_clean_tx_ring(tx_ring);
-	kfree(tx_ring->tx_bi);
+	kvfree(tx_ring->tx_bi);
 	tx_ring->tx_bi = NULL;
 
 	if (tx_ring->desc) {
@@ -622,7 +622,7 @@  int iavf_setup_tx_descriptors(struct iavf_ring *tx_ring)
 	/* warn if we are about to overwrite the pointer */
 	WARN_ON(tx_ring->tx_bi);
 	bi_size = sizeof(struct iavf_tx_buffer) * tx_ring->count;
-	tx_ring->tx_bi = kzalloc(bi_size, GFP_KERNEL);
+	tx_ring->tx_bi = kvzalloc(bi_size, GFP_KERNEL);
 	if (!tx_ring->tx_bi)
 		goto err;
 
@@ -643,7 +643,7 @@  int iavf_setup_tx_descriptors(struct iavf_ring *tx_ring)
 	return 0;
 
 err:
-	kfree(tx_ring->tx_bi);
+	kvfree(tx_ring->tx_bi);
 	tx_ring->tx_bi = NULL;
 	return -ENOMEM;
 }
@@ -714,7 +714,7 @@  void iavf_clean_rx_ring(struct iavf_ring *rx_ring)
 void iavf_free_rx_resources(struct iavf_ring *rx_ring)
 {
 	iavf_clean_rx_ring(rx_ring);
-	kfree(rx_ring->rx_bi);
+	kvfree(rx_ring->rx_bi);
 	rx_ring->rx_bi = NULL;
 
 	if (rx_ring->desc) {
@@ -738,7 +738,7 @@  int iavf_setup_rx_descriptors(struct iavf_ring *rx_ring)
 	/* warn if we are about to overwrite the pointer */
 	WARN_ON(rx_ring->rx_bi);
 	bi_size = sizeof(struct iavf_rx_buffer) * rx_ring->count;
-	rx_ring->rx_bi = kzalloc(bi_size, GFP_KERNEL);
+	rx_ring->rx_bi = kvzalloc(bi_size, GFP_KERNEL);
 	if (!rx_ring->rx_bi)
 		goto err;
 
@@ -762,7 +762,7 @@  int iavf_setup_rx_descriptors(struct iavf_ring *rx_ring)
 
 	return 0;
 err:
-	kfree(rx_ring->rx_bi);
+	kvfree(rx_ring->rx_bi);
 	rx_ring->rx_bi = NULL;
 	return -ENOMEM;
 }