diff mbox series

[net,v3] i40e: Fix DMA mappings leak

Message ID 20220909083326.344027-1-jan.sokolowski@intel.com
State Changes Requested
Headers show
Series [net,v3] i40e: Fix DMA mappings leak | expand

Commit Message

Jan Sokolowski Sept. 9, 2022, 8:33 a.m. UTC
From: Jan Sokolowski <jan.sokolowski@intel.com>

During reallocation of RX buffers, new DMA mappings are created for
those buffers. New buffers with different RX ring count should
substitute older ones, but those buffers were freed in
i40e_configure_rx_ring and reallocated again with i40e_alloc_rx_bi,
thus kfree on rx_bi caused leak of already mapped DMA.

In case of non XDP ring, do not free rx_bi and reuse already existing
buffer, move kfree to XDP rings only, remove unused i40e_alloc_rx_bi
function.

steps for reproduction:
while :
do
for ((i=0; i<=8160; i=i+32))
do
ethtool -G enp130s0f0 rx $i tx $i
sleep 0.5
ethtool -g enp130s0f0
done
done

v2: Fixed improper kerneldoc that resulted in a warning
v3: Applied commit msg fixes reported during a review

Fixes: be1222b585fd ("i40e: Separate kernel allocated rx_bi rings from AF_XDP rings")
Signed-off-by: Jan Sokolowski <jan.sokolowski@intel.com>
---
 .../net/ethernet/intel/i40e/i40e_ethtool.c    |  3 -
 drivers/net/ethernet/intel/i40e/i40e_main.c   | 13 ++--
 drivers/net/ethernet/intel/i40e/i40e_txrx.c   | 14 ++--
 drivers/net/ethernet/intel/i40e/i40e_txrx.h   |  1 -
 drivers/net/ethernet/intel/i40e/i40e_xsk.c    | 70 ++++++++++++++++---
 drivers/net/ethernet/intel/i40e/i40e_xsk.h    |  2 +-
 6 files changed, 74 insertions(+), 29 deletions(-)

Comments

Tony Nguyen Sept. 12, 2022, 10:42 p.m. UTC | #1
Adding Maciej in case he wants to review.

On 9/9/2022 1:33 AM, Sokolowski, Jan wrote:
> From: Jan Sokolowski <jan.sokolowski@intel.com>
> 
> During reallocation of RX buffers, new DMA mappings are created for
> those buffers. New buffers with different RX ring count should
> substitute older ones, but those buffers were freed in
> i40e_configure_rx_ring and reallocated again with i40e_alloc_rx_bi,
> thus kfree on rx_bi caused leak of already mapped DMA.
> 
> In case of non XDP ring, do not free rx_bi and reuse already existing
> buffer, move kfree to XDP rings only, remove unused i40e_alloc_rx_bi
> function.
> 
> steps for reproduction:
> while :
> do
> for ((i=0; i<=8160; i=i+32))
> do
> ethtool -G enp130s0f0 rx $i tx $i
> sleep 0.5
> ethtool -g enp130s0f0
> done
> done
> 
> v2: Fixed improper kerneldoc that resulted in a warning
> v3: Applied commit msg fixes reported during a review

These should be after the '---'

> 
> Fixes: be1222b585fd ("i40e: Separate kernel allocated rx_bi rings from AF_XDP rings")
> Signed-off-by: Jan Sokolowski <jan.sokolowski@intel.com>
> ---
>   .../net/ethernet/intel/i40e/i40e_ethtool.c    |  3 -
>   drivers/net/ethernet/intel/i40e/i40e_main.c   | 13 ++--
>   drivers/net/ethernet/intel/i40e/i40e_txrx.c   | 14 ++--
>   drivers/net/ethernet/intel/i40e/i40e_txrx.h   |  1 -
>   drivers/net/ethernet/intel/i40e/i40e_xsk.c    | 70 ++++++++++++++++---
>   drivers/net/ethernet/intel/i40e/i40e_xsk.h    |  2 +-
>   6 files changed, 74 insertions(+), 29 deletions(-)
> 
> diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
> index e518aaa2c0ca..0f2042f1597c 100644
> --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
> +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
> @@ -2181,9 +2181,6 @@ static int i40e_set_ringparam(struct net_device *netdev,
>   			 */
>   			rx_rings[i].tail = hw->hw_addr + I40E_PRTGEN_STATUS;
>   			err = i40e_setup_rx_descriptors(&rx_rings[i]);
> -			if (err)
> -				goto rx_unwind;
> -			err = i40e_alloc_rx_bi(&rx_rings[i]);
>   			if (err)
>   				goto rx_unwind;
>   
> diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
> index e3d9804aeb25..ad15749a2dd3 100644
> --- a/drivers/net/ethernet/intel/i40e/i40e_main.c
> +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
> @@ -3565,12 +3565,8 @@ static int i40e_configure_rx_ring(struct i40e_ring *ring)
>   	if (ring->vsi->type == I40E_VSI_MAIN)
>   		xdp_rxq_info_unreg_mem_model(&ring->xdp_rxq);
>   
> -	kfree(ring->rx_bi);
>   	ring->xsk_pool = i40e_xsk_pool(ring);
>   	if (ring->xsk_pool) {
> -		ret = i40e_alloc_rx_bi_zc(ring);
> -		if (ret)
> -			return ret;
>   		ring->rx_buf_len =
>   		  xsk_pool_get_rx_frame_size(ring->xsk_pool);
>   		/* For AF_XDP ZC, we disallow packets to span on
> @@ -3588,9 +3584,6 @@ static int i40e_configure_rx_ring(struct i40e_ring *ring)
>   			 ring->queue_index);
>   
>   	} else {
> -		ret = i40e_alloc_rx_bi(ring);
> -		if (ret)
> -			return ret;
>   		ring->rx_buf_len = vsi->rx_buf_len;
>   		if (ring->vsi->type == I40E_VSI_MAIN) {
>   			ret = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
> @@ -13304,6 +13297,11 @@ static int i40e_xdp_setup(struct i40e_vsi *vsi, struct bpf_prog *prog,
>   		i40e_reset_and_rebuild(pf, true, true);
>   	}
>   
> +	if (!i40e_enabled_xdp_vsi(vsi) && prog)
> +		i40e_realloc_rx_bi_zc(vsi, true);
> +	else if (i40e_enabled_xdp_vsi(vsi) && !prog)
> +		i40e_realloc_rx_bi_zc(vsi, false);
> +
>   	for (i = 0; i < vsi->num_queue_pairs; i++)
>   		WRITE_ONCE(vsi->rx_rings[i]->xdp_prog, vsi->xdp_prog);
>   
> @@ -13536,6 +13534,7 @@ int i40e_queue_pair_disable(struct i40e_vsi *vsi, int queue_pair)
>   
>   	i40e_queue_pair_disable_irq(vsi, queue_pair);
>   	err = i40e_queue_pair_toggle_rings(vsi, queue_pair, false /* off */);
> +	i40e_clean_rx_ring(vsi->rx_rings[queue_pair]);
>   	i40e_queue_pair_toggle_napi(vsi, queue_pair, false /* off */);
>   	i40e_queue_pair_clean_rings(vsi, queue_pair);
>   	i40e_queue_pair_reset_stats(vsi, queue_pair);
> diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
> index 69e67eb6aea7..c5a7e3819ce2 100644
> --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
> +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
> @@ -1457,14 +1457,6 @@ int i40e_setup_tx_descriptors(struct i40e_ring *tx_ring)
>   	return -ENOMEM;
>   }
>   
> -int i40e_alloc_rx_bi(struct i40e_ring *rx_ring)
> -{
> -	unsigned long sz = sizeof(*rx_ring->rx_bi) * rx_ring->count;
> -
> -	rx_ring->rx_bi = kzalloc(sz, GFP_KERNEL);
> -	return rx_ring->rx_bi ? 0 : -ENOMEM;
> -}
> -
>   static void i40e_clear_rx_bi(struct i40e_ring *rx_ring)
>   {
>   	memset(rx_ring->rx_bi, 0, sizeof(*rx_ring->rx_bi) * rx_ring->count);
> @@ -1593,6 +1585,12 @@ int i40e_setup_rx_descriptors(struct i40e_ring *rx_ring)
>   
>   	rx_ring->xdp_prog = rx_ring->vsi->xdp_prog;
>   
> +	WARN_ON(rx_ring->rx_bi);
> +	rx_ring->rx_bi =
> +		kcalloc(rx_ring->count, sizeof(*rx_ring->rx_bi), GFP_KERNEL);

This will overwrite, and, presumably, leak rx_ring->rx_bi for non-null 
values.

> +	if (!rx_ring->rx_bi)
> +		return -ENOMEM;
> +
>   	return 0;
>   }
>   
> diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
> index 41f86e9535a0..768290dc6f48 100644
> --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h
> +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
> @@ -469,7 +469,6 @@ int __i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size);
>   bool __i40e_chk_linearize(struct sk_buff *skb);
>   int i40e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
>   		  u32 flags);
> -int i40e_alloc_rx_bi(struct i40e_ring *rx_ring);
>   
>   /**
>    * i40e_get_head - Retrieve head from head writeback
> diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
> index 6d4009e0cbd6..ba33b3b7a2bf 100644
> --- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c
> +++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
> @@ -10,14 +10,6 @@
>   #include "i40e_txrx_common.h"
>   #include "i40e_xsk.h"
>   
> -int i40e_alloc_rx_bi_zc(struct i40e_ring *rx_ring)
> -{
> -	unsigned long sz = sizeof(*rx_ring->rx_bi_zc) * rx_ring->count;
> -
> -	rx_ring->rx_bi_zc = kzalloc(sz, GFP_KERNEL);
> -	return rx_ring->rx_bi_zc ? 0 : -ENOMEM;
> -}
> -
>   void i40e_clear_rx_bi_zc(struct i40e_ring *rx_ring)
>   {
>   	memset(rx_ring->rx_bi_zc, 0,
> @@ -29,6 +21,58 @@ static struct xdp_buff **i40e_rx_bi(struct i40e_ring *rx_ring, u32 idx)
>   	return &rx_ring->rx_bi_zc[idx];
>   }
>   
> +/**
> + * i40e_realloc_rx_xdp_bi - reallocate for either XSK or normal buffer
> + * @rx_ring: Current rx ring
> + * @pool_present: is pool for XSK present
> + *
> + * Try allocating memory and return ENOMEM, if failed to allocate.
> + * If allocation was successful, substitute buffer with allocated one.
> + * Returns 0 on success, negative on failure
> + */
> +static int i40e_realloc_rx_xdp_bi(struct i40e_ring *rx_ring, bool pool_present)
> +{
> +	size_t elem_size = pool_present ? sizeof(*rx_ring->rx_bi_zc) :
> +					  sizeof(*rx_ring->rx_bi);
> +

no newline here

> +	void *sw_ring = kcalloc(rx_ring->count, elem_size, GFP_KERNEL);

newline here.

> +	if (!sw_ring)
> +		return -ENOMEM;
> +
> +	if (pool_present) {
> +		kfree(rx_ring->rx_bi);
> +		rx_ring->rx_bi = NULL;
> +		rx_ring->rx_bi_zc = sw_ring;
> +	} else {
> +		kfree(rx_ring->rx_bi_zc);
> +		rx_ring->rx_bi_zc = NULL;
> +		rx_ring->rx_bi = sw_ring;
> +	}
> +	return 0;
> +}
> +
> +/**
> + * i40e_realloc_rx_bi_zc - reallocate xdp queue pairs
> + * @vsi: Current VSI
> + * @zc: is zero copy set
> + *
> + * Reallocate buffer for rx_rings that might be used by XSK.
> + * XDP requires more memory, than rx_buf provides.
> + * Returns 0 on success, negative on failure
> + */
> +int i40e_realloc_rx_bi_zc(struct i40e_vsi *vsi, bool zc)
> +{
> +	struct i40e_ring *rx_ring;
> +	unsigned long q;
> +
> +	for_each_set_bit(q, vsi->af_xdp_zc_qps, vsi->alloc_queue_pairs) {
> +		rx_ring = vsi->rx_rings[q];
> +		if (i40e_realloc_rx_xdp_bi(rx_ring, zc))
> +			return -ENOMEM;
> +	}
> +	return 0;
> +}
> +
>   /**
>    * i40e_xsk_pool_enable - Enable/associate an AF_XDP buffer pool to a
>    * certain ring/qid
> @@ -69,6 +113,10 @@ static int i40e_xsk_pool_enable(struct i40e_vsi *vsi,
>   		if (err)
>   			return err;
>   
> +		err = i40e_realloc_rx_xdp_bi(vsi->rx_rings[qid], true);
> +		if (err)
> +			return err;
> +
>   		err = i40e_queue_pair_enable(vsi, qid);
>   		if (err)
>   			return err;
> @@ -107,12 +155,16 @@ static int i40e_xsk_pool_disable(struct i40e_vsi *vsi, u16 qid)
>   		err = i40e_queue_pair_disable(vsi, qid);
>   		if (err)
>   			return err;
> +

Why this change?

>   	}
>   
>   	clear_bit(qid, vsi->af_xdp_zc_qps);
>   	xsk_pool_dma_unmap(pool, I40E_RX_DMA_ATTR);
>   
>   	if (if_running) {
> +		err = i40e_realloc_rx_xdp_bi(vsi->rx_rings[qid], false);
> +		if (err)
> +			return err;
>   		err = i40e_queue_pair_enable(vsi, qid);
>   		if (err)
>   			return err;
> @@ -131,7 +183,7 @@ static int i40e_xsk_pool_disable(struct i40e_vsi *vsi, u16 qid)
>    * This function enables or disables a buffer pool to a certain ring.
>    *
>    * Returns 0 on success, <0 on failure
> - **/
> + */

Also, this one. I don't mind this, but seems unrelated to the patch.

>   int i40e_xsk_pool_setup(struct i40e_vsi *vsi, struct xsk_buff_pool *pool,
>   			u16 qid)
>   {
> diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.h b/drivers/net/ethernet/intel/i40e/i40e_xsk.h
> index bb962987f300..821df248f8be 100644
> --- a/drivers/net/ethernet/intel/i40e/i40e_xsk.h
> +++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.h
> @@ -32,7 +32,7 @@ int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget);
>   
>   bool i40e_clean_xdp_tx_irq(struct i40e_vsi *vsi, struct i40e_ring *tx_ring);
>   int i40e_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags);
> -int i40e_alloc_rx_bi_zc(struct i40e_ring *rx_ring);
> +int i40e_realloc_rx_bi_zc(struct i40e_vsi *vsi, bool zc);
>   void i40e_clear_rx_bi_zc(struct i40e_ring *rx_ring);
>   
>   #endif /* _I40E_XSK_H_ */
Maciej Fijalkowski Sept. 13, 2022, 8:49 a.m. UTC | #2
> 
> Adding Maciej in case he wants to review.

Przemek, is it really broken on i40e as well or is this patch a blind port from ice?

> 
> On 9/9/2022 1:33 AM, Sokolowski, Jan wrote:
> > From: Jan Sokolowski <jan.sokolowski@intel.com>
> >
> > During reallocation of RX buffers, new DMA mappings are created for
> > those buffers. New buffers with different RX ring count should
> > substitute older ones, but those buffers were freed in
> > i40e_configure_rx_ring and reallocated again with i40e_alloc_rx_bi,
> > thus kfree on rx_bi caused leak of already mapped DMA.
> >
> > In case of non XDP ring, do not free rx_bi and reuse already existing
> > buffer, move kfree to XDP rings only, remove unused i40e_alloc_rx_bi
> > function.
> >
> > steps for reproduction:
> > while :
> > do
> > for ((i=0; i<=8160; i=i+32))
> > do
> > ethtool -G enp130s0f0 rx $i tx $i
> > sleep 0.5
> > ethtool -g enp130s0f0
> > done
> > done
> >
> > v2: Fixed improper kerneldoc that resulted in a warning
> > v3: Applied commit msg fixes reported during a review
> 
> These should be after the '---'
> 
> >
> > Fixes: be1222b585fd ("i40e: Separate kernel allocated rx_bi rings from AF_XDP
> rings")
> > Signed-off-by: Jan Sokolowski <jan.sokolowski@intel.com>
> > ---
> >   .../net/ethernet/intel/i40e/i40e_ethtool.c    |  3 -
> >   drivers/net/ethernet/intel/i40e/i40e_main.c   | 13 ++--
> >   drivers/net/ethernet/intel/i40e/i40e_txrx.c   | 14 ++--
> >   drivers/net/ethernet/intel/i40e/i40e_txrx.h   |  1 -
> >   drivers/net/ethernet/intel/i40e/i40e_xsk.c    | 70 ++++++++++++++++---
> >   drivers/net/ethernet/intel/i40e/i40e_xsk.h    |  2 +-
> >   6 files changed, 74 insertions(+), 29 deletions(-)
> >
> > diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
> b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
> > index e518aaa2c0ca..0f2042f1597c 100644
> > --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
> > +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
> > @@ -2181,9 +2181,6 @@ static int i40e_set_ringparam(struct net_device
> *netdev,
> >   			 */
> >   			rx_rings[i].tail = hw->hw_addr + I40E_PRTGEN_STATUS;
> >   			err = i40e_setup_rx_descriptors(&rx_rings[i]);
> > -			if (err)
> > -				goto rx_unwind;
> > -			err = i40e_alloc_rx_bi(&rx_rings[i]);
> >   			if (err)
> >   				goto rx_unwind;
> >
> > diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c
> b/drivers/net/ethernet/intel/i40e/i40e_main.c
> > index e3d9804aeb25..ad15749a2dd3 100644
> > --- a/drivers/net/ethernet/intel/i40e/i40e_main.c
> > +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
> > @@ -3565,12 +3565,8 @@ static int i40e_configure_rx_ring(struct i40e_ring
> *ring)
> >   	if (ring->vsi->type == I40E_VSI_MAIN)
> >   		xdp_rxq_info_unreg_mem_model(&ring->xdp_rxq);
> >
> > -	kfree(ring->rx_bi);
> >   	ring->xsk_pool = i40e_xsk_pool(ring);
> >   	if (ring->xsk_pool) {
> > -		ret = i40e_alloc_rx_bi_zc(ring);
> > -		if (ret)
> > -			return ret;
> >   		ring->rx_buf_len =
> >   		  xsk_pool_get_rx_frame_size(ring->xsk_pool);
> >   		/* For AF_XDP ZC, we disallow packets to span on
> > @@ -3588,9 +3584,6 @@ static int i40e_configure_rx_ring(struct i40e_ring *ring)
> >   			 ring->queue_index);
> >
> >   	} else {
> > -		ret = i40e_alloc_rx_bi(ring);
> > -		if (ret)
> > -			return ret;
> >   		ring->rx_buf_len = vsi->rx_buf_len;
> >   		if (ring->vsi->type == I40E_VSI_MAIN) {
> >   			ret = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
> > @@ -13304,6 +13297,11 @@ static int i40e_xdp_setup(struct i40e_vsi *vsi, struct
> bpf_prog *prog,
> >   		i40e_reset_and_rebuild(pf, true, true);
> >   	}
> >
> > +	if (!i40e_enabled_xdp_vsi(vsi) && prog)
> > +		i40e_realloc_rx_bi_zc(vsi, true);
> > +	else if (i40e_enabled_xdp_vsi(vsi) && !prog)
> > +		i40e_realloc_rx_bi_zc(vsi, false);
> > +
> >   	for (i = 0; i < vsi->num_queue_pairs; i++)
> >   		WRITE_ONCE(vsi->rx_rings[i]->xdp_prog, vsi->xdp_prog);
> >
> > @@ -13536,6 +13534,7 @@ int i40e_queue_pair_disable(struct i40e_vsi *vsi, int
> queue_pair)
> >
> >   	i40e_queue_pair_disable_irq(vsi, queue_pair);
> >   	err = i40e_queue_pair_toggle_rings(vsi, queue_pair, false /* off */);
> > +	i40e_clean_rx_ring(vsi->rx_rings[queue_pair]);
> >   	i40e_queue_pair_toggle_napi(vsi, queue_pair, false /* off */);
> >   	i40e_queue_pair_clean_rings(vsi, queue_pair);
> >   	i40e_queue_pair_reset_stats(vsi, queue_pair);
> > diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
> b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
> > index 69e67eb6aea7..c5a7e3819ce2 100644
> > --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
> > +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
> > @@ -1457,14 +1457,6 @@ int i40e_setup_tx_descriptors(struct i40e_ring
> *tx_ring)
> >   	return -ENOMEM;
> >   }
> >
> > -int i40e_alloc_rx_bi(struct i40e_ring *rx_ring)
> > -{
> > -	unsigned long sz = sizeof(*rx_ring->rx_bi) * rx_ring->count;
> > -
> > -	rx_ring->rx_bi = kzalloc(sz, GFP_KERNEL);
> > -	return rx_ring->rx_bi ? 0 : -ENOMEM;
> > -}
> > -
> >   static void i40e_clear_rx_bi(struct i40e_ring *rx_ring)
> >   {
> >   	memset(rx_ring->rx_bi, 0, sizeof(*rx_ring->rx_bi) * rx_ring->count);
> > @@ -1593,6 +1585,12 @@ int i40e_setup_rx_descriptors(struct i40e_ring
> *rx_ring)
> >
> >   	rx_ring->xdp_prog = rx_ring->vsi->xdp_prog;
> >
> > +	WARN_ON(rx_ring->rx_bi);
> > +	rx_ring->rx_bi =
> > +		kcalloc(rx_ring->count, sizeof(*rx_ring->rx_bi), GFP_KERNEL);
> 
> This will overwrite, and, presumably, leak rx_ring->rx_bi for non-null
> values.
> 
> > +	if (!rx_ring->rx_bi)
> > +		return -ENOMEM;
> > +
> >   	return 0;
> >   }
> >
> > diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h
> b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
> > index 41f86e9535a0..768290dc6f48 100644
> > --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h
> > +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
> > @@ -469,7 +469,6 @@ int __i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int
> size);
> >   bool __i40e_chk_linearize(struct sk_buff *skb);
> >   int i40e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
> >   		  u32 flags);
> > -int i40e_alloc_rx_bi(struct i40e_ring *rx_ring);
> >
> >   /**
> >    * i40e_get_head - Retrieve head from head writeback
> > diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c
> b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
> > index 6d4009e0cbd6..ba33b3b7a2bf 100644
> > --- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c
> > +++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
> > @@ -10,14 +10,6 @@
> >   #include "i40e_txrx_common.h"
> >   #include "i40e_xsk.h"
> >
> > -int i40e_alloc_rx_bi_zc(struct i40e_ring *rx_ring)
> > -{
> > -	unsigned long sz = sizeof(*rx_ring->rx_bi_zc) * rx_ring->count;
> > -
> > -	rx_ring->rx_bi_zc = kzalloc(sz, GFP_KERNEL);
> > -	return rx_ring->rx_bi_zc ? 0 : -ENOMEM;
> > -}
> > -
> >   void i40e_clear_rx_bi_zc(struct i40e_ring *rx_ring)
> >   {
> >   	memset(rx_ring->rx_bi_zc, 0,
> > @@ -29,6 +21,58 @@ static struct xdp_buff **i40e_rx_bi(struct i40e_ring
> *rx_ring, u32 idx)
> >   	return &rx_ring->rx_bi_zc[idx];
> >   }
> >
> > +/**
> > + * i40e_realloc_rx_xdp_bi - reallocate for either XSK or normal buffer
> > + * @rx_ring: Current rx ring
> > + * @pool_present: is pool for XSK present
> > + *
> > + * Try allocating memory and return ENOMEM, if failed to allocate.
> > + * If allocation was successful, substitute buffer with allocated one.
> > + * Returns 0 on success, negative on failure
> > + */
> > +static int i40e_realloc_rx_xdp_bi(struct i40e_ring *rx_ring, bool pool_present)
> > +{
> > +	size_t elem_size = pool_present ? sizeof(*rx_ring->rx_bi_zc) :
> > +					  sizeof(*rx_ring->rx_bi);
> > +
> 
> no newline here
> 
> > +	void *sw_ring = kcalloc(rx_ring->count, elem_size, GFP_KERNEL);
> 
> newline here.
> 
> > +	if (!sw_ring)
> > +		return -ENOMEM;
> > +
> > +	if (pool_present) {
> > +		kfree(rx_ring->rx_bi);
> > +		rx_ring->rx_bi = NULL;
> > +		rx_ring->rx_bi_zc = sw_ring;
> > +	} else {
> > +		kfree(rx_ring->rx_bi_zc);
> > +		rx_ring->rx_bi_zc = NULL;
> > +		rx_ring->rx_bi = sw_ring;
> > +	}
> > +	return 0;
> > +}
> > +
> > +/**
> > + * i40e_realloc_rx_bi_zc - reallocate xdp queue pairs
> > + * @vsi: Current VSI
> > + * @zc: is zero copy set
> > + *
> > + * Reallocate buffer for rx_rings that might be used by XSK.
> > + * XDP requires more memory, than rx_buf provides.
> > + * Returns 0 on success, negative on failure
> > + */
> > +int i40e_realloc_rx_bi_zc(struct i40e_vsi *vsi, bool zc)
> > +{
> > +	struct i40e_ring *rx_ring;
> > +	unsigned long q;
> > +
> > +	for_each_set_bit(q, vsi->af_xdp_zc_qps, vsi->alloc_queue_pairs) {
> > +		rx_ring = vsi->rx_rings[q];
> > +		if (i40e_realloc_rx_xdp_bi(rx_ring, zc))
> > +			return -ENOMEM;
> > +	}
> > +	return 0;
> > +}
> > +
> >   /**
> >    * i40e_xsk_pool_enable - Enable/associate an AF_XDP buffer pool to a
> >    * certain ring/qid
> > @@ -69,6 +113,10 @@ static int i40e_xsk_pool_enable(struct i40e_vsi *vsi,
> >   		if (err)
> >   			return err;
> >
> > +		err = i40e_realloc_rx_xdp_bi(vsi->rx_rings[qid], true);
> > +		if (err)
> > +			return err;
> > +
> >   		err = i40e_queue_pair_enable(vsi, qid);
> >   		if (err)
> >   			return err;
> > @@ -107,12 +155,16 @@ static int i40e_xsk_pool_disable(struct i40e_vsi *vsi,
> u16 qid)
> >   		err = i40e_queue_pair_disable(vsi, qid);
> >   		if (err)
> >   			return err;
> > +
> 
> Why this change?
> 
> >   	}
> >
> >   	clear_bit(qid, vsi->af_xdp_zc_qps);
> >   	xsk_pool_dma_unmap(pool, I40E_RX_DMA_ATTR);
> >
> >   	if (if_running) {
> > +		err = i40e_realloc_rx_xdp_bi(vsi->rx_rings[qid], false);
> > +		if (err)
> > +			return err;
> >   		err = i40e_queue_pair_enable(vsi, qid);
> >   		if (err)
> >   			return err;
> > @@ -131,7 +183,7 @@ static int i40e_xsk_pool_disable(struct i40e_vsi *vsi, u16
> qid)
> >    * This function enables or disables a buffer pool to a certain ring.
> >    *
> >    * Returns 0 on success, <0 on failure
> > - **/
> > + */
> 
> Also, this one. I don't mind this, but seems unrelated to the patch.
> 
> >   int i40e_xsk_pool_setup(struct i40e_vsi *vsi, struct xsk_buff_pool *pool,
> >   			u16 qid)
> >   {
> > diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.h
> b/drivers/net/ethernet/intel/i40e/i40e_xsk.h
> > index bb962987f300..821df248f8be 100644
> > --- a/drivers/net/ethernet/intel/i40e/i40e_xsk.h
> > +++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.h
> > @@ -32,7 +32,7 @@ int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int
> budget);
> >
> >   bool i40e_clean_xdp_tx_irq(struct i40e_vsi *vsi, struct i40e_ring *tx_ring);
> >   int i40e_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags);
> > -int i40e_alloc_rx_bi_zc(struct i40e_ring *rx_ring);
> > +int i40e_realloc_rx_bi_zc(struct i40e_vsi *vsi, bool zc);
> >   void i40e_clear_rx_bi_zc(struct i40e_ring *rx_ring);
> >
> >   #endif /* _I40E_XSK_H_ */
Patynowski, PrzemyslawX Sept. 13, 2022, 9:01 a.m. UTC | #3
> > 
> > Adding Maciej in case he wants to review.
> 
> Przemek, is it really broken on i40e as well or is this patch a blind port from ice?
There was an issue raised by Redhat, which was the same as the one on ice driver.
I'm not sure whether it actually fixes it, as I did not test this patch.
> 
> > 
> > On 9/9/2022 1:33 AM, Sokolowski, Jan wrote:
> > > From: Jan Sokolowski <jan.sokolowski@intel.com>
> > >
> > > During reallocation of RX buffers, new DMA mappings are created for 
> > > those buffers. New buffers with different RX ring count should 
> > > substitute older ones, but those buffers were freed in 
> > > i40e_configure_rx_ring and reallocated again with i40e_alloc_rx_bi, 
> > > thus kfree on rx_bi caused leak of already mapped DMA.
> > >
> > > In case of non XDP ring, do not free rx_bi and reuse already 
> > > existing buffer, move kfree to XDP rings only, remove unused 
> > > i40e_alloc_rx_bi function.
> > >
> > > steps for reproduction:
> > > while :
> > > do
> > > for ((i=0; i<=8160; i=i+32))
> > > do
> > > ethtool -G enp130s0f0 rx $i tx $i
> > > sleep 0.5
> > > ethtool -g enp130s0f0
> > > done
> > > done
> > >
> > > v2: Fixed improper kerneldoc that resulted in a warning
> > > v3: Applied commit msg fixes reported during a review
> > 
> > These should be after the '---'
> > 
> > >
> > > Fixes: be1222b585fd ("i40e: Separate kernel allocated rx_bi rings 
> > > from AF_XDP
> > rings")
> > > Signed-off-by: Jan Sokolowski <jan.sokolowski@intel.com>
> > > ---
> > >   .../net/ethernet/intel/i40e/i40e_ethtool.c    |  3 -
> > >   drivers/net/ethernet/intel/i40e/i40e_main.c   | 13 ++--
> > >   drivers/net/ethernet/intel/i40e/i40e_txrx.c   | 14 ++--
> > >   drivers/net/ethernet/intel/i40e/i40e_txrx.h   |  1 -
> > >   drivers/net/ethernet/intel/i40e/i40e_xsk.c    | 70 ++++++++++++++++---
> > >   drivers/net/ethernet/intel/i40e/i40e_xsk.h    |  2 +-
> > >   6 files changed, 74 insertions(+), 29 deletions(-)
> > >
> > > diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
> > b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
> > > index e518aaa2c0ca..0f2042f1597c 100644
> > > --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
> > > +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
> > > @@ -2181,9 +2181,6 @@ static int i40e_set_ringparam(struct 
> > > net_device
> > *netdev,
> > >   			 */
> > >   			rx_rings[i].tail = hw->hw_addr + I40E_PRTGEN_STATUS;
> > >   			err = i40e_setup_rx_descriptors(&rx_rings[i]);
> > > -			if (err)
> > > -				goto rx_unwind;
> > > -			err = i40e_alloc_rx_bi(&rx_rings[i]);
> > >   			if (err)
> > >   				goto rx_unwind;
> > >
> > > diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c
> > b/drivers/net/ethernet/intel/i40e/i40e_main.c
> > > index e3d9804aeb25..ad15749a2dd3 100644
> > > --- a/drivers/net/ethernet/intel/i40e/i40e_main.c
> > > +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
> > > @@ -3565,12 +3565,8 @@ static int i40e_configure_rx_ring(struct 
> > > i40e_ring
> > *ring)
> > >   	if (ring->vsi->type == I40E_VSI_MAIN)
> > >   		xdp_rxq_info_unreg_mem_model(&ring->xdp_rxq);
> > >
> > > -	kfree(ring->rx_bi);
> > >   	ring->xsk_pool = i40e_xsk_pool(ring);
> > >   	if (ring->xsk_pool) {
> > > -		ret = i40e_alloc_rx_bi_zc(ring);
> > > -		if (ret)
> > > -			return ret;
> > >   		ring->rx_buf_len =
> > >   		  xsk_pool_get_rx_frame_size(ring->xsk_pool);
> > >   		/* For AF_XDP ZC, we disallow packets to span on @@ -3588,9 
> > > +3584,6 @@ static int i40e_configure_rx_ring(struct i40e_ring *ring)
> > >   			 ring->queue_index);
> > >
> > >   	} else {
> > > -		ret = i40e_alloc_rx_bi(ring);
> > > -		if (ret)
> > > -			return ret;
> > >   		ring->rx_buf_len = vsi->rx_buf_len;
> > >   		if (ring->vsi->type == I40E_VSI_MAIN) {
> > >   			ret = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
> > > @@ -13304,6 +13297,11 @@ static int i40e_xdp_setup(struct i40e_vsi 
> > > *vsi, struct
> > bpf_prog *prog,
> > >   		i40e_reset_and_rebuild(pf, true, true);
> > >   	}
> > >
> > > +	if (!i40e_enabled_xdp_vsi(vsi) && prog)
> > > +		i40e_realloc_rx_bi_zc(vsi, true);
> > > +	else if (i40e_enabled_xdp_vsi(vsi) && !prog)
> > > +		i40e_realloc_rx_bi_zc(vsi, false);
> > > +
> > >   	for (i = 0; i < vsi->num_queue_pairs; i++)
> > >   		WRITE_ONCE(vsi->rx_rings[i]->xdp_prog, vsi->xdp_prog);
> > >
> > > @@ -13536,6 +13534,7 @@ int i40e_queue_pair_disable(struct i40e_vsi 
> > > *vsi, int
> > queue_pair)
> > >
> > >   	i40e_queue_pair_disable_irq(vsi, queue_pair);
> > >   	err = i40e_queue_pair_toggle_rings(vsi, queue_pair, false /* off 
> > > */);
> > > +	i40e_clean_rx_ring(vsi->rx_rings[queue_pair]);
> > >   	i40e_queue_pair_toggle_napi(vsi, queue_pair, false /* off */);
> > >   	i40e_queue_pair_clean_rings(vsi, queue_pair);
> > >   	i40e_queue_pair_reset_stats(vsi, queue_pair); diff --git 
> > > a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
> > b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
> > > index 69e67eb6aea7..c5a7e3819ce2 100644
> > > --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
> > > +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
> > > @@ -1457,14 +1457,6 @@ int i40e_setup_tx_descriptors(struct 
> > > i40e_ring
> > *tx_ring)
> > >   	return -ENOMEM;
> > >   }
> > >
> > > -int i40e_alloc_rx_bi(struct i40e_ring *rx_ring) -{
> > > -	unsigned long sz = sizeof(*rx_ring->rx_bi) * rx_ring->count;
> > > -
> > > -	rx_ring->rx_bi = kzalloc(sz, GFP_KERNEL);
> > > -	return rx_ring->rx_bi ? 0 : -ENOMEM;
> > > -}
> > > -
> > >   static void i40e_clear_rx_bi(struct i40e_ring *rx_ring)
> > >   {
> > >   	memset(rx_ring->rx_bi, 0, sizeof(*rx_ring->rx_bi) * 
> > > rx_ring->count); @@ -1593,6 +1585,12 @@ int 
> > > i40e_setup_rx_descriptors(struct i40e_ring
> > *rx_ring)
> > >
> > >   	rx_ring->xdp_prog = rx_ring->vsi->xdp_prog;
> > >
> > > +	WARN_ON(rx_ring->rx_bi);
> > > +	rx_ring->rx_bi =
> > > +		kcalloc(rx_ring->count, sizeof(*rx_ring->rx_bi), GFP_KERNEL);
> > 
> > This will overwrite, and, presumably, leak rx_ring->rx_bi for non-null 
> > values.
> > 
> > > +	if (!rx_ring->rx_bi)
> > > +		return -ENOMEM;
> > > +
> > >   	return 0;
> > >   }
> > >
> > > diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h
> > b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
> > > index 41f86e9535a0..768290dc6f48 100644
> > > --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h
> > > +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
> > > @@ -469,7 +469,6 @@ int __i40e_maybe_stop_tx(struct i40e_ring 
> > > *tx_ring, int
> > size);
> > >   bool __i40e_chk_linearize(struct sk_buff *skb);
> > >   int i40e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
> > >   		  u32 flags);
> > > -int i40e_alloc_rx_bi(struct i40e_ring *rx_ring);
> > >
> > >   /**
> > >    * i40e_get_head - Retrieve head from head writeback diff --git 
> > > a/drivers/net/ethernet/intel/i40e/i40e_xsk.c
> > b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
> > > index 6d4009e0cbd6..ba33b3b7a2bf 100644
> > > --- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c
> > > +++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
> > > @@ -10,14 +10,6 @@
> > >   #include "i40e_txrx_common.h"
> > >   #include "i40e_xsk.h"
> > >
> > > -int i40e_alloc_rx_bi_zc(struct i40e_ring *rx_ring) -{
> > > -	unsigned long sz = sizeof(*rx_ring->rx_bi_zc) * rx_ring->count;
> > > -
> > > -	rx_ring->rx_bi_zc = kzalloc(sz, GFP_KERNEL);
> > > -	return rx_ring->rx_bi_zc ? 0 : -ENOMEM;
> > > -}
> > > -
> > >   void i40e_clear_rx_bi_zc(struct i40e_ring *rx_ring)
> > >   {
> > >   	memset(rx_ring->rx_bi_zc, 0,
> > > @@ -29,6 +21,58 @@ static struct xdp_buff **i40e_rx_bi(struct 
> > > i40e_ring
> > *rx_ring, u32 idx)
> > >   	return &rx_ring->rx_bi_zc[idx];
> > >   }
> > >
> > > +/**
> > > + * i40e_realloc_rx_xdp_bi - reallocate for either XSK or normal 
> > > +buffer
> > > + * @rx_ring: Current rx ring
> > > + * @pool_present: is pool for XSK present
> > > + *
> > > + * Try allocating memory and return ENOMEM, if failed to allocate.
> > > + * If allocation was successful, substitute buffer with allocated one.
> > > + * Returns 0 on success, negative on failure  */ static int 
> > > +i40e_realloc_rx_xdp_bi(struct i40e_ring *rx_ring, bool 
> > > +pool_present) {
> > > +	size_t elem_size = pool_present ? sizeof(*rx_ring->rx_bi_zc) :
> > > +					  sizeof(*rx_ring->rx_bi);
> > > +
> > 
> > no newline here
> > 
> > > +	void *sw_ring = kcalloc(rx_ring->count, elem_size, GFP_KERNEL);
> > 
> > newline here.
> > 
> > > +	if (!sw_ring)
> > > +		return -ENOMEM;
> > > +
> > > +	if (pool_present) {
> > > +		kfree(rx_ring->rx_bi);
> > > +		rx_ring->rx_bi = NULL;
> > > +		rx_ring->rx_bi_zc = sw_ring;
> > > +	} else {
> > > +		kfree(rx_ring->rx_bi_zc);
> > > +		rx_ring->rx_bi_zc = NULL;
> > > +		rx_ring->rx_bi = sw_ring;
> > > +	}
> > > +	return 0;
> > > +}
> > > +
> > > +/**
> > > + * i40e_realloc_rx_bi_zc - reallocate xdp queue pairs
> > > + * @vsi: Current VSI
> > > + * @zc: is zero copy set
> > > + *
> > > + * Reallocate buffer for rx_rings that might be used by XSK.
> > > + * XDP requires more memory, than rx_buf provides.
> > > + * Returns 0 on success, negative on failure  */ int 
> > > +i40e_realloc_rx_bi_zc(struct i40e_vsi *vsi, bool zc) {
> > > +	struct i40e_ring *rx_ring;
> > > +	unsigned long q;
> > > +
> > > +	for_each_set_bit(q, vsi->af_xdp_zc_qps, vsi->alloc_queue_pairs) {
> > > +		rx_ring = vsi->rx_rings[q];
> > > +		if (i40e_realloc_rx_xdp_bi(rx_ring, zc))
> > > +			return -ENOMEM;
> > > +	}
> > > +	return 0;
> > > +}
> > > +
> > >   /**
> > >    * i40e_xsk_pool_enable - Enable/associate an AF_XDP buffer pool to a
> > >    * certain ring/qid
> > > @@ -69,6 +113,10 @@ static int i40e_xsk_pool_enable(struct i40e_vsi *vsi,
> > >   		if (err)
> > >   			return err;
> > >
> > > +		err = i40e_realloc_rx_xdp_bi(vsi->rx_rings[qid], true);
> > > +		if (err)
> > > +			return err;
> > > +
> > >   		err = i40e_queue_pair_enable(vsi, qid);
> > >   		if (err)
> > >   			return err;
> > > @@ -107,12 +155,16 @@ static int i40e_xsk_pool_disable(struct 
> > > i40e_vsi *vsi,
> > u16 qid)
> > >   		err = i40e_queue_pair_disable(vsi, qid);
> > >   		if (err)
> > >   			return err;
> > > +
> > 
> > Why this change?
> > 
> > >   	}
> > >
> > >   	clear_bit(qid, vsi->af_xdp_zc_qps);
> > >   	xsk_pool_dma_unmap(pool, I40E_RX_DMA_ATTR);
> > >
> > >   	if (if_running) {
> > > +		err = i40e_realloc_rx_xdp_bi(vsi->rx_rings[qid], false);
> > > +		if (err)
> > > +			return err;
> > >   		err = i40e_queue_pair_enable(vsi, qid);
> > >   		if (err)
> > >   			return err;
> > > @@ -131,7 +183,7 @@ static int i40e_xsk_pool_disable(struct i40e_vsi 
> > > *vsi, u16
> > qid)
> > >    * This function enables or disables a buffer pool to a certain ring.
> > >    *
> > >    * Returns 0 on success, <0 on failure
> > > - **/
> > > + */
> > 
> > Also, this one. I don't mind this, but seems unrelated to the patch.
> > 
> > >   int i40e_xsk_pool_setup(struct i40e_vsi *vsi, struct xsk_buff_pool *pool,
> > >   			u16 qid)
> > >   {
> > > diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.h
> > b/drivers/net/ethernet/intel/i40e/i40e_xsk.h
> > > index bb962987f300..821df248f8be 100644
> > > --- a/drivers/net/ethernet/intel/i40e/i40e_xsk.h
> > > +++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.h
> > > @@ -32,7 +32,7 @@ int i40e_clean_rx_irq_zc(struct i40e_ring 
> > > *rx_ring, int
> > budget);
> > >
> > >   bool i40e_clean_xdp_tx_irq(struct i40e_vsi *vsi, struct i40e_ring *tx_ring);
> > >   int i40e_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 
> > > flags); -int i40e_alloc_rx_bi_zc(struct i40e_ring *rx_ring);
> > > +int i40e_realloc_rx_bi_zc(struct i40e_vsi *vsi, bool zc);
> > >   void i40e_clear_rx_bi_zc(struct i40e_ring *rx_ring);
> > >
> > >   #endif /* _I40E_XSK_H_ */
Rout, ChandanX Sept. 26, 2022, 12:47 p.m. UTC | #4
>-----Original Message-----
>From: Kuruvinakunnel, George <george.kuruvinakunnel@intel.com>
>Sent: Monday, September 26, 2022 6:02 PM
>To: Rout, ChandanX <chandanx.rout@intel.com>
>Subject: FW: [Intel-wired-lan] [PATCH net v3] i40e: Fix DMA mappings leak
>
>FYI:
>
>George
>
>++++++++++++++++++++++++++++++++++++++++++++++++
>
>-----Original Message-----
>From: Intel-wired-lan <intel-wired-lan-bounces@osuosl.org> On Behalf Of
>Sokolowski, Jan
>Sent: Friday, September 9, 2022 2:03 PM
>To: intel-wired-lan@lists.osuosl.org
>Subject: [Intel-wired-lan] [PATCH net v3] i40e: Fix DMA mappings leak
>
>From: Jan Sokolowski <jan.sokolowski@intel.com>
>
>During reallocation of RX buffers, new DMA mappings are created for those
>buffers. New buffers with different RX ring count should substitute older
>ones, but those buffers were freed in i40e_configure_rx_ring and reallocated
>again with i40e_alloc_rx_bi, thus kfree on rx_bi caused leak of already
>mapped DMA.
>
>In case of non XDP ring, do not free rx_bi and reuse already existing buffer,
>move kfree to XDP rings only, remove unused i40e_alloc_rx_bi function.
>
>steps for reproduction:
>while :
>do
>for ((i=0; i<=8160; i=i+32))
>do
>ethtool -G enp130s0f0 rx $i tx $i
>sleep 0.5
>ethtool -g enp130s0f0
>done
>done
>
>v2: Fixed improper kerneldoc that resulted in a warning
>v3: Applied commit msg fixes reported during a review
>
>Fixes: be1222b585fd ("i40e: Separate kernel allocated rx_bi rings from AF_XDP
>rings")
>Signed-off-by: Jan Sokolowski <jan.sokolowski@intel.com>
>---
> .../net/ethernet/intel/i40e/i40e_ethtool.c    |  3 -
> drivers/net/ethernet/intel/i40e/i40e_main.c   | 13 ++--
> drivers/net/ethernet/intel/i40e/i40e_txrx.c   | 14 ++--
> drivers/net/ethernet/intel/i40e/i40e_txrx.h   |  1 -
> drivers/net/ethernet/intel/i40e/i40e_xsk.c    | 70 ++++++++++++++++---
> drivers/net/ethernet/intel/i40e/i40e_xsk.h    |  2 +-
> 6 files changed, 74 insertions(+), 29 deletions(-)

Tested-by: Chandan <chandanx.rout@intel.com> (A Contingent Worker at Intel)
diff mbox series

Patch

diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
index e518aaa2c0ca..0f2042f1597c 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
@@ -2181,9 +2181,6 @@  static int i40e_set_ringparam(struct net_device *netdev,
 			 */
 			rx_rings[i].tail = hw->hw_addr + I40E_PRTGEN_STATUS;
 			err = i40e_setup_rx_descriptors(&rx_rings[i]);
-			if (err)
-				goto rx_unwind;
-			err = i40e_alloc_rx_bi(&rx_rings[i]);
 			if (err)
 				goto rx_unwind;
 
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index e3d9804aeb25..ad15749a2dd3 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -3565,12 +3565,8 @@  static int i40e_configure_rx_ring(struct i40e_ring *ring)
 	if (ring->vsi->type == I40E_VSI_MAIN)
 		xdp_rxq_info_unreg_mem_model(&ring->xdp_rxq);
 
-	kfree(ring->rx_bi);
 	ring->xsk_pool = i40e_xsk_pool(ring);
 	if (ring->xsk_pool) {
-		ret = i40e_alloc_rx_bi_zc(ring);
-		if (ret)
-			return ret;
 		ring->rx_buf_len =
 		  xsk_pool_get_rx_frame_size(ring->xsk_pool);
 		/* For AF_XDP ZC, we disallow packets to span on
@@ -3588,9 +3584,6 @@  static int i40e_configure_rx_ring(struct i40e_ring *ring)
 			 ring->queue_index);
 
 	} else {
-		ret = i40e_alloc_rx_bi(ring);
-		if (ret)
-			return ret;
 		ring->rx_buf_len = vsi->rx_buf_len;
 		if (ring->vsi->type == I40E_VSI_MAIN) {
 			ret = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
@@ -13304,6 +13297,11 @@  static int i40e_xdp_setup(struct i40e_vsi *vsi, struct bpf_prog *prog,
 		i40e_reset_and_rebuild(pf, true, true);
 	}
 
+	if (!i40e_enabled_xdp_vsi(vsi) && prog)
+		i40e_realloc_rx_bi_zc(vsi, true);
+	else if (i40e_enabled_xdp_vsi(vsi) && !prog)
+		i40e_realloc_rx_bi_zc(vsi, false);
+
 	for (i = 0; i < vsi->num_queue_pairs; i++)
 		WRITE_ONCE(vsi->rx_rings[i]->xdp_prog, vsi->xdp_prog);
 
@@ -13536,6 +13534,7 @@  int i40e_queue_pair_disable(struct i40e_vsi *vsi, int queue_pair)
 
 	i40e_queue_pair_disable_irq(vsi, queue_pair);
 	err = i40e_queue_pair_toggle_rings(vsi, queue_pair, false /* off */);
+	i40e_clean_rx_ring(vsi->rx_rings[queue_pair]);
 	i40e_queue_pair_toggle_napi(vsi, queue_pair, false /* off */);
 	i40e_queue_pair_clean_rings(vsi, queue_pair);
 	i40e_queue_pair_reset_stats(vsi, queue_pair);
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index 69e67eb6aea7..c5a7e3819ce2 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -1457,14 +1457,6 @@  int i40e_setup_tx_descriptors(struct i40e_ring *tx_ring)
 	return -ENOMEM;
 }
 
-int i40e_alloc_rx_bi(struct i40e_ring *rx_ring)
-{
-	unsigned long sz = sizeof(*rx_ring->rx_bi) * rx_ring->count;
-
-	rx_ring->rx_bi = kzalloc(sz, GFP_KERNEL);
-	return rx_ring->rx_bi ? 0 : -ENOMEM;
-}
-
 static void i40e_clear_rx_bi(struct i40e_ring *rx_ring)
 {
 	memset(rx_ring->rx_bi, 0, sizeof(*rx_ring->rx_bi) * rx_ring->count);
@@ -1593,6 +1585,12 @@  int i40e_setup_rx_descriptors(struct i40e_ring *rx_ring)
 
 	rx_ring->xdp_prog = rx_ring->vsi->xdp_prog;
 
+	WARN_ON(rx_ring->rx_bi);
+	rx_ring->rx_bi =
+		kcalloc(rx_ring->count, sizeof(*rx_ring->rx_bi), GFP_KERNEL);
+	if (!rx_ring->rx_bi)
+		return -ENOMEM;
+
 	return 0;
 }
 
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
index 41f86e9535a0..768290dc6f48 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
@@ -469,7 +469,6 @@  int __i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size);
 bool __i40e_chk_linearize(struct sk_buff *skb);
 int i40e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
 		  u32 flags);
-int i40e_alloc_rx_bi(struct i40e_ring *rx_ring);
 
 /**
  * i40e_get_head - Retrieve head from head writeback
diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
index 6d4009e0cbd6..ba33b3b7a2bf 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
@@ -10,14 +10,6 @@ 
 #include "i40e_txrx_common.h"
 #include "i40e_xsk.h"
 
-int i40e_alloc_rx_bi_zc(struct i40e_ring *rx_ring)
-{
-	unsigned long sz = sizeof(*rx_ring->rx_bi_zc) * rx_ring->count;
-
-	rx_ring->rx_bi_zc = kzalloc(sz, GFP_KERNEL);
-	return rx_ring->rx_bi_zc ? 0 : -ENOMEM;
-}
-
 void i40e_clear_rx_bi_zc(struct i40e_ring *rx_ring)
 {
 	memset(rx_ring->rx_bi_zc, 0,
@@ -29,6 +21,58 @@  static struct xdp_buff **i40e_rx_bi(struct i40e_ring *rx_ring, u32 idx)
 	return &rx_ring->rx_bi_zc[idx];
 }
 
+/**
+ * i40e_realloc_rx_xdp_bi - reallocate for either XSK or normal buffer
+ * @rx_ring: Current rx ring
+ * @pool_present: is pool for XSK present
+ *
+ * Try allocating memory and return ENOMEM, if failed to allocate.
+ * If allocation was successful, substitute buffer with allocated one.
+ * Returns 0 on success, negative on failure
+ */
+static int i40e_realloc_rx_xdp_bi(struct i40e_ring *rx_ring, bool pool_present)
+{
+	size_t elem_size = pool_present ? sizeof(*rx_ring->rx_bi_zc) :
+					  sizeof(*rx_ring->rx_bi);
+
+	void *sw_ring = kcalloc(rx_ring->count, elem_size, GFP_KERNEL);
+	if (!sw_ring)
+		return -ENOMEM;
+
+	if (pool_present) {
+		kfree(rx_ring->rx_bi);
+		rx_ring->rx_bi = NULL;
+		rx_ring->rx_bi_zc = sw_ring;
+	} else {
+		kfree(rx_ring->rx_bi_zc);
+		rx_ring->rx_bi_zc = NULL;
+		rx_ring->rx_bi = sw_ring;
+	}
+	return 0;
+}
+
+/**
+ * i40e_realloc_rx_bi_zc - reallocate xdp queue pairs
+ * @vsi: Current VSI
+ * @zc: is zero copy set
+ *
+ * Reallocate buffer for rx_rings that might be used by XSK.
+ * XDP requires more memory, than rx_buf provides.
+ * Returns 0 on success, negative on failure
+ */
+int i40e_realloc_rx_bi_zc(struct i40e_vsi *vsi, bool zc)
+{
+	struct i40e_ring *rx_ring;
+	unsigned long q;
+
+	for_each_set_bit(q, vsi->af_xdp_zc_qps, vsi->alloc_queue_pairs) {
+		rx_ring = vsi->rx_rings[q];
+		if (i40e_realloc_rx_xdp_bi(rx_ring, zc))
+			return -ENOMEM;
+	}
+	return 0;
+}
+
 /**
  * i40e_xsk_pool_enable - Enable/associate an AF_XDP buffer pool to a
  * certain ring/qid
@@ -69,6 +113,10 @@  static int i40e_xsk_pool_enable(struct i40e_vsi *vsi,
 		if (err)
 			return err;
 
+		err = i40e_realloc_rx_xdp_bi(vsi->rx_rings[qid], true);
+		if (err)
+			return err;
+
 		err = i40e_queue_pair_enable(vsi, qid);
 		if (err)
 			return err;
@@ -107,12 +155,16 @@  static int i40e_xsk_pool_disable(struct i40e_vsi *vsi, u16 qid)
 		err = i40e_queue_pair_disable(vsi, qid);
 		if (err)
 			return err;
+
 	}
 
 	clear_bit(qid, vsi->af_xdp_zc_qps);
 	xsk_pool_dma_unmap(pool, I40E_RX_DMA_ATTR);
 
 	if (if_running) {
+		err = i40e_realloc_rx_xdp_bi(vsi->rx_rings[qid], false);
+		if (err)
+			return err;
 		err = i40e_queue_pair_enable(vsi, qid);
 		if (err)
 			return err;
@@ -131,7 +183,7 @@  static int i40e_xsk_pool_disable(struct i40e_vsi *vsi, u16 qid)
  * This function enables or disables a buffer pool to a certain ring.
  *
  * Returns 0 on success, <0 on failure
- **/
+ */
 int i40e_xsk_pool_setup(struct i40e_vsi *vsi, struct xsk_buff_pool *pool,
 			u16 qid)
 {
diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.h b/drivers/net/ethernet/intel/i40e/i40e_xsk.h
index bb962987f300..821df248f8be 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_xsk.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.h
@@ -32,7 +32,7 @@  int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget);
 
 bool i40e_clean_xdp_tx_irq(struct i40e_vsi *vsi, struct i40e_ring *tx_ring);
 int i40e_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags);
-int i40e_alloc_rx_bi_zc(struct i40e_ring *rx_ring);
+int i40e_realloc_rx_bi_zc(struct i40e_vsi *vsi, bool zc);
 void i40e_clear_rx_bi_zc(struct i40e_ring *rx_ring);
 
 #endif /* _I40E_XSK_H_ */