[SRU,Cosmic,1/1] net/mlx5: WQ, fixes for fragmented WQ buffers API

Message ID fcdc2dd577233f1698f0ef24b3564bacc3e28bd0.1540319757.git.joseph.salisbury@canonical.com
State New
Headers show
Series
  • net/mlx5: WQ, fixes for fragmented WQ buffers API
Related show

Commit Message

Joseph Salisbury Oct. 31, 2018, 6:06 p.m.
From: Tariq Toukan <tariqt@mellanox.com>

BugLink: https://bugs.launchpad.net/bugs/1799393

mlx5e netdevice used to calculate fragment edges by a call to
mlx5_wq_cyc_get_frag_size(). This calculation did not give the correct
indication for queues smaller than PAGE_SIZE (broken by default on
PowerPC, where PAGE_SIZE == 64KB). Here it is replaced by the correct new
calls/API.

Since (TX/RX) Work Queue buffers are fragmented, here we introduce
changes to the API in the core driver, so that it gets a stride index and
returns the index of the last stride on the same fragment, and an additional
wrapping function that returns the number of physically contiguous
strides that can be written to the work queue.

This obsoletes the following API functions, and their buggy
usage in EN driver:
* mlx5_wq_cyc_get_frag_size()
* mlx5_wq_cyc_ctr2fragix()

The new API improves modularity and hides the details of such
calculation for mlx5e netdevice and mlx5_ib rdma drivers.

New calculation is also more efficient, and improves performance
as follows:

Packet rate test: pktgen, UDP / IPv4, 64byte, single ring, 8K ring size.

Before: 16,477,619 pps
After:  17,085,793 pps

3.7% improvement

Fixes: 3a2f70331226 ("net/mlx5: Use order-0 allocations for all WQ types")
Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Reviewed-by: Eran Ben Elisha <eranbe@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
(cherry picked from commit 37fdffb217a45609edccbb8b407d031143f551c0)
Signed-off-by: Joseph Salisbury <joseph.salisbury@canonical.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_rx.c    | 12 +++++-------
 drivers/net/ethernet/mellanox/mlx5/core/en_tx.c    | 22 +++++++++++-----------
 .../net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h  |  5 ++---
 drivers/net/ethernet/mellanox/mlx5/core/wq.c       |  5 -----
 drivers/net/ethernet/mellanox/mlx5/core/wq.h       | 11 +++++------
 include/linux/mlx5/driver.h                        |  8 ++++++++
 6 files changed, 31 insertions(+), 32 deletions(-)

Comments

Stefan Bader Nov. 6, 2018, 11:09 a.m. | #1
On 31.10.18 19:06, Joseph Salisbury wrote:
> From: Tariq Toukan <tariqt@mellanox.com>
> 
> BugLink: https://bugs.launchpad.net/bugs/1799393
> 
> mlx5e netdevice used to calculate fragment edges by a call to
> mlx5_wq_cyc_get_frag_size(). This calculation did not give the correct
> indication for queues smaller than a PAGE_SIZE, (broken by default on
> PowerPC, where PAGE_SIZE == 64KB).  Here it is replaced by the correct new
> calls/API.
> 
> Since (TX/RX) Work Queues buffers are fragmented, here we introduce
> changes to the API in core driver, so that it gets a stride index and
> returns the index of last stride on same fragment, and an additional
> wrapping function that returns the number of physically contiguous
> strides that can be written contiguously to the work queue.
> 
> This obsoletes the following API functions, and their buggy
> usage in EN driver:
> * mlx5_wq_cyc_get_frag_size()
> * mlx5_wq_cyc_ctr2fragix()
> 
> The new API improves modularity and hides the details of such
> calculation for mlx5e netdevice and mlx5_ib rdma drivers.
> 
> New calculation is also more efficient, and improves performance
> as follows:
> 
> Packet rate test: pktgen, UDP / IPv4, 64byte, single ring, 8K ring size.
> 
> Before: 16,477,619 pps
> After:  17,085,793 pps
> 
> 3.7% improvement
> 
> Fixes: 3a2f70331226 ("net/mlx5: Use order-0 allocations for all WQ types")
> Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
> Reviewed-by: Eran Ben Elisha <eranbe@mellanox.com>
> Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
> (cherry picked from commit 37fdffb217a45609edccbb8b407d031143f551c0)
> Signed-off-by: Joseph Salisbury <joseph.salisbury@canonical.com>
Acked-by: Stefan Bader <stefan.bader@canonical.com>
> ---
>  drivers/net/ethernet/mellanox/mlx5/core/en_rx.c    | 12 +++++-------
>  drivers/net/ethernet/mellanox/mlx5/core/en_tx.c    | 22 +++++++++++-----------
>  .../net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h  |  5 ++---
>  drivers/net/ethernet/mellanox/mlx5/core/wq.c       |  5 -----
>  drivers/net/ethernet/mellanox/mlx5/core/wq.h       | 11 +++++------
>  include/linux/mlx5/driver.h                        |  8 ++++++++
>  6 files changed, 31 insertions(+), 32 deletions(-)
> 
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
> index d3a1dd2..549e926 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
> @@ -429,10 +429,9 @@ static inline u16 mlx5e_icosq_wrap_cnt(struct mlx5e_icosq *sq)
>  
>  static inline void mlx5e_fill_icosq_frag_edge(struct mlx5e_icosq *sq,
>  					      struct mlx5_wq_cyc *wq,
> -					      u16 pi, u16 frag_pi)
> +					      u16 pi, u16 nnops)
>  {
>  	struct mlx5e_sq_wqe_info *edge_wi, *wi = &sq->db.ico_wqe[pi];
> -	u8 nnops = mlx5_wq_cyc_get_frag_size(wq) - frag_pi;
>  
>  	edge_wi = wi + nnops;
>  
> @@ -451,15 +450,14 @@ static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
>  	struct mlx5_wq_cyc *wq = &sq->wq;
>  	struct mlx5e_umr_wqe *umr_wqe;
>  	u16 xlt_offset = ix << (MLX5E_LOG_ALIGNED_MPWQE_PPW - 1);
> -	u16 pi, frag_pi;
> +	u16 pi, contig_wqebbs_room;
>  	int err;
>  	int i;
>  
>  	pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
> -	frag_pi = mlx5_wq_cyc_ctr2fragix(wq, sq->pc);
> -
> -	if (unlikely(frag_pi + MLX5E_UMR_WQEBBS > mlx5_wq_cyc_get_frag_size(wq))) {
> -		mlx5e_fill_icosq_frag_edge(sq, wq, pi, frag_pi);
> +	contig_wqebbs_room = mlx5_wq_cyc_get_contig_wqebbs(wq, pi);
> +	if (unlikely(contig_wqebbs_room < MLX5E_UMR_WQEBBS)) {
> +		mlx5e_fill_icosq_frag_edge(sq, wq, pi, contig_wqebbs_room);
>  		pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
>  	}
>  
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
> index f29deb4..1e774d9 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
> @@ -287,10 +287,9 @@ mlx5e_txwqe_build_dsegs(struct mlx5e_txqsq *sq, struct sk_buff *skb,
>  
>  static inline void mlx5e_fill_sq_frag_edge(struct mlx5e_txqsq *sq,
>  					   struct mlx5_wq_cyc *wq,
> -					   u16 pi, u16 frag_pi)
> +					   u16 pi, u16 nnops)
>  {
>  	struct mlx5e_tx_wqe_info *edge_wi, *wi = &sq->db.wqe_info[pi];
> -	u8 nnops = mlx5_wq_cyc_get_frag_size(wq) - frag_pi;
>  
>  	edge_wi = wi + nnops;
>  
> @@ -345,8 +344,8 @@ netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
>  	struct mlx5e_tx_wqe_info *wi;
>  
>  	struct mlx5e_sq_stats *stats = sq->stats;
> +	u16 headlen, ihs, contig_wqebbs_room;
>  	u16 ds_cnt, ds_cnt_inl = 0;
> -	u16 headlen, ihs, frag_pi;
>  	u8 num_wqebbs, opcode;
>  	u32 num_bytes;
>  	int num_dma;
> @@ -383,9 +382,9 @@ netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
>  	}
>  
>  	num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS);
> -	frag_pi = mlx5_wq_cyc_ctr2fragix(wq, sq->pc);
> -	if (unlikely(frag_pi + num_wqebbs > mlx5_wq_cyc_get_frag_size(wq))) {
> -		mlx5e_fill_sq_frag_edge(sq, wq, pi, frag_pi);
> +	contig_wqebbs_room = mlx5_wq_cyc_get_contig_wqebbs(wq, pi);
> +	if (unlikely(contig_wqebbs_room < num_wqebbs)) {
> +		mlx5e_fill_sq_frag_edge(sq, wq, pi, contig_wqebbs_room);
>  		mlx5e_sq_fetch_wqe(sq, &wqe, &pi);
>  	}
>  
> @@ -629,7 +628,7 @@ netdev_tx_t mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
>  	struct mlx5e_tx_wqe_info *wi;
>  
>  	struct mlx5e_sq_stats *stats = sq->stats;
> -	u16 headlen, ihs, pi, frag_pi;
> +	u16 headlen, ihs, pi, contig_wqebbs_room;
>  	u16 ds_cnt, ds_cnt_inl = 0;
>  	u8 num_wqebbs, opcode;
>  	u32 num_bytes;
> @@ -665,13 +664,14 @@ netdev_tx_t mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
>  	}
>  
>  	num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS);
> -	frag_pi = mlx5_wq_cyc_ctr2fragix(wq, sq->pc);
> -	if (unlikely(frag_pi + num_wqebbs > mlx5_wq_cyc_get_frag_size(wq))) {
> +	pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
> +	contig_wqebbs_room = mlx5_wq_cyc_get_contig_wqebbs(wq, pi);
> +	if (unlikely(contig_wqebbs_room < num_wqebbs)) {
> +		mlx5e_fill_sq_frag_edge(sq, wq, pi, contig_wqebbs_room);
>  		pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
> -		mlx5e_fill_sq_frag_edge(sq, wq, pi, frag_pi);
>  	}
>  
> -	mlx5i_sq_fetch_wqe(sq, &wqe, &pi);
> +	mlx5i_sq_fetch_wqe(sq, &wqe, pi);
>  
>  	/* fill wqe */
>  	wi       = &sq->db.wqe_info[pi];
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h
> index 08eac92..0982c57 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h
> @@ -109,12 +109,11 @@ struct mlx5i_tx_wqe {
>  
>  static inline void mlx5i_sq_fetch_wqe(struct mlx5e_txqsq *sq,
>  				      struct mlx5i_tx_wqe **wqe,
> -				      u16 *pi)
> +				      u16 pi)
>  {
>  	struct mlx5_wq_cyc *wq = &sq->wq;
>  
> -	*pi  = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
> -	*wqe = mlx5_wq_cyc_get_wqe(wq, *pi);
> +	*wqe = mlx5_wq_cyc_get_wqe(wq, pi);
>  	memset(*wqe, 0, sizeof(**wqe));
>  }
>  
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.c b/drivers/net/ethernet/mellanox/mlx5/core/wq.c
> index d838af9..9046475 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/wq.c
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.c
> @@ -39,11 +39,6 @@ u32 mlx5_wq_cyc_get_size(struct mlx5_wq_cyc *wq)
>  	return (u32)wq->fbc.sz_m1 + 1;
>  }
>  
> -u16 mlx5_wq_cyc_get_frag_size(struct mlx5_wq_cyc *wq)
> -{
> -	return wq->fbc.frag_sz_m1 + 1;
> -}
> -
>  u32 mlx5_cqwq_get_size(struct mlx5_cqwq *wq)
>  {
>  	return wq->fbc.sz_m1 + 1;
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.h b/drivers/net/ethernet/mellanox/mlx5/core/wq.h
> index 16476cc..3112565 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/wq.h
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.h
> @@ -80,7 +80,6 @@ int mlx5_wq_cyc_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
>  		       void *wqc, struct mlx5_wq_cyc *wq,
>  		       struct mlx5_wq_ctrl *wq_ctrl);
>  u32 mlx5_wq_cyc_get_size(struct mlx5_wq_cyc *wq);
> -u16 mlx5_wq_cyc_get_frag_size(struct mlx5_wq_cyc *wq);
>  
>  int mlx5_wq_qp_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
>  		      void *qpc, struct mlx5_wq_qp *wq,
> @@ -140,11 +139,6 @@ static inline u16 mlx5_wq_cyc_ctr2ix(struct mlx5_wq_cyc *wq, u16 ctr)
>  	return ctr & wq->fbc.sz_m1;
>  }
>  
> -static inline u16 mlx5_wq_cyc_ctr2fragix(struct mlx5_wq_cyc *wq, u16 ctr)
> -{
> -	return ctr & wq->fbc.frag_sz_m1;
> -}
> -
>  static inline u16 mlx5_wq_cyc_get_head(struct mlx5_wq_cyc *wq)
>  {
>  	return mlx5_wq_cyc_ctr2ix(wq, wq->wqe_ctr);
> @@ -160,6 +154,11 @@ static inline void *mlx5_wq_cyc_get_wqe(struct mlx5_wq_cyc *wq, u16 ix)
>  	return mlx5_frag_buf_get_wqe(&wq->fbc, ix);
>  }
>  
> +static inline u16 mlx5_wq_cyc_get_contig_wqebbs(struct mlx5_wq_cyc *wq, u16 ix)
> +{
> +	return mlx5_frag_buf_get_idx_last_contig_stride(&wq->fbc, ix) - ix + 1;
> +}
> +
>  static inline int mlx5_wq_cyc_cc_bigger(u16 cc1, u16 cc2)
>  {
>  	int equal   = (cc1 == cc2);
> diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
> index 64f4505..b49bfc8 100644
> --- a/include/linux/mlx5/driver.h
> +++ b/include/linux/mlx5/driver.h
> @@ -1022,6 +1022,14 @@ static inline void *mlx5_frag_buf_get_wqe(struct mlx5_frag_buf_ctrl *fbc,
>  		((fbc->frag_sz_m1 & ix) << fbc->log_stride);
>  }
>  
> +static inline u32
> +mlx5_frag_buf_get_idx_last_contig_stride(struct mlx5_frag_buf_ctrl *fbc, u32 ix)
> +{
> +	u32 last_frag_stride_idx = (ix + fbc->strides_offset) | fbc->frag_sz_m1;
> +
> +	return min_t(u32, last_frag_stride_idx - fbc->strides_offset, fbc->sz_m1);
> +}
> +
>  int mlx5_cmd_init(struct mlx5_core_dev *dev);
>  void mlx5_cmd_cleanup(struct mlx5_core_dev *dev);
>  void mlx5_cmd_use_events(struct mlx5_core_dev *dev);
>
Kleber Sacilotto de Souza Nov. 6, 2018, 4:55 p.m. | #2
On 10/31/18 19:06, Joseph Salisbury wrote:
> From: Tariq Toukan <tariqt@mellanox.com>
>
> BugLink: https://bugs.launchpad.net/bugs/1799393
>
> mlx5e netdevice used to calculate fragment edges by a call to
> mlx5_wq_cyc_get_frag_size(). This calculation did not give the correct
> indication for queues smaller than a PAGE_SIZE, (broken by default on
> PowerPC, where PAGE_SIZE == 64KB).  Here it is replaced by the correct new
> calls/API.
>
> Since (TX/RX) Work Queues buffers are fragmented, here we introduce
> changes to the API in core driver, so that it gets a stride index and
> returns the index of last stride on same fragment, and an additional
> wrapping function that returns the number of physically contiguous
> strides that can be written contiguously to the work queue.
>
> This obsoletes the following API functions, and their buggy
> usage in EN driver:
> * mlx5_wq_cyc_get_frag_size()
> * mlx5_wq_cyc_ctr2fragix()
>
> The new API improves modularity and hides the details of such
> calculation for mlx5e netdevice and mlx5_ib rdma drivers.
>
> New calculation is also more efficient, and improves performance
> as follows:
>
> Packet rate test: pktgen, UDP / IPv4, 64byte, single ring, 8K ring size.
>
> Before: 16,477,619 pps
> After:  17,085,793 pps
>
> 3.7% improvement
>
> Fixes: 3a2f70331226 ("net/mlx5: Use order-0 allocations for all WQ types")
> Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
> Reviewed-by: Eran Ben Elisha <eranbe@mellanox.com>
> Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
> (cherry picked from commit 37fdffb217a45609edccbb8b407d031143f551c0)
> Signed-off-by: Joseph Salisbury <joseph.salisbury@canonical.com>

Acked-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>

> ---
>  drivers/net/ethernet/mellanox/mlx5/core/en_rx.c    | 12 +++++-------
>  drivers/net/ethernet/mellanox/mlx5/core/en_tx.c    | 22 +++++++++++-----------
>  .../net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h  |  5 ++---
>  drivers/net/ethernet/mellanox/mlx5/core/wq.c       |  5 -----
>  drivers/net/ethernet/mellanox/mlx5/core/wq.h       | 11 +++++------
>  include/linux/mlx5/driver.h                        |  8 ++++++++
>  6 files changed, 31 insertions(+), 32 deletions(-)
>
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
> index d3a1dd2..549e926 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
> @@ -429,10 +429,9 @@ static inline u16 mlx5e_icosq_wrap_cnt(struct mlx5e_icosq *sq)
>  
>  static inline void mlx5e_fill_icosq_frag_edge(struct mlx5e_icosq *sq,
>  					      struct mlx5_wq_cyc *wq,
> -					      u16 pi, u16 frag_pi)
> +					      u16 pi, u16 nnops)
>  {
>  	struct mlx5e_sq_wqe_info *edge_wi, *wi = &sq->db.ico_wqe[pi];
> -	u8 nnops = mlx5_wq_cyc_get_frag_size(wq) - frag_pi;
>  
>  	edge_wi = wi + nnops;
>  
> @@ -451,15 +450,14 @@ static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
>  	struct mlx5_wq_cyc *wq = &sq->wq;
>  	struct mlx5e_umr_wqe *umr_wqe;
>  	u16 xlt_offset = ix << (MLX5E_LOG_ALIGNED_MPWQE_PPW - 1);
> -	u16 pi, frag_pi;
> +	u16 pi, contig_wqebbs_room;
>  	int err;
>  	int i;
>  
>  	pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
> -	frag_pi = mlx5_wq_cyc_ctr2fragix(wq, sq->pc);
> -
> -	if (unlikely(frag_pi + MLX5E_UMR_WQEBBS > mlx5_wq_cyc_get_frag_size(wq))) {
> -		mlx5e_fill_icosq_frag_edge(sq, wq, pi, frag_pi);
> +	contig_wqebbs_room = mlx5_wq_cyc_get_contig_wqebbs(wq, pi);
> +	if (unlikely(contig_wqebbs_room < MLX5E_UMR_WQEBBS)) {
> +		mlx5e_fill_icosq_frag_edge(sq, wq, pi, contig_wqebbs_room);
>  		pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
>  	}
>  
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
> index f29deb4..1e774d9 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
> @@ -287,10 +287,9 @@ mlx5e_txwqe_build_dsegs(struct mlx5e_txqsq *sq, struct sk_buff *skb,
>  
>  static inline void mlx5e_fill_sq_frag_edge(struct mlx5e_txqsq *sq,
>  					   struct mlx5_wq_cyc *wq,
> -					   u16 pi, u16 frag_pi)
> +					   u16 pi, u16 nnops)
>  {
>  	struct mlx5e_tx_wqe_info *edge_wi, *wi = &sq->db.wqe_info[pi];
> -	u8 nnops = mlx5_wq_cyc_get_frag_size(wq) - frag_pi;
>  
>  	edge_wi = wi + nnops;
>  
> @@ -345,8 +344,8 @@ netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
>  	struct mlx5e_tx_wqe_info *wi;
>  
>  	struct mlx5e_sq_stats *stats = sq->stats;
> +	u16 headlen, ihs, contig_wqebbs_room;
>  	u16 ds_cnt, ds_cnt_inl = 0;
> -	u16 headlen, ihs, frag_pi;
>  	u8 num_wqebbs, opcode;
>  	u32 num_bytes;
>  	int num_dma;
> @@ -383,9 +382,9 @@ netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
>  	}
>  
>  	num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS);
> -	frag_pi = mlx5_wq_cyc_ctr2fragix(wq, sq->pc);
> -	if (unlikely(frag_pi + num_wqebbs > mlx5_wq_cyc_get_frag_size(wq))) {
> -		mlx5e_fill_sq_frag_edge(sq, wq, pi, frag_pi);
> +	contig_wqebbs_room = mlx5_wq_cyc_get_contig_wqebbs(wq, pi);
> +	if (unlikely(contig_wqebbs_room < num_wqebbs)) {
> +		mlx5e_fill_sq_frag_edge(sq, wq, pi, contig_wqebbs_room);
>  		mlx5e_sq_fetch_wqe(sq, &wqe, &pi);
>  	}
>  
> @@ -629,7 +628,7 @@ netdev_tx_t mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
>  	struct mlx5e_tx_wqe_info *wi;
>  
>  	struct mlx5e_sq_stats *stats = sq->stats;
> -	u16 headlen, ihs, pi, frag_pi;
> +	u16 headlen, ihs, pi, contig_wqebbs_room;
>  	u16 ds_cnt, ds_cnt_inl = 0;
>  	u8 num_wqebbs, opcode;
>  	u32 num_bytes;
> @@ -665,13 +664,14 @@ netdev_tx_t mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
>  	}
>  
>  	num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS);
> -	frag_pi = mlx5_wq_cyc_ctr2fragix(wq, sq->pc);
> -	if (unlikely(frag_pi + num_wqebbs > mlx5_wq_cyc_get_frag_size(wq))) {
> +	pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
> +	contig_wqebbs_room = mlx5_wq_cyc_get_contig_wqebbs(wq, pi);
> +	if (unlikely(contig_wqebbs_room < num_wqebbs)) {
> +		mlx5e_fill_sq_frag_edge(sq, wq, pi, contig_wqebbs_room);
>  		pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
> -		mlx5e_fill_sq_frag_edge(sq, wq, pi, frag_pi);
>  	}
>  
> -	mlx5i_sq_fetch_wqe(sq, &wqe, &pi);
> +	mlx5i_sq_fetch_wqe(sq, &wqe, pi);
>  
>  	/* fill wqe */
>  	wi       = &sq->db.wqe_info[pi];
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h
> index 08eac92..0982c57 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h
> @@ -109,12 +109,11 @@ struct mlx5i_tx_wqe {
>  
>  static inline void mlx5i_sq_fetch_wqe(struct mlx5e_txqsq *sq,
>  				      struct mlx5i_tx_wqe **wqe,
> -				      u16 *pi)
> +				      u16 pi)
>  {
>  	struct mlx5_wq_cyc *wq = &sq->wq;
>  
> -	*pi  = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
> -	*wqe = mlx5_wq_cyc_get_wqe(wq, *pi);
> +	*wqe = mlx5_wq_cyc_get_wqe(wq, pi);
>  	memset(*wqe, 0, sizeof(**wqe));
>  }
>  
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.c b/drivers/net/ethernet/mellanox/mlx5/core/wq.c
> index d838af9..9046475 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/wq.c
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.c
> @@ -39,11 +39,6 @@ u32 mlx5_wq_cyc_get_size(struct mlx5_wq_cyc *wq)
>  	return (u32)wq->fbc.sz_m1 + 1;
>  }
>  
> -u16 mlx5_wq_cyc_get_frag_size(struct mlx5_wq_cyc *wq)
> -{
> -	return wq->fbc.frag_sz_m1 + 1;
> -}
> -
>  u32 mlx5_cqwq_get_size(struct mlx5_cqwq *wq)
>  {
>  	return wq->fbc.sz_m1 + 1;
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.h b/drivers/net/ethernet/mellanox/mlx5/core/wq.h
> index 16476cc..3112565 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/wq.h
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.h
> @@ -80,7 +80,6 @@ int mlx5_wq_cyc_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
>  		       void *wqc, struct mlx5_wq_cyc *wq,
>  		       struct mlx5_wq_ctrl *wq_ctrl);
>  u32 mlx5_wq_cyc_get_size(struct mlx5_wq_cyc *wq);
> -u16 mlx5_wq_cyc_get_frag_size(struct mlx5_wq_cyc *wq);
>  
>  int mlx5_wq_qp_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
>  		      void *qpc, struct mlx5_wq_qp *wq,
> @@ -140,11 +139,6 @@ static inline u16 mlx5_wq_cyc_ctr2ix(struct mlx5_wq_cyc *wq, u16 ctr)
>  	return ctr & wq->fbc.sz_m1;
>  }
>  
> -static inline u16 mlx5_wq_cyc_ctr2fragix(struct mlx5_wq_cyc *wq, u16 ctr)
> -{
> -	return ctr & wq->fbc.frag_sz_m1;
> -}
> -
>  static inline u16 mlx5_wq_cyc_get_head(struct mlx5_wq_cyc *wq)
>  {
>  	return mlx5_wq_cyc_ctr2ix(wq, wq->wqe_ctr);
> @@ -160,6 +154,11 @@ static inline void *mlx5_wq_cyc_get_wqe(struct mlx5_wq_cyc *wq, u16 ix)
>  	return mlx5_frag_buf_get_wqe(&wq->fbc, ix);
>  }
>  
> +static inline u16 mlx5_wq_cyc_get_contig_wqebbs(struct mlx5_wq_cyc *wq, u16 ix)
> +{
> +	return mlx5_frag_buf_get_idx_last_contig_stride(&wq->fbc, ix) - ix + 1;
> +}
> +
>  static inline int mlx5_wq_cyc_cc_bigger(u16 cc1, u16 cc2)
>  {
>  	int equal   = (cc1 == cc2);
> diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
> index 64f4505..b49bfc8 100644
> --- a/include/linux/mlx5/driver.h
> +++ b/include/linux/mlx5/driver.h
> @@ -1022,6 +1022,14 @@ static inline void *mlx5_frag_buf_get_wqe(struct mlx5_frag_buf_ctrl *fbc,
>  		((fbc->frag_sz_m1 & ix) << fbc->log_stride);
>  }
>  
> +static inline u32
> +mlx5_frag_buf_get_idx_last_contig_stride(struct mlx5_frag_buf_ctrl *fbc, u32 ix)
> +{
> +	u32 last_frag_stride_idx = (ix + fbc->strides_offset) | fbc->frag_sz_m1;
> +
> +	return min_t(u32, last_frag_stride_idx - fbc->strides_offset, fbc->sz_m1);
> +}
> +
>  int mlx5_cmd_init(struct mlx5_core_dev *dev);
>  void mlx5_cmd_cleanup(struct mlx5_core_dev *dev);
>  void mlx5_cmd_use_events(struct mlx5_core_dev *dev);

Patch

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index d3a1dd2..549e926 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -429,10 +429,9 @@  static inline u16 mlx5e_icosq_wrap_cnt(struct mlx5e_icosq *sq)
 
 static inline void mlx5e_fill_icosq_frag_edge(struct mlx5e_icosq *sq,
 					      struct mlx5_wq_cyc *wq,
-					      u16 pi, u16 frag_pi)
+					      u16 pi, u16 nnops)
 {
 	struct mlx5e_sq_wqe_info *edge_wi, *wi = &sq->db.ico_wqe[pi];
-	u8 nnops = mlx5_wq_cyc_get_frag_size(wq) - frag_pi;
 
 	edge_wi = wi + nnops;
 
@@ -451,15 +450,14 @@  static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
 	struct mlx5_wq_cyc *wq = &sq->wq;
 	struct mlx5e_umr_wqe *umr_wqe;
 	u16 xlt_offset = ix << (MLX5E_LOG_ALIGNED_MPWQE_PPW - 1);
-	u16 pi, frag_pi;
+	u16 pi, contig_wqebbs_room;
 	int err;
 	int i;
 
 	pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
-	frag_pi = mlx5_wq_cyc_ctr2fragix(wq, sq->pc);
-
-	if (unlikely(frag_pi + MLX5E_UMR_WQEBBS > mlx5_wq_cyc_get_frag_size(wq))) {
-		mlx5e_fill_icosq_frag_edge(sq, wq, pi, frag_pi);
+	contig_wqebbs_room = mlx5_wq_cyc_get_contig_wqebbs(wq, pi);
+	if (unlikely(contig_wqebbs_room < MLX5E_UMR_WQEBBS)) {
+		mlx5e_fill_icosq_frag_edge(sq, wq, pi, contig_wqebbs_room);
 		pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
 	}
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
index f29deb4..1e774d9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
@@ -287,10 +287,9 @@  mlx5e_txwqe_build_dsegs(struct mlx5e_txqsq *sq, struct sk_buff *skb,
 
 static inline void mlx5e_fill_sq_frag_edge(struct mlx5e_txqsq *sq,
 					   struct mlx5_wq_cyc *wq,
-					   u16 pi, u16 frag_pi)
+					   u16 pi, u16 nnops)
 {
 	struct mlx5e_tx_wqe_info *edge_wi, *wi = &sq->db.wqe_info[pi];
-	u8 nnops = mlx5_wq_cyc_get_frag_size(wq) - frag_pi;
 
 	edge_wi = wi + nnops;
 
@@ -345,8 +344,8 @@  netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
 	struct mlx5e_tx_wqe_info *wi;
 
 	struct mlx5e_sq_stats *stats = sq->stats;
+	u16 headlen, ihs, contig_wqebbs_room;
 	u16 ds_cnt, ds_cnt_inl = 0;
-	u16 headlen, ihs, frag_pi;
 	u8 num_wqebbs, opcode;
 	u32 num_bytes;
 	int num_dma;
@@ -383,9 +382,9 @@  netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
 	}
 
 	num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS);
-	frag_pi = mlx5_wq_cyc_ctr2fragix(wq, sq->pc);
-	if (unlikely(frag_pi + num_wqebbs > mlx5_wq_cyc_get_frag_size(wq))) {
-		mlx5e_fill_sq_frag_edge(sq, wq, pi, frag_pi);
+	contig_wqebbs_room = mlx5_wq_cyc_get_contig_wqebbs(wq, pi);
+	if (unlikely(contig_wqebbs_room < num_wqebbs)) {
+		mlx5e_fill_sq_frag_edge(sq, wq, pi, contig_wqebbs_room);
 		mlx5e_sq_fetch_wqe(sq, &wqe, &pi);
 	}
 
@@ -629,7 +628,7 @@  netdev_tx_t mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
 	struct mlx5e_tx_wqe_info *wi;
 
 	struct mlx5e_sq_stats *stats = sq->stats;
-	u16 headlen, ihs, pi, frag_pi;
+	u16 headlen, ihs, pi, contig_wqebbs_room;
 	u16 ds_cnt, ds_cnt_inl = 0;
 	u8 num_wqebbs, opcode;
 	u32 num_bytes;
@@ -665,13 +664,14 @@  netdev_tx_t mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
 	}
 
 	num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS);
-	frag_pi = mlx5_wq_cyc_ctr2fragix(wq, sq->pc);
-	if (unlikely(frag_pi + num_wqebbs > mlx5_wq_cyc_get_frag_size(wq))) {
+	pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
+	contig_wqebbs_room = mlx5_wq_cyc_get_contig_wqebbs(wq, pi);
+	if (unlikely(contig_wqebbs_room < num_wqebbs)) {
+		mlx5e_fill_sq_frag_edge(sq, wq, pi, contig_wqebbs_room);
 		pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
-		mlx5e_fill_sq_frag_edge(sq, wq, pi, frag_pi);
 	}
 
-	mlx5i_sq_fetch_wqe(sq, &wqe, &pi);
+	mlx5i_sq_fetch_wqe(sq, &wqe, pi);
 
 	/* fill wqe */
 	wi       = &sq->db.wqe_info[pi];
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h
index 08eac92..0982c57 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h
@@ -109,12 +109,11 @@  struct mlx5i_tx_wqe {
 
 static inline void mlx5i_sq_fetch_wqe(struct mlx5e_txqsq *sq,
 				      struct mlx5i_tx_wqe **wqe,
-				      u16 *pi)
+				      u16 pi)
 {
 	struct mlx5_wq_cyc *wq = &sq->wq;
 
-	*pi  = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
-	*wqe = mlx5_wq_cyc_get_wqe(wq, *pi);
+	*wqe = mlx5_wq_cyc_get_wqe(wq, pi);
 	memset(*wqe, 0, sizeof(**wqe));
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.c b/drivers/net/ethernet/mellanox/mlx5/core/wq.c
index d838af9..9046475 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/wq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.c
@@ -39,11 +39,6 @@  u32 mlx5_wq_cyc_get_size(struct mlx5_wq_cyc *wq)
 	return (u32)wq->fbc.sz_m1 + 1;
 }
 
-u16 mlx5_wq_cyc_get_frag_size(struct mlx5_wq_cyc *wq)
-{
-	return wq->fbc.frag_sz_m1 + 1;
-}
-
 u32 mlx5_cqwq_get_size(struct mlx5_cqwq *wq)
 {
 	return wq->fbc.sz_m1 + 1;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.h b/drivers/net/ethernet/mellanox/mlx5/core/wq.h
index 16476cc..3112565 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/wq.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.h
@@ -80,7 +80,6 @@  int mlx5_wq_cyc_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
 		       void *wqc, struct mlx5_wq_cyc *wq,
 		       struct mlx5_wq_ctrl *wq_ctrl);
 u32 mlx5_wq_cyc_get_size(struct mlx5_wq_cyc *wq);
-u16 mlx5_wq_cyc_get_frag_size(struct mlx5_wq_cyc *wq);
 
 int mlx5_wq_qp_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
 		      void *qpc, struct mlx5_wq_qp *wq,
@@ -140,11 +139,6 @@  static inline u16 mlx5_wq_cyc_ctr2ix(struct mlx5_wq_cyc *wq, u16 ctr)
 	return ctr & wq->fbc.sz_m1;
 }
 
-static inline u16 mlx5_wq_cyc_ctr2fragix(struct mlx5_wq_cyc *wq, u16 ctr)
-{
-	return ctr & wq->fbc.frag_sz_m1;
-}
-
 static inline u16 mlx5_wq_cyc_get_head(struct mlx5_wq_cyc *wq)
 {
 	return mlx5_wq_cyc_ctr2ix(wq, wq->wqe_ctr);
@@ -160,6 +154,11 @@  static inline void *mlx5_wq_cyc_get_wqe(struct mlx5_wq_cyc *wq, u16 ix)
 	return mlx5_frag_buf_get_wqe(&wq->fbc, ix);
 }
 
+static inline u16 mlx5_wq_cyc_get_contig_wqebbs(struct mlx5_wq_cyc *wq, u16 ix)
+{
+	return mlx5_frag_buf_get_idx_last_contig_stride(&wq->fbc, ix) - ix + 1;
+}
+
 static inline int mlx5_wq_cyc_cc_bigger(u16 cc1, u16 cc2)
 {
 	int equal   = (cc1 == cc2);
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 64f4505..b49bfc8 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -1022,6 +1022,14 @@  static inline void *mlx5_frag_buf_get_wqe(struct mlx5_frag_buf_ctrl *fbc,
 		((fbc->frag_sz_m1 & ix) << fbc->log_stride);
 }
 
+static inline u32
+mlx5_frag_buf_get_idx_last_contig_stride(struct mlx5_frag_buf_ctrl *fbc, u32 ix)
+{
+	u32 last_frag_stride_idx = (ix + fbc->strides_offset) | fbc->frag_sz_m1;
+
+	return min_t(u32, last_frag_stride_idx - fbc->strides_offset, fbc->sz_m1);
+}
+
 int mlx5_cmd_init(struct mlx5_core_dev *dev);
 void mlx5_cmd_cleanup(struct mlx5_core_dev *dev);
 void mlx5_cmd_use_events(struct mlx5_core_dev *dev);