diff mbox series

[net-next,14/17] net/mlx5e: RX, Use cyclic WQ in legacy RQ

Message ID 20180602000544.18717-15-saeedm@mellanox.com
State Accepted, archived
Delegated to: David Miller
Headers show
Series [net-next,01/17] net/mlx5e: IPOIB, Fix overflowing SQ WQE memset | expand

Commit Message

Saeed Mahameed June 2, 2018, 12:05 a.m. UTC
From: Tariq Toukan <tariqt@mellanox.com>

Now that LRO is not supported for Legacy RQ, there is no source of
out-of-order completions in the WQ, and we can use a cyclic one.
This has multiple advantages:
- reduces the WQE size (smaller PCI transactions).
- lower overhead in datapath (no handling of 'next' pointers).
- no reserved WQE for the WQ head (was need in linked-list).
- allows using a constant map between frag and dma_info struct, in downstream patch.

Performance tests:
ConnectX-4, single core, single RX ring.
Major gain in packet rate of single ring XDP drop.
Bottleneck is shifted form HW (at 16Mpps) to SW (at 20Mpps).

Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h  |  10 +-
 .../net/ethernet/mellanox/mlx5/core/en_main.c |  89 ++++++++------
 .../net/ethernet/mellanox/mlx5/core/en_rep.c  |   2 +-
 .../net/ethernet/mellanox/mlx5/core/en_rx.c   | 115 ++++++++----------
 drivers/net/ethernet/mellanox/mlx5/core/wq.c  |   1 +
 drivers/net/ethernet/mellanox/mlx5/core/wq.h  |  55 ++++++++-
 6 files changed, 161 insertions(+), 111 deletions(-)
diff mbox series

Patch

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index f2f2dcf6b23c..af521dd52993 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -186,9 +186,13 @@  struct mlx5e_tx_wqe {
 	struct mlx5_wqe_data_seg data[0];
 };
 
-struct mlx5e_rx_wqe {
+struct mlx5e_rx_wqe_ll {
 	struct mlx5_wqe_srq_next_seg  next;
-	struct mlx5_wqe_data_seg      data;
+	struct mlx5_wqe_data_seg      data[0];
+};
+
+struct mlx5e_rx_wqe_cyc {
+	struct mlx5_wqe_data_seg      data[0];
 };
 
 struct mlx5e_umr_wqe {
@@ -500,7 +504,7 @@  struct mlx5e_rq {
 	/* data path */
 	union {
 		struct {
-			struct mlx5_wq_ll      wq;
+			struct mlx5_wq_cyc     wq;
 			struct mlx5e_wqe_frag_info *frag_info;
 			u32 frag_sz;	/* max possible skb frag_sz */
 			union {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 3a007717cba5..7fd2d736fbb1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -166,7 +166,7 @@  static u16 mlx5e_get_rq_headroom(struct mlx5_core_dev *mdev,
 
 	linear_rq_headroom += NET_IP_ALIGN;
 
-	if (params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST)
+	if (params->rq_wq_type == MLX5_WQ_TYPE_CYCLIC)
 		return linear_rq_headroom;
 
 	if (mlx5e_rx_mpwqe_is_linear_skb(mdev, params))
@@ -205,7 +205,7 @@  void mlx5e_set_rq_type(struct mlx5_core_dev *mdev, struct mlx5e_params *params)
 	params->rq_wq_type = mlx5e_striding_rq_possible(mdev, params) &&
 		MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_STRIDING_RQ) ?
 		MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ :
-		MLX5_WQ_TYPE_LINKED_LIST;
+		MLX5_WQ_TYPE_CYCLIC;
 }
 
 static void mlx5e_update_carrier(struct mlx5e_priv *priv)
@@ -325,7 +325,7 @@  static u32 mlx5e_rqwq_get_size(struct mlx5e_rq *rq)
 	case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
 		return mlx5_wq_ll_get_size(&rq->mpwqe.wq);
 	default:
-		return mlx5_wq_ll_get_size(&rq->wqe.wq);
+		return mlx5_wq_cyc_get_size(&rq->wqe.wq);
 	}
 }
 
@@ -491,15 +491,15 @@  static int mlx5e_alloc_rq(struct mlx5e_channel *c,
 		if (err)
 			goto err_destroy_umr_mkey;
 		break;
-	default: /* MLX5_WQ_TYPE_LINKED_LIST */
-		err = mlx5_wq_ll_create(mdev, &rqp->wq, rqc_wq, &rq->wqe.wq,
-					&rq->wq_ctrl);
+	default: /* MLX5_WQ_TYPE_CYCLIC */
+		err = mlx5_wq_cyc_create(mdev, &rqp->wq, rqc_wq, &rq->wqe.wq,
+					 &rq->wq_ctrl);
 		if (err)
 			return err;
 
 		rq->wqe.wq.db = &rq->wqe.wq.db[MLX5_RCV_DBR];
 
-		wq_sz = mlx5_wq_ll_get_size(&rq->wqe.wq);
+		wq_sz = mlx5_wq_cyc_get_size(&rq->wqe.wq);
 
 		rq->wqe.frag_info =
 			kzalloc_node(wq_sz * sizeof(*rq->wqe.frag_info),
@@ -568,19 +568,19 @@  static int mlx5e_alloc_rq(struct mlx5e_channel *c,
 
 	for (i = 0; i < wq_sz; i++) {
 		if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) {
-			struct mlx5e_rx_wqe *wqe =
+			struct mlx5e_rx_wqe_ll *wqe =
 				mlx5_wq_ll_get_wqe(&rq->mpwqe.wq, i);
 			u64 dma_offset = mlx5e_get_mpwqe_offset(rq, i);
 
-			wqe->data.addr = cpu_to_be64(dma_offset + rq->buff.headroom);
-			wqe->data.byte_count = cpu_to_be32(byte_count);
-			wqe->data.lkey = rq->mkey_be;
+			wqe->data[0].addr = cpu_to_be64(dma_offset + rq->buff.headroom);
+			wqe->data[0].byte_count = cpu_to_be32(byte_count);
+			wqe->data[0].lkey = rq->mkey_be;
 		} else {
-			struct mlx5e_rx_wqe *wqe =
-				mlx5_wq_ll_get_wqe(&rq->wqe.wq, i);
+			struct mlx5e_rx_wqe_cyc *wqe =
+				mlx5_wq_cyc_get_wqe(&rq->wqe.wq, i);
 
-			wqe->data.byte_count = cpu_to_be32(byte_count);
-			wqe->data.lkey = rq->mkey_be;
+			wqe->data[0].byte_count = cpu_to_be32(byte_count);
+			wqe->data[0].lkey = rq->mkey_be;
 		}
 	}
 
@@ -630,7 +630,7 @@  static void mlx5e_free_rq(struct mlx5e_rq *rq)
 		kfree(rq->mpwqe.info);
 		mlx5_core_destroy_mkey(rq->mdev, &rq->umr_mkey);
 		break;
-	default: /* MLX5_WQ_TYPE_LINKED_LIST */
+	default: /* MLX5_WQ_TYPE_CYCLIC */
 		kfree(rq->wqe.frag_info);
 	}
 
@@ -801,11 +801,12 @@  static void mlx5e_free_rx_descs(struct mlx5e_rq *rq)
 	if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) {
 		struct mlx5_wq_ll *wq = &rq->mpwqe.wq;
 
+		/* UMR WQE (if in progress) is always at wq->head */
 		if (rq->mpwqe.umr_in_progress)
 			mlx5e_free_rx_mpwqe(rq, &rq->mpwqe.info[wq->head]);
 
 		while (!mlx5_wq_ll_is_empty(wq)) {
-			struct mlx5e_rx_wqe *wqe;
+			struct mlx5e_rx_wqe_ll *wqe;
 
 			wqe_ix_be = *wq->tail_next;
 			wqe_ix    = be16_to_cpu(wqe_ix_be);
@@ -815,24 +816,19 @@  static void mlx5e_free_rx_descs(struct mlx5e_rq *rq)
 				       &wqe->next.next_wqe_index);
 		}
 	} else {
-		struct mlx5_wq_ll *wq = &rq->wqe.wq;
-
-		while (!mlx5_wq_ll_is_empty(wq)) {
-			struct mlx5e_rx_wqe *wqe;
+		struct mlx5_wq_cyc *wq = &rq->wqe.wq;
 
-			wqe_ix_be = *wq->tail_next;
-			wqe_ix    = be16_to_cpu(wqe_ix_be);
-			wqe       = mlx5_wq_ll_get_wqe(wq, wqe_ix);
+		while (!mlx5_wq_cyc_is_empty(wq)) {
+			wqe_ix = mlx5_wq_cyc_get_tail(wq);
 			rq->dealloc_wqe(rq, wqe_ix);
-			mlx5_wq_ll_pop(wq, wqe_ix_be,
-				       &wqe->next.next_wqe_index);
+			mlx5_wq_cyc_pop(wq);
 		}
 
 		/* Clean outstanding pages on handled WQEs that decided to do page-reuse,
 		 * but yet to be re-posted.
 		 */
 		if (rq->wqe.page_reuse) {
-			int wq_sz = mlx5_wq_ll_get_size(wq);
+			int wq_sz = mlx5_wq_cyc_get_size(wq);
 
 			for (wqe_ix = 0; wqe_ix < wq_sz; wqe_ix++)
 				rq->dealloc_wqe(rq, wqe_ix);
@@ -1958,6 +1954,21 @@  static void mlx5e_close_channel(struct mlx5e_channel *c)
 	kfree(c);
 }
 
+static inline u8 mlx5e_get_rqwq_log_stride(u8 wq_type, int ndsegs)
+{
+	int sz = sizeof(struct mlx5_wqe_data_seg) * ndsegs;
+
+	switch (wq_type) {
+	case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
+		sz += sizeof(struct mlx5e_rx_wqe_ll);
+		break;
+	default: /* MLX5_WQ_TYPE_CYCLIC */
+		sz += sizeof(struct mlx5e_rx_wqe_cyc);
+	}
+
+	return order_base_2(sz);
+}
+
 static void mlx5e_build_rq_param(struct mlx5e_priv *priv,
 				 struct mlx5e_params *params,
 				 struct mlx5e_rq_param *param)
@@ -1965,6 +1976,7 @@  static void mlx5e_build_rq_param(struct mlx5e_priv *priv,
 	struct mlx5_core_dev *mdev = priv->mdev;
 	void *rqc = param->rqc;
 	void *wq = MLX5_ADDR_OF(rqc, rqc, wq);
+	int ndsegs = 1;
 
 	switch (params->rq_wq_type) {
 	case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
@@ -1974,16 +1986,16 @@  static void mlx5e_build_rq_param(struct mlx5e_priv *priv,
 		MLX5_SET(wq, wq, log_wqe_stride_size,
 			 mlx5e_mpwqe_get_log_stride_size(mdev, params) -
 			 MLX5_MPWQE_LOG_STRIDE_SZ_BASE);
-		MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ);
 		MLX5_SET(wq, wq, log_wq_sz, mlx5e_mpwqe_get_log_rq_size(params));
 		break;
-	default: /* MLX5_WQ_TYPE_LINKED_LIST */
-		MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_LINKED_LIST);
+	default: /* MLX5_WQ_TYPE_CYCLIC */
 		MLX5_SET(wq, wq, log_wq_sz, params->log_rq_mtu_frames);
 	}
 
+	MLX5_SET(wq, wq, wq_type,          params->rq_wq_type);
 	MLX5_SET(wq, wq, end_padding_mode, MLX5_WQ_END_PAD_MODE_ALIGN);
-	MLX5_SET(wq, wq, log_wq_stride,    ilog2(sizeof(struct mlx5e_rx_wqe)));
+	MLX5_SET(wq, wq, log_wq_stride,
+		 mlx5e_get_rqwq_log_stride(params->rq_wq_type, ndsegs));
 	MLX5_SET(wq, wq, pd,               mdev->mlx5e_res.pdn);
 	MLX5_SET(rqc, rqc, counter_set_id, priv->q_counter);
 	MLX5_SET(rqc, rqc, vsd,            params->vlan_strip_disable);
@@ -1999,8 +2011,9 @@  static void mlx5e_build_drop_rq_param(struct mlx5e_priv *priv,
 	void *rqc = param->rqc;
 	void *wq = MLX5_ADDR_OF(rqc, rqc, wq);
 
-	MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_LINKED_LIST);
-	MLX5_SET(wq, wq, log_wq_stride,    ilog2(sizeof(struct mlx5e_rx_wqe)));
+	MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC);
+	MLX5_SET(wq, wq, log_wq_stride,
+		 mlx5e_get_rqwq_log_stride(MLX5_WQ_TYPE_CYCLIC, 1));
 	MLX5_SET(rqc, rqc, counter_set_id, priv->drop_rq_q_counter);
 
 	param->wq.buf_numa_node = dev_to_node(&mdev->pdev->dev);
@@ -2051,7 +2064,7 @@  static void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv,
 		log_cq_size = mlx5e_mpwqe_get_log_rq_size(params) +
 			mlx5e_mpwqe_get_log_num_strides(mdev, params);
 		break;
-	default: /* MLX5_WQ_TYPE_LINKED_LIST */
+	default: /* MLX5_WQ_TYPE_CYCLIC */
 		log_cq_size = params->log_rq_mtu_frames;
 	}
 
@@ -2857,8 +2870,8 @@  static int mlx5e_alloc_drop_rq(struct mlx5_core_dev *mdev,
 
 	param->wq.db_numa_node = param->wq.buf_numa_node;
 
-	err = mlx5_wq_ll_create(mdev, &param->wq, rqc_wq, &rq->wqe.wq,
-				&rq->wq_ctrl);
+	err = mlx5_wq_cyc_create(mdev, &param->wq, rqc_wq, &rq->wqe.wq,
+				 &rq->wq_ctrl);
 	if (err)
 		return err;
 
@@ -3360,7 +3373,7 @@  static int set_feature_lro(struct net_device *netdev, bool enable)
 	new_channels.params = *old_params;
 	new_channels.params.lro_en = enable;
 
-	if (old_params->rq_wq_type != MLX5_WQ_TYPE_LINKED_LIST) {
+	if (old_params->rq_wq_type != MLX5_WQ_TYPE_CYCLIC) {
 		if (mlx5e_rx_mpwqe_is_linear_skb(mdev, old_params) ==
 		    mlx5e_rx_mpwqe_is_linear_skb(mdev, &new_channels.params))
 			reset = false;
@@ -3566,7 +3579,7 @@  int mlx5e_change_mtu(struct net_device *netdev, int new_mtu,
 	new_channels.params = *params;
 	new_channels.params.sw_mtu = new_mtu;
 
-	if (params->rq_wq_type != MLX5_WQ_TYPE_LINKED_LIST) {
+	if (params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) {
 		u8 ppw_old = mlx5e_mpwqe_log_pkts_per_wqe(params);
 		u8 ppw_new = mlx5e_mpwqe_log_pkts_per_wqe(&new_channels.params);
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index 8ab4c96b7f7c..3857f22b5500 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -927,7 +927,7 @@  static void mlx5e_build_rep_params(struct mlx5_core_dev *mdev,
 	params->hard_mtu    = MLX5E_ETH_HARD_MTU;
 	params->sw_mtu      = mtu;
 	params->log_sq_size = MLX5E_REP_PARAMS_LOG_SQ_SIZE;
-	params->rq_wq_type  = MLX5_WQ_TYPE_LINKED_LIST;
+	params->rq_wq_type  = MLX5_WQ_TYPE_CYCLIC;
 	params->log_rq_mtu_frames = MLX5E_REP_PARAMS_LOG_RQ_SIZE;
 
 	params->rx_dim_enabled = MLX5_CAP_GEN(mdev, cq_moderation);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index 3b12d4de5b98..3cdf2c097356 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -113,7 +113,7 @@  static inline void mlx5e_decompress_cqe(struct mlx5e_rq *rq,
 			mpwrq_get_cqe_consumed_strides(&cq->title);
 	else
 		cq->decmprs_wqe_counter =
-			mlx5_wq_ll_ctr2ix(&rq->wqe.wq, cq->decmprs_wqe_counter + 1);
+			mlx5_wq_cyc_ctr2ix(&rq->wqe.wq, cq->decmprs_wqe_counter + 1);
 }
 
 static inline void mlx5e_decompress_cqe_no_hash(struct mlx5e_rq *rq,
@@ -270,7 +270,12 @@  static inline bool mlx5e_page_reuse(struct mlx5e_rq *rq,
 		!mlx5e_page_is_reserved(wi->di.page);
 }
 
-static int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix)
+static inline struct mlx5e_wqe_frag_info *get_frag(struct mlx5e_rq *rq, u16 ix)
+{
+	return &rq->wqe.frag_info[ix];
+}
+
+static int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe_cyc *wqe, u16 ix)
 {
 	struct mlx5e_wqe_frag_info *wi = &rq->wqe.frag_info[ix];
 
@@ -281,7 +286,7 @@  static int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16
 		wi->offset = 0;
 	}
 
-	wqe->data.addr = cpu_to_be64(wi->di.addr + wi->offset + rq->buff.headroom);
+	wqe->data[0].addr = cpu_to_be64(wi->di.addr + wi->offset + rq->buff.headroom);
 	return 0;
 }
 
@@ -370,7 +375,7 @@  void mlx5e_free_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi)
 static void mlx5e_post_rx_mpwqe(struct mlx5e_rq *rq)
 {
 	struct mlx5_wq_ll *wq = &rq->mpwqe.wq;
-	struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(wq, wq->head);
+	struct mlx5e_rx_wqe_ll *wqe = mlx5_wq_ll_get_wqe(wq, wq->head);
 
 	rq->mpwqe.umr_in_progress = false;
 
@@ -470,31 +475,32 @@  void mlx5e_dealloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
 
 bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq)
 {
-	struct mlx5_wq_ll *wq = &rq->wqe.wq;
+	struct mlx5_wq_cyc *wq = &rq->wqe.wq;
 	int err;
 
 	if (unlikely(!test_bit(MLX5E_RQ_STATE_ENABLED, &rq->state)))
 		return false;
 
-	if (mlx5_wq_ll_is_full(wq))
+	if (mlx5_wq_cyc_is_full(wq))
 		return false;
 
 	do {
-		struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(wq, wq->head);
+		u16 head = mlx5_wq_cyc_get_head(wq);
+		struct mlx5e_rx_wqe_cyc *wqe = mlx5_wq_cyc_get_wqe(wq, head);
 
-		err = mlx5e_alloc_rx_wqe(rq, wqe, wq->head);
+		err = mlx5e_alloc_rx_wqe(rq, wqe, head);
 		if (unlikely(err)) {
 			rq->stats->buff_alloc_err++;
 			break;
 		}
 
-		mlx5_wq_ll_push(wq, be16_to_cpu(wqe->next.next_wqe_index));
-	} while (!mlx5_wq_ll_is_full(wq));
+		mlx5_wq_cyc_push(wq);
+	} while (!mlx5_wq_cyc_is_full(wq));
 
 	/* ensure wqes are visible to device before updating doorbell record */
 	dma_wmb();
 
-	mlx5_wq_ll_update_db_record(wq);
+	mlx5_wq_cyc_update_db_record(wq);
 
 	return !!err;
 }
@@ -987,19 +993,15 @@  struct sk_buff *skb_from_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe,
 
 void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
 {
-	struct mlx5_wq_ll *wq = &rq->wqe.wq;
+	struct mlx5_wq_cyc *wq = &rq->wqe.wq;
 	struct mlx5e_wqe_frag_info *wi;
-	struct mlx5e_rx_wqe *wqe;
-	__be16 wqe_counter_be;
 	struct sk_buff *skb;
-	u16 wqe_counter;
 	u32 cqe_bcnt;
+	u16 ci;
 
-	wqe_counter_be = cqe->wqe_counter;
-	wqe_counter    = be16_to_cpu(wqe_counter_be);
-	wqe            = mlx5_wq_ll_get_wqe(wq, wqe_counter);
-	wi             = &rq->wqe.frag_info[wqe_counter];
-	cqe_bcnt       = be32_to_cpu(cqe->byte_cnt);
+	ci       = mlx5_wq_cyc_ctr2ix(wq, be16_to_cpu(cqe->wqe_counter));
+	wi       = get_frag(rq, ci);
+	cqe_bcnt = be32_to_cpu(cqe->byte_cnt);
 
 	skb = skb_from_cqe(rq, cqe, wi, cqe_bcnt);
 	if (!skb) {
@@ -1007,20 +1009,19 @@  void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
 		if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) {
 			wi->di.page = NULL;
 			/* do not return page to cache, it will be returned on XDP_TX completion */
-			goto wq_ll_pop;
+			goto wq_cyc_pop;
 		}
 		/* probably an XDP_DROP, save the page-reuse checks */
 		mlx5e_free_rx_wqe(rq, wi);
-		goto wq_ll_pop;
+		goto wq_cyc_pop;
 	}
 
 	mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb);
 	napi_gro_receive(rq->cq.napi, skb);
 
 	mlx5e_free_rx_wqe_reuse(rq, wi);
-wq_ll_pop:
-	mlx5_wq_ll_pop(wq, wqe_counter_be,
-		       &wqe->next.next_wqe_index);
+wq_cyc_pop:
+	mlx5_wq_cyc_pop(wq);
 }
 
 #ifdef CONFIG_MLX5_ESWITCH
@@ -1030,30 +1031,26 @@  void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
 	struct mlx5e_priv *priv = netdev_priv(netdev);
 	struct mlx5e_rep_priv *rpriv  = priv->ppriv;
 	struct mlx5_eswitch_rep *rep = rpriv->rep;
-	struct mlx5_wq_ll *wq = &rq->wqe.wq;
+	struct mlx5_wq_cyc *wq = &rq->wqe.wq;
 	struct mlx5e_wqe_frag_info *wi;
-	struct mlx5e_rx_wqe *wqe;
 	struct sk_buff *skb;
-	__be16 wqe_counter_be;
-	u16 wqe_counter;
 	u32 cqe_bcnt;
+	u16 ci;
 
-	wqe_counter_be = cqe->wqe_counter;
-	wqe_counter    = be16_to_cpu(wqe_counter_be);
-	wqe            = mlx5_wq_ll_get_wqe(wq, wqe_counter);
-	wi             = &rq->wqe.frag_info[wqe_counter];
-	cqe_bcnt       = be32_to_cpu(cqe->byte_cnt);
+	ci       = mlx5_wq_cyc_ctr2ix(wq, be16_to_cpu(cqe->wqe_counter));
+	wi       = get_frag(rq, ci);
+	cqe_bcnt = be32_to_cpu(cqe->byte_cnt);
 
 	skb = skb_from_cqe(rq, cqe, wi, cqe_bcnt);
 	if (!skb) {
 		if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) {
 			wi->di.page = NULL;
 			/* do not return page to cache, it will be returned on XDP_TX completion */
-			goto wq_ll_pop;
+			goto wq_cyc_pop;
 		}
 		/* probably an XDP_DROP, save the page-reuse checks */
 		mlx5e_free_rx_wqe(rq, wi);
-		goto wq_ll_pop;
+		goto wq_cyc_pop;
 	}
 
 	mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb);
@@ -1064,9 +1061,8 @@  void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
 	napi_gro_receive(rq->cq.napi, skb);
 
 	mlx5e_free_rx_wqe_reuse(rq, wi);
-wq_ll_pop:
-	mlx5_wq_ll_pop(wq, wqe_counter_be,
-		       &wqe->next.next_wqe_index);
+wq_cyc_pop:
+	mlx5_wq_cyc_pop(wq);
 }
 #endif
 
@@ -1165,7 +1161,7 @@  void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
 	u32 wqe_offset     = stride_ix << rq->mpwqe.log_stride_sz;
 	u32 head_offset    = wqe_offset & (PAGE_SIZE - 1);
 	u32 page_idx       = wqe_offset >> PAGE_SHIFT;
-	struct mlx5e_rx_wqe *wqe;
+	struct mlx5e_rx_wqe_ll *wqe;
 	struct mlx5_wq_ll *wq;
 	struct sk_buff *skb;
 	u16 cqe_bcnt;
@@ -1403,19 +1399,15 @@  static inline void mlx5i_complete_rx_cqe(struct mlx5e_rq *rq,
 
 void mlx5i_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
 {
-	struct mlx5_wq_ll *wq = &rq->wqe.wq;
+	struct mlx5_wq_cyc *wq = &rq->wqe.wq;
 	struct mlx5e_wqe_frag_info *wi;
-	struct mlx5e_rx_wqe *wqe;
-	__be16 wqe_counter_be;
 	struct sk_buff *skb;
-	u16 wqe_counter;
 	u32 cqe_bcnt;
+	u16 ci;
 
-	wqe_counter_be = cqe->wqe_counter;
-	wqe_counter    = be16_to_cpu(wqe_counter_be);
-	wqe            = mlx5_wq_ll_get_wqe(wq, wqe_counter);
-	wi             = &rq->wqe.frag_info[wqe_counter];
-	cqe_bcnt       = be32_to_cpu(cqe->byte_cnt);
+	ci       = mlx5_wq_cyc_ctr2ix(wq, be16_to_cpu(cqe->wqe_counter));
+	wi       = get_frag(rq, ci);
+	cqe_bcnt = be32_to_cpu(cqe->byte_cnt);
 
 	skb = skb_from_cqe(rq, cqe, wi, cqe_bcnt);
 	if (!skb)
@@ -1430,8 +1422,7 @@  void mlx5i_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
 
 wq_free_wqe:
 	mlx5e_free_rx_wqe_reuse(rq, wi);
-	mlx5_wq_ll_pop(wq, wqe_counter_be,
-		       &wqe->next.next_wqe_index);
+	mlx5_wq_cyc_pop(wq);
 }
 
 #endif /* CONFIG_MLX5_CORE_IPOIB */
@@ -1440,38 +1431,34 @@  void mlx5i_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
 
 void mlx5e_ipsec_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
 {
-	struct mlx5_wq_ll *wq = &rq->wqe.wq;
+	struct mlx5_wq_cyc *wq = &rq->wqe.wq;
 	struct mlx5e_wqe_frag_info *wi;
-	struct mlx5e_rx_wqe *wqe;
-	__be16 wqe_counter_be;
 	struct sk_buff *skb;
-	u16 wqe_counter;
 	u32 cqe_bcnt;
+	u16 ci;
 
-	wqe_counter_be = cqe->wqe_counter;
-	wqe_counter    = be16_to_cpu(wqe_counter_be);
-	wqe            = mlx5_wq_ll_get_wqe(wq, wqe_counter);
-	wi             = &rq->wqe.frag_info[wqe_counter];
-	cqe_bcnt       = be32_to_cpu(cqe->byte_cnt);
+	ci       = mlx5_wq_cyc_ctr2ix(wq, be16_to_cpu(cqe->wqe_counter));
+	wi       = get_frag(rq, ci);
+	cqe_bcnt = be32_to_cpu(cqe->byte_cnt);
 
 	skb = skb_from_cqe(rq, cqe, wi, cqe_bcnt);
 	if (unlikely(!skb)) {
 		/* a DROP, save the page-reuse checks */
 		mlx5e_free_rx_wqe(rq, wi);
-		goto wq_ll_pop;
+		goto wq_cyc_pop;
 	}
 	skb = mlx5e_ipsec_handle_rx_skb(rq->netdev, skb);
 	if (unlikely(!skb)) {
 		mlx5e_free_rx_wqe(rq, wi);
-		goto wq_ll_pop;
+		goto wq_cyc_pop;
 	}
 
 	mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb);
 	napi_gro_receive(rq->cq.napi, skb);
 
 	mlx5e_free_rx_wqe_reuse(rq, wi);
-wq_ll_pop:
-	mlx5_wq_ll_pop(wq, wqe_counter_be, &wqe->next.next_wqe_index);
+wq_cyc_pop:
+	mlx5_wq_cyc_pop(wq);
 }
 
 #endif /* CONFIG_MLX5_EN_IPSEC */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.c b/drivers/net/ethernet/mellanox/mlx5/core/wq.c
index 5b8b35392025..b97bb72b4db4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/wq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.c
@@ -85,6 +85,7 @@  int mlx5_wq_cyc_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
 	mlx5_fill_fbc(MLX5_GET(wq, wqc, log_wq_stride),
 		      MLX5_GET(wq, wqc, log_wq_sz),
 		      fbc);
+	wq->sz    = wq->fbc.sz_m1 + 1;
 
 	err = mlx5_db_alloc_node(mdev, &wq_ctrl->db, param->db_numa_node);
 	if (err) {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.h b/drivers/net/ethernet/mellanox/mlx5/core/wq.h
index b9d7c01fc7cb..0b47126815b6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/wq.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.h
@@ -51,6 +51,9 @@  struct mlx5_wq_ctrl {
 struct mlx5_wq_cyc {
 	struct mlx5_frag_buf_ctrl fbc;
 	__be32			*db;
+	u16			sz;
+	u16			wqe_ctr;
+	u16			cur_sz;
 };
 
 struct mlx5_wq_qp {
@@ -95,6 +98,43 @@  u32 mlx5_wq_ll_get_size(struct mlx5_wq_ll *wq);
 
 void mlx5_wq_destroy(struct mlx5_wq_ctrl *wq_ctrl);
 
+static inline int mlx5_wq_cyc_is_full(struct mlx5_wq_cyc *wq)
+{
+	return wq->cur_sz == wq->sz;
+}
+
+static inline int mlx5_wq_cyc_missing(struct mlx5_wq_cyc *wq)
+{
+	return wq->sz - wq->cur_sz;
+}
+
+static inline int mlx5_wq_cyc_is_empty(struct mlx5_wq_cyc *wq)
+{
+	return !wq->cur_sz;
+}
+
+static inline void mlx5_wq_cyc_push(struct mlx5_wq_cyc *wq)
+{
+	wq->wqe_ctr++;
+	wq->cur_sz++;
+}
+
+static inline void mlx5_wq_cyc_push_n(struct mlx5_wq_cyc *wq, u8 n)
+{
+	wq->wqe_ctr += n;
+	wq->cur_sz += n;
+}
+
+static inline void mlx5_wq_cyc_pop(struct mlx5_wq_cyc *wq)
+{
+	wq->cur_sz--;
+}
+
+static inline void mlx5_wq_cyc_update_db_record(struct mlx5_wq_cyc *wq)
+{
+	*wq->db = cpu_to_be32(wq->wqe_ctr);
+}
+
 static inline u16 mlx5_wq_cyc_ctr2ix(struct mlx5_wq_cyc *wq, u16 ctr)
 {
 	return ctr & wq->fbc.sz_m1;
@@ -105,6 +145,16 @@  static inline u16 mlx5_wq_cyc_ctr2fragix(struct mlx5_wq_cyc *wq, u16 ctr)
 	return ctr & wq->fbc.frag_sz_m1;
 }
 
+static inline u16 mlx5_wq_cyc_get_head(struct mlx5_wq_cyc *wq)
+{
+	return mlx5_wq_cyc_ctr2ix(wq, wq->wqe_ctr);
+}
+
+static inline u16 mlx5_wq_cyc_get_tail(struct mlx5_wq_cyc *wq)
+{
+	return mlx5_wq_cyc_ctr2ix(wq, wq->wqe_ctr - wq->cur_sz);
+}
+
 static inline void *mlx5_wq_cyc_get_wqe(struct mlx5_wq_cyc *wq, u16 ix)
 {
 	return mlx5_frag_buf_get_wqe(&wq->fbc, ix);
@@ -179,11 +229,6 @@  static inline int mlx5_wq_ll_is_empty(struct mlx5_wq_ll *wq)
 	return !wq->cur_sz;
 }
 
-static inline u16 mlx5_wq_ll_ctr2ix(struct mlx5_wq_ll *wq, u16 ctr)
-{
-	return ctr & wq->fbc.sz_m1;
-}
-
 static inline void *mlx5_wq_ll_get_wqe(struct mlx5_wq_ll *wq, u16 ix)
 {
 	return mlx5_frag_buf_get_wqe(&wq->fbc, ix);