diff mbox series

[net-next,14/16] net/mlx5e: Recover from rx timeout

Message ID 1562500388-16847-15-git-send-email-tariqt@mellanox.com
State Changes Requested
Delegated to: David Miller
Headers show
Series mlx5e devlink health reporters | expand

Commit Message

Tariq Toukan July 7, 2019, 11:53 a.m. UTC
From: Aya Levin <ayal@mellanox.com>

Add support for recovery from rx timeout. On driver open we post a NOP
work request on the rx channels to trigger napi in order to fill up the
rx rings. In case napi wasn't scheduled due to a lost interrupt, perform
EQ recovery.

Signed-off-by: Aya Levin <ayal@mellanox.com>
Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
---
 .../net/ethernet/mellanox/mlx5/core/en/health.h    |  1 +
 .../ethernet/mellanox/mlx5/core/en/reporter_rx.c   | 30 ++++++++++++++++++++++
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c  |  1 +
 3 files changed, 32 insertions(+)

Comments

Jiri Pirko July 9, 2019, 3:32 p.m. UTC | #1
Sun, Jul 07, 2019 at 01:53:06PM CEST, tariqt@mellanox.com wrote:
>From: Aya Levin <ayal@mellanox.com>
>
>Add support for recovery from rx timeout. On driver open we post NOP
>work request on the rx channels to trigger napi in order to fillup the
>rx rings. In case napi wasn't scheduled due to a lost interrupt, perform
>EQ recovery.
>
>Signed-off-by: Aya Levin <ayal@mellanox.com>
>Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
>---
> .../net/ethernet/mellanox/mlx5/core/en/health.h    |  1 +
> .../ethernet/mellanox/mlx5/core/en/reporter_rx.c   | 30 ++++++++++++++++++++++
> drivers/net/ethernet/mellanox/mlx5/core/en_main.c  |  1 +
> 3 files changed, 32 insertions(+)
>
>diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/health.h b/drivers/net/ethernet/mellanox/mlx5/core/en/health.h
>index e8c5d3bd86f1..aa46f7ecae53 100644
>--- a/drivers/net/ethernet/mellanox/mlx5/core/en/health.h
>+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/health.h
>@@ -19,6 +19,7 @@
> int mlx5e_reporter_rx_create(struct mlx5e_priv *priv);
> void mlx5e_reporter_rx_destroy(struct mlx5e_priv *priv);
> void mlx5e_reporter_icosq_cqe_err(struct mlx5e_icosq *icosq);
>+void mlx5e_reporter_rx_timeout(struct mlx5e_rq *rq);
> 
> #define MLX5E_REPORTER_PER_Q_MAX_LEN 256
> 
>diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c
>index c47e9a53bd53..7e7dba129330 100644
>--- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c
>+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c
>@@ -109,6 +109,36 @@ void mlx5e_reporter_icosq_cqe_err(struct mlx5e_icosq *icosq)
> 	mlx5e_health_report(priv, priv->rx_reporter, err_str, &err_ctx);
> }
> 
>+static int mlx5e_rx_reporter_timeout_recover(void *ctx)
>+{
>+	struct mlx5e_rq *rq = (struct mlx5e_rq *)ctx;

No need to cast. Please fix this in the rest of the patchset too.


>+	struct mlx5e_icosq *icosq = &rq->channel->icosq;
>+	struct mlx5_eq_comp *eq = rq->cq.mcq.eq;
>+	int err;
>+
>+	err = mlx5e_health_channel_eq_recover(eq, rq->channel);
>+	if (err)
>+		clear_bit(MLX5E_SQ_STATE_ENABLED, &icosq->state);
>+
>+	return err;
>+}
>+
>+void mlx5e_reporter_rx_timeout(struct mlx5e_rq *rq)
>+{
>+	struct mlx5e_icosq *icosq = &rq->channel->icosq;
>+	struct mlx5e_priv *priv = rq->channel->priv;
>+	char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN];
>+	struct mlx5e_err_ctx err_ctx = {};
>+
>+	err_ctx.ctx = rq;
>+	err_ctx.recover = mlx5e_rx_reporter_timeout_recover;
>+	sprintf(err_str,
>+		"RX timeout on channel: %d, ICOSQ: 0x%x RQ: 0x%x, CQ: 0x%x\n",
>+		icosq->channel->ix, icosq->sqn, rq->rqn, rq->cq.mcq.cqn);
>+
>+	mlx5e_health_report(priv, priv->rx_reporter, err_str, &err_ctx);
>+}
>+
> static int mlx5e_rx_reporter_recover_from_ctx(struct mlx5e_err_ctx *err_ctx)
> {
> 	return err_ctx->recover(err_ctx->ctx);
>diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
>index 2d57611ac579..1ebdeccf395d 100644
>--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
>+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
>@@ -809,6 +809,7 @@ int mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq, int wait_time)
> 	netdev_warn(c->netdev, "Failed to get min RX wqes on Channel[%d] RQN[0x%x] wq cur_sz(%d) min_rx_wqes(%d)\n",
> 		    c->ix, rq->rqn, mlx5e_rqwq_get_cur_sz(rq), min_wqes);
> 
>+	mlx5e_reporter_rx_timeout(rq);
> 	return -ETIMEDOUT;
> }
> 
>-- 
>1.8.3.1
>
diff mbox series

Patch

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/health.h b/drivers/net/ethernet/mellanox/mlx5/core/en/health.h
index e8c5d3bd86f1..aa46f7ecae53 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/health.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/health.h
@@ -19,6 +19,7 @@ 
 int mlx5e_reporter_rx_create(struct mlx5e_priv *priv);
 void mlx5e_reporter_rx_destroy(struct mlx5e_priv *priv);
 void mlx5e_reporter_icosq_cqe_err(struct mlx5e_icosq *icosq);
+void mlx5e_reporter_rx_timeout(struct mlx5e_rq *rq);
 
 #define MLX5E_REPORTER_PER_Q_MAX_LEN 256
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c
index c47e9a53bd53..7e7dba129330 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c
@@ -109,6 +109,36 @@  void mlx5e_reporter_icosq_cqe_err(struct mlx5e_icosq *icosq)
 	mlx5e_health_report(priv, priv->rx_reporter, err_str, &err_ctx);
 }
 
+static int mlx5e_rx_reporter_timeout_recover(void *ctx)
+{
+	struct mlx5e_rq *rq = (struct mlx5e_rq *)ctx;
+	struct mlx5e_icosq *icosq = &rq->channel->icosq;
+	struct mlx5_eq_comp *eq = rq->cq.mcq.eq;
+	int err;
+
+	err = mlx5e_health_channel_eq_recover(eq, rq->channel);
+	if (err)
+		clear_bit(MLX5E_SQ_STATE_ENABLED, &icosq->state);
+
+	return err;
+}
+
+void mlx5e_reporter_rx_timeout(struct mlx5e_rq *rq)
+{
+	struct mlx5e_icosq *icosq = &rq->channel->icosq;
+	struct mlx5e_priv *priv = rq->channel->priv;
+	char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN];
+	struct mlx5e_err_ctx err_ctx = {};
+
+	err_ctx.ctx = rq;
+	err_ctx.recover = mlx5e_rx_reporter_timeout_recover;
+	sprintf(err_str,
+		"RX timeout on channel: %d, ICOSQ: 0x%x RQ: 0x%x, CQ: 0x%x\n",
+		icosq->channel->ix, icosq->sqn, rq->rqn, rq->cq.mcq.cqn);
+
+	mlx5e_health_report(priv, priv->rx_reporter, err_str, &err_ctx);
+}
+
 static int mlx5e_rx_reporter_recover_from_ctx(struct mlx5e_err_ctx *err_ctx)
 {
 	return err_ctx->recover(err_ctx->ctx);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 2d57611ac579..1ebdeccf395d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -809,6 +809,7 @@  int mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq, int wait_time)
 	netdev_warn(c->netdev, "Failed to get min RX wqes on Channel[%d] RQN[0x%x] wq cur_sz(%d) min_rx_wqes(%d)\n",
 		    c->ix, rq->rqn, mlx5e_rqwq_get_cur_sz(rq), min_wqes);
 
+	mlx5e_reporter_rx_timeout(rq);
 	return -ETIMEDOUT;
 }