diff mbox series

[mlx5-next,15/16] net/mlx5: Enable host PF HCA after eswitch is initialized

Message ID 20201120230339.651609-16-saeedm@nvidia.com
State Superseded
Headers show
Series mlx5 next updates 2020-11-20 | expand

Commit Message

Saeed Mahameed Nov. 20, 2020, 11:03 p.m. UTC
From: Parav Pandit <parav@nvidia.com>

Currently ECPF enables external host PF too early in the initialization
sequence for Ethernet links when ECPF is eswitch manager.

Due to this, when external host PF driver is loaded, host PF's HCA CAP has
inner_ip_version supported by NIC RX flow table.
This capability is later updated by firmware after ECPF driver enables
ENCAP/DECAP as eswitch manager.

This results into a timing race condition, where CREATE_TIR command
fails with a below syndrome on host PF.

mlx5_cmd_check:775:(pid 510): CREATE_TIR(0x900) op_mod(0x0) failed,
status bad parameter(0x3), syndrome (0x562b00)

Hence, enable the external host PF after necessary eswitch and per vport
initialization is completed.
Continue to enable host PF when eswitch manager capability is off for a
ECPF.

Signed-off-by: Parav Pandit <parav@nvidia.com>
Reviewed-by: Bodong Wang <bodong@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 .../net/ethernet/mellanox/mlx5/core/ecpf.c    | 35 ++++++++++++++-----
 .../net/ethernet/mellanox/mlx5/core/ecpf.h    |  3 ++
 .../net/ethernet/mellanox/mlx5/core/eswitch.c | 29 ++++++++++++++-
 .../net/ethernet/mellanox/mlx5/core/main.c    | 18 +++++-----
 4 files changed, 66 insertions(+), 19 deletions(-)
diff mbox series

Patch

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c
index 68ca0e2b26cd..464eb3a18450 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c
@@ -8,7 +8,16 @@  bool mlx5_read_embedded_cpu(struct mlx5_core_dev *dev)
 	return (ioread32be(&dev->iseg->initializing) >> MLX5_ECPU_BIT_NUM) & 1;
 }
 
-static int mlx5_cmd_host_pf_enable_hca(struct mlx5_core_dev *dev)
+static bool mlx5_ecpf_esw_admins_host_pf(const struct mlx5_core_dev *dev)
+{
+	/* In separate host mode, PF enables itself.
+	 * When ECPF is eswitch manager, eswitch enables host PF after
+	 * eswitch is setup.
+	 */
+	return mlx5_core_is_ecpf_esw_manager(dev);
+}
+
+int mlx5_cmd_host_pf_enable_hca(struct mlx5_core_dev *dev)
 {
 	u32 out[MLX5_ST_SZ_DW(enable_hca_out)] = {};
 	u32 in[MLX5_ST_SZ_DW(enable_hca_in)]   = {};
@@ -19,7 +28,7 @@  static int mlx5_cmd_host_pf_enable_hca(struct mlx5_core_dev *dev)
 	return mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
 }
 
-static int mlx5_cmd_host_pf_disable_hca(struct mlx5_core_dev *dev)
+int mlx5_cmd_host_pf_disable_hca(struct mlx5_core_dev *dev)
 {
 	u32 out[MLX5_ST_SZ_DW(disable_hca_out)] = {};
 	u32 in[MLX5_ST_SZ_DW(disable_hca_in)]   = {};
@@ -34,6 +43,12 @@  static int mlx5_host_pf_init(struct mlx5_core_dev *dev)
 {
 	int err;
 
+	if (mlx5_ecpf_esw_admins_host_pf(dev))
+		return 0;
+
+	/* ECPF shall enable HCA for host PF in the same way a PF
+	 * does this for its VFs when ECPF is not a eswitch manager.
+	 */
 	err = mlx5_cmd_host_pf_enable_hca(dev);
 	if (err)
 		mlx5_core_err(dev, "Failed to enable external host PF HCA err(%d)\n", err);
@@ -45,15 +60,14 @@  static void mlx5_host_pf_cleanup(struct mlx5_core_dev *dev)
 {
 	int err;
 
+	if (mlx5_ecpf_esw_admins_host_pf(dev))
+		return;
+
 	err = mlx5_cmd_host_pf_disable_hca(dev);
 	if (err) {
 		mlx5_core_err(dev, "Failed to disable external host PF HCA err(%d)\n", err);
 		return;
 	}
-
-	err = mlx5_wait_for_pages(dev, &dev->priv.host_pf_pages);
-	if (err)
-		mlx5_core_warn(dev, "Timeout reclaiming external host PF pages err(%d)\n", err);
 }
 
 int mlx5_ec_init(struct mlx5_core_dev *dev)
@@ -61,16 +75,19 @@  int mlx5_ec_init(struct mlx5_core_dev *dev)
 	if (!mlx5_core_is_ecpf(dev))
 		return 0;
 
-	/* ECPF shall enable HCA for host PF in the same way a PF
-	 * does this for its VFs.
-	 */
 	return mlx5_host_pf_init(dev);
 }
 
 void mlx5_ec_cleanup(struct mlx5_core_dev *dev)
 {
+	int err;
+
 	if (!mlx5_core_is_ecpf(dev))
 		return;
 
 	mlx5_host_pf_cleanup(dev);
+
+	err = mlx5_wait_for_pages(dev, &dev->priv.host_pf_pages);
+	if (err)
+		mlx5_core_warn(dev, "Timeout reclaiming external host PF pages err(%d)\n", err);
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ecpf.h b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.h
index d3d7a00a02ac..40b6ad76dca6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/ecpf.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.h
@@ -17,6 +17,9 @@  bool mlx5_read_embedded_cpu(struct mlx5_core_dev *dev);
 int mlx5_ec_init(struct mlx5_core_dev *dev);
 void mlx5_ec_cleanup(struct mlx5_core_dev *dev);
 
+int mlx5_cmd_host_pf_enable_hca(struct mlx5_core_dev *dev);
+int mlx5_cmd_host_pf_disable_hca(struct mlx5_core_dev *dev);
+
 #else  /* CONFIG_MLX5_ESWITCH */
 
 static inline bool
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index 6e6a9a563992..dcd8946a843c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -1469,6 +1469,26 @@  int mlx5_eswitch_load_vf_vports(struct mlx5_eswitch *esw, u16 num_vfs,
 	return err;
 }
 
+static int host_pf_enable_hca(struct mlx5_core_dev *dev)
+{
+	if (!mlx5_core_is_ecpf(dev))
+		return 0;
+
+	/* Once vport and representor are ready, take out the external host PF
+	 * out of initializing state. Enabling HCA clears the iser->initializing
+	 * bit and host PF driver loading can progress.
+	 */
+	return mlx5_cmd_host_pf_enable_hca(dev);
+}
+
+static void host_pf_disable_hca(struct mlx5_core_dev *dev)
+{
+	if (!mlx5_core_is_ecpf(dev))
+		return;
+
+	mlx5_cmd_host_pf_disable_hca(dev);
+}
+
 /* mlx5_eswitch_enable_pf_vf_vports() enables vports of PF, ECPF and VFs
  * whichever are present on the eswitch.
  */
@@ -1483,6 +1503,11 @@  mlx5_eswitch_enable_pf_vf_vports(struct mlx5_eswitch *esw,
 	if (ret)
 		return ret;
 
+	/* Enable external host PF HCA */
+	ret = host_pf_enable_hca(esw->dev);
+	if (ret)
+		goto pf_hca_err;
+
 	/* Enable ECPF vport */
 	if (mlx5_ecpf_vport_exists(esw->dev)) {
 		ret = mlx5_eswitch_load_vport(esw, MLX5_VPORT_ECPF, enabled_events);
@@ -1500,8 +1525,9 @@  mlx5_eswitch_enable_pf_vf_vports(struct mlx5_eswitch *esw,
 vf_err:
 	if (mlx5_ecpf_vport_exists(esw->dev))
 		mlx5_eswitch_unload_vport(esw, MLX5_VPORT_ECPF);
-
 ecpf_err:
+	host_pf_disable_hca(esw->dev);
+pf_hca_err:
 	mlx5_eswitch_unload_vport(esw, MLX5_VPORT_PF);
 	return ret;
 }
@@ -1516,6 +1542,7 @@  void mlx5_eswitch_disable_pf_vf_vports(struct mlx5_eswitch *esw)
 	if (mlx5_ecpf_vport_exists(esw->dev))
 		mlx5_eswitch_unload_vport(esw, MLX5_VPORT_ECPF);
 
+	host_pf_disable_hca(esw->dev);
 	mlx5_eswitch_unload_vport(esw, MLX5_VPORT_PF);
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index a9757ccb9d16..d86f06f14cd3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -1126,23 +1126,23 @@  static int mlx5_load(struct mlx5_core_dev *dev)
 		goto err_sriov;
 	}
 
-	err = mlx5_sriov_attach(dev);
-	if (err) {
-		mlx5_core_err(dev, "sriov init failed %d\n", err);
-		goto err_sriov;
-	}
-
 	err = mlx5_ec_init(dev);
 	if (err) {
 		mlx5_core_err(dev, "Failed to init embedded CPU\n");
 		goto err_ec;
 	}
 
+	err = mlx5_sriov_attach(dev);
+	if (err) {
+		mlx5_core_err(dev, "sriov init failed %d\n", err);
+		goto err_sriov;
+	}
+
 	return 0;
 
-err_ec:
-	mlx5_sriov_detach(dev);
 err_sriov:
+	mlx5_ec_cleanup(dev);
+err_ec:
 	mlx5_cleanup_fs(dev);
 err_fs:
 	mlx5_accel_tls_cleanup(dev);
@@ -1168,8 +1168,8 @@  static int mlx5_load(struct mlx5_core_dev *dev)
 
 static void mlx5_unload(struct mlx5_core_dev *dev)
 {
-	mlx5_ec_cleanup(dev);
 	mlx5_sriov_detach(dev);
+	mlx5_ec_cleanup(dev);
 	mlx5_cleanup_fs(dev);
 	mlx5_accel_ipsec_cleanup(dev);
 	mlx5_accel_tls_cleanup(dev);