diff mbox series

[mlx5-next,15/15] RDMA/mlx5: Cleanup rep when doing unload

Message ID 20190617192247.25107-16-saeedm@mellanox.com
State Awaiting Upstream
Delegated to: David Miller
Headers show
Series Mellanox, mlx5 vport metadata matching | expand

Commit Message

Saeed Mahameed June 17, 2019, 7:23 p.m. UTC
From: Bodong Wang <bodong@mellanox.com>

When an IB rep is loaded, netdev for the same vport is saved for later
reference. However, it's not cleaned up when doing unload. For ECPF,
kernel crashes when driver is referring to the already removed netdev.

Following steps lead to a shown call trace:
1. Create n VFs from host PF
2. Distroy the VFs
3. Run "rdma link" from ARM

Call trace:
  mlx5_ib_get_netdev+0x9c/0xe8 [mlx5_ib]
  mlx5_query_port_roce+0x268/0x558 [mlx5_ib]
  mlx5_ib_rep_query_port+0x14/0x34 [mlx5_ib]
  ib_query_port+0x9c/0xfc [ib_core]
  fill_port_info+0x74/0x28c [ib_core]
  nldev_port_get_doit+0x1a8/0x1e8 [ib_core]
  rdma_nl_rcv_msg+0x16c/0x1c0 [ib_core]
  rdma_nl_rcv+0xe8/0x144 [ib_core]
  netlink_unicast+0x184/0x214
  netlink_sendmsg+0x288/0x354
  sock_sendmsg+0x18/0x2c
  __sys_sendto+0xbc/0x138
  __arm64_sys_sendto+0x28/0x34
  el0_svc_common+0xb0/0x100
  el0_svc_handler+0x6c/0x84
  el0_svc+0x8/0xc

Cleanup the rep and netdev reference when unloading IB rep.

Fixes: 26628e2d58c9 ("RDMA/mlx5: Move to single device multiport ports in switchdev mode")
Signed-off-by: Bodong Wang <bodong@mellanox.com>
Reviewed-by: Mark Bloch <markb@mellanox.com>
Reviewed-by: Parav Pandit <parav@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/infiniband/hw/mlx5/ib_rep.c | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

Comments

Leon Romanovsky June 18, 2019, 10:38 a.m. UTC | #1
On Mon, Jun 17, 2019 at 07:23:39PM +0000, Saeed Mahameed wrote:
> From: Bodong Wang <bodong@mellanox.com>
>
> When an IB rep is loaded, netdev for the same vport is saved for later
> reference. However, it's not cleaned up when doing unload. For ECPF,
> kernel crashes when driver is referring to the already removed netdev.
>
> Following steps lead to a shown call trace:
> 1. Create n VFs from host PF
> 2. Distroy the VFs
> 3. Run "rdma link" from ARM
>
> Call trace:
>   mlx5_ib_get_netdev+0x9c/0xe8 [mlx5_ib]
>   mlx5_query_port_roce+0x268/0x558 [mlx5_ib]
>   mlx5_ib_rep_query_port+0x14/0x34 [mlx5_ib]
>   ib_query_port+0x9c/0xfc [ib_core]
>   fill_port_info+0x74/0x28c [ib_core]
>   nldev_port_get_doit+0x1a8/0x1e8 [ib_core]
>   rdma_nl_rcv_msg+0x16c/0x1c0 [ib_core]
>   rdma_nl_rcv+0xe8/0x144 [ib_core]
>   netlink_unicast+0x184/0x214
>   netlink_sendmsg+0x288/0x354
>   sock_sendmsg+0x18/0x2c
>   __sys_sendto+0xbc/0x138
>   __arm64_sys_sendto+0x28/0x34
>   el0_svc_common+0xb0/0x100
>   el0_svc_handler+0x6c/0x84
>   el0_svc+0x8/0xc
>
> Cleanup the rep and netdev reference when unloading IB rep.
>
> Fixes: 26628e2d58c9 ("RDMA/mlx5: Move to single device multiport ports in switchdev mode")
> Signed-off-by: Bodong Wang <bodong@mellanox.com>
> Reviewed-by: Mark Bloch <markb@mellanox.com>
> Reviewed-by: Parav Pandit <parav@mellanox.com>
> Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
> ---
>  drivers/infiniband/hw/mlx5/ib_rep.c | 18 +++++++++++-------
>  1 file changed, 11 insertions(+), 7 deletions(-)
>

Thanks,
Reviewed-by: Leon Romanovsky <leonro@mellanox.com>
diff mbox series

Patch

diff --git a/drivers/infiniband/hw/mlx5/ib_rep.c b/drivers/infiniband/hw/mlx5/ib_rep.c
index da4b936b3219..a4a54ddebb71 100644
--- a/drivers/infiniband/hw/mlx5/ib_rep.c
+++ b/drivers/infiniband/hw/mlx5/ib_rep.c
@@ -17,6 +17,7 @@  mlx5_ib_set_vport_rep(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
 	vport_index = rep->vport_index;
 
 	ibdev->port[vport_index].rep = rep;
+	rep->rep_data[REP_IB].priv = ibdev;
 	write_lock(&ibdev->port[vport_index].roce.netdev_lock);
 	ibdev->port[vport_index].roce.netdev =
 		mlx5_ib_get_rep_netdev(dev->priv.eswitch, rep->vport);
@@ -68,15 +69,18 @@  mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
 static void
 mlx5_ib_vport_rep_unload(struct mlx5_eswitch_rep *rep)
 {
-	struct mlx5_ib_dev *dev;
-
-	if (!rep->rep_data[REP_IB].priv ||
-	    rep->vport != MLX5_VPORT_UPLINK)
-		return;
+	struct mlx5_ib_dev *dev = mlx5_ib_rep_to_dev(rep);
+	struct mlx5_ib_port *port;
 
-	dev = mlx5_ib_rep_to_dev(rep);
-	__mlx5_ib_remove(dev, dev->profile, MLX5_IB_STAGE_MAX);
+	port = &dev->port[rep->vport_index];
+	write_lock(&port->roce.netdev_lock);
+	port->roce.netdev = NULL;
+	write_unlock(&port->roce.netdev_lock);
 	rep->rep_data[REP_IB].priv = NULL;
+	port->rep = NULL;
+
+	if (rep->vport == MLX5_VPORT_UPLINK)
+		__mlx5_ib_remove(dev, dev->profile, MLX5_IB_STAGE_MAX);
 }
 
 static void *mlx5_ib_vport_get_proto_dev(struct mlx5_eswitch_rep *rep)