diff mbox series

[net-next,03/15] net/mlx5: Add Crdump FW snapshot support

Message ID 20190505003207.1353-4-saeedm@mellanox.com
State Changes Requested
Delegated to: David Miller
Headers show
Series [net-next,01/15] net/mlx5: Move all devlink related functions calls to devlink.c | expand

Commit Message

Saeed Mahameed May 5, 2019, 12:33 a.m. UTC
From: Alex Vesker <valex@mellanox.com>

Crdump allows the driver to create a snapshot of the FW PCI crspace.
This is useful in case of catastrophic issues which may require FW
reset. The snapshot can be used for later debug.

The snapshot is exposed using devlink region_snapshot in downstream patch,
cr-space address regions are registered on init and snapshots are attached
once a new snapshot is collected by the driver.

Signed-off-by: Alex Vesker <valex@mellanox.com>
Signed-off-by: Moshe Shemesh <moshe@mellanox.com>
Reviewed-by: Feras Daoud <ferasda@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 .../net/ethernet/mellanox/mlx5/core/Makefile  |   2 +-
 .../ethernet/mellanox/mlx5/core/diag/crdump.c | 179 ++++++++++++++++++
 .../net/ethernet/mellanox/mlx5/core/health.c  |   1 +
 .../ethernet/mellanox/mlx5/core/lib/mlx5.h    |   4 +
 .../net/ethernet/mellanox/mlx5/core/main.c    |   5 +
 include/linux/mlx5/driver.h                   |   4 +
 6 files changed, 194 insertions(+), 1 deletion(-)
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/diag/crdump.c

Comments

Jiri Pirko May 5, 2019, 3:36 p.m. UTC | #1
Sun, May 05, 2019 at 02:33:00AM CEST, saeedm@mellanox.com wrote:
>From: Alex Vesker <valex@mellanox.com>
>
>Crdump allows the driver to create a snapshot of the FW PCI crspace.
>This is useful in case of catastrophic issues which may require FW
>reset. The snapshot can be used for later debug.
>
>The snapshot is exposed using devlink region_snapshot in downstream patch,
>cr-space address regions are registered on init and snapshots are attached
>once a new snapshot is collected by the driver.
>
>Signed-off-by: Alex Vesker <valex@mellanox.com>
>Signed-off-by: Moshe Shemesh <moshe@mellanox.com>
>Reviewed-by: Feras Daoud <ferasda@mellanox.com>
>Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
>---
> .../net/ethernet/mellanox/mlx5/core/Makefile  |   2 +-
> .../ethernet/mellanox/mlx5/core/diag/crdump.c | 179 ++++++++++++++++++
> .../net/ethernet/mellanox/mlx5/core/health.c  |   1 +
> .../ethernet/mellanox/mlx5/core/lib/mlx5.h    |   4 +
> .../net/ethernet/mellanox/mlx5/core/main.c    |   5 +
> include/linux/mlx5/driver.h                   |   4 +
> 6 files changed, 194 insertions(+), 1 deletion(-)
> create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/diag/crdump.c
>
>diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
>index 34d9a079b608..5feed9e1bec7 100644
>--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
>+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
>@@ -16,7 +16,7 @@ mlx5_core-y :=	main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \
> 		transobj.o vport.o sriov.o fs_cmd.o fs_core.o \
> 		fs_counters.o rl.o lag.o dev.o events.o wq.o lib/gid.o \
> 		lib/devcom.o lib/pci_vsc.o diag/fs_tracepoint.o \
>-		diag/fw_tracer.o devlink.o
>+		diag/fw_tracer.o diag/crdump.o devlink.o
> 
> #
> # Netdev basic
>diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/crdump.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/crdump.c
>new file mode 100644
>index 000000000000..6430ceeefb53
>--- /dev/null
>+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/crdump.c
>@@ -0,0 +1,179 @@
>+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
>+/* Copyright (c) 2019 Mellanox Technologies */
>+
>+#include <linux/proc_fs.h>
>+#include <linux/mlx5/driver.h>
>+#include <net/devlink.h>
>+#include "mlx5_core.h"
>+#include "lib/pci_vsc.h"
>+#include "lib/mlx5.h"
>+
>+#define BAD_ACCESS			0xBADACCE5
>+#define MLX5_PROTECTED_CR_SCAN_CRSPACE	0x7
>+#define MAX_NUM_OF_DUMPS_TO_STORE	(8)

Please make your local defines prefixed with "mlx5_". For example
"BAD_ACCESS" sounds way too generic.


>+
>+static const char *region_cr_space_str = "cr-space";
>+
>+struct mlx5_fw_crdump {
>+	u32			size;
>+	struct devlink_region	*region_crspace;
>+};
>+
>+static bool mlx5_crdump_enbaled(struct mlx5_core_dev *dev)

Typo: s/enbaled/enabled/


>+{
>+	struct mlx5_priv *priv = &dev->priv;
>+
>+	return (!!priv->health.crdump);

Poitlesss brackets. Please remove.


>+}
>+
>+static int mlx5_crdump_fill(struct mlx5_core_dev *dev,
>+			    char *crdump_region, u32 *snapshot_id)
>+{
>+	struct devlink *devlink = priv_to_devlink(dev);
>+	struct mlx5_priv *priv = &dev->priv;
>+	struct mlx5_fw_crdump *crdump = priv->health.crdump;
>+	int i, ret = 0;
>+	u32 *cr_data;
>+	u32 id;
>+
>+	cr_data = kvmalloc(crdump->size, GFP_KERNEL);
>+	if (!cr_data)
>+		return -ENOMEM;
>+
>+	for (i = 0; i < (crdump->size / 4); i++)
>+		cr_data[i] = BAD_ACCESS;
>+
>+	ret = mlx5_vsc_gw_read_block_fast(dev, cr_data, crdump->size);
>+	if (ret <= 0) {
>+		if (ret == 0)
>+			ret = -EIO;
>+		goto free_data;
>+	}
>+
>+	if (crdump->size != ret) {
>+		mlx5_core_warn(dev, "failed to read full dump, read %d out of %u\n",
>+			       ret, crdump->size);
>+		ret = -EINVAL;
>+		goto free_data;
>+	}
>+
>+	/* Get the available snapshot ID for the dumps */
>+	id = devlink_region_shapshot_id_get(devlink);
>+	ret = devlink_region_snapshot_create(crdump->region_crspace,
>+					     crdump->size, (u8 *)cr_data,
>+					     id, &kvfree);
>+	if (ret) {
>+		mlx5_core_warn(dev, "crdump: devlink create %s snapshot id %d err %d\n",
>+			       region_cr_space_str, id, ret);
>+		goto free_data;
>+	} else {
>+		*snapshot_id = id;
>+		strcpy(crdump_region, region_cr_space_str);
>+	}
>+	return 0;
>+
>+free_data:
>+	kvfree(cr_data);
>+	return ret;
>+}
>+
>+int mlx5_crdump_collect(struct mlx5_core_dev *dev,
>+			char *crdump_region, u32 *snapshot_id)
>+{
>+	int ret = 0;
>+
>+	if (!mlx5_crdump_enbaled(dev))
>+		return -ENODEV;
>+
>+	ret = mlx5_vsc_gw_lock(dev);
>+	if (ret) {
>+		mlx5_core_warn(dev, "crdump: failed to lock vsc gw err %d\n",
>+			       ret);
>+		return ret;
>+	}
>+
>+	ret = mlx5_vsc_gw_set_space(dev, MLX5_VSC_SPACE_SCAN_CRSPACE, NULL);
>+	if (ret)
>+		goto unlock;
>+
>+	ret = mlx5_crdump_fill(dev, crdump_region, snapshot_id);
>+
>+unlock:
>+	mlx5_vsc_gw_unlock(dev);
>+	return ret;
>+}
>+
>+int mlx5_crdump_init(struct mlx5_core_dev *dev)
>+{
>+	struct devlink *devlink = priv_to_devlink(dev);
>+	struct mlx5_priv *priv = &dev->priv;
>+	struct mlx5_fw_crdump *crdump;
>+	u32 space_size;
>+	int ret;
>+
>+	if (!mlx5_core_is_pf(dev) || !mlx5_vsc_accessible(dev) ||
>+	    mlx5_crdump_enbaled(dev))
>+		return 0;
>+
>+	ret = mlx5_vsc_gw_lock(dev);
>+	if (ret)
>+		return ret;
>+
>+	/* Check if space is supported and get space size */
>+	ret = mlx5_vsc_gw_set_space(dev, MLX5_VSC_SPACE_SCAN_CRSPACE,
>+				    &space_size);
>+	if (ret) {
>+		/* Unlock and mask error since space is not supported */
>+		mlx5_vsc_gw_unlock(dev);
>+		return 0;
>+	}
>+
>+	if (!space_size) {
>+		mlx5_core_warn(dev, "Invalid Crspace size, zero\n");
>+		mlx5_vsc_gw_unlock(dev);
>+		return -EINVAL;
>+	}
>+
>+	ret = mlx5_vsc_gw_unlock(dev);
>+	if (ret)
>+		return ret;
>+
>+	crdump = kzalloc(sizeof(*crdump), GFP_KERNEL);
>+	if (!crdump)
>+		return -ENOMEM;
>+
>+	/* Create cr-space region */
>+	crdump->size = space_size;
>+	crdump->region_crspace =
>+		devlink_region_create(devlink,
>+				      region_cr_space_str,
>+				      MAX_NUM_OF_DUMPS_TO_STORE,
>+				      space_size);

Unnecessary wraps.


>+	if (IS_ERR(crdump->region_crspace)) {
>+		mlx5_core_warn(dev,

Unnecessary wrap.


>+			       "crdump: create devlink region %s err %ld\n",
>+			       region_cr_space_str,
>+			       PTR_ERR(crdump->region_crspace));
>+		ret = PTR_ERR(crdump->region_crspace);
>+		goto free_crdump;
>+	}
>+	priv->health.crdump = crdump;
>+	return 0;
>+
>+free_crdump:
>+	kfree(crdump);
>+	return ret;
>+}
>+
>+void mlx5_crdump_cleanup(struct mlx5_core_dev *dev)
>+{
>+	struct mlx5_priv *priv = &dev->priv;
>+	struct mlx5_fw_crdump *crdump = priv->health.crdump;
>+
>+	if (!crdump)
>+		return;
>+
>+	devlink_region_destroy(crdump->region_crspace);
>+	kfree(crdump);
>+	priv->health.crdump = NULL;

Why do you need to have this NULL. Where do you check for NULL?


>+}
>diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c
>index a2656f4008d9..90f3da6da7f9 100644
>--- a/drivers/net/ethernet/mellanox/mlx5/core/health.c
>+++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c
>@@ -388,6 +388,7 @@ int mlx5_health_init(struct mlx5_core_dev *dev)
> 	spin_lock_init(&health->wq_lock);
> 	INIT_WORK(&health->work, health_care);
> 	INIT_DELAYED_WORK(&health->recover_work, health_recover);
>+	health->crdump = NULL;

Same here.


> 
> 	return 0;
> }
>diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h
>index 397a2847867a..3c9a6dedccaa 100644
>--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h
>+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h
>@@ -41,6 +41,10 @@ int  mlx5_core_reserve_gids(struct mlx5_core_dev *dev, unsigned int count);
> void mlx5_core_unreserve_gids(struct mlx5_core_dev *dev, unsigned int count);
> int  mlx5_core_reserved_gid_alloc(struct mlx5_core_dev *dev, int *gid_index);
> void mlx5_core_reserved_gid_free(struct mlx5_core_dev *dev, int gid_index);
>+int mlx5_crdump_init(struct mlx5_core_dev *dev);
>+void mlx5_crdump_cleanup(struct mlx5_core_dev *dev);
>+int mlx5_crdump_collect(struct mlx5_core_dev *dev,
>+			char *crdump_region, u32 *snapshot_id);
> 
> /* TODO move to lib/events.h */
> 
>diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
>index 64eb2a558b30..43f5487de4c3 100644
>--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
>+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
>@@ -1320,6 +1320,10 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *id)
> 	if (err)
> 		goto clean_load;
> 
>+	err = mlx5_crdump_init(dev);
>+	if (err)
>+		dev_err(&pdev->dev, "mlx5_crdump_init failed with error code %d\n", err);
>+
> 	pci_save_state(pdev);
> 	return 0;
> 
>@@ -1341,6 +1345,7 @@ static void remove_one(struct pci_dev *pdev)
> 	struct mlx5_core_dev *dev  = pci_get_drvdata(pdev);
> 	struct devlink *devlink = priv_to_devlink(dev);
> 
>+	mlx5_crdump_cleanup(dev);
> 	mlx5_devlink_unregister(devlink);
> 	mlx5_unregister_device(dev);
> 
>diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
>index 56d0a116f575..ddf6f41a75d3 100644
>--- a/include/linux/mlx5/driver.h
>+++ b/include/linux/mlx5/driver.h
>@@ -53,6 +53,7 @@
> #include <linux/mlx5/eq.h>
> #include <linux/timecounter.h>
> #include <linux/ptp_clock_kernel.h>
>+#include <net/devlink.h>
> 
> enum {
> 	MLX5_BOARD_ID_LEN = 64,
>@@ -427,6 +428,8 @@ struct mlx5_sq_bfreg {
> 	unsigned int		offset;
> };
> 
>+struct mlx5_fw_crdump;
>+
> struct mlx5_core_health {
> 	struct health_buffer __iomem   *health;
> 	__be32 __iomem		       *health_counter;
>@@ -440,6 +443,7 @@ struct mlx5_core_health {
> 	unsigned long			flags;
> 	struct work_struct		work;
> 	struct delayed_work		recover_work;
>+	struct mlx5_fw_crdump	       *crdump;
> };
> 
> struct mlx5_qp_table {
>-- 
>2.20.1
>
diff mbox series

Patch

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index 34d9a079b608..5feed9e1bec7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -16,7 +16,7 @@  mlx5_core-y :=	main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \
 		transobj.o vport.o sriov.o fs_cmd.o fs_core.o \
 		fs_counters.o rl.o lag.o dev.o events.o wq.o lib/gid.o \
 		lib/devcom.o lib/pci_vsc.o diag/fs_tracepoint.o \
-		diag/fw_tracer.o devlink.o
+		diag/fw_tracer.o diag/crdump.o devlink.o
 
 #
 # Netdev basic
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/crdump.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/crdump.c
new file mode 100644
index 000000000000..6430ceeefb53
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/crdump.c
@@ -0,0 +1,179 @@ 
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2019 Mellanox Technologies */
+
+#include <linux/proc_fs.h>
+#include <linux/mlx5/driver.h>
+#include <net/devlink.h>
+#include "mlx5_core.h"
+#include "lib/pci_vsc.h"
+#include "lib/mlx5.h"
+
+#define BAD_ACCESS			0xBADACCE5
+#define MLX5_PROTECTED_CR_SCAN_CRSPACE	0x7
+#define MAX_NUM_OF_DUMPS_TO_STORE	(8)
+
+static const char *region_cr_space_str = "cr-space";
+
+struct mlx5_fw_crdump {
+	u32			size;
+	struct devlink_region	*region_crspace;
+};
+
+static bool mlx5_crdump_enbaled(struct mlx5_core_dev *dev)
+{
+	struct mlx5_priv *priv = &dev->priv;
+
+	return (!!priv->health.crdump);
+}
+
+static int mlx5_crdump_fill(struct mlx5_core_dev *dev,
+			    char *crdump_region, u32 *snapshot_id)
+{
+	struct devlink *devlink = priv_to_devlink(dev);
+	struct mlx5_priv *priv = &dev->priv;
+	struct mlx5_fw_crdump *crdump = priv->health.crdump;
+	int i, ret = 0;
+	u32 *cr_data;
+	u32 id;
+
+	cr_data = kvmalloc(crdump->size, GFP_KERNEL);
+	if (!cr_data)
+		return -ENOMEM;
+
+	for (i = 0; i < (crdump->size / 4); i++)
+		cr_data[i] = BAD_ACCESS;
+
+	ret = mlx5_vsc_gw_read_block_fast(dev, cr_data, crdump->size);
+	if (ret <= 0) {
+		if (ret == 0)
+			ret = -EIO;
+		goto free_data;
+	}
+
+	if (crdump->size != ret) {
+		mlx5_core_warn(dev, "failed to read full dump, read %d out of %u\n",
+			       ret, crdump->size);
+		ret = -EINVAL;
+		goto free_data;
+	}
+
+	/* Get the available snapshot ID for the dumps */
+	id = devlink_region_shapshot_id_get(devlink);
+	ret = devlink_region_snapshot_create(crdump->region_crspace,
+					     crdump->size, (u8 *)cr_data,
+					     id, &kvfree);
+	if (ret) {
+		mlx5_core_warn(dev, "crdump: devlink create %s snapshot id %d err %d\n",
+			       region_cr_space_str, id, ret);
+		goto free_data;
+	} else {
+		*snapshot_id = id;
+		strcpy(crdump_region, region_cr_space_str);
+	}
+	return 0;
+
+free_data:
+	kvfree(cr_data);
+	return ret;
+}
+
+int mlx5_crdump_collect(struct mlx5_core_dev *dev,
+			char *crdump_region, u32 *snapshot_id)
+{
+	int ret = 0;
+
+	if (!mlx5_crdump_enbaled(dev))
+		return -ENODEV;
+
+	ret = mlx5_vsc_gw_lock(dev);
+	if (ret) {
+		mlx5_core_warn(dev, "crdump: failed to lock vsc gw err %d\n",
+			       ret);
+		return ret;
+	}
+
+	ret = mlx5_vsc_gw_set_space(dev, MLX5_VSC_SPACE_SCAN_CRSPACE, NULL);
+	if (ret)
+		goto unlock;
+
+	ret = mlx5_crdump_fill(dev, crdump_region, snapshot_id);
+
+unlock:
+	mlx5_vsc_gw_unlock(dev);
+	return ret;
+}
+
+int mlx5_crdump_init(struct mlx5_core_dev *dev)
+{
+	struct devlink *devlink = priv_to_devlink(dev);
+	struct mlx5_priv *priv = &dev->priv;
+	struct mlx5_fw_crdump *crdump;
+	u32 space_size;
+	int ret;
+
+	if (!mlx5_core_is_pf(dev) || !mlx5_vsc_accessible(dev) ||
+	    mlx5_crdump_enbaled(dev))
+		return 0;
+
+	ret = mlx5_vsc_gw_lock(dev);
+	if (ret)
+		return ret;
+
+	/* Check if space is supported and get space size */
+	ret = mlx5_vsc_gw_set_space(dev, MLX5_VSC_SPACE_SCAN_CRSPACE,
+				    &space_size);
+	if (ret) {
+		/* Unlock and mask error since space is not supported */
+		mlx5_vsc_gw_unlock(dev);
+		return 0;
+	}
+
+	if (!space_size) {
+		mlx5_core_warn(dev, "Invalid Crspace size, zero\n");
+		mlx5_vsc_gw_unlock(dev);
+		return -EINVAL;
+	}
+
+	ret = mlx5_vsc_gw_unlock(dev);
+	if (ret)
+		return ret;
+
+	crdump = kzalloc(sizeof(*crdump), GFP_KERNEL);
+	if (!crdump)
+		return -ENOMEM;
+
+	/* Create cr-space region */
+	crdump->size = space_size;
+	crdump->region_crspace =
+		devlink_region_create(devlink,
+				      region_cr_space_str,
+				      MAX_NUM_OF_DUMPS_TO_STORE,
+				      space_size);
+	if (IS_ERR(crdump->region_crspace)) {
+		mlx5_core_warn(dev,
+			       "crdump: create devlink region %s err %ld\n",
+			       region_cr_space_str,
+			       PTR_ERR(crdump->region_crspace));
+		ret = PTR_ERR(crdump->region_crspace);
+		goto free_crdump;
+	}
+	priv->health.crdump = crdump;
+	return 0;
+
+free_crdump:
+	kfree(crdump);
+	return ret;
+}
+
+void mlx5_crdump_cleanup(struct mlx5_core_dev *dev)
+{
+	struct mlx5_priv *priv = &dev->priv;
+	struct mlx5_fw_crdump *crdump = priv->health.crdump;
+
+	if (!crdump)
+		return;
+
+	devlink_region_destroy(crdump->region_crspace);
+	kfree(crdump);
+	priv->health.crdump = NULL;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c
index a2656f4008d9..90f3da6da7f9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/health.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c
@@ -388,6 +388,7 @@  int mlx5_health_init(struct mlx5_core_dev *dev)
 	spin_lock_init(&health->wq_lock);
 	INIT_WORK(&health->work, health_care);
 	INIT_DELAYED_WORK(&health->recover_work, health_recover);
+	health->crdump = NULL;
 
 	return 0;
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h
index 397a2847867a..3c9a6dedccaa 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h
@@ -41,6 +41,10 @@  int  mlx5_core_reserve_gids(struct mlx5_core_dev *dev, unsigned int count);
 void mlx5_core_unreserve_gids(struct mlx5_core_dev *dev, unsigned int count);
 int  mlx5_core_reserved_gid_alloc(struct mlx5_core_dev *dev, int *gid_index);
 void mlx5_core_reserved_gid_free(struct mlx5_core_dev *dev, int gid_index);
+int mlx5_crdump_init(struct mlx5_core_dev *dev);
+void mlx5_crdump_cleanup(struct mlx5_core_dev *dev);
+int mlx5_crdump_collect(struct mlx5_core_dev *dev,
+			char *crdump_region, u32 *snapshot_id);
 
 /* TODO move to lib/events.h */
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 64eb2a558b30..43f5487de4c3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -1320,6 +1320,10 @@  static int init_one(struct pci_dev *pdev, const struct pci_device_id *id)
 	if (err)
 		goto clean_load;
 
+	err = mlx5_crdump_init(dev);
+	if (err)
+		dev_err(&pdev->dev, "mlx5_crdump_init failed with error code %d\n", err);
+
 	pci_save_state(pdev);
 	return 0;
 
@@ -1341,6 +1345,7 @@  static void remove_one(struct pci_dev *pdev)
 	struct mlx5_core_dev *dev  = pci_get_drvdata(pdev);
 	struct devlink *devlink = priv_to_devlink(dev);
 
+	mlx5_crdump_cleanup(dev);
 	mlx5_devlink_unregister(devlink);
 	mlx5_unregister_device(dev);
 
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 56d0a116f575..ddf6f41a75d3 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -53,6 +53,7 @@ 
 #include <linux/mlx5/eq.h>
 #include <linux/timecounter.h>
 #include <linux/ptp_clock_kernel.h>
+#include <net/devlink.h>
 
 enum {
 	MLX5_BOARD_ID_LEN = 64,
@@ -427,6 +428,8 @@  struct mlx5_sq_bfreg {
 	unsigned int		offset;
 };
 
+struct mlx5_fw_crdump;
+
 struct mlx5_core_health {
 	struct health_buffer __iomem   *health;
 	__be32 __iomem		       *health_counter;
@@ -440,6 +443,7 @@  struct mlx5_core_health {
 	unsigned long			flags;
 	struct work_struct		work;
 	struct delayed_work		recover_work;
+	struct mlx5_fw_crdump	       *crdump;
 };
 
 struct mlx5_qp_table {