diff mbox series

[net-next,25/27] net/mlx5: Add support for FW fatal reporter dump

Message ID 1547737521-29888-26-git-send-email-eranbe@mellanox.com
State Changes Requested
Delegated to: David Miller
Headers show
Series Devlink health reporting and recovery system | expand

Commit Message

Eran Ben Elisha Jan. 17, 2019, 3:05 p.m. UTC
From: Moshe Shemesh <moshe@mellanox.com>

Add support for the dump callback of the mlx5 FW fatal reporter.
The FW fatal dump uses the cr-dump functionality to gather cr-space data for
debugging. The cr-dump uses the vsc interface, which remains valid even if the
FW command interface is not functional, as is the case on FW fatal errors.

Signed-off-by: Moshe Shemesh <moshe@mellanox.com>
Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com>
---
 .../net/ethernet/mellanox/mlx5/core/devlink.c | 91 +++++++++++++++++++
 1 file changed, 91 insertions(+)
diff mbox series

Patch

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
index ae08af00b101..406856002dd1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
@@ -252,9 +252,100 @@  mlx5_fw_fatal_reporter_recover(struct devlink_health_reporter *reporter,
 	return 0;
 }
 
+static int
+mlx5_devlink_health_buffer_fill_snapshot(struct devlink_health_buffer *dh_buffer,
+					 char *crdump_region, u32 snapshot_id)
+{
+	int err;
+	/* Emit one OBJECT nest holding two name/value PAIRs; returns 0 or the first error. */
+	err = devlink_health_buffer_nest_start(dh_buffer,
+					       DEVLINK_ATTR_HEALTH_BUFFER_OBJECT);
+	if (err)
+		return err;
+	err = devlink_health_buffer_nest_start(dh_buffer,
+					       DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_PAIR);
+	if (err)
+		return err; /* NOTE(review): from here on, error returns skip nest_end - confirm the caller discards the buffer */
+	err = devlink_health_buffer_put_object_name(dh_buffer,
+						    "devlink region name");
+	if (err)
+		return err;
+	err = devlink_health_buffer_nest_start(dh_buffer,
+					       DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE);
+	if (err)
+		return err;
+	err = devlink_health_buffer_put_value_string(dh_buffer, crdump_region);
+	if (err)
+		return err;
+	devlink_health_buffer_nest_end(dh_buffer); /* presumably ends the VALUE nest (innermost) - confirm */
+	devlink_health_buffer_nest_end(dh_buffer); /* presumably ends the first PAIR nest */
+	/* Second pair: name "snapshot id", value snapshot_id as u32. */
+	err = devlink_health_buffer_nest_start(dh_buffer,
+					       DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_PAIR);
+	if (err)
+		return err;
+	err = devlink_health_buffer_put_object_name(dh_buffer, "snapshot id");
+	if (err)
+		return err;
+	err = devlink_health_buffer_nest_start(dh_buffer,
+					       DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE);
+	if (err)
+		return err;
+	err = devlink_health_buffer_put_value_u32(dh_buffer, snapshot_id);
+	if (err)
+		return err;
+	devlink_health_buffer_nest_end(dh_buffer); /* three ends balance the VALUE, PAIR and OBJECT starts */
+	devlink_health_buffer_nest_end(dh_buffer);
+	devlink_health_buffer_nest_end(dh_buffer);
+
+	return 0;
+}
+
+#define MLX5_FW_FATAL_REPORTER_DUMP_SIZE_BYTE 256 /* smallest buff_size the dump callback accepts */
+static int
+mlx5_fw_fatal_reporter_dump(struct devlink_health_reporter *reporter,
+			    struct devlink_health_buffer **buffers_array,
+			    unsigned int buff_size, unsigned int num_buffers,
+			    void *priv_ctx)
+{
+	struct mlx5_core_dev *dev = devlink_health_reporter_priv(reporter);
+	struct devlink_health_buffer *buffer;
+	char crdump_region[20]; /* NOTE(review): confirm 20 bytes always fits the region name */
+	u32 snapshot_id;
+	int err;
+	/* .dump callback: collect a cr-dump, then describe it in buffers_array[0]. */
+	if (!mlx5_core_is_pf(dev)) {
+		mlx5_core_err(dev, "Only PF is permitted to run FW fatal dump\n");
+		return -EPERM;
+	}
+	/* Require at least one buffer with buff_size >= the advertised dump size. */
+	if (!buffers_array || num_buffers < 1 ||
+	    buff_size < MLX5_FW_FATAL_REPORTER_DUMP_SIZE_BYTE)
+		return -EINVAL;
+	/* Presumably fills crdump_region and snapshot_id on success; both are used below. */
+	err = mlx5_crdump_collect(dev, crdump_region, &snapshot_id);
+	if (err)
+		return err;
+	/* Only the first buffer is written; the syndrome is added when a context is given. */
+	buffer = buffers_array[0];
+	if (priv_ctx) {
+		struct mlx5_fw_reporter_ctx *fw_reporter_ctx = priv_ctx;
+
+		err = mlx5_devlink_health_buffer_fill_syndrom(buffer,
+							      fw_reporter_ctx->err_synd);
+		if (err)
+			return err;
+	}
+
+	return mlx5_devlink_health_buffer_fill_snapshot(buffer, crdump_region,
+							snapshot_id);
+}
+
 static const struct devlink_health_reporter_ops mlx5_fw_fatal_reporter_ops = {
 		.name = "FW_fatal",
 		.recover = mlx5_fw_fatal_reporter_recover,
+		.dump_size = MLX5_FW_FATAL_REPORTER_DUMP_SIZE_BYTE, /* same bound the dump callback checks buff_size against */
+		.dump = mlx5_fw_fatal_reporter_dump, /* collects a cr-dump and reports region name + snapshot id */
 };
 
 #define MLX5_REPORTER_FW_GRACEFUL_PERIOD 1200000