diff mbox series

[RFC,net-next,15/19] net/mlx5: Add support for FW reporter objdump

Message ID 1546266733-9512-16-git-send-email-eranbe@mellanox.com
State RFC, archived
Delegated to: David Miller
Headers show
Series Devlink health reporting and recovery system | expand

Commit Message

Eran Ben Elisha Dec. 31, 2018, 2:32 p.m. UTC
From: Moshe Shemesh <moshe@mellanox.com>

Add support for the objdump callback of the mlx5 FW reporter.
Once we trigger a FW dump, the FW writes the core dump to its raw data
buffer. The tracer translates the raw data into traces and saves them to a
buffer. Once the dump is done, the saved traces are filled as objects
into the objdump buffer.

Signed-off-by: Moshe Shemesh <moshe@mellanox.com>
---
 .../net/ethernet/mellanox/mlx5/core/devlink.c | 112 ++++++++++++++++++
 .../mellanox/mlx5/core/diag/fw_tracer.c       |  44 +++++++
 .../mellanox/mlx5/core/diag/fw_tracer.h       |  13 ++
 3 files changed, 169 insertions(+)
diff mbox series

Patch

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
index 4ec5d092a332..07bc473a8ebb 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
@@ -3,6 +3,7 @@ 
 
 #include <devlink.h>
 #include "mlx5_core.h"
+#include "diag/fw_tracer.h"
 
 static int
 mlx5_devlink_health_buffer_fill_syndrom(struct devlink_health_buffer *dh_buffer,
@@ -35,6 +36,115 @@  mlx5_devlink_health_buffer_fill_syndrom(struct devlink_health_buffer *dh_buffer,
 	return 0;
 }
 
+/* Emit one trace string as a health buffer object.
+ *
+ * Opens an OBJECT nest, an OBJECT_PAIR nest inside it, writes the object name
+ * "trace", then opens an OBJECT_VALUE nest and writes @trace as a string value.
+ *
+ * Returns 0 after closing every opened nest, or the first error encountered
+ * after cancelling every nest that was opened up to that point.
+ */
+int mlx5_devlink_health_buffer_fill_trace(struct devlink_health_buffer *dh_buffer,
+					  char *trace)
+{
+	int depth = 0;	/* number of nests currently open */
+	int err;
+
+	err = devlink_health_buffer_nest_start(dh_buffer,
+					       DEVLINK_ATTR_HEALTH_BUFFER_OBJECT);
+	if (err)
+		goto unwind;
+	depth++;
+
+	err = devlink_health_buffer_nest_start(dh_buffer,
+					       DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_PAIR);
+	if (err)
+		goto unwind;
+	depth++;
+
+	err = devlink_health_buffer_put_object_name(dh_buffer, "trace");
+	if (err)
+		goto unwind;
+
+	err = devlink_health_buffer_nest_start(dh_buffer,
+					       DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE);
+	if (err)
+		goto unwind;
+	depth++;
+
+	err = devlink_health_buffer_put_value_string(dh_buffer, trace);
+	if (err)
+		goto unwind;
+
+	/* Success: close the nests, one call per open level */
+	while (depth-- > 0)
+		devlink_health_buffer_nest_end(dh_buffer);
+	return 0;
+
+unwind:
+	/* Failure: cancel only the nests that were actually opened */
+	while (depth-- > 0)
+		devlink_health_buffer_nest_cancel(dh_buffer);
+
+	return err;
+}
+
+/* Fill the saved FW traces into the devlink health buffers, oldest first.
+ *
+ * @tracer: tracer whose saved traces ring (sbuff) is dumped
+ * @buffers_array: health buffers to fill; when filling a buffer fails, the
+ *                 same trace is retried on the next buffer
+ * @num_buffers: number of entries in @buffers_array
+ *
+ * Returns -ENOMSG when no trace has been saved, 0 when every saved trace was
+ * written, or the last fill error when the buffers ran out first.
+ */
+int mlx5_fw_tracer_get_saved_traces_objects(struct mlx5_fw_tracer *tracer,
+					    struct devlink_health_buffer **buffers_array,
+					    unsigned int num_buffers)
+{
+	u32 saved_traces_index = tracer->sbuff.saved_traces_index;
+	char *saved_traces = tracer->sbuff.traces_buff;
+	u32 dh_buffer_index = 0;
+	u32 index, remaining;
+	int err = 0;
+
+	/* Slot 0 is the first slot ever written; empty means nothing saved */
+	if (!saved_traces[0])
+		return -ENOMSG;
+
+	/* A used slot at the write index means the ring has wrapped: that
+	 * slot holds the oldest trace and all SAVED_TRACES_NUM slots are
+	 * valid. Otherwise only the slots below the write index were written.
+	 */
+	if (saved_traces[saved_traces_index * TRACE_STR_LINE]) {
+		index = saved_traces_index;
+		remaining = SAVED_TRACES_NUM;
+	} else {
+		index = 0;
+		remaining = saved_traces_index;
+	}
+
+	/* Walk by count with a wrapping index rather than comparing against
+	 * an end index: after a wrap the start index is larger than the end
+	 * index, so an "index <= end_index" walk would dump nothing.
+	 */
+	while (remaining && dh_buffer_index < num_buffers) {
+		err = mlx5_devlink_health_buffer_fill_trace(buffers_array[dh_buffer_index],
+							    saved_traces + index * TRACE_STR_LINE);
+		if (err) {
+			/* Move on to the next buffer and retry this trace */
+			dh_buffer_index++;
+			continue;
+		}
+
+		index = (index + 1) & (SAVED_TRACES_NUM - 1);
+		remaining--;
+	}
+
+	return err;
+}
+
+/* objdump callback of the FW reporter.
+ *
+ * Rejects a missing buffers array, an empty array, or buffers smaller than
+ * one trace line with -EINVAL. Otherwise triggers a FW core dump, writes the
+ * error syndrome from @priv_ctx (when one is passed) into the first buffer,
+ * and then fills the tracer's saved traces into the buffers.
+ *
+ * Returns 0 on success or the error of the first step that failed.
+ */
+static int
+mlx5_fw_reporter_objdump(struct devlink_health_reporter *reporter,
+			 struct devlink_health_buffer **buffers_array,
+			 unsigned int buff_size, unsigned int num_buffers,
+			 void *priv_ctx)
+{
+	struct mlx5_core_dev *dev = devlink_health_reporter_priv(reporter);
+	struct mlx5_fw_reporter_ctx *fw_reporter_ctx = priv_ctx;
+	int err;
+
+	if (!buffers_array || !num_buffers || buff_size < TRACE_STR_LINE)
+		return -EINVAL;
+
+	err = mlx5_fw_tracer_trigger_core_dump_general(dev);
+	/* The syndrome is only available when the caller passed a context */
+	if (!err && fw_reporter_ctx)
+		err = mlx5_devlink_health_buffer_fill_syndrom(buffers_array[0],
+							      fw_reporter_ctx->err_synd);
+	if (err)
+		return err;
+
+	return mlx5_fw_tracer_get_saved_traces_objects(dev->tracer, buffers_array,
+						       num_buffers);
+}
+
 static int
 mlx5_fw_reporter_diagnose(struct devlink_health_reporter *reporter,
 			  struct devlink_health_buffer **buffers_array,
@@ -97,7 +207,9 @@  mlx5_fw_reporter_diagnose(struct devlink_health_reporter *reporter,
 
+/* Ops of the mlx5 "FW" health reporter: diagnose and objdump callbacks plus
+ * the buffer size requested for each of them.
+ */
 static const struct devlink_health_reporter_ops mlx5_fw_reporter_ops = {
 		.name = "FW",
+		/* Same size as the tracer's saved traces buffer */
+		.objdump_size = SAVED_TRACES_BUFFER_SIZE_BYTE,
 		.diagnose_size = HEALTH_INFO_MAX_BUFF,
+		.objdump = mlx5_fw_reporter_objdump,
 		.diagnose = mlx5_fw_reporter_diagnose,
 };
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c
index d0f8449019af..b704df545d01 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c
@@ -243,6 +243,24 @@  static int mlx5_fw_tracer_allocate_strings_db(struct mlx5_fw_tracer *tracer)
 	return -ENOMEM;
 }
 
+/* Allocate the zero-filled saved traces buffer
+ * (SAVED_TRACES_BUFFER_SIZE_BYTE bytes) and reset its write index.
+ *
+ * Returns 0 on success or -ENOMEM when the allocation fails.
+ */
+static int mlx5_fw_tracer_allocate_saved_traces_buff(struct mlx5_fw_tracer *tracer)
+{
+	void *buff = kzalloc(SAVED_TRACES_BUFFER_SIZE_BYTE, GFP_KERNEL);
+
+	tracer->sbuff.traces_buff = buff;
+	if (!buff)
+		return -ENOMEM;
+
+	/* Start writing at the first slot */
+	tracer->sbuff.saved_traces_index = 0;
+	return 0;
+}
+
+/* Release the saved traces buffer and clear the pointer kept in the tracer. */
+static void mlx5_fw_tracer_free_saved_traces_buff(struct mlx5_fw_tracer *tracer)
+{
+	void **buffp = &tracer->sbuff.traces_buff;
+
+	kfree(*buffp);
+	*buffp = NULL;
+}
+
 static void mlx5_tracer_read_strings_db(struct work_struct *work)
 {
 	struct mlx5_fw_tracer *tracer = container_of(work, struct mlx5_fw_tracer,
@@ -522,6 +540,20 @@  static void mlx5_fw_tracer_clean_ready_list(struct mlx5_fw_tracer *tracer)
 		list_del(&str_frmt->list);
 }
 
+/* Format one trace into the current slot of the saved traces buffer and
+ * advance the write index.
+ *
+ * The line holds the device name, timestamp, lost flag, event id and message,
+ * and is bounded to TRACE_STR_LINE bytes by snprintf().
+ */
+static void mlx5_fw_tracer_save_trace(struct mlx5_fw_tracer *tracer, u64 timestamp,
+				      bool lost, u8 event_id, char *msg)
+{
+	u32 slot = tracer->sbuff.saved_traces_index;
+	char *line = (char *)tracer->sbuff.traces_buff + slot * TRACE_STR_LINE;
+
+	snprintf(line, TRACE_STR_LINE, "%s [0x%llx] %d [0x%x] %s",
+		 dev_name(&tracer->dev->pdev->dev), timestamp, lost, event_id, msg);
+
+	/* Masking with SAVED_TRACES_NUM - 1 wraps the index back to slot 0 */
+	tracer->sbuff.saved_traces_index = (slot + 1) & (SAVED_TRACES_NUM - 1);
+}
+
 static void mlx5_tracer_print_trace(struct tracer_string_format *str_frmt,
 				    struct mlx5_core_dev *dev,
 				    u64 trace_timestamp)
@@ -540,6 +572,9 @@  static void mlx5_tracer_print_trace(struct tracer_string_format *str_frmt,
 	trace_mlx5_fw(dev->tracer, trace_timestamp, str_frmt->lost,
 		      str_frmt->event_id, tmp);
 
+	mlx5_fw_tracer_save_trace(dev->tracer, trace_timestamp,
+				  str_frmt->lost, str_frmt->event_id, tmp);
+
 	/* remove it from hash */
 	mlx5_tracer_clean_message(str_frmt);
 }
@@ -864,10 +899,18 @@  struct mlx5_fw_tracer *mlx5_fw_tracer_create(struct mlx5_core_dev *dev)
 		goto free_log_buf;
 	}
 
+	err = mlx5_fw_tracer_allocate_saved_traces_buff(tracer);
+	if (err) {
+		mlx5_core_warn(dev, "FWTracer: Create saved traces buffer failed %d\n", err);
+		goto free_strings_db;
+	}
+
 	mlx5_core_dbg(dev, "FWTracer: Tracer created\n");
 
 	return tracer;
 
+free_strings_db:
+	mlx5_fw_tracer_free_strings_db(tracer);
 free_log_buf:
 	mlx5_fw_tracer_destroy_log_buf(tracer);
 destroy_workqueue:
@@ -948,6 +991,7 @@  void mlx5_fw_tracer_destroy(struct mlx5_fw_tracer *tracer)
 	cancel_work_sync(&tracer->read_fw_strings_work);
 	mlx5_fw_tracer_clean_ready_list(tracer);
 	mlx5_fw_tracer_clean_print_hash(tracer);
+	mlx5_fw_tracer_free_saved_traces_buff(tracer);
 	mlx5_fw_tracer_free_strings_db(tracer);
 	mlx5_fw_tracer_destroy_log_buf(tracer);
 	flush_workqueue(tracer->work_queue);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h
index a8b8747f2b61..ad817932cc8e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h
@@ -46,6 +46,10 @@ 
 #define TRACER_BLOCK_SIZE_BYTE 256
 #define TRACES_PER_BLOCK 32
 
+/* Size in bytes of one saved trace slot; formatted lines are bounded to it */
+#define TRACE_STR_LINE 256
+/* Number of slots in the saved traces buffer. The write index wraps with
+ * "& (SAVED_TRACES_NUM - 1)", so this must stay a power of two.
+ */
+#define SAVED_TRACES_NUM 1024
+/* Total size in bytes of the saved traces buffer */
+#define SAVED_TRACES_BUFFER_SIZE_BYTE (SAVED_TRACES_NUM * TRACE_STR_LINE)
+
 #define TRACER_MAX_PARAMS 7
 #define MESSAGE_HASH_BITS 6
 #define MESSAGE_HASH_SIZE BIT(MESSAGE_HASH_BITS)
@@ -83,6 +87,12 @@  struct mlx5_fw_tracer {
 		u32 consumer_index;
 	} buff;
 
+	/* Saved Traces Buffer */
+	struct {
+		void *traces_buff;
+		u32 saved_traces_index;
+	} sbuff;
+
 	u64 last_timestamp;
 	struct work_struct handle_traces_work;
 	struct hlist_head hash[MESSAGE_HASH_SIZE];
@@ -171,5 +181,8 @@  struct mlx5_fw_tracer *mlx5_fw_tracer_create(struct mlx5_core_dev *dev);
 int mlx5_fw_tracer_init(struct mlx5_fw_tracer *tracer);
 void mlx5_fw_tracer_cleanup(struct mlx5_fw_tracer *tracer);
 void mlx5_fw_tracer_destroy(struct mlx5_fw_tracer *tracer);
+/* Called by the FW reporter objdump callback before it collects the traces */
+int mlx5_fw_tracer_trigger_core_dump_general(struct mlx5_core_dev *dev);
+/* NOTE(review): no definition with this name and signature is visible in
+ * this patch; devlink.c defines mlx5_fw_tracer_get_saved_traces_objects(),
+ * which takes devlink health buffers instead - confirm which one is intended.
+ */
+int mlx5_fw_tracer_get_saved_traces(struct mlx5_fw_tracer *tracer,
+				    char *buff, unsigned int buff_size);
 
 #endif