diff mbox series

[net-next,08/15] net/mlx5: Refactor print health info

Message ID 20190505003207.1353-9-saeedm@mellanox.com
State Changes Requested
Delegated to: David Miller
Headers show
Series [net-next,01/15] net/mlx5: Move all devlink related functions calls to devlink.c | expand

Commit Message

Saeed Mahameed May 5, 2019, 12:33 a.m. UTC
From: Moshe Shemesh <moshe@mellanox.com>

Refactor the health info printing code by splitting it into two functions:
 1. mlx5_get_health_info() - writes the health info into a buffer.
 2. mlx5_print_health_info() - prints the health info to the kernel log.
This refactoring lets the devlink health reporter diagnose() callback use
the health info data in a downstream patch.

Signed-off-by: Moshe Shemesh <moshe@mellanox.com>
Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com>
Reviewed-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 .../net/ethernet/mellanox/mlx5/core/health.c  | 83 +++++++++++++++----
 include/linux/mlx5/driver.h                   |  4 +
 2 files changed, 70 insertions(+), 17 deletions(-)

Comments

Jiri Pirko May 5, 2019, 3:42 p.m. UTC | #1
Sun, May 05, 2019 at 02:33:21AM CEST, saeedm@mellanox.com wrote:
>From: Moshe Shemesh <moshe@mellanox.com>
>
>Refactor print health info code, split to two functions:
> 1. mlx5_get_health_info() - writes the health info into a buffer.
> 2. mlx5_print_health_info() - prints the health info to kernel log.
>This refactoring is done to enable using the health info data by devlink
>health reporter diagnose() in the downstream patch.

Please avoid this. Leave the print out as it is and format fmsg
properly.
diff mbox series

Patch

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c
index 19d9297682d7..a3c7e46aafd9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/health.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c
@@ -357,7 +357,28 @@  static const char *hsynd_str(u8 synd)
 	}
 }
 
-static void print_health_info(struct mlx5_core_dev *dev)
+#define HEALTH_INFO_MAX_BUFF 1024
+static void mlx5_health_info_buf_reset(struct mlx5_core_dev *dev)
+{
+	dev->priv.health.info_buf_len = 0;
+}
+
+static void
+mlx5_health_info_buf_write(struct mlx5_core_dev *dev, const char *fmt, ...)
+{
+	struct mlx5_core_health *health = &dev->priv.health;
+	va_list args;
+	int len;
+
+	va_start(args, fmt);
+	len = vsnprintf(health->info_buf + health->info_buf_len,
+			HEALTH_INFO_MAX_BUFF - health->info_buf_len, fmt, args);
+	va_end(args);
+	health->info_buf_len = min_t(int, health->info_buf_len + len,
+				     HEALTH_INFO_MAX_BUFF);
+}
+
+static void mlx5_get_health_info(struct mlx5_core_dev *dev, u8 *synd)
 {
 	struct mlx5_core_health *health = &dev->priv.health;
 	struct health_buffer __iomem *h = health->health;
@@ -365,27 +386,46 @@  static void print_health_info(struct mlx5_core_dev *dev)
 	u32 fw;
 	int i;
 
+	*synd = ioread8(&h->synd);
 	/* If the syndrome is 0, the device is OK and no need to print buffer */
-	if (!ioread8(&h->synd))
+	if (!*synd)
 		return;
 
+	mlx5_health_info_buf_reset(dev);
+	mlx5_health_info_buf_write(dev, "\n");
 	for (i = 0; i < ARRAY_SIZE(h->assert_var); i++)
-		mlx5_core_err(dev, "assert_var[%d] 0x%08x\n", i,
-			      ioread32be(h->assert_var + i));
+		mlx5_health_info_buf_write(dev, "assert_var[%d] 0x%08x\n", i,
+					   ioread32be(h->assert_var + i));
 
-	mlx5_core_err(dev, "assert_exit_ptr 0x%08x\n",
-		      ioread32be(&h->assert_exit_ptr));
-	mlx5_core_err(dev, "assert_callra 0x%08x\n",
-		      ioread32be(&h->assert_callra));
+	mlx5_health_info_buf_write(dev, "assert_exit_ptr 0x%08x\n",
+				   ioread32be(&h->assert_exit_ptr));
+	mlx5_health_info_buf_write(dev, "assert_callra 0x%08x\n",
+				   ioread32be(&h->assert_callra));
 	sprintf(fw_str, "%d.%d.%d", fw_rev_maj(dev), fw_rev_min(dev), fw_rev_sub(dev));
-	mlx5_core_err(dev, "fw_ver %s\n", fw_str);
-	mlx5_core_err(dev, "hw_id 0x%08x\n", ioread32be(&h->hw_id));
-	mlx5_core_err(dev, "irisc_index %d\n", ioread8(&h->irisc_index));
-	mlx5_core_err(dev, "synd 0x%x: %s\n", ioread8(&h->synd),
-		      hsynd_str(ioread8(&h->synd)));
-	mlx5_core_err(dev, "ext_synd 0x%04x\n", ioread16be(&h->ext_synd));
+	mlx5_health_info_buf_write(dev, "fw_ver %s\n", fw_str);
+	mlx5_health_info_buf_write(dev, "hw_id 0x%08x\n", ioread32be(&h->hw_id));
+	mlx5_health_info_buf_write(dev, "irisc_index %d\n", ioread8(&h->irisc_index));
+	mlx5_health_info_buf_write(dev, "synd 0x%x: %s\n", ioread8(&h->synd),
+				   hsynd_str(ioread8(&h->synd)));
+	mlx5_health_info_buf_write(dev, "ext_synd 0x%04x\n", ioread16be(&h->ext_synd));
 	fw = ioread32be(&h->fw_ver);
-	mlx5_core_err(dev, "raw fw_ver 0x%08x\n", fw);
+	mlx5_health_info_buf_write(dev, "raw fw_ver 0x%08x\n", fw);
+}
+
+static void mlx5_print_health_info(struct mlx5_core_dev *dev)
+{
+	struct mlx5_core_health *health = &dev->priv.health;
+	u8 synd;
+
+	mutex_lock(&health->info_buf_lock);
+	mlx5_get_health_info(dev, &synd);
+
+	if (!synd)
+		goto unlock;
+
+	mlx5_core_err(dev, "%s", health->info_buf);
+unlock:
+	mutex_unlock(&health->info_buf_lock);
 }
 
 static unsigned long get_next_poll_jiffies(void)
@@ -431,7 +471,7 @@  static void poll_health(struct timer_list *t)
 	health->prev = count;
 	if (health->miss_counter == MAX_MISSES) {
 		mlx5_core_err(dev, "device's health compromised - reached miss count\n");
-		print_health_info(dev);
+		mlx5_print_health_info(dev);
 	}
 
 	fatal_error = check_fatal_sensors(dev);
@@ -439,7 +479,7 @@  static void poll_health(struct timer_list *t)
 	if (fatal_error && !health->fatal_error) {
 		mlx5_core_err(dev, "Fatal error %u detected\n", fatal_error);
 		dev->priv.health.fatal_error = fatal_error;
-		print_health_info(dev);
+		mlx5_print_health_info(dev);
 		mlx5_trigger_health_work(dev);
 	}
 
@@ -497,6 +537,7 @@  void mlx5_health_cleanup(struct mlx5_core_dev *dev)
 {
 	struct mlx5_core_health *health = &dev->priv.health;
 
+	kfree(health->info_buf);
 	destroy_workqueue(health->wq);
 }
 
@@ -519,6 +560,14 @@  int mlx5_health_init(struct mlx5_core_dev *dev)
 	spin_lock_init(&health->wq_lock);
 	INIT_WORK(&health->work, health_care);
 	health->crdump = NULL;
+	health->info_buf = kmalloc(HEALTH_INFO_MAX_BUFF, GFP_KERNEL);
+	if (!health->info_buf)
+		goto err_alloc_buff;
+	mutex_init(&health->info_buf_lock);
 
 	return 0;
+
+err_alloc_buff:
+	destroy_workqueue(health->wq);
+	return -ENOMEM;
 }
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 33c977db6ceb..df8f4c4e21c6 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -444,6 +444,10 @@  struct mlx5_core_health {
 	struct work_struct		work;
 	struct delayed_work		recover_work;
 	struct mlx5_fw_crdump	       *crdump;
+	char			       *info_buf;
+	int				info_buf_len;
+	/* protect info buf access */
+	struct mutex			info_buf_lock;
 };
 
 struct mlx5_qp_table {