diff mbox series

[v4,net-next,03/11] devlink: Add health report functionality

Message ID 1549532202-943-4-git-send-email-eranbe@mellanox.com
State Accepted
Delegated to: David Miller
Headers show
Series Devlink health reporting and recovery system | expand

Commit Message

Eran Ben Elisha Feb. 7, 2019, 9:36 a.m. UTC
Upon error discover, every driver can report it to the devlink health
mechanism via devlink_health_report function, using the appropriate
reporter registered to it. Driver can pass error specific context which
will be delivered to it as part of the dump / recovery callbacks.

Once an error is reported, devlink health will do the following actions:
* A log is being send to the kernel trace events buffer
* Health status and statistics are being updated for the reporter instance
* Object dump is being taken and stored at the reporter instance (as long
  as there is no other dump which is already stored)
* Auto recovery attempt is being done. Depends on:
  - Auto Recovery configuration
  - Grace period vs. Time since last recover

Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com>
Reviewed-by: Moshe Shemesh <moshe@mellanox.com>
---
 include/net/devlink.h          |   9 +++
 include/trace/events/devlink.h |  65 ++++++++++++++++++
 net/core/devlink.c             | 119 +++++++++++++++++++++++++++++++++
 3 files changed, 193 insertions(+)

Comments

Jiri Pirko Feb. 7, 2019, 9:38 a.m. UTC | #1
Thu, Feb 07, 2019 at 10:36:34AM CET, eranbe@mellanox.com wrote:
>Upon error discover, every driver can report it to the devlink health
>mechanism via devlink_health_report function, using the appropriate
>reporter registered to it. Driver can pass error specific context which
>will be delivered to it as part of the dump / recovery callbacks.
>
>Once an error is reported, devlink health will do the following actions:
>* A log is being send to the kernel trace events buffer
>* Health status and statistics are being updated for the reporter instance
>* Object dump is being taken and stored at the reporter instance (as long
>  as there is no other dump which is already stored)
>* Auto recovery attempt is being done. Depends on:
>  - Auto Recovery configuration
>  - Grace period vs. Time since last recover
>
>Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com>
>Reviewed-by: Moshe Shemesh <moshe@mellanox.com>

Acked-by: Jiri Pirko <jiri@mellanox.com>
diff mbox series

Patch

diff --git a/include/net/devlink.h b/include/net/devlink.h
index 3dfe30235878..c12ad6e9095d 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -704,6 +704,8 @@  devlink_health_reporter_destroy(struct devlink_health_reporter *reporter);
 
 void *
 devlink_health_reporter_priv(struct devlink_health_reporter *reporter);
+int devlink_health_report(struct devlink_health_reporter *reporter,
+			  const char *msg, void *priv_ctx);
 
 #else
 
@@ -1173,6 +1175,13 @@  devlink_health_reporter_priv(struct devlink_health_reporter *reporter)
 {
 	return NULL;
 }
+
+static inline int
+devlink_health_report(struct devlink_health_reporter *reporter,
+		      const char *msg, void *priv_ctx)
+{
+	return 0;
+}
 #endif
 
 #if IS_REACHABLE(CONFIG_NET_DEVLINK)
diff --git a/include/trace/events/devlink.h b/include/trace/events/devlink.h
index 40705364a50f..191ddf67d769 100644
--- a/include/trace/events/devlink.h
+++ b/include/trace/events/devlink.h
@@ -75,6 +75,71 @@  TRACE_EVENT(devlink_hwerr,
 			__get_str(driver_name), __entry->err, __get_str(msg))
 );
 
+/*
+ * Tracepoint for devlink health message:
+ */
+TRACE_EVENT(devlink_health_report,
+	TP_PROTO(const struct devlink *devlink, const char *reporter_name,
+		 const char *msg),
+
+	TP_ARGS(devlink, reporter_name, msg),
+
+	TP_STRUCT__entry(
+		__string(bus_name, devlink->dev->bus->name)
+		__string(dev_name, dev_name(devlink->dev))
+		__string(driver_name, devlink->dev->driver->name)
+		__string(reporter_name, msg)
+		__string(msg, msg)
+	),
+
+	TP_fast_assign(
+		__assign_str(bus_name, devlink->dev->bus->name);
+		__assign_str(dev_name, dev_name(devlink->dev));
+		__assign_str(driver_name, devlink->dev->driver->name);
+		__assign_str(reporter_name, reporter_name);
+		__assign_str(msg, msg);
+	),
+
+	TP_printk("bus_name=%s dev_name=%s driver_name=%s reporter_name=%s: %s",
+		  __get_str(bus_name), __get_str(dev_name),
+		  __get_str(driver_name), __get_str(reporter_name),
+		  __get_str(msg))
+);
+
+/*
+ * Tracepoint for devlink health recover aborted message:
+ */
+TRACE_EVENT(devlink_health_recover_aborted,
+	TP_PROTO(const struct devlink *devlink, const char *reporter_name,
+		 bool health_state, u64 time_since_last_recover),
+
+	TP_ARGS(devlink, reporter_name, health_state, time_since_last_recover),
+
+	TP_STRUCT__entry(
+		__string(bus_name, devlink->dev->bus->name)
+		__string(dev_name, dev_name(devlink->dev))
+		__string(driver_name, devlink->dev->driver->name)
+		__string(reporter_name, reporter_name)
+		__field(bool, health_state)
+		__field(u64, time_since_last_recover)
+	),
+
+	TP_fast_assign(
+		__assign_str(bus_name, devlink->dev->bus->name);
+		__assign_str(dev_name, dev_name(devlink->dev));
+		__assign_str(driver_name, devlink->dev->driver->name);
+		__assign_str(reporter_name, reporter_name);
+		__entry->health_state = health_state;
+		__entry->time_since_last_recover = time_since_last_recover;
+	),
+
+	TP_printk("bus_name=%s dev_name=%s driver_name=%s reporter_name=%s: health_state=%d time_since_last_recover=%llu recover aborted",
+		  __get_str(bus_name), __get_str(dev_name),
+		  __get_str(driver_name), __get_str(reporter_name),
+		  __entry->health_state,
+		  __entry->time_since_last_recover)
+);
+
 #endif /* _TRACE_DEVLINK_H */
 
 /* This part must be outside protection */
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 341548d7f1f1..3eaa290831aa 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -4367,9 +4367,20 @@  struct devlink_health_reporter {
 	void *priv;
 	const struct devlink_health_reporter_ops *ops;
 	struct devlink *devlink;
+	struct devlink_fmsg *dump_fmsg;
+	struct mutex dump_lock; /* lock parallel read/write from dump buffers */
 	u64 graceful_period;
 	bool auto_recover;
 	u8 health_state;
+	u64 dump_ts;
+	u64 error_count;
+	u64 recovery_count;
+	u64 last_recovery_ts;
+};
+
+enum devlink_health_reporter_state {
+	DEVLINK_HEALTH_REPORTER_STATE_HEALTHY,
+	DEVLINK_HEALTH_REPORTER_STATE_ERROR,
 };
 
 void *
@@ -4431,6 +4442,7 @@  devlink_health_reporter_create(struct devlink *devlink,
 	reporter->devlink = devlink;
 	reporter->graceful_period = graceful_period;
 	reporter->auto_recover = auto_recover;
+	mutex_init(&reporter->dump_lock);
 	list_add_tail(&reporter->list, &devlink->reporter_list);
 unlock:
 	mutex_unlock(&devlink->lock);
@@ -4449,10 +4461,117 @@  devlink_health_reporter_destroy(struct devlink_health_reporter *reporter)
 	mutex_lock(&reporter->devlink->lock);
 	list_del(&reporter->list);
 	mutex_unlock(&reporter->devlink->lock);
+	if (reporter->dump_fmsg)
+		devlink_fmsg_free(reporter->dump_fmsg);
 	kfree(reporter);
 }
 EXPORT_SYMBOL_GPL(devlink_health_reporter_destroy);
 
+static int
+devlink_health_reporter_recover(struct devlink_health_reporter *reporter,
+				void *priv_ctx)
+{
+	int err;
+
+	if (!reporter->ops->recover)
+		return -EOPNOTSUPP;
+
+	err = reporter->ops->recover(reporter, priv_ctx);
+	if (err)
+		return err;
+
+	reporter->recovery_count++;
+	reporter->health_state = DEVLINK_HEALTH_REPORTER_STATE_HEALTHY;
+	reporter->last_recovery_ts = jiffies;
+
+	return 0;
+}
+
+static void
+devlink_health_dump_clear(struct devlink_health_reporter *reporter)
+{
+	if (!reporter->dump_fmsg)
+		return;
+	devlink_fmsg_free(reporter->dump_fmsg);
+	reporter->dump_fmsg = NULL;
+}
+
+static int devlink_health_do_dump(struct devlink_health_reporter *reporter,
+				  void *priv_ctx)
+{
+	int err;
+
+	if (!reporter->ops->dump)
+		return 0;
+
+	if (reporter->dump_fmsg)
+		return 0;
+
+	reporter->dump_fmsg = devlink_fmsg_alloc();
+	if (!reporter->dump_fmsg) {
+		err = -ENOMEM;
+		return err;
+	}
+
+	err = devlink_fmsg_obj_nest_start(reporter->dump_fmsg);
+	if (err)
+		goto dump_err;
+
+	err = reporter->ops->dump(reporter, reporter->dump_fmsg,
+				  priv_ctx);
+	if (err)
+		goto dump_err;
+
+	err = devlink_fmsg_obj_nest_end(reporter->dump_fmsg);
+	if (err)
+		goto dump_err;
+
+	reporter->dump_ts = jiffies;
+
+	return 0;
+
+dump_err:
+	devlink_health_dump_clear(reporter);
+	return err;
+}
+
+int devlink_health_report(struct devlink_health_reporter *reporter,
+			  const char *msg, void *priv_ctx)
+{
+	struct devlink *devlink = reporter->devlink;
+
+	/* write a log message of the current error */
+	WARN_ON(!msg);
+	trace_devlink_health_report(devlink, reporter->ops->name, msg);
+	reporter->error_count++;
+
+	/* abort if the previous error wasn't recovered */
+	if (reporter->auto_recover &&
+	    (reporter->health_state != DEVLINK_HEALTH_REPORTER_STATE_HEALTHY ||
+	     jiffies - reporter->last_recovery_ts <
+	     msecs_to_jiffies(reporter->graceful_period))) {
+		trace_devlink_health_recover_aborted(devlink,
+						     reporter->ops->name,
+						     reporter->health_state,
+						     jiffies -
+						     reporter->last_recovery_ts);
+		return -ECANCELED;
+	}
+
+	reporter->health_state = DEVLINK_HEALTH_REPORTER_STATE_ERROR;
+
+	mutex_lock(&reporter->dump_lock);
+	/* store current dump of current error, for later analysis */
+	devlink_health_do_dump(reporter, priv_ctx);
+	mutex_unlock(&reporter->dump_lock);
+
+	if (reporter->auto_recover)
+		return devlink_health_reporter_recover(reporter, priv_ctx);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(devlink_health_report);
+
 static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
 	[DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING },
 	[DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING },