diff mbox series

[net-next,v2,2/4] devlink: propagate extack down to health reporter ops

Message ID 20191010131851.21438-3-jiri@resnulli.us
State Accepted
Delegated to: David Miller
Headers show
Series [net-next,v2,1/4] devlink: don't do reporter recovery if the state is healthy | expand

Commit Message

Jiri Pirko Oct. 10, 2019, 1:18 p.m. UTC
From: Jiri Pirko <jiri@mellanox.com>

During health reporter operations, driver might want to fill-up
the extack message, so propagate extack down to the health reporter ops.

Signed-off-by: Jiri Pirko <jiri@mellanox.com>
---
 .../net/ethernet/broadcom/bnxt/bnxt_devlink.c |  9 ++++++---
 .../mellanox/mlx5/core/en/reporter_rx.c       |  6 ++++--
 .../mellanox/mlx5/core/en/reporter_tx.c       |  6 ++++--
 .../net/ethernet/mellanox/mlx5/core/health.c  | 12 +++++++----
 include/net/devlink.h                         |  9 ++++++---
 net/core/devlink.c                            | 20 ++++++++++---------
 6 files changed, 39 insertions(+), 23 deletions(-)

Comments

Jakub Kicinski Oct. 11, 2019, 2:04 a.m. UTC | #1
On Thu, 10 Oct 2019 15:18:49 +0200, Jiri Pirko wrote:
> From: Jiri Pirko <jiri@mellanox.com>
> 
> During health reporter operations, driver might want to fill-up
> the extack message, so propagate extack down to the health reporter ops.
> 
> Signed-off-by: Jiri Pirko <jiri@mellanox.com>

> @@ -507,11 +507,14 @@ enum devlink_health_reporter_state {
>  struct devlink_health_reporter_ops {
>  	char *name;
>  	int (*recover)(struct devlink_health_reporter *reporter,
> -		       void *priv_ctx);
> +		       void *priv_ctx, struct netlink_ext_ack *extack);
>  	int (*dump)(struct devlink_health_reporter *reporter,
> -		    struct devlink_fmsg *fmsg, void *priv_ctx);
> +		    struct devlink_fmsg *fmsg, void *priv_ctx,
> +		    struct netlink_ext_ack *extack);
>  	int (*diagnose)(struct devlink_health_reporter *reporter,
> -			struct devlink_fmsg *fmsg);
> +			struct devlink_fmsg *fmsg,
> +			struct netlink_ext_ack *extack);
> +

nit: Looks like an extra new line snuck in here?

>  };
>  
>  /**

> @@ -4946,11 +4947,12 @@ int devlink_health_report(struct devlink_health_reporter *reporter,
>  
>  	mutex_lock(&reporter->dump_lock);
>  	/* store current dump of current error, for later analysis */
> -	devlink_health_do_dump(reporter, priv_ctx);
> +	devlink_health_do_dump(reporter, priv_ctx, NULL);
>  	mutex_unlock(&reporter->dump_lock);
>  
>  	if (reporter->auto_recover)
> -		return devlink_health_reporter_recover(reporter, priv_ctx);
> +		return devlink_health_reporter_recover(reporter,
> +						       priv_ctx, NULL);
>  
>  	return 0;
>  }

Thinking about this again - would it be entirely insane to allocate the
extack on the stack here? And if anything gets set output into the logs?

For context the situation here is that the health API can be poked from
user space, but also the recovery actions are triggered automatically
when failure is detected, if so configured (usually we expect them to
be).

When we were adding the extack helper for the drivers to use Johannes
was concerned about printing to logs because that gave us a
disincentive to convert all locations, and people could get surprised
by the logs disappearing when more places are converted to extack [1].

I wonder if this is a special case where outputting to the logs is a
good idea? Really for all auto-recoverable health reporters the extack
argument will just confuse driver authors. If driver uses extack here
instead of printing to the logs information why auto-recovery failed is
likely to get lost.

Am I over-thinking this?

[1] https://www.spinics.net/lists/netdev/msg431998.html
Jiri Pirko Oct. 11, 2019, 6:13 a.m. UTC | #2
Fri, Oct 11, 2019 at 04:04:29AM CEST, jakub.kicinski@netronome.com wrote:
>On Thu, 10 Oct 2019 15:18:49 +0200, Jiri Pirko wrote:
>> From: Jiri Pirko <jiri@mellanox.com>
>> 
>> During health reporter operations, driver might want to fill-up
>> the extack message, so propagate extack down to the health reporter ops.
>> 
>> Signed-off-by: Jiri Pirko <jiri@mellanox.com>
>
>> @@ -507,11 +507,14 @@ enum devlink_health_reporter_state {
>>  struct devlink_health_reporter_ops {
>>  	char *name;
>>  	int (*recover)(struct devlink_health_reporter *reporter,
>> -		       void *priv_ctx);
>> +		       void *priv_ctx, struct netlink_ext_ack *extack);
>>  	int (*dump)(struct devlink_health_reporter *reporter,
>> -		    struct devlink_fmsg *fmsg, void *priv_ctx);
>> +		    struct devlink_fmsg *fmsg, void *priv_ctx,
>> +		    struct netlink_ext_ack *extack);
>>  	int (*diagnose)(struct devlink_health_reporter *reporter,
>> -			struct devlink_fmsg *fmsg);
>> +			struct devlink_fmsg *fmsg,
>> +			struct netlink_ext_ack *extack);
>> +
>
>nit: Looks like an extra new line snuck in here?
>
>>  };
>>  
>>  /**
>
>> @@ -4946,11 +4947,12 @@ int devlink_health_report(struct devlink_health_reporter *reporter,
>>  
>>  	mutex_lock(&reporter->dump_lock);
>>  	/* store current dump of current error, for later analysis */
>> -	devlink_health_do_dump(reporter, priv_ctx);
>> +	devlink_health_do_dump(reporter, priv_ctx, NULL);
>>  	mutex_unlock(&reporter->dump_lock);
>>  
>>  	if (reporter->auto_recover)
>> -		return devlink_health_reporter_recover(reporter, priv_ctx);
>> +		return devlink_health_reporter_recover(reporter,
>> +						       priv_ctx, NULL);
>>  
>>  	return 0;
>>  }
>
>Thinking about this again - would it be entirely insane to allocate the
>extack on the stack here? And if anything gets set output into the logs?
>
>For context the situation here is that the health API can be poked from
>user space, but also the recovery actions are triggered automatically
>when failure is detected, if so configured (usually we expect them to
>be).
>
>When we were adding the extack helper for the drivers to use Johannes
>was concerned about printing to logs because that gave us a
>disincentive to convert all locations, and people could get surprised
>by the logs disappearing when more places are converted to extack [1].
>
>I wonder if this is a special case where outputting to the logs is a
>good idea? Really for all auto-recoverable health reporters the extack
>argument will just confuse driver authors. If driver uses extack here
>instead of printing to the logs information why auto-recovery failed is
>likely to get lost.
>
>Am I over-thinking this?

My gut feeling is kidn of odd about allocating some netlink specific
struct and use it separatelly from netlink :/

In any case, I think that this should probably be a separate patch/set.

>
>[1] https://www.spinics.net/lists/netdev/msg431998.html
Johannes Berg Oct. 11, 2019, 6:45 a.m. UTC | #3
On Thu, 2019-10-10 at 19:04 -0700, Jakub Kicinski wrote:

> >  	if (reporter->auto_recover)
> > -		return devlink_health_reporter_recover(reporter, priv_ctx);
> > +		return devlink_health_reporter_recover(reporter,
> > +						       priv_ctx, NULL);
> >  
> >  	return 0;
> >  }
> 
> Thinking about this again - would it be entirely insane to allocate the
> extack on the stack here? And if anything gets set output into the logs?

I think that's fine.

> For context the situation here is that the health API can be poked from
> user space, but also the recovery actions are triggered automatically
> when failure is detected, if so configured (usually we expect them to
> be).

Right. We have similar situations in the wireless stack, and we usually
just get very noisy & WARN. But then it's a level lower, so you don't
have any extack around there :)

> When we were adding the extack helper for the drivers to use Johannes
> was concerned about printing to logs because that gave us a
> disincentive to convert all locations, and people could get surprised
> by the logs disappearing when more places are converted to extack [1].

Yes, but that argument doesn't apply here. The argument was around code
like

> +#define NL_SET_ERR_MSG(extack, msg) do {		\
> +	struct netlink_ext_ack *_extack = (extack);	\
> +	static const char _msg[] = (msg);		\
> +							\
> +	if (_extack)					\
> +		_extack->_msg = _msg;			\
> +	else						\
> +		pr_info("%s\n", _msg);			\
>  } while (0)

(which I quoted in the message you linked).

I still stand by my comment there, I think it'd be a bad idea to do
this, because it'd mean that some random code using NL_SET_ERR_MSG()
might print something to the log if it's called in one way, and perhaps
the fact that it's getting NULL there is due to some higher level call
chain a few steps up "losing" the extack (or not having one), but then
if that's fixed suddenly all the messages disappear.

In the case you're proposing it's entirely different - you're proposing
that a non-netlink caller still supplies an extack that can be filled,
and then prints it out by itself. There's no reason to believe that
would be changed to suddenly have a real netlink extack that *doesn't*
get printed - that wouldn't make sense since you're doing this precisely
because you're *not* inside a netlink call.

Now, if you were saying "let's have a netlink handler that prints the
extack because a few levels up in the callstack we aren't passing the
parameter down yet" I'd still oppose it on similar grounds - that's
something we'd want to fix later to actually report to userspace - but
here there's no chance of that.

So to me this looks fine.

I don't really share the concern about extack being netlink specific and
then using it here - it ultimately doesn't matter whether this thing is
called "netlink_extack" or "extended_error_reporting", IMHO.

I guess we could wrap this so we have

struct devlink_error_report {
  /* pretty much identical content to extack? */
};

and then in this path just print it, while in the actual netlink paths
convert it to extack a level up. That might be the right thing from an
abstraction POV (if we were designing this in Enterprise Architect ;-) )
but pragmatically I'd say it doesn't really matter what the struct is
called, and extack already has helper macros etc.

johannes
David Ahern Oct. 11, 2019, 11:10 p.m. UTC | #4
On 10/11/19 12:45 AM, Johannes Berg wrote:
> So to me this looks fine.
> 
> I don't really share the concern about extack being netlink specific and
> then using it here - it ultimately doesn't matter whether this thing is
> called "netlink_extack" or "extended_error_reporting", IMHO.

+1
diff mbox series

Patch

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c
index e664392dccc0..ff1bc0ec2e7c 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c
@@ -16,7 +16,8 @@ 
 #include "bnxt_devlink.h"
 
 static int bnxt_fw_reporter_diagnose(struct devlink_health_reporter *reporter,
-				     struct devlink_fmsg *fmsg)
+				     struct devlink_fmsg *fmsg,
+				     struct netlink_ext_ack *extack)
 {
 	struct bnxt *bp = devlink_health_reporter_priv(reporter);
 	struct bnxt_fw_health *health = bp->fw_health;
@@ -66,7 +67,8 @@  static const struct devlink_health_reporter_ops bnxt_dl_fw_reporter_ops = {
 };
 
 static int bnxt_fw_reset_recover(struct devlink_health_reporter *reporter,
-				 void *priv_ctx)
+				 void *priv_ctx,
+				 struct netlink_ext_ack *extack)
 {
 	struct bnxt *bp = devlink_health_reporter_priv(reporter);
 
@@ -84,7 +86,8 @@  struct devlink_health_reporter_ops bnxt_dl_fw_reset_reporter_ops = {
 };
 
 static int bnxt_fw_fatal_recover(struct devlink_health_reporter *reporter,
-				 void *priv_ctx)
+				 void *priv_ctx,
+				 struct netlink_ext_ack *extack)
 {
 	struct bnxt *bp = devlink_health_reporter_priv(reporter);
 	struct bnxt_fw_reporter_ctx *fw_reporter_ctx = priv_ctx;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c
index b860569d4247..6c72b592315b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c
@@ -222,7 +222,8 @@  static int mlx5e_rx_reporter_recover_from_ctx(struct mlx5e_err_ctx *err_ctx)
 }
 
 static int mlx5e_rx_reporter_recover(struct devlink_health_reporter *reporter,
-				     void *context)
+				     void *context,
+				     struct netlink_ext_ack *extack)
 {
 	struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter);
 	struct mlx5e_err_ctx *err_ctx = context;
@@ -301,7 +302,8 @@  static int mlx5e_rx_reporter_build_diagnose_output(struct mlx5e_rq *rq,
 }
 
 static int mlx5e_rx_reporter_diagnose(struct devlink_health_reporter *reporter,
-				      struct devlink_fmsg *fmsg)
+				      struct devlink_fmsg *fmsg,
+				      struct netlink_ext_ack *extack)
 {
 	struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter);
 	struct mlx5e_params *params = &priv->channels.params;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
index bfed558637c2..b468549e96ff 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
@@ -135,7 +135,8 @@  static int mlx5e_tx_reporter_recover_from_ctx(struct mlx5e_err_ctx *err_ctx)
 }
 
 static int mlx5e_tx_reporter_recover(struct devlink_health_reporter *reporter,
-				     void *context)
+				     void *context,
+				     struct netlink_ext_ack *extack)
 {
 	struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter);
 	struct mlx5e_err_ctx *err_ctx = context;
@@ -205,7 +206,8 @@  mlx5e_tx_reporter_build_diagnose_output(struct devlink_fmsg *fmsg,
 }
 
 static int mlx5e_tx_reporter_diagnose(struct devlink_health_reporter *reporter,
-				      struct devlink_fmsg *fmsg)
+				      struct devlink_fmsg *fmsg,
+				      struct netlink_ext_ack *extack)
 {
 	struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter);
 	struct mlx5e_txqsq *generic_sq = priv->txq2sq[0];
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c
index d685122d9ff7..be3c3c704bfc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/health.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c
@@ -390,7 +390,8 @@  static void print_health_info(struct mlx5_core_dev *dev)
 
 static int
 mlx5_fw_reporter_diagnose(struct devlink_health_reporter *reporter,
-			  struct devlink_fmsg *fmsg)
+			  struct devlink_fmsg *fmsg,
+			  struct netlink_ext_ack *extack)
 {
 	struct mlx5_core_dev *dev = devlink_health_reporter_priv(reporter);
 	struct mlx5_core_health *health = &dev->priv.health;
@@ -491,7 +492,8 @@  mlx5_fw_reporter_heath_buffer_data_put(struct mlx5_core_dev *dev,
 
 static int
 mlx5_fw_reporter_dump(struct devlink_health_reporter *reporter,
-		      struct devlink_fmsg *fmsg, void *priv_ctx)
+		      struct devlink_fmsg *fmsg, void *priv_ctx,
+		      struct netlink_ext_ack *extack)
 {
 	struct mlx5_core_dev *dev = devlink_health_reporter_priv(reporter);
 	int err;
@@ -545,7 +547,8 @@  static const struct devlink_health_reporter_ops mlx5_fw_reporter_ops = {
 
 static int
 mlx5_fw_fatal_reporter_recover(struct devlink_health_reporter *reporter,
-			       void *priv_ctx)
+			       void *priv_ctx,
+			       struct netlink_ext_ack *extack)
 {
 	struct mlx5_core_dev *dev = devlink_health_reporter_priv(reporter);
 
@@ -555,7 +558,8 @@  mlx5_fw_fatal_reporter_recover(struct devlink_health_reporter *reporter,
 #define MLX5_CR_DUMP_CHUNK_SIZE 256
 static int
 mlx5_fw_fatal_reporter_dump(struct devlink_health_reporter *reporter,
-			    struct devlink_fmsg *fmsg, void *priv_ctx)
+			    struct devlink_fmsg *fmsg, void *priv_ctx,
+			    struct netlink_ext_ack *extack)
 {
 	struct mlx5_core_dev *dev = devlink_health_reporter_priv(reporter);
 	u32 crdump_size = dev->priv.health.crdump_size;
diff --git a/include/net/devlink.h b/include/net/devlink.h
index 4095657fc23f..d35a1be107b5 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -507,11 +507,14 @@  enum devlink_health_reporter_state {
 struct devlink_health_reporter_ops {
 	char *name;
 	int (*recover)(struct devlink_health_reporter *reporter,
-		       void *priv_ctx);
+		       void *priv_ctx, struct netlink_ext_ack *extack);
 	int (*dump)(struct devlink_health_reporter *reporter,
-		    struct devlink_fmsg *fmsg, void *priv_ctx);
+		    struct devlink_fmsg *fmsg, void *priv_ctx,
+		    struct netlink_ext_ack *extack);
 	int (*diagnose)(struct devlink_health_reporter *reporter,
-			struct devlink_fmsg *fmsg);
+			struct devlink_fmsg *fmsg,
+			struct netlink_ext_ack *extack);
+
 };
 
 /**
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 95887462eecf..97e9a2246929 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -4847,7 +4847,7 @@  EXPORT_SYMBOL_GPL(devlink_health_reporter_state_update);
 
 static int
 devlink_health_reporter_recover(struct devlink_health_reporter *reporter,
-				void *priv_ctx)
+				void *priv_ctx, struct netlink_ext_ack *extack)
 {
 	int err;
 
@@ -4857,7 +4857,7 @@  devlink_health_reporter_recover(struct devlink_health_reporter *reporter,
 	if (!reporter->ops->recover)
 		return -EOPNOTSUPP;
 
-	err = reporter->ops->recover(reporter, priv_ctx);
+	err = reporter->ops->recover(reporter, priv_ctx, extack);
 	if (err)
 		return err;
 
@@ -4878,7 +4878,8 @@  devlink_health_dump_clear(struct devlink_health_reporter *reporter)
 }
 
 static int devlink_health_do_dump(struct devlink_health_reporter *reporter,
-				  void *priv_ctx)
+				  void *priv_ctx,
+				  struct netlink_ext_ack *extack)
 {
 	int err;
 
@@ -4899,7 +4900,7 @@  static int devlink_health_do_dump(struct devlink_health_reporter *reporter,
 		goto dump_err;
 
 	err = reporter->ops->dump(reporter, reporter->dump_fmsg,
-				  priv_ctx);
+				  priv_ctx, extack);
 	if (err)
 		goto dump_err;
 
@@ -4946,11 +4947,12 @@  int devlink_health_report(struct devlink_health_reporter *reporter,
 
 	mutex_lock(&reporter->dump_lock);
 	/* store current dump of current error, for later analysis */
-	devlink_health_do_dump(reporter, priv_ctx);
+	devlink_health_do_dump(reporter, priv_ctx, NULL);
 	mutex_unlock(&reporter->dump_lock);
 
 	if (reporter->auto_recover)
-		return devlink_health_reporter_recover(reporter, priv_ctx);
+		return devlink_health_reporter_recover(reporter,
+						       priv_ctx, NULL);
 
 	return 0;
 }
@@ -5188,7 +5190,7 @@  static int devlink_nl_cmd_health_reporter_recover_doit(struct sk_buff *skb,
 	if (!reporter)
 		return -EINVAL;
 
-	err = devlink_health_reporter_recover(reporter, NULL);
+	err = devlink_health_reporter_recover(reporter, NULL, info->extack);
 
 	devlink_health_reporter_put(reporter);
 	return err;
@@ -5221,7 +5223,7 @@  static int devlink_nl_cmd_health_reporter_diagnose_doit(struct sk_buff *skb,
 	if (err)
 		goto out;
 
-	err = reporter->ops->diagnose(reporter, fmsg);
+	err = reporter->ops->diagnose(reporter, fmsg, info->extack);
 	if (err)
 		goto out;
 
@@ -5256,7 +5258,7 @@  devlink_nl_cmd_health_reporter_dump_get_dumpit(struct sk_buff *skb,
 	}
 	mutex_lock(&reporter->dump_lock);
 	if (!start) {
-		err = devlink_health_do_dump(reporter, NULL);
+		err = devlink_health_do_dump(reporter, NULL, cb->extack);
 		if (err)
 			goto unlock;
 		cb->args[1] = reporter->dump_ts;