
[net-next] devlink: Execute devlink health recover as a work

Message ID: 1556189823-5368-1-git-send-email-moshe@mellanox.com
State: Changes Requested
Delegated to: David Miller
Series: [net-next] devlink: Execute devlink health recover as a work

Commit Message

Moshe Shemesh April 25, 2019, 10:57 a.m. UTC
Different reporters have different roles in the driver and are
created/destroyed during different stages of driver load/unload/running.
So while one reporter's recover flow executes, it can go through another
reporter's destroy and create. If the recover flow was triggered by the
devlink recover command, such a flow deadlocks trying to take a mutex it
already holds.
To avoid this deadlock, execute the recover flow from a workqueue. Once
the recover work completes successfully, the reporter health state and
recover counter are updated.

Signed-off-by: Moshe Shemesh <moshe@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
---
 include/net/devlink.h |  1 +
 net/core/devlink.c    | 70 +++++++++++++++++++++++++++++++++++----------------
 2 files changed, 50 insertions(+), 21 deletions(-)
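
To make the failure mode concrete, the deadlock can be sketched as a call
chain (the driver-side step is illustrative, not taken from a specific
driver; devlink_nl_pre_doit() is what takes devlink->lock for doit
commands):

/*
 * devlink_nl_cmd_health_reporter_recover_doit()  <- devlink->lock taken in pre_doit
 *   -> reporter->ops->recover()                  <- driver recover callback
 *     -> devlink_health_reporter_create()        <- driver re-creates another reporter
 *       -> mutex_lock(&devlink->lock)            <- deadlock: already held
 */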

Comments

Saeed Mahameed April 25, 2019, 6:11 p.m. UTC | #1
On Thu, 2019-04-25 at 13:57 +0300, Moshe Shemesh wrote:
> Different reporters have different roles in the driver and are
> created/destroyed during different stages of driver
> load/unload/running.
> So while one reporter's recover flow executes, it can go through
> another reporter's destroy and create. If the recover flow was
> triggered by the devlink recover command, such a flow deadlocks
> trying to take a mutex it already holds.
> To avoid this deadlock, execute the recover flow from a workqueue.
> Once the recover work completes successfully, the reporter health
> state and recover counter are updated.
> 
> Signed-off-by: Moshe Shemesh <moshe@mellanox.com>
> Signed-off-by: Jiri Pirko <jiri@mellanox.com>

Reviewed-by: Saeed Mahameed <saeedm@mellanox.com>

> ---
>  include/net/devlink.h |  1 +
>  net/core/devlink.c    | 70 +++++++++++++++++++++++++++++++++++----------------
>  2 files changed, 50 insertions(+), 21 deletions(-)
> 
> diff --git a/include/net/devlink.h b/include/net/devlink.h
> index 4f5e416..820b327 100644
> --- a/include/net/devlink.h
> +++ b/include/net/devlink.h
> @@ -32,6 +32,7 @@ struct devlink {
>  	struct list_head region_list;
>  	u32 snapshot_id;
>  	struct list_head reporter_list;
> +	struct workqueue_struct *reporters_wq;
>  	struct devlink_dpipe_headers *dpipe_headers;
>  	const struct devlink_ops *ops;
>  	struct device *dev;
> diff --git a/net/core/devlink.c b/net/core/devlink.c
> index 7b91605..8ee380e 100644
> --- a/net/core/devlink.c
> +++ b/net/core/devlink.c
> @@ -4422,6 +4422,7 @@ struct devlink_health_reporter {
>  	u64 error_count;
>  	u64 recovery_count;
>  	u64 last_recovery_ts;
> +	struct work_struct recover_work;
>  };
>  
>  void *
> @@ -4443,6 +4444,40 @@ struct devlink_health_reporter {
>  	return NULL;
>  }
>  
> +static int
> +devlink_health_reporter_recover(struct devlink_health_reporter *reporter,
> +				void *priv_ctx)
> +{
> +	int err;
> +
> +	if (!reporter->ops->recover)
> +		return -EOPNOTSUPP;
> +
> +	err = reporter->ops->recover(reporter, priv_ctx);
> +	if (err)
> +		return err;
> +
> +	reporter->recovery_count++;
> +	reporter->health_state = DEVLINK_HEALTH_REPORTER_STATE_HEALTHY;
> +	reporter->last_recovery_ts = jiffies;
> +
> +	trace_devlink_health_reporter_state_update(reporter->devlink,
> +						   reporter->ops->name,
> +						   reporter->health_state);
> +	return 0;
> +}
> +
> +static void
> +devlink_health_reporter_recover_work(struct work_struct *work)
> +{
> +	struct devlink_health_reporter *reporter;
> +
> +	reporter = container_of(work, struct devlink_health_reporter,
> +				recover_work);
> +
> +	devlink_health_reporter_recover(reporter, NULL);
> +}
> +
>  /**
>   *	devlink_health_reporter_create - create devlink health reporter
>   *
> @@ -4483,6 +4518,8 @@ struct devlink_health_reporter *
>  	reporter->devlink = devlink;
>  	reporter->graceful_period = graceful_period;
>  	reporter->auto_recover = auto_recover;
> +	INIT_WORK(&reporter->recover_work,
> +		  devlink_health_reporter_recover_work);
>  	mutex_init(&reporter->dump_lock);
>  	list_add_tail(&reporter->list, &devlink->reporter_list);
>  unlock:
> @@ -4505,6 +4542,7 @@ struct devlink_health_reporter *
>  	mutex_unlock(&reporter->devlink->lock);
>  	if (reporter->dump_fmsg)
>  		devlink_fmsg_free(reporter->dump_fmsg);
> +	cancel_work_sync(&reporter->recover_work);
>  	kfree(reporter);
>  }
>  EXPORT_SYMBOL_GPL(devlink_health_reporter_destroy);
> @@ -4526,26 +4564,6 @@ struct devlink_health_reporter *
>  }
>  EXPORT_SYMBOL_GPL(devlink_health_reporter_state_update);
>  
> -static int
> -devlink_health_reporter_recover(struct devlink_health_reporter *reporter,
> -				void *priv_ctx)
> -{
> -	int err;
> -
> -	if (!reporter->ops->recover)
> -		return -EOPNOTSUPP;
> -
> -	err = reporter->ops->recover(reporter, priv_ctx);
> -	if (err)
> -		return err;
> -
> -	reporter->recovery_count++;
> -	reporter->health_state = DEVLINK_HEALTH_REPORTER_STATE_HEALTHY;
> -	reporter->last_recovery_ts = jiffies;
> -
> -	return 0;
> -}
> -
>  static void
>  devlink_health_dump_clear(struct devlink_health_reporter *reporter)
>  {
> @@ -4813,7 +4831,11 @@ static int devlink_nl_cmd_health_reporter_recover_doit(struct sk_buff *skb,
>  	if (!reporter)
>  		return -EINVAL;
>  
> -	return devlink_health_reporter_recover(reporter, NULL);
> +	if (!reporter->ops->recover)
> +		return -EOPNOTSUPP;
> +
> +	queue_work(devlink->reporters_wq, &reporter->recover_work);
> +	return 0;
>  }
>  
>  static int devlink_nl_cmd_health_reporter_diagnose_doit(struct sk_buff *skb,
> @@ -5234,6 +5256,11 @@ struct devlink *devlink_alloc(const struct devlink_ops *ops, size_t priv_size)
>  	INIT_LIST_HEAD(&devlink->param_list);
>  	INIT_LIST_HEAD(&devlink->region_list);
>  	INIT_LIST_HEAD(&devlink->reporter_list);
> +	devlink->reporters_wq = create_singlethread_workqueue("devlink_reporters");
> +	if (!devlink->reporters_wq) {
> +		kfree(devlink);
> +		return NULL;
> +	}
>  	mutex_init(&devlink->lock);
>  	return devlink;
>  }
> @@ -5278,6 +5305,7 @@ void devlink_unregister(struct devlink *devlink)
>  void devlink_free(struct devlink *devlink)
>  {
>  	mutex_destroy(&devlink->lock);
> +	destroy_workqueue(devlink->reporters_wq);
>  	WARN_ON(!list_empty(&devlink->reporter_list));
>  	WARN_ON(!list_empty(&devlink->region_list));
>  	WARN_ON(!list_empty(&devlink->param_list));
Jakub Kicinski April 25, 2019, 9:38 p.m. UTC | #2
On Thu, 25 Apr 2019 13:57:03 +0300, Moshe Shemesh wrote:
> Different reporters have different roles in the driver and are
> created/destroyed during different stages of driver load/unload/running.
> So while one reporter's recover flow executes, it can go through another
> reporter's destroy and create. If the recover flow was triggered by the
> devlink recover command, such a flow deadlocks trying to take a mutex it
> already holds.
> To avoid this deadlock, execute the recover flow from a workqueue. Once
> the recover work completes successfully, the reporter health state and
> recover counter are updated.

Naive question, why not just run the doit unlocked?  Why the async?

> Signed-off-by: Moshe Shemesh <moshe@mellanox.com>
> Signed-off-by: Jiri Pirko <jiri@mellanox.com>

One day we really gotta start documenting the context from which 
things are called and the locks held when ops are invoked.. :)
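
For reference, a minimal sketch of that unlocked-doit direction, assuming
the DEVLINK_NL_FLAG_NO_LOCK internal flag that devlink_nl_pre_doit() in
this period's net/core/devlink.c checks before taking devlink->lock (the
reporter would then need its own lifetime protection; see the refcount
discussion further down the thread):

	/* in the devlink genl_ops[]: run the recover doit without devlink->lock */
	{
		.cmd = DEVLINK_CMD_HEALTH_REPORTER_RECOVER,
		.doit = devlink_nl_cmd_health_reporter_recover_doit,
		.policy = devlink_nl_policy,
		.flags = GENL_ADMIN_PERM,
		.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
				  DEVLINK_NL_FLAG_NO_LOCK,
	},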

> diff --git a/net/core/devlink.c b/net/core/devlink.c
> index 7b91605..8ee380e 100644
> --- a/net/core/devlink.c
> +++ b/net/core/devlink.c
> @@ -4443,6 +4444,40 @@ struct devlink_health_reporter {
>  	return NULL;
>  }
>  
> +static int
> +devlink_health_reporter_recover(struct devlink_health_reporter *reporter,
> +				void *priv_ctx)
> +{
> +	int err;
> +
> +	if (!reporter->ops->recover)
> +		return -EOPNOTSUPP;
> +
> +	err = reporter->ops->recover(reporter, priv_ctx);
> +	if (err)
> +		return err;
> +
> +	reporter->recovery_count++;
> +	reporter->health_state = DEVLINK_HEALTH_REPORTER_STATE_HEALTHY;
> +	reporter->last_recovery_ts = jiffies;

Well, the dump looks at these without taking any locks..
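
(If these fields are to stay unlocked, the writer and the dump-side reader
would at minimum want WRITE_ONCE()/READ_ONCE() pairing. A sketch of the
idea, not something this patch adds; the u64 counters would still need
more than this:)

	/* writer, in devlink_health_reporter_recover() */
	WRITE_ONCE(reporter->health_state, DEVLINK_HEALTH_REPORTER_STATE_HEALTHY);

	/* reader, in the netlink fill path */
	if (nla_put_u8(msg, DEVLINK_ATTR_HEALTH_REPORTER_STATE,
		       READ_ONCE(reporter->health_state)))
		goto nla_put_failure;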

> +	trace_devlink_health_reporter_state_update(reporter->devlink,
> +						   reporter->ops->name,
> +						   reporter->health_state);
> +	return 0;
> +}
> +
> +static void
> +devlink_health_reporter_recover_work(struct work_struct *work)
> +{
> +	struct devlink_health_reporter *reporter;
> +
> +	reporter = container_of(work, struct devlink_health_reporter,
> +				recover_work);
> +
> +	devlink_health_reporter_recover(reporter, NULL);
> +}
> +
>  /**
>   *	devlink_health_reporter_create - create devlink health reporter
>   *

> @@ -4483,6 +4518,8 @@ struct devlink_health_reporter *
>  	reporter->devlink = devlink;
>  	reporter->graceful_period = graceful_period;
>  	reporter->auto_recover = auto_recover;
> +	INIT_WORK(&reporter->recover_work,
> +		  devlink_health_reporter_recover_work);
>  	mutex_init(&reporter->dump_lock);
>  	list_add_tail(&reporter->list, &devlink->reporter_list);
>  unlock:
> @@ -4505,6 +4542,7 @@ struct devlink_health_reporter *
>  	mutex_unlock(&reporter->devlink->lock);
>  	if (reporter->dump_fmsg)
>  		devlink_fmsg_free(reporter->dump_fmsg);
> +	cancel_work_sync(&reporter->recover_work);
>  	kfree(reporter);
>  }
>  EXPORT_SYMBOL_GPL(devlink_health_reporter_destroy);
> @@ -4526,26 +4564,6 @@ struct devlink_health_reporter *
>  }
>  EXPORT_SYMBOL_GPL(devlink_health_reporter_state_update);
>  
> -static int
> -devlink_health_reporter_recover(struct devlink_health_reporter *reporter,
> -				void *priv_ctx)
> -{
> -	int err;
> -
> -	if (!reporter->ops->recover)
> -		return -EOPNOTSUPP;
> -
> -	err = reporter->ops->recover(reporter, priv_ctx);
> -	if (err)
> -		return err;
> -
> -	reporter->recovery_count++;
> -	reporter->health_state = DEVLINK_HEALTH_REPORTER_STATE_HEALTHY;
> -	reporter->last_recovery_ts = jiffies;
> -
> -	return 0;
> -}
> -
>  static void
>  devlink_health_dump_clear(struct devlink_health_reporter *reporter)
>  {
> @@ -4813,7 +4831,11 @@ static int devlink_nl_cmd_health_reporter_recover_doit(struct sk_buff *skb,
>  	if (!reporter)
>  		return -EINVAL;
>  
> -	return devlink_health_reporter_recover(reporter, NULL);
> +	if (!reporter->ops->recover)
> +		return -EOPNOTSUPP;
> +
> +	queue_work(devlink->reporters_wq, &reporter->recover_work);
> +	return 0;
>  }

So the recover user space request will no longer return the status, and
it will not actually wait for the recover to happen, leaving the user
pondering: did the recover run and fail, or has it not run yet...

>  static int devlink_nl_cmd_health_reporter_diagnose_doit(struct sk_buff *skb,
> @@ -5234,6 +5256,11 @@ struct devlink *devlink_alloc(const struct devlink_ops *ops, size_t priv_size)
>  	INIT_LIST_HEAD(&devlink->param_list);
>  	INIT_LIST_HEAD(&devlink->region_list);
>  	INIT_LIST_HEAD(&devlink->reporter_list);
> +	devlink->reporters_wq = create_singlethread_workqueue("devlink_reporters");

Why is it single threaded?

> +	if (!devlink->reporters_wq) {
> +		kfree(devlink);
> +		return NULL;
> +	}
>  	mutex_init(&devlink->lock);
>  	return devlink;
>  }
> @@ -5278,6 +5305,7 @@ void devlink_unregister(struct devlink *devlink)
>  void devlink_free(struct devlink *devlink)
>  {
>  	mutex_destroy(&devlink->lock);
> +	destroy_workqueue(devlink->reporters_wq);
>  	WARN_ON(!list_empty(&devlink->reporter_list));
>  	WARN_ON(!list_empty(&devlink->region_list));
>  	WARN_ON(!list_empty(&devlink->param_list));
Saeed Mahameed April 26, 2019, 1:42 a.m. UTC | #3
On Thu, 2019-04-25 at 14:38 -0700, Jakub Kicinski wrote:
> On Thu, 25 Apr 2019 13:57:03 +0300, Moshe Shemesh wrote:
> > Different reporters have different roles in the driver and are
> > created/destroyed during different stages of driver
> > load/unload/running.
> > So while one reporter's recover flow executes, it can go through
> > another reporter's destroy and create. If the recover flow was
> > triggered by the devlink recover command, such a flow deadlocks
> > trying to take a mutex it already holds.
> > To avoid this deadlock, execute the recover flow from a workqueue.
> > Once the recover work completes successfully, the reporter health
> > state and recover counter are updated.
> 
> Naive question, why not just run the doit unlocked?  Why the async?
> 
> > Signed-off-by: Moshe Shemesh <moshe@mellanox.com>
> > Signed-off-by: Jiri Pirko <jiri@mellanox.com>
> 
> One day we really gotta start documenting the context from which 
> things are called and the locks held when ops are invoked.. :)
> 
> > diff --git a/net/core/devlink.c b/net/core/devlink.c
> > index 7b91605..8ee380e 100644
> > --- a/net/core/devlink.c
> > +++ b/net/core/devlink.c
> > @@ -4443,6 +4444,40 @@ struct devlink_health_reporter {
> >  	return NULL;
> >  }
> >  
> > +static int
> > +devlink_health_reporter_recover(struct devlink_health_reporter *reporter,
> > +				void *priv_ctx)
> > +{
> > +	int err;
> > +
> > +	if (!reporter->ops->recover)
> > +		return -EOPNOTSUPP;
> > +
> > +	err = reporter->ops->recover(reporter, priv_ctx);
> > +	if (err)
> > +		return err;
> > +
> > +	reporter->recovery_count++;
> > +	reporter->health_state = DEVLINK_HEALTH_REPORTER_STATE_HEALTHY;
> > +	reporter->last_recovery_ts = jiffies;
> 
> Well, the dump looks at these without taking any locks..
> 
> > +	trace_devlink_health_reporter_state_update(reporter->devlink,
> > +						   reporter->ops->name,
> > +						   reporter->health_state);
> > +	return 0;
> > +}
> > +
> > +static void
> > +devlink_health_reporter_recover_work(struct work_struct *work)
> > +{
> > +	struct devlink_health_reporter *reporter;
> > +
> > +	reporter = container_of(work, struct devlink_health_reporter,
> > +				recover_work);
> > +
> > +	devlink_health_reporter_recover(reporter, NULL);
> > +}
> > +
> >  /**
> >   *	devlink_health_reporter_create - create devlink health reporter
> >   *
> > @@ -4483,6 +4518,8 @@ struct devlink_health_reporter *
> >  	reporter->devlink = devlink;
> >  	reporter->graceful_period = graceful_period;
> >  	reporter->auto_recover = auto_recover;
> > +	INIT_WORK(&reporter->recover_work,
> > +		  devlink_health_reporter_recover_work);
> >  	mutex_init(&reporter->dump_lock);
> >  	list_add_tail(&reporter->list, &devlink->reporter_list);
> >  unlock:
> > @@ -4505,6 +4542,7 @@ struct devlink_health_reporter *
> >  	mutex_unlock(&reporter->devlink->lock);
> >  	if (reporter->dump_fmsg)
> >  		devlink_fmsg_free(reporter->dump_fmsg);
> > +	cancel_work_sync(&reporter->recover_work);
> >  	kfree(reporter);
> >  }
> >  EXPORT_SYMBOL_GPL(devlink_health_reporter_destroy);
> > @@ -4526,26 +4564,6 @@ struct devlink_health_reporter *
> >  }
> >  EXPORT_SYMBOL_GPL(devlink_health_reporter_state_update);
> >  
> > -static int
> > -devlink_health_reporter_recover(struct devlink_health_reporter *reporter,
> > -				void *priv_ctx)
> > -{
> > -	int err;
> > -
> > -	if (!reporter->ops->recover)
> > -		return -EOPNOTSUPP;
> > -
> > -	err = reporter->ops->recover(reporter, priv_ctx);
> > -	if (err)
> > -		return err;
> > -
> > -	reporter->recovery_count++;
> > -	reporter->health_state = DEVLINK_HEALTH_REPORTER_STATE_HEALTHY;
> > -	reporter->last_recovery_ts = jiffies;
> > -
> > -	return 0;
> > -}
> > -
> >  static void
> >  devlink_health_dump_clear(struct devlink_health_reporter *reporter)
> >  {
> > @@ -4813,7 +4831,11 @@ static int devlink_nl_cmd_health_reporter_recover_doit(struct sk_buff *skb,
> >  	if (!reporter)
> >  		return -EINVAL;
> >  
> > -	return devlink_health_reporter_recover(reporter, NULL);
> > +	if (!reporter->ops->recover)
> > +		return -EOPNOTSUPP;
> > +
> > +	queue_work(devlink->reporters_wq, &reporter->recover_work);
> > +	return 0;
> >  }
> 
> So the recover user space request will no longer return the status,
> and it will not actually wait for the recover to happen, leaving the
> user pondering: did the recover run and fail, or has it not run yet...
> 

wait_for_completion_interruptible_timeout is missing from the design?
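
A rough sketch of that idea: keep the work async but have the netlink
request wait for the result (recover_done and recover_err would be
hypothetical new fields in struct devlink_health_reporter):

	/* work handler: record the result and signal the waiter */
	reporter->recover_err = devlink_health_reporter_recover(reporter, NULL);
	complete(&reporter->recover_done);

	/* in devlink_nl_cmd_health_reporter_recover_doit() */
	long t;

	reinit_completion(&reporter->recover_done);
	queue_work(devlink->reporters_wq, &reporter->recover_work);
	t = wait_for_completion_interruptible_timeout(&reporter->recover_done,
						      msecs_to_jiffies(10000));
	if (t < 0)
		return t;		/* interrupted: -ERESTARTSYS */
	if (!t)
		return -ETIMEDOUT;	/* recover did not finish in time */
	return reporter->recover_err;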

> >  static int devlink_nl_cmd_health_reporter_diagnose_doit(struct sk_buff *skb,
> > @@ -5234,6 +5256,11 @@ struct devlink *devlink_alloc(const struct devlink_ops *ops, size_t priv_size)
> >  	INIT_LIST_HEAD(&devlink->param_list);
> >  	INIT_LIST_HEAD(&devlink->region_list);
> >  	INIT_LIST_HEAD(&devlink->reporter_list);
> > +	devlink->reporters_wq = create_singlethread_workqueue("devlink_reporters");
> 
> Why is it single threaded?
> 
> > +	if (!devlink->reporters_wq) {
> > +		kfree(devlink);
> > +		return NULL;
> > +	}
> >  	mutex_init(&devlink->lock);
> >  	return devlink;
> >  }
> > @@ -5278,6 +5305,7 @@ void devlink_unregister(struct devlink *devlink)
> >  void devlink_free(struct devlink *devlink)
> >  {
> >  	mutex_destroy(&devlink->lock);
> > +	destroy_workqueue(devlink->reporters_wq);
> >  	WARN_ON(!list_empty(&devlink->reporter_list));
> >  	WARN_ON(!list_empty(&devlink->region_list));
> >  	WARN_ON(!list_empty(&devlink->param_list));
Jakub Kicinski April 26, 2019, 2:37 a.m. UTC | #4
On Fri, 26 Apr 2019 01:42:34 +0000, Saeed Mahameed wrote:
> > > @@ -4813,7 +4831,11 @@ static int
> > > devlink_nl_cmd_health_reporter_recover_doit(struct sk_buff *skb,
> > >  	if (!reporter)
> > >  		return -EINVAL;
> > >  
> > > -	return devlink_health_reporter_recover(reporter, NULL);
> > > +	if (!reporter->ops->recover)
> > > +		return -EOPNOTSUPP;
> > > +
> > > +	queue_work(devlink->reporters_wq, &reporter->recover_work);
> > > +	return 0;
> > >  }  
> > 
> > So the recover user space request will no longer return the status,
> > and it will not actually wait for the recover to happen, leaving the
> > user pondering: did the recover run and fail, or has it not run yet...
> >   
> 
> wait_for_completion_interruptible_timeout is missing from the design?

Perhaps, but I think it's better to avoid the async execution of 
the recover altogether.  Perhaps it's better to refcount the 
reporters on the call to recover_doit?  Or some such.. :)
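
A sketch of the acquire side of that refcount idea (the refcount field and
devlink_health_reporter_put() are hypothetical; the lookup mirrors what
the doit already does): take a reference under devlink->lock, then run
recover synchronously with the lock dropped:

	struct devlink_health_reporter *reporter;
	int err;

	mutex_lock(&devlink->lock);
	reporter = devlink_health_reporter_find_by_name(devlink, reporter_name);
	if (reporter)
		refcount_inc(&reporter->refcount); /* reporter_list still holds a ref */
	mutex_unlock(&devlink->lock);
	if (!reporter)
		return -EINVAL;

	err = devlink_health_reporter_recover(reporter, NULL); /* lock not held */
	devlink_health_reporter_put(reporter);
	return err;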
Jiri Pirko April 26, 2019, 4:38 a.m. UTC | #5
Thu, Apr 25, 2019 at 11:38:47PM CEST, jakub.kicinski@netronome.com wrote:
>On Thu, 25 Apr 2019 13:57:03 +0300, Moshe Shemesh wrote:
>> Different reporters have different roles in the driver and are
>> created/destroyed during different stages of driver load/unload/running.
>> So while one reporter's recover flow executes, it can go through another
>> reporter's destroy and create. If the recover flow was triggered by the
>> devlink recover command, such a flow deadlocks trying to take a mutex it
>> already holds.
>> To avoid this deadlock, execute the recover flow from a workqueue. Once
>> the recover work completes successfully, the reporter health state and
>> recover counter are updated.
>
>Naive question, why not just run the doit unlocked?  Why the async?

Hmm, you have a point here. That would probably be doable.
Moshe Shemesh April 26, 2019, 1:04 p.m. UTC | #6
On 4/26/2019 5:37 AM, Jakub Kicinski wrote:
> On Fri, 26 Apr 2019 01:42:34 +0000, Saeed Mahameed wrote:
>>>> @@ -4813,7 +4831,11 @@ static int
>>>> devlink_nl_cmd_health_reporter_recover_doit(struct sk_buff *skb,
>>>>   	if (!reporter)
>>>>   		return -EINVAL;
>>>>   
>>>> -	return devlink_health_reporter_recover(reporter, NULL);
>>>> +	if (!reporter->ops->recover)
>>>> +		return -EOPNOTSUPP;
>>>> +
>>>> +	queue_work(devlink->reporters_wq, &reporter->recover_work);
>>>> +	return 0;
>>>>   }
>>>
>>> So the recover user space request will no longer return the status,
>>> and it will not actually wait for the recover to happen, leaving the
>>> user pondering: did the recover run and fail, or has it not run yet...
>>>    
>>
>> wait_for_completion_interruptible_timeout is missing from the design?
> 
> Perhaps, but I think it's better to avoid the async execution of
> the recover altogether.  Perhaps it's better to refcount the
> reporters on the call to recover_doit?  Or some such.. :)
> 

I tried using refcount instead of devlink lock here. But once I get to 
reporter destroy I have to wait for the refcount to drop, and I'm not 
sure whether I should release the reporter after some timeout or wait 
endlessly for the refcount. Neither option seems good.
Jakub Kicinski April 26, 2019, 4:19 p.m. UTC | #7
On Fri, Apr 26, 2019 at 6:04 AM Moshe Shemesh <moshe@mellanox.com> wrote:
> On 4/26/2019 5:37 AM, Jakub Kicinski wrote:
> > On Fri, 26 Apr 2019 01:42:34 +0000, Saeed Mahameed wrote:
> >>>> @@ -4813,7 +4831,11 @@ static int
> >>>> devlink_nl_cmd_health_reporter_recover_doit(struct sk_buff *skb,
> >>>>    if (!reporter)
> >>>>            return -EINVAL;
> >>>>
> >>>> -  return devlink_health_reporter_recover(reporter, NULL);
> >>>> +  if (!reporter->ops->recover)
> >>>> +          return -EOPNOTSUPP;
> >>>> +
> >>>> +  queue_work(devlink->reporters_wq, &reporter->recover_work);
> >>>> +  return 0;
> >>>>   }
> >>>
> >>> So the recover user space request will no longer return the status,
> >>> and it will not actually wait for the recover to happen, leaving the
> >>> user pondering: did the recover run and fail, or has it not run yet...
> >>>
> >>
> >> wait_for_completion_interruptible_timeout is missing from the design?
> >
> > Perhaps, but I think it's better to avoid the async execution of
> > the recover altogether.  Perhaps it's better to refcount the
> > reporters on the call to recover_doit?  Or some such.. :)
> >
>
> I tried using refcount instead of devlink lock here. But once I get to
> reporter destroy I have to wait for the refcount to drop, and I'm not
> sure whether I should release the reporter after some timeout or wait
> endlessly for the refcount. Neither option seems good.

Well you should "endlessly" wait.  Why would the refcount not drop?
You have to remove the reporter from the list first, so no new
operations can start, right?
In principle there is no difference between waiting for the refcount
to drop, flushing the work, or waiting for the devlink lock if the
reporter holds it.
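
Concretely, the "endless" wait could look like this on the destroy side
(refcount and a free_done completion are hypothetical fields; the reporter
starts with refcount 1 held on behalf of reporter_list, and in-flight
operations take/drop a reference as sketched earlier in the thread):

	static void
	devlink_health_reporter_put(struct devlink_health_reporter *reporter)
	{
		if (refcount_dec_and_test(&reporter->refcount))
			complete(&reporter->free_done);
	}

	void devlink_health_reporter_destroy(struct devlink_health_reporter *reporter)
	{
		mutex_lock(&reporter->devlink->lock);
		list_del(&reporter->list);	/* no new lookup can take a ref */
		mutex_unlock(&reporter->devlink->lock);

		devlink_health_reporter_put(reporter);	   /* drop the list's ref */
		wait_for_completion(&reporter->free_done); /* wait out in-flight ops */
		kfree(reporter);
	}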
Moshe Shemesh April 27, 2019, 5:15 p.m. UTC | #8
On 4/26/2019 7:19 PM, Jakub Kicinski wrote:
> On Fri, Apr 26, 2019 at 6:04 AM Moshe Shemesh <moshe@mellanox.com> wrote:
>> On 4/26/2019 5:37 AM, Jakub Kicinski wrote:
>>> On Fri, 26 Apr 2019 01:42:34 +0000, Saeed Mahameed wrote:
>>>>>> @@ -4813,7 +4831,11 @@ static int
>>>>>> devlink_nl_cmd_health_reporter_recover_doit(struct sk_buff *skb,
>>>>>>     if (!reporter)
>>>>>>             return -EINVAL;
>>>>>>
>>>>>> -  return devlink_health_reporter_recover(reporter, NULL);
>>>>>> +  if (!reporter->ops->recover)
>>>>>> +          return -EOPNOTSUPP;
>>>>>> +
>>>>>> +  queue_work(devlink->reporters_wq, &reporter->recover_work);
>>>>>> +  return 0;
>>>>>>    }
>>>>>
>>>>> So the recover user space request will no longer return the status,
>>>>> and it will not actually wait for the recover to happen, leaving the
>>>>> user pondering: did the recover run and fail, or has it not run yet...
>>>>>
>>>>
>>>> wait_for_completion_interruptible_timeout is missing from the design?
>>>
>>> Perhaps, but I think it's better to avoid the async execution of
>>> the recover altogether.  Perhaps it's better to refcount the
>>> reporters on the call to recover_doit?  Or some such.. :)
>>>
>>
>> I tried using refcount instead of devlink lock here. But once I get to
>> reporter destroy I have to wait for the refcount to drop, and I'm not
>> sure whether I should release the reporter after some timeout or wait
>> endlessly for the refcount. Neither option seems good.
> 
> Well you should "endlessly" wait.  Why would the refcount not drop?
> You have to remove the reporter from the list first, so no new
> operations can start, right?
> In principle there is no difference between waiting for the refcount
> to drop, flushing the work, or waiting for the devlink lock if the
> reporter holds it.
> 
Makes sense, I will rewrite this patch, thanks.

Patch

diff --git a/include/net/devlink.h b/include/net/devlink.h
index 4f5e416..820b327 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -32,6 +32,7 @@  struct devlink {
 	struct list_head region_list;
 	u32 snapshot_id;
 	struct list_head reporter_list;
+	struct workqueue_struct *reporters_wq;
 	struct devlink_dpipe_headers *dpipe_headers;
 	const struct devlink_ops *ops;
 	struct device *dev;
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 7b91605..8ee380e 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -4422,6 +4422,7 @@  struct devlink_health_reporter {
 	u64 error_count;
 	u64 recovery_count;
 	u64 last_recovery_ts;
+	struct work_struct recover_work;
 };
 
 void *
@@ -4443,6 +4444,40 @@  struct devlink_health_reporter {
 	return NULL;
 }
 
+static int
+devlink_health_reporter_recover(struct devlink_health_reporter *reporter,
+				void *priv_ctx)
+{
+	int err;
+
+	if (!reporter->ops->recover)
+		return -EOPNOTSUPP;
+
+	err = reporter->ops->recover(reporter, priv_ctx);
+	if (err)
+		return err;
+
+	reporter->recovery_count++;
+	reporter->health_state = DEVLINK_HEALTH_REPORTER_STATE_HEALTHY;
+	reporter->last_recovery_ts = jiffies;
+
+	trace_devlink_health_reporter_state_update(reporter->devlink,
+						   reporter->ops->name,
+						   reporter->health_state);
+	return 0;
+}
+
+static void
+devlink_health_reporter_recover_work(struct work_struct *work)
+{
+	struct devlink_health_reporter *reporter;
+
+	reporter = container_of(work, struct devlink_health_reporter,
+				recover_work);
+
+	devlink_health_reporter_recover(reporter, NULL);
+}
+
 /**
  *	devlink_health_reporter_create - create devlink health reporter
  *
@@ -4483,6 +4518,8 @@  struct devlink_health_reporter *
 	reporter->devlink = devlink;
 	reporter->graceful_period = graceful_period;
 	reporter->auto_recover = auto_recover;
+	INIT_WORK(&reporter->recover_work,
+		  devlink_health_reporter_recover_work);
 	mutex_init(&reporter->dump_lock);
 	list_add_tail(&reporter->list, &devlink->reporter_list);
 unlock:
@@ -4505,6 +4542,7 @@  struct devlink_health_reporter *
 	mutex_unlock(&reporter->devlink->lock);
 	if (reporter->dump_fmsg)
 		devlink_fmsg_free(reporter->dump_fmsg);
+	cancel_work_sync(&reporter->recover_work);
 	kfree(reporter);
 }
 EXPORT_SYMBOL_GPL(devlink_health_reporter_destroy);
@@ -4526,26 +4564,6 @@  struct devlink_health_reporter *
 }
 EXPORT_SYMBOL_GPL(devlink_health_reporter_state_update);
 
-static int
-devlink_health_reporter_recover(struct devlink_health_reporter *reporter,
-				void *priv_ctx)
-{
-	int err;
-
-	if (!reporter->ops->recover)
-		return -EOPNOTSUPP;
-
-	err = reporter->ops->recover(reporter, priv_ctx);
-	if (err)
-		return err;
-
-	reporter->recovery_count++;
-	reporter->health_state = DEVLINK_HEALTH_REPORTER_STATE_HEALTHY;
-	reporter->last_recovery_ts = jiffies;
-
-	return 0;
-}
-
 static void
 devlink_health_dump_clear(struct devlink_health_reporter *reporter)
 {
@@ -4813,7 +4831,11 @@  static int devlink_nl_cmd_health_reporter_recover_doit(struct sk_buff *skb,
 	if (!reporter)
 		return -EINVAL;
 
-	return devlink_health_reporter_recover(reporter, NULL);
+	if (!reporter->ops->recover)
+		return -EOPNOTSUPP;
+
+	queue_work(devlink->reporters_wq, &reporter->recover_work);
+	return 0;
 }
 
 static int devlink_nl_cmd_health_reporter_diagnose_doit(struct sk_buff *skb,
@@ -5234,6 +5256,11 @@  struct devlink *devlink_alloc(const struct devlink_ops *ops, size_t priv_size)
 	INIT_LIST_HEAD(&devlink->param_list);
 	INIT_LIST_HEAD(&devlink->region_list);
 	INIT_LIST_HEAD(&devlink->reporter_list);
+	devlink->reporters_wq = create_singlethread_workqueue("devlink_reporters");
+	if (!devlink->reporters_wq) {
+		kfree(devlink);
+		return NULL;
+	}
 	mutex_init(&devlink->lock);
 	return devlink;
 }
@@ -5278,6 +5305,7 @@  void devlink_unregister(struct devlink *devlink)
 void devlink_free(struct devlink *devlink)
 {
 	mutex_destroy(&devlink->lock);
+	destroy_workqueue(devlink->reporters_wq);
 	WARN_ON(!list_empty(&devlink->reporter_list));
 	WARN_ON(!list_empty(&devlink->region_list));
 	WARN_ON(!list_empty(&devlink->param_list));