diff mbox series

[linux,dev-5.10,27/35] net/ncsi: Avoid channel_monitor hrtimer deadlock

Message ID 20210308225419.46530-28-eajames@linux.ibm.com
State New
Headers show
Series Rainier and Everest system updates | expand

Commit Message

Eddie James March 8, 2021, 10:54 p.m. UTC
From: Milton Miller <miltonm@us.ibm.com>

Calling ncsi_stop_channel_monitor from channel_monitor is a guaranteed
deadlock on SMP because stop calls del_timer_sync on the timer that
inoked channel_monitor as its timer function.

Recognise the inherent race of marking the monitor disabled before
deleting the timer by just returning if enable was cleared.  After
a timeout (the default case -- reset to START when response recieved)
just mark the monitor.enabled false.

If the channel has an entrie on the channel_queue list, or if the the
state is not ACTIVE or INACTIVE, then warn and mark the timer stopped
and don't restart, as the locking is broken somehow.

Fixes: 0795fb2021f0 ("net/ncsi: Stop monitor if channel times out or is inactive")
Signed-off-by: Milton Miller <miltonm@us.ibm.com>
---
 net/ncsi/ncsi-manage.c | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

Comments

Joel Stanley March 12, 2021, 12:35 a.m. UTC | #1
On Mon, 8 Mar 2021 at 22:56, Eddie James <eajames@linux.ibm.com> wrote:
>
> From: Milton Miller <miltonm@us.ibm.com>
>
> Calling ncsi_stop_channel_monitor from channel_monitor is a guaranteed
> deadlock on SMP because stop calls del_timer_sync on the timer that
> inoked channel_monitor as its timer function.
>
> Recognise the inherent race of marking the monitor disabled before
> deleting the timer by just returning if enable was cleared.  After
> a timeout (the default case -- reset to START when response recieved)
> just mark the monitor.enabled false.
>
> If the channel has an entrie on the channel_queue list, or if the the
> state is not ACTIVE or INACTIVE, then warn and mark the timer stopped
> and don't restart, as the locking is broken somehow.
>
> Fixes: 0795fb2021f0 ("net/ncsi: Stop monitor if channel times out or is inactive")
> Signed-off-by: Milton Miller <miltonm@us.ibm.com>

Please send upstream for review.


> ---
>  net/ncsi/ncsi-manage.c | 18 ++++++++++++------
>  1 file changed, 12 insertions(+), 6 deletions(-)
>
> diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c
> index a9cb355324d1..5a2beaf874c7 100644
> --- a/net/ncsi/ncsi-manage.c
> +++ b/net/ncsi/ncsi-manage.c
> @@ -105,13 +105,20 @@ static void ncsi_channel_monitor(struct timer_list *t)
>         monitor_state = nc->monitor.state;
>         spin_unlock_irqrestore(&nc->lock, flags);
>
> -       if (!enabled || chained) {
> -               ncsi_stop_channel_monitor(nc);
> -               return;
> +       if (!enabled)
> +               return;         /* expected race disabling timer */
> +       if (WARN_ON_ONCE(chained)) {
> +               goto bad_state;
>         }
>         if (state != NCSI_CHANNEL_INACTIVE &&
>             state != NCSI_CHANNEL_ACTIVE) {
> -               ncsi_stop_channel_monitor(nc);
> +bad_state:
> +               netdev_warn(ndp->ndev.dev,
> +                           "Bad NCSI monitor state channel %d 0x%x %s queue\n",
> +                           nc->id, state, chained ? "on" : "off");
> +               spin_lock_irqsave(&nc->lock, flags);
> +               nc->monitor.enabled = false;
> +               spin_unlock_irqrestore(&nc->lock, flags);
>                 return;
>         }
>
> @@ -136,10 +143,9 @@ static void ncsi_channel_monitor(struct timer_list *t)
>                 ncsi_report_link(ndp, true);
>                 ndp->flags |= NCSI_DEV_RESHUFFLE;
>
> -               ncsi_stop_channel_monitor(nc);
> -
>                 ncm = &nc->modes[NCSI_MODE_LINK];
>                 spin_lock_irqsave(&nc->lock, flags);
> +               nc->monitor.enabled = false;
>                 nc->state = NCSI_CHANNEL_INVISIBLE;
>                 ncm->data[2] &= ~0x1;
>                 spin_unlock_irqrestore(&nc->lock, flags);
> --
> 2.27.0
>
diff mbox series

Patch

diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c
index a9cb355324d1..5a2beaf874c7 100644
--- a/net/ncsi/ncsi-manage.c
+++ b/net/ncsi/ncsi-manage.c
@@ -105,13 +105,20 @@  static void ncsi_channel_monitor(struct timer_list *t)
 	monitor_state = nc->monitor.state;
 	spin_unlock_irqrestore(&nc->lock, flags);
 
-	if (!enabled || chained) {
-		ncsi_stop_channel_monitor(nc);
-		return;
+	if (!enabled)
+		return;		/* expected race disabling timer */
+	if (WARN_ON_ONCE(chained)) {
+		goto bad_state;
 	}
 	if (state != NCSI_CHANNEL_INACTIVE &&
 	    state != NCSI_CHANNEL_ACTIVE) {
-		ncsi_stop_channel_monitor(nc);
+bad_state:
+		netdev_warn(ndp->ndev.dev,
+			    "Bad NCSI monitor state channel %d 0x%x %s queue\n",
+			    nc->id, state, chained ? "on" : "off");
+		spin_lock_irqsave(&nc->lock, flags);
+		nc->monitor.enabled = false;
+		spin_unlock_irqrestore(&nc->lock, flags);
 		return;
 	}
 
@@ -136,10 +143,9 @@  static void ncsi_channel_monitor(struct timer_list *t)
 		ncsi_report_link(ndp, true);
 		ndp->flags |= NCSI_DEV_RESHUFFLE;
 
-		ncsi_stop_channel_monitor(nc);
-
 		ncm = &nc->modes[NCSI_MODE_LINK];
 		spin_lock_irqsave(&nc->lock, flags);
+		nc->monitor.enabled = false;
 		nc->state = NCSI_CHANNEL_INVISIBLE;
 		ncm->data[2] &= ~0x1;
 		spin_unlock_irqrestore(&nc->lock, flags);