
net: hns: Ensure that interface teardown cannot race with TX interrupt

Message ID: 20191104195604.17109-1-maz@kernel.org
State: Changes Requested
Delegated to: David Miller
Series: net: hns: Ensure that interface teardown cannot race with TX interrupt

Commit Message

Marc Zyngier Nov. 4, 2019, 7:56 p.m. UTC
On a lockdep-enabled kernel, bringing down an HNS interface results
in a loud splat. It turns out that the per-ring spinlock is taken
both in the TX interrupt path and when bringing down the interface.

Lockdep sums it up with:

[32099.424453]        CPU0
[32099.426885]        ----
[32099.429318]   lock(&(&ring->lock)->rlock);
[32099.433402]   <Interrupt>
[32099.436008]     lock(&(&ring->lock)->rlock);
[32099.440264]
[32099.440264]  *** DEADLOCK ***

To solve this, turn the NETIF_TX_{LOCK,UNLOCK} macros from standard
spin_[un]lock to their irqsave/irqrestore versions.

Fixes: f2aaed557ecff ("net: hns: Replace netif_tx_lock to ring spin lock")
Cc: lipeng <lipeng321@huawei.com>
Cc: Yisen Zhuang <yisen.zhuang@huawei.com>
Cc: Salil Mehta <salil.mehta@huawei.com>
Cc: David S. Miller <davem@davemloft.net>
Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 drivers/net/ethernet/hisilicon/hns/hns_enet.c | 22 ++++++++++---------
 1 file changed, 12 insertions(+), 10 deletions(-)
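
For context on the fix: plain spin_lock() is only safe when the lock can never
be contended from hard-IRQ context on the same CPU. A minimal sketch of the
pattern the patch switches to (illustrative only, not the driver code itself):

	/* process context, e.g. interface teardown */
	unsigned long flags;

	spin_lock_irqsave(&ring->lock, flags);	/* also masks local IRQs */
	/* reclaim TX descriptors; the TX interrupt can no longer fire
	 * on this CPU and spin on ring->lock while we hold it */
	spin_unlock_irqrestore(&ring->lock, flags);

With plain spin_lock(), a TX interrupt arriving on the same CPU while the lock
is held would spin on ring->lock forever, which is exactly the self-deadlock
lockdep reports above.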

Comments

Salil Mehta Nov. 5, 2019, 9:16 a.m. UTC | #1
Hi Marc,
Thanks for the catch and the patch. As such, it looks good to me. As a sanity check,
I will try to reproduce the problem again and then test once with your patch.


Best Regards
Salil


> From: Marc Zyngier [mailto:maz@kernel.org]
> Sent: Monday, November 4, 2019 7:56 PM
> To: netdev@vger.kernel.org
> Cc: lipeng (Y) <lipeng321@huawei.com>; Zhuangyuzeng (Yisen)
> <yisen.zhuang@huawei.com>; Salil Mehta <salil.mehta@huawei.com>; David S .
> Miller <davem@davemloft.net>
> Subject: [PATCH] net: hns: Ensure that interface teardown cannot race with TX
> interrupt
> 
> [...]
Salil Mehta Nov. 5, 2019, 6:41 p.m. UTC | #2
Hi Marc,
I tested the patch on D05 on a lockdep-enabled kernel with the options below,
and I could not reproduce the deadlock. I do not dispute the issue being
reported, as this looks to be a clear bug which should hit while the TX data
path is running and we try to disable the interface.

Could you please let me know the exact set of steps you used to get into this
problem? Also, are you able to re-create it easily/frequently?


# Kernel Config options:
CONFIG_LOCKDEP_SUPPORT=y
CONFIG_LOCKDEP=y


Thanks
Salil.

> From: Salil Mehta
> Sent: Tuesday, November 5, 2019 9:15 AM
> To: 'Marc Zyngier' <maz@kernel.org>; netdev@vger.kernel.org
> Cc: lipeng (Y) <lipeng321@huawei.com>; Zhuangyuzeng (Yisen)
> <yisen.zhuang@huawei.com>; David S . Miller <davem@davemloft.net>
> Subject: RE: [PATCH] net: hns: Ensure that interface teardown cannot race with
> TX interrupt
> 
> Hi Marc,
> Thanks for the catch and the patch. As such, it looks good to me. As a sanity check,
> I will try to reproduce the problem again and then test once with your patch.
> 
> 
> Best Regards
> Salil
> 
> 
> > [...]
Marc Zyngier Nov. 6, 2019, 8:17 a.m. UTC | #3
On Tue, 5 Nov 2019 18:41:11 +0000
Salil Mehta <salil.mehta@huawei.com> wrote:

Hi Salil,

> Hi Marc,
> I tested the patch on D05 on a lockdep-enabled kernel with the options below,
> and I could not reproduce the deadlock. I do not dispute the issue being
> reported, as this looks to be a clear bug which should hit while the TX data
> path is running and we try to disable the interface.

Lockdep screaming at you doesn't mean the deadly scenario happens in
practice, and I've never seen the machine hanging in these conditions.
But I've also never tried to trigger it in anger.

> Could you please let me know the exact set of steps you used to get into this
> problem? Also, are you able to re-create it easily/frequently?

I just need to issue "reboot" (which calls "ip link ... down") for this
to trigger. Here's a full splat[1], as well as my full config[2]. It is
100% repeatable.

> # Kernel Config options:
> CONFIG_LOCKDEP_SUPPORT=y
> CONFIG_LOCKDEP=y

You'll need at least 

CONFIG_PROVE_LOCKING=y
CONFIG_NET_POLL_CONTROLLER=y

in order to hit it.

Thanks,

	M.

[1] https://paste.debian.net/1114451/
[2] https://paste.debian.net/1114472/
Salil Mehta Nov. 6, 2019, 11:19 a.m. UTC | #4
> From: Marc Zyngier [mailto:maz@kernel.org]
> Sent: Wednesday, November 6, 2019 8:18 AM
> To: Salil Mehta <salil.mehta@huawei.com>

Hi Marc,

> On Tue, 5 Nov 2019 18:41:11 +0000
> Salil Mehta <salil.mehta@huawei.com> wrote:
> 
> Hi Salil,
> 
> > Hi Marc,
> > I tested the patch on D05 on a lockdep-enabled kernel with the options below,
> > and I could not reproduce the deadlock. I do not dispute the issue being
> > reported, as this looks to be a clear bug which should hit while the TX data
> > path is running and we try to disable the interface.
> 
> Lockdep screaming at you doesn't mean the deadly scenario happens in
> practice, and I've never seen the machine hanging in these conditions.
> But I've also never tried to trigger it in anger.
> 
> > Could you please let me know the exact set of steps you used to get into this
> > problem? Also, are you able to re-create it easily/frequently?
> 
> I just need to issue "reboot" (which calls "ip link ... down") for this
> to trigger. Here's a full splat[1], as well as my full config[2]. It is
> 100% repeatable.
> 
> > # Kernel Config options:
> > CONFIG_LOCKDEP_SUPPORT=y
> > CONFIG_LOCKDEP=y
> 
> You'll need at least
> 
> CONFIG_PROVE_LOCKING=y
> CONFIG_NET_POLL_CONTROLLER=y


Few points:
1. To me, netpoll causing a spinlock deadlock between the TX IRQ leg and the
    ip utility is highly unlikely, since netpoll runs with both RX/TX
    interrupts disabled. It runs in polling mode to provide a parallel path
    for features like netconsole, netdump, etc.; hence a deadlock caused by
    netpoll should be highly unlikely. Therefore, this smells of some other
    problem here (see the sketch below)...
2. Also, I remember patches [S1][S2] from Eric Dumazet disabling netpoll on
    many NICs way back in the 4.19 kernel, on the basis of Song Liu's
    findings.
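
A generic sketch of the netpoll leg described in point 1 (hypothetical names,
not taken from the hns driver): the ndo_poll_controller hook is invoked when
the kernel cannot rely on device interrupts, and simply kicks NAPI by hand:

	/* sketch of an ndo_poll_controller implementation; example_priv,
	 * num_rings and ring_data are illustrative names */
	static void example_poll_controller(struct net_device *ndev)
	{
		struct example_priv *priv = netdev_priv(ndev);
		int i;

		/* device IRQs are not used here; poll every ring's NAPI
		 * context so pending TX/RX work still gets reaped */
		for (i = 0; i < priv->num_rings; i++)
			napi_schedule(&priv->ring_data[i].napi);
	}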

Problem:
Aah, I see the problem now: it is because of the stray code related to
NET_POLL_CONTROLLER in the hns driver, which should have been removed as part
of patch [S1]. That also explains why the issue is not hit while netpoll is
disabled.


/* netif_tx_lock will turn down the performance, set only when necessary */
#ifdef CONFIG_NET_POLL_CONTROLLER
#define NETIF_TX_LOCK(ring) spin_lock(&(ring)->lock)
#define NETIF_TX_UNLOCK(ring) spin_unlock(&(ring)->lock)
#else
#define NETIF_TX_LOCK(ring)
#define NETIF_TX_UNLOCK(ring)
#endif


Once you define CONFIG_NET_POLL_CONTROLLER in the latest code, these macros
kick in even for the normal NAPI path, which can cause a deadlock, and that
perhaps is what you are seeing?
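
The two call paths that then contend on the same per-ring lock, as visible in
the patch above:

	hns_nic_tx_poll_one()		/* TX IRQ/NAPI leg */
	    NETIF_TX_LOCK(ring)		/* -> spin_lock(&ring->lock) */

	hns_nic_tx_clr_all_bufs()	/* teardown leg, process context */
	    NETIF_TX_LOCK(ring)		/* -> spin_lock(&ring->lock) */

If the interrupt leg fires on a CPU that is already holding ring->lock in the
teardown leg, that CPU deadlocks on itself.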

Now, the question is: do we require these locks in the normal NAPI poll? I do
not see that we need them anymore, as tasklets are serialized with respect to
themselves, and a configuration path like "ip <intf> down" cannot conflict
with the NAPI poll path, as the latter is always disabled prior to performing
the interface-down operation. Hence, no conflict there.
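
For illustration, the ordering that makes the lock redundant looks roughly
like this (a generic ndo_stop sketch with hypothetical names, not the actual
hns close path):

	static int example_net_stop(struct net_device *ndev)
	{
		struct example_priv *priv = netdev_priv(ndev);
		struct example_ring *ring = &priv->ring_data[0];

		napi_disable(&ring->napi);	/* waits for any in-flight
						 * NAPI poll to finish */
		disable_irq(ring->irq);		/* no new TX completions */
		example_tx_clr_all_bufs(ring);	/* reclaim is now exclusive,
						 * no ring lock required */
		return 0;
	}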

As a side analysis, I could see some contention in the configuration path,
though it is not related to this. :)


Suggested Solution:
Since we no longer have NET_POLL_CONTROLLER (netpoll) support in the driver,
the NETIF_TX_[UN]LOCK macros serve no purpose; we should remove them
altogether for now.
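
A sketch of the shape such a removal would take (not the posted patch itself):

-/* netif_tx_lock will turn down the performance, set only when necessary */
-#ifdef CONFIG_NET_POLL_CONTROLLER
-#define NETIF_TX_LOCK(ring) spin_lock(&(ring)->lock)
-#define NETIF_TX_UNLOCK(ring) spin_unlock(&(ring)->lock)
-#else
-#define NETIF_TX_LOCK(ring)
-#define NETIF_TX_UNLOCK(ring)
-#endif

plus dropping the NETIF_TX_LOCK(ring)/NETIF_TX_UNLOCK(ring) call sites in
hns_nic_tx_poll_one() and hns_nic_tx_clr_all_bufs().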

Though, I still have not looked comprehensively at how others are able to use
debugging utils like netconsole etc. without having NET_POLL_CONTROLLER.
Maybe @Eric Dumazet might give us some insight on this?


If you agree with this, then I can send a patch to remove these from the hns
driver. This should solve your problem as well?



[S1] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=4bd2c03be7
[S2] https://lkml.org/lkml/2018/10/4/32

> 
> in order to hit it.
> 
> Thanks,
> 
> 	M.
> 
> [1] https://paste.debian.net/1114451/
> [2] https://paste.debian.net/1114472/
> --
> Jazz is not dead. It just smells funny...
Marc Zyngier Nov. 6, 2019, 12:16 p.m. UTC | #5
On 2019-11-06 12:28, Salil Mehta wrote:
> Hi Marc,
>
>> [...]
>
>
> Few points:
> 1. To me, netpoll causing a spinlock deadlock between the TX IRQ leg and
>     the ip utility is highly unlikely, since netpoll runs with both RX/TX
>     interrupts disabled. It runs in polling mode to provide a parallel path
>     for features like netconsole, netdump, etc.; hence a deadlock caused by
>     netpoll should be highly unlikely. Therefore, this smells of some other
>     problem here...
> 2. Also, I remember patches [S1][S2] from Eric Dumazet disabling netpoll on
>     many NICs way back in the 4.19 kernel, on the basis of Song Liu's
>     findings.
>
> Problem:
> Aah, I see the problem now: it is because of the stray code related to
> NET_POLL_CONTROLLER in the hns driver, which should have been removed as
> part of patch [S1]. That also explains why the issue is not hit while
> netpoll is disabled.
>
>
> /* netif_tx_lock will turn down the performance, set only when necessary */
> #ifdef CONFIG_NET_POLL_CONTROLLER
> #define NETIF_TX_LOCK(ring) spin_lock(&(ring)->lock)
> #define NETIF_TX_UNLOCK(ring) spin_unlock(&(ring)->lock)
> #else
> #define NETIF_TX_LOCK(ring)
> #define NETIF_TX_UNLOCK(ring)
> #endif
>
>
> Once you define CONFIG_NET_POLL_CONTROLLER in the latest code, these macros
> kick in even for the normal NAPI path, which can cause a deadlock, and that
> perhaps is what you are seeing?

Yes, that's the problem.

> Now, the question is: do we require these locks in the normal NAPI poll? I
> do not see that we need them anymore, as tasklets are serialized with
> respect to themselves, and a configuration path like "ip <intf> down"
> cannot conflict with the NAPI poll path, as the latter is always disabled
> prior to performing the interface-down operation. Hence, no conflict there.

My preference would indeed be to drop these per-queue locks if they aren't
required. I couldn't figure out from a cursory look at the code whether two
CPUs could serve the same TX queue. If that cannot happen by construction,
then these locks are perfectly useless and should be removed.

>
> As a side analysis, I could see some contention in the configuration path,
> though it is not related to this. :)
>
>
> Suggested Solution:
> Since we no longer have NET_POLL_CONTROLLER (netpoll) support in the driver,
> the NETIF_TX_[UN]LOCK macros serve no purpose; we should remove them
> altogether for now.
>
> Though, I still have not looked comprehensively at how others are able to
> use debugging utils like netconsole etc. without having NET_POLL_CONTROLLER.
> Maybe @Eric Dumazet might give us some insight on this?
>
>
> If you agree with this, then I can send a patch to remove these from the
> hns driver. This should solve your problem as well?

Sure, as long as you can guarantee that these locks are never used for
anything useful.

Thanks,

         M.
Salil Mehta Nov. 6, 2019, 7:05 p.m. UTC | #6
> From: Marc Zyngier [mailto:maz@kernel.org]
> Sent: Wednesday, November 6, 2019 12:17 PM
> To: Salil Mehta <salil.mehta@huawei.com>
> 
> On 2019-11-06 12:28, Salil Mehta wrote:
> > Hi Marc,
> >
> >> [...]
> >
> >
> > Few points:
> > 1. To me, netpoll causing a spinlock deadlock between the TX IRQ leg and
> >     the ip utility is highly unlikely, since netpoll runs with both RX/TX
> >     interrupts disabled. It runs in polling mode to provide a parallel
> >     path for features like netconsole, netdump, etc.; hence a deadlock
> >     caused by netpoll should be highly unlikely. Therefore, this smells
> >     of some other problem here...
> > 2. Also, I remember patches [S1][S2] from Eric Dumazet disabling netpoll
> >     on many NICs way back in the 4.19 kernel, on the basis of Song Liu's
> >     findings.
> >
> > Problem:
> > Aah, I see the problem now: it is because of the stray code related to
> > NET_POLL_CONTROLLER in the hns driver, which should have been removed as
> > part of patch [S1]. That also explains why the issue is not hit while
> > netpoll is disabled.
> >
> >
> > /* netif_tx_lock will turn down the performance, set only when necessary */
> > #ifdef CONFIG_NET_POLL_CONTROLLER
> > #define NETIF_TX_LOCK(ring) spin_lock(&(ring)->lock)
> > #define NETIF_TX_UNLOCK(ring) spin_unlock(&(ring)->lock)
> > #else
> > #define NETIF_TX_LOCK(ring)
> > #define NETIF_TX_UNLOCK(ring)
> > #endif
> >
> >
> > Once you define CONFIG_NET_POLL_CONTROLLER in the latest code, these
> > macros kick in even for the normal NAPI path, which can cause a deadlock,
> > and that perhaps is what you are seeing?
> 
> Yes, that's the problem.


Sure, thanks.


> > Now, the question is: do we require these locks in the normal NAPI poll?
> > I do not see that we need them anymore, as tasklets are serialized with
> > respect to themselves, and a configuration path like "ip <intf> down"
> > cannot conflict with the NAPI poll path, as the latter is always disabled
> > prior to performing the interface-down operation. Hence, no conflict
> > there.
> 
> My preference would indeed be to drop these per-queue locks if they aren't
> required. I couldn't figure out from a cursory look at the code whether two
> CPUs could serve the same TX queue. If that cannot happen by construction,
> then these locks are perfectly useless and should be removed.


Sure.


> > As a side analysis, I could see some contention in the configuration
> > path, though it is not related to this. :)
> >
> > Suggested Solution:
> > Since we no longer have NET_POLL_CONTROLLER (netpoll) support in the
> > driver, the NETIF_TX_[UN]LOCK macros serve no purpose; we should remove
> > them altogether for now.
> >
> > Though, I still have not looked comprehensively at how others are able
> > to use debugging utils like netconsole etc. without having
> > NET_POLL_CONTROLLER. Maybe @Eric Dumazet might give us some insight on
> > this?
> >
> >
> > If you agree with this, then I can send a patch to remove these from the
> > hns driver. This should solve your problem as well?
> 
> Sure, as long as you can guarantee that these locks are never used for
> anything useful.


Hi Marc,
I have floated the patch. Could you please confirm whether it solves your
issue and, if possible, provide your Tested-by? :)

[PATCH net] net: hns: Fix the stray netpoll locks causing deadlock in NAPI path

Thanks
Salil

Patch

diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c
index a48396dd4ebb..9fbe4e1e6853 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c
@@ -945,11 +945,11 @@  static int is_valid_clean_head(struct hnae_ring *ring, int h)
 
 /* netif_tx_lock will turn down the performance, set only when necessary */
 #ifdef CONFIG_NET_POLL_CONTROLLER
-#define NETIF_TX_LOCK(ring) spin_lock(&(ring)->lock)
-#define NETIF_TX_UNLOCK(ring) spin_unlock(&(ring)->lock)
+#define NETIF_TX_LOCK(ring, flags) spin_lock_irqsave(&(ring)->lock, flags)
+#define NETIF_TX_UNLOCK(ring, flags) spin_unlock_irqrestore(&(ring)->lock, flags)
 #else
-#define NETIF_TX_LOCK(ring)
-#define NETIF_TX_UNLOCK(ring)
+#define NETIF_TX_LOCK(ring, flags)
+#define NETIF_TX_UNLOCK(ring, flags)
 #endif
 
 /* reclaim all desc in one budget
@@ -962,16 +962,17 @@  static int hns_nic_tx_poll_one(struct hns_nic_ring_data *ring_data,
 	struct net_device *ndev = ring_data->napi.dev;
 	struct netdev_queue *dev_queue;
 	struct hns_nic_priv *priv = netdev_priv(ndev);
+	unsigned long flags;
 	int head;
 	int bytes, pkts;
 
-	NETIF_TX_LOCK(ring);
+	NETIF_TX_LOCK(ring, flags);
 
 	head = readl_relaxed(ring->io_base + RCB_REG_HEAD);
 	rmb(); /* make sure head is ready before touch any data */
 
 	if (is_ring_empty(ring) || head == ring->next_to_clean) {
-		NETIF_TX_UNLOCK(ring);
+		NETIF_TX_UNLOCK(ring, flags);
 		return 0; /* no data to poll */
 	}
 
@@ -979,7 +980,7 @@  static int hns_nic_tx_poll_one(struct hns_nic_ring_data *ring_data,
 		netdev_err(ndev, "wrong head (%d, %d-%d)\n", head,
 			   ring->next_to_use, ring->next_to_clean);
 		ring->stats.io_err_cnt++;
-		NETIF_TX_UNLOCK(ring);
+		NETIF_TX_UNLOCK(ring, flags);
 		return -EIO;
 	}
 
@@ -994,7 +995,7 @@  static int hns_nic_tx_poll_one(struct hns_nic_ring_data *ring_data,
 	ring->stats.tx_pkts += pkts;
 	ring->stats.tx_bytes += bytes;
 
-	NETIF_TX_UNLOCK(ring);
+	NETIF_TX_UNLOCK(ring, flags);
 
 	dev_queue = netdev_get_tx_queue(ndev, ring_data->queue_index);
 	netdev_tx_completed_queue(dev_queue, pkts, bytes);
@@ -1052,10 +1053,11 @@  static void hns_nic_tx_clr_all_bufs(struct hns_nic_ring_data *ring_data)
 	struct hnae_ring *ring = ring_data->ring;
 	struct net_device *ndev = ring_data->napi.dev;
 	struct netdev_queue *dev_queue;
+	unsigned long flags;
 	int head;
 	int bytes, pkts;
 
-	NETIF_TX_LOCK(ring);
+	NETIF_TX_LOCK(ring, flags);
 
 	head = ring->next_to_use; /* ntu :soft setted ring position*/
 	bytes = 0;
@@ -1063,7 +1065,7 @@  static void hns_nic_tx_clr_all_bufs(struct hns_nic_ring_data *ring_data)
 	while (head != ring->next_to_clean)
 		hns_nic_reclaim_one_desc(ring, &bytes, &pkts);
 
-	NETIF_TX_UNLOCK(ring);
+	NETIF_TX_UNLOCK(ring, flags);
 
 	dev_queue = netdev_get_tx_queue(ndev, ring_data->queue_index);
 	netdev_tx_reset_queue(dev_queue);