Message ID | 1228173819.15505.15.camel@HP1 |
---|---|
State | Accepted, archived |
Delegated to: | David Miller |
Headers | show |
Michael Chan a écrit : > On Wed, 2008-11-26 at 09:14 -0800, Michael Chan wrote: >> On Wed, 2008-11-26 at 01:01 -0800, Eric Dumazet wrote: >>> Eric Dumazet a écrit : >>>> Michael Chan a écrit : >>>>> I believe this may be the patch that broke it: >>>>> >>>>> http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commit;h=ce6fce4295ba727b36fdc73040e444bd1aae64cd >>>>> >>>>> >>>>> I don't remember all the details, but the Broadcom 5708 chip is >>>>> affected because it does not support MSI per-vector masking. >>>>> >>>>> One way to get around is to disable MSI with bnx2 parameter >>>>> disable_msi=1. >>>>> >>>> I tried this MSI disabling and yes, it now works. >>>> >>>> 16: 42726 128 105 106 89 >>>> 89 145 152 IO-APIC-fasteoi uhci_hcd:usb1, eth0, eth1 >>>> >>> I believe the bnx2 driver doesnt work at all if !disable_msi (default setting) >>> >>> Doing a "echo 0 >/sys/devices/system/cpu/cpu1/online" just freeze network >>> >>> No messages logged >>> >>> If loaded with disable_msi=1, the cpu unplug works as expected. >>> >>> Thats a pretty serious issue. >>> >> Yes, that's the same issue and it is serious. If MSI is being delivered >> to CPU 1 and you then take CPU 1 offline, the MSI will not be delivered >> to another CPU. >> >> I think I can detect this problem in bnx2_timer() and try to recover. >> I'll post a patch when I have something ready. Thanks. > > [PATCH] bnx2: Add workaround to handle missed MSI. > > The bnx2 chips do not support per MSI vector masking. On 5706/5708, new MSI > address/data are stored only when the MSI enable bit is toggled. As a result, > SMP affinity no longer works in the latest kernel. A more serious problem is > that the driver will no longer receive interrupts when the MSI receiving CPU > goes offline. > > The workaround in this patch only addresses the problem of CPU going offline. > When that happens, the driver's timer function will detect that it is making > no forward progress on pending interrupt events and will recover from it. > > Eric Dumazet reported the problem. > > We also found that if an interrupt is internally asserted while MSI and INTA > are disabled, the chip will end up in the same state after MSI is re-enabled. > The same workaround is needed for this problem. > > Signed-off-by: Michael Chan <mchan@broadcom.com> > --- > drivers/net/bnx2.c | 35 ++++++++++++++++++++++++++++++++--- > drivers/net/bnx2.h | 6 ++++++ > 2 files changed, 38 insertions(+), 3 deletions(-) > > diff --git a/drivers/net/bnx2.c b/drivers/net/bnx2.c > index 182f241..0e2218d 100644 > --- a/drivers/net/bnx2.c > +++ b/drivers/net/bnx2.c > @@ -3147,6 +3147,28 @@ bnx2_has_work(struct bnx2_napi *bnapi) > return 0; > } > > +static void > +bnx2_chk_missed_msi(struct bnx2 *bp) > +{ > + struct bnx2_napi *bnapi = &bp->bnx2_napi[0]; > + u32 msi_ctrl; > + > + if (bnx2_has_work(bnapi)) { > + msi_ctrl = REG_RD(bp, BNX2_PCICFG_MSI_CONTROL); > + if (!(msi_ctrl & BNX2_PCICFG_MSI_CONTROL_ENABLE)) > + return; > + > + if (bnapi->last_status_idx == bp->idle_chk_status_idx) { > + REG_WR(bp, BNX2_PCICFG_MSI_CONTROL, msi_ctrl & > + ~BNX2_PCICFG_MSI_CONTROL_ENABLE); > + REG_WR(bp, BNX2_PCICFG_MSI_CONTROL, msi_ctrl); > + bnx2_msi(bp->irq_tbl[0].vector, bnapi); > + } > + } > + > + bp->idle_chk_status_idx = bnapi->last_status_idx; > +} > + > static void bnx2_poll_link(struct bnx2 *bp, struct bnx2_napi *bnapi) > { > struct status_block *sblk = bnapi->status_blk.msi; > @@ -3221,14 +3243,15 @@ static int bnx2_poll(struct napi_struct *napi, int budget) > > work_done = bnx2_poll_work(bp, bnapi, work_done, budget); > > - if (unlikely(work_done >= budget)) > - break; > - > /* bnapi->last_status_idx is used below to tell the hw how > * much work has been processed, so we must read it before > * checking for more work. > */ > bnapi->last_status_idx = sblk->status_idx; > + > + if (unlikely(work_done >= budget)) > + break; > + > rmb(); > if (likely(!bnx2_has_work(bnapi))) { > netif_rx_complete(bp->dev, napi); > @@ -4581,6 +4604,8 @@ bnx2_init_chip(struct bnx2 *bp) > for (i = 0; i < BNX2_MAX_MSIX_VEC; i++) > bp->bnx2_napi[i].last_status_idx = 0; > > + bp->idle_chk_status_idx = 0xffff; > + > bp->rx_mode = BNX2_EMAC_RX_MODE_SORT_MODE; > > /* Set up how to generate a link change interrupt. */ > @@ -5729,6 +5754,10 @@ bnx2_timer(unsigned long data) > if (atomic_read(&bp->intr_sem) != 0) > goto bnx2_restart_timer; > > + if ((bp->flags & (BNX2_FLAG_USING_MSI | BNX2_FLAG_ONE_SHOT_MSI)) == > + BNX2_FLAG_USING_MSI) > + bnx2_chk_missed_msi(bp); > + > bnx2_send_heart_beat(bp); > > bp->stats_blk->stat_FwRxDrop = > diff --git a/drivers/net/bnx2.h b/drivers/net/bnx2.h > index 0763108..2f43c45 100644 > --- a/drivers/net/bnx2.h > +++ b/drivers/net/bnx2.h > @@ -378,6 +378,9 @@ struct l2_fhdr { > * pci_config_l definition > * offset: 0000 > */ > +#define BNX2_PCICFG_MSI_CONTROL 0x00000058 > +#define BNX2_PCICFG_MSI_CONTROL_ENABLE (1L<<16) > + > #define BNX2_PCICFG_MISC_CONFIG 0x00000068 > #define BNX2_PCICFG_MISC_CONFIG_TARGET_BYTE_SWAP (1L<<2) > #define BNX2_PCICFG_MISC_CONFIG_TARGET_MB_WORD_SWAP (1L<<3) > @@ -6882,6 +6885,9 @@ struct bnx2 { > > u8 num_tx_rings; > u8 num_rx_rings; > + > + u32 idle_chk_status_idx; > + > }; > > #define REG_RD(bp, offset) \ Thanks a lot Michael I tested your patch on my machine. I confirm CPU unplug/hotplug works now (If correcting a bug in oprofile first) NIC doesnt freeze anymore Eric -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
From: Eric Dumazet <dada1@cosmosbay.com> Date: Tue, 02 Dec 2008 07:04:48 +0100 > Michael Chan a écrit : > > [PATCH] bnx2: Add workaround to handle missed MSI. ... > Thanks a lot Michael > > I tested your patch on my machine. > > I confirm CPU unplug/hotplug works now (If correcting a bug in oprofile first) > NIC doesnt freeze anymore Applied to net-2.6, thanks everyone. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
diff --git a/drivers/net/bnx2.c b/drivers/net/bnx2.c index 182f241..0e2218d 100644 --- a/drivers/net/bnx2.c +++ b/drivers/net/bnx2.c @@ -3147,6 +3147,28 @@ bnx2_has_work(struct bnx2_napi *bnapi) return 0; } +static void +bnx2_chk_missed_msi(struct bnx2 *bp) +{ + struct bnx2_napi *bnapi = &bp->bnx2_napi[0]; + u32 msi_ctrl; + + if (bnx2_has_work(bnapi)) { + msi_ctrl = REG_RD(bp, BNX2_PCICFG_MSI_CONTROL); + if (!(msi_ctrl & BNX2_PCICFG_MSI_CONTROL_ENABLE)) + return; + + if (bnapi->last_status_idx == bp->idle_chk_status_idx) { + REG_WR(bp, BNX2_PCICFG_MSI_CONTROL, msi_ctrl & + ~BNX2_PCICFG_MSI_CONTROL_ENABLE); + REG_WR(bp, BNX2_PCICFG_MSI_CONTROL, msi_ctrl); + bnx2_msi(bp->irq_tbl[0].vector, bnapi); + } + } + + bp->idle_chk_status_idx = bnapi->last_status_idx; +} + static void bnx2_poll_link(struct bnx2 *bp, struct bnx2_napi *bnapi) { struct status_block *sblk = bnapi->status_blk.msi; @@ -3221,14 +3243,15 @@ static int bnx2_poll(struct napi_struct *napi, int budget) work_done = bnx2_poll_work(bp, bnapi, work_done, budget); - if (unlikely(work_done >= budget)) - break; - /* bnapi->last_status_idx is used below to tell the hw how * much work has been processed, so we must read it before * checking for more work. */ bnapi->last_status_idx = sblk->status_idx; + + if (unlikely(work_done >= budget)) + break; + rmb(); if (likely(!bnx2_has_work(bnapi))) { netif_rx_complete(bp->dev, napi); @@ -4581,6 +4604,8 @@ bnx2_init_chip(struct bnx2 *bp) for (i = 0; i < BNX2_MAX_MSIX_VEC; i++) bp->bnx2_napi[i].last_status_idx = 0; + bp->idle_chk_status_idx = 0xffff; + bp->rx_mode = BNX2_EMAC_RX_MODE_SORT_MODE; /* Set up how to generate a link change interrupt. */ @@ -5729,6 +5754,10 @@ bnx2_timer(unsigned long data) if (atomic_read(&bp->intr_sem) != 0) goto bnx2_restart_timer; + if ((bp->flags & (BNX2_FLAG_USING_MSI | BNX2_FLAG_ONE_SHOT_MSI)) == + BNX2_FLAG_USING_MSI) + bnx2_chk_missed_msi(bp); + bnx2_send_heart_beat(bp); bp->stats_blk->stat_FwRxDrop = diff --git a/drivers/net/bnx2.h b/drivers/net/bnx2.h index 0763108..2f43c45 100644 --- a/drivers/net/bnx2.h +++ b/drivers/net/bnx2.h @@ -378,6 +378,9 @@ struct l2_fhdr { * pci_config_l definition * offset: 0000 */ +#define BNX2_PCICFG_MSI_CONTROL 0x00000058 +#define BNX2_PCICFG_MSI_CONTROL_ENABLE (1L<<16) + #define BNX2_PCICFG_MISC_CONFIG 0x00000068 #define BNX2_PCICFG_MISC_CONFIG_TARGET_BYTE_SWAP (1L<<2) #define BNX2_PCICFG_MISC_CONFIG_TARGET_MB_WORD_SWAP (1L<<3) @@ -6882,6 +6885,9 @@ struct bnx2 { u8 num_tx_rings; u8 num_rx_rings; + + u32 idle_chk_status_idx; + }; #define REG_RD(bp, offset) \