diff mbox

PCI: add quirk for 3ware 9650SE controller

Message ID alpine.LNX.2.00.1308271141080.2077@pobox.suse.cz
State Rejected
Headers show

Commit Message

Jiri Kosina Aug. 27, 2013, 9:44 a.m. UTC
Commit d5dea7d95 ("PCI: msi: Disable msi interrupts when we initialize a 
pci device") makes MSIs be forcibly disabled at boot time.

It turns out that this breaks 3ware controller -- if MSIs are disabled 
during PCI discovery of this controller, the device doesn't work properly 
(it doesn't respond to any commands that are being sent to it after 
initialization).

Reverting d5dea7d95 or not force-disabling MSIs in pci_msi_init_pci_dev()
makes the device work properly again.

Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---

I am adding Adam Radford as a recipient as well, to see whether he is able 
to provide some more explanation why this device would expose this 
behavior.
Thanks.

 drivers/pci/msi.c    |    3 +++
 drivers/pci/quirks.c |   10 ++++++++++
 include/linux/pci.h  |    1 +
 3 files changed, 14 insertions(+), 0 deletions(-)

Comments

Jiri Kosina Aug. 27, 2013, 9:45 a.m. UTC | #1
[ adding Bjorn and Eric to CC, sorry for omitting you originally ]

On Tue, 27 Aug 2013, Jiri Kosina wrote:

> Commit d5dea7d95 ("PCI: msi: Disable msi interrupts when we initialize a 
> pci device") makes MSIs be forcibly disabled at boot time.
> 
> It turns out that this breaks 3ware controller -- if MSIs are disabled 
> during PCI discovery of this controller, the device doesn't work properly 
> (it doesn't respond to any commands that are being sent to it after 
> initialization).
> 
> Reverting d5dea7d95 or not force-disabling MSIs in pci_msi_init_pci_dev()
> makes the device work properly again.
> 
> Signed-off-by: Jiri Kosina <jkosina@suse.cz>
> ---
> 
> I am adding Adam Radford as a recepient as well, to see whether he is able 
> to provide some more explanation why this device would expose this 
> behavior.
> Thanks.
> 
>  drivers/pci/msi.c    |    3 +++
>  drivers/pci/quirks.c |   10 ++++++++++
>  include/linux/pci.h  |    1 +
>  3 files changed, 14 insertions(+), 0 deletions(-)
> 
> diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
> index aca7578..4f36b8b 100644
> --- a/drivers/pci/msi.c
> +++ b/drivers/pci/msi.c
> @@ -1040,6 +1040,9 @@ void pci_msi_init_pci_dev(struct pci_dev *dev)
>  {
>  	INIT_LIST_HEAD(&dev->msi_list);
>  
> +	if (dev->broken_msi_disable)
> +		return;
> +
>  	/* Disable the msi hardware to avoid screaming interrupts
>  	 * during boot.  This is the power on reset default so
>  	 * usually this should be a noop.
> diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
> index e85d230..4ba3400 100644
> --- a/drivers/pci/quirks.c
> +++ b/drivers/pci/quirks.c
> @@ -2890,6 +2890,16 @@ static void quirk_intel_ntb(struct pci_dev *dev)
>  DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0e08, quirk_intel_ntb);
>  DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0e0d, quirk_intel_ntb);
>  
> +/*
> + * 3ware 9650SE controller doesn't properly initialize if MSI are
> + * disabled on it during PCI device discovery
> + */
> +static void quirk_broken_msi_disable(struct pci_dev *dev)
> +{
> +	dev->broken_msi_disable = 1;
> +}
> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_3WARE, 0x1004, quirk_broken_msi_disable);
> +
>  static ktime_t fixup_debug_start(struct pci_dev *dev,
>  				 void (*fn)(struct pci_dev *dev))
>  {
> diff --git a/include/linux/pci.h b/include/linux/pci.h
> index 0fd1f15..c327d74 100644
> --- a/include/linux/pci.h
> +++ b/include/linux/pci.h
> @@ -341,6 +341,7 @@ struct pci_dev {
>  #ifdef CONFIG_PCI_MSI
>  	struct list_head msi_list;
>  	struct kset *msi_kset;
> +	unsigned int	broken_msi_disable:1;
>  #endif
>  	struct pci_vpd *vpd;
>  #ifdef CONFIG_PCI_ATS
> 
> -- 
> Jiri Kosina
> SUSE Labs
>
Jiri Kosina Aug. 28, 2013, 3:46 p.m. UTC | #2
On Tue, 27 Aug 2013, Jiri Kosina wrote:

> On Tue, 27 Aug 2013, Jiri Kosina wrote:
> 
> > Commit d5dea7d95 ("PCI: msi: Disable msi interrupts when we initialize a 
> > pci device") makes MSIs be forcibly disabled at boot time.
> > 
> > It turns out that this breaks 3ware controller -- if MSIs are disabled 
> > during PCI discovery of this controller, the device doesn't work properly 
> > (it doesn't respond to any commands that are being sent to it after 
> > initialization).
> > 
> > Reverting d5dea7d95 or not force-disabling MSIs in pci_msi_init_pci_dev()
> > makes the device work properly again.
> > 
> > Signed-off-by: Jiri Kosina <jkosina@suse.cz>
> >
> > ---
> > 
> > I am adding Adam Radford as a recepient as well, to see whether he is able 
> > to provide some more explanation why this device would expose this 
> > behavior.

OK, so Adam Radford's lsi.com address is bouncing, hence I guess we can't 
expect any feedback from him.

Bjorn, Jesse, any word on this please?

> > Thanks.
> > 
> >  drivers/pci/msi.c    |    3 +++
> >  drivers/pci/quirks.c |   10 ++++++++++
> >  include/linux/pci.h  |    1 +
> >  3 files changed, 14 insertions(+), 0 deletions(-)
> > 
> > diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
> > index aca7578..4f36b8b 100644
> > --- a/drivers/pci/msi.c
> > +++ b/drivers/pci/msi.c
> > @@ -1040,6 +1040,9 @@ void pci_msi_init_pci_dev(struct pci_dev *dev)
> >  {
> >  	INIT_LIST_HEAD(&dev->msi_list);
> >  
> > +	if (dev->broken_msi_disable)
> > +		return;
> > +
> >  	/* Disable the msi hardware to avoid screaming interrupts
> >  	 * during boot.  This is the power on reset default so
> >  	 * usually this should be a noop.
> > diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
> > index e85d230..4ba3400 100644
> > --- a/drivers/pci/quirks.c
> > +++ b/drivers/pci/quirks.c
> > @@ -2890,6 +2890,16 @@ static void quirk_intel_ntb(struct pci_dev *dev)
> >  DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0e08, quirk_intel_ntb);
> >  DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0e0d, quirk_intel_ntb);
> >  
> > +/*
> > + * 3ware 9650SE controller doesn't properly initialize if MSI are
> > + * disabled on it during PCI device discovery
> > + */
> > +static void quirk_broken_msi_disable(struct pci_dev *dev)
> > +{
> > +	dev->broken_msi_disable = 1;
> > +}
> > +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_3WARE, 0x1004, quirk_broken_msi_disable);
> > +
> >  static ktime_t fixup_debug_start(struct pci_dev *dev,
> >  				 void (*fn)(struct pci_dev *dev))
> >  {
> > diff --git a/include/linux/pci.h b/include/linux/pci.h
> > index 0fd1f15..c327d74 100644
> > --- a/include/linux/pci.h
> > +++ b/include/linux/pci.h
> > @@ -341,6 +341,7 @@ struct pci_dev {
> >  #ifdef CONFIG_PCI_MSI
> >  	struct list_head msi_list;
> >  	struct kset *msi_kset;
> > +	unsigned int	broken_msi_disable:1;
> >  #endif
> >  	struct pci_vpd *vpd;
> >  #ifdef CONFIG_PCI_ATS
> > 
> > -- 
> > Jiri Kosina
> > SUSE Labs
> > 
> 
> -- 
> Jiri Kosina
> SUSE Labs
>
Bjorn Helgaas Aug. 28, 2013, 4:33 p.m. UTC | #3
[+cc another email addr for Adam from git logs]

On Wed, Aug 28, 2013 at 9:46 AM, Jiri Kosina <jkosina@suse.cz> wrote:
> On Tue, 27 Aug 2013, Jiri Kosina wrote:
>
>> On Tue, 27 Aug 2013, Jiri Kosina wrote:
>>
>> > Commit d5dea7d95 ("PCI: msi: Disable msi interrupts when we initialize a
>> > pci device") makes MSIs be forcibly disabled at boot time.
>> >
>> > It turns out that this breaks 3ware controller -- if MSIs are disabled
>> > during PCI discovery of this controller, the device doesn't work properly
>> > (it doesn't respond to any commands that are being sent to it after
>> > initialization).
>> >
>> > Reverting d5dea7d95 or not force-disabling MSIs in pci_msi_init_pci_dev()
>> > makes the device work properly again.
>> >
>> > Signed-off-by: Jiri Kosina <jkosina@suse.cz>
>> >
>> > ---
>> >
>> > I am adding Adam Radford as a recepient as well, to see whether he is able
>> > to provide some more explanation why this device would expose this
>> > behavior.
>
> OK, so Adam Radford's lsi.com address is bouncing, hence I guess we can't
> expect any feedback from him.
>
> Bjorn, Jesse, any word on this please?

It's on my list to look at.  It's too late to put it in for v3.11, and
it's doubtful that it will even make the v3.12 merge window (though
possibly it could go in post-merge window).  d5dea7d95 is several
years old, so hopefully this issue isn't super-urgent.  Let me know if
otherwise.

Bjorn

>> >  drivers/pci/msi.c    |    3 +++
>> >  drivers/pci/quirks.c |   10 ++++++++++
>> >  include/linux/pci.h  |    1 +
>> >  3 files changed, 14 insertions(+), 0 deletions(-)
>> >
>> > diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
>> > index aca7578..4f36b8b 100644
>> > --- a/drivers/pci/msi.c
>> > +++ b/drivers/pci/msi.c
>> > @@ -1040,6 +1040,9 @@ void pci_msi_init_pci_dev(struct pci_dev *dev)
>> >  {
>> >     INIT_LIST_HEAD(&dev->msi_list);
>> >
>> > +   if (dev->broken_msi_disable)
>> > +           return;
>> > +
>> >     /* Disable the msi hardware to avoid screaming interrupts
>> >      * during boot.  This is the power on reset default so
>> >      * usually this should be a noop.
>> > diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
>> > index e85d230..4ba3400 100644
>> > --- a/drivers/pci/quirks.c
>> > +++ b/drivers/pci/quirks.c
>> > @@ -2890,6 +2890,16 @@ static void quirk_intel_ntb(struct pci_dev *dev)
>> >  DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0e08, quirk_intel_ntb);
>> >  DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0e0d, quirk_intel_ntb);
>> >
>> > +/*
>> > + * 3ware 9650SE controller doesn't properly initialize if MSI are
>> > + * disabled on it during PCI device discovery
>> > + */
>> > +static void quirk_broken_msi_disable(struct pci_dev *dev)
>> > +{
>> > +   dev->broken_msi_disable = 1;
>> > +}
>> > +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_3WARE, 0x1004, quirk_broken_msi_disable);
>> > +
>> >  static ktime_t fixup_debug_start(struct pci_dev *dev,
>> >                              void (*fn)(struct pci_dev *dev))
>> >  {
>> > diff --git a/include/linux/pci.h b/include/linux/pci.h
>> > index 0fd1f15..c327d74 100644
>> > --- a/include/linux/pci.h
>> > +++ b/include/linux/pci.h
>> > @@ -341,6 +341,7 @@ struct pci_dev {
>> >  #ifdef CONFIG_PCI_MSI
>> >     struct list_head msi_list;
>> >     struct kset *msi_kset;
>> > +   unsigned int    broken_msi_disable:1;
>> >  #endif
>> >     struct pci_vpd *vpd;
>> >  #ifdef CONFIG_PCI_ATS
>> >
>> > --
>> > Jiri Kosina
>> > SUSE Labs
>> >
>>
>> --
>> Jiri Kosina
>> SUSE Labs
>>
>
> --
> Jiri Kosina
> SUSE Labs
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jiri Kosina Sept. 6, 2013, 9:51 a.m. UTC | #4
On Wed, 28 Aug 2013, Bjorn Helgaas wrote:

> [+cc another email addr for Adam from git logs]

Thanks. Adam, would you happen to have any possible explanation / 
background?

> >> > Commit d5dea7d95 ("PCI: msi: Disable msi interrupts when we initialize a
> >> > pci device") makes MSIs be forcibly disabled at boot time.
> >> >
> >> > It turns out that this breaks 3ware controller -- if MSIs are disabled
> >> > during PCI discovery of this controller, the device doesn't work properly
> >> > (it doesn't respond to any commands that are being sent to it after
> >> > initialization).
> >> >
> >> > Reverting d5dea7d95 or not force-disabling MSIs in pci_msi_init_pci_dev()
> >> > makes the device work properly again.
> >> >
> >> > Signed-off-by: Jiri Kosina <jkosina@suse.cz>
> >> >
> >> > ---
> >> >
> >> > I am adding Adam Radford as a recepient as well, to see whether he is able
> >> > to provide some more explanation why this device would expose this
> >> > behavior.
> >
> > OK, so Adam Radford's lsi.com address is bouncing, hence I guess we can't
> > expect any feedback from him.
> >
> > Bjorn, Jesse, any word on this please?
> 
> It's on my list to look at.  It's too late to put it in for v3.11, and
> it's doubtful that it will even make the v3.12 merge window (though
> possibly it could go in post-merge window).  d5dea7d95 is several
> years old, so hopefully this issue isn't super-urgent.  Let me know if
> otherwise.

I agree that this should be applicable to 3.12-rcX as well, as it's very 
device-specific.

Thanks.

> 
> Bjorn
> 
> >> >  drivers/pci/msi.c    |    3 +++
> >> >  drivers/pci/quirks.c |   10 ++++++++++
> >> >  include/linux/pci.h  |    1 +
> >> >  3 files changed, 14 insertions(+), 0 deletions(-)
> >> >
> >> > diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
> >> > index aca7578..4f36b8b 100644
> >> > --- a/drivers/pci/msi.c
> >> > +++ b/drivers/pci/msi.c
> >> > @@ -1040,6 +1040,9 @@ void pci_msi_init_pci_dev(struct pci_dev *dev)
> >> >  {
> >> >     INIT_LIST_HEAD(&dev->msi_list);
> >> >
> >> > +   if (dev->broken_msi_disable)
> >> > +           return;
> >> > +
> >> >     /* Disable the msi hardware to avoid screaming interrupts
> >> >      * during boot.  This is the power on reset default so
> >> >      * usually this should be a noop.
> >> > diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
> >> > index e85d230..4ba3400 100644
> >> > --- a/drivers/pci/quirks.c
> >> > +++ b/drivers/pci/quirks.c
> >> > @@ -2890,6 +2890,16 @@ static void quirk_intel_ntb(struct pci_dev *dev)
> >> >  DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0e08, quirk_intel_ntb);
> >> >  DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0e0d, quirk_intel_ntb);
> >> >
> >> > +/*
> >> > + * 3ware 9650SE controller doesn't properly initialize if MSI are
> >> > + * disabled on it during PCI device discovery
> >> > + */
> >> > +static void quirk_broken_msi_disable(struct pci_dev *dev)
> >> > +{
> >> > +   dev->broken_msi_disable = 1;
> >> > +}
> >> > +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_3WARE, 0x1004, quirk_broken_msi_disable);
> >> > +
> >> >  static ktime_t fixup_debug_start(struct pci_dev *dev,
> >> >                              void (*fn)(struct pci_dev *dev))
> >> >  {
> >> > diff --git a/include/linux/pci.h b/include/linux/pci.h
> >> > index 0fd1f15..c327d74 100644
> >> > --- a/include/linux/pci.h
> >> > +++ b/include/linux/pci.h
> >> > @@ -341,6 +341,7 @@ struct pci_dev {
> >> >  #ifdef CONFIG_PCI_MSI
> >> >     struct list_head msi_list;
> >> >     struct kset *msi_kset;
> >> > +   unsigned int    broken_msi_disable:1;
> >> >  #endif
> >> >     struct pci_vpd *vpd;
> >> >  #ifdef CONFIG_PCI_ATS
> >> >
> >> > --
> >> > Jiri Kosina
> >> > SUSE Labs
> >> >
> >>
> >> --
> >> Jiri Kosina
> >> SUSE Labs
> >>
> >
> > --
> > Jiri Kosina
> > SUSE Labs
>
Bjorn Helgaas Sept. 6, 2013, 10:47 p.m. UTC | #5
On Fri, Sep 6, 2013 at 3:51 AM, Jiri Kosina <jkosina@suse.cz> wrote:
> On Wed, 28 Aug 2013, Bjorn Helgaas wrote:
>
>> [+cc another email addr for Adam from git logs]
>
> Thanks. Adam, would you happen to have any possible explanation /
> background?
>
>> >> > Commit d5dea7d95 ("PCI: msi: Disable msi interrupts when we initialize a
>> >> > pci device") makes MSIs be forcibly disabled at boot time.
>> >> >
>> >> > It turns out that this breaks 3ware controller -- if MSIs are disabled
>> >> > during PCI discovery of this controller, the device doesn't work properly
>> >> > (it doesn't respond to any commands that are being sent to it after
>> >> > initialization).

Is there a bug report for this issue?  It's nice to have a pointer to,
e.g., a bugzilla.kernel.org bug report with info such as dmesg logs,
lspci output, etc., for future reference.  Maybe somebody will figure
out a more generic change that could make this quirk unnecessary, and
details will help in figuring that out.

I assume the actual PCI discovery done in the PCI core works fine;
it's just that the driver doesn't work if MSIs are disabled on the
device.  If that's the case, can this be fixed by some driver change?
Maybe the driver needs to enable MSI before it sends commands to the
device?

Any description of what this failure looks like to a user?  How can a
user or a distro connect a symptom (driver timeout, console message,
or whatever) to this patch?

>> >> > Reverting d5dea7d95 or not force-disabling MSIs in pci_msi_init_pci_dev()
>> >> > makes the device work properly again.
>> >> >
>> >> > Signed-off-by: Jiri Kosina <jkosina@suse.cz>
>> >> >
>> >> > ---
>> >> >
>> >> > I am adding Adam Radford as a recepient as well, to see whether he is able
>> >> > to provide some more explanation why this device would expose this
>> >> > behavior.
>> >
>> > OK, so Adam Radford's lsi.com address is bouncing, hence I guess we can't
>> > expect any feedback from him.
>> >
>> > Bjorn, Jesse, any word on this please?
>>
>> It's on my list to look at.  It's too late to put it in for v3.11, and
>> it's doubtful that it will even make the v3.12 merge window (though
>> possibly it could go in post-merge window).  d5dea7d95 is several
>> years old, so hopefully this issue isn't super-urgent.  Let me know if
>> otherwise.
>
> I agree that this should be applicable to 3.12-rcX as well, as it's very
> device-specific.
>
> Thanks.
>
>>
>> Bjorn
>>
>> >> >  drivers/pci/msi.c    |    3 +++
>> >> >  drivers/pci/quirks.c |   10 ++++++++++
>> >> >  include/linux/pci.h  |    1 +
>> >> >  3 files changed, 14 insertions(+), 0 deletions(-)
>> >> >
>> >> > diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
>> >> > index aca7578..4f36b8b 100644
>> >> > --- a/drivers/pci/msi.c
>> >> > +++ b/drivers/pci/msi.c
>> >> > @@ -1040,6 +1040,9 @@ void pci_msi_init_pci_dev(struct pci_dev *dev)
>> >> >  {
>> >> >     INIT_LIST_HEAD(&dev->msi_list);
>> >> >
>> >> > +   if (dev->broken_msi_disable)
>> >> > +           return;
>> >> > +
>> >> >     /* Disable the msi hardware to avoid screaming interrupts
>> >> >      * during boot.  This is the power on reset default so
>> >> >      * usually this should be a noop.
>> >> > diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
>> >> > index e85d230..4ba3400 100644
>> >> > --- a/drivers/pci/quirks.c
>> >> > +++ b/drivers/pci/quirks.c
>> >> > @@ -2890,6 +2890,16 @@ static void quirk_intel_ntb(struct pci_dev *dev)
>> >> >  DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0e08, quirk_intel_ntb);
>> >> >  DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0e0d, quirk_intel_ntb);
>> >> >
>> >> > +/*
>> >> > + * 3ware 9650SE controller doesn't properly initialize if MSI are
>> >> > + * disabled on it during PCI device discovery
>> >> > + */
>> >> > +static void quirk_broken_msi_disable(struct pci_dev *dev)
>> >> > +{
>> >> > +   dev->broken_msi_disable = 1;
>> >> > +}
>> >> > +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_3WARE, 0x1004, quirk_broken_msi_disable);
>> >> > +
>> >> >  static ktime_t fixup_debug_start(struct pci_dev *dev,
>> >> >                              void (*fn)(struct pci_dev *dev))
>> >> >  {
>> >> > diff --git a/include/linux/pci.h b/include/linux/pci.h
>> >> > index 0fd1f15..c327d74 100644
>> >> > --- a/include/linux/pci.h
>> >> > +++ b/include/linux/pci.h
>> >> > @@ -341,6 +341,7 @@ struct pci_dev {
>> >> >  #ifdef CONFIG_PCI_MSI
>> >> >     struct list_head msi_list;
>> >> >     struct kset *msi_kset;
>> >> > +   unsigned int    broken_msi_disable:1;
>> >> >  #endif
>> >> >     struct pci_vpd *vpd;
>> >> >  #ifdef CONFIG_PCI_ATS
>> >> >
>> >> > --
>> >> > Jiri Kosina
>> >> > SUSE Labs
>> >> >
>> >>
>> >> --
>> >> Jiri Kosina
>> >> SUSE Labs
>> >>
>> >
>> > --
>> > Jiri Kosina
>> > SUSE Labs
>>
>
> --
> Jiri Kosina
> SUSE Labs
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Bjorn Helgaas Sept. 24, 2013, 8:50 p.m. UTC | #6
On Fri, Sep 6, 2013 at 4:47 PM, Bjorn Helgaas <bhelgaas@google.com> wrote:
> On Fri, Sep 6, 2013 at 3:51 AM, Jiri Kosina <jkosina@suse.cz> wrote:

>>> >> > Commit d5dea7d95 ("PCI: msi: Disable msi interrupts when we initialize a
>>> >> > pci device") makes MSIs be forcibly disabled at boot time.
>>> >> >
>>> >> > It turns out that this breaks 3ware controller -- if MSIs are disabled
>>> >> > during PCI discovery of this controller, the device doesn't work properly
>>> >> > (it doesn't respond to any commands that are being sent to it after
>>> >> > initialization).
>
> Is there a bug report for this issue?  It's nice to have a pointer to,
> e.g., a bugzilla.kernel.org bug report with info such as dmesg logs,
> lspci output, etc., for future reference.  Maybe somebody will figure
> out a more generic change that could make this quirk unnecessary, and
> details will help in figuring that out.
>
> I assume the actual PCI discovery done in the PCI core works fine;
> it's just that the driver doesn't work if MSIs are disabled on the
> device.  If that's the case, can this be fixed by some driver change?
> Maybe the driver needs to enable MSI before it sends commands to the
> device?

Ping?  Since the complaint is that the device doesn't work unless MSIs
are enabled, can this be fixed in the driver instead of in a quirk?

> Any description of what this failure looks like to a user?  How can a
> user or a distro connect a symptom (driver timeout, console message,
> or whatever) to this patch?
>
>>> >> > Reverting d5dea7d95 or not force-disabling MSIs in pci_msi_init_pci_dev()
>>> >> > makes the device work properly again.
>>> >> >
>>> >> > Signed-off-by: Jiri Kosina <jkosina@suse.cz>
>>> >> >
>>> >> > ---
>>> >> >
>>> >> > I am adding Adam Radford as a recepient as well, to see whether he is able
>>> >> > to provide some more explanation why this device would expose this
>>> >> > behavior.
>>> >
>>> > OK, so Adam Radford's lsi.com address is bouncing, hence I guess we can't
>>> > expect any feedback from him.
>>> >
>>> > Bjorn, Jesse, any word on this please?
>>>
>>> It's on my list to look at.  It's too late to put it in for v3.11, and
>>> it's doubtful that it will even make the v3.12 merge window (though
>>> possibly it could go in post-merge window).  d5dea7d95 is several
>>> years old, so hopefully this issue isn't super-urgent.  Let me know if
>>> otherwise.
>>
>> I agree that this should be applicable to 3.12-rcX as well, as it's very
>> device-specific.
>>
>> Thanks.
>>
>>>
>>> Bjorn
>>>
>>> >> >  drivers/pci/msi.c    |    3 +++
>>> >> >  drivers/pci/quirks.c |   10 ++++++++++
>>> >> >  include/linux/pci.h  |    1 +
>>> >> >  3 files changed, 14 insertions(+), 0 deletions(-)
>>> >> >
>>> >> > diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
>>> >> > index aca7578..4f36b8b 100644
>>> >> > --- a/drivers/pci/msi.c
>>> >> > +++ b/drivers/pci/msi.c
>>> >> > @@ -1040,6 +1040,9 @@ void pci_msi_init_pci_dev(struct pci_dev *dev)
>>> >> >  {
>>> >> >     INIT_LIST_HEAD(&dev->msi_list);
>>> >> >
>>> >> > +   if (dev->broken_msi_disable)
>>> >> > +           return;
>>> >> > +
>>> >> >     /* Disable the msi hardware to avoid screaming interrupts
>>> >> >      * during boot.  This is the power on reset default so
>>> >> >      * usually this should be a noop.
>>> >> > diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
>>> >> > index e85d230..4ba3400 100644
>>> >> > --- a/drivers/pci/quirks.c
>>> >> > +++ b/drivers/pci/quirks.c
>>> >> > @@ -2890,6 +2890,16 @@ static void quirk_intel_ntb(struct pci_dev *dev)
>>> >> >  DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0e08, quirk_intel_ntb);
>>> >> >  DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0e0d, quirk_intel_ntb);
>>> >> >
>>> >> > +/*
>>> >> > + * 3ware 9650SE controller doesn't properly initialize if MSI are
>>> >> > + * disabled on it during PCI device discovery
>>> >> > + */
>>> >> > +static void quirk_broken_msi_disable(struct pci_dev *dev)
>>> >> > +{
>>> >> > +   dev->broken_msi_disable = 1;
>>> >> > +}
>>> >> > +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_3WARE, 0x1004, quirk_broken_msi_disable);
>>> >> > +
>>> >> >  static ktime_t fixup_debug_start(struct pci_dev *dev,
>>> >> >                              void (*fn)(struct pci_dev *dev))
>>> >> >  {
>>> >> > diff --git a/include/linux/pci.h b/include/linux/pci.h
>>> >> > index 0fd1f15..c327d74 100644
>>> >> > --- a/include/linux/pci.h
>>> >> > +++ b/include/linux/pci.h
>>> >> > @@ -341,6 +341,7 @@ struct pci_dev {
>>> >> >  #ifdef CONFIG_PCI_MSI
>>> >> >     struct list_head msi_list;
>>> >> >     struct kset *msi_kset;
>>> >> > +   unsigned int    broken_msi_disable:1;
>>> >> >  #endif
>>> >> >     struct pci_vpd *vpd;
>>> >> >  #ifdef CONFIG_PCI_ATS
>>> >> >
>>> >> > --
>>> >> > Jiri Kosina
>>> >> > SUSE Labs
>>> >> >
>>> >>
>>> >> --
>>> >> Jiri Kosina
>>> >> SUSE Labs
>>> >>
>>> >
>>> > --
>>> > Jiri Kosina
>>> > SUSE Labs
>>>
>>
>> --
>> Jiri Kosina
>> SUSE Labs
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jiri Kosina Sept. 27, 2013, 9:08 a.m. UTC | #7
On Fri, 6 Sep 2013, Bjorn Helgaas wrote:

> >> >> > Commit d5dea7d95 ("PCI: msi: Disable msi interrupts when we initialize a
> >> >> > pci device") makes MSIs be forcibly disabled at boot time.
> >> >> >
> >> >> > It turns out that this breaks 3ware controller -- if MSIs are disabled
> >> >> > during PCI discovery of this controller, the device doesn't work properly
> >> >> > (it doesn't respond to any commands that are being sent to it after
> >> >> > initialization).
> 
> Is there a bug report for this issue?  

Yes, but unfortunately only in our internal bugzilla.

> It's nice to have a pointer to, e.g., a bugzilla.kernel.org bug report 
> with info such as dmesg logs, lspci output, etc., for future reference.  

It's a customer-reported issue, so I am gathering permission to make this 
information public (I don't think that'll be an issue).

I'll send up a followup afterwards.

> Maybe somebody will figure out a more generic change that could make 
> this quirk unnecessary, and details will help in figuring that out.
> 
> I assume the actual PCI discovery done in the PCI core works fine;
> it's just that the driver doesn't work if MSIs are disabled on the
> device.  If that's the case, can this be fixed by some driver change?
> Maybe the driver needs to enable MSI before it sends commands to the
> device?

I have tried it, but it still doesn't work. It seems like the device 
initialization is not finalized properly with MSIs disabled; meaning the 
device is there (discovery has completed), but it "just doesn't work".

> Any description of what this failure looks like to a user?  How can a 
> user or a distro connect a symptom (driver timeout, console message, or 
> whatever) to this patch?

Will be hopefully part of the dmesg I will be providing later ... 
basically any commands sent to it time out.

Thanks,
Bjorn Helgaas Oct. 31, 2013, 9:27 p.m. UTC | #8
On Wed, Oct 30, 2013 at 4:27 AM, Jiri Kosina <jkosina@suse.cz> wrote:
> Attached is dmesg output leading to timeouts (that are cured by my
> original patch in this thread) and lspci.

I opened https://bugzilla.kernel.org/show_bug.cgi?id=64141 for this
issue and attached your dmesg log and lspci output.

> Please let me know if there is anything else I could do, or if you are
> going to proceed with my patch adding the quirk.

Your quirk keeps us from disabling MSIs on the device during
enumeration.  But even if the BIOS left MSIs enabled, there's nothing
to field the MSI until after the driver claims the device.  So I don't
believe this has to be done as a quirk.  It should work just as well
to do something in the driver when it claims the device.

I guess another way to say this is that I don't think we understand
what the real problem is, and if we just add a quirk to work around
it, we might miss the chance to fix the real problem, and we may never
be able to remove the special-case code we're adding in the generic
path.

I know you said you tried doing something in the driver, and it didn't
work.  I don't know exactly what you tried, but twa_probe() looks
strange to me.  The other drivers I looked at do all their PCI
initialization before the scsi_host_alloc() / scsi_add_host() /
scsi_scan_host() stuff.  But twa_probe() has PCI stuff scattered
around between those three SCSI calls.  In particular, it does the MSI
setup way down near the end, after scsi_add_host(), which seems like
just the sort of thing that could explain this problem.

Bjorn
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
index aca7578..4f36b8b 100644
--- a/drivers/pci/msi.c
+++ b/drivers/pci/msi.c
@@ -1040,6 +1040,9 @@  void pci_msi_init_pci_dev(struct pci_dev *dev)
 {
 	INIT_LIST_HEAD(&dev->msi_list);
 
+	if (dev->broken_msi_disable)
+		return;
+
 	/* Disable the msi hardware to avoid screaming interrupts
 	 * during boot.  This is the power on reset default so
 	 * usually this should be a noop.
diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index e85d230..4ba3400 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -2890,6 +2890,16 @@  static void quirk_intel_ntb(struct pci_dev *dev)
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0e08, quirk_intel_ntb);
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0e0d, quirk_intel_ntb);
 
+/*
+ * 3ware 9650SE controller doesn't properly initialize if MSI are
+ * disabled on it during PCI device discovery
+ */
+static void quirk_broken_msi_disable(struct pci_dev *dev)
+{
+	dev->broken_msi_disable = 1;
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_3WARE, 0x1004, quirk_broken_msi_disable);
+
 static ktime_t fixup_debug_start(struct pci_dev *dev,
 				 void (*fn)(struct pci_dev *dev))
 {
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 0fd1f15..c327d74 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -341,6 +341,7 @@  struct pci_dev {
 #ifdef CONFIG_PCI_MSI
 	struct list_head msi_list;
 	struct kset *msi_kset;
+	unsigned int	broken_msi_disable:1;
 #endif
 	struct pci_vpd *vpd;
 #ifdef CONFIG_PCI_ATS