Patchwork [2/3] Refactor msi/msix restore code Part2

login
register
mail settings
Submitter Zhenzhong Duan
Date July 24, 2013, 3:08 a.m.
Message ID <51EF451A.8050003@oracle.com>
Download mbox | patch
Permalink /patch/261263/
State Changes Requested
Headers show

Comments

Zhenzhong Duan - July 24, 2013, 3:08 a.m.
xen_initdom_restore_msi_irqs triggers a hypercall to restore addr/data/mask
in dom0. It is better for default_restore_msi_irqs to do the same on baremetal.

Move the restore of the mask into default_restore_msi_irqs; this avoids the
mask being restored twice in dom0, once in the hypercall and once in the kernel.

Without that, the qlcnic driver, which calls pci_reset_function, will lose
interrupts in dom0.

Tested-by: Sucheta Chakraborty <sucheta.chakraborty@qlogic.com>
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@oracle.com>
---
 drivers/pci/msi.c |   17 ++++++++++++++---
 1 files changed, 14 insertions(+), 3 deletions(-)
Konrad Rzeszutek Wilk - July 24, 2013, 1:46 p.m.
On Wed, Jul 24, 2013 at 11:08:10AM +0800, Zhenzhong Duan wrote:
> xen_initdom_restore_msi_irqs trigger a hypercall to restore addr/data/mask
> in dom0. It's better to do the same for default_restore_msi_irqs in baremetal.
> 
> Move restore of mask in default_restore_msi_irqs, this could avoid mask
> restored twice in dom0, once in hypercall, the other in kernel.

Why not remove the hypercall then? Or alter the function to detect
whether the restore of the mask has occurred?

> 
> Without that, qlcnic driver calling pci_reset_function will lost interrupt
> in dom0.
> 
> Tested-by: Sucheta Chakraborty <sucheta.chakraborty@qlogic.com>
> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@oracle.com>
> ---
>  drivers/pci/msi.c |   17 ++++++++++++++---
>  1 files changed, 14 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
> index 87223ae..922fb49 100644
> --- a/drivers/pci/msi.c
> +++ b/drivers/pci/msi.c
> @@ -216,6 +216,8 @@ void unmask_msi_irq(struct irq_data *data)
>  #ifdef HAVE_DEFAULT_MSI_RESTORE_IRQS
>  void default_restore_msi_irqs(struct pci_dev *dev, int irq)
>  {
> +	int pos;
> +	u16 control;
>  	struct msi_desc *entry;
>  
>  	entry = NULL;
> @@ -228,8 +230,19 @@ void default_restore_msi_irqs(struct pci_dev *dev, int irq)
>  		entry = irq_get_msi_desc(irq);
>  	}
>  
> -	if (entry)
> +	if (entry) {
>  		write_msi_msg(irq, &entry->msg);
> +		if (dev->msix_enabled) {
> +			msix_mask_irq(entry, entry->masked);
> +			readl(entry->mask_base);
> +		} else {
> +			pos = entry->msi_attrib.pos;
> +			pci_read_config_word(dev, pos + PCI_MSI_FLAGS,
> +					     &control);
> +			msi_mask_irq(entry, msi_capable_mask(control),
> +				     entry->masked);
> +		}
> +	}
>  }
>  #endif
>  
> @@ -406,7 +419,6 @@ static void __pci_restore_msi_state(struct pci_dev *dev)
>  	arch_restore_msi_irqs(dev, dev->irq);
>  
>  	pci_read_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, &control);
> -	msi_mask_irq(entry, msi_capable_mask(control), entry->masked);

Before this patch we had:

	write_msi_msg(..)
	pci_read_config_word(PCI_MSI_FLAGS, &control)
	pci_write_config_dword(~msi_capable_mask(control) | entry->masked)
	control &= ~PCI_MSI_FLAGS_QSIZE;
	control |= ...
	pci_write_config_word(PCI_MSI_FLAGS, control)

while with this you have now:

	write_msi_msg(..)
	pci_read_config_word(PCI_MSI_FLAGS, &_control)
	pci_write_config_dword(~msi_capable_mask(_control) | entry->masked)
--> 	pci_read_config_word(PCI_MSI_FLAGS, &control)
	control &= ~PCI_MSI_FLAGS_QSIZE;
	control |= ...
	pci_write_config_word(PCI_MSI_FLAGS, control)

see the problem? The 'control' value in __pci_restore_msi_state reads the
value _after_ it has been masked (which is now done in default_restore_msi_irqs).

Wouldn't that cause problems?


>  	control &= ~PCI_MSI_FLAGS_QSIZE;
>  	control |= (entry->msi_attrib.multiple << 4) | PCI_MSI_FLAGS_ENABLE;
>  	pci_write_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, control);
> @@ -430,7 +442,6 @@ static void __pci_restore_msix_state(struct pci_dev *dev)
>  
>  	list_for_each_entry(entry, &dev->msi_list, list) {
>  		arch_restore_msi_irqs(dev, entry->irq);
> -		msix_mask_irq(entry, entry->masked);
>  	}
>  
>  	control &= ~PCI_MSIX_FLAGS_MASKALL;
> -- 
> 1.7.3
> 
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Zhenzhong Duan - July 25, 2013, 6:52 a.m.
On 2013-07-24 21:46, Konrad Rzeszutek Wilk wrote:
> On Wed, Jul 24, 2013 at 11:08:10AM +0800, Zhenzhong Duan wrote:
>> xen_initdom_restore_msi_irqs trigger a hypercall to restore addr/data/mask
>> in dom0. It's better to do the same for default_restore_msi_irqs in baremetal.
>>
>> Move restore of mask in default_restore_msi_irqs, this could avoid mask
>> restored twice in dom0, once in hypercall, the other in kernel.
> Why not remove the hypercall then?
If it were removed, the MSI entry could not be restored in cases such as
pci_reset_function, which resets the PCI registers.
> Or alter the function to detect
> whether the restore of the mask has occurred?
Then we need to add the check for dom0 only.
>
>> Without that, qlcnic driver calling pci_reset_function will lost interrupt
>> in dom0.
>>
>> Tested-by: Sucheta Chakraborty <sucheta.chakraborty@qlogic.com>
>> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@oracle.com>
>> ---
>>   drivers/pci/msi.c |   17 ++++++++++++++---
>>   1 files changed, 14 insertions(+), 3 deletions(-)
>>
>> diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
>> index 87223ae..922fb49 100644
>> --- a/drivers/pci/msi.c
>> +++ b/drivers/pci/msi.c
>> @@ -216,6 +216,8 @@ void unmask_msi_irq(struct irq_data *data)
>>   #ifdef HAVE_DEFAULT_MSI_RESTORE_IRQS
>>   void default_restore_msi_irqs(struct pci_dev *dev, int irq)
>>   {
>> +	int pos;
>> +	u16 control;
>>   	struct msi_desc *entry;
>>   
>>   	entry = NULL;
>> @@ -228,8 +230,19 @@ void default_restore_msi_irqs(struct pci_dev *dev, int irq)
>>   		entry = irq_get_msi_desc(irq);
>>   	}
>>   
>> -	if (entry)
>> +	if (entry) {
>>   		write_msi_msg(irq, &entry->msg);
>> +		if (dev->msix_enabled) {
>> +			msix_mask_irq(entry, entry->masked);
>> +			readl(entry->mask_base);
>> +		} else {
>> +			pos = entry->msi_attrib.pos;
>> +			pci_read_config_word(dev, pos + PCI_MSI_FLAGS,
>> +					     &control);
>> +			msi_mask_irq(entry, msi_capable_mask(control),
>> +				     entry->masked);
>> +		}
>> +	}
>>   }
>>   #endif
>>   
>> @@ -406,7 +419,6 @@ static void __pci_restore_msi_state(struct pci_dev *dev)
>>   	arch_restore_msi_irqs(dev, dev->irq);
>>   
>>   	pci_read_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, &control);
>> -	msi_mask_irq(entry, msi_capable_mask(control), entry->masked);
> Before this patch we had:
>
> 	write_msi_msg(..)
> 	pci_read_config_work(PCI_MSI_FLAGS, &control)
> 	pci_write_config_dword(~msi_capable_mask(control) | entry->masked)
> 	control &= ~_PCI_MSI_FLAGS_QSIZE;
> 	control |= ...
> 	pci_write_config_dword(PCI_MSI_FLAGS, control)
>
> while with this you have now:
>
> 	write_msi_msg(..)
> 	pci_read_config_work(PCI_MSI_FLAGS, &_control)
> 	pci_write_config_dword(~msi_capable_mask(_control) | entry->masked)
> --> 	pci_read_config_work(PCI_MSI_FLAGS, &control)
> 	control &= ~_PCI_MSI_FLAGS_QSIZE;
> 	control |= ...
> 	pci_write_config_dword(PCI_MSI_FLAGS, control)
>
> see the problem? The 'control' value in __pci_restore_msi_state reads the
> value _after_ it has been masked (which is now done in default_restore_msi_irqs).
>
> Wouldn't that cause problems?
>
pci_write_config_dword(~msi_capable_mask(_control) | entry->masked) restores the
per-vector MSI mask bits, depending on whether PCI_MSI_FLAGS_MASKBIT is supported.
This is different from the global mask bit in PCI_MSI_FLAGS.

--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Konrad Rzeszutek Wilk - July 25, 2013, 12:25 p.m.
On Thu, Jul 25, 2013 at 02:52:00PM +0800, Zhenzhong Duan wrote:
> 
> On 2013-07-24 21:46, Konrad Rzeszutek Wilk wrote:
> >On Wed, Jul 24, 2013 at 11:08:10AM +0800, Zhenzhong Duan wrote:
> >>xen_initdom_restore_msi_irqs trigger a hypercall to restore addr/data/mask
> >>in dom0. It's better to do the same for default_restore_msi_irqs in baremetal.
> >>
> >>Move restore of mask in default_restore_msi_irqs, this could avoid mask
> >>restored twice in dom0, once in hypercall, the other in kernel.
> >Why not remove the hypercall then?
> If removed, msi entry couldn't be restored, such as
> pci_reset_function who will reset pci registers.

I did not read your email correctly the first time. You are saying
that we restore it twice in the host kernel (aka dom0), once in the
hypervisor (b/c the guest tries to do an MSI-X write and it ends up in
the hypervisor), and then we also do it in the guest kernel?

That is a lot of duplicate calls.
> >Or alter the function to detect
> >whether the restore of the mask has occurred?
> Then we need to add the check for dom0 only.

I am not sure I completely follow this. Is the reason for the loss of
interrupts b/c one of those four MSI-X writes ends up masking and the
subsequent writes end up with invalid data?

> >
> >>Without that, qlcnic driver calling pci_reset_function will lost interrupt
> >>in dom0.

But if you pass said PCI device to a guest there is no need for the
interrupts to go to the host (dom0). They should go to the hypervisor
which will deliver them to the guest.

Is that what you meant by 'in dom0' ?


> >>
> >>Tested-by: Sucheta Chakraborty <sucheta.chakraborty@qlogic.com>
> >>Signed-off-by: Zhenzhong Duan <zhenzhong.duan@oracle.com>
> >>---
> >>  drivers/pci/msi.c |   17 ++++++++++++++---
> >>  1 files changed, 14 insertions(+), 3 deletions(-)
> >>
> >>diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
> >>index 87223ae..922fb49 100644
> >>--- a/drivers/pci/msi.c
> >>+++ b/drivers/pci/msi.c
> >>@@ -216,6 +216,8 @@ void unmask_msi_irq(struct irq_data *data)
> >>  #ifdef HAVE_DEFAULT_MSI_RESTORE_IRQS
> >>  void default_restore_msi_irqs(struct pci_dev *dev, int irq)
> >>  {
> >>+	int pos;
> >>+	u16 control;
> >>  	struct msi_desc *entry;
> >>  	entry = NULL;
> >>@@ -228,8 +230,19 @@ void default_restore_msi_irqs(struct pci_dev *dev, int irq)
> >>  		entry = irq_get_msi_desc(irq);
> >>  	}
> >>-	if (entry)
> >>+	if (entry) {
> >>  		write_msi_msg(irq, &entry->msg);
> >>+		if (dev->msix_enabled) {
> >>+			msix_mask_irq(entry, entry->masked);
> >>+			readl(entry->mask_base);
> >>+		} else {
> >>+			pos = entry->msi_attrib.pos;
> >>+			pci_read_config_word(dev, pos + PCI_MSI_FLAGS,
> >>+					     &control);
> >>+			msi_mask_irq(entry, msi_capable_mask(control),
> >>+				     entry->masked);
> >>+		}
> >>+	}
> >>  }
> >>  #endif
> >>@@ -406,7 +419,6 @@ static void __pci_restore_msi_state(struct pci_dev *dev)
> >>  	arch_restore_msi_irqs(dev, dev->irq);
> >>  	pci_read_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, &control);
> >>-	msi_mask_irq(entry, msi_capable_mask(control), entry->masked);
> >Before this patch we had:
> >
> >	write_msi_msg(..)
> >	pci_read_config_work(PCI_MSI_FLAGS, &control)
> >	pci_write_config_dword(~msi_capable_mask(control) | entry->masked)
> >	control &= ~_PCI_MSI_FLAGS_QSIZE;
> >	control |= ...
> >	pci_write_config_dword(PCI_MSI_FLAGS, control)
> >
> >while with this you have now:
> >
> >	write_msi_msg(..)
> >	pci_read_config_work(PCI_MSI_FLAGS, &_control)
> >	pci_write_config_dword(~msi_capable_mask(_control) | entry->masked)
> >--> 	pci_read_config_work(PCI_MSI_FLAGS, &control)
> >	control &= ~_PCI_MSI_FLAGS_QSIZE;
> >	control |= ...
> >	pci_write_config_dword(PCI_MSI_FLAGS, control)
> >
> >see the problem? The 'control' value in __pci_restore_msi_state reads the
> >value _after_ it has been masked (which is now done in default_restore_msi_irqs).
> >
> >Wouldn't that cause problems?
> >
> pci_write_config_dword(~msi_capable_mask(_control) | entry->masked) restore per vector
> msi mask bits based on the support of PCI_MSI_FLAGS_MASKBIT. This is different from the
> global mask bit in PCI_MSI_FLAGS.

So I think you are saying that it won't cause problems? 
> 
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Zhenzhong Duan - July 26, 2013, 3:01 a.m.
On 2013-07-25 20:25, Konrad Rzeszutek Wilk wrote:
> On Thu, Jul 25, 2013 at 02:52:00PM +0800, Zhenzhong Duan wrote:
>> On 2013-07-24 21:46, Konrad Rzeszutek Wilk wrote:
>>> On Wed, Jul 24, 2013 at 11:08:10AM +0800, Zhenzhong Duan wrote:
>>>> xen_initdom_restore_msi_irqs trigger a hypercall to restore addr/data/mask
>>>> in dom0. It's better to do the same for default_restore_msi_irqs in baremetal.
>>>>
>>>> Move restore of mask in default_restore_msi_irqs, this could avoid mask
>>>> restored twice in dom0, once in hypercall, the other in kernel.
>>> Why not remove the hypercall then?
>> If removed, msi entry couldn't be restored, such as
>> pci_reset_function who will reset pci registers.
> I did not read your email first time correctly. You are saying
> that we restore it twice in the host kernel (aka dom0), once in the
> hypervisor (b/c the guest tries to do MSI-X write and it ends up in
> the hypervisor), and then we also do it in the guest kernel?
This has nothing to do with the guest kernel; this patch fixes a driver load
issue in dom0.
Driver qlcnic called pci_reset_function during init. The call path:
pci_reset_function->pci_restore_state->__pci_restore_msix_state->arch_restore_msi_irqs->
xen_initdom_restore_msi_irqs->PHYSDEVOP_restore_msi hypercall

First mask restore is in 
xen_initdom_restore_msi_irqs->PHYSDEVOP_restore_msi hypercall
Second restore is __pci_restore_msix_state->msix_mask_irq(entry, 
entry->masked)

The mask bits are under the full control of Xen, and the entry->masked value
in the dom0 kernel is invalid.
Restoring that invalid value to the mask register could mask the MSI-X interrupt.
>
> That is a lot of duplicate calls.
>>> Or alter the function to detect
>>> whether the restore of the mask has occurred?
>> Then we need to add the check for dom0 only.
> I am not sure I completly follow this. Is the reason for the lost of
> interrupt b/c one of those four MSI-X writes ends up masking and the
> subsequent writes end up with invalid data?
The first restore, in the hypercall, is needed, and the second restore should
be removed for dom0.
But baremetal needs that restore, so I moved it into
default_restore_msi_irqs, which is the function used on baremetal.
>
>>>> Without that, qlcnic driver calling pci_reset_function will lost interrupt
>>>> in dom0.
> But if you pass said PCI device to a guest there is no need for the
> interrupts to go to the host (dom0). They should go to the hypervisor
> which will deliever them to the guest.
>
> Is that what you meant by 'in dom0' ?
>
>>>> Tested-by: Sucheta Chakraborty <sucheta.chakraborty@qlogic.com>
>>>> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@oracle.com>
>>>> ---
>>>>   drivers/pci/msi.c |   17 ++++++++++++++---
>>>>   1 files changed, 14 insertions(+), 3 deletions(-)
>>>>
>>>> diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
>>>> index 87223ae..922fb49 100644
>>>> --- a/drivers/pci/msi.c
>>>> +++ b/drivers/pci/msi.c
>>>> @@ -216,6 +216,8 @@ void unmask_msi_irq(struct irq_data *data)
>>>>   #ifdef HAVE_DEFAULT_MSI_RESTORE_IRQS
>>>>   void default_restore_msi_irqs(struct pci_dev *dev, int irq)
>>>>   {
>>>> +	int pos;
>>>> +	u16 control;
>>>>   	struct msi_desc *entry;
>>>>   	entry = NULL;
>>>> @@ -228,8 +230,19 @@ void default_restore_msi_irqs(struct pci_dev *dev, int irq)
>>>>   		entry = irq_get_msi_desc(irq);
>>>>   	}
>>>> -	if (entry)
>>>> +	if (entry) {
>>>>   		write_msi_msg(irq, &entry->msg);
>>>> +		if (dev->msix_enabled) {
>>>> +			msix_mask_irq(entry, entry->masked);
>>>> +			readl(entry->mask_base);
>>>> +		} else {
>>>> +			pos = entry->msi_attrib.pos;
>>>> +			pci_read_config_word(dev, pos + PCI_MSI_FLAGS,
>>>> +					     &control);
>>>> +			msi_mask_irq(entry, msi_capable_mask(control),
>>>> +				     entry->masked);
>>>> +		}
>>>> +	}
>>>>   }
>>>>   #endif
>>>> @@ -406,7 +419,6 @@ static void __pci_restore_msi_state(struct pci_dev *dev)
>>>>   	arch_restore_msi_irqs(dev, dev->irq);
>>>>   	pci_read_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, &control);
>>>> -	msi_mask_irq(entry, msi_capable_mask(control), entry->masked);
>>> Before this patch we had:
>>>
>>> 	write_msi_msg(..)
>>> 	pci_read_config_work(PCI_MSI_FLAGS, &control)
>>> 	pci_write_config_dword(~msi_capable_mask(control) | entry->masked)
>>> 	control &= ~_PCI_MSI_FLAGS_QSIZE;
>>> 	control |= ...
>>> 	pci_write_config_dword(PCI_MSI_FLAGS, control)
>>>
>>> while with this you have now:
>>>
>>> 	write_msi_msg(..)
>>> 	pci_read_config_work(PCI_MSI_FLAGS, &_control)
>>> 	pci_write_config_dword(~msi_capable_mask(_control) | entry->masked)
>>> --> 	pci_read_config_work(PCI_MSI_FLAGS, &control)
>>> 	control &= ~_PCI_MSI_FLAGS_QSIZE;
>>> 	control |= ...
>>> 	pci_write_config_dword(PCI_MSI_FLAGS, control)
>>>
>>> see the problem? The 'control' value in __pci_restore_msi_state reads the
>>> value _after_ it has been masked (which is now done in default_restore_msi_irqs).
>>>
>>> Wouldn't that cause problems?
>>>
>> pci_write_config_dword(~msi_capable_mask(_control) | entry->masked) restore per vector
>> msi mask bits based on the support of PCI_MSI_FLAGS_MASKBIT. This is different from the
>> global mask bit in PCI_MSI_FLAGS.
> So I think you are saying that it won't cause problems?
Yes
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Bjorn Helgaas - July 31, 2013, 10:41 p.m.
On Tue, Jul 23, 2013 at 9:08 PM, Zhenzhong Duan
<zhenzhong.duan@oracle.com> wrote:
> xen_initdom_restore_msi_irqs trigger a hypercall to restore addr/data/mask
> in dom0. It's better to do the same for default_restore_msi_irqs in baremetal.
>
> Move restore of mask in default_restore_msi_irqs, this could avoid mask
> restored twice in dom0, once in hypercall, the other in kernel.
>
> Without that, qlcnic driver calling pci_reset_function will lost interrupt
> in dom0.
>
> Tested-by: Sucheta Chakraborty <sucheta.chakraborty@qlogic.com>
> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@oracle.com>
> ---
>  drivers/pci/msi.c |   17 ++++++++++++++---
>  1 files changed, 14 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
> index 87223ae..922fb49 100644
> --- a/drivers/pci/msi.c
> +++ b/drivers/pci/msi.c
> @@ -216,6 +216,8 @@ void unmask_msi_irq(struct irq_data *data)
>  #ifdef HAVE_DEFAULT_MSI_RESTORE_IRQS
>  void default_restore_msi_irqs(struct pci_dev *dev, int irq)
>  {
> +       int pos;
> +       u16 control;
>         struct msi_desc *entry;
>
>         entry = NULL;
> @@ -228,8 +230,19 @@ void default_restore_msi_irqs(struct pci_dev *dev, int irq)
>                 entry = irq_get_msi_desc(irq);
>         }
>
> -       if (entry)
> +       if (entry) {
>                 write_msi_msg(irq, &entry->msg);
> +               if (dev->msix_enabled) {
> +                       msix_mask_irq(entry, entry->masked);
> +                       readl(entry->mask_base);
> +               } else {
> +                       pos = entry->msi_attrib.pos;
> +                       pci_read_config_word(dev, pos + PCI_MSI_FLAGS,
> +                                            &control);
> +                       msi_mask_irq(entry, msi_capable_mask(control),
> +                                    entry->masked);
> +               }
> +       }
>  }
>  #endif
>
> @@ -406,7 +419,6 @@ static void __pci_restore_msi_state(struct pci_dev *dev)
>         arch_restore_msi_irqs(dev, dev->irq);
>
>         pci_read_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, &control);
> -       msi_mask_irq(entry, msi_capable_mask(control), entry->masked);
>         control &= ~PCI_MSI_FLAGS_QSIZE;
>         control |= (entry->msi_attrib.multiple << 4) | PCI_MSI_FLAGS_ENABLE;
>         pci_write_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, control);
> @@ -430,7 +442,6 @@ static void __pci_restore_msix_state(struct pci_dev *dev)
>
>         list_for_each_entry(entry, &dev->msi_list, list) {
>                 arch_restore_msi_irqs(dev, entry->irq);
> -               msix_mask_irq(entry, entry->masked);
>         }
>
>         control &= ~PCI_MSIX_FLAGS_MASKALL;

Konrad, are you OK with this patch now?

If so, can you (Zhenzhong) update the changelog with some of the info
from the conversation with Konrad?  It seems like a few more details
are needed to make it clear why we need this change.

Bjorn
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Konrad Rzeszutek Wilk - Aug. 1, 2013, 3:16 p.m.
On Wed, Jul 31, 2013 at 04:41:32PM -0600, Bjorn Helgaas wrote:
> On Tue, Jul 23, 2013 at 9:08 PM, Zhenzhong Duan
> <zhenzhong.duan@oracle.com> wrote:
> > xen_initdom_restore_msi_irqs trigger a hypercall to restore addr/data/mask
> > in dom0. It's better to do the same for default_restore_msi_irqs in baremetal.
> >
> > Move restore of mask in default_restore_msi_irqs, this could avoid mask
> > restored twice in dom0, once in hypercall, the other in kernel.
> >
> > Without that, qlcnic driver calling pci_reset_function will lost interrupt
> > in dom0.
> >
> > Tested-by: Sucheta Chakraborty <sucheta.chakraborty@qlogic.com>
> > Signed-off-by: Zhenzhong Duan <zhenzhong.duan@oracle.com>
> > ---
> >  drivers/pci/msi.c |   17 ++++++++++++++---
> >  1 files changed, 14 insertions(+), 3 deletions(-)
> >
> > diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
> > index 87223ae..922fb49 100644
> > --- a/drivers/pci/msi.c
> > +++ b/drivers/pci/msi.c
> > @@ -216,6 +216,8 @@ void unmask_msi_irq(struct irq_data *data)
> >  #ifdef HAVE_DEFAULT_MSI_RESTORE_IRQS
> >  void default_restore_msi_irqs(struct pci_dev *dev, int irq)
> >  {
> > +       int pos;
> > +       u16 control;
> >         struct msi_desc *entry;
> >
> >         entry = NULL;
> > @@ -228,8 +230,19 @@ void default_restore_msi_irqs(struct pci_dev *dev, int irq)
> >                 entry = irq_get_msi_desc(irq);
> >         }
> >
> > -       if (entry)
> > +       if (entry) {
> >                 write_msi_msg(irq, &entry->msg);
> > +               if (dev->msix_enabled) {
> > +                       msix_mask_irq(entry, entry->masked);
> > +                       readl(entry->mask_base);
> > +               } else {
> > +                       pos = entry->msi_attrib.pos;
> > +                       pci_read_config_word(dev, pos + PCI_MSI_FLAGS,
> > +                                            &control);
> > +                       msi_mask_irq(entry, msi_capable_mask(control),
> > +                                    entry->masked);
> > +               }
> > +       }
> >  }
> >  #endif
> >
> > @@ -406,7 +419,6 @@ static void __pci_restore_msi_state(struct pci_dev *dev)
> >         arch_restore_msi_irqs(dev, dev->irq);
> >
> >         pci_read_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, &control);
> > -       msi_mask_irq(entry, msi_capable_mask(control), entry->masked);
> >         control &= ~PCI_MSI_FLAGS_QSIZE;
> >         control |= (entry->msi_attrib.multiple << 4) | PCI_MSI_FLAGS_ENABLE;
> >         pci_write_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, control);
> > @@ -430,7 +442,6 @@ static void __pci_restore_msix_state(struct pci_dev *dev)
> >
> >         list_for_each_entry(entry, &dev->msi_list, list) {
> >                 arch_restore_msi_irqs(dev, entry->irq);
> > -               msix_mask_irq(entry, entry->masked);
> >         }
> >
> >         control &= ~PCI_MSIX_FLAGS_MASKALL;
> 
> Konrad, are you OK with this patch now?

Yes, I wanted to make sure that the existing behavior under baremetal
was not altered.

And Duan had confirmed it was not - though, to be on the safe side, it
would be good to confirm this via testing just in case.

> 
> If so, can you (Zhenzhong) update the changelog with some of the info
> from the conversation with Konrad?  It seems like a few more details
> are needed to make it clear why we need this change.
> 
> Bjorn
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Zhenzhong Duan - Aug. 2, 2013, 9:16 a.m.
On 2013-08-01 23:16, Konrad Rzeszutek Wilk wrote:
> On Wed, Jul 31, 2013 at 04:41:32PM -0600, Bjorn Helgaas wrote:
>> On Tue, Jul 23, 2013 at 9:08 PM, Zhenzhong Duan
>> <zhenzhong.duan@oracle.com> wrote:
>>> xen_initdom_restore_msi_irqs trigger a hypercall to restore addr/data/mask
>>> in dom0. It's better to do the same for default_restore_msi_irqs in baremetal.
>>>
>>> Move restore of mask in default_restore_msi_irqs, this could avoid mask
>>> restored twice in dom0, once in hypercall, the other in kernel.
>>>
>>> Without that, qlcnic driver calling pci_reset_function will lost interrupt
>>> in dom0.
>>>
>>> Tested-by: Sucheta Chakraborty <sucheta.chakraborty@qlogic.com>
>>> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@oracle.com>
>>> ---
>>>   drivers/pci/msi.c |   17 ++++++++++++++---
>>>   1 files changed, 14 insertions(+), 3 deletions(-)
>>>
>>> diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
>>> index 87223ae..922fb49 100644
>>> --- a/drivers/pci/msi.c
>>> +++ b/drivers/pci/msi.c
>>> @@ -216,6 +216,8 @@ void unmask_msi_irq(struct irq_data *data)
>>>   #ifdef HAVE_DEFAULT_MSI_RESTORE_IRQS
>>>   void default_restore_msi_irqs(struct pci_dev *dev, int irq)
>>>   {
>>> +       int pos;
>>> +       u16 control;
>>>          struct msi_desc *entry;
>>>
>>>          entry = NULL;
>>> @@ -228,8 +230,19 @@ void default_restore_msi_irqs(struct pci_dev *dev, int irq)
>>>                  entry = irq_get_msi_desc(irq);
>>>          }
>>>
>>> -       if (entry)
>>> +       if (entry) {
>>>                  write_msi_msg(irq, &entry->msg);
>>> +               if (dev->msix_enabled) {
>>> +                       msix_mask_irq(entry, entry->masked);
>>> +                       readl(entry->mask_base);
>>> +               } else {
>>> +                       pos = entry->msi_attrib.pos;
>>> +                       pci_read_config_word(dev, pos + PCI_MSI_FLAGS,
>>> +                                            &control);
>>> +                       msi_mask_irq(entry, msi_capable_mask(control),
>>> +                                    entry->masked);
>>> +               }
>>> +       }
>>>   }
>>>   #endif
>>>
>>> @@ -406,7 +419,6 @@ static void __pci_restore_msi_state(struct pci_dev *dev)
>>>          arch_restore_msi_irqs(dev, dev->irq);
>>>
>>>          pci_read_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, &control);
>>> -       msi_mask_irq(entry, msi_capable_mask(control), entry->masked);
>>>          control &= ~PCI_MSI_FLAGS_QSIZE;
>>>          control |= (entry->msi_attrib.multiple << 4) | PCI_MSI_FLAGS_ENABLE;
>>>          pci_write_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, control);
>>> @@ -430,7 +442,6 @@ static void __pci_restore_msix_state(struct pci_dev *dev)
>>>
>>>          list_for_each_entry(entry, &dev->msi_list, list) {
>>>                  arch_restore_msi_irqs(dev, entry->irq);
>>> -               msix_mask_irq(entry, entry->masked);
>>>          }
>>>
>>>          control &= ~PCI_MSIX_FLAGS_MASKALL;
>> Konrad, are you OK with this patch now?
> Yes, I wanted to make sure that the existing behavior under baremetal
> was not altered.
>
> And Duan had confirmed it was not  - thought to be on a safe side it
> would be good to confirm this via testing just in case.
I already had our customer test this on baremetal, and it passed.

zduan
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Konrad Rzeszutek Wilk - Aug. 2, 2013, 12:01 p.m.
> >>Konrad, are you OK with this patch now?
> >Yes, I wanted to make sure that the existing behavior under baremetal
> >was not altered.
> >
> >And Duan had confirmed it was not  - thought to be on a safe side it
> >would be good to confirm this via testing just in case.
> I had ever let our customer test on baremetal and it passed.

Are they OK with being credited for this in the patch? Meaning are they
OK with Reported-by-and-Tested-by: flag?

Either way, I think the only remaining issue is to expand the git commit
a bit to include the discussion about the flow of the pci_read/pci_write
and point out that the logic remains the same.


> 
> zduan
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Zhenzhong Duan - Aug. 5, 2013, 7:19 a.m.
On 2013-08-02 20:01, Konrad Rzeszutek Wilk wrote:
>>>> Konrad, are you OK with this patch now?
>>> Yes, I wanted to make sure that the existing behavior under baremetal
>>> was not altered.
>>>
>>> And Duan had confirmed it was not  - thought to be on a safe side it
>>> would be good to confirm this via testing just in case.
>> I had ever let our customer test on baremetal and it passed.
> Are they OK with being credited for this in the patch? Meaning are they
> OK with Reported-by-and-Tested-by: flag?
Sucheta Chakraborty from QLogic also reproduced the same issue, and I got his
confirmation that the patch worked.
He is already listed in Tested-by.

zduan
>
> Either way, I think the only remaining issue is to expand the git commit
> a bit to include the discussion about the flow of the pci_read/pci_write
> and point out that the logic remains the same.
>
>
>> zduan

--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Patch

diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
index 87223ae..922fb49 100644
--- a/drivers/pci/msi.c
+++ b/drivers/pci/msi.c
@@ -216,6 +216,8 @@  void unmask_msi_irq(struct irq_data *data)
 #ifdef HAVE_DEFAULT_MSI_RESTORE_IRQS
 void default_restore_msi_irqs(struct pci_dev *dev, int irq)
 {
+	int pos;
+	u16 control;
 	struct msi_desc *entry;
 
 	entry = NULL;
@@ -228,8 +230,19 @@  void default_restore_msi_irqs(struct pci_dev *dev, int irq)
 		entry = irq_get_msi_desc(irq);
 	}
 
-	if (entry)
+	if (entry) {
 		write_msi_msg(irq, &entry->msg);
+		if (dev->msix_enabled) {
+			msix_mask_irq(entry, entry->masked);
+			readl(entry->mask_base);
+		} else {
+			pos = entry->msi_attrib.pos;
+			pci_read_config_word(dev, pos + PCI_MSI_FLAGS,
+					     &control);
+			msi_mask_irq(entry, msi_capable_mask(control),
+				     entry->masked);
+		}
+	}
 }
 #endif
 
@@ -406,7 +419,6 @@  static void __pci_restore_msi_state(struct pci_dev *dev)
 	arch_restore_msi_irqs(dev, dev->irq);
 
 	pci_read_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, &control);
-	msi_mask_irq(entry, msi_capable_mask(control), entry->masked);
 	control &= ~PCI_MSI_FLAGS_QSIZE;
 	control |= (entry->msi_attrib.multiple << 4) | PCI_MSI_FLAGS_ENABLE;
 	pci_write_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, control);
@@ -430,7 +442,6 @@  static void __pci_restore_msix_state(struct pci_dev *dev)
 
 	list_for_each_entry(entry, &dev->msi_list, list) {
 		arch_restore_msi_irqs(dev, entry->irq);
-		msix_mask_irq(entry, entry->masked);
 	}
 
 	control &= ~PCI_MSIX_FLAGS_MASKALL;