Patchwork [4/7] powerpc/powernv: Patch MSI EOI handler on P8

login
register
mail settings
Submitter Gavin Shan
Date April 24, 2013, 9:37 a.m.
Message ID <1366796259-29412-5-git-send-email-shangw@linux.vnet.ibm.com>
Download mbox | patch
Permalink /patch/239120/
State Superseded
Headers show

Comments

Gavin Shan - April 24, 2013, 9:37 a.m.
The EOI handler of MSI/MSI-X interrupts for P8 (PHB3) needs additional
steps to handle the P/Q bits in IVE before EOIing the corresponding
interrupt. The patch changes the EOI handler to cover that.

Signed-off-by: Gavin Shan <shangw@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/opal.h                |    2 +
 arch/powerpc/include/asm/xics.h                |    3 ++
 arch/powerpc/platforms/powernv/opal-wrappers.S |    1 +
 arch/powerpc/platforms/powernv/pci-ioda.c      |   16 ++++++++++++++
 arch/powerpc/platforms/powernv/pci.c           |   19 ++++++++++++++++
 arch/powerpc/platforms/powernv/pci.h           |    1 +
 arch/powerpc/sysdev/xics/icp-native.c          |   27 +++++++++++++++++++++++-
 7 files changed, 68 insertions(+), 1 deletions(-)
Benjamin Herrenschmidt - April 24, 2013, 8:49 p.m.
On Wed, 2013-04-24 at 17:37 +0800, Gavin Shan wrote:
> The EOI handler of MSI/MSI-X interrupts for P8 (PHB3) need additional
> steps to handle the P/Q bits in IVE before EOIing the corresponding
> interrupt. The patch changes the EOI handler to cover that.

 .../...

>  static void pnv_pci_init_ioda_msis(struct pnv_phb *phb)
>  {
>  	unsigned int count;
> @@ -667,6 +681,8 @@ static void pnv_pci_init_ioda_msis(struct pnv_phb *phb)
>  	}
>  
>  	phb->msi_setup = pnv_pci_ioda_msi_setup;
> +	if (phb->type == PNV_PHB_IODA2)
> +		phb->msi_eoi = pnv_pci_ioda_msi_eoi;

Ouch, another function pointer call in a hot path...

>  	phb->msi32_support = 1;
>  	pr_info("  Allocated bitmap for %d MSIs (base IRQ 0x%x)\n",
>  		count, phb->msi_base);
> diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
> index a11b5a6..ea6a93d 100644
> --- a/arch/powerpc/platforms/powernv/pci.c
> +++ b/arch/powerpc/platforms/powernv/pci.c
> @@ -115,6 +115,25 @@ static void pnv_teardown_msi_irqs(struct pci_dev *pdev)
>  		irq_dispose_mapping(entry->irq);
>  	}
>  }
> +
> +int pnv_pci_msi_eoi(unsigned int hw_irq)
> +{
> +	struct pci_controller *hose, *tmp;
> +	struct pnv_phb *phb = NULL;
> +
> +	list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
> +		phb = hose->private_data;
> +		if (hw_irq >= phb->msi_base &&
> +		    hw_irq < phb->msi_base + phb->msi_bmp.irq_count) {
> +			if (!phb->msi_eoi)
> +				return -EEXIST;
> +			return phb->msi_eoi(phb, hw_irq);
> +		}
> +	}
> +
> +	/* For LSI interrupts, we needn't do it */
> +	return 0;
> +}

And a list walk ... that's not right.

Also, you do it for all XICS interrupts, including the non-PCI ones, the
LSIs, etc... only to figure out that some might not be MSIs later in
the loop.

Why not instead look at changing the irq_chip for the MSIs ?

IE. When setting up the MSIs for IODA2, use a different irq_chip which
is a copy of the original one with a different ->eoi callback, which
does the original xics eoi and then the OPAL stuff ?

You might even be able to use something like container_of to get back
to the struct phb, no need to iterate them all.

Cheers,
Ben.
Gavin Shan - April 25, 2013, 8:08 a.m.
On Thu, Apr 25, 2013 at 06:49:40AM +1000, Benjamin Herrenschmidt wrote:
>On Wed, 2013-04-24 at 17:37 +0800, Gavin Shan wrote:
>> The EOI handler of MSI/MSI-X interrupts for P8 (PHB3) need additional
>> steps to handle the P/Q bits in IVE before EOIing the corresponding
>> interrupt. The patch changes the EOI handler to cover that.
>
> .../...
>
>>  static void pnv_pci_init_ioda_msis(struct pnv_phb *phb)
>>  {
>>  	unsigned int count;
>> @@ -667,6 +681,8 @@ static void pnv_pci_init_ioda_msis(struct pnv_phb *phb)
>>  	}
>>  
>>  	phb->msi_setup = pnv_pci_ioda_msi_setup;
>> +	if (phb->type == PNV_PHB_IODA2)
>> +		phb->msi_eoi = pnv_pci_ioda_msi_eoi;
>
>Ouch, another function pointer call in a hot path...
>

Yeah. I've removed it in the next version (not sent out yet) :-)

>>  	phb->msi32_support = 1;
>>  	pr_info("  Allocated bitmap for %d MSIs (base IRQ 0x%x)\n",
>>  		count, phb->msi_base);
>> diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
>> index a11b5a6..ea6a93d 100644
>> --- a/arch/powerpc/platforms/powernv/pci.c
>> +++ b/arch/powerpc/platforms/powernv/pci.c
>> @@ -115,6 +115,25 @@ static void pnv_teardown_msi_irqs(struct pci_dev *pdev)
>>  		irq_dispose_mapping(entry->irq);
>>  	}
>>  }
>> +
>> +int pnv_pci_msi_eoi(unsigned int hw_irq)
>> +{
>> +	struct pci_controller *hose, *tmp;
>> +	struct pnv_phb *phb = NULL;
>> +
>> +	list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
>> +		phb = hose->private_data;
>> +		if (hw_irq >= phb->msi_base &&
>> +		    hw_irq < phb->msi_base + phb->msi_bmp.irq_count) {
>> +			if (!phb->msi_eoi)
>> +				return -EEXIST;
>> +			return phb->msi_eoi(phb, hw_irq);
>> +		}
>> +	}
>> +
>> +	/* For LSI interrupts, we needn't do it */
>> +	return 0;
>> +}
>
>And a list walk ... that's not right.
>
>Also, you do it for all XICS interrupts, including the non-PCI ones, the
>LSIs, etc... only to figure out that some might not be MSIs later in
>the loop.
>
>Why not instead look at changing the irq_chip for the MSIs ?
>
>IE. When setting up the MSIs for IODA2, use a different irq_chip which
>is a copy of the original one with a different ->eoi callback, which
>does the original xics eoi and then the OPAL stuff ?
>
>You might even be able to use something like container_of to get back
>to the struct phb, no need to iterate them all.
>

Thanks for the detailed explanation, Ben.

I found that irq_data hasn't been fully utilized so far, and I already
have code that starts using it. First, "irq_data" is set to the PHB OPAL ID
or an invalid value (0xffs) during the mapping stage (there, we call irq_set_chip_data()
to record the PHB OPAL ID or the invalid value). Before EOIing the interrupt, we
check "irq_data" and do the special handling of the P/Q bits if it holds a valid value.
With that, the "hot" path should be fast enough and the function pointer (mentioned
above) can be removed.

Thanks,
Gavin
Gavin Shan - April 25, 2013, 8:13 a.m.
On Thu, Apr 25, 2013 at 04:08:37PM +0800, Gavin Shan wrote:
>On Thu, Apr 25, 2013 at 06:49:40AM +1000, Benjamin Herrenschmidt wrote:
>>On Wed, 2013-04-24 at 17:37 +0800, Gavin Shan wrote:
>>> The EOI handler of MSI/MSI-X interrupts for P8 (PHB3) need additional
>>> steps to handle the P/Q bits in IVE before EOIing the corresponding
>>> interrupt. The patch changes the EOI handler to cover that.
>>
>> .../...
>>

.../...

>>> diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
>>> index a11b5a6..ea6a93d 100644
>>> --- a/arch/powerpc/platforms/powernv/pci.c
>>> +++ b/arch/powerpc/platforms/powernv/pci.c
>>> @@ -115,6 +115,25 @@ static void pnv_teardown_msi_irqs(struct pci_dev *pdev)
>>>  		irq_dispose_mapping(entry->irq);
>>>  	}
>>>  }
>>> +
>>> +int pnv_pci_msi_eoi(unsigned int hw_irq)
>>> +{
>>> +	struct pci_controller *hose, *tmp;
>>> +	struct pnv_phb *phb = NULL;
>>> +
>>> +	list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
>>> +		phb = hose->private_data;
>>> +		if (hw_irq >= phb->msi_base &&
>>> +		    hw_irq < phb->msi_base + phb->msi_bmp.irq_count) {
>>> +			if (!phb->msi_eoi)
>>> +				return -EEXIST;
>>> +			return phb->msi_eoi(phb, hw_irq);
>>> +		}
>>> +	}
>>> +
>>> +	/* For LSI interrupts, we needn't do it */
>>> +	return 0;
>>> +}
>>
>>And a list walk ... that's not right.
>>
>>Also, you do it for all XICS interrupts, including the non-PCI ones, the
>>LSIs, etc... only to figure out that some might not be MSIs later in
>>the loop.
>>
>>Why not instead look at changing the irq_chip for the MSIs ?
>>
>>IE. When setting up the MSIs for IODA2, use a different irq_chip which
>>is a copy of the original one with a different ->eoi callback, which
>>does the original xics eoi and then the OPAL stuff ?
>>
>>You might even be able to use something like container_of to get back
>>to the struct phb, no need to iterate them all.
>>
>
>Thanks for the detailed explaining, Ben.
>
>I found irq_data hasn't been fully utilized until this moment. I already
>have code to start use that. Firstly, "irq_data" is set to the PHB OPAL ID
>or invalid value (0xffs) during mapping stage (there, we call irq_set_chip_data()
>to trace the PHB OPAL ID or invalid value). Before EOIing the interrupt, we
>will check "irq_data" and do special handling on P/Q bits if it has valid value.
>With it, the "hot" path should be fast enough and the function pointer (mentioned
>above) can be removed.
>

It should be "chip_data" (not "irq_data"). Hopefully, you haven't
had time to read the reply yet. Otherwise, it would be a bit confusing ;-)

Thanks,
Gavin
Benjamin Herrenschmidt - April 25, 2013, 8:47 a.m.
On Thu, 2013-04-25 at 16:13 +0800, Gavin Shan wrote:
> It should be "chip_data" (not "irq_data"). Hopefully, you haven't
> get time to see the reply. Otherwise, it would a bit confused ;-)

Doesn't ics-opal already use chip_data ?

I was thinking just duplicating the irq_chip (including chip_data) so it
can be used by ics-opal just fine for all calls, just then overriding
the eoi callback and using container_of to get to the PHB.

Any reason that wouldn't work ?

Cheers,
Ben.
Gavin Shan - April 25, 2013, 11:58 a.m.
On Thu, Apr 25, 2013 at 06:47:58PM +1000, Benjamin Herrenschmidt wrote:
>On Thu, 2013-04-25 at 16:13 +0800, Gavin Shan wrote:
>> It should be "chip_data" (not "irq_data"). Hopefully, you haven't
>> get time to see the reply. Otherwise, it would a bit confused ;-)
>
>Doesn't ics-opal already use chip_data ?
>

Yeah, Ben, it is already in use, so we can't use it for other purposes :-)

>I was thinking just duplicating the irq_chip (including chip_data) so it
>can be used by ics-opal just fine for all calls, just then overriding
>the eoi callback and using container_of to get to the PHB.
>
>Any reason that wouldn't work ?
>

It should work. I already have the code (with your idea implemented) and have verified
it on the simulator. I'll send the next version (together with the f/w changes)
for review once it works correctly on a real hardware box.

(I hope it can be done as early as possible to catch the 3.10 merge window).

Thanks,
Gavin

Patch

diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index 0af7ba0..93dad52 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -117,6 +117,7 @@  extern int opal_enter_rtas(struct rtas_args *args,
 #define OPAL_SET_SLOT_LED_STATUS		55
 #define OPAL_GET_EPOW_STATUS			56
 #define OPAL_SET_SYSTEM_ATTENTION_LED		57
+#define OPAL_PCI_MSI_EOI			63
 
 #ifndef __ASSEMBLY__
 
@@ -505,6 +506,7 @@  int64_t opal_pci_get_xive_reissue(uint64_t phb_id, uint32_t xive_number,
 				  uint8_t *p_bit, uint8_t *q_bit);
 int64_t opal_pci_set_xive_reissue(uint64_t phb_id, uint32_t xive_number,
 				  uint8_t p_bit, uint8_t q_bit);
+int64_t opal_pci_msi_eoi(uint64_t phb_id, uint32_t ive_number);
 int64_t opal_pci_set_xive_pe(uint64_t phb_id, uint32_t pe_number,
 			     uint32_t xive_num);
 int64_t opal_get_xive_source(uint64_t phb_id, uint32_t xive_num,
diff --git a/arch/powerpc/include/asm/xics.h b/arch/powerpc/include/asm/xics.h
index 4ae9a09..c4b364b 100644
--- a/arch/powerpc/include/asm/xics.h
+++ b/arch/powerpc/include/asm/xics.h
@@ -72,6 +72,9 @@  extern int ics_opal_init(void);
 static inline int ics_opal_init(void) { return -ENODEV; }
 #endif
 
+/* Extra EOI handler for PHB3 */
+extern int pnv_pci_msi_eoi(unsigned int hw_irq);
+
 /* ICS instance, hooked up to chip_data of an irq */
 struct ics {
 	struct list_head link;
diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S
index 3bb07e5..6fabe92 100644
--- a/arch/powerpc/platforms/powernv/opal-wrappers.S
+++ b/arch/powerpc/platforms/powernv/opal-wrappers.S
@@ -107,3 +107,4 @@  OPAL_CALL(opal_pci_mask_pe_error,		OPAL_PCI_MASK_PE_ERROR);
 OPAL_CALL(opal_set_slot_led_status,		OPAL_SET_SLOT_LED_STATUS);
 OPAL_CALL(opal_get_epow_status,			OPAL_GET_EPOW_STATUS);
 OPAL_CALL(opal_set_system_attention_led,	OPAL_SET_SYSTEM_ATTENTION_LED);
+OPAL_CALL(opal_pci_msi_eoi,			OPAL_PCI_MSI_EOI);
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 0c15870..32197af 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -646,6 +646,20 @@  static int pnv_pci_ioda_msi_setup(struct pnv_phb *phb, struct pci_dev *dev,
 	return 0;
 }
 
+static int pnv_pci_ioda_msi_eoi(struct pnv_phb *phb, unsigned int hw_irq)
+{
+	long rc;
+
+	rc = opal_pci_msi_eoi(phb->opal_id, hw_irq - phb->msi_base);
+	if (rc) {
+		pr_warning("%s: Failed to EOI IRQ#%d on PHB#%d, rc=%ld\n",
+			   __func__, hw_irq, phb->hose->global_number, rc);
+		return -EIO;
+	}
+
+	return 0;
+}
+
 static void pnv_pci_init_ioda_msis(struct pnv_phb *phb)
 {
 	unsigned int count;
@@ -667,6 +681,8 @@  static void pnv_pci_init_ioda_msis(struct pnv_phb *phb)
 	}
 
 	phb->msi_setup = pnv_pci_ioda_msi_setup;
+	if (phb->type == PNV_PHB_IODA2)
+		phb->msi_eoi = pnv_pci_ioda_msi_eoi;
 	phb->msi32_support = 1;
 	pr_info("  Allocated bitmap for %d MSIs (base IRQ 0x%x)\n",
 		count, phb->msi_base);
diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
index a11b5a6..ea6a93d 100644
--- a/arch/powerpc/platforms/powernv/pci.c
+++ b/arch/powerpc/platforms/powernv/pci.c
@@ -115,6 +115,25 @@  static void pnv_teardown_msi_irqs(struct pci_dev *pdev)
 		irq_dispose_mapping(entry->irq);
 	}
 }
+
+int pnv_pci_msi_eoi(unsigned int hw_irq)
+{
+	struct pci_controller *hose, *tmp;
+	struct pnv_phb *phb = NULL;
+
+	list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
+		phb = hose->private_data;
+		if (hw_irq >= phb->msi_base &&
+		    hw_irq < phb->msi_base + phb->msi_bmp.irq_count) {
+			if (!phb->msi_eoi)
+				return -EEXIST;
+			return phb->msi_eoi(phb, hw_irq);
+		}
+	}
+
+	/* For LSI interrupts, we needn't do it */
+	return 0;
+}
 #endif /* CONFIG_PCI_MSI */
 
 static void pnv_pci_dump_p7ioc_diag_data(struct pnv_phb *phb)
diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
index c048c29..c6690b3 100644
--- a/arch/powerpc/platforms/powernv/pci.h
+++ b/arch/powerpc/platforms/powernv/pci.h
@@ -81,6 +81,7 @@  struct pnv_phb {
 	int (*msi_setup)(struct pnv_phb *phb, struct pci_dev *dev,
 			 unsigned int hwirq, unsigned int is_64,
 			 struct msi_msg *msg);
+	int (*msi_eoi)(struct pnv_phb *phb, unsigned int hw_irq);
 	void (*dma_dev_setup)(struct pnv_phb *phb, struct pci_dev *pdev);
 	void (*fixup_phb)(struct pci_controller *hose);
 	u32 (*bdfn_to_pe)(struct pnv_phb *phb, struct pci_bus *bus, u32 devfn);
diff --git a/arch/powerpc/sysdev/xics/icp-native.c b/arch/powerpc/sysdev/xics/icp-native.c
index 48861d3..38dd2b1 100644
--- a/arch/powerpc/sysdev/xics/icp-native.c
+++ b/arch/powerpc/sysdev/xics/icp-native.c
@@ -89,6 +89,22 @@  static void icp_native_eoi(struct irq_data *d)
 	icp_native_set_xirr((xics_pop_cppr() << 24) | hw_irq);
 }
 
+static void icp_p8_native_eoi(struct irq_data *d)
+{
+	unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
+	int ret;
+
+	/* Let firmware handle P/Q bits */
+	if (hw_irq != XICS_IPI) {
+		ret = pnv_pci_msi_eoi(hw_irq);
+		WARN_ON_ONCE(ret);
+	}
+
+	/* EOI on ICP */
+	iosync();
+	icp_native_set_xirr((xics_pop_cppr() << 24) | hw_irq);
+}
+
 static void icp_native_teardown_cpu(void)
 {
 	int cpu = smp_processor_id();
@@ -264,7 +280,7 @@  static int __init icp_native_init_one_node(struct device_node *np,
 	return 0;
 }
 
-static const struct icp_ops icp_native_ops = {
+static struct icp_ops icp_native_ops = {
 	.get_irq	= icp_native_get_irq,
 	.eoi		= icp_native_eoi,
 	.set_priority	= icp_native_set_cpu_priority,
@@ -296,6 +312,15 @@  int __init icp_native_init(void)
 	if (found == 0)
 		return -ENODEV;
 
+	/* Change the EOI handler for P8 */
+#ifdef CONFIG_POWERNV_MSI
+	np = of_find_compatible_node(NULL, NULL, "ibm,power8-xicp");
+	if (np) {
+		icp_native_ops.eoi = icp_p8_native_eoi;
+		of_node_put(np);
+	}
+#endif
+
 	icp_ops = &icp_native_ops;
 
 	return 0;