Patchwork [v2,4/8] irqchip: armada-370-xp: implement MSI support

login
register
mail settings
Submitter Thomas Petazzoni
Date June 6, 2013, 4:41 p.m.
Message ID <1370536888-8871-5-git-send-email-thomas.petazzoni@free-electrons.com>
Download mbox | patch
Permalink /patch/249484/
State Not Applicable
Headers show

Comments

Thomas Petazzoni - June 6, 2013, 4:41 p.m.
This commit introduces the support for the MSI interrupts in the
armada-370-xp interrupt controller driver. It registers an MSI chip to
the MSI chip registry, which will be used by the Marvell PCIe host
controller driver.

The MSI interrupts use the 16 high doorbells, and are therefore
notified using IRQ1 of the main interrupt controller.

Signed-off-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
---
 drivers/irqchip/irq-armada-370-xp.c | 166 +++++++++++++++++++++++++++++++++++-
 1 file changed, 165 insertions(+), 1 deletion(-)
Grant Likely - June 11, 2013, 1:37 p.m.
On Thu,  6 Jun 2013 18:41:24 +0200, Thomas Petazzoni <thomas.petazzoni@free-electrons.com> wrote:
> This commit introduces the support for the MSI interrupts in the
> armada-370-xp interrupt controller driver. It registers an MSI chip to
> the MSI chip registry, which will be used by the Marvell PCIe host
> controller driver.
> 
> The MSI interrupts use the 16 high doorbells, and are therefore
> notified using IRQ1 of the main interrupt controller.
> 
> Signed-off-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
> ---
>  drivers/irqchip/irq-armada-370-xp.c | 166 +++++++++++++++++++++++++++++++++++-
>  1 file changed, 165 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/irqchip/irq-armada-370-xp.c b/drivers/irqchip/irq-armada-370-xp.c
> index 26adc74..c7c904d 100644
> --- a/drivers/irqchip/irq-armada-370-xp.c
> +++ b/drivers/irqchip/irq-armada-370-xp.c
> @@ -22,6 +22,8 @@
>  #include <linux/of_address.h>
>  #include <linux/of_irq.h>
>  #include <linux/irqdomain.h>
> +#include <linux/slab.h>
> +#include <linux/msi.h>
>  #include <asm/mach/arch.h>
>  #include <asm/exception.h>
>  #include <asm/smp_plat.h>
> @@ -51,12 +53,22 @@
>  #define IPI_DOORBELL_START                      (0)
>  #define IPI_DOORBELL_END                        (8)
>  #define IPI_DOORBELL_MASK                       0xFF
> +#define PCI_MSI_DOORBELL_START                  (16)
> +#define PCI_MSI_DOORBELL_NR                     (16)
> +#define PCI_MSI_DOORBELL_END                    (32)
> +#define PCI_MSI_DOORBELL_MASK                   0xFFFF0000
>  
>  static DEFINE_RAW_SPINLOCK(irq_controller_lock);
>  
>  static void __iomem *per_cpu_int_base;
>  static void __iomem *main_int_base;
>  static struct irq_domain *armada_370_xp_mpic_domain;
> +#ifdef CONFIG_PCI_MSI
> +static struct irq_domain *armada_370_xp_msi_domain;
> +static DECLARE_BITMAP(msi_used, PCI_MSI_DOORBELL_NR);
> +static DEFINE_MUTEX(msi_used_lock);
> +static phys_addr_t msi_doorbell_addr;
> +#endif
>  
>  /*
>   * In SMP mode:
> @@ -87,6 +99,126 @@ static void armada_370_xp_irq_unmask(struct irq_data *d)
>  				ARMADA_370_XP_INT_CLEAR_MASK_OFFS);
>  }
>  
> +#ifdef CONFIG_PCI_MSI
> +static int armada_370_xp_alloc_msi(void)
> +{
> +	int hwirq;
> +
> +	mutex_lock(&msi_used_lock);
> +	hwirq = find_first_zero_bit(&msi_used, PCI_MSI_DOORBELL_NR);
> +	if (hwirq >= PCI_MSI_DOORBELL_NR)
> +		hwirq = -ENOSPC;
> +	else
> +		set_bit(hwirq, msi_used);
> +	mutex_unlock(&msi_used_lock);
> +
> +	return hwirq;
> +}
> +
> +static void armada_370_xp_free_msi(int hwirq)
> +{
> +	mutex_lock(&msi_used_lock);
> +	if (!test_bit(hwirq, msi_used))
> +		pr_err("trying to free unused MSI#%d\n", hwirq);
> +	else
> +		clear_bit(hwirq, msi_used);
> +       mutex_unlock(&msi_used_lock);
> +}
> +
> +static int armada_370_xp_setup_msi_irq(struct msi_chip *chip,
> +				       struct pci_dev *pdev,
> +				       struct msi_desc *desc)
> +{
> +	struct msi_msg msg;
> +	int hwirq, irq;
> +
> +	hwirq = armada_370_xp_alloc_msi();
> +	if (hwirq < 0)
> +		return hwirq;
> +
> +	irq = irq_create_mapping(armada_370_xp_msi_domain, hwirq);
> +	if (!irq) {
> +		armada_370_xp_free_msi(hwirq);
> +		return -EINVAL;
> +	}

This looks like something that the irq_domain should handle for you.
It would be good to have a variant of irq_create_mapping() that keeps
track of available hwirqs, allocates a free one, and maps it all with
one function call. I can see that being useful by other MSI interrupt
controllers and would eliminate needing to open-code it above.

take a look at the irqdomain/test branch on git://git.secretlab.ca/git/linux

I'm hoping to push that series into v3.11 and it simplifies the
irq_domain code quite a bit. It would be easy to build an
irq_create_mapping() variant on top of that. Take a look at
irq_create_direct_mapping() for inspiration (although that function does
it the other way around. It allocates a virq and uses that number for
the hwirq number)

> +static int armada_370_xp_msi_init(struct device_node *node)
> +{
> +	struct msi_chip *msi_chip;
> +	u32 reg;
> +
> +	msi_chip = kzalloc(sizeof(*msi_chip), GFP_KERNEL);

devm_kzalloc() so it gets freed on failure or unbind; although for this
device you may not care.

> +	if (!msi_chip)
> +		return -ENOMEM;
> +
> +	armada_370_xp_msi_domain =
> +		irq_domain_add_linear(NULL, PCI_MSI_DOORBELL_NR,
> +				      &armada_370_xp_msi_irq_ops, NULL);
> +	if (!armada_370_xp_msi_domain) {
> +		kfree(msi_chip);
> +		return -ENOMEM;
> +	}
> +
> +	msi_chip->of_node = node;
> +	msi_chip->setup_irq = armada_370_xp_setup_msi_irq;
> +	msi_chip->teardown_irq = armada_370_xp_teardown_msi_irq;
> +
> +	msi_chip_add(msi_chip);
> +
> +	reg = readl(per_cpu_int_base + ARMADA_370_XP_IN_DRBEL_MSK_OFFS)
> +		| PCI_MSI_DOORBELL_MASK;
> +
> +	writel(reg, per_cpu_int_base +
> +	       ARMADA_370_XP_IN_DRBEL_MSK_OFFS);
> +
> +	/* Unmask IPI interrupt */
> +	writel(1, per_cpu_int_base + ARMADA_370_XP_INT_CLEAR_MASK_OFFS);
> +
> +	return 0;
> +}
> +#else
> +static inline int armada_370_xp_msi_init(struct device_node *node) { return 0; }
> +#endif
> +
>  #ifdef CONFIG_SMP
>  static int armada_xp_set_affinity(struct irq_data *d,
>  				  const struct cpumask *mask_val, bool force)
> @@ -214,12 +346,39 @@ armada_370_xp_handle_irq(struct pt_regs *regs)
>  		if (irqnr > 1022)
>  			break;
>  
> -		if (irqnr > 0) {
> +		if (irqnr > 1) {
>  			irqnr =	irq_find_mapping(armada_370_xp_mpic_domain,
>  					irqnr);
>  			handle_IRQ(irqnr, regs);
>  			continue;
>  		}
> +
> +#ifdef CONFIG_PCI_MSI
> +		/* MSI handling */
> +		if (irqnr == 1) {
> +			u32 msimask, msinr;
> +
> +			msimask = readl_relaxed(per_cpu_int_base +
> +						ARMADA_370_XP_IN_DRBEL_CAUSE_OFFS)
> +				& PCI_MSI_DOORBELL_MASK;
> +
> +			writel(~PCI_MSI_DOORBELL_MASK, per_cpu_int_base +
> +			       ARMADA_370_XP_IN_DRBEL_CAUSE_OFFS);
> +
> +			for (msinr = PCI_MSI_DOORBELL_START;
> +			     msinr < PCI_MSI_DOORBELL_END; msinr++) {
> +				int irq;
> +
> +				if (!(msimask & BIT(msinr)))
> +					continue;
> +
> +				irq = irq_find_mapping(armada_370_xp_msi_domain,
> +						       msinr - 16);
> +				handle_IRQ(irq, regs);
> +			}
> +		}
> +#endif
> +
>  #ifdef CONFIG_SMP
>  		/* IPI Handling */
>  		if (irqnr == 0) {
> @@ -269,6 +428,9 @@ static int __init armada_370_xp_mpic_of_init(struct device_node *node,
>  				   resource_size(&per_cpu_int_res));
>  	BUG_ON(!per_cpu_int_base);
>  
> +	msi_doorbell_addr = main_int_res.start +
> +		ARMADA_370_XP_SW_TRIG_INT_OFFS;
> +
>  	control = readl(main_int_base + ARMADA_370_XP_INT_CONTROL);
>  
>  	armada_370_xp_mpic_domain =
> @@ -292,6 +454,8 @@ static int __init armada_370_xp_mpic_of_init(struct device_node *node,
>  
>  #endif
>  
> +	armada_370_xp_msi_init(node);
> +
>  	set_handle_irq(armada_370_xp_handle_irq);
>  
>  	return 0;
> -- 
> 1.8.1.2
> 
> 
> _______________________________________________
> linux-arm-kernel mailing list
> linux-arm-kernel@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
Thierry Reding - June 12, 2013, 10:42 a.m.
On Thu, Jun 06, 2013 at 06:41:24PM +0200, Thomas Petazzoni wrote:
[...]
> @@ -292,6 +454,8 @@ static int __init armada_370_xp_mpic_of_init(struct device_node *node,
>  
>  #endif
>  
> +	armada_370_xp_msi_init(node);
> +

So I see that you don't have access to the original platform device
here, but you could use of_find_device_by_node() to obtain it and pass
that into armada_370_xp_msi_init() in order to set the msi_chip.dev
field. Or you could do the lookup in armada_370_xp_msi_init() if you
don't need it for anything else in armada_370_xp_mpic_of_init().

Doing the above will also allow you to use devm_kzalloc() as Grant
suggested in his reply.

Thierry
Thomas Petazzoni - June 18, 2013, 8:42 a.m.
Dear Grant Likely,

On Tue, 11 Jun 2013 14:37:45 +0100, Grant Likely wrote:

> > +static int armada_370_xp_setup_msi_irq(struct msi_chip *chip,
> > +				       struct pci_dev *pdev,
> > +				       struct msi_desc *desc)
> > +{
> > +	struct msi_msg msg;
> > +	int hwirq, irq;
> > +
> > +	hwirq = armada_370_xp_alloc_msi();
> > +	if (hwirq < 0)
> > +		return hwirq;
> > +
> > +	irq = irq_create_mapping(armada_370_xp_msi_domain, hwirq);
> > +	if (!irq) {
> > +		armada_370_xp_free_msi(hwirq);
> > +		return -EINVAL;
> > +	}
> 
> This looks like something that the irq_domain should handle for you.
> It would be good to have a variant of irq_create_mapping() that keeps
> track of available hwirqs, allocates a free one, and maps it all with
> one function call. I can see that being useful by other MSI interrupt
> controllers and would eliminate needing to open-code it above.
> 
> take a look at the irqdomain/test branch on git://git.secretlab.ca/git/linux
> 
> I'm hoping to push that series into v3.11 and it simplifies the
> irq_domain code quite a bit. It would be easy to build an
> irq_create_mapping() variant on top of that. Take a look at
> irq_create_direct_mapping() for inspiration (although that function does
> it the other way around. It allocates a virq and uses that number for
> the hwirq number)

Ok, I've implemented something according to your idea. Will be part of
the v3 of this series. For reference, the piece of code I've added in
irqdomain.c looks like:

 /**
+ * irq_alloc_mapping() - Allocate an irq for mapping
+ * @domain: domain to allocate the irq for or NULL for default domain
+ * @hwirq:  reference to the returned hwirq
+ *
+ * This routine are used for irq controllers which can choose the
+ * hardware interrupt number from a range [ 0 ; domain size ], such as
+ * is often the case with PCI MSI controllers. The function will
+ * returned the allocated hwirq number in the hwirq pointer, and the
+ * corresponding virq number as the return value.
+ */
+unsigned int irq_alloc_mapping(struct irq_domain *domain,
+                              irq_hw_number_t *out_hwirq)
+{
+       unsigned int virq;
+       irq_hw_number_t hwirq;
+
+       pr_debug("irq_alloc_mapping(0x%p)\n", domain);
+
+       if (domain == NULL)
+               domain = irq_default_domain;
+
+       for (hwirq = 0; hwirq < domain->hwirq_max; hwirq++)
+               if (!irq_find_mapping(domain, hwirq))
+                       break;
+
+       if (hwirq == domain->hwirq_max) {
+               pr_debug("-> no available hwirq found\n");
+               return 0;
+       }
+
+       virq = irq_alloc_desc_from(1, of_node_to_nid(domain->of_node));
+       if (virq <= 0) {
+               pr_debug("-> virq allocation failed\n");
+               return 0;
+       }
+
+       if (irq_domain_associate(domain, virq, hwirq)) {
+               irq_free_desc(virq);
+               return 0;
+       }
+
+       pr_debug("irq %lu on domain %s mapped to virtual irq %u\n",
+                hwirq, of_node_full_name(domain->of_node), virq);
+
+       *out_hwirq = hwirq;
+
+       return virq;
+}
+EXPORT_SYMBOL_GPL(irq_alloc_mapping);
+
+/**

> > +static int armada_370_xp_msi_init(struct device_node *node)
> > +{
> > +	struct msi_chip *msi_chip;
> > +	u32 reg;
> > +
> > +	msi_chip = kzalloc(sizeof(*msi_chip), GFP_KERNEL);
> 
> devm_kzalloc() so it gets freed on failure or unbind; although for this
> device you may not care.

Unfortunately, we don't have access to a platform_device by the time
the IRQ controller driver is initialized. Thierry Redding suggested to
use of_find_device_by_node(), but it returns NULL. I guess it's because
this IRQ controller driver is initialized much before the
platform_device structures are created.

Thomas
Thomas Petazzoni - June 18, 2013, 8:43 a.m.
Dear Thierry Reding,

On Wed, 12 Jun 2013 12:42:40 +0200, Thierry Reding wrote:
> On Thu, Jun 06, 2013 at 06:41:24PM +0200, Thomas Petazzoni wrote:
> [...]
> > @@ -292,6 +454,8 @@ static int __init armada_370_xp_mpic_of_init(struct device_node *node,
> >  
> >  #endif
> >  
> > +	armada_370_xp_msi_init(node);
> > +
> 
> So I see that you don't have access to the original platform device
> here, but you could use of_find_device_by_node() to obtain it and pass
> that into armada_370_xp_msi_init() in order to set the msi_chip.dev
> field. Or you could do the lookup in armada_370_xp_msi_init() if you
> don't need it for anything else in armada_370_xp_mpic_of_init().

As I replied to Grant, of_find_device_by_node() returns NULL, I believe
because the all IRQ controller driver initialization is done pretty
early, before the of_platform_populate() call is made, so there is no
platform_device associated with the IRQ controller node at the time the
armada_370_xp_mpic_of_init() function is called.

Do you see another approach, especially in relation to your comment on
PATCH 2/8 ?

Best regards,

Thomas
Grant Likely - June 18, 2013, 10:15 a.m.
On Tue, 18 Jun 2013 10:42:18 +0200, Thomas Petazzoni <thomas.petazzoni@free-electrons.com> wrote:
> Dear Grant Likely,
> 
> On Tue, 11 Jun 2013 14:37:45 +0100, Grant Likely wrote:
> 
> > > +static int armada_370_xp_setup_msi_irq(struct msi_chip *chip,
> > > +				       struct pci_dev *pdev,
> > > +				       struct msi_desc *desc)
> > > +{
> > > +	struct msi_msg msg;
> > > +	int hwirq, irq;
> > > +
> > > +	hwirq = armada_370_xp_alloc_msi();
> > > +	if (hwirq < 0)
> > > +		return hwirq;
> > > +
> > > +	irq = irq_create_mapping(armada_370_xp_msi_domain, hwirq);
> > > +	if (!irq) {
> > > +		armada_370_xp_free_msi(hwirq);
> > > +		return -EINVAL;
> > > +	}
> > 
> > This looks like something that the irq_domain should handle for you.
> > It would be good to have a variant of irq_create_mapping() that keeps
> > track of available hwirqs, allocates a free one, and maps it all with
> > one function call. I can see that being useful by other MSI interrupt
> > controllers and would eliminate needing to open-code it above.
> > 
> > take a look at the irqdomain/test branch on git://git.secretlab.ca/git/linux
> > 
> > I'm hoping to push that series into v3.11 and it simplifies the
> > irq_domain code quite a bit. It would be easy to build an
> > irq_create_mapping() variant on top of that. Take a look at
> > irq_create_direct_mapping() for inspiration (although that function does
> > it the other way around. It allocates a virq and uses that number for
> > the hwirq number)
> 
> Ok, I've implemented something according to your idea. Will be part of
> the v3 of this series. For reference, the piece of code I've added in
> irqdomain.c looks like:
> 
>  /**
> + * irq_alloc_mapping() - Allocate an irq for mapping
> + * @domain: domain to allocate the irq for or NULL for default domain
> + * @hwirq:  reference to the returned hwirq
> + *
> + * This routine are used for irq controllers which can choose the
> + * hardware interrupt number from a range [ 0 ; domain size ], such as
> + * is often the case with PCI MSI controllers. The function will
> + * returned the allocated hwirq number in the hwirq pointer, and the
> + * corresponding virq number as the return value.
> + */
> +unsigned int irq_alloc_mapping(struct irq_domain *domain,
> +                              irq_hw_number_t *out_hwirq)
> +{
> +       unsigned int virq;
> +       irq_hw_number_t hwirq;
> +
> +       pr_debug("irq_alloc_mapping(0x%p)\n", domain);
> +
> +       if (domain == NULL)
> +               domain = irq_default_domain;

Drop the above 2 lines. You absolutely must know what irq_domain you
want to operate on when calling this function. There is no situation
where the default domain is what should be used.

> +
> +       for (hwirq = 0; hwirq < domain->hwirq_max; hwirq++)
> +               if (!irq_find_mapping(domain, hwirq))
> +                       break;

Ugh. This will be slow on domains with a high hwirq_max and low
revmap_size since all the lookups will go out to the radix tree. Blech.
Not much to do about it though at this point without implementing some
kind of fast lookup path. To do it right would require iterating over
the radix tree looking for a hole.

> +
> +       if (hwirq == domain->hwirq_max) {
> +               pr_debug("-> no available hwirq found\n");
> +               return 0;
> +       }
> +
> +       virq = irq_alloc_desc_from(1, of_node_to_nid(domain->of_node));
> +       if (virq <= 0) {
> +               pr_debug("-> virq allocation failed\n");
> +               return 0;
> +       }
> +
> +       if (irq_domain_associate(domain, virq, hwirq)) {
> +               irq_free_desc(virq);
> +               return 0;
> +       }

Once a free hwirq has been found, it would be better to call
irq_create_mapping() directly rather than open coding it.

> +
> +       pr_debug("irq %lu on domain %s mapped to virtual irq %u\n",
> +                hwirq, of_node_full_name(domain->of_node), virq);
> +
> +       *out_hwirq = hwirq;
> +
> +       return virq;
> +}
> +EXPORT_SYMBOL_GPL(irq_alloc_mapping);
> +
> +/**
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Thomas Petazzoni - June 18, 2013, 10:36 a.m.
Dear Grant Likely,

On Tue, 18 Jun 2013 11:15:38 +0100, Grant Likely wrote:

> > +       if (domain == NULL)
> > +               domain = irq_default_domain;
> 
> Drop the above 2 lines. You absolutely must know what irq_domain you
> want to operate on when calling this function. There is no situation
> where the default domain is what should be used.

Sure, makes sense.

> > +
> > +       for (hwirq = 0; hwirq < domain->hwirq_max; hwirq++)
> > +               if (!irq_find_mapping(domain, hwirq))
> > +                       break;
> 
> Ugh. This will be slow on domains with a high hwirq_max and low
> revmap_size since all the lookups will go out to the radix tree. Blech.
> Not much to do about it though at this point without implementing some
> kind of fast lookup path. To do it right would require iterating over
> the radix tree looking for a hole.

So to conclude you would leave it as I proposed for now?

An option is to make irq_alloc_mapping() work only on linear domains,
where hwirq_max == revmap_size, and return an error otherwise.

> Once a free hwirq has been found, it would be better to call
> irq_create_mapping() directly rather than open coding it.

Thanks, will do.

Thomas
Thierry Reding - June 18, 2013, 11:26 a.m.
On Tue, Jun 18, 2013 at 10:43:38AM +0200, Thomas Petazzoni wrote:
> Dear Thierry Reding,
> 
> On Wed, 12 Jun 2013 12:42:40 +0200, Thierry Reding wrote:
> > On Thu, Jun 06, 2013 at 06:41:24PM +0200, Thomas Petazzoni wrote:
> > [...]
> > > @@ -292,6 +454,8 @@ static int __init armada_370_xp_mpic_of_init(struct device_node *node,
> > >  
> > >  #endif
> > >  
> > > +	armada_370_xp_msi_init(node);
> > > +
> > 
> > So I see that you don't have access to the original platform device
> > here, but you could use of_find_device_by_node() to obtain it and pass
> > that into armada_370_xp_msi_init() in order to set the msi_chip.dev
> > field. Or you could do the lookup in armada_370_xp_msi_init() if you
> > don't need it for anything else in armada_370_xp_mpic_of_init().
> 
> As I replied to Grant, of_find_device_by_node() returns NULL, I believe
> because the all IRQ controller driver initialization is done pretty
> early, before the of_platform_populate() call is made, so there is no
> platform_device associated with the IRQ controller node at the time the
> armada_370_xp_mpic_of_init() function is called.
> 
> Do you see another approach, especially in relation to your comment on
> PATCH 2/8 ?

Hmmm, that's too bad. The only other possibility that I see is that you
could associate the struct device at a later point when it becomes
available, but looking at the irqchip driver it doesn't look like you
get notification of that either. I suppose you could add a
platform_driver to it and hook things up in its .probe() callback, but
I'm not sure if that's in line with how the irqchip was designed. Adding
Grant Likely and Thomas Gleixner on Cc, maybe they have better advice.

Looking at other irqchip drivers I find it a bit odd to see how they're
structured, though. We've been preaching for years that drivers should
be well-encapsulated and told everybody it was bad to use globals and
they should be associating driver-specific data with each instance of a
device. Then comes along irqchip and all of a sudden it's okay to use
globals again. It feels a bit fragile.

Thierry
Thomas Petazzoni - June 18, 2013, 12:11 p.m.
Dear Thierry Reding,

On Tue, 18 Jun 2013 13:26:04 +0200, Thierry Reding wrote:

> > As I replied to Grant, of_find_device_by_node() returns NULL, I believe
> > because the all IRQ controller driver initialization is done pretty
> > early, before the of_platform_populate() call is made, so there is no
> > platform_device associated with the IRQ controller node at the time the
> > armada_370_xp_mpic_of_init() function is called.
> > 
> > Do you see another approach, especially in relation to your comment on
> > PATCH 2/8 ?
> 
> Hmmm, that's too bad. The only other possibility that I see is that you
> could associate the struct device at a later point when it becomes
> available, but looking at the irqchip driver it doesn't look like you
> get notification of that either. I suppose you could add a
> platform_driver to it and hook things up in its .probe() callback, but
> I'm not sure if that's in line with how the irqchip was designed. Adding
> Grant Likely and Thomas Gleixner on Cc, maybe they have better advice.

If we do hook the MSI stuff in a ->probe() callback, then we'd have a
dependency between the ->probe() of the PCIe driver and the ->probe()
of the IRQ controller driver. In order for the PCIe ->probe() to
succeed, it needs the MSI controller to be registered, which wouldn't
appear until the ->probe() of the IRQ controller driver gets called. A
typical case of platform device dependency where the two platform
devices don't have a bus -> child dependency. Could be handled by a
-EPROBE_DEFER trick, though.

> Looking at other irqchip drivers I find it a bit odd to see how they're
> structured, though. We've been preaching for years that drivers should
> be well-encapsulated and told everybody it was bad to use globals and
> they should be associating driver-specific data with each instance of a
> device. Then comes along irqchip and all of a sudden it's okay to use
> globals again. It feels a bit fragile.

Yes, I've seen this as well, and I'm thinking of doing some
improvements in this area if there's some interest. But I believe this
is fairly separate from the specific discussion of this patch set.

Best regards,

Thomas

Patch

diff --git a/drivers/irqchip/irq-armada-370-xp.c b/drivers/irqchip/irq-armada-370-xp.c
index 26adc74..c7c904d 100644
--- a/drivers/irqchip/irq-armada-370-xp.c
+++ b/drivers/irqchip/irq-armada-370-xp.c
@@ -22,6 +22,8 @@ 
 #include <linux/of_address.h>
 #include <linux/of_irq.h>
 #include <linux/irqdomain.h>
+#include <linux/slab.h>
+#include <linux/msi.h>
 #include <asm/mach/arch.h>
 #include <asm/exception.h>
 #include <asm/smp_plat.h>
@@ -51,12 +53,22 @@ 
 #define IPI_DOORBELL_START                      (0)
 #define IPI_DOORBELL_END                        (8)
 #define IPI_DOORBELL_MASK                       0xFF
+#define PCI_MSI_DOORBELL_START                  (16)
+#define PCI_MSI_DOORBELL_NR                     (16)
+#define PCI_MSI_DOORBELL_END                    (32)
+#define PCI_MSI_DOORBELL_MASK                   0xFFFF0000
 
 static DEFINE_RAW_SPINLOCK(irq_controller_lock);
 
 static void __iomem *per_cpu_int_base;
 static void __iomem *main_int_base;
 static struct irq_domain *armada_370_xp_mpic_domain;
+#ifdef CONFIG_PCI_MSI
+static struct irq_domain *armada_370_xp_msi_domain;
+static DECLARE_BITMAP(msi_used, PCI_MSI_DOORBELL_NR);
+static DEFINE_MUTEX(msi_used_lock);
+static phys_addr_t msi_doorbell_addr;
+#endif
 
 /*
  * In SMP mode:
@@ -87,6 +99,126 @@  static void armada_370_xp_irq_unmask(struct irq_data *d)
 				ARMADA_370_XP_INT_CLEAR_MASK_OFFS);
 }
 
+#ifdef CONFIG_PCI_MSI
+static int armada_370_xp_alloc_msi(void)
+{
+	int hwirq;
+
+	mutex_lock(&msi_used_lock);
+	hwirq = find_first_zero_bit(&msi_used, PCI_MSI_DOORBELL_NR);
+	if (hwirq >= PCI_MSI_DOORBELL_NR)
+		hwirq = -ENOSPC;
+	else
+		set_bit(hwirq, msi_used);
+	mutex_unlock(&msi_used_lock);
+
+	return hwirq;
+}
+
+static void armada_370_xp_free_msi(int hwirq)
+{
+	mutex_lock(&msi_used_lock);
+	if (!test_bit(hwirq, msi_used))
+		pr_err("trying to free unused MSI#%d\n", hwirq);
+	else
+		clear_bit(hwirq, msi_used);
+       mutex_unlock(&msi_used_lock);
+}
+
+static int armada_370_xp_setup_msi_irq(struct msi_chip *chip,
+				       struct pci_dev *pdev,
+				       struct msi_desc *desc)
+{
+	struct msi_msg msg;
+	int hwirq, irq;
+
+	hwirq = armada_370_xp_alloc_msi();
+	if (hwirq < 0)
+		return hwirq;
+
+	irq = irq_create_mapping(armada_370_xp_msi_domain, hwirq);
+	if (!irq) {
+		armada_370_xp_free_msi(hwirq);
+		return -EINVAL;
+	}
+
+	irq_set_msi_desc(irq, desc);
+
+	msg.address_lo = msi_doorbell_addr;
+	msg.address_hi = 0;
+	msg.data = 0xf00 | (hwirq + 16);
+
+	write_msi_msg(irq, &msg);
+	return 0;
+}
+
+static void armada_370_xp_teardown_msi_irq(struct msi_chip *chip,
+					   unsigned int irq)
+{
+	struct irq_data *d = irq_get_irq_data(irq);
+	armada_370_xp_free_msi(d->hwirq);
+}
+
+static struct irq_chip armada_370_xp_msi_irq_chip = {
+	.name = "armada_370_xp_msi_irq",
+	.irq_enable = unmask_msi_irq,
+	.irq_disable = mask_msi_irq,
+	.irq_mask = mask_msi_irq,
+	.irq_unmask = unmask_msi_irq,
+};
+
+static int armada_370_xp_msi_map(struct irq_domain *domain, unsigned int virq,
+				 irq_hw_number_t hw)
+{
+	irq_set_chip_and_handler(virq, &armada_370_xp_msi_irq_chip,
+				 handle_simple_irq);
+	set_irq_flags(virq, IRQF_VALID);
+
+	return 0;
+}
+
+static const struct irq_domain_ops armada_370_xp_msi_irq_ops = {
+	.map = armada_370_xp_msi_map,
+};
+
+static int armada_370_xp_msi_init(struct device_node *node)
+{
+	struct msi_chip *msi_chip;
+	u32 reg;
+
+	msi_chip = kzalloc(sizeof(*msi_chip), GFP_KERNEL);
+	if (!msi_chip)
+		return -ENOMEM;
+
+	armada_370_xp_msi_domain =
+		irq_domain_add_linear(NULL, PCI_MSI_DOORBELL_NR,
+				      &armada_370_xp_msi_irq_ops, NULL);
+	if (!armada_370_xp_msi_domain) {
+		kfree(msi_chip);
+		return -ENOMEM;
+	}
+
+	msi_chip->of_node = node;
+	msi_chip->setup_irq = armada_370_xp_setup_msi_irq;
+	msi_chip->teardown_irq = armada_370_xp_teardown_msi_irq;
+
+	msi_chip_add(msi_chip);
+
+	reg = readl(per_cpu_int_base + ARMADA_370_XP_IN_DRBEL_MSK_OFFS)
+		| PCI_MSI_DOORBELL_MASK;
+
+	writel(reg, per_cpu_int_base +
+	       ARMADA_370_XP_IN_DRBEL_MSK_OFFS);
+
+	/* Unmask IPI interrupt */
+	writel(1, per_cpu_int_base + ARMADA_370_XP_INT_CLEAR_MASK_OFFS);
+
+	return 0;
+}
+#else
+static inline int armada_370_xp_msi_init(struct device_node *node) { return 0; }
+#endif
+
 #ifdef CONFIG_SMP
 static int armada_xp_set_affinity(struct irq_data *d,
 				  const struct cpumask *mask_val, bool force)
@@ -214,12 +346,39 @@  armada_370_xp_handle_irq(struct pt_regs *regs)
 		if (irqnr > 1022)
 			break;
 
-		if (irqnr > 0) {
+		if (irqnr > 1) {
 			irqnr =	irq_find_mapping(armada_370_xp_mpic_domain,
 					irqnr);
 			handle_IRQ(irqnr, regs);
 			continue;
 		}
+
+#ifdef CONFIG_PCI_MSI
+		/* MSI handling */
+		if (irqnr == 1) {
+			u32 msimask, msinr;
+
+			msimask = readl_relaxed(per_cpu_int_base +
+						ARMADA_370_XP_IN_DRBEL_CAUSE_OFFS)
+				& PCI_MSI_DOORBELL_MASK;
+
+			writel(~PCI_MSI_DOORBELL_MASK, per_cpu_int_base +
+			       ARMADA_370_XP_IN_DRBEL_CAUSE_OFFS);
+
+			for (msinr = PCI_MSI_DOORBELL_START;
+			     msinr < PCI_MSI_DOORBELL_END; msinr++) {
+				int irq;
+
+				if (!(msimask & BIT(msinr)))
+					continue;
+
+				irq = irq_find_mapping(armada_370_xp_msi_domain,
+						       msinr - 16);
+				handle_IRQ(irq, regs);
+			}
+		}
+#endif
+
 #ifdef CONFIG_SMP
 		/* IPI Handling */
 		if (irqnr == 0) {
@@ -269,6 +428,9 @@  static int __init armada_370_xp_mpic_of_init(struct device_node *node,
 				   resource_size(&per_cpu_int_res));
 	BUG_ON(!per_cpu_int_base);
 
+	msi_doorbell_addr = main_int_res.start +
+		ARMADA_370_XP_SW_TRIG_INT_OFFS;
+
 	control = readl(main_int_base + ARMADA_370_XP_INT_CONTROL);
 
 	armada_370_xp_mpic_domain =
@@ -292,6 +454,8 @@  static int __init armada_370_xp_mpic_of_init(struct device_node *node,
 
 #endif
 
+	armada_370_xp_msi_init(node);
+
 	set_handle_irq(armada_370_xp_handle_irq);
 
 	return 0;