diff mbox

[6/6] powerpc: Enable sparse irq_descs on powerpc

Message ID 91cec5c64da4ca31a025fc7c45d9f1b93c8b98da.1255499081.git.michael@ellerman.id.au (mailing list archive)
State Accepted, archived
Commit cd015707176820b86d07b5dffdecfefdd539a497
Delegated to: Benjamin Herrenschmidt
Headers show

Commit Message

Michael Ellerman Oct. 14, 2009, 5:45 a.m. UTC
Defining CONFIG_SPARSE_IRQ enables generic code that gets rid of the
static irq_desc array, and replaces it with an array of pointers to
irq_descs.

It also allows node local allocation of irq_descs, however we
currently don't have the information available to do that, so we just
allocate them on all on node 0.

Signed-off-by: Michael Ellerman <michael@ellerman.id.au>
---
 arch/powerpc/Kconfig            |   13 ++++++++++++
 arch/powerpc/include/asm/irq.h  |    3 ++
 arch/powerpc/kernel/irq.c       |   40 ++++++++++++++++++++++++++++++++------
 arch/powerpc/kernel/ppc_ksyms.c |    1 -
 arch/powerpc/kernel/setup_64.c  |    5 ----
 5 files changed, 49 insertions(+), 13 deletions(-)

Comments

Grant Likely Oct. 14, 2009, 6:44 p.m. UTC | #1
On Tue, Oct 13, 2009 at 11:45 PM, Michael Ellerman
<michael@ellerman.id.au> wrote:
> Defining CONFIG_SPARSE_IRQ enables generic code that gets rid of the
> static irq_desc array, and replaces it with an array of pointers to
> irq_descs.
>
> It also allows node local allocation of irq_descs, however we
> currently don't have the information available to do that, so we just
> allocate them on all on node 0.
>
> Signed-off-by: Michael Ellerman <michael@ellerman.id.au>

Why not make sparse IRQs manditory for all platforms?  Is there a
performance concern with doing so?  From a maintenance perspective,
I'd rather see IRQ descs manged in one way only to keep the code
simple.

Cheers,
g.

> ---
>  arch/powerpc/Kconfig            |   13 ++++++++++++
>  arch/powerpc/include/asm/irq.h  |    3 ++
>  arch/powerpc/kernel/irq.c       |   40 ++++++++++++++++++++++++++++++++------
>  arch/powerpc/kernel/ppc_ksyms.c |    1 -
>  arch/powerpc/kernel/setup_64.c  |    5 ----
>  5 files changed, 49 insertions(+), 13 deletions(-)
>
> diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
> index 2230e75..825d889 100644
> --- a/arch/powerpc/Kconfig
> +++ b/arch/powerpc/Kconfig
> @@ -388,6 +388,19 @@ config IRQ_ALL_CPUS
>          CPU.  Generally saying Y is safe, although some problems have been
>          reported with SMP Power Macintoshes with this option enabled.
>
> +config SPARSE_IRQ
> +       bool "Support sparse irq numbering"
> +       default y
> +       help
> +         This enables support for sparse irqs. This is useful for distro
> +         kernels that want to define a high CONFIG_NR_CPUS value but still
> +         want to have low kernel memory footprint on smaller machines.
> +
> +         ( Sparse IRQs can also be beneficial on NUMA boxes, as they spread
> +           out the irq_desc[] array in a more NUMA-friendly way. )
> +
> +         If you don't know what to do here, say Y.
> +
>  config NUMA
>        bool "NUMA support"
>        depends on PPC64
> diff --git a/arch/powerpc/include/asm/irq.h b/arch/powerpc/include/asm/irq.h
> index 03dc28c..c85a32f 100644
> --- a/arch/powerpc/include/asm/irq.h
> +++ b/arch/powerpc/include/asm/irq.h
> @@ -38,6 +38,9 @@ extern atomic_t ppc_n_lost_interrupts;
>  /* Number of irqs reserved for the legacy controller */
>  #define NUM_ISA_INTERRUPTS     16
>
> +/* Same thing, used by the generic IRQ code */
> +#define NR_IRQS_LEGACY         NUM_ISA_INTERRUPTS
> +
>  /* This type is the placeholder for a hardware interrupt number. It has to
>  * be big enough to enclose whatever representation is used by a given
>  * platform.
> diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
> index 63e27d5..eba5392 100644
> --- a/arch/powerpc/kernel/irq.c
> +++ b/arch/powerpc/kernel/irq.c
> @@ -85,7 +85,10 @@ extern int tau_interrupts(int);
>  #endif /* CONFIG_PPC32 */
>
>  #ifdef CONFIG_PPC64
> +
> +#ifndef CONFIG_SPARSE_IRQ
>  EXPORT_SYMBOL(irq_desc);
> +#endif
>
>  int distribute_irqs = 1;
>
> @@ -613,8 +616,16 @@ void irq_set_virq_count(unsigned int count)
>  static int irq_setup_virq(struct irq_host *host, unsigned int virq,
>                            irq_hw_number_t hwirq)
>  {
> +       struct irq_desc *desc;
> +
> +       desc = irq_to_desc_alloc_node(virq, 0);
> +       if (!desc) {
> +               pr_debug("irq: -> allocating desc failed\n");
> +               goto error;
> +       }
> +
>        /* Clear IRQ_NOREQUEST flag */
> -       irq_to_desc(virq)->status &= ~IRQ_NOREQUEST;
> +       desc->status &= ~IRQ_NOREQUEST;
>
>        /* map it */
>        smp_wmb();
> @@ -623,11 +634,14 @@ static int irq_setup_virq(struct irq_host *host, unsigned int virq,
>
>        if (host->ops->map(host, virq, hwirq)) {
>                pr_debug("irq: -> mapping failed, freeing\n");
> -               irq_free_virt(virq, 1);
> -               return -1;
> +               goto error;
>        }
>
>        return 0;
> +
> +error:
> +       irq_free_virt(virq, 1);
> +       return -1;
>  }
>
>  unsigned int irq_create_direct_mapping(struct irq_host *host)
> @@ -1008,12 +1022,24 @@ void irq_free_virt(unsigned int virq, unsigned int count)
>        spin_unlock_irqrestore(&irq_big_lock, flags);
>  }
>
> -void irq_early_init(void)
> +int arch_early_irq_init(void)
>  {
> -       unsigned int i;
> +       struct irq_desc *desc;
> +       int i;
>
> -       for (i = 0; i < NR_IRQS; i++)
> -               irq_to_desc(i)->status |= IRQ_NOREQUEST;
> +       for (i = 0; i < NR_IRQS; i++) {
> +               desc = irq_to_desc(i);
> +               if (desc)
> +                       desc->status |= IRQ_NOREQUEST;
> +       }
> +
> +       return 0;
> +}
> +
> +int arch_init_chip_data(struct irq_desc *desc, int node)
> +{
> +       desc->status |= IRQ_NOREQUEST;
> +       return 0;
>  }
>
>  /* We need to create the radix trees late */
> diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c
> index c8b27bb..07115d6 100644
> --- a/arch/powerpc/kernel/ppc_ksyms.c
> +++ b/arch/powerpc/kernel/ppc_ksyms.c
> @@ -162,7 +162,6 @@ EXPORT_SYMBOL(screen_info);
>
>  #ifdef CONFIG_PPC32
>  EXPORT_SYMBOL(timer_interrupt);
> -EXPORT_SYMBOL(irq_desc);
>  EXPORT_SYMBOL(tb_ticks_per_jiffy);
>  EXPORT_SYMBOL(cacheable_memcpy);
>  EXPORT_SYMBOL(cacheable_memzero);
> diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
> index 797ea95..8e5ec92 100644
> --- a/arch/powerpc/kernel/setup_64.c
> +++ b/arch/powerpc/kernel/setup_64.c
> @@ -357,11 +357,6 @@ void __init setup_system(void)
>         */
>        initialize_cache_info();
>
> -       /*
> -        * Initialize irq remapping subsystem
> -        */
> -       irq_early_init();
> -
>  #ifdef CONFIG_PPC_RTAS
>        /*
>         * Initialize RTAS if available
> --
> 1.6.2.1
>
> _______________________________________________
> Linuxppc-dev mailing list
> Linuxppc-dev@lists.ozlabs.org
> https://lists.ozlabs.org/listinfo/linuxppc-dev
>
Michael Ellerman Oct. 14, 2009, 11:51 p.m. UTC | #2
On Wed, 2009-10-14 at 12:44 -0600, Grant Likely wrote:
> On Tue, Oct 13, 2009 at 11:45 PM, Michael Ellerman
> <michael@ellerman.id.au> wrote:
> > Defining CONFIG_SPARSE_IRQ enables generic code that gets rid of the
> > static irq_desc array, and replaces it with an array of pointers to
> > irq_descs.
> >
> > It also allows node local allocation of irq_descs, however we
> > currently don't have the information available to do that, so we just
> > allocate them on all on node 0.
> >
> > Signed-off-by: Michael Ellerman <michael@ellerman.id.au>
> 
> Why not make sparse IRQs manditory for all platforms?  Is there a
> performance concern with doing so?  From a maintenance perspective,
> I'd rather see IRQ descs manged in one way only to keep the code
> simple.

I agree on the maintenance angle. My thinking was we'd run with it
optional but default y for a release or two, and if no one complains we
can make it mandatory.

It does make some code paths bigger, and looking up an irq_desc is going
to take slightly more cycles. I don't think it's a big issue, but I
thought we should try it for a while before making it mandatory. The
code has only been tested on x86 and sh as far as I know.

cheers

ps. thanks for the review
Grant Likely Oct. 15, 2009, 12:33 a.m. UTC | #3
On Wed, Oct 14, 2009 at 5:51 PM, Michael Ellerman
<michael@ellerman.id.au> wrote:
> On Wed, 2009-10-14 at 12:44 -0600, Grant Likely wrote:
>> Why not make sparse IRQs manditory for all platforms?  Is there a
>> performance concern with doing so?  From a maintenance perspective,
>> I'd rather see IRQ descs manged in one way only to keep the code
>> simple.
>
> I agree on the maintenance angle. My thinking was we'd run with it
> optional but default y for a release or two, and if no one complains we
> can make it mandatory.
>
> It does make some code paths bigger, and looking up an irq_desc is going
> to take slightly more cycles. I don't think it's a big issue, but I
> thought we should try it for a while before making it mandatory. The
> code has only been tested on x86 and sh as far as I know.

No guts, no glory.  I say throw it into linux-next to give it some
time before the next merge window.  I don't think you'll get any
better results by having it optional for a few releases (in fact, I
suspect that people who do have problems will just end up turning it
off and waiting for someone else to report/fix the problems).  If this
is the direction IRQ handling is going, then just make the change and
force any bugs to be dealt with before the next release.

> ps. thanks for the review

You're welcome.

g.
diff mbox

Patch

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 2230e75..825d889 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -388,6 +388,19 @@  config IRQ_ALL_CPUS
 	  CPU.  Generally saying Y is safe, although some problems have been
 	  reported with SMP Power Macintoshes with this option enabled.
 
+config SPARSE_IRQ
+	bool "Support sparse irq numbering"
+	default y
+	help
+	  This enables support for sparse irqs. This is useful for distro
+	  kernels that want to define a high CONFIG_NR_CPUS value but still
+	  want to have low kernel memory footprint on smaller machines.
+
+	  ( Sparse IRQs can also be beneficial on NUMA boxes, as they spread
+	    out the irq_desc[] array in a more NUMA-friendly way. )
+
+	  If you don't know what to do here, say Y.
+
 config NUMA
 	bool "NUMA support"
 	depends on PPC64
diff --git a/arch/powerpc/include/asm/irq.h b/arch/powerpc/include/asm/irq.h
index 03dc28c..c85a32f 100644
--- a/arch/powerpc/include/asm/irq.h
+++ b/arch/powerpc/include/asm/irq.h
@@ -38,6 +38,9 @@  extern atomic_t ppc_n_lost_interrupts;
 /* Number of irqs reserved for the legacy controller */
 #define NUM_ISA_INTERRUPTS	16
 
+/* Same thing, used by the generic IRQ code */
+#define NR_IRQS_LEGACY		NUM_ISA_INTERRUPTS
+
 /* This type is the placeholder for a hardware interrupt number. It has to
  * be big enough to enclose whatever representation is used by a given
  * platform.
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 63e27d5..eba5392 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -85,7 +85,10 @@  extern int tau_interrupts(int);
 #endif /* CONFIG_PPC32 */
 
 #ifdef CONFIG_PPC64
+
+#ifndef CONFIG_SPARSE_IRQ
 EXPORT_SYMBOL(irq_desc);
+#endif
 
 int distribute_irqs = 1;
 
@@ -613,8 +616,16 @@  void irq_set_virq_count(unsigned int count)
 static int irq_setup_virq(struct irq_host *host, unsigned int virq,
 			    irq_hw_number_t hwirq)
 {
+	struct irq_desc *desc;
+
+	desc = irq_to_desc_alloc_node(virq, 0);
+	if (!desc) {
+		pr_debug("irq: -> allocating desc failed\n");
+		goto error;
+	}
+
 	/* Clear IRQ_NOREQUEST flag */
-	irq_to_desc(virq)->status &= ~IRQ_NOREQUEST;
+	desc->status &= ~IRQ_NOREQUEST;
 
 	/* map it */
 	smp_wmb();
@@ -623,11 +634,14 @@  static int irq_setup_virq(struct irq_host *host, unsigned int virq,
 
 	if (host->ops->map(host, virq, hwirq)) {
 		pr_debug("irq: -> mapping failed, freeing\n");
-		irq_free_virt(virq, 1);
-		return -1;
+		goto error;
 	}
 
 	return 0;
+
+error:
+	irq_free_virt(virq, 1);
+	return -1;
 }
 
 unsigned int irq_create_direct_mapping(struct irq_host *host)
@@ -1008,12 +1022,24 @@  void irq_free_virt(unsigned int virq, unsigned int count)
 	spin_unlock_irqrestore(&irq_big_lock, flags);
 }
 
-void irq_early_init(void)
+int arch_early_irq_init(void)
 {
-	unsigned int i;
+	struct irq_desc *desc;
+	int i;
 
-	for (i = 0; i < NR_IRQS; i++)
-		irq_to_desc(i)->status |= IRQ_NOREQUEST;
+	for (i = 0; i < NR_IRQS; i++) {
+		desc = irq_to_desc(i);
+		if (desc)
+			desc->status |= IRQ_NOREQUEST;
+	}
+
+	return 0;
+}
+
+int arch_init_chip_data(struct irq_desc *desc, int node)
+{
+	desc->status |= IRQ_NOREQUEST;
+	return 0;
 }
 
 /* We need to create the radix trees late */
diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c
index c8b27bb..07115d6 100644
--- a/arch/powerpc/kernel/ppc_ksyms.c
+++ b/arch/powerpc/kernel/ppc_ksyms.c
@@ -162,7 +162,6 @@  EXPORT_SYMBOL(screen_info);
 
 #ifdef CONFIG_PPC32
 EXPORT_SYMBOL(timer_interrupt);
-EXPORT_SYMBOL(irq_desc);
 EXPORT_SYMBOL(tb_ticks_per_jiffy);
 EXPORT_SYMBOL(cacheable_memcpy);
 EXPORT_SYMBOL(cacheable_memzero);
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 797ea95..8e5ec92 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -357,11 +357,6 @@  void __init setup_system(void)
 	 */
 	initialize_cache_info();
 
-	/*
-	 * Initialize irq remapping subsystem
-	 */
-	irq_early_init();
-
 #ifdef CONFIG_PPC_RTAS
 	/*
 	 * Initialize RTAS if available