Patchwork powerpc: Reduce footprint of xics_ipi_struct

login
register
mail settings
Submitter Anton Blanchard
Date Jan. 12, 2010, 10:58 a.m.
Message ID <20100112105805.GJ12666@kryten>
Download mbox | patch
Permalink /patch/42706/
State Superseded
Delegated to: Benjamin Herrenschmidt
Headers show

Comments

Anton Blanchard - Jan. 12, 2010, 10:58 a.m.
Right now we allocate a cacheline sized NR_CPUS array for xics IPI
communication. Since irq_stat is now PowerPC specific and using
DECLARE_PER_CPU_SHARED_ALIGNED (which should mean remote writes to
this should not conflict with other per cpu data), we can put it in there.

On a kernel with NR_CPUS=1024, this saves quite a lot of memory:

   text    data     bss      dec         hex    filename
8767779 2944260 1505724 13217763         c9afe3 vmlinux.irq_cpustat
8767555 2813444 1505724 13086723         c7b003 vmlinux.xics

A saving of around 128kB.

Signed-off-by: Anton Blanchard <anton@samba.org>
---
Benjamin Herrenschmidt - Feb. 1, 2010, 5:18 a.m.
On Tue, 2010-01-12 at 21:58 +1100, Anton Blanchard wrote:

> Index: linux-cpumask/arch/powerpc/include/asm/hardirq.h
> ===================================================================
> --- linux-cpumask.orig/arch/powerpc/include/asm/hardirq.h	2010-01-12 12:36:47.174226189 +1100
> +++ linux-cpumask/arch/powerpc/include/asm/hardirq.h	2010-01-12 12:36:47.432976459 +1100
> @@ -6,6 +6,9 @@
>  
>  typedef struct {
>  	unsigned int __softirq_pending;
> +#if defined(CONFIG_XICS) && defined(CONFIG_SMP)
> +	unsigned long xics_ipi;
> +#endif
>  } ____cacheline_aligned irq_cpustat_t;

This is still a gross abuse of irq_cpustat_t ... Can't we do
a separate DECLARE_PER_CPU_SHARED_ALIGNED(unsigned long, xics_ipi)
inside xics.c instead ?

Cheers,
Ben.

>  DECLARE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
> Index: linux-cpumask/arch/powerpc/platforms/pseries/xics.c
> ===================================================================
> --- linux-cpumask.orig/arch/powerpc/platforms/pseries/xics.c	2010-01-12 12:36:46.905477650 +1100
> +++ linux-cpumask/arch/powerpc/platforms/pseries/xics.c	2010-01-12 12:40:54.782975198 +1100
> @@ -514,15 +514,12 @@ static void __init xics_init_host(void)
>  /*
>   * XICS only has a single IPI, so encode the messages per CPU
>   */
> -struct xics_ipi_struct {
> -        unsigned long value;
> -	} ____cacheline_aligned;
> -
> -static struct xics_ipi_struct xics_ipi_message[NR_CPUS] __cacheline_aligned;
>  
>  static inline void smp_xics_do_message(int cpu, int msg)
>  {
> -	set_bit(msg, &xics_ipi_message[cpu].value);
> +	unsigned long *tgt = &(per_cpu(irq_stat, cpu).xics_ipi);
> +
> +	set_bit(msg, tgt);
>  	mb();
>  	if (firmware_has_feature(FW_FEATURE_LPAR))
>  		lpar_qirr_info(cpu, IPI_PRIORITY);
> @@ -548,25 +545,23 @@ void smp_xics_message_pass(int target, i
>  
>  static irqreturn_t xics_ipi_dispatch(int cpu)
>  {
> +	unsigned long *tgt = &(per_cpu(irq_stat, cpu).xics_ipi);
> +
>  	WARN_ON(cpu_is_offline(cpu));
>  
>  	mb();	/* order mmio clearing qirr */
> -	while (xics_ipi_message[cpu].value) {
> -		if (test_and_clear_bit(PPC_MSG_CALL_FUNCTION,
> -				       &xics_ipi_message[cpu].value)) {
> +	while (*tgt) {
> +		if (test_and_clear_bit(PPC_MSG_CALL_FUNCTION, tgt)) {
>  			smp_message_recv(PPC_MSG_CALL_FUNCTION);
>  		}
> -		if (test_and_clear_bit(PPC_MSG_RESCHEDULE,
> -				       &xics_ipi_message[cpu].value)) {
> +		if (test_and_clear_bit(PPC_MSG_RESCHEDULE, tgt)) {
>  			smp_message_recv(PPC_MSG_RESCHEDULE);
>  		}
> -		if (test_and_clear_bit(PPC_MSG_CALL_FUNC_SINGLE,
> -				       &xics_ipi_message[cpu].value)) {
> +		if (test_and_clear_bit(PPC_MSG_CALL_FUNC_SINGLE, tgt)) {
>  			smp_message_recv(PPC_MSG_CALL_FUNC_SINGLE);
>  		}
>  #if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC)
> -		if (test_and_clear_bit(PPC_MSG_DEBUGGER_BREAK,
> -				       &xics_ipi_message[cpu].value)) {
> +		if (test_and_clear_bit(PPC_MSG_DEBUGGER_BREAK, tgt)) {
>  			smp_message_recv(PPC_MSG_DEBUGGER_BREAK);
>  		}
>  #endif
Anton Blanchard - Feb. 1, 2010, 6:26 a.m.
Hi,

> > +#if defined(CONFIG_XICS) && defined(CONFIG_SMP)
> > +	unsigned long xics_ipi;
> > +#endif
> >  } ____cacheline_aligned irq_cpustat_t;
> 
> This is still a gross abuse of irq_cpustat_t ... Can't we do
> a separate DECLARE_PER_CPU_SHARED_ALIGNED(unsigned long, xics_ipi)
> inside xics.c instead ?

Yeah it was a bit tasteless :) Respinning...

Anton

Patch

Index: linux-cpumask/arch/powerpc/include/asm/hardirq.h
===================================================================
--- linux-cpumask.orig/arch/powerpc/include/asm/hardirq.h	2010-01-12 12:36:47.174226189 +1100
+++ linux-cpumask/arch/powerpc/include/asm/hardirq.h	2010-01-12 12:36:47.432976459 +1100
@@ -6,6 +6,9 @@ 
 
 typedef struct {
 	unsigned int __softirq_pending;
+#if defined(CONFIG_XICS) && defined(CONFIG_SMP)
+	unsigned long xics_ipi;
+#endif
 } ____cacheline_aligned irq_cpustat_t;
 
 DECLARE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
Index: linux-cpumask/arch/powerpc/platforms/pseries/xics.c
===================================================================
--- linux-cpumask.orig/arch/powerpc/platforms/pseries/xics.c	2010-01-12 12:36:46.905477650 +1100
+++ linux-cpumask/arch/powerpc/platforms/pseries/xics.c	2010-01-12 12:40:54.782975198 +1100
@@ -514,15 +514,12 @@  static void __init xics_init_host(void)
 /*
  * XICS only has a single IPI, so encode the messages per CPU
  */
-struct xics_ipi_struct {
-        unsigned long value;
-	} ____cacheline_aligned;
-
-static struct xics_ipi_struct xics_ipi_message[NR_CPUS] __cacheline_aligned;
 
 static inline void smp_xics_do_message(int cpu, int msg)
 {
-	set_bit(msg, &xics_ipi_message[cpu].value);
+	unsigned long *tgt = &(per_cpu(irq_stat, cpu).xics_ipi);
+
+	set_bit(msg, tgt);
 	mb();
 	if (firmware_has_feature(FW_FEATURE_LPAR))
 		lpar_qirr_info(cpu, IPI_PRIORITY);
@@ -548,25 +545,23 @@  void smp_xics_message_pass(int target, i
 
 static irqreturn_t xics_ipi_dispatch(int cpu)
 {
+	unsigned long *tgt = &(per_cpu(irq_stat, cpu).xics_ipi);
+
 	WARN_ON(cpu_is_offline(cpu));
 
 	mb();	/* order mmio clearing qirr */
-	while (xics_ipi_message[cpu].value) {
-		if (test_and_clear_bit(PPC_MSG_CALL_FUNCTION,
-				       &xics_ipi_message[cpu].value)) {
+	while (*tgt) {
+		if (test_and_clear_bit(PPC_MSG_CALL_FUNCTION, tgt)) {
 			smp_message_recv(PPC_MSG_CALL_FUNCTION);
 		}
-		if (test_and_clear_bit(PPC_MSG_RESCHEDULE,
-				       &xics_ipi_message[cpu].value)) {
+		if (test_and_clear_bit(PPC_MSG_RESCHEDULE, tgt)) {
 			smp_message_recv(PPC_MSG_RESCHEDULE);
 		}
-		if (test_and_clear_bit(PPC_MSG_CALL_FUNC_SINGLE,
-				       &xics_ipi_message[cpu].value)) {
+		if (test_and_clear_bit(PPC_MSG_CALL_FUNC_SINGLE, tgt)) {
 			smp_message_recv(PPC_MSG_CALL_FUNC_SINGLE);
 		}
 #if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC)
-		if (test_and_clear_bit(PPC_MSG_DEBUGGER_BREAK,
-				       &xics_ipi_message[cpu].value)) {
+		if (test_and_clear_bit(PPC_MSG_DEBUGGER_BREAK, tgt)) {
 			smp_message_recv(PPC_MSG_DEBUGGER_BREAK);
 		}
 #endif