Patchwork [3/3] powerpc: Disable VPHN polling during a suspend operation

login
register
mail settings
Submitter Jesse Larrew
Date Oct. 29, 2010, 12:30 a.m.
Message ID <20101029002921.17835.88560.sendpatchset@manic8ball.ltc.austin.ibm.com>
Download mbox | patch
Permalink /patch/69519/
State Superseded
Headers show

Comments

Jesse Larrew - Oct. 29, 2010, 12:30 a.m.
From: Jesse Larrew <jlarrew@linux.vnet.ibm.com>

Tie the polling mechanism into the ibm,suspend-me rtas call to
stop/restart polling before/after a suspend, hibernate, migrate,
or checkpoint restart operation. This ensures that the system has a
chance to disable the polling if the partition is migrated to a system
that does not support VPHN (and vice versa).

Signed-off-by: Jesse Larrew <jlarrew@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/topology.h |    6 +++++-
 arch/powerpc/kernel/rtas.c          |   15 +++++++++++++++
 2 files changed, 20 insertions(+), 1 deletions(-)
Michael Ellerman - Nov. 3, 2010, 12:12 p.m.
On Thu, 2010-10-28 at 20:30 -0400, Jesse Larrew wrote: 
> From: Jesse Larrew <jlarrew@linux.vnet.ibm.com>

Hi Jesse, a few comments ...

> diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h
> index afe4aaa..1747d27 100644
> --- a/arch/powerpc/include/asm/topology.h
> +++ b/arch/powerpc/include/asm/topology.h
> @@ -49,7 +49,7 @@ static inline int pcibus_to_node(struct pci_bus *bus)
>  {
>  	return -1;
>  }
> -#endif
> +#endif /* CONFIG_PCI */

Random change, though not a biggy I suppose.

> #define cpumask_of_pcibus(bus)	(pcibus_to_node(bus) == -1 ?		\
>  				 cpu_all_mask :				\
> @@ -93,6 +93,8 @@ extern void __init dump_numa_cpu_topology(void);
>  extern int sysfs_add_device_to_node(struct sys_device *dev, int nid);
>  extern void sysfs_remove_device_from_node(struct sys_device *dev, int nid);
>  
> +extern int __init init_topology_update(void);
> +extern int stop_topology_update(void);

init_topology_update() is called repeatedly from post_suspend_work() so
it seems like it should be called start_topology_update(). And it can't
be __init because the suspend code is called after boot. You should get
a section mismatch warning if they are enabled.

> #else
>  
>  static inline void dump_numa_cpu_topology(void) {}
> @@ -107,6 +109,8 @@ static inline void sysfs_remove_device_from_node(struct sys_device *dev,
>  {
>  }
>  
> +static int __init init_topology_update(void) {}
> +static int stop_topology_update(void) {}

That doesn't look like it compiles to me, you want static inline, and
they both return int.

> #endif /* CONFIG_NUMA */
>  
>  #include <asm-generic/topology.h>
> diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
> index 8fe8bc6..317ff2f 100644
> --- a/arch/powerpc/kernel/rtas.c
> +++ b/arch/powerpc/kernel/rtas.c
> @@ -41,6 +41,7 @@
>  #include <asm/atomic.h>
>  #include <asm/time.h>
>  #include <asm/mmu.h>
> +#include <asm/topology.h>
>  
>  struct rtas_t rtas = {
>  	.lock = __ARCH_SPIN_LOCK_UNLOCKED
> @@ -706,6 +707,18 @@ void rtas_os_term(char *str)
>  
>  static int ibm_suspend_me_token = RTAS_UNKNOWN_SERVICE;
>  #ifdef CONFIG_PPC_PSERIES
> +static void pre_suspend_work(void)
> +{
> +	stop_topology_update();
> +	return;
> +}
> +
> +static void post_suspend_work(void)
> +{
> +	init_topology_update();
> +	return;
> +}

I'm not sure if it's worth splitting these out into "generic"
callbacks ..

> static int __rtas_suspend_last_cpu(struct rtas_suspend_me_data *data, int wake_when_done)
>  {
>  	u16 slb_size = mmu_slb_size;
> @@ -713,6 +726,7 @@ static int __rtas_suspend_last_cpu(struct rtas_suspend_me_data *data, int wake_w
>  	int cpu;
>  
>  	slb_set_size(SLB_MIN_SIZE);
> +	pre_suspend_work();
>  	printk(KERN_DEBUG "calling ibm,suspend-me on cpu %i\n", smp_processor_id());
>  
>  	while (rc == H_MULTI_THREADS_ACTIVE && !atomic_read(&data->done) &&

And isn't there an error case here where you're not re-enabling the
polling? See eg. the slb_set_size() call.

> @@ -728,6 +742,7 @@ static int __rtas_suspend_last_cpu(struct rtas_suspend_me_data *data, int wake_w
>  		rc = atomic_read(&data->error);
>  
>  	atomic_set(&data->error, rc);
> +	post_suspend_work();
>  
>  	if (wake_when_done) {
>  		atomic_set(&data->done, 1);

cheers
Jesse Larrew - Nov. 5, 2010, 8:33 p.m.
On 11/03/2010 07:12 AM, Michael Ellerman wrote:
> On Thu, 2010-10-28 at 20:30 -0400, Jesse Larrew wrote: 
>> From: Jesse Larrew <jlarrew@linux.vnet.ibm.com>
> 
> Hi Jesse, a few comments ...
> 
>> diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h
>> index afe4aaa..1747d27 100644
>> --- a/arch/powerpc/include/asm/topology.h
>> +++ b/arch/powerpc/include/asm/topology.h
>> @@ -49,7 +49,7 @@ static inline int pcibus_to_node(struct pci_bus *bus)
>>  {
>>  	return -1;
>>  }
>> -#endif
>> +#endif /* CONFIG_PCI */
> 
> Random change, though not a biggy I suppose.
> 

That was a change that made the header easier to read, but that change should probably be submitted separately.

>> #define cpumask_of_pcibus(bus)	(pcibus_to_node(bus) == -1 ?		\
>>  				 cpu_all_mask :				\
>> @@ -93,6 +93,8 @@ extern void __init dump_numa_cpu_topology(void);
>>  extern int sysfs_add_device_to_node(struct sys_device *dev, int nid);
>>  extern void sysfs_remove_device_from_node(struct sys_device *dev, int nid);
>>  
>> +extern int __init init_topology_update(void);
>> +extern int stop_topology_update(void);
> 
> init_topology_update() is called repeatedly from post_suspend_work() so
> it seems like it should be called start_topology_update(). And it can't
> be __init because the suspend code is called after boot. You should get
> a section mismatch warning if they are enabled.
> 

Agreed. My implementation was based on a similar feature for System Z in arch/s390/kernel/topology.c, and I had simply carried over some of their naming conventions. start_topology_update() is a better name.

>> #else
>>  
>>  static inline void dump_numa_cpu_topology(void) {}
>> @@ -107,6 +109,8 @@ static inline void sysfs_remove_device_from_node(struct sys_device *dev,
>>  {
>>  }
>>  
>> +static int __init init_topology_update(void) {}
>> +static int stop_topology_update(void) {}
> 
> That doesn't look like it compiles to me, you want static inline, and
> they both return int.
>

Good catch! I hadn't tried to compile this with CONFIG_NUMA turned off.
 
>> #endif /* CONFIG_NUMA */
>>  
>>  #include <asm-generic/topology.h>
>> diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
>> index 8fe8bc6..317ff2f 100644
>> --- a/arch/powerpc/kernel/rtas.c
>> +++ b/arch/powerpc/kernel/rtas.c
>> @@ -41,6 +41,7 @@
>>  #include <asm/atomic.h>
>>  #include <asm/time.h>
>>  #include <asm/mmu.h>
>> +#include <asm/topology.h>
>>  
>>  struct rtas_t rtas = {
>>  	.lock = __ARCH_SPIN_LOCK_UNLOCKED
>> @@ -706,6 +707,18 @@ void rtas_os_term(char *str)
>>  
>>  static int ibm_suspend_me_token = RTAS_UNKNOWN_SERVICE;
>>  #ifdef CONFIG_PPC_PSERIES
>> +static void pre_suspend_work(void)
>> +{
>> +	stop_topology_update();
>> +	return;
>> +}
>> +
>> +static void post_suspend_work(void)
>> +{
>> +	init_topology_update();
>> +	return;
>> +}
> 
> I'm not sure if it's worth splitting these out into "generic"
> callbacks ..
> 

I talked with Nathan Fontenot about this a couple weeks ago, and I think the plan going forward is to implement a notifier call chain that executes before/after a suspend operation to handle reinitializations like this. In the mean time, I'll just remove the pre_suspend_work() and post_suspend_work() functions in my next revision.

>> static int __rtas_suspend_last_cpu(struct rtas_suspend_me_data *data, int wake_when_done)
>>  {
>>  	u16 slb_size = mmu_slb_size;
>> @@ -713,6 +726,7 @@ static int __rtas_suspend_last_cpu(struct rtas_suspend_me_data *data, int wake_w
>>  	int cpu;
>>  
>>  	slb_set_size(SLB_MIN_SIZE);
>> +	pre_suspend_work();
>>  	printk(KERN_DEBUG "calling ibm,suspend-me on cpu %i\n", smp_processor_id());
>>  
>>  	while (rc == H_MULTI_THREADS_ACTIVE && !atomic_read(&data->done) &&
> 
> And isn't there an error case here where you're not re-enabling the
> polling? See eg. the slb_set_size() call.
> 

I'm not sure that I understand this point. I looked it over, and it looks to me that all possible code paths touch pre_suspend_work() and post_suspend_work() exactly once (even the paths that call slb_set_size()). Which path appears to be unhandled?

>> @@ -728,6 +742,7 @@ static int __rtas_suspend_last_cpu(struct rtas_suspend_me_data *data, int wake_w
>>  		rc = atomic_read(&data->error);
>>  
>>  	atomic_set(&data->error, rc);
>> +	post_suspend_work();
>>  
>>  	if (wake_when_done) {
>>  		atomic_set(&data->done, 1);
> 
> cheers
> 
>

Patch

diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h
index afe4aaa..1747d27 100644
--- a/arch/powerpc/include/asm/topology.h
+++ b/arch/powerpc/include/asm/topology.h
@@ -49,7 +49,7 @@  static inline int pcibus_to_node(struct pci_bus *bus)
 {
 	return -1;
 }
-#endif
+#endif /* CONFIG_PCI */
 
 #define cpumask_of_pcibus(bus)	(pcibus_to_node(bus) == -1 ?		\
 				 cpu_all_mask :				\
@@ -93,6 +93,8 @@  extern void __init dump_numa_cpu_topology(void);
 extern int sysfs_add_device_to_node(struct sys_device *dev, int nid);
 extern void sysfs_remove_device_from_node(struct sys_device *dev, int nid);
 
+extern int __init init_topology_update(void);
+extern int stop_topology_update(void);
 #else
 
 static inline void dump_numa_cpu_topology(void) {}
@@ -107,6 +109,8 @@  static inline void sysfs_remove_device_from_node(struct sys_device *dev,
 {
 }
 
+static int __init init_topology_update(void) {}
+static int stop_topology_update(void) {}
 #endif /* CONFIG_NUMA */
 
 #include <asm-generic/topology.h>
diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index 8fe8bc6..317ff2f 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -41,6 +41,7 @@ 
 #include <asm/atomic.h>
 #include <asm/time.h>
 #include <asm/mmu.h>
+#include <asm/topology.h>
 
 struct rtas_t rtas = {
 	.lock = __ARCH_SPIN_LOCK_UNLOCKED
@@ -706,6 +707,18 @@  void rtas_os_term(char *str)
 
 static int ibm_suspend_me_token = RTAS_UNKNOWN_SERVICE;
 #ifdef CONFIG_PPC_PSERIES
+static void pre_suspend_work(void)
+{
+	stop_topology_update();
+	return;
+}
+
+static void post_suspend_work(void)
+{
+	init_topology_update();
+	return;
+}
+
 static int __rtas_suspend_last_cpu(struct rtas_suspend_me_data *data, int wake_when_done)
 {
 	u16 slb_size = mmu_slb_size;
@@ -713,6 +726,7 @@  static int __rtas_suspend_last_cpu(struct rtas_suspend_me_data *data, int wake_w
 	int cpu;
 
 	slb_set_size(SLB_MIN_SIZE);
+	pre_suspend_work();
 	printk(KERN_DEBUG "calling ibm,suspend-me on cpu %i\n", smp_processor_id());
 
 	while (rc == H_MULTI_THREADS_ACTIVE && !atomic_read(&data->done) &&
@@ -728,6 +742,7 @@  static int __rtas_suspend_last_cpu(struct rtas_suspend_me_data *data, int wake_w
 		rc = atomic_read(&data->error);
 
 	atomic_set(&data->error, rc);
+	post_suspend_work();
 
 	if (wake_when_done) {
 		atomic_set(&data->done, 1);