diff mbox

[RFC] powerpc: add support for new hcall H_BEST_ENERGY

Message ID 20100303181822.GH5439@dirshya.in.ibm.com (mailing list archive)
State Changes Requested
Delegated to: Benjamin Herrenschmidt
Headers show

Commit Message

Vaidyanathan Srinivasan March 3, 2010, 6:18 p.m. UTC
Hi,

A new hypervisor call H_BEST_ENERGY is supported in IBM pseries ppc64
platform in order to exports platform energy management capabilities
to user space administrative tools.

This hypervisor calls provides hints or suggested list of cpus to
activate or deactivate for optimized energy management.  The following
patch add a new pseries specific device driver that will export such
platform energy management related information over sysfs interface
for use by user space administrative tools.

The device driver exports the following sysfs files:

/sys/device/system/cpu/pseries_(de)activate_hint_list and
/sys/device/system/cpu/cpuN/pseries_(de)activate_hint.

Typical usage:

# cat /sys/devices/system/cpu/pseries_activate_hint_list
16,20,24
# cat /sys/devices/system/cpu/pseries_deactivate_hint_list
0,4,8,12

Comma separated list of recommended cpu numbers to activate or
deactivate.

cat /sys/device/system/cpu/cpuN/pseries_(de)activate_hint
will provide a single integer value returned by the platform
for that cpu.

Please let me know your comments and suggestions.

Thanks,
Vaidy
---
        powerpc: add support for new hcall H_BEST_ENERGY

	Create sysfs interface to export data from H_BEST_ENERGY hcall
	that can be used by administrative tools on supported pseries
	platforms for energy management	optimizations.

	/sys/device/system/cpu/pseries_(de)activate_hint_list and
	/sys/device/system/cpu/cpuN/pseries_(de)activate_hint will provide
	hints for activation and deactivation of cpus respectively.

	Added new driver module
		arch/powerpc/platforms/pseries/pseries_energy.c
	under new config option CONFIG_PSERIES_ENERGY

Signed-off-by: Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/hvcall.h               |    3 
 arch/powerpc/kernel/setup-common.c              |    2 
 arch/powerpc/platforms/pseries/Kconfig          |   10 +
 arch/powerpc/platforms/pseries/Makefile         |    1 
 arch/powerpc/platforms/pseries/pseries_energy.c |  246 +++++++++++++++++++++++
 5 files changed, 261 insertions(+), 1 deletions(-)
 create mode 100644 arch/powerpc/platforms/pseries/pseries_energy.c

Comments

Dipankar Sarma March 5, 2010, 7:18 p.m. UTC | #1
On Wed, Mar 03, 2010 at 11:48:22PM +0530, Vaidyanathan Srinivasan wrote:
>  static void __init cpu_init_thread_core_maps(int tpc)
> diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig
> index c667f0f..b3dd108 100644
> --- a/arch/powerpc/platforms/pseries/Kconfig
> +++ b/arch/powerpc/platforms/pseries/Kconfig
> @@ -33,6 +33,16 @@ config PSERIES_MSI
>         depends on PCI_MSI && EEH
>         default y
> 
> +config PSERIES_ENERGY
> +	tristate "pseries energy management capabilities driver"
> +	depends on PPC_PSERIES
> +	default y
> +	help
> +	  Provides interface to platform energy management capabilities
> +	  on supported PSERIES platforms.
> +	  Provides: /sys/devices/system/cpu/pseries_(de)activation_hint_list
> +	  and /sys/devices/system/cpu/cpuN/pseries_(de)activation_hint
> +
>  config SCANLOG
>  	tristate "Scanlog dump interface"
>  	depends on RTAS_PROC && PPC_PSERIES

.....

> +static int __init pseries_energy_init(void)
> +{
> +	int cpu, err;
> +	struct sys_device *cpu_sys_dev;
> +
> +	/* Create the sysfs files */
> +	err = sysfs_create_file(&cpu_sysdev_class.kset.kobj,
> +				&attr_cpu_activate_hint_list.attr);
> +	if (!err)
> +		err = sysfs_create_file(&cpu_sysdev_class.kset.kobj,
> +				&attr_cpu_deactivate_hint_list.attr);
> +
> +	for_each_possible_cpu(cpu) {
> +		cpu_sys_dev = get_cpu_sysdev(cpu);
> +		err = sysfs_create_file(&cpu_sys_dev->kobj,
> +				&attr_percpu_activate_hint.attr);
> +		if (err)
> +			break;
> +		err = sysfs_create_file(&cpu_sys_dev->kobj,
> +				&attr_percpu_deactivate_hint.attr);
> +		if (err)
> +			break;
> +	}
> +	return err;
> +
> +}

Shouldn't we create this only for supported platforms ?

Thanks
Dipankar
Vaidyanathan Srinivasan March 8, 2010, 6:50 a.m. UTC | #2
* Dipankar Sarma <dipankar@in.ibm.com> [2010-03-06 00:48:11]:

> On Wed, Mar 03, 2010 at 11:48:22PM +0530, Vaidyanathan Srinivasan wrote:
> >  static void __init cpu_init_thread_core_maps(int tpc)
> > diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig
> > index c667f0f..b3dd108 100644
> > --- a/arch/powerpc/platforms/pseries/Kconfig
> > +++ b/arch/powerpc/platforms/pseries/Kconfig
> > @@ -33,6 +33,16 @@ config PSERIES_MSI
> >         depends on PCI_MSI && EEH
> >         default y
> > 
> > +config PSERIES_ENERGY
> > +	tristate "pseries energy management capabilities driver"
> > +	depends on PPC_PSERIES
> > +	default y
> > +	help
> > +	  Provides interface to platform energy management capabilities
> > +	  on supported PSERIES platforms.
> > +	  Provides: /sys/devices/system/cpu/pseries_(de)activation_hint_list
> > +	  and /sys/devices/system/cpu/cpuN/pseries_(de)activation_hint
> > +
> >  config SCANLOG
> >  	tristate "Scanlog dump interface"
> >  	depends on RTAS_PROC && PPC_PSERIES
> 
> .....
> 
> > +static int __init pseries_energy_init(void)
> > +{
> > +	int cpu, err;
> > +	struct sys_device *cpu_sys_dev;
> > +
> > +	/* Create the sysfs files */
> > +	err = sysfs_create_file(&cpu_sysdev_class.kset.kobj,
> > +				&attr_cpu_activate_hint_list.attr);
> > +	if (!err)
> > +		err = sysfs_create_file(&cpu_sysdev_class.kset.kobj,
> > +				&attr_cpu_deactivate_hint_list.attr);
> > +
> > +	for_each_possible_cpu(cpu) {
> > +		cpu_sys_dev = get_cpu_sysdev(cpu);
> > +		err = sysfs_create_file(&cpu_sys_dev->kobj,
> > +				&attr_percpu_activate_hint.attr);
> > +		if (err)
> > +			break;
> > +		err = sysfs_create_file(&cpu_sys_dev->kobj,
> > +				&attr_percpu_deactivate_hint.attr);
> > +		if (err)
> > +			break;
> > +	}
> > +	return err;
> > +
> > +}
> 
> Shouldn't we create this only for supported platforms ?

Hi Dipankar,

Yes we will need a check like
firmware_has_feature(FW_FEATURE_BEST_ENERGY) to avoid sysfs files in
unsupported platforms.  I will add that check in the next iteration.

Thanks,
Vaidy
Dipankar Sarma March 8, 2010, 7:28 p.m. UTC | #3
On Mon, Mar 08, 2010 at 12:20:06PM +0530, Vaidyanathan Srinivasan wrote:
> * Dipankar Sarma <dipankar@in.ibm.com> [2010-03-06 00:48:11]:
> 
> > Shouldn't we create this only for supported platforms ?
> 
> Hi Dipankar,
> 
> Yes we will need a check like
> firmware_has_feature(FW_FEATURE_BEST_ENERGY) to avoid sysfs files in
> unsupported platforms.  I will add that check in the next iteration.

Also, given that this module isn't likely to provide anything on
older platforms, it should get loaded only on newer platforms.

Thanks
Dipankar
Vaidyanathan Srinivasan March 10, 2010, 4:30 a.m. UTC | #4
* Dipankar Sarma <dipankar@in.ibm.com> [2010-03-09 00:58:26]:

> On Mon, Mar 08, 2010 at 12:20:06PM +0530, Vaidyanathan Srinivasan wrote:
> > * Dipankar Sarma <dipankar@in.ibm.com> [2010-03-06 00:48:11]:
> > 
> > > Shouldn't we create this only for supported platforms ?
> > 
> > Hi Dipankar,
> > 
> > Yes we will need a check like
> > firmware_has_feature(FW_FEATURE_BEST_ENERGY) to avoid sysfs files in
> > unsupported platforms.  I will add that check in the next iteration.
> 
> Also, given that this module isn't likely to provide anything on
> older platforms, it should get loaded only on newer platforms.

Yes, I will explore methods to achieve this.  I could not yet find
a trigger to load modules based on firmware feature.

--Vaidy
Benjamin Herrenschmidt April 7, 2010, 2:04 a.m. UTC | #5
On Wed, 2010-03-03 at 23:48 +0530, Vaidyanathan Srinivasan wrote:
> Hi,

> diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
> index 03dd6a2..fbd93e3 100644
> --- a/arch/powerpc/kernel/setup-common.c
> +++ b/arch/powerpc/kernel/setup-common.c
> @@ -359,6 +359,8 @@ void __init check_for_initrd(void)
>  #ifdef CONFIG_SMP
>  
>  int threads_per_core, threads_shift;
> +EXPORT_SYMBOL_GPL(threads_per_core);

While I agree it should be exported for the APIs in cputhread.h to be
usable from a module, this variable shouldn't be -used- directly, but
only via the API functions in there.

 .../...

> +
> +#define MODULE_VERS "1.0"
> +#define MODULE_NAME "pseries_energy"
> +
> +/* Helper Routines to convert between drc_index to cpu numbers */
> +
> +static u32 cpu_to_drc_index(int cpu)
> +{
> +	struct device_node *dn = NULL;
> +	const int *indexes;
> +	int i;
> +	dn = of_find_node_by_name(dn, "cpus");

Check the result. Also that's not a nice way to do that, you should look
for /cpus by path I reckon.

> +	indexes = of_get_property(dn, "ibm,drc-indexes", NULL);

Check the result here too.

> +	/* Convert logical cpu number to core number */
> +	i = cpu / threads_per_core;

Don't use that variable as I said earlier. Use cpu_thread_to_core()

> +	/*
> +	 * The first element indexes[0] is the number of drc_indexes
> +	 * returned in the list.  Hence i+1 will get the drc_index
> +	 * corresponding to core number i.
> +	 */
> +	WARN_ON(i > indexes[0]);
> +	return indexes[i + 1];
> +}
> +
> +static int drc_index_to_cpu(u32 drc_index)
> +{
> +	struct device_node *dn = NULL;
> +	const int *indexes;
> +	int i, cpu;
> +	dn = of_find_node_by_name(dn, "cpus");
> +	indexes = of_get_property(dn, "ibm,drc-indexes", NULL);

Same comments, check results and use /cpus path

> +	/*
> +	 * First element in the array is the number of drc_indexes
> +	 * returned.  Search through the list to find the matching
> +	 * drc_index and get the core number
> +	 */
> +	for (i = 0; i < indexes[0]; i++) {
> +		if (indexes[i + 1] == drc_index)
> +			break;
> +	}
> +	/* Convert core number to logical cpu number */
> +	cpu = i * threads_per_core;

Here's more annoying as we don't have an API in cputhread.h for that.

In fact, we have a confusion in there since cpu_first_thread_in_core()
doesn't actually take a core number ...

I'm going to recommend a complicated approach but that's the best in the
long run:

 - First do a patch that renames cpu_first_thread_in_core() to something
clearer like cpu_first_thread_in_same_core() or cpu_leftmost_sibling()
(I think I prefer the later). Rename the few users in
arch/powerpc/mm/mmu_context_nohash.c and arch/powerpc/kernel/smp.c

 - Then do a patch that adds a cpu_first_thread_of_core() that takes a
core number, basically does:

static inline int cpu_first_thread_of_core(int core)
{
	return core << threads_shift;
}

 - Then add your modified H_BEST_ENERGY patch on top of it using the
above two as pre-reqs :-)

> +	return cpu;
> +}
> +
> +/*
> + * pseries hypervisor call H_BEST_ENERGY provides hints to OS on
> + * preferred logical cpus to activate or deactivate for optimized
> + * energy consumption.
> + */
> +
> +#define FLAGS_MODE1	0x004E200000080E01
> +#define FLAGS_MODE2	0x004E200000080401
> +#define FLAGS_ACTIVATE  0x100
> +
> +static ssize_t get_best_energy_list(char *page, int activate)
> +{
> +	int rc, cnt, i, cpu;
> +	unsigned long retbuf[PLPAR_HCALL9_BUFSIZE];
> +	unsigned long flags = 0;
> +	u32 *buf_page;
> +	char *s = page;
> +
> +	buf_page = (u32 *) get_zeroed_page(GFP_KERNEL);
> +
Why that blank line ?

> +	if (!buf_page)
> +		return 0;

So here you return 0 instead of -ENOMEM

> +	flags = FLAGS_MODE1;
> +	if (activate)
> +		flags |= FLAGS_ACTIVATE;
> +
> +	rc = plpar_hcall9(H_BEST_ENERGY, retbuf, flags, 0, __pa(buf_page),
> +				0, 0, 0, 0, 0, 0);
> +
> +	

Again, no need for a blank line before xx = foo() and if (xx)

> if (rc != H_SUCCESS) {
> +		free_page((unsigned long) buf_page);
> +		return -EINVAL;
> +	}

And here you return an error code. Which one is right ?

> +	cnt = retbuf[0];
> +	for (i = 0; i < cnt; i++) {
> +		cpu = drc_index_to_cpu(buf_page[2*i+1]);
> +		if ((cpu_online(cpu) && !activate) ||
> +		    (!cpu_online(cpu) && activate))
> +			s += sprintf(s, "%d,", cpu);
> +	}
> +	if (s > page) { /* Something to show */
> +		s--; /* Suppress last comma */
> +		s += sprintf(s, "\n");
> +	}
> +
> +	free_page((unsigned long) buf_page);
> +	return s-page;
> +}
> +
> +static ssize_t get_best_energy_data(struct sys_device *dev,
> +					char *page, int activate)
> +{
> +	int rc;
> +	unsigned long retbuf[PLPAR_HCALL9_BUFSIZE];
> +	unsigned long flags = 0;
> +
> +	flags = FLAGS_MODE2;
> +	if (activate)
> +		flags |= FLAGS_ACTIVATE;
> +
> +	rc = plpar_hcall9(H_BEST_ENERGY, retbuf, flags,
> +				cpu_to_drc_index(dev->id),
> +				0, 0, 0, 0, 0, 0, 0);
> +
> +	if (rc != H_SUCCESS)
> +		return -EINVAL;
> +
> +	return sprintf(page, "%lu\n", retbuf[1] >> 32);
> +}
> +
> +/* Wrapper functions */
> +
> +static ssize_t cpu_activate_hint_list_show(struct sysdev_class *class,
> +						char *page)
> +{
> +	return get_best_energy_list(page, 1);
> +}
> +
> +static ssize_t cpu_deactivate_hint_list_show(struct sysdev_class *class,
> +						char *page)
> +{
> +	return get_best_energy_list(page, 0);
> +}
> +
> +static ssize_t percpu_activate_hint_show(struct sys_device *dev,
> +			struct sysdev_attribute *attr, char *page)
> +{
> +	return get_best_energy_data(dev, page, 1);
> +}
> +
> +static ssize_t percpu_deactivate_hint_show(struct sys_device *dev,
> +			struct sysdev_attribute *attr, char *page)
> +{
> +	return get_best_energy_data(dev, page, 0);
> +}
> +
> +/*
> + * Create sysfs interface:
> + * /sys/devices/system/cpu/pseries_activate_hint_list
> + * /sys/devices/system/cpu/pseries_deactivate_hint_list
> + * 	Comma separated list of cpus to activate or deactivate
> + * /sys/devices/system/cpu/cpuN/pseries_activate_hint
> + * /sys/devices/system/cpu/cpuN/pseries_deactivate_hint
> + *	Per-cpu value of the hint
> + */
> +
> +struct sysdev_class_attribute attr_cpu_activate_hint_list =
> +		_SYSDEV_CLASS_ATTR(pseries_activate_hint_list, 0444,
> +		cpu_activate_hint_list_show, NULL);
> +
> +struct sysdev_class_attribute attr_cpu_deactivate_hint_list =
> +		_SYSDEV_CLASS_ATTR(pseries_deactivate_hint_list, 0444,
> +		cpu_deactivate_hint_list_show, NULL);
> +
> +struct sysdev_attribute attr_percpu_activate_hint =
> +		_SYSDEV_ATTR(pseries_activate_hint, 0444,
> +		percpu_activate_hint_show, NULL);
> +
> +struct sysdev_attribute attr_percpu_deactivate_hint =
> +		_SYSDEV_ATTR(pseries_deactivate_hint, 0444,
> +		percpu_deactivate_hint_show, NULL);
> +
> +static int __init pseries_energy_init(void)
> +{
> +	int cpu, err;
> +	struct sys_device *cpu_sys_dev;
> +
> +	/* Create the sysfs files */
> +	err = sysfs_create_file(&cpu_sysdev_class.kset.kobj,
> +				&attr_cpu_activate_hint_list.attr);
> +	if (!err)
> +		err = sysfs_create_file(&cpu_sysdev_class.kset.kobj,
> +				&attr_cpu_deactivate_hint_list.attr);
> +
> +	for_each_possible_cpu(cpu) {
> +		cpu_sys_dev = get_cpu_sysdev(cpu);
> +		err = sysfs_create_file(&cpu_sys_dev->kobj,
> +				&attr_percpu_activate_hint.attr);
> +		if (err)
> +			break;
> +		err = sysfs_create_file(&cpu_sys_dev->kobj,
> +				&attr_percpu_deactivate_hint.attr);
> +		if (err)
> +			break;
> +	}
> +	return err;
> +
> +}
> +
> +static void __exit pseries_energy_cleanup(void)
> +{
> +	int cpu;
> +	struct sys_device *cpu_sys_dev;
> +
> +	/* Remove the sysfs files */
> +	sysfs_remove_file(&cpu_sysdev_class.kset.kobj,
> +				&attr_cpu_activate_hint_list.attr);
> +
> +	sysfs_remove_file(&cpu_sysdev_class.kset.kobj,
> +				&attr_cpu_deactivate_hint_list.attr);
> +
> +	for_each_possible_cpu(cpu) {
> +		cpu_sys_dev = get_cpu_sysdev(cpu);
> +		sysfs_remove_file(&cpu_sys_dev->kobj,
> +				&attr_percpu_activate_hint.attr);
> +		sysfs_remove_file(&cpu_sys_dev->kobj,
> +				&attr_percpu_deactivate_hint.attr);
> +	}
> +}
> +
> +module_init(pseries_energy_init);
> +module_exit(pseries_energy_cleanup);
> +MODULE_DESCRIPTION("Driver for pseries platform energy management");
> +MODULE_AUTHOR("Vaidyanathan Srinivasan");
> +MODULE_LICENSE("GPL");
> 

Cheers,
Ben.
Vaidyanathan Srinivasan April 7, 2010, 4:57 a.m. UTC | #6
* Benjamin Herrenschmidt <benh@kernel.crashing.org> [2010-04-07 12:04:49]:

> On Wed, 2010-03-03 at 23:48 +0530, Vaidyanathan Srinivasan wrote:
> > Hi,
> 
> > diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
> > index 03dd6a2..fbd93e3 100644
> > --- a/arch/powerpc/kernel/setup-common.c
> > +++ b/arch/powerpc/kernel/setup-common.c
> > @@ -359,6 +359,8 @@ void __init check_for_initrd(void)
> >  #ifdef CONFIG_SMP
> >  
> >  int threads_per_core, threads_shift;
> > +EXPORT_SYMBOL_GPL(threads_per_core);
> 
> While I agree it should be exported for the APIs in cputhread.h to be
> usable from a module, this variable shouldn't be -used- directly, but
> only via the API functions in there.

Ok agreed.  This will be the first module to use this and hence will
have to implement the API and export the function.

>  .../...
> 
> > +
> > +#define MODULE_VERS "1.0"
> > +#define MODULE_NAME "pseries_energy"
> > +
> > +/* Helper Routines to convert between drc_index to cpu numbers */
> > +
> > +static u32 cpu_to_drc_index(int cpu)
> > +{
> > +	struct device_node *dn = NULL;
> > +	const int *indexes;
> > +	int i;
> > +	dn = of_find_node_by_name(dn, "cpus");
> 
> Check the result. Also that's not a nice way to do that, you should look
> for /cpus by path I reckon.

I will check the return code, but why do you feel getting the node by
path is better?  Is there any race, or we may have duplicate "cpus"
node?

> > +	indexes = of_get_property(dn, "ibm,drc-indexes", NULL);
> 
> Check the result here too.

Yes, will do.

> > +	/* Convert logical cpu number to core number */
> > +	i = cpu / threads_per_core;
> 
> Don't use that variable as I said earlier. Use cpu_thread_to_core()

Ack

> > +	/*
> > +	 * The first element indexes[0] is the number of drc_indexes
> > +	 * returned in the list.  Hence i+1 will get the drc_index
> > +	 * corresponding to core number i.
> > +	 */
> > +	WARN_ON(i > indexes[0]);
> > +	return indexes[i + 1];
> > +}
> > +
> > +static int drc_index_to_cpu(u32 drc_index)
> > +{
> > +	struct device_node *dn = NULL;
> > +	const int *indexes;
> > +	int i, cpu;
> > +	dn = of_find_node_by_name(dn, "cpus");
> > +	indexes = of_get_property(dn, "ibm,drc-indexes", NULL);
> 
> Same comments, check results and use /cpus path

Sure. Will do.

> > +	/*
> > +	 * First element in the array is the number of drc_indexes
> > +	 * returned.  Search through the list to find the matching
> > +	 * drc_index and get the core number
> > +	 */
> > +	for (i = 0; i < indexes[0]; i++) {
> > +		if (indexes[i + 1] == drc_index)
> > +			break;
> > +	}
> > +	/* Convert core number to logical cpu number */
> > +	cpu = i * threads_per_core;
> 
> Here's more annoying as we don't have an API in cputhread.h for that.
> 
> In fact, we have a confusion in there since cpu_first_thread_in_core()
> doesn't actually take a core number ...
> 
> I'm going to recommend a complicated approach but that's the best in the
> long run:
> 
>  - First do a patch that renames cpu_first_thread_in_core() to something
> clearer like cpu_first_thread_in_same_core() or cpu_leftmost_sibling()
> (I think I prefer the later). Rename the few users in
> arch/powerpc/mm/mmu_context_nohash.c and arch/powerpc/kernel/smp.c
> 
>  - Then do a patch that adds a cpu_first_thread_of_core() that takes a
> core number, basically does:
> 
> static inline int cpu_first_thread_of_core(int core)
> {
> 	return core << threads_shift;
> }
> 
>  - Then add your modified H_BEST_ENERGY patch on top of it using the
> above two as pre-reqs :-)

Ok, I can do that change and then base this patch on the new API.

> > +	return cpu;
> > +}
> > +
> > +/*
> > + * pseries hypervisor call H_BEST_ENERGY provides hints to OS on
> > + * preferred logical cpus to activate or deactivate for optimized
> > + * energy consumption.
> > + */
> > +
> > +#define FLAGS_MODE1	0x004E200000080E01
> > +#define FLAGS_MODE2	0x004E200000080401
> > +#define FLAGS_ACTIVATE  0x100
> > +
> > +static ssize_t get_best_energy_list(char *page, int activate)
> > +{
> > +	int rc, cnt, i, cpu;
> > +	unsigned long retbuf[PLPAR_HCALL9_BUFSIZE];
> > +	unsigned long flags = 0;
> > +	u32 *buf_page;
> > +	char *s = page;
> > +
> > +	buf_page = (u32 *) get_zeroed_page(GFP_KERNEL);
> > +
> Why that blank line ?

I will remove them

> > +	if (!buf_page)
> > +		return 0;
> 
> So here you return 0 instead of -ENOMEM

Will fix.  -ENOMEM is correct.

> > +	flags = FLAGS_MODE1;
> > +	if (activate)
> > +		flags |= FLAGS_ACTIVATE;
> > +
> > +	rc = plpar_hcall9(H_BEST_ENERGY, retbuf, flags, 0, __pa(buf_page),
> > +				0, 0, 0, 0, 0, 0);
> > +
> > +	
> 
> Again, no need for a blank line before xx = foo() and if (xx)

Ack.

> > if (rc != H_SUCCESS) {
> > +		free_page((unsigned long) buf_page);
> > +		return -EINVAL;
> > +	}
> 
> And here you return an error code. Which one is right ?

Returning an error code is correct to user space.  The call will check
for return value on read while getting the list.

Thanks for the detailed review.  I will fix the issues that you
pointed out and post the next version.

--Vaidy
Benjamin Herrenschmidt April 7, 2010, 5:13 a.m. UTC | #7
On Wed, 2010-04-07 at 10:27 +0530, Vaidyanathan Srinivasan wrote:

> > Check the result. Also that's not a nice way to do that, you should look
> > for /cpus by path I reckon.
> 
> I will check the return code, but why do you feel getting the node by
> path is better?  Is there any race, or we may have duplicate "cpus"
> node?

Well, you never really know what might be down the device-tree :-) /cpus
is well defined where it is, while god knows what would happen if
somebody had the weird idea to have something called "cpus" below some
device driver for whatever reason they might even.

Cheers,
Ben.
diff mbox

Patch

diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h
index f027581..396599f 100644
--- a/arch/powerpc/include/asm/hvcall.h
+++ b/arch/powerpc/include/asm/hvcall.h
@@ -230,7 +230,8 @@ 
 #define H_ENABLE_CRQ		0x2B0
 #define H_SET_MPP		0x2D0
 #define H_GET_MPP		0x2D4
-#define MAX_HCALL_OPCODE	H_GET_MPP
+#define H_BEST_ENERGY		0x2F4
+#define MAX_HCALL_OPCODE	H_BEST_ENERGY
 
 #ifndef __ASSEMBLY__
 
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index 03dd6a2..fbd93e3 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -359,6 +359,8 @@  void __init check_for_initrd(void)
 #ifdef CONFIG_SMP
 
 int threads_per_core, threads_shift;
+EXPORT_SYMBOL_GPL(threads_per_core);
+
 cpumask_t threads_core_mask;
 
 static void __init cpu_init_thread_core_maps(int tpc)
diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig
index c667f0f..b3dd108 100644
--- a/arch/powerpc/platforms/pseries/Kconfig
+++ b/arch/powerpc/platforms/pseries/Kconfig
@@ -33,6 +33,16 @@  config PSERIES_MSI
        depends on PCI_MSI && EEH
        default y
 
+config PSERIES_ENERGY
+	tristate "pseries energy management capabilities driver"
+	depends on PPC_PSERIES
+	default y
+	help
+	  Provides interface to platform energy management capabilities
+	  on supported PSERIES platforms.
+	  Provides: /sys/devices/system/cpu/pseries_(de)activation_hint_list
+	  and /sys/devices/system/cpu/cpuN/pseries_(de)activation_hint
+
 config SCANLOG
 	tristate "Scanlog dump interface"
 	depends on RTAS_PROC && PPC_PSERIES
diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile
index 0ff5174..3813977 100644
--- a/arch/powerpc/platforms/pseries/Makefile
+++ b/arch/powerpc/platforms/pseries/Makefile
@@ -16,6 +16,7 @@  obj-$(CONFIG_EEH)	+= eeh.o eeh_cache.o eeh_driver.o eeh_event.o eeh_sysfs.o
 obj-$(CONFIG_KEXEC)	+= kexec.o
 obj-$(CONFIG_PCI)	+= pci.o pci_dlpar.o
 obj-$(CONFIG_PSERIES_MSI)	+= msi.o
+obj-$(CONFIG_PSERIES_ENERGY)	+= pseries_energy.o
 
 obj-$(CONFIG_HOTPLUG_CPU)	+= hotplug-cpu.o
 obj-$(CONFIG_MEMORY_HOTPLUG)	+= hotplug-memory.o
diff --git a/arch/powerpc/platforms/pseries/pseries_energy.c b/arch/powerpc/platforms/pseries/pseries_energy.c
new file mode 100644
index 0000000..0390188
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/pseries_energy.c
@@ -0,0 +1,246 @@ 
+/*
+ * POWER platform energy management driver
+ * Copyright (C) 2010 IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * This pseries platform device driver provides access to
+ * platform energy management capabilities.
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/seq_file.h>
+#include <linux/sysdev.h>
+#include <linux/cpu.h>
+#include <linux/of.h>
+#include <asm/cputhreads.h>
+#include <asm/page.h>
+#include <asm/hvcall.h>
+
+
+#define MODULE_VERS "1.0"
+#define MODULE_NAME "pseries_energy"
+
+/* Helper Routines to convert between drc_index to cpu numbers */
+
+static u32 cpu_to_drc_index(int cpu)
+{
+	struct device_node *dn = NULL;
+	const int *indexes;
+	int i;
+	dn = of_find_node_by_name(dn, "cpus");
+	indexes = of_get_property(dn, "ibm,drc-indexes", NULL);
+	/* Convert logical cpu number to core number */
+	i = cpu / threads_per_core;
+	/*
+	 * The first element indexes[0] is the number of drc_indexes
+	 * returned in the list.  Hence i+1 will get the drc_index
+	 * corresponding to core number i.
+	 */
+	WARN_ON(i > indexes[0]);
+	return indexes[i + 1];
+}
+
+static int drc_index_to_cpu(u32 drc_index)
+{
+	struct device_node *dn = NULL;
+	const int *indexes;
+	int i, cpu;
+	dn = of_find_node_by_name(dn, "cpus");
+	indexes = of_get_property(dn, "ibm,drc-indexes", NULL);
+	/*
+	 * First element in the array is the number of drc_indexes
+	 * returned.  Search through the list to find the matching
+	 * drc_index and get the core number
+	 */
+	for (i = 0; i < indexes[0]; i++) {
+		if (indexes[i + 1] == drc_index)
+			break;
+	}
+	/* Convert core number to logical cpu number */
+	cpu = i * threads_per_core;
+	return cpu;
+}
+
+/*
+ * pseries hypervisor call H_BEST_ENERGY provides hints to OS on
+ * preferred logical cpus to activate or deactivate for optimized
+ * energy consumption.
+ */
+
+#define FLAGS_MODE1	0x004E200000080E01
+#define FLAGS_MODE2	0x004E200000080401
+#define FLAGS_ACTIVATE  0x100
+
+static ssize_t get_best_energy_list(char *page, int activate)
+{
+	int rc, cnt, i, cpu;
+	unsigned long retbuf[PLPAR_HCALL9_BUFSIZE];
+	unsigned long flags = 0;
+	u32 *buf_page;
+	char *s = page;
+
+	buf_page = (u32 *) get_zeroed_page(GFP_KERNEL);
+
+	if (!buf_page)
+		return 0;
+
+	flags = FLAGS_MODE1;
+	if (activate)
+		flags |= FLAGS_ACTIVATE;
+
+	rc = plpar_hcall9(H_BEST_ENERGY, retbuf, flags, 0, __pa(buf_page),
+				0, 0, 0, 0, 0, 0);
+
+	if (rc != H_SUCCESS) {
+		free_page((unsigned long) buf_page);
+		return -EINVAL;
+	}
+
+	cnt = retbuf[0];
+	for (i = 0; i < cnt; i++) {
+		cpu = drc_index_to_cpu(buf_page[2*i+1]);
+		if ((cpu_online(cpu) && !activate) ||
+		    (!cpu_online(cpu) && activate))
+			s += sprintf(s, "%d,", cpu);
+	}
+	if (s > page) { /* Something to show */
+		s--; /* Suppress last comma */
+		s += sprintf(s, "\n");
+	}
+
+	free_page((unsigned long) buf_page);
+	return s-page;
+}
+
+static ssize_t get_best_energy_data(struct sys_device *dev,
+					char *page, int activate)
+{
+	int rc;
+	unsigned long retbuf[PLPAR_HCALL9_BUFSIZE];
+	unsigned long flags = 0;
+
+	flags = FLAGS_MODE2;
+	if (activate)
+		flags |= FLAGS_ACTIVATE;
+
+	rc = plpar_hcall9(H_BEST_ENERGY, retbuf, flags,
+				cpu_to_drc_index(dev->id),
+				0, 0, 0, 0, 0, 0, 0);
+
+	if (rc != H_SUCCESS)
+		return -EINVAL;
+
+	return sprintf(page, "%lu\n", retbuf[1] >> 32);
+}
+
+/* Wrapper functions */
+
+static ssize_t cpu_activate_hint_list_show(struct sysdev_class *class,
+						char *page)
+{
+	return get_best_energy_list(page, 1);
+}
+
+static ssize_t cpu_deactivate_hint_list_show(struct sysdev_class *class,
+						char *page)
+{
+	return get_best_energy_list(page, 0);
+}
+
+static ssize_t percpu_activate_hint_show(struct sys_device *dev,
+			struct sysdev_attribute *attr, char *page)
+{
+	return get_best_energy_data(dev, page, 1);
+}
+
+static ssize_t percpu_deactivate_hint_show(struct sys_device *dev,
+			struct sysdev_attribute *attr, char *page)
+{
+	return get_best_energy_data(dev, page, 0);
+}
+
+/*
+ * Create sysfs interface:
+ * /sys/devices/system/cpu/pseries_activate_hint_list
+ * /sys/devices/system/cpu/pseries_deactivate_hint_list
+ * 	Comma separated list of cpus to activate or deactivate
+ * /sys/devices/system/cpu/cpuN/pseries_activate_hint
+ * /sys/devices/system/cpu/cpuN/pseries_deactivate_hint
+ *	Per-cpu value of the hint
+ */
+
+struct sysdev_class_attribute attr_cpu_activate_hint_list =
+		_SYSDEV_CLASS_ATTR(pseries_activate_hint_list, 0444,
+		cpu_activate_hint_list_show, NULL);
+
+struct sysdev_class_attribute attr_cpu_deactivate_hint_list =
+		_SYSDEV_CLASS_ATTR(pseries_deactivate_hint_list, 0444,
+		cpu_deactivate_hint_list_show, NULL);
+
+struct sysdev_attribute attr_percpu_activate_hint =
+		_SYSDEV_ATTR(pseries_activate_hint, 0444,
+		percpu_activate_hint_show, NULL);
+
+struct sysdev_attribute attr_percpu_deactivate_hint =
+		_SYSDEV_ATTR(pseries_deactivate_hint, 0444,
+		percpu_deactivate_hint_show, NULL);
+
+static int __init pseries_energy_init(void)
+{
+	int cpu, err;
+	struct sys_device *cpu_sys_dev;
+
+	/* Create the sysfs files */
+	err = sysfs_create_file(&cpu_sysdev_class.kset.kobj,
+				&attr_cpu_activate_hint_list.attr);
+	if (!err)
+		err = sysfs_create_file(&cpu_sysdev_class.kset.kobj,
+				&attr_cpu_deactivate_hint_list.attr);
+
+	for_each_possible_cpu(cpu) {
+		cpu_sys_dev = get_cpu_sysdev(cpu);
+		err = sysfs_create_file(&cpu_sys_dev->kobj,
+				&attr_percpu_activate_hint.attr);
+		if (err)
+			break;
+		err = sysfs_create_file(&cpu_sys_dev->kobj,
+				&attr_percpu_deactivate_hint.attr);
+		if (err)
+			break;
+	}
+	return err;
+
+}
+
+static void __exit pseries_energy_cleanup(void)
+{
+	int cpu;
+	struct sys_device *cpu_sys_dev;
+
+	/* Remove the sysfs files */
+	sysfs_remove_file(&cpu_sysdev_class.kset.kobj,
+				&attr_cpu_activate_hint_list.attr);
+
+	sysfs_remove_file(&cpu_sysdev_class.kset.kobj,
+				&attr_cpu_deactivate_hint_list.attr);
+
+	for_each_possible_cpu(cpu) {
+		cpu_sys_dev = get_cpu_sysdev(cpu);
+		sysfs_remove_file(&cpu_sys_dev->kobj,
+				&attr_percpu_activate_hint.attr);
+		sysfs_remove_file(&cpu_sys_dev->kobj,
+				&attr_percpu_deactivate_hint.attr);
+	}
+}
+
+module_init(pseries_energy_init);
+module_exit(pseries_energy_cleanup);
+MODULE_DESCRIPTION("Driver for pseries platform energy management");
+MODULE_AUTHOR("Vaidyanathan Srinivasan");
+MODULE_LICENSE("GPL");