diff mbox

[2/4] : CPUIDLE: Introduce architecture independent cpuidle_pm_idle in drivers/cpuidle/cpuidle.c

Message ID 20090827115354.GC24986@linux.vnet.ibm.com (mailing list archive)
State Superseded
Headers show

Commit Message

Arun Bharadwaj Aug. 27, 2009, 11:53 a.m. UTC
* Arun R Bharadwaj <arun@linux.vnet.ibm.com> [2009-08-27 17:19:08]:

Cpuidle infrastructure assumes pm_idle as the default idle routine.
But, ppc_md.power_save is the default idle callback in case of pSeries.

So, create a more generic, architecture independent cpuidle_pm_idle
function pointer in driver/cpuidle/cpuidle.c and allow the idle routines
of architectures to be set to cpuidle_pm_idle.

Signed-off-by: Arun R Bharadwaj <arun@linux.vnet.ibm.com>
---
 drivers/cpuidle/cpuidle.c |   12 +++++++-----
 include/linux/cpuidle.h   |    7 +++++++
 2 files changed, 14 insertions(+), 5 deletions(-)

Comments

Peter Zijlstra Aug. 27, 2009, 12:53 p.m. UTC | #1
On Thu, 2009-08-27 at 17:23 +0530, Arun R Bharadwaj wrote:
> * Arun R Bharadwaj <arun@linux.vnet.ibm.com> [2009-08-27 17:19:08]:
> 
> Cpuidle infrastructure assumes pm_idle as the default idle routine.
> But, ppc_md.power_save is the default idle callback in case of pSeries.
> 
> So, create a more generic, architecture independent cpuidle_pm_idle
> function pointer in driver/cpuidle/cpuidle.c and allow the idle routines
> of architectures to be set to cpuidle_pm_idle.
> 
> Signed-off-by: Arun R Bharadwaj <arun@linux.vnet.ibm.com>
> ---
>  drivers/cpuidle/cpuidle.c |   12 +++++++-----
>  include/linux/cpuidle.h   |    7 +++++++
>  2 files changed, 14 insertions(+), 5 deletions(-)
> 
> Index: linux.trees.git/drivers/cpuidle/cpuidle.c
> ===================================================================
> --- linux.trees.git.orig/drivers/cpuidle/cpuidle.c
> +++ linux.trees.git/drivers/cpuidle/cpuidle.c
> @@ -25,6 +25,7 @@ DEFINE_PER_CPU(struct cpuidle_device *, 
>  DEFINE_MUTEX(cpuidle_lock);
>  LIST_HEAD(cpuidle_detected_devices);
>  static void (*pm_idle_old)(void);
> +void (*cpuidle_pm_idle)(void);
>  
>  static int enabled_devices;
>  
> @@ -98,10 +99,10 @@ static void cpuidle_idle_call(void)
>   */
>  void cpuidle_install_idle_handler(void)
>  {
> -	if (enabled_devices && (pm_idle != cpuidle_idle_call)) {
> +	if (enabled_devices && (cpuidle_pm_idle != cpuidle_idle_call)) {
>  		/* Make sure all changes finished before we switch to new idle */
>  		smp_wmb();
> -		pm_idle = cpuidle_idle_call;
> +		cpuidle_pm_idle = cpuidle_idle_call;
>  	}
>  }
>  
> @@ -110,8 +111,9 @@ void cpuidle_install_idle_handler(void)
>   */
>  void cpuidle_uninstall_idle_handler(void)
>  {
> -	if (enabled_devices && pm_idle_old && (pm_idle != pm_idle_old)) {
> -		pm_idle = pm_idle_old;
> +	if (enabled_devices && pm_idle_old &&
> +			(cpuidle_pm_idle != pm_idle_old)) {
> +		cpuidle_pm_idle = pm_idle_old;
>  		cpuidle_kick_cpus();
>  	}
>  }
> @@ -382,7 +384,7 @@ static int __init cpuidle_init(void)
>  {
>  	int ret;
>  
> -	pm_idle_old = pm_idle;
> +	pm_idle_old = cpuidle_pm_idle;
>  
>  	ret = cpuidle_add_class_sysfs(&cpu_sysdev_class);
>  	if (ret)
> Index: linux.trees.git/include/linux/cpuidle.h
> ===================================================================
> --- linux.trees.git.orig/include/linux/cpuidle.h
> +++ linux.trees.git/include/linux/cpuidle.h
> @@ -188,4 +188,11 @@ static inline void cpuidle_unregister_go
>  #define CPUIDLE_DRIVER_STATE_START	0
>  #endif
>  
> +/*
> + * Idle callback used by cpuidle to call the cpuidle_idle_call().
> + * Platform drivers can use this to register to cpuidle's idle loop.
> + */
> +
> +extern void (*cpuidle_pm_idle)(void);
> +
>  #endif /* _LINUX_CPUIDLE_H */


I'm not quite seeing how this makes anything any better. Not we have 3
function pointers, where 1 should suffice.

/me wonders what's wrong with something like:

struct idle_func_desc {
	int		 power;
	int		 latency;
	void		 (*idle)(void);
	struct list_head list;
};

static void spin_idle(void)
{
	for (;;)
		cpu_relax();
}

static idle_func_desc default_idle_func = {
	power = 0, 	   /* doesn't safe any power */
	latency = INT_MAX, /* has max latency */
	idle = spin_idle,
	list = INIT_LIST_HEAD(default_idle_func.list),
};

void (*idle_func)(void);
static struct list_head idle_func_list;

static void pick_idle_func(void)
{
	struct idle_func_desc *desc, *idle = &default_idle_desc;

	list_for_each_entry(desc, &idle_func_list, list) {
		if (desc->power < idle->power)
			continue;
		if (desc->latency > target_latency);
			continue;
		idle = desc;
	}

	pm_idle = idle->idle;
}

void register_idle_func(struct idle_func_desc *desc)
{
	WARN_ON_ONCE(!list_empty(&desc->list));

	list_add_tail(&idle_func_list, &desc->list);
	pick_idle_func();
}

void unregister_idle_func(struct idle_func_desc *desc)
{
	WARN_ON_ONCE(list_empty(&desc->list));

	list_del_init(&desc->list);
	if (idle_func == desc->idle) 
		pick_idle_func();
}
Benjamin Herrenschmidt Aug. 27, 2009, 9:28 p.m. UTC | #2
On Thu, 2009-08-27 at 14:53 +0200, Peter Zijlstra wrote:

> I'm not quite seeing how this makes anything any better. Not we have 3
> function pointers, where 1 should suffice.

There's also the question of us having different "idle" vs.
"power_save", the former being the entire idle loop, the later being the
part that does put the processor into power.

At what level are we trying to change the loop here ?

There are some requirements of things to do in our idle loop that really
don't have their place in generic drivers/* code.

Ben.

> /me wonders what's wrong with something like:
> 
> struct idle_func_desc {
> 	int		 power;
> 	int		 latency;
> 	void		 (*idle)(void);
> 	struct list_head list;
> };
> 
> static void spin_idle(void)
> {
> 	for (;;)
> 		cpu_relax();
> }
> 
> static idle_func_desc default_idle_func = {
> 	power = 0, 	   /* doesn't safe any power */
> 	latency = INT_MAX, /* has max latency */
> 	idle = spin_idle,
> 	list = INIT_LIST_HEAD(default_idle_func.list),
> };
> 
> void (*idle_func)(void);
> static struct list_head idle_func_list;
> 
> static void pick_idle_func(void)
> {
> 	struct idle_func_desc *desc, *idle = &default_idle_desc;
> 
> 	list_for_each_entry(desc, &idle_func_list, list) {
> 		if (desc->power < idle->power)
> 			continue;
> 		if (desc->latency > target_latency);
> 			continue;
> 		idle = desc;
> 	}
> 
> 	pm_idle = idle->idle;
> }
> 
> void register_idle_func(struct idle_func_desc *desc)
> {
> 	WARN_ON_ONCE(!list_empty(&desc->list));
> 
> 	list_add_tail(&idle_func_list, &desc->list);
> 	pick_idle_func();
> }
> 
> void unregister_idle_func(struct idle_func_desc *desc)
> {
> 	WARN_ON_ONCE(list_empty(&desc->list));
> 
> 	list_del_init(&desc->list);
> 	if (idle_func == desc->idle) 
> 		pick_idle_func();
> }
Arun Bharadwaj Aug. 28, 2009, 4:49 a.m. UTC | #3
* Peter Zijlstra <a.p.zijlstra@chello.nl> [2009-08-27 14:53:27]:

> On Thu, 2009-08-27 at 17:23 +0530, Arun R Bharadwaj wrote:
> > * Arun R Bharadwaj <arun@linux.vnet.ibm.com> [2009-08-27 17:19:08]:
> > 
> > Cpuidle infrastructure assumes pm_idle as the default idle routine.
> > But, ppc_md.power_save is the default idle callback in case of pSeries.
> > 
> > So, create a more generic, architecture independent cpuidle_pm_idle
> > function pointer in driver/cpuidle/cpuidle.c and allow the idle routines
> > of architectures to be set to cpuidle_pm_idle.
> > 
> > Signed-off-by: Arun R Bharadwaj <arun@linux.vnet.ibm.com>
> > ---
> >  drivers/cpuidle/cpuidle.c |   12 +++++++-----
> >  include/linux/cpuidle.h   |    7 +++++++
> >  2 files changed, 14 insertions(+), 5 deletions(-)
> > 
> > Index: linux.trees.git/drivers/cpuidle/cpuidle.c
> > ===================================================================
> > --- linux.trees.git.orig/drivers/cpuidle/cpuidle.c
> > +++ linux.trees.git/drivers/cpuidle/cpuidle.c
> > @@ -25,6 +25,7 @@ DEFINE_PER_CPU(struct cpuidle_device *, 
> >  DEFINE_MUTEX(cpuidle_lock);
> >  LIST_HEAD(cpuidle_detected_devices);
> >  static void (*pm_idle_old)(void);
> > +void (*cpuidle_pm_idle)(void);
> >  
> >  static int enabled_devices;
> >  
> > @@ -98,10 +99,10 @@ static void cpuidle_idle_call(void)
> >   */
> >  void cpuidle_install_idle_handler(void)
> >  {
> > -	if (enabled_devices && (pm_idle != cpuidle_idle_call)) {
> > +	if (enabled_devices && (cpuidle_pm_idle != cpuidle_idle_call)) {
> >  		/* Make sure all changes finished before we switch to new idle */
> >  		smp_wmb();
> > -		pm_idle = cpuidle_idle_call;
> > +		cpuidle_pm_idle = cpuidle_idle_call;
> >  	}
> >  }
> >  
> > @@ -110,8 +111,9 @@ void cpuidle_install_idle_handler(void)
> >   */
> >  void cpuidle_uninstall_idle_handler(void)
> >  {
> > -	if (enabled_devices && pm_idle_old && (pm_idle != pm_idle_old)) {
> > -		pm_idle = pm_idle_old;
> > +	if (enabled_devices && pm_idle_old &&
> > +			(cpuidle_pm_idle != pm_idle_old)) {
> > +		cpuidle_pm_idle = pm_idle_old;
> >  		cpuidle_kick_cpus();
> >  	}
> >  }
> > @@ -382,7 +384,7 @@ static int __init cpuidle_init(void)
> >  {
> >  	int ret;
> >  
> > -	pm_idle_old = pm_idle;
> > +	pm_idle_old = cpuidle_pm_idle;
> >  
> >  	ret = cpuidle_add_class_sysfs(&cpu_sysdev_class);
> >  	if (ret)
> > Index: linux.trees.git/include/linux/cpuidle.h
> > ===================================================================
> > --- linux.trees.git.orig/include/linux/cpuidle.h
> > +++ linux.trees.git/include/linux/cpuidle.h
> > @@ -188,4 +188,11 @@ static inline void cpuidle_unregister_go
> >  #define CPUIDLE_DRIVER_STATE_START	0
> >  #endif
> >  
> > +/*
> > + * Idle callback used by cpuidle to call the cpuidle_idle_call().
> > + * Platform drivers can use this to register to cpuidle's idle loop.
> > + */
> > +
> > +extern void (*cpuidle_pm_idle)(void);
> > +
> >  #endif /* _LINUX_CPUIDLE_H */
> 
> 
> I'm not quite seeing how this makes anything any better. Not we have 3
> function pointers, where 1 should suffice.
> 

Not really. We already do have pm_idle in case of x86 and
ppc_md.power_save in case of POWER. So here I'm only introducing
cpuidle_pm_idle which can be used by doing a

ppc_md.power_save = cpuidle_pm_idle;


> /me wonders what's wrong with something like:
> 
> struct idle_func_desc {
> 	int		 power;
> 	int		 latency;
> 	void		 (*idle)(void);
> 	struct list_head list;
> };
> 
> static void spin_idle(void)
> {
> 	for (;;)
> 		cpu_relax();
> }
> 
> static idle_func_desc default_idle_func = {
> 	power = 0, 	   /* doesn't safe any power */
> 	latency = INT_MAX, /* has max latency */
> 	idle = spin_idle,
> 	list = INIT_LIST_HEAD(default_idle_func.list),
> };
> 
> void (*idle_func)(void);
> static struct list_head idle_func_list;
> 
> static void pick_idle_func(void)
> {
> 	struct idle_func_desc *desc, *idle = &default_idle_desc;
> 
> 	list_for_each_entry(desc, &idle_func_list, list) {
> 		if (desc->power < idle->power)
> 			continue;
> 		if (desc->latency > target_latency);
> 			continue;
> 		idle = desc;
> 	}
> 
> 	pm_idle = idle->idle;
> }
>

This only does the job of picking the right idle loop for current
latency and power requirement. This is already done in ladder/menu
governors under the routines menu_select()/ladder_select().
I'm not sure whats the purpose of it here.

Here we are only concerned about the main idle loop, which is
pm_idle/ppc_md.power_save. After setting the main idle loop to
cpuidle_pm_idle, that would call cpuidle_idle_call() which would do
the job of picking the right low level idle loop based on latency and
other requirements.


> void register_idle_func(struct idle_func_desc *desc)
> {
> 	WARN_ON_ONCE(!list_empty(&desc->list));
> 
> 	list_add_tail(&idle_func_list, &desc->list);
> 	pick_idle_func();
> }
> 
> void unregister_idle_func(struct idle_func_desc *desc)
> {
> 	WARN_ON_ONCE(list_empty(&desc->list));
> 
> 	list_del_init(&desc->list);
> 	if (idle_func == desc->idle) 
> 		pick_idle_func();
> }
>
Arun Bharadwaj Aug. 28, 2009, 6:14 a.m. UTC | #4
* Peter Zijlstra <a.p.zijlstra@chello.nl> [2009-08-27 14:53:27]:

Hi Peter, Ben,

I've put the whole thing in a sort of a block diagram. Hope it
explains things more clearly.






				----------------
				|    CPUIDLE   |  (Select idle states like
				|   GOVERNORS  |   C1, C1e, C6 etc in case
				| (Menu/Ladder)|   x86 & nap, snooze in
				|	       |   case of POWER - based on
				----------------   latency & power req)
					^
					|
					|
					|
					|
					|
  ----------			-----------------	         -------------
  |	   |			|		|	         |  PSERIES  |
  |  ACPI  |------------------>	|    CPUIDLE	| <--------------|   IDLE    |
  |	   |			|		|	         |           |
  ----------			-----------------	         -------------

Main idle routine- pm_idle()			         Main idle routine-
							     ppc_md.power_save()

pm_idle = cpuidle_pm_idle;			         ppc_md.power_save =
(start using cpuidle's idle				      cpuidle_pm_idle();
 loop, which internally calls
 governor to select the right
 state to go into).


Relavent code snippet from drivers/cpuidle/cpuidle.c
-------------------------------------

static void cpuidle_idle_call(void)
{
	............
	............

	/* Call the menu_select() to select the idle state to enter. */
	next_state = cpuidle_curr_governor->select(dev);

	............
	............

	/*
	 * Enter the idle state previously selected. target_state->enter
	 * would call pseries_cpuidle_loop() which selects nap/snooze
	 * /
	dev->last_residency = target_state->enter(dev, target_state);
}

void cpuidle_install_idle_handler(void)
{
	.........
	.........
	cpuidle_pm_idle = cpuidle_idle_call;
}

--arun
Peter Zijlstra Aug. 28, 2009, 6:40 a.m. UTC | #5
On Fri, 2009-08-28 at 10:19 +0530, Arun R Bharadwaj wrote:
> 
> 
> This only does the job of picking the right idle loop for current
> latency and power requirement. This is already done in ladder/menu
> governors under the routines menu_select()/ladder_select().
> I'm not sure whats the purpose of it here.

I can't seem to find ladder_select() but menu_select() doesn't manage
pm_idle and its not clear what it does manage.

> Here we are only concerned about the main idle loop, which is
> pm_idle/ppc_md.power_save. After setting the main idle loop to
> cpuidle_pm_idle, that would call cpuidle_idle_call() which would do
> the job of picking the right low level idle loop based on latency and
> other requirements.

It also gets pm_idle unexported and avoids anybody directly tinkering
with the function pointer, _that_ is the whole goal.

pm_idle is it exists today, and the whole cpuidle_{un,}install*() is
utter crap. It relies on unmanaged access to this function pointer.

/me stop looking at drivers/cpuidle/, convoluted mess that is, shame on
you for wanting to have anything to do with it.
Arun Bharadwaj Aug. 28, 2009, 6:43 a.m. UTC | #6
* Peter Zijlstra <a.p.zijlstra@chello.nl> [2009-08-27 14:53:27]:

> On Thu, 2009-08-27 at 17:23 +0530, Arun R Bharadwaj wrote:
> > * Arun R Bharadwaj <arun@linux.vnet.ibm.com> [2009-08-27 17:19:08]:
> > 
> > Cpuidle infrastructure assumes pm_idle as the default idle routine.
> > But, ppc_md.power_save is the default idle callback in case of pSeries.
> > 
> > So, create a more generic, architecture independent cpuidle_pm_idle
> > function pointer in driver/cpuidle/cpuidle.c and allow the idle routines
> > of architectures to be set to cpuidle_pm_idle.
> > 
> > Signed-off-by: Arun R Bharadwaj <arun@linux.vnet.ibm.com>
> > ---
> >  drivers/cpuidle/cpuidle.c |   12 +++++++-----
> >  include/linux/cpuidle.h   |    7 +++++++
> >  2 files changed, 14 insertions(+), 5 deletions(-)
> > 
> > Index: linux.trees.git/drivers/cpuidle/cpuidle.c
> > ===================================================================
> > --- linux.trees.git.orig/drivers/cpuidle/cpuidle.c
> > +++ linux.trees.git/drivers/cpuidle/cpuidle.c
> > @@ -25,6 +25,7 @@ DEFINE_PER_CPU(struct cpuidle_device *, 
> >  DEFINE_MUTEX(cpuidle_lock);
> >  LIST_HEAD(cpuidle_detected_devices);
> >  static void (*pm_idle_old)(void);
> > +void (*cpuidle_pm_idle)(void);
> >  
> >  static int enabled_devices;
> >  
> > @@ -98,10 +99,10 @@ static void cpuidle_idle_call(void)
> >   */
> >  void cpuidle_install_idle_handler(void)
> >  {
> > -	if (enabled_devices && (pm_idle != cpuidle_idle_call)) {
> > +	if (enabled_devices && (cpuidle_pm_idle != cpuidle_idle_call)) {
> >  		/* Make sure all changes finished before we switch to new idle */
> >  		smp_wmb();
> > -		pm_idle = cpuidle_idle_call;
> > +		cpuidle_pm_idle = cpuidle_idle_call;
> >  	}
> >  }
> >  
> > @@ -110,8 +111,9 @@ void cpuidle_install_idle_handler(void)
> >   */
> >  void cpuidle_uninstall_idle_handler(void)
> >  {
> > -	if (enabled_devices && pm_idle_old && (pm_idle != pm_idle_old)) {
> > -		pm_idle = pm_idle_old;
> > +	if (enabled_devices && pm_idle_old &&
> > +			(cpuidle_pm_idle != pm_idle_old)) {
> > +		cpuidle_pm_idle = pm_idle_old;
> >  		cpuidle_kick_cpus();
> >  	}
> >  }
> > @@ -382,7 +384,7 @@ static int __init cpuidle_init(void)
> >  {
> >  	int ret;
> >  
> > -	pm_idle_old = pm_idle;
> > +	pm_idle_old = cpuidle_pm_idle;
> >  
> >  	ret = cpuidle_add_class_sysfs(&cpu_sysdev_class);
> >  	if (ret)
> > Index: linux.trees.git/include/linux/cpuidle.h
> > ===================================================================
> > --- linux.trees.git.orig/include/linux/cpuidle.h
> > +++ linux.trees.git/include/linux/cpuidle.h
> > @@ -188,4 +188,11 @@ static inline void cpuidle_unregister_go
> >  #define CPUIDLE_DRIVER_STATE_START	0
> >  #endif
> >  
> > +/*
> > + * Idle callback used by cpuidle to call the cpuidle_idle_call().
> > + * Platform drivers can use this to register to cpuidle's idle loop.
> > + */
> > +
> > +extern void (*cpuidle_pm_idle)(void);
> > +
> >  #endif /* _LINUX_CPUIDLE_H */
> 
> 
> I'm not quite seeing how this makes anything any better. Not we have 3
> function pointers, where 1 should suffice.
> 

Or, can we have something like:
(if exporting a function is ok, instead of exporting a function
pointer).

in drivers/cpuidle/cpuidle.c

void (*return_cpuidle_handler(void))(void)
{
        return cpuidle_pm_idle;
}
EXPORT_SYMBOL(return_cpuidle_handler);


and from pseries/processor_idle.c,

ppc_md.power_save = return_cpuidle_handler;


--arun

> /me wonders what's wrong with something like:
> 
> struct idle_func_desc {
> 	int		 power;
> 	int		 latency;
> 	void		 (*idle)(void);
> 	struct list_head list;
> };
> 
> static void spin_idle(void)
> {
> 	for (;;)
> 		cpu_relax();
> }
> 
> static idle_func_desc default_idle_func = {
> 	power = 0, 	   /* doesn't safe any power */
> 	latency = INT_MAX, /* has max latency */
> 	idle = spin_idle,
> 	list = INIT_LIST_HEAD(default_idle_func.list),
> };
> 
> void (*idle_func)(void);
> static struct list_head idle_func_list;
> 
> static void pick_idle_func(void)
> {
> 	struct idle_func_desc *desc, *idle = &default_idle_desc;
> 
> 	list_for_each_entry(desc, &idle_func_list, list) {
> 		if (desc->power < idle->power)
> 			continue;
> 		if (desc->latency > target_latency);
> 			continue;
> 		idle = desc;
> 	}
> 
> 	pm_idle = idle->idle;
> }
> 
> void register_idle_func(struct idle_func_desc *desc)
> {
> 	WARN_ON_ONCE(!list_empty(&desc->list));
> 
> 	list_add_tail(&idle_func_list, &desc->list);
> 	pick_idle_func();
> }
> 
> void unregister_idle_func(struct idle_func_desc *desc)
> {
> 	WARN_ON_ONCE(list_empty(&desc->list));
> 
> 	list_del_init(&desc->list);
> 	if (idle_func == desc->idle) 
> 		pick_idle_func();
> }
>
Peter Zijlstra Aug. 28, 2009, 6:48 a.m. UTC | #7
On Fri, 2009-08-28 at 11:44 +0530, Arun R Bharadwaj wrote:
> * Peter Zijlstra <a.p.zijlstra@chello.nl> [2009-08-27 14:53:27]:
> 
> Hi Peter, Ben,
> 
> I've put the whole thing in a sort of a block diagram. Hope it
> explains things more clearly.
> 
> 
> 
> 
> 
> 
> 				----------------
> 				|    CPUIDLE   |  (Select idle states like
> 				|   GOVERNORS  |   C1, C1e, C6 etc in case
> 				| (Menu/Ladder)|   x86 & nap, snooze in
> 				|	       |   case of POWER - based on
> 				----------------   latency & power req)
> 					^
> 					|
> 					|
> 					|
> 					|
> 					|
>   ----------			-----------------	         -------------
>   |	   |			|		|	         |  PSERIES  |
>   |  ACPI  |------------------>	|    CPUIDLE	| <--------------|   IDLE    |
>   |	   |			|		|	         |           |
>   ----------			-----------------	         -------------
> 
> Main idle routine- pm_idle()			         Main idle routine-
> 							     ppc_md.power_save()
> 
> pm_idle = cpuidle_pm_idle;			         ppc_md.power_save =
> (start using cpuidle's idle				      cpuidle_pm_idle();
>  loop, which internally calls
>  governor to select the right
>  state to go into).
> 
> 
> Relavent code snippet from drivers/cpuidle/cpuidle.c
> -------------------------------------
> 
> static void cpuidle_idle_call(void)
> {
> 	............
> 	............
> 
> 	/* Call the menu_select() to select the idle state to enter. */
> 	next_state = cpuidle_curr_governor->select(dev);
> 
> 	............
> 	............
> 
> 	/*
> 	 * Enter the idle state previously selected. target_state->enter
> 	 * would call pseries_cpuidle_loop() which selects nap/snooze
> 	 * /
> 	dev->last_residency = target_state->enter(dev, target_state);
> }
> 
> void cpuidle_install_idle_handler(void)
> {
> 	.........
> 	.........
> 	cpuidle_pm_idle = cpuidle_idle_call;
> }

All I'm seeing here is a frigging mess.

How on earths can something called: cpuidle_install_idle_handler() have
a void argument, _WHAT_ handler is it going to install?

So somehow you added to the ACPI mess by now having 3 wild function
pointers, that's _NOT_ progress.
Balbir Singh Aug. 28, 2009, 6:59 a.m. UTC | #8
* Peter Zijlstra <a.p.zijlstra@chello.nl> [2009-08-28 08:48:05]:

> On Fri, 2009-08-28 at 11:44 +0530, Arun R Bharadwaj wrote:
> > * Peter Zijlstra <a.p.zijlstra@chello.nl> [2009-08-27 14:53:27]:
> > 
> > Hi Peter, Ben,
> > 
> > I've put the whole thing in a sort of a block diagram. Hope it
> > explains things more clearly.
> > 
> > 
> > 
> > 
> > 
> > 
> > 				----------------
> > 				|    CPUIDLE   |  (Select idle states like
> > 				|   GOVERNORS  |   C1, C1e, C6 etc in case
> > 				| (Menu/Ladder)|   x86 & nap, snooze in
> > 				|	       |   case of POWER - based on
> > 				----------------   latency & power req)
> > 					^
> > 					|
> > 					|
> > 					|
> > 					|
> > 					|
> >   ----------			-----------------	         -------------
> >   |	   |			|		|	         |  PSERIES  |
> >   |  ACPI  |------------------>	|    CPUIDLE	| <--------------|   IDLE    |
> >   |	   |			|		|	         |           |
> >   ----------			-----------------	         -------------
> > 
> > Main idle routine- pm_idle()			         Main idle routine-
> > 							     ppc_md.power_save()
> > 
> > pm_idle = cpuidle_pm_idle;			         ppc_md.power_save =
> > (start using cpuidle's idle				      cpuidle_pm_idle();
> >  loop, which internally calls
> >  governor to select the right
> >  state to go into).
> > 
> > 
> > Relavent code snippet from drivers/cpuidle/cpuidle.c
> > -------------------------------------
> > 
> > static void cpuidle_idle_call(void)
> > {
> > 	............
> > 	............
> > 
> > 	/* Call the menu_select() to select the idle state to enter. */
> > 	next_state = cpuidle_curr_governor->select(dev);
> > 
> > 	............
> > 	............
> > 
> > 	/*
> > 	 * Enter the idle state previously selected. target_state->enter
> > 	 * would call pseries_cpuidle_loop() which selects nap/snooze
> > 	 * /
> > 	dev->last_residency = target_state->enter(dev, target_state);
> > }
> > 
> > void cpuidle_install_idle_handler(void)
> > {
> > 	.........
> > 	.........
> > 	cpuidle_pm_idle = cpuidle_idle_call;
> > }
> 
> All I'm seeing here is a frigging mess.
> 
> How on earths can something called: cpuidle_install_idle_handler() have
> a void argument, _WHAT_ handler is it going to install?
>

Peter, I think that is a typo, we need a function pointer like your
snippet of code showed
 
> So somehow you added to the ACPI mess by now having 3 wild function
> pointers, that's _NOT_ progress.
>

The goal, IIUC is to integrate three modules

1. CPUIdle - for idle CPU management
2. Architecture specific code for idle detection
3. CPUIdle governor

Again, if IIUC

The architecture specific idle code would call an idle loop that would
call into the CPUIdle framework, which inturn would depend on the
governor selected.

Passing void* is a mess, we need function pointer registration
and a framework so that we can query what is registered.
Peter Zijlstra Aug. 28, 2009, 7:01 a.m. UTC | #9
On Fri, 2009-08-28 at 08:48 +0200, Peter Zijlstra wrote:
> 
> > void cpuidle_install_idle_handler(void)
> > {
> >       .........
> >       .........
> >       cpuidle_pm_idle = cpuidle_idle_call;
> > }
> 
> All I'm seeing here is a frigging mess.
> 
> How on earths can something called: cpuidle_install_idle_handler() have
> a void argument, _WHAT_ handler is it going to install?

Argh, now I see, it installs itself as the platform idle handler.

so cpuidle_install_idle_handler() pokes at the unmanaged pm_idle pointer
to make cpuidle take control.

On module load it does:

 pm_idle_old = pm_idle;

then in the actual idle loop it does:

        if (!dev || !dev->enabled) {
                if (pm_idle_old)
                        pm_idle_old();

who is to say that the pointer stored at module init time is still
around at that time?

So cpuidle recognised the pm_idle stuff was a flaky, but instead of
fixing it, they build a whole new layer on top of it. Brilliant.

/me goes mark this whole thread read, I've got enough things to do.
Vaidyanathan Srinivasan Aug. 28, 2009, 8:19 a.m. UTC | #10
* Peter Zijlstra <peterz@infradead.org> [2009-08-28 09:01:12]:

> On Fri, 2009-08-28 at 08:48 +0200, Peter Zijlstra wrote:
> > 
> > > void cpuidle_install_idle_handler(void)
> > > {
> > >       .........
> > >       .........
> > >       cpuidle_pm_idle = cpuidle_idle_call;
> > > }
> > 
> > All I'm seeing here is a frigging mess.
> > 
> > How on earths can something called: cpuidle_install_idle_handler() have
> > a void argument, _WHAT_ handler is it going to install?
> 
> Argh, now I see, it installs itself as the platform idle handler.
> 
> so cpuidle_install_idle_handler() pokes at the unmanaged pm_idle pointer
> to make cpuidle take control.
> 
> On module load it does:
> 
>  pm_idle_old = pm_idle;
> 
> then in the actual idle loop it does:
> 
>         if (!dev || !dev->enabled) {
>                 if (pm_idle_old)
>                         pm_idle_old();
> 
> who is to say that the pointer stored at module init time is still
> around at that time?
> 
> So cpuidle recognised the pm_idle stuff was a flaky, but instead of
> fixing it, they build a whole new layer on top of it. Brilliant.
> 
> /me goes mark this whole thread read, I've got enough things to do.

Hi Peter,

I understand that you are frustrated with the mess.  We are willing
to clean up the pm_idle pointer at the moment before the cpuidle
framework is exploited my more archs.

At this moment we need your suggestions on what interface should we
call 'clean' and safe.

cpuidle.c and the arch independent cpuidle subsystem is not a module
and its cpuidle_idle_call() routine is valid and can be safely called
from arch dependent process.c

The fragile part is how cpuidle_idle_call() is hooked onto arch
specific cpu_idle() function at runtime.  x86 has the pm_idle pointer
exported while powerpc has ppc_md.power_save pointer being called.

At cpuidle init time we can override the platform idle function, but
that will mean we are including arch specific code in cpuidle.c

Do you think having an exported function in cpuidle.c to give us the
correct pointer to arch code be better than the current situation:

in drivers/cpuidle/cpuidle.c

void (*get_cpuidle_handler(void))(void)
{
        return cpuidle_pm_idle;
}
EXPORT_SYMBOL(get_cpuidle_handler);

and from pseries/processor_idle.c,

ppc_md.power_save = get_cpuidle_handler();

--Vaidy
diff mbox

Patch

Index: linux.trees.git/drivers/cpuidle/cpuidle.c
===================================================================
--- linux.trees.git.orig/drivers/cpuidle/cpuidle.c
+++ linux.trees.git/drivers/cpuidle/cpuidle.c
@@ -25,6 +25,7 @@  DEFINE_PER_CPU(struct cpuidle_device *, 
 DEFINE_MUTEX(cpuidle_lock);
 LIST_HEAD(cpuidle_detected_devices);
 static void (*pm_idle_old)(void);
+void (*cpuidle_pm_idle)(void);
 
 static int enabled_devices;
 
@@ -98,10 +99,10 @@  static void cpuidle_idle_call(void)
  */
 void cpuidle_install_idle_handler(void)
 {
-	if (enabled_devices && (pm_idle != cpuidle_idle_call)) {
+	if (enabled_devices && (cpuidle_pm_idle != cpuidle_idle_call)) {
 		/* Make sure all changes finished before we switch to new idle */
 		smp_wmb();
-		pm_idle = cpuidle_idle_call;
+		cpuidle_pm_idle = cpuidle_idle_call;
 	}
 }
 
@@ -110,8 +111,9 @@  void cpuidle_install_idle_handler(void)
  */
 void cpuidle_uninstall_idle_handler(void)
 {
-	if (enabled_devices && pm_idle_old && (pm_idle != pm_idle_old)) {
-		pm_idle = pm_idle_old;
+	if (enabled_devices && pm_idle_old &&
+			(cpuidle_pm_idle != pm_idle_old)) {
+		cpuidle_pm_idle = pm_idle_old;
 		cpuidle_kick_cpus();
 	}
 }
@@ -382,7 +384,7 @@  static int __init cpuidle_init(void)
 {
 	int ret;
 
-	pm_idle_old = pm_idle;
+	pm_idle_old = cpuidle_pm_idle;
 
 	ret = cpuidle_add_class_sysfs(&cpu_sysdev_class);
 	if (ret)
Index: linux.trees.git/include/linux/cpuidle.h
===================================================================
--- linux.trees.git.orig/include/linux/cpuidle.h
+++ linux.trees.git/include/linux/cpuidle.h
@@ -188,4 +188,11 @@  static inline void cpuidle_unregister_go
 #define CPUIDLE_DRIVER_STATE_START	0
 #endif
 
+/*
+ * Idle callback used by cpuidle to call the cpuidle_idle_call().
+ * Platform drivers can use this to register to cpuidle's idle loop.
+ */
+
+extern void (*cpuidle_pm_idle)(void);
+
 #endif /* _LINUX_CPUIDLE_H */