diff mbox

[v4,2/4] cpuidle:powernv: Add helper function to populate powernv idle states.

Message ID 36f9cd2d944772d8e414a8240f9ec36eaec65ebd.1481288905.git.ego@linux.vnet.ibm.com (mailing list archive)
State Superseded
Headers show

Commit Message

Gautham R Shenoy Dec. 9, 2016, 1:32 p.m. UTC
From: "Gautham R. Shenoy" <ego@linux.vnet.ibm.com>

In the current code for powernv_add_idle_states, there is a lot of code
duplication while initializing an idle state in powernv_states table.

Add an inline helper function to populate the powernv_states[] table for
a given idle state. Invoke this for populating the "Nap", "Fastsleep"
and the stop states in powernv_add_idle_states.

Signed-off-by: Gautham R. Shenoy <ego@linux.vnet.ibm.com>
---
 drivers/cpuidle/cpuidle-powernv.c | 85 ++++++++++++++++++++++-----------------
 include/linux/cpuidle.h           |  1 +
 2 files changed, 50 insertions(+), 36 deletions(-)

Comments

Balbir Singh Dec. 13, 2016, 11:51 a.m. UTC | #1
On 10/12/16 00:32, Gautham R. Shenoy wrote:
> From: "Gautham R. Shenoy" <ego@linux.vnet.ibm.com>
> 
> In the current code for powernv_add_idle_states, there is a lot of code
> duplication while initializing an idle state in powernv_states table.
> 
> Add an inline helper function to populate the powernv_states[] table for
> a given idle state. Invoke this for populating the "Nap", "Fastsleep"
> and the stop states in powernv_add_idle_states.
> 
> Signed-off-by: Gautham R. Shenoy <ego@linux.vnet.ibm.com>
> ---
>  drivers/cpuidle/cpuidle-powernv.c | 85 ++++++++++++++++++++++-----------------
>  include/linux/cpuidle.h           |  1 +
>  2 files changed, 50 insertions(+), 36 deletions(-)
> 
> diff --git a/drivers/cpuidle/cpuidle-powernv.c b/drivers/cpuidle/cpuidle-powernv.c
> index 7fe442c..db18af1 100644
> --- a/drivers/cpuidle/cpuidle-powernv.c
> +++ b/drivers/cpuidle/cpuidle-powernv.c
> @@ -167,6 +167,24 @@ static int powernv_cpuidle_driver_init(void)
>  	return 0;
>  }
>  
> +static inline void add_powernv_state(int index, const char *name,
> +				     unsigned int flags,
> +				     int (*idle_fn)(struct cpuidle_device *,
> +						    struct cpuidle_driver *,
> +						    int),
> +				     unsigned int target_residency,
> +				     unsigned int exit_latency,
> +				     u64 psscr_val)
> +{
> +	strlcpy(powernv_states[index].name, name, CPUIDLE_NAME_LEN);
> +	strlcpy(powernv_states[index].desc, name, CPUIDLE_NAME_LEN);

Do name and desc ever diverge?

> +	powernv_states[index].flags = flags;
> +	powernv_states[index].target_residency = target_residency;
> +	powernv_states[index].exit_latency = exit_latency;
> +	powernv_states[index].enter = idle_fn;

Why not call it idle_fn instead of enter?

> +	stop_psscr_table[index] = psscr_val;
> +}
> +
>  static int powernv_add_idle_states(void)
>  {
>  	struct device_node *power_mgt;
> @@ -236,6 +254,7 @@ static int powernv_add_idle_states(void)
>  		"ibm,cpu-idle-state-residency-ns", residency_ns, dt_idle_states);
>  
>  	for (i = 0; i < dt_idle_states; i++) {
> +		unsigned int exit_latency, target_residency;
>  		/*
>  		 * If an idle state has exit latency beyond
>  		 * POWERNV_THRESHOLD_LATENCY_NS then don't use it
> @@ -243,28 +262,33 @@ static int powernv_add_idle_states(void)
>  		 */
>  		if (latency_ns[i] > POWERNV_THRESHOLD_LATENCY_NS)

Ideally this should be called POWERNV_MAX_THRESHOLD_LATENCY_NS then

>  			continue;
> +		/*
> +		 * Firmware passes residency and latency values in ns.
> +		 * cpuidle expects it in us.
> +		 */
> +		exit_latency = ((unsigned int)latency_ns[i]) / 1000;
> +		if (!rc)
> +			target_residency = residency_ns[i] / 1000;
> +		else
> +			target_residency = 0;

Where do we get rc from? what does target_residency = 0 mean?
Balbir Singh
Gautham R Shenoy Dec. 14, 2016, 7:14 a.m. UTC | #2
Hi Balbir,

On Tue, Dec 13, 2016 at 10:51:04PM +1100, Balbir Singh wrote:
> 
> 
> On 10/12/16 00:32, Gautham R. Shenoy wrote:
> > From: "Gautham R. Shenoy" <ego@linux.vnet.ibm.com>
> > 
> > In the current code for powernv_add_idle_states, there is a lot of code
> > duplication while initializing an idle state in powernv_states table.
> > 
> > Add an inline helper function to populate the powernv_states[] table for
> > a given idle state. Invoke this for populating the "Nap", "Fastsleep"
> > and the stop states in powernv_add_idle_states.
> > 
> > Signed-off-by: Gautham R. Shenoy <ego@linux.vnet.ibm.com>
> > ---
> >  drivers/cpuidle/cpuidle-powernv.c | 85 ++++++++++++++++++++++-----------------
> >  include/linux/cpuidle.h           |  1 +
> >  2 files changed, 50 insertions(+), 36 deletions(-)
> > 
> > diff --git a/drivers/cpuidle/cpuidle-powernv.c b/drivers/cpuidle/cpuidle-powernv.c
> > index 7fe442c..db18af1 100644
> > --- a/drivers/cpuidle/cpuidle-powernv.c
> > +++ b/drivers/cpuidle/cpuidle-powernv.c
> > @@ -167,6 +167,24 @@ static int powernv_cpuidle_driver_init(void)
> >  	return 0;
> >  }
> >  
> > +static inline void add_powernv_state(int index, const char *name,
> > +				     unsigned int flags,
> > +				     int (*idle_fn)(struct cpuidle_device *,
> > +						    struct cpuidle_driver *,
> > +						    int),
> > +				     unsigned int target_residency,
> > +				     unsigned int exit_latency,
> > +				     u64 psscr_val)
> > +{
> > +	strlcpy(powernv_states[index].name, name, CPUIDLE_NAME_LEN);
> > +	strlcpy(powernv_states[index].desc, name, CPUIDLE_NAME_LEN);
> 
> Do name and desc ever diverge?

On some other architectures, like kirkwood (see
drivers/cpuidle/cpuidle-kirkwood.c) they do. "desc" field is used to
provide a more descriptive information regarding the idle state.

On POWER, the names were self-explanatory. So, we have desc same as
the name.

> 
> > +	powernv_states[index].flags = flags;
> > +	powernv_states[index].target_residency = target_residency;
> > +	powernv_states[index].exit_latency = exit_latency;
> > +	powernv_states[index].enter = idle_fn;
> 
> Why not call it idle_fn instead of enter?

"enter" is a field name in the generic cpuidle_state structure and
powernv_states[] is an instance of that structure.

> 
> > +	stop_psscr_table[index] = psscr_val;
> > +}
> > +
> >  static int powernv_add_idle_states(void)
> >  {
> >  	struct device_node *power_mgt;
> > @@ -236,6 +254,7 @@ static int powernv_add_idle_states(void)
> >  		"ibm,cpu-idle-state-residency-ns", residency_ns, dt_idle_states);
> >  
> >  	for (i = 0; i < dt_idle_states; i++) {
> > +		unsigned int exit_latency, target_residency;
> >  		/*
> >  		 * If an idle state has exit latency beyond
> >  		 * POWERNV_THRESHOLD_LATENCY_NS then don't use it
> > @@ -243,28 +262,33 @@ static int powernv_add_idle_states(void)
> >  		 */
> >  		if (latency_ns[i] > POWERNV_THRESHOLD_LATENCY_NS)
> 
> Ideally this should be called POWERNV_MAX_THRESHOLD_LATENCY_NS then

Yes, it can be called that. But then again, we're only interested in
the upper threshold in this code. I will add a comment near the macro
definition.

> 
> >  			continue;
> > +		/*
> > +		 * Firmware passes residency and latency values in ns.
> > +		 * cpuidle expects it in us.
> > +		 */
> > +		exit_latency = ((unsigned int)latency_ns[i]) / 1000;
> > +		if (!rc)
> > +			target_residency = residency_ns[i] / 1000;
> > +		else
> > +			target_residency = 0;
> 
> Where do we get rc from? what does target_residency = 0 mean?

The rc value comes from the
of_property_read_u32_array(power_mgt,
		"ibm,cpu-idle-state-residency-ns", residency_ns,
		dt_idle_states);

just before the for-loop. This tells us whether the firmware has
populated the residency information for the idle state or not.

rc != 0 indicates that the firmware has not populated the value.

Since the governor will pick the first idle state whose
target_residency matches the predicted residency, setting
target_residency = 0 implies that if any stop state is selected at
all, it is the earliest state.


> Balbir Singh
>
diff mbox

Patch

diff --git a/drivers/cpuidle/cpuidle-powernv.c b/drivers/cpuidle/cpuidle-powernv.c
index 7fe442c..db18af1 100644
--- a/drivers/cpuidle/cpuidle-powernv.c
+++ b/drivers/cpuidle/cpuidle-powernv.c
@@ -167,6 +167,24 @@  static int powernv_cpuidle_driver_init(void)
 	return 0;
 }
 
+static inline void add_powernv_state(int index, const char *name,
+				     unsigned int flags,
+				     int (*idle_fn)(struct cpuidle_device *,
+						    struct cpuidle_driver *,
+						    int),
+				     unsigned int target_residency,
+				     unsigned int exit_latency,
+				     u64 psscr_val)
+{
+	strlcpy(powernv_states[index].name, name, CPUIDLE_NAME_LEN);
+	strlcpy(powernv_states[index].desc, name, CPUIDLE_NAME_LEN);
+	powernv_states[index].flags = flags;
+	powernv_states[index].target_residency = target_residency;
+	powernv_states[index].exit_latency = exit_latency;
+	powernv_states[index].enter = idle_fn;
+	stop_psscr_table[index] = psscr_val;
+}
+
 static int powernv_add_idle_states(void)
 {
 	struct device_node *power_mgt;
@@ -236,6 +254,7 @@  static int powernv_add_idle_states(void)
 		"ibm,cpu-idle-state-residency-ns", residency_ns, dt_idle_states);
 
 	for (i = 0; i < dt_idle_states; i++) {
+		unsigned int exit_latency, target_residency;
 		/*
 		 * If an idle state has exit latency beyond
 		 * POWERNV_THRESHOLD_LATENCY_NS then don't use it
@@ -243,28 +262,33 @@  static int powernv_add_idle_states(void)
 		 */
 		if (latency_ns[i] > POWERNV_THRESHOLD_LATENCY_NS)
 			continue;
+		/*
+		 * Firmware passes residency and latency values in ns.
+		 * cpuidle expects it in us.
+		 */
+		exit_latency = ((unsigned int)latency_ns[i]) / 1000;
+		if (!rc)
+			target_residency = residency_ns[i] / 1000;
+		else
+			target_residency = 0;
 
 		/*
-		 * Cpuidle accepts exit_latency and target_residency in us.
-		 * Use default target_residency values if f/w does not expose it.
+		 * For nap and fastsleep, use default target_residency
+		 * values if f/w does not expose it.
 		 */
 		if (flags[i] & OPAL_PM_NAP_ENABLED) {
+			if (!rc)
+				target_residency = 100;
 			/* Add NAP state */
-			strcpy(powernv_states[nr_idle_states].name, "Nap");
-			strcpy(powernv_states[nr_idle_states].desc, "Nap");
-			powernv_states[nr_idle_states].flags = 0;
-			powernv_states[nr_idle_states].target_residency = 100;
-			powernv_states[nr_idle_states].enter = nap_loop;
+			add_powernv_state(nr_idle_states, "Nap",
+					  CPUIDLE_FLAG_NONE, nap_loop,
+					  target_residency, exit_latency, 0);
 		} else if ((flags[i] & OPAL_PM_STOP_INST_FAST) &&
 				!(flags[i] & OPAL_PM_TIMEBASE_STOP)) {
-			strncpy(powernv_states[nr_idle_states].name,
-				names[i], CPUIDLE_NAME_LEN);
-			strncpy(powernv_states[nr_idle_states].desc,
-				names[i], CPUIDLE_NAME_LEN);
-			powernv_states[nr_idle_states].flags = 0;
-
-			powernv_states[nr_idle_states].enter = stop_loop;
-			stop_psscr_table[nr_idle_states] = psscr_val[i];
+			add_powernv_state(nr_idle_states, names[i],
+					  CPUIDLE_FLAG_NONE, stop_loop,
+					  target_residency, exit_latency,
+					  psscr_val[i]);
 		}
 
 		/*
@@ -274,32 +298,21 @@  static int powernv_add_idle_states(void)
 #ifdef CONFIG_TICK_ONESHOT
 		if (flags[i] & OPAL_PM_SLEEP_ENABLED ||
 			flags[i] & OPAL_PM_SLEEP_ENABLED_ER1) {
+			if (!rc)
+				target_residency = 300000;
 			/* Add FASTSLEEP state */
-			strcpy(powernv_states[nr_idle_states].name, "FastSleep");
-			strcpy(powernv_states[nr_idle_states].desc, "FastSleep");
-			powernv_states[nr_idle_states].flags = CPUIDLE_FLAG_TIMER_STOP;
-			powernv_states[nr_idle_states].target_residency = 300000;
-			powernv_states[nr_idle_states].enter = fastsleep_loop;
+			add_powernv_state(nr_idle_states, "FastSleep",
+					  CPUIDLE_FLAG_TIMER_STOP,
+					  fastsleep_loop,
+					  target_residency, exit_latency, 0);
 		} else if ((flags[i] & OPAL_PM_STOP_INST_DEEP) &&
 				(flags[i] & OPAL_PM_TIMEBASE_STOP)) {
-			strncpy(powernv_states[nr_idle_states].name,
-				names[i], CPUIDLE_NAME_LEN);
-			strncpy(powernv_states[nr_idle_states].desc,
-				names[i], CPUIDLE_NAME_LEN);
-
-			powernv_states[nr_idle_states].flags = CPUIDLE_FLAG_TIMER_STOP;
-			powernv_states[nr_idle_states].enter = stop_loop;
-			stop_psscr_table[nr_idle_states] = psscr_val[i];
+			add_powernv_state(nr_idle_states, names[i],
+					  CPUIDLE_FLAG_TIMER_STOP, stop_loop,
+					  target_residency, exit_latency,
+					  psscr_val[i]);
 		}
 #endif
-		powernv_states[nr_idle_states].exit_latency =
-				((unsigned int)latency_ns[i]) / 1000;
-
-		if (!rc) {
-			powernv_states[nr_idle_states].target_residency =
-				((unsigned int)residency_ns[i]) / 1000;
-		}
-
 		nr_idle_states++;
 	}
 out:
diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h
index bb31373..c4e10f8 100644
--- a/include/linux/cpuidle.h
+++ b/include/linux/cpuidle.h
@@ -62,6 +62,7 @@  struct cpuidle_state {
 };
 
 /* Idle State Flags */
+#define CPUIDLE_FLAG_NONE       (0x00)
 #define CPUIDLE_FLAG_COUPLED	(0x02) /* state applies to multiple cpus */
 #define CPUIDLE_FLAG_TIMER_STOP (0x04)  /* timer is stopped on this state */