[V7,2/3] occ: Fix Pstate ordering for P9

Submitted by Shilpasri G Bhat on Feb. 13, 2017, 8:31 p.m.

Details

Message ID 1487017902-14345-3-git-send-email-shilpa.bhat@linux.vnet.ibm.com
State Superseded
Headers show

Commit Message

Shilpasri G Bhat Feb. 13, 2017, 8:31 p.m.
In P9 the pstate values are positive. They are continuous set of
unsigned integers [0 to +N] where Pmax is 0 and Pmin is N. The
linear ordering of pstates for P9 has changed compared to P8.
P8 has neagtive pstate values advertised as [0 to -N] where Pmax
is 0 and Pmin is -N. This patch adds helper routines to abstract
pstate comparison with pmax and adds sanity pstate limit checks.
This patch also fixes pstate arithmetic by using labs().

Suggested-by: Gautham R. Shenoy <ego@linux.vnet.ibm.com>
Signed-off-by: Shilpasri G Bhat <shilpa.bhat@linux.vnet.ibm.com>
---
No changes from V6
 hw/occ.c | 92 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 88 insertions(+), 4 deletions(-)

Comments

Michael Neuling May 22, 2017, 1:13 a.m.
On Tue, 2017-02-14 at 02:01 +0530, Shilpasri G Bhat wrote:
> In P9 the pstate values are positive. They are continuous set of
> unsigned integers [0 to +N] where Pmax is 0 and Pmin is N. The
> linear ordering of pstates for P9 has changed compared to P8.
> P8 has neagtive pstate values advertised as [0 to -N] where Pmax
> is 0 and Pmin is -N. This patch adds helper routines to abstract
> pstate comparison with pmax and adds sanity pstate limit checks.
> This patch also fixes pstate arithmetic by using labs().

Can you put this comment inline in the code?  There is no mention of Pmax Pmin
where cmp_postive/negative_pstates() is making it difficult to understand what
it's actually doing.

Alternatively, why don't we convert what we get on P8 to positive numbers, so
that all the pstates numbers are positive.  Then we don't need this clumsy
negative/positive code.

Mikey


> Suggested-by: Gautham R. Shenoy <ego@linux.vnet.ibm.com>
> Signed-off-by: Shilpasri G Bhat <shilpa.bhat@linux.vnet.ibm.com>
> ---
> No changes from V6
>  hw/occ.c | 92 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
> ---
>  1 file changed, 88 insertions(+), 4 deletions(-)
> 
> diff --git a/hw/occ.c b/hw/occ.c
> index ee76be6..9eb32b4 100644
> --- a/hw/occ.c
> +++ b/hw/occ.c
> @@ -84,6 +84,40 @@ DEFINE_LOG_ENTRY(OPAL_RC_OCC_TIMEOUT,
> OPAL_PLATFORM_ERR_EVT, OPAL_OCC,
>  		OPAL_CEC_HARDWARE, OPAL_UNRECOVERABLE_ERR_GENERAL,
>  		OPAL_NA);
>  
> +/**
> + * cmp_pstates: Compares the given two pstates and determines which
> + *              among them is associated with a higher pstate.
> + *
> + * @a,@b: The pstate ids of the pstates being compared.
> + *
> + * Returns: -1 : If pstate associated with @a is smaller than
> + *               the pstate associated with @b.
> + *	     0 : If pstates associated with @a and @b are equal.
> + *	     1 : If pstate associated with @a is greater than
> + *               the pstate associated with @b.
> + */
> +static int (*cmp_pstates)(int a, int b);
> +
> +static int cmp_positive_pstates(int a, int b)
> +{
> +	if (a > b)
> +		return -1;
> +	else if (a < b)
> +		return 1;
> +
> +	return 0;
> +}
> +
> +static int cmp_negative_pstates(int a, int b)
> +{
> +	if (a < b)
> +		return -1;
> +	else if (a > b)
> +		return 1;
> +
> +	return 0;
> +}
> +

Can these just return a bool?  The callers only care about > 0.

>  /* Check each chip's HOMER/Sapphire area for PState valid bit */
>  static bool wait_for_all_occ_init(void)
>  {
> @@ -214,15 +248,38 @@ static bool add_cpu_pstate_properties(s8 *pstate_nom)
>  		return false;
>  	}
>  
> +	/*
> +	 * Workload-Optimized-Frequency(WOF) or Ultra-Turbo is supported
> +	 * from version 2 onwards. If WOF is disabled then, the max
> +	 * ultra_turbo pstate will be equal to max turbo pstate.
> +	 */
>  	if (occ_data->version > 1 &&
> -	    occ_data->pstate_ultra_turbo > occ_data->pstate_turbo)
> +	    cmp_pstates(occ_data->pstate_ultra_turbo,
> +			occ_data->pstate_turbo) > 0)
>  		ultra_turbo_en = true;
>  	else
>  		ultra_turbo_en = false;
>  
>  	pmax = ultra_turbo_en ? occ_data->pstate_ultra_turbo :
>  				occ_data->pstate_turbo;
> -	nr_pstates = pmax - occ_data->pstate_min + 1;
> +
> +	/* Sanity check for pstate limits */
> +	if (cmp_pstates(occ_data->pstate_min, pmax) > 0) {
> +		/**
> +		 * @fwts-label OCCInvalidPStateLimits
> +		 * @fwts-advice The min pstate is greater than the
> +		 * max pstate, this could be due to corrupted/invalid
> +		 * data in OCC-OPAL shared memory region. So OPAL has
> +		 * not added pstates to device tree. This means that
> +		 * CPU Frequency management will not be functional in
> +		 * the host.
> +		 */
> +		prlog(PR_ERR, "OCC: Invalid Pstate Limits. Pmin(%d) > Pmax
> (%d)\n",
> +		      occ_data->pstate_min, pmax);
> +		return false;
> +	}
> +
> +	nr_pstates = labs(pmax - occ_data->pstate_min) + 1;
>  	prlog(PR_DEBUG, "OCC: Min %d Nom %d Max %d Nr States %d\n", 
>  	      occ_data->pstate_min, occ_data->pstate_nom,
>  	      pmax, nr_pstates);
> @@ -319,15 +376,31 @@ static bool add_cpu_pstate_properties(s8 *pstate_nom)
>  			dt_core_max[i] = occ_data->core_max[i];
>  	}
>  
> +	/*
> +	 * OCC provides pstate table entries in continuous descending order.
> +	 * Parse the pstate table to skip pstate_ids that are greater
> +	 * than Pmax. If a pstate_id is equal to Pmin then add it to
> +	 * the list and break from the loop as this is the last valid
> +	 * element in the pstate table.
> +	 */
>  	for (i = 0, j = 0; i < MAX_PSTATES && j < nr_pstates; i++) {
> -		if (occ_data->pstates[i].id > pmax ||
> -		    occ_data->pstates[i].id < occ_data->pstate_min)
> +		if (cmp_pstates(occ_data->pstates[i].id, pmax) > 0)
>  			continue;
> +
>  		dt_id[j] = occ_data->pstates[i].id;
>  		dt_freq[j] = occ_data->pstates[i].freq_khz / 1000;
>  		dt_vdd[j] = occ_data->pstates[i].vdd;
>  		dt_vcs[j] = occ_data->pstates[i].vcs;
>  		j++;
> +
> +		if (occ_data->pstates[i].id == occ_data->pstate_min)
> +			break;
> +	}
> +
> +	if (j != nr_pstates) {
> +		prerror("OCC: Expected pstates(%d) is not equal to parsed
> pstates(%d)\n",
> +			nr_pstates, j);
> +		goto out_free_vcs;
>  	}
>  
>  	/* Add the device-tree entries */
> @@ -533,6 +606,17 @@ void occ_pstates_init(void)
>  	if (occ_pstates_initialized)
>  		return;
>  
> +	switch (proc_gen) {
> +	case proc_gen_p8:
> +		cmp_pstates = cmp_negative_pstates;
> +		break;
> +	case proc_gen_p9:
> +		cmp_pstates = cmp_positive_pstates;
> +		break;
> +	default:
> +		return;
> +	}
> +
>  	chip = next_chip(NULL);
>  	if (!chip->homer_base) {
>  		log_simple_error(&e_info(OPAL_RC_OCC_PSTATE_INIT),
Vaidyanathan Srinivasan May 26, 2017, 7:08 a.m.
* Michael Neuling <mikey@neuling.org> [2017-05-22 11:13:38]:

> On Tue, 2017-02-14 at 02:01 +0530, Shilpasri G Bhat wrote:
> > In P9 the pstate values are positive. They are continuous set of
> > unsigned integers [0 to +N] where Pmax is 0 and Pmin is N. The
> > linear ordering of pstates for P9 has changed compared to P8.
> > P8 has neagtive pstate values advertised as [0 to -N] where Pmax
> > is 0 and Pmin is -N. This patch adds helper routines to abstract
> > pstate comparison with pmax and adds sanity pstate limit checks.
> > This patch also fixes pstate arithmetic by using labs().
> 
> Can you put this comment inline in the code?  There is no mention of Pmax Pmin
> where cmp_postive/negative_pstates() is making it difficult to understand what
> it's actually doing.

Yes, we can explain this difference in specification inline in code.
Will update in a repost.

> 
> Alternatively, why don't we convert what we get on P8 to positive numbers, so
> that all the pstates numbers are positive.  Then we don't need this clumsy
> negative/positive code.

The firmware interface specification allows POWER8 IDs to be a signed
number and practically we can have negative PState IDs that are higher
frequency and positive PState IDs that are lower frequencies.

Treating them as unsigned make validation of the range and boundaries
provided by OCC difficult between POWER8 and POWER9.

The good part is that the current firmware interface for POWER9 and
future platforms treat the PState IDs as positive and hence future
platforms and implementation are clean.

--Vaidy
Vaidyanathan Srinivasan May 26, 2017, 7:29 a.m.
* Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com> [2017-05-26 12:38:52]:

> * Michael Neuling <mikey@neuling.org> [2017-05-22 11:13:38]:
> 
> > On Tue, 2017-02-14 at 02:01 +0530, Shilpasri G Bhat wrote:
> > > In P9 the pstate values are positive. They are continuous set of
> > > unsigned integers [0 to +N] where Pmax is 0 and Pmin is N. The
> > > linear ordering of pstates for P9 has changed compared to P8.
> > > P8 has neagtive pstate values advertised as [0 to -N] where Pmax
> > > is 0 and Pmin is -N. This patch adds helper routines to abstract
> > > pstate comparison with pmax and adds sanity pstate limit checks.
> > > This patch also fixes pstate arithmetic by using labs().
> > 
> > Can you put this comment inline in the code?  There is no mention of Pmax Pmin
> > where cmp_postive/negative_pstates() is making it difficult to understand what
> > it's actually doing.
> 
> Yes, we can explain this difference in specification inline in code.
> Will update in a repost.
> 
> > 
> > Alternatively, why don't we convert what we get on P8 to positive numbers, so
> > that all the pstates numbers are positive.  Then we don't need this clumsy
> > negative/positive code.
> 
> The firmware interface specification allows POWER8 IDs to be a signed
> number and practically we can have negative PState IDs that are higher
                                                                ^^ lower
> frequency and positive PState IDs that are lower frequencies.
                                             ^^ higher
                                              
> Treating them as unsigned make validation of the range and boundaries
> provided by OCC difficult between POWER8 and POWER9.
> 
> The good part is that the current firmware interface for POWER9 and
> future platforms treat the PState IDs as positive and hence future
> platforms and implementation are clean.
> 
> --Vaidy
> 
> _______________________________________________
> Skiboot mailing list
> Skiboot@lists.ozlabs.org
> https://lists.ozlabs.org/listinfo/skiboot
Vaidyanathan Srinivasan May 26, 2017, 8:34 a.m.
* Michael Neuling <mikey@neuling.org> [2017-05-22 11:13:38]:

> On Tue, 2017-02-14 at 02:01 +0530, Shilpasri G Bhat wrote:

[snip]

> > + * cmp_pstates: Compares the given two pstates and determines which
> > + *              among them is associated with a higher pstate.
> > + *
> > + * @a,@b: The pstate ids of the pstates being compared.
> > + *
> > + * Returns: -1 : If pstate associated with @a is smaller than
> > + *               the pstate associated with @b.
> > + *	     0 : If pstates associated with @a and @b are equal.
> > + *	     1 : If pstate associated with @a is greater than
> > + *               the pstate associated with @b.
> > + */
> > +static int (*cmp_pstates)(int a, int b);
> > +
> > +static int cmp_positive_pstates(int a, int b)
> > +{
> > +	if (a > b)
> > +		return -1;
> > +	else if (a < b)
> > +		return 1;
> > +
> > +	return 0;
> > +}
> > +
> > +static int cmp_negative_pstates(int a, int b)
> > +{
> > +	if (a < b)
> > +		return -1;
> > +	else if (a > b)
> > +		return 1;
> > +
> > +	return 0;
> > +}
> > +
> 
> Can these just return a bool?  The callers only care about > 0.

I tried converting to bool, but the validation checks become
inaccurate.  We really need to skip the equal case and check for
greater or less.

I tried to change like this:

/**
 * cmp_pstates: Compares the given two pstates and determines which
 *              among them is associated with a higher pstate.
 *
 * @a,@b: The pstate ids of the pstates being compared.
 *
 * Returns: false: If pstate associated with @a is smaller than
 *                 or equal to the pstate associated with @b.
 *	    true: If pstate associated with @a is greater than
 *               the pstate associated with @b.
 */

Merging the equal condition with true or false is not very obvious to
the caller.  So I decided to keep the current " < 0", " = 0", "> 0"
condition similar to other string compare routines.

--Vaidy

Patch hide | download patch | download mbox

diff --git a/hw/occ.c b/hw/occ.c
index ee76be6..9eb32b4 100644
--- a/hw/occ.c
+++ b/hw/occ.c
@@ -84,6 +84,40 @@  DEFINE_LOG_ENTRY(OPAL_RC_OCC_TIMEOUT, OPAL_PLATFORM_ERR_EVT, OPAL_OCC,
 		OPAL_CEC_HARDWARE, OPAL_UNRECOVERABLE_ERR_GENERAL,
 		OPAL_NA);
 
+/**
+ * cmp_pstates: Compares the given two pstates and determines which
+ *              among them is associated with a higher pstate.
+ *
+ * @a,@b: The pstate ids of the pstates being compared.
+ *
+ * Returns: -1 : If pstate associated with @a is smaller than
+ *               the pstate associated with @b.
+ *	     0 : If pstates associated with @a and @b are equal.
+ *	     1 : If pstate associated with @a is greater than
+ *               the pstate associated with @b.
+ */
+static int (*cmp_pstates)(int a, int b);
+
+static int cmp_positive_pstates(int a, int b)
+{
+	if (a > b)
+		return -1;
+	else if (a < b)
+		return 1;
+
+	return 0;
+}
+
+static int cmp_negative_pstates(int a, int b)
+{
+	if (a < b)
+		return -1;
+	else if (a > b)
+		return 1;
+
+	return 0;
+}
+
 /* Check each chip's HOMER/Sapphire area for PState valid bit */
 static bool wait_for_all_occ_init(void)
 {
@@ -214,15 +248,38 @@  static bool add_cpu_pstate_properties(s8 *pstate_nom)
 		return false;
 	}
 
+	/*
+	 * Workload-Optimized-Frequency(WOF) or Ultra-Turbo is supported
+	 * from version 2 onwards. If WOF is disabled then, the max
+	 * ultra_turbo pstate will be equal to max turbo pstate.
+	 */
 	if (occ_data->version > 1 &&
-	    occ_data->pstate_ultra_turbo > occ_data->pstate_turbo)
+	    cmp_pstates(occ_data->pstate_ultra_turbo,
+			occ_data->pstate_turbo) > 0)
 		ultra_turbo_en = true;
 	else
 		ultra_turbo_en = false;
 
 	pmax = ultra_turbo_en ? occ_data->pstate_ultra_turbo :
 				occ_data->pstate_turbo;
-	nr_pstates = pmax - occ_data->pstate_min + 1;
+
+	/* Sanity check for pstate limits */
+	if (cmp_pstates(occ_data->pstate_min, pmax) > 0) {
+		/**
+		 * @fwts-label OCCInvalidPStateLimits
+		 * @fwts-advice The min pstate is greater than the
+		 * max pstate, this could be due to corrupted/invalid
+		 * data in OCC-OPAL shared memory region. So OPAL has
+		 * not added pstates to device tree. This means that
+		 * CPU Frequency management will not be functional in
+		 * the host.
+		 */
+		prlog(PR_ERR, "OCC: Invalid Pstate Limits. Pmin(%d) > Pmax (%d)\n",
+		      occ_data->pstate_min, pmax);
+		return false;
+	}
+
+	nr_pstates = labs(pmax - occ_data->pstate_min) + 1;
 	prlog(PR_DEBUG, "OCC: Min %d Nom %d Max %d Nr States %d\n", 
 	      occ_data->pstate_min, occ_data->pstate_nom,
 	      pmax, nr_pstates);
@@ -319,15 +376,31 @@  static bool add_cpu_pstate_properties(s8 *pstate_nom)
 			dt_core_max[i] = occ_data->core_max[i];
 	}
 
+	/*
+	 * OCC provides pstate table entries in continuous descending order.
+	 * Parse the pstate table to skip pstate_ids that are greater
+	 * than Pmax. If a pstate_id is equal to Pmin then add it to
+	 * the list and break from the loop as this is the last valid
+	 * element in the pstate table.
+	 */
 	for (i = 0, j = 0; i < MAX_PSTATES && j < nr_pstates; i++) {
-		if (occ_data->pstates[i].id > pmax ||
-		    occ_data->pstates[i].id < occ_data->pstate_min)
+		if (cmp_pstates(occ_data->pstates[i].id, pmax) > 0)
 			continue;
+
 		dt_id[j] = occ_data->pstates[i].id;
 		dt_freq[j] = occ_data->pstates[i].freq_khz / 1000;
 		dt_vdd[j] = occ_data->pstates[i].vdd;
 		dt_vcs[j] = occ_data->pstates[i].vcs;
 		j++;
+
+		if (occ_data->pstates[i].id == occ_data->pstate_min)
+			break;
+	}
+
+	if (j != nr_pstates) {
+		prerror("OCC: Expected pstates(%d) is not equal to parsed pstates(%d)\n",
+			nr_pstates, j);
+		goto out_free_vcs;
 	}
 
 	/* Add the device-tree entries */
@@ -533,6 +606,17 @@  void occ_pstates_init(void)
 	if (occ_pstates_initialized)
 		return;
 
+	switch (proc_gen) {
+	case proc_gen_p8:
+		cmp_pstates = cmp_negative_pstates;
+		break;
+	case proc_gen_p9:
+		cmp_pstates = cmp_positive_pstates;
+		break;
+	default:
+		return;
+	}
+
 	chip = next_chip(NULL);
 	if (!chip->homer_base) {
 		log_simple_error(&e_info(OPAL_RC_OCC_PSTATE_INIT),