diff mbox series

[v6] power-mgmt : occ : Add 'freq-domain-mask' DT property

Message ID 1535524091-32484-1-git-send-email-huntbag@linux.vnet.ibm.com
State Superseded
Headers show
Series [v6] power-mgmt : occ : Add 'freq-domain-mask' DT property | expand

Checks

Context Check Description
snowpatch_ozlabs/apply_patch success master/apply_patch Successfully applied
snowpatch_ozlabs/make_check success Test make_check on branch master

Commit Message

Abhishek Goel Aug. 29, 2018, 6:28 a.m. UTC
Add a new device-tree property freq-domain-indicator to define group of
CPUs which would share same frequency. This property has been added under
power-mgmt node.It is a bitmask.

Bitwise AND is taken between this bitmask value and PIR of cpu. All the
CPUs lying in the same frequency domain will have same result for AND.

For example, For POWER9, 0xFFF0 indicates quad wide frequency domain.
Taking AND with the PIR of CPUs will yield us frequency domain which is
quad wise distribution as last 4 bits have been masked which represent the
cores.

Similarly, 0xFFF4 will represent core wide frequency domain for P9.

Signed-off-by: Abhishek Goel <huntbag@linux.vnet.ibm.com>
---
 doc/device-tree/ibm,opal/power-mgt/occ.rst | 19 +++++++++++++++++++
 hw/occ.c                                   | 24 ++++++++++++++++++++++++
 2 files changed, 43 insertions(+)

Comments

Oliver O'Halloran Aug. 30, 2018, 7:54 a.m. UTC | #1
On Wed, Aug 29, 2018 at 4:28 PM, Abhishek Goel
<huntbag@linux.vnet.ibm.com> wrote:
> Add a new device-tree property freq-domain-indicator to define group of
> CPUs which would share same frequency. This property has been added under
> power-mgmt node.It is a bitmask.
>
> Bitwise AND is taken between this bitmask value and PIR of cpu. All the
> CPUs lying in the same frequency domain will have same result for AND.
>
> For example, For POWER9, 0xFFF0 indicates quad wide frequency domain.
> Taking AND with the PIR of CPUs will yield us frequency domain which is
> quad wise distribution as last 4 bits have been masked which represent the
> cores.
>
> Similarly, 0xFFF4 will represent core wide frequency domain for P9.
>
> Signed-off-by: Abhishek Goel <huntbag@linux.vnet.ibm.com>
> ---
>  doc/device-tree/ibm,opal/power-mgt/occ.rst | 19 +++++++++++++++++++
>  hw/occ.c                                   | 24 ++++++++++++++++++++++++
>  2 files changed, 43 insertions(+)
>
> diff --git a/doc/device-tree/ibm,opal/power-mgt/occ.rst b/doc/device-tree/ibm,opal/power-mgt/occ.rst
> index d13a62b..6a60443 100644
> --- a/doc/device-tree/ibm,opal/power-mgt/occ.rst
> +++ b/doc/device-tree/ibm,opal/power-mgt/occ.rst
> @@ -37,3 +37,22 @@ ibm,pstate-vcss ibm,pstate-vdds
>  These properties list a voltage-identifier of each of the pstates listed in
>  ibm,pstate-ids for the Vcs and Vdd values used for that pstate in that chip.
>  Each VID is a single byte.
> +
> +ibm,opal/power-mgt/freq-domain-mask
> +--------------------------------------------
> +
> +This property is a bitmask which will have different value depending upon the
> +generation of the processor. Frequency domain would indicate group of CPUs
> +which would share same frequency. Bitwise AND is taken between this bitmask
> +value and PIR of cpu. All the CPUs lying in the same frequency domain will have
> +same result for AND. Thus frequency management can be done based on frequency
> +domain. A frequency domain may be a core or a quad, etc depending upon the
> +generation of the processor.
>
> +For example, for POWER8 0xFFF8 indicates core wide frequency domain. Taking AND
> +with the PIR of CPUs will yield us a frequency domain which is core wide
> +distribution as last 3 bits have been masked which represent the threads.
> +
> +For POWER9, 0xFFF0 indicates quad wide frequency domain. Taking AND with
> +the PIR of CPUs will yield us frequency domain which is quad wise
> +distribution as last 4 bits have been masked which represent the cores.
>
> diff --git a/hw/occ.c b/hw/occ.c
> index a55bf8e..52c9db2 100644
> --- a/hw/occ.c
> +++ b/hw/occ.c
> @@ -48,6 +48,12 @@
>  #define MAX_OPAL_CMD_DATA_LENGTH       4090
>  #define MAX_OCC_RSP_DATA_LENGTH                8698
>
> +#define P8_PIR_CORE_MASK               0xFFF8
> +#define P9_PIR_CORE_MASK               0xFFF4
> +#define P9_PIR_QUAD_MASK               0xFFF0
> +#define FREQ_MAX_IN_DOMAIN             0
> +#define FREQ_MOST_RECENTLY_SET         1
> +
>  /**
>   * OCC-OPAL Shared Memory Region
>   *
> @@ -494,6 +500,16 @@ static bool add_cpu_pstate_properties(int *pstate_nom)
>         u8 nr_pstates;
>         bool ultra_turbo_supported;
>         int i, major, minor;
> +       u8 domain_runs_at;
> +       u32 freq_domain_mask;

> +
> +       /* TODO Firmware plumbing required so as to have two modes to set
> +        * PMCR based on max in domain or most recently used. As of today,
> +        * it is always max in domain.
> +        */

> +       domain_runs_at = FREQ_MAX_IN_DOMAIN;
> +       /* Initialized with P8 core domain mask */
> +       freq_domain_mask = P8_PIR_CORE_MASK;

move this into an else branch of the if below.

>         prlog(PR_DEBUG, "OCC: CPU pstate state device tree init\n");
>
> @@ -666,6 +682,13 @@ static bool add_cpu_pstate_properties(int *pstate_nom)
>                 return false;
>         }
>
> +       if (proc_gen == proc_gen_p9) {
> +               if (domain_runs_at == FREQ_MAX_IN_DOMAIN)
> +                       freq_domain_mask = P9_PIR_CORE_MASK;
> +               else if (domain_runs_at == FREQ_MOST_RECENTLY_SET)
> +                       freq_domain_mask = P9_PIR_QUAD_MASK;
> +       }

I don't understand what's going on here. What is the difference
between FREQ_MAX_IN_DOMAIN and FREQ_MOST_RECENTLY_SET? Why are you
using P9_PIR_CORE_MASK in the MAX_IN_DOMAIN case? As far as I know we
only have frequency control at the quad level, so why does using the
core mask as the domain mask make sense here?

>         /* Add the device-tree entries */
>         dt_add_property(power_mgt, "ibm,pstate-ids", dt_id,
>                         nr_pstates * sizeof(u32));
> @@ -674,6 +697,7 @@ static bool add_cpu_pstate_properties(int *pstate_nom)
>         dt_add_property_cells(power_mgt, "ibm,pstate-min", pmin);
>         dt_add_property_cells(power_mgt, "ibm,pstate-nominal", pnom);
>         dt_add_property_cells(power_mgt, "ibm,pstate-max", pmax);
> +       dt_add_property_cells(power_mgt, "freq-domain-mask", freq_domain_mask);
>
>         free(dt_freq);
>         free(dt_id);
> --
> 1.8.3.1
>
> _______________________________________________
> Skiboot mailing list
> Skiboot@lists.ozlabs.org
> https://lists.ozlabs.org/listinfo/skiboot
Abhishek Goel Aug. 30, 2018, 9:37 a.m. UTC | #2
Hi Oliver,

Thanks for the review. I have commented inline below.

On 08/30/2018 01:24 PM, Oliver wrote:
> On Wed, Aug 29, 2018 at 4:28 PM, Abhishek Goel
> <huntbag@linux.vnet.ibm.com> wrote:
>> Add a new device-tree property freq-domain-indicator to define group of
>> CPUs which would share same frequency. This property has been added under
>> power-mgmt node.It is a bitmask.
>>
>> Bitwise AND is taken between this bitmask value and PIR of cpu. All the
>> CPUs lying in the same frequency domain will have same result for AND.
>>
>> For example, For POWER9, 0xFFF0 indicates quad wide frequency domain.
>> Taking AND with the PIR of CPUs will yield us frequency domain which is
>> quad wise distribution as last 4 bits have been masked which represent the
>> cores.
>>
>> Similarly, 0xFFF4 will represent core wide frequency domain for P9.
>>
>> Signed-off-by: Abhishek Goel <huntbag@linux.vnet.ibm.com>
>> ---
>>   doc/device-tree/ibm,opal/power-mgt/occ.rst | 19 +++++++++++++++++++
>>   hw/occ.c                                   | 24 ++++++++++++++++++++++++
>>   2 files changed, 43 insertions(+)
>>
>> diff --git a/doc/device-tree/ibm,opal/power-mgt/occ.rst b/doc/device-tree/ibm,opal/power-mgt/occ.rst
>> index d13a62b..6a60443 100644
>> --- a/doc/device-tree/ibm,opal/power-mgt/occ.rst
>> +++ b/doc/device-tree/ibm,opal/power-mgt/occ.rst
>> @@ -37,3 +37,22 @@ ibm,pstate-vcss ibm,pstate-vdds
>>   These properties list a voltage-identifier of each of the pstates listed in
>>   ibm,pstate-ids for the Vcs and Vdd values used for that pstate in that chip.
>>   Each VID is a single byte.
>> +
>> +ibm,opal/power-mgt/freq-domain-mask
>> +--------------------------------------------
>> +
>> +This property is a bitmask which will have different value depending upon the
>> +generation of the processor. Frequency domain would indicate group of CPUs
>> +which would share same frequency. Bitwise AND is taken between this bitmask
>> +value and PIR of cpu. All the CPUs lying in the same frequency domain will have
>> +same result for AND. Thus frequency management can be done based on frequency
>> +domain. A frequency domain may be a core or a quad, etc depending upon the
>> +generation of the processor.
>>
>> +For example, for POWER8 0xFFF8 indicates core wide frequency domain. Taking AND
>> +with the PIR of CPUs will yield us a frequency domain which is core wide
>> +distribution as last 3 bits have been masked which represent the threads.
>> +
>> +For POWER9, 0xFFF0 indicates quad wide frequency domain. Taking AND with
>> +the PIR of CPUs will yield us frequency domain which is quad wise
>> +distribution as last 4 bits have been masked which represent the cores.
>>
>> diff --git a/hw/occ.c b/hw/occ.c
>> index a55bf8e..52c9db2 100644
>> --- a/hw/occ.c
>> +++ b/hw/occ.c
>> @@ -48,6 +48,12 @@
>>   #define MAX_OPAL_CMD_DATA_LENGTH       4090
>>   #define MAX_OCC_RSP_DATA_LENGTH                8698
>>
>> +#define P8_PIR_CORE_MASK               0xFFF8
>> +#define P9_PIR_CORE_MASK               0xFFF4
>> +#define P9_PIR_QUAD_MASK               0xFFF0
>> +#define FREQ_MAX_IN_DOMAIN             0
>> +#define FREQ_MOST_RECENTLY_SET         1
>> +
>>   /**
>>    * OCC-OPAL Shared Memory Region
>>    *
>> @@ -494,6 +500,16 @@ static bool add_cpu_pstate_properties(int *pstate_nom)
>>          u8 nr_pstates;
>>          bool ultra_turbo_supported;
>>          int i, major, minor;
>> +       u8 domain_runs_at;
>> +       u32 freq_domain_mask;
>> +
>> +       /* TODO Firmware plumbing required so as to have two modes to set
>> +        * PMCR based on max in domain or most recently used. As of today,
>> +        * it is always max in domain.
>> +        */
>> +       domain_runs_at = FREQ_MAX_IN_DOMAIN;
>> +       /* Initialized with P8 core domain mask */
>> +       freq_domain_mask = P8_PIR_CORE_MASK;
> move this into an else branch of the if below.
OK
>
>>          prlog(PR_DEBUG, "OCC: CPU pstate state device tree init\n");
>>
>> @@ -666,6 +682,13 @@ static bool add_cpu_pstate_properties(int *pstate_nom)
>>                  return false;
>>          }
>>
>> +       if (proc_gen == proc_gen_p9) {
>> +               if (domain_runs_at == FREQ_MAX_IN_DOMAIN)
>> +                       freq_domain_mask = P9_PIR_CORE_MASK;
>> +               else if (domain_runs_at == FREQ_MOST_RECENTLY_SET)
>> +                       freq_domain_mask = P9_PIR_QUAD_MASK;
>> +       }
> I don't understand what's going on here. What is the difference
> between FREQ_MAX_IN_DOMAIN and FREQ_MOST_RECENTLY_SET? Why are you
> using P9_PIR_CORE_MASK in the MAX_IN_DOMAIN case? As far as I know we
> only have frequency control at the quad level, so why does using the
> core mask as the domain mask make sense here?
There are two strategies in which the OCC can change the frequency of 
the cores in the quad on P9.
1) FREQ_MAX_IN_DOMAIN : the OCC sets the frequency of the quad to the 
maximal value requested by the component cores.
2) FREQ_MOST_RECENTLY_SET : the OCC sets the frequency of the quad to 
the most recent frequency value requested  by the CPUs in the quad

In case of P8, the domain is the core and the strategy by default is 
FREQ_MOST_RECENTLY_SET (since the PMCRs of the threads in the core are 
mirrored). However on P9, the domain is quad and the strategy is 
FREQ_MAX_IN_DOMAIN (since each core has its own PMCR).

The kernel code expects that the frequencies on the domain changes as 
per the MOST_RECENTLY_SET strategy. If we change domain to quad with the 
strategy being MAX_IN_DOMAIN, in order to be consistent across all the 
cores in the quad, the kernel will have to send IPIs to the other cores 
to decrease frequency on the whole quad, which causes unnecessary jitter.

So, if we are *only* sending the frequency domain mask in the 
device-tree, and not the strategy, then, in order to be consistent with 
the current kernel behaviour, as long as the strategy is MAX_IN_DOMAIN, 
we are falling back to CORE domain (which is what kernel assumes today). 
Once OCC gives us the MOST_RECENTLY_SET strategy, this patch will choose 
the QUAD domain, while remaining consistent with the Kernel's 
expectation of the MOST_RECENTLY_SET strategy.

Suppose we pass both the frequency-domain-mask and the strategy, then we 
can leave this to the Kernel. Would that be better ?


>>          /* Add the device-tree entries */
>>          dt_add_property(power_mgt, "ibm,pstate-ids", dt_id,
>>                          nr_pstates * sizeof(u32));
>> @@ -674,6 +697,7 @@ static bool add_cpu_pstate_properties(int *pstate_nom)
>>          dt_add_property_cells(power_mgt, "ibm,pstate-min", pmin);
>>          dt_add_property_cells(power_mgt, "ibm,pstate-nominal", pnom);
>>          dt_add_property_cells(power_mgt, "ibm,pstate-max", pmax);
>> +       dt_add_property_cells(power_mgt, "freq-domain-mask", freq_domain_mask);
>>
>>          free(dt_freq);
>>          free(dt_id);
>> --
>> 1.8.3.1
>>
>> _______________________________________________
>> Skiboot mailing list
>> Skiboot@lists.ozlabs.org
>> https://lists.ozlabs.org/listinfo/skiboot

Thanks and Regards,
Abhishek
Oliver O'Halloran Aug. 31, 2018, 5:35 a.m. UTC | #3
On Thu, Aug 30, 2018 at 7:37 PM, Abhishek <huntbag@linux.vnet.ibm.com> wrote:
> Hi Oliver,
>
> Thanks for the review. I have commented inline below.
>
>
> On 08/30/2018 01:24 PM, Oliver wrote:
>>
>> On Wed, Aug 29, 2018 at 4:28 PM, Abhishek Goel
>> <huntbag@linux.vnet.ibm.com> wrote:
>>>
>>> Add a new device-tree property freq-domain-indicator to define group of
>>> CPUs which would share same frequency. This property has been added under
>>> power-mgmt node.It is a bitmask.
>>>
>>> Bitwise AND is taken between this bitmask value and PIR of cpu. All the
>>> CPUs lying in the same frequency domain will have same result for AND.
>>>
>>> For example, For POWER9, 0xFFF0 indicates quad wide frequency domain.
>>> Taking AND with the PIR of CPUs will yield us frequency domain which is
>>> quad wise distribution as last 4 bits have been masked which represent
>>> the
>>> cores.
>>>
>>> Similarly, 0xFFF4 will represent core wide frequency domain for P9.
>>>
>>> Signed-off-by: Abhishek Goel <huntbag@linux.vnet.ibm.com>
>>> ---
>>>   doc/device-tree/ibm,opal/power-mgt/occ.rst | 19 +++++++++++++++++++
>>>   hw/occ.c                                   | 24
>>> ++++++++++++++++++++++++
>>>   2 files changed, 43 insertions(+)
>>>
>>> diff --git a/doc/device-tree/ibm,opal/power-mgt/occ.rst
>>> b/doc/device-tree/ibm,opal/power-mgt/occ.rst
>>> index d13a62b..6a60443 100644
>>> --- a/doc/device-tree/ibm,opal/power-mgt/occ.rst
>>> +++ b/doc/device-tree/ibm,opal/power-mgt/occ.rst
>>> @@ -37,3 +37,22 @@ ibm,pstate-vcss ibm,pstate-vdds
>>>   These properties list a voltage-identifier of each of the pstates
>>> listed in
>>>   ibm,pstate-ids for the Vcs and Vdd values used for that pstate in that
>>> chip.
>>>   Each VID is a single byte.
>>> +
>>> +ibm,opal/power-mgt/freq-domain-mask
>>> +--------------------------------------------
>>> +
>>> +This property is a bitmask which will have different value depending
>>> upon the
>>> +generation of the processor. Frequency domain would indicate group of
>>> CPUs
>>> +which would share same frequency. Bitwise AND is taken between this
>>> bitmask
>>> +value and PIR of cpu. All the CPUs lying in the same frequency domain
>>> will have
>>> +same result for AND. Thus frequency management can be done based on
>>> frequency
>>> +domain. A frequency domain may be a core or a quad, etc depending upon
>>> the
>>> +generation of the processor.
>>>
>>> +For example, for POWER8 0xFFF8 indicates core wide frequency domain.
>>> Taking AND
>>> +with the PIR of CPUs will yield us a frequency domain which is core wide
>>> +distribution as last 3 bits have been masked which represent the
>>> threads.
>>> +
>>> +For POWER9, 0xFFF0 indicates quad wide frequency domain. Taking AND with
>>> +the PIR of CPUs will yield us frequency domain which is quad wise
>>> +distribution as last 4 bits have been masked which represent the cores.
>>>
>>> diff --git a/hw/occ.c b/hw/occ.c
>>> index a55bf8e..52c9db2 100644
>>> --- a/hw/occ.c
>>> +++ b/hw/occ.c
>>> @@ -48,6 +48,12 @@
>>>   #define MAX_OPAL_CMD_DATA_LENGTH       4090
>>>   #define MAX_OCC_RSP_DATA_LENGTH                8698
>>>
>>> +#define P8_PIR_CORE_MASK               0xFFF8
>>> +#define P9_PIR_CORE_MASK               0xFFF4
>>> +#define P9_PIR_QUAD_MASK               0xFFF0
>>> +#define FREQ_MAX_IN_DOMAIN             0
>>> +#define FREQ_MOST_RECENTLY_SET         1
>>> +
>>>   /**
>>>    * OCC-OPAL Shared Memory Region
>>>    *
>>> @@ -494,6 +500,16 @@ static bool add_cpu_pstate_properties(int
>>> *pstate_nom)
>>>          u8 nr_pstates;
>>>          bool ultra_turbo_supported;
>>>          int i, major, minor;
>>> +       u8 domain_runs_at;
>>> +       u32 freq_domain_mask;
>>> +
>>> +       /* TODO Firmware plumbing required so as to have two modes to set
>>> +        * PMCR based on max in domain or most recently used. As of
>>> today,
>>> +        * it is always max in domain.
>>> +        */
>>> +       domain_runs_at = FREQ_MAX_IN_DOMAIN;
>>> +       /* Initialized with P8 core domain mask */
>>> +       freq_domain_mask = P8_PIR_CORE_MASK;
>>
>> move this into an else branch of the if below.
>
> OK
>>
>>
>>>          prlog(PR_DEBUG, "OCC: CPU pstate state device tree init\n");
>>>
>>> @@ -666,6 +682,13 @@ static bool add_cpu_pstate_properties(int
>>> *pstate_nom)
>>>                  return false;
>>>          }
>>>
>>> +       if (proc_gen == proc_gen_p9) {
>>> +               if (domain_runs_at == FREQ_MAX_IN_DOMAIN)
>>> +                       freq_domain_mask = P9_PIR_CORE_MASK;
>>> +               else if (domain_runs_at == FREQ_MOST_RECENTLY_SET)
>>> +                       freq_domain_mask = P9_PIR_QUAD_MASK;
>>> +       }
>>
>> I don't understand what's going on here. What is the difference
>> between FREQ_MAX_IN_DOMAIN and FREQ_MOST_RECENTLY_SET? Why are you
>> using P9_PIR_CORE_MASK in the MAX_IN_DOMAIN case? As far as I know we
>> only have frequency control at the quad level, so why does using the
>> core mask as the domain mask make sense here?
>
> There are two strategies in which the OCC can change the frequency of the
> cores in the quad on P9.
> 1) FREQ_MAX_IN_DOMAIN : the OCC sets the frequency of the quad to the
> maximal value requested by the component cores.
> 2) FREQ_MOST_RECENTLY_SET : the OCC sets the frequency of the quad to the
> most recent frequency value requested  by the CPUs in the quad
>
> In case of P8, the domain is the core and the strategy by default is
> FREQ_MOST_RECENTLY_SET (since the PMCRs of the threads in the core are
> mirrored). However on P9, the domain is quad and the strategy is
> FREQ_MAX_IN_DOMAIN (since each core has its own PMCR).
>
> The kernel code expects that the frequencies on the domain changes as per
> the MOST_RECENTLY_SET strategy. If we change domain to quad with the
> strategy being MAX_IN_DOMAIN, in order to be consistent across all the cores
> in the quad, the kernel will have to send IPIs to the other cores to
> decrease frequency on the whole quad, which causes unnecessary jitter.

In what situations do we need to force the frequency down across the
quad? If a core is loaded then I'd assume we want to to be running as
fast as possible, or are we trying to implement some kind of
throttling in Linux rather than relying on the OCC to do it?

> So, if we are *only* sending the frequency domain mask in the device-tree,
> and not the strategy, then, in order to be consistent with the current
> kernel behaviour, as long as the strategy is MAX_IN_DOMAIN, we are falling
> back to CORE domain (which is what kernel assumes today). Once OCC gives us
> the MOST_RECENTLY_SET strategy, this patch will choose the QUAD domain,
> while remaining consistent with the Kernel's expectation of the
> MOST_RECENTLY_SET strategy.

I guess that makes sense. This whole thing seems like a really weird
hack in firmware to trick a cpufreq driver that was written to do
per-core management into working with a group of cores. Fundamentally
the problem is in the kernel driver, so the fix needs to be there too.

> Suppose we pass both the frequency-domain-mask and the strategy, then we can
> leave this to the Kernel. Would that be better ?

That would be better. I don't mind adding more stuff to the DT, but I
do mind adding information that's deliberately wrong in difficult to
describe ways ;)

Oliver

>>>          /* Add the device-tree entries */
>>>          dt_add_property(power_mgt, "ibm,pstate-ids", dt_id,
>>>                          nr_pstates * sizeof(u32));
>>> @@ -674,6 +697,7 @@ static bool add_cpu_pstate_properties(int
>>> *pstate_nom)
>>>          dt_add_property_cells(power_mgt, "ibm,pstate-min", pmin);
>>>          dt_add_property_cells(power_mgt, "ibm,pstate-nominal", pnom);
>>>          dt_add_property_cells(power_mgt, "ibm,pstate-max", pmax);
>>> +       dt_add_property_cells(power_mgt, "freq-domain-mask",
>>> freq_domain_mask);
>>>
>>>          free(dt_freq);
>>>          free(dt_id);
>>> --
>>> 1.8.3.1
>>>
>>> _______________________________________________
>>> Skiboot mailing list
>>> Skiboot@lists.ozlabs.org
>>> https://lists.ozlabs.org/listinfo/skiboot
>
>
> Thanks and Regards,
> Abhishek
>
diff mbox series

Patch

diff --git a/doc/device-tree/ibm,opal/power-mgt/occ.rst b/doc/device-tree/ibm,opal/power-mgt/occ.rst
index d13a62b..6a60443 100644
--- a/doc/device-tree/ibm,opal/power-mgt/occ.rst
+++ b/doc/device-tree/ibm,opal/power-mgt/occ.rst
@@ -37,3 +37,22 @@  ibm,pstate-vcss ibm,pstate-vdds
 These properties list a voltage-identifier of each of the pstates listed in
 ibm,pstate-ids for the Vcs and Vdd values used for that pstate in that chip.
 Each VID is a single byte.
+
+ibm,opal/power-mgt/freq-domain-mask
+--------------------------------------------
+
+This property is a bitmask which will have different value depending upon the
+generation of the processor. Frequency domain would indicate group of CPUs
+which would share same frequency. Bitwise AND is taken between this bitmask
+value and PIR of cpu. All the CPUs lying in the same frequency domain will have
+same result for AND. Thus frequency management can be done based on frequency
+domain. A frequency domain may be a core or a quad, etc depending upon the
+generation of the processor.
+
+For example, for POWER8 0xFFF8 indicates core wide frequency domain. Taking AND
+with the PIR of CPUs will yield us a frequency domain which is core wide
+distribution as last 3 bits have been masked which represent the threads.
+
+For POWER9, 0xFFF0 indicates quad wide frequency domain. Taking AND with
+the PIR of CPUs will yield us frequency domain which is quad wise
+distribution as last 4 bits have been masked which represent the cores.
diff --git a/hw/occ.c b/hw/occ.c
index a55bf8e..52c9db2 100644
--- a/hw/occ.c
+++ b/hw/occ.c
@@ -48,6 +48,12 @@ 
 #define MAX_OPAL_CMD_DATA_LENGTH	4090
 #define MAX_OCC_RSP_DATA_LENGTH		8698
 
+#define P8_PIR_CORE_MASK		0xFFF8
+#define P9_PIR_CORE_MASK		0xFFF4
+#define P9_PIR_QUAD_MASK		0xFFF0
+#define FREQ_MAX_IN_DOMAIN		0
+#define FREQ_MOST_RECENTLY_SET		1
+
 /**
  * OCC-OPAL Shared Memory Region
  *
@@ -494,6 +500,16 @@  static bool add_cpu_pstate_properties(int *pstate_nom)
 	u8 nr_pstates;
 	bool ultra_turbo_supported;
 	int i, major, minor;
+	u8 domain_runs_at;
+	u32 freq_domain_mask;
+
+	/* TODO Firmware plumbing required so as to have two modes to set
+	 * PMCR based on max in domain or most recently used. As of today,
+	 * it is always max in domain.
+	 */
+	domain_runs_at = FREQ_MAX_IN_DOMAIN;
+	/* Initialized with P8 core domain mask */
+	freq_domain_mask = P8_PIR_CORE_MASK;
 
 	prlog(PR_DEBUG, "OCC: CPU pstate state device tree init\n");
 
@@ -666,6 +682,13 @@  static bool add_cpu_pstate_properties(int *pstate_nom)
 		return false;
 	}
 
+	if (proc_gen == proc_gen_p9) {
+		if (domain_runs_at == FREQ_MAX_IN_DOMAIN)
+			freq_domain_mask = P9_PIR_CORE_MASK;
+		else if (domain_runs_at == FREQ_MOST_RECENTLY_SET)
+			freq_domain_mask = P9_PIR_QUAD_MASK;
+	}
+
 	/* Add the device-tree entries */
 	dt_add_property(power_mgt, "ibm,pstate-ids", dt_id,
 			nr_pstates * sizeof(u32));
@@ -674,6 +697,7 @@  static bool add_cpu_pstate_properties(int *pstate_nom)
 	dt_add_property_cells(power_mgt, "ibm,pstate-min", pmin);
 	dt_add_property_cells(power_mgt, "ibm,pstate-nominal", pnom);
 	dt_add_property_cells(power_mgt, "ibm,pstate-max", pmax);
+	dt_add_property_cells(power_mgt, "freq-domain-mask", freq_domain_mask);
 
 	free(dt_freq);
 	free(dt_id);