diff mbox series

[18/19] target/ppc/pmu_book3s_helper.c: add PM_CMPLU_STALL mock events

Message ID 20210809131057.1694145-19-danielhb413@gmail.com
State New
Headers show
Series PMU-EBB support for PPC64 TCG | expand

Commit Message

Daniel Henrique Barboza Aug. 9, 2021, 1:10 p.m. UTC
EBB powerpc kernel test 'multi_counter_test' uses PM_CMPLU_STALL events
that we do not support. These events are related to CPU stalled/wasted
cycles while waiting for resources, cache misses and so on.

Unlike the 0xFA event added previously, there's no available equivalent
for us to use, and at this moment we can't sample those events either.
What we can do is mock those events as if we were calculating them.

This patch implements PM_CMPLU_STALL, PM_CMPLU_STALL_FXU,
PM_CMPLU_STALL_OTHER_CMPL and PM_CMPLU_STALL_THRD mock events by giving
them a fixed amount of the total elapsed cycles.

The chosen sample values for these events (25% of total cycles for
PM_CMPLU_STALL and 5% for the other three) were chosen at random and have
no intention of being truthful to what real PowerPC hardware would
give us. Our intention here is to make the 'multi_counter_test' EBB test
pass.

Signed-off-by: Daniel Henrique Barboza <danielhb413@gmail.com>
---
 target/ppc/pmu_book3s_helper.c | 81 +++++++++++++++++++++++++++++++++-
 1 file changed, 79 insertions(+), 2 deletions(-)

Comments

David Gibson Aug. 10, 2021, 4:17 a.m. UTC | #1
On Mon, Aug 09, 2021 at 10:10:56AM -0300, Daniel Henrique Barboza wrote:
> EBB powerpc kernel test 'multi_counter_test' uses PM_CMPLU_STALL events
> that we do not support. These events are related to CPU stalled/wasted
> cycles while waiting for resources, cache misses and so on.
> 
> Unlike the 0xFA event added previously, there's no available equivalent
> for us to use, and at this moment we can't sample those events as well.
> What we can do is mock those events as if we were calculating them.
> 
> This patch implements PM_CMPLU_STALL, PM_CMPLU_STALL_FXU,
> PM_CMPLU_STALL_OTHER_CMPL and PM_CMPLU_STALL_THRD mock events by giving
> them a fixed amount of the total elapsed cycles.
> 
> The chosen sample values for these events (25% of total cycles for
> PM_CMPLU_STALL and 5% for the other three) were chosen at random and has
> no intention of being truthful with what a real PowerPC hardware would
> give us. Our intention here is to make 'multi_counter_test' EBB test
> pass.

Hmm.  I guess these mock values make sense for getting the kernel
tests to pass, but I'm not sure if it's a good idea in general.  Would
we be better off just reporting 0 always - that would be a strong hint
to someone attempting to analyze results that something is fishy (in
this case that they don't actually have a real CPU).

> 
> Signed-off-by: Daniel Henrique Barboza <danielhb413@gmail.com>
> ---
>  target/ppc/pmu_book3s_helper.c | 81 +++++++++++++++++++++++++++++++++-
>  1 file changed, 79 insertions(+), 2 deletions(-)
> 
> diff --git a/target/ppc/pmu_book3s_helper.c b/target/ppc/pmu_book3s_helper.c
> index ae7050cd62..32cf76b77f 100644
> --- a/target/ppc/pmu_book3s_helper.c
> +++ b/target/ppc/pmu_book3s_helper.c
> @@ -92,16 +92,54 @@ static void update_PMC_PM_CYC(CPUPPCState *env, int sprn,
>      env->spr[sprn] += get_cycles(icount_delta);
>  }
>  
> +static int get_stall_ratio(uint8_t stall_event)
> +{
> +    int stall_ratio = 0;
> +
> +    switch (stall_event) {
> +    case 0xA:
> +        stall_ratio = 25;
> +        break;
> +    case 0x6:
> +    case 0x16:
> +    case 0x1C:
> +        stall_ratio = 5;
> +        break;
> +    default:
> +        break;
> +    }
> +
> +    return stall_ratio;
> +}
> +
> +static void update_PMC_PM_STALL(CPUPPCState *env, int sprn,
> +                                uint64_t icount_delta,
> +                                uint8_t stall_event)
> +{
> +    int stall_ratio = get_stall_ratio(stall_event);
> +    uint64_t cycles = muldiv64(get_cycles(icount_delta), stall_ratio, 100);
> +
> +    env->spr[sprn] += cycles;
> +}
> +
>  static void update_programmable_PMC_reg(CPUPPCState *env, int sprn,
>                                          uint64_t icount_delta)
>  {
> -    switch (get_PMC_event(env, sprn)) {
> +    uint8_t event = get_PMC_event(env, sprn);
> +
> +    switch (event) {
>      case 0x2:
>          update_PMC_PM_INST_CMPL(env, sprn, icount_delta);
>          break;
>      case 0x1E:
>          update_PMC_PM_CYC(env, sprn, icount_delta);
>          break;
> +    case 0xA:
> +    case 0x6:
> +    case 0x16:
> +    case 0x1C:
> +        update_PMC_PM_STALL(env, sprn, icount_delta, event);
> +        break;
>      default:
>          return;
>      }
> @@ -163,6 +201,34 @@ static int64_t get_CYC_timeout(CPUPPCState *env, int sprn)
>      return muldiv64(remaining_cyc, NANOSECONDS_PER_SECOND, PPC_CPU_FREQ);
>  }
>  
> +static int64_t get_stall_timeout(CPUPPCState *env, int sprn,
> +                                 uint8_t stall_event)
> +{
> +    uint64_t remaining_cyc;
> +    int stall_multiplier;
> +
> +    if (env->spr[sprn] == 0) {
> +        return icount_to_ns(COUNTER_NEGATIVE_VAL);
> +    }
> +
> +    if (env->spr[sprn] >= COUNTER_NEGATIVE_VAL) {
> +        return 0;
> +    }
> +
> +    remaining_cyc = COUNTER_NEGATIVE_VAL - env->spr[sprn];
> +
> +    /*
> +     * Consider that for this stall event we'll advance the counter
> +     * in a lower rate, thus requiring more cycles to overflow.
> +     * E.g. for PM_CMPLU_STALL (0xA), ratio 25, it'll require
> +     * 100/25 = 4 times the same amount of cycles to overflow.
> +     */
> +    stall_multiplier = 100 / get_stall_ratio(stall_event);
> +    remaining_cyc *= stall_multiplier;
> +
> +    return muldiv64(remaining_cyc, NANOSECONDS_PER_SECOND, PPC_CPU_FREQ);
> +}
> +
>  static bool pmc_counter_negative_enabled(CPUPPCState *env, int sprn)
>  {
>      bool PMC14_running = !(env->spr[SPR_POWER_MMCR0] & MMCR0_FC14);
> @@ -191,6 +257,7 @@ static bool pmc_counter_negative_enabled(CPUPPCState *env, int sprn)
>  static int64_t get_counter_neg_timeout(CPUPPCState *env, int sprn)
>  {
>      int64_t timeout = -1;
> +    uint8_t event;
>  
>      if (!pmc_counter_negative_enabled(env, sprn)) {
>          return -1;
> @@ -205,13 +272,23 @@ static int64_t get_counter_neg_timeout(CPUPPCState *env, int sprn)
>      case SPR_POWER_PMC2:
>      case SPR_POWER_PMC3:
>      case SPR_POWER_PMC4:
> -        switch (get_PMC_event(env, sprn)) {
> +        event = get_PMC_event(env, sprn);
> +
> +        switch (event) {
>          case 0x2:
>              timeout = get_INST_CMPL_timeout(env, sprn);
>              break;
>          case 0x1E:
>              timeout = get_CYC_timeout(env, sprn);
>              break;
> +        case 0xA:
> +        case 0x6:
> +        case 0x16:
> +        case 0x1c:
> +            timeout = get_stall_timeout(env, sprn, event);
> +            break;
> +        default:
> +            break;
>          }
>  
>          break;
Daniel Henrique Barboza Aug. 10, 2021, 7:48 p.m. UTC | #2
On 8/10/21 1:17 AM, David Gibson wrote:
> On Mon, Aug 09, 2021 at 10:10:56AM -0300, Daniel Henrique Barboza wrote:
>> EBB powerpc kernel test 'multi_counter_test' uses PM_CMPLU_STALL events
>> that we do not support. These events are related to CPU stalled/wasted
>> cycles while waiting for resources, cache misses and so on.
>>
>> Unlike the 0xFA event added previously, there's no available equivalent
>> for us to use, and at this moment we can't sample those events as well.
>> What we can do is mock those events as if we were calculating them.
>>
>> This patch implements PM_CMPLU_STALL, PM_CMPLU_STALL_FXU,
>> PM_CMPLU_STALL_OTHER_CMPL and PM_CMPLU_STALL_THRD mock events by giving
>> them a fixed amount of the total elapsed cycles.
>>
>> The chosen sample values for these events (25% of total cycles for
>> PM_CMPLU_STALL and 5% for the other three) were chosen at random and has
>> no intention of being truthful with what a real PowerPC hardware would
>> give us. Our intention here is to make 'multi_counter_test' EBB test
>> pass.
> 
> Hmm.  I guess these mock values make sense for getting the kernel
> tests to pass, but I'm not sure if it's a good idea in general.  Would
> we be better off just reporting 0 always - that would be a strong hint
> to someone attempting to analyze results that something is fishy (in
> this case that they don't actually have a real CPU).

We can drop this patch and I'll add a note in the docs that the EBB kernel
test 'multi_counter_test' fails because the PMU does not implement STALL
events.


Thanks,


Daniel

> 
>>
>> Signed-off-by: Daniel Henrique Barboza <danielhb413@gmail.com>
>> ---
>>   target/ppc/pmu_book3s_helper.c | 81 +++++++++++++++++++++++++++++++++-
>>   1 file changed, 79 insertions(+), 2 deletions(-)
>>
>> diff --git a/target/ppc/pmu_book3s_helper.c b/target/ppc/pmu_book3s_helper.c
>> index ae7050cd62..32cf76b77f 100644
>> --- a/target/ppc/pmu_book3s_helper.c
>> +++ b/target/ppc/pmu_book3s_helper.c
>> @@ -92,16 +92,54 @@ static void update_PMC_PM_CYC(CPUPPCState *env, int sprn,
>>       env->spr[sprn] += get_cycles(icount_delta);
>>   }
>>   
>> +static int get_stall_ratio(uint8_t stall_event)
>> +{
>> +    int stall_ratio = 0;
>> +
>> +    switch (stall_event) {
>> +    case 0xA:
>> +        stall_ratio = 25;
>> +        break;
>> +    case 0x6:
>> +    case 0x16:
>> +    case 0x1C:
>> +        stall_ratio = 5;
>> +        break;
>> +    default:
>> +        break;
>> +    }
>> +
>> +    return stall_ratio;
>> +}
>> +
>> +static void update_PMC_PM_STALL(CPUPPCState *env, int sprn,
>> +                                uint64_t icount_delta,
>> +                                uint8_t stall_event)
>> +{
>> +    int stall_ratio = get_stall_ratio(stall_event);
>> +    uint64_t cycles = muldiv64(get_cycles(icount_delta), stall_ratio, 100);
>> +
>> +    env->spr[sprn] += cycles;
>> +}
>> +
>>   static void update_programmable_PMC_reg(CPUPPCState *env, int sprn,
>>                                           uint64_t icount_delta)
>>   {
>> -    switch (get_PMC_event(env, sprn)) {
>> +    uint8_t event = get_PMC_event(env, sprn);
>> +
>> +    switch (event) {
>>       case 0x2:
>>           update_PMC_PM_INST_CMPL(env, sprn, icount_delta);
>>           break;
>>       case 0x1E:
>>           update_PMC_PM_CYC(env, sprn, icount_delta);
>>           break;
>> +    case 0xA:
>> +    case 0x6:
>> +    case 0x16:
>> +    case 0x1C:
>> +        update_PMC_PM_STALL(env, sprn, icount_delta, event);
>> +        break;
>>       default:
>>           return;
>>       }
>> @@ -163,6 +201,34 @@ static int64_t get_CYC_timeout(CPUPPCState *env, int sprn)
>>       return muldiv64(remaining_cyc, NANOSECONDS_PER_SECOND, PPC_CPU_FREQ);
>>   }
>>   
>> +static int64_t get_stall_timeout(CPUPPCState *env, int sprn,
>> +                                 uint8_t stall_event)
>> +{
>> +    uint64_t remaining_cyc;
>> +    int stall_multiplier;
>> +
>> +    if (env->spr[sprn] == 0) {
>> +        return icount_to_ns(COUNTER_NEGATIVE_VAL);
>> +    }
>> +
>> +    if (env->spr[sprn] >= COUNTER_NEGATIVE_VAL) {
>> +        return 0;
>> +    }
>> +
>> +    remaining_cyc = COUNTER_NEGATIVE_VAL - env->spr[sprn];
>> +
>> +    /*
>> +     * Consider that for this stall event we'll advance the counter
>> +     * in a lower rate, thus requiring more cycles to overflow.
>> +     * E.g. for PM_CMPLU_STALL (0xA), ratio 25, it'll require
>> +     * 100/25 = 4 times the same amount of cycles to overflow.
>> +     */
>> +    stall_multiplier = 100 / get_stall_ratio(stall_event);
>> +    remaining_cyc *= stall_multiplier;
>> +
>> +    return muldiv64(remaining_cyc, NANOSECONDS_PER_SECOND, PPC_CPU_FREQ);
>> +}
>> +
>>   static bool pmc_counter_negative_enabled(CPUPPCState *env, int sprn)
>>   {
>>       bool PMC14_running = !(env->spr[SPR_POWER_MMCR0] & MMCR0_FC14);
>> @@ -191,6 +257,7 @@ static bool pmc_counter_negative_enabled(CPUPPCState *env, int sprn)
>>   static int64_t get_counter_neg_timeout(CPUPPCState *env, int sprn)
>>   {
>>       int64_t timeout = -1;
>> +    uint8_t event;
>>   
>>       if (!pmc_counter_negative_enabled(env, sprn)) {
>>           return -1;
>> @@ -205,13 +272,23 @@ static int64_t get_counter_neg_timeout(CPUPPCState *env, int sprn)
>>       case SPR_POWER_PMC2:
>>       case SPR_POWER_PMC3:
>>       case SPR_POWER_PMC4:
>> -        switch (get_PMC_event(env, sprn)) {
>> +        event = get_PMC_event(env, sprn);
>> +
>> +        switch (event) {
>>           case 0x2:
>>               timeout = get_INST_CMPL_timeout(env, sprn);
>>               break;
>>           case 0x1E:
>>               timeout = get_CYC_timeout(env, sprn);
>>               break;
>> +        case 0xA:
>> +        case 0x6:
>> +        case 0x16:
>> +        case 0x1c:
>> +            timeout = get_stall_timeout(env, sprn, event);
>> +            break;
>> +        default:
>> +            break;
>>           }
>>   
>>           break;
>
David Gibson Aug. 11, 2021, 3:37 a.m. UTC | #3
On Tue, Aug 10, 2021 at 04:48:59PM -0300, Daniel Henrique Barboza wrote:
> 
> 
> On 8/10/21 1:17 AM, David Gibson wrote:
> > On Mon, Aug 09, 2021 at 10:10:56AM -0300, Daniel Henrique Barboza wrote:
> > > EBB powerpc kernel test 'multi_counter_test' uses PM_CMPLU_STALL events
> > > that we do not support. These events are related to CPU stalled/wasted
> > > cycles while waiting for resources, cache misses and so on.
> > > 
> > > Unlike the 0xFA event added previously, there's no available equivalent
> > > for us to use, and at this moment we can't sample those events as well.
> > > What we can do is mock those events as if we were calculating them.
> > > 
> > > This patch implements PM_CMPLU_STALL, PM_CMPLU_STALL_FXU,
> > > PM_CMPLU_STALL_OTHER_CMPL and PM_CMPLU_STALL_THRD mock events by giving
> > > them a fixed amount of the total elapsed cycles.
> > > 
> > > The chosen sample values for these events (25% of total cycles for
> > > PM_CMPLU_STALL and 5% for the other three) were chosen at random and has
> > > no intention of being truthful with what a real PowerPC hardware would
> > > give us. Our intention here is to make 'multi_counter_test' EBB test
> > > pass.
> > 
> > Hmm.  I guess these mock values make sense for getting the kernel
> > tests to pass, but I'm not sure if it's a good idea in general.  Would
> > we be better off just reporting 0 always - that would be a strong hint
> > to someone attempting to analyze results that something is fishy (in
> > this case that they don't actually have a real CPU).
> 
> We can drop this patch and I'll add a note in the docs that the EBB kernel
> test 'multi_counter_test' fails because the PMU does not implement STALL
> events.

Sounds good for now.  We can reconsider this if we have a specific
justification for it.
diff mbox series

Patch

diff --git a/target/ppc/pmu_book3s_helper.c b/target/ppc/pmu_book3s_helper.c
index ae7050cd62..32cf76b77f 100644
--- a/target/ppc/pmu_book3s_helper.c
+++ b/target/ppc/pmu_book3s_helper.c
@@ -92,16 +92,54 @@  static void update_PMC_PM_CYC(CPUPPCState *env, int sprn,
     env->spr[sprn] += get_cycles(icount_delta);
 }
 
+static int get_stall_ratio(uint8_t stall_event)
+{
+    int stall_ratio = 0;
+
+    switch (stall_event) {
+    case 0xA:
+        stall_ratio = 25;
+        break;
+    case 0x6:
+    case 0x16:
+    case 0x1C:
+        stall_ratio = 5;
+        break;
+    default:
+        break;
+    }
+
+    return stall_ratio;
+}
+
+static void update_PMC_PM_STALL(CPUPPCState *env, int sprn,
+                                uint64_t icount_delta,
+                                uint8_t stall_event)
+{
+    int stall_ratio = get_stall_ratio(stall_event);
+    uint64_t cycles = muldiv64(get_cycles(icount_delta), stall_ratio, 100);
+
+    env->spr[sprn] += cycles;
+}
+
 static void update_programmable_PMC_reg(CPUPPCState *env, int sprn,
                                         uint64_t icount_delta)
 {
-    switch (get_PMC_event(env, sprn)) {
+    uint8_t event = get_PMC_event(env, sprn);
+
+    switch (event) {
     case 0x2:
         update_PMC_PM_INST_CMPL(env, sprn, icount_delta);
         break;
     case 0x1E:
         update_PMC_PM_CYC(env, sprn, icount_delta);
         break;
+    case 0xA:
+    case 0x6:
+    case 0x16:
+    case 0x1C:
+        update_PMC_PM_STALL(env, sprn, icount_delta, event);
+        break;
     default:
         return;
     }
@@ -163,6 +201,34 @@  static int64_t get_CYC_timeout(CPUPPCState *env, int sprn)
     return muldiv64(remaining_cyc, NANOSECONDS_PER_SECOND, PPC_CPU_FREQ);
 }
 
+static int64_t get_stall_timeout(CPUPPCState *env, int sprn,
+                                 uint8_t stall_event)
+{
+    uint64_t remaining_cyc;
+    int stall_multiplier;
+
+    if (env->spr[sprn] == 0) {
+        return icount_to_ns(COUNTER_NEGATIVE_VAL);
+    }
+
+    if (env->spr[sprn] >= COUNTER_NEGATIVE_VAL) {
+        return 0;
+    }
+
+    remaining_cyc = COUNTER_NEGATIVE_VAL - env->spr[sprn];
+
+    /*
+     * Consider that for this stall event we'll advance the counter
+     * in a lower rate, thus requiring more cycles to overflow.
+     * E.g. for PM_CMPLU_STALL (0xA), ratio 25, it'll require
+     * 100/25 = 4 times the same amount of cycles to overflow.
+     */
+    stall_multiplier = 100 / get_stall_ratio(stall_event);
+    remaining_cyc *= stall_multiplier;
+
+    return muldiv64(remaining_cyc, NANOSECONDS_PER_SECOND, PPC_CPU_FREQ);
+}
+
 static bool pmc_counter_negative_enabled(CPUPPCState *env, int sprn)
 {
     bool PMC14_running = !(env->spr[SPR_POWER_MMCR0] & MMCR0_FC14);
@@ -191,6 +257,7 @@  static bool pmc_counter_negative_enabled(CPUPPCState *env, int sprn)
 static int64_t get_counter_neg_timeout(CPUPPCState *env, int sprn)
 {
     int64_t timeout = -1;
+    uint8_t event;
 
     if (!pmc_counter_negative_enabled(env, sprn)) {
         return -1;
@@ -205,13 +272,23 @@  static int64_t get_counter_neg_timeout(CPUPPCState *env, int sprn)
     case SPR_POWER_PMC2:
     case SPR_POWER_PMC3:
     case SPR_POWER_PMC4:
-        switch (get_PMC_event(env, sprn)) {
+        event = get_PMC_event(env, sprn);
+
+        switch (event) {
         case 0x2:
             timeout = get_INST_CMPL_timeout(env, sprn);
             break;
         case 0x1E:
             timeout = get_CYC_timeout(env, sprn);
             break;
+        case 0xA:
+        case 0x6:
+        case 0x16:
+        case 0x1c:
+            timeout = get_stall_timeout(env, sprn, event);
+            break;
+        default:
+            break;
         }
 
         break;