diff mbox series

[v8,06/10] target/ppc: enable PMU instruction count

Message ID 20211125150817.573204-7-danielhb413@gmail.com
State New
Headers show
Series PMU-EBB support for PPC64 TCG | expand

Commit Message

Daniel Henrique Barboza Nov. 25, 2021, 3:08 p.m. UTC
The PMU is already counting cycles by calculating time elapsed in
nanoseconds. Counting instructions is a different matter and requires
another approach.

This patch adds the capability of counting completed instructions
(Perf event PM_INST_CMPL) by counting the amount of instructions
translated in each translation block right before exiting it.

A new pmu_count_insns() helper in translation.c was added to do that.
After verifying that the PMU is running (MMCR0_FC bit not set), call
helper_insns_inc(). This new helper from power8-pmu.c will add the
instructions to the relevant counters. It'll also be responsible for
triggering counter negative overflows as it is already being done with
cycles.

Signed-off-by: Daniel Henrique Barboza <danielhb413@gmail.com>
---
 target/ppc/cpu.h                 |  1 +
 target/ppc/helper.h              |  1 +
 target/ppc/helper_regs.c         |  4 +++
 target/ppc/power8-pmu-regs.c.inc |  6 +++++
 target/ppc/power8-pmu.c          | 38 ++++++++++++++++++++++++++
 target/ppc/translate.c           | 46 ++++++++++++++++++++++++++++++++
 6 files changed, 96 insertions(+)

Comments

David Gibson Nov. 29, 2021, 4:36 a.m. UTC | #1
On Thu, Nov 25, 2021 at 12:08:13PM -0300, Daniel Henrique Barboza wrote:
> The PMU is already counting cycles by calculating time elapsed in
> nanoseconds. Counting instructions is a different matter and requires
> another approach.
> 
> This patch adds the capability of counting completed instructions
> (Perf event PM_INST_CMPL) by counting the amount of instructions
> translated in each translation block right before exiting it.
> 
> A new pmu_count_insns() helper in translation.c was added to do that.
> After verifying that the PMU is running (MMCR0_FC bit not set), call
> helper_insns_inc(). This new helper from power8-pmu.c will add the
> instructions to the relevant counters. It'll also be responsible for
> triggering counter negative overflows as it is already being done with
> cycles.
> 
> Signed-off-by: Daniel Henrique Barboza <danielhb413@gmail.com>
> ---
>  target/ppc/cpu.h                 |  1 +
>  target/ppc/helper.h              |  1 +
>  target/ppc/helper_regs.c         |  4 +++
>  target/ppc/power8-pmu-regs.c.inc |  6 +++++
>  target/ppc/power8-pmu.c          | 38 ++++++++++++++++++++++++++
>  target/ppc/translate.c           | 46 ++++++++++++++++++++++++++++++++
>  6 files changed, 96 insertions(+)
> 
> diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
> index 9b41b022e2..38cd2b5c43 100644
> --- a/target/ppc/cpu.h
> +++ b/target/ppc/cpu.h
> @@ -656,6 +656,7 @@ enum {
>      HFLAGS_PR = 14,  /* MSR_PR */
>      HFLAGS_PMCC0 = 15,  /* MMCR0 PMCC bit 0 */
>      HFLAGS_PMCC1 = 16,  /* MMCR0 PMCC bit 1 */
> +    HFLAGS_MMCR0FC = 17, /* MMCR0 FC bit */

Now that the event stuff is a bit more refined, you could narrow this
down to specifically marking if any counters are actively counting
instructions (not frozen by MMCR0[FC] and not frozen by
MMCR0[FC14|FC56] *and* have the right event selected).

Since I suspect the instruction counting instrumentation could be
quite expensive (helper call on every tb), that might be worthwhile.

>      HFLAGS_VSX = 23, /* MSR_VSX if cpu has VSX */
>      HFLAGS_VR = 25,  /* MSR_VR if cpu has VRE */
>  
> diff --git a/target/ppc/helper.h b/target/ppc/helper.h
> index 94b4690375..d8a23e054a 100644
> --- a/target/ppc/helper.h
> +++ b/target/ppc/helper.h
> @@ -24,6 +24,7 @@ DEF_HELPER_2(store_mmcr0, void, env, tl)
>  DEF_HELPER_2(store_mmcr1, void, env, tl)
>  DEF_HELPER_3(store_pmc, void, env, i32, i64)
>  DEF_HELPER_2(read_pmc, tl, env, i32)
> +DEF_HELPER_2(insns_inc, void, env, i32)
>  #endif
>  DEF_HELPER_1(check_tlb_flush_local, void, env)
>  DEF_HELPER_1(check_tlb_flush_global, void, env)
> diff --git a/target/ppc/helper_regs.c b/target/ppc/helper_regs.c
> index 99562edd57..875c2fdfc6 100644
> --- a/target/ppc/helper_regs.c
> +++ b/target/ppc/helper_regs.c
> @@ -115,6 +115,10 @@ static uint32_t hreg_compute_hflags_value(CPUPPCState *env)
>      if (env->spr[SPR_POWER_MMCR0] & MMCR0_PMCC1) {
>          hflags |= 1 << HFLAGS_PMCC1;
>      }
> +    if (env->spr[SPR_POWER_MMCR0] & MMCR0_FC) {
> +        hflags |= 1 << HFLAGS_MMCR0FC;
> +    }
> +
>  
>  #ifndef CONFIG_USER_ONLY
>      if (!env->has_hv_mode || (msr & (1ull << MSR_HV))) {
> diff --git a/target/ppc/power8-pmu-regs.c.inc b/target/ppc/power8-pmu-regs.c.inc
> index 25b13ad564..580e4e41b2 100644
> --- a/target/ppc/power8-pmu-regs.c.inc
> +++ b/target/ppc/power8-pmu-regs.c.inc
> @@ -113,6 +113,12 @@ static void write_MMCR0_common(DisasContext *ctx, TCGv val)
>       */
>      gen_icount_io_start(ctx);
>      gen_helper_store_mmcr0(cpu_env, val);
> +
> +    /*
> +     * End the translation block because MMCR0 writes can change
> +     * ctx->pmu_frozen.
> +     */
> +    ctx->base.is_jmp = DISAS_EXIT_UPDATE;
>  }
>  
>  void spr_write_MMCR0_ureg(DisasContext *ctx, int sprn, int gprn)
> diff --git a/target/ppc/power8-pmu.c b/target/ppc/power8-pmu.c
> index 01e0b9b8fc..59d0def79d 100644
> --- a/target/ppc/power8-pmu.c
> +++ b/target/ppc/power8-pmu.c
> @@ -112,6 +112,30 @@ static PMUEventType pmc_get_event(CPUPPCState *env, int sprn)
>      return evt_type;
>  }
>  
> +static bool pmu_increment_insns(CPUPPCState *env, uint32_t num_insns)
> +{
> +    bool overflow_triggered = false;
> +    int sprn;
> +
> +    /* PMC6 never counts instructions */
> +    for (sprn = SPR_POWER_PMC1; sprn <= SPR_POWER_PMC5; sprn++) {
> +        if (pmc_get_event(env, sprn) != PMU_EVENT_INSTRUCTIONS) {
> +            continue;
> +        }
> +
> +        env->spr[sprn] += num_insns;
> +
> +        if (env->spr[sprn] >= PMC_COUNTER_NEGATIVE_VAL &&
> +            pmc_has_overflow_enabled(env, sprn)) {
> +
> +            overflow_triggered = true;
> +            env->spr[sprn] = PMC_COUNTER_NEGATIVE_VAL;

Does the hardware PMU actually guarantee that the event will happen
exactly on the overflow?  Or could you count a few into the negative
zone before the event is delivered?

> +        }
> +    }
> +
> +    return overflow_triggered;
> +}
> +
>  static void pmu_update_cycles(CPUPPCState *env)
>  {
>      uint64_t now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
> @@ -258,6 +282,20 @@ static void fire_PMC_interrupt(PowerPCCPU *cpu)
>      return;
>  }
>  
> +/* This helper assumes that the PMC is running. */
> +void helper_insns_inc(CPUPPCState *env, uint32_t num_insns)
> +{
> +    bool overflow_triggered;
> +    PowerPCCPU *cpu;
> +
> +    overflow_triggered = pmu_increment_insns(env, num_insns);
> +
> +    if (overflow_triggered) {
> +        cpu = env_archcpu(env);
> +        fire_PMC_interrupt(cpu);
> +    }
> +}
> +
>  static void cpu_ppc_pmu_timer_cb(void *opaque)
>  {
>      PowerPCCPU *cpu = opaque;
> diff --git a/target/ppc/translate.c b/target/ppc/translate.c
> index 9960df6e18..ccc83d0603 100644
> --- a/target/ppc/translate.c
> +++ b/target/ppc/translate.c
> @@ -177,6 +177,7 @@ struct DisasContext {
>      bool hr;
>      bool mmcr0_pmcc0;
>      bool mmcr0_pmcc1;
> +    bool pmu_frozen;
>      ppc_spr_t *spr_cb; /* Needed to check rights for mfspr/mtspr */
>      int singlestep_enabled;
>      uint32_t flags;
> @@ -4170,6 +4171,31 @@ static inline void gen_update_cfar(DisasContext *ctx, target_ulong nip)
>  #endif
>  }
>  
> +#if defined(TARGET_PPC64) && !defined(CONFIG_USER_ONLY)

Should this actually be !CONFIG_USER_ONLY?  IIUC there are
circumstances where userspace could access the PMU, including
instruction counting.

> +static void pmu_count_insns(DisasContext *ctx)
> +{
> +    /* Do not bother calling the helper if the PMU is frozen */
> +    if (ctx->pmu_frozen) {
> +        return;
> +    }
> +
> +    /*
> +     * The PMU insns_inc() helper stops the internal PMU timer if a
> +     * counter overflows happens. In that case, if the guest is
> +     * running with icount and we do not handle it beforehand,
> +     * the helper can trigger a 'bad icount read'.
> +     */
> +    gen_icount_io_start(ctx);
> +
> +    gen_helper_insns_inc(cpu_env, tcg_constant_i32(ctx->base.num_insns));
> +}
> +#else
> +static void pmu_count_insns(DisasContext *ctx)
> +{
> +    return;
> +}
> +#endif
> +
>  static inline bool use_goto_tb(DisasContext *ctx, target_ulong dest)
>  {
>      return translator_use_goto_tb(&ctx->base, dest);
> @@ -4180,6 +4206,14 @@ static void gen_lookup_and_goto_ptr(DisasContext *ctx)
>      if (unlikely(ctx->singlestep_enabled)) {
>          gen_debug_exception(ctx);
>      } else {
> +        /*
> +         * tcg_gen_lookup_and_goto_ptr will exit the TB if
> +         * CF_NO_GOTO_PTR is set. Count insns now.
> +         */
> +        if (ctx->base.tb->flags & CF_NO_GOTO_PTR) {
> +            pmu_count_insns(ctx);
> +        }
> +
>          tcg_gen_lookup_and_goto_ptr();
>      }
>  }
> @@ -4191,6 +4225,7 @@ static void gen_goto_tb(DisasContext *ctx, int n, target_ulong dest)
>          dest = (uint32_t) dest;
>      }
>      if (use_goto_tb(ctx, dest)) {
> +        pmu_count_insns(ctx);
>          tcg_gen_goto_tb(n);
>          tcg_gen_movi_tl(cpu_nip, dest & ~3);
>          tcg_gen_exit_tb(ctx->base.tb, n);
> @@ -8458,6 +8493,7 @@ static void ppc_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
>      ctx->hr = (hflags >> HFLAGS_HR) & 1;
>      ctx->mmcr0_pmcc0 = (hflags >> HFLAGS_PMCC0) & 1;
>      ctx->mmcr0_pmcc1 = (hflags >> HFLAGS_PMCC1) & 1;
> +    ctx->pmu_frozen = (hflags >> HFLAGS_MMCR0FC) & 1;
>  
>      ctx->singlestep_enabled = 0;
>      if ((hflags >> HFLAGS_SE) & 1) {
> @@ -8564,6 +8600,7 @@ static void ppc_tr_tb_stop(DisasContextBase *dcbase, CPUState *cs)
>      switch (is_jmp) {
>      case DISAS_TOO_MANY:
>          if (use_goto_tb(ctx, nip)) {
> +            pmu_count_insns(ctx);
>              tcg_gen_goto_tb(0);
>              gen_update_nip(ctx, nip);
>              tcg_gen_exit_tb(ctx->base.tb, 0);
> @@ -8574,6 +8611,14 @@ static void ppc_tr_tb_stop(DisasContextBase *dcbase, CPUState *cs)
>          gen_update_nip(ctx, nip);
>          /* fall through */
>      case DISAS_CHAIN:
> +        /*
> +         * tcg_gen_lookup_and_goto_ptr will exit the TB if
> +         * CF_NO_GOTO_PTR is set. Count insns now.
> +         */
> +        if (ctx->base.tb->flags & CF_NO_GOTO_PTR) {
> +            pmu_count_insns(ctx);
> +        }
> +
>          tcg_gen_lookup_and_goto_ptr();
>          break;
>  
> @@ -8581,6 +8626,7 @@ static void ppc_tr_tb_stop(DisasContextBase *dcbase, CPUState *cs)
>          gen_update_nip(ctx, nip);
>          /* fall through */
>      case DISAS_EXIT:
> +        pmu_count_insns(ctx);
>          tcg_gen_exit_tb(NULL, 0);
>          break;
>
Daniel Henrique Barboza Nov. 30, 2021, 10:24 p.m. UTC | #2
On 11/29/21 01:36, David Gibson wrote:
> On Thu, Nov 25, 2021 at 12:08:13PM -0300, Daniel Henrique Barboza wrote:
>> The PMU is already counting cycles by calculating time elapsed in
>> nanoseconds. Counting instructions is a different matter and requires
>> another approach.
>>
>> This patch adds the capability of counting completed instructions
>> (Perf event PM_INST_CMPL) by counting the amount of instructions
>> translated in each translation block right before exiting it.
>>
>> A new pmu_count_insns() helper in translation.c was added to do that.
>> After verifying that the PMU is running (MMCR0_FC bit not set), call
>> helper_insns_inc(). This new helper from power8-pmu.c will add the
>> instructions to the relevant counters. It'll also be responsible for
>> triggering counter negative overflows as it is already being done with
>> cycles.
>>
>> Signed-off-by: Daniel Henrique Barboza <danielhb413@gmail.com>
>> ---
>>   target/ppc/cpu.h                 |  1 +
>>   target/ppc/helper.h              |  1 +
>>   target/ppc/helper_regs.c         |  4 +++
>>   target/ppc/power8-pmu-regs.c.inc |  6 +++++
>>   target/ppc/power8-pmu.c          | 38 ++++++++++++++++++++++++++
>>   target/ppc/translate.c           | 46 ++++++++++++++++++++++++++++++++
>>   6 files changed, 96 insertions(+)
>>
>> diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
>> index 9b41b022e2..38cd2b5c43 100644
>> --- a/target/ppc/cpu.h
>> +++ b/target/ppc/cpu.h
>> @@ -656,6 +656,7 @@ enum {
>>       HFLAGS_PR = 14,  /* MSR_PR */
>>       HFLAGS_PMCC0 = 15,  /* MMCR0 PMCC bit 0 */
>>       HFLAGS_PMCC1 = 16,  /* MMCR0 PMCC bit 1 */
>> +    HFLAGS_MMCR0FC = 17, /* MMCR0 FC bit */
> 
> Now that the event stuff is a bit more refined, you could narrow this
> down to specifically marking if any counters are actively counting
> instructions (not frozen by MMCR0[FC] and not frozen by
> MMCR0[FC14|FC56] *and* have the right event selected).
> 
> Since I suspect the instruction counting instrumentation could be
> quite expensive (helper call on every tb), that might be worthwhile.

That was worthwhile. The performance increase is substantial with this
change, in particular with tests that exercise only cycle events.



> 
>>       HFLAGS_VSX = 23, /* MSR_VSX if cpu has VSX */
>>       HFLAGS_VR = 25,  /* MSR_VR if cpu has VRE */
>>   
>> diff --git a/target/ppc/helper.h b/target/ppc/helper.h
>> index 94b4690375..d8a23e054a 100644
>> --- a/target/ppc/helper.h
>> +++ b/target/ppc/helper.h
>> @@ -24,6 +24,7 @@ DEF_HELPER_2(store_mmcr0, void, env, tl)
>>   DEF_HELPER_2(store_mmcr1, void, env, tl)
>>   DEF_HELPER_3(store_pmc, void, env, i32, i64)
>>   DEF_HELPER_2(read_pmc, tl, env, i32)
>> +DEF_HELPER_2(insns_inc, void, env, i32)
>>   #endif
>>   DEF_HELPER_1(check_tlb_flush_local, void, env)
>>   DEF_HELPER_1(check_tlb_flush_global, void, env)
>> diff --git a/target/ppc/helper_regs.c b/target/ppc/helper_regs.c
>> index 99562edd57..875c2fdfc6 100644
>> --- a/target/ppc/helper_regs.c
>> +++ b/target/ppc/helper_regs.c
>> @@ -115,6 +115,10 @@ static uint32_t hreg_compute_hflags_value(CPUPPCState *env)
>>       if (env->spr[SPR_POWER_MMCR0] & MMCR0_PMCC1) {
>>           hflags |= 1 << HFLAGS_PMCC1;
>>       }
>> +    if (env->spr[SPR_POWER_MMCR0] & MMCR0_FC) {
>> +        hflags |= 1 << HFLAGS_MMCR0FC;
>> +    }
>> +
>>   
>>   #ifndef CONFIG_USER_ONLY
>>       if (!env->has_hv_mode || (msr & (1ull << MSR_HV))) {
>> diff --git a/target/ppc/power8-pmu-regs.c.inc b/target/ppc/power8-pmu-regs.c.inc
>> index 25b13ad564..580e4e41b2 100644
>> --- a/target/ppc/power8-pmu-regs.c.inc
>> +++ b/target/ppc/power8-pmu-regs.c.inc
>> @@ -113,6 +113,12 @@ static void write_MMCR0_common(DisasContext *ctx, TCGv val)
>>        */
>>       gen_icount_io_start(ctx);
>>       gen_helper_store_mmcr0(cpu_env, val);
>> +
>> +    /*
>> +     * End the translation block because MMCR0 writes can change
>> +     * ctx->pmu_frozen.
>> +     */
>> +    ctx->base.is_jmp = DISAS_EXIT_UPDATE;
>>   }
>>   
>>   void spr_write_MMCR0_ureg(DisasContext *ctx, int sprn, int gprn)
>> diff --git a/target/ppc/power8-pmu.c b/target/ppc/power8-pmu.c
>> index 01e0b9b8fc..59d0def79d 100644
>> --- a/target/ppc/power8-pmu.c
>> +++ b/target/ppc/power8-pmu.c
>> @@ -112,6 +112,30 @@ static PMUEventType pmc_get_event(CPUPPCState *env, int sprn)
>>       return evt_type;
>>   }
>>   
>> +static bool pmu_increment_insns(CPUPPCState *env, uint32_t num_insns)
>> +{
>> +    bool overflow_triggered = false;
>> +    int sprn;
>> +
>> +    /* PMC6 never counts instructions */
>> +    for (sprn = SPR_POWER_PMC1; sprn <= SPR_POWER_PMC5; sprn++) {
>> +        if (pmc_get_event(env, sprn) != PMU_EVENT_INSTRUCTIONS) {
>> +            continue;
>> +        }
>> +
>> +        env->spr[sprn] += num_insns;
>> +
>> +        if (env->spr[sprn] >= PMC_COUNTER_NEGATIVE_VAL &&
>> +            pmc_has_overflow_enabled(env, sprn)) {
>> +
>> +            overflow_triggered = true;
>> +            env->spr[sprn] = PMC_COUNTER_NEGATIVE_VAL;
> 
> Does the hardware PMU actually guarantee that the event will happen
> exactly on the overflow?  Or could you count a few into the negative
> zone before the event is delivered?

My understanding, from reading the ISA and from testing with a real PMU, is
that yes, it'll guarantee that the overflow will happen when the counter
reaches exactly 0x80000000.

> 
>> +        }
>> +    }
>> +
>> +    return overflow_triggered;
>> +}
>> +
>>   static void pmu_update_cycles(CPUPPCState *env)
>>   {
>>       uint64_t now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
>> @@ -258,6 +282,20 @@ static void fire_PMC_interrupt(PowerPCCPU *cpu)
>>       return;
>>   }
>>   
>> +/* This helper assumes that the PMC is running. */
>> +void helper_insns_inc(CPUPPCState *env, uint32_t num_insns)
>> +{
>> +    bool overflow_triggered;
>> +    PowerPCCPU *cpu;
>> +
>> +    overflow_triggered = pmu_increment_insns(env, num_insns);
>> +
>> +    if (overflow_triggered) {
>> +        cpu = env_archcpu(env);
>> +        fire_PMC_interrupt(cpu);
>> +    }
>> +}
>> +
>>   static void cpu_ppc_pmu_timer_cb(void *opaque)
>>   {
>>       PowerPCCPU *cpu = opaque;
>> diff --git a/target/ppc/translate.c b/target/ppc/translate.c
>> index 9960df6e18..ccc83d0603 100644
>> --- a/target/ppc/translate.c
>> +++ b/target/ppc/translate.c
>> @@ -177,6 +177,7 @@ struct DisasContext {
>>       bool hr;
>>       bool mmcr0_pmcc0;
>>       bool mmcr0_pmcc1;
>> +    bool pmu_frozen;
>>       ppc_spr_t *spr_cb; /* Needed to check rights for mfspr/mtspr */
>>       int singlestep_enabled;
>>       uint32_t flags;
>> @@ -4170,6 +4171,31 @@ static inline void gen_update_cfar(DisasContext *ctx, target_ulong nip)
>>   #endif
>>   }
>>   
>> +#if defined(TARGET_PPC64) && !defined(CONFIG_USER_ONLY)
> 
> Should this actually be !CONFIG_USER_ONLY?  IIUC there are
> circumstances where userspace could access the PMU, including
> instruction counting.

The user mode will not be able to use the PMU properly because the MMCR1
reg, used to define the events to be sampled, isn't writable by userspace
under any circumstance.


Thanks,


Daniel

> 
>> +static void pmu_count_insns(DisasContext *ctx)
>> +{
>> +    /* Do not bother calling the helper if the PMU is frozen */
>> +    if (ctx->pmu_frozen) {
>> +        return;
>> +    }
>> +
>> +    /*
>> +     * The PMU insns_inc() helper stops the internal PMU timer if a
>> +     * counter overflows happens. In that case, if the guest is
>> +     * running with icount and we do not handle it beforehand,
>> +     * the helper can trigger a 'bad icount read'.
>> +     */
>> +    gen_icount_io_start(ctx);
>> +
>> +    gen_helper_insns_inc(cpu_env, tcg_constant_i32(ctx->base.num_insns));
>> +}
>> +#else
>> +static void pmu_count_insns(DisasContext *ctx)
>> +{
>> +    return;
>> +}
>> +#endif
>> +
>>   static inline bool use_goto_tb(DisasContext *ctx, target_ulong dest)
>>   {
>>       return translator_use_goto_tb(&ctx->base, dest);
>> @@ -4180,6 +4206,14 @@ static void gen_lookup_and_goto_ptr(DisasContext *ctx)
>>       if (unlikely(ctx->singlestep_enabled)) {
>>           gen_debug_exception(ctx);
>>       } else {
>> +        /*
>> +         * tcg_gen_lookup_and_goto_ptr will exit the TB if
>> +         * CF_NO_GOTO_PTR is set. Count insns now.
>> +         */
>> +        if (ctx->base.tb->flags & CF_NO_GOTO_PTR) {
>> +            pmu_count_insns(ctx);
>> +        }
>> +
>>           tcg_gen_lookup_and_goto_ptr();
>>       }
>>   }
>> @@ -4191,6 +4225,7 @@ static void gen_goto_tb(DisasContext *ctx, int n, target_ulong dest)
>>           dest = (uint32_t) dest;
>>       }
>>       if (use_goto_tb(ctx, dest)) {
>> +        pmu_count_insns(ctx);
>>           tcg_gen_goto_tb(n);
>>           tcg_gen_movi_tl(cpu_nip, dest & ~3);
>>           tcg_gen_exit_tb(ctx->base.tb, n);
>> @@ -8458,6 +8493,7 @@ static void ppc_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
>>       ctx->hr = (hflags >> HFLAGS_HR) & 1;
>>       ctx->mmcr0_pmcc0 = (hflags >> HFLAGS_PMCC0) & 1;
>>       ctx->mmcr0_pmcc1 = (hflags >> HFLAGS_PMCC1) & 1;
>> +    ctx->pmu_frozen = (hflags >> HFLAGS_MMCR0FC) & 1;
>>   
>>       ctx->singlestep_enabled = 0;
>>       if ((hflags >> HFLAGS_SE) & 1) {
>> @@ -8564,6 +8600,7 @@ static void ppc_tr_tb_stop(DisasContextBase *dcbase, CPUState *cs)
>>       switch (is_jmp) {
>>       case DISAS_TOO_MANY:
>>           if (use_goto_tb(ctx, nip)) {
>> +            pmu_count_insns(ctx);
>>               tcg_gen_goto_tb(0);
>>               gen_update_nip(ctx, nip);
>>               tcg_gen_exit_tb(ctx->base.tb, 0);
>> @@ -8574,6 +8611,14 @@ static void ppc_tr_tb_stop(DisasContextBase *dcbase, CPUState *cs)
>>           gen_update_nip(ctx, nip);
>>           /* fall through */
>>       case DISAS_CHAIN:
>> +        /*
>> +         * tcg_gen_lookup_and_goto_ptr will exit the TB if
>> +         * CF_NO_GOTO_PTR is set. Count insns now.
>> +         */
>> +        if (ctx->base.tb->flags & CF_NO_GOTO_PTR) {
>> +            pmu_count_insns(ctx);
>> +        }
>> +
>>           tcg_gen_lookup_and_goto_ptr();
>>           break;
>>   
>> @@ -8581,6 +8626,7 @@ static void ppc_tr_tb_stop(DisasContextBase *dcbase, CPUState *cs)
>>           gen_update_nip(ctx, nip);
>>           /* fall through */
>>       case DISAS_EXIT:
>> +        pmu_count_insns(ctx);
>>           tcg_gen_exit_tb(NULL, 0);
>>           break;
>>   
>
David Gibson Nov. 30, 2021, 11:52 p.m. UTC | #3
On Tue, Nov 30, 2021 at 07:24:04PM -0300, Daniel Henrique Barboza wrote:
> 
> 
> On 11/29/21 01:36, David Gibson wrote:
> > On Thu, Nov 25, 2021 at 12:08:13PM -0300, Daniel Henrique Barboza wrote:
> > > The PMU is already counting cycles by calculating time elapsed in
> > > nanoseconds. Counting instructions is a different matter and requires
> > > another approach.
> > > 
> > > This patch adds the capability of counting completed instructions
> > > (Perf event PM_INST_CMPL) by counting the amount of instructions
> > > translated in each translation block right before exiting it.
> > > 
> > > A new pmu_count_insns() helper in translation.c was added to do that.
> > > After verifying that the PMU is running (MMCR0_FC bit not set), call
> > > helper_insns_inc(). This new helper from power8-pmu.c will add the
> > > instructions to the relevant counters. It'll also be responsible for
> > > triggering counter negative overflows as it is already being done with
> > > cycles.
> > > 
> > > Signed-off-by: Daniel Henrique Barboza <danielhb413@gmail.com>
> > > ---
> > >   target/ppc/cpu.h                 |  1 +
> > >   target/ppc/helper.h              |  1 +
> > >   target/ppc/helper_regs.c         |  4 +++
> > >   target/ppc/power8-pmu-regs.c.inc |  6 +++++
> > >   target/ppc/power8-pmu.c          | 38 ++++++++++++++++++++++++++
> > >   target/ppc/translate.c           | 46 ++++++++++++++++++++++++++++++++
> > >   6 files changed, 96 insertions(+)
> > > 
> > > diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
> > > index 9b41b022e2..38cd2b5c43 100644
> > > --- a/target/ppc/cpu.h
> > > +++ b/target/ppc/cpu.h
> > > @@ -656,6 +656,7 @@ enum {
> > >       HFLAGS_PR = 14,  /* MSR_PR */
> > >       HFLAGS_PMCC0 = 15,  /* MMCR0 PMCC bit 0 */
> > >       HFLAGS_PMCC1 = 16,  /* MMCR0 PMCC bit 1 */
> > > +    HFLAGS_MMCR0FC = 17, /* MMCR0 FC bit */
> > 
> > Now that the event stuff is a bit more refined, you could narrow this
> > down to specifically marking if any counters are actively counting
> > instructions (not frozen by MMCR0[FC] and not frozen by
> > MMCR0[FC14|FC56] *and* have the right event selected).
> > 
> > Since I suspect the instruction counting instrumentation could be
> > quite expensive (helper call on every tb), that might be worthwhile.
> 
> That was worthwhile. The performance increase is substantial with this
> change, in particular with tests that exercises only cycle events.

Good to know.

> > >       HFLAGS_VSX = 23, /* MSR_VSX if cpu has VSX */
> > >       HFLAGS_VR = 25,  /* MSR_VR if cpu has VRE */
> > > diff --git a/target/ppc/helper.h b/target/ppc/helper.h
> > > index 94b4690375..d8a23e054a 100644
> > > --- a/target/ppc/helper.h
> > > +++ b/target/ppc/helper.h
> > > @@ -24,6 +24,7 @@ DEF_HELPER_2(store_mmcr0, void, env, tl)
> > >   DEF_HELPER_2(store_mmcr1, void, env, tl)
> > >   DEF_HELPER_3(store_pmc, void, env, i32, i64)
> > >   DEF_HELPER_2(read_pmc, tl, env, i32)
> > > +DEF_HELPER_2(insns_inc, void, env, i32)
> > >   #endif
> > >   DEF_HELPER_1(check_tlb_flush_local, void, env)
> > >   DEF_HELPER_1(check_tlb_flush_global, void, env)
> > > diff --git a/target/ppc/helper_regs.c b/target/ppc/helper_regs.c
> > > index 99562edd57..875c2fdfc6 100644
> > > --- a/target/ppc/helper_regs.c
> > > +++ b/target/ppc/helper_regs.c
> > > @@ -115,6 +115,10 @@ static uint32_t hreg_compute_hflags_value(CPUPPCState *env)
> > >       if (env->spr[SPR_POWER_MMCR0] & MMCR0_PMCC1) {
> > >           hflags |= 1 << HFLAGS_PMCC1;
> > >       }
> > > +    if (env->spr[SPR_POWER_MMCR0] & MMCR0_FC) {
> > > +        hflags |= 1 << HFLAGS_MMCR0FC;
> > > +    }
> > > +
> > >   #ifndef CONFIG_USER_ONLY
> > >       if (!env->has_hv_mode || (msr & (1ull << MSR_HV))) {
> > > diff --git a/target/ppc/power8-pmu-regs.c.inc b/target/ppc/power8-pmu-regs.c.inc
> > > index 25b13ad564..580e4e41b2 100644
> > > --- a/target/ppc/power8-pmu-regs.c.inc
> > > +++ b/target/ppc/power8-pmu-regs.c.inc
> > > @@ -113,6 +113,12 @@ static void write_MMCR0_common(DisasContext *ctx, TCGv val)
> > >        */
> > >       gen_icount_io_start(ctx);
> > >       gen_helper_store_mmcr0(cpu_env, val);
> > > +
> > > +    /*
> > > +     * End the translation block because MMCR0 writes can change
> > > +     * ctx->pmu_frozen.
> > > +     */
> > > +    ctx->base.is_jmp = DISAS_EXIT_UPDATE;
> > >   }
> > >   void spr_write_MMCR0_ureg(DisasContext *ctx, int sprn, int gprn)
> > > diff --git a/target/ppc/power8-pmu.c b/target/ppc/power8-pmu.c
> > > index 01e0b9b8fc..59d0def79d 100644
> > > --- a/target/ppc/power8-pmu.c
> > > +++ b/target/ppc/power8-pmu.c
> > > @@ -112,6 +112,30 @@ static PMUEventType pmc_get_event(CPUPPCState *env, int sprn)
> > >       return evt_type;
> > >   }
> > > +static bool pmu_increment_insns(CPUPPCState *env, uint32_t num_insns)
> > > +{
> > > +    bool overflow_triggered = false;
> > > +    int sprn;
> > > +
> > > +    /* PMC6 never counts instructions */
> > > +    for (sprn = SPR_POWER_PMC1; sprn <= SPR_POWER_PMC5; sprn++) {
> > > +        if (pmc_get_event(env, sprn) != PMU_EVENT_INSTRUCTIONS) {
> > > +            continue;
> > > +        }
> > > +
> > > +        env->spr[sprn] += num_insns;
> > > +
> > > +        if (env->spr[sprn] >= PMC_COUNTER_NEGATIVE_VAL &&
> > > +            pmc_has_overflow_enabled(env, sprn)) {
> > > +
> > > +            overflow_triggered = true;
> > > +            env->spr[sprn] = PMC_COUNTER_NEGATIVE_VAL;
> > 
> > Does the hardware PMU actually guarantee that the event will happen
> > exactly on the overflow?  Or could you count a few into the negative
> > zone before the event is delivered?
> 
> My understand reading the ISA and from testing with the a real PMU is that yes,
> it'll guarantee that the overflow will happen when the counter reaches exactly
> 0x80000000.

Ok.  We can't quite achieve that in TCG, which makes forcing the
counter to 0x80000000 a reasonable way of faking it.  Might be worth
commenting that that's what this is, though.

> > > +        }
> > > +    }
> > > +
> > > +    return overflow_triggered;
> > > +}
> > > +
> > >   static void pmu_update_cycles(CPUPPCState *env)
> > >   {
> > >       uint64_t now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
> > > @@ -258,6 +282,20 @@ static void fire_PMC_interrupt(PowerPCCPU *cpu)
> > >       return;
> > >   }
> > > +/* This helper assumes that the PMC is running. */
> > > +void helper_insns_inc(CPUPPCState *env, uint32_t num_insns)
> > > +{
> > > +    bool overflow_triggered;
> > > +    PowerPCCPU *cpu;
> > > +
> > > +    overflow_triggered = pmu_increment_insns(env, num_insns);
> > > +
> > > +    if (overflow_triggered) {
> > > +        cpu = env_archcpu(env);
> > > +        fire_PMC_interrupt(cpu);
> > > +    }
> > > +}
> > > +
> > >   static void cpu_ppc_pmu_timer_cb(void *opaque)
> > >   {
> > >       PowerPCCPU *cpu = opaque;
> > > diff --git a/target/ppc/translate.c b/target/ppc/translate.c
> > > index 9960df6e18..ccc83d0603 100644
> > > --- a/target/ppc/translate.c
> > > +++ b/target/ppc/translate.c
> > > @@ -177,6 +177,7 @@ struct DisasContext {
> > >       bool hr;
> > >       bool mmcr0_pmcc0;
> > >       bool mmcr0_pmcc1;
> > > +    bool pmu_frozen;
> > >       ppc_spr_t *spr_cb; /* Needed to check rights for mfspr/mtspr */
> > >       int singlestep_enabled;
> > >       uint32_t flags;
> > > @@ -4170,6 +4171,31 @@ static inline void gen_update_cfar(DisasContext *ctx, target_ulong nip)
> > >   #endif
> > >   }
> > > +#if defined(TARGET_PPC64) && !defined(CONFIG_USER_ONLY)
> > 
> > Should this actually be !CONFIG_USER_ONLY?  IIUC there are
> > circumstances where userspace could access the PMU, including
> > instruction counting.
> 
> The user mode will not be able to use the PMU properly because the MMCR1
> reg, used to define the events to be sampled, isn't writable by userpace
> under any circunstance.

Couldn't they use PMC5 without writing MMCR1?
Daniel Henrique Barboza Dec. 1, 2021, 1:12 p.m. UTC | #4
On 11/30/21 20:52, David Gibson wrote:
> On Tue, Nov 30, 2021 at 07:24:04PM -0300, Daniel Henrique Barboza wrote:
>>
>>
>> On 11/29/21 01:36, David Gibson wrote:
>>> On Thu, Nov 25, 2021 at 12:08:13PM -0300, Daniel Henrique Barboza wrote:
>>>> The PMU is already counting cycles by calculating time elapsed in
>>>> nanoseconds. Counting instructions is a different matter and requires
>>>> another approach.
>>>>
>>>> This patch adds the capability of counting completed instructions
>>>> (Perf event PM_INST_CMPL) by counting the amount of instructions
>>>> translated in each translation block right before exiting it.
>>>>
>>>> A new pmu_count_insns() helper in translation.c was added to do that.
>>>> After verifying that the PMU is running (MMCR0_FC bit not set), call
>>>> helper_insns_inc(). This new helper from power8-pmu.c will add the
>>>> instructions to the relevant counters. It'll also be responsible for
>>>> triggering counter negative overflows as it is already being done with
>>>> cycles.
>>>>
>>>> Signed-off-by: Daniel Henrique Barboza <danielhb413@gmail.com>
>>>> ---
>>>>    target/ppc/cpu.h                 |  1 +
>>>>    target/ppc/helper.h              |  1 +
>>>>    target/ppc/helper_regs.c         |  4 +++
>>>>    target/ppc/power8-pmu-regs.c.inc |  6 +++++
>>>>    target/ppc/power8-pmu.c          | 38 ++++++++++++++++++++++++++
>>>>    target/ppc/translate.c           | 46 ++++++++++++++++++++++++++++++++
>>>>    6 files changed, 96 insertions(+)
>>>>
>>>> diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
>>>> index 9b41b022e2..38cd2b5c43 100644
>>>> --- a/target/ppc/cpu.h
>>>> +++ b/target/ppc/cpu.h
>>>> @@ -656,6 +656,7 @@ enum {
>>>>        HFLAGS_PR = 14,  /* MSR_PR */
>>>>        HFLAGS_PMCC0 = 15,  /* MMCR0 PMCC bit 0 */
>>>>        HFLAGS_PMCC1 = 16,  /* MMCR0 PMCC bit 1 */
>>>> +    HFLAGS_MMCR0FC = 17, /* MMCR0 FC bit */
>>>
>>> Now that the event stuff is a bit more refined, you could narrow this
>>> down to specifically marking if any counters are actively counting
>>> instructions (not frozen by MMCR0[FC] and not frozen by
>>> MMCR0[FC14|FC56] *and* have the right event selected).
>>>
>>> Since I suspect the instruction counting instrumentation could be
>>> quite expensive (helper call on every tb), that might be worthwhile.
>>
>> That was worthwhile. The performance increase is substantial with this
>> change, in particular with tests that exercises only cycle events.
> 
> Good to know.
> 
>>>>        HFLAGS_VSX = 23, /* MSR_VSX if cpu has VSX */
>>>>        HFLAGS_VR = 25,  /* MSR_VR if cpu has VRE */
>>>> diff --git a/target/ppc/helper.h b/target/ppc/helper.h
>>>> index 94b4690375..d8a23e054a 100644
>>>> --- a/target/ppc/helper.h
>>>> +++ b/target/ppc/helper.h
>>>> @@ -24,6 +24,7 @@ DEF_HELPER_2(store_mmcr0, void, env, tl)
>>>>    DEF_HELPER_2(store_mmcr1, void, env, tl)
>>>>    DEF_HELPER_3(store_pmc, void, env, i32, i64)
>>>>    DEF_HELPER_2(read_pmc, tl, env, i32)
>>>> +DEF_HELPER_2(insns_inc, void, env, i32)
>>>>    #endif
>>>>    DEF_HELPER_1(check_tlb_flush_local, void, env)
>>>>    DEF_HELPER_1(check_tlb_flush_global, void, env)
>>>> diff --git a/target/ppc/helper_regs.c b/target/ppc/helper_regs.c
>>>> index 99562edd57..875c2fdfc6 100644
>>>> --- a/target/ppc/helper_regs.c
>>>> +++ b/target/ppc/helper_regs.c
>>>> @@ -115,6 +115,10 @@ static uint32_t hreg_compute_hflags_value(CPUPPCState *env)
>>>>        if (env->spr[SPR_POWER_MMCR0] & MMCR0_PMCC1) {
>>>>            hflags |= 1 << HFLAGS_PMCC1;
>>>>        }
>>>> +    if (env->spr[SPR_POWER_MMCR0] & MMCR0_FC) {
>>>> +        hflags |= 1 << HFLAGS_MMCR0FC;
>>>> +    }
>>>> +
>>>>    #ifndef CONFIG_USER_ONLY
>>>>        if (!env->has_hv_mode || (msr & (1ull << MSR_HV))) {
>>>> diff --git a/target/ppc/power8-pmu-regs.c.inc b/target/ppc/power8-pmu-regs.c.inc
>>>> index 25b13ad564..580e4e41b2 100644
>>>> --- a/target/ppc/power8-pmu-regs.c.inc
>>>> +++ b/target/ppc/power8-pmu-regs.c.inc
>>>> @@ -113,6 +113,12 @@ static void write_MMCR0_common(DisasContext *ctx, TCGv val)
>>>>         */
>>>>        gen_icount_io_start(ctx);
>>>>        gen_helper_store_mmcr0(cpu_env, val);
>>>> +
>>>> +    /*
>>>> +     * End the translation block because MMCR0 writes can change
>>>> +     * ctx->pmu_frozen.
>>>> +     */
>>>> +    ctx->base.is_jmp = DISAS_EXIT_UPDATE;
>>>>    }
>>>>    void spr_write_MMCR0_ureg(DisasContext *ctx, int sprn, int gprn)
>>>> diff --git a/target/ppc/power8-pmu.c b/target/ppc/power8-pmu.c
>>>> index 01e0b9b8fc..59d0def79d 100644
>>>> --- a/target/ppc/power8-pmu.c
>>>> +++ b/target/ppc/power8-pmu.c
>>>> @@ -112,6 +112,30 @@ static PMUEventType pmc_get_event(CPUPPCState *env, int sprn)
>>>>        return evt_type;
>>>>    }
>>>> +static bool pmu_increment_insns(CPUPPCState *env, uint32_t num_insns)
>>>> +{
>>>> +    bool overflow_triggered = false;
>>>> +    int sprn;
>>>> +
>>>> +    /* PMC6 never counts instructions */
>>>> +    for (sprn = SPR_POWER_PMC1; sprn <= SPR_POWER_PMC5; sprn++) {
>>>> +        if (pmc_get_event(env, sprn) != PMU_EVENT_INSTRUCTIONS) {
>>>> +            continue;
>>>> +        }
>>>> +
>>>> +        env->spr[sprn] += num_insns;
>>>> +
>>>> +        if (env->spr[sprn] >= PMC_COUNTER_NEGATIVE_VAL &&
>>>> +            pmc_has_overflow_enabled(env, sprn)) {
>>>> +
>>>> +            overflow_triggered = true;
>>>> +            env->spr[sprn] = PMC_COUNTER_NEGATIVE_VAL;
>>>
>>> Does the hardware PMU actually guarantee that the event will happen
>>> exactly on the overflow?  Or could you count a few into the negative
>>> zone before the event is delivered?
>>
>> My understanding, reading the ISA and from testing with a real PMU, is that yes,
>> it'll guarantee that the overflow will happen when the counter reaches exactly
>> 0x80000000.
> 
> Ok.  We can't quite achieve that in TCG, which makes forcing the
> counter to 0x80000000 a reasonable way of faking it.  Might be worth
> commenting that that's what this is, though.


Fair enough.


> 
>>>> +        }
>>>> +    }
>>>> +
>>>> +    return overflow_triggered;
>>>> +}
>>>> +
>>>>    static void pmu_update_cycles(CPUPPCState *env)
>>>>    {
>>>>        uint64_t now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
>>>> @@ -258,6 +282,20 @@ static void fire_PMC_interrupt(PowerPCCPU *cpu)
>>>>        return;
>>>>    }
>>>> +/* This helper assumes that the PMC is running. */
>>>> +void helper_insns_inc(CPUPPCState *env, uint32_t num_insns)
>>>> +{
>>>> +    bool overflow_triggered;
>>>> +    PowerPCCPU *cpu;
>>>> +
>>>> +    overflow_triggered = pmu_increment_insns(env, num_insns);
>>>> +
>>>> +    if (overflow_triggered) {
>>>> +        cpu = env_archcpu(env);
>>>> +        fire_PMC_interrupt(cpu);
>>>> +    }
>>>> +}
>>>> +
>>>>    static void cpu_ppc_pmu_timer_cb(void *opaque)
>>>>    {
>>>>        PowerPCCPU *cpu = opaque;
>>>> diff --git a/target/ppc/translate.c b/target/ppc/translate.c
>>>> index 9960df6e18..ccc83d0603 100644
>>>> --- a/target/ppc/translate.c
>>>> +++ b/target/ppc/translate.c
>>>> @@ -177,6 +177,7 @@ struct DisasContext {
>>>>        bool hr;
>>>>        bool mmcr0_pmcc0;
>>>>        bool mmcr0_pmcc1;
>>>> +    bool pmu_frozen;
>>>>        ppc_spr_t *spr_cb; /* Needed to check rights for mfspr/mtspr */
>>>>        int singlestep_enabled;
>>>>        uint32_t flags;
>>>> @@ -4170,6 +4171,31 @@ static inline void gen_update_cfar(DisasContext *ctx, target_ulong nip)
>>>>    #endif
>>>>    }
>>>> +#if defined(TARGET_PPC64) && !defined(CONFIG_USER_ONLY)
>>>
>>> Should this actually be !CONFIG_USER_ONLY?  IIUC there are
>>> circumstances where userspace could access the PMU, including
>>> instruction counting.
>>
>> The user mode will not be able to use the PMU properly because the MMCR1
>> reg, used to define the events to be sampled, isn't writable by userspace
>> under any circumstance.
> 
> Couldn't they use PMC5 without writing MMCR1?


Yeah, in theory. Problem state write access to PMCs is granted for MMCR0_PMCC
0b10 || 0b11. The PMCC bits of MMCR0 aren't user read/writable (only FC, PMAO and PMAE
bits can be r/w from userspace, all other bits are filtered out). With the default
PMCC value of 0b00 the PMCs are readable, but not writable. So in a way userspace can
start the PMU and see instruction counting in PMC5 but it wouldn't be able to set it
to a specific value and wouldn't be able to use overflows.

All that said, the change to allow PMC5 to be incremented in problem state is simple
enough so I ended up doing it.


Thanks,


Daniel



>
David Gibson Dec. 2, 2021, 1:23 a.m. UTC | #5
On Wed, Dec 01, 2021 at 10:12:27AM -0300, Daniel Henrique Barboza wrote:
> On 11/30/21 20:52, David Gibson wrote:
> > On Tue, Nov 30, 2021 at 07:24:04PM -0300, Daniel Henrique Barboza wrote:
[snip]
> > > > > +static bool pmu_increment_insns(CPUPPCState *env, uint32_t num_insns)
> > > > > +{
> > > > > +    bool overflow_triggered = false;
> > > > > +    int sprn;
> > > > > +
> > > > > +    /* PMC6 never counts instructions */
> > > > > +    for (sprn = SPR_POWER_PMC1; sprn <= SPR_POWER_PMC5; sprn++) {
> > > > > +        if (pmc_get_event(env, sprn) != PMU_EVENT_INSTRUCTIONS) {
> > > > > +            continue;
> > > > > +        }
> > > > > +
> > > > > +        env->spr[sprn] += num_insns;
> > > > > +
> > > > > +        if (env->spr[sprn] >= PMC_COUNTER_NEGATIVE_VAL &&
> > > > > +            pmc_has_overflow_enabled(env, sprn)) {
> > > > > +
> > > > > +            overflow_triggered = true;
> > > > > +            env->spr[sprn] = PMC_COUNTER_NEGATIVE_VAL;
> > > > 
> > > > Does the hardware PMU actually guarantee that the event will happen
> > > > exactly on the overflow?  Or could you count a few into the negative
> > > > zone before the event is delivered?
> > > 
> > > My understanding, reading the ISA and from testing with a real PMU, is that yes,
> > > it'll guarantee that the overflow will happen when the counter reaches exactly
> > > 0x80000000.
> > 
> > Ok.  We can't quite achieve that in TCG, which makes forcing the
> > counter to 0x80000000 a reasonable way of faking it.  Might be worth
> > commenting that that's what this is, though.
> 
> Fair enough.

To expand on this a little, I mentioned in connection with cycle
counting that we kind of have a choice as to which illusion to
preserve.  At least assuming that the VM can see a real real-time
clock, we can either preserve the illusion that cycles have the
advertised frequency, or we can preserve the illusion that
instructions take vaguely the right number of cycles to complete.
With TCG, we can't do both.

We have a somewhat similar tradeoff here: do we prioritize matching
the behaviour that the counter will be exactly 0x80000000 at the time
of an overflow interrupt, or do we prioritize matching the behaviour
that the counter is an accurate and exact count of completed
instructions.  The fact that there is a tradeoff and which side we've
chosen is the key point to describe here, I think.

[In this instance, I think we can get closer to getting both sides
right.  I believe there's a way in TCG to clamp the number of
instructions in a translated block - we could set that based on the
distance to overflow of the PMCs.  That sounds very much not worth it
at this stage - we could look at it as a refinement later if we care]

> > > > > +        }
> > > > > +    }
> > > > > +
> > > > > +    return overflow_triggered;
> > > > > +}
> > > > > +
> > > > >    static void pmu_update_cycles(CPUPPCState *env)
> > > > >    {
> > > > >        uint64_t now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
> > > > > @@ -258,6 +282,20 @@ static void fire_PMC_interrupt(PowerPCCPU *cpu)
> > > > >        return;
> > > > >    }
> > > > > +/* This helper assumes that the PMC is running. */
> > > > > +void helper_insns_inc(CPUPPCState *env, uint32_t num_insns)
> > > > > +{
> > > > > +    bool overflow_triggered;
> > > > > +    PowerPCCPU *cpu;
> > > > > +
> > > > > +    overflow_triggered = pmu_increment_insns(env, num_insns);
> > > > > +
> > > > > +    if (overflow_triggered) {
> > > > > +        cpu = env_archcpu(env);
> > > > > +        fire_PMC_interrupt(cpu);
> > > > > +    }
> > > > > +}
> > > > > +
> > > > >    static void cpu_ppc_pmu_timer_cb(void *opaque)
> > > > >    {
> > > > >        PowerPCCPU *cpu = opaque;
> > > > > diff --git a/target/ppc/translate.c b/target/ppc/translate.c
> > > > > index 9960df6e18..ccc83d0603 100644
> > > > > --- a/target/ppc/translate.c
> > > > > +++ b/target/ppc/translate.c
> > > > > @@ -177,6 +177,7 @@ struct DisasContext {
> > > > >        bool hr;
> > > > >        bool mmcr0_pmcc0;
> > > > >        bool mmcr0_pmcc1;
> > > > > +    bool pmu_frozen;
> > > > >        ppc_spr_t *spr_cb; /* Needed to check rights for mfspr/mtspr */
> > > > >        int singlestep_enabled;
> > > > >        uint32_t flags;
> > > > > @@ -4170,6 +4171,31 @@ static inline void gen_update_cfar(DisasContext *ctx, target_ulong nip)
> > > > >    #endif
> > > > >    }
> > > > > +#if defined(TARGET_PPC64) && !defined(CONFIG_USER_ONLY)
> > > > 
> > > > Should this actually be !CONFIG_USER_ONLY?  IIUC there are
> > > > circumstances where userspace could access the PMU, including
> > > > instruction counting.
> > > 
> > > The user mode will not be able to use the PMU properly because the MMCR1
> > > reg, used to define the events to be sampled, isn't writable by userspace
> > > under any circumstance.
> > 
> > Couldn't they use PMC5 without writing MMCR1?
> 
> 
> Yeah, in theory. Problem state write access to PMCs is granted for MMCR0_PMCC
> 0b10 || 0b11. The PMCC bits of MMCR0 aren't user read/writable (only FC, PMAO and PMAE
> bits can be r/w from userspace, all other bits are filtered out). With the default
> PMCC value of 0b00 the PMCs are readable, but not writable. So in a way userspace can
> start the PMU and see instruction counting in PMC5 but it wouldn't be able to set it
> to a specific value and wouldn't be able to use overflows.
> 
> All that said, the change to allow PMC5 to be incremented in problem state is simple
> enough so I ended up doing it.

Ok, I think that's a good idea.

You're probably right that at the moment there's no way for a pure
userspace program to access the counters like this.  However, that's
only because of a pretty complex chain of restrictions.  There's no
fundamental reason why we couldn't have some configuration mechanism
to execute a userland program with PMU permission.
diff mbox series

Patch

diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
index 9b41b022e2..38cd2b5c43 100644
--- a/target/ppc/cpu.h
+++ b/target/ppc/cpu.h
@@ -656,6 +656,7 @@  enum {
     HFLAGS_PR = 14,  /* MSR_PR */
     HFLAGS_PMCC0 = 15,  /* MMCR0 PMCC bit 0 */
     HFLAGS_PMCC1 = 16,  /* MMCR0 PMCC bit 1 */
+    HFLAGS_MMCR0FC = 17, /* MMCR0 FC bit */
     HFLAGS_VSX = 23, /* MSR_VSX if cpu has VSX */
     HFLAGS_VR = 25,  /* MSR_VR if cpu has VRE */
 
diff --git a/target/ppc/helper.h b/target/ppc/helper.h
index 94b4690375..d8a23e054a 100644
--- a/target/ppc/helper.h
+++ b/target/ppc/helper.h
@@ -24,6 +24,7 @@  DEF_HELPER_2(store_mmcr0, void, env, tl)
 DEF_HELPER_2(store_mmcr1, void, env, tl)
 DEF_HELPER_3(store_pmc, void, env, i32, i64)
 DEF_HELPER_2(read_pmc, tl, env, i32)
+DEF_HELPER_2(insns_inc, void, env, i32)
 #endif
 DEF_HELPER_1(check_tlb_flush_local, void, env)
 DEF_HELPER_1(check_tlb_flush_global, void, env)
diff --git a/target/ppc/helper_regs.c b/target/ppc/helper_regs.c
index 99562edd57..875c2fdfc6 100644
--- a/target/ppc/helper_regs.c
+++ b/target/ppc/helper_regs.c
@@ -115,6 +115,10 @@  static uint32_t hreg_compute_hflags_value(CPUPPCState *env)
     if (env->spr[SPR_POWER_MMCR0] & MMCR0_PMCC1) {
         hflags |= 1 << HFLAGS_PMCC1;
     }
+    if (env->spr[SPR_POWER_MMCR0] & MMCR0_FC) {
+        hflags |= 1 << HFLAGS_MMCR0FC;
+    }
+
 
 #ifndef CONFIG_USER_ONLY
     if (!env->has_hv_mode || (msr & (1ull << MSR_HV))) {
diff --git a/target/ppc/power8-pmu-regs.c.inc b/target/ppc/power8-pmu-regs.c.inc
index 25b13ad564..580e4e41b2 100644
--- a/target/ppc/power8-pmu-regs.c.inc
+++ b/target/ppc/power8-pmu-regs.c.inc
@@ -113,6 +113,12 @@  static void write_MMCR0_common(DisasContext *ctx, TCGv val)
      */
     gen_icount_io_start(ctx);
     gen_helper_store_mmcr0(cpu_env, val);
+
+    /*
+     * End the translation block because MMCR0 writes can change
+     * ctx->pmu_frozen.
+     */
+    ctx->base.is_jmp = DISAS_EXIT_UPDATE;
 }
 
 void spr_write_MMCR0_ureg(DisasContext *ctx, int sprn, int gprn)
diff --git a/target/ppc/power8-pmu.c b/target/ppc/power8-pmu.c
index 01e0b9b8fc..59d0def79d 100644
--- a/target/ppc/power8-pmu.c
+++ b/target/ppc/power8-pmu.c
@@ -112,6 +112,30 @@  static PMUEventType pmc_get_event(CPUPPCState *env, int sprn)
     return evt_type;
 }
 
+static bool pmu_increment_insns(CPUPPCState *env, uint32_t num_insns)
+{
+    bool overflow_triggered = false;
+    int sprn;
+
+    /* PMC6 never counts instructions */
+    for (sprn = SPR_POWER_PMC1; sprn <= SPR_POWER_PMC5; sprn++) {
+        if (pmc_get_event(env, sprn) != PMU_EVENT_INSTRUCTIONS) {
+            continue;
+        }
+
+        env->spr[sprn] += num_insns;
+
+        if (env->spr[sprn] >= PMC_COUNTER_NEGATIVE_VAL &&
+            pmc_has_overflow_enabled(env, sprn)) {
+
+            overflow_triggered = true;
+            env->spr[sprn] = PMC_COUNTER_NEGATIVE_VAL;
+        }
+    }
+
+    return overflow_triggered;
+}
+
 static void pmu_update_cycles(CPUPPCState *env)
 {
     uint64_t now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
@@ -258,6 +282,20 @@  static void fire_PMC_interrupt(PowerPCCPU *cpu)
     return;
 }
 
+/* This helper assumes that the PMC is running. */
+void helper_insns_inc(CPUPPCState *env, uint32_t num_insns)
+{
+    bool overflow_triggered;
+    PowerPCCPU *cpu;
+
+    overflow_triggered = pmu_increment_insns(env, num_insns);
+
+    if (overflow_triggered) {
+        cpu = env_archcpu(env);
+        fire_PMC_interrupt(cpu);
+    }
+}
+
 static void cpu_ppc_pmu_timer_cb(void *opaque)
 {
     PowerPCCPU *cpu = opaque;
diff --git a/target/ppc/translate.c b/target/ppc/translate.c
index 9960df6e18..ccc83d0603 100644
--- a/target/ppc/translate.c
+++ b/target/ppc/translate.c
@@ -177,6 +177,7 @@  struct DisasContext {
     bool hr;
     bool mmcr0_pmcc0;
     bool mmcr0_pmcc1;
+    bool pmu_frozen;
     ppc_spr_t *spr_cb; /* Needed to check rights for mfspr/mtspr */
     int singlestep_enabled;
     uint32_t flags;
@@ -4170,6 +4171,31 @@  static inline void gen_update_cfar(DisasContext *ctx, target_ulong nip)
 #endif
 }
 
+#if defined(TARGET_PPC64) && !defined(CONFIG_USER_ONLY)
+static void pmu_count_insns(DisasContext *ctx)
+{
+    /* Do not bother calling the helper if the PMU is frozen */
+    if (ctx->pmu_frozen) {
+        return;
+    }
+
+    /*
+     * The PMU insns_inc() helper stops the internal PMU timer if a
+     * counter overflow happens. In that case, if the guest is
+     * running with icount and we do not handle it beforehand,
+     * the helper can trigger a 'bad icount read'.
+     */
+    gen_icount_io_start(ctx);
+
+    gen_helper_insns_inc(cpu_env, tcg_constant_i32(ctx->base.num_insns));
+}
+#else
+static void pmu_count_insns(DisasContext *ctx)
+{
+    return;
+}
+#endif
+
 static inline bool use_goto_tb(DisasContext *ctx, target_ulong dest)
 {
     return translator_use_goto_tb(&ctx->base, dest);
@@ -4180,6 +4206,14 @@  static void gen_lookup_and_goto_ptr(DisasContext *ctx)
     if (unlikely(ctx->singlestep_enabled)) {
         gen_debug_exception(ctx);
     } else {
+        /*
+         * tcg_gen_lookup_and_goto_ptr will exit the TB if
+         * CF_NO_GOTO_PTR is set. Count insns now.
+         */
+        if (ctx->base.tb->flags & CF_NO_GOTO_PTR) {
+            pmu_count_insns(ctx);
+        }
+
         tcg_gen_lookup_and_goto_ptr();
     }
 }
@@ -4191,6 +4225,7 @@  static void gen_goto_tb(DisasContext *ctx, int n, target_ulong dest)
         dest = (uint32_t) dest;
     }
     if (use_goto_tb(ctx, dest)) {
+        pmu_count_insns(ctx);
         tcg_gen_goto_tb(n);
         tcg_gen_movi_tl(cpu_nip, dest & ~3);
         tcg_gen_exit_tb(ctx->base.tb, n);
@@ -8458,6 +8493,7 @@  static void ppc_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
     ctx->hr = (hflags >> HFLAGS_HR) & 1;
     ctx->mmcr0_pmcc0 = (hflags >> HFLAGS_PMCC0) & 1;
     ctx->mmcr0_pmcc1 = (hflags >> HFLAGS_PMCC1) & 1;
+    ctx->pmu_frozen = (hflags >> HFLAGS_MMCR0FC) & 1;
 
     ctx->singlestep_enabled = 0;
     if ((hflags >> HFLAGS_SE) & 1) {
@@ -8564,6 +8600,7 @@  static void ppc_tr_tb_stop(DisasContextBase *dcbase, CPUState *cs)
     switch (is_jmp) {
     case DISAS_TOO_MANY:
         if (use_goto_tb(ctx, nip)) {
+            pmu_count_insns(ctx);
             tcg_gen_goto_tb(0);
             gen_update_nip(ctx, nip);
             tcg_gen_exit_tb(ctx->base.tb, 0);
@@ -8574,6 +8611,14 @@  static void ppc_tr_tb_stop(DisasContextBase *dcbase, CPUState *cs)
         gen_update_nip(ctx, nip);
         /* fall through */
     case DISAS_CHAIN:
+        /*
+         * tcg_gen_lookup_and_goto_ptr will exit the TB if
+         * CF_NO_GOTO_PTR is set. Count insns now.
+         */
+        if (ctx->base.tb->flags & CF_NO_GOTO_PTR) {
+            pmu_count_insns(ctx);
+        }
+
         tcg_gen_lookup_and_goto_ptr();
         break;
 
@@ -8581,6 +8626,7 @@  static void ppc_tr_tb_stop(DisasContextBase *dcbase, CPUState *cs)
         gen_update_nip(ctx, nip);
         /* fall through */
     case DISAS_EXIT:
+        pmu_count_insns(ctx);
         tcg_gen_exit_tb(NULL, 0);
         break;