diff mbox

ARM64: support access to more performance registers in AA64 mode

Message ID 1417590738-29072-1-git-send-email-csong84@gatech.edu
State New
Headers show

Commit Message

Chengyu Song Dec. 3, 2014, 7:12 a.m. UTC
In AA64 mode, certain system registers are access through MSR/MRS 
instructions instead of MCR/MRC. This patch added more such registers:

/* ARMv8 manual, D8.4.10 */
PMINTENCLR_EL1

/* ARMv8 manual, D8.4.11 */
PMINTENSET_EL1

/* ARMv8 manual, D8.4.12 */
PMOVSCLR_EL0

/* ARMv8 manual, D8.4.14 */
PMSELR_EL0

/* ARMv8 manual, D8.4.15 */
PMSWINC_EL0

/* ARMv8 manual, D8.4.16 */
PMUSERENR_EL0

/* ARMv8 manual, D8.4.17 */
PMXEVCNTR_EL0

/* ARMv8 manual, D8.4.18 */
PMXEVTYPER_EL0

Signed-off-by: Chengyu Song <csong84@gatech.edu>
---
 target-arm/helper.c | 40 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 40 insertions(+)

Comments

Christopher Covington Dec. 3, 2014, 8:25 p.m. UTC | #1
Hi Chengyu,

On 12/03/2014 02:12 AM, Chengyu Song wrote:
> In AA64 mode, certain system registers are access through MSR/MRS 
> instructions instead of MCR/MRC. This patch added more such registers:

If you don't mind sharing, I'm curious what motivated this patch. I have a
particular interest in having `perf stat -e instructions:u echo` inside
qemu-system-aarch64 function correctly, and this looks like a good step in
that direction.

> /* ARMv8 manual, D8.4.10 */
> PMINTENCLR_EL1

It'd probably good to mention the version of the document.

> /* ARMv8 manual, D8.4.11 */
> PMINTENSET_EL1
> 
> /* ARMv8 manual, D8.4.12 */
> PMOVSCLR_EL0
> 
> /* ARMv8 manual, D8.4.14 */
> PMSELR_EL0
> 
> /* ARMv8 manual, D8.4.15 */
> PMSWINC_EL0
> 
> /* ARMv8 manual, D8.4.16 */
> PMUSERENR_EL0
> 
> /* ARMv8 manual, D8.4.17 */
> PMXEVCNTR_EL0
> 
> /* ARMv8 manual, D8.4.18 */
> PMXEVTYPER_EL0
> 
> Signed-off-by: Chengyu Song <csong84@gatech.edu>
> ---
>  target-arm/helper.c | 40 ++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 40 insertions(+)
> 
> diff --git a/target-arm/helper.c b/target-arm/helper.c
> index b74d348..43b5b06 100644
> --- a/target-arm/helper.c
> +++ b/target-arm/helper.c
> @@ -843,15 +843,28 @@ static const ARMCPRegInfo v7_cp_reginfo[] = {
>        .accessfn = pmreg_access,
>        .writefn = pmovsr_write,
>        .raw_writefn = raw_write },
> +    { .name = "PMOVSCLR_EL0", .state = ARM_CP_STATE_AA64,
> +      .opc0 = 3, .crn = 9, .crm = 12, .opc1 = 3, .opc2 = 3,

It might be helpful to order the fields opc0, opc1, crn, crm, opc2 to match
the documentation and some (most?) of the other A64 QEMU code.

> +      .access = PL0_RW, .fieldoffset = offsetof(CPUARMState, cp15.c9_pmovsr),
> +      .accessfn = pmreg_access,
> +      .writefn = pmovsr_write,
> +      .raw_writefn = raw_write },

Should this contain .type = ARM_CP_NO_MIGRATE, if PMOVSCLR_EL0, PMOVSSET_EL0,
and PMOVSR all refer to the same underlying variable?

Should PMOVSSET_EL0 also be added?

>      /* Unimplemented so WI. */
>      { .name = "PMSWINC", .cp = 15, .crn = 9, .crm = 12, .opc1 = 0, .opc2 = 4,
>        .access = PL0_W, .accessfn = pmreg_access, .type = ARM_CP_NOP },
> +    { .name = "PMSWINC_EL0", .state = ARM_CP_STATE_AA64,
> +      .opc0 = 3, .crn = 9, .crm = 12, .opc1 = 3, .opc2 = 4,
> +      .access = PL0_W, .accessfn = pmreg_access, .type = ARM_CP_NOP },
>      /* Since we don't implement any events, writing to PMSELR is UNPREDICTABLE.
>       * We choose to RAZ/WI.
>       */
>      { .name = "PMSELR", .cp = 15, .crn = 9, .crm = 12, .opc1 = 0, .opc2 = 5,
>        .access = PL0_RW, .type = ARM_CP_CONST, .resetvalue = 0,
>        .accessfn = pmreg_access },
> +    { .name = "PMSELR_EL0", .state = ARM_CP_STATE_AA64,
> +      .opc0 = 3, .crn = 9, .crm = 12, .opc1 = 3, .opc2 = 5,
> +      .access = PL0_RW, .type = ARM_CP_CONST, .resetvalue = 0,
> +      .accessfn = pmreg_access },
>  #ifndef CONFIG_USER_ONLY
>      { .name = "PMCCNTR", .cp = 15, .crn = 9, .crm = 13, .opc1 = 0, .opc2 = 0,
>        .access = PL0_RW, .resetvalue = 0, .type = ARM_CP_IO,
> @@ -875,24 +888,51 @@ static const ARMCPRegInfo v7_cp_reginfo[] = {
>        .fieldoffset = offsetof(CPUARMState, cp15.c9_pmxevtyper),
>        .accessfn = pmreg_access, .writefn = pmxevtyper_write,
>        .raw_writefn = raw_write },
> +    { .name = "PMXEVTYPER_EL0", .state = ARM_CP_STATE_AA64,
> +      .opc0 = 3, .crn = 9, .crm = 13, .opc1 = 3, .opc2 = 1,
> +      .access = PL0_RW,
> +      .fieldoffset = offsetof(CPUARMState, cp15.c9_pmxevtyper),
> +      .accessfn = pmreg_access, .writefn = pmxevtyper_write,
> +      .raw_writefn = raw_write },
>      /* Unimplemented, RAZ/WI. */
>      { .name = "PMXEVCNTR", .cp = 15, .crn = 9, .crm = 13, .opc1 = 0, .opc2 = 2,
>        .access = PL0_RW, .type = ARM_CP_CONST, .resetvalue = 0,
>        .accessfn = pmreg_access },
> +    { .name = "PMXEVCNTR_EL0", .state = ARM_CP_STATE_AA64,
> +      .opc0 = 3, .crn = 9, .crm = 13, .opc1 = 3, .opc2 = 2,
> +      .access = PL0_RW, .type = ARM_CP_CONST, .resetvalue = 0,
> +      .accessfn = pmreg_access },
>      { .name = "PMUSERENR", .cp = 15, .crn = 9, .crm = 14, .opc1 = 0, .opc2 = 0,
>        .access = PL0_R | PL1_RW,
>        .fieldoffset = offsetof(CPUARMState, cp15.c9_pmuserenr),
>        .resetvalue = 0,
>        .writefn = pmuserenr_write, .raw_writefn = raw_write },
> +    { .name = "PMUSERENR_EL0", .state = ARM_CP_STATE_AA64,
> +      .opc0 = 3, .crn = 9, .crm = 14, .opc1 = 3, .opc2 = 0,
> +      .access = PL0_R | PL1_RW,
> +      .fieldoffset = offsetof(CPUARMState, cp15.c9_pmuserenr),
> +      .resetvalue = 0,
> +      .writefn = pmuserenr_write, .raw_writefn = raw_write },

Again, should .type = ARM_CP_NO_MIGRATE be used as this shares a variable with
PMUSERENR?

>      { .name = "PMINTENSET", .cp = 15, .crn = 9, .crm = 14, .opc1 = 0, .opc2 = 1,
>        .access = PL1_RW,
>        .fieldoffset = offsetof(CPUARMState, cp15.c9_pminten),
>        .resetvalue = 0,
>        .writefn = pmintenset_write, .raw_writefn = raw_write },
> +    { .name = "PMINTENSET_EL1", .state = ARM_CP_STATE_AA64,
> +      .opc0 = 3, .crn = 9, .crm = 14, .opc1 = 0, .opc2 = 1,
> +      .access = PL1_RW,
> +      .fieldoffset = offsetof(CPUARMState, cp15.c9_pminten),
> +      .resetvalue = 0,
> +      .writefn = pmintenset_write, .raw_writefn = raw_write },

Again, should .type = ARM_CP_NO_MIGRATE be used as this shares a variable with
PMINTSET?

>      { .name = "PMINTENCLR", .cp = 15, .crn = 9, .crm = 14, .opc1 = 0, .opc2 = 2,
>        .access = PL1_RW, .type = ARM_CP_NO_MIGRATE,
>        .fieldoffset = offsetof(CPUARMState, cp15.c9_pminten),
>        .resetvalue = 0, .writefn = pmintenclr_write, },
> +    { .name = "PMINTENCLR_EL1", .state = ARM_CP_STATE_AA64,
> +      .opc0 = 3, .crn = 9, .crm = 14, .opc1 = 0, .opc2 = 2,
> +      .access = PL1_RW, .type = ARM_CP_NO_MIGRATE,
> +      .fieldoffset = offsetof(CPUARMState, cp15.c9_pminten),
> +      .resetvalue = 0, .writefn = pmintenclr_write, },
>      { .name = "VBAR", .state = ARM_CP_STATE_BOTH,
>        .opc0 = 3, .crn = 12, .crm = 0, .opc1 = 0, .opc2 = 0,
>        .access = PL1_RW, .writefn = vbar_write,

Thanks,
Chris
Peter Maydell Dec. 3, 2014, 8:45 p.m. UTC | #2
On 3 December 2014 at 20:25, Christopher Covington <cov@codeaurora.org> wrote:
> Hi Chengyu,
>
> On 12/03/2014 02:12 AM, Chengyu Song wrote:

>>      { .name = "PMINTENSET", .cp = 15, .crn = 9, .crm = 14, .opc1 = 0, .opc2 = 1,
>>        .access = PL1_RW,
>>        .fieldoffset = offsetof(CPUARMState, cp15.c9_pminten),
>>        .resetvalue = 0,
>>        .writefn = pmintenset_write, .raw_writefn = raw_write },
>> +    { .name = "PMINTENSET_EL1", .state = ARM_CP_STATE_AA64,
>> +      .opc0 = 3, .crn = 9, .crm = 14, .opc1 = 0, .opc2 = 1,
>> +      .access = PL1_RW,
>> +      .fieldoffset = offsetof(CPUARMState, cp15.c9_pminten),
>> +      .resetvalue = 0,
>> +      .writefn = pmintenset_write, .raw_writefn = raw_write },
>
> Again, should .type = ARM_CP_NO_MIGRATE be used as this shares a variable with
> PMINTSET?

I think in this case and PMINTENCLR you can actually share a
single STATE_BOTH regdef, since the crn/crm/opc encodings
line up. Generally we prefer to do that where it works out.

thanks
-- PMM
Chengyu Song Dec. 3, 2014, 8:53 p.m. UTC | #3
Hi Christopher,

> On Dec 3, 2014, at 3:25 PM, Christopher Covington <cov@codeaurora.org> wrote:
> On 12/03/2014 02:12 AM, Chengyu Song wrote:
>> In AA64 mode, certain system registers are access through MSR/MRS 
>> instructions instead of MCR/MRC. This patch added more such registers:
> 
> If you don't mind sharing, I'm curious what motivated this patch. I have a
> particular interest in having `perf stat -e instructions:u echo` inside
> qemu-system-aarch64 function correctly, and this looks like a good step in
> that direction.

I'm playing with the Nexus 9 kernel and it died with a undef instruction exception caused by those MSR/MRS instructions.

>> /* ARMv8 manual, D8.4.10 */
>> PMINTENCLR_EL1
> 
> It'd probably good to mention the version of the document.

The version I use is A.a Non-Confidential Beta, release on 04 September 2013. I don't have any later version.

>> +      .opc0 = 3, .crn = 9, .crm = 12, .opc1 = 3, .opc2 = 3,
> 
> It might be helpful to order the fields opc0, opc1, crn, crm, opc2 to match
> the documentation and some (most?) of the other A64 QEMU code.

Good point. Let me try to resubmit the patch.

> 
>> +      .access = PL0_RW, .fieldoffset = offsetof(CPUARMState, cp15.c9_pmovsr),
>> +      .accessfn = pmreg_access,
>> +      .writefn = pmovsr_write,
>> +      .raw_writefn = raw_write },
> 
> Should this contain .type = ARM_CP_NO_MIGRATE, if PMOVSCLR_EL0, PMOVSSET_EL0,
> and PMOVSR all refer to the same underlying variable?

I don't know. I'm not an expert here, so I simply copied from the AA32 version. It would be nicer if you can create a correct patch :)

> Should PMOVSSET_EL0 also be added?

PMOVSSET is not available in QEMU, so I'm not sure how it should be added

> Again, should .type = ARM_CP_NO_MIGRATE be used as this shares a variable with
> PMUSERENR?

Same as above.

> Again, should .type = ARM_CP_NO_MIGRATE be used as this shares a variable with
> PMINTSET?

Same as above.

BTW, I think the current mask (0x7E000000) in the pmccfiltr_write function is not right.

Thanks,
Chengyu
Peter Maydell Dec. 3, 2014, 9:12 p.m. UTC | #4
On 3 December 2014 at 20:53, Chengyu Song <csong84@gatech.edu> wrote:
> The version I use is A.a Non-Confidential Beta, release on 04 September
> 2013. I don't have any later version.

We're up to rev A.d now; you can get it from
http://infocenter.arm.com/help/topic/com.arm.doc.ddi0487a.d/index.html
(if you haven't registered on the ARM website before you'll need
to fill in a form but it's a fairly painless process.)

>>> +      .opc0 = 3, .crn = 9, .crm = 12, .opc1 = 3, .opc2 = 3,
>>
>> It might be helpful to order the fields opc0, opc1, crn, crm, opc2 to match
>> the documentation and some (most?) of the other A64 QEMU code.
>
> Good point. Let me try to resubmit the patch.

The order we're preferring is the v8 ARM ARM order, so
that's .opc0, .opc1, .crn, .crm, .opc2. (Some of our existing
code doesn't follow that, unfortunately.)

>> Should PMOVSSET_EL0 also be added?
>
> PMOVSSET is not available in QEMU, so I'm not sure how it should be added

By implementing the register's behaviour; there's only one
bit of state we care about since we don't model any
perf counters except the cycle counter. The overflow bit
is handled by a set/clear pair of register accessors,
and we also need to check on reads to see if the
cycle counter would have overflowed (in which case we
need to set the bit to 1), using a similar logic to how
we deal with reads of the cycle counter itself. (A similar
check also needs to be done when the cycle counter is
written to or when it is disabled.)

Are there any other bits of new-in-v8 perf counter
functionality we're missing?

thanks
-- PMM
Chengyu Song Dec. 6, 2014, 9:07 p.m. UTC | #5
Hello Peter,

Sorry for replying late.

> We're up to rev A.d now; you can get it from
> http://infocenter.arm.com/help/topic/com.arm.doc.ddi0487a.d/index.html
> (if you haven't registered on the ARM website before you'll need
> to fill in a form but it's a fairly painless process.)

Thank you for the information, I have resubmitted the patch according to the new revision.

> By implementing the register's behaviour; there's only one
> bit of state we care about since we don't model any
> perf counters except the cycle counter. The overflow bit
> is handled by a set/clear pair of register accessors,
> and we also need to check on reads to see if the
> cycle counter would have overflowed (in which case we
> need to set the bit to 1), using a similar logic to how
> we deal with reads of the cycle counter itself. (A similar
> check also needs to be done when the cycle counter is
> written to or when it is disabled.)

Thank you for the instruction, I have added the support for PMOVSSET.

> Are there any other bits of new-in-v8 perf counter
> functionality we're missing?

Yes. But the rest ones are all related to events:

PMCEID0_EL0, PMCEID1_EL0, PMEVCNTR<n>_EL0, PMEVTYPER<n>_EL0, PMXEVCNTR_EL0

and their corresponding AA32 mappings.

Best,
Chengyu
Chengyu Song Dec. 6, 2014, 9:11 p.m. UTC | #6
Hello Christopher,

>> Again, should .type = ARM_CP_NO_MIGRATE be used as this shares a variable with
>> PMUSERENR?
> 

I went back and re-read the code and the manual, but I'm not sure if PMUSERENR should be NO_MIGRATE or not. It's only been read/written with this register, but it also controls EL0 access (i.e., used in pmreg_access). Should we still add the NO_MIGRATE type to it?

Thanks,
Chengyu
diff mbox

Patch

diff --git a/target-arm/helper.c b/target-arm/helper.c
index b74d348..43b5b06 100644
--- a/target-arm/helper.c
+++ b/target-arm/helper.c
@@ -843,15 +843,28 @@  static const ARMCPRegInfo v7_cp_reginfo[] = {
       .accessfn = pmreg_access,
       .writefn = pmovsr_write,
       .raw_writefn = raw_write },
+    { .name = "PMOVSCLR_EL0", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .crn = 9, .crm = 12, .opc1 = 3, .opc2 = 3,
+      .access = PL0_RW, .fieldoffset = offsetof(CPUARMState, cp15.c9_pmovsr),
+      .accessfn = pmreg_access,
+      .writefn = pmovsr_write,
+      .raw_writefn = raw_write },
     /* Unimplemented so WI. */
     { .name = "PMSWINC", .cp = 15, .crn = 9, .crm = 12, .opc1 = 0, .opc2 = 4,
       .access = PL0_W, .accessfn = pmreg_access, .type = ARM_CP_NOP },
+    { .name = "PMSWINC_EL0", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .crn = 9, .crm = 12, .opc1 = 3, .opc2 = 4,
+      .access = PL0_W, .accessfn = pmreg_access, .type = ARM_CP_NOP },
     /* Since we don't implement any events, writing to PMSELR is UNPREDICTABLE.
      * We choose to RAZ/WI.
      */
     { .name = "PMSELR", .cp = 15, .crn = 9, .crm = 12, .opc1 = 0, .opc2 = 5,
       .access = PL0_RW, .type = ARM_CP_CONST, .resetvalue = 0,
       .accessfn = pmreg_access },
+    { .name = "PMSELR_EL0", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .crn = 9, .crm = 12, .opc1 = 3, .opc2 = 5,
+      .access = PL0_RW, .type = ARM_CP_CONST, .resetvalue = 0,
+      .accessfn = pmreg_access },
 #ifndef CONFIG_USER_ONLY
     { .name = "PMCCNTR", .cp = 15, .crn = 9, .crm = 13, .opc1 = 0, .opc2 = 0,
       .access = PL0_RW, .resetvalue = 0, .type = ARM_CP_IO,
@@ -875,24 +888,51 @@  static const ARMCPRegInfo v7_cp_reginfo[] = {
       .fieldoffset = offsetof(CPUARMState, cp15.c9_pmxevtyper),
       .accessfn = pmreg_access, .writefn = pmxevtyper_write,
       .raw_writefn = raw_write },
+    { .name = "PMXEVTYPER_EL0", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .crn = 9, .crm = 13, .opc1 = 3, .opc2 = 1,
+      .access = PL0_RW,
+      .fieldoffset = offsetof(CPUARMState, cp15.c9_pmxevtyper),
+      .accessfn = pmreg_access, .writefn = pmxevtyper_write,
+      .raw_writefn = raw_write },
     /* Unimplemented, RAZ/WI. */
     { .name = "PMXEVCNTR", .cp = 15, .crn = 9, .crm = 13, .opc1 = 0, .opc2 = 2,
       .access = PL0_RW, .type = ARM_CP_CONST, .resetvalue = 0,
       .accessfn = pmreg_access },
+    { .name = "PMXEVCNTR_EL0", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .crn = 9, .crm = 13, .opc1 = 3, .opc2 = 2,
+      .access = PL0_RW, .type = ARM_CP_CONST, .resetvalue = 0,
+      .accessfn = pmreg_access },
     { .name = "PMUSERENR", .cp = 15, .crn = 9, .crm = 14, .opc1 = 0, .opc2 = 0,
       .access = PL0_R | PL1_RW,
       .fieldoffset = offsetof(CPUARMState, cp15.c9_pmuserenr),
       .resetvalue = 0,
       .writefn = pmuserenr_write, .raw_writefn = raw_write },
+    { .name = "PMUSERENR_EL0", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .crn = 9, .crm = 14, .opc1 = 3, .opc2 = 0,
+      .access = PL0_R | PL1_RW,
+      .fieldoffset = offsetof(CPUARMState, cp15.c9_pmuserenr),
+      .resetvalue = 0,
+      .writefn = pmuserenr_write, .raw_writefn = raw_write },
     { .name = "PMINTENSET", .cp = 15, .crn = 9, .crm = 14, .opc1 = 0, .opc2 = 1,
       .access = PL1_RW,
       .fieldoffset = offsetof(CPUARMState, cp15.c9_pminten),
       .resetvalue = 0,
       .writefn = pmintenset_write, .raw_writefn = raw_write },
+    { .name = "PMINTENSET_EL1", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .crn = 9, .crm = 14, .opc1 = 0, .opc2 = 1,
+      .access = PL1_RW,
+      .fieldoffset = offsetof(CPUARMState, cp15.c9_pminten),
+      .resetvalue = 0,
+      .writefn = pmintenset_write, .raw_writefn = raw_write },
     { .name = "PMINTENCLR", .cp = 15, .crn = 9, .crm = 14, .opc1 = 0, .opc2 = 2,
       .access = PL1_RW, .type = ARM_CP_NO_MIGRATE,
       .fieldoffset = offsetof(CPUARMState, cp15.c9_pminten),
       .resetvalue = 0, .writefn = pmintenclr_write, },
+    { .name = "PMINTENCLR_EL1", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .crn = 9, .crm = 14, .opc1 = 0, .opc2 = 2,
+      .access = PL1_RW, .type = ARM_CP_NO_MIGRATE,
+      .fieldoffset = offsetof(CPUARMState, cp15.c9_pminten),
+      .resetvalue = 0, .writefn = pmintenclr_write, },
     { .name = "VBAR", .state = ARM_CP_STATE_BOTH,
       .opc0 = 3, .crn = 12, .crm = 0, .opc1 = 0, .opc2 = 0,
       .access = PL1_RW, .writefn = vbar_write,