diff mbox

[v2,2/2] target-arm: Fix arm_el_is_aa64() function to support EL2 and EL3

Message ID 1441208342-10601-3-git-send-email-afarallax@yandex.ru
State New
Headers show

Commit Message

Sergey Sorokin Sept. 2, 2015, 3:39 p.m. UTC
Function arm_el_is_aa64() was fixed to support EL2 and EL3.
It is needed for a future support of EL2 and/or EL3,
and 32 bit EL1/EL2 support for ARMv8 cpu.
ARM_FEATURE_AARCH64 flag means that the highest exception level is
in AArch64 state. The state of lower exception levels is controlled
by the HCR_EL2.RW, SCR_EL3.RW and SCR_EL3.NS bits.
If EL2 or EL3 is not permitted by the appropriate ARM_FEATURE flag,
then the function arm_el_is_aa64() aborts on an attempt to get
the bitness of this EL. So we need to be sure that EL passed to
the function is enabled. Therefore some conditional statements was
slightly changed to check it.

Signed-off-by: Sergey Sorokin <afarallax@yandex.ru>
---
Changes since previous version:
* Some typos was fixed.
* Extended comments was added.
* The initial patch was divided in two parts.
* The erroneous changes in arm_excp_unmasked() function was fixed.

 hw/arm/boot.c        |  4 ++++
 target-arm/cpu.c     | 59 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 target-arm/cpu.h     | 51 ++++++++++++++++++++++++++++++++-------------
 target-arm/helper.c  | 23 +++++++++++++++++++-
 target-arm/machine.c |  5 +++++
 5 files changed, 126 insertions(+), 16 deletions(-)

Comments

Peter Maydell Sept. 8, 2015, 1:44 p.m. UTC | #1
On 2 September 2015 at 16:39, Sergey Sorokin <afarallax@yandex.ru> wrote:
> Function arm_el_is_aa64() was fixed to support EL2 and EL3.
> It is needed for a future support of EL2 and/or EL3,
> and 32 bit EL1/EL2 support for ARMv8 cpu.
> ARM_FEATURE_AARCH64 flag means that the highest exception level is
> in AArch64 state. The state of lower exception levels is controlled
> by the HCR_EL2.RW, SCR_EL3.RW and SCR_EL3.NS bits.
> If EL2 or EL3 is not permitted by the appropriate ARM_FEATURE flag,
> then the function arm_el_is_aa64() aborts on an attempt to get
> the bitness of this EL. So we need to be sure that EL passed to
> the function is enabled. Therefore some conditional statements was
> slightly changed to check it.
>
> Signed-off-by: Sergey Sorokin <afarallax@yandex.ru>
> ---
> Changes since previous version:
> * Some typos was fixed.
> * Extended comments was added.
> * The initial patch was divided in two parts.
> * The erroneous changes in arm_excp_unmasked() function was fixed.
>
>  hw/arm/boot.c        |  4 ++++
>  target-arm/cpu.c     | 59 ++++++++++++++++++++++++++++++++++++++++++++++++++++
>  target-arm/cpu.h     | 51 ++++++++++++++++++++++++++++++++-------------
>  target-arm/helper.c  | 23 +++++++++++++++++++-
>  target-arm/machine.c |  5 +++++
>  5 files changed, 126 insertions(+), 16 deletions(-)
>
> diff --git a/hw/arm/boot.c b/hw/arm/boot.c
> index 5b969cd..e5d00af 100644
> --- a/hw/arm/boot.c
> +++ b/hw/arm/boot.c
> @@ -486,6 +486,10 @@ static void do_cpu_reset(void *opaque)
>                  if (!info->secure_boot) {
>                      /* Linux expects non-secure state */
>                      env->cp15.scr_el3 |= SCR_NS;
> +                    /* EL bitness can depend on SCR.NS bit, so we need
> +                     * recalc it.
> +                     */
> +                    calc_el_bitness(env);
>                  }
>              }
>
> diff --git a/target-arm/cpu.c b/target-arm/cpu.c
> index cc6c6f3..af4beab 100644
> --- a/target-arm/cpu.c
> +++ b/target-arm/cpu.c
> @@ -100,6 +100,64 @@ static void cp_reg_check_reset(gpointer key, gpointer value,  gpointer opaque)
>      assert(oldvalue == newvalue);
>  }
>
> +/* This function must be called if SCR_EL3.RW, SCR_EL3.NS
> + * or HCR_EL2.RW has been changed.
> + */
> +void calc_el_bitness(CPUARMState *env)
> +{
> +    /* -1 is invalid value */
> +    env->el_is_aa64[0] = -1;
> +
> +    /* If the highest EL is not AArch64, then all ELs are AArch32. */
> +    if (!arm_feature(env, ARM_FEATURE_AARCH64)) {
> +        if (arm_feature(env, ARM_FEATURE_EL3)) {
> +            env->el_is_aa64[3] = 0;
> +        } else {
> +            env->el_is_aa64[3] = -1;
> +        }
> +
> +        if (arm_feature(env, ARM_FEATURE_EL2)) {
> +            env->el_is_aa64[2] = 0;
> +        } else {
> +            env->el_is_aa64[2] = -1;
> +        }
> +
> +        env->el_is_aa64[1] = 0;
> +        return;
> +    }
> +
> +    if (arm_feature(env, ARM_FEATURE_EL3)) {
> +        env->el_is_aa64[3] = 1;
> +        int8_t scr_rw = env->cp15.scr_el3 & SCR_RW ? 1 : 0;
> +
> +        if (arm_feature(env, ARM_FEATURE_EL2) && (env->cp15.scr_el3 & SCR_NS)) {
> +            if (scr_rw) {
> +                env->el_is_aa64[2] = 1;
> +                env->el_is_aa64[1] = env->cp15.hcr_el2 & HCR_RW ? 1 : 0;
> +            } else {
> +                env->el_is_aa64[2] = 0;
> +                env->el_is_aa64[1] = 0;
> +            }
> +        } else {
> +            /* If there is no EL2 or if secure. */
> +            env->el_is_aa64[2] = -1;
> +            env->el_is_aa64[1] = scr_rw;
> +        }
> +    } else {
> +        env->el_is_aa64[3] = -1;
> +
> +        if (arm_feature(env, ARM_FEATURE_EL2)) {
> +            /* The highest EL is EL2, and it is AArch64. */
> +            env->el_is_aa64[2] = 1;
> +            env->el_is_aa64[1] = env->cp15.hcr_el2 & HCR_RW ? 1 : 0;
> +        } else {
> +            /* The highest EL is EL1, and it is AArch64. */
> +            env->el_is_aa64[2] = -1;
> +            env->el_is_aa64[1] = 1;
> +        }
> +    }
> +}

This is definitely rather overlarge for what it's doing. I think
it's sufficient to write:


void calc_el_bitness(CPUARMState *env)
{
    bool aa64 = arm_feature(env, ARM_FEATURE_AARCH64);

    if (arm_feature(env, ARM_FEATURE_EL3)) {
        env->el_is_aa64[3] = aa64;
        aa64 = aa64 && (env->cp15.scr_el3 & SCR_RW);
    } else {
        env->el_is_aa64[3] = -1;
    }

    if (arm_feature(env, ARM_FEATURE_EL2) && !arm_is_secure_below_el3(env)) {
        env->el_is_aa64[2] = aa64;
        aa64 = aa64 && (env->cp15.hcr_el2 & HCR_RW);
    } else {
        env->el_is_aa64[2] = -1;
    }

    env->el_is_aa64[1] = aa64;
}

And in fact this is pretty simple, so we could really just have

static inline bool arm_el_is_aa64(CPUARMState *env, int el)
{
    /* This isn't valid for EL0 (if we're in EL0, is_a64() is what you want,
     * and if we're not in EL0 then the state of EL0 isn't well defined.)
     */
    assert(el >= 1 && el <= 3);
    bool aa64 = arm_feature(env, ARM_FEATURE_AARCH64);

    if (el == 3) {
        return aa64;
    }

    if (arm_feature(env, ARM_FEATURE_EL3)) {
        aa64 = aa64 && (env->cp15.scr_el3 & SCR_RW);
    }

    if (el == 2) {
        return aa64;
    }

    if (arm_feature(env, ARM_FEATURE_EL2) && !arm_is_secure_below_el3(env)) {
        aa64 = aa64 && (env->cp15.hcr_el2 & HCR_RW);
    }

    return aa64;
}

and not bother with the caching at all. I think I'd prefer that
unless you have performance figures showing the caching makes
a noticeable difference.

>  /* Function for determing whether guest cp register reads and writes should
> @@ -1012,11 +1031,11 @@ static inline bool access_secure_reg(CPUARMState *env)
>   */
>  #define A32_BANKED_CURRENT_REG_GET(_env, _regname)        \
>      A32_BANKED_REG_GET((_env), _regname,                \
> -                       ((!arm_el_is_aa64((_env), 3) && arm_is_secure(_env))))
> +                       (arm_is_secure(_env) && !arm_el_is_aa64((_env), 3)))
>
>  #define A32_BANKED_CURRENT_REG_SET(_env, _regname, _val)                       \
>      A32_BANKED_REG_SET((_env), _regname,                                    \
> -                       ((!arm_el_is_aa64((_env), 3) && arm_is_secure(_env))),  \
> +                       (arm_is_secure(_env) && !arm_el_is_aa64((_env), 3)), \
>                         (_val))
>
>  void arm_cpu_list(FILE *f, fprintf_function cpu_fprintf);

This hunk, and the two I've quoted below, are trying to avoid calling
arm_el_is_aa64() for an unimplemented EL. I think it makes sense
to do that, but they should be a different patch from the one
that changes the implementation of arm_el_is_aa64().

> @@ -1583,7 +1602,9 @@ static inline bool arm_excp_unmasked(CPUState *cs, unsigned int excp_idx,
>       * interrupt.
>       */
>      if ((target_el > cur_el) && (target_el != 1)) {
> -        if (arm_el_is_aa64(env, 3) || ((scr || hcr) && (!secure))) {
> +        /* ARM_FEATURE_AARCH64 enabled means the higher EL is AArch64. */

"highest"

> +        if (arm_feature(env, ARM_FEATURE_AARCH64) ||
> +            ((scr || hcr) && (!secure))) {
>              unmasked = 1;
>          }
>      }

I think this change is correct, though it took me a while to
convince myself of it. (Roughly, I think that in the case where
we have AArch64 EL2 but not EL3 then the (hcr && !secure) half
of the condition is going to be true anyway, so it doesn't matter
what the test on the left side of the || is.)

> @@ -5088,7 +5104,8 @@ uint32_t arm_phys_excp_target_el(CPUState *cs, uint32_t excp_idx,
>      int scr;
>      int hcr;
>      int target_el;
> -    int is64 = arm_el_is_aa64(env, 3);
> +    /* Is the higher EL AArch64? */

"highest"

> +    int is64 = arm_feature(env, ARM_FEATURE_AARCH64);
>
>      switch (excp_idx) {
>      case EXCP_IRQ:

I think this change is insufficient. If there is no EL3 but
there is an EL2 and EL2 is AArch64, then we should route
exceptions in the same way as if EL3 existed and was AArch64.
For this to be the case we need to also get rw correct (ie
reflecting the width of EL2). So something like:

    if (arm_feature(env, ARM_FEATURE_EL3)) {
        rw = ((env->cp15.scr_el3 & SCR_RW) == SCR_RW);
    } else {
        rw = is64;
    }

> diff --git a/target-arm/machine.c b/target-arm/machine.c
> index 32adfe7..e5bfb59 100644
> --- a/target-arm/machine.c
> +++ b/target-arm/machine.c
> @@ -265,6 +265,11 @@ static int cpu_post_load(void *opaque, int version_id)
>          }
>      }
>
> +    /* It is not nesessary, because this call was made on
> +     * HCR and SCR writes above, but just to make sure.
> +     */
> +    calc_el_bitness(&cpu->env);

We should generally be confident about our code. Either
this call *is* necessary, in which case the comment should
say why, or it is *not* necessary, in which case it shouldn't
be here (but you can have a comment saying why it isn't).
In this case I think you are correct that it's not needed.
(Some of the only-happens-on-value-change behaviour of register
writes needs care in register loading, because you want
the behaviour to happen anyway even if the old stale value
in the CPU state happened to be the same as the new value
from incoming migration data. But in this case we're OK
because if the incoming migration data happens to be the
same as the old data for all of the SCR and HCR bits then
the old cached el_is_aa64[] values will all still be correct,
even though we will never actually call calc_el_bitness().)

> +
>      hw_breakpoint_update_all(cpu);
>      hw_watchpoint_update_all(cpu);

thanks
-- PMM
Sergey Sorokin Sept. 9, 2015, 3:10 p.m. UTC | #2
08.09.2015, 16:44, "Peter Maydell" <peter.maydell@linaro.org>:
>On 2 September 2015 at 16:39, Sergey Sorokin <afarallax@yandex.ru> wrote:
>> Function arm_el_is_aa64() was fixed to support EL2 and EL3.
>> It is needed for a future support of EL2 and/or EL3,
>> and 32 bit EL1/EL2 support for ARMv8 cpu.
>> ARM_FEATURE_AARCH64 flag means that the highest exception level is
>> in AArch64 state. The state of lower exception levels is controlled
>> by the HCR_EL2.RW, SCR_EL3.RW and SCR_EL3.NS bits.
>> If EL2 or EL3 is not permitted by the appropriate ARM_FEATURE flag,
>> then the function arm_el_is_aa64() aborts on an attempt to get
>> the bitness of this EL. So we need to be sure that EL passed to
>> the function is enabled. Therefore some conditional statements was
>> slightly changed to check it.
>>
>> Signed-off-by: Sergey Sorokin <afarallax@yandex.ru>
>> ---
>> Changes since previous version:
>> * Some typos was fixed.
>> * Extended comments was added.
>> * The initial patch was divided in two parts.
>> * The erroneous changes in arm_excp_unmasked() function was fixed.
>>
>>  hw/arm/boot.c        |  4 ++++
>>  target-arm/cpu.c     | 59 ++++++++++++++++++++++++++++++++++++++++++++++++++++
>>  target-arm/cpu.h     | 51 ++++++++++++++++++++++++++++++++-------------
>>  target-arm/helper.c  | 23 +++++++++++++++++++-
>>  target-arm/machine.c |  5 +++++
>>  5 files changed, 126 insertions(+), 16 deletions(-)
>>
>> diff --git a/hw/arm/boot.c b/hw/arm/boot.c
>> index 5b969cd..e5d00af 100644
>> --- a/hw/arm/boot.c
>> +++ b/hw/arm/boot.c
>> @@ -486,6 +486,10 @@ static void do_cpu_reset(void *opaque)
>>                  if (!info->secure_boot) {
>>                      /* Linux expects non-secure state */
>>                      env->cp15.scr_el3 |= SCR_NS;
>> +                    /* EL bitness can depend on SCR.NS bit, so we need
>> +                     * recalc it.
>> +                     */
>> +                    calc_el_bitness(env);
>>                  }
>>              }
>>
>> diff --git a/target-arm/cpu.c b/target-arm/cpu.c
>> index cc6c6f3..af4beab 100644
>> --- a/target-arm/cpu.c
>> +++ b/target-arm/cpu.c
>> @@ -100,6 +100,64 @@ static void cp_reg_check_reset(gpointer key, gpointer value,  gpointer opaque)
>>      assert(oldvalue == newvalue);
>>  }
>>
>> +/* This function must be called if SCR_EL3.RW, SCR_EL3.NS
>> + * or HCR_EL2.RW has been changed.
>> + */
>> +void calc_el_bitness(CPUARMState *env)
>> +{
>> +    /* -1 is invalid value */
>> +    env->el_is_aa64[0] = -1;
>> +
>> +    /* If the highest EL is not AArch64, then all ELs are AArch32. */
>> +    if (!arm_feature(env, ARM_FEATURE_AARCH64)) {
>> +        if (arm_feature(env, ARM_FEATURE_EL3)) {
>> +            env->el_is_aa64[3] = 0;
>> +        } else {
>> +            env->el_is_aa64[3] = -1;
>> +        }
>> +
>> +        if (arm_feature(env, ARM_FEATURE_EL2)) {
>> +            env->el_is_aa64[2] = 0;
>> +        } else {
>> +            env->el_is_aa64[2] = -1;
>> +        }
>> +
>> +        env->el_is_aa64[1] = 0;
>> +        return;
>> +    }
>> +
>> +    if (arm_feature(env, ARM_FEATURE_EL3)) {
>> +        env->el_is_aa64[3] = 1;
>> +        int8_t scr_rw = env->cp15.scr_el3 & SCR_RW ? 1 : 0;
>> +
>> +        if (arm_feature(env, ARM_FEATURE_EL2) && (env->cp15.scr_el3 & SCR_NS)) {
>> +            if (scr_rw) {
>> +                env->el_is_aa64[2] = 1;
>> +                env->el_is_aa64[1] = env->cp15.hcr_el2 & HCR_RW ? 1 : 0;
>> +            } else {
>> +                env->el_is_aa64[2] = 0;
>> +                env->el_is_aa64[1] = 0;
>> +            }
>> +        } else {
>> +            /* If there is no EL2 or if secure. */
>> +            env->el_is_aa64[2] = -1;
>> +            env->el_is_aa64[1] = scr_rw;
>> +        }
>> +    } else {
>> +        env->el_is_aa64[3] = -1;
>> +
>> +        if (arm_feature(env, ARM_FEATURE_EL2)) {
>> +            /* The highest EL is EL2, and it is AArch64. */
>> +            env->el_is_aa64[2] = 1;
>> +            env->el_is_aa64[1] = env->cp15.hcr_el2 & HCR_RW ? 1 : 0;
>> +        } else {
>> +            /* The highest EL is EL1, and it is AArch64. */
>> +            env->el_is_aa64[2] = -1;
>> +            env->el_is_aa64[1] = 1;
>> +        }
>> +    }
>> +}
>
>This is definitely rather overlarge for what it's doing. I think
>it's sufficient to write:
>
>
>void calc_el_bitness(CPUARMState *env)
>{
>    bool aa64 = arm_feature(env, ARM_FEATURE_AARCH64);
>
>    if (arm_feature(env, ARM_FEATURE_EL3)) {
>        env->el_is_aa64[3] = aa64;
>        aa64 = aa64 && (env->cp15.scr_el3 & SCR_RW);
>    } else {
>        env->el_is_aa64[3] = -1;
>    }
>
>    if (arm_feature(env, ARM_FEATURE_EL2) && !arm_is_secure_below_el3(env)) {
>        env->el_is_aa64[2] = aa64;
>        aa64 = aa64 && (env->cp15.hcr_el2 & HCR_RW);
>    } else {
>        env->el_is_aa64[2] = -1;
>    }
>
>    env->el_is_aa64[1] = aa64;
>}

I agree, this version looks better.

>
>And in fact this is pretty simple, so we could really just have
>
>static inline bool arm_el_is_aa64(CPUARMState *env, int el)
>{
>    /* This isn't valid for EL0 (if we're in EL0, is_a64() is what you want,
>     * and if we're not in EL0 then the state of EL0 isn't well defined.)
>     */
>    assert(el >= 1 && el <= 3);
>    bool aa64 = arm_feature(env, ARM_FEATURE_AARCH64);
>
>    if (el == 3) {
>        return aa64;
>    }
>
>    if (arm_feature(env, ARM_FEATURE_EL3)) {
>        aa64 = aa64 && (env->cp15.scr_el3 & SCR_RW);
>    }
>
>    if (el == 2) {
>        return aa64;
>    }
>
>    if (arm_feature(env, ARM_FEATURE_EL2) && !arm_is_secure_below_el3(env)) {
>        aa64 = aa64 && (env->cp15.hcr_el2 & HCR_RW);
>    }
>
>    return aa64;
>}

I supposed that the function should abort on attempt
to call it with unimplemented EL. Just to avoid bugs.
This your version does not abort in such case.
With the check it will not be so short.

>
>and not bother with the caching at all. I think I'd prefer that
>unless you have performance figures showing the caching makes
>a noticeable difference.

arm_el_is_aa64() is used (maybe indirectly) in address translations,
in do_interrupt() functions, some sysreg access,
cpu_get_tb_cpu_state () and arm_current_el() functions, etc.
And obviously it will be used more widely in future.
I think it's enough to show the caching makes sense.
Nevertheless, if you disagree with me, I could remove the caching
from the patch.

>>  /* Function for determing whether guest cp register reads and writes should
>> @@ -1012,11 +1031,11 @@ static inline bool access_secure_reg(CPUARMState *env)
>>   */
>>  #define A32_BANKED_CURRENT_REG_GET(_env, _regname)        \
>>      A32_BANKED_REG_GET((_env), _regname,                \
>> -                       ((!arm_el_is_aa64((_env), 3) && arm_is_secure(_env))))
>> +                       (arm_is_secure(_env) && !arm_el_is_aa64((_env), 3)))
>>
>>  #define A32_BANKED_CURRENT_REG_SET(_env, _regname, _val)                       \
>>      A32_BANKED_REG_SET((_env), _regname,                                    \
>> -                       ((!arm_el_is_aa64((_env), 3) && arm_is_secure(_env))),  \
>> +                       (arm_is_secure(_env) && !arm_el_is_aa64((_env), 3)), \
>>                         (_val))
>>
>>  void arm_cpu_list(FILE *f, fprintf_function cpu_fprintf);
>
>This hunk, and the two I've quoted below, are trying to avoid calling
>arm_el_is_aa64() for an unimplemented EL. I think it makes sense
>to do that, but they should be a different patch from the one
>that changes the implementation of arm_el_is_aa64().
>
>> @@ -1583,7 +1602,9 @@ static inline bool arm_excp_unmasked(CPUState *cs, unsigned int excp_idx,
>>       * interrupt.
>>       */
>>      if ((target_el > cur_el) && (target_el != 1)) {
>> -        if (arm_el_is_aa64(env, 3) || ((scr || hcr) && (!secure))) {
>> +        /* ARM_FEATURE_AARCH64 enabled means the higher EL is AArch64. */
>
>"highest"
>
>> +        if (arm_feature(env, ARM_FEATURE_AARCH64) ||
>> +            ((scr || hcr) && (!secure))) {
>>              unmasked = 1;
>>          }
>>      }
>
>I think this change is correct, though it took me a while to
>convince myself of it. (Roughly, I think that in the case where
>we have AArch64 EL2 but not EL3 then the (hcr && !secure) half
>of the condition is going to be true anyway, so it doesn't matter
>what the test on the left side of the || is.)
>

The change makes the check more intelligible in case of unimplemented EL3.
Moreover, arm_el_is_aa64() will abort with el argument == 3
if there is no EL3 in current configuration.

>> @@ -5088,7 +5104,8 @@ uint32_t arm_phys_excp_target_el(CPUState *cs, uint32_t excp_idx,
>>      int scr;
>>      int hcr;
>>      int target_el;
>> -    int is64 = arm_el_is_aa64(env, 3);
>> +    /* Is the higher EL AArch64? */
>
>"highest"
>
>> +    int is64 = arm_feature(env, ARM_FEATURE_AARCH64);
>>
>>      switch (excp_idx) {
>>      case EXCP_IRQ:
>
>I think this change is insufficient. If there is no EL3 but
>there is an EL2 and EL2 is AArch64, then we should route
>exceptions in the same way as if EL3 existed and was AArch64.
>For this to be the case we need to also get rw correct (ie
>reflecting the width of EL2). So something like:
>
>    if (arm_feature(env, ARM_FEATURE_EL3)) {
>        rw = ((env->cp15.scr_el3 & SCR_RW) == SCR_RW);
>    } else {
>        rw = is64;
>    }
>

Yes, I agree.

>> diff --git a/target-arm/machine.c b/target-arm/machine.c
>> index 32adfe7..e5bfb59 100644
>> --- a/target-arm/machine.c
>> +++ b/target-arm/machine.c
>> @@ -265,6 +265,11 @@ static int cpu_post_load(void *opaque, int version_id)
>>          }
>>      }
>>
>> +    /* It is not nesessary, because this call was made on
>> +     * HCR and SCR writes above, but just to make sure.
>> +     */
>> +    calc_el_bitness(&cpu->env);
>
>We should generally be confident about our code. Either
>this call *is* necessary, in which case the comment should
>say why, or it is *not* necessary, in which case it shouldn't
>be here (but you can have a comment saying why it isn't).
>In this case I think you are correct that it's not needed.
>(Some of the only-happens-on-value-change behaviour of register
>writes needs care in register loading, because you want
>the behaviour to happen anyway even if the old stale value
>in the CPU state happened to be the same as the new value
>from incoming migration data. But in this case we're OK
>because if the incoming migration data happens to be the
>same as the old data for all of the SCR and HCR bits then
>the old cached el_is_aa64[] values will all still be correct,
>even though we will never actually call calc_el_bitness().)
>

I have made this change acording to your last comment
on the first version of the patch. I will be glad to remove it :)
Peter Maydell Sept. 9, 2015, 5:14 p.m. UTC | #3
On 9 September 2015 at 16:10, Sergey Sorokin <afarallax@yandex.ru> wrote:
> 08.09.2015, 16:44, "Peter Maydell" <peter.maydell@linaro.org>:
>>And in fact this is pretty simple, so we could really just have
>>
>>static inline bool arm_el_is_aa64(CPUARMState *env, int el)
>>{
>>    /* This isn't valid for EL0 (if we're in EL0, is_a64() is what you want,
>>     * and if we're not in EL0 then the state of EL0 isn't well defined.)
>>     */
>>    assert(el >= 1 && el <= 3);
>>    bool aa64 = arm_feature(env, ARM_FEATURE_AARCH64);
>>
>>    if (el == 3) {
>>        return aa64;
>>    }
>>
>>    if (arm_feature(env, ARM_FEATURE_EL3)) {
>>        aa64 = aa64 && (env->cp15.scr_el3 & SCR_RW);
>>    }
>>
>>    if (el == 2) {
>>        return aa64;
>>    }
>>
>>    if (arm_feature(env, ARM_FEATURE_EL2) && !arm_is_secure_below_el3(env)) {
>>        aa64 = aa64 && (env->cp15.hcr_el2 & HCR_RW);
>>    }
>>
>>    return aa64;
>>}
>
> I supposed that the function should abort on attempt
> to call it with unimplemented EL. Just to avoid bugs.
> This your version does not abort in such case.
> With the check it will not be so short.

Assertions are nice when they're easy, but we don't have to
necessarily make the code more complex purely in order to try
to provide a place to assert. In practice I would expect that
pretty much all 64-bit CPUs we emulate will implement both
EL2 and EL3 anyway.

>>and not bother with the caching at all. I think I'd prefer that
>>unless you have performance figures showing the caching makes
>>a noticeable difference.
>
> arm_el_is_aa64() is used (maybe indirectly) in address translations,
> in do_interrupt() functions, some sysreg access,
> cpu_get_tb_cpu_state () and arm_current_el() functions, etc.
> And obviously it will be used more widely in future.
> I think it's enough to show the caching makes sense.
> Nevertheless, if you disagree with me, I could remove the caching
> from the patch.

If you can show that the cacheing is worth having (by showing
an actual speedup on some workload), then that's fine. But
it does complicate the code, so I'd prefer not to unless we
have some evidence for the benefit side of things. Gut feeling
about "I think this will be faster" is often inaccurate.

>>>  /* Function for determing whether guest cp register reads and writes should
>>> @@ -1012,11 +1031,11 @@ static inline bool access_secure_reg(CPUARMState *env)
>>>   */
>>>  #define A32_BANKED_CURRENT_REG_GET(_env, _regname)        \
>>>      A32_BANKED_REG_GET((_env), _regname,                \
>>> -                       ((!arm_el_is_aa64((_env), 3) && arm_is_secure(_env))))
>>> +                       (arm_is_secure(_env) && !arm_el_is_aa64((_env), 3)))
>>>
>>>  #define A32_BANKED_CURRENT_REG_SET(_env, _regname, _val)                       \
>>>      A32_BANKED_REG_SET((_env), _regname,                                    \
>>> -                       ((!arm_el_is_aa64((_env), 3) && arm_is_secure(_env))),  \
>>> +                       (arm_is_secure(_env) && !arm_el_is_aa64((_env), 3)), \
>>>                         (_val))
>>>
>>>  void arm_cpu_list(FILE *f, fprintf_function cpu_fprintf);
>>
>>This hunk, and the two I've quoted below, are trying to avoid calling
>>arm_el_is_aa64() for an unimplemented EL. I think it makes sense
>>to do that, but they should be a different patch from the one
>>that changes the implementation of arm_el_is_aa64().
>>
>>> @@ -1583,7 +1602,9 @@ static inline bool arm_excp_unmasked(CPUState *cs, unsigned int excp_idx,
>>>       * interrupt.
>>>       */
>>>      if ((target_el > cur_el) && (target_el != 1)) {
>>> -        if (arm_el_is_aa64(env, 3) || ((scr || hcr) && (!secure))) {
>>> +        /* ARM_FEATURE_AARCH64 enabled means the higher EL is AArch64. */
>>
>>"highest"
>>
>>> +        if (arm_feature(env, ARM_FEATURE_AARCH64) ||
>>> +            ((scr || hcr) && (!secure))) {
>>>              unmasked = 1;
>>>          }
>>>      }
>>
>>I think this change is correct, though it took me a while to
>>convince myself of it. (Roughly, I think that in the case where
>>we have AArch64 EL2 but not EL3 then the (hcr && !secure) half
>>of the condition is going to be true anyway, so it doesn't matter
>>what the test on the left side of the || is.)
>>
>
> The change makes the check more intelligible in case of unimplemented EL3.
> Moreover, arm_el_is_aa64() will abort with el argument == 3
> if there is no EL3 in current configuration.

Yeah, I see why you've done it. But it's only "more intelligible"
if you can match the code up with what the spec requires...
As I say, I think it's correct, it's just not very obvious.

thanks
-- PMM
diff mbox

Patch

diff --git a/hw/arm/boot.c b/hw/arm/boot.c
index 5b969cd..e5d00af 100644
--- a/hw/arm/boot.c
+++ b/hw/arm/boot.c
@@ -486,6 +486,10 @@  static void do_cpu_reset(void *opaque)
                 if (!info->secure_boot) {
                     /* Linux expects non-secure state */
                     env->cp15.scr_el3 |= SCR_NS;
+                    /* EL bitness can depend on SCR.NS bit, so we need
+                     * recalc it.
+                     */
+                    calc_el_bitness(env);
                 }
             }
 
diff --git a/target-arm/cpu.c b/target-arm/cpu.c
index cc6c6f3..af4beab 100644
--- a/target-arm/cpu.c
+++ b/target-arm/cpu.c
@@ -100,6 +100,64 @@  static void cp_reg_check_reset(gpointer key, gpointer value,  gpointer opaque)
     assert(oldvalue == newvalue);
 }
 
+/* This function must be called if SCR_EL3.RW, SCR_EL3.NS
+ * or HCR_EL2.RW has been changed.
+ */
+void calc_el_bitness(CPUARMState *env)
+{
+    /* -1 is invalid value */
+    env->el_is_aa64[0] = -1;
+
+    /* If the highest EL is not AArch64, then all ELs are AArch32. */
+    if (!arm_feature(env, ARM_FEATURE_AARCH64)) {
+        if (arm_feature(env, ARM_FEATURE_EL3)) {
+            env->el_is_aa64[3] = 0;
+        } else {
+            env->el_is_aa64[3] = -1;
+        }
+
+        if (arm_feature(env, ARM_FEATURE_EL2)) {
+            env->el_is_aa64[2] = 0;
+        } else {
+            env->el_is_aa64[2] = -1;
+        }
+
+        env->el_is_aa64[1] = 0;
+        return;
+    }
+
+    if (arm_feature(env, ARM_FEATURE_EL3)) {
+        env->el_is_aa64[3] = 1;
+        int8_t scr_rw = env->cp15.scr_el3 & SCR_RW ? 1 : 0;
+
+        if (arm_feature(env, ARM_FEATURE_EL2) && (env->cp15.scr_el3 & SCR_NS)) {
+            if (scr_rw) {
+                env->el_is_aa64[2] = 1;
+                env->el_is_aa64[1] = env->cp15.hcr_el2 & HCR_RW ? 1 : 0;
+            } else {
+                env->el_is_aa64[2] = 0;
+                env->el_is_aa64[1] = 0;
+            }
+        } else {
+            /* If there is no EL2 or if secure. */
+            env->el_is_aa64[2] = -1;
+            env->el_is_aa64[1] = scr_rw;
+        }
+    } else {
+        env->el_is_aa64[3] = -1;
+
+        if (arm_feature(env, ARM_FEATURE_EL2)) {
+            /* The highest EL is EL2, and it is AArch64. */
+            env->el_is_aa64[2] = 1;
+            env->el_is_aa64[1] = env->cp15.hcr_el2 & HCR_RW ? 1 : 0;
+        } else {
+            /* The highest EL is EL1, and it is AArch64. */
+            env->el_is_aa64[2] = -1;
+            env->el_is_aa64[1] = 1;
+        }
+    }
+}
+
 /* CPUClass::reset() */
 static void arm_cpu_reset(CPUState *s)
 {
@@ -151,6 +209,7 @@  static void arm_cpu_reset(CPUState *s)
         env->cp15.cpacr_el1 = deposit64(env->cp15.cpacr_el1, 20, 4, 0xf);
 #endif
     }
+    calc_el_bitness(env);
 
 #if defined(CONFIG_USER_ONLY)
     env->uncached_cpsr = ARM_CPU_MODE_USR;
diff --git a/target-arm/cpu.h b/target-arm/cpu.h
index 31825d3..222ad3d 100644
--- a/target-arm/cpu.h
+++ b/target-arm/cpu.h
@@ -177,6 +177,8 @@  typedef struct CPUARMState {
     uint64_t elr_el[4]; /* AArch64 exception link regs  */
     uint64_t sp_el[4]; /* AArch64 banked stack pointers */
 
+    int8_t el_is_aa64[4]; /* Cached flags describing the EL bitness */
+
     /* System control coprocessor (cp15) */
     struct {
         uint32_t c0_cpuid;
@@ -924,7 +926,7 @@  static inline bool arm_is_secure_below_el3(CPUARMState *env)
     if (arm_feature(env, ARM_FEATURE_EL3)) {
         return !(env->cp15.scr_el3 & SCR_NS);
     } else {
-        /* If EL2 is not supported then the secure state is implementation
+        /* If EL3 is not supported then the secure state is implementation
          * defined, in which case QEMU defaults to non-secure.
          */
         return false;
@@ -959,21 +961,38 @@  static inline bool arm_is_secure(CPUARMState *env)
 }
 #endif
 
-/* Return true if the specified exception level is running in AArch64 state. */
+/**
+ * calc_el_bitness:
+ * @env: The CPU where we need to recalculate the cached EL bitness.
+ *
+ * Calculates the bitness for each enabled EL and caches the values.
+ * The value -1 means the EL is not enabled in current configuration.
+ *
+ * This function must be called if SCR_EL3.RW, SCR_EL3.NS
+ * or HCR_EL2.RW has been changed.
+ */
+void calc_el_bitness(CPUARMState *env);
+
+/**
+ * arm_el_is_aa64:
+ * @env: The CPU where we want to know EL bitness.
+ * @el: The EL whose bitness we want to know, except EL0.
+ *
+ * Note: This function aborts if @el is not enabled in current configuration,
+ * or if @el == 0.
+ *
+ * Returns: %true if the specified exception level is running in AArch64 state;
+ * %false otherwise.
+ */
 static inline bool arm_el_is_aa64(CPUARMState *env, int el)
 {
-    /* We don't currently support EL2, and this isn't valid for EL0
-     * (if we're in EL0, is_a64() is what you want, and if we're not in EL0
-     * then the state of EL0 isn't well defined.)
+    /* This isn't valid for EL0 (if we're in EL0, is_a64() is what you want,
+     * and if we're not in EL0 then the state of EL0 isn't well defined.)
      */
-    assert(el == 1 || el == 3);
+    assert(el >= 1 && el <= 3);
+    assert(env->el_is_aa64[el] >= 0);
 
-    /* AArch64-capable CPUs always run with EL1 in AArch64 mode. This
-     * is a QEMU-imposed simplification which we may wish to change later.
-     * If we in future support EL2 and/or EL3, then the state of lower
-     * exception levels is controlled by the HCR.RW and SCR.RW bits.
-     */
-    return arm_feature(env, ARM_FEATURE_AARCH64);
+    return env->el_is_aa64[el];
 }
 
 /* Function for determing whether guest cp register reads and writes should
@@ -1012,11 +1031,11 @@  static inline bool access_secure_reg(CPUARMState *env)
  */
 #define A32_BANKED_CURRENT_REG_GET(_env, _regname)        \
     A32_BANKED_REG_GET((_env), _regname,                \
-                       ((!arm_el_is_aa64((_env), 3) && arm_is_secure(_env))))
+                       (arm_is_secure(_env) && !arm_el_is_aa64((_env), 3)))
 
 #define A32_BANKED_CURRENT_REG_SET(_env, _regname, _val)                       \
     A32_BANKED_REG_SET((_env), _regname,                                    \
-                       ((!arm_el_is_aa64((_env), 3) && arm_is_secure(_env))),  \
+                       (arm_is_secure(_env) && !arm_el_is_aa64((_env), 3)), \
                        (_val))
 
 void arm_cpu_list(FILE *f, fprintf_function cpu_fprintf);
@@ -1583,7 +1602,9 @@  static inline bool arm_excp_unmasked(CPUState *cs, unsigned int excp_idx,
      * interrupt.
      */
     if ((target_el > cur_el) && (target_el != 1)) {
-        if (arm_el_is_aa64(env, 3) || ((scr || hcr) && (!secure))) {
+        /* ARM_FEATURE_AARCH64 enabled means the higher EL is AArch64. */
+        if (arm_feature(env, ARM_FEATURE_AARCH64) ||
+            ((scr || hcr) && (!secure))) {
             unmasked = 1;
         }
     }
diff --git a/target-arm/helper.c b/target-arm/helper.c
index 7df1f06..4a1c715 100644
--- a/target-arm/helper.c
+++ b/target-arm/helper.c
@@ -838,6 +838,7 @@  static void scr_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value)
      * current execution mode before directly using the feature bit.
      */
     uint32_t valid_mask = SCR_AARCH64_MASK | SCR_AARCH32_MASK;
+    uint64_t old_value;
 
     if (!arm_feature(env, ARM_FEATURE_EL2)) {
         valid_mask &= ~SCR_HCE;
@@ -856,7 +857,14 @@  static void scr_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value)
 
     /* Clear all-context RES0 bits.  */
     value &= valid_mask;
+    old_value = raw_read(env, ri);
     raw_write(env, ri, value);
+
+    /* We need to recalculate EL bitness if SCR_EL3.RW, SCR_EL3.NS
+     * or HCR_EL2.RW has been changed. */
+    if ((old_value ^ value) & (SCR_RW | SCR_NS)) {
+        calc_el_bitness(env);
+    }
 }
 
 static uint64_t ccsidr_read(CPUARMState *env, const ARMCPRegInfo *ri)
@@ -3146,6 +3154,7 @@  static void hcr_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value)
 {
     ARMCPU *cpu = arm_env_get_cpu(env);
     uint64_t valid_mask = HCR_MASK;
+    uint64_t old_value;
 
     if (arm_feature(env, ARM_FEATURE_EL3)) {
         valid_mask &= ~HCR_HCD;
@@ -3164,7 +3173,14 @@  static void hcr_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value)
     if ((raw_read(env, ri) ^ value) & (HCR_VM | HCR_PTW | HCR_DC)) {
         tlb_flush(CPU(cpu), 1);
     }
+    old_value = raw_read(env, ri);
     raw_write(env, ri, value);
+
+    /* We need to recalculate EL bitness if SCR_EL3.RW, SCR_EL3.NS
+     * or HCR_EL2.RW has been changed. */
+    if ((old_value ^ value) & HCR_RW) {
+        calc_el_bitness(env);
+    }
 }
 
 static const ARMCPRegInfo el2_cp_reginfo[] = {
@@ -5088,7 +5104,8 @@  uint32_t arm_phys_excp_target_el(CPUState *cs, uint32_t excp_idx,
     int scr;
     int hcr;
     int target_el;
-    int is64 = arm_el_is_aa64(env, 3);
+    /* Is the higher EL AArch64? */
+    int is64 = arm_feature(env, ARM_FEATURE_AARCH64);
 
     switch (excp_idx) {
     case EXCP_IRQ:
@@ -5645,6 +5662,10 @@  void arm_cpu_do_interrupt(CPUState *cs)
 
     if ((env->uncached_cpsr & CPSR_M) == ARM_CPU_MODE_MON) {
         env->cp15.scr_el3 &= ~SCR_NS;
+        /* SCR.NS bit is changed, but we don't need to call calc_el_bitness()
+         * here, because the transition to AArch32 EL3 supposes that
+         * all lower ELs are always 32-bit.
+         */
     }
 
     switch_mode (env, new_mode);
diff --git a/target-arm/machine.c b/target-arm/machine.c
index 32adfe7..e5bfb59 100644
--- a/target-arm/machine.c
+++ b/target-arm/machine.c
@@ -265,6 +265,11 @@  static int cpu_post_load(void *opaque, int version_id)
         }
     }
 
+    /* It is not nesessary, because this call was made on
+     * HCR and SCR writes above, but just to make sure.
+     */
+    calc_el_bitness(&cpu->env);
+
     hw_breakpoint_update_all(cpu);
     hw_watchpoint_update_all(cpu);