diff mbox series

[1/1] target/arm: Split out aa64_va_parameter_tbi, aa64_va_parameter_tbid

Message ID 20200206130847.11166-2-richard.henderson@linaro.org
State New
Headers show
Series target/arm: Reduce aa64_va_parameter overhead | expand

Commit Message

Richard Henderson Feb. 6, 2020, 1:08 p.m. UTC
For the purpose of rebuild_hflags_a64, we do not need to compute
all of the va parameters, only tbi.  Moreover, we can compute them
in a form that is more useful to storing in hflags.

This eliminates the need for aa64_va_parameter_both, so fold that
in to aa64_va_parameter.  The remaining calls to aa64_va_parameter
are in get_phys_addr_lpae and in pauth_helper.c.

This reduces the total cpu consumption of aa64_va_parameter in a
kernel boot plus a kvm guest kernel boot from 3% to 0.5%.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/arm/internals.h |  3 --
 target/arm/helper.c    | 68 +++++++++++++++++++++++-------------------
 2 files changed, 37 insertions(+), 34 deletions(-)

Comments

Peter Maydell Feb. 7, 2020, 2 p.m. UTC | #1
On Thu, 6 Feb 2020 at 13:08, Richard Henderson
<richard.henderson@linaro.org> wrote:
>
> For the purpose of rebuild_hflags_a64, we do not need to compute
> all of the va parameters, only tbi.  Moreover, we can compute them
> in a form that is more useful to storing in hflags.
>
> This eliminates the need for aa64_va_parameter_both, so fold that
> in to aa64_va_parameter.  The remaining calls to aa64_va_parameter
> are in get_phys_addr_lpae and in pauth_helper.c.
>
> This reduces the total cpu consumption of aa64_va_parameter in a
> kernel boot plus a kvm guest kernel boot from 3% to 0.5%.
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>  target/arm/internals.h |  3 --
>  target/arm/helper.c    | 68 +++++++++++++++++++++++-------------------
>  2 files changed, 37 insertions(+), 34 deletions(-)
>
> diff --git a/target/arm/internals.h b/target/arm/internals.h
> index 6d4a942bde..6ac84bbca7 100644
> --- a/target/arm/internals.h
> +++ b/target/arm/internals.h
> @@ -1042,15 +1042,12 @@ typedef struct ARMVAParameters {
>      unsigned tsz    : 8;
>      unsigned select : 1;
>      bool tbi        : 1;
> -    bool tbid       : 1;
>      bool epd        : 1;
>      bool hpd        : 1;
>      bool using16k   : 1;
>      bool using64k   : 1;
>  } ARMVAParameters;
>
> -ARMVAParameters aa64_va_parameters_both(CPUARMState *env, uint64_t va,
> -                                        ARMMMUIdx mmu_idx);
>  ARMVAParameters aa64_va_parameters(CPUARMState *env, uint64_t va,
>                                     ARMMMUIdx mmu_idx, bool data);
>
> diff --git a/target/arm/helper.c b/target/arm/helper.c
> index 7d15d5c933..d2e9332696 100644
> --- a/target/arm/helper.c
> +++ b/target/arm/helper.c
> @@ -10067,12 +10067,34 @@ static uint8_t convert_stage2_attrs(CPUARMState *env, uint8_t s2attrs)
>  }
>  #endif /* !CONFIG_USER_ONLY */
>
> -ARMVAParameters aa64_va_parameters_both(CPUARMState *env, uint64_t va,
> -                                        ARMMMUIdx mmu_idx)
> +static int aa64_va_parameter_tbi(uint64_t tcr, ARMMMUIdx mmu_idx)
> +{
> +    if (regime_has_2_ranges(mmu_idx)) {
> +        return extract64(tcr, 37, 2);
> +    } else if (mmu_idx == ARMMMUIdx_Stage2) {
> +        return 0; /* VTCR_EL2 */
> +    } else {
> +        return extract32(tcr, 20, 1);
> +    }

So, this function returns either the two TBI bits, for
the 2-ranges case, or a single TBI bit with bit 1 always
zero, in the 1-range case...

>
> +    /* Present TBI as a composite with TBID.  */
> +    tbi = aa64_va_parameter_tbi(tcr, mmu_idx);
> +    if (!data) {
> +        tbi &= ~aa64_va_parameter_tbid(tcr, mmu_idx);
> +    }
> +    tbi = (tbi >> select) & 1;

...but aa64_va_parameters() always sets
    select = extract64(va, 55, 1);
even for the 1-range case, and then we assume in this bit
of code that we can pull the corresponding bit out of tbi.

Don't we need to either duplicate the bit returned by
aa64_va_parameter_tbi() in the 1-range case, or else
only shift tbi by 'select' in the 2-range case ?

thanks
-- PMM
Richard Henderson Feb. 7, 2020, 2:30 p.m. UTC | #2
On 2/7/20 2:00 PM, Peter Maydell wrote:
>>
>> +    /* Present TBI as a composite with TBID.  */
>> +    tbi = aa64_va_parameter_tbi(tcr, mmu_idx);
>> +    if (!data) {
>> +        tbi &= ~aa64_va_parameter_tbid(tcr, mmu_idx);
>> +    }
>> +    tbi = (tbi >> select) & 1;
> 
> ...but aa64_va_parameters() always sets
>     select = extract64(va, 55, 1);
> even for the 1-range case, and then we assume in this bit
> of code that we can pull the corresponding bit out of tbi.
> 
> Don't we need to either duplicate the bit returned by
> aa64_va_parameter_tbi() in the 1-range case, or else
> only shift tbi by 'select' in the 2-range case ?

I think we need to force select = 0 in the 1-range case, since "select == 1"
makes no sense in that case.


r~
diff mbox series

Patch

diff --git a/target/arm/internals.h b/target/arm/internals.h
index 6d4a942bde..6ac84bbca7 100644
--- a/target/arm/internals.h
+++ b/target/arm/internals.h
@@ -1042,15 +1042,12 @@  typedef struct ARMVAParameters {
     unsigned tsz    : 8;
     unsigned select : 1;
     bool tbi        : 1;
-    bool tbid       : 1;
     bool epd        : 1;
     bool hpd        : 1;
     bool using16k   : 1;
     bool using64k   : 1;
 } ARMVAParameters;
 
-ARMVAParameters aa64_va_parameters_both(CPUARMState *env, uint64_t va,
-                                        ARMMMUIdx mmu_idx);
 ARMVAParameters aa64_va_parameters(CPUARMState *env, uint64_t va,
                                    ARMMMUIdx mmu_idx, bool data);
 
diff --git a/target/arm/helper.c b/target/arm/helper.c
index 7d15d5c933..d2e9332696 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -10067,12 +10067,34 @@  static uint8_t convert_stage2_attrs(CPUARMState *env, uint8_t s2attrs)
 }
 #endif /* !CONFIG_USER_ONLY */
 
-ARMVAParameters aa64_va_parameters_both(CPUARMState *env, uint64_t va,
-                                        ARMMMUIdx mmu_idx)
+static int aa64_va_parameter_tbi(uint64_t tcr, ARMMMUIdx mmu_idx)
+{
+    if (regime_has_2_ranges(mmu_idx)) {
+        return extract64(tcr, 37, 2);
+    } else if (mmu_idx == ARMMMUIdx_Stage2) {
+        return 0; /* VTCR_EL2 */
+    } else {
+        return extract32(tcr, 20, 1);
+    }
+}
+
+static int aa64_va_parameter_tbid(uint64_t tcr, ARMMMUIdx mmu_idx)
+{
+    if (regime_has_2_ranges(mmu_idx)) {
+        return extract64(tcr, 51, 2);
+    } else if (mmu_idx == ARMMMUIdx_Stage2) {
+        return 0; /* VTCR_EL2 */
+    } else {
+        return extract32(tcr, 29, 1);
+    }
+}
+
+ARMVAParameters aa64_va_parameters(CPUARMState *env, uint64_t va,
+                                   ARMMMUIdx mmu_idx, bool data)
 {
     uint64_t tcr = regime_tcr(env, mmu_idx)->raw_tcr;
-    bool tbi, tbid, epd, hpd, using16k, using64k;
-    int select, tsz;
+    bool epd, hpd, using16k, using64k;
+    int select, tsz, tbi;
 
     /*
      * Bit 55 is always between the two regions, and is canonical for
@@ -10086,11 +10108,9 @@  ARMVAParameters aa64_va_parameters_both(CPUARMState *env, uint64_t va,
         using16k = extract32(tcr, 15, 1);
         if (mmu_idx == ARMMMUIdx_Stage2) {
             /* VTCR_EL2 */
-            tbi = tbid = hpd = false;
+            hpd = false;
         } else {
-            tbi = extract32(tcr, 20, 1);
             hpd = extract32(tcr, 24, 1);
-            tbid = extract32(tcr, 29, 1);
         }
         epd = false;
     } else if (!select) {
@@ -10098,27 +10118,29 @@  ARMVAParameters aa64_va_parameters_both(CPUARMState *env, uint64_t va,
         epd = extract32(tcr, 7, 1);
         using64k = extract32(tcr, 14, 1);
         using16k = extract32(tcr, 15, 1);
-        tbi = extract64(tcr, 37, 1);
         hpd = extract64(tcr, 41, 1);
-        tbid = extract64(tcr, 51, 1);
     } else {
         int tg = extract32(tcr, 30, 2);
         using16k = tg == 1;
         using64k = tg == 3;
         tsz = extract32(tcr, 16, 6);
         epd = extract32(tcr, 23, 1);
-        tbi = extract64(tcr, 38, 1);
         hpd = extract64(tcr, 42, 1);
-        tbid = extract64(tcr, 52, 1);
     }
     tsz = MIN(tsz, 39);  /* TODO: ARMv8.4-TTST */
     tsz = MAX(tsz, 16);  /* TODO: ARMv8.2-LVA  */
 
+    /* Present TBI as a composite with TBID.  */
+    tbi = aa64_va_parameter_tbi(tcr, mmu_idx);
+    if (!data) {
+        tbi &= ~aa64_va_parameter_tbid(tcr, mmu_idx);
+    }
+    tbi = (tbi >> select) & 1;
+
     return (ARMVAParameters) {
         .tsz = tsz,
         .select = select,
         .tbi = tbi,
-        .tbid = tbid,
         .epd = epd,
         .hpd = hpd,
         .using16k = using16k,
@@ -10126,16 +10148,6 @@  ARMVAParameters aa64_va_parameters_both(CPUARMState *env, uint64_t va,
     };
 }
 
-ARMVAParameters aa64_va_parameters(CPUARMState *env, uint64_t va,
-                                   ARMMMUIdx mmu_idx, bool data)
-{
-    ARMVAParameters ret = aa64_va_parameters_both(env, va, mmu_idx);
-
-    /* Present TBI as a composite with TBID.  */
-    ret.tbi &= (data || !ret.tbid);
-    return ret;
-}
-
 #ifndef CONFIG_USER_ONLY
 static ARMVAParameters aa32_va_parameters(CPUARMState *env, uint32_t va,
                                           ARMMMUIdx mmu_idx)
@@ -11955,21 +11967,15 @@  static uint32_t rebuild_hflags_a64(CPUARMState *env, int el, int fp_el,
 {
     uint32_t flags = rebuild_hflags_aprofile(env);
     ARMMMUIdx stage1 = stage_1_mmu_idx(mmu_idx);
-    ARMVAParameters p0 = aa64_va_parameters_both(env, 0, stage1);
+    uint64_t tcr = regime_tcr(env, mmu_idx)->raw_tcr;
     uint64_t sctlr;
     int tbii, tbid;
 
     flags = FIELD_DP32(flags, TBFLAG_ANY, AARCH64_STATE, 1);
 
     /* Get control bits for tagged addresses.  */
-    if (regime_has_2_ranges(mmu_idx)) {
-        ARMVAParameters p1 = aa64_va_parameters_both(env, -1, stage1);
-        tbid = (p1.tbi << 1) | p0.tbi;
-        tbii = tbid & ~((p1.tbid << 1) | p0.tbid);
-    } else {
-        tbid = p0.tbi;
-        tbii = tbid & !p0.tbid;
-    }
+    tbid = aa64_va_parameter_tbi(tcr, mmu_idx);
+    tbii = tbid & ~aa64_va_parameter_tbid(tcr, mmu_idx);
 
     flags = FIELD_DP32(flags, TBFLAG_A64, TBII, tbii);
     flags = FIELD_DP32(flags, TBFLAG_A64, TBID, tbid);