diff mbox series

[4/4] target/arm: Add support for DC CVAP & DC CVADP ins

Message ID 20190910095610.4546-5-beata.michalska@linaro.org
State New
Headers show
Series target/arm: Support for Data Cache Clean up to PoP | expand

Commit Message

Beata Michalska Sept. 10, 2019, 9:56 a.m. UTC
ARMv8.2 introduced support for Data Cache Clean instructions
to PoP (point-of-persistence) - DC CVAP and PoDP (point-of-deep-persistence)
- DV CVADP. Both specify conceptual points in a memory system where all writes
that are to reach them are considered persistent.
The support provided considers both to be actually the same so there is no
distinction between the two. If none is available (there is no backing store
for given memory) both will result in Data Cache Clean up to the point of
coherency. Otherwise sync for the specified range shall be performed.

Signed-off-by: Beata Michalska <beata.michalska@linaro.org>
---
 linux-user/elfload.c       | 18 +++++++++++++++++-
 target/arm/cpu.h           | 13 ++++++++++++-
 target/arm/cpu64.c         |  1 +
 target/arm/helper.c        | 24 ++++++++++++++++++++++++
 target/arm/helper.h        |  1 +
 target/arm/op_helper.c     | 36 ++++++++++++++++++++++++++++++++++++
 target/arm/translate-a64.c |  5 +++++
 7 files changed, 96 insertions(+), 2 deletions(-)

Comments

Alex Bennée Sept. 23, 2019, 11:54 p.m. UTC | #1
Beata Michalska <beata.michalska@linaro.org> writes:

> ARMv8.2 introduced support for Data Cache Clean instructions
> to PoP (point-of-persistence) - DC CVAP and PoDP (point-of-deep-persistence)
> - DV CVADP. Both specify conceptual points in a memory system where all writes
> that are to reach them are considered persistent.
> The support provided considers both to be actually the same so there is no
> distinction between the two. If none is available (there is no backing store
> for given memory) both will result in Data Cache Clean up to the point of
> coherency. Otherwise sync for the specified range shall be performed.
>
> Signed-off-by: Beata Michalska <beata.michalska@linaro.org>
> ---
>  linux-user/elfload.c       | 18 +++++++++++++++++-

There are conflicts from the recent elfload.c tweaks to fix on your next rebase.

>  target/arm/cpu.h           | 13 ++++++++++++-
>  target/arm/cpu64.c         |  1 +
>  target/arm/helper.c        | 24 ++++++++++++++++++++++++
>  target/arm/helper.h        |  1 +
>  target/arm/op_helper.c     | 36 ++++++++++++++++++++++++++++++++++++
>  target/arm/translate-a64.c |  5 +++++
>  7 files changed, 96 insertions(+), 2 deletions(-)
>
> diff --git a/linux-user/elfload.c b/linux-user/elfload.c
> index 3365e192eb..1ec00308d5 100644
> --- a/linux-user/elfload.c
> +++ b/linux-user/elfload.c
> @@ -609,7 +609,12 @@ enum {
>      ARM_HWCAP_A64_PACG          = 1UL << 31,
>  };
>
> +enum {
> +    ARM_HWCAP2_A64_DCPODP   = 1 << 0,
> +};
> +
>  #define ELF_HWCAP get_elf_hwcap()
> +#define ELF_HWCAP2 get_elf_hwcap2()
>
>  static uint32_t get_elf_hwcap(void)
>  {
> @@ -644,12 +649,23 @@ static uint32_t get_elf_hwcap(void)
>      GET_FEATURE_ID(aa64_jscvt, ARM_HWCAP_A64_JSCVT);
>      GET_FEATURE_ID(aa64_sb, ARM_HWCAP_A64_SB);
>      GET_FEATURE_ID(aa64_condm_4, ARM_HWCAP_A64_FLAGM);
> +    GET_FEATURE_ID(aa64_dcpop, ARM_HWCAP_A64_DCPOP);
>
> -#undef GET_FEATURE_ID
>
>      return hwcaps;
>  }
>
> +static uint32_t get_elf_hwcap2(void)
> +{
> +    ARMCPU *cpu = ARM_CPU(thread_cpu);
> +    uint32_t hwcaps = 0;
> +
> +    GET_FEATURE_ID(aa64_dcpodp, ARM_HWCAP2_A64_DCPODP);
> +    return hwcaps;
> +}
> +
> +#undef GET_FEATURE_ID
> +
>  #endif /* not TARGET_AARCH64 */
>  #endif /* TARGET_ARM */
>
> diff --git a/target/arm/cpu.h b/target/arm/cpu.h
> index 297ad5e47a..1713d76590 100644
> --- a/target/arm/cpu.h
> +++ b/target/arm/cpu.h
> @@ -2229,7 +2229,8 @@ static inline uint64_t cpreg_to_kvm_id(uint32_t cpregid)
>  #define ARM_CP_NZCV              (ARM_CP_SPECIAL | 0x0300)
>  #define ARM_CP_CURRENTEL         (ARM_CP_SPECIAL | 0x0400)
>  #define ARM_CP_DC_ZVA            (ARM_CP_SPECIAL | 0x0500)
> -#define ARM_LAST_SPECIAL         ARM_CP_DC_ZVA
> +#define ARM_CP_DC_CVAP           (ARM_CP_SPECIAL | 0x0600)
> +#define ARM_LAST_SPECIAL         ARM_CP_DC_CVAP
>  #define ARM_CP_FPU               0x1000
>  #define ARM_CP_SVE               0x2000
>  #define ARM_CP_NO_GDB            0x4000
> @@ -3572,6 +3573,16 @@ static inline bool isar_feature_aa64_frint(const ARMISARegisters *id)
>      return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, FRINTTS) != 0;
>  }
>
> +static inline bool isar_feature_aa64_dcpop(const ARMISARegisters *id)
> +{
> +    return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, DPB) != 0;
> +}
> +
> +static inline bool isar_feature_aa64_dcpodp(const ARMISARegisters *id)
> +{
> +    return (FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, DPB) >> 1) != 0;
> +}
> +
>  static inline bool isar_feature_aa64_fp16(const ARMISARegisters *id)
>  {
>      /* We always set the AdvSIMD and FP fields identically wrt FP16.  */
> diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c
> index d7f5bf610a..20094f980d 100644
> --- a/target/arm/cpu64.c
> +++ b/target/arm/cpu64.c
> @@ -331,6 +331,7 @@ static void aarch64_max_initfn(Object *obj)
>          cpu->isar.id_aa64isar0 = t;
>
>          t = cpu->isar.id_aa64isar1;
> +        t = FIELD_DP64(t, ID_AA64ISAR1, DPB, 2);
>          t = FIELD_DP64(t, ID_AA64ISAR1, JSCVT, 1);
>          t = FIELD_DP64(t, ID_AA64ISAR1, FCMA, 1);
>          t = FIELD_DP64(t, ID_AA64ISAR1, APA, 1); /* PAuth, architected only */
> diff --git a/target/arm/helper.c b/target/arm/helper.c
> index 507026c915..99ae01b7e7 100644
> --- a/target/arm/helper.c
> +++ b/target/arm/helper.c
> @@ -3843,6 +3843,22 @@ static CPAccessResult aa64_cacheop_access(CPUARMState *env,
>      return CP_ACCESS_OK;
>  }
>
> +static CPAccessResult aa64_cacheop_persist_access(CPUARMState *env,
> +                                                  const ARMCPRegInfo *ri,
> +                                                  bool isread)
> +{
> +    ARMCPU *cpu = env_archcpu(env);
> +    /*
> +     * Access is UNDEF if lacking implementation for either DC CVAP or DC CVADP
> +     * DC CVAP ->  CRm: 0xc
> +     * DC CVADP -> CRm: 0xd
> +     */
> +    return (ri->crm == 0xc && !cpu_isar_feature(aa64_dcpop, cpu)) ||
> +           (ri->crm == 0xd && !cpu_isar_feature(aa64_dcpodp, cpu))
> +           ? CP_ACCESS_TRAP_UNCATEGORIZED
> +           : aa64_cacheop_access(env, ri, isread);
> +}
> +
>  /* See: D4.7.2 TLB maintenance requirements and the TLB maintenance instructions
>   * Page D4-1736 (DDI0487A.b)
>   */
> @@ -4251,6 +4267,14 @@ static const ARMCPRegInfo v8_cp_reginfo[] = {
>        .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 10, .opc2 = 1,
>        .access = PL0_W, .type = ARM_CP_NOP,
>        .accessfn = aa64_cacheop_access },
> +    { .name = "DC_CVAP", .state = ARM_CP_STATE_AA64,
> +      .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 12, .opc2 = 1,
> +      .access = PL0_W, .type = ARM_CP_DC_CVAP,
> +      .accessfn = aa64_cacheop_persist_access },
> +    { .name = "DC_CVADP", .state = ARM_CP_STATE_AA64,
> +      .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 13, .opc2 = 1,
> +      .access = PL0_W, .type = ARM_CP_DC_CVAP,
> +      .accessfn = aa64_cacheop_persist_access },
>      { .name = "DC_CSW", .state = ARM_CP_STATE_AA64,
>        .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 10, .opc2 = 2,
>        .access = PL1_W, .type = ARM_CP_NOP },
> diff --git a/target/arm/helper.h b/target/arm/helper.h
> index 1fb2cb5a77..a850364944 100644
> --- a/target/arm/helper.h
> +++ b/target/arm/helper.h
> @@ -561,6 +561,7 @@ DEF_HELPER_FLAGS_3(crypto_sm4ekey, TCG_CALL_NO_RWG, void, ptr, ptr, ptr)
>  DEF_HELPER_FLAGS_3(crc32, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32)
>  DEF_HELPER_FLAGS_3(crc32c, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32)
>  DEF_HELPER_2(dc_zva, void, env, i64)
> +DEF_HELPER_2(dc_cvap, void, env, i64)
>
>  DEF_HELPER_FLAGS_2(neon_pmull_64_lo, TCG_CALL_NO_RWG_SE, i64, i64, i64)
>  DEF_HELPER_FLAGS_2(neon_pmull_64_hi, TCG_CALL_NO_RWG_SE, i64, i64, i64)
> diff --git a/target/arm/op_helper.c b/target/arm/op_helper.c
> index 0fd4bd0238..75ebf6afa4 100644
> --- a/target/arm/op_helper.c
> +++ b/target/arm/op_helper.c
> @@ -987,3 +987,39 @@ void HELPER(dc_zva)(CPUARMState *env, uint64_t vaddr_in)
>      memset(g2h(vaddr), 0, blocklen);
>  #endif
>  }
> +
> +void HELPER(dc_cvap)(CPUARMState *env, uint64_t vaddr_in)
> +{
> +#ifndef CONFIG_USER_ONLY
> +    ARMCPU *cpu = env_archcpu(env);
> +    /* CTR_EL0 System register -> DminLine, bits [19:16] */
> +    uint64_t dline_size = 4 << ((cpu->ctr >> 16) & 0xF);
> +    uint64_t vaddr = vaddr_in & ~(dline_size - 1);
> +    void *haddr;
> +    int mem_idx = cpu_mmu_index(env, false);
> +
> +    /* This won't be crossing page boundaries */
> +    haddr = probe_read(env, vaddr, dline_size, mem_idx, GETPC());
> +    if (haddr) {
> +
> +        ram_addr_t offset;
> +        MemoryRegion *mr;
> +
> +        /*
> +         * RCU critical section + ref counting,
> +         * so that MR won't disappear behind the scene
> +         */
> +        rcu_read_lock();
> +        mr = memory_region_from_host(haddr, &offset);
> +        if (mr) {
> +            memory_region_ref(mr);
> +        }
> +        rcu_read_unlock();
> +
> +        if (mr) {
> +            memory_region_do_writeback(mr, offset, dline_size);
> +            memory_region_unref(mr);
> +        }
> +    }
> +#endif
> +}
> diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
> index 2d6cd09634..21ea3631d6 100644
> --- a/target/arm/translate-a64.c
> +++ b/target/arm/translate-a64.c
> @@ -1746,6 +1746,11 @@ static void handle_sys(DisasContext *s, uint32_t insn, bool isread,
>          tcg_rt = cpu_reg(s, rt);
>          gen_helper_dc_zva(cpu_env, tcg_rt);
>          return;
> +    case ARM_CP_DC_CVAP:
> +        /* DC CVAP / DC CVADP */
> +        tcg_rt = cpu_reg(s, rt);
> +        gen_helper_dc_cvap(cpu_env, tcg_rt);
> +        return;
>      default:
>          break;
>      }


--
Alex Bennée
Alex Bennée Sept. 24, 2019, 1:16 a.m. UTC | #2
Beata Michalska <beata.michalska@linaro.org> writes:

> ARMv8.2 introduced support for Data Cache Clean instructions
> to PoP (point-of-persistence) - DC CVAP and PoDP (point-of-deep-persistence)
> - DV CVADP. Both specify conceptual points in a memory system where all writes
> that are to reach them are considered persistent.
> The support provided considers both to be actually the same so there is no
> distinction between the two. If none is available (there is no backing store
> for given memory) both will result in Data Cache Clean up to the point of
> coherency. Otherwise sync for the specified range shall be performed.
>
> Signed-off-by: Beata Michalska <beata.michalska@linaro.org>
> ---
>  linux-user/elfload.c       | 18 +++++++++++++++++-
>  target/arm/cpu.h           | 13 ++++++++++++-
>  target/arm/cpu64.c         |  1 +
>  target/arm/helper.c        | 24 ++++++++++++++++++++++++
>  target/arm/helper.h        |  1 +
>  target/arm/op_helper.c     | 36 ++++++++++++++++++++++++++++++++++++
>  target/arm/translate-a64.c |  5 +++++
>  7 files changed, 96 insertions(+), 2 deletions(-)
>
> diff --git a/linux-user/elfload.c b/linux-user/elfload.c
> index 3365e192eb..1ec00308d5 100644
> --- a/linux-user/elfload.c
> +++ b/linux-user/elfload.c
> @@ -609,7 +609,12 @@ enum {
>      ARM_HWCAP_A64_PACG          = 1UL << 31,
>  };
>
> +enum {
> +    ARM_HWCAP2_A64_DCPODP   = 1 << 0,
> +};
> +
>  #define ELF_HWCAP get_elf_hwcap()
> +#define ELF_HWCAP2 get_elf_hwcap2()
>
>  static uint32_t get_elf_hwcap(void)
>  {
> @@ -644,12 +649,23 @@ static uint32_t get_elf_hwcap(void)
>      GET_FEATURE_ID(aa64_jscvt, ARM_HWCAP_A64_JSCVT);
>      GET_FEATURE_ID(aa64_sb, ARM_HWCAP_A64_SB);
>      GET_FEATURE_ID(aa64_condm_4, ARM_HWCAP_A64_FLAGM);
> +    GET_FEATURE_ID(aa64_dcpop, ARM_HWCAP_A64_DCPOP);
>
> -#undef GET_FEATURE_ID
>
>      return hwcaps;
>  }
>
> +static uint32_t get_elf_hwcap2(void)
> +{
> +    ARMCPU *cpu = ARM_CPU(thread_cpu);
> +    uint32_t hwcaps = 0;
> +
> +    GET_FEATURE_ID(aa64_dcpodp, ARM_HWCAP2_A64_DCPODP);
> +    return hwcaps;
> +}
> +
> +#undef GET_FEATURE_ID
> +
>  #endif /* not TARGET_AARCH64 */
>  #endif /* TARGET_ARM */
>
> diff --git a/target/arm/cpu.h b/target/arm/cpu.h
> index 297ad5e47a..1713d76590 100644
> --- a/target/arm/cpu.h
> +++ b/target/arm/cpu.h
> @@ -2229,7 +2229,8 @@ static inline uint64_t cpreg_to_kvm_id(uint32_t cpregid)
>  #define ARM_CP_NZCV              (ARM_CP_SPECIAL | 0x0300)
>  #define ARM_CP_CURRENTEL         (ARM_CP_SPECIAL | 0x0400)
>  #define ARM_CP_DC_ZVA            (ARM_CP_SPECIAL | 0x0500)
> -#define ARM_LAST_SPECIAL         ARM_CP_DC_ZVA
> +#define ARM_CP_DC_CVAP           (ARM_CP_SPECIAL | 0x0600)
> +#define ARM_LAST_SPECIAL         ARM_CP_DC_CVAP
>  #define ARM_CP_FPU               0x1000
>  #define ARM_CP_SVE               0x2000
>  #define ARM_CP_NO_GDB            0x4000
> @@ -3572,6 +3573,16 @@ static inline bool isar_feature_aa64_frint(const ARMISARegisters *id)
>      return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, FRINTTS) != 0;
>  }
>
> +static inline bool isar_feature_aa64_dcpop(const ARMISARegisters *id)
> +{
> +    return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, DPB) != 0;
> +}
> +
> +static inline bool isar_feature_aa64_dcpodp(const ARMISARegisters *id)
> +{
> +    return (FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, DPB) >> 1) != 0;
> +}
> +
>  static inline bool isar_feature_aa64_fp16(const ARMISARegisters *id)
>  {
>      /* We always set the AdvSIMD and FP fields identically wrt FP16.  */
> diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c
> index d7f5bf610a..20094f980d 100644
> --- a/target/arm/cpu64.c
> +++ b/target/arm/cpu64.c
> @@ -331,6 +331,7 @@ static void aarch64_max_initfn(Object *obj)
>          cpu->isar.id_aa64isar0 = t;
>
>          t = cpu->isar.id_aa64isar1;
> +        t = FIELD_DP64(t, ID_AA64ISAR1, DPB, 2);
>          t = FIELD_DP64(t, ID_AA64ISAR1, JSCVT, 1);
>          t = FIELD_DP64(t, ID_AA64ISAR1, FCMA, 1);
>          t = FIELD_DP64(t, ID_AA64ISAR1, APA, 1); /* PAuth, architected only */
> diff --git a/target/arm/helper.c b/target/arm/helper.c
> index 507026c915..99ae01b7e7 100644
> --- a/target/arm/helper.c
> +++ b/target/arm/helper.c
> @@ -3843,6 +3843,22 @@ static CPAccessResult aa64_cacheop_access(CPUARMState *env,
>      return CP_ACCESS_OK;
>  }
>
> +static CPAccessResult aa64_cacheop_persist_access(CPUARMState *env,
> +                                                  const ARMCPRegInfo *ri,
> +                                                  bool isread)
> +{
> +    ARMCPU *cpu = env_archcpu(env);
> +    /*
> +     * Access is UNDEF if lacking implementation for either DC CVAP or DC CVADP
> +     * DC CVAP ->  CRm: 0xc
> +     * DC CVADP -> CRm: 0xd
> +     */
> +    return (ri->crm == 0xc && !cpu_isar_feature(aa64_dcpop, cpu)) ||
> +           (ri->crm == 0xd && !cpu_isar_feature(aa64_dcpodp, cpu))
> +           ? CP_ACCESS_TRAP_UNCATEGORIZED
> +           : aa64_cacheop_access(env, ri, isread);
> +}
> +
>  /* See: D4.7.2 TLB maintenance requirements and the TLB maintenance instructions
>   * Page D4-1736 (DDI0487A.b)
>   */
> @@ -4251,6 +4267,14 @@ static const ARMCPRegInfo v8_cp_reginfo[] = {
>        .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 10, .opc2 = 1,
>        .access = PL0_W, .type = ARM_CP_NOP,
>        .accessfn = aa64_cacheop_access },
> +    { .name = "DC_CVAP", .state = ARM_CP_STATE_AA64,
> +      .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 12, .opc2 = 1,
> +      .access = PL0_W, .type = ARM_CP_DC_CVAP,
> +      .accessfn = aa64_cacheop_persist_access },
> +    { .name = "DC_CVADP", .state = ARM_CP_STATE_AA64,
> +      .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 13, .opc2 = 1,
> +      .access = PL0_W, .type = ARM_CP_DC_CVAP,
> +      .accessfn = aa64_cacheop_persist_access },
>      { .name = "DC_CSW", .state = ARM_CP_STATE_AA64,
>        .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 10, .opc2 = 2,
>        .access = PL1_W, .type = ARM_CP_NOP },
> diff --git a/target/arm/helper.h b/target/arm/helper.h
> index 1fb2cb5a77..a850364944 100644
> --- a/target/arm/helper.h
> +++ b/target/arm/helper.h
> @@ -561,6 +561,7 @@ DEF_HELPER_FLAGS_3(crypto_sm4ekey, TCG_CALL_NO_RWG, void, ptr, ptr, ptr)
>  DEF_HELPER_FLAGS_3(crc32, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32)
>  DEF_HELPER_FLAGS_3(crc32c, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32)
>  DEF_HELPER_2(dc_zva, void, env, i64)
> +DEF_HELPER_2(dc_cvap, void, env, i64)
>
>  DEF_HELPER_FLAGS_2(neon_pmull_64_lo, TCG_CALL_NO_RWG_SE, i64, i64, i64)
>  DEF_HELPER_FLAGS_2(neon_pmull_64_hi, TCG_CALL_NO_RWG_SE, i64, i64, i64)
> diff --git a/target/arm/op_helper.c b/target/arm/op_helper.c
> index 0fd4bd0238..75ebf6afa4 100644
> --- a/target/arm/op_helper.c
> +++ b/target/arm/op_helper.c
> @@ -987,3 +987,39 @@ void HELPER(dc_zva)(CPUARMState *env, uint64_t vaddr_in)
>      memset(g2h(vaddr), 0, blocklen);
>  #endif
>  }
> +
> +void HELPER(dc_cvap)(CPUARMState *env, uint64_t vaddr_in)
> +{
> +#ifndef CONFIG_USER_ONLY

Are we essentially saying the operation is a NOP for user-mode
emulation? Is it just because we don't really expose HW to linux-user?

If so move the #ifndef outside the HELPER definition and...

> +    ARMCPU *cpu = env_archcpu(env);
> +    /* CTR_EL0 System register -> DminLine, bits [19:16] */
> +    uint64_t dline_size = 4 << ((cpu->ctr >> 16) & 0xF);
> +    uint64_t vaddr = vaddr_in & ~(dline_size - 1);
> +    void *haddr;
> +    int mem_idx = cpu_mmu_index(env, false);
> +
> +    /* This won't be crossing page boundaries */
> +    haddr = probe_read(env, vaddr, dline_size, mem_idx, GETPC());
> +    if (haddr) {
> +
> +        ram_addr_t offset;
> +        MemoryRegion *mr;
> +
> +        /*
> +         * RCU critical section + ref counting,
> +         * so that MR won't disappear behind the scene
> +         */
> +        rcu_read_lock();
> +        mr = memory_region_from_host(haddr, &offset);
> +        if (mr) {
> +            memory_region_ref(mr);
> +        }
> +        rcu_read_unlock();
> +
> +        if (mr) {
> +            memory_region_do_writeback(mr, offset, dline_size);
> +            memory_region_unref(mr);
> +        }
> +    }
> +#endif
> +}
> diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
> index 2d6cd09634..21ea3631d6 100644
> --- a/target/arm/translate-a64.c
> +++ b/target/arm/translate-a64.c
> @@ -1746,6 +1746,11 @@ static void handle_sys(DisasContext *s, uint32_t insn, bool isread,
>          tcg_rt = cpu_reg(s, rt);
>          gen_helper_dc_zva(cpu_env, tcg_rt);
>          return;
> +    case ARM_CP_DC_CVAP:
> +        /* DC CVAP / DC CVADP */

.. #ifndef CONFIG_USER_ONLY this code so we don't add the overhead of a
helper call in linux-user mode.

> +        tcg_rt = cpu_reg(s, rt);
> +        gen_helper_dc_cvap(cpu_env, tcg_rt);
> +        return;
>      default:
>          break;
>      }


--
Alex Bennée
Richard Henderson Sept. 24, 2019, 5:22 p.m. UTC | #3
On 9/10/19 2:56 AM, Beata Michalska wrote:
> @@ -2229,7 +2229,8 @@ static inline uint64_t cpreg_to_kvm_id(uint32_t cpregid)
>  #define ARM_CP_NZCV              (ARM_CP_SPECIAL | 0x0300)
>  #define ARM_CP_CURRENTEL         (ARM_CP_SPECIAL | 0x0400)
>  #define ARM_CP_DC_ZVA            (ARM_CP_SPECIAL | 0x0500)
> -#define ARM_LAST_SPECIAL         ARM_CP_DC_ZVA
> +#define ARM_CP_DC_CVAP           (ARM_CP_SPECIAL | 0x0600)
> +#define ARM_LAST_SPECIAL         ARM_CP_DC_CVAP

I don't see that this operation needs to be handled via "special".  It's a
function call upon write, as for many other system registers.

> +static inline bool isar_feature_aa64_dcpop(const ARMISARegisters *id)
> +{
> +    return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, DPB) != 0;
> +}
> +
> +static inline bool isar_feature_aa64_dcpodp(const ARMISARegisters *id)
> +{
> +    return (FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, DPB) >> 1) != 0;
> +}

The correct check is FIELD_EX(...) >= 2.  This is a 4-bit field, as with all of
the others.

> +static CPAccessResult aa64_cacheop_persist_access(CPUARMState *env,
> +                                                  const ARMCPRegInfo *ri,
> +                                                  bool isread)
> +{
> +    ARMCPU *cpu = env_archcpu(env);
> +    /*
> +     * Access is UNDEF if lacking implementation for either DC CVAP or DC CVADP
> +     * DC CVAP ->  CRm: 0xc
> +     * DC CVADP -> CRm: 0xd
> +     */
> +    return (ri->crm == 0xc && !cpu_isar_feature(aa64_dcpop, cpu)) ||
> +           (ri->crm == 0xd && !cpu_isar_feature(aa64_dcpodp, cpu))
> +           ? CP_ACCESS_TRAP_UNCATEGORIZED
> +           : aa64_cacheop_access(env, ri, isread);
> +}
...
> +    { .name = "DC_CVAP", .state = ARM_CP_STATE_AA64,
> +      .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 12, .opc2 = 1,
> +      .access = PL0_W, .type = ARM_CP_DC_CVAP,
> +      .accessfn = aa64_cacheop_persist_access },
> +    { .name = "DC_CVADP", .state = ARM_CP_STATE_AA64,
> +      .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 13, .opc2 = 1,
> +      .access = PL0_W, .type = ARM_CP_DC_CVAP,
> +      .accessfn = aa64_cacheop_persist_access },

While this access function works, it's better to simply not register these at
all when they're not supported.  Compare the registration of rndr_reginfo.

As I described above, I think this can use a normal write function.  In which
case this would use .type = ARM_CP_NO_RAW | ARM_CP_SUPPRESS_TB_END.

(I believe that ARM_CP_IO is not required, but I'm not 100% sure.  Certainly
there does not seem to be anything in dc_cvap() that affects state which can
queried from another virtual cpu, so there does not appear to be any reason to
grab the global i/o lock.  The host kernel should be just fine with concurrent
msync syscalls, or whatever it is that libpmem uses.)


> +void HELPER(dc_cvap)(CPUARMState *env, uint64_t vaddr_in)
> +{
> +#ifndef CONFIG_USER_ONLY
> +    ARMCPU *cpu = env_archcpu(env);
> +    /* CTR_EL0 System register -> DminLine, bits [19:16] */
> +    uint64_t dline_size = 4 << ((cpu->ctr >> 16) & 0xF);
> +    uint64_t vaddr = vaddr_in & ~(dline_size - 1);
> +    void *haddr;
> +    int mem_idx = cpu_mmu_index(env, false);
> +
> +    /* This won't be crossing page boundaries */
> +    haddr = probe_read(env, vaddr, dline_size, mem_idx, GETPC());
> +    if (haddr) {
> +
> +        ram_addr_t offset;
> +        MemoryRegion *mr;
> +
> +        /*
> +         * RCU critical section + ref counting,
> +         * so that MR won't disappear behind the scene
> +         */
> +        rcu_read_lock();
> +        mr = memory_region_from_host(haddr, &offset);
> +        if (mr) {
> +            memory_region_ref(mr);
> +        }
> +        rcu_read_unlock();
> +
> +        if (mr) {
> +            memory_region_do_writeback(mr, offset, dline_size);
> +            memory_region_unref(mr);
> +        }
> +    }
> +#endif


We hold the rcu lock whenever a TB is executing.  I don't believe there's any
point in increasing the lock count here.  Similarly with memory_region
refcounts -- they cannot vanish while we're executing a TB.

Thus I believe that about half of this function can fold away.


r~
Beata Michalska Oct. 9, 2019, 11:47 a.m. UTC | #4
On Tue, 24 Sep 2019 at 00:54, Alex Bennée <alex.bennee@linaro.org> wrote:
>
>
> Beata Michalska <beata.michalska@linaro.org> writes:
>
> > ARMv8.2 introduced support for Data Cache Clean instructions
> > to PoP (point-of-persistence) - DC CVAP and PoDP (point-of-deep-persistence)
> > - DV CVADP. Both specify conceptual points in a memory system where all writes
> > that are to reach them are considered persistent.
> > The support provided considers both to be actually the same so there is no
> > distinction between the two. If none is available (there is no backing store
> > for given memory) both will result in Data Cache Clean up to the point of
> > coherency. Otherwise sync for the specified range shall be performed.
> >
> > Signed-off-by: Beata Michalska <beata.michalska@linaro.org>
> > ---
> >  linux-user/elfload.c       | 18 +++++++++++++++++-
>
> There are conflicts from the recent elfload.c tweaks to fix on your next rebase.

Will address it in the next version.

> >  target/arm/cpu.h           | 13 ++++++++++++-
> >  target/arm/cpu64.c         |  1 +
> >  target/arm/helper.c        | 24 ++++++++++++++++++++++++
> >  target/arm/helper.h        |  1 +
> >  target/arm/op_helper.c     | 36 ++++++++++++++++++++++++++++++++++++
> >  target/arm/translate-a64.c |  5 +++++
> >  7 files changed, 96 insertions(+), 2 deletions(-)
> >
> > diff --git a/linux-user/elfload.c b/linux-user/elfload.c
> > index 3365e192eb..1ec00308d5 100644
> > --- a/linux-user/elfload.c
> > +++ b/linux-user/elfload.c
> > @@ -609,7 +609,12 @@ enum {
> >      ARM_HWCAP_A64_PACG          = 1UL << 31,
> >  };
> >
> > +enum {
> > +    ARM_HWCAP2_A64_DCPODP   = 1 << 0,
> > +};
> > +
> >  #define ELF_HWCAP get_elf_hwcap()
> > +#define ELF_HWCAP2 get_elf_hwcap2()
> >
> >  static uint32_t get_elf_hwcap(void)
> >  {
> > @@ -644,12 +649,23 @@ static uint32_t get_elf_hwcap(void)
> >      GET_FEATURE_ID(aa64_jscvt, ARM_HWCAP_A64_JSCVT);
> >      GET_FEATURE_ID(aa64_sb, ARM_HWCAP_A64_SB);
> >      GET_FEATURE_ID(aa64_condm_4, ARM_HWCAP_A64_FLAGM);
> > +    GET_FEATURE_ID(aa64_dcpop, ARM_HWCAP_A64_DCPOP);
> >
> > -#undef GET_FEATURE_ID
> >
> >      return hwcaps;
> >  }
> >
> > +static uint32_t get_elf_hwcap2(void)
> > +{
> > +    ARMCPU *cpu = ARM_CPU(thread_cpu);
> > +    uint32_t hwcaps = 0;
> > +
> > +    GET_FEATURE_ID(aa64_dcpodp, ARM_HWCAP2_A64_DCPODP);
> > +    return hwcaps;
> > +}
> > +
> > +#undef GET_FEATURE_ID
> > +
> >  #endif /* not TARGET_AARCH64 */
> >  #endif /* TARGET_ARM */
> >
> > diff --git a/target/arm/cpu.h b/target/arm/cpu.h
> > index 297ad5e47a..1713d76590 100644
> > --- a/target/arm/cpu.h
> > +++ b/target/arm/cpu.h
> > @@ -2229,7 +2229,8 @@ static inline uint64_t cpreg_to_kvm_id(uint32_t cpregid)
> >  #define ARM_CP_NZCV              (ARM_CP_SPECIAL | 0x0300)
> >  #define ARM_CP_CURRENTEL         (ARM_CP_SPECIAL | 0x0400)
> >  #define ARM_CP_DC_ZVA            (ARM_CP_SPECIAL | 0x0500)
> > -#define ARM_LAST_SPECIAL         ARM_CP_DC_ZVA
> > +#define ARM_CP_DC_CVAP           (ARM_CP_SPECIAL | 0x0600)
> > +#define ARM_LAST_SPECIAL         ARM_CP_DC_CVAP
> >  #define ARM_CP_FPU               0x1000
> >  #define ARM_CP_SVE               0x2000
> >  #define ARM_CP_NO_GDB            0x4000
> > @@ -3572,6 +3573,16 @@ static inline bool isar_feature_aa64_frint(const ARMISARegisters *id)
> >      return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, FRINTTS) != 0;
> >  }
> >
> > +static inline bool isar_feature_aa64_dcpop(const ARMISARegisters *id)
> > +{
> > +    return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, DPB) != 0;
> > +}
> > +
> > +static inline bool isar_feature_aa64_dcpodp(const ARMISARegisters *id)
> > +{
> > +    return (FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, DPB) >> 1) != 0;
> > +}
> > +
> >  static inline bool isar_feature_aa64_fp16(const ARMISARegisters *id)
> >  {
> >      /* We always set the AdvSIMD and FP fields identically wrt FP16.  */
> > diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c
> > index d7f5bf610a..20094f980d 100644
> > --- a/target/arm/cpu64.c
> > +++ b/target/arm/cpu64.c
> > @@ -331,6 +331,7 @@ static void aarch64_max_initfn(Object *obj)
> >          cpu->isar.id_aa64isar0 = t;
> >
> >          t = cpu->isar.id_aa64isar1;
> > +        t = FIELD_DP64(t, ID_AA64ISAR1, DPB, 2);
> >          t = FIELD_DP64(t, ID_AA64ISAR1, JSCVT, 1);
> >          t = FIELD_DP64(t, ID_AA64ISAR1, FCMA, 1);
> >          t = FIELD_DP64(t, ID_AA64ISAR1, APA, 1); /* PAuth, architected only */
> > diff --git a/target/arm/helper.c b/target/arm/helper.c
> > index 507026c915..99ae01b7e7 100644
> > --- a/target/arm/helper.c
> > +++ b/target/arm/helper.c
> > @@ -3843,6 +3843,22 @@ static CPAccessResult aa64_cacheop_access(CPUARMState *env,
> >      return CP_ACCESS_OK;
> >  }
> >
> > +static CPAccessResult aa64_cacheop_persist_access(CPUARMState *env,
> > +                                                  const ARMCPRegInfo *ri,
> > +                                                  bool isread)
> > +{
> > +    ARMCPU *cpu = env_archcpu(env);
> > +    /*
> > +     * Access is UNDEF if lacking implementation for either DC CVAP or DC CVADP
> > +     * DC CVAP ->  CRm: 0xc
> > +     * DC CVADP -> CRm: 0xd
> > +     */
> > +    return (ri->crm == 0xc && !cpu_isar_feature(aa64_dcpop, cpu)) ||
> > +           (ri->crm == 0xd && !cpu_isar_feature(aa64_dcpodp, cpu))
> > +           ? CP_ACCESS_TRAP_UNCATEGORIZED
> > +           : aa64_cacheop_access(env, ri, isread);
> > +}
> > +
> >  /* See: D4.7.2 TLB maintenance requirements and the TLB maintenance instructions
> >   * Page D4-1736 (DDI0487A.b)
> >   */
> > @@ -4251,6 +4267,14 @@ static const ARMCPRegInfo v8_cp_reginfo[] = {
> >        .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 10, .opc2 = 1,
> >        .access = PL0_W, .type = ARM_CP_NOP,
> >        .accessfn = aa64_cacheop_access },
> > +    { .name = "DC_CVAP", .state = ARM_CP_STATE_AA64,
> > +      .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 12, .opc2 = 1,
> > +      .access = PL0_W, .type = ARM_CP_DC_CVAP,
> > +      .accessfn = aa64_cacheop_persist_access },
> > +    { .name = "DC_CVADP", .state = ARM_CP_STATE_AA64,
> > +      .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 13, .opc2 = 1,
> > +      .access = PL0_W, .type = ARM_CP_DC_CVAP,
> > +      .accessfn = aa64_cacheop_persist_access },
> >      { .name = "DC_CSW", .state = ARM_CP_STATE_AA64,
> >        .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 10, .opc2 = 2,
> >        .access = PL1_W, .type = ARM_CP_NOP },
> > diff --git a/target/arm/helper.h b/target/arm/helper.h
> > index 1fb2cb5a77..a850364944 100644
> > --- a/target/arm/helper.h
> > +++ b/target/arm/helper.h
> > @@ -561,6 +561,7 @@ DEF_HELPER_FLAGS_3(crypto_sm4ekey, TCG_CALL_NO_RWG, void, ptr, ptr, ptr)
> >  DEF_HELPER_FLAGS_3(crc32, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32)
> >  DEF_HELPER_FLAGS_3(crc32c, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32)
> >  DEF_HELPER_2(dc_zva, void, env, i64)
> > +DEF_HELPER_2(dc_cvap, void, env, i64)
> >
> >  DEF_HELPER_FLAGS_2(neon_pmull_64_lo, TCG_CALL_NO_RWG_SE, i64, i64, i64)
> >  DEF_HELPER_FLAGS_2(neon_pmull_64_hi, TCG_CALL_NO_RWG_SE, i64, i64, i64)
> > diff --git a/target/arm/op_helper.c b/target/arm/op_helper.c
> > index 0fd4bd0238..75ebf6afa4 100644
> > --- a/target/arm/op_helper.c
> > +++ b/target/arm/op_helper.c
> > @@ -987,3 +987,39 @@ void HELPER(dc_zva)(CPUARMState *env, uint64_t vaddr_in)
> >      memset(g2h(vaddr), 0, blocklen);
> >  #endif
> >  }
> > +
> > +void HELPER(dc_cvap)(CPUARMState *env, uint64_t vaddr_in)
> > +{
> > +#ifndef CONFIG_USER_ONLY
> > +    ARMCPU *cpu = env_archcpu(env);
> > +    /* CTR_EL0 System register -> DminLine, bits [19:16] */
> > +    uint64_t dline_size = 4 << ((cpu->ctr >> 16) & 0xF);
> > +    uint64_t vaddr = vaddr_in & ~(dline_size - 1);
> > +    void *haddr;
> > +    int mem_idx = cpu_mmu_index(env, false);
> > +
> > +    /* This won't be crossing page boundaries */
> > +    haddr = probe_read(env, vaddr, dline_size, mem_idx, GETPC());
> > +    if (haddr) {
> > +
> > +        ram_addr_t offset;
> > +        MemoryRegion *mr;
> > +
> > +        /*
> > +         * RCU critical section + ref counting,
> > +         * so that MR won't disappear behind the scene
> > +         */
> > +        rcu_read_lock();
> > +        mr = memory_region_from_host(haddr, &offset);
> > +        if (mr) {
> > +            memory_region_ref(mr);
> > +        }
> > +        rcu_read_unlock();
> > +
> > +        if (mr) {
> > +            memory_region_do_writeback(mr, offset, dline_size);
> > +            memory_region_unref(mr);
> > +        }
> > +    }
> > +#endif
> > +}
> > diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
> > index 2d6cd09634..21ea3631d6 100644
> > --- a/target/arm/translate-a64.c
> > +++ b/target/arm/translate-a64.c
> > @@ -1746,6 +1746,11 @@ static void handle_sys(DisasContext *s, uint32_t insn, bool isread,
> >          tcg_rt = cpu_reg(s, rt);
> >          gen_helper_dc_zva(cpu_env, tcg_rt);
> >          return;
> > +    case ARM_CP_DC_CVAP:
> > +        /* DC CVAP / DC CVADP */
> > +        tcg_rt = cpu_reg(s, rt);
> > +        gen_helper_dc_cvap(cpu_env, tcg_rt);
> > +        return;
> >      default:
> >          break;
> >      }
>
>
> --
> Alex Bennée

BR
Beata
Beata Michalska Oct. 9, 2019, 11:49 a.m. UTC | #5
On Tue, 24 Sep 2019 at 02:16, Alex Bennée <alex.bennee@linaro.org> wrote:
>
>
> Beata Michalska <beata.michalska@linaro.org> writes:
>
> > ARMv8.2 introduced support for Data Cache Clean instructions
> > to PoP (point-of-persistence) - DC CVAP and PoDP (point-of-deep-persistence)
> > - DV CVADP. Both specify conceptual points in a memory system where all writes
> > that are to reach them are considered persistent.
> > The support provided considers both to be actually the same so there is no
> > distinction between the two. If none is available (there is no backing store
> > for given memory) both will result in Data Cache Clean up to the point of
> > coherency. Otherwise sync for the specified range shall be performed.
> >
> > Signed-off-by: Beata Michalska <beata.michalska@linaro.org>
> > ---
> >  linux-user/elfload.c       | 18 +++++++++++++++++-
> >  target/arm/cpu.h           | 13 ++++++++++++-
> >  target/arm/cpu64.c         |  1 +
> >  target/arm/helper.c        | 24 ++++++++++++++++++++++++
> >  target/arm/helper.h        |  1 +
> >  target/arm/op_helper.c     | 36 ++++++++++++++++++++++++++++++++++++
> >  target/arm/translate-a64.c |  5 +++++
> >  7 files changed, 96 insertions(+), 2 deletions(-)
> >
> > diff --git a/linux-user/elfload.c b/linux-user/elfload.c
> > index 3365e192eb..1ec00308d5 100644
> > --- a/linux-user/elfload.c
> > +++ b/linux-user/elfload.c
> > @@ -609,7 +609,12 @@ enum {
> >      ARM_HWCAP_A64_PACG          = 1UL << 31,
> >  };
> >
> > +enum {
> > +    ARM_HWCAP2_A64_DCPODP   = 1 << 0,
> > +};
> > +
> >  #define ELF_HWCAP get_elf_hwcap()
> > +#define ELF_HWCAP2 get_elf_hwcap2()
> >
> >  static uint32_t get_elf_hwcap(void)
> >  {
> > @@ -644,12 +649,23 @@ static uint32_t get_elf_hwcap(void)
> >      GET_FEATURE_ID(aa64_jscvt, ARM_HWCAP_A64_JSCVT);
> >      GET_FEATURE_ID(aa64_sb, ARM_HWCAP_A64_SB);
> >      GET_FEATURE_ID(aa64_condm_4, ARM_HWCAP_A64_FLAGM);
> > +    GET_FEATURE_ID(aa64_dcpop, ARM_HWCAP_A64_DCPOP);
> >
> > -#undef GET_FEATURE_ID
> >
> >      return hwcaps;
> >  }
> >
> > +static uint32_t get_elf_hwcap2(void)
> > +{
> > +    ARMCPU *cpu = ARM_CPU(thread_cpu);
> > +    uint32_t hwcaps = 0;
> > +
> > +    GET_FEATURE_ID(aa64_dcpodp, ARM_HWCAP2_A64_DCPODP);
> > +    return hwcaps;
> > +}
> > +
> > +#undef GET_FEATURE_ID
> > +
> >  #endif /* not TARGET_AARCH64 */
> >  #endif /* TARGET_ARM */
> >
> > diff --git a/target/arm/cpu.h b/target/arm/cpu.h
> > index 297ad5e47a..1713d76590 100644
> > --- a/target/arm/cpu.h
> > +++ b/target/arm/cpu.h
> > @@ -2229,7 +2229,8 @@ static inline uint64_t cpreg_to_kvm_id(uint32_t cpregid)
> >  #define ARM_CP_NZCV              (ARM_CP_SPECIAL | 0x0300)
> >  #define ARM_CP_CURRENTEL         (ARM_CP_SPECIAL | 0x0400)
> >  #define ARM_CP_DC_ZVA            (ARM_CP_SPECIAL | 0x0500)
> > -#define ARM_LAST_SPECIAL         ARM_CP_DC_ZVA
> > +#define ARM_CP_DC_CVAP           (ARM_CP_SPECIAL | 0x0600)
> > +#define ARM_LAST_SPECIAL         ARM_CP_DC_CVAP
> >  #define ARM_CP_FPU               0x1000
> >  #define ARM_CP_SVE               0x2000
> >  #define ARM_CP_NO_GDB            0x4000
> > @@ -3572,6 +3573,16 @@ static inline bool isar_feature_aa64_frint(const ARMISARegisters *id)
> >      return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, FRINTTS) != 0;
> >  }
> >
> > +static inline bool isar_feature_aa64_dcpop(const ARMISARegisters *id)
> > +{
> > +    return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, DPB) != 0;
> > +}
> > +
> > +static inline bool isar_feature_aa64_dcpodp(const ARMISARegisters *id)
> > +{
> > +    return (FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, DPB) >> 1) != 0;
> > +}
> > +
> >  static inline bool isar_feature_aa64_fp16(const ARMISARegisters *id)
> >  {
> >      /* We always set the AdvSIMD and FP fields identically wrt FP16.  */
> > diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c
> > index d7f5bf610a..20094f980d 100644
> > --- a/target/arm/cpu64.c
> > +++ b/target/arm/cpu64.c
> > @@ -331,6 +331,7 @@ static void aarch64_max_initfn(Object *obj)
> >          cpu->isar.id_aa64isar0 = t;
> >
> >          t = cpu->isar.id_aa64isar1;
> > +        t = FIELD_DP64(t, ID_AA64ISAR1, DPB, 2);
> >          t = FIELD_DP64(t, ID_AA64ISAR1, JSCVT, 1);
> >          t = FIELD_DP64(t, ID_AA64ISAR1, FCMA, 1);
> >          t = FIELD_DP64(t, ID_AA64ISAR1, APA, 1); /* PAuth, architected only */
> > diff --git a/target/arm/helper.c b/target/arm/helper.c
> > index 507026c915..99ae01b7e7 100644
> > --- a/target/arm/helper.c
> > +++ b/target/arm/helper.c
> > @@ -3843,6 +3843,22 @@ static CPAccessResult aa64_cacheop_access(CPUARMState *env,
> >      return CP_ACCESS_OK;
> >  }
> >
> > +static CPAccessResult aa64_cacheop_persist_access(CPUARMState *env,
> > +                                                  const ARMCPRegInfo *ri,
> > +                                                  bool isread)
> > +{
> > +    ARMCPU *cpu = env_archcpu(env);
> > +    /*
> > +     * Access is UNDEF if lacking implementation for either DC CVAP or DC CVADP
> > +     * DC CVAP ->  CRm: 0xc
> > +     * DC CVADP -> CRm: 0xd
> > +     */
> > +    return (ri->crm == 0xc && !cpu_isar_feature(aa64_dcpop, cpu)) ||
> > +           (ri->crm == 0xd && !cpu_isar_feature(aa64_dcpodp, cpu))
> > +           ? CP_ACCESS_TRAP_UNCATEGORIZED
> > +           : aa64_cacheop_access(env, ri, isread);
> > +}
> > +
> >  /* See: D4.7.2 TLB maintenance requirements and the TLB maintenance instructions
> >   * Page D4-1736 (DDI0487A.b)
> >   */
> > @@ -4251,6 +4267,14 @@ static const ARMCPRegInfo v8_cp_reginfo[] = {
> >        .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 10, .opc2 = 1,
> >        .access = PL0_W, .type = ARM_CP_NOP,
> >        .accessfn = aa64_cacheop_access },
> > +    { .name = "DC_CVAP", .state = ARM_CP_STATE_AA64,
> > +      .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 12, .opc2 = 1,
> > +      .access = PL0_W, .type = ARM_CP_DC_CVAP,
> > +      .accessfn = aa64_cacheop_persist_access },
> > +    { .name = "DC_CVADP", .state = ARM_CP_STATE_AA64,
> > +      .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 13, .opc2 = 1,
> > +      .access = PL0_W, .type = ARM_CP_DC_CVAP,
> > +      .accessfn = aa64_cacheop_persist_access },
> >      { .name = "DC_CSW", .state = ARM_CP_STATE_AA64,
> >        .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 10, .opc2 = 2,
> >        .access = PL1_W, .type = ARM_CP_NOP },
> > diff --git a/target/arm/helper.h b/target/arm/helper.h
> > index 1fb2cb5a77..a850364944 100644
> > --- a/target/arm/helper.h
> > +++ b/target/arm/helper.h
> > @@ -561,6 +561,7 @@ DEF_HELPER_FLAGS_3(crypto_sm4ekey, TCG_CALL_NO_RWG, void, ptr, ptr, ptr)
> >  DEF_HELPER_FLAGS_3(crc32, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32)
> >  DEF_HELPER_FLAGS_3(crc32c, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32)
> >  DEF_HELPER_2(dc_zva, void, env, i64)
> > +DEF_HELPER_2(dc_cvap, void, env, i64)
> >
> >  DEF_HELPER_FLAGS_2(neon_pmull_64_lo, TCG_CALL_NO_RWG_SE, i64, i64, i64)
> >  DEF_HELPER_FLAGS_2(neon_pmull_64_hi, TCG_CALL_NO_RWG_SE, i64, i64, i64)
> > diff --git a/target/arm/op_helper.c b/target/arm/op_helper.c
> > index 0fd4bd0238..75ebf6afa4 100644
> > --- a/target/arm/op_helper.c
> > +++ b/target/arm/op_helper.c
> > @@ -987,3 +987,39 @@ void HELPER(dc_zva)(CPUARMState *env, uint64_t vaddr_in)
> >      memset(g2h(vaddr), 0, blocklen);
> >  #endif
> >  }
> > +
> > +void HELPER(dc_cvap)(CPUARMState *env, uint64_t vaddr_in)
> > +{
> > +#ifndef CONFIG_USER_ONLY
>
> Are we essentially saying the operation is a NOP for user-mode
> emulation? Is it just because we don't really expose HW to linux-user?
>
> If so move the #ifndef outside the HELPER definition and...
>

I do not think there is a way to specify persistent mem for user mode.
Or am I mistaken ?
I will move the switch to the HELPER def.

> > +    ARMCPU *cpu = env_archcpu(env);
> > +    /* CTR_EL0 System register -> DminLine, bits [19:16] */
> > +    uint64_t dline_size = 4 << ((cpu->ctr >> 16) & 0xF);
> > +    uint64_t vaddr = vaddr_in & ~(dline_size - 1);
> > +    void *haddr;
> > +    int mem_idx = cpu_mmu_index(env, false);
> > +
> > +    /* This won't be crossing page boundaries */
> > +    haddr = probe_read(env, vaddr, dline_size, mem_idx, GETPC());
> > +    if (haddr) {
> > +
> > +        ram_addr_t offset;
> > +        MemoryRegion *mr;
> > +
> > +        /*
> > +         * RCU critical section + ref counting,
> > +         * so that MR won't disappear behind the scene
> > +         */
> > +        rcu_read_lock();
> > +        mr = memory_region_from_host(haddr, &offset);
> > +        if (mr) {
> > +            memory_region_ref(mr);
> > +        }
> > +        rcu_read_unlock();
> > +
> > +        if (mr) {
> > +            memory_region_do_writeback(mr, offset, dline_size);
> > +            memory_region_unref(mr);
> > +        }
> > +    }
> > +#endif
> > +}
> > diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
> > index 2d6cd09634..21ea3631d6 100644
> > --- a/target/arm/translate-a64.c
> > +++ b/target/arm/translate-a64.c
> > @@ -1746,6 +1746,11 @@ static void handle_sys(DisasContext *s, uint32_t insn, bool isread,
> >          tcg_rt = cpu_reg(s, rt);
> >          gen_helper_dc_zva(cpu_env, tcg_rt);
> >          return;
> > +    case ARM_CP_DC_CVAP:
> > +        /* DC CVAP / DC CVADP */
>
> .. #ifndef CONFIG_USER_ONLY this code so we don't add the overhead of a
> helper call in linux-user mode.

Noted.
>
> > +        tcg_rt = cpu_reg(s, rt);
> > +        gen_helper_dc_cvap(cpu_env, tcg_rt);
> > +        return;
> >      default:
> >          break;
> >      }
>
>
> --
> Alex Bennée

Thank you,

BR
Beata
Beata Michalska Oct. 9, 2019, 11:53 a.m. UTC | #6
On Tue, 24 Sep 2019 at 18:22, Richard Henderson
<richard.henderson@linaro.org> wrote:
>
> On 9/10/19 2:56 AM, Beata Michalska wrote:
> > @@ -2229,7 +2229,8 @@ static inline uint64_t cpreg_to_kvm_id(uint32_t cpregid)
> >  #define ARM_CP_NZCV              (ARM_CP_SPECIAL | 0x0300)
> >  #define ARM_CP_CURRENTEL         (ARM_CP_SPECIAL | 0x0400)
> >  #define ARM_CP_DC_ZVA            (ARM_CP_SPECIAL | 0x0500)
> > -#define ARM_LAST_SPECIAL         ARM_CP_DC_ZVA
> > +#define ARM_CP_DC_CVAP           (ARM_CP_SPECIAL | 0x0600)
> > +#define ARM_LAST_SPECIAL         ARM_CP_DC_CVAP
>
> I don't see that this operation needs to be handled via "special".  It's a
> function call upon write, as for many other system registers.
>

Too inspired by ZVA I guess. Will make the appropriate changes in the
next version.

> > +static inline bool isar_feature_aa64_dcpop(const ARMISARegisters *id)
> > +{
> > +    return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, DPB) != 0;
> > +}
> > +
> > +static inline bool isar_feature_aa64_dcpodp(const ARMISARegisters *id)
> > +{
> > +    return (FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, DPB) >> 1) != 0;
> > +}
>
> The correct check is FIELD_EX(...) >= 2.  This is a 4-bit field, as with all of
> the others.
>
Noted.
> > +static CPAccessResult aa64_cacheop_persist_access(CPUARMState *env,
> > +                                                  const ARMCPRegInfo *ri,
> > +                                                  bool isread)
> > +{
> > +    ARMCPU *cpu = env_archcpu(env);
> > +    /*
> > +     * Access is UNDEF if lacking implementation for either DC CVAP or DC CVADP
> > +     * DC CVAP ->  CRm: 0xc
> > +     * DC CVADP -> CRm: 0xd
> > +     */
> > +    return (ri->crm == 0xc && !cpu_isar_feature(aa64_dcpop, cpu)) ||
> > +           (ri->crm == 0xd && !cpu_isar_feature(aa64_dcpodp, cpu))
> > +           ? CP_ACCESS_TRAP_UNCATEGORIZED
> > +           : aa64_cacheop_access(env, ri, isread);
> > +}
> ...
> > +    { .name = "DC_CVAP", .state = ARM_CP_STATE_AA64,
> > +      .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 12, .opc2 = 1,
> > +      .access = PL0_W, .type = ARM_CP_DC_CVAP,
> > +      .accessfn = aa64_cacheop_persist_access },
> > +    { .name = "DC_CVADP", .state = ARM_CP_STATE_AA64,
> > +      .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 13, .opc2 = 1,
> > +      .access = PL0_W, .type = ARM_CP_DC_CVAP,
> > +      .accessfn = aa64_cacheop_persist_access },
>
> While this access function works, it's better to simply not register these at
> all when they're not supported.  Compare the registration of rndr_reginfo.
>
> As I described above, I think this can use a normal write function.  In which
> case this would use .type = ARM_CP_NO_RAW | ARM_CP_SUPPRESS_TB_END.
>
> (I believe that ARM_CP_IO is not required, but I'm not 100% sure.  Certainly
> there does not seem to be anything in dc_cvap() that affects state which can
> queried from another virtual cpu, so there does not appear to be any reason to
> grab the global i/o lock.  The host kernel should be just fine with concurrent
> msync syscalls, or whatever it is that libpmem uses.)
>
>
OK, will move that to conditional registration and double check the type.
Thanks for the suggestion.

> > +void HELPER(dc_cvap)(CPUARMState *env, uint64_t vaddr_in)
> > +{
> > +#ifndef CONFIG_USER_ONLY
> > +    ARMCPU *cpu = env_archcpu(env);
> > +    /* CTR_EL0 System register -> DminLine, bits [19:16] */
> > +    uint64_t dline_size = 4 << ((cpu->ctr >> 16) & 0xF);
> > +    uint64_t vaddr = vaddr_in & ~(dline_size - 1);
> > +    void *haddr;
> > +    int mem_idx = cpu_mmu_index(env, false);
> > +
> > +    /* This won't be crossing page boundaries */
> > +    haddr = probe_read(env, vaddr, dline_size, mem_idx, GETPC());
> > +    if (haddr) {
> > +
> > +        ram_addr_t offset;
> > +        MemoryRegion *mr;
> > +
> > +        /*
> > +         * RCU critical section + ref counting,
> > +         * so that MR won't disappear behind the scene
> > +         */
> > +        rcu_read_lock();
> > +        mr = memory_region_from_host(haddr, &offset);
> > +        if (mr) {
> > +            memory_region_ref(mr);
> > +        }
> > +        rcu_read_unlock();
> > +
> > +        if (mr) {
> > +            memory_region_do_writeback(mr, offset, dline_size);
> > +            memory_region_unref(mr);
> > +        }
> > +    }
> > +#endif
>
>
> We hold the rcu lock whenever a TB is executing.  I don't believe there's any
> point in increasing the lock count here.  Similarly with memory_region
> refcounts -- they cannot vanish while we're executing a TB.
>
> Thus I believe that about half of this function can fold away.
>
>
So I was chasing the wrong locking herre...
Indeed if the RCU lock is being already held I can safely drop the locking here.

> r~

Thank you for the review,

BR
Beata
diff mbox series

Patch

diff --git a/linux-user/elfload.c b/linux-user/elfload.c
index 3365e192eb..1ec00308d5 100644
--- a/linux-user/elfload.c
+++ b/linux-user/elfload.c
@@ -609,7 +609,12 @@  enum {
     ARM_HWCAP_A64_PACG          = 1UL << 31,
 };
 
+enum {
+    ARM_HWCAP2_A64_DCPODP   = 1 << 0,
+};
+
 #define ELF_HWCAP get_elf_hwcap()
+#define ELF_HWCAP2 get_elf_hwcap2()
 
 static uint32_t get_elf_hwcap(void)
 {
@@ -644,12 +649,23 @@  static uint32_t get_elf_hwcap(void)
     GET_FEATURE_ID(aa64_jscvt, ARM_HWCAP_A64_JSCVT);
     GET_FEATURE_ID(aa64_sb, ARM_HWCAP_A64_SB);
     GET_FEATURE_ID(aa64_condm_4, ARM_HWCAP_A64_FLAGM);
+    GET_FEATURE_ID(aa64_dcpop, ARM_HWCAP_A64_DCPOP);
 
-#undef GET_FEATURE_ID
 
     return hwcaps;
 }
 
+static uint32_t get_elf_hwcap2(void)
+{
+    ARMCPU *cpu = ARM_CPU(thread_cpu);
+    uint32_t hwcaps = 0;
+
+    GET_FEATURE_ID(aa64_dcpodp, ARM_HWCAP2_A64_DCPODP);
+    return hwcaps;
+}
+
+#undef GET_FEATURE_ID
+
 #endif /* not TARGET_AARCH64 */
 #endif /* TARGET_ARM */
 
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index 297ad5e47a..1713d76590 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -2229,7 +2229,8 @@  static inline uint64_t cpreg_to_kvm_id(uint32_t cpregid)
 #define ARM_CP_NZCV              (ARM_CP_SPECIAL | 0x0300)
 #define ARM_CP_CURRENTEL         (ARM_CP_SPECIAL | 0x0400)
 #define ARM_CP_DC_ZVA            (ARM_CP_SPECIAL | 0x0500)
-#define ARM_LAST_SPECIAL         ARM_CP_DC_ZVA
+#define ARM_CP_DC_CVAP           (ARM_CP_SPECIAL | 0x0600)
+#define ARM_LAST_SPECIAL         ARM_CP_DC_CVAP
 #define ARM_CP_FPU               0x1000
 #define ARM_CP_SVE               0x2000
 #define ARM_CP_NO_GDB            0x4000
@@ -3572,6 +3573,16 @@  static inline bool isar_feature_aa64_frint(const ARMISARegisters *id)
     return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, FRINTTS) != 0;
 }
 
+static inline bool isar_feature_aa64_dcpop(const ARMISARegisters *id)
+{
+    return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, DPB) != 0;
+}
+
+static inline bool isar_feature_aa64_dcpodp(const ARMISARegisters *id)
+{
+    return (FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, DPB) >> 1) != 0;
+}
+
 static inline bool isar_feature_aa64_fp16(const ARMISARegisters *id)
 {
     /* We always set the AdvSIMD and FP fields identically wrt FP16.  */
diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c
index d7f5bf610a..20094f980d 100644
--- a/target/arm/cpu64.c
+++ b/target/arm/cpu64.c
@@ -331,6 +331,7 @@  static void aarch64_max_initfn(Object *obj)
         cpu->isar.id_aa64isar0 = t;
 
         t = cpu->isar.id_aa64isar1;
+        t = FIELD_DP64(t, ID_AA64ISAR1, DPB, 2);
         t = FIELD_DP64(t, ID_AA64ISAR1, JSCVT, 1);
         t = FIELD_DP64(t, ID_AA64ISAR1, FCMA, 1);
         t = FIELD_DP64(t, ID_AA64ISAR1, APA, 1); /* PAuth, architected only */
diff --git a/target/arm/helper.c b/target/arm/helper.c
index 507026c915..99ae01b7e7 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -3843,6 +3843,22 @@  static CPAccessResult aa64_cacheop_access(CPUARMState *env,
     return CP_ACCESS_OK;
 }
 
+static CPAccessResult aa64_cacheop_persist_access(CPUARMState *env,
+                                                  const ARMCPRegInfo *ri,
+                                                  bool isread)
+{
+    ARMCPU *cpu = env_archcpu(env);
+    /*
+     * Access is UNDEF if lacking implementation for either DC CVAP or DC CVADP
+     * DC CVAP ->  CRm: 0xc
+     * DC CVADP -> CRm: 0xd
+     */
+    return (ri->crm == 0xc && !cpu_isar_feature(aa64_dcpop, cpu)) ||
+           (ri->crm == 0xd && !cpu_isar_feature(aa64_dcpodp, cpu))
+           ? CP_ACCESS_TRAP_UNCATEGORIZED
+           : aa64_cacheop_access(env, ri, isread);
+}
+
 /* See: D4.7.2 TLB maintenance requirements and the TLB maintenance instructions
  * Page D4-1736 (DDI0487A.b)
  */
@@ -4251,6 +4267,14 @@  static const ARMCPRegInfo v8_cp_reginfo[] = {
       .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 10, .opc2 = 1,
       .access = PL0_W, .type = ARM_CP_NOP,
       .accessfn = aa64_cacheop_access },
+    { .name = "DC_CVAP", .state = ARM_CP_STATE_AA64,
+      .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 12, .opc2 = 1,
+      .access = PL0_W, .type = ARM_CP_DC_CVAP,
+      .accessfn = aa64_cacheop_persist_access },
+    { .name = "DC_CVADP", .state = ARM_CP_STATE_AA64,
+      .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 13, .opc2 = 1,
+      .access = PL0_W, .type = ARM_CP_DC_CVAP,
+      .accessfn = aa64_cacheop_persist_access },
     { .name = "DC_CSW", .state = ARM_CP_STATE_AA64,
       .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 10, .opc2 = 2,
       .access = PL1_W, .type = ARM_CP_NOP },
diff --git a/target/arm/helper.h b/target/arm/helper.h
index 1fb2cb5a77..a850364944 100644
--- a/target/arm/helper.h
+++ b/target/arm/helper.h
@@ -561,6 +561,7 @@  DEF_HELPER_FLAGS_3(crypto_sm4ekey, TCG_CALL_NO_RWG, void, ptr, ptr, ptr)
 DEF_HELPER_FLAGS_3(crc32, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32)
 DEF_HELPER_FLAGS_3(crc32c, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32)
 DEF_HELPER_2(dc_zva, void, env, i64)
+DEF_HELPER_2(dc_cvap, void, env, i64)
 
 DEF_HELPER_FLAGS_2(neon_pmull_64_lo, TCG_CALL_NO_RWG_SE, i64, i64, i64)
 DEF_HELPER_FLAGS_2(neon_pmull_64_hi, TCG_CALL_NO_RWG_SE, i64, i64, i64)
diff --git a/target/arm/op_helper.c b/target/arm/op_helper.c
index 0fd4bd0238..75ebf6afa4 100644
--- a/target/arm/op_helper.c
+++ b/target/arm/op_helper.c
@@ -987,3 +987,39 @@  void HELPER(dc_zva)(CPUARMState *env, uint64_t vaddr_in)
     memset(g2h(vaddr), 0, blocklen);
 #endif
 }
+
+void HELPER(dc_cvap)(CPUARMState *env, uint64_t vaddr_in)
+{
+#ifndef CONFIG_USER_ONLY
+    ARMCPU *cpu = env_archcpu(env);
+    /* CTR_EL0 System register -> DminLine, bits [19:16] */
+    uint64_t dline_size = 4 << ((cpu->ctr >> 16) & 0xF);
+    uint64_t vaddr = vaddr_in & ~(dline_size - 1);
+    void *haddr;
+    int mem_idx = cpu_mmu_index(env, false);
+
+    /* This won't be crossing page boundaries */
+    haddr = probe_read(env, vaddr, dline_size, mem_idx, GETPC());
+    if (haddr) {
+
+        ram_addr_t offset;
+        MemoryRegion *mr;
+
+        /*
+         * RCU critical section + ref counting,
+         * so that MR won't disappear behind the scene
+         */
+        rcu_read_lock();
+        mr = memory_region_from_host(haddr, &offset);
+        if (mr) {
+            memory_region_ref(mr);
+        }
+        rcu_read_unlock();
+
+        if (mr) {
+            memory_region_do_writeback(mr, offset, dline_size);
+            memory_region_unref(mr);
+        }
+    }
+#endif
+}
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index 2d6cd09634..21ea3631d6 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -1746,6 +1746,11 @@  static void handle_sys(DisasContext *s, uint32_t insn, bool isread,
         tcg_rt = cpu_reg(s, rt);
         gen_helper_dc_zva(cpu_env, tcg_rt);
         return;
+    case ARM_CP_DC_CVAP:
+        /* DC CVAP / DC CVADP */
+        tcg_rt = cpu_reg(s, rt);
+        gen_helper_dc_cvap(cpu_env, tcg_rt);
+        return;
     default:
         break;
     }