diff mbox series

[6/6] target/arm: Implement FEAT_LPA2

Message ID 20211208231154.392029-7-richard.henderson@linaro.org
State New
Headers show
Series target/arm: Implement LVA, LPA, LPA2 features | expand

Commit Message

Richard Henderson Dec. 8, 2021, 11:11 p.m. UTC
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/arm/cpu.h       | 12 +++++++
 target/arm/internals.h |  2 ++
 target/arm/cpu64.c     |  2 ++
 target/arm/helper.c    | 80 +++++++++++++++++++++++++++++++++++-------
 4 files changed, 83 insertions(+), 13 deletions(-)

Comments

Alex Bennée Dec. 14, 2021, 2:57 p.m. UTC | #1
Richard Henderson <richard.henderson@linaro.org> writes:

> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>  target/arm/cpu.h       | 12 +++++++
>  target/arm/internals.h |  2 ++
>  target/arm/cpu64.c     |  2 ++
>  target/arm/helper.c    | 80 +++++++++++++++++++++++++++++++++++-------
>  4 files changed, 83 insertions(+), 13 deletions(-)
>
> diff --git a/target/arm/cpu.h b/target/arm/cpu.h
> index 3149000004..379585352b 100644
> --- a/target/arm/cpu.h
> +++ b/target/arm/cpu.h
> @@ -4283,6 +4283,18 @@ static inline bool isar_feature_aa64_i8mm(const ARMISARegisters *id)
>      return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, I8MM) != 0;
>  }
>  
> +static inline bool isar_feature_aa64_tgran4_lpa2(const ARMISARegisters *id)
> +{
> +    return sextract64(id->id_aa64mmfr0,
> +                      R_ID_AA64MMFR0_TGRAN4_SHIFT,
> +                      R_ID_AA64MMFR0_TGRAN4_LENGTH) >= 1;

Is this correct - it shows:

  0b1111 4KB granule not supported.

which I don't think implies FEAT_LPA2 because 1 indicates 4kb granule
supports 52 bit addressing. The other values are also reserved. Maybe it
would be safer just of == 1?

(a little more reading later)

  The ID_AA64MMFR0_EL1.TGran4_2, ID_AA64MMFR0_EL1.TGran16_2 and
  ID_AA64MMFR0_EL1.TGran64_2 fields that identify the memory translation stage 2 granule size, do not follow
  the standard ID scheme. Software must treat these fields as follows:
  • The value 0x0 indicates that support is identified by another field.
  • If the field value is not 0x0, the value indicates the level of support provided.
  This means that software should use a test of the form:
  if (field !=0 and field > value) {
  // do something that relies on the value of the feature
  }

That's just confusing. It implies that you could see any of the TGran
fields set to zero but still support LPA2 if the other fields are set. I
think this at least warrants mentioning in an accompanying comments for
the function. 

Otherwise:

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Richard Henderson Dec. 14, 2021, 8:24 p.m. UTC | #2
On 12/14/21 6:57 AM, Alex Bennée wrote:
>> +static inline bool isar_feature_aa64_tgran4_lpa2(const ARMISARegisters *id)
>> +{
>> +    return sextract64(id->id_aa64mmfr0,
>> +                      R_ID_AA64MMFR0_TGRAN4_SHIFT,
>> +                      R_ID_AA64MMFR0_TGRAN4_LENGTH) >= 1;
> 
> Is this correct - it shows:
> 
>    0b1111 4KB granule not supported.

Yes, that's why the signed extract, so not supported comes out as -1.
See D13.1.3 "Principles of the ID scheme for fields in ID registers".


> (a little more reading later)
> 
>    The ID_AA64MMFR0_EL1.TGran4_2, ID_AA64MMFR0_EL1.TGran16_2 and
>    ID_AA64MMFR0_EL1.TGran64_2 fields that identify the memory translation stage 2 granule size, do not follow
>    the standard ID scheme. Software must treat these fields as follows:

Note that we're not testing the *_2 fields, which are *stage2* support, not stage1.  I did 
add a comment about assuming stage2 encodes the same value as stage1 (which is true for 
all supported cpus).



r~
Peter Maydell Jan. 7, 2022, 2:39 p.m. UTC | #3
On Wed, 8 Dec 2021 at 23:21, Richard Henderson
<richard.henderson@linaro.org> wrote:
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

Again, a commit message would be nice.

> ---
>  target/arm/cpu.h       | 12 +++++++
>  target/arm/internals.h |  2 ++
>  target/arm/cpu64.c     |  2 ++
>  target/arm/helper.c    | 80 +++++++++++++++++++++++++++++++++++-------
>  4 files changed, 83 insertions(+), 13 deletions(-)
>
> diff --git a/target/arm/cpu.h b/target/arm/cpu.h
> index 3149000004..379585352b 100644
> --- a/target/arm/cpu.h
> +++ b/target/arm/cpu.h
> @@ -4283,6 +4283,18 @@ static inline bool isar_feature_aa64_i8mm(const ARMISARegisters *id)
>      return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, I8MM) != 0;
>  }
>
> +static inline bool isar_feature_aa64_tgran4_lpa2(const ARMISARegisters *id)
> +{
> +    return sextract64(id->id_aa64mmfr0,
> +                      R_ID_AA64MMFR0_TGRAN4_SHIFT,
> +                      R_ID_AA64MMFR0_TGRAN4_LENGTH) >= 1;
> +}
> +
> +static inline bool isar_feature_aa64_tgran16_lpa2(const ARMISARegisters *id)
> +{
> +    return FIELD_EX64(id->id_aa64mmfr0, ID_AA64MMFR0, TGRAN16) >= 2;
> +}

(I wonder if we should have a FIELD_SEX64 etc ?)

> +
>  static inline bool isar_feature_aa64_ccidx(const ARMISARegisters *id)
>  {
>      return FIELD_EX64(id->id_aa64mmfr2, ID_AA64MMFR2, CCIDX) != 0;
> diff --git a/target/arm/internals.h b/target/arm/internals.h
> index 3e801833b4..868cae2a55 100644
> --- a/target/arm/internals.h
> +++ b/target/arm/internals.h
> @@ -1033,12 +1033,14 @@ static inline uint32_t aarch64_pstate_valid_mask(const ARMISARegisters *id)
>  typedef struct ARMVAParameters {
>      unsigned tsz    : 8;
>      unsigned ps     : 3;
> +    unsigned sh     : 2;
>      unsigned select : 1;
>      bool tbi        : 1;
>      bool epd        : 1;
>      bool hpd        : 1;
>      bool using16k   : 1;
>      bool using64k   : 1;
> +    bool ds         : 1;
>  } ARMVAParameters;
>
>  ARMVAParameters aa64_va_parameters(CPUARMState *env, uint64_t va,
> diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c
> index 3bb79ca744..5a1940aa94 100644
> --- a/target/arm/cpu64.c
> +++ b/target/arm/cpu64.c
> @@ -740,6 +740,8 @@ static void aarch64_max_initfn(Object *obj)
>
>          t = cpu->isar.id_aa64mmfr0;
>          t = FIELD_DP64(t, ID_AA64MMFR0, PARANGE, 6); /* FEAT_LPA: 52 bits */
> +        t = FIELD_DP64(t, ID_AA64MMFR0, TGRAN16, 2); /* FEAT_LPA2: 52 bits */
> +        t = FIELD_DP64(t, ID_AA64MMFR0, TGRAN4, 1);  /* FEAT_LPA2: 52 bits */

Shouldn't we also set the TGRAN4_2 and TGRAN16_2 fields?

>          cpu->isar.id_aa64mmfr0 = t;
>
>          t = cpu->isar.id_aa64mmfr1;
> diff --git a/target/arm/helper.c b/target/arm/helper.c
> index e39c1f5b3a..f4a8b37f98 100644
> --- a/target/arm/helper.c
> +++ b/target/arm/helper.c
> @@ -11008,8 +11008,13 @@ static bool check_s2_mmu_setup(ARMCPU *cpu, bool is_aa64, int level,
>      const int grainsize = stride + 3;
>      int startsizecheck;
>
> -    /* Negative levels are never allowed.  */
> -    if (level < 0) {
> +    /*
> +     * Negative levels are usually not allowed...
> +     * Except for FEAT_LPA2, 4k page table, 52-bit address space, which
> +     * begins with level -1.  Note that previous feature tests will have
> +     * eliminated this combination if it is not enabled.
> +     */
> +    if (level < (inputsize == 52 && stride == 9 ? -1 : 0)) {
>          return false;
>      }

The checks on validity of 'level' are getting quite complicated:
 * we do this check here (which now involves 'stride')
 * some values of 'level' will cause the startsizecheck to fail
   (calculation of startsizecheck also involves 'stride')
 * we then switch on 'stride' and check for 'level' validity
   differently in the various cases

Can we simplify this at all ? Or are we just following the logic
the pseudocode uses (I haven't checked) ?

> @@ -11150,8 +11155,8 @@ ARMVAParameters aa64_va_parameters(CPUARMState *env, uint64_t va,
>                                     ARMMMUIdx mmu_idx, bool data)
>  {
>      uint64_t tcr = regime_tcr(env, mmu_idx)->raw_tcr;
> -    bool epd, hpd, using16k, using64k;
> -    int select, tsz, tbi, ps;
> +    bool epd, hpd, using16k, using64k, ds;
> +    int select, tsz, tbi, ps, sh;
>
>      if (!regime_has_2_ranges(mmu_idx)) {
>          select = 0;
> @@ -11165,7 +11170,9 @@ ARMVAParameters aa64_va_parameters(CPUARMState *env, uint64_t va,
>              hpd = extract32(tcr, 24, 1);
>          }
>          epd = false;
> +        sh = extract64(tcr, 12, 2);
>          ps = extract64(tcr, 16, 3);
> +        ds = extract64(tcr, 32, 1);
>      } else {
>          /*
>           * Bit 55 is always between the two regions, and is canonical for
> @@ -11175,6 +11182,7 @@ ARMVAParameters aa64_va_parameters(CPUARMState *env, uint64_t va,
>          if (!select) {
>              tsz = extract32(tcr, 0, 6);
>              epd = extract32(tcr, 7, 1);
> +            sh = extract32(tcr, 12, 2);
>              using64k = extract32(tcr, 14, 1);
>              using16k = extract32(tcr, 15, 1);
>              hpd = extract64(tcr, 41, 1);
> @@ -11184,9 +11192,11 @@ ARMVAParameters aa64_va_parameters(CPUARMState *env, uint64_t va,
>              using64k = tg == 3;
>              tsz = extract32(tcr, 16, 6);
>              epd = extract32(tcr, 23, 1);
> +            sh = extract32(tcr, 28, 2);
>              hpd = extract64(tcr, 42, 1);
>          }
>          ps = extract64(tcr, 32, 3);
> +        ds = extract64(tcr, 59, 1);
>      }
>
>      /* Present TBI as a composite with TBID.  */
> @@ -11199,12 +11209,14 @@ ARMVAParameters aa64_va_parameters(CPUARMState *env, uint64_t va,
>      return (ARMVAParameters) {
>          .tsz = tsz,
>          .ps = ps,
> +        .sh = sh,
>          .select = select,
>          .tbi = tbi,
>          .epd = epd,
>          .hpd = hpd,
>          .using16k = using16k,
>          .using64k = using64k,
> +        .ds = ds,
>      };
>  }
>
> @@ -11332,15 +11344,31 @@ static bool get_phys_addr_lpae(CPUARMState *env, uint64_t address,
>                                     access_type != MMU_INST_FETCH);
>          level = 0;
>
> -        if (cpu_isar_feature(aa64_st, env_archcpu(env))) {
> +        /* Find the minimum allowed input address size. */
> +        if (cpu_isar_feature(aa64_st, cpu)) {
>              max_tsz = 48 - param.using64k;
>          }
> +
> +        /*
> +         * Find the maximum allowed input address size.
> +         * DS is RES0 unless FEAT_LPA2 is supported for the given page size;
> +         * adjust param.ds to the effective value of DS, as documented.
> +         */
>          if (param.using64k) {
> -            if (cpu_isar_feature(aa64_lva, env_archcpu(env))) {
> +            if (cpu_isar_feature(aa64_lva, cpu)) {
>                  min_tsz = 12;
>              }
> +            param.ds = false;
> +        } else if (param.ds) {
> +            /* ??? Assume tgran{4,16}_2 == 0, i.e. match tgran{4,16}. */

How painful would it be to fix this '???' ? Setting those fields to 0 is
deprecated, so it would be preferable not to start out depending on that.
(We don't currently use the tgran fields at all, I guess because we allow
all page sizes regardless of the ID register values. Maybe we should
correct that too.)

> +            if (param.using16k
> +                ? cpu_isar_feature(aa64_tgran16_lpa2, cpu)
> +                : cpu_isar_feature(aa64_tgran4_lpa2, cpu)) {
> +                min_tsz = 12;
> +            } else {
> +                param.ds = false;
> +            }
>          }
> -        /* TODO: FEAT_LPA2 */
>
>          /*
>           * If TxSZ is programmed to a value larger than the maximum,
> @@ -11441,10 +11469,19 @@ static bool get_phys_addr_lpae(CPUARMState *env, uint64_t address,
>           * VTCR_EL2.SL0 field (whose interpretation depends on the page size)
>           */
>          uint32_t sl0 = extract32(tcr->raw_tcr, 6, 2);
> +        uint32_t sl2 = extract64(tcr->raw_tcr, 33, 1);
>          uint32_t startlevel;
>          bool ok;
>
> -        if (!aarch64 || stride == 9) {
> +        /* SL2 is RES0 unless DS=1 & 4kb granule. */
> +        if (param.ds && stride == 9 && sl2) {
> +            if (sl0 != 0) {
> +                level = 0;
> +                fault_type = ARMFault_Translation;
> +                goto do_fault;
> +            }
> +            startlevel = -1;
> +        } else if (!aarch64 || stride == 9) {
>              /* AArch32 or 4KB pages */
>              startlevel = 2 - sl0;
>
> @@ -11499,7 +11536,9 @@ static bool get_phys_addr_lpae(CPUARMState *env, uint64_t address,
>       * but in ARMv8 they are checked for zero and an AddressSize fault
>       * is raised if they are not.
>       */
> -    if (aarch64 || arm_feature(env, ARM_FEATURE_V8)) {
> +    if (param.ds) {
> +        descaddrmask = MAKE_64BIT_MASK(0, 50);
> +    } else if (aarch64 || arm_feature(env, ARM_FEATURE_V8)) {
>          descaddrmask = MAKE_64BIT_MASK(0, 48);
>      } else {
>          descaddrmask = MAKE_64BIT_MASK(0, 40);
> @@ -11534,11 +11573,16 @@ static bool get_phys_addr_lpae(CPUARMState *env, uint64_t address,
>
>          /*
>           * For FEAT_LPA and PS=6, bits [51:48] of descaddr are in [15:12]
> -         * of descriptor.  Otherwise, if descaddr is out of range, raise
> -         * AddressSizeFault.
> +         * of descriptor.  For FEAT_LPA2 and effective DS, bits [51:50] of
> +         * descaddr are in [9:8].  Otherwise, if descaddr is out of range,
> +         * raise AddressSizeFault.
>           */
>          if (outputsize > 48) {
> -            descaddr |= extract64(descriptor, 12, 4) << 48;
> +            if (param.ds) {
> +                descaddr |= extract64(descriptor, 8, 2) << 50;
> +            } else {
> +                descaddr |= extract64(descriptor, 12, 4) << 48;
> +            }
>          } else if (descaddr >> outputsize) {
>              fault_type = ARMFault_AddressSize;
>              goto do_fault;
> @@ -11632,7 +11676,17 @@ static bool get_phys_addr_lpae(CPUARMState *env, uint64_t address,
>          assert(attrindx <= 7);
>          cacheattrs->attrs = extract64(mair, attrindx * 8, 8);
>      }
> -    cacheattrs->shareability = extract32(attrs, 6, 2);
> +
> +    /*
> +     * For FEAT_LPA2 and effective DS, the SH field in the attributes
> +     * was re-purposed for output address bits.  The SH attribute in
> +     * that case comes from TCR_ELx, which we extracted earlier.
> +     */
> +    if (param.ds) {
> +        cacheattrs->shareability = param.sh;
> +    } else {
> +        cacheattrs->shareability = extract32(attrs, 6, 2);
> +    }
>
>      *phys_ptr = descaddr;
>      *page_size_ptr = page_size;

The code above looks correct to me. There are some missing pieces
elsewhere, though:

(1) The handling of the BaseADDR field for TLB range
invalidates needs updating (there's a TODO to this effect in
tlbi_aa64_range_get_base()).

Side note: in that function, we shift the field by TARGET_PAGE_BITS,
but the docs say that the shift should depend on the configured
translation granule. Is that a bug?

(2) There are some new long-form fault status codes with FEAT_LPA2,
corresponding to various fault types that can now occur at level -1.
arm_fi_to_lfsc() needs updating to handle fi->level being -1.
(You could do this bit as a preceding patch; it doesn't need to
be squashed into this one.)

thanks
-- PMM
Richard Henderson Feb. 10, 2022, 2:48 a.m. UTC | #4
On 1/8/22 01:39, Peter Maydell wrote:
> (1) The handling of the BaseADDR field for TLB range
> invalidates needs updating (there's a TODO to this effect in
> tlbi_aa64_range_get_base()).
> 
> Side note: in that function, we shift the field by TARGET_PAGE_BITS,
> but the docs say that the shift should depend on the configured
> translation granule. Is that a bug?

Yes.

> (2) There are some new long-form fault status codes with FEAT_LPA2,
> corresponding to various fault types that can now occur at level -1.
> arm_fi_to_lfsc() needs updating to handle fi->level being -1.
> (You could do this bit as a preceding patch; it doesn't need to
> be squashed into this one.)

Yep, thanks.


r~
diff mbox series

Patch

diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index 3149000004..379585352b 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -4283,6 +4283,18 @@  static inline bool isar_feature_aa64_i8mm(const ARMISARegisters *id)
     return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, I8MM) != 0;
 }
 
+static inline bool isar_feature_aa64_tgran4_lpa2(const ARMISARegisters *id)
+{
+    return sextract64(id->id_aa64mmfr0,
+                      R_ID_AA64MMFR0_TGRAN4_SHIFT,
+                      R_ID_AA64MMFR0_TGRAN4_LENGTH) >= 1;
+}
+
+static inline bool isar_feature_aa64_tgran16_lpa2(const ARMISARegisters *id)
+{
+    return FIELD_EX64(id->id_aa64mmfr0, ID_AA64MMFR0, TGRAN16) >= 2;
+}
+
 static inline bool isar_feature_aa64_ccidx(const ARMISARegisters *id)
 {
     return FIELD_EX64(id->id_aa64mmfr2, ID_AA64MMFR2, CCIDX) != 0;
diff --git a/target/arm/internals.h b/target/arm/internals.h
index 3e801833b4..868cae2a55 100644
--- a/target/arm/internals.h
+++ b/target/arm/internals.h
@@ -1033,12 +1033,14 @@  static inline uint32_t aarch64_pstate_valid_mask(const ARMISARegisters *id)
 typedef struct ARMVAParameters {
     unsigned tsz    : 8;
     unsigned ps     : 3;
+    unsigned sh     : 2;
     unsigned select : 1;
     bool tbi        : 1;
     bool epd        : 1;
     bool hpd        : 1;
     bool using16k   : 1;
     bool using64k   : 1;
+    bool ds         : 1;
 } ARMVAParameters;
 
 ARMVAParameters aa64_va_parameters(CPUARMState *env, uint64_t va,
diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c
index 3bb79ca744..5a1940aa94 100644
--- a/target/arm/cpu64.c
+++ b/target/arm/cpu64.c
@@ -740,6 +740,8 @@  static void aarch64_max_initfn(Object *obj)
 
         t = cpu->isar.id_aa64mmfr0;
         t = FIELD_DP64(t, ID_AA64MMFR0, PARANGE, 6); /* FEAT_LPA: 52 bits */
+        t = FIELD_DP64(t, ID_AA64MMFR0, TGRAN16, 2); /* FEAT_LPA2: 52 bits */
+        t = FIELD_DP64(t, ID_AA64MMFR0, TGRAN4, 1);  /* FEAT_LPA2: 52 bits */
         cpu->isar.id_aa64mmfr0 = t;
 
         t = cpu->isar.id_aa64mmfr1;
diff --git a/target/arm/helper.c b/target/arm/helper.c
index e39c1f5b3a..f4a8b37f98 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -11008,8 +11008,13 @@  static bool check_s2_mmu_setup(ARMCPU *cpu, bool is_aa64, int level,
     const int grainsize = stride + 3;
     int startsizecheck;
 
-    /* Negative levels are never allowed.  */
-    if (level < 0) {
+    /*
+     * Negative levels are usually not allowed...
+     * Except for FEAT_LPA2, 4k page table, 52-bit address space, which
+     * begins with level -1.  Note that previous feature tests will have
+     * eliminated this combination if it is not enabled.
+     */
+    if (level < (inputsize == 52 && stride == 9 ? -1 : 0)) {
         return false;
     }
 
@@ -11150,8 +11155,8 @@  ARMVAParameters aa64_va_parameters(CPUARMState *env, uint64_t va,
                                    ARMMMUIdx mmu_idx, bool data)
 {
     uint64_t tcr = regime_tcr(env, mmu_idx)->raw_tcr;
-    bool epd, hpd, using16k, using64k;
-    int select, tsz, tbi, ps;
+    bool epd, hpd, using16k, using64k, ds;
+    int select, tsz, tbi, ps, sh;
 
     if (!regime_has_2_ranges(mmu_idx)) {
         select = 0;
@@ -11165,7 +11170,9 @@  ARMVAParameters aa64_va_parameters(CPUARMState *env, uint64_t va,
             hpd = extract32(tcr, 24, 1);
         }
         epd = false;
+        sh = extract64(tcr, 12, 2);
         ps = extract64(tcr, 16, 3);
+        ds = extract64(tcr, 32, 1);
     } else {
         /*
          * Bit 55 is always between the two regions, and is canonical for
@@ -11175,6 +11182,7 @@  ARMVAParameters aa64_va_parameters(CPUARMState *env, uint64_t va,
         if (!select) {
             tsz = extract32(tcr, 0, 6);
             epd = extract32(tcr, 7, 1);
+            sh = extract32(tcr, 12, 2);
             using64k = extract32(tcr, 14, 1);
             using16k = extract32(tcr, 15, 1);
             hpd = extract64(tcr, 41, 1);
@@ -11184,9 +11192,11 @@  ARMVAParameters aa64_va_parameters(CPUARMState *env, uint64_t va,
             using64k = tg == 3;
             tsz = extract32(tcr, 16, 6);
             epd = extract32(tcr, 23, 1);
+            sh = extract32(tcr, 28, 2);
             hpd = extract64(tcr, 42, 1);
         }
         ps = extract64(tcr, 32, 3);
+        ds = extract64(tcr, 59, 1);
     }
 
     /* Present TBI as a composite with TBID.  */
@@ -11199,12 +11209,14 @@  ARMVAParameters aa64_va_parameters(CPUARMState *env, uint64_t va,
     return (ARMVAParameters) {
         .tsz = tsz,
         .ps = ps,
+        .sh = sh,
         .select = select,
         .tbi = tbi,
         .epd = epd,
         .hpd = hpd,
         .using16k = using16k,
         .using64k = using64k,
+        .ds = ds,
     };
 }
 
@@ -11332,15 +11344,31 @@  static bool get_phys_addr_lpae(CPUARMState *env, uint64_t address,
                                    access_type != MMU_INST_FETCH);
         level = 0;
 
-        if (cpu_isar_feature(aa64_st, env_archcpu(env))) {
+        /* Find the minimum allowed input address size. */
+        if (cpu_isar_feature(aa64_st, cpu)) {
             max_tsz = 48 - param.using64k;
         }
+
+        /*
+         * Find the maximum allowed input address size.
+         * DS is RES0 unless FEAT_LPA2 is supported for the given page size;
+         * adjust param.ds to the effective value of DS, as documented.
+         */
         if (param.using64k) {
-            if (cpu_isar_feature(aa64_lva, env_archcpu(env))) {
+            if (cpu_isar_feature(aa64_lva, cpu)) {
                 min_tsz = 12;
             }
+            param.ds = false;
+        } else if (param.ds) {
+            /* ??? Assume tgran{4,16}_2 == 0, i.e. match tgran{4,16}. */
+            if (param.using16k
+                ? cpu_isar_feature(aa64_tgran16_lpa2, cpu)
+                : cpu_isar_feature(aa64_tgran4_lpa2, cpu)) {
+                min_tsz = 12;
+            } else {
+                param.ds = false;
+            }
         }
-        /* TODO: FEAT_LPA2 */
 
         /*
          * If TxSZ is programmed to a value larger than the maximum,
@@ -11441,10 +11469,19 @@  static bool get_phys_addr_lpae(CPUARMState *env, uint64_t address,
          * VTCR_EL2.SL0 field (whose interpretation depends on the page size)
          */
         uint32_t sl0 = extract32(tcr->raw_tcr, 6, 2);
+        uint32_t sl2 = extract64(tcr->raw_tcr, 33, 1);
         uint32_t startlevel;
         bool ok;
 
-        if (!aarch64 || stride == 9) {
+        /* SL2 is RES0 unless DS=1 & 4kb granule. */
+        if (param.ds && stride == 9 && sl2) {
+            if (sl0 != 0) {
+                level = 0;
+                fault_type = ARMFault_Translation;
+                goto do_fault;
+            }
+            startlevel = -1;
+        } else if (!aarch64 || stride == 9) {
             /* AArch32 or 4KB pages */
             startlevel = 2 - sl0;
 
@@ -11499,7 +11536,9 @@  static bool get_phys_addr_lpae(CPUARMState *env, uint64_t address,
      * but in ARMv8 they are checked for zero and an AddressSize fault
      * is raised if they are not.
      */
-    if (aarch64 || arm_feature(env, ARM_FEATURE_V8)) {
+    if (param.ds) {
+        descaddrmask = MAKE_64BIT_MASK(0, 50);
+    } else if (aarch64 || arm_feature(env, ARM_FEATURE_V8)) {
         descaddrmask = MAKE_64BIT_MASK(0, 48);
     } else {
         descaddrmask = MAKE_64BIT_MASK(0, 40);
@@ -11534,11 +11573,16 @@  static bool get_phys_addr_lpae(CPUARMState *env, uint64_t address,
 
         /*
          * For FEAT_LPA and PS=6, bits [51:48] of descaddr are in [15:12]
-         * of descriptor.  Otherwise, if descaddr is out of range, raise
-         * AddressSizeFault.
+         * of descriptor.  For FEAT_LPA2 and effective DS, bits [51:50] of
+         * descaddr are in [9:8].  Otherwise, if descaddr is out of range,
+         * raise AddressSizeFault.
          */
         if (outputsize > 48) {
-            descaddr |= extract64(descriptor, 12, 4) << 48;
+            if (param.ds) {
+                descaddr |= extract64(descriptor, 8, 2) << 50;
+            } else {
+                descaddr |= extract64(descriptor, 12, 4) << 48;
+            }
         } else if (descaddr >> outputsize) {
             fault_type = ARMFault_AddressSize;
             goto do_fault;
@@ -11632,7 +11676,17 @@  static bool get_phys_addr_lpae(CPUARMState *env, uint64_t address,
         assert(attrindx <= 7);
         cacheattrs->attrs = extract64(mair, attrindx * 8, 8);
     }
-    cacheattrs->shareability = extract32(attrs, 6, 2);
+
+    /*
+     * For FEAT_LPA2 and effective DS, the SH field in the attributes
+     * was re-purposed for output address bits.  The SH attribute in
+     * that case comes from TCR_ELx, which we extracted earlier.
+     */
+    if (param.ds) {
+        cacheattrs->shareability = param.sh;
+    } else {
+        cacheattrs->shareability = extract32(attrs, 6, 2);
+    }
 
     *phys_ptr = descaddr;
     *page_size_ptr = page_size;