diff mbox

x86: implement la57 paging mode

Message ID 20161215001305.146807-1-kirill.shutemov@linux.intel.com
State New
Headers show

Commit Message

Kirill A. Shutemov Dec. 15, 2016, 12:13 a.m. UTC
The new paging mode is an extension of IA-32e mode with one additional page
table level.

It brings support for a 57-bit virtual address space (128PB) and a 52-bit
physical address space (4PB).

The structure of the new page table level is identical to pml4.

The feature is enumerated with CPUID.(EAX=07H, ECX=0):ECX[bit 16].

CR4.LA57[bit 12] needs to be set when paging is enabled to activate 5-level
paging mode.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
---
 target-i386/arch_memory_mapping.c |  42 ++++++-
 target-i386/cpu.c                 |  16 ++-
 target-i386/cpu.h                 |   2 +
 target-i386/helper.c              |  54 +++++++--
 target-i386/monitor.c             | 234 +++++++++++++++++++++++++++++---------
 target-i386/translate.c           |   2 +
 6 files changed, 276 insertions(+), 74 deletions(-)

Comments

Paolo Bonzini Dec. 16, 2016, 12:59 p.m. UTC | #1
On 15/12/2016 01:13, Kirill A. Shutemov wrote:
> The new paging more is extension of IA32e mode with more additional page
> table level.
> 
> It brings support of 57-bit vitrual address space (128PB) and 52-bit
> physical address space (4PB).
> 
> The structure of new page table level is identical to pml4.
> 
> The feature is enumerated with CPUID.(EAX=07H, ECX=0):ECX[bit 16].
> 
> CR4.LA57[bit 12] need to be set when pageing enables to activate 5-level
> paging mode.
> 
> Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>

Looks good, thanks!  The target-i386/translate.c bits are not necessary,
but I guess they can also be removed on commit.

Any chance you could also implement the MPX bits?

Thanks,

Paolo

> ---
>  target-i386/arch_memory_mapping.c |  42 ++++++-
>  target-i386/cpu.c                 |  16 ++-
>  target-i386/cpu.h                 |   2 +
>  target-i386/helper.c              |  54 +++++++--
>  target-i386/monitor.c             | 234 +++++++++++++++++++++++++++++---------
>  target-i386/translate.c           |   2 +
>  6 files changed, 276 insertions(+), 74 deletions(-)
> 
> diff --git a/target-i386/arch_memory_mapping.c b/target-i386/arch_memory_mapping.c
> index 88f341e1bbd0..826aee597b13 100644
> --- a/target-i386/arch_memory_mapping.c
> +++ b/target-i386/arch_memory_mapping.c
> @@ -220,7 +220,8 @@ static void walk_pdpe(MemoryMappingList *list, AddressSpace *as,
>  
>  /* IA-32e Paging */
>  static void walk_pml4e(MemoryMappingList *list, AddressSpace *as,
> -                       hwaddr pml4e_start_addr, int32_t a20_mask)
> +                       hwaddr pml4e_start_addr, int32_t a20_mask,
> +                       target_ulong start_line_addr)
>  {
>      hwaddr pml4e_addr, pdpe_start_addr;
>      uint64_t pml4e;
> @@ -236,11 +237,34 @@ static void walk_pml4e(MemoryMappingList *list, AddressSpace *as,
>              continue;
>          }
>  
> -        line_addr = ((i & 0x1ffULL) << 39) | (0xffffULL << 48);
> +        line_addr = start_line_addr | ((i & 0x1ffULL) << 39);
>          pdpe_start_addr = (pml4e & PLM4_ADDR_MASK) & a20_mask;
>          walk_pdpe(list, as, pdpe_start_addr, a20_mask, line_addr);
>      }
>  }
> +
> +static void walk_pml5e(MemoryMappingList *list, AddressSpace *as,
> +                       hwaddr pml5e_start_addr, int32_t a20_mask)
> +{
> +    hwaddr pml5e_addr, pml4e_start_addr;
> +    uint64_t pml5e;
> +    target_ulong line_addr;
> +    int i;
> +    /* Scan all 512 PML5 entries, descending into each present one. */
> +    for (i = 0; i < 512; i++) {
> +        pml5e_addr = (pml5e_start_addr + i * 8) & a20_mask;
> +        pml5e = address_space_ldq(as, pml5e_addr, MEMTXATTRS_UNSPECIFIED,
> +                                  NULL);
> +        if (!(pml5e & PG_PRESENT_MASK)) {
> +            /* entry not present: nothing is mapped below it, skip */
> +            continue;
> +        }
> +        /* Force bits 57-63 to ones; place the PML5 index in bits 48-56. */
> +        line_addr = (0x7fULL << 57) | ((i & 0x1ffULL) << 48);
> +        pml4e_start_addr = (pml5e & PLM4_ADDR_MASK) & a20_mask;
> +        walk_pml4e(list, as, pml4e_start_addr, a20_mask, line_addr);
> +    }
> +}
>  #endif
>  
>  void x86_cpu_get_memory_mapping(CPUState *cs, MemoryMappingList *list,
> @@ -257,10 +281,18 @@ void x86_cpu_get_memory_mapping(CPUState *cs, MemoryMappingList *list,
>      if (env->cr[4] & CR4_PAE_MASK) {
>  #ifdef TARGET_X86_64
>          if (env->hflags & HF_LMA_MASK) {
> -            hwaddr pml4e_addr;
> +            if (env->cr[4] & CR4_LA57_MASK) {
> +                hwaddr pml5e_addr;
> +
> +                pml5e_addr = (env->cr[3] & PLM4_ADDR_MASK) & env->a20_mask;
> +                walk_pml5e(list, cs->as, pml5e_addr, env->a20_mask);
> +            } else {
> +                hwaddr pml4e_addr;
>  
> -            pml4e_addr = (env->cr[3] & PLM4_ADDR_MASK) & env->a20_mask;
> -            walk_pml4e(list, cs->as, pml4e_addr, env->a20_mask);
> +                pml4e_addr = (env->cr[3] & PLM4_ADDR_MASK) & env->a20_mask;
> +                walk_pml4e(list, cs->as, pml4e_addr, env->a20_mask,
> +                        0xffffULL << 48);
> +            }
>          } else
>  #endif
>          {
> diff --git a/target-i386/cpu.c b/target-i386/cpu.c
> index de1f30eeda63..a4b9832b5916 100644
> --- a/target-i386/cpu.c
> +++ b/target-i386/cpu.c
> @@ -238,7 +238,8 @@ static void x86_cpu_vendor_words2str(char *dst, uint32_t vendor1,
>            CPUID_7_0_EBX_HLE, CPUID_7_0_EBX_AVX2,
>            CPUID_7_0_EBX_INVPCID, CPUID_7_0_EBX_RTM,
>            CPUID_7_0_EBX_RDSEED */
> -#define TCG_7_0_ECX_FEATURES (CPUID_7_0_ECX_PKU | CPUID_7_0_ECX_OSPKE)
> +#define TCG_7_0_ECX_FEATURES (CPUID_7_0_ECX_PKU | CPUID_7_0_ECX_OSPKE | \
> +          CPUID_7_0_ECX_LA57)
>  #define TCG_7_0_EDX_FEATURES 0
>  #define TCG_APM_FEATURES 0
>  #define TCG_6_EAX_FEATURES CPUID_6_EAX_ARAT
> @@ -435,7 +436,7 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = {
>              "ospke", NULL, NULL, NULL,
>              NULL, NULL, NULL, NULL,
>              NULL, NULL, NULL, NULL,
> -            NULL, NULL, NULL, NULL,
> +            "la57", NULL, NULL, NULL,
>              NULL, NULL, "rdpid", NULL,
>              NULL, NULL, NULL, NULL,
>              NULL, NULL, NULL, NULL,
> @@ -2742,10 +2743,13 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
>      case 0x80000008:
>          /* virtual & phys address size in low 2 bytes. */
>          if (env->features[FEAT_8000_0001_EDX] & CPUID_EXT2_LM) {
> -            /* 64 bit processor, 48 bits virtual, configurable
> -             * physical bits.
> -             */
> -            *eax = 0x00003000 + cpu->phys_bits;
> +            /* 64 bit processor */
> +            *eax = cpu->phys_bits; /* configurable physical bits */
> +            if  (env->features[FEAT_7_0_ECX] & CPUID_7_0_ECX_LA57) {
> +                *eax |= 0x00003900; /* 57 bits virtual */
> +            } else {
> +                *eax |= 0x00003000; /* 48 bits virtual */
> +            }
>          } else {
>              *eax = cpu->phys_bits;
>          }
> diff --git a/target-i386/cpu.h b/target-i386/cpu.h
> index c60572402272..0ba880fc2632 100644
> --- a/target-i386/cpu.h
> +++ b/target-i386/cpu.h
> @@ -224,6 +224,7 @@
>  #define CR4_OSFXSR_SHIFT 9
>  #define CR4_OSFXSR_MASK (1U << CR4_OSFXSR_SHIFT)
>  #define CR4_OSXMMEXCPT_MASK  (1U << 10)
> +#define CR4_LA57_MASK   (1U << 12)
>  #define CR4_VMXE_MASK   (1U << 13)
>  #define CR4_SMXE_MASK   (1U << 14)
>  #define CR4_FSGSBASE_MASK (1U << 16)
> @@ -628,6 +629,7 @@ typedef uint32_t FeatureWordArray[FEATURE_WORDS];
>  #define CPUID_7_0_ECX_UMIP     (1U << 2)
>  #define CPUID_7_0_ECX_PKU      (1U << 3)
>  #define CPUID_7_0_ECX_OSPKE    (1U << 4)
> +#define CPUID_7_0_ECX_LA57     (1U << 16)
>  #define CPUID_7_0_ECX_RDPID    (1U << 22)
>  
>  #define CPUID_7_0_EDX_AVX512_4VNNIW (1U << 2) /* AVX512 Neural Network Instructions */
> diff --git a/target-i386/helper.c b/target-i386/helper.c
> index 4ecc0912a48a..43e87ddba001 100644
> --- a/target-i386/helper.c
> +++ b/target-i386/helper.c
> @@ -651,11 +651,11 @@ void cpu_x86_update_cr4(CPUX86State *env, uint32_t new_cr4)
>      uint32_t hflags;
>  
>  #if defined(DEBUG_MMU)
> -    printf("CR4 update: CR4=%08x\n", (uint32_t)env->cr[4]);
> +    printf("CR4 update: %08x -> %08x\n", (uint32_t)env->cr[4], new_cr4);
>  #endif
>      if ((new_cr4 ^ env->cr[4]) &
>          (CR4_PGE_MASK | CR4_PAE_MASK | CR4_PSE_MASK |
> -         CR4_SMEP_MASK | CR4_SMAP_MASK)) {
> +         CR4_SMEP_MASK | CR4_SMAP_MASK | CR4_LA57_MASK)) {
>          tlb_flush(CPU(cpu), 1);
>      }
>  
> @@ -757,19 +757,41 @@ int x86_cpu_handle_mmu_fault(CPUState *cs, vaddr addr,
>  
>  #ifdef TARGET_X86_64
>          if (env->hflags & HF_LMA_MASK) {
> +            bool la57 = env->cr[4] & CR4_LA57_MASK;
> +            uint64_t pml5e_addr, pml5e;
>              uint64_t pml4e_addr, pml4e;
>              int32_t sext;
>  
>              /* test virtual address sign extension */
> -            sext = (int64_t)addr >> 47;
> +            sext = la57 ? (int64_t)addr >> 56 : (int64_t)addr >> 47;
>              if (sext != 0 && sext != -1) {
>                  env->error_code = 0;
>                  cs->exception_index = EXCP0D_GPF;
>                  return 1;
>              }
>  
> -            pml4e_addr = ((env->cr[3] & ~0xfff) + (((addr >> 39) & 0x1ff) << 3)) &
> -                env->a20_mask;
> +            if (la57) {
> +                pml5e_addr = ((env->cr[3] & ~0xfff) +
> +                        (((addr >> 48) & 0x1ff) << 3)) & env->a20_mask;
> +                pml5e = x86_ldq_phys(cs, pml5e_addr);
> +                if (!(pml5e & PG_PRESENT_MASK)) {
> +                    goto do_fault;
> +                }
> +                if (pml5e & (rsvd_mask | PG_PSE_MASK)) {
> +                    goto do_fault_rsvd;
> +                }
> +                if (!(pml5e & PG_ACCESSED_MASK)) {
> +                    pml5e |= PG_ACCESSED_MASK;
> +                    x86_stl_phys_notdirty(cs, pml5e_addr, pml5e);
> +                }
> +                ptep = pml5e ^ PG_NX_MASK;
> +            } else {
> +                pml5e = env->cr[3];
> +                ptep = PG_NX_MASK | PG_USER_MASK | PG_RW_MASK;
> +            }
> +
> +            pml4e_addr = ((pml5e & PG_ADDRESS_MASK) +
> +                    (((addr >> 39) & 0x1ff) << 3)) & env->a20_mask;
>              pml4e = x86_ldq_phys(cs, pml4e_addr);
>              if (!(pml4e & PG_PRESENT_MASK)) {
>                  goto do_fault;
> @@ -781,7 +803,7 @@ int x86_cpu_handle_mmu_fault(CPUState *cs, vaddr addr,
>                  pml4e |= PG_ACCESSED_MASK;
>                  x86_stl_phys_notdirty(cs, pml4e_addr, pml4e);
>              }
> -            ptep = pml4e ^ PG_NX_MASK;
> +            ptep &= pml4e ^ PG_NX_MASK;
>              pdpe_addr = ((pml4e & PG_ADDRESS_MASK) + (((addr >> 30) & 0x1ff) << 3)) &
>                  env->a20_mask;
>              pdpe = x86_ldq_phys(cs, pdpe_addr);
> @@ -1024,16 +1046,30 @@ hwaddr x86_cpu_get_phys_page_debug(CPUState *cs, vaddr addr)
>  
>  #ifdef TARGET_X86_64
>          if (env->hflags & HF_LMA_MASK) {
> +            bool la57 = env->cr[4] & CR4_LA57_MASK;
> +            uint64_t pml5e_addr, pml5e;
>              uint64_t pml4e_addr, pml4e;
>              int32_t sext;
>  
>              /* test virtual address sign extension */
> -            sext = (int64_t)addr >> 47;
> +            sext = la57 ? (int64_t)addr >> 56 : (int64_t)addr >> 47;
>              if (sext != 0 && sext != -1) {
>                  return -1;
>              }
> -            pml4e_addr = ((env->cr[3] & ~0xfff) + (((addr >> 39) & 0x1ff) << 3)) &
> -                env->a20_mask;
> +
> +            if (la57) {
> +                pml5e_addr = ((env->cr[3] & ~0xfff) +
> +                        (((addr >> 48) & 0x1ff) << 3)) & env->a20_mask;
> +                pml5e = x86_ldq_phys(cs, pml5e_addr);
> +                if (!(pml5e & PG_PRESENT_MASK)) {
> +                    return -1;
> +                }
> +            } else {
> +                pml5e = env->cr[3];
> +            }
> +
> +            pml4e_addr = ((pml5e & PG_ADDRESS_MASK) +
> +                    (((addr >> 39) & 0x1ff) << 3)) & env->a20_mask;
>              pml4e = x86_ldq_phys(cs, pml4e_addr);
>              if (!(pml4e & PG_PRESENT_MASK)) {
>                  return -1;
> diff --git a/target-i386/monitor.c b/target-i386/monitor.c
> index 9a3b4d746e8d..468aa073bcc9 100644
> --- a/target-i386/monitor.c
> +++ b/target-i386/monitor.c
> @@ -30,13 +30,18 @@
>  #include "hmp.h"
>  
>  
> -static void print_pte(Monitor *mon, hwaddr addr,
> -                      hwaddr pte,
> -                      hwaddr mask)
> +static void print_pte(Monitor *mon, CPUArchState *env, hwaddr addr,
> +                      hwaddr pte, hwaddr mask)
>  {
>  #ifdef TARGET_X86_64
> -    if (addr & (1ULL << 47)) {
> -        addr |= -1LL << 48;
> +    if (env->cr[4] & CR4_LA57_MASK) {
> +        if (addr & (1ULL << 56)) {
> +            addr |= -1LL << 57;
> +        }
> +    } else {
> +        if (addr & (1ULL << 47)) {
> +            addr |= -1LL << 48;
> +        }
>      }
>  #endif
>      monitor_printf(mon, TARGET_FMT_plx ": " TARGET_FMT_plx
> @@ -66,13 +71,13 @@ static void tlb_info_32(Monitor *mon, CPUArchState *env)
>          if (pde & PG_PRESENT_MASK) {
>              if ((pde & PG_PSE_MASK) && (env->cr[4] & CR4_PSE_MASK)) {
>                  /* 4M pages */
> -                print_pte(mon, (l1 << 22), pde, ~((1 << 21) - 1));
> +                print_pte(mon, env, (l1 << 22), pde, ~((1 << 21) - 1));
>              } else {
>                  for(l2 = 0; l2 < 1024; l2++) {
>                      cpu_physical_memory_read((pde & ~0xfff) + l2 * 4, &pte, 4);
>                      pte = le32_to_cpu(pte);
>                      if (pte & PG_PRESENT_MASK) {
> -                        print_pte(mon, (l1 << 22) + (l2 << 12),
> +                        print_pte(mon, env, (l1 << 22) + (l2 << 12),
>                                    pte & ~PG_PSE_MASK,
>                                    ~0xfff);
>                      }
> @@ -100,7 +105,7 @@ static void tlb_info_pae32(Monitor *mon, CPUArchState *env)
>                  if (pde & PG_PRESENT_MASK) {
>                      if (pde & PG_PSE_MASK) {
>                          /* 2M pages with PAE, CR4.PSE is ignored */
> -                        print_pte(mon, (l1 << 30 ) + (l2 << 21), pde,
> +                        print_pte(mon, env, (l1 << 30) + (l2 << 21), pde,
>                                    ~((hwaddr)(1 << 20) - 1));
>                      } else {
>                          pt_addr = pde & 0x3fffffffff000ULL;
> @@ -108,7 +113,7 @@ static void tlb_info_pae32(Monitor *mon, CPUArchState *env)
>                              cpu_physical_memory_read(pt_addr + l3 * 8, &pte, 8);
>                              pte = le64_to_cpu(pte);
>                              if (pte & PG_PRESENT_MASK) {
> -                                print_pte(mon, (l1 << 30 ) + (l2 << 21)
> +                                print_pte(mon, env, (l1 << 30) + (l2 << 21)
>                                            + (l3 << 12),
>                                            pte & ~PG_PSE_MASK,
>                                            ~(hwaddr)0xfff);
> @@ -122,61 +127,82 @@ static void tlb_info_pae32(Monitor *mon, CPUArchState *env)
>  }
>  
>  #ifdef TARGET_X86_64
> -static void tlb_info_64(Monitor *mon, CPUArchState *env)
> +static void tlb_info_la48(Monitor *mon, CPUArchState *env,
> +        uint64_t l0, uint64_t pml4_addr)
>  {
>      uint64_t l1, l2, l3, l4;
>      uint64_t pml4e, pdpe, pde, pte;
> -    uint64_t pml4_addr, pdp_addr, pd_addr, pt_addr;
> +    uint64_t pdp_addr, pd_addr, pt_addr;
>  
> -    pml4_addr = env->cr[3] & 0x3fffffffff000ULL;
>      for (l1 = 0; l1 < 512; l1++) {
>          cpu_physical_memory_read(pml4_addr + l1 * 8, &pml4e, 8);
>          pml4e = le64_to_cpu(pml4e);
> -        if (pml4e & PG_PRESENT_MASK) {
> -            pdp_addr = pml4e & 0x3fffffffff000ULL;
> -            for (l2 = 0; l2 < 512; l2++) {
> -                cpu_physical_memory_read(pdp_addr + l2 * 8, &pdpe, 8);
> -                pdpe = le64_to_cpu(pdpe);
> -                if (pdpe & PG_PRESENT_MASK) {
> -                    if (pdpe & PG_PSE_MASK) {
> -                        /* 1G pages, CR4.PSE is ignored */
> -                        print_pte(mon, (l1 << 39) + (l2 << 30), pdpe,
> -                                  0x3ffffc0000000ULL);
> -                    } else {
> -                        pd_addr = pdpe & 0x3fffffffff000ULL;
> -                        for (l3 = 0; l3 < 512; l3++) {
> -                            cpu_physical_memory_read(pd_addr + l3 * 8, &pde, 8);
> -                            pde = le64_to_cpu(pde);
> -                            if (pde & PG_PRESENT_MASK) {
> -                                if (pde & PG_PSE_MASK) {
> -                                    /* 2M pages, CR4.PSE is ignored */
> -                                    print_pte(mon, (l1 << 39) + (l2 << 30) +
> -                                              (l3 << 21), pde,
> -                                              0x3ffffffe00000ULL);
> -                                } else {
> -                                    pt_addr = pde & 0x3fffffffff000ULL;
> -                                    for (l4 = 0; l4 < 512; l4++) {
> -                                        cpu_physical_memory_read(pt_addr
> -                                                                 + l4 * 8,
> -                                                                 &pte, 8);
> -                                        pte = le64_to_cpu(pte);
> -                                        if (pte & PG_PRESENT_MASK) {
> -                                            print_pte(mon, (l1 << 39) +
> -                                                      (l2 << 30) +
> -                                                      (l3 << 21) + (l4 << 12),
> -                                                      pte & ~PG_PSE_MASK,
> -                                                      0x3fffffffff000ULL);
> -                                        }
> -                                    }
> -                                }
> -                            }
> -                        }
> +        if (!(pml4e & PG_PRESENT_MASK)) {
> +            continue;
> +        }
> +
> +        pdp_addr = pml4e & 0x3fffffffff000ULL;
> +        for (l2 = 0; l2 < 512; l2++) {
> +            cpu_physical_memory_read(pdp_addr + l2 * 8, &pdpe, 8);
> +            pdpe = le64_to_cpu(pdpe);
> +            if (!(pdpe & PG_PRESENT_MASK)) {
> +                continue;
> +            }
> +
> +            if (pdpe & PG_PSE_MASK) {
> +                /* 1G pages, CR4.PSE is ignored */
> +                print_pte(mon, env, (l0 << 48) + (l1 << 39) + (l2 << 30),
> +                        pdpe, 0x3ffffc0000000ULL);
> +                continue;
> +            }
> +
> +            pd_addr = pdpe & 0x3fffffffff000ULL;
> +            for (l3 = 0; l3 < 512; l3++) {
> +                cpu_physical_memory_read(pd_addr + l3 * 8, &pde, 8);
> +                pde = le64_to_cpu(pde);
> +                if (!(pde & PG_PRESENT_MASK)) {
> +                    continue;
> +                }
> +
> +                if (pde & PG_PSE_MASK) {
> +                    /* 2M pages, CR4.PSE is ignored */
> +                    print_pte(mon, env, (l0 << 48) + (l1 << 39) + (l2 << 30) +
> +                            (l3 << 21), pde, 0x3ffffffe00000ULL);
> +                    continue;
> +                }
> +
> +                pt_addr = pde & 0x3fffffffff000ULL;
> +                for (l4 = 0; l4 < 512; l4++) {
> +                    cpu_physical_memory_read(pt_addr
> +                            + l4 * 8,
> +                            &pte, 8);
> +                    pte = le64_to_cpu(pte);
> +                    if (pte & PG_PRESENT_MASK) {
> +                        print_pte(mon, env, (l0 << 48) + (l1 << 39) +
> +                                (l2 << 30) + (l3 << 21) + (l4 << 12),
> +                                pte & ~PG_PSE_MASK, 0x3fffffffff000ULL);
>                      }
>                  }
>              }
>          }
>      }
>  }
> +
> +static void tlb_info_la57(Monitor *mon, CPUArchState *env)
> +{
> +    uint64_t l0;
> +    uint64_t pml5e;
> +    uint64_t pml5_addr;
> +    /* For every present PML5 entry, dump the PML4 tree it points to. */
> +    pml5_addr = env->cr[3] & 0x3fffffffff000ULL;
> +    for (l0 = 0; l0 < 512; l0++) {
> +        cpu_physical_memory_read(pml5_addr + l0 * 8, &pml5e, 8);
> +        pml5e = le64_to_cpu(pml5e);
> +        if (pml5e & PG_PRESENT_MASK) {
> +            tlb_info_la48(mon, env, l0, pml5e & 0x3fffffffff000ULL);
> +        }
> +    }
> +}
>  #endif /* TARGET_X86_64 */
>  
>  void hmp_info_tlb(Monitor *mon, const QDict *qdict)
> @@ -192,7 +218,11 @@ void hmp_info_tlb(Monitor *mon, const QDict *qdict)
>      if (env->cr[4] & CR4_PAE_MASK) {
>  #ifdef TARGET_X86_64
>          if (env->hflags & HF_LMA_MASK) {
> -            tlb_info_64(mon, env);
> +            if (env->cr[4] & CR4_LA57_MASK) {
> +                tlb_info_la57(mon, env);
> +            } else {
> +                tlb_info_la48(mon, env, 0, env->cr[3] & 0x3fffffffff000ULL);
> +            }
>          } else
>  #endif
>          {
> @@ -324,7 +354,7 @@ static void mem_info_pae32(Monitor *mon, CPUArchState *env)
>  
>  
>  #ifdef TARGET_X86_64
> -static void mem_info_64(Monitor *mon, CPUArchState *env)
> +static void mem_info_la48(Monitor *mon, CPUArchState *env)
>  {
>      int prot, last_prot;
>      uint64_t l1, l2, l3, l4;
> @@ -400,6 +430,98 @@ static void mem_info_64(Monitor *mon, CPUArchState *env)
>      /* Flush last range */
>      mem_print(mon, &start, &last_prot, (hwaddr)1 << 48, 0);
>  }
> +
> +static void mem_info_la57(Monitor *mon, CPUArchState *env)
> +{
> +    int prot, last_prot;
> +    uint64_t l0, l1, l2, l3, l4;
> +    uint64_t pml5e, pml4e, pdpe, pde, pte;
> +    uint64_t pml5_addr, pml4_addr, pdp_addr, pd_addr, pt_addr, start, end;
> +    /* Walk the 5-level tree rooted at CR3, feeding each range to mem_print(). */
> +    pml5_addr = env->cr[3] & 0x3fffffffff000ULL;
> +    last_prot = 0;
> +    start = -1;
> +    for (l0 = 0; l0 < 512; l0++) {
> +        cpu_physical_memory_read(pml5_addr + l0 * 8, &pml5e, 8);
> +        pml5e = le64_to_cpu(pml5e); /* swap in place; was stored to pml4e */
> +        end = l0 << 48;
> +        if (!(pml5e & PG_PRESENT_MASK)) {
> +            prot = 0;
> +            mem_print(mon, &start, &last_prot, end, prot);
> +            continue;
> +        }
> +
> +        pml4_addr = pml5e & 0x3fffffffff000ULL;
> +        for (l1 = 0; l1 < 512; l1++) {
> +            cpu_physical_memory_read(pml4_addr + l1 * 8, &pml4e, 8);
> +            pml4e = le64_to_cpu(pml4e);
> +            end = (l0 << 48) + (l1 << 39);
> +            if (!(pml4e & PG_PRESENT_MASK)) {
> +                prot = 0;
> +                mem_print(mon, &start, &last_prot, end, prot);
> +                continue;
> +            }
> +
> +            pdp_addr = pml4e & 0x3fffffffff000ULL;
> +            for (l2 = 0; l2 < 512; l2++) {
> +                cpu_physical_memory_read(pdp_addr + l2 * 8, &pdpe, 8);
> +                pdpe = le64_to_cpu(pdpe);
> +                end = (l0 << 48) + (l1 << 39) + (l2 << 30);
> +                if (!(pdpe & PG_PRESENT_MASK)) { /* skip only absent entries */
> +                    prot = 0;
> +                    mem_print(mon, &start, &last_prot, end, prot);
> +                    continue;
> +                }
> +                /* 1G page: combine its bits with the upper levels */
> +                if (pdpe & PG_PSE_MASK) {
> +                    prot = pdpe & (PG_USER_MASK | PG_RW_MASK |
> +                            PG_PRESENT_MASK);
> +                    prot &= pml5e & pml4e;
> +                    mem_print(mon, &start, &last_prot, end, prot);
> +                    continue;
> +                }
> +
> +                pd_addr = pdpe & 0x3fffffffff000ULL;
> +                for (l3 = 0; l3 < 512; l3++) {
> +                    cpu_physical_memory_read(pd_addr + l3 * 8, &pde, 8);
> +                    pde = le64_to_cpu(pde);
> +                    end = (l0 << 48) + (l1 << 39) + (l2 << 30) + (l3 << 21);
> +                    if (!(pde & PG_PRESENT_MASK)) { /* skip only absent */
> +                        prot = 0;
> +                        mem_print(mon, &start, &last_prot, end, prot);
> +                        continue;
> +                    }
> +                    /* 2M page: combine its bits with the upper levels */
> +                    if (pde & PG_PSE_MASK) {
> +                        prot = pde & (PG_USER_MASK | PG_RW_MASK |
> +                                PG_PRESENT_MASK);
> +                        prot &= pml5e & pml4e & pdpe;
> +                        mem_print(mon, &start, &last_prot, end, prot);
> +                        continue;
> +                    }
> +
> +                    pt_addr = pde & 0x3fffffffff000ULL;
> +                    for (l4 = 0; l4 < 512; l4++) {
> +                        cpu_physical_memory_read(pt_addr + l4 * 8, &pte, 8);
> +                        pte = le64_to_cpu(pte);
> +                        end = (l0 << 48) + (l1 << 39) + (l2 << 30) +
> +                            (l3 << 21) + (l4 << 12);
> +                        if (pte & PG_PRESENT_MASK) {
> +                            prot = pte & (PG_USER_MASK | PG_RW_MASK |
> +                                    PG_PRESENT_MASK);
> +                            prot &= pml5e & pml4e & pdpe & pde;
> +                        } else {
> +                            prot = 0;
> +                        }
> +                        mem_print(mon, &start, &last_prot, end, prot);
> +                    }
> +                }
> +            }
> +        }
> +    }
> +    /* Flush last range */
> +    mem_print(mon, &start, &last_prot, (hwaddr)1 << 57, 0);
> +}
>  #endif /* TARGET_X86_64 */
>  
>  void hmp_info_mem(Monitor *mon, const QDict *qdict)
> @@ -415,7 +537,11 @@ void hmp_info_mem(Monitor *mon, const QDict *qdict)
>      if (env->cr[4] & CR4_PAE_MASK) {
>  #ifdef TARGET_X86_64
>          if (env->hflags & HF_LMA_MASK) {
> -            mem_info_64(mon, env);
> +            if (env->cr[4] & CR4_LA57_MASK) {
> +                mem_info_la57(mon, env);
> +            } else {
> +                mem_info_la48(mon, env);
> +            }
>          } else
>  #endif
>          {
> diff --git a/target-i386/translate.c b/target-i386/translate.c
> index 324103c88521..d2aec5c9bf06 100644
> --- a/target-i386/translate.c
> +++ b/target-i386/translate.c
> @@ -137,6 +137,7 @@ typedef struct DisasContext {
>      int cpuid_ext2_features;
>      int cpuid_ext3_features;
>      int cpuid_7_0_ebx_features;
> +    int cpuid_7_0_ecx_features;
>      int cpuid_xsave_features;
>  } DisasContext;
>  
> @@ -8350,6 +8351,7 @@ void gen_intermediate_code(CPUX86State *env, TranslationBlock *tb)
>      dc->cpuid_ext2_features = env->features[FEAT_8000_0001_EDX];
>      dc->cpuid_ext3_features = env->features[FEAT_8000_0001_ECX];
>      dc->cpuid_7_0_ebx_features = env->features[FEAT_7_0_EBX];
> +    dc->cpuid_7_0_ecx_features = env->features[FEAT_7_0_ECX];
>      dc->cpuid_xsave_features = env->features[FEAT_XSAVE];
>  #ifdef TARGET_X86_64
>      dc->lma = (flags >> HF_LMA_SHIFT) & 1;
>
Kirill A. Shutemov Dec. 16, 2016, 1:11 p.m. UTC | #2
On Fri, Dec 16, 2016 at 01:59:36PM +0100, Paolo Bonzini wrote:
> 
> 
> On 15/12/2016 01:13, Kirill A. Shutemov wrote:
> > The new paging more is extension of IA32e mode with more additional page
> > table level.
> > 
> > It brings support of 57-bit vitrual address space (128PB) and 52-bit
> > physical address space (4PB).
> > 
> > The structure of new page table level is identical to pml4.
> > 
> > The feature is enumerated with CPUID.(EAX=07H, ECX=0):ECX[bit 16].
> > 
> > CR4.LA57[bit 12] need to be set when pageing enables to activate 5-level
> > paging mode.
> > 
> > Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
> 
> Looks good, thanks!  The target-i386/translate.c bits are not necessary,
> but I guess they can also be removed on commit.
> 
> Any chance you could also implement the MPX bits?

I don't have time for this right now. Maybe later.
Eduardo Habkost Dec. 18, 2016, 8:54 p.m. UTC | #3
On Thu, Dec 15, 2016 at 03:13:05AM +0300, Kirill A. Shutemov wrote:
> The new paging more is extension of IA32e mode with more additional page
> table level.
> 
> It brings support of 57-bit vitrual address space (128PB) and 52-bit
> physical address space (4PB).
> 
> The structure of new page table level is identical to pml4.
> 
> The feature is enumerated with CPUID.(EAX=07H, ECX=0):ECX[bit 16].
> 
> CR4.LA57[bit 12] need to be set when pageing enables to activate 5-level
> paging mode.
> 
> Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>

For the CPUID code:

Reviewed-by: Eduardo Habkost <ehabkost@redhat.com>

> ---
>  target-i386/arch_memory_mapping.c |  42 ++++++-
>  target-i386/cpu.c                 |  16 ++-
>  target-i386/cpu.h                 |   2 +
>  target-i386/helper.c              |  54 +++++++--
>  target-i386/monitor.c             | 234 +++++++++++++++++++++++++++++---------
>  target-i386/translate.c           |   2 +
>  6 files changed, 276 insertions(+), 74 deletions(-)
> 
> diff --git a/target-i386/arch_memory_mapping.c b/target-i386/arch_memory_mapping.c
> index 88f341e1bbd0..826aee597b13 100644
> --- a/target-i386/arch_memory_mapping.c
> +++ b/target-i386/arch_memory_mapping.c
> @@ -220,7 +220,8 @@ static void walk_pdpe(MemoryMappingList *list, AddressSpace *as,
>  
>  /* IA-32e Paging */
>  static void walk_pml4e(MemoryMappingList *list, AddressSpace *as,
> -                       hwaddr pml4e_start_addr, int32_t a20_mask)
> +                       hwaddr pml4e_start_addr, int32_t a20_mask,
> +                       target_ulong start_line_addr)
>  {
>      hwaddr pml4e_addr, pdpe_start_addr;
>      uint64_t pml4e;
> @@ -236,11 +237,34 @@ static void walk_pml4e(MemoryMappingList *list, AddressSpace *as,
>              continue;
>          }
>  
> -        line_addr = ((i & 0x1ffULL) << 39) | (0xffffULL << 48);
> +        line_addr = start_line_addr | ((i & 0x1ffULL) << 39);
>          pdpe_start_addr = (pml4e & PLM4_ADDR_MASK) & a20_mask;
>          walk_pdpe(list, as, pdpe_start_addr, a20_mask, line_addr);
>      }
>  }
> +
> +static void walk_pml5e(MemoryMappingList *list, AddressSpace *as,
> +                       hwaddr pml5e_start_addr, int32_t a20_mask)
> +{
> +    hwaddr pml5e_addr, pml4e_start_addr;
> +    uint64_t pml5e;
> +    target_ulong line_addr;
> +    int i;
> +    /* Scan all 512 PML5 entries, descending into each present one. */
> +    for (i = 0; i < 512; i++) {
> +        pml5e_addr = (pml5e_start_addr + i * 8) & a20_mask;
> +        pml5e = address_space_ldq(as, pml5e_addr, MEMTXATTRS_UNSPECIFIED,
> +                                  NULL);
> +        if (!(pml5e & PG_PRESENT_MASK)) {
> +            /* entry not present: nothing is mapped below it, skip */
> +            continue;
> +        }
> +        /* Force bits 57-63 to ones; place the PML5 index in bits 48-56. */
> +        line_addr = (0x7fULL << 57) | ((i & 0x1ffULL) << 48);
> +        pml4e_start_addr = (pml5e & PLM4_ADDR_MASK) & a20_mask;
> +        walk_pml4e(list, as, pml4e_start_addr, a20_mask, line_addr);
> +    }
> +}
>  #endif
>  
>  void x86_cpu_get_memory_mapping(CPUState *cs, MemoryMappingList *list,
> @@ -257,10 +281,18 @@ void x86_cpu_get_memory_mapping(CPUState *cs, MemoryMappingList *list,
>      if (env->cr[4] & CR4_PAE_MASK) {
>  #ifdef TARGET_X86_64
>          if (env->hflags & HF_LMA_MASK) {
> -            hwaddr pml4e_addr;
> +            if (env->cr[4] & CR4_LA57_MASK) {
> +                hwaddr pml5e_addr;
> +
> +                pml5e_addr = (env->cr[3] & PLM4_ADDR_MASK) & env->a20_mask;
> +                walk_pml5e(list, cs->as, pml5e_addr, env->a20_mask);
> +            } else {
> +                hwaddr pml4e_addr;
>  
> -            pml4e_addr = (env->cr[3] & PLM4_ADDR_MASK) & env->a20_mask;
> -            walk_pml4e(list, cs->as, pml4e_addr, env->a20_mask);
> +                pml4e_addr = (env->cr[3] & PLM4_ADDR_MASK) & env->a20_mask;
> +                walk_pml4e(list, cs->as, pml4e_addr, env->a20_mask,
> +                        0xffffULL << 48);
> +            }
>          } else
>  #endif
>          {
> diff --git a/target-i386/cpu.c b/target-i386/cpu.c
> index de1f30eeda63..a4b9832b5916 100644
> --- a/target-i386/cpu.c
> +++ b/target-i386/cpu.c
> @@ -238,7 +238,8 @@ static void x86_cpu_vendor_words2str(char *dst, uint32_t vendor1,
>            CPUID_7_0_EBX_HLE, CPUID_7_0_EBX_AVX2,
>            CPUID_7_0_EBX_INVPCID, CPUID_7_0_EBX_RTM,
>            CPUID_7_0_EBX_RDSEED */
> -#define TCG_7_0_ECX_FEATURES (CPUID_7_0_ECX_PKU | CPUID_7_0_ECX_OSPKE)
> +#define TCG_7_0_ECX_FEATURES (CPUID_7_0_ECX_PKU | CPUID_7_0_ECX_OSPKE | \
> +          CPUID_7_0_ECX_LA57)
>  #define TCG_7_0_EDX_FEATURES 0
>  #define TCG_APM_FEATURES 0
>  #define TCG_6_EAX_FEATURES CPUID_6_EAX_ARAT
> @@ -435,7 +436,7 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = {
>              "ospke", NULL, NULL, NULL,
>              NULL, NULL, NULL, NULL,
>              NULL, NULL, NULL, NULL,
> -            NULL, NULL, NULL, NULL,
> +            "la57", NULL, NULL, NULL,
>              NULL, NULL, "rdpid", NULL,
>              NULL, NULL, NULL, NULL,
>              NULL, NULL, NULL, NULL,
> @@ -2742,10 +2743,13 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
>      case 0x80000008:
>          /* virtual & phys address size in low 2 bytes. */
>          if (env->features[FEAT_8000_0001_EDX] & CPUID_EXT2_LM) {
> -            /* 64 bit processor, 48 bits virtual, configurable
> -             * physical bits.
> -             */
> -            *eax = 0x00003000 + cpu->phys_bits;
> +            /* 64 bit processor */
> +            *eax = cpu->phys_bits; /* configurable physical bits */
> +            if  (env->features[FEAT_7_0_ECX] & CPUID_7_0_ECX_LA57) {
> +                *eax |= 0x00003900; /* 57 bits virtual */
> +            } else {
> +                *eax |= 0x00003000; /* 48 bits virtual */
> +            }
>          } else {
>              *eax = cpu->phys_bits;
>          }
> diff --git a/target-i386/cpu.h b/target-i386/cpu.h
> index c60572402272..0ba880fc2632 100644
> --- a/target-i386/cpu.h
> +++ b/target-i386/cpu.h
> @@ -224,6 +224,7 @@
>  #define CR4_OSFXSR_SHIFT 9
>  #define CR4_OSFXSR_MASK (1U << CR4_OSFXSR_SHIFT)
>  #define CR4_OSXMMEXCPT_MASK  (1U << 10)
> +#define CR4_LA57_MASK   (1U << 12)
>  #define CR4_VMXE_MASK   (1U << 13)
>  #define CR4_SMXE_MASK   (1U << 14)
>  #define CR4_FSGSBASE_MASK (1U << 16)
> @@ -628,6 +629,7 @@ typedef uint32_t FeatureWordArray[FEATURE_WORDS];
>  #define CPUID_7_0_ECX_UMIP     (1U << 2)
>  #define CPUID_7_0_ECX_PKU      (1U << 3)
>  #define CPUID_7_0_ECX_OSPKE    (1U << 4)
> +#define CPUID_7_0_ECX_LA57     (1U << 16)
>  #define CPUID_7_0_ECX_RDPID    (1U << 22)
>  
>  #define CPUID_7_0_EDX_AVX512_4VNNIW (1U << 2) /* AVX512 Neural Network Instructions */
> diff --git a/target-i386/helper.c b/target-i386/helper.c
> index 4ecc0912a48a..43e87ddba001 100644
> --- a/target-i386/helper.c
> +++ b/target-i386/helper.c
> @@ -651,11 +651,11 @@ void cpu_x86_update_cr4(CPUX86State *env, uint32_t new_cr4)
>      uint32_t hflags;
>  
>  #if defined(DEBUG_MMU)
> -    printf("CR4 update: CR4=%08x\n", (uint32_t)env->cr[4]);
> +    printf("CR4 update: %08x -> %08x\n", (uint32_t)env->cr[4], new_cr4);
>  #endif
>      if ((new_cr4 ^ env->cr[4]) &
>          (CR4_PGE_MASK | CR4_PAE_MASK | CR4_PSE_MASK |
> -         CR4_SMEP_MASK | CR4_SMAP_MASK)) {
> +         CR4_SMEP_MASK | CR4_SMAP_MASK | CR4_LA57_MASK)) {
>          tlb_flush(CPU(cpu), 1);
>      }
>  
> @@ -757,19 +757,41 @@ int x86_cpu_handle_mmu_fault(CPUState *cs, vaddr addr,
>  
>  #ifdef TARGET_X86_64
>          if (env->hflags & HF_LMA_MASK) {
> +            bool la57 = env->cr[4] & CR4_LA57_MASK;
> +            uint64_t pml5e_addr, pml5e;
>              uint64_t pml4e_addr, pml4e;
>              int32_t sext;
>  
>              /* test virtual address sign extension */
> -            sext = (int64_t)addr >> 47;
> +            sext = la57 ? (int64_t)addr >> 56 : (int64_t)addr >> 47;
>              if (sext != 0 && sext != -1) {
>                  env->error_code = 0;
>                  cs->exception_index = EXCP0D_GPF;
>                  return 1;
>              }
>  
> -            pml4e_addr = ((env->cr[3] & ~0xfff) + (((addr >> 39) & 0x1ff) << 3)) &
> -                env->a20_mask;
> +            if (la57) {
> +                pml5e_addr = ((env->cr[3] & ~0xfff) +
> +                        (((addr >> 48) & 0x1ff) << 3)) & env->a20_mask;
> +                pml5e = x86_ldq_phys(cs, pml5e_addr);
> +                if (!(pml5e & PG_PRESENT_MASK)) {
> +                    goto do_fault;
> +                }
> +                if (pml5e & (rsvd_mask | PG_PSE_MASK)) {
> +                    goto do_fault_rsvd;
> +                }
> +                if (!(pml5e & PG_ACCESSED_MASK)) {
> +                    pml5e |= PG_ACCESSED_MASK;
> +                    x86_stl_phys_notdirty(cs, pml5e_addr, pml5e);
> +                }
> +                ptep = pml5e ^ PG_NX_MASK;
> +            } else {
> +                pml5e = env->cr[3];
> +                ptep = PG_NX_MASK | PG_USER_MASK | PG_RW_MASK;
> +            }
> +
> +            pml4e_addr = ((pml5e & PG_ADDRESS_MASK) +
> +                    (((addr >> 39) & 0x1ff) << 3)) & env->a20_mask;
>              pml4e = x86_ldq_phys(cs, pml4e_addr);
>              if (!(pml4e & PG_PRESENT_MASK)) {
>                  goto do_fault;
> @@ -781,7 +803,7 @@ int x86_cpu_handle_mmu_fault(CPUState *cs, vaddr addr,
>                  pml4e |= PG_ACCESSED_MASK;
>                  x86_stl_phys_notdirty(cs, pml4e_addr, pml4e);
>              }
> -            ptep = pml4e ^ PG_NX_MASK;
> +            ptep &= pml4e ^ PG_NX_MASK;
>              pdpe_addr = ((pml4e & PG_ADDRESS_MASK) + (((addr >> 30) & 0x1ff) << 3)) &
>                  env->a20_mask;
>              pdpe = x86_ldq_phys(cs, pdpe_addr);
> @@ -1024,16 +1046,30 @@ hwaddr x86_cpu_get_phys_page_debug(CPUState *cs, vaddr addr)
>  
>  #ifdef TARGET_X86_64
>          if (env->hflags & HF_LMA_MASK) {
> +            bool la57 = env->cr[4] & CR4_LA57_MASK;
> +            uint64_t pml5e_addr, pml5e;
>              uint64_t pml4e_addr, pml4e;
>              int32_t sext;
>  
>              /* test virtual address sign extension */
> -            sext = (int64_t)addr >> 47;
> +            sext = la57 ? (int64_t)addr >> 56 : (int64_t)addr >> 47;
>              if (sext != 0 && sext != -1) {
>                  return -1;
>              }
> -            pml4e_addr = ((env->cr[3] & ~0xfff) + (((addr >> 39) & 0x1ff) << 3)) &
> -                env->a20_mask;
> +
> +            if (la57) {
> +                pml5e_addr = ((env->cr[3] & ~0xfff) +
> +                        (((addr >> 48) & 0x1ff) << 3)) & env->a20_mask;
> +                pml5e = x86_ldq_phys(cs, pml5e_addr);
> +                if (!(pml5e & PG_PRESENT_MASK)) {
> +                    return -1;
> +                }
> +            } else {
> +                pml5e = env->cr[3];
> +            }
> +
> +            pml4e_addr = ((pml5e & PG_ADDRESS_MASK) +
> +                    (((addr >> 39) & 0x1ff) << 3)) & env->a20_mask;
>              pml4e = x86_ldq_phys(cs, pml4e_addr);
>              if (!(pml4e & PG_PRESENT_MASK)) {
>                  return -1;
> diff --git a/target-i386/monitor.c b/target-i386/monitor.c
> index 9a3b4d746e8d..468aa073bcc9 100644
> --- a/target-i386/monitor.c
> +++ b/target-i386/monitor.c
> @@ -30,13 +30,18 @@
>  #include "hmp.h"
>  
>  
> -static void print_pte(Monitor *mon, hwaddr addr,
> -                      hwaddr pte,
> -                      hwaddr mask)
> +static void print_pte(Monitor *mon, CPUArchState *env, hwaddr addr,
> +                      hwaddr pte, hwaddr mask)
>  {
>  #ifdef TARGET_X86_64
> -    if (addr & (1ULL << 47)) {
> -        addr |= -1LL << 48;
> +    if (env->cr[4] & CR4_LA57_MASK) {
> +        if (addr & (1ULL << 56)) {
> +            addr |= -1LL << 57;
> +        }
> +    } else {
> +        if (addr & (1ULL << 47)) {
> +            addr |= -1LL << 48;
> +        }
>      }
>  #endif
>      monitor_printf(mon, TARGET_FMT_plx ": " TARGET_FMT_plx
> @@ -66,13 +71,13 @@ static void tlb_info_32(Monitor *mon, CPUArchState *env)
>          if (pde & PG_PRESENT_MASK) {
>              if ((pde & PG_PSE_MASK) && (env->cr[4] & CR4_PSE_MASK)) {
>                  /* 4M pages */
> -                print_pte(mon, (l1 << 22), pde, ~((1 << 21) - 1));
> +                print_pte(mon, env, (l1 << 22), pde, ~((1 << 21) - 1));
>              } else {
>                  for(l2 = 0; l2 < 1024; l2++) {
>                      cpu_physical_memory_read((pde & ~0xfff) + l2 * 4, &pte, 4);
>                      pte = le32_to_cpu(pte);
>                      if (pte & PG_PRESENT_MASK) {
> -                        print_pte(mon, (l1 << 22) + (l2 << 12),
> +                        print_pte(mon, env, (l1 << 22) + (l2 << 12),
>                                    pte & ~PG_PSE_MASK,
>                                    ~0xfff);
>                      }
> @@ -100,7 +105,7 @@ static void tlb_info_pae32(Monitor *mon, CPUArchState *env)
>                  if (pde & PG_PRESENT_MASK) {
>                      if (pde & PG_PSE_MASK) {
>                          /* 2M pages with PAE, CR4.PSE is ignored */
> -                        print_pte(mon, (l1 << 30 ) + (l2 << 21), pde,
> +                        print_pte(mon, env, (l1 << 30) + (l2 << 21), pde,
>                                    ~((hwaddr)(1 << 20) - 1));
>                      } else {
>                          pt_addr = pde & 0x3fffffffff000ULL;
> @@ -108,7 +113,7 @@ static void tlb_info_pae32(Monitor *mon, CPUArchState *env)
>                              cpu_physical_memory_read(pt_addr + l3 * 8, &pte, 8);
>                              pte = le64_to_cpu(pte);
>                              if (pte & PG_PRESENT_MASK) {
> -                                print_pte(mon, (l1 << 30 ) + (l2 << 21)
> +                                print_pte(mon, env, (l1 << 30) + (l2 << 21)
>                                            + (l3 << 12),
>                                            pte & ~PG_PSE_MASK,
>                                            ~(hwaddr)0xfff);
> @@ -122,61 +127,82 @@ static void tlb_info_pae32(Monitor *mon, CPUArchState *env)
>  }
>  
>  #ifdef TARGET_X86_64
> -static void tlb_info_64(Monitor *mon, CPUArchState *env)
> +static void tlb_info_la48(Monitor *mon, CPUArchState *env,
> +        uint64_t l0, uint64_t pml4_addr)
>  {
>      uint64_t l1, l2, l3, l4;
>      uint64_t pml4e, pdpe, pde, pte;
> -    uint64_t pml4_addr, pdp_addr, pd_addr, pt_addr;
> +    uint64_t pdp_addr, pd_addr, pt_addr;
>  
> -    pml4_addr = env->cr[3] & 0x3fffffffff000ULL;
>      for (l1 = 0; l1 < 512; l1++) {
>          cpu_physical_memory_read(pml4_addr + l1 * 8, &pml4e, 8);
>          pml4e = le64_to_cpu(pml4e);
> -        if (pml4e & PG_PRESENT_MASK) {
> -            pdp_addr = pml4e & 0x3fffffffff000ULL;
> -            for (l2 = 0; l2 < 512; l2++) {
> -                cpu_physical_memory_read(pdp_addr + l2 * 8, &pdpe, 8);
> -                pdpe = le64_to_cpu(pdpe);
> -                if (pdpe & PG_PRESENT_MASK) {
> -                    if (pdpe & PG_PSE_MASK) {
> -                        /* 1G pages, CR4.PSE is ignored */
> -                        print_pte(mon, (l1 << 39) + (l2 << 30), pdpe,
> -                                  0x3ffffc0000000ULL);
> -                    } else {
> -                        pd_addr = pdpe & 0x3fffffffff000ULL;
> -                        for (l3 = 0; l3 < 512; l3++) {
> -                            cpu_physical_memory_read(pd_addr + l3 * 8, &pde, 8);
> -                            pde = le64_to_cpu(pde);
> -                            if (pde & PG_PRESENT_MASK) {
> -                                if (pde & PG_PSE_MASK) {
> -                                    /* 2M pages, CR4.PSE is ignored */
> -                                    print_pte(mon, (l1 << 39) + (l2 << 30) +
> -                                              (l3 << 21), pde,
> -                                              0x3ffffffe00000ULL);
> -                                } else {
> -                                    pt_addr = pde & 0x3fffffffff000ULL;
> -                                    for (l4 = 0; l4 < 512; l4++) {
> -                                        cpu_physical_memory_read(pt_addr
> -                                                                 + l4 * 8,
> -                                                                 &pte, 8);
> -                                        pte = le64_to_cpu(pte);
> -                                        if (pte & PG_PRESENT_MASK) {
> -                                            print_pte(mon, (l1 << 39) +
> -                                                      (l2 << 30) +
> -                                                      (l3 << 21) + (l4 << 12),
> -                                                      pte & ~PG_PSE_MASK,
> -                                                      0x3fffffffff000ULL);
> -                                        }
> -                                    }
> -                                }
> -                            }
> -                        }
> +        if (!(pml4e & PG_PRESENT_MASK)) {
> +            continue;
> +        }
> +
> +        pdp_addr = pml4e & 0x3fffffffff000ULL;
> +        for (l2 = 0; l2 < 512; l2++) {
> +            cpu_physical_memory_read(pdp_addr + l2 * 8, &pdpe, 8);
> +            pdpe = le64_to_cpu(pdpe);
> +            if (!(pdpe & PG_PRESENT_MASK)) {
> +                continue;
> +            }
> +
> +            if (pdpe & PG_PSE_MASK) {
> +                /* 1G pages, CR4.PSE is ignored */
> +                print_pte(mon, env, (l0 << 48) + (l1 << 39) + (l2 << 30),
> +                        pdpe, 0x3ffffc0000000ULL);
> +                continue;
> +            }
> +
> +            pd_addr = pdpe & 0x3fffffffff000ULL;
> +            for (l3 = 0; l3 < 512; l3++) {
> +                cpu_physical_memory_read(pd_addr + l3 * 8, &pde, 8);
> +                pde = le64_to_cpu(pde);
> +                if (!(pde & PG_PRESENT_MASK)) {
> +                    continue;
> +                }
> +
> +                if (pde & PG_PSE_MASK) {
> +                    /* 2M pages, CR4.PSE is ignored */
> +                    print_pte(mon, env, (l0 << 48) + (l1 << 39) + (l2 << 30) +
> +                            (l3 << 21), pde, 0x3ffffffe00000ULL);
> +                    continue;
> +                }
> +
> +                pt_addr = pde & 0x3fffffffff000ULL;
> +                for (l4 = 0; l4 < 512; l4++) {
> +                    cpu_physical_memory_read(pt_addr
> +                            + l4 * 8,
> +                            &pte, 8);
> +                    pte = le64_to_cpu(pte);
> +                    if (pte & PG_PRESENT_MASK) {
> +                        print_pte(mon, env, (l0 << 48) + (l1 << 39) +
> +                                (l2 << 30) + (l3 << 21) + (l4 << 12),
> +                                pte & ~PG_PSE_MASK, 0x3fffffffff000ULL);
>                      }
>                  }
>              }
>          }
>      }
>  }
> +/* 'info tlb' for LA57: dump the PML4 subtree of each present PML5 entry. */
> +static void tlb_info_la57(Monitor *mon, CPUArchState *env)
> +{
> +    uint64_t l0;
> +    uint64_t pml5e;
> +    uint64_t pml5_addr;
> +    /* PML5 table base: CR3 masked with 0x3fffffffff000 (bits 12..49) */
> +    pml5_addr = env->cr[3] & 0x3fffffffff000ULL;
> +    for (l0 = 0; l0 < 512; l0++) { /* 512 entries, 8 bytes each */
> +        cpu_physical_memory_read(pml5_addr + l0 * 8, &pml5e, 8);
> +        pml5e = le64_to_cpu(pml5e);
> +        if (pml5e & PG_PRESENT_MASK) { /* non-present entries are skipped */
> +            tlb_info_la48(mon, env, l0, pml5e & 0x3fffffffff000ULL);
> +        }
> +    }
> +}
>  #endif /* TARGET_X86_64 */
>  
>  void hmp_info_tlb(Monitor *mon, const QDict *qdict)
> @@ -192,7 +218,11 @@ void hmp_info_tlb(Monitor *mon, const QDict *qdict)
>      if (env->cr[4] & CR4_PAE_MASK) {
>  #ifdef TARGET_X86_64
>          if (env->hflags & HF_LMA_MASK) {
> -            tlb_info_64(mon, env);
> +            if (env->cr[4] & CR4_LA57_MASK) {
> +                tlb_info_la57(mon, env);
> +            } else {
> +                tlb_info_la48(mon, env, 0, env->cr[3] & 0x3fffffffff000ULL);
> +            }
>          } else
>  #endif
>          {
> @@ -324,7 +354,7 @@ static void mem_info_pae32(Monitor *mon, CPUArchState *env)
>  
>  
>  #ifdef TARGET_X86_64
> -static void mem_info_64(Monitor *mon, CPUArchState *env)
> +static void mem_info_la48(Monitor *mon, CPUArchState *env)
>  {
>      int prot, last_prot;
>      uint64_t l1, l2, l3, l4;
> @@ -400,6 +430,98 @@ static void mem_info_64(Monitor *mon, CPUArchState *env)
>      /* Flush last range */
>      mem_print(mon, &start, &last_prot, (hwaddr)1 << 48, 0);
>  }
> +/* 'info mem' for LA57: walk all five levels, feeding mem_print() per entry. */
> +static void mem_info_la57(Monitor *mon, CPUArchState *env)
> +{
> +    int prot, last_prot;
> +    uint64_t l0, l1, l2, l3, l4;
> +    uint64_t pml5e, pml4e, pdpe, pde, pte;
> +    uint64_t pml5_addr, pml4_addr, pdp_addr, pd_addr, pt_addr, start, end;
> +    /* PML5 table base: CR3 masked with 0x3fffffffff000 (bits 12..49) */
> +    pml5_addr = env->cr[3] & 0x3fffffffff000ULL;
> +    last_prot = 0;
> +    start = -1;
> +    for (l0 = 0; l0 < 512; l0++) {
> +        cpu_physical_memory_read(pml5_addr + l0 * 8, &pml5e, 8);
> +        pml5e = le64_to_cpu(pml5e); /* convert in place, as at other levels */
> +        end = l0 << 48;
> +        if (!(pml5e & PG_PRESENT_MASK)) {
> +            prot = 0;
> +            mem_print(mon, &start, &last_prot, end, prot);
> +            continue;
> +        }
> +        /* PML5 entry is present: scan the PML4 table it points to */
> +        pml4_addr = pml5e & 0x3fffffffff000ULL;
> +        for (l1 = 0; l1 < 512; l1++) {
> +            cpu_physical_memory_read(pml4_addr + l1 * 8, &pml4e, 8);
> +            pml4e = le64_to_cpu(pml4e);
> +            end = (l0 << 48) + (l1 << 39);
> +            if (!(pml4e & PG_PRESENT_MASK)) {
> +                prot = 0;
> +                mem_print(mon, &start, &last_prot, end, prot);
> +                continue;
> +            }
> +            /* PML4 entry is present: scan its page-directory-pointer table */
> +            pdp_addr = pml4e & 0x3fffffffff000ULL;
> +            for (l2 = 0; l2 < 512; l2++) {
> +                cpu_physical_memory_read(pdp_addr + l2 * 8, &pdpe, 8);
> +                pdpe = le64_to_cpu(pdpe);
> +                end = (l0 << 48) + (l1 << 39) + (l2 << 30);
> +                if (!(pdpe & PG_PRESENT_MASK)) { /* not present: no access */
> +                    prot = 0;
> +                    mem_print(mon, &start, &last_prot, end, prot);
> +                    continue;
> +                }
> +                /* PS set at this level: 1G page, no lower tables to walk */
> +                if (pdpe & PG_PSE_MASK) {
> +                    prot = pdpe & (PG_USER_MASK | PG_RW_MASK |
> +                            PG_PRESENT_MASK);
> +                    prot &= pml5e & pml4e; /* AND in the upper levels */
> +                    mem_print(mon, &start, &last_prot, end, prot);
> +                    continue;
> +                }
> +                /* otherwise the entry points to a page directory */
> +                pd_addr = pdpe & 0x3fffffffff000ULL;
> +                for (l3 = 0; l3 < 512; l3++) {
> +                    cpu_physical_memory_read(pd_addr + l3 * 8, &pde, 8);
> +                    pde = le64_to_cpu(pde);
> +                    end = (l0 << 48) + (l1 << 39) + (l2 << 30) + (l3 << 21);
> +                    if (!(pde & PG_PRESENT_MASK)) { /* not present */
> +                        prot = 0;
> +                        mem_print(mon, &start, &last_prot, end, prot);
> +                        continue;
> +                    }
> +                    /* PS set at this level: 2M page, no page table to walk */
> +                    if (pde & PG_PSE_MASK) {
> +                        prot = pde & (PG_USER_MASK | PG_RW_MASK |
> +                                PG_PRESENT_MASK);
> +                        prot &= pml5e & pml4e & pdpe;
> +                        mem_print(mon, &start, &last_prot, end, prot);
> +                        continue;
> +                    }
> +                    /* otherwise the entry points to a page table */
> +                    pt_addr = pde & 0x3fffffffff000ULL;
> +                    for (l4 = 0; l4 < 512; l4++) {
> +                        cpu_physical_memory_read(pt_addr + l4 * 8, &pte, 8);
> +                        pte = le64_to_cpu(pte);
> +                        end = (l0 << 48) + (l1 << 39) + (l2 << 30) +
> +                            (l3 << 21) + (l4 << 12);
> +                        if (pte & PG_PRESENT_MASK) {
> +                            prot = pte & (PG_USER_MASK | PG_RW_MASK |
> +                                    PG_PRESENT_MASK);
> +                            prot &= pml5e & pml4e & pdpe & pde;
> +                        } else {
> +                            prot = 0;
> +                        }
> +                        mem_print(mon, &start, &last_prot, end, prot);
> +                    }
> +                }
> +            }
> +        }
> +    }
> +    /* Flush last range */
> +    mem_print(mon, &start, &last_prot, (hwaddr)1 << 57, 0);
> +}
>  #endif /* TARGET_X86_64 */
>  
>  void hmp_info_mem(Monitor *mon, const QDict *qdict)
> @@ -415,7 +537,11 @@ void hmp_info_mem(Monitor *mon, const QDict *qdict)
>      if (env->cr[4] & CR4_PAE_MASK) {
>  #ifdef TARGET_X86_64
>          if (env->hflags & HF_LMA_MASK) {
> -            mem_info_64(mon, env);
> +            if (env->cr[4] & CR4_LA57_MASK) {
> +                mem_info_la57(mon, env);
> +            } else {
> +                mem_info_la48(mon, env);
> +            }
>          } else
>  #endif
>          {
> diff --git a/target-i386/translate.c b/target-i386/translate.c
> index 324103c88521..d2aec5c9bf06 100644
> --- a/target-i386/translate.c
> +++ b/target-i386/translate.c
> @@ -137,6 +137,7 @@ typedef struct DisasContext {
>      int cpuid_ext2_features;
>      int cpuid_ext3_features;
>      int cpuid_7_0_ebx_features;
> +    int cpuid_7_0_ecx_features;
>      int cpuid_xsave_features;
>  } DisasContext;
>  
> @@ -8350,6 +8351,7 @@ void gen_intermediate_code(CPUX86State *env, TranslationBlock *tb)
>      dc->cpuid_ext2_features = env->features[FEAT_8000_0001_EDX];
>      dc->cpuid_ext3_features = env->features[FEAT_8000_0001_ECX];
>      dc->cpuid_7_0_ebx_features = env->features[FEAT_7_0_EBX];
> +    dc->cpuid_7_0_ecx_features = env->features[FEAT_7_0_ECX];
>      dc->cpuid_xsave_features = env->features[FEAT_XSAVE];
>  #ifdef TARGET_X86_64
>      dc->lma = (flags >> HF_LMA_SHIFT) & 1;
> -- 
> 2.10.2
>
Kirill A. Shutemov Dec. 22, 2016, 12:37 p.m. UTC | #4
On Fri, Dec 16, 2016 at 01:59:36PM +0100, Paolo Bonzini wrote:
> 
> 
> On 15/12/2016 01:13, Kirill A. Shutemov wrote:
> > > The new paging mode is an extension of IA32e mode with one additional page
> > > table level.
> > 
> > > It brings support for a 57-bit virtual address space (128PB) and a 52-bit
> > > physical address space (4PB).
> > 
> > The structure of new page table level is identical to pml4.
> > 
> > The feature is enumerated with CPUID.(EAX=07H, ECX=0):ECX[bit 16].
> > 
> > > CR4.LA57[bit 12] needs to be set when paging is enabled to activate 5-level
> > > paging mode.
> > 
> > Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
> 
> Looks good, thanks!  The target-i386/translate.c bits are not necessary,
> but I guess they can also be removed on commit.

Is there anything else I need to do to make it applied?
Paolo Bonzini Dec. 22, 2016, 12:38 p.m. UTC | #5
On 22/12/2016 13:37, Kirill A. Shutemov wrote:
> On Fri, Dec 16, 2016 at 01:59:36PM +0100, Paolo Bonzini wrote:
>>
>>
>> On 15/12/2016 01:13, Kirill A. Shutemov wrote:
>>> The new paging mode is an extension of IA32e mode with one additional page
>>> table level.
>>>
>>> It brings support for a 57-bit virtual address space (128PB) and a 52-bit
>>> physical address space (4PB).
>>>
>>> The structure of new page table level is identical to pml4.
>>>
>>> The feature is enumerated with CPUID.(EAX=07H, ECX=0):ECX[bit 16].
>>>
>>> CR4.LA57[bit 12] needs to be set when paging is enabled to activate 5-level
>>> paging mode.
>>>
>>> Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
>>
>> Looks good, thanks!  The target-i386/translate.c bits are not necessary,
>> but I guess they can also be removed on commit.
> 
> Is there anything else I need to do to make it applied?

No, but 2.8 was only released a couple days ago so we were in freeze.

Paolo
diff mbox

Patch

diff --git a/target-i386/arch_memory_mapping.c b/target-i386/arch_memory_mapping.c
index 88f341e1bbd0..826aee597b13 100644
--- a/target-i386/arch_memory_mapping.c
+++ b/target-i386/arch_memory_mapping.c
@@ -220,7 +220,8 @@  static void walk_pdpe(MemoryMappingList *list, AddressSpace *as,
 
 /* IA-32e Paging */
 static void walk_pml4e(MemoryMappingList *list, AddressSpace *as,
-                       hwaddr pml4e_start_addr, int32_t a20_mask)
+                       hwaddr pml4e_start_addr, int32_t a20_mask,
+                       target_ulong start_line_addr)
 {
     hwaddr pml4e_addr, pdpe_start_addr;
     uint64_t pml4e;
@@ -236,11 +237,34 @@  static void walk_pml4e(MemoryMappingList *list, AddressSpace *as,
             continue;
         }
 
-        line_addr = ((i & 0x1ffULL) << 39) | (0xffffULL << 48);
+        line_addr = start_line_addr | ((i & 0x1ffULL) << 39);
         pdpe_start_addr = (pml4e & PLM4_ADDR_MASK) & a20_mask;
         walk_pdpe(list, as, pdpe_start_addr, a20_mask, line_addr);
     }
 }
+/* Scan a PML5 table and walk the PML4 table of every present entry. */
+static void walk_pml5e(MemoryMappingList *list, AddressSpace *as,
+                       hwaddr pml5e_start_addr, int32_t a20_mask)
+{
+    hwaddr pml5e_addr, pml4e_start_addr;
+    uint64_t pml5e;
+    target_ulong line_addr;
+    int i;
+    /* 512 entries of 8 bytes each (i * 8), loaded one at a time */
+    for (i = 0; i < 512; i++) {
+        pml5e_addr = (pml5e_start_addr + i * 8) & a20_mask;
+        pml5e = address_space_ldq(as, pml5e_addr, MEMTXATTRS_UNSPECIFIED,
+                                  NULL);
+        if (!(pml5e & PG_PRESENT_MASK)) {
+            /* not present: this slot has no PML4 table to descend into */
+            continue;
+        }
+        /* index goes to bits 48..56; bits 57..63 are always set to one */
+        line_addr = (0x7fULL << 57) | ((i & 0x1ffULL) << 48);
+        pml4e_start_addr = (pml5e & PLM4_ADDR_MASK) & a20_mask;
+        walk_pml4e(list, as, pml4e_start_addr, a20_mask, line_addr);
+    }
+}
 #endif
 
 void x86_cpu_get_memory_mapping(CPUState *cs, MemoryMappingList *list,
@@ -257,10 +281,18 @@  void x86_cpu_get_memory_mapping(CPUState *cs, MemoryMappingList *list,
     if (env->cr[4] & CR4_PAE_MASK) {
 #ifdef TARGET_X86_64
         if (env->hflags & HF_LMA_MASK) {
-            hwaddr pml4e_addr;
+            if (env->cr[4] & CR4_LA57_MASK) {
+                hwaddr pml5e_addr;
+
+                pml5e_addr = (env->cr[3] & PLM4_ADDR_MASK) & env->a20_mask;
+                walk_pml5e(list, cs->as, pml5e_addr, env->a20_mask);
+            } else {
+                hwaddr pml4e_addr;
 
-            pml4e_addr = (env->cr[3] & PLM4_ADDR_MASK) & env->a20_mask;
-            walk_pml4e(list, cs->as, pml4e_addr, env->a20_mask);
+                pml4e_addr = (env->cr[3] & PLM4_ADDR_MASK) & env->a20_mask;
+                walk_pml4e(list, cs->as, pml4e_addr, env->a20_mask,
+                        0xffffULL << 48);
+            }
         } else
 #endif
         {
diff --git a/target-i386/cpu.c b/target-i386/cpu.c
index de1f30eeda63..a4b9832b5916 100644
--- a/target-i386/cpu.c
+++ b/target-i386/cpu.c
@@ -238,7 +238,8 @@  static void x86_cpu_vendor_words2str(char *dst, uint32_t vendor1,
           CPUID_7_0_EBX_HLE, CPUID_7_0_EBX_AVX2,
           CPUID_7_0_EBX_INVPCID, CPUID_7_0_EBX_RTM,
           CPUID_7_0_EBX_RDSEED */
-#define TCG_7_0_ECX_FEATURES (CPUID_7_0_ECX_PKU | CPUID_7_0_ECX_OSPKE)
+#define TCG_7_0_ECX_FEATURES (CPUID_7_0_ECX_PKU | CPUID_7_0_ECX_OSPKE | \
+          CPUID_7_0_ECX_LA57)
 #define TCG_7_0_EDX_FEATURES 0
 #define TCG_APM_FEATURES 0
 #define TCG_6_EAX_FEATURES CPUID_6_EAX_ARAT
@@ -435,7 +436,7 @@  static FeatureWordInfo feature_word_info[FEATURE_WORDS] = {
             "ospke", NULL, NULL, NULL,
             NULL, NULL, NULL, NULL,
             NULL, NULL, NULL, NULL,
-            NULL, NULL, NULL, NULL,
+            "la57", NULL, NULL, NULL,
             NULL, NULL, "rdpid", NULL,
             NULL, NULL, NULL, NULL,
             NULL, NULL, NULL, NULL,
@@ -2742,10 +2743,13 @@  void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
     case 0x80000008:
         /* virtual & phys address size in low 2 bytes. */
         if (env->features[FEAT_8000_0001_EDX] & CPUID_EXT2_LM) {
-            /* 64 bit processor, 48 bits virtual, configurable
-             * physical bits.
-             */
-            *eax = 0x00003000 + cpu->phys_bits;
+            /* 64 bit processor */
+            *eax = cpu->phys_bits; /* configurable physical bits */
+            if  (env->features[FEAT_7_0_ECX] & CPUID_7_0_ECX_LA57) {
+                *eax |= 0x00003900; /* 57 bits virtual */
+            } else {
+                *eax |= 0x00003000; /* 48 bits virtual */
+            }
         } else {
             *eax = cpu->phys_bits;
         }
diff --git a/target-i386/cpu.h b/target-i386/cpu.h
index c60572402272..0ba880fc2632 100644
--- a/target-i386/cpu.h
+++ b/target-i386/cpu.h
@@ -224,6 +224,7 @@ 
 #define CR4_OSFXSR_SHIFT 9
 #define CR4_OSFXSR_MASK (1U << CR4_OSFXSR_SHIFT)
 #define CR4_OSXMMEXCPT_MASK  (1U << 10)
+#define CR4_LA57_MASK   (1U << 12)
 #define CR4_VMXE_MASK   (1U << 13)
 #define CR4_SMXE_MASK   (1U << 14)
 #define CR4_FSGSBASE_MASK (1U << 16)
@@ -628,6 +629,7 @@  typedef uint32_t FeatureWordArray[FEATURE_WORDS];
 #define CPUID_7_0_ECX_UMIP     (1U << 2)
 #define CPUID_7_0_ECX_PKU      (1U << 3)
 #define CPUID_7_0_ECX_OSPKE    (1U << 4)
+#define CPUID_7_0_ECX_LA57     (1U << 16)
 #define CPUID_7_0_ECX_RDPID    (1U << 22)
 
 #define CPUID_7_0_EDX_AVX512_4VNNIW (1U << 2) /* AVX512 Neural Network Instructions */
diff --git a/target-i386/helper.c b/target-i386/helper.c
index 4ecc0912a48a..43e87ddba001 100644
--- a/target-i386/helper.c
+++ b/target-i386/helper.c
@@ -651,11 +651,11 @@  void cpu_x86_update_cr4(CPUX86State *env, uint32_t new_cr4)
     uint32_t hflags;
 
 #if defined(DEBUG_MMU)
-    printf("CR4 update: CR4=%08x\n", (uint32_t)env->cr[4]);
+    printf("CR4 update: %08x -> %08x\n", (uint32_t)env->cr[4], new_cr4);
 #endif
     if ((new_cr4 ^ env->cr[4]) &
         (CR4_PGE_MASK | CR4_PAE_MASK | CR4_PSE_MASK |
-         CR4_SMEP_MASK | CR4_SMAP_MASK)) {
+         CR4_SMEP_MASK | CR4_SMAP_MASK | CR4_LA57_MASK)) {
         tlb_flush(CPU(cpu), 1);
     }
 
@@ -757,19 +757,41 @@  int x86_cpu_handle_mmu_fault(CPUState *cs, vaddr addr,
 
 #ifdef TARGET_X86_64
         if (env->hflags & HF_LMA_MASK) {
+            bool la57 = env->cr[4] & CR4_LA57_MASK;
+            uint64_t pml5e_addr, pml5e;
             uint64_t pml4e_addr, pml4e;
             int32_t sext;
 
             /* test virtual address sign extension */
-            sext = (int64_t)addr >> 47;
+            sext = la57 ? (int64_t)addr >> 56 : (int64_t)addr >> 47;
             if (sext != 0 && sext != -1) {
                 env->error_code = 0;
                 cs->exception_index = EXCP0D_GPF;
                 return 1;
             }
 
-            pml4e_addr = ((env->cr[3] & ~0xfff) + (((addr >> 39) & 0x1ff) << 3)) &
-                env->a20_mask;
+            if (la57) {
+                pml5e_addr = ((env->cr[3] & ~0xfff) +
+                        (((addr >> 48) & 0x1ff) << 3)) & env->a20_mask;
+                pml5e = x86_ldq_phys(cs, pml5e_addr);
+                if (!(pml5e & PG_PRESENT_MASK)) {
+                    goto do_fault;
+                }
+                if (pml5e & (rsvd_mask | PG_PSE_MASK)) {
+                    goto do_fault_rsvd;
+                }
+                if (!(pml5e & PG_ACCESSED_MASK)) {
+                    pml5e |= PG_ACCESSED_MASK;
+                    x86_stl_phys_notdirty(cs, pml5e_addr, pml5e);
+                }
+                ptep = pml5e ^ PG_NX_MASK;
+            } else {
+                pml5e = env->cr[3];
+                ptep = PG_NX_MASK | PG_USER_MASK | PG_RW_MASK;
+            }
+
+            pml4e_addr = ((pml5e & PG_ADDRESS_MASK) +
+                    (((addr >> 39) & 0x1ff) << 3)) & env->a20_mask;
             pml4e = x86_ldq_phys(cs, pml4e_addr);
             if (!(pml4e & PG_PRESENT_MASK)) {
                 goto do_fault;
@@ -781,7 +803,7 @@  int x86_cpu_handle_mmu_fault(CPUState *cs, vaddr addr,
                 pml4e |= PG_ACCESSED_MASK;
                 x86_stl_phys_notdirty(cs, pml4e_addr, pml4e);
             }
-            ptep = pml4e ^ PG_NX_MASK;
+            ptep &= pml4e ^ PG_NX_MASK;
             pdpe_addr = ((pml4e & PG_ADDRESS_MASK) + (((addr >> 30) & 0x1ff) << 3)) &
                 env->a20_mask;
             pdpe = x86_ldq_phys(cs, pdpe_addr);
@@ -1024,16 +1046,30 @@  hwaddr x86_cpu_get_phys_page_debug(CPUState *cs, vaddr addr)
 
 #ifdef TARGET_X86_64
         if (env->hflags & HF_LMA_MASK) {
+            bool la57 = env->cr[4] & CR4_LA57_MASK;
+            uint64_t pml5e_addr, pml5e;
             uint64_t pml4e_addr, pml4e;
             int32_t sext;
 
             /* test virtual address sign extension */
-            sext = (int64_t)addr >> 47;
+            sext = la57 ? (int64_t)addr >> 56 : (int64_t)addr >> 47;
             if (sext != 0 && sext != -1) {
                 return -1;
             }
-            pml4e_addr = ((env->cr[3] & ~0xfff) + (((addr >> 39) & 0x1ff) << 3)) &
-                env->a20_mask;
+
+            if (la57) {
+                pml5e_addr = ((env->cr[3] & ~0xfff) +
+                        (((addr >> 48) & 0x1ff) << 3)) & env->a20_mask;
+                pml5e = x86_ldq_phys(cs, pml5e_addr);
+                if (!(pml5e & PG_PRESENT_MASK)) {
+                    return -1;
+                }
+            } else {
+                pml5e = env->cr[3];
+            }
+
+            pml4e_addr = ((pml5e & PG_ADDRESS_MASK) +
+                    (((addr >> 39) & 0x1ff) << 3)) & env->a20_mask;
             pml4e = x86_ldq_phys(cs, pml4e_addr);
             if (!(pml4e & PG_PRESENT_MASK)) {
                 return -1;
diff --git a/target-i386/monitor.c b/target-i386/monitor.c
index 9a3b4d746e8d..468aa073bcc9 100644
--- a/target-i386/monitor.c
+++ b/target-i386/monitor.c
@@ -30,13 +30,18 @@ 
 #include "hmp.h"
 
 
-static void print_pte(Monitor *mon, hwaddr addr,
-                      hwaddr pte,
-                      hwaddr mask)
+static void print_pte(Monitor *mon, CPUArchState *env, hwaddr addr,
+                      hwaddr pte, hwaddr mask)
 {
 #ifdef TARGET_X86_64
-    if (addr & (1ULL << 47)) {
-        addr |= -1LL << 48;
+    if (env->cr[4] & CR4_LA57_MASK) {
+        if (addr & (1ULL << 56)) {
+            addr |= -1LL << 57;
+        }
+    } else {
+        if (addr & (1ULL << 47)) {
+            addr |= -1LL << 48;
+        }
     }
 #endif
     monitor_printf(mon, TARGET_FMT_plx ": " TARGET_FMT_plx
@@ -66,13 +71,13 @@  static void tlb_info_32(Monitor *mon, CPUArchState *env)
         if (pde & PG_PRESENT_MASK) {
             if ((pde & PG_PSE_MASK) && (env->cr[4] & CR4_PSE_MASK)) {
                 /* 4M pages */
-                print_pte(mon, (l1 << 22), pde, ~((1 << 21) - 1));
+                print_pte(mon, env, (l1 << 22), pde, ~((1 << 21) - 1));
             } else {
                 for(l2 = 0; l2 < 1024; l2++) {
                     cpu_physical_memory_read((pde & ~0xfff) + l2 * 4, &pte, 4);
                     pte = le32_to_cpu(pte);
                     if (pte & PG_PRESENT_MASK) {
-                        print_pte(mon, (l1 << 22) + (l2 << 12),
+                        print_pte(mon, env, (l1 << 22) + (l2 << 12),
                                   pte & ~PG_PSE_MASK,
                                   ~0xfff);
                     }
@@ -100,7 +105,7 @@  static void tlb_info_pae32(Monitor *mon, CPUArchState *env)
                 if (pde & PG_PRESENT_MASK) {
                     if (pde & PG_PSE_MASK) {
                         /* 2M pages with PAE, CR4.PSE is ignored */
-                        print_pte(mon, (l1 << 30 ) + (l2 << 21), pde,
+                        print_pte(mon, env, (l1 << 30) + (l2 << 21), pde,
                                   ~((hwaddr)(1 << 20) - 1));
                     } else {
                         pt_addr = pde & 0x3fffffffff000ULL;
@@ -108,7 +113,7 @@  static void tlb_info_pae32(Monitor *mon, CPUArchState *env)
                             cpu_physical_memory_read(pt_addr + l3 * 8, &pte, 8);
                             pte = le64_to_cpu(pte);
                             if (pte & PG_PRESENT_MASK) {
-                                print_pte(mon, (l1 << 30 ) + (l2 << 21)
+                                print_pte(mon, env, (l1 << 30) + (l2 << 21)
                                           + (l3 << 12),
                                           pte & ~PG_PSE_MASK,
                                           ~(hwaddr)0xfff);
@@ -122,61 +127,82 @@  static void tlb_info_pae32(Monitor *mon, CPUArchState *env)
 }
 
 #ifdef TARGET_X86_64
-static void tlb_info_64(Monitor *mon, CPUArchState *env)
+static void tlb_info_la48(Monitor *mon, CPUArchState *env,
+        uint64_t l0, uint64_t pml4_addr)
 {
     uint64_t l1, l2, l3, l4;
     uint64_t pml4e, pdpe, pde, pte;
-    uint64_t pml4_addr, pdp_addr, pd_addr, pt_addr;
+    uint64_t pdp_addr, pd_addr, pt_addr;
 
-    pml4_addr = env->cr[3] & 0x3fffffffff000ULL;
     for (l1 = 0; l1 < 512; l1++) {
         cpu_physical_memory_read(pml4_addr + l1 * 8, &pml4e, 8);
         pml4e = le64_to_cpu(pml4e);
-        if (pml4e & PG_PRESENT_MASK) {
-            pdp_addr = pml4e & 0x3fffffffff000ULL;
-            for (l2 = 0; l2 < 512; l2++) {
-                cpu_physical_memory_read(pdp_addr + l2 * 8, &pdpe, 8);
-                pdpe = le64_to_cpu(pdpe);
-                if (pdpe & PG_PRESENT_MASK) {
-                    if (pdpe & PG_PSE_MASK) {
-                        /* 1G pages, CR4.PSE is ignored */
-                        print_pte(mon, (l1 << 39) + (l2 << 30), pdpe,
-                                  0x3ffffc0000000ULL);
-                    } else {
-                        pd_addr = pdpe & 0x3fffffffff000ULL;
-                        for (l3 = 0; l3 < 512; l3++) {
-                            cpu_physical_memory_read(pd_addr + l3 * 8, &pde, 8);
-                            pde = le64_to_cpu(pde);
-                            if (pde & PG_PRESENT_MASK) {
-                                if (pde & PG_PSE_MASK) {
-                                    /* 2M pages, CR4.PSE is ignored */
-                                    print_pte(mon, (l1 << 39) + (l2 << 30) +
-                                              (l3 << 21), pde,
-                                              0x3ffffffe00000ULL);
-                                } else {
-                                    pt_addr = pde & 0x3fffffffff000ULL;
-                                    for (l4 = 0; l4 < 512; l4++) {
-                                        cpu_physical_memory_read(pt_addr
-                                                                 + l4 * 8,
-                                                                 &pte, 8);
-                                        pte = le64_to_cpu(pte);
-                                        if (pte & PG_PRESENT_MASK) {
-                                            print_pte(mon, (l1 << 39) +
-                                                      (l2 << 30) +
-                                                      (l3 << 21) + (l4 << 12),
-                                                      pte & ~PG_PSE_MASK,
-                                                      0x3fffffffff000ULL);
-                                        }
-                                    }
-                                }
-                            }
-                        }
+        if (!(pml4e & PG_PRESENT_MASK)) {
+            continue;
+        }
+
+        pdp_addr = pml4e & 0x3fffffffff000ULL;
+        for (l2 = 0; l2 < 512; l2++) {
+            cpu_physical_memory_read(pdp_addr + l2 * 8, &pdpe, 8);
+            pdpe = le64_to_cpu(pdpe);
+            if (!(pdpe & PG_PRESENT_MASK)) {
+                continue;
+            }
+
+            if (pdpe & PG_PSE_MASK) {
+                /* 1G pages, CR4.PSE is ignored */
+                print_pte(mon, env, (l0 << 48) + (l1 << 39) + (l2 << 30),
+                        pdpe, 0x3ffffc0000000ULL);
+                continue;
+            }
+
+            pd_addr = pdpe & 0x3fffffffff000ULL;
+            for (l3 = 0; l3 < 512; l3++) {
+                cpu_physical_memory_read(pd_addr + l3 * 8, &pde, 8);
+                pde = le64_to_cpu(pde);
+                if (!(pde & PG_PRESENT_MASK)) {
+                    continue;
+                }
+
+                if (pde & PG_PSE_MASK) {
+                    /* 2M pages, CR4.PSE is ignored */
+                    print_pte(mon, env, (l0 << 48) + (l1 << 39) + (l2 << 30) +
+                            (l3 << 21), pde, 0x3ffffffe00000ULL);
+                    continue;
+                }
+
+                pt_addr = pde & 0x3fffffffff000ULL;
+                for (l4 = 0; l4 < 512; l4++) {
+                    cpu_physical_memory_read(pt_addr
+                            + l4 * 8,
+                            &pte, 8);
+                    pte = le64_to_cpu(pte);
+                    if (pte & PG_PRESENT_MASK) {
+                        print_pte(mon, env, (l0 << 48) + (l1 << 39) +
+                                (l2 << 30) + (l3 << 21) + (l4 << 12),
+                                pte & ~PG_PSE_MASK, 0x3fffffffff000ULL);
                     }
                 }
             }
         }
     }
 }
+
+static void tlb_info_la57(Monitor *mon, CPUArchState *env)
+{
+    uint64_t l0;
+    uint64_t pml5e;
+    uint64_t pml5_addr;
+    /* For each present PML5 entry, let tlb_info_la48() walk the levels below */
+    pml5_addr = env->cr[3] & 0x3fffffffff000ULL;
+    for (l0 = 0; l0 < 512; l0++) {
+        cpu_physical_memory_read(pml5_addr + l0 * 8, &pml5e, 8);
+        pml5e = le64_to_cpu(pml5e);
+        if (pml5e & PG_PRESENT_MASK) {
+            tlb_info_la48(mon, env, l0, pml5e & 0x3fffffffff000ULL);
+        }
+    }
+}
 #endif /* TARGET_X86_64 */
 
 void hmp_info_tlb(Monitor *mon, const QDict *qdict)
@@ -192,7 +218,11 @@  void hmp_info_tlb(Monitor *mon, const QDict *qdict)
     if (env->cr[4] & CR4_PAE_MASK) {
 #ifdef TARGET_X86_64
         if (env->hflags & HF_LMA_MASK) {
-            tlb_info_64(mon, env);
+            if (env->cr[4] & CR4_LA57_MASK) {
+                tlb_info_la57(mon, env);
+            } else {
+                tlb_info_la48(mon, env, 0, env->cr[3] & 0x3fffffffff000ULL);
+            }
         } else
 #endif
         {
@@ -324,7 +354,7 @@  static void mem_info_pae32(Monitor *mon, CPUArchState *env)
 
 
 #ifdef TARGET_X86_64
-static void mem_info_64(Monitor *mon, CPUArchState *env)
+static void mem_info_la48(Monitor *mon, CPUArchState *env)
 {
     int prot, last_prot;
     uint64_t l1, l2, l3, l4;
@@ -400,6 +430,98 @@  static void mem_info_64(Monitor *mon, CPUArchState *env)
     /* Flush last range */
     mem_print(mon, &start, &last_prot, (hwaddr)1 << 48, 0);
 }
+
+static void mem_info_la57(Monitor *mon, CPUArchState *env)
+{
+    int prot, last_prot;
+    uint64_t l0, l1, l2, l3, l4;
+    uint64_t pml5e, pml4e, pdpe, pde, pte;
+    uint64_t pml5_addr, pml4_addr, pdp_addr, pd_addr, pt_addr, start, end;
+    /* Walk the 5-level tables from CR3, reporting each range to mem_print() */
+    pml5_addr = env->cr[3] & 0x3fffffffff000ULL;
+    last_prot = 0;
+    start = -1;
+    for (l0 = 0; l0 < 512; l0++) {
+        cpu_physical_memory_read(pml5_addr + l0 * 8, &pml5e, 8);
+        pml5e = le64_to_cpu(pml5e);
+        end = l0 << 48;
+        if (!(pml5e & PG_PRESENT_MASK)) {
+            prot = 0;
+            mem_print(mon, &start, &last_prot, end, prot);
+            continue;
+        }
+
+        pml4_addr = pml5e & 0x3fffffffff000ULL;
+        for (l1 = 0; l1 < 512; l1++) {
+            cpu_physical_memory_read(pml4_addr + l1 * 8, &pml4e, 8);
+            pml4e = le64_to_cpu(pml4e);
+            end = (l0 << 48) + (l1 << 39);
+            if (!(pml4e & PG_PRESENT_MASK)) {
+                prot = 0;
+                mem_print(mon, &start, &last_prot, end, prot);
+                continue;
+            }
+
+            pdp_addr = pml4e & 0x3fffffffff000ULL;
+            for (l2 = 0; l2 < 512; l2++) {
+                cpu_physical_memory_read(pdp_addr + l2 * 8, &pdpe, 8);
+                pdpe = le64_to_cpu(pdpe);
+                end = (l0 << 48) + (l1 << 39) + (l2 << 30);
+                if (!(pdpe & PG_PRESENT_MASK)) {
+                    prot = 0;
+                    mem_print(mon, &start, &last_prot, end, prot);
+                    continue;
+                }
+                /* 1G pages: the PDPT entry is the leaf, U/S and R/W are ANDed */
+                if (pdpe & PG_PSE_MASK) {
+                    prot = pdpe & (PG_USER_MASK | PG_RW_MASK |
+                            PG_PRESENT_MASK);
+                    prot &= pml5e & pml4e;
+                    mem_print(mon, &start, &last_prot, end, prot);
+                    continue;
+                }
+
+                pd_addr = pdpe & 0x3fffffffff000ULL;
+                for (l3 = 0; l3 < 512; l3++) {
+                    cpu_physical_memory_read(pd_addr + l3 * 8, &pde, 8);
+                    pde = le64_to_cpu(pde);
+                    end = (l0 << 48) + (l1 << 39) + (l2 << 30) + (l3 << 21);
+                    if (!(pde & PG_PRESENT_MASK)) {
+                        prot = 0;
+                        mem_print(mon, &start, &last_prot, end, prot);
+                        continue;
+                    }
+                    /* 2M pages: the PD entry is the leaf */
+                    if (pde & PG_PSE_MASK) {
+                        prot = pde & (PG_USER_MASK | PG_RW_MASK |
+                                PG_PRESENT_MASK);
+                        prot &= pml5e & pml4e & pdpe;
+                        mem_print(mon, &start, &last_prot, end, prot);
+                        continue;
+                    }
+
+                    pt_addr = pde & 0x3fffffffff000ULL;
+                    for (l4 = 0; l4 < 512; l4++) {
+                        cpu_physical_memory_read(pt_addr + l4 * 8, &pte, 8);
+                        pte = le64_to_cpu(pte);
+                        end = (l0 << 48) + (l1 << 39) + (l2 << 30) +
+                            (l3 << 21) + (l4 << 12);
+                        if (pte & PG_PRESENT_MASK) {
+                            prot = pte & (PG_USER_MASK | PG_RW_MASK |
+                                    PG_PRESENT_MASK);
+                            prot &= pml5e & pml4e & pdpe & pde;
+                        } else {
+                            prot = 0;
+                        }
+                        mem_print(mon, &start, &last_prot, end, prot);
+                    }
+                }
+            }
+        }
+    }
+    /* Flush last range */
+    mem_print(mon, &start, &last_prot, (hwaddr)1 << 57, 0);
+}
 #endif /* TARGET_X86_64 */
 
 void hmp_info_mem(Monitor *mon, const QDict *qdict)
@@ -415,7 +537,11 @@  void hmp_info_mem(Monitor *mon, const QDict *qdict)
     if (env->cr[4] & CR4_PAE_MASK) {
 #ifdef TARGET_X86_64
         if (env->hflags & HF_LMA_MASK) {
-            mem_info_64(mon, env);
+            if (env->cr[4] & CR4_LA57_MASK) {
+                mem_info_la57(mon, env);
+            } else {
+                mem_info_la48(mon, env);
+            }
         } else
 #endif
         {
diff --git a/target-i386/translate.c b/target-i386/translate.c
index 324103c88521..d2aec5c9bf06 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -137,6 +137,7 @@  typedef struct DisasContext {
     int cpuid_ext2_features;
     int cpuid_ext3_features;
     int cpuid_7_0_ebx_features;
+    int cpuid_7_0_ecx_features;
     int cpuid_xsave_features;
 } DisasContext;
 
@@ -8350,6 +8351,7 @@  void gen_intermediate_code(CPUX86State *env, TranslationBlock *tb)
     dc->cpuid_ext2_features = env->features[FEAT_8000_0001_EDX];
     dc->cpuid_ext3_features = env->features[FEAT_8000_0001_ECX];
     dc->cpuid_7_0_ebx_features = env->features[FEAT_7_0_EBX];
+    dc->cpuid_7_0_ecx_features = env->features[FEAT_7_0_ECX];
     dc->cpuid_xsave_features = env->features[FEAT_XSAVE];
 #ifdef TARGET_X86_64
     dc->lma = (flags >> HF_LMA_SHIFT) & 1;