diff mbox

[-V6,2/3] target-ppc: Fix page table lookup with kvm enabled

Message ID 1381827527-24475-2-git-send-email-aneesh.kumar@linux.vnet.ibm.com
State New
Headers show

Commit Message

Aneesh Kumar K.V Oct. 15, 2013, 8:58 a.m. UTC
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>

With kvm enabled, we store the hash page table information in the hypervisor.
Use ioctl to read the htab contents. Without this we get the below error when
trying to read the guest address

 (gdb) x/10 do_fork
 0xc000000000098660 <do_fork>:   Cannot access memory at address 0xc000000000098660
 (gdb)

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
Changes from V5:

* Added two new patches
* Address review comments

 hw/ppc/spapr_hcall.c    | 47 ++++++++++++++++++++----------
 target-ppc/kvm.c        | 53 ++++++++++++++++++++++++++++++++++
 target-ppc/kvm_ppc.h    | 19 ++++++++++++
 target-ppc/mmu-hash64.c | 77 ++++++++++++++++++++++++++++++++++++++++---------
 target-ppc/mmu-hash64.h | 23 ++++++++++-----
 5 files changed, 181 insertions(+), 38 deletions(-)

Comments

Alexander Graf Oct. 27, 2013, 6:15 p.m. UTC | #1
On 15.10.2013, at 01:58, Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> wrote:

> From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
> 
> With kvm enabled, we store the hash page table information in the hypervisor.
> Use ioctl to read the htab contents. Without this we get the below error when
> trying to read the guest address
> 
> (gdb) x/10 do_fork
> 0xc000000000098660 <do_fork>:   Cannot access memory at address 0xc000000000098660
> (gdb)
> 
> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
> ---
> Changes from V5:
> 
> * Added two new patches
> * Address review comments
> 
> hw/ppc/spapr_hcall.c    | 47 ++++++++++++++++++++----------
> target-ppc/kvm.c        | 53 ++++++++++++++++++++++++++++++++++
> target-ppc/kvm_ppc.h    | 19 ++++++++++++
> target-ppc/mmu-hash64.c | 77 ++++++++++++++++++++++++++++++++++++++++---------
> target-ppc/mmu-hash64.h | 23 ++++++++++-----
> 5 files changed, 181 insertions(+), 38 deletions(-)
> 
> diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c
> index f10ba8a..e04bf6c 100644
> --- a/hw/ppc/spapr_hcall.c
> +++ b/hw/ppc/spapr_hcall.c
> @@ -52,6 +52,8 @@ static target_ulong h_enter(PowerPCCPU *cpu, sPAPREnvironment *spapr,
>     target_ulong raddr;
>     target_ulong i;
>     hwaddr hpte;
> +    void *token;
> +    bool htab_fd;
> 
>     /* only handle 4k and 16M pages for now */
>     if (pteh & HPTE64_V_LARGE) {
> @@ -94,25 +96,32 @@ static target_ulong h_enter(PowerPCCPU *cpu, sPAPREnvironment *spapr,
>     if ((pte_index * HASH_PTE_SIZE_64) & ~env->htab_mask) {
>         return H_PARAMETER;
>     }
> +
> +    i = 0;
> +    hpte = pte_index * HASH_PTE_SIZE_64;
>     if (likely((flags & H_EXACT) == 0)) {
>         pte_index &= ~7ULL;
> -        hpte = pte_index * HASH_PTE_SIZE_64;
> -        for (i = 0; ; ++i) {
> +        token = ppc_hash64_start_access(cpu, pte_index, &htab_fd);
> +        do {
>             if (i == 8) {
> +                ppc_hash64_stop_access(token, htab_fd);
>                 return H_PTEG_FULL;
>             }
> -            if ((ppc_hash64_load_hpte0(env, hpte) & HPTE64_V_VALID) == 0) {
> +            if ((ppc_hash64_load_hpte0(env, token, i) & HPTE64_V_VALID) == 0) {
>                 break;
>             }
> -            hpte += HASH_PTE_SIZE_64;
> -        }
> +        } while (i++);
> +        ppc_hash64_stop_access(token, htab_fd);
>     } else {
> -        i = 0;
> -        hpte = pte_index * HASH_PTE_SIZE_64;
> -        if (ppc_hash64_load_hpte0(env, hpte) & HPTE64_V_VALID) {
> +        token = ppc_hash64_start_access(cpu, pte_index, &htab_fd);
> +        if (ppc_hash64_load_hpte0(env, token, 0) & HPTE64_V_VALID) {
> +            ppc_hash64_stop_access(token, htab_fd);
>             return H_PTEG_FULL;
>         }
> +        ppc_hash64_stop_access(token, htab_fd);
>     }
> +    hpte += i * HASH_PTE_SIZE_64;
> +
>     ppc_hash64_store_hpte1(env, hpte, ptel);
>     /* eieio();  FIXME: need some sort of barrier for smp? */
>     ppc_hash64_store_hpte0(env, hpte, pteh | HPTE64_V_HPTE_DIRTY);
> @@ -134,16 +143,18 @@ static RemoveResult remove_hpte(CPUPPCState *env, target_ulong ptex,
>                                 target_ulong *vp, target_ulong *rp)
> {
>     hwaddr hpte;
> +    void *token;
> +    bool htab_fd;
>     target_ulong v, r, rb;
> 
>     if ((ptex * HASH_PTE_SIZE_64) & ~env->htab_mask) {
>         return REMOVE_PARM;
>     }
> 
> -    hpte = ptex * HASH_PTE_SIZE_64;
> -
> -    v = ppc_hash64_load_hpte0(env, hpte);
> -    r = ppc_hash64_load_hpte1(env, hpte);
> +    token = ppc_hash64_start_access(ppc_env_get_cpu(env), ptex, &htab_fd);
> +    v = ppc_hash64_load_hpte0(env, token, 0);
> +    r = ppc_hash64_load_hpte1(env, token, 0);
> +    ppc_hash64_stop_access(token, htab_fd);
> 
>     if ((v & HPTE64_V_VALID) == 0 ||
>         ((flags & H_AVPN) && (v & ~0x7fULL) != avpn) ||
> @@ -152,6 +163,7 @@ static RemoveResult remove_hpte(CPUPPCState *env, target_ulong ptex,
>     }
>     *vp = v;
>     *rp = r;
> +    hpte = ptex * HASH_PTE_SIZE_64;
>     ppc_hash64_store_hpte0(env, hpte, HPTE64_V_HPTE_DIRTY);
>     rb = compute_tlbie_rb(v, r, ptex);
>     ppc_tlb_invalidate_one(env, rb);
> @@ -260,16 +272,18 @@ static target_ulong h_protect(PowerPCCPU *cpu, sPAPREnvironment *spapr,
>     target_ulong pte_index = args[1];
>     target_ulong avpn = args[2];
>     hwaddr hpte;
> +    void *token;
> +    bool htab_fd;
>     target_ulong v, r, rb;
> 
>     if ((pte_index * HASH_PTE_SIZE_64) & ~env->htab_mask) {
>         return H_PARAMETER;
>     }
> 
> -    hpte = pte_index * HASH_PTE_SIZE_64;
> -
> -    v = ppc_hash64_load_hpte0(env, hpte);
> -    r = ppc_hash64_load_hpte1(env, hpte);
> +    token = ppc_hash64_start_access(cpu, pte_index, &htab_fd);
> +    v = ppc_hash64_load_hpte0(env, token, 0);
> +    r = ppc_hash64_load_hpte1(env, token, 0);
> +    ppc_hash64_stop_access(token, htab_fd);
> 
>     if ((v & HPTE64_V_VALID) == 0 ||
>         ((flags & H_AVPN) && (v & ~0x7fULL) != avpn)) {
> @@ -282,6 +296,7 @@ static target_ulong h_protect(PowerPCCPU *cpu, sPAPREnvironment *spapr,
>     r |= (flags << 48) & HPTE64_R_KEY_HI;
>     r |= flags & (HPTE64_R_PP | HPTE64_R_N | HPTE64_R_KEY_LO);
>     rb = compute_tlbie_rb(v, r, pte_index);
> +    hpte = pte_index * HASH_PTE_SIZE_64;
>     ppc_hash64_store_hpte0(env, hpte, (v & ~HPTE64_V_VALID) | HPTE64_V_HPTE_DIRTY);
>     ppc_tlb_invalidate_one(env, rb);
>     ppc_hash64_store_hpte1(env, hpte, r);
> diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c
> index ac70efbf..b8f9544 100644
> --- a/target-ppc/kvm.c
> +++ b/target-ppc/kvm.c
> @@ -1783,6 +1783,11 @@ bool kvmppc_has_cap_epr(void)
>     return cap_epr;
> }
> 
> +bool kvmppc_has_cap_htab_fd(void)
> +{
> +    return cap_htab_fd;
> +}
> +
> static int kvm_ppc_register_host_cpu_type(void)
> {
>     TypeInfo type_info = {
> @@ -1888,3 +1893,51 @@ int kvm_arch_on_sigbus(int code, void *addr)
> void kvm_arch_init_irq_routing(KVMState *s)
> {
> }
> +
> +struct kvm_get_htab_buf {
> +    struct kvm_get_htab_header header;
> +    /*
> +     * We required one extra byte for read
> +     */
> +    unsigned long hpte[(HPTES_PER_GROUP * 2) + 1];
> +};
> +
> +void *kvmppc_hash64_read_pteg(PowerPCCPU *cpu, unsigned long pte_index)
> +{
> +    int htab_fd;
> +    struct kvm_get_htab_fd ghf;
> +    struct kvm_get_htab_buf  *hpte_buf;
> +
> +    ghf.flags = 0;
> +    ghf.start_index = pte_index;
> +    htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
> +    if (htab_fd < 0) {
> +        goto error_out;
> +    }
> +
> +    hpte_buf = g_malloc0(sizeof(*hpte_buf));
> +    /*
> +     * Read the hpte group
> +     */
> +    if (read(htab_fd, hpte_buf, sizeof(*hpte_buf)) < 0) {
> +        goto out_close;
> +    }
> +
> +    close(htab_fd);
> +    return hpte_buf->hpte;
> +
> +out_close:
> +    g_free(hpte_buf);
> +    close(htab_fd);
> +error_out:
> +    return NULL;
> +}
> +
> +void kvmppc_hash64_free_pteg(void *token)
> +{
> +    struct kvm_get_htab_buf *htab_buf;
> +
> +    htab_buf = container_of(token, struct kvm_get_htab_buf, hpte);
> +    g_free(htab_buf);
> +    return;
> +}
> diff --git a/target-ppc/kvm_ppc.h b/target-ppc/kvm_ppc.h
> index 4ae7bf2..9981e34 100644
> --- a/target-ppc/kvm_ppc.h
> +++ b/target-ppc/kvm_ppc.h
> @@ -38,10 +38,13 @@ uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift);
> #endif /* !CONFIG_USER_ONLY */
> int kvmppc_fixup_cpu(PowerPCCPU *cpu);
> bool kvmppc_has_cap_epr(void);
> +bool kvmppc_has_cap_htab_fd(void);
> int kvmppc_get_htab_fd(bool write);
> int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns);
> int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
>                            uint16_t n_valid, uint16_t n_invalid);
> +void *kvmppc_hash64_read_pteg(PowerPCCPU *cpu, unsigned long pte_index);
> +void kvmppc_hash64_free_pteg(void *token);
> 
> #else
> 
> @@ -164,6 +167,11 @@ static inline bool kvmppc_has_cap_epr(void)
>     return false;
> }
> 
> +static inline bool kvmppc_has_cap_htab_fd(void)
> +{
> +    return false;
> +}
> +
> static inline int kvmppc_get_htab_fd(bool write)
> {
>     return -1;
> @@ -181,6 +189,17 @@ static inline int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
>     abort();
> }
> 
> +static inline void *kvmppc_hash64_read_pteg(PowerPCCPU *cpu,
> +                                            unsigned long pte_index)
> +{
> +    abort();
> +}
> +
> +static inline void kvmppc_hash64_free_pteg(void *token)
> +{
> +    abort();
> +}
> +
> #endif
> 
> #ifndef CONFIG_KVM
> diff --git a/target-ppc/mmu-hash64.c b/target-ppc/mmu-hash64.c
> index 67fc1b5..5c797c3 100644
> --- a/target-ppc/mmu-hash64.c
> +++ b/target-ppc/mmu-hash64.c
> @@ -302,29 +302,80 @@ static int ppc_hash64_amr_prot(CPUPPCState *env, ppc_hash_pte64_t pte)
>     return prot;
> }
> 
> -static hwaddr ppc_hash64_pteg_search(CPUPPCState *env, hwaddr pteg_off,
> +void *ppc_hash64_start_access(PowerPCCPU *cpu, unsigned long pte_index,
> +                              bool *htab_fd)
> +{
> +    void *token = NULL;
> +    hwaddr pte_offset;
> +
> +    pte_offset = pte_index * HASH_PTE_SIZE_64;
> +    *htab_fd = 0;
> +    if (kvmppc_has_cap_htab_fd()) {
> +        /*
> +         * HTAB is controlled by KVM. Fetch the PTEG into a new buffer.
> +         */
> +        token = kvmppc_hash64_read_pteg(cpu, pte_index);
> +        if (token) {
> +            *htab_fd = 1;

Why don't you just have a bool that indicates whether this specific VM can do htab fd? Just check for it at the end of kvmppc_set_papr(). By that time we should be pretty certain.


Alex

> +            return token;
> +        }
> +        /*
> +         * CAP_HTAB_FD is a device ioctl, which means we could return true
> +         * when we have both HV and PR KVM support in the kernel. But the
> +         * above read_pteg will fail, if we are using PR KVM
> +         */
> +    }
> +    /*
> +     * HTAB is controlled by QEMU. Just point to the internally
> +     * accessible PTEG.
> +     */
> +    if (cpu->env.external_htab) {
> +        token = cpu->env.external_htab + pte_offset;
> +    } else if (cpu->env.htab_base) {
> +        token = (uint8_t *) cpu->env.htab_base + pte_offset;
> +    }
> +    return token;
> +}
> +
> +void ppc_hash64_stop_access(void *token, bool htab_fd)
> +{
> +    if (htab_fd) {
> +        return kvmppc_hash64_free_pteg(token);
> +    }
> +}
> +
> +static hwaddr ppc_hash64_pteg_search(CPUPPCState *env, hwaddr hash,
>                                      bool secondary, target_ulong ptem,
>                                      ppc_hash_pte64_t *pte)
> {
> -    hwaddr pte_offset = pteg_off;
> -    target_ulong pte0, pte1;
>     int i;
> +    bool htab_fd;
> +    void *token;
> +    target_ulong pte0, pte1;
> +    unsigned long pte_index;
> 
> +    pte_index = (hash * HPTES_PER_GROUP) & env->htab_mask;
> +    token = ppc_hash64_start_access(ppc_env_get_cpu(env), pte_index, &htab_fd);
> +    if (!token) {
> +        return -1;
> +    }
>     for (i = 0; i < HPTES_PER_GROUP; i++) {
> -        pte0 = ppc_hash64_load_hpte0(env, pte_offset);
> -        pte1 = ppc_hash64_load_hpte1(env, pte_offset);
> +        pte0 = ppc_hash64_load_hpte0(env, token, i);
> +        pte1 = ppc_hash64_load_hpte1(env, token, i);
> 
>         if ((pte0 & HPTE64_V_VALID)
>             && (secondary == !!(pte0 & HPTE64_V_SECONDARY))
>             && HPTE64_V_COMPARE(pte0, ptem)) {
>             pte->pte0 = pte0;
>             pte->pte1 = pte1;
> -            return pte_offset;
> +            ppc_hash64_stop_access(token, htab_fd);
> +            return (pte_index + i) * HASH_PTE_SIZE_64;
>         }
> -
> -        pte_offset += HASH_PTE_SIZE_64;
>     }
> -
> +    ppc_hash64_stop_access(token, htab_fd);
> +    /*
> +     * We didn't find a valid entry.
> +     */
>     return -1;
> }
> 
> @@ -332,7 +383,7 @@ static hwaddr ppc_hash64_htab_lookup(CPUPPCState *env,
>                                      ppc_slb_t *slb, target_ulong eaddr,
>                                      ppc_hash_pte64_t *pte)
> {
> -    hwaddr pteg_off, pte_offset;
> +    hwaddr pte_offset;
>     hwaddr hash;
>     uint64_t vsid, epnshift, epnmask, epn, ptem;
> 
> @@ -367,8 +418,7 @@ static hwaddr ppc_hash64_htab_lookup(CPUPPCState *env,
>             " vsid=" TARGET_FMT_lx " ptem=" TARGET_FMT_lx
>             " hash=" TARGET_FMT_plx "\n",
>             env->htab_base, env->htab_mask, vsid, ptem,  hash);
> -    pteg_off = (hash * HASH_PTEG_SIZE_64) & env->htab_mask;
> -    pte_offset = ppc_hash64_pteg_search(env, pteg_off, 0, ptem, pte);
> +    pte_offset = ppc_hash64_pteg_search(env, hash, 0, ptem, pte);
> 
>     if (pte_offset == -1) {
>         /* Secondary PTEG lookup */
> @@ -377,8 +427,7 @@ static hwaddr ppc_hash64_htab_lookup(CPUPPCState *env,
>                 " hash=" TARGET_FMT_plx "\n", env->htab_base,
>                 env->htab_mask, vsid, ptem, ~hash);
> 
> -        pteg_off = (~hash * HASH_PTEG_SIZE_64) & env->htab_mask;
> -        pte_offset = ppc_hash64_pteg_search(env, pteg_off, 1, ptem, pte);
> +        pte_offset = ppc_hash64_pteg_search(env, ~hash, 1, ptem, pte);
>     }
> 
>     return pte_offset;
> diff --git a/target-ppc/mmu-hash64.h b/target-ppc/mmu-hash64.h
> index 55f5a23..845a2b3 100644
> --- a/target-ppc/mmu-hash64.h
> +++ b/target-ppc/mmu-hash64.h
> @@ -75,23 +75,30 @@ int ppc_hash64_handle_mmu_fault(CPUPPCState *env, target_ulong address, int rw,
> #define HPTE64_V_1TB_SEG        0x4000000000000000ULL
> #define HPTE64_V_VRMA_MASK      0x4001ffffff000000ULL
> 
> -static inline target_ulong ppc_hash64_load_hpte0(CPUPPCState *env,
> -                                                 hwaddr pte_offset)
> +
> +void *ppc_hash64_start_access(PowerPCCPU *cpu, unsigned long pte_index,
> +                              bool *htab_fd);
> +void ppc_hash64_stop_access(void *token, bool htab_fd);
> +
> +static inline target_ulong ppc_hash64_load_hpte0(CPUPPCState *env, void *token,
> +                                                 int index)
> {
> +    index *= HASH_PTE_SIZE_64;
>     if (env->external_htab) {
> -        return  ldq_p(env->external_htab + pte_offset);
> +        return  ldq_p(token + index);
>     } else {
> -        return ldq_phys(env->htab_base + pte_offset);
> +        return ldq_phys((uint64_t)(token + index));
>     }
> }
> 
> -static inline target_ulong ppc_hash64_load_hpte1(CPUPPCState *env,
> -                                                 hwaddr pte_offset)
> +static inline target_ulong ppc_hash64_load_hpte1(CPUPPCState *env, void *token,
> +                                                 int index)
> {
> +    index *= HASH_PTE_SIZE_64;
>     if (env->external_htab) {
> -        return ldq_p(env->external_htab + pte_offset + HASH_PTE_SIZE_64/2);
> +        return  ldq_p(token + index + HASH_PTE_SIZE_64/2);
>     } else {
> -        return ldq_phys(env->htab_base + pte_offset + HASH_PTE_SIZE_64/2);
> +        return ldq_phys((uint64_t)(token + index + HASH_PTE_SIZE_64/2));
>     }
> }
> 
> -- 
> 1.8.3.2
>
diff mbox

Patch

diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c
index f10ba8a..e04bf6c 100644
--- a/hw/ppc/spapr_hcall.c
+++ b/hw/ppc/spapr_hcall.c
@@ -52,6 +52,8 @@  static target_ulong h_enter(PowerPCCPU *cpu, sPAPREnvironment *spapr,
     target_ulong raddr;
     target_ulong i;
     hwaddr hpte;
+    void *token;
+    bool htab_fd;
 
     /* only handle 4k and 16M pages for now */
     if (pteh & HPTE64_V_LARGE) {
@@ -94,25 +96,32 @@  static target_ulong h_enter(PowerPCCPU *cpu, sPAPREnvironment *spapr,
     if ((pte_index * HASH_PTE_SIZE_64) & ~env->htab_mask) {
         return H_PARAMETER;
     }
+
+    i = 0;
+    hpte = pte_index * HASH_PTE_SIZE_64;
     if (likely((flags & H_EXACT) == 0)) {
         pte_index &= ~7ULL;
-        hpte = pte_index * HASH_PTE_SIZE_64;
-        for (i = 0; ; ++i) {
+        token = ppc_hash64_start_access(cpu, pte_index, &htab_fd);
+        do {
             if (i == 8) {
+                ppc_hash64_stop_access(token, htab_fd);
                 return H_PTEG_FULL;
             }
-            if ((ppc_hash64_load_hpte0(env, hpte) & HPTE64_V_VALID) == 0) {
+            if ((ppc_hash64_load_hpte0(env, token, i) & HPTE64_V_VALID) == 0) {
                 break;
             }
-            hpte += HASH_PTE_SIZE_64;
-        }
+        } while (i++);
+        ppc_hash64_stop_access(token, htab_fd);
     } else {
-        i = 0;
-        hpte = pte_index * HASH_PTE_SIZE_64;
-        if (ppc_hash64_load_hpte0(env, hpte) & HPTE64_V_VALID) {
+        token = ppc_hash64_start_access(cpu, pte_index, &htab_fd);
+        if (ppc_hash64_load_hpte0(env, token, 0) & HPTE64_V_VALID) {
+            ppc_hash64_stop_access(token, htab_fd);
             return H_PTEG_FULL;
         }
+        ppc_hash64_stop_access(token, htab_fd);
     }
+    hpte += i * HASH_PTE_SIZE_64;
+
     ppc_hash64_store_hpte1(env, hpte, ptel);
     /* eieio();  FIXME: need some sort of barrier for smp? */
     ppc_hash64_store_hpte0(env, hpte, pteh | HPTE64_V_HPTE_DIRTY);
@@ -134,16 +143,18 @@  static RemoveResult remove_hpte(CPUPPCState *env, target_ulong ptex,
                                 target_ulong *vp, target_ulong *rp)
 {
     hwaddr hpte;
+    void *token;
+    bool htab_fd;
     target_ulong v, r, rb;
 
     if ((ptex * HASH_PTE_SIZE_64) & ~env->htab_mask) {
         return REMOVE_PARM;
     }
 
-    hpte = ptex * HASH_PTE_SIZE_64;
-
-    v = ppc_hash64_load_hpte0(env, hpte);
-    r = ppc_hash64_load_hpte1(env, hpte);
+    token = ppc_hash64_start_access(ppc_env_get_cpu(env), ptex, &htab_fd);
+    v = ppc_hash64_load_hpte0(env, token, 0);
+    r = ppc_hash64_load_hpte1(env, token, 0);
+    ppc_hash64_stop_access(token, htab_fd);
 
     if ((v & HPTE64_V_VALID) == 0 ||
         ((flags & H_AVPN) && (v & ~0x7fULL) != avpn) ||
@@ -152,6 +163,7 @@  static RemoveResult remove_hpte(CPUPPCState *env, target_ulong ptex,
     }
     *vp = v;
     *rp = r;
+    hpte = ptex * HASH_PTE_SIZE_64;
     ppc_hash64_store_hpte0(env, hpte, HPTE64_V_HPTE_DIRTY);
     rb = compute_tlbie_rb(v, r, ptex);
     ppc_tlb_invalidate_one(env, rb);
@@ -260,16 +272,18 @@  static target_ulong h_protect(PowerPCCPU *cpu, sPAPREnvironment *spapr,
     target_ulong pte_index = args[1];
     target_ulong avpn = args[2];
     hwaddr hpte;
+    void *token;
+    bool htab_fd;
     target_ulong v, r, rb;
 
     if ((pte_index * HASH_PTE_SIZE_64) & ~env->htab_mask) {
         return H_PARAMETER;
     }
 
-    hpte = pte_index * HASH_PTE_SIZE_64;
-
-    v = ppc_hash64_load_hpte0(env, hpte);
-    r = ppc_hash64_load_hpte1(env, hpte);
+    token = ppc_hash64_start_access(cpu, pte_index, &htab_fd);
+    v = ppc_hash64_load_hpte0(env, token, 0);
+    r = ppc_hash64_load_hpte1(env, token, 0);
+    ppc_hash64_stop_access(token, htab_fd);
 
     if ((v & HPTE64_V_VALID) == 0 ||
         ((flags & H_AVPN) && (v & ~0x7fULL) != avpn)) {
@@ -282,6 +296,7 @@  static target_ulong h_protect(PowerPCCPU *cpu, sPAPREnvironment *spapr,
     r |= (flags << 48) & HPTE64_R_KEY_HI;
     r |= flags & (HPTE64_R_PP | HPTE64_R_N | HPTE64_R_KEY_LO);
     rb = compute_tlbie_rb(v, r, pte_index);
+    hpte = pte_index * HASH_PTE_SIZE_64;
     ppc_hash64_store_hpte0(env, hpte, (v & ~HPTE64_V_VALID) | HPTE64_V_HPTE_DIRTY);
     ppc_tlb_invalidate_one(env, rb);
     ppc_hash64_store_hpte1(env, hpte, r);
diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c
index ac70efbf..b8f9544 100644
--- a/target-ppc/kvm.c
+++ b/target-ppc/kvm.c
@@ -1783,6 +1783,11 @@  bool kvmppc_has_cap_epr(void)
     return cap_epr;
 }
 
+bool kvmppc_has_cap_htab_fd(void)
+{
+    return cap_htab_fd;
+}
+
 static int kvm_ppc_register_host_cpu_type(void)
 {
     TypeInfo type_info = {
@@ -1888,3 +1893,51 @@  int kvm_arch_on_sigbus(int code, void *addr)
 void kvm_arch_init_irq_routing(KVMState *s)
 {
 }
+
+struct kvm_get_htab_buf {
+    struct kvm_get_htab_header header;
+    /*
+     * We required one extra byte for read
+     */
+    unsigned long hpte[(HPTES_PER_GROUP * 2) + 1];
+};
+
+void *kvmppc_hash64_read_pteg(PowerPCCPU *cpu, unsigned long pte_index)
+{
+    int htab_fd;
+    struct kvm_get_htab_fd ghf;
+    struct kvm_get_htab_buf  *hpte_buf;
+
+    ghf.flags = 0;
+    ghf.start_index = pte_index;
+    htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
+    if (htab_fd < 0) {
+        goto error_out;
+    }
+
+    hpte_buf = g_malloc0(sizeof(*hpte_buf));
+    /*
+     * Read the hpte group
+     */
+    if (read(htab_fd, hpte_buf, sizeof(*hpte_buf)) < 0) {
+        goto out_close;
+    }
+
+    close(htab_fd);
+    return hpte_buf->hpte;
+
+out_close:
+    g_free(hpte_buf);
+    close(htab_fd);
+error_out:
+    return NULL;
+}
+
+void kvmppc_hash64_free_pteg(void *token)
+{
+    struct kvm_get_htab_buf *htab_buf;
+
+    htab_buf = container_of(token, struct kvm_get_htab_buf, hpte);
+    g_free(htab_buf);
+    return;
+}
diff --git a/target-ppc/kvm_ppc.h b/target-ppc/kvm_ppc.h
index 4ae7bf2..9981e34 100644
--- a/target-ppc/kvm_ppc.h
+++ b/target-ppc/kvm_ppc.h
@@ -38,10 +38,13 @@  uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift);
 #endif /* !CONFIG_USER_ONLY */
 int kvmppc_fixup_cpu(PowerPCCPU *cpu);
 bool kvmppc_has_cap_epr(void);
+bool kvmppc_has_cap_htab_fd(void);
 int kvmppc_get_htab_fd(bool write);
 int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns);
 int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
                            uint16_t n_valid, uint16_t n_invalid);
+void *kvmppc_hash64_read_pteg(PowerPCCPU *cpu, unsigned long pte_index);
+void kvmppc_hash64_free_pteg(void *token);
 
 #else
 
@@ -164,6 +167,11 @@  static inline bool kvmppc_has_cap_epr(void)
     return false;
 }
 
+static inline bool kvmppc_has_cap_htab_fd(void)
+{
+    return false;
+}
+
 static inline int kvmppc_get_htab_fd(bool write)
 {
     return -1;
@@ -181,6 +189,17 @@  static inline int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
     abort();
 }
 
+static inline void *kvmppc_hash64_read_pteg(PowerPCCPU *cpu,
+                                            unsigned long pte_index)
+{
+    abort();
+}
+
+static inline void kvmppc_hash64_free_pteg(void *token)
+{
+    abort();
+}
+
 #endif
 
 #ifndef CONFIG_KVM
diff --git a/target-ppc/mmu-hash64.c b/target-ppc/mmu-hash64.c
index 67fc1b5..5c797c3 100644
--- a/target-ppc/mmu-hash64.c
+++ b/target-ppc/mmu-hash64.c
@@ -302,29 +302,80 @@  static int ppc_hash64_amr_prot(CPUPPCState *env, ppc_hash_pte64_t pte)
     return prot;
 }
 
-static hwaddr ppc_hash64_pteg_search(CPUPPCState *env, hwaddr pteg_off,
+void *ppc_hash64_start_access(PowerPCCPU *cpu, unsigned long pte_index,
+                              bool *htab_fd)
+{
+    void *token = NULL;
+    hwaddr pte_offset;
+
+    pte_offset = pte_index * HASH_PTE_SIZE_64;
+    *htab_fd = 0;
+    if (kvmppc_has_cap_htab_fd()) {
+        /*
+         * HTAB is controlled by KVM. Fetch the PTEG into a new buffer.
+         */
+        token = kvmppc_hash64_read_pteg(cpu, pte_index);
+        if (token) {
+            *htab_fd = 1;
+            return token;
+        }
+        /*
+         * CAP_HTAB_FD is a device ioctl, which means we could return true
+         * when we have both HV and PR KVM support in the kernel. But the
+         * above read_pteg will fail, if we are using PR KVM
+         */
+    }
+    /*
+     * HTAB is controlled by QEMU. Just point to the internally
+     * accessible PTEG.
+     */
+    if (cpu->env.external_htab) {
+        token = cpu->env.external_htab + pte_offset;
+    } else if (cpu->env.htab_base) {
+        token = (uint8_t *) cpu->env.htab_base + pte_offset;
+    }
+    return token;
+}
+
+void ppc_hash64_stop_access(void *token, bool htab_fd)
+{
+    if (htab_fd) {
+        return kvmppc_hash64_free_pteg(token);
+    }
+}
+
+static hwaddr ppc_hash64_pteg_search(CPUPPCState *env, hwaddr hash,
                                      bool secondary, target_ulong ptem,
                                      ppc_hash_pte64_t *pte)
 {
-    hwaddr pte_offset = pteg_off;
-    target_ulong pte0, pte1;
     int i;
+    bool htab_fd;
+    void *token;
+    target_ulong pte0, pte1;
+    unsigned long pte_index;
 
+    pte_index = (hash * HPTES_PER_GROUP) & env->htab_mask;
+    token = ppc_hash64_start_access(ppc_env_get_cpu(env), pte_index, &htab_fd);
+    if (!token) {
+        return -1;
+    }
     for (i = 0; i < HPTES_PER_GROUP; i++) {
-        pte0 = ppc_hash64_load_hpte0(env, pte_offset);
-        pte1 = ppc_hash64_load_hpte1(env, pte_offset);
+        pte0 = ppc_hash64_load_hpte0(env, token, i);
+        pte1 = ppc_hash64_load_hpte1(env, token, i);
 
         if ((pte0 & HPTE64_V_VALID)
             && (secondary == !!(pte0 & HPTE64_V_SECONDARY))
             && HPTE64_V_COMPARE(pte0, ptem)) {
             pte->pte0 = pte0;
             pte->pte1 = pte1;
-            return pte_offset;
+            ppc_hash64_stop_access(token, htab_fd);
+            return (pte_index + i) * HASH_PTE_SIZE_64;
         }
-
-        pte_offset += HASH_PTE_SIZE_64;
     }
-
+    ppc_hash64_stop_access(token, htab_fd);
+    /*
+     * We didn't find a valid entry.
+     */
     return -1;
 }
 
@@ -332,7 +383,7 @@  static hwaddr ppc_hash64_htab_lookup(CPUPPCState *env,
                                      ppc_slb_t *slb, target_ulong eaddr,
                                      ppc_hash_pte64_t *pte)
 {
-    hwaddr pteg_off, pte_offset;
+    hwaddr pte_offset;
     hwaddr hash;
     uint64_t vsid, epnshift, epnmask, epn, ptem;
 
@@ -367,8 +418,7 @@  static hwaddr ppc_hash64_htab_lookup(CPUPPCState *env,
             " vsid=" TARGET_FMT_lx " ptem=" TARGET_FMT_lx
             " hash=" TARGET_FMT_plx "\n",
             env->htab_base, env->htab_mask, vsid, ptem,  hash);
-    pteg_off = (hash * HASH_PTEG_SIZE_64) & env->htab_mask;
-    pte_offset = ppc_hash64_pteg_search(env, pteg_off, 0, ptem, pte);
+    pte_offset = ppc_hash64_pteg_search(env, hash, 0, ptem, pte);
 
     if (pte_offset == -1) {
         /* Secondary PTEG lookup */
@@ -377,8 +427,7 @@  static hwaddr ppc_hash64_htab_lookup(CPUPPCState *env,
                 " hash=" TARGET_FMT_plx "\n", env->htab_base,
                 env->htab_mask, vsid, ptem, ~hash);
 
-        pteg_off = (~hash * HASH_PTEG_SIZE_64) & env->htab_mask;
-        pte_offset = ppc_hash64_pteg_search(env, pteg_off, 1, ptem, pte);
+        pte_offset = ppc_hash64_pteg_search(env, ~hash, 1, ptem, pte);
     }
 
     return pte_offset;
diff --git a/target-ppc/mmu-hash64.h b/target-ppc/mmu-hash64.h
index 55f5a23..845a2b3 100644
--- a/target-ppc/mmu-hash64.h
+++ b/target-ppc/mmu-hash64.h
@@ -75,23 +75,30 @@  int ppc_hash64_handle_mmu_fault(CPUPPCState *env, target_ulong address, int rw,
 #define HPTE64_V_1TB_SEG        0x4000000000000000ULL
 #define HPTE64_V_VRMA_MASK      0x4001ffffff000000ULL
 
-static inline target_ulong ppc_hash64_load_hpte0(CPUPPCState *env,
-                                                 hwaddr pte_offset)
+
+void *ppc_hash64_start_access(PowerPCCPU *cpu, unsigned long pte_index,
+                              bool *htab_fd);
+void ppc_hash64_stop_access(void *token, bool htab_fd);
+
+static inline target_ulong ppc_hash64_load_hpte0(CPUPPCState *env, void *token,
+                                                 int index)
 {
+    index *= HASH_PTE_SIZE_64;
     if (env->external_htab) {
-        return  ldq_p(env->external_htab + pte_offset);
+        return  ldq_p(token + index);
     } else {
-        return ldq_phys(env->htab_base + pte_offset);
+        return ldq_phys((uint64_t)(token + index));
     }
 }
 
-static inline target_ulong ppc_hash64_load_hpte1(CPUPPCState *env,
-                                                 hwaddr pte_offset)
+static inline target_ulong ppc_hash64_load_hpte1(CPUPPCState *env, void *token,
+                                                 int index)
 {
+    index *= HASH_PTE_SIZE_64;
     if (env->external_htab) {
-        return ldq_p(env->external_htab + pte_offset + HASH_PTE_SIZE_64/2);
+        return  ldq_p(token + index + HASH_PTE_SIZE_64/2);
     } else {
-        return ldq_phys(env->htab_base + pte_offset + HASH_PTE_SIZE_64/2);
+        return ldq_phys((uint64_t)(token + index + HASH_PTE_SIZE_64/2));
     }
 }