diff mbox

[-V3,2/4] target-ppc: Fix page table lookup with kvm enabled

Message ID 87r4dhdmwc.fsf@linux.vnet.ibm.com
State New
Headers show

Commit Message

Aneesh Kumar K.V Aug. 26, 2013, 5:46 a.m. UTC
Alexander Graf <agraf@suse.de> writes:

> On 23.08.2013, at 06:20, Aneesh Kumar K.V wrote:
>
>> From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
>> 
>> With kvm enabled, we store the hash page table information in the hypervisor.
>> Use ioctl to read the htab contents. Without this we get the below error when
>> trying to read the guest address
>> 
>> (gdb) x/10 do_fork
>> 0xc000000000098660 <do_fork>:   Cannot access memory at address 0xc000000000098660
>> (gdb)
>> 
>> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
>> ---
>> target-ppc/kvm.c        | 45 +++++++++++++++++++++++++++++++++++++++++++++
>> target-ppc/kvm_ppc.h    |  9 ++++++++-
>> target-ppc/mmu-hash64.c | 25 ++++++++++++++++---------
>> 3 files changed, 69 insertions(+), 10 deletions(-)
>> 
>> diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c
>> index 6878af2..bcc6544 100644
>> --- a/target-ppc/kvm.c
>> +++ b/target-ppc/kvm.c
>> @@ -1891,3 +1891,48 @@ int kvm_arch_on_sigbus(int code, void *addr)
>> void kvm_arch_init_irq_routing(KVMState *s)
>> {
>> }
>> +
>> +int kvmppc_hash64_load_hpte(PowerPCCPU *cpu, uint64_t index,
>> +                            target_ulong *hpte0, target_ulong *hpte1)
>> +{
>> +    int htab_fd;
>> +    struct kvm_get_htab_fd ghf;
>> +    struct kvm_get_htab_buf {
>> +        struct kvm_get_htab_header header;
>> +        /*
>> +         * Older kernel required one extra byte.
>> +         */
>> +        unsigned long hpte[3];
>> +    } hpte_buf;
>> +
>> +    *hpte0 = 0;
>> +    *hpte1 = 0;
>> +    if (!cap_htab_fd) {
>> +        return 0;
>> +    }
>> +    /*
>> +     * At this point we are only interested in reading only bolted entries
>> +     */
>> +    ghf.flags = KVM_GET_HTAB_BOLTED_ONLY;
>> +    ghf.start_index = index;
>> +    htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
>
> We should cache this.

The fd returned by KVM_PPC_GET_HTAB_FD doesn't support a proper lseek
interface. ie, we cannot seek around to read the hpte entries at index.
The call paths are also not in the hot path. Hence I didn't look at
caching. We could definitely avoid doing the ioctl in loop. See below
for the changes.


>
>> +    if (htab_fd < 0) {
>> +        return htab_fd;
>> +    }
>> +
>> +    if (read(htab_fd, &hpte_buf, sizeof(hpte_buf)) < 0) {
>> +        goto out;
>> +    }
>> +    /*
>> +     * We only requested for one entry, So we should get only 1
>> +     * valid entry at the same index
>> +     */
>> +    if (hpte_buf.header.n_valid != 1 || hpte_buf.header.index != index) {
>> +        goto out;
>> +    }
>> +    *hpte0 = hpte_buf.hpte[0];
>> +    *hpte1 = hpte_buf.hpte[1];
>> +out:
>> +    close(htab_fd);
>> +    return 0;
>> +}
>> diff --git a/target-ppc/kvm_ppc.h b/target-ppc/kvm_ppc.h
>> index 4ae7bf2..e25373a 100644
>> --- a/target-ppc/kvm_ppc.h
>> +++ b/target-ppc/kvm_ppc.h
>> @@ -42,7 +42,8 @@ int kvmppc_get_htab_fd(bool write);
>> int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns);
>> int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
>>                          uint16_t n_valid, uint16_t n_invalid);
>> -
>> +int kvmppc_hash64_load_hpte(PowerPCCPU *cpu, uint64_t index,
>> +                            target_ulong *hpte0, target_ulong *hpte1);
>> #else
>> 
>> static inline uint32_t kvmppc_get_tbfreq(void)
>> @@ -181,6 +182,12 @@ static inline int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
>>   abort();
>> }
>> 
>> +static inline int kvmppc_hash64_load_hpte(PowerPCCPU *cpu, uint64_t index,
>> +                                          target_ulong *hpte0,
>> +                                          target_ulong *hpte1)
>> +{
>> +    abort();
>> +}
>> #endif
>> 
>> #ifndef CONFIG_KVM
>> diff --git a/target-ppc/mmu-hash64.c b/target-ppc/mmu-hash64.c
>> index 67fc1b5..4d8120c 100644
>> --- a/target-ppc/mmu-hash64.c
>> +++ b/target-ppc/mmu-hash64.c
>> @@ -302,17 +302,26 @@ static int ppc_hash64_amr_prot(CPUPPCState *env, ppc_hash_pte64_t pte)
>>   return prot;
>> }
>> 
>> -static hwaddr ppc_hash64_pteg_search(CPUPPCState *env, hwaddr pteg_off,
>> +static hwaddr ppc_hash64_pteg_search(CPUPPCState *env, hwaddr hash,
>>                                    bool secondary, target_ulong ptem,
>>                                    ppc_hash_pte64_t *pte)
>> {
>> -    hwaddr pte_offset = pteg_off;
>> +    uint64_t index;
>> +    hwaddr pte_offset;
>>   target_ulong pte0, pte1;
>>   int i;
>> 
>> +    pte_offset = (hash * HASH_PTEG_SIZE_64) & env->htab_mask;;
>> +    index = (hash * HPTES_PER_GROUP) & env->htab_mask;
>> +
>>   for (i = 0; i < HPTES_PER_GROUP; i++) {
>> -        pte0 = ppc_hash64_load_hpte0(env, pte_offset);
>> -        pte1 = ppc_hash64_load_hpte1(env, pte_offset);
>> +        if (kvm_enabled()) {
>> +            index += i;
>> +            kvmppc_hash64_load_hpte(ppc_env_get_cpu(env), index, &pte0, &pte1);
>
> This breaks PR KVM which doesn't have an HTAB fd.
>
> I think what you want is code in kvmppc_set_papr() that tries to fetch
> an HTAB fd. You can then modify the check to if (kvm_enabled() &&
> kvmppc_has_htab_fd()), as the below case should work just fine on PR
> KVM.

As explained before caching htab fd may not really work. How about 




-aneesh

Comments

Alexander Graf Aug. 26, 2013, 11:09 a.m. UTC | #1
On 26.08.2013, at 07:46, Aneesh Kumar K.V wrote:

> Alexander Graf <agraf@suse.de> writes:
> 
>> On 23.08.2013, at 06:20, Aneesh Kumar K.V wrote:
>> 
>>> From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
>>> 
>>> With kvm enabled, we store the hash page table information in the hypervisor.
>>> Use ioctl to read the htab contents. Without this we get the below error when
>>> trying to read the guest address
>>> 
>>> (gdb) x/10 do_fork
>>> 0xc000000000098660 <do_fork>:   Cannot access memory at address 0xc000000000098660
>>> (gdb)
>>> 
>>> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
>>> ---
>>> target-ppc/kvm.c        | 45 +++++++++++++++++++++++++++++++++++++++++++++
>>> target-ppc/kvm_ppc.h    |  9 ++++++++-
>>> target-ppc/mmu-hash64.c | 25 ++++++++++++++++---------
>>> 3 files changed, 69 insertions(+), 10 deletions(-)
>>> 
>>> diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c
>>> index 6878af2..bcc6544 100644
>>> --- a/target-ppc/kvm.c
>>> +++ b/target-ppc/kvm.c
>>> @@ -1891,3 +1891,48 @@ int kvm_arch_on_sigbus(int code, void *addr)
>>> void kvm_arch_init_irq_routing(KVMState *s)
>>> {
>>> }
>>> +
>>> +int kvmppc_hash64_load_hpte(PowerPCCPU *cpu, uint64_t index,
>>> +                            target_ulong *hpte0, target_ulong *hpte1)
>>> +{
>>> +    int htab_fd;
>>> +    struct kvm_get_htab_fd ghf;
>>> +    struct kvm_get_htab_buf {
>>> +        struct kvm_get_htab_header header;
>>> +        /*
>>> +         * Older kernel required one extra byte.
>>> +         */
>>> +        unsigned long hpte[3];
>>> +    } hpte_buf;
>>> +
>>> +    *hpte0 = 0;
>>> +    *hpte1 = 0;
>>> +    if (!cap_htab_fd) {
>>> +        return 0;
>>> +    }
>>> +    /*
>>> +     * At this point we are only interested in reading only bolted entries
>>> +     */
>>> +    ghf.flags = KVM_GET_HTAB_BOLTED_ONLY;
>>> +    ghf.start_index = index;
>>> +    htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
>> 
>> We should cache this.
> 
> The fd returned by KVM_PPC_GET_HTAB_FD doesn't support a proper lseek
> interface. ie, we cannot seek around to read the hpte entries at index.
> The call paths are also not in the hot path. Hence I didn't look at
> caching. We could definitely avoid doing the ioctl in loop. See below
> for the changes.
> 
> 
>> 
>>> +    if (htab_fd < 0) {
>>> +        return htab_fd;
>>> +    }
>>> +
>>> +    if (read(htab_fd, &hpte_buf, sizeof(hpte_buf)) < 0) {
>>> +        goto out;
>>> +    }
>>> +    /*
>>> +     * We only requested for one entry, So we should get only 1
>>> +     * valid entry at the same index
>>> +     */
>>> +    if (hpte_buf.header.n_valid != 1 || hpte_buf.header.index != index) {
>>> +        goto out;
>>> +    }
>>> +    *hpte0 = hpte_buf.hpte[0];
>>> +    *hpte1 = hpte_buf.hpte[1];
>>> +out:
>>> +    close(htab_fd);
>>> +    return 0;
>>> +}
>>> diff --git a/target-ppc/kvm_ppc.h b/target-ppc/kvm_ppc.h
>>> index 4ae7bf2..e25373a 100644
>>> --- a/target-ppc/kvm_ppc.h
>>> +++ b/target-ppc/kvm_ppc.h
>>> @@ -42,7 +42,8 @@ int kvmppc_get_htab_fd(bool write);
>>> int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns);
>>> int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
>>>                         uint16_t n_valid, uint16_t n_invalid);
>>> -
>>> +int kvmppc_hash64_load_hpte(PowerPCCPU *cpu, uint64_t index,
>>> +                            target_ulong *hpte0, target_ulong *hpte1);
>>> #else
>>> 
>>> static inline uint32_t kvmppc_get_tbfreq(void)
>>> @@ -181,6 +182,12 @@ static inline int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
>>>  abort();
>>> }
>>> 
>>> +static inline int kvmppc_hash64_load_hpte(PowerPCCPU *cpu, uint64_t index,
>>> +                                          target_ulong *hpte0,
>>> +                                          target_ulong *hpte1)
>>> +{
>>> +    abort();
>>> +}
>>> #endif
>>> 
>>> #ifndef CONFIG_KVM
>>> diff --git a/target-ppc/mmu-hash64.c b/target-ppc/mmu-hash64.c
>>> index 67fc1b5..4d8120c 100644
>>> --- a/target-ppc/mmu-hash64.c
>>> +++ b/target-ppc/mmu-hash64.c
>>> @@ -302,17 +302,26 @@ static int ppc_hash64_amr_prot(CPUPPCState *env, ppc_hash_pte64_t pte)
>>>  return prot;
>>> }
>>> 
>>> -static hwaddr ppc_hash64_pteg_search(CPUPPCState *env, hwaddr pteg_off,
>>> +static hwaddr ppc_hash64_pteg_search(CPUPPCState *env, hwaddr hash,
>>>                                   bool secondary, target_ulong ptem,
>>>                                   ppc_hash_pte64_t *pte)
>>> {
>>> -    hwaddr pte_offset = pteg_off;
>>> +    uint64_t index;
>>> +    hwaddr pte_offset;
>>>  target_ulong pte0, pte1;
>>>  int i;
>>> 
>>> +    pte_offset = (hash * HASH_PTEG_SIZE_64) & env->htab_mask;;
>>> +    index = (hash * HPTES_PER_GROUP) & env->htab_mask;
>>> +
>>>  for (i = 0; i < HPTES_PER_GROUP; i++) {
>>> -        pte0 = ppc_hash64_load_hpte0(env, pte_offset);
>>> -        pte1 = ppc_hash64_load_hpte1(env, pte_offset);
>>> +        if (kvm_enabled()) {
>>> +            index += i;
>>> +            kvmppc_hash64_load_hpte(ppc_env_get_cpu(env), index, &pte0, &pte1);
>> 
>> This breaks PR KVM which doesn't have an HTAB fd.
>> 
>> I think what you want is code in kvmppc_set_papr() that tries to fetch
>> an HTAB fd. You can then modify the check to if (kvm_enabled() &&
>> kvmppc_has_htab_fd()), as the below case should work just fine on PR
>> KVM.
> 
> As explained before caching htab fd may not really work. How about 
> 
> diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c
> index fce8835..cf6aca4 100644
> --- a/target-ppc/kvm.c
> +++ b/target-ppc/kvm.c
> @@ -1892,19 +1892,24 @@ void kvm_arch_init_irq_routing(KVMState *s)
> {
> }
> 
> -int kvmppc_hash64_load_hpte(PowerPCCPU *cpu, uint64_t index,
> -                            target_ulong *hpte0, target_ulong *hpte1)

Against which tree is this? I don't even have this function in mine.

> +hwaddr kvmppc_hash64_pteg_search(PowerPCCPU *cpu, hwaddr hash,
> +                                 bool secondary, target_ulong ptem,

I'm having a hard time following this code. How do you tell the kernel which PTE group you're interested in?

> +                                 target_ulong *hpte0, target_ulong *hpte1)
> {
>     int htab_fd;
> +    uint64_t index;
> +    hwaddr pte_offset;
> +    target_ulong pte0, pte1;
>     struct kvm_get_htab_fd ghf;
>     struct kvm_get_htab_buf {
>         struct kvm_get_htab_header header;
>         /*
>          * Older kernel required one extra byte.
>          */
> -        unsigned long hpte[3];
> +        unsigned long hpte[(HPTES_PER_GROUP * 2) + 1];

Does this need your kernel patch to work?


Alex
diff mbox

Patch

diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c
index fce8835..cf6aca4 100644
--- a/target-ppc/kvm.c
+++ b/target-ppc/kvm.c
@@ -1892,19 +1892,24 @@  void kvm_arch_init_irq_routing(KVMState *s)
 {
 }
 
-int kvmppc_hash64_load_hpte(PowerPCCPU *cpu, uint64_t index,
-                            target_ulong *hpte0, target_ulong *hpte1)
+hwaddr kvmppc_hash64_pteg_search(PowerPCCPU *cpu, hwaddr hash,
+                                 bool secondary, target_ulong ptem,
+                                 target_ulong *hpte0, target_ulong *hpte1)
 {
     int htab_fd;
+    uint64_t index;
+    hwaddr pte_offset;
+    target_ulong pte0, pte1;
     struct kvm_get_htab_fd ghf;
     struct kvm_get_htab_buf {
         struct kvm_get_htab_header header;
         /*
          * Older kernel required one extra byte.
          */
-        unsigned long hpte[3];
+        unsigned long hpte[(HPTES_PER_GROUP * 2) + 1];
     } hpte_buf;
 
+    index = (hash * HPTES_PER_GROUP) & cpu->env.htab_mask;
     *hpte0 = 0;
     *hpte1 = 0;
     if (!cap_htab_fd) {
@@ -1917,22 +1922,33 @@  int kvmppc_hash64_load_hpte(PowerPCCPU *cpu, uint64_t index,
     ghf.start_index = index;
     htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
     if (htab_fd < 0) {
-        return htab_fd;
-    }
-
-    if (read(htab_fd, &hpte_buf, sizeof(hpte_buf)) < 0) {
-        goto out;
+        goto error_out;
     }
     /*
-     * We only requested for one entry, So we should get only 1
-     * valid entry at the same index
+     * Read the hpte group
      */
-    if (hpte_buf.header.n_valid != 1 || hpte_buf.header.index != index) {
+    if (read(htab_fd, &hpte_buf, sizeof(hpte_buf)) < 0) {
         goto out;
     }
-    *hpte0 = hpte_buf.hpte[0];
-    *hpte1 = hpte_buf.hpte[1];
+
+    index = 0;
+    pte_offset = (hash * HASH_PTEG_SIZE_64) & cpu->env.htab_mask;;
+    while (index < hpte_buf.header.n_valid) {
+        pte0 = hpte_buf.hpte[(index * 2)];
+        pte1 = hpte_buf.hpte[(index * 2) + 1];
+        if ((pte0 & HPTE64_V_VALID)
+            && (secondary == !!(pte0 & HPTE64_V_SECONDARY))
+            && HPTE64_V_COMPARE(pte0, ptem)) {
+            *hpte0 = pte0;
+            *hpte1 = pte1;
+            close(htab_fd);
+            return pte_offset;
+        }
+        index++;
+        pte_offset += HASH_PTE_SIZE_64;
+    }
 out:
     close(htab_fd);
-    return 0;
+error_out:
+    return -1;
 }
diff --git a/target-ppc/kvm_ppc.h b/target-ppc/kvm_ppc.h
index e25373a..dad0e57 100644
--- a/target-ppc/kvm_ppc.h
+++ b/target-ppc/kvm_ppc.h
@@ -42,8 +42,9 @@  int kvmppc_get_htab_fd(bool write);
 int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns);
 int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
                            uint16_t n_valid, uint16_t n_invalid);
-int kvmppc_hash64_load_hpte(PowerPCCPU *cpu, uint64_t index,
-                            target_ulong *hpte0, target_ulong *hpte1);
+hwaddr kvmppc_hash64_pteg_search(PowerPCCPU *cpu, hwaddr hash,
+                                 bool secondary, target_ulong ptem,
+                                 target_ulong *hpte0, target_ulong *hpte1);
 #else
 
 static inline uint32_t kvmppc_get_tbfreq(void)
@@ -182,9 +183,11 @@  static inline int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
     abort();
 }
 
-static inline int kvmppc_hash64_load_hpte(PowerPCCPU *cpu, uint64_t index,
-                                          target_ulong *hpte0,
-                                          target_ulong *hpte1)
+static inline hwaddr kvmppc_hash64_pteg_search(PowerPCCPU *cpu, hwaddr hash,
+                                               bool secondary,
+                                               target_ulong ptem,
+                                               target_ulong *hpte0,
+                                               target_ulong *hpte1)
 {
     abort();
 }
diff --git a/target-ppc/mmu-hash64.c b/target-ppc/mmu-hash64.c
index 4d8120c..2288fe8 100644
--- a/target-ppc/mmu-hash64.c
+++ b/target-ppc/mmu-hash64.c
@@ -306,35 +306,39 @@  static hwaddr ppc_hash64_pteg_search(CPUPPCState *env, hwaddr hash,
                                      bool secondary, target_ulong ptem,
                                      ppc_hash_pte64_t *pte)
 {
-    uint64_t index;
     hwaddr pte_offset;
     target_ulong pte0, pte1;
-    int i;
-
-    pte_offset = (hash * HASH_PTEG_SIZE_64) & env->htab_mask;;
-    index = (hash * HPTES_PER_GROUP) & env->htab_mask;
-
-    for (i = 0; i < HPTES_PER_GROUP; i++) {
-        if (kvm_enabled()) {
-            index += i;
-            kvmppc_hash64_load_hpte(ppc_env_get_cpu(env), index, &pte0, &pte1);
-        } else {
+    int i, ret = 0;
+
+    if (kvm_enabled()) {
+        ret = kvmppc_hash64_pteg_search(ppc_env_get_cpu(env), hash,
+                                        secondary, ptem,
+                                        &pte->pte0, &pte->pte1);
+    }
+    /*
+     * We don't support htab fd, check whether we have a copy of htab
+     */
+    if (!ret) {
+        pte_offset = (hash * HASH_PTEG_SIZE_64) & env->htab_mask;;
+        for (i = 0; i < HPTES_PER_GROUP; i++) {
             pte0 = ppc_hash64_load_hpte0(env, pte_offset);
             pte1 = ppc_hash64_load_hpte1(env, pte_offset);
-        }
 
-        if ((pte0 & HPTE64_V_VALID)
-            && (secondary == !!(pte0 & HPTE64_V_SECONDARY))
-            && HPTE64_V_COMPARE(pte0, ptem)) {
-            pte->pte0 = pte0;
-            pte->pte1 = pte1;
-            return pte_offset;
+            if ((pte0 & HPTE64_V_VALID)
+                && (secondary == !!(pte0 & HPTE64_V_SECONDARY))
+                && HPTE64_V_COMPARE(pte0, ptem)) {
+                pte->pte0 = pte0;
+                pte->pte1 = pte1;
+                return pte_offset;
+            }
+            pte_offset += HASH_PTE_SIZE_64;
         }
-
-        pte_offset += HASH_PTE_SIZE_64;
+        /*
+         * We didn't find a valid entry.
+         */
+        ret = -1;
     }
-
-    return -1;
+    return ret;
 }
 
 static hwaddr ppc_hash64_htab_lookup(CPUPPCState *env,