Patchwork [1/3] powerpc iommu: multiple TCE requests enabled

login
register
mail settings
Submitter Alexey Kardashevskiy
Date Feb. 19, 2013, 7:43 a.m.
Message ID <1361259817-9788-2-git-send-email-aik@ozlabs.ru>
Download mbox | patch
Permalink /patch/221641/
State New
Headers show

Comments

Alexey Kardashevskiy - Feb. 19, 2013, 7:43 a.m.
Currently only single TCE entry per requiest is supported (H_PUT_TCE).
However PAPR+ specification allows multiple entry requests such as
H_PUT_TCE_INDIRECT and H_STUFFF_TCE. Having less transitions to the host
kernel via ioctls, support of these calls can accelerate IOMMU operations.

The patch adds a check for the KVM_CAP_PPC_MULTITCE capability and
if it is supported, QEMU adds the "call-multi-tce" property to hypertas
list which triggers the guest to use H_PUT_TCE_INDIRECT and H_STUFF_TCE
instead of H_PUT_TCE.

Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>

Conflicts:
	hw/spapr_iommu.c
	linux-headers/linux/kvm.h
---
 hw/spapr.c                |   12 ++++++--
 hw/spapr_iommu.c          |   71 +++++++++++++++++++++++++++++++++++++++++++++
 linux-headers/linux/kvm.h |    1 +
 3 files changed, 82 insertions(+), 2 deletions(-)
David Gibson - Feb. 21, 2013, 10:52 p.m.
On Tue, Feb 19, 2013 at 06:43:35PM +1100, Alexey Kardashevskiy wrote:
> Currently only single TCE entry per requiest is supported (H_PUT_TCE).
> However PAPR+ specification allows multiple entry requests such as
> H_PUT_TCE_INDIRECT and H_STUFFF_TCE. Having less transitions to the host
> kernel via ioctls, support of these calls can accelerate IOMMU operations.
> 
> The patch adds a check for the KVM_CAP_PPC_MULTITCE capability and
> if it is supported, QEMU adds the "call-multi-tce" property to hypertas
> list which triggers the guest to use H_PUT_TCE_INDIRECT and H_STUFF_TCE
> instead of H_PUT_TCE.
> 
> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
> 
> Conflicts:
> 	hw/spapr_iommu.c
> 	linux-headers/linux/kvm.h

Try to remember to remove the conflict messages before you send out.

> ---
>  hw/spapr.c                |   12 ++++++--
>  hw/spapr_iommu.c          |   71 +++++++++++++++++++++++++++++++++++++++++++++
>  linux-headers/linux/kvm.h |    1 +
>  3 files changed, 82 insertions(+), 2 deletions(-)
> 
> diff --git a/hw/spapr.c b/hw/spapr.c
> index 2ec0cd0..231a7b6 100644
> --- a/hw/spapr.c
> +++ b/hw/spapr.c
> @@ -233,6 +233,9 @@ static void *spapr_create_fdt_skel(const char *cpu_model,
>      CPUPPCState *env;
>      uint32_t start_prop = cpu_to_be32(initrd_base);
>      uint32_t end_prop = cpu_to_be32(initrd_base + initrd_size);
> +    char hypertas_propm[] = "hcall-pft\0hcall-term\0hcall-dabr\0hcall-interrupt"
> +        "\0hcall-tce\0hcall-vio\0hcall-splpar\0hcall-bulk"
> +        "\0hcall-multi-tce";
>      char hypertas_prop[] = "hcall-pft\0hcall-term\0hcall-dabr\0hcall-interrupt"
>          "\0hcall-tce\0hcall-vio\0hcall-splpar\0hcall-bulk";
>      char qemu_hypertas_prop[] = "hcall-memop1";
> @@ -406,8 +409,13 @@ static void *spapr_create_fdt_skel(const char *cpu_model,
>      /* RTAS */
>      _FDT((fdt_begin_node(fdt, "rtas")));
>  
> -    _FDT((fdt_property(fdt, "ibm,hypertas-functions", hypertas_prop,
> -                       sizeof(hypertas_prop))));
> +    if (kvm_check_extension(kvm_state, KVM_CAP_PPC_MULTITCE)) {
> +        _FDT((fdt_property(fdt, "ibm,hypertas-functions", hypertas_propm,
> +                           sizeof(hypertas_propm))));
> +    } else {
> +        _FDT((fdt_property(fdt, "ibm,hypertas-functions", hypertas_prop,
> +                           sizeof(hypertas_prop))));
> +    }

You've implemented the multitce hypercalls in qemu, but because of the
kvm capability check, you'll never advertise them in full emu.
Instead you should always advertise them as available, and the kvm
capability will just be a question of whether they go fast (through
kvm) or slow (through qemu).

>      _FDT((fdt_property(fdt, "qemu,hypertas-functions", qemu_hypertas_prop,
>                         sizeof(qemu_hypertas_prop))));
>  
> diff --git a/hw/spapr_iommu.c b/hw/spapr_iommu.c
> index 5904c1c..94630c1 100644
> --- a/hw/spapr_iommu.c
> +++ b/hw/spapr_iommu.c
> @@ -234,6 +234,75 @@ static target_ulong put_tce_emu(sPAPRTCETable *tcet, target_ulong ioba,
>      return H_SUCCESS;
>  }
>  
> +static target_ulong h_put_tce_indirect(PowerPCCPU *cpu,
> +                                       sPAPREnvironment *spapr,
> +                                       target_ulong opcode, target_ulong *args)
> +{
> +    int i;
> +    target_ulong liobn = args[0];
> +    target_ulong ioba = args[1];
> +    target_ulong tce_list = args[2];
> +    target_ulong npages = args[3];
> +    target_ulong *tces, ret = 0;
> +    sPAPRTCETable *tcet = spapr_tce_find_by_liobn(liobn);
> +
> +    if (liobn & 0xFFFFFFFF00000000ULL) {
> +        hcall_dprintf("spapr_vio_put_tce on out-of-boundsw LIOBN "
> +                      TARGET_FMT_lx "\n", liobn);
> +        return H_PARAMETER;
> +    }
> +
> +    tces = (target_ulong *) qemu_get_ram_ptr(tce_list & ~SPAPR_TCE_PAGE_MASK);
> +
> +    if (tcet) {
> +        for (i = 0; (i < npages) && !ret; ++i, ioba += SPAPR_TCE_PAGE_SIZE) {
> +            ret = put_tce_emu(tcet, ioba, tces[i]);
> +        }
> +        return ret;

The !ret in the loop condition is really easy to miss.  I'd prefer an
explicit if (ret != 0) within the loop, even though it's longer.

> +    }
> +#ifdef DEBUG_TCE
> +    fprintf(stderr, "%s on liobn=" TARGET_FMT_lx /*%s*/
> +            "  ioba 0x" TARGET_FMT_lx "  TCE 0x" TARGET_FMT_lx "\n",
> +            __func__, liobn, /*dev->qdev.id, */ioba, tce);
> +#endif
> +
> +    return H_PARAMETER;
> +}
> +
> +static target_ulong h_stuff_tce(PowerPCCPU *cpu, sPAPREnvironment *spapr,
> +                              target_ulong opcode, target_ulong *args)
> +{
> +    int i;
> +    target_ulong liobn = args[0];
> +    target_ulong ioba = args[1];
> +    target_ulong tce_value = args[2];
> +    target_ulong npages = args[3];
> +    target_ulong ret = 0;
> +    sPAPRTCETable *tcet = spapr_tce_find_by_liobn(liobn);
> +
> +    if (liobn & 0xFFFFFFFF00000000ULL) {
> +        hcall_dprintf("spapr_vio_put_tce on out-of-boundsw LIOBN "
> +                      TARGET_FMT_lx "\n", liobn);
> +        return H_PARAMETER;
> +    }
> +
> +    ioba &= ~(SPAPR_TCE_PAGE_SIZE - 1);
> +
> +    if (tcet) {
> +        for (i = 0; (i < npages) && !ret; ++i, ioba += SPAPR_TCE_PAGE_SIZE) {
> +            ret = put_tce_emu(tcet, ioba, tce_value);
> +        }
> +        return ret;
> +    }
> +#ifdef DEBUG_TCE
> +    fprintf(stderr, "%s on liobn=" TARGET_FMT_lx /*%s*/
> +            "  ioba 0x" TARGET_FMT_lx "  TCE 0x" TARGET_FMT_lx "\n",
> +            __func__, liobn, /*dev->qdev.id, */ioba, tce);
> +#endif
> +
> +    return H_PARAMETER;
> +}
> +
>  static target_ulong h_put_tce(PowerPCCPU *cpu, sPAPREnvironment *spapr,
>                                target_ulong opcode, target_ulong *args)
>  {
> @@ -268,6 +337,8 @@ void spapr_iommu_init(void)
>  
>      /* hcall-tce */
>      spapr_register_hypercall(H_PUT_TCE, h_put_tce);
> +    spapr_register_hypercall(H_PUT_TCE_INDIRECT, h_put_tce_indirect);
> +    spapr_register_hypercall(H_STUFF_TCE, h_stuff_tce);
>  }
>  
>  int spapr_dma_dt(void *fdt, int node_off, const char *propname,
> diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h
> index 56077d5..8aff3b0 100644
> --- a/linux-headers/linux/kvm.h
> +++ b/linux-headers/linux/kvm.h
> @@ -633,6 +633,7 @@ struct kvm_ppc_smmu_info {
>  #define KVM_CAP_PPC_RTAS 84
>  #define KVM_CAP_SPAPR_XICS 85
>  #define KVM_CAP_PPC_HTAB_FD 86
> +#define KVM_CAP_PPC_MULTITCE 87
>  
>  #ifdef KVM_CAP_IRQ_ROUTING
>
Alexey Kardashevskiy - Feb. 22, 2013, 1:03 a.m.
On 22/02/13 09:52, David Gibson wrote:
> On Tue, Feb 19, 2013 at 06:43:35PM +1100, Alexey Kardashevskiy wrote:
>> Currently only single TCE entry per requiest is supported (H_PUT_TCE).
>> However PAPR+ specification allows multiple entry requests such as
>> H_PUT_TCE_INDIRECT and H_STUFFF_TCE. Having less transitions to the host
>> kernel via ioctls, support of these calls can accelerate IOMMU operations.
>>
>> The patch adds a check for the KVM_CAP_PPC_MULTITCE capability and
>> if it is supported, QEMU adds the "call-multi-tce" property to hypertas
>> list which triggers the guest to use H_PUT_TCE_INDIRECT and H_STUFF_TCE
>> instead of H_PUT_TCE.
>>
>> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
>>
>> Conflicts:
>> 	hw/spapr_iommu.c
>> 	linux-headers/linux/kvm.h
>
> Try to remember to remove the conflict messages before you send out.
>
>> ---
>>   hw/spapr.c                |   12 ++++++--
>>   hw/spapr_iommu.c          |   71 +++++++++++++++++++++++++++++++++++++++++++++
>>   linux-headers/linux/kvm.h |    1 +
>>   3 files changed, 82 insertions(+), 2 deletions(-)
>>
>> diff --git a/hw/spapr.c b/hw/spapr.c
>> index 2ec0cd0..231a7b6 100644
>> --- a/hw/spapr.c
>> +++ b/hw/spapr.c
>> @@ -233,6 +233,9 @@ static void *spapr_create_fdt_skel(const char *cpu_model,
>>       CPUPPCState *env;
>>       uint32_t start_prop = cpu_to_be32(initrd_base);
>>       uint32_t end_prop = cpu_to_be32(initrd_base + initrd_size);
>> +    char hypertas_propm[] = "hcall-pft\0hcall-term\0hcall-dabr\0hcall-interrupt"
>> +        "\0hcall-tce\0hcall-vio\0hcall-splpar\0hcall-bulk"
>> +        "\0hcall-multi-tce";
>>       char hypertas_prop[] = "hcall-pft\0hcall-term\0hcall-dabr\0hcall-interrupt"
>>           "\0hcall-tce\0hcall-vio\0hcall-splpar\0hcall-bulk";
>>       char qemu_hypertas_prop[] = "hcall-memop1";
>> @@ -406,8 +409,13 @@ static void *spapr_create_fdt_skel(const char *cpu_model,
>>       /* RTAS */
>>       _FDT((fdt_begin_node(fdt, "rtas")));
>>
>> -    _FDT((fdt_property(fdt, "ibm,hypertas-functions", hypertas_prop,
>> -                       sizeof(hypertas_prop))));
>> +    if (kvm_check_extension(kvm_state, KVM_CAP_PPC_MULTITCE)) {
>> +        _FDT((fdt_property(fdt, "ibm,hypertas-functions", hypertas_propm,
>> +                           sizeof(hypertas_propm))));
>> +    } else {
>> +        _FDT((fdt_property(fdt, "ibm,hypertas-functions", hypertas_prop,
>> +                           sizeof(hypertas_prop))));
>> +    }
>
> You've implemented the multitce hypercalls in qemu, but because of the
> kvm capability check, you'll never advertise them in full emu.
> Instead you should always advertise them as available, and the kvm
> capability will just be a question of whether they go fast (through
> kvm) or slow (through qemu).

So we do not need the KVM_CAP_PPC_MULTITCE capability at all as we are not 
going to support real mode without multi-tce support in the host kernel, is 
that correct?
David Gibson - Feb. 25, 2013, 1:36 a.m.
On Fri, Feb 22, 2013 at 12:03:49PM +1100, Alexey Kardashevskiy wrote:
> On 22/02/13 09:52, David Gibson wrote:
> >On Tue, Feb 19, 2013 at 06:43:35PM +1100, Alexey Kardashevskiy wrote:
[snip]
> >You've implemented the multitce hypercalls in qemu, but because of the
> >kvm capability check, you'll never advertise them in full emu.
> >Instead you should always advertise them as available, and the kvm
> >capability will just be a question of whether they go fast (through
> >kvm) or slow (through qemu).
> 
> So we do not need the KVM_CAP_PPC_MULTITCE capability at all as we
> are not going to support real mode without multi-tce support in the
> host kernel, is that correct?

Um.. is CAP_PPC_MULTITCE covering the version of the hypercalls for
emulated devices, for vfio devices or both.  For emulated devices I
don't think we strictly need itm because we can fall back to qemu
fine.  It might still be an idea to have the capability, so qemu or
the user know in advance if things are going to be slow or fast.  For
now I think it's simplest for qemu to just do the fallback, but it's
possible we might want to advertise different things to the guest
based on whether they'll be fast or not.

For vfio.. there I don't think we need a capability, since multitce
hypercall support should be there from day one.

Patch

diff --git a/hw/spapr.c b/hw/spapr.c
index 2ec0cd0..231a7b6 100644
--- a/hw/spapr.c
+++ b/hw/spapr.c
@@ -233,6 +233,9 @@  static void *spapr_create_fdt_skel(const char *cpu_model,
     CPUPPCState *env;
     uint32_t start_prop = cpu_to_be32(initrd_base);
     uint32_t end_prop = cpu_to_be32(initrd_base + initrd_size);
+    char hypertas_propm[] = "hcall-pft\0hcall-term\0hcall-dabr\0hcall-interrupt"
+        "\0hcall-tce\0hcall-vio\0hcall-splpar\0hcall-bulk"
+        "\0hcall-multi-tce";
     char hypertas_prop[] = "hcall-pft\0hcall-term\0hcall-dabr\0hcall-interrupt"
         "\0hcall-tce\0hcall-vio\0hcall-splpar\0hcall-bulk";
     char qemu_hypertas_prop[] = "hcall-memop1";
@@ -406,8 +409,13 @@  static void *spapr_create_fdt_skel(const char *cpu_model,
     /* RTAS */
     _FDT((fdt_begin_node(fdt, "rtas")));
 
-    _FDT((fdt_property(fdt, "ibm,hypertas-functions", hypertas_prop,
-                       sizeof(hypertas_prop))));
+    if (kvm_check_extension(kvm_state, KVM_CAP_PPC_MULTITCE)) {
+        _FDT((fdt_property(fdt, "ibm,hypertas-functions", hypertas_propm,
+                           sizeof(hypertas_propm))));
+    } else {
+        _FDT((fdt_property(fdt, "ibm,hypertas-functions", hypertas_prop,
+                           sizeof(hypertas_prop))));
+    }
     _FDT((fdt_property(fdt, "qemu,hypertas-functions", qemu_hypertas_prop,
                        sizeof(qemu_hypertas_prop))));
 
diff --git a/hw/spapr_iommu.c b/hw/spapr_iommu.c
index 5904c1c..94630c1 100644
--- a/hw/spapr_iommu.c
+++ b/hw/spapr_iommu.c
@@ -234,6 +234,75 @@  static target_ulong put_tce_emu(sPAPRTCETable *tcet, target_ulong ioba,
     return H_SUCCESS;
 }
 
+static target_ulong h_put_tce_indirect(PowerPCCPU *cpu,
+                                       sPAPREnvironment *spapr,
+                                       target_ulong opcode, target_ulong *args)
+{
+    int i;
+    target_ulong liobn = args[0];
+    target_ulong ioba = args[1];
+    target_ulong tce_list = args[2];
+    target_ulong npages = args[3];
+    target_ulong *tces, ret = 0;
+    sPAPRTCETable *tcet = spapr_tce_find_by_liobn(liobn);
+
+    if (liobn & 0xFFFFFFFF00000000ULL) {
+        hcall_dprintf("spapr_vio_put_tce on out-of-boundsw LIOBN "
+                      TARGET_FMT_lx "\n", liobn);
+        return H_PARAMETER;
+    }
+
+    tces = (target_ulong *) qemu_get_ram_ptr(tce_list & ~SPAPR_TCE_PAGE_MASK);
+
+    if (tcet) {
+        for (i = 0; (i < npages) && !ret; ++i, ioba += SPAPR_TCE_PAGE_SIZE) {
+            ret = put_tce_emu(tcet, ioba, tces[i]);
+        }
+        return ret;
+    }
+#ifdef DEBUG_TCE
+    fprintf(stderr, "%s on liobn=" TARGET_FMT_lx /*%s*/
+            "  ioba 0x" TARGET_FMT_lx "  TCE 0x" TARGET_FMT_lx "\n",
+            __func__, liobn, /*dev->qdev.id, */ioba, tce);
+#endif
+
+    return H_PARAMETER;
+}
+
+static target_ulong h_stuff_tce(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+                              target_ulong opcode, target_ulong *args)
+{
+    int i;
+    target_ulong liobn = args[0];
+    target_ulong ioba = args[1];
+    target_ulong tce_value = args[2];
+    target_ulong npages = args[3];
+    target_ulong ret = 0;
+    sPAPRTCETable *tcet = spapr_tce_find_by_liobn(liobn);
+
+    if (liobn & 0xFFFFFFFF00000000ULL) {
+        hcall_dprintf("spapr_vio_put_tce on out-of-boundsw LIOBN "
+                      TARGET_FMT_lx "\n", liobn);
+        return H_PARAMETER;
+    }
+
+    ioba &= ~(SPAPR_TCE_PAGE_SIZE - 1);
+
+    if (tcet) {
+        for (i = 0; (i < npages) && !ret; ++i, ioba += SPAPR_TCE_PAGE_SIZE) {
+            ret = put_tce_emu(tcet, ioba, tce_value);
+        }
+        return ret;
+    }
+#ifdef DEBUG_TCE
+    fprintf(stderr, "%s on liobn=" TARGET_FMT_lx /*%s*/
+            "  ioba 0x" TARGET_FMT_lx "  TCE 0x" TARGET_FMT_lx "\n",
+            __func__, liobn, /*dev->qdev.id, */ioba, tce);
+#endif
+
+    return H_PARAMETER;
+}
+
 static target_ulong h_put_tce(PowerPCCPU *cpu, sPAPREnvironment *spapr,
                               target_ulong opcode, target_ulong *args)
 {
@@ -268,6 +337,8 @@  void spapr_iommu_init(void)
 
     /* hcall-tce */
     spapr_register_hypercall(H_PUT_TCE, h_put_tce);
+    spapr_register_hypercall(H_PUT_TCE_INDIRECT, h_put_tce_indirect);
+    spapr_register_hypercall(H_STUFF_TCE, h_stuff_tce);
 }
 
 int spapr_dma_dt(void *fdt, int node_off, const char *propname,
diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h
index 56077d5..8aff3b0 100644
--- a/linux-headers/linux/kvm.h
+++ b/linux-headers/linux/kvm.h
@@ -633,6 +633,7 @@  struct kvm_ppc_smmu_info {
 #define KVM_CAP_PPC_RTAS 84
 #define KVM_CAP_SPAPR_XICS 85
 #define KVM_CAP_PPC_HTAB_FD 86
+#define KVM_CAP_PPC_MULTITCE 87
 
 #ifdef KVM_CAP_IRQ_ROUTING