diff mbox

[qemu] target-ppc: kvm: make use of KVM_CREATE_SPAPR_TCE_64

Message ID 20161222011312.12778-1-aik@ozlabs.ru
State New
Headers show

Commit Message

Alexey Kardashevskiy Dec. 22, 2016, 1:13 a.m. UTC
KVM_CAP_SPAPR_TCE capability allows creating TCE tables in KVM which
allows having in-kernel acceleration for H_PUT_TCE_xxx hypercalls.
However it only supports 32bit DMA windows at zero bus offset.

There is a new KVM_CAP_SPAPR_TCE_64 capability which supports 64bit
window size, variable page size and bus offset.

This makes use of the new capability. The kernel headers are already
updated as the kernel support went in to v4.6.

Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
---
 target-ppc/kvm_ppc.h | 12 +++++++-----
 hw/ppc/spapr_iommu.c |  8 +++++---
 target-ppc/kvm.c     | 48 +++++++++++++++++++++++++++++++++++++-----------
 3 files changed, 49 insertions(+), 19 deletions(-)

Comments

David Gibson Jan. 3, 2017, 2:26 a.m. UTC | #1
On Thu, Dec 22, 2016 at 12:13:12PM +1100, Alexey Kardashevskiy wrote:
> KVM_CAP_SPAPR_TCE capability allows creating TCE tables in KVM which
> allows having in-kernel acceleration for H_PUT_TCE_xxx hypercalls.
> However it only supports 32bit DMA windows at zero bus offset.
> 
> There is a new KVM_CAP_SPAPR_TCE_64 capability which supports 64bit
> window size, variable page size and bus offset.
> 
> This makes use of the new capability. The kernel headers are already
> updated as the kernel support went in to v4.6.
> 
> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
> ---
>  target-ppc/kvm_ppc.h | 12 +++++++-----
>  hw/ppc/spapr_iommu.c |  8 +++++---
>  target-ppc/kvm.c     | 48 +++++++++++++++++++++++++++++++++++++-----------
>  3 files changed, 49 insertions(+), 19 deletions(-)
> 
> diff --git a/target-ppc/kvm_ppc.h b/target-ppc/kvm_ppc.h
> index bd1d78bfbe..14320c2378 100644
> --- a/target-ppc/kvm_ppc.h
> +++ b/target-ppc/kvm_ppc.h
> @@ -36,8 +36,9 @@ int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu);
>  #ifndef CONFIG_USER_ONLY
>  off_t kvmppc_alloc_rma(void **rma);
>  bool kvmppc_spapr_use_multitce(void);
> -void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd,
> -                              bool need_vfio);
> +void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t page_shift,
> +                              uint64_t bus_offset, uint32_t nb_table,
> +                              int *pfd, bool need_vfio);
>  int kvmppc_remove_spapr_tce(void *table, int pfd, uint32_t window_size);
>  int kvmppc_reset_htab(int shift_hint);
>  uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift);
> @@ -168,9 +169,10 @@ static inline bool kvmppc_spapr_use_multitce(void)
>      return false;
>  }
>  
> -static inline void *kvmppc_create_spapr_tce(uint32_t liobn,
> -                                            uint32_t window_size, int *fd,
> -                                            bool need_vfio)
> +static inline void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t page_shift,
> +                                            uint64_t bus_offset,
> +                                            uint32_t nb_table,
> +                                            int *pfd, bool need_vfio)
>  {
>      return NULL;
>  }
> diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c
> index ae30bbe30f..29c80bb3c8 100644
> --- a/hw/ppc/spapr_iommu.c
> +++ b/hw/ppc/spapr_iommu.c
> @@ -79,15 +79,16 @@ static IOMMUAccessFlags spapr_tce_iommu_access_flags(uint64_t tce)
>  
>  static uint64_t *spapr_tce_alloc_table(uint32_t liobn,
>                                         uint32_t page_shift,
> +                                       uint64_t bus_offset,
>                                         uint32_t nb_table,
>                                         int *fd,
>                                         bool need_vfio)
>  {
>      uint64_t *table = NULL;
> -    uint64_t window_size = (uint64_t)nb_table << page_shift;
>  
> -    if (kvm_enabled() && !(window_size >> 32)) {
> -        table = kvmppc_create_spapr_tce(liobn, window_size, fd, need_vfio);
> +    if (kvm_enabled()) {

This is broken.  Previously, if we had a >4GiB window, we'd fall back
to managing it in userspace, which would work, albeit slowly.  Now, if
you have an older kernel which doesn't support KVM_CAP_SPAPR_TCE_64 it
will attempt to allocate it in the kernel, and fail completely.

> +        table = kvmppc_create_spapr_tce(liobn, page_shift, bus_offset, nb_table,
> +                                        fd, need_vfio);
>      }
>  
>      if (!table) {
> @@ -342,6 +343,7 @@ void spapr_tce_table_enable(sPAPRTCETable *tcet,
>      tcet->nb_table = nb_table;
>      tcet->table = spapr_tce_alloc_table(tcet->liobn,
>                                          tcet->page_shift,
> +                                        tcet->bus_offset,
>                                          tcet->nb_table,
>                                          &tcet->fd,
>                                          tcet->need_vfio);
> diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c
> index 9c4834c4fc..6e91a4d8bb 100644
> --- a/target-ppc/kvm.c
> +++ b/target-ppc/kvm.c
> @@ -71,6 +71,7 @@ static int cap_booke_sregs;
>  static int cap_ppc_smt;
>  static int cap_ppc_rma;
>  static int cap_spapr_tce;
> +static int cap_spapr_tce_64;
>  static int cap_spapr_multitce;
>  static int cap_spapr_vfio;
>  static int cap_hior;
> @@ -123,6 +124,7 @@ int kvm_arch_init(MachineState *ms, KVMState *s)
>      cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
>      cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
>      cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
> +    cap_spapr_tce_64 = kvm_check_extension(s, KVM_CAP_SPAPR_TCE_64);
>      cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
>      cap_spapr_vfio = false;
>      cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
> @@ -2201,13 +2203,10 @@ bool kvmppc_spapr_use_multitce(void)
>      return cap_spapr_multitce;
>  }
>  
> -void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd,
> -                              bool need_vfio)
> +void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t page_shift,
> +                              uint64_t bus_offset, uint32_t nb_table,
> +                              int *pfd, bool need_vfio)
>  {
> -    struct kvm_create_spapr_tce args = {
> -        .liobn = liobn,
> -        .window_size = window_size,
> -    };
>      long len;
>      int fd;
>      void *table;
> @@ -2220,14 +2219,41 @@ void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd,
>          return NULL;
>      }
>  
> -    fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
> -    if (fd < 0) {
> -        fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
> -                liobn);
> +    if (cap_spapr_tce_64) {
> +        struct kvm_create_spapr_tce_64 args = {
> +            .liobn = liobn,
> +            .page_shift = page_shift,
> +            .offset = bus_offset >> page_shift,
> +            .size = nb_table,
> +            .flags = 0
> +        };
> +        fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE_64, &args);
> +        if (fd < 0) {
> +            fprintf(stderr,
> +                    "KVM: Failed to create TCE64 table for liobn 0x%x\n",
> +                    liobn);
> +            return NULL;
> +        }
> +    } else if (cap_spapr_tce) {
> +        uint64_t window_size = (uint64_t) nb_table << page_shift;
> +        struct kvm_create_spapr_tce args = {
> +            .liobn = liobn,
> +            .window_size = window_size,
> +        };
> +        if ((window_size != args.window_size) || bus_offset) {
> +            return NULL;
> +        }
> +        fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
> +        if (fd < 0) {
> +            fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
> +                    liobn);
> +            return NULL;
> +        }
> +    } else {
>          return NULL;
>      }
>  
> -    len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(uint64_t);
> +    len = nb_table * sizeof(uint64_t);
>      /* FIXME: round this up to page size */
>  
>      table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
Alexey Kardashevskiy Jan. 9, 2017, 2:38 a.m. UTC | #2
On 03/01/17 13:26, David Gibson wrote:
> On Thu, Dec 22, 2016 at 12:13:12PM +1100, Alexey Kardashevskiy wrote:
>> KVM_CAP_SPAPR_TCE capability allows creating TCE tables in KVM which
>> allows having in-kernel acceleration for H_PUT_TCE_xxx hypercalls.
>> However it only supports 32bit DMA windows at zero bus offset.
>>
>> There is a new KVM_CAP_SPAPR_TCE_64 capability which supports 64bit
>> window size, variable page size and bus offset.
>>
>> This makes use of the new capability. The kernel headers are already
>> updated as the kernel support went in to v4.6.
>>
>> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
>> ---
>>  target-ppc/kvm_ppc.h | 12 +++++++-----
>>  hw/ppc/spapr_iommu.c |  8 +++++---
>>  target-ppc/kvm.c     | 48 +++++++++++++++++++++++++++++++++++++-----------
>>  3 files changed, 49 insertions(+), 19 deletions(-)
>>
>> diff --git a/target-ppc/kvm_ppc.h b/target-ppc/kvm_ppc.h
>> index bd1d78bfbe..14320c2378 100644
>> --- a/target-ppc/kvm_ppc.h
>> +++ b/target-ppc/kvm_ppc.h
>> @@ -36,8 +36,9 @@ int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu);
>>  #ifndef CONFIG_USER_ONLY
>>  off_t kvmppc_alloc_rma(void **rma);
>>  bool kvmppc_spapr_use_multitce(void);
>> -void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd,
>> -                              bool need_vfio);
>> +void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t page_shift,
>> +                              uint64_t bus_offset, uint32_t nb_table,
>> +                              int *pfd, bool need_vfio);
>>  int kvmppc_remove_spapr_tce(void *table, int pfd, uint32_t window_size);
>>  int kvmppc_reset_htab(int shift_hint);
>>  uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift);
>> @@ -168,9 +169,10 @@ static inline bool kvmppc_spapr_use_multitce(void)
>>      return false;
>>  }
>>  
>> -static inline void *kvmppc_create_spapr_tce(uint32_t liobn,
>> -                                            uint32_t window_size, int *fd,
>> -                                            bool need_vfio)
>> +static inline void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t page_shift,
>> +                                            uint64_t bus_offset,
>> +                                            uint32_t nb_table,
>> +                                            int *pfd, bool need_vfio)
>>  {
>>      return NULL;
>>  }
>> diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c
>> index ae30bbe30f..29c80bb3c8 100644
>> --- a/hw/ppc/spapr_iommu.c
>> +++ b/hw/ppc/spapr_iommu.c
>> @@ -79,15 +79,16 @@ static IOMMUAccessFlags spapr_tce_iommu_access_flags(uint64_t tce)
>>  
>>  static uint64_t *spapr_tce_alloc_table(uint32_t liobn,
>>                                         uint32_t page_shift,
>> +                                       uint64_t bus_offset,
>>                                         uint32_t nb_table,
>>                                         int *fd,
>>                                         bool need_vfio)
>>  {
>>      uint64_t *table = NULL;
>> -    uint64_t window_size = (uint64_t)nb_table << page_shift;
>>  
>> -    if (kvm_enabled() && !(window_size >> 32)) {
>> -        table = kvmppc_create_spapr_tce(liobn, window_size, fd, need_vfio);
>> +    if (kvm_enabled()) {
> 
> This is broken.  Previously, if we had a >4GiB window, we'd fall back
> to managing it in userspace, which would work, albeit slowly.  Now, if
> you have an older kernel which doesn't support KVM_CAP_SPAPR_TCE_64 it
> will attempt to allocate it in the kernel, and fail completely.


No, kvmppc_create_spapr_tce() would return NULL and right after that there
is a "if (!table)" (it can be seen at the end of this chunk) to handle the
failure.


> 
>> +        table = kvmppc_create_spapr_tce(liobn, page_shift, bus_offset, nb_table,
>> +                                        fd, need_vfio);
>>      }
>>  
>>      if (!table) {
>> @@ -342,6 +343,7 @@ void spapr_tce_table_enable(sPAPRTCETable *tcet,
>>      tcet->nb_table = nb_table;
>>      tcet->table = spapr_tce_alloc_table(tcet->liobn,
>>                                          tcet->page_shift,
>> +                                        tcet->bus_offset,
>>                                          tcet->nb_table,
>>                                          &tcet->fd,
>>                                          tcet->need_vfio);
>> diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c
>> index 9c4834c4fc..6e91a4d8bb 100644
>> --- a/target-ppc/kvm.c
>> +++ b/target-ppc/kvm.c
>> @@ -71,6 +71,7 @@ static int cap_booke_sregs;
>>  static int cap_ppc_smt;
>>  static int cap_ppc_rma;
>>  static int cap_spapr_tce;
>> +static int cap_spapr_tce_64;
>>  static int cap_spapr_multitce;
>>  static int cap_spapr_vfio;
>>  static int cap_hior;
>> @@ -123,6 +124,7 @@ int kvm_arch_init(MachineState *ms, KVMState *s)
>>      cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
>>      cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
>>      cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
>> +    cap_spapr_tce_64 = kvm_check_extension(s, KVM_CAP_SPAPR_TCE_64);
>>      cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
>>      cap_spapr_vfio = false;
>>      cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
>> @@ -2201,13 +2203,10 @@ bool kvmppc_spapr_use_multitce(void)
>>      return cap_spapr_multitce;
>>  }
>>  
>> -void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd,
>> -                              bool need_vfio)
>> +void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t page_shift,
>> +                              uint64_t bus_offset, uint32_t nb_table,
>> +                              int *pfd, bool need_vfio)
>>  {
>> -    struct kvm_create_spapr_tce args = {
>> -        .liobn = liobn,
>> -        .window_size = window_size,
>> -    };
>>      long len;
>>      int fd;
>>      void *table;
>> @@ -2220,14 +2219,41 @@ void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd,
>>          return NULL;
>>      }
>>  
>> -    fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
>> -    if (fd < 0) {
>> -        fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
>> -                liobn);
>> +    if (cap_spapr_tce_64) {
>> +        struct kvm_create_spapr_tce_64 args = {
>> +            .liobn = liobn,
>> +            .page_shift = page_shift,
>> +            .offset = bus_offset >> page_shift,
>> +            .size = nb_table,
>> +            .flags = 0
>> +        };
>> +        fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE_64, &args);
>> +        if (fd < 0) {
>> +            fprintf(stderr,
>> +                    "KVM: Failed to create TCE64 table for liobn 0x%x\n",
>> +                    liobn);
>> +            return NULL;
>> +        }
>> +    } else if (cap_spapr_tce) {
>> +        uint64_t window_size = (uint64_t) nb_table << page_shift;
>> +        struct kvm_create_spapr_tce args = {
>> +            .liobn = liobn,
>> +            .window_size = window_size,
>> +        };
>> +        if ((window_size != args.window_size) || bus_offset) {
>> +            return NULL;
>> +        }
>> +        fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
>> +        if (fd < 0) {
>> +            fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
>> +                    liobn);
>> +            return NULL;
>> +        }
>> +    } else {
>>          return NULL;
>>      }
>>  
>> -    len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(uint64_t);
>> +    len = nb_table * sizeof(uint64_t);
>>      /* FIXME: round this up to page size */
>>  
>>      table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
>
David Gibson Jan. 9, 2017, 2:53 a.m. UTC | #3
On Mon, Jan 09, 2017 at 01:38:26PM +1100, Alexey Kardashevskiy wrote:
> On 03/01/17 13:26, David Gibson wrote:
> > On Thu, Dec 22, 2016 at 12:13:12PM +1100, Alexey Kardashevskiy wrote:
> >> KVM_CAP_SPAPR_TCE capability allows creating TCE tables in KVM which
> >> allows having in-kernel acceleration for H_PUT_TCE_xxx hypercalls.
> >> However it only supports 32bit DMA windows at zero bus offset.
> >>
> >> There is a new KVM_CAP_SPAPR_TCE_64 capability which supports 64bit
> >> window size, variable page size and bus offset.
> >>
> >> This makes use of the new capability. The kernel headers are already
> >> updated as the kernel support went in to v4.6.
> >>
> >> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
> >> ---
> >>  target-ppc/kvm_ppc.h | 12 +++++++-----
> >>  hw/ppc/spapr_iommu.c |  8 +++++---
> >>  target-ppc/kvm.c     | 48 +++++++++++++++++++++++++++++++++++++-----------
> >>  3 files changed, 49 insertions(+), 19 deletions(-)
> >>
> >> diff --git a/target-ppc/kvm_ppc.h b/target-ppc/kvm_ppc.h
> >> index bd1d78bfbe..14320c2378 100644
> >> --- a/target-ppc/kvm_ppc.h
> >> +++ b/target-ppc/kvm_ppc.h
> >> @@ -36,8 +36,9 @@ int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu);
> >>  #ifndef CONFIG_USER_ONLY
> >>  off_t kvmppc_alloc_rma(void **rma);
> >>  bool kvmppc_spapr_use_multitce(void);
> >> -void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd,
> >> -                              bool need_vfio);
> >> +void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t page_shift,
> >> +                              uint64_t bus_offset, uint32_t nb_table,
> >> +                              int *pfd, bool need_vfio);
> >>  int kvmppc_remove_spapr_tce(void *table, int pfd, uint32_t window_size);
> >>  int kvmppc_reset_htab(int shift_hint);
> >>  uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift);
> >> @@ -168,9 +169,10 @@ static inline bool kvmppc_spapr_use_multitce(void)
> >>      return false;
> >>  }
> >>  
> >> -static inline void *kvmppc_create_spapr_tce(uint32_t liobn,
> >> -                                            uint32_t window_size, int *fd,
> >> -                                            bool need_vfio)
> >> +static inline void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t page_shift,
> >> +                                            uint64_t bus_offset,
> >> +                                            uint32_t nb_table,
> >> +                                            int *pfd, bool need_vfio)
> >>  {
> >>      return NULL;
> >>  }
> >> diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c
> >> index ae30bbe30f..29c80bb3c8 100644
> >> --- a/hw/ppc/spapr_iommu.c
> >> +++ b/hw/ppc/spapr_iommu.c
> >> @@ -79,15 +79,16 @@ static IOMMUAccessFlags spapr_tce_iommu_access_flags(uint64_t tce)
> >>  
> >>  static uint64_t *spapr_tce_alloc_table(uint32_t liobn,
> >>                                         uint32_t page_shift,
> >> +                                       uint64_t bus_offset,
> >>                                         uint32_t nb_table,
> >>                                         int *fd,
> >>                                         bool need_vfio)
> >>  {
> >>      uint64_t *table = NULL;
> >> -    uint64_t window_size = (uint64_t)nb_table << page_shift;
> >>  
> >> -    if (kvm_enabled() && !(window_size >> 32)) {
> >> -        table = kvmppc_create_spapr_tce(liobn, window_size, fd, need_vfio);
> >> +    if (kvm_enabled()) {
> > 
> > This is broken.  Previously, if we had a >4GiB window, we'd fall back
> > to managing it in userspace, which would work, albeit slowly.  Now, if
> > you have an older kernel which doesn't support KVM_CAP_SPAPR_TCE_64 it
> > will attempt to allocate it in the kernel, and fail completely.
> 
> 
> No, kvmppc_create_spapr_tce() would return NULL and right after that there
> is a "if (!table)" (it can be seen at the end of this chunk) to handle the
> failure.

Oh, yes, sorry.  For some reason I thought there was a return in that
if block.

> > 
> >> +        table = kvmppc_create_spapr_tce(liobn, page_shift, bus_offset, nb_table,
> >> +                                        fd, need_vfio);
> >>      }
> >>  
> >>      if (!table) {
> >> @@ -342,6 +343,7 @@ void spapr_tce_table_enable(sPAPRTCETable *tcet,
> >>      tcet->nb_table = nb_table;
> >>      tcet->table = spapr_tce_alloc_table(tcet->liobn,
> >>                                          tcet->page_shift,
> >> +                                        tcet->bus_offset,
> >>                                          tcet->nb_table,
> >>                                          &tcet->fd,
> >>                                          tcet->need_vfio);
> >> diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c
> >> index 9c4834c4fc..6e91a4d8bb 100644
> >> --- a/target-ppc/kvm.c
> >> +++ b/target-ppc/kvm.c
> >> @@ -71,6 +71,7 @@ static int cap_booke_sregs;
> >>  static int cap_ppc_smt;
> >>  static int cap_ppc_rma;
> >>  static int cap_spapr_tce;
> >> +static int cap_spapr_tce_64;
> >>  static int cap_spapr_multitce;
> >>  static int cap_spapr_vfio;
> >>  static int cap_hior;
> >> @@ -123,6 +124,7 @@ int kvm_arch_init(MachineState *ms, KVMState *s)
> >>      cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
> >>      cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
> >>      cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
> >> +    cap_spapr_tce_64 = kvm_check_extension(s, KVM_CAP_SPAPR_TCE_64);
> >>      cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
> >>      cap_spapr_vfio = false;
> >>      cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
> >> @@ -2201,13 +2203,10 @@ bool kvmppc_spapr_use_multitce(void)
> >>      return cap_spapr_multitce;
> >>  }
> >>  
> >> -void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd,
> >> -                              bool need_vfio)
> >> +void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t page_shift,
> >> +                              uint64_t bus_offset, uint32_t nb_table,
> >> +                              int *pfd, bool need_vfio)
> >>  {
> >> -    struct kvm_create_spapr_tce args = {
> >> -        .liobn = liobn,
> >> -        .window_size = window_size,
> >> -    };
> >>      long len;
> >>      int fd;
> >>      void *table;
> >> @@ -2220,14 +2219,41 @@ void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd,
> >>          return NULL;
> >>      }
> >>  
> >> -    fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
> >> -    if (fd < 0) {
> >> -        fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
> >> -                liobn);
> >> +    if (cap_spapr_tce_64) {
> >> +        struct kvm_create_spapr_tce_64 args = {
> >> +            .liobn = liobn,
> >> +            .page_shift = page_shift,
> >> +            .offset = bus_offset >> page_shift,
> >> +            .size = nb_table,
> >> +            .flags = 0
> >> +        };
> >> +        fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE_64, &args);
> >> +        if (fd < 0) {
> >> +            fprintf(stderr,
> >> +                    "KVM: Failed to create TCE64 table for liobn 0x%x\n",
> >> +                    liobn);
> >> +            return NULL;
> >> +        }
> >> +    } else if (cap_spapr_tce) {
> >> +        uint64_t window_size = (uint64_t) nb_table << page_shift;
> >> +        struct kvm_create_spapr_tce args = {
> >> +            .liobn = liobn,
> >> +            .window_size = window_size,
> >> +        };
> >> +        if ((window_size != args.window_size) || bus_offset) {
> >> +            return NULL;
> >> +        }
> >> +        fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
> >> +        if (fd < 0) {
> >> +            fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
> >> +                    liobn);
> >> +            return NULL;
> >> +        }
> >> +    } else {
> >>          return NULL;
> >>      }
> >>  
> >> -    len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(uint64_t);
> >> +    len = nb_table * sizeof(uint64_t);
> >>      /* FIXME: round this up to page size */
> >>  
> >>      table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
> > 
> 
>
Alexey Kardashevskiy March 2, 2017, 2:40 a.m. UTC | #4
On 09/01/17 13:53, David Gibson wrote:
> On Mon, Jan 09, 2017 at 01:38:26PM +1100, Alexey Kardashevskiy wrote:
>> On 03/01/17 13:26, David Gibson wrote:
>>> On Thu, Dec 22, 2016 at 12:13:12PM +1100, Alexey Kardashevskiy wrote:
>>>> KVM_CAP_SPAPR_TCE capability allows creating TCE tables in KVM which
>>>> allows having in-kernel acceleration for H_PUT_TCE_xxx hypercalls.
>>>> However it only supports 32bit DMA windows at zero bus offset.
>>>>
>>>> There is a new KVM_CAP_SPAPR_TCE_64 capability which supports 64bit
>>>> window size, variable page size and bus offset.
>>>>
>>>> This makes use of the new capability. The kernel headers are already
>>>> updated as the kernel support went in to v4.6.
>>>>
>>>> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
>>>> ---
>>>>  target-ppc/kvm_ppc.h | 12 +++++++-----
>>>>  hw/ppc/spapr_iommu.c |  8 +++++---
>>>>  target-ppc/kvm.c     | 48 +++++++++++++++++++++++++++++++++++++-----------
>>>>  3 files changed, 49 insertions(+), 19 deletions(-)
>>>>
>>>> diff --git a/target-ppc/kvm_ppc.h b/target-ppc/kvm_ppc.h
>>>> index bd1d78bfbe..14320c2378 100644
>>>> --- a/target-ppc/kvm_ppc.h
>>>> +++ b/target-ppc/kvm_ppc.h
>>>> @@ -36,8 +36,9 @@ int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu);
>>>>  #ifndef CONFIG_USER_ONLY
>>>>  off_t kvmppc_alloc_rma(void **rma);
>>>>  bool kvmppc_spapr_use_multitce(void);
>>>> -void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd,
>>>> -                              bool need_vfio);
>>>> +void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t page_shift,
>>>> +                              uint64_t bus_offset, uint32_t nb_table,
>>>> +                              int *pfd, bool need_vfio);
>>>>  int kvmppc_remove_spapr_tce(void *table, int pfd, uint32_t window_size);
>>>>  int kvmppc_reset_htab(int shift_hint);
>>>>  uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift);
>>>> @@ -168,9 +169,10 @@ static inline bool kvmppc_spapr_use_multitce(void)
>>>>      return false;
>>>>  }
>>>>  
>>>> -static inline void *kvmppc_create_spapr_tce(uint32_t liobn,
>>>> -                                            uint32_t window_size, int *fd,
>>>> -                                            bool need_vfio)
>>>> +static inline void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t page_shift,
>>>> +                                            uint64_t bus_offset,
>>>> +                                            uint32_t nb_table,
>>>> +                                            int *pfd, bool need_vfio)
>>>>  {
>>>>      return NULL;
>>>>  }
>>>> diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c
>>>> index ae30bbe30f..29c80bb3c8 100644
>>>> --- a/hw/ppc/spapr_iommu.c
>>>> +++ b/hw/ppc/spapr_iommu.c
>>>> @@ -79,15 +79,16 @@ static IOMMUAccessFlags spapr_tce_iommu_access_flags(uint64_t tce)
>>>>  
>>>>  static uint64_t *spapr_tce_alloc_table(uint32_t liobn,
>>>>                                         uint32_t page_shift,
>>>> +                                       uint64_t bus_offset,
>>>>                                         uint32_t nb_table,
>>>>                                         int *fd,
>>>>                                         bool need_vfio)
>>>>  {
>>>>      uint64_t *table = NULL;
>>>> -    uint64_t window_size = (uint64_t)nb_table << page_shift;
>>>>  
>>>> -    if (kvm_enabled() && !(window_size >> 32)) {
>>>> -        table = kvmppc_create_spapr_tce(liobn, window_size, fd, need_vfio);
>>>> +    if (kvm_enabled()) {
>>>
>>> This is broken.  Previously, if we had a >4GiB window, we'd fall back
>>> to managing it in userspace, which would work, albeit slowly.  Now, if
>>> you have an older kernel which doesn't support KVM_CAP_SPAPR_TCE_64 it
>>> will attempt to allocate it in the kernel, and fail completely.
>>
>>
>> No, kvmppc_create_spapr_tce() would return NULL and right after that there
>> is a "if (!table)" (it can be seen at the end of this chunk) to handle the
>> failure.
> 
> Oh, yes, sorry.  For some reason I thought there was a return in that
> if block.



This was the only comment which turned out to be not a bug, what now? :)


> 
>>>
>>>> +        table = kvmppc_create_spapr_tce(liobn, page_shift, bus_offset, nb_table,
>>>> +                                        fd, need_vfio);
>>>>      }
>>>>  
>>>>      if (!table) {
>>>> @@ -342,6 +343,7 @@ void spapr_tce_table_enable(sPAPRTCETable *tcet,
>>>>      tcet->nb_table = nb_table;
>>>>      tcet->table = spapr_tce_alloc_table(tcet->liobn,
>>>>                                          tcet->page_shift,
>>>> +                                        tcet->bus_offset,
>>>>                                          tcet->nb_table,
>>>>                                          &tcet->fd,
>>>>                                          tcet->need_vfio);
>>>> diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c
>>>> index 9c4834c4fc..6e91a4d8bb 100644
>>>> --- a/target-ppc/kvm.c
>>>> +++ b/target-ppc/kvm.c
>>>> @@ -71,6 +71,7 @@ static int cap_booke_sregs;
>>>>  static int cap_ppc_smt;
>>>>  static int cap_ppc_rma;
>>>>  static int cap_spapr_tce;
>>>> +static int cap_spapr_tce_64;
>>>>  static int cap_spapr_multitce;
>>>>  static int cap_spapr_vfio;
>>>>  static int cap_hior;
>>>> @@ -123,6 +124,7 @@ int kvm_arch_init(MachineState *ms, KVMState *s)
>>>>      cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
>>>>      cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
>>>>      cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
>>>> +    cap_spapr_tce_64 = kvm_check_extension(s, KVM_CAP_SPAPR_TCE_64);
>>>>      cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
>>>>      cap_spapr_vfio = false;
>>>>      cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
>>>> @@ -2201,13 +2203,10 @@ bool kvmppc_spapr_use_multitce(void)
>>>>      return cap_spapr_multitce;
>>>>  }
>>>>  
>>>> -void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd,
>>>> -                              bool need_vfio)
>>>> +void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t page_shift,
>>>> +                              uint64_t bus_offset, uint32_t nb_table,
>>>> +                              int *pfd, bool need_vfio)
>>>>  {
>>>> -    struct kvm_create_spapr_tce args = {
>>>> -        .liobn = liobn,
>>>> -        .window_size = window_size,
>>>> -    };
>>>>      long len;
>>>>      int fd;
>>>>      void *table;
>>>> @@ -2220,14 +2219,41 @@ void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd,
>>>>          return NULL;
>>>>      }
>>>>  
>>>> -    fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
>>>> -    if (fd < 0) {
>>>> -        fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
>>>> -                liobn);
>>>> +    if (cap_spapr_tce_64) {
>>>> +        struct kvm_create_spapr_tce_64 args = {
>>>> +            .liobn = liobn,
>>>> +            .page_shift = page_shift,
>>>> +            .offset = bus_offset >> page_shift,
>>>> +            .size = nb_table,
>>>> +            .flags = 0
>>>> +        };
>>>> +        fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE_64, &args);
>>>> +        if (fd < 0) {
>>>> +            fprintf(stderr,
>>>> +                    "KVM: Failed to create TCE64 table for liobn 0x%x\n",
>>>> +                    liobn);
>>>> +            return NULL;
>>>> +        }
>>>> +    } else if (cap_spapr_tce) {
>>>> +        uint64_t window_size = (uint64_t) nb_table << page_shift;
>>>> +        struct kvm_create_spapr_tce args = {
>>>> +            .liobn = liobn,
>>>> +            .window_size = window_size,
>>>> +        };
>>>> +        if ((window_size != args.window_size) || bus_offset) {
>>>> +            return NULL;
>>>> +        }
>>>> +        fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
>>>> +        if (fd < 0) {
>>>> +            fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
>>>> +                    liobn);
>>>> +            return NULL;
>>>> +        }
>>>> +    } else {
>>>>          return NULL;
>>>>      }
>>>>  
>>>> -    len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(uint64_t);
>>>> +    len = nb_table * sizeof(uint64_t);
>>>>      /* FIXME: round this up to page size */
>>>>  
>>>>      table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
>>>
>>
>>
> 
> 
> 
>
Alexey Kardashevskiy March 8, 2017, 4:48 a.m. UTC | #5
Ping? It is not urgent, just wanted to make sure that it is not forgotten
and I do not need to do anything to update it. Thanks.


On 02/03/17 13:40, Alexey Kardashevskiy wrote:
> On 09/01/17 13:53, David Gibson wrote:
>> On Mon, Jan 09, 2017 at 01:38:26PM +1100, Alexey Kardashevskiy wrote:
>>> On 03/01/17 13:26, David Gibson wrote:
>>>> On Thu, Dec 22, 2016 at 12:13:12PM +1100, Alexey Kardashevskiy wrote:
>>>>> KVM_CAP_SPAPR_TCE capability allows creating TCE tables in KVM which
>>>>> allows having in-kernel acceleration for H_PUT_TCE_xxx hypercalls.
>>>>> However it only supports 32bit DMA windows at zero bus offset.
>>>>>
>>>>> There is a new KVM_CAP_SPAPR_TCE_64 capability which supports 64bit
>>>>> window size, variable page size and bus offset.
>>>>>
>>>>> This makes use of the new capability. The kernel headers are already
>>>>> updated as the kernel support went in to v4.6.
>>>>>
>>>>> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
>>>>> ---
>>>>>  target-ppc/kvm_ppc.h | 12 +++++++-----
>>>>>  hw/ppc/spapr_iommu.c |  8 +++++---
>>>>>  target-ppc/kvm.c     | 48 +++++++++++++++++++++++++++++++++++++-----------
>>>>>  3 files changed, 49 insertions(+), 19 deletions(-)
>>>>>
>>>>> diff --git a/target-ppc/kvm_ppc.h b/target-ppc/kvm_ppc.h
>>>>> index bd1d78bfbe..14320c2378 100644
>>>>> --- a/target-ppc/kvm_ppc.h
>>>>> +++ b/target-ppc/kvm_ppc.h
>>>>> @@ -36,8 +36,9 @@ int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu);
>>>>>  #ifndef CONFIG_USER_ONLY
>>>>>  off_t kvmppc_alloc_rma(void **rma);
>>>>>  bool kvmppc_spapr_use_multitce(void);
>>>>> -void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd,
>>>>> -                              bool need_vfio);
>>>>> +void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t page_shift,
>>>>> +                              uint64_t bus_offset, uint32_t nb_table,
>>>>> +                              int *pfd, bool need_vfio);
>>>>>  int kvmppc_remove_spapr_tce(void *table, int pfd, uint32_t window_size);
>>>>>  int kvmppc_reset_htab(int shift_hint);
>>>>>  uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift);
>>>>> @@ -168,9 +169,10 @@ static inline bool kvmppc_spapr_use_multitce(void)
>>>>>      return false;
>>>>>  }
>>>>>  
>>>>> -static inline void *kvmppc_create_spapr_tce(uint32_t liobn,
>>>>> -                                            uint32_t window_size, int *fd,
>>>>> -                                            bool need_vfio)
>>>>> +static inline void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t page_shift,
>>>>> +                                            uint64_t bus_offset,
>>>>> +                                            uint32_t nb_table,
>>>>> +                                            int *pfd, bool need_vfio)
>>>>>  {
>>>>>      return NULL;
>>>>>  }
>>>>> diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c
>>>>> index ae30bbe30f..29c80bb3c8 100644
>>>>> --- a/hw/ppc/spapr_iommu.c
>>>>> +++ b/hw/ppc/spapr_iommu.c
>>>>> @@ -79,15 +79,16 @@ static IOMMUAccessFlags spapr_tce_iommu_access_flags(uint64_t tce)
>>>>>  
>>>>>  static uint64_t *spapr_tce_alloc_table(uint32_t liobn,
>>>>>                                         uint32_t page_shift,
>>>>> +                                       uint64_t bus_offset,
>>>>>                                         uint32_t nb_table,
>>>>>                                         int *fd,
>>>>>                                         bool need_vfio)
>>>>>  {
>>>>>      uint64_t *table = NULL;
>>>>> -    uint64_t window_size = (uint64_t)nb_table << page_shift;
>>>>>  
>>>>> -    if (kvm_enabled() && !(window_size >> 32)) {
>>>>> -        table = kvmppc_create_spapr_tce(liobn, window_size, fd, need_vfio);
>>>>> +    if (kvm_enabled()) {
>>>>
>>>> This is broken.  Previously, if we had a >4GiB window, we'd fall back
>>>> to managing it in userspace, which would work, albeit slowly.  Now, if
>>>> you have an older kernel which doesn't support KVM_CAP_SPAPR_TCE_64 it
>>>> will attempt to allocate it in the kernel, and fail completely.
>>>
>>>
>>> No, kvmppc_create_spapr_tce() would return NULL and right after that there
>>> is a "if (!table)" (it can be seen at the end of this chunk) to handle the
>>> failure.
>>
>> Oh, yes, sorry.  For some reason I thought there was a return in that
>> if block.
> 
> 
> 
> This was the only comment which turned out to be not a bug, what now? :)
> 
> 
>>
>>>>
>>>>> +        table = kvmppc_create_spapr_tce(liobn, page_shift, bus_offset, nb_table,
>>>>> +                                        fd, need_vfio);
>>>>>      }
>>>>>  
>>>>>      if (!table) {
>>>>> @@ -342,6 +343,7 @@ void spapr_tce_table_enable(sPAPRTCETable *tcet,
>>>>>      tcet->nb_table = nb_table;
>>>>>      tcet->table = spapr_tce_alloc_table(tcet->liobn,
>>>>>                                          tcet->page_shift,
>>>>> +                                        tcet->bus_offset,
>>>>>                                          tcet->nb_table,
>>>>>                                          &tcet->fd,
>>>>>                                          tcet->need_vfio);
>>>>> diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c
>>>>> index 9c4834c4fc..6e91a4d8bb 100644
>>>>> --- a/target-ppc/kvm.c
>>>>> +++ b/target-ppc/kvm.c
>>>>> @@ -71,6 +71,7 @@ static int cap_booke_sregs;
>>>>>  static int cap_ppc_smt;
>>>>>  static int cap_ppc_rma;
>>>>>  static int cap_spapr_tce;
>>>>> +static int cap_spapr_tce_64;
>>>>>  static int cap_spapr_multitce;
>>>>>  static int cap_spapr_vfio;
>>>>>  static int cap_hior;
>>>>> @@ -123,6 +124,7 @@ int kvm_arch_init(MachineState *ms, KVMState *s)
>>>>>      cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
>>>>>      cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
>>>>>      cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
>>>>> +    cap_spapr_tce_64 = kvm_check_extension(s, KVM_CAP_SPAPR_TCE_64);
>>>>>      cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
>>>>>      cap_spapr_vfio = false;
>>>>>      cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
>>>>> @@ -2201,13 +2203,10 @@ bool kvmppc_spapr_use_multitce(void)
>>>>>      return cap_spapr_multitce;
>>>>>  }
>>>>>  
>>>>> -void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd,
>>>>> -                              bool need_vfio)
>>>>> +void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t page_shift,
>>>>> +                              uint64_t bus_offset, uint32_t nb_table,
>>>>> +                              int *pfd, bool need_vfio)
>>>>>  {
>>>>> -    struct kvm_create_spapr_tce args = {
>>>>> -        .liobn = liobn,
>>>>> -        .window_size = window_size,
>>>>> -    };
>>>>>      long len;
>>>>>      int fd;
>>>>>      void *table;
>>>>> @@ -2220,14 +2219,41 @@ void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd,
>>>>>          return NULL;
>>>>>      }
>>>>>  
>>>>> -    fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
>>>>> -    if (fd < 0) {
>>>>> -        fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
>>>>> -                liobn);
>>>>> +    if (cap_spapr_tce_64) {
>>>>> +        struct kvm_create_spapr_tce_64 args = {
>>>>> +            .liobn = liobn,
>>>>> +            .page_shift = page_shift,
>>>>> +            .offset = bus_offset >> page_shift,
>>>>> +            .size = nb_table,
>>>>> +            .flags = 0
>>>>> +        };
>>>>> +        fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE_64, &args);
>>>>> +        if (fd < 0) {
>>>>> +            fprintf(stderr,
>>>>> +                    "KVM: Failed to create TCE64 table for liobn 0x%x\n",
>>>>> +                    liobn);
>>>>> +            return NULL;
>>>>> +        }
>>>>> +    } else if (cap_spapr_tce) {
>>>>> +        uint64_t window_size = (uint64_t) nb_table << page_shift;
>>>>> +        struct kvm_create_spapr_tce args = {
>>>>> +            .liobn = liobn,
>>>>> +            .window_size = window_size,
>>>>> +        };
>>>>> +        if ((window_size != args.window_size) || bus_offset) {
>>>>> +            return NULL;
>>>>> +        }
>>>>> +        fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
>>>>> +        if (fd < 0) {
>>>>> +            fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
>>>>> +                    liobn);
>>>>> +            return NULL;
>>>>> +        }
>>>>> +    } else {
>>>>>          return NULL;
>>>>>      }
>>>>>  
>>>>> -    len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(uint64_t);
>>>>> +    len = nb_table * sizeof(uint64_t);
>>>>>      /* FIXME: round this up to page size */
>>>>>  
>>>>>      table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
>>>>
>>>
>>>
>>
>>
>>
>>
> 
>
David Gibson March 10, 2017, 12:32 a.m. UTC | #6
On Wed, Mar 08, 2017 at 03:48:49PM +1100, Alexey Kardashevskiy wrote:
> Ping? It is not urgent, just wanted to make sure that it is not forgotten
> and I do not need to do anything to update it. Thanks.

Ah.. sorry, I lost track of this one (and confused it with the kernel
series for VFIO acceleration).  Can you please resend the latest
version.

> On 02/03/17 13:40, Alexey Kardashevskiy wrote:
> > On 09/01/17 13:53, David Gibson wrote:
> >> On Mon, Jan 09, 2017 at 01:38:26PM +1100, Alexey Kardashevskiy wrote:
> >>> On 03/01/17 13:26, David Gibson wrote:
> >>>> On Thu, Dec 22, 2016 at 12:13:12PM +1100, Alexey Kardashevskiy wrote:
> >>>>> KVM_CAP_SPAPR_TCE capability allows creating TCE tables in KVM which
> >>>>> allows having in-kernel acceleration for H_PUT_TCE_xxx hypercalls.
> >>>>> However it only supports 32bit DMA windows at zero bus offset.
> >>>>>
> >>>>> There is a new KVM_CAP_SPAPR_TCE_64 capability which supports 64bit
> >>>>> window size, variable page size and bus offset.
> >>>>>
> >>>>> This makes use of the new capability. The kernel headers are already
> >>>>> updated as the kernel support went in to v4.6.
> >>>>>
> >>>>> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
> >>>>> ---
> >>>>>  target-ppc/kvm_ppc.h | 12 +++++++-----
> >>>>>  hw/ppc/spapr_iommu.c |  8 +++++---
> >>>>>  target-ppc/kvm.c     | 48 +++++++++++++++++++++++++++++++++++++-----------
> >>>>>  3 files changed, 49 insertions(+), 19 deletions(-)
> >>>>>
> >>>>> diff --git a/target-ppc/kvm_ppc.h b/target-ppc/kvm_ppc.h
> >>>>> index bd1d78bfbe..14320c2378 100644
> >>>>> --- a/target-ppc/kvm_ppc.h
> >>>>> +++ b/target-ppc/kvm_ppc.h
> >>>>> @@ -36,8 +36,9 @@ int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu);
> >>>>>  #ifndef CONFIG_USER_ONLY
> >>>>>  off_t kvmppc_alloc_rma(void **rma);
> >>>>>  bool kvmppc_spapr_use_multitce(void);
> >>>>> -void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd,
> >>>>> -                              bool need_vfio);
> >>>>> +void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t page_shift,
> >>>>> +                              uint64_t bus_offset, uint32_t nb_table,
> >>>>> +                              int *pfd, bool need_vfio);
> >>>>>  int kvmppc_remove_spapr_tce(void *table, int pfd, uint32_t window_size);
> >>>>>  int kvmppc_reset_htab(int shift_hint);
> >>>>>  uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift);
> >>>>> @@ -168,9 +169,10 @@ static inline bool kvmppc_spapr_use_multitce(void)
> >>>>>      return false;
> >>>>>  }
> >>>>>  
> >>>>> -static inline void *kvmppc_create_spapr_tce(uint32_t liobn,
> >>>>> -                                            uint32_t window_size, int *fd,
> >>>>> -                                            bool need_vfio)
> >>>>> +static inline void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t page_shift,
> >>>>> +                                            uint64_t bus_offset,
> >>>>> +                                            uint32_t nb_table,
> >>>>> +                                            int *pfd, bool need_vfio)
> >>>>>  {
> >>>>>      return NULL;
> >>>>>  }
> >>>>> diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c
> >>>>> index ae30bbe30f..29c80bb3c8 100644
> >>>>> --- a/hw/ppc/spapr_iommu.c
> >>>>> +++ b/hw/ppc/spapr_iommu.c
> >>>>> @@ -79,15 +79,16 @@ static IOMMUAccessFlags spapr_tce_iommu_access_flags(uint64_t tce)
> >>>>>  
> >>>>>  static uint64_t *spapr_tce_alloc_table(uint32_t liobn,
> >>>>>                                         uint32_t page_shift,
> >>>>> +                                       uint64_t bus_offset,
> >>>>>                                         uint32_t nb_table,
> >>>>>                                         int *fd,
> >>>>>                                         bool need_vfio)
> >>>>>  {
> >>>>>      uint64_t *table = NULL;
> >>>>> -    uint64_t window_size = (uint64_t)nb_table << page_shift;
> >>>>>  
> >>>>> -    if (kvm_enabled() && !(window_size >> 32)) {
> >>>>> -        table = kvmppc_create_spapr_tce(liobn, window_size, fd, need_vfio);
> >>>>> +    if (kvm_enabled()) {
> >>>>
> >>>> This is broken.  Previously, if we had a >4GiB window, we'd fall back
> >>>> to managing it in userspace, which would work, albeit slowly.  Now, if
> >>>> you have an older kernel which doesn't support KVM_CAP_SPAPR_TCE_64 it
> >>>> will attempt to allocate it in the kernel, and fail completely.
> >>>
> >>>
> >>> No, kvmppc_create_spapr_tce() would return NULL and right after that there
> >>> is a "if (!table)" (it can be seen at the end of this chunk) to handle the
> >>> failure.
> >>
> >> Oh, yes, sorry.  For some reason I thought there was a return in that
> >> if block.
> > 
> > 
> > 
> > This was the only comment which turned out to be not a bug, what now? :)
> > 
> > 
> >>
> >>>>
> >>>>> +        table = kvmppc_create_spapr_tce(liobn, page_shift, bus_offset, nb_table,
> >>>>> +                                        fd, need_vfio);
> >>>>>      }
> >>>>>  
> >>>>>      if (!table) {
> >>>>> @@ -342,6 +343,7 @@ void spapr_tce_table_enable(sPAPRTCETable *tcet,
> >>>>>      tcet->nb_table = nb_table;
> >>>>>      tcet->table = spapr_tce_alloc_table(tcet->liobn,
> >>>>>                                          tcet->page_shift,
> >>>>> +                                        tcet->bus_offset,
> >>>>>                                          tcet->nb_table,
> >>>>>                                          &tcet->fd,
> >>>>>                                          tcet->need_vfio);
> >>>>> diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c
> >>>>> index 9c4834c4fc..6e91a4d8bb 100644
> >>>>> --- a/target-ppc/kvm.c
> >>>>> +++ b/target-ppc/kvm.c
> >>>>> @@ -71,6 +71,7 @@ static int cap_booke_sregs;
> >>>>>  static int cap_ppc_smt;
> >>>>>  static int cap_ppc_rma;
> >>>>>  static int cap_spapr_tce;
> >>>>> +static int cap_spapr_tce_64;
> >>>>>  static int cap_spapr_multitce;
> >>>>>  static int cap_spapr_vfio;
> >>>>>  static int cap_hior;
> >>>>> @@ -123,6 +124,7 @@ int kvm_arch_init(MachineState *ms, KVMState *s)
> >>>>>      cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
> >>>>>      cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
> >>>>>      cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
> >>>>> +    cap_spapr_tce_64 = kvm_check_extension(s, KVM_CAP_SPAPR_TCE_64);
> >>>>>      cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
> >>>>>      cap_spapr_vfio = false;
> >>>>>      cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
> >>>>> @@ -2201,13 +2203,10 @@ bool kvmppc_spapr_use_multitce(void)
> >>>>>      return cap_spapr_multitce;
> >>>>>  }
> >>>>>  
> >>>>> -void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd,
> >>>>> -                              bool need_vfio)
> >>>>> +void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t page_shift,
> >>>>> +                              uint64_t bus_offset, uint32_t nb_table,
> >>>>> +                              int *pfd, bool need_vfio)
> >>>>>  {
> >>>>> -    struct kvm_create_spapr_tce args = {
> >>>>> -        .liobn = liobn,
> >>>>> -        .window_size = window_size,
> >>>>> -    };
> >>>>>      long len;
> >>>>>      int fd;
> >>>>>      void *table;
> >>>>> @@ -2220,14 +2219,41 @@ void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd,
> >>>>>          return NULL;
> >>>>>      }
> >>>>>  
> >>>>> -    fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
> >>>>> -    if (fd < 0) {
> >>>>> -        fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
> >>>>> -                liobn);
> >>>>> +    if (cap_spapr_tce_64) {
> >>>>> +        struct kvm_create_spapr_tce_64 args = {
> >>>>> +            .liobn = liobn,
> >>>>> +            .page_shift = page_shift,
> >>>>> +            .offset = bus_offset >> page_shift,
> >>>>> +            .size = nb_table,
> >>>>> +            .flags = 0
> >>>>> +        };
> >>>>> +        fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE_64, &args);
> >>>>> +        if (fd < 0) {
> >>>>> +            fprintf(stderr,
> >>>>> +                    "KVM: Failed to create TCE64 table for liobn 0x%x\n",
> >>>>> +                    liobn);
> >>>>> +            return NULL;
> >>>>> +        }
> >>>>> +    } else if (cap_spapr_tce) {
> >>>>> +        uint64_t window_size = (uint64_t) nb_table << page_shift;
> >>>>> +        struct kvm_create_spapr_tce args = {
> >>>>> +            .liobn = liobn,
> >>>>> +            .window_size = window_size,
> >>>>> +        };
> >>>>> +        if ((window_size != args.window_size) || bus_offset) {
> >>>>> +            return NULL;
> >>>>> +        }
> >>>>> +        fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
> >>>>> +        if (fd < 0) {
> >>>>> +            fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
> >>>>> +                    liobn);
> >>>>> +            return NULL;
> >>>>> +        }
> >>>>> +    } else {
> >>>>>          return NULL;
> >>>>>      }
> >>>>>  
> >>>>> -    len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(uint64_t);
> >>>>> +    len = nb_table * sizeof(uint64_t);
> >>>>>      /* FIXME: round this up to page size */
> >>>>>  
> >>>>>      table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
> >>>>
> >>>
> >>>
> >>
> >>
> >>
> >>
> > 
> > 
> 
>
diff mbox

Patch

diff --git a/target-ppc/kvm_ppc.h b/target-ppc/kvm_ppc.h
index bd1d78bfbe..14320c2378 100644
--- a/target-ppc/kvm_ppc.h
+++ b/target-ppc/kvm_ppc.h
@@ -36,8 +36,9 @@  int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu);
 #ifndef CONFIG_USER_ONLY
 off_t kvmppc_alloc_rma(void **rma);
 bool kvmppc_spapr_use_multitce(void);
-void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd,
-                              bool need_vfio);
+void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t page_shift,
+                              uint64_t bus_offset, uint32_t nb_table,
+                              int *pfd, bool need_vfio);
 int kvmppc_remove_spapr_tce(void *table, int pfd, uint32_t window_size);
 int kvmppc_reset_htab(int shift_hint);
 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift);
@@ -168,9 +169,10 @@  static inline bool kvmppc_spapr_use_multitce(void)
     return false;
 }
 
-static inline void *kvmppc_create_spapr_tce(uint32_t liobn,
-                                            uint32_t window_size, int *fd,
-                                            bool need_vfio)
+static inline void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t page_shift,
+                                            uint64_t bus_offset,
+                                            uint32_t nb_table,
+                                            int *pfd, bool need_vfio)
 {
     return NULL;
 }
diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c
index ae30bbe30f..29c80bb3c8 100644
--- a/hw/ppc/spapr_iommu.c
+++ b/hw/ppc/spapr_iommu.c
@@ -79,15 +79,16 @@  static IOMMUAccessFlags spapr_tce_iommu_access_flags(uint64_t tce)
 
 static uint64_t *spapr_tce_alloc_table(uint32_t liobn,
                                        uint32_t page_shift,
+                                       uint64_t bus_offset,
                                        uint32_t nb_table,
                                        int *fd,
                                        bool need_vfio)
 {
     uint64_t *table = NULL;
-    uint64_t window_size = (uint64_t)nb_table << page_shift;
 
-    if (kvm_enabled() && !(window_size >> 32)) {
-        table = kvmppc_create_spapr_tce(liobn, window_size, fd, need_vfio);
+    if (kvm_enabled()) {
+        table = kvmppc_create_spapr_tce(liobn, page_shift, bus_offset, nb_table,
+                                        fd, need_vfio);
     }
 
     if (!table) {
@@ -342,6 +343,7 @@  void spapr_tce_table_enable(sPAPRTCETable *tcet,
     tcet->nb_table = nb_table;
     tcet->table = spapr_tce_alloc_table(tcet->liobn,
                                         tcet->page_shift,
+                                        tcet->bus_offset,
                                         tcet->nb_table,
                                         &tcet->fd,
                                         tcet->need_vfio);
diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c
index 9c4834c4fc..6e91a4d8bb 100644
--- a/target-ppc/kvm.c
+++ b/target-ppc/kvm.c
@@ -71,6 +71,7 @@  static int cap_booke_sregs;
 static int cap_ppc_smt;
 static int cap_ppc_rma;
 static int cap_spapr_tce;
+static int cap_spapr_tce_64;
 static int cap_spapr_multitce;
 static int cap_spapr_vfio;
 static int cap_hior;
@@ -123,6 +124,7 @@  int kvm_arch_init(MachineState *ms, KVMState *s)
     cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
     cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
     cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
+    cap_spapr_tce_64 = kvm_check_extension(s, KVM_CAP_SPAPR_TCE_64);
     cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
     cap_spapr_vfio = false;
     cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
@@ -2201,13 +2203,10 @@  bool kvmppc_spapr_use_multitce(void)
     return cap_spapr_multitce;
 }
 
-void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd,
-                              bool need_vfio)
+void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t page_shift,
+                              uint64_t bus_offset, uint32_t nb_table,
+                              int *pfd, bool need_vfio)
 {
-    struct kvm_create_spapr_tce args = {
-        .liobn = liobn,
-        .window_size = window_size,
-    };
     long len;
     int fd;
     void *table;
@@ -2220,14 +2219,41 @@  void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd,
         return NULL;
     }
 
-    fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
-    if (fd < 0) {
-        fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
-                liobn);
+    if (cap_spapr_tce_64) {
+        struct kvm_create_spapr_tce_64 args = {
+            .liobn = liobn,
+            .page_shift = page_shift,
+            .offset = bus_offset >> page_shift,
+            .size = nb_table,
+            .flags = 0
+        };
+        fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE_64, &args);
+        if (fd < 0) {
+            fprintf(stderr,
+                    "KVM: Failed to create TCE64 table for liobn 0x%x\n",
+                    liobn);
+            return NULL;
+        }
+    } else if (cap_spapr_tce) {
+        uint64_t window_size = (uint64_t) nb_table << page_shift;
+        struct kvm_create_spapr_tce args = {
+            .liobn = liobn,
+            .window_size = window_size,
+        };
+        if ((window_size != args.window_size) || bus_offset) {
+            return NULL;
+        }
+        fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
+        if (fd < 0) {
+            fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
+                    liobn);
+            return NULL;
+        }
+    } else {
         return NULL;
     }
 
-    len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(uint64_t);
+    len = nb_table * sizeof(uint64_t);
     /* FIXME: round this up to page size */
 
     table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);