diff mbox series

[v2,02/13] spapr/xive: add hcall support when under KVM

Message ID 20190222131322.26079-3-clg@kaod.org
State New
Headers show
Series spapr: add KVM support to the XIVE interrupt mode | expand

Commit Message

Cédric Le Goater Feb. 22, 2019, 1:13 p.m. UTC
XIVE hcalls are all redirected to QEMU as none are on a fast path.
When necessary, QEMU invokes KVM through specific ioctls to perform
host operations. QEMU should have done the necessary checks before
calling KVM and, in case of failure, H_HARDWARE is simply returned.

H_INT_ESB is a special case that could have been handled under KVM
but the impact on performance was low when under QEMU. Here are some
figures :

    kernel irqchip      OFF          ON
    H_INT_ESB                    KVM   QEMU

    rtl8139 (LSI )      1.19     1.24  1.23  Gbits/sec
    virtio             31.80    42.30   --   Gbits/sec

Signed-off-by: Cédric Le Goater <clg@kaod.org>
---
 include/hw/ppc/spapr_xive.h |  15 +++
 hw/intc/spapr_xive.c        |  87 +++++++++++++++--
 hw/intc/spapr_xive_kvm.c    | 184 ++++++++++++++++++++++++++++++++++++
 3 files changed, 278 insertions(+), 8 deletions(-)

Comments

David Gibson Feb. 25, 2019, 11:22 p.m. UTC | #1
On Fri, Feb 22, 2019 at 02:13:11PM +0100, Cédric Le Goater wrote:
> XIVE hcalls are all redirected to QEMU as none are on a fast path.
> When necessary, QEMU invokes KVM through specific ioctls to perform
> host operations. QEMU should have done the necessary checks before
> calling KVM and, in case of failure, H_HARDWARE is simply returned.
> 
> H_INT_ESB is a special case that could have been handled under KVM
> but the impact on performance was low when under QEMU. Here are some
> figures :
> 
>     kernel irqchip      OFF          ON
>     H_INT_ESB                    KVM   QEMU
> 
>     rtl8139 (LSI )      1.19     1.24  1.23  Gbits/sec
>     virtio             31.80    42.30   --   Gbits/sec
> 
> Signed-off-by: Cédric Le Goater <clg@kaod.org>
> ---
>  include/hw/ppc/spapr_xive.h |  15 +++
>  hw/intc/spapr_xive.c        |  87 +++++++++++++++--
>  hw/intc/spapr_xive_kvm.c    | 184 ++++++++++++++++++++++++++++++++++++
>  3 files changed, 278 insertions(+), 8 deletions(-)
> 
> diff --git a/include/hw/ppc/spapr_xive.h b/include/hw/ppc/spapr_xive.h
> index ab6732b14a02..749c6cbc2c56 100644
> --- a/include/hw/ppc/spapr_xive.h
> +++ b/include/hw/ppc/spapr_xive.h
> @@ -55,9 +55,24 @@ void spapr_xive_set_tctx_os_cam(XiveTCTX *tctx);
>  void spapr_xive_mmio_set_enabled(sPAPRXive *xive, bool enable);
>  void spapr_xive_map_mmio(sPAPRXive *xive);
>  
> +int spapr_xive_end_to_target(uint8_t end_blk, uint32_t end_idx,
> +                             uint32_t *out_server, uint8_t *out_prio);
> +
>  /*
>   * KVM XIVE device helpers
>   */
>  void kvmppc_xive_connect(sPAPRXive *xive, Error **errp);
> +void kvmppc_xive_reset(sPAPRXive *xive, Error **errp);
> +void kvmppc_xive_set_source_config(sPAPRXive *xive, uint32_t lisn, XiveEAS *eas,
> +                                   Error **errp);
> +void kvmppc_xive_sync_source(sPAPRXive *xive, uint32_t lisn, Error **errp);
> +uint64_t kvmppc_xive_esb_rw(XiveSource *xsrc, int srcno, uint32_t offset,
> +                            uint64_t data, bool write);
> +void kvmppc_xive_set_queue_config(sPAPRXive *xive, uint8_t end_blk,
> +                                 uint32_t end_idx, XiveEND *end,
> +                                 Error **errp);
> +void kvmppc_xive_get_queue_config(sPAPRXive *xive, uint8_t end_blk,
> +                                 uint32_t end_idx, XiveEND *end,
> +                                 Error **errp);
>  
>  #endif /* PPC_SPAPR_XIVE_H */
> diff --git a/hw/intc/spapr_xive.c b/hw/intc/spapr_xive.c
> index c24d649e3668..3db24391e31c 100644
> --- a/hw/intc/spapr_xive.c
> +++ b/hw/intc/spapr_xive.c
> @@ -86,6 +86,19 @@ static int spapr_xive_target_to_nvt(uint32_t target,
>   * sPAPR END indexing uses a simple mapping of the CPU vcpu_id, 8
>   * priorities per CPU
>   */
> +int spapr_xive_end_to_target(uint8_t end_blk, uint32_t end_idx,
> +                             uint32_t *out_server, uint8_t *out_prio)
> +{

Since you don't support irq blocks as yet, should this error out
rather than ignoring if end_blk != 0?

> +    if (out_server) {
> +        *out_server = end_idx >> 3;
> +    }
> +
> +    if (out_prio) {
> +        *out_prio = end_idx & 0x7;
> +    }
> +    return 0;
> +}
> +
>  static void spapr_xive_cpu_to_end(PowerPCCPU *cpu, uint8_t prio,
>                                    uint8_t *out_end_blk, uint32_t *out_end_idx)
>  {
> @@ -792,6 +805,16 @@ static target_ulong h_int_set_source_config(PowerPCCPU *cpu,
>          new_eas.w = xive_set_field64(EAS_END_DATA, new_eas.w, eisn);
>      }
>  
> +    if (kvm_irqchip_in_kernel()) {
> +        Error *local_err = NULL;
> +
> +        kvmppc_xive_set_source_config(xive, lisn, &new_eas, &local_err);
> +        if (local_err) {
> +            error_report_err(local_err);
> +            return H_HARDWARE;
> +        }
> +    }
> +
>  out:
>      xive->eat[lisn] = new_eas;
>      return H_SUCCESS;
> @@ -1097,6 +1120,16 @@ static target_ulong h_int_set_queue_config(PowerPCCPU *cpu,
>       */
>  
>  out:
> +    if (kvm_irqchip_in_kernel()) {
> +        Error *local_err = NULL;
> +
> +        kvmppc_xive_set_queue_config(xive, end_blk, end_idx, &end, &local_err);
> +        if (local_err) {
> +            error_report_err(local_err);
> +            return H_HARDWARE;
> +        }
> +    }
> +
>      /* Update END */
>      memcpy(&xive->endt[end_idx], &end, sizeof(XiveEND));
>      return H_SUCCESS;
> @@ -1189,6 +1222,16 @@ static target_ulong h_int_get_queue_config(PowerPCCPU *cpu,
>          args[2] = 0;
>      }
>  
> +    if (kvm_irqchip_in_kernel()) {
> +        Error *local_err = NULL;
> +
> +        kvmppc_xive_get_queue_config(xive, end_blk, end_idx, end, &local_err);
> +        if (local_err) {
> +            error_report_err(local_err);
> +            return H_HARDWARE;
> +        }
> +    }
> +
>      /* TODO: do we need any locking on the END ? */
>      if (flags & SPAPR_XIVE_END_DEBUG) {
>          /* Load the event queue generation number into the return flags */
> @@ -1341,15 +1384,20 @@ static target_ulong h_int_esb(PowerPCCPU *cpu,
>          return H_P3;
>      }
>  
> -    mmio_addr = xive->vc_base + xive_source_esb_mgmt(xsrc, lisn) + offset;
> +    if (kvm_irqchip_in_kernel()) {
> +        args[0] = kvmppc_xive_esb_rw(xsrc, lisn, offset, data,
> +                                     flags & SPAPR_XIVE_ESB_STORE);
> +    } else {
> +        mmio_addr = xive->vc_base + xive_source_esb_mgmt(xsrc, lisn) + offset;
>  
> -    if (dma_memory_rw(&address_space_memory, mmio_addr, &data, 8,
> -                      (flags & SPAPR_XIVE_ESB_STORE))) {
> -        qemu_log_mask(LOG_GUEST_ERROR, "XIVE: failed to access ESB @0x%"
> -                      HWADDR_PRIx "\n", mmio_addr);
> -        return H_HARDWARE;
> +        if (dma_memory_rw(&address_space_memory, mmio_addr, &data, 8,
> +                          (flags & SPAPR_XIVE_ESB_STORE))) {
> +            qemu_log_mask(LOG_GUEST_ERROR, "XIVE: failed to access ESB @0x%"
> +                          HWADDR_PRIx "\n", mmio_addr);
> +            return H_HARDWARE;
> +        }
> +        args[0] = (flags & SPAPR_XIVE_ESB_STORE) ? -1 : data;
>      }
> -    args[0] = (flags & SPAPR_XIVE_ESB_STORE) ? -1 : data;
>      return H_SUCCESS;
>  }
>  
> @@ -1406,7 +1454,20 @@ static target_ulong h_int_sync(PowerPCCPU *cpu,
>       * This is not needed when running the emulation under QEMU
>       */
>  
> -    /* This is not real hardware. Nothing to be done */
> +    /*
> +     * This is not real hardware. Nothing to be done unless when
> +     * under KVM
> +     */
> +
> +    if (kvm_irqchip_in_kernel()) {
> +        Error *local_err = NULL;
> +
> +        kvmppc_xive_sync_source(xive, lisn, &local_err);
> +        if (local_err) {
> +            error_report_err(local_err);
> +            return H_HARDWARE;
> +        }
> +    }
>      return H_SUCCESS;
>  }
>  
> @@ -1441,6 +1502,16 @@ static target_ulong h_int_reset(PowerPCCPU *cpu,
>      }
>  
>      device_reset(DEVICE(xive));
> +
> +    if (kvm_irqchip_in_kernel()) {
> +        Error *local_err = NULL;
> +
> +        kvmppc_xive_reset(xive, &local_err);
> +        if (local_err) {
> +            error_report_err(local_err);
> +            return H_HARDWARE;
> +        }
> +    }
>      return H_SUCCESS;
>  }
>  
> diff --git a/hw/intc/spapr_xive_kvm.c b/hw/intc/spapr_xive_kvm.c
> index 623fbf74f23e..6b50451b4f85 100644
> --- a/hw/intc/spapr_xive_kvm.c
> +++ b/hw/intc/spapr_xive_kvm.c
> @@ -89,6 +89,52 @@ void kvmppc_xive_cpu_connect(XiveTCTX *tctx, Error **errp)
>   * XIVE Interrupt Source (KVM)
>   */
>  
> +void kvmppc_xive_set_source_config(sPAPRXive *xive, uint32_t lisn, XiveEAS *eas,
> +                                   Error **errp)
> +{
> +    uint32_t end_idx;
> +    uint32_t end_blk;
> +    uint32_t eisn;
> +    uint8_t priority;
> +    uint32_t server;
> +    uint64_t kvm_src;
> +    Error *local_err = NULL;
> +
> +    /*
> +     * No need to set a MASKED source, this is the default state after
> +     * reset.

I don't quite follow this comment, why is there no need to call a
MASKED source?

> +     */
> +    if (!xive_eas_is_valid(eas) || xive_eas_is_masked(eas)) {
> +        return;
> +    }
> +
> +    end_idx = xive_get_field64(EAS_END_INDEX, eas->w);
> +    end_blk = xive_get_field64(EAS_END_BLOCK, eas->w);
> +    eisn = xive_get_field64(EAS_END_DATA, eas->w);
> +
> +    spapr_xive_end_to_target(end_blk, end_idx, &server, &priority);
> +
> +    kvm_src = priority << KVM_XIVE_SOURCE_PRIORITY_SHIFT &
> +        KVM_XIVE_SOURCE_PRIORITY_MASK;
> +    kvm_src |= server << KVM_XIVE_SOURCE_SERVER_SHIFT &
> +        KVM_XIVE_SOURCE_SERVER_MASK;
> +    kvm_src |= ((uint64_t)eisn << KVM_XIVE_SOURCE_EISN_SHIFT) &
> +        KVM_XIVE_SOURCE_EISN_MASK;
> +
> +    kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_SOURCE_CONFIG, lisn,
> +                      &kvm_src, true, &local_err);
> +    if (local_err) {
> +        error_propagate(errp, local_err);
> +        return;
> +    }
> +}
> +
> +void kvmppc_xive_sync_source(sPAPRXive *xive, uint32_t lisn, Error **errp)
> +{
> +    kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_SOURCE_SYNC, lisn,
> +                      NULL, true, errp);
> +}
> +
>  /*
>   * At reset, the interrupt sources are simply created and MASKED. We
>   * only need to inform the KVM XIVE device about their type: LSI or
> @@ -125,6 +171,64 @@ void kvmppc_xive_source_reset(XiveSource *xsrc, Error **errp)
>      }
>  }
>  
> +/*
> + * This is used to perform the magic loads on the ESB pages, described
> + * in xive.h.
> + */
> +static uint64_t xive_esb_rw(XiveSource *xsrc, int srcno, uint32_t offset,
> +                            uint64_t data, bool write)
> +{
> +    unsigned long addr = (unsigned long) xsrc->esb_mmap +
> +        xive_source_esb_mgmt(xsrc, srcno) + offset;

Casting the esb_mmap into unsigned long then back to a pointer looks
unnecessary.  You should be able to do this with pointer arithmetic.

> +    if (write) {
> +        *((uint64_t *) addr) = data;
> +        return -1;
> +    } else {
> +        return *((uint64_t *) addr);
> +    }

Since this is always dealing with 64-bit values, couldn't you put the
byteswaps in here rather than in all the callers?

> +}
> +
> +static uint8_t xive_esb_read(XiveSource *xsrc, int srcno, uint32_t offset)
> +{
> +    /* Prevent the compiler from optimizing away the load */
> +    volatile uint64_t value = xive_esb_rw(xsrc, srcno, offset, 0, 0);

Wouldn't the volatile magic be better inside xive_esb_rw()?

> +    return be64_to_cpu(value) & 0x3;
> +}
> +
> +static void xive_esb_trigger(XiveSource *xsrc, int srcno)
> +{
> +    unsigned long addr = (unsigned long) xsrc->esb_mmap +
> +        xive_source_esb_page(xsrc, srcno);
> +
> +    *((uint64_t *) addr) = 0x0;
> +}

Also.. aren't some of these register accesses likely to need memory
barriers?

> +
> +uint64_t kvmppc_xive_esb_rw(XiveSource *xsrc, int srcno, uint32_t offset,
> +                            uint64_t data, bool write)
> +{
> +    if (write) {
> +        return xive_esb_rw(xsrc, srcno, offset, data, 1);
> +    }
> +
> +    /*
> +     * Special Load EOI handling for LSI sources. Q bit is never set
> +     * and the interrupt should be re-triggered if the level is still
> +     * asserted.
> +     */
> +    if (xive_source_irq_is_lsi(xsrc, srcno) &&
> +        offset == XIVE_ESB_LOAD_EOI) {
> +        xive_esb_read(xsrc, srcno, XIVE_ESB_SET_PQ_00);
> +        if (xsrc->status[srcno] & XIVE_STATUS_ASSERTED) {
> +            xive_esb_trigger(xsrc, srcno);
> +        }
> +        return 0;
> +    } else {
> +        return xive_esb_rw(xsrc, srcno, offset, 0, 0);
> +    }
> +}
> +
>  void kvmppc_xive_source_set_irq(void *opaque, int srcno, int val)
>  {
>      XiveSource *xsrc = opaque;
> @@ -155,6 +259,86 @@ void kvmppc_xive_source_set_irq(void *opaque, int srcno, int val)
>  /*
>   * sPAPR XIVE interrupt controller (KVM)
>   */
> +void kvmppc_xive_get_queue_config(sPAPRXive *xive, uint8_t end_blk,
> +                                  uint32_t end_idx, XiveEND *end,
> +                                  Error **errp)
> +{
> +    struct kvm_ppc_xive_eq kvm_eq = { 0 };
> +    uint64_t kvm_eq_idx;
> +    uint8_t priority;
> +    uint32_t server;
> +    Error *local_err = NULL;
> +
> +    if (!xive_end_is_valid(end)) {

This should set an error, shouldn't it?

> +        return;
> +    }
> +
> +    /* Encode the tuple (server, prio) as a KVM EQ index */
> +    spapr_xive_end_to_target(end_blk, end_idx, &server, &priority);
> +
> +    kvm_eq_idx = priority << KVM_XIVE_EQ_PRIORITY_SHIFT &
> +            KVM_XIVE_EQ_PRIORITY_MASK;
> +    kvm_eq_idx |= server << KVM_XIVE_EQ_SERVER_SHIFT &
> +        KVM_XIVE_EQ_SERVER_MASK;
> +
> +    kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_EQ_CONFIG, kvm_eq_idx,
> +                      &kvm_eq, false, &local_err);
> +    if (local_err) {
> +        error_propagate(errp, local_err);
> +        return;
> +    }
> +
> +    /*
> +     * The EQ index and toggle bit are updated by HW. These are the
> +     * only fields we want to return.
> +     */
> +    end->w1 = xive_set_field32(END_W1_GENERATION, 0ul, kvm_eq.qtoggle) |
> +        xive_set_field32(END_W1_PAGE_OFF, 0ul, kvm_eq.qindex);
> +}
> +
> +void kvmppc_xive_set_queue_config(sPAPRXive *xive, uint8_t end_blk,
> +                                  uint32_t end_idx, XiveEND *end,
> +                                  Error **errp)
> +{
> +    struct kvm_ppc_xive_eq kvm_eq = { 0 };
> +    uint64_t kvm_eq_idx;
> +    uint8_t priority;
> +    uint32_t server;
> +    Error *local_err = NULL;
> +
> +    if (!xive_end_is_valid(end)) {
> +        return;
> +    }
> +
> +    /* Build the KVM state from the local END structure */
> +    kvm_eq.flags   = KVM_XIVE_EQ_FLAG_ALWAYS_NOTIFY;
> +    kvm_eq.qsize   = xive_get_field32(END_W0_QSIZE, end->w0) + 12;
> +    kvm_eq.qpage   = (uint64_t) be32_to_cpu(end->w2 & 0x0fffffff) << 32 |
> +        be32_to_cpu(end->w3);
> +    kvm_eq.qtoggle = xive_get_field32(END_W1_GENERATION, end->w1);
> +    kvm_eq.qindex  = xive_get_field32(END_W1_PAGE_OFF, end->w1);
> +
> +    /* Encode the tuple (server, prio) as a KVM EQ index */
> +    spapr_xive_end_to_target(end_blk, end_idx, &server, &priority);
> +
> +    kvm_eq_idx = priority << KVM_XIVE_EQ_PRIORITY_SHIFT &
> +            KVM_XIVE_EQ_PRIORITY_MASK;
> +    kvm_eq_idx |= server << KVM_XIVE_EQ_SERVER_SHIFT &
> +        KVM_XIVE_EQ_SERVER_MASK;
> +
> +    kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_EQ_CONFIG, kvm_eq_idx,
> +                      &kvm_eq, true, &local_err);
> +    if (local_err) {
> +        error_propagate(errp, local_err);
> +        return;
> +    }
> +}
> +
> +void kvmppc_xive_reset(sPAPRXive *xive, Error **errp)
> +{
> +    kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_CTRL, KVM_DEV_XIVE_RESET,
> +                      NULL, true, errp);
> +}
>  
>  static void *kvmppc_xive_mmap(sPAPRXive *xive, int pgoff, size_t len,
>                                Error **errp)
Cédric Le Goater March 11, 2019, 5:32 p.m. UTC | #2
On 2/26/19 12:22 AM, David Gibson wrote:
> On Fri, Feb 22, 2019 at 02:13:11PM +0100, Cédric Le Goater wrote:
>> XIVE hcalls are all redirected to QEMU as none are on a fast path.
>> When necessary, QEMU invokes KVM through specific ioctls to perform
>> host operations. QEMU should have done the necessary checks before
>> calling KVM and, in case of failure, H_HARDWARE is simply returned.
>>
>> H_INT_ESB is a special case that could have been handled under KVM
>> but the impact on performance was low when under QEMU. Here are some
>> figures :
>>
>>     kernel irqchip      OFF          ON
>>     H_INT_ESB                    KVM   QEMU
>>
>>     rtl8139 (LSI )      1.19     1.24  1.23  Gbits/sec
>>     virtio             31.80    42.30   --   Gbits/sec
>>
>> Signed-off-by: Cédric Le Goater <clg@kaod.org>
>> ---
>>  include/hw/ppc/spapr_xive.h |  15 +++
>>  hw/intc/spapr_xive.c        |  87 +++++++++++++++--
>>  hw/intc/spapr_xive_kvm.c    | 184 ++++++++++++++++++++++++++++++++++++
>>  3 files changed, 278 insertions(+), 8 deletions(-)
>>
>> diff --git a/include/hw/ppc/spapr_xive.h b/include/hw/ppc/spapr_xive.h
>> index ab6732b14a02..749c6cbc2c56 100644
>> --- a/include/hw/ppc/spapr_xive.h
>> +++ b/include/hw/ppc/spapr_xive.h
>> @@ -55,9 +55,24 @@ void spapr_xive_set_tctx_os_cam(XiveTCTX *tctx);
>>  void spapr_xive_mmio_set_enabled(sPAPRXive *xive, bool enable);
>>  void spapr_xive_map_mmio(sPAPRXive *xive);
>>  
>> +int spapr_xive_end_to_target(uint8_t end_blk, uint32_t end_idx,
>> +                             uint32_t *out_server, uint8_t *out_prio);
>> +
>>  /*
>>   * KVM XIVE device helpers
>>   */
>>  void kvmppc_xive_connect(sPAPRXive *xive, Error **errp);
>> +void kvmppc_xive_reset(sPAPRXive *xive, Error **errp);
>> +void kvmppc_xive_set_source_config(sPAPRXive *xive, uint32_t lisn, XiveEAS *eas,
>> +                                   Error **errp);
>> +void kvmppc_xive_sync_source(sPAPRXive *xive, uint32_t lisn, Error **errp);
>> +uint64_t kvmppc_xive_esb_rw(XiveSource *xsrc, int srcno, uint32_t offset,
>> +                            uint64_t data, bool write);
>> +void kvmppc_xive_set_queue_config(sPAPRXive *xive, uint8_t end_blk,
>> +                                 uint32_t end_idx, XiveEND *end,
>> +                                 Error **errp);
>> +void kvmppc_xive_get_queue_config(sPAPRXive *xive, uint8_t end_blk,
>> +                                 uint32_t end_idx, XiveEND *end,
>> +                                 Error **errp);
>>  
>>  #endif /* PPC_SPAPR_XIVE_H */
>> diff --git a/hw/intc/spapr_xive.c b/hw/intc/spapr_xive.c
>> index c24d649e3668..3db24391e31c 100644
>> --- a/hw/intc/spapr_xive.c
>> +++ b/hw/intc/spapr_xive.c
>> @@ -86,6 +86,19 @@ static int spapr_xive_target_to_nvt(uint32_t target,
>>   * sPAPR END indexing uses a simple mapping of the CPU vcpu_id, 8
>>   * priorities per CPU
>>   */
>> +int spapr_xive_end_to_target(uint8_t end_blk, uint32_t end_idx,
>> +                             uint32_t *out_server, uint8_t *out_prio)
>> +{
> 
> Since you don't support irq blocks as yet, should this error out
> rather than ignoring if end_blk != 0?

Yes, we could. I will add a test against SPAPR_XIVE_BLOCK, which is the value
of the sPAPR block ID. I would like to be able to track where it is used,
even if it is constant.

> 
>> +    if (out_server) {
>> +        *out_server = end_idx >> 3;
>> +    }
>> +
>> +    if (out_prio) {
>> +        *out_prio = end_idx & 0x7;
>> +    }
>> +    return 0;
>> +}
>> +
>>  static void spapr_xive_cpu_to_end(PowerPCCPU *cpu, uint8_t prio,
>>                                    uint8_t *out_end_blk, uint32_t *out_end_idx)
>>  {
>> @@ -792,6 +805,16 @@ static target_ulong h_int_set_source_config(PowerPCCPU *cpu,
>>          new_eas.w = xive_set_field64(EAS_END_DATA, new_eas.w, eisn);
>>      }
>>  
>> +    if (kvm_irqchip_in_kernel()) {
>> +        Error *local_err = NULL;
>> +
>> +        kvmppc_xive_set_source_config(xive, lisn, &new_eas, &local_err);
>> +        if (local_err) {
>> +            error_report_err(local_err);
>> +            return H_HARDWARE;
>> +        }
>> +    }
>> +
>>  out:
>>      xive->eat[lisn] = new_eas;
>>      return H_SUCCESS;
>> @@ -1097,6 +1120,16 @@ static target_ulong h_int_set_queue_config(PowerPCCPU *cpu,
>>       */
>>  
>>  out:
>> +    if (kvm_irqchip_in_kernel()) {
>> +        Error *local_err = NULL;
>> +
>> +        kvmppc_xive_set_queue_config(xive, end_blk, end_idx, &end, &local_err);
>> +        if (local_err) {
>> +            error_report_err(local_err);
>> +            return H_HARDWARE;
>> +        }
>> +    }
>> +
>>      /* Update END */
>>      memcpy(&xive->endt[end_idx], &end, sizeof(XiveEND));
>>      return H_SUCCESS;
>> @@ -1189,6 +1222,16 @@ static target_ulong h_int_get_queue_config(PowerPCCPU *cpu,
>>          args[2] = 0;
>>      }
>>  
>> +    if (kvm_irqchip_in_kernel()) {
>> +        Error *local_err = NULL;
>> +
>> +        kvmppc_xive_get_queue_config(xive, end_blk, end_idx, end, &local_err);
>> +        if (local_err) {
>> +            error_report_err(local_err);
>> +            return H_HARDWARE;
>> +        }
>> +    }
>> +
>>      /* TODO: do we need any locking on the END ? */
>>      if (flags & SPAPR_XIVE_END_DEBUG) {
>>          /* Load the event queue generation number into the return flags */
>> @@ -1341,15 +1384,20 @@ static target_ulong h_int_esb(PowerPCCPU *cpu,
>>          return H_P3;
>>      }
>>  
>> -    mmio_addr = xive->vc_base + xive_source_esb_mgmt(xsrc, lisn) + offset;
>> +    if (kvm_irqchip_in_kernel()) {
>> +        args[0] = kvmppc_xive_esb_rw(xsrc, lisn, offset, data,
>> +                                     flags & SPAPR_XIVE_ESB_STORE);
>> +    } else {
>> +        mmio_addr = xive->vc_base + xive_source_esb_mgmt(xsrc, lisn) + offset;
>>  
>> -    if (dma_memory_rw(&address_space_memory, mmio_addr, &data, 8,
>> -                      (flags & SPAPR_XIVE_ESB_STORE))) {
>> -        qemu_log_mask(LOG_GUEST_ERROR, "XIVE: failed to access ESB @0x%"
>> -                      HWADDR_PRIx "\n", mmio_addr);
>> -        return H_HARDWARE;
>> +        if (dma_memory_rw(&address_space_memory, mmio_addr, &data, 8,
>> +                          (flags & SPAPR_XIVE_ESB_STORE))) {
>> +            qemu_log_mask(LOG_GUEST_ERROR, "XIVE: failed to access ESB @0x%"
>> +                          HWADDR_PRIx "\n", mmio_addr);
>> +            return H_HARDWARE;
>> +        }
>> +        args[0] = (flags & SPAPR_XIVE_ESB_STORE) ? -1 : data;
>>      }
>> -    args[0] = (flags & SPAPR_XIVE_ESB_STORE) ? -1 : data;
>>      return H_SUCCESS;
>>  }
>>  
>> @@ -1406,7 +1454,20 @@ static target_ulong h_int_sync(PowerPCCPU *cpu,
>>       * This is not needed when running the emulation under QEMU
>>       */
>>  
>> -    /* This is not real hardware. Nothing to be done */
>> +    /*
>> +     * This is not real hardware. Nothing to be done unless when
>> +     * under KVM
>> +     */
>> +
>> +    if (kvm_irqchip_in_kernel()) {
>> +        Error *local_err = NULL;
>> +
>> +        kvmppc_xive_sync_source(xive, lisn, &local_err);
>> +        if (local_err) {
>> +            error_report_err(local_err);
>> +            return H_HARDWARE;
>> +        }
>> +    }
>>      return H_SUCCESS;
>>  }
>>  
>> @@ -1441,6 +1502,16 @@ static target_ulong h_int_reset(PowerPCCPU *cpu,
>>      }
>>  
>>      device_reset(DEVICE(xive));
>> +
>> +    if (kvm_irqchip_in_kernel()) {
>> +        Error *local_err = NULL;
>> +
>> +        kvmppc_xive_reset(xive, &local_err);
>> +        if (local_err) {
>> +            error_report_err(local_err);
>> +            return H_HARDWARE;
>> +        }
>> +    }
>>      return H_SUCCESS;
>>  }
>>  
>> diff --git a/hw/intc/spapr_xive_kvm.c b/hw/intc/spapr_xive_kvm.c
>> index 623fbf74f23e..6b50451b4f85 100644
>> --- a/hw/intc/spapr_xive_kvm.c
>> +++ b/hw/intc/spapr_xive_kvm.c
>> @@ -89,6 +89,52 @@ void kvmppc_xive_cpu_connect(XiveTCTX *tctx, Error **errp)
>>   * XIVE Interrupt Source (KVM)
>>   */
>>  
>> +void kvmppc_xive_set_source_config(sPAPRXive *xive, uint32_t lisn, XiveEAS *eas,
>> +                                   Error **errp)
>> +{
>> +    uint32_t end_idx;
>> +    uint32_t end_blk;
>> +    uint32_t eisn;
>> +    uint8_t priority;
>> +    uint32_t server;
>> +    uint64_t kvm_src;
>> +    Error *local_err = NULL;
>> +
>> +    /*
>> +     * No need to set a MASKED source, this is the default state after
>> +     * reset.
> 
> I don't quite follow this comment, why is there no need to call a
> MASKED source?

because MASKED is the default state in which KVM initializes the IRQ. I will
clarify.
 
>> +     */
>> +    if (!xive_eas_is_valid(eas) || xive_eas_is_masked(eas)) {
>> +        return;
>> +    }
>> +
>> +    end_idx = xive_get_field64(EAS_END_INDEX, eas->w);
>> +    end_blk = xive_get_field64(EAS_END_BLOCK, eas->w);
>> +    eisn = xive_get_field64(EAS_END_DATA, eas->w);
>> +
>> +    spapr_xive_end_to_target(end_blk, end_idx, &server, &priority);
>> +
>> +    kvm_src = priority << KVM_XIVE_SOURCE_PRIORITY_SHIFT &
>> +        KVM_XIVE_SOURCE_PRIORITY_MASK;
>> +    kvm_src |= server << KVM_XIVE_SOURCE_SERVER_SHIFT &
>> +        KVM_XIVE_SOURCE_SERVER_MASK;
>> +    kvm_src |= ((uint64_t)eisn << KVM_XIVE_SOURCE_EISN_SHIFT) &
>> +        KVM_XIVE_SOURCE_EISN_MASK;
>> +
>> +    kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_SOURCE_CONFIG, lisn,
>> +                      &kvm_src, true, &local_err);
>> +    if (local_err) {
>> +        error_propagate(errp, local_err);
>> +        return;
>> +    }
>> +}
>> +
>> +void kvmppc_xive_sync_source(sPAPRXive *xive, uint32_t lisn, Error **errp)
>> +{
>> +    kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_SOURCE_SYNC, lisn,
>> +                      NULL, true, errp);
>> +}
>> +
>>  /*
>>   * At reset, the interrupt sources are simply created and MASKED. We
>>   * only need to inform the KVM XIVE device about their type: LSI or
>> @@ -125,6 +171,64 @@ void kvmppc_xive_source_reset(XiveSource *xsrc, Error **errp)
>>      }
>>  }
>>  
>> +/*
>> + * This is used to perform the magic loads on the ESB pages, described
>> + * in xive.h.
>> + */
>> +static uint64_t xive_esb_rw(XiveSource *xsrc, int srcno, uint32_t offset,
>> +                            uint64_t data, bool write)
>> +{
>> +    unsigned long addr = (unsigned long) xsrc->esb_mmap +
>> +        xive_source_esb_mgmt(xsrc, srcno) + offset;
> 
> Casting the esb_mmap into unsigned long then back to a pointer looks
> unnecessary.  You should be able to do this with pointer arithmetic.

yes.

>> +    if (write) {
>> +        *((uint64_t *) addr) = data;
>> +        return -1;
>> +    } else {
>> +        return *((uint64_t *) addr);
>> +    }
> 
> Since this is always dealing with 64-bit values, couldn't you put the
> byteswaps in here rather than in all the callers?

indeed.
 
>> +}
>> +
>> +static uint8_t xive_esb_read(XiveSource *xsrc, int srcno, uint32_t offset)
>> +{
>> +    /* Prevent the compiler from optimizing away the load */
>> +    volatile uint64_t value = xive_esb_rw(xsrc, srcno, offset, 0, 0);
> 
> Wouldn't the volatile magic be better inside xive_esb_rw()?

sure. I will rework these helpers. 

>> +    return be64_to_cpu(value) & 0x3;
>> +}
>> +
>> +static void xive_esb_trigger(XiveSource *xsrc, int srcno)
>> +{
>> +    unsigned long addr = (unsigned long) xsrc->esb_mmap +
>> +        xive_source_esb_page(xsrc, srcno);
>> +
>> +    *((uint64_t *) addr) = 0x0;
>> +}
> 
> Also.. aren't some of these register accesses likely to need memory
> barriers?

AIUI, these are CI pages. So we shouldn't need barriers.

>> +
>> +uint64_t kvmppc_xive_esb_rw(XiveSource *xsrc, int srcno, uint32_t offset,
>> +                            uint64_t data, bool write)
>> +{
>> +    if (write) {
>> +        return xive_esb_rw(xsrc, srcno, offset, data, 1);
>> +    }
>> +
>> +    /*
>> +     * Special Load EOI handling for LSI sources. Q bit is never set
>> +     * and the interrupt should be re-triggered if the level is still
>> +     * asserted.
>> +     */
>> +    if (xive_source_irq_is_lsi(xsrc, srcno) &&
>> +        offset == XIVE_ESB_LOAD_EOI) {
>> +        xive_esb_read(xsrc, srcno, XIVE_ESB_SET_PQ_00);
>> +        if (xsrc->status[srcno] & XIVE_STATUS_ASSERTED) {
>> +            xive_esb_trigger(xsrc, srcno);
>> +        }
>> +        return 0;
>> +    } else {
>> +        return xive_esb_rw(xsrc, srcno, offset, 0, 0);
>> +    }
>> +}
>> +
>>  void kvmppc_xive_source_set_irq(void *opaque, int srcno, int val)
>>  {
>>      XiveSource *xsrc = opaque;
>> @@ -155,6 +259,86 @@ void kvmppc_xive_source_set_irq(void *opaque, int srcno, int val)
>>  /*
>>   * sPAPR XIVE interrupt controller (KVM)
>>   */
>> +void kvmppc_xive_get_queue_config(sPAPRXive *xive, uint8_t end_blk,
>> +                                  uint32_t end_idx, XiveEND *end,
>> +                                  Error **errp)
>> +{
>> +    struct kvm_ppc_xive_eq kvm_eq = { 0 };
>> +    uint64_t kvm_eq_idx;
>> +    uint8_t priority;
>> +    uint32_t server;
>> +    Error *local_err = NULL;
>> +
>> +    if (!xive_end_is_valid(end)) {
> 
> This should set an error, shouldn't it?

Hmm, this helper is used in the hcall h_int_get_queue_config() and, later, 
in kvmppc_xive_get_queues() to synchronize the state from KVM. 

I should probably move the test outside this routine, return H_HARDWARE
in the hcall and skip invalid ENDs in kvmppc_xive_get_queues().

Thanks,

C.


> 
>> +        return;
>> +    }
>> +
>> +    /* Encode the tuple (server, prio) as a KVM EQ index */
>> +    spapr_xive_end_to_target(end_blk, end_idx, &server, &priority);
>> +
>> +    kvm_eq_idx = priority << KVM_XIVE_EQ_PRIORITY_SHIFT &
>> +            KVM_XIVE_EQ_PRIORITY_MASK;
>> +    kvm_eq_idx |= server << KVM_XIVE_EQ_SERVER_SHIFT &
>> +        KVM_XIVE_EQ_SERVER_MASK;
>> +
>> +    kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_EQ_CONFIG, kvm_eq_idx,
>> +                      &kvm_eq, false, &local_err);
>> +    if (local_err) {
>> +        error_propagate(errp, local_err);
>> +        return;
>> +    }
>> +
>> +    /*
>> +     * The EQ index and toggle bit are updated by HW. These are the
>> +     * only fields we want to return.
>> +     */
>> +    end->w1 = xive_set_field32(END_W1_GENERATION, 0ul, kvm_eq.qtoggle) |
>> +        xive_set_field32(END_W1_PAGE_OFF, 0ul, kvm_eq.qindex);
>> +}
>> +
>> +void kvmppc_xive_set_queue_config(sPAPRXive *xive, uint8_t end_blk,
>> +                                  uint32_t end_idx, XiveEND *end,
>> +                                  Error **errp)
>> +{
>> +    struct kvm_ppc_xive_eq kvm_eq = { 0 };
>> +    uint64_t kvm_eq_idx;
>> +    uint8_t priority;
>> +    uint32_t server;
>> +    Error *local_err = NULL;
>> +
>> +    if (!xive_end_is_valid(end)) {
>> +        return;
>> +    }
>> +
>> +    /* Build the KVM state from the local END structure */
>> +    kvm_eq.flags   = KVM_XIVE_EQ_FLAG_ALWAYS_NOTIFY;
>> +    kvm_eq.qsize   = xive_get_field32(END_W0_QSIZE, end->w0) + 12;
>> +    kvm_eq.qpage   = (uint64_t) be32_to_cpu(end->w2 & 0x0fffffff) << 32 |
>> +        be32_to_cpu(end->w3);
>> +    kvm_eq.qtoggle = xive_get_field32(END_W1_GENERATION, end->w1);
>> +    kvm_eq.qindex  = xive_get_field32(END_W1_PAGE_OFF, end->w1);
>> +
>> +    /* Encode the tuple (server, prio) as a KVM EQ index */
>> +    spapr_xive_end_to_target(end_blk, end_idx, &server, &priority);
>> +
>> +    kvm_eq_idx = priority << KVM_XIVE_EQ_PRIORITY_SHIFT &
>> +            KVM_XIVE_EQ_PRIORITY_MASK;
>> +    kvm_eq_idx |= server << KVM_XIVE_EQ_SERVER_SHIFT &
>> +        KVM_XIVE_EQ_SERVER_MASK;
>> +
>> +    kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_EQ_CONFIG, kvm_eq_idx,
>> +                      &kvm_eq, true, &local_err);
>> +    if (local_err) {
>> +        error_propagate(errp, local_err);
>> +        return;
>> +    }
>> +}
>> +
>> +void kvmppc_xive_reset(sPAPRXive *xive, Error **errp)
>> +{
>> +    kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_CTRL, KVM_DEV_XIVE_RESET,
>> +                      NULL, true, errp);
>> +}
>>  
>>  static void *kvmppc_xive_mmap(sPAPRXive *xive, int pgoff, size_t len,
>>                                Error **errp)
>
David Gibson March 12, 2019, 10:26 a.m. UTC | #3
On Mon, Mar 11, 2019 at 06:32:05PM +0100, Cédric Le Goater wrote:
> On 2/26/19 12:22 AM, David Gibson wrote:
> > On Fri, Feb 22, 2019 at 02:13:11PM +0100, Cédric Le Goater wrote:
[snip]
> >> +void kvmppc_xive_set_source_config(sPAPRXive *xive, uint32_t lisn, XiveEAS *eas,
> >> +                                   Error **errp)
> >> +{
> >> +    uint32_t end_idx;
> >> +    uint32_t end_blk;
> >> +    uint32_t eisn;
> >> +    uint8_t priority;
> >> +    uint32_t server;
> >> +    uint64_t kvm_src;
> >> +    Error *local_err = NULL;
> >> +
> >> +    /*
> >> +     * No need to set a MASKED source, this is the default state after
> >> +     * reset.
> > 
> > I don't quite follow this comment, why is there no need to call a
> > MASKED source?
> 
> because MASKED is the default state in which KVM initializes the IRQ. I will
> clarify.

I believe it's possible - though rare - to process an incoming
migration on an established VM which isn't in fresh reset state.  So
it's best not to rely on that.

> >> +static void xive_esb_trigger(XiveSource *xsrc, int srcno)
> >> +{
> >> +    unsigned long addr = (unsigned long) xsrc->esb_mmap +
> >> +        xive_source_esb_page(xsrc, srcno);
> >> +
> >> +    *((uint64_t *) addr) = 0x0;
> >> +}
> > 
> > Also.. aren't some of these register accesses likely to need memory
> > barriers?
> 
> AIUI, these are CI pages. So we shouldn't need barriers.

CI doesn't negate the need for barriers, although it might change the
type you need.  At the very least you need a compiler barrier to stop
it re-ordering the access, but you can also have in-cpu store and load
queues.
Cédric Le Goater March 13, 2019, 10:43 a.m. UTC | #4
On 3/12/19 11:26 AM, David Gibson wrote:
> On Mon, Mar 11, 2019 at 06:32:05PM +0100, Cédric Le Goater wrote:
>> On 2/26/19 12:22 AM, David Gibson wrote:
>>> On Fri, Feb 22, 2019 at 02:13:11PM +0100, Cédric Le Goater wrote:
> [snip]
>>>> +void kvmppc_xive_set_source_config(sPAPRXive *xive, uint32_t lisn, XiveEAS *eas,
>>>> +                                   Error **errp)
>>>> +{
>>>> +    uint32_t end_idx;
>>>> +    uint32_t end_blk;
>>>> +    uint32_t eisn;
>>>> +    uint8_t priority;
>>>> +    uint32_t server;
>>>> +    uint64_t kvm_src;
>>>> +    Error *local_err = NULL;
>>>> +
>>>> +    /*
>>>> +     * No need to set a MASKED source, this is the default state after
>>>> +     * reset.
>>>
>>> I don't quite follow this comment, why is there no need to call a
>>> MASKED source?
>>
>> because MASKED is the default state in which KVM initializes the IRQ. I will
>> clarify.
> 
> I believe it's possible - though rare - to process an incoming
> migration on an established VM which isn't in fresh reset state.  So
> it's best not to rely on that.
> 
>>>> +static void xive_esb_trigger(XiveSource *xsrc, int srcno)
>>>> +{
>>>> +    unsigned long addr = (unsigned long) xsrc->esb_mmap +
>>>> +        xive_source_esb_page(xsrc, srcno);
>>>> +
>>>> +    *((uint64_t *) addr) = 0x0;
>>>> +}
>>>
>>> Also.. aren't some of these register accesses likely to need memory
>>> barriers?
>>
>> AIUI, these are CI pages. So we shouldn't need barriers.
> 
> CI doesn't negate the need for barriers, although it might change the
> type you need.  At the very least you need a compiler barrier to stop
> it re-ordering the access, but you can also have in-cpu store and load
> queues.
> 

ok. So I will need to add some smp_r/wmb() 

Thanks,

C.
David Gibson March 14, 2019, 2:11 a.m. UTC | #5
On Wed, Mar 13, 2019 at 11:43:54AM +0100, Cédric Le Goater wrote:
> On 3/12/19 11:26 AM, David Gibson wrote:
> > On Mon, Mar 11, 2019 at 06:32:05PM +0100, Cédric Le Goater wrote:
> >> On 2/26/19 12:22 AM, David Gibson wrote:
> >>> On Fri, Feb 22, 2019 at 02:13:11PM +0100, Cédric Le Goater wrote:
> > [snip]
> >>>> +void kvmppc_xive_set_source_config(sPAPRXive *xive, uint32_t lisn, XiveEAS *eas,
> >>>> +                                   Error **errp)
> >>>> +{
> >>>> +    uint32_t end_idx;
> >>>> +    uint32_t end_blk;
> >>>> +    uint32_t eisn;
> >>>> +    uint8_t priority;
> >>>> +    uint32_t server;
> >>>> +    uint64_t kvm_src;
> >>>> +    Error *local_err = NULL;
> >>>> +
> >>>> +    /*
> >>>> +     * No need to set a MASKED source, this is the default state after
> >>>> +     * reset.
> >>>
> >>> I don't quite follow this comment, why is there no need to call a
> >>> MASKED source?
> >>
> >> because MASKED is the default state in which KVM initializes the IRQ. I will
> >> clarify.
> > 
> > I believe it's possible - though rare - to process an incoming
> > migration on an established VM which isn't in fresh reset state.  So
> > it's best not to rely on that.
> > 
> >>>> +static void xive_esb_trigger(XiveSource *xsrc, int srcno)
> >>>> +{
> >>>> +    unsigned long addr = (unsigned long) xsrc->esb_mmap +
> >>>> +        xive_source_esb_page(xsrc, srcno);
> >>>> +
> >>>> +    *((uint64_t *) addr) = 0x0;
> >>>> +}
> >>>
> >>> Also.. aren't some of these register accesses likely to need memory
> >>> barriers?
> >>
> >> AIUI, these are CI pages. So we shouldn't need barriers.
> > 
> > CI doesn't negate the need for barriers, although it might change the
> > type you need.  At the very least you need a compiler barrier to stop
> > it re-ordering the access, but you can also have in-cpu store and load
> > queues.
> > 
> 
> ok. So I will need to add some smp_r/wmb() 

No, smp_[rw]mb() is for cases where it's strictly about cpu vs. cpu
ordering.  Here it's cpu vs. IO ordering so you need plain [rw]mb().
Cédric Le Goater March 14, 2019, 9:24 p.m. UTC | #6
On 3/14/19 3:11 AM, David Gibson wrote:
> On Wed, Mar 13, 2019 at 11:43:54AM +0100, Cédric Le Goater wrote:
>> On 3/12/19 11:26 AM, David Gibson wrote:
>>> On Mon, Mar 11, 2019 at 06:32:05PM +0100, Cédric Le Goater wrote:
>>>> On 2/26/19 12:22 AM, David Gibson wrote:
>>>>> On Fri, Feb 22, 2019 at 02:13:11PM +0100, Cédric Le Goater wrote:
>>> [snip]
>>>>>> +void kvmppc_xive_set_source_config(sPAPRXive *xive, uint32_t lisn, XiveEAS *eas,
>>>>>> +                                   Error **errp)
>>>>>> +{
>>>>>> +    uint32_t end_idx;
>>>>>> +    uint32_t end_blk;
>>>>>> +    uint32_t eisn;
>>>>>> +    uint8_t priority;
>>>>>> +    uint32_t server;
>>>>>> +    uint64_t kvm_src;
>>>>>> +    Error *local_err = NULL;
>>>>>> +
>>>>>> +    /*
>>>>>> +     * No need to set a MASKED source, this is the default state after
>>>>>> +     * reset.
>>>>>
>>>>> I don't quite follow this comment, why is there no need to call a
>>>>> MASKED source?
>>>>
>>>> because MASKED is the default state in which KVM initializes the IRQ. I will
>>>> clarify.
>>>
>>> I believe it's possible - though rare - to process an incoming
>>> migration on an established VM which isn't in fresh reset state.  So
>>> it's best not to rely on that.
>>>
>>>>>> +static void xive_esb_trigger(XiveSource *xsrc, int srcno)
>>>>>> +{
>>>>>> +    unsigned long addr = (unsigned long) xsrc->esb_mmap +
>>>>>> +        xive_source_esb_page(xsrc, srcno);
>>>>>> +
>>>>>> +    *((uint64_t *) addr) = 0x0;
>>>>>> +}
>>>>>
>>>>> Also.. aren't some of these register accesses likely to need memory
>>>>> barriers?
>>>>
>>>> AIUI, these are CI pages. So we shouldn't need barriers.
>>>
>>> CI doesn't negate the need for barriers, although it might change the
>>> type you need.  At the very least you need a compiler barrier to stop
>>> it re-ordering the access, but you can also have in-cpu store and load
>>> queues.
>>>
>>
>> ok. So I will need to add some smp_r/wmb() 
> 
> No, smp_[rw]mb() is for cases where it's strictly about cpu vs. cpu
> ordering.  Here it's cpu vs. IO ordering so you need plain [rw]mb().

I don't see any in QEMU ?

C.
David Gibson March 15, 2019, 12:26 a.m. UTC | #7
On Thu, Mar 14, 2019 at 10:24:49PM +0100, Cédric Le Goater wrote:
> On 3/14/19 3:11 AM, David Gibson wrote:
> > On Wed, Mar 13, 2019 at 11:43:54AM +0100, Cédric Le Goater wrote:
> >> On 3/12/19 11:26 AM, David Gibson wrote:
> >>> On Mon, Mar 11, 2019 at 06:32:05PM +0100, Cédric Le Goater wrote:
> >>>> On 2/26/19 12:22 AM, David Gibson wrote:
> >>>>> On Fri, Feb 22, 2019 at 02:13:11PM +0100, Cédric Le Goater wrote:
> >>> [snip]
> >>>>>> +void kvmppc_xive_set_source_config(sPAPRXive *xive, uint32_t lisn, XiveEAS *eas,
> >>>>>> +                                   Error **errp)
> >>>>>> +{
> >>>>>> +    uint32_t end_idx;
> >>>>>> +    uint32_t end_blk;
> >>>>>> +    uint32_t eisn;
> >>>>>> +    uint8_t priority;
> >>>>>> +    uint32_t server;
> >>>>>> +    uint64_t kvm_src;
> >>>>>> +    Error *local_err = NULL;
> >>>>>> +
> >>>>>> +    /*
> >>>>>> +     * No need to set a MASKED source, this is the default state after
> >>>>>> +     * reset.
> >>>>>
> >>>>> I don't quite follow this comment, why is there no need to call a
> >>>>> MASKED source?
> >>>>
> >>>> because MASKED is the default state in which KVM initializes the IRQ. I will
> >>>> clarify.
> >>>
> >>> I believe it's possible - though rare - to process an incoming
> >>> migration on an established VM which isn't in fresh reset state.  So
> >>> it's best not to rely on that.
> >>>
> >>>>>> +static void xive_esb_trigger(XiveSource *xsrc, int srcno)
> >>>>>> +{
> >>>>>> +    unsigned long addr = (unsigned long) xsrc->esb_mmap +
> >>>>>> +        xive_source_esb_page(xsrc, srcno);
> >>>>>> +
> >>>>>> +    *((uint64_t *) addr) = 0x0;
> >>>>>> +}
> >>>>>
> >>>>> Also.. aren't some of these register accesses likely to need memory
> >>>>> barriers?
> >>>>
> >>>> AIUI, these are CI pages. So we shouldn't need barriers.
> >>>
> >>> CI doesn't negate the need for barriers, although it might change the
> >>> type you need.  At the very least you need a compiler barrier to stop
> >>> it re-ordering the access, but you can also have in-cpu store and load
> >>> queues.
> >>>
> >>
> >> ok. So I will need to add some smp_r/wmb() 
> > 
> > No, smp_[rw]mb() is for cases where it's strictly about cpu vs. cpu
> > ordering.  Here it's cpu vs. IO ordering so you need plain [rw]mb().
> 
> I don't see any in QEMU ?

Ah, my mistake.  I was mixing up the kernel atomics and the qemu
atomics.
diff mbox series

Patch

diff --git a/include/hw/ppc/spapr_xive.h b/include/hw/ppc/spapr_xive.h
index ab6732b14a02..749c6cbc2c56 100644
--- a/include/hw/ppc/spapr_xive.h
+++ b/include/hw/ppc/spapr_xive.h
@@ -55,9 +55,24 @@  void spapr_xive_set_tctx_os_cam(XiveTCTX *tctx);
 void spapr_xive_mmio_set_enabled(sPAPRXive *xive, bool enable);
 void spapr_xive_map_mmio(sPAPRXive *xive);
 
+int spapr_xive_end_to_target(uint8_t end_blk, uint32_t end_idx,
+                             uint32_t *out_server, uint8_t *out_prio);
+
 /*
  * KVM XIVE device helpers
  */
 void kvmppc_xive_connect(sPAPRXive *xive, Error **errp);
+void kvmppc_xive_reset(sPAPRXive *xive, Error **errp);
+void kvmppc_xive_set_source_config(sPAPRXive *xive, uint32_t lisn, XiveEAS *eas,
+                                   Error **errp);
+void kvmppc_xive_sync_source(sPAPRXive *xive, uint32_t lisn, Error **errp);
+uint64_t kvmppc_xive_esb_rw(XiveSource *xsrc, int srcno, uint32_t offset,
+                            uint64_t data, bool write);
+void kvmppc_xive_set_queue_config(sPAPRXive *xive, uint8_t end_blk,
+                                 uint32_t end_idx, XiveEND *end,
+                                 Error **errp);
+void kvmppc_xive_get_queue_config(sPAPRXive *xive, uint8_t end_blk,
+                                 uint32_t end_idx, XiveEND *end,
+                                 Error **errp);
 
 #endif /* PPC_SPAPR_XIVE_H */
diff --git a/hw/intc/spapr_xive.c b/hw/intc/spapr_xive.c
index c24d649e3668..3db24391e31c 100644
--- a/hw/intc/spapr_xive.c
+++ b/hw/intc/spapr_xive.c
@@ -86,6 +86,19 @@  static int spapr_xive_target_to_nvt(uint32_t target,
  * sPAPR END indexing uses a simple mapping of the CPU vcpu_id, 8
  * priorities per CPU
  */
+int spapr_xive_end_to_target(uint8_t end_blk, uint32_t end_idx,
+                             uint32_t *out_server, uint8_t *out_prio)
+{
+    if (out_server) {
+        *out_server = end_idx >> 3;
+    }
+
+    if (out_prio) {
+        *out_prio = end_idx & 0x7;
+    }
+    return 0;
+}
+
 static void spapr_xive_cpu_to_end(PowerPCCPU *cpu, uint8_t prio,
                                   uint8_t *out_end_blk, uint32_t *out_end_idx)
 {
@@ -792,6 +805,16 @@  static target_ulong h_int_set_source_config(PowerPCCPU *cpu,
         new_eas.w = xive_set_field64(EAS_END_DATA, new_eas.w, eisn);
     }
 
+    if (kvm_irqchip_in_kernel()) {
+        Error *local_err = NULL;
+
+        kvmppc_xive_set_source_config(xive, lisn, &new_eas, &local_err);
+        if (local_err) {
+            error_report_err(local_err);
+            return H_HARDWARE;
+        }
+    }
+
 out:
     xive->eat[lisn] = new_eas;
     return H_SUCCESS;
@@ -1097,6 +1120,16 @@  static target_ulong h_int_set_queue_config(PowerPCCPU *cpu,
      */
 
 out:
+    if (kvm_irqchip_in_kernel()) {
+        Error *local_err = NULL;
+
+        kvmppc_xive_set_queue_config(xive, end_blk, end_idx, &end, &local_err);
+        if (local_err) {
+            error_report_err(local_err);
+            return H_HARDWARE;
+        }
+    }
+
     /* Update END */
     memcpy(&xive->endt[end_idx], &end, sizeof(XiveEND));
     return H_SUCCESS;
@@ -1189,6 +1222,16 @@  static target_ulong h_int_get_queue_config(PowerPCCPU *cpu,
         args[2] = 0;
     }
 
+    if (kvm_irqchip_in_kernel()) {
+        Error *local_err = NULL;
+
+        kvmppc_xive_get_queue_config(xive, end_blk, end_idx, end, &local_err);
+        if (local_err) {
+            error_report_err(local_err);
+            return H_HARDWARE;
+        }
+    }
+
     /* TODO: do we need any locking on the END ? */
     if (flags & SPAPR_XIVE_END_DEBUG) {
         /* Load the event queue generation number into the return flags */
@@ -1341,15 +1384,20 @@  static target_ulong h_int_esb(PowerPCCPU *cpu,
         return H_P3;
     }
 
-    mmio_addr = xive->vc_base + xive_source_esb_mgmt(xsrc, lisn) + offset;
+    if (kvm_irqchip_in_kernel()) {
+        args[0] = kvmppc_xive_esb_rw(xsrc, lisn, offset, data,
+                                     flags & SPAPR_XIVE_ESB_STORE);
+    } else {
+        mmio_addr = xive->vc_base + xive_source_esb_mgmt(xsrc, lisn) + offset;
 
-    if (dma_memory_rw(&address_space_memory, mmio_addr, &data, 8,
-                      (flags & SPAPR_XIVE_ESB_STORE))) {
-        qemu_log_mask(LOG_GUEST_ERROR, "XIVE: failed to access ESB @0x%"
-                      HWADDR_PRIx "\n", mmio_addr);
-        return H_HARDWARE;
+        if (dma_memory_rw(&address_space_memory, mmio_addr, &data, 8,
+                          (flags & SPAPR_XIVE_ESB_STORE))) {
+            qemu_log_mask(LOG_GUEST_ERROR, "XIVE: failed to access ESB @0x%"
+                          HWADDR_PRIx "\n", mmio_addr);
+            return H_HARDWARE;
+        }
+        args[0] = (flags & SPAPR_XIVE_ESB_STORE) ? -1 : data;
     }
-    args[0] = (flags & SPAPR_XIVE_ESB_STORE) ? -1 : data;
     return H_SUCCESS;
 }
 
@@ -1406,7 +1454,20 @@  static target_ulong h_int_sync(PowerPCCPU *cpu,
      * This is not needed when running the emulation under QEMU
      */
 
-    /* This is not real hardware. Nothing to be done */
+    /*
+     * This is not real hardware. Nothing to be done unless running
+     * under KVM.
+     */
+
+    if (kvm_irqchip_in_kernel()) {
+        Error *local_err = NULL;
+
+        kvmppc_xive_sync_source(xive, lisn, &local_err);
+        if (local_err) {
+            error_report_err(local_err);
+            return H_HARDWARE;
+        }
+    }
     return H_SUCCESS;
 }
 
@@ -1441,6 +1502,16 @@  static target_ulong h_int_reset(PowerPCCPU *cpu,
     }
 
     device_reset(DEVICE(xive));
+
+    if (kvm_irqchip_in_kernel()) {
+        Error *local_err = NULL;
+
+        kvmppc_xive_reset(xive, &local_err);
+        if (local_err) {
+            error_report_err(local_err);
+            return H_HARDWARE;
+        }
+    }
     return H_SUCCESS;
 }
 
diff --git a/hw/intc/spapr_xive_kvm.c b/hw/intc/spapr_xive_kvm.c
index 623fbf74f23e..6b50451b4f85 100644
--- a/hw/intc/spapr_xive_kvm.c
+++ b/hw/intc/spapr_xive_kvm.c
@@ -89,6 +89,52 @@  void kvmppc_xive_cpu_connect(XiveTCTX *tctx, Error **errp)
  * XIVE Interrupt Source (KVM)
  */
 
+void kvmppc_xive_set_source_config(sPAPRXive *xive, uint32_t lisn, XiveEAS *eas,
+                                   Error **errp)
+{
+    uint32_t end_idx;
+    uint32_t end_blk;
+    uint32_t eisn;
+    uint8_t priority;
+    uint32_t server;
+    uint64_t kvm_src;
+    Error *local_err = NULL;
+
+    /*
+     * No need to set a MASKED source, this is the default state after
+     * reset.
+     */
+    if (!xive_eas_is_valid(eas) || xive_eas_is_masked(eas)) {
+        return;
+    }
+
+    end_idx = xive_get_field64(EAS_END_INDEX, eas->w);
+    end_blk = xive_get_field64(EAS_END_BLOCK, eas->w);
+    eisn = xive_get_field64(EAS_END_DATA, eas->w);
+
+    spapr_xive_end_to_target(end_blk, end_idx, &server, &priority);
+
+    kvm_src = priority << KVM_XIVE_SOURCE_PRIORITY_SHIFT &
+        KVM_XIVE_SOURCE_PRIORITY_MASK;
+    kvm_src |= server << KVM_XIVE_SOURCE_SERVER_SHIFT &
+        KVM_XIVE_SOURCE_SERVER_MASK;
+    kvm_src |= ((uint64_t)eisn << KVM_XIVE_SOURCE_EISN_SHIFT) &
+        KVM_XIVE_SOURCE_EISN_MASK;
+
+    kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_SOURCE_CONFIG, lisn,
+                      &kvm_src, true, &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        return;
+    }
+}
+
+void kvmppc_xive_sync_source(sPAPRXive *xive, uint32_t lisn, Error **errp)
+{
+    kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_SOURCE_SYNC, lisn,
+                      NULL, true, errp);
+}
+
 /*
  * At reset, the interrupt sources are simply created and MASKED. We
  * only need to inform the KVM XIVE device about their type: LSI or
@@ -125,6 +171,64 @@  void kvmppc_xive_source_reset(XiveSource *xsrc, Error **errp)
     }
 }
 
+/*
+ * This is used to perform the magic loads on the ESB pages, described
+ * in xive.h.
+ */
+static uint64_t xive_esb_rw(XiveSource *xsrc, int srcno, uint32_t offset,
+                            uint64_t data, bool write)
+{
+    unsigned long addr = (unsigned long) xsrc->esb_mmap +
+        xive_source_esb_mgmt(xsrc, srcno) + offset;
+
+    if (write) {
+        *((uint64_t *) addr) = data;
+        return -1;
+    } else {
+        return *((uint64_t *) addr);
+    }
+}
+
+static uint8_t xive_esb_read(XiveSource *xsrc, int srcno, uint32_t offset)
+{
+    /* Prevent the compiler from optimizing away the load */
+    volatile uint64_t value = xive_esb_rw(xsrc, srcno, offset, 0, 0);
+
+    return be64_to_cpu(value) & 0x3;
+}
+
+static void xive_esb_trigger(XiveSource *xsrc, int srcno)
+{
+    unsigned long addr = (unsigned long) xsrc->esb_mmap +
+        xive_source_esb_page(xsrc, srcno);
+
+    *((uint64_t *) addr) = 0x0;
+}
+
+uint64_t kvmppc_xive_esb_rw(XiveSource *xsrc, int srcno, uint32_t offset,
+                            uint64_t data, bool write)
+{
+    if (write) {
+        return xive_esb_rw(xsrc, srcno, offset, data, 1);
+    }
+
+    /*
+     * Special Load EOI handling for LSI sources. Q bit is never set
+     * and the interrupt should be re-triggered if the level is still
+     * asserted.
+     */
+    if (xive_source_irq_is_lsi(xsrc, srcno) &&
+        offset == XIVE_ESB_LOAD_EOI) {
+        xive_esb_read(xsrc, srcno, XIVE_ESB_SET_PQ_00);
+        if (xsrc->status[srcno] & XIVE_STATUS_ASSERTED) {
+            xive_esb_trigger(xsrc, srcno);
+        }
+        return 0;
+    } else {
+        return xive_esb_rw(xsrc, srcno, offset, 0, 0);
+    }
+}
+
 void kvmppc_xive_source_set_irq(void *opaque, int srcno, int val)
 {
     XiveSource *xsrc = opaque;
@@ -155,6 +259,86 @@  void kvmppc_xive_source_set_irq(void *opaque, int srcno, int val)
 /*
  * sPAPR XIVE interrupt controller (KVM)
  */
+void kvmppc_xive_get_queue_config(sPAPRXive *xive, uint8_t end_blk,
+                                  uint32_t end_idx, XiveEND *end,
+                                  Error **errp)
+{
+    struct kvm_ppc_xive_eq kvm_eq = { 0 };
+    uint64_t kvm_eq_idx;
+    uint8_t priority;
+    uint32_t server;
+    Error *local_err = NULL;
+
+    if (!xive_end_is_valid(end)) {
+        return;
+    }
+
+    /* Encode the tuple (server, prio) as a KVM EQ index */
+    spapr_xive_end_to_target(end_blk, end_idx, &server, &priority);
+
+    kvm_eq_idx = priority << KVM_XIVE_EQ_PRIORITY_SHIFT &
+            KVM_XIVE_EQ_PRIORITY_MASK;
+    kvm_eq_idx |= server << KVM_XIVE_EQ_SERVER_SHIFT &
+        KVM_XIVE_EQ_SERVER_MASK;
+
+    kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_EQ_CONFIG, kvm_eq_idx,
+                      &kvm_eq, false, &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        return;
+    }
+
+    /*
+     * The EQ index and toggle bit are updated by HW. These are the
+     * only fields we want to return.
+     */
+    end->w1 = xive_set_field32(END_W1_GENERATION, 0ul, kvm_eq.qtoggle) |
+        xive_set_field32(END_W1_PAGE_OFF, 0ul, kvm_eq.qindex);
+}
+
+void kvmppc_xive_set_queue_config(sPAPRXive *xive, uint8_t end_blk,
+                                  uint32_t end_idx, XiveEND *end,
+                                  Error **errp)
+{
+    struct kvm_ppc_xive_eq kvm_eq = { 0 };
+    uint64_t kvm_eq_idx;
+    uint8_t priority;
+    uint32_t server;
+    Error *local_err = NULL;
+
+    if (!xive_end_is_valid(end)) {
+        return;
+    }
+
+    /* Build the KVM state from the local END structure */
+    kvm_eq.flags   = KVM_XIVE_EQ_FLAG_ALWAYS_NOTIFY;
+    kvm_eq.qsize   = xive_get_field32(END_W0_QSIZE, end->w0) + 12;
+    kvm_eq.qpage   = (uint64_t) be32_to_cpu(end->w2 & 0x0fffffff) << 32 |
+        be32_to_cpu(end->w3);
+    kvm_eq.qtoggle = xive_get_field32(END_W1_GENERATION, end->w1);
+    kvm_eq.qindex  = xive_get_field32(END_W1_PAGE_OFF, end->w1);
+
+    /* Encode the tuple (server, prio) as a KVM EQ index */
+    spapr_xive_end_to_target(end_blk, end_idx, &server, &priority);
+
+    kvm_eq_idx = priority << KVM_XIVE_EQ_PRIORITY_SHIFT &
+            KVM_XIVE_EQ_PRIORITY_MASK;
+    kvm_eq_idx |= server << KVM_XIVE_EQ_SERVER_SHIFT &
+        KVM_XIVE_EQ_SERVER_MASK;
+
+    kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_EQ_CONFIG, kvm_eq_idx,
+                      &kvm_eq, true, &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        return;
+    }
+}
+
+void kvmppc_xive_reset(sPAPRXive *xive, Error **errp)
+{
+    kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_CTRL, KVM_DEV_XIVE_RESET,
+                      NULL, true, errp);
+}
 
 static void *kvmppc_xive_mmap(sPAPRXive *xive, int pgoff, size_t len,
                               Error **errp)