diff mbox series

[PULL,26/44] spapr/xive: add KVM support

Message ID 20190529065017.15149-27-david@gibson.dropbear.id.au
State New
Headers show
Series [PULL,01/44] tests: Fix up docker cross builds for ppc64 (BE) targets | expand

Commit Message

David Gibson May 29, 2019, 6:49 a.m. UTC
From: Cédric Le Goater <clg@kaod.org>

This introduces a set of helpers when KVM is in use, which create the
KVM XIVE device, initialize the interrupt sources at a KVM level and
connect the interrupt presenters to the vCPU.

They also handle the initialization of the TIMA and the source ESB
memory regions of the controller. These have a different type under
KVM. They are 'ram device' memory mappings, similarly to VFIO, exposed
to the guest and the associated VMAs on the host are populated
dynamically with the appropriate pages using a fault handler.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Message-Id: <20190513084245.25755-3-clg@kaod.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
 hw/intc/Makefile.objs       |   1 +
 hw/intc/spapr_xive.c        |  48 +++++++-
 hw/intc/spapr_xive_kvm.c    | 237 ++++++++++++++++++++++++++++++++++++
 hw/intc/xive.c              |  21 +++-
 hw/ppc/Kconfig              |   5 +
 hw/ppc/spapr_irq.c          |   6 +-
 include/hw/ppc/spapr_xive.h |  10 ++
 include/hw/ppc/xive.h       |  13 ++
 target/ppc/kvm.c            |   7 ++
 target/ppc/kvm_ppc.h        |   6 +
 10 files changed, 344 insertions(+), 10 deletions(-)
 create mode 100644 hw/intc/spapr_xive_kvm.c

Comments

Alexey Kardashevskiy June 4, 2019, 7:23 a.m. UTC | #1
On 29/05/2019 16:49, David Gibson wrote:
> From: Cédric Le Goater <clg@kaod.org>
> 
> This introduces a set of helpers when KVM is in use, which create the
> KVM XIVE device, initialize the interrupt sources at a KVM level and
> connect the interrupt presenters to the vCPU.
> 
> They also handle the initialization of the TIMA and the source ESB
> memory regions of the controller. These have a different type under
> KVM. They are 'ram device' memory mappings, similarly to VFIO, exposed
> to the guest and the associated VMAs on the host are populated
> dynamically with the appropriate pages using a fault handler.
> 
> Signed-off-by: Cédric Le Goater <clg@kaod.org>
> Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
> Message-Id: <20190513084245.25755-3-clg@kaod.org>
> Signed-off-by: David Gibson <david@gibson.dropbear.id.au>


This one breaks my setup - it boots up to:


ipr: IBM Power RAID SCSI Device Driver version: 2.6.4 (March 14, 2017)
__vio_register_driver: driver ibmvscsi registering
ibmvscsi 71000001: SRP_VERSION: 16.a
ibmvscsi 71000001: Maximum ID: 64 Maximum LUN: 32 Maximum Channel: 3
scsi host0: IBM POWER Virtual SCSI Adapter 1.5.9


and hangs. Here is the command line:


/home/aik/pbuild/qemu-aikrhel74alt-ppc64/ppc64-softmmu/qemu-system-ppc64 \
-nodefaults \
-chardev stdio,id=STDIO0,signal=off,mux=on \
-device spapr-vty,id=svty0,reg=0x71000110,chardev=STDIO0 \
-mon id=MON0,chardev=STDIO0,mode=readline -nographic -vga none \
-enable-kvm \
-device nec-usb-xhci,id=nec-usb-xhci0 -m 16G \
-netdev "user,id=USER0,hostfwd=tcp::2223-:22" \
-device "virtio-net-pci,id=vnet0,mac=C0:41:49:4b:00:00,netdev=USER0" \
img/u1804-64G-cuda10.1-418.67-swiotlb.qcow2 \
-machine pseries,cap-cfpc=broken,cap-htm=off,ic-mode=xive -snapshot \
-smp 1,threads=1 -bios ./slof.bin \
-L /home/aik/t/qemu-ppc64-bios/ \
-trace events=qemu_trace_events -d guest_errors \
-chardev socket,id=SOCKET0,server,nowait,path=qemu.mon.user2223 \
-mon chardev=SOCKET0,mode=control


The host kernel is v5.2-rc2. The next patch - 0c575703e487 "spapr/xive:
add hcall support when under KVM" - fixes this, but the question now is
whether XIVE emulation in QEMU still works (how do I verify it?).


Any clues? Thanks,


> ---
>  hw/intc/Makefile.objs       |   1 +
>  hw/intc/spapr_xive.c        |  48 +++++++-
>  hw/intc/spapr_xive_kvm.c    | 237 ++++++++++++++++++++++++++++++++++++
>  hw/intc/xive.c              |  21 +++-
>  hw/ppc/Kconfig              |   5 +
>  hw/ppc/spapr_irq.c          |   6 +-
>  include/hw/ppc/spapr_xive.h |  10 ++
>  include/hw/ppc/xive.h       |  13 ++
>  target/ppc/kvm.c            |   7 ++
>  target/ppc/kvm_ppc.h        |   6 +
>  10 files changed, 344 insertions(+), 10 deletions(-)
>  create mode 100644 hw/intc/spapr_xive_kvm.c
> 
> diff --git a/hw/intc/Makefile.objs b/hw/intc/Makefile.objs
> index df712c3e6c..03019b9a03 100644
> --- a/hw/intc/Makefile.objs
> +++ b/hw/intc/Makefile.objs
> @@ -39,6 +39,7 @@ obj-$(CONFIG_XICS_SPAPR) += xics_spapr.o
>  obj-$(CONFIG_XICS_KVM) += xics_kvm.o
>  obj-$(CONFIG_XIVE) += xive.o
>  obj-$(CONFIG_XIVE_SPAPR) += spapr_xive.o
> +obj-$(CONFIG_XIVE_KVM) += spapr_xive_kvm.o
>  obj-$(CONFIG_POWERNV) += xics_pnv.o pnv_xive.o
>  obj-$(CONFIG_ALLWINNER_A10_PIC) += allwinner-a10-pic.o
>  obj-$(CONFIG_S390_FLIC) += s390_flic.o
> diff --git a/hw/intc/spapr_xive.c b/hw/intc/spapr_xive.c
> index 62e13ac353..27632683e6 100644
> --- a/hw/intc/spapr_xive.c
> +++ b/hw/intc/spapr_xive.c
> @@ -174,7 +174,7 @@ void spapr_xive_pic_print_info(SpaprXive *xive, Monitor *mon)
>      }
>  }
>  
> -static void spapr_xive_map_mmio(SpaprXive *xive)
> +void spapr_xive_map_mmio(SpaprXive *xive)
>  {
>      sysbus_mmio_map(SYS_BUS_DEVICE(xive), 0, xive->vc_base);
>      sysbus_mmio_map(SYS_BUS_DEVICE(xive), 1, xive->end_base);
> @@ -251,6 +251,9 @@ static void spapr_xive_instance_init(Object *obj)
>      object_initialize_child(obj, "end_source", &xive->end_source,
>                              sizeof(xive->end_source), TYPE_XIVE_END_SOURCE,
>                              &error_abort, NULL);
> +
> +    /* Not connected to the KVM XIVE device */
> +    xive->fd = -1;
>  }
>  
>  static void spapr_xive_realize(DeviceState *dev, Error **errp)
> @@ -259,6 +262,7 @@ static void spapr_xive_realize(DeviceState *dev, Error **errp)
>      XiveSource *xsrc = &xive->source;
>      XiveENDSource *end_xsrc = &xive->end_source;
>      Error *local_err = NULL;
> +    MachineState *machine = MACHINE(qdev_get_machine());
>  
>      if (!xive->nr_irqs) {
>          error_setg(errp, "Number of interrupt needs to be greater 0");
> @@ -305,6 +309,32 @@ static void spapr_xive_realize(DeviceState *dev, Error **errp)
>      xive->eat = g_new0(XiveEAS, xive->nr_irqs);
>      xive->endt = g_new0(XiveEND, xive->nr_ends);
>  
> +    xive->nodename = g_strdup_printf("interrupt-controller@%" PRIx64,
> +                           xive->tm_base + XIVE_TM_USER_PAGE * (1 << TM_SHIFT));
> +
> +    qemu_register_reset(spapr_xive_reset, dev);
> +
> +    if (kvm_enabled() && machine_kernel_irqchip_allowed(machine)) {
> +        kvmppc_xive_connect(xive, &local_err);
> +        if (local_err && machine_kernel_irqchip_required(machine)) {
> +            error_prepend(&local_err,
> +                          "kernel_irqchip requested but unavailable: ");
> +            error_propagate(errp, local_err);
> +            return;
> +        }
> +
> +        if (!local_err) {
> +            return;
> +        }
> +
> +        /*
> +         * We failed to initialize the XIVE KVM device, fallback to
> +         * emulated mode
> +         */
> +        error_prepend(&local_err, "kernel_irqchip allowed but unavailable: ");
> +        warn_report_err(local_err);
> +    }
> +
>      /* TIMA initialization */
>      memory_region_init_io(&xive->tm_mmio, OBJECT(xive), &xive_tm_ops, xive,
>                            "xive.tima", 4ull << TM_SHIFT);
> @@ -316,11 +346,6 @@ static void spapr_xive_realize(DeviceState *dev, Error **errp)
>  
>      /* Map all regions */
>      spapr_xive_map_mmio(xive);
> -
> -    xive->nodename = g_strdup_printf("interrupt-controller@%" PRIx64,
> -                           xive->tm_base + XIVE_TM_USER_PAGE * (1 << TM_SHIFT));
> -
> -    qemu_register_reset(spapr_xive_reset, dev);
>  }
>  
>  static int spapr_xive_get_eas(XiveRouter *xrtr, uint8_t eas_blk,
> @@ -495,6 +520,17 @@ bool spapr_xive_irq_claim(SpaprXive *xive, uint32_t lisn, bool lsi)
>      if (lsi) {
>          xive_source_irq_set_lsi(xsrc, lisn);
>      }
> +
> +    if (kvm_irqchip_in_kernel()) {
> +        Error *local_err = NULL;
> +
> +        kvmppc_xive_source_reset_one(xsrc, lisn, &local_err);
> +        if (local_err) {
> +            error_report_err(local_err);
> +            return false;
> +        }
> +    }
> +
>      return true;
>  }
>  
> diff --git a/hw/intc/spapr_xive_kvm.c b/hw/intc/spapr_xive_kvm.c
> new file mode 100644
> index 0000000000..7d9e771e8a
> --- /dev/null
> +++ b/hw/intc/spapr_xive_kvm.c
> @@ -0,0 +1,237 @@
> +/*
> + * QEMU PowerPC sPAPR XIVE interrupt controller model
> + *
> + * Copyright (c) 2017-2019, IBM Corporation.
> + *
> + * This code is licensed under the GPL version 2 or later. See the
> + * COPYING file in the top-level directory.
> + */
> +
> +#include "qemu/osdep.h"
> +#include "qemu/log.h"
> +#include "qemu/error-report.h"
> +#include "qapi/error.h"
> +#include "target/ppc/cpu.h"
> +#include "sysemu/cpus.h"
> +#include "sysemu/kvm.h"
> +#include "hw/ppc/spapr.h"
> +#include "hw/ppc/spapr_xive.h"
> +#include "hw/ppc/xive.h"
> +#include "kvm_ppc.h"
> +
> +#include <sys/ioctl.h>
> +
> +/*
> + * Helpers for CPU hotplug
> + *
> + * TODO: make a common KVMEnabledCPU layer for XICS and XIVE
> + */
> +typedef struct KVMEnabledCPU {
> +    unsigned long vcpu_id;
> +    QLIST_ENTRY(KVMEnabledCPU) node;
> +} KVMEnabledCPU;
> +
> +static QLIST_HEAD(, KVMEnabledCPU)
> +    kvm_enabled_cpus = QLIST_HEAD_INITIALIZER(&kvm_enabled_cpus);
> +
> +static bool kvm_cpu_is_enabled(CPUState *cs)
> +{
> +    KVMEnabledCPU *enabled_cpu;
> +    unsigned long vcpu_id = kvm_arch_vcpu_id(cs);
> +
> +    QLIST_FOREACH(enabled_cpu, &kvm_enabled_cpus, node) {
> +        if (enabled_cpu->vcpu_id == vcpu_id) {
> +            return true;
> +        }
> +    }
> +    return false;
> +}
> +
> +static void kvm_cpu_enable(CPUState *cs)
> +{
> +    KVMEnabledCPU *enabled_cpu;
> +    unsigned long vcpu_id = kvm_arch_vcpu_id(cs);
> +
> +    enabled_cpu = g_malloc(sizeof(*enabled_cpu));
> +    enabled_cpu->vcpu_id = vcpu_id;
> +    QLIST_INSERT_HEAD(&kvm_enabled_cpus, enabled_cpu, node);
> +}
> +
> +/*
> + * XIVE Thread Interrupt Management context (KVM)
> + */
> +
> +void kvmppc_xive_cpu_connect(XiveTCTX *tctx, Error **errp)
> +{
> +    SpaprXive *xive = SPAPR_MACHINE(qdev_get_machine())->xive;
> +    unsigned long vcpu_id;
> +    int ret;
> +
> +    /* Check if CPU was hot unplugged and replugged. */
> +    if (kvm_cpu_is_enabled(tctx->cs)) {
> +        return;
> +    }
> +
> +    vcpu_id = kvm_arch_vcpu_id(tctx->cs);
> +
> +    ret = kvm_vcpu_enable_cap(tctx->cs, KVM_CAP_PPC_IRQ_XIVE, 0, xive->fd,
> +                              vcpu_id, 0);
> +    if (ret < 0) {
> +        error_setg(errp, "XIVE: unable to connect CPU%ld to KVM device: %s",
> +                   vcpu_id, strerror(errno));
> +        return;
> +    }
> +
> +    kvm_cpu_enable(tctx->cs);
> +}
> +
> +/*
> + * XIVE Interrupt Source (KVM)
> + */
> +
> +/*
> + * At reset, the interrupt sources are simply created and MASKED. We
> + * only need to inform the KVM XIVE device about their type: LSI or
> + * MSI.
> + */
> +void kvmppc_xive_source_reset_one(XiveSource *xsrc, int srcno, Error **errp)
> +{
> +    SpaprXive *xive = SPAPR_XIVE(xsrc->xive);
> +    uint64_t state = 0;
> +
> +    if (xive_source_irq_is_lsi(xsrc, srcno)) {
> +        state |= KVM_XIVE_LEVEL_SENSITIVE;
> +        if (xsrc->status[srcno] & XIVE_STATUS_ASSERTED) {
> +            state |= KVM_XIVE_LEVEL_ASSERTED;
> +        }
> +    }
> +
> +    kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_SOURCE, srcno, &state,
> +                      true, errp);
> +}
> +
> +void kvmppc_xive_source_reset(XiveSource *xsrc, Error **errp)
> +{
> +    int i;
> +
> +    for (i = 0; i < xsrc->nr_irqs; i++) {
> +        Error *local_err = NULL;
> +
> +        kvmppc_xive_source_reset_one(xsrc, i, &local_err);
> +        if (local_err) {
> +            error_propagate(errp, local_err);
> +            return;
> +        }
> +    }
> +}
> +
> +void kvmppc_xive_source_set_irq(void *opaque, int srcno, int val)
> +{
> +    XiveSource *xsrc = opaque;
> +    struct kvm_irq_level args;
> +    int rc;
> +
> +    args.irq = srcno;
> +    if (!xive_source_irq_is_lsi(xsrc, srcno)) {
> +        if (!val) {
> +            return;
> +        }
> +        args.level = KVM_INTERRUPT_SET;
> +    } else {
> +        if (val) {
> +            xsrc->status[srcno] |= XIVE_STATUS_ASSERTED;
> +            args.level = KVM_INTERRUPT_SET_LEVEL;
> +        } else {
> +            xsrc->status[srcno] &= ~XIVE_STATUS_ASSERTED;
> +            args.level = KVM_INTERRUPT_UNSET;
> +        }
> +    }
> +    rc = kvm_vm_ioctl(kvm_state, KVM_IRQ_LINE, &args);
> +    if (rc < 0) {
> +        error_report("XIVE: kvm_irq_line() failed : %s", strerror(errno));
> +    }
> +}
> +
> +/*
> + * sPAPR XIVE interrupt controller (KVM)
> + */
> +
> +static void *kvmppc_xive_mmap(SpaprXive *xive, int pgoff, size_t len,
> +                              Error **errp)
> +{
> +    void *addr;
> +    uint32_t page_shift = 16; /* TODO: fix page_shift */
> +
> +    addr = mmap(NULL, len, PROT_WRITE | PROT_READ, MAP_SHARED, xive->fd,
> +                pgoff << page_shift);
> +    if (addr == MAP_FAILED) {
> +        error_setg_errno(errp, errno, "XIVE: unable to set memory mapping");
> +        return NULL;
> +    }
> +
> +    return addr;
> +}
> +
> +/*
> + * All the XIVE memory regions are now backed by mappings from the KVM
> + * XIVE device.
> + */
> +void kvmppc_xive_connect(SpaprXive *xive, Error **errp)
> +{
> +    XiveSource *xsrc = &xive->source;
> +    XiveENDSource *end_xsrc = &xive->end_source;
> +    Error *local_err = NULL;
> +    size_t esb_len = (1ull << xsrc->esb_shift) * xsrc->nr_irqs;
> +    size_t tima_len = 4ull << TM_SHIFT;
> +
> +    if (!kvmppc_has_cap_xive()) {
> +        error_setg(errp, "IRQ_XIVE capability must be present for KVM");
> +        return;
> +    }
> +
> +    /* First, create the KVM XIVE device */
> +    xive->fd = kvm_create_device(kvm_state, KVM_DEV_TYPE_XIVE, false);
> +    if (xive->fd < 0) {
> +        error_setg_errno(errp, -xive->fd, "XIVE: error creating KVM device");
> +        return;
> +    }
> +
> +    /*
> +     * 1. Source ESB pages - KVM mapping
> +     */
> +    xsrc->esb_mmap = kvmppc_xive_mmap(xive, KVM_XIVE_ESB_PAGE_OFFSET, esb_len,
> +                                      &local_err);
> +    if (local_err) {
> +        error_propagate(errp, local_err);
> +        return;
> +    }
> +
> +    memory_region_init_ram_device_ptr(&xsrc->esb_mmio, OBJECT(xsrc),
> +                                      "xive.esb", esb_len, xsrc->esb_mmap);
> +    sysbus_init_mmio(SYS_BUS_DEVICE(xive), &xsrc->esb_mmio);
> +
> +    /*
> +     * 2. END ESB pages (No KVM support yet)
> +     */
> +    sysbus_init_mmio(SYS_BUS_DEVICE(xive), &end_xsrc->esb_mmio);
> +
> +    /*
> +     * 3. TIMA pages - KVM mapping
> +     */
> +    xive->tm_mmap = kvmppc_xive_mmap(xive, KVM_XIVE_TIMA_PAGE_OFFSET, tima_len,
> +                                     &local_err);
> +    if (local_err) {
> +        error_propagate(errp, local_err);
> +        return;
> +    }
> +    memory_region_init_ram_device_ptr(&xive->tm_mmio, OBJECT(xive),
> +                                      "xive.tima", tima_len, xive->tm_mmap);
> +    sysbus_init_mmio(SYS_BUS_DEVICE(xive), &xive->tm_mmio);
> +
> +    kvm_kernel_irqchip = true;
> +    kvm_msi_via_irqfd_allowed = true;
> +    kvm_gsi_direct_mapping = true;
> +
> +    /* Map all regions */
> +    spapr_xive_map_mmio(xive);
> +}
> diff --git a/hw/intc/xive.c b/hw/intc/xive.c
> index dcf2fcd108..78047adb11 100644
> --- a/hw/intc/xive.c
> +++ b/hw/intc/xive.c
> @@ -555,6 +555,15 @@ static void xive_tctx_realize(DeviceState *dev, Error **errp)
>          return;
>      }
>  
> +    /* Connect the presenter to the VCPU (required for CPU hotplug) */
> +    if (kvm_irqchip_in_kernel()) {
> +        kvmppc_xive_cpu_connect(tctx, &local_err);
> +        if (local_err) {
> +            error_propagate(errp, local_err);
> +            return;
> +        }
> +    }
> +
>      qemu_register_reset(xive_tctx_reset, dev);
>  }
>  
> @@ -957,6 +966,10 @@ static void xive_source_reset(void *dev)
>  
>      /* PQs are initialized to 0b01 (Q=1) which corresponds to "ints off" */
>      memset(xsrc->status, XIVE_ESB_OFF, xsrc->nr_irqs);
> +
> +    if (kvm_irqchip_in_kernel()) {
> +        kvmppc_xive_source_reset(xsrc, &error_fatal);
> +    }
>  }
>  
>  static void xive_source_realize(DeviceState *dev, Error **errp)
> @@ -990,9 +1003,11 @@ static void xive_source_realize(DeviceState *dev, Error **errp)
>      xsrc->status = g_malloc0(xsrc->nr_irqs);
>      xsrc->lsi_map = bitmap_new(xsrc->nr_irqs);
>  
> -    memory_region_init_io(&xsrc->esb_mmio, OBJECT(xsrc),
> -                          &xive_source_esb_ops, xsrc, "xive.esb",
> -                          (1ull << xsrc->esb_shift) * xsrc->nr_irqs);
> +    if (!kvm_irqchip_in_kernel()) {
> +        memory_region_init_io(&xsrc->esb_mmio, OBJECT(xsrc),
> +                              &xive_source_esb_ops, xsrc, "xive.esb",
> +                              (1ull << xsrc->esb_shift) * xsrc->nr_irqs);
> +    }
>  
>      qemu_register_reset(xive_source_reset, dev);
>  }
> diff --git a/hw/ppc/Kconfig b/hw/ppc/Kconfig
> index a3465155f0..f927ec9c74 100644
> --- a/hw/ppc/Kconfig
> +++ b/hw/ppc/Kconfig
> @@ -122,3 +122,8 @@ config XIVE_SPAPR
>      default y
>      depends on PSERIES
>      select XIVE
> +
> +config XIVE_KVM
> +    bool
> +    default y
> +    depends on XIVE_SPAPR && KVM
> diff --git a/hw/ppc/spapr_irq.c b/hw/ppc/spapr_irq.c
> index b1f79ea9de..5c4a44855d 100644
> --- a/hw/ppc/spapr_irq.c
> +++ b/hw/ppc/spapr_irq.c
> @@ -372,7 +372,11 @@ static void spapr_irq_set_irq_xive(void *opaque, int srcno, int val)
>  {
>      SpaprMachineState *spapr = opaque;
>  
> -    xive_source_set_irq(&spapr->xive->source, srcno, val);
> +    if (kvm_irqchip_in_kernel()) {
> +        kvmppc_xive_source_set_irq(&spapr->xive->source, srcno, val);
> +    } else {
> +        xive_source_set_irq(&spapr->xive->source, srcno, val);
> +    }
>  }
>  
>  static const char *spapr_irq_get_nodename_xive(SpaprMachineState *spapr)
> diff --git a/include/hw/ppc/spapr_xive.h b/include/hw/ppc/spapr_xive.h
> index fc3e9652f9..0edcc762de 100644
> --- a/include/hw/ppc/spapr_xive.h
> +++ b/include/hw/ppc/spapr_xive.h
> @@ -38,6 +38,10 @@ typedef struct SpaprXive {
>      /* TIMA mapping address */
>      hwaddr        tm_base;
>      MemoryRegion  tm_mmio;
> +
> +    /* KVM support */
> +    int           fd;
> +    void          *tm_mmap;
>  } SpaprXive;
>  
>  bool spapr_xive_irq_claim(SpaprXive *xive, uint32_t lisn, bool lsi);
> @@ -49,5 +53,11 @@ void spapr_dt_xive(SpaprMachineState *spapr, uint32_t nr_servers, void *fdt,
>                     uint32_t phandle);
>  void spapr_xive_set_tctx_os_cam(XiveTCTX *tctx);
>  void spapr_xive_mmio_set_enabled(SpaprXive *xive, bool enable);
> +void spapr_xive_map_mmio(SpaprXive *xive);
> +
> +/*
> + * KVM XIVE device helpers
> + */
> +void kvmppc_xive_connect(SpaprXive *xive, Error **errp);
>  
>  #endif /* PPC_SPAPR_XIVE_H */
> diff --git a/include/hw/ppc/xive.h b/include/hw/ppc/xive.h
> index c4f27742ca..dd115da30e 100644
> --- a/include/hw/ppc/xive.h
> +++ b/include/hw/ppc/xive.h
> @@ -140,6 +140,7 @@
>  #ifndef PPC_XIVE_H
>  #define PPC_XIVE_H
>  
> +#include "sysemu/kvm.h"
>  #include "hw/qdev-core.h"
>  #include "hw/sysbus.h"
>  #include "hw/ppc/xive_regs.h"
> @@ -194,6 +195,9 @@ typedef struct XiveSource {
>      uint32_t        esb_shift;
>      MemoryRegion    esb_mmio;
>  
> +    /* KVM support */
> +    void            *esb_mmap;
> +
>      XiveNotifier    *xive;
>  } XiveSource;
>  
> @@ -423,4 +427,13 @@ static inline uint32_t xive_nvt_cam_line(uint8_t nvt_blk, uint32_t nvt_idx)
>      return (nvt_blk << 19) | nvt_idx;
>  }
>  
> +/*
> + * KVM XIVE device helpers
> + */
> +
> +void kvmppc_xive_source_reset_one(XiveSource *xsrc, int srcno, Error **errp);
> +void kvmppc_xive_source_reset(XiveSource *xsrc, Error **errp);
> +void kvmppc_xive_source_set_irq(void *opaque, int srcno, int val);
> +void kvmppc_xive_cpu_connect(XiveTCTX *tctx, Error **errp);
> +
>  #endif /* PPC_XIVE_H */
> diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c
> index 1a9caf8f40..3bf0a46c33 100644
> --- a/target/ppc/kvm.c
> +++ b/target/ppc/kvm.c
> @@ -75,6 +75,7 @@ static int cap_fixup_hcalls;
>  static int cap_htm;             /* Hardware transactional memory support */
>  static int cap_mmu_radix;
>  static int cap_mmu_hash_v3;
> +static int cap_xive;
>  static int cap_resize_hpt;
>  static int cap_ppc_pvr_compat;
>  static int cap_ppc_safe_cache;
> @@ -146,6 +147,7 @@ int kvm_arch_init(MachineState *ms, KVMState *s)
>      cap_htm = kvm_vm_check_extension(s, KVM_CAP_PPC_HTM);
>      cap_mmu_radix = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_RADIX);
>      cap_mmu_hash_v3 = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_HASH_V3);
> +    cap_xive = kvm_vm_check_extension(s, KVM_CAP_PPC_IRQ_XIVE);
>      cap_resize_hpt = kvm_vm_check_extension(s, KVM_CAP_SPAPR_RESIZE_HPT);
>      kvmppc_get_cpu_characteristics(s);
>      cap_ppc_nested_kvm_hv = kvm_vm_check_extension(s, KVM_CAP_PPC_NESTED_HV);
> @@ -2478,6 +2480,11 @@ static int parse_cap_ppc_count_cache_flush_assist(struct kvm_ppc_cpu_char c)
>      return 0;
>  }
>  
> +bool kvmppc_has_cap_xive(void)
> +{
> +    return cap_xive;
> +}
> +
>  static void kvmppc_get_cpu_characteristics(KVMState *s)
>  {
>      struct kvm_ppc_cpu_char c;
> diff --git a/target/ppc/kvm_ppc.h b/target/ppc/kvm_ppc.h
> index 22385134b4..45776cad79 100644
> --- a/target/ppc/kvm_ppc.h
> +++ b/target/ppc/kvm_ppc.h
> @@ -60,6 +60,7 @@ bool kvmppc_has_cap_fixup_hcalls(void);
>  bool kvmppc_has_cap_htm(void);
>  bool kvmppc_has_cap_mmu_radix(void);
>  bool kvmppc_has_cap_mmu_hash_v3(void);
> +bool kvmppc_has_cap_xive(void);
>  int kvmppc_get_cap_safe_cache(void);
>  int kvmppc_get_cap_safe_bounds_check(void);
>  int kvmppc_get_cap_safe_indirect_branch(void);
> @@ -316,6 +317,11 @@ static inline bool kvmppc_has_cap_mmu_hash_v3(void)
>      return false;
>  }
>  
> +static inline bool kvmppc_has_cap_xive(void)
> +{
> +    return false;
> +}
> +
>  static inline int kvmppc_get_cap_safe_cache(void)
>  {
>      return 0;
>
Cédric Le Goater June 4, 2019, 7:54 a.m. UTC | #2
On 04/06/2019 09:23, Alexey Kardashevskiy wrote:
> 
> 
> On 29/05/2019 16:49, David Gibson wrote:
>> From: Cédric Le Goater <clg@kaod.org>
>>
>> This introduces a set of helpers when KVM is in use, which create the
>> KVM XIVE device, initialize the interrupt sources at a KVM level and
>> connect the interrupt presenters to the vCPU.
>>
>> They also handle the initialization of the TIMA and the source ESB
>> memory regions of the controller. These have a different type under
>> KVM. They are 'ram device' memory mappings, similarly to VFIO, exposed
>> to the guest and the associated VMAs on the host are populated
>> dynamically with the appropriate pages using a fault handler.
>>
>> Signed-off-by: Cédric Le Goater <clg@kaod.org>
>> Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
>> Message-Id: <20190513084245.25755-3-clg@kaod.org>
>> Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
> 
> 
> This one breaks my setup - it boots up to:
> 
> 
> ipr: IBM Power RAID SCSI Device Driver version: 2.6.4 (March 14, 2017)
> __vio_register_driver: driver ibmvscsi registering
> ibmvscsi 71000001: SRP_VERSION: 16.a
> ibmvscsi 71000001: Maximum ID: 64 Maximum LUN: 32 Maximum Channel: 3
> scsi host0: IBM POWER Virtual SCSI Adapter 1.5.9
> 
> 
> and hangs. Here is the command line:
> 
> 
> /home/aik/pbuild/qemu-aikrhel74alt-ppc64/ppc64-softmmu/qemu-system-ppc64 \
> -nodefaults \
> -chardev stdio,id=STDIO0,signal=off,mux=on \
> -device spapr-vty,id=svty0,reg=0x71000110,chardev=STDIO0 \
> -mon id=MON0,chardev=STDIO0,mode=readline -nographic -vga none \
> -enable-kvm \
> -device nec-usb-xhci,id=nec-usb-xhci0 -m 16G \
> -netdev "user,id=USER0,hostfwd=tcp::2223-:22" \
> -device "virtio-net-pci,id=vnet0,mac=C0:41:49:4b:00:00,netdev=USER0" \
> img/u1804-64G-cuda10.1-418.67-swiotlb.qcow2 \
> -machine pseries,cap-cfpc=broken,cap-htm=off,ic-mode=xive -snapshot \
> -smp 1,threads=1 -bios ./slof.bin \
> -L /home/aik/t/qemu-ppc64-bios/ \
> -trace events=qemu_trace_events -d guest_errors \
> -chardev socket,id=SOCKET0,server,nowait,path=qemu.mon.user2223 \
> -mon chardev=SOCKET0,mode=control

At this point in the series, i.e. at patch 38afd772f802 ("spapr/xive: add
KVM support"), I am surprised the guest is even starting.

The test in spapr_irq_init_xive() : 

    /* KVM XIVE device not yet available */
    if (kvm_enabled()) {
        if (machine_kernel_irqchip_required(machine)) {
            error_setg(errp, "kernel_irqchip requested. no KVM XIVE support");
            return;
        }
    }

should fail. This is removed later in 0dc9f5f8496a ("spapr/xive: activate 
KVM support")

> The host kernel is v5.2-rc2. The next patch - 0c575703e487 "spapr/xive:
> add hcall support when under KVM" - fixes this though but the question
> is now if xive emulation in qemu still works (how do I verify it?).

kernel_irqchip=off should activate the QEMU XIVE device.

Are you testing bisection?

C.

> 
> Any clues? Thanks,
> 
> 
>> ---
>>  hw/intc/Makefile.objs       |   1 +
>>  hw/intc/spapr_xive.c        |  48 +++++++-
>>  hw/intc/spapr_xive_kvm.c    | 237 ++++++++++++++++++++++++++++++++++++
>>  hw/intc/xive.c              |  21 +++-
>>  hw/ppc/Kconfig              |   5 +
>>  hw/ppc/spapr_irq.c          |   6 +-
>>  include/hw/ppc/spapr_xive.h |  10 ++
>>  include/hw/ppc/xive.h       |  13 ++
>>  target/ppc/kvm.c            |   7 ++
>>  target/ppc/kvm_ppc.h        |   6 +
>>  10 files changed, 344 insertions(+), 10 deletions(-)
>>  create mode 100644 hw/intc/spapr_xive_kvm.c
>>
>> diff --git a/hw/intc/Makefile.objs b/hw/intc/Makefile.objs
>> index df712c3e6c..03019b9a03 100644
>> --- a/hw/intc/Makefile.objs
>> +++ b/hw/intc/Makefile.objs
>> @@ -39,6 +39,7 @@ obj-$(CONFIG_XICS_SPAPR) += xics_spapr.o
>>  obj-$(CONFIG_XICS_KVM) += xics_kvm.o
>>  obj-$(CONFIG_XIVE) += xive.o
>>  obj-$(CONFIG_XIVE_SPAPR) += spapr_xive.o
>> +obj-$(CONFIG_XIVE_KVM) += spapr_xive_kvm.o
>>  obj-$(CONFIG_POWERNV) += xics_pnv.o pnv_xive.o
>>  obj-$(CONFIG_ALLWINNER_A10_PIC) += allwinner-a10-pic.o
>>  obj-$(CONFIG_S390_FLIC) += s390_flic.o
>> diff --git a/hw/intc/spapr_xive.c b/hw/intc/spapr_xive.c
>> index 62e13ac353..27632683e6 100644
>> --- a/hw/intc/spapr_xive.c
>> +++ b/hw/intc/spapr_xive.c
>> @@ -174,7 +174,7 @@ void spapr_xive_pic_print_info(SpaprXive *xive, Monitor *mon)
>>      }
>>  }
>>  
>> -static void spapr_xive_map_mmio(SpaprXive *xive)
>> +void spapr_xive_map_mmio(SpaprXive *xive)
>>  {
>>      sysbus_mmio_map(SYS_BUS_DEVICE(xive), 0, xive->vc_base);
>>      sysbus_mmio_map(SYS_BUS_DEVICE(xive), 1, xive->end_base);
>> @@ -251,6 +251,9 @@ static void spapr_xive_instance_init(Object *obj)
>>      object_initialize_child(obj, "end_source", &xive->end_source,
>>                              sizeof(xive->end_source), TYPE_XIVE_END_SOURCE,
>>                              &error_abort, NULL);
>> +
>> +    /* Not connected to the KVM XIVE device */
>> +    xive->fd = -1;
>>  }
>>  
>>  static void spapr_xive_realize(DeviceState *dev, Error **errp)
>> @@ -259,6 +262,7 @@ static void spapr_xive_realize(DeviceState *dev, Error **errp)
>>      XiveSource *xsrc = &xive->source;
>>      XiveENDSource *end_xsrc = &xive->end_source;
>>      Error *local_err = NULL;
>> +    MachineState *machine = MACHINE(qdev_get_machine());
>>  
>>      if (!xive->nr_irqs) {
>>          error_setg(errp, "Number of interrupt needs to be greater 0");
>> @@ -305,6 +309,32 @@ static void spapr_xive_realize(DeviceState *dev, Error **errp)
>>      xive->eat = g_new0(XiveEAS, xive->nr_irqs);
>>      xive->endt = g_new0(XiveEND, xive->nr_ends);
>>  
>> +    xive->nodename = g_strdup_printf("interrupt-controller@%" PRIx64,
>> +                           xive->tm_base + XIVE_TM_USER_PAGE * (1 << TM_SHIFT));
>> +
>> +    qemu_register_reset(spapr_xive_reset, dev);
>> +
>> +    if (kvm_enabled() && machine_kernel_irqchip_allowed(machine)) {
>> +        kvmppc_xive_connect(xive, &local_err);
>> +        if (local_err && machine_kernel_irqchip_required(machine)) {
>> +            error_prepend(&local_err,
>> +                          "kernel_irqchip requested but unavailable: ");
>> +            error_propagate(errp, local_err);
>> +            return;
>> +        }
>> +
>> +        if (!local_err) {
>> +            return;
>> +        }
>> +
>> +        /*
>> +         * We failed to initialize the XIVE KVM device, fallback to
>> +         * emulated mode
>> +         */
>> +        error_prepend(&local_err, "kernel_irqchip allowed but unavailable: ");
>> +        warn_report_err(local_err);
>> +    }
>> +
>>      /* TIMA initialization */
>>      memory_region_init_io(&xive->tm_mmio, OBJECT(xive), &xive_tm_ops, xive,
>>                            "xive.tima", 4ull << TM_SHIFT);
>> @@ -316,11 +346,6 @@ static void spapr_xive_realize(DeviceState *dev, Error **errp)
>>  
>>      /* Map all regions */
>>      spapr_xive_map_mmio(xive);
>> -
>> -    xive->nodename = g_strdup_printf("interrupt-controller@%" PRIx64,
>> -                           xive->tm_base + XIVE_TM_USER_PAGE * (1 << TM_SHIFT));
>> -
>> -    qemu_register_reset(spapr_xive_reset, dev);
>>  }
>>  
>>  static int spapr_xive_get_eas(XiveRouter *xrtr, uint8_t eas_blk,
>> @@ -495,6 +520,17 @@ bool spapr_xive_irq_claim(SpaprXive *xive, uint32_t lisn, bool lsi)
>>      if (lsi) {
>>          xive_source_irq_set_lsi(xsrc, lisn);
>>      }
>> +
>> +    if (kvm_irqchip_in_kernel()) {
>> +        Error *local_err = NULL;
>> +
>> +        kvmppc_xive_source_reset_one(xsrc, lisn, &local_err);
>> +        if (local_err) {
>> +            error_report_err(local_err);
>> +            return false;
>> +        }
>> +    }
>> +
>>      return true;
>>  }
>>  
>> diff --git a/hw/intc/spapr_xive_kvm.c b/hw/intc/spapr_xive_kvm.c
>> new file mode 100644
>> index 0000000000..7d9e771e8a
>> --- /dev/null
>> +++ b/hw/intc/spapr_xive_kvm.c
>> @@ -0,0 +1,237 @@
>> +/*
>> + * QEMU PowerPC sPAPR XIVE interrupt controller model
>> + *
>> + * Copyright (c) 2017-2019, IBM Corporation.
>> + *
>> + * This code is licensed under the GPL version 2 or later. See the
>> + * COPYING file in the top-level directory.
>> + */
>> +
>> +#include "qemu/osdep.h"
>> +#include "qemu/log.h"
>> +#include "qemu/error-report.h"
>> +#include "qapi/error.h"
>> +#include "target/ppc/cpu.h"
>> +#include "sysemu/cpus.h"
>> +#include "sysemu/kvm.h"
>> +#include "hw/ppc/spapr.h"
>> +#include "hw/ppc/spapr_xive.h"
>> +#include "hw/ppc/xive.h"
>> +#include "kvm_ppc.h"
>> +
>> +#include <sys/ioctl.h>
>> +
>> +/*
>> + * Helpers for CPU hotplug
>> + *
>> + * TODO: make a common KVMEnabledCPU layer for XICS and XIVE
>> + */
>> +typedef struct KVMEnabledCPU {
>> +    unsigned long vcpu_id;
>> +    QLIST_ENTRY(KVMEnabledCPU) node;
>> +} KVMEnabledCPU;
>> +
>> +static QLIST_HEAD(, KVMEnabledCPU)
>> +    kvm_enabled_cpus = QLIST_HEAD_INITIALIZER(&kvm_enabled_cpus);
>> +
>> +static bool kvm_cpu_is_enabled(CPUState *cs)
>> +{
>> +    KVMEnabledCPU *enabled_cpu;
>> +    unsigned long vcpu_id = kvm_arch_vcpu_id(cs);
>> +
>> +    QLIST_FOREACH(enabled_cpu, &kvm_enabled_cpus, node) {
>> +        if (enabled_cpu->vcpu_id == vcpu_id) {
>> +            return true;
>> +        }
>> +    }
>> +    return false;
>> +}
>> +
>> +static void kvm_cpu_enable(CPUState *cs)
>> +{
>> +    KVMEnabledCPU *enabled_cpu;
>> +    unsigned long vcpu_id = kvm_arch_vcpu_id(cs);
>> +
>> +    enabled_cpu = g_malloc(sizeof(*enabled_cpu));
>> +    enabled_cpu->vcpu_id = vcpu_id;
>> +    QLIST_INSERT_HEAD(&kvm_enabled_cpus, enabled_cpu, node);
>> +}
>> +
>> +/*
>> + * XIVE Thread Interrupt Management context (KVM)
>> + */
>> +
>> +void kvmppc_xive_cpu_connect(XiveTCTX *tctx, Error **errp)
>> +{
>> +    SpaprXive *xive = SPAPR_MACHINE(qdev_get_machine())->xive;
>> +    unsigned long vcpu_id;
>> +    int ret;
>> +
>> +    /* Check if CPU was hot unplugged and replugged. */
>> +    if (kvm_cpu_is_enabled(tctx->cs)) {
>> +        return;
>> +    }
>> +
>> +    vcpu_id = kvm_arch_vcpu_id(tctx->cs);
>> +
>> +    ret = kvm_vcpu_enable_cap(tctx->cs, KVM_CAP_PPC_IRQ_XIVE, 0, xive->fd,
>> +                              vcpu_id, 0);
>> +    if (ret < 0) {
>> +        error_setg(errp, "XIVE: unable to connect CPU%ld to KVM device: %s",
>> +                   vcpu_id, strerror(errno));
>> +        return;
>> +    }
>> +
>> +    kvm_cpu_enable(tctx->cs);
>> +}
>> +
>> +/*
>> + * XIVE Interrupt Source (KVM)
>> + */
>> +
>> +/*
>> + * At reset, the interrupt sources are simply created and MASKED. We
>> + * only need to inform the KVM XIVE device about their type: LSI or
>> + * MSI.
>> + */
>> +void kvmppc_xive_source_reset_one(XiveSource *xsrc, int srcno, Error **errp)
>> +{
>> +    SpaprXive *xive = SPAPR_XIVE(xsrc->xive);
>> +    uint64_t state = 0;
>> +
>> +    if (xive_source_irq_is_lsi(xsrc, srcno)) {
>> +        state |= KVM_XIVE_LEVEL_SENSITIVE;
>> +        if (xsrc->status[srcno] & XIVE_STATUS_ASSERTED) {
>> +            state |= KVM_XIVE_LEVEL_ASSERTED;
>> +        }
>> +    }
>> +
>> +    kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_SOURCE, srcno, &state,
>> +                      true, errp);
>> +}
>> +
>> +void kvmppc_xive_source_reset(XiveSource *xsrc, Error **errp)
>> +{
>> +    int i;
>> +
>> +    for (i = 0; i < xsrc->nr_irqs; i++) {
>> +        Error *local_err = NULL;
>> +
>> +        kvmppc_xive_source_reset_one(xsrc, i, &local_err);
>> +        if (local_err) {
>> +            error_propagate(errp, local_err);
>> +            return;
>> +        }
>> +    }
>> +}
>> +
>> +void kvmppc_xive_source_set_irq(void *opaque, int srcno, int val)
>> +{
>> +    XiveSource *xsrc = opaque;
>> +    struct kvm_irq_level args;
>> +    int rc;
>> +
>> +    args.irq = srcno;
>> +    if (!xive_source_irq_is_lsi(xsrc, srcno)) {
>> +        if (!val) {
>> +            return;
>> +        }
>> +        args.level = KVM_INTERRUPT_SET;
>> +    } else {
>> +        if (val) {
>> +            xsrc->status[srcno] |= XIVE_STATUS_ASSERTED;
>> +            args.level = KVM_INTERRUPT_SET_LEVEL;
>> +        } else {
>> +            xsrc->status[srcno] &= ~XIVE_STATUS_ASSERTED;
>> +            args.level = KVM_INTERRUPT_UNSET;
>> +        }
>> +    }
>> +    rc = kvm_vm_ioctl(kvm_state, KVM_IRQ_LINE, &args);
>> +    if (rc < 0) {
>> +        error_report("XIVE: kvm_irq_line() failed : %s", strerror(errno));
>> +    }
>> +}
>> +
>> +/*
>> + * sPAPR XIVE interrupt controller (KVM)
>> + */
>> +
>> +static void *kvmppc_xive_mmap(SpaprXive *xive, int pgoff, size_t len,
>> +                              Error **errp)
>> +{
>> +    void *addr;
>> +    uint32_t page_shift = 16; /* TODO: fix page_shift */
>> +
>> +    addr = mmap(NULL, len, PROT_WRITE | PROT_READ, MAP_SHARED, xive->fd,
>> +                pgoff << page_shift);
>> +    if (addr == MAP_FAILED) {
>> +        error_setg_errno(errp, errno, "XIVE: unable to set memory mapping");
>> +        return NULL;
>> +    }
>> +
>> +    return addr;
>> +}
>> +
>> +/*
>> + * All the XIVE memory regions are now backed by mappings from the KVM
>> + * XIVE device.
>> + */
>> +void kvmppc_xive_connect(SpaprXive *xive, Error **errp)
>> +{
>> +    XiveSource *xsrc = &xive->source;
>> +    XiveENDSource *end_xsrc = &xive->end_source;
>> +    Error *local_err = NULL;
>> +    size_t esb_len = (1ull << xsrc->esb_shift) * xsrc->nr_irqs;
>> +    size_t tima_len = 4ull << TM_SHIFT;
>> +
>> +    if (!kvmppc_has_cap_xive()) {
>> +        error_setg(errp, "IRQ_XIVE capability must be present for KVM");
>> +        return;
>> +    }
>> +
>> +    /* First, create the KVM XIVE device */
>> +    xive->fd = kvm_create_device(kvm_state, KVM_DEV_TYPE_XIVE, false);
>> +    if (xive->fd < 0) {
>> +        error_setg_errno(errp, -xive->fd, "XIVE: error creating KVM device");
>> +        return;
>> +    }
>> +
>> +    /*
>> +     * 1. Source ESB pages - KVM mapping
>> +     */
>> +    xsrc->esb_mmap = kvmppc_xive_mmap(xive, KVM_XIVE_ESB_PAGE_OFFSET, esb_len,
>> +                                      &local_err);
>> +    if (local_err) {
>> +        error_propagate(errp, local_err);
>> +        return;
>> +    }
>> +
>> +    memory_region_init_ram_device_ptr(&xsrc->esb_mmio, OBJECT(xsrc),
>> +                                      "xive.esb", esb_len, xsrc->esb_mmap);
>> +    sysbus_init_mmio(SYS_BUS_DEVICE(xive), &xsrc->esb_mmio);
>> +
>> +    /*
>> +     * 2. END ESB pages (No KVM support yet)
>> +     */
>> +    sysbus_init_mmio(SYS_BUS_DEVICE(xive), &end_xsrc->esb_mmio);
>> +
>> +    /*
>> +     * 3. TIMA pages - KVM mapping
>> +     */
>> +    xive->tm_mmap = kvmppc_xive_mmap(xive, KVM_XIVE_TIMA_PAGE_OFFSET, tima_len,
>> +                                     &local_err);
>> +    if (local_err) {
>> +        error_propagate(errp, local_err);
>> +        return;
>> +    }
>> +    memory_region_init_ram_device_ptr(&xive->tm_mmio, OBJECT(xive),
>> +                                      "xive.tima", tima_len, xive->tm_mmap);
>> +    sysbus_init_mmio(SYS_BUS_DEVICE(xive), &xive->tm_mmio);
>> +
>> +    kvm_kernel_irqchip = true;
>> +    kvm_msi_via_irqfd_allowed = true;
>> +    kvm_gsi_direct_mapping = true;
>> +
>> +    /* Map all regions */
>> +    spapr_xive_map_mmio(xive);
>> +}
>> diff --git a/hw/intc/xive.c b/hw/intc/xive.c
>> index dcf2fcd108..78047adb11 100644
>> --- a/hw/intc/xive.c
>> +++ b/hw/intc/xive.c
>> @@ -555,6 +555,15 @@ static void xive_tctx_realize(DeviceState *dev, Error **errp)
>>          return;
>>      }
>>  
>> +    /* Connect the presenter to the VCPU (required for CPU hotplug) */
>> +    if (kvm_irqchip_in_kernel()) {
>> +        kvmppc_xive_cpu_connect(tctx, &local_err);
>> +        if (local_err) {
>> +            error_propagate(errp, local_err);
>> +            return;
>> +        }
>> +    }
>> +
>>      qemu_register_reset(xive_tctx_reset, dev);
>>  }
>>  
>> @@ -957,6 +966,10 @@ static void xive_source_reset(void *dev)
>>  
>>      /* PQs are initialized to 0b01 (Q=1) which corresponds to "ints off" */
>>      memset(xsrc->status, XIVE_ESB_OFF, xsrc->nr_irqs);
>> +
>> +    if (kvm_irqchip_in_kernel()) {
>> +        kvmppc_xive_source_reset(xsrc, &error_fatal);
>> +    }
>>  }
>>  
>>  static void xive_source_realize(DeviceState *dev, Error **errp)
>> @@ -990,9 +1003,11 @@ static void xive_source_realize(DeviceState *dev, Error **errp)
>>      xsrc->status = g_malloc0(xsrc->nr_irqs);
>>      xsrc->lsi_map = bitmap_new(xsrc->nr_irqs);
>>  
>> -    memory_region_init_io(&xsrc->esb_mmio, OBJECT(xsrc),
>> -                          &xive_source_esb_ops, xsrc, "xive.esb",
>> -                          (1ull << xsrc->esb_shift) * xsrc->nr_irqs);
>> +    if (!kvm_irqchip_in_kernel()) {
>> +        memory_region_init_io(&xsrc->esb_mmio, OBJECT(xsrc),
>> +                              &xive_source_esb_ops, xsrc, "xive.esb",
>> +                              (1ull << xsrc->esb_shift) * xsrc->nr_irqs);
>> +    }
>>  
>>      qemu_register_reset(xive_source_reset, dev);
>>  }
>> diff --git a/hw/ppc/Kconfig b/hw/ppc/Kconfig
>> index a3465155f0..f927ec9c74 100644
>> --- a/hw/ppc/Kconfig
>> +++ b/hw/ppc/Kconfig
>> @@ -122,3 +122,8 @@ config XIVE_SPAPR
>>      default y
>>      depends on PSERIES
>>      select XIVE
>> +
>> +config XIVE_KVM
>> +    bool
>> +    default y
>> +    depends on XIVE_SPAPR && KVM
>> diff --git a/hw/ppc/spapr_irq.c b/hw/ppc/spapr_irq.c
>> index b1f79ea9de..5c4a44855d 100644
>> --- a/hw/ppc/spapr_irq.c
>> +++ b/hw/ppc/spapr_irq.c
>> @@ -372,7 +372,11 @@ static void spapr_irq_set_irq_xive(void *opaque, int srcno, int val)
>>  {
>>      SpaprMachineState *spapr = opaque;
>>  
>> -    xive_source_set_irq(&spapr->xive->source, srcno, val);
>> +    if (kvm_irqchip_in_kernel()) {
>> +        kvmppc_xive_source_set_irq(&spapr->xive->source, srcno, val);
>> +    } else {
>> +        xive_source_set_irq(&spapr->xive->source, srcno, val);
>> +    }
>>  }
>>  
>>  static const char *spapr_irq_get_nodename_xive(SpaprMachineState *spapr)
>> diff --git a/include/hw/ppc/spapr_xive.h b/include/hw/ppc/spapr_xive.h
>> index fc3e9652f9..0edcc762de 100644
>> --- a/include/hw/ppc/spapr_xive.h
>> +++ b/include/hw/ppc/spapr_xive.h
>> @@ -38,6 +38,10 @@ typedef struct SpaprXive {
>>      /* TIMA mapping address */
>>      hwaddr        tm_base;
>>      MemoryRegion  tm_mmio;
>> +
>> +    /* KVM support */
>> +    int           fd;
>> +    void          *tm_mmap;
>>  } SpaprXive;
>>  
>>  bool spapr_xive_irq_claim(SpaprXive *xive, uint32_t lisn, bool lsi);
>> @@ -49,5 +53,11 @@ void spapr_dt_xive(SpaprMachineState *spapr, uint32_t nr_servers, void *fdt,
>>                     uint32_t phandle);
>>  void spapr_xive_set_tctx_os_cam(XiveTCTX *tctx);
>>  void spapr_xive_mmio_set_enabled(SpaprXive *xive, bool enable);
>> +void spapr_xive_map_mmio(SpaprXive *xive);
>> +
>> +/*
>> + * KVM XIVE device helpers
>> + */
>> +void kvmppc_xive_connect(SpaprXive *xive, Error **errp);
>>  
>>  #endif /* PPC_SPAPR_XIVE_H */
>> diff --git a/include/hw/ppc/xive.h b/include/hw/ppc/xive.h
>> index c4f27742ca..dd115da30e 100644
>> --- a/include/hw/ppc/xive.h
>> +++ b/include/hw/ppc/xive.h
>> @@ -140,6 +140,7 @@
>>  #ifndef PPC_XIVE_H
>>  #define PPC_XIVE_H
>>  
>> +#include "sysemu/kvm.h"
>>  #include "hw/qdev-core.h"
>>  #include "hw/sysbus.h"
>>  #include "hw/ppc/xive_regs.h"
>> @@ -194,6 +195,9 @@ typedef struct XiveSource {
>>      uint32_t        esb_shift;
>>      MemoryRegion    esb_mmio;
>>  
>> +    /* KVM support */
>> +    void            *esb_mmap;
>> +
>>      XiveNotifier    *xive;
>>  } XiveSource;
>>  
>> @@ -423,4 +427,13 @@ static inline uint32_t xive_nvt_cam_line(uint8_t nvt_blk, uint32_t nvt_idx)
>>      return (nvt_blk << 19) | nvt_idx;
>>  }
>>  
>> +/*
>> + * KVM XIVE device helpers
>> + */
>> +
>> +void kvmppc_xive_source_reset_one(XiveSource *xsrc, int srcno, Error **errp);
>> +void kvmppc_xive_source_reset(XiveSource *xsrc, Error **errp);
>> +void kvmppc_xive_source_set_irq(void *opaque, int srcno, int val);
>> +void kvmppc_xive_cpu_connect(XiveTCTX *tctx, Error **errp);
>> +
>>  #endif /* PPC_XIVE_H */
>> diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c
>> index 1a9caf8f40..3bf0a46c33 100644
>> --- a/target/ppc/kvm.c
>> +++ b/target/ppc/kvm.c
>> @@ -75,6 +75,7 @@ static int cap_fixup_hcalls;
>>  static int cap_htm;             /* Hardware transactional memory support */
>>  static int cap_mmu_radix;
>>  static int cap_mmu_hash_v3;
>> +static int cap_xive;
>>  static int cap_resize_hpt;
>>  static int cap_ppc_pvr_compat;
>>  static int cap_ppc_safe_cache;
>> @@ -146,6 +147,7 @@ int kvm_arch_init(MachineState *ms, KVMState *s)
>>      cap_htm = kvm_vm_check_extension(s, KVM_CAP_PPC_HTM);
>>      cap_mmu_radix = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_RADIX);
>>      cap_mmu_hash_v3 = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_HASH_V3);
>> +    cap_xive = kvm_vm_check_extension(s, KVM_CAP_PPC_IRQ_XIVE);
>>      cap_resize_hpt = kvm_vm_check_extension(s, KVM_CAP_SPAPR_RESIZE_HPT);
>>      kvmppc_get_cpu_characteristics(s);
>>      cap_ppc_nested_kvm_hv = kvm_vm_check_extension(s, KVM_CAP_PPC_NESTED_HV);
>> @@ -2478,6 +2480,11 @@ static int parse_cap_ppc_count_cache_flush_assist(struct kvm_ppc_cpu_char c)
>>      return 0;
>>  }
>>  
>> +bool kvmppc_has_cap_xive(void)
>> +{
>> +    return cap_xive;
>> +}
>> +
>>  static void kvmppc_get_cpu_characteristics(KVMState *s)
>>  {
>>      struct kvm_ppc_cpu_char c;
>> diff --git a/target/ppc/kvm_ppc.h b/target/ppc/kvm_ppc.h
>> index 22385134b4..45776cad79 100644
>> --- a/target/ppc/kvm_ppc.h
>> +++ b/target/ppc/kvm_ppc.h
>> @@ -60,6 +60,7 @@ bool kvmppc_has_cap_fixup_hcalls(void);
>>  bool kvmppc_has_cap_htm(void);
>>  bool kvmppc_has_cap_mmu_radix(void);
>>  bool kvmppc_has_cap_mmu_hash_v3(void);
>> +bool kvmppc_has_cap_xive(void);
>>  int kvmppc_get_cap_safe_cache(void);
>>  int kvmppc_get_cap_safe_bounds_check(void);
>>  int kvmppc_get_cap_safe_indirect_branch(void);
>> @@ -316,6 +317,11 @@ static inline bool kvmppc_has_cap_mmu_hash_v3(void)
>>      return false;
>>  }
>>  
>> +static inline bool kvmppc_has_cap_xive(void)
>> +{
>> +    return false;
>> +}
>> +
>>  static inline int kvmppc_get_cap_safe_cache(void)
>>  {
>>      return 0;
>>
>
Greg Kurz June 4, 2019, 8:05 a.m. UTC | #3
On Tue, 4 Jun 2019 09:54:59 +0200
Cédric Le Goater <clg@kaod.org> wrote:

> On 04/06/2019 09:23, Alexey Kardashevskiy wrote:
> > 
> > 
> > On 29/05/2019 16:49, David Gibson wrote:  
> >> From: Cédric Le Goater <clg@kaod.org>
> >>
> >> This introduces a set of helpers when KVM is in use, which create the
> >> KVM XIVE device, initialize the interrupt sources at a KVM level and
> >> connect the interrupt presenters to the vCPU.
> >>
> >> They also handle the initialization of the TIMA and the source ESB
> >> memory regions of the controller. These have a different type under
> >> KVM. They are 'ram device' memory mappings, similarly to VFIO, exposed
> >> to the guest and the associated VMAs on the host are populated
> >> dynamically with the appropriate pages using a fault handler.
> >>
> >> Signed-off-by: Cédric Le Goater <clg@kaod.org>
> >> Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
> >> Message-Id: <20190513084245.25755-3-clg@kaod.org>
> >> Signed-off-by: David Gibson <david@gibson.dropbear.id.au>  
> > 
> > 
> > This one breaks my setup - it boots up to:
> > 
> > 
> > ipr: IBM Power RAID SCSI Device Driver version: 2.6.4 (March 14, 2017)
> > __vio_register_driver: driver ibmvscsi registering
> > ibmvscsi 71000001: SRP_VERSION: 16.a
> > ibmvscsi 71000001: Maximum ID: 64 Maximum LUN: 32 Maximum Channel: 3
> > scsi host0: IBM POWER Virtual SCSI Adapter 1.5.9
> > 
> > 
> > and hangs. Here is the command line:
> > 
> > 
> > /home/aik/pbuild/qemu-aikrhel74alt-ppc64/ppc64-softmmu/qemu-system-ppc64 \
> > -nodefaults \
> > -chardev stdio,id=STDIO0,signal=off,mux=on \
> > -device spapr-vty,id=svty0,reg=0x71000110,chardev=STDIO0 \
> > -mon id=MON0,chardev=STDIO0,mode=readline -nographic -vga none \
> > -enable-kvm \
> > -device nec-usb-xhci,id=nec-usb-xhci0 -m 16G \
> > -netdev "user,id=USER0,hostfwd=tcp::2223-:22" \
> > -device "virtio-net-pci,id=vnet0,mac=C0:41:49:4b:00:00,netdev=USER0" \
> > img/u1804-64G-cuda10.1-418.67-swiotlb.qcow2 \
> > -machine pseries,cap-cfpc=broken,cap-htm=off,ic-mode=xive -snapshot \
> > -smp 1,threads=1 -bios ./slof.bin \
> > -L /home/aik/t/qemu-ppc64-bios/ \
> > -trace events=qemu_trace_events -d guest_errors \
> > -chardev socket,id=SOCKET0,server,nowait,path=qemu.mon.user2223 \
> > -mon chardev=SOCKET0,mode=control  
> 
> At this level of patch 38afd772f802 ("spapr/xive: add KVM support"), I am
> surprised this is even starting. 
> 
> The test in spapr_irq_init_xive() : 
> 
>     /* KVM XIVE device not yet available */
>     if (kvm_enabled()) {
>         if (machine_kernel_irqchip_required(machine)) {

The problem is that machine_kernel_irqchip_required(machine) returns false
if kernel_irqchip wasn't specified on the command line, which is the case
here. Maybe machine_kernel_irqchip_allowed() would have made more sense,
but...

>             error_setg(errp, "kernel_irqchip requested. no KVM XIVE support");
>             return;
>         }
>     }
> 
> should fail. This is removed later in 0dc9f5f8496a ("spapr/xive: activate 
> KVM support")
> 

... do we really care, since this code gets removed later?

> > The host kernel is v5.2-rc2. The next patch - 0c575703e487 "spapr/xive:
> > add hcall support when under KVM" - fixes this though but the question
> > is now if xive emulation in qemu still works (how do I verify it?).  
> 
> kernel_irqchip=off should activate the QEMU XIVE device.
> 
> Are you testing bisection ?
> 
> C.
> 
> > 
> > Any clues? Thanks,
> > 
> >   
> >> ---
> >>  hw/intc/Makefile.objs       |   1 +
> >>  hw/intc/spapr_xive.c        |  48 +++++++-
> >>  hw/intc/spapr_xive_kvm.c    | 237 ++++++++++++++++++++++++++++++++++++
> >>  hw/intc/xive.c              |  21 +++-
> >>  hw/ppc/Kconfig              |   5 +
> >>  hw/ppc/spapr_irq.c          |   6 +-
> >>  include/hw/ppc/spapr_xive.h |  10 ++
> >>  include/hw/ppc/xive.h       |  13 ++
> >>  target/ppc/kvm.c            |   7 ++
> >>  target/ppc/kvm_ppc.h        |   6 +
> >>  10 files changed, 344 insertions(+), 10 deletions(-)
> >>  create mode 100644 hw/intc/spapr_xive_kvm.c
> >>
> >> diff --git a/hw/intc/Makefile.objs b/hw/intc/Makefile.objs
> >> index df712c3e6c..03019b9a03 100644
> >> --- a/hw/intc/Makefile.objs
> >> +++ b/hw/intc/Makefile.objs
> >> @@ -39,6 +39,7 @@ obj-$(CONFIG_XICS_SPAPR) += xics_spapr.o
> >>  obj-$(CONFIG_XICS_KVM) += xics_kvm.o
> >>  obj-$(CONFIG_XIVE) += xive.o
> >>  obj-$(CONFIG_XIVE_SPAPR) += spapr_xive.o
> >> +obj-$(CONFIG_XIVE_KVM) += spapr_xive_kvm.o
> >>  obj-$(CONFIG_POWERNV) += xics_pnv.o pnv_xive.o
> >>  obj-$(CONFIG_ALLWINNER_A10_PIC) += allwinner-a10-pic.o
> >>  obj-$(CONFIG_S390_FLIC) += s390_flic.o
> >> diff --git a/hw/intc/spapr_xive.c b/hw/intc/spapr_xive.c
> >> index 62e13ac353..27632683e6 100644
> >> --- a/hw/intc/spapr_xive.c
> >> +++ b/hw/intc/spapr_xive.c
> >> @@ -174,7 +174,7 @@ void spapr_xive_pic_print_info(SpaprXive *xive, Monitor *mon)
> >>      }
> >>  }
> >>  
> >> -static void spapr_xive_map_mmio(SpaprXive *xive)
> >> +void spapr_xive_map_mmio(SpaprXive *xive)
> >>  {
> >>      sysbus_mmio_map(SYS_BUS_DEVICE(xive), 0, xive->vc_base);
> >>      sysbus_mmio_map(SYS_BUS_DEVICE(xive), 1, xive->end_base);
> >> @@ -251,6 +251,9 @@ static void spapr_xive_instance_init(Object *obj)
> >>      object_initialize_child(obj, "end_source", &xive->end_source,
> >>                              sizeof(xive->end_source), TYPE_XIVE_END_SOURCE,
> >>                              &error_abort, NULL);
> >> +
> >> +    /* Not connected to the KVM XIVE device */
> >> +    xive->fd = -1;
> >>  }
> >>  
> >>  static void spapr_xive_realize(DeviceState *dev, Error **errp)
> >> @@ -259,6 +262,7 @@ static void spapr_xive_realize(DeviceState *dev, Error **errp)
> >>      XiveSource *xsrc = &xive->source;
> >>      XiveENDSource *end_xsrc = &xive->end_source;
> >>      Error *local_err = NULL;
> >> +    MachineState *machine = MACHINE(qdev_get_machine());
> >>  
> >>      if (!xive->nr_irqs) {
> >>          error_setg(errp, "Number of interrupt needs to be greater 0");
> >> @@ -305,6 +309,32 @@ static void spapr_xive_realize(DeviceState *dev, Error **errp)
> >>      xive->eat = g_new0(XiveEAS, xive->nr_irqs);
> >>      xive->endt = g_new0(XiveEND, xive->nr_ends);
> >>  
> >> +    xive->nodename = g_strdup_printf("interrupt-controller@%" PRIx64,
> >> +                           xive->tm_base + XIVE_TM_USER_PAGE * (1 << TM_SHIFT));
> >> +
> >> +    qemu_register_reset(spapr_xive_reset, dev);
> >> +
> >> +    if (kvm_enabled() && machine_kernel_irqchip_allowed(machine)) {
> >> +        kvmppc_xive_connect(xive, &local_err);
> >> +        if (local_err && machine_kernel_irqchip_required(machine)) {
> >> +            error_prepend(&local_err,
> >> +                          "kernel_irqchip requested but unavailable: ");
> >> +            error_propagate(errp, local_err);
> >> +            return;
> >> +        }
> >> +
> >> +        if (!local_err) {
> >> +            return;
> >> +        }
> >> +
> >> +        /*
> >> +         * We failed to initialize the XIVE KVM device, fallback to
> >> +         * emulated mode
> >> +         */
> >> +        error_prepend(&local_err, "kernel_irqchip allowed but unavailable: ");
> >> +        warn_report_err(local_err);
> >> +    }
> >> +
> >>      /* TIMA initialization */
> >>      memory_region_init_io(&xive->tm_mmio, OBJECT(xive), &xive_tm_ops, xive,
> >>                            "xive.tima", 4ull << TM_SHIFT);
> >> @@ -316,11 +346,6 @@ static void spapr_xive_realize(DeviceState *dev, Error **errp)
> >>  
> >>      /* Map all regions */
> >>      spapr_xive_map_mmio(xive);
> >> -
> >> -    xive->nodename = g_strdup_printf("interrupt-controller@%" PRIx64,
> >> -                           xive->tm_base + XIVE_TM_USER_PAGE * (1 << TM_SHIFT));
> >> -
> >> -    qemu_register_reset(spapr_xive_reset, dev);
> >>  }
> >>  
> >>  static int spapr_xive_get_eas(XiveRouter *xrtr, uint8_t eas_blk,
> >> @@ -495,6 +520,17 @@ bool spapr_xive_irq_claim(SpaprXive *xive, uint32_t lisn, bool lsi)
> >>      if (lsi) {
> >>          xive_source_irq_set_lsi(xsrc, lisn);
> >>      }
> >> +
> >> +    if (kvm_irqchip_in_kernel()) {
> >> +        Error *local_err = NULL;
> >> +
> >> +        kvmppc_xive_source_reset_one(xsrc, lisn, &local_err);
> >> +        if (local_err) {
> >> +            error_report_err(local_err);
> >> +            return false;
> >> +        }
> >> +    }
> >> +
> >>      return true;
> >>  }
> >>  
> >> diff --git a/hw/intc/spapr_xive_kvm.c b/hw/intc/spapr_xive_kvm.c
> >> new file mode 100644
> >> index 0000000000..7d9e771e8a
> >> --- /dev/null
> >> +++ b/hw/intc/spapr_xive_kvm.c
> >> @@ -0,0 +1,237 @@
> >> +/*
> >> + * QEMU PowerPC sPAPR XIVE interrupt controller model
> >> + *
> >> + * Copyright (c) 2017-2019, IBM Corporation.
> >> + *
> >> + * This code is licensed under the GPL version 2 or later. See the
> >> + * COPYING file in the top-level directory.
> >> + */
> >> +
> >> +#include "qemu/osdep.h"
> >> +#include "qemu/log.h"
> >> +#include "qemu/error-report.h"
> >> +#include "qapi/error.h"
> >> +#include "target/ppc/cpu.h"
> >> +#include "sysemu/cpus.h"
> >> +#include "sysemu/kvm.h"
> >> +#include "hw/ppc/spapr.h"
> >> +#include "hw/ppc/spapr_xive.h"
> >> +#include "hw/ppc/xive.h"
> >> +#include "kvm_ppc.h"
> >> +
> >> +#include <sys/ioctl.h>
> >> +
> >> +/*
> >> + * Helpers for CPU hotplug
> >> + *
> >> + * TODO: make a common KVMEnabledCPU layer for XICS and XIVE
> >> + */
> >> +typedef struct KVMEnabledCPU {
> >> +    unsigned long vcpu_id;
> >> +    QLIST_ENTRY(KVMEnabledCPU) node;
> >> +} KVMEnabledCPU;
> >> +
> >> +static QLIST_HEAD(, KVMEnabledCPU)
> >> +    kvm_enabled_cpus = QLIST_HEAD_INITIALIZER(&kvm_enabled_cpus);
> >> +
> >> +static bool kvm_cpu_is_enabled(CPUState *cs)
> >> +{
> >> +    KVMEnabledCPU *enabled_cpu;
> >> +    unsigned long vcpu_id = kvm_arch_vcpu_id(cs);
> >> +
> >> +    QLIST_FOREACH(enabled_cpu, &kvm_enabled_cpus, node) {
> >> +        if (enabled_cpu->vcpu_id == vcpu_id) {
> >> +            return true;
> >> +        }
> >> +    }
> >> +    return false;
> >> +}
> >> +
> >> +static void kvm_cpu_enable(CPUState *cs)
> >> +{
> >> +    KVMEnabledCPU *enabled_cpu;
> >> +    unsigned long vcpu_id = kvm_arch_vcpu_id(cs);
> >> +
> >> +    enabled_cpu = g_malloc(sizeof(*enabled_cpu));
> >> +    enabled_cpu->vcpu_id = vcpu_id;
> >> +    QLIST_INSERT_HEAD(&kvm_enabled_cpus, enabled_cpu, node);
> >> +}
> >> +
> >> +/*
> >> + * XIVE Thread Interrupt Management context (KVM)
> >> + */
> >> +
> >> +void kvmppc_xive_cpu_connect(XiveTCTX *tctx, Error **errp)
> >> +{
> >> +    SpaprXive *xive = SPAPR_MACHINE(qdev_get_machine())->xive;
> >> +    unsigned long vcpu_id;
> >> +    int ret;
> >> +
> >> +    /* Check if CPU was hot unplugged and replugged. */
> >> +    if (kvm_cpu_is_enabled(tctx->cs)) {
> >> +        return;
> >> +    }
> >> +
> >> +    vcpu_id = kvm_arch_vcpu_id(tctx->cs);
> >> +
> >> +    ret = kvm_vcpu_enable_cap(tctx->cs, KVM_CAP_PPC_IRQ_XIVE, 0, xive->fd,
> >> +                              vcpu_id, 0);
> >> +    if (ret < 0) {
> >> +        error_setg(errp, "XIVE: unable to connect CPU%ld to KVM device: %s",
> >> +                   vcpu_id, strerror(errno));
> >> +        return;
> >> +    }
> >> +
> >> +    kvm_cpu_enable(tctx->cs);
> >> +}
> >> +
> >> +/*
> >> + * XIVE Interrupt Source (KVM)
> >> + */
> >> +
> >> +/*
> >> + * At reset, the interrupt sources are simply created and MASKED. We
> >> + * only need to inform the KVM XIVE device about their type: LSI or
> >> + * MSI.
> >> + */
> >> +void kvmppc_xive_source_reset_one(XiveSource *xsrc, int srcno, Error **errp)
> >> +{
> >> +    SpaprXive *xive = SPAPR_XIVE(xsrc->xive);
> >> +    uint64_t state = 0;
> >> +
> >> +    if (xive_source_irq_is_lsi(xsrc, srcno)) {
> >> +        state |= KVM_XIVE_LEVEL_SENSITIVE;
> >> +        if (xsrc->status[srcno] & XIVE_STATUS_ASSERTED) {
> >> +            state |= KVM_XIVE_LEVEL_ASSERTED;
> >> +        }
> >> +    }
> >> +
> >> +    kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_SOURCE, srcno, &state,
> >> +                      true, errp);
> >> +}
> >> +
> >> +void kvmppc_xive_source_reset(XiveSource *xsrc, Error **errp)
> >> +{
> >> +    int i;
> >> +
> >> +    for (i = 0; i < xsrc->nr_irqs; i++) {
> >> +        Error *local_err = NULL;
> >> +
> >> +        kvmppc_xive_source_reset_one(xsrc, i, &local_err);
> >> +        if (local_err) {
> >> +            error_propagate(errp, local_err);
> >> +            return;
> >> +        }
> >> +    }
> >> +}
> >> +
> >> +void kvmppc_xive_source_set_irq(void *opaque, int srcno, int val)
> >> +{
> >> +    XiveSource *xsrc = opaque;
> >> +    struct kvm_irq_level args;
> >> +    int rc;
> >> +
> >> +    args.irq = srcno;
> >> +    if (!xive_source_irq_is_lsi(xsrc, srcno)) {
> >> +        if (!val) {
> >> +            return;
> >> +        }
> >> +        args.level = KVM_INTERRUPT_SET;
> >> +    } else {
> >> +        if (val) {
> >> +            xsrc->status[srcno] |= XIVE_STATUS_ASSERTED;
> >> +            args.level = KVM_INTERRUPT_SET_LEVEL;
> >> +        } else {
> >> +            xsrc->status[srcno] &= ~XIVE_STATUS_ASSERTED;
> >> +            args.level = KVM_INTERRUPT_UNSET;
> >> +        }
> >> +    }
> >> +    rc = kvm_vm_ioctl(kvm_state, KVM_IRQ_LINE, &args);
> >> +    if (rc < 0) {
> >> +        error_report("XIVE: kvm_irq_line() failed : %s", strerror(errno));
> >> +    }
> >> +}
> >> +
> >> +/*
> >> + * sPAPR XIVE interrupt controller (KVM)
> >> + */
> >> +
> >> +static void *kvmppc_xive_mmap(SpaprXive *xive, int pgoff, size_t len,
> >> +                              Error **errp)
> >> +{
> >> +    void *addr;
> >> +    uint32_t page_shift = 16; /* TODO: fix page_shift */
> >> +
> >> +    addr = mmap(NULL, len, PROT_WRITE | PROT_READ, MAP_SHARED, xive->fd,
> >> +                pgoff << page_shift);
> >> +    if (addr == MAP_FAILED) {
> >> +        error_setg_errno(errp, errno, "XIVE: unable to set memory mapping");
> >> +        return NULL;
> >> +    }
> >> +
> >> +    return addr;
> >> +}
> >> +
> >> +/*
> >> + * All the XIVE memory regions are now backed by mappings from the KVM
> >> + * XIVE device.
> >> + */
> >> +void kvmppc_xive_connect(SpaprXive *xive, Error **errp)
> >> +{
> >> +    XiveSource *xsrc = &xive->source;
> >> +    XiveENDSource *end_xsrc = &xive->end_source;
> >> +    Error *local_err = NULL;
> >> +    size_t esb_len = (1ull << xsrc->esb_shift) * xsrc->nr_irqs;
> >> +    size_t tima_len = 4ull << TM_SHIFT;
> >> +
> >> +    if (!kvmppc_has_cap_xive()) {
> >> +        error_setg(errp, "IRQ_XIVE capability must be present for KVM");
> >> +        return;
> >> +    }
> >> +
> >> +    /* First, create the KVM XIVE device */
> >> +    xive->fd = kvm_create_device(kvm_state, KVM_DEV_TYPE_XIVE, false);
> >> +    if (xive->fd < 0) {
> >> +        error_setg_errno(errp, -xive->fd, "XIVE: error creating KVM device");
> >> +        return;
> >> +    }
> >> +
> >> +    /*
> >> +     * 1. Source ESB pages - KVM mapping
> >> +     */
> >> +    xsrc->esb_mmap = kvmppc_xive_mmap(xive, KVM_XIVE_ESB_PAGE_OFFSET, esb_len,
> >> +                                      &local_err);
> >> +    if (local_err) {
> >> +        error_propagate(errp, local_err);
> >> +        return;
> >> +    }
> >> +
> >> +    memory_region_init_ram_device_ptr(&xsrc->esb_mmio, OBJECT(xsrc),
> >> +                                      "xive.esb", esb_len, xsrc->esb_mmap);
> >> +    sysbus_init_mmio(SYS_BUS_DEVICE(xive), &xsrc->esb_mmio);
> >> +
> >> +    /*
> >> +     * 2. END ESB pages (No KVM support yet)
> >> +     */
> >> +    sysbus_init_mmio(SYS_BUS_DEVICE(xive), &end_xsrc->esb_mmio);
> >> +
> >> +    /*
> >> +     * 3. TIMA pages - KVM mapping
> >> +     */
> >> +    xive->tm_mmap = kvmppc_xive_mmap(xive, KVM_XIVE_TIMA_PAGE_OFFSET, tima_len,
> >> +                                     &local_err);
> >> +    if (local_err) {
> >> +        error_propagate(errp, local_err);
> >> +        return;
> >> +    }
> >> +    memory_region_init_ram_device_ptr(&xive->tm_mmio, OBJECT(xive),
> >> +                                      "xive.tima", tima_len, xive->tm_mmap);
> >> +    sysbus_init_mmio(SYS_BUS_DEVICE(xive), &xive->tm_mmio);
> >> +
> >> +    kvm_kernel_irqchip = true;
> >> +    kvm_msi_via_irqfd_allowed = true;
> >> +    kvm_gsi_direct_mapping = true;
> >> +
> >> +    /* Map all regions */
> >> +    spapr_xive_map_mmio(xive);
> >> +}
> >> diff --git a/hw/intc/xive.c b/hw/intc/xive.c
> >> index dcf2fcd108..78047adb11 100644
> >> --- a/hw/intc/xive.c
> >> +++ b/hw/intc/xive.c
> >> @@ -555,6 +555,15 @@ static void xive_tctx_realize(DeviceState *dev, Error **errp)
> >>          return;
> >>      }
> >>  
> >> +    /* Connect the presenter to the VCPU (required for CPU hotplug) */
> >> +    if (kvm_irqchip_in_kernel()) {
> >> +        kvmppc_xive_cpu_connect(tctx, &local_err);
> >> +        if (local_err) {
> >> +            error_propagate(errp, local_err);
> >> +            return;
> >> +        }
> >> +    }
> >> +
> >>      qemu_register_reset(xive_tctx_reset, dev);
> >>  }
> >>  
> >> @@ -957,6 +966,10 @@ static void xive_source_reset(void *dev)
> >>  
> >>      /* PQs are initialized to 0b01 (Q=1) which corresponds to "ints off" */
> >>      memset(xsrc->status, XIVE_ESB_OFF, xsrc->nr_irqs);
> >> +
> >> +    if (kvm_irqchip_in_kernel()) {
> >> +        kvmppc_xive_source_reset(xsrc, &error_fatal);
> >> +    }
> >>  }
> >>  
> >>  static void xive_source_realize(DeviceState *dev, Error **errp)
> >> @@ -990,9 +1003,11 @@ static void xive_source_realize(DeviceState *dev, Error **errp)
> >>      xsrc->status = g_malloc0(xsrc->nr_irqs);
> >>      xsrc->lsi_map = bitmap_new(xsrc->nr_irqs);
> >>  
> >> -    memory_region_init_io(&xsrc->esb_mmio, OBJECT(xsrc),
> >> -                          &xive_source_esb_ops, xsrc, "xive.esb",
> >> -                          (1ull << xsrc->esb_shift) * xsrc->nr_irqs);
> >> +    if (!kvm_irqchip_in_kernel()) {
> >> +        memory_region_init_io(&xsrc->esb_mmio, OBJECT(xsrc),
> >> +                              &xive_source_esb_ops, xsrc, "xive.esb",
> >> +                              (1ull << xsrc->esb_shift) * xsrc->nr_irqs);
> >> +    }
> >>  
> >>      qemu_register_reset(xive_source_reset, dev);
> >>  }
> >> diff --git a/hw/ppc/Kconfig b/hw/ppc/Kconfig
> >> index a3465155f0..f927ec9c74 100644
> >> --- a/hw/ppc/Kconfig
> >> +++ b/hw/ppc/Kconfig
> >> @@ -122,3 +122,8 @@ config XIVE_SPAPR
> >>      default y
> >>      depends on PSERIES
> >>      select XIVE
> >> +
> >> +config XIVE_KVM
> >> +    bool
> >> +    default y
> >> +    depends on XIVE_SPAPR && KVM
> >> diff --git a/hw/ppc/spapr_irq.c b/hw/ppc/spapr_irq.c
> >> index b1f79ea9de..5c4a44855d 100644
> >> --- a/hw/ppc/spapr_irq.c
> >> +++ b/hw/ppc/spapr_irq.c
> >> @@ -372,7 +372,11 @@ static void spapr_irq_set_irq_xive(void *opaque, int srcno, int val)
> >>  {
> >>      SpaprMachineState *spapr = opaque;
> >>  
> >> -    xive_source_set_irq(&spapr->xive->source, srcno, val);
> >> +    if (kvm_irqchip_in_kernel()) {
> >> +        kvmppc_xive_source_set_irq(&spapr->xive->source, srcno, val);
> >> +    } else {
> >> +        xive_source_set_irq(&spapr->xive->source, srcno, val);
> >> +    }
> >>  }
> >>  
> >>  static const char *spapr_irq_get_nodename_xive(SpaprMachineState *spapr)
> >> diff --git a/include/hw/ppc/spapr_xive.h b/include/hw/ppc/spapr_xive.h
> >> index fc3e9652f9..0edcc762de 100644
> >> --- a/include/hw/ppc/spapr_xive.h
> >> +++ b/include/hw/ppc/spapr_xive.h
> >> @@ -38,6 +38,10 @@ typedef struct SpaprXive {
> >>      /* TIMA mapping address */
> >>      hwaddr        tm_base;
> >>      MemoryRegion  tm_mmio;
> >> +
> >> +    /* KVM support */
> >> +    int           fd;
> >> +    void          *tm_mmap;
> >>  } SpaprXive;
> >>  
> >>  bool spapr_xive_irq_claim(SpaprXive *xive, uint32_t lisn, bool lsi);
> >> @@ -49,5 +53,11 @@ void spapr_dt_xive(SpaprMachineState *spapr, uint32_t nr_servers, void *fdt,
> >>                     uint32_t phandle);
> >>  void spapr_xive_set_tctx_os_cam(XiveTCTX *tctx);
> >>  void spapr_xive_mmio_set_enabled(SpaprXive *xive, bool enable);
> >> +void spapr_xive_map_mmio(SpaprXive *xive);
> >> +
> >> +/*
> >> + * KVM XIVE device helpers
> >> + */
> >> +void kvmppc_xive_connect(SpaprXive *xive, Error **errp);
> >>  
> >>  #endif /* PPC_SPAPR_XIVE_H */
> >> diff --git a/include/hw/ppc/xive.h b/include/hw/ppc/xive.h
> >> index c4f27742ca..dd115da30e 100644
> >> --- a/include/hw/ppc/xive.h
> >> +++ b/include/hw/ppc/xive.h
> >> @@ -140,6 +140,7 @@
> >>  #ifndef PPC_XIVE_H
> >>  #define PPC_XIVE_H
> >>  
> >> +#include "sysemu/kvm.h"
> >>  #include "hw/qdev-core.h"
> >>  #include "hw/sysbus.h"
> >>  #include "hw/ppc/xive_regs.h"
> >> @@ -194,6 +195,9 @@ typedef struct XiveSource {
> >>      uint32_t        esb_shift;
> >>      MemoryRegion    esb_mmio;
> >>  
> >> +    /* KVM support */
> >> +    void            *esb_mmap;
> >> +
> >>      XiveNotifier    *xive;
> >>  } XiveSource;
> >>  
> >> @@ -423,4 +427,13 @@ static inline uint32_t xive_nvt_cam_line(uint8_t nvt_blk, uint32_t nvt_idx)
> >>      return (nvt_blk << 19) | nvt_idx;
> >>  }
> >>  
> >> +/*
> >> + * KVM XIVE device helpers
> >> + */
> >> +
> >> +void kvmppc_xive_source_reset_one(XiveSource *xsrc, int srcno, Error **errp);
> >> +void kvmppc_xive_source_reset(XiveSource *xsrc, Error **errp);
> >> +void kvmppc_xive_source_set_irq(void *opaque, int srcno, int val);
> >> +void kvmppc_xive_cpu_connect(XiveTCTX *tctx, Error **errp);
> >> +
> >>  #endif /* PPC_XIVE_H */
> >> diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c
> >> index 1a9caf8f40..3bf0a46c33 100644
> >> --- a/target/ppc/kvm.c
> >> +++ b/target/ppc/kvm.c
> >> @@ -75,6 +75,7 @@ static int cap_fixup_hcalls;
> >>  static int cap_htm;             /* Hardware transactional memory support */
> >>  static int cap_mmu_radix;
> >>  static int cap_mmu_hash_v3;
> >> +static int cap_xive;
> >>  static int cap_resize_hpt;
> >>  static int cap_ppc_pvr_compat;
> >>  static int cap_ppc_safe_cache;
> >> @@ -146,6 +147,7 @@ int kvm_arch_init(MachineState *ms, KVMState *s)
> >>      cap_htm = kvm_vm_check_extension(s, KVM_CAP_PPC_HTM);
> >>      cap_mmu_radix = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_RADIX);
> >>      cap_mmu_hash_v3 = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_HASH_V3);
> >> +    cap_xive = kvm_vm_check_extension(s, KVM_CAP_PPC_IRQ_XIVE);
> >>      cap_resize_hpt = kvm_vm_check_extension(s, KVM_CAP_SPAPR_RESIZE_HPT);
> >>      kvmppc_get_cpu_characteristics(s);
> >>      cap_ppc_nested_kvm_hv = kvm_vm_check_extension(s, KVM_CAP_PPC_NESTED_HV);
> >> @@ -2478,6 +2480,11 @@ static int parse_cap_ppc_count_cache_flush_assist(struct kvm_ppc_cpu_char c)
> >>      return 0;
> >>  }
> >>  
> >> +bool kvmppc_has_cap_xive(void)
> >> +{
> >> +    return cap_xive;
> >> +}
> >> +
> >>  static void kvmppc_get_cpu_characteristics(KVMState *s)
> >>  {
> >>      struct kvm_ppc_cpu_char c;
> >> diff --git a/target/ppc/kvm_ppc.h b/target/ppc/kvm_ppc.h
> >> index 22385134b4..45776cad79 100644
> >> --- a/target/ppc/kvm_ppc.h
> >> +++ b/target/ppc/kvm_ppc.h
> >> @@ -60,6 +60,7 @@ bool kvmppc_has_cap_fixup_hcalls(void);
> >>  bool kvmppc_has_cap_htm(void);
> >>  bool kvmppc_has_cap_mmu_radix(void);
> >>  bool kvmppc_has_cap_mmu_hash_v3(void);
> >> +bool kvmppc_has_cap_xive(void);
> >>  int kvmppc_get_cap_safe_cache(void);
> >>  int kvmppc_get_cap_safe_bounds_check(void);
> >>  int kvmppc_get_cap_safe_indirect_branch(void);
> >> @@ -316,6 +317,11 @@ static inline bool kvmppc_has_cap_mmu_hash_v3(void)
> >>      return false;
> >>  }
> >>  
> >> +static inline bool kvmppc_has_cap_xive(void)
> >> +{
> >> +    return false;
> >> +}
> >> +
> >>  static inline int kvmppc_get_cap_safe_cache(void)
> >>  {
> >>      return 0;
> >>  
> >   
>
Alexey Kardashevskiy June 5, 2019, 7:24 a.m. UTC | #4
On 04/06/2019 17:54, Cédric Le Goater wrote:
> On 04/06/2019 09:23, Alexey Kardashevskiy wrote:
>>
>>
>> On 29/05/2019 16:49, David Gibson wrote:
>>> From: Cédric Le Goater <clg@kaod.org>
>>>
>>> This introduces a set of helpers when KVM is in use, which create the
>>> KVM XIVE device, initialize the interrupt sources at a KVM level and
>>> connect the interrupt presenters to the vCPU.
>>>
>>> They also handle the initialization of the TIMA and the source ESB
>>> memory regions of the controller. These have a different type under
>>> KVM. They are 'ram device' memory mappings, similarly to VFIO, exposed
>>> to the guest and the associated VMAs on the host are populated
>>> dynamically with the appropriate pages using a fault handler.
>>>
>>> Signed-off-by: Cédric Le Goater <clg@kaod.org>
>>> Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
>>> Message-Id: <20190513084245.25755-3-clg@kaod.org>
>>> Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
>>
>>
>> This one breaks my setup - it boots up to:
>>
>>
>> ipr: IBM Power RAID SCSI Device Driver version: 2.6.4 (March 14, 2017)
>> __vio_register_driver: driver ibmvscsi registering
>> ibmvscsi 71000001: SRP_VERSION: 16.a
>> ibmvscsi 71000001: Maximum ID: 64 Maximum LUN: 32 Maximum Channel: 3
>> scsi host0: IBM POWER Virtual SCSI Adapter 1.5.9
>>
>>
>> and hangs. Here is the command line:
>>
>>
>> /home/aik/pbuild/qemu-aikrhel74alt-ppc64/ppc64-softmmu/qemu-system-ppc64 \
>> -nodefaults \
>> -chardev stdio,id=STDIO0,signal=off,mux=on \
>> -device spapr-vty,id=svty0,reg=0x71000110,chardev=STDIO0 \
>> -mon id=MON0,chardev=STDIO0,mode=readline -nographic -vga none \
>> -enable-kvm \
>> -device nec-usb-xhci,id=nec-usb-xhci0 -m 16G \
>> -netdev "user,id=USER0,hostfwd=tcp::2223-:22" \
>> -device "virtio-net-pci,id=vnet0,mac=C0:41:49:4b:00:00,netdev=USER0" \
>> img/u1804-64G-cuda10.1-418.67-swiotlb.qcow2 \
>> -machine pseries,cap-cfpc=broken,cap-htm=off,ic-mode=xive -snapshot \
>> -smp 1,threads=1 -bios ./slof.bin \
>> -L /home/aik/t/qemu-ppc64-bios/ \
>> -trace events=qemu_trace_events -d guest_errors \
>> -chardev socket,id=SOCKET0,server,nowait,path=qemu.mon.user2223 \
>> -mon chardev=SOCKET0,mode=control
> 
> At this level of patch 38afd772f802 ("spapr/xive: add KVM support"), I am
> surprised this is even starting. 
> 
> The test in spapr_irq_init_xive() : 
> 
>     /* KVM XIVE device not yet available */
>     if (kvm_enabled()) {
>         if (machine_kernel_irqchip_required(machine)) {
>             error_setg(errp, "kernel_irqchip requested. no KVM XIVE support");
>             return;
>         }
>     }
> 
> should fail. This is removed later in 0dc9f5f8496a ("spapr/xive: activate 
> KVM support")
> 
>> The host kernel is v5.2-rc2. The next patch - 0c575703e487 "spapr/xive:
>> add hcall support when under KVM" - fixes this though but the question
>> is now if xive emulation in qemu still works (how do I verify it?).
> 
> kernel_irqchip=off should activate the QEMU XIVE device.
> 
> Are you testing bisection ?


I was bisecting because I originally wanted to test David's recent spapr
PCI rework and things broke again, although differently, hence the noise
I made about XIVE.
diff mbox series

Patch

diff --git a/hw/intc/Makefile.objs b/hw/intc/Makefile.objs
index df712c3e6c..03019b9a03 100644
--- a/hw/intc/Makefile.objs
+++ b/hw/intc/Makefile.objs
@@ -39,6 +39,7 @@  obj-$(CONFIG_XICS_SPAPR) += xics_spapr.o
 obj-$(CONFIG_XICS_KVM) += xics_kvm.o
 obj-$(CONFIG_XIVE) += xive.o
 obj-$(CONFIG_XIVE_SPAPR) += spapr_xive.o
+obj-$(CONFIG_XIVE_KVM) += spapr_xive_kvm.o
 obj-$(CONFIG_POWERNV) += xics_pnv.o pnv_xive.o
 obj-$(CONFIG_ALLWINNER_A10_PIC) += allwinner-a10-pic.o
 obj-$(CONFIG_S390_FLIC) += s390_flic.o
diff --git a/hw/intc/spapr_xive.c b/hw/intc/spapr_xive.c
index 62e13ac353..27632683e6 100644
--- a/hw/intc/spapr_xive.c
+++ b/hw/intc/spapr_xive.c
@@ -174,7 +174,7 @@  void spapr_xive_pic_print_info(SpaprXive *xive, Monitor *mon)
     }
 }
 
-static void spapr_xive_map_mmio(SpaprXive *xive)
+void spapr_xive_map_mmio(SpaprXive *xive)
 {
     sysbus_mmio_map(SYS_BUS_DEVICE(xive), 0, xive->vc_base);
     sysbus_mmio_map(SYS_BUS_DEVICE(xive), 1, xive->end_base);
@@ -251,6 +251,9 @@  static void spapr_xive_instance_init(Object *obj)
     object_initialize_child(obj, "end_source", &xive->end_source,
                             sizeof(xive->end_source), TYPE_XIVE_END_SOURCE,
                             &error_abort, NULL);
+
+    /* Not connected to the KVM XIVE device */
+    xive->fd = -1;
 }
 
 static void spapr_xive_realize(DeviceState *dev, Error **errp)
@@ -259,6 +262,7 @@  static void spapr_xive_realize(DeviceState *dev, Error **errp)
     XiveSource *xsrc = &xive->source;
     XiveENDSource *end_xsrc = &xive->end_source;
     Error *local_err = NULL;
+    MachineState *machine = MACHINE(qdev_get_machine());
 
     if (!xive->nr_irqs) {
         error_setg(errp, "Number of interrupt needs to be greater 0");
@@ -305,6 +309,32 @@  static void spapr_xive_realize(DeviceState *dev, Error **errp)
     xive->eat = g_new0(XiveEAS, xive->nr_irqs);
     xive->endt = g_new0(XiveEND, xive->nr_ends);
 
+    xive->nodename = g_strdup_printf("interrupt-controller@%" PRIx64,
+                           xive->tm_base + XIVE_TM_USER_PAGE * (1 << TM_SHIFT));
+
+    qemu_register_reset(spapr_xive_reset, dev);
+
+    if (kvm_enabled() && machine_kernel_irqchip_allowed(machine)) {
+        kvmppc_xive_connect(xive, &local_err);
+        if (local_err && machine_kernel_irqchip_required(machine)) {
+            error_prepend(&local_err,
+                          "kernel_irqchip requested but unavailable: ");
+            error_propagate(errp, local_err);
+            return;
+        }
+
+        if (!local_err) {
+            return;
+        }
+
+        /*
+         * We failed to initialize the XIVE KVM device, fallback to
+         * emulated mode
+         */
+        error_prepend(&local_err, "kernel_irqchip allowed but unavailable: ");
+        warn_report_err(local_err);
+    }
+
     /* TIMA initialization */
     memory_region_init_io(&xive->tm_mmio, OBJECT(xive), &xive_tm_ops, xive,
                           "xive.tima", 4ull << TM_SHIFT);
@@ -316,11 +346,6 @@  static void spapr_xive_realize(DeviceState *dev, Error **errp)
 
     /* Map all regions */
     spapr_xive_map_mmio(xive);
-
-    xive->nodename = g_strdup_printf("interrupt-controller@%" PRIx64,
-                           xive->tm_base + XIVE_TM_USER_PAGE * (1 << TM_SHIFT));
-
-    qemu_register_reset(spapr_xive_reset, dev);
 }
 
 static int spapr_xive_get_eas(XiveRouter *xrtr, uint8_t eas_blk,
@@ -495,6 +520,17 @@  bool spapr_xive_irq_claim(SpaprXive *xive, uint32_t lisn, bool lsi)
     if (lsi) {
         xive_source_irq_set_lsi(xsrc, lisn);
     }
+
+    if (kvm_irqchip_in_kernel()) {
+        Error *local_err = NULL;
+
+        kvmppc_xive_source_reset_one(xsrc, lisn, &local_err);
+        if (local_err) {
+            error_report_err(local_err);
+            return false;
+        }
+    }
+
     return true;
 }
 
diff --git a/hw/intc/spapr_xive_kvm.c b/hw/intc/spapr_xive_kvm.c
new file mode 100644
index 0000000000..7d9e771e8a
--- /dev/null
+++ b/hw/intc/spapr_xive_kvm.c
@@ -0,0 +1,237 @@ 
+/*
+ * QEMU PowerPC sPAPR XIVE interrupt controller model
+ *
+ * Copyright (c) 2017-2019, IBM Corporation.
+ *
+ * This code is licensed under the GPL version 2 or later. See the
+ * COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/log.h"
+#include "qemu/error-report.h"
+#include "qapi/error.h"
+#include "target/ppc/cpu.h"
+#include "sysemu/cpus.h"
+#include "sysemu/kvm.h"
+#include "hw/ppc/spapr.h"
+#include "hw/ppc/spapr_xive.h"
+#include "hw/ppc/xive.h"
+#include "kvm_ppc.h"
+
+#include <sys/ioctl.h>
+
+/*
+ * Helpers for CPU hotplug
+ *
+ * TODO: make a common KVMEnabledCPU layer for XICS and XIVE
+ */
+typedef struct KVMEnabledCPU {
+    unsigned long vcpu_id;
+    QLIST_ENTRY(KVMEnabledCPU) node;
+} KVMEnabledCPU;
+
+static QLIST_HEAD(, KVMEnabledCPU)
+    kvm_enabled_cpus = QLIST_HEAD_INITIALIZER(&kvm_enabled_cpus);
+
+static bool kvm_cpu_is_enabled(CPUState *cs)
+{
+    KVMEnabledCPU *enabled_cpu;
+    unsigned long vcpu_id = kvm_arch_vcpu_id(cs);
+
+    QLIST_FOREACH(enabled_cpu, &kvm_enabled_cpus, node) {
+        if (enabled_cpu->vcpu_id == vcpu_id) {
+            return true;
+        }
+    }
+    return false;
+}
+
+static void kvm_cpu_enable(CPUState *cs)
+{
+    KVMEnabledCPU *enabled_cpu;
+    unsigned long vcpu_id = kvm_arch_vcpu_id(cs);
+
+    enabled_cpu = g_malloc(sizeof(*enabled_cpu));
+    enabled_cpu->vcpu_id = vcpu_id;
+    QLIST_INSERT_HEAD(&kvm_enabled_cpus, enabled_cpu, node);
+}
+
+/*
+ * XIVE Thread Interrupt Management context (KVM)
+ */
+
+void kvmppc_xive_cpu_connect(XiveTCTX *tctx, Error **errp)
+{
+    SpaprXive *xive = SPAPR_MACHINE(qdev_get_machine())->xive;
+    unsigned long vcpu_id;
+    int ret;
+
+    /* Check if CPU was hot unplugged and replugged. */
+    if (kvm_cpu_is_enabled(tctx->cs)) {
+        return;
+    }
+
+    vcpu_id = kvm_arch_vcpu_id(tctx->cs);
+
+    ret = kvm_vcpu_enable_cap(tctx->cs, KVM_CAP_PPC_IRQ_XIVE, 0, xive->fd,
+                              vcpu_id, 0);
+    if (ret < 0) {
+        error_setg(errp, "XIVE: unable to connect CPU%ld to KVM device: %s",
+                   vcpu_id, strerror(errno));
+        return;
+    }
+
+    kvm_cpu_enable(tctx->cs);
+}
+
+/*
+ * XIVE Interrupt Source (KVM)
+ */
+
+/*
+ * At reset, the interrupt sources are simply created and MASKED. We
+ * only need to inform the KVM XIVE device about their type: LSI or
+ * MSI.
+ */
+void kvmppc_xive_source_reset_one(XiveSource *xsrc, int srcno, Error **errp)
+{
+    SpaprXive *xive = SPAPR_XIVE(xsrc->xive);
+    uint64_t state = 0;
+
+    if (xive_source_irq_is_lsi(xsrc, srcno)) {
+        state |= KVM_XIVE_LEVEL_SENSITIVE;
+        if (xsrc->status[srcno] & XIVE_STATUS_ASSERTED) {
+            state |= KVM_XIVE_LEVEL_ASSERTED;
+        }
+    }
+
+    kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_SOURCE, srcno, &state,
+                      true, errp);
+}
+
+void kvmppc_xive_source_reset(XiveSource *xsrc, Error **errp)
+{
+    int i;
+
+    for (i = 0; i < xsrc->nr_irqs; i++) {
+        Error *local_err = NULL;
+
+        kvmppc_xive_source_reset_one(xsrc, i, &local_err);
+        if (local_err) {
+            error_propagate(errp, local_err);
+            return;
+        }
+    }
+}
+
+void kvmppc_xive_source_set_irq(void *opaque, int srcno, int val)
+{
+    XiveSource *xsrc = opaque;
+    struct kvm_irq_level args;
+    int rc;
+
+    args.irq = srcno;
+    if (!xive_source_irq_is_lsi(xsrc, srcno)) {
+        if (!val) {
+            return;
+        }
+        args.level = KVM_INTERRUPT_SET;
+    } else {
+        if (val) {
+            xsrc->status[srcno] |= XIVE_STATUS_ASSERTED;
+            args.level = KVM_INTERRUPT_SET_LEVEL;
+        } else {
+            xsrc->status[srcno] &= ~XIVE_STATUS_ASSERTED;
+            args.level = KVM_INTERRUPT_UNSET;
+        }
+    }
+    rc = kvm_vm_ioctl(kvm_state, KVM_IRQ_LINE, &args);
+    if (rc < 0) {
+        error_report("XIVE: kvm_irq_line() failed : %s", strerror(errno));
+    }
+}
+
+/*
+ * sPAPR XIVE interrupt controller (KVM)
+ */
+
+static void *kvmppc_xive_mmap(SpaprXive *xive, int pgoff, size_t len,
+                              Error **errp)
+{
+    void *addr;
+    uint32_t page_shift = 16; /* TODO: fix page_shift */
+
+    addr = mmap(NULL, len, PROT_WRITE | PROT_READ, MAP_SHARED, xive->fd,
+                pgoff << page_shift);
+    if (addr == MAP_FAILED) {
+        error_setg_errno(errp, errno, "XIVE: unable to set memory mapping");
+        return NULL;
+    }
+
+    return addr;
+}
+
+/*
+ * All the XIVE memory regions are now backed by mappings from the KVM
+ * XIVE device.
+ */
+void kvmppc_xive_connect(SpaprXive *xive, Error **errp)
+{
+    XiveSource *xsrc = &xive->source;
+    XiveENDSource *end_xsrc = &xive->end_source;
+    Error *local_err = NULL;
+    size_t esb_len = (1ull << xsrc->esb_shift) * xsrc->nr_irqs;
+    size_t tima_len = 4ull << TM_SHIFT;
+
+    if (!kvmppc_has_cap_xive()) {
+        error_setg(errp, "IRQ_XIVE capability must be present for KVM");
+        return;
+    }
+
+    /* First, create the KVM XIVE device */
+    xive->fd = kvm_create_device(kvm_state, KVM_DEV_TYPE_XIVE, false);
+    if (xive->fd < 0) {
+        error_setg_errno(errp, -xive->fd, "XIVE: error creating KVM device");
+        return;
+    }
+
+    /*
+     * 1. Source ESB pages - KVM mapping
+     */
+    xsrc->esb_mmap = kvmppc_xive_mmap(xive, KVM_XIVE_ESB_PAGE_OFFSET, esb_len,
+                                      &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        return;
+    }
+
+    memory_region_init_ram_device_ptr(&xsrc->esb_mmio, OBJECT(xsrc),
+                                      "xive.esb", esb_len, xsrc->esb_mmap);
+    sysbus_init_mmio(SYS_BUS_DEVICE(xive), &xsrc->esb_mmio);
+
+    /*
+     * 2. END ESB pages (No KVM support yet)
+     */
+    sysbus_init_mmio(SYS_BUS_DEVICE(xive), &end_xsrc->esb_mmio);
+
+    /*
+     * 3. TIMA pages - KVM mapping
+     */
+    xive->tm_mmap = kvmppc_xive_mmap(xive, KVM_XIVE_TIMA_PAGE_OFFSET, tima_len,
+                                     &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        return;
+    }
+    memory_region_init_ram_device_ptr(&xive->tm_mmio, OBJECT(xive),
+                                      "xive.tima", tima_len, xive->tm_mmap);
+    sysbus_init_mmio(SYS_BUS_DEVICE(xive), &xive->tm_mmio);
+
+    kvm_kernel_irqchip = true;
+    kvm_msi_via_irqfd_allowed = true;
+    kvm_gsi_direct_mapping = true;
+
+    /* Map all regions */
+    spapr_xive_map_mmio(xive);
+}
diff --git a/hw/intc/xive.c b/hw/intc/xive.c
index dcf2fcd108..78047adb11 100644
--- a/hw/intc/xive.c
+++ b/hw/intc/xive.c
@@ -555,6 +555,15 @@  static void xive_tctx_realize(DeviceState *dev, Error **errp)
         return;
     }
 
+    /* Connect the presenter to the VCPU (required for CPU hotplug) */
+    if (kvm_irqchip_in_kernel()) {
+        kvmppc_xive_cpu_connect(tctx, &local_err);
+        if (local_err) {
+            error_propagate(errp, local_err);
+            return;
+        }
+    }
+
     qemu_register_reset(xive_tctx_reset, dev);
 }
 
@@ -957,6 +966,10 @@  static void xive_source_reset(void *dev)
 
     /* PQs are initialized to 0b01 (Q=1) which corresponds to "ints off" */
     memset(xsrc->status, XIVE_ESB_OFF, xsrc->nr_irqs);
+
+    if (kvm_irqchip_in_kernel()) {
+        kvmppc_xive_source_reset(xsrc, &error_fatal);
+    }
 }
 
 static void xive_source_realize(DeviceState *dev, Error **errp)
@@ -990,9 +1003,11 @@  static void xive_source_realize(DeviceState *dev, Error **errp)
     xsrc->status = g_malloc0(xsrc->nr_irqs);
     xsrc->lsi_map = bitmap_new(xsrc->nr_irqs);
 
-    memory_region_init_io(&xsrc->esb_mmio, OBJECT(xsrc),
-                          &xive_source_esb_ops, xsrc, "xive.esb",
-                          (1ull << xsrc->esb_shift) * xsrc->nr_irqs);
+    if (!kvm_irqchip_in_kernel()) {
+        memory_region_init_io(&xsrc->esb_mmio, OBJECT(xsrc),
+                              &xive_source_esb_ops, xsrc, "xive.esb",
+                              (1ull << xsrc->esb_shift) * xsrc->nr_irqs);
+    }
 
     qemu_register_reset(xive_source_reset, dev);
 }
diff --git a/hw/ppc/Kconfig b/hw/ppc/Kconfig
index a3465155f0..f927ec9c74 100644
--- a/hw/ppc/Kconfig
+++ b/hw/ppc/Kconfig
@@ -122,3 +122,8 @@  config XIVE_SPAPR
     default y
     depends on PSERIES
     select XIVE
+
+config XIVE_KVM
+    bool
+    default y
+    depends on XIVE_SPAPR && KVM
diff --git a/hw/ppc/spapr_irq.c b/hw/ppc/spapr_irq.c
index b1f79ea9de..5c4a44855d 100644
--- a/hw/ppc/spapr_irq.c
+++ b/hw/ppc/spapr_irq.c
@@ -372,7 +372,11 @@  static void spapr_irq_set_irq_xive(void *opaque, int srcno, int val)
 {
     SpaprMachineState *spapr = opaque;
 
-    xive_source_set_irq(&spapr->xive->source, srcno, val);
+    if (kvm_irqchip_in_kernel()) {
+        kvmppc_xive_source_set_irq(&spapr->xive->source, srcno, val);
+    } else {
+        xive_source_set_irq(&spapr->xive->source, srcno, val);
+    }
 }
 
 static const char *spapr_irq_get_nodename_xive(SpaprMachineState *spapr)
diff --git a/include/hw/ppc/spapr_xive.h b/include/hw/ppc/spapr_xive.h
index fc3e9652f9..0edcc762de 100644
--- a/include/hw/ppc/spapr_xive.h
+++ b/include/hw/ppc/spapr_xive.h
@@ -38,6 +38,10 @@  typedef struct SpaprXive {
     /* TIMA mapping address */
     hwaddr        tm_base;
     MemoryRegion  tm_mmio;
+
+    /* KVM support */
+    int           fd;
+    void          *tm_mmap;
 } SpaprXive;
 
 bool spapr_xive_irq_claim(SpaprXive *xive, uint32_t lisn, bool lsi);
@@ -49,5 +53,11 @@  void spapr_dt_xive(SpaprMachineState *spapr, uint32_t nr_servers, void *fdt,
                    uint32_t phandle);
 void spapr_xive_set_tctx_os_cam(XiveTCTX *tctx);
 void spapr_xive_mmio_set_enabled(SpaprXive *xive, bool enable);
+void spapr_xive_map_mmio(SpaprXive *xive);
+
+/*
+ * KVM XIVE device helpers
+ */
+void kvmppc_xive_connect(SpaprXive *xive, Error **errp);
 
 #endif /* PPC_SPAPR_XIVE_H */
diff --git a/include/hw/ppc/xive.h b/include/hw/ppc/xive.h
index c4f27742ca..dd115da30e 100644
--- a/include/hw/ppc/xive.h
+++ b/include/hw/ppc/xive.h
@@ -140,6 +140,7 @@ 
 #ifndef PPC_XIVE_H
 #define PPC_XIVE_H
 
+#include "sysemu/kvm.h"
 #include "hw/qdev-core.h"
 #include "hw/sysbus.h"
 #include "hw/ppc/xive_regs.h"
@@ -194,6 +195,9 @@  typedef struct XiveSource {
     uint32_t        esb_shift;
     MemoryRegion    esb_mmio;
 
+    /* KVM support */
+    void            *esb_mmap;
+
     XiveNotifier    *xive;
 } XiveSource;
 
@@ -423,4 +427,13 @@  static inline uint32_t xive_nvt_cam_line(uint8_t nvt_blk, uint32_t nvt_idx)
     return (nvt_blk << 19) | nvt_idx;
 }
 
+/*
+ * KVM XIVE device helpers
+ */
+
+void kvmppc_xive_source_reset_one(XiveSource *xsrc, int srcno, Error **errp);
+void kvmppc_xive_source_reset(XiveSource *xsrc, Error **errp);
+void kvmppc_xive_source_set_irq(void *opaque, int srcno, int val);
+void kvmppc_xive_cpu_connect(XiveTCTX *tctx, Error **errp);
+
 #endif /* PPC_XIVE_H */
diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c
index 1a9caf8f40..3bf0a46c33 100644
--- a/target/ppc/kvm.c
+++ b/target/ppc/kvm.c
@@ -75,6 +75,7 @@  static int cap_fixup_hcalls;
 static int cap_htm;             /* Hardware transactional memory support */
 static int cap_mmu_radix;
 static int cap_mmu_hash_v3;
+static int cap_xive;
 static int cap_resize_hpt;
 static int cap_ppc_pvr_compat;
 static int cap_ppc_safe_cache;
@@ -146,6 +147,7 @@  int kvm_arch_init(MachineState *ms, KVMState *s)
     cap_htm = kvm_vm_check_extension(s, KVM_CAP_PPC_HTM);
     cap_mmu_radix = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_RADIX);
     cap_mmu_hash_v3 = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_HASH_V3);
+    cap_xive = kvm_vm_check_extension(s, KVM_CAP_PPC_IRQ_XIVE);
     cap_resize_hpt = kvm_vm_check_extension(s, KVM_CAP_SPAPR_RESIZE_HPT);
     kvmppc_get_cpu_characteristics(s);
     cap_ppc_nested_kvm_hv = kvm_vm_check_extension(s, KVM_CAP_PPC_NESTED_HV);
@@ -2478,6 +2480,11 @@  static int parse_cap_ppc_count_cache_flush_assist(struct kvm_ppc_cpu_char c)
     return 0;
 }
 
+bool kvmppc_has_cap_xive(void)
+{
+    return cap_xive;
+}
+
 static void kvmppc_get_cpu_characteristics(KVMState *s)
 {
     struct kvm_ppc_cpu_char c;
diff --git a/target/ppc/kvm_ppc.h b/target/ppc/kvm_ppc.h
index 22385134b4..45776cad79 100644
--- a/target/ppc/kvm_ppc.h
+++ b/target/ppc/kvm_ppc.h
@@ -60,6 +60,7 @@  bool kvmppc_has_cap_fixup_hcalls(void);
 bool kvmppc_has_cap_htm(void);
 bool kvmppc_has_cap_mmu_radix(void);
 bool kvmppc_has_cap_mmu_hash_v3(void);
+bool kvmppc_has_cap_xive(void);
 int kvmppc_get_cap_safe_cache(void);
 int kvmppc_get_cap_safe_bounds_check(void);
 int kvmppc_get_cap_safe_indirect_branch(void);
@@ -316,6 +317,11 @@  static inline bool kvmppc_has_cap_mmu_hash_v3(void)
     return false;
 }
 
+static inline bool kvmppc_has_cap_xive(void)
+{
+    return false;
+}
+
 static inline int kvmppc_get_cap_safe_cache(void)
 {
     return 0;