Message ID | 20190222131322.26079-3-clg@kaod.org |
---|---|
State | New |
Series | spapr: add KVM support to the XIVE interrupt mode |
On Fri, Feb 22, 2019 at 02:13:11PM +0100, Cédric Le Goater wrote:
> XIVE hcalls are all redirected to QEMU as none are on a fast path.
> When necessary, QEMU invokes KVM through specific ioctls to perform
> host operations. QEMU should have done the necessary checks before
> calling KVM and, in case of failure, H_HARDWARE is simply returned.
>
> H_INT_ESB is a special case that could have been handled under KVM
> but the impact on performance was low when under QEMU. Here are some
> figures :
>
>   kernel irqchip     OFF        ON
>   H_INT_ESB                 KVM     QEMU
>
>   rtl8139 (LSI )    1.19    1.24    1.23   Gbits/sec
>   virtio           31.80   42.30      --   Gbits/sec
>
> Signed-off-by: Cédric Le Goater <clg@kaod.org>
> ---
>  include/hw/ppc/spapr_xive.h |  15 +++
>  hw/intc/spapr_xive.c        |  87 +++++++++++++++--
>  hw/intc/spapr_xive_kvm.c    | 184 ++++++++++++++++++++++++++++++++++++
>  3 files changed, 278 insertions(+), 8 deletions(-)

[...]

> diff --git a/hw/intc/spapr_xive.c b/hw/intc/spapr_xive.c
> index c24d649e3668..3db24391e31c 100644
> --- a/hw/intc/spapr_xive.c
> +++ b/hw/intc/spapr_xive.c
> @@ -86,6 +86,19 @@ static int spapr_xive_target_to_nvt(uint32_t target,
>   * sPAPR END indexing uses a simple mapping of the CPU vcpu_id, 8
>   * priorities per CPU
>   */
> +int spapr_xive_end_to_target(uint8_t end_blk, uint32_t end_idx,
> +                             uint32_t *out_server, uint8_t *out_prio)
> +{

Since you don't support irq blocks as yet, should this error out
rather than ignoring if end_blk != 0?

> +    if (out_server) {
> +        *out_server = end_idx >> 3;
> +    }
> +
> +    if (out_prio) {
> +        *out_prio = end_idx & 0x7;
> +    }
> +    return 0;
> +}

[...]

> diff --git a/hw/intc/spapr_xive_kvm.c b/hw/intc/spapr_xive_kvm.c
> index 623fbf74f23e..6b50451b4f85 100644
> --- a/hw/intc/spapr_xive_kvm.c
> +++ b/hw/intc/spapr_xive_kvm.c
> @@ -89,6 +89,52 @@ void kvmppc_xive_cpu_connect(XiveTCTX *tctx, Error **errp)
>   * XIVE Interrupt Source (KVM)
>   */
>
> +void kvmppc_xive_set_source_config(sPAPRXive *xive, uint32_t lisn, XiveEAS *eas,
> +                                   Error **errp)
> +{
> +    uint32_t end_idx;
> +    uint32_t end_blk;
> +    uint32_t eisn;
> +    uint8_t priority;
> +    uint32_t server;
> +    uint64_t kvm_src;
> +    Error *local_err = NULL;
> +
> +    /*
> +     * No need to set a MASKED source, this is the default state after
> +     * reset.

I don't quite follow this comment, why is there no need to call a
MASKED source?

> +     */
> +    if (!xive_eas_is_valid(eas) || xive_eas_is_masked(eas)) {
> +        return;
> +    }

[...]

> +/*
> + * This is used to perform the magic loads on the ESB pages, described
> + * in xive.h.
> + */
> +static uint64_t xive_esb_rw(XiveSource *xsrc, int srcno, uint32_t offset,
> +                            uint64_t data, bool write)
> +{
> +    unsigned long addr = (unsigned long) xsrc->esb_mmap +
> +        xive_source_esb_mgmt(xsrc, srcno) + offset;

Casting the esb_mmap into unsigned long then back to a pointer looks
unnecessary. You should be able to do this with pointer arithmetic.

> +
> +    if (write) {
> +        *((uint64_t *) addr) = data;
> +        return -1;
> +    } else {
> +        return *((uint64_t *) addr);
> +    }

Since this is always dealing with 64-bit values, couldn't you put the
byteswaps in here rather than in all the callers?

> +}
> +
> +static uint8_t xive_esb_read(XiveSource *xsrc, int srcno, uint32_t offset)
> +{
> +    /* Prevent the compiler from optimizing away the load */
> +    volatile uint64_t value = xive_esb_rw(xsrc, srcno, offset, 0, 0);

Wouldn't the volatile magic be better inside xive_esb_rw()?

> +
> +    return be64_to_cpu(value) & 0x3;
> +}
> +
> +static void xive_esb_trigger(XiveSource *xsrc, int srcno)
> +{
> +    unsigned long addr = (unsigned long) xsrc->esb_mmap +
> +        xive_source_esb_page(xsrc, srcno);
> +
> +    *((uint64_t *) addr) = 0x0;
> +}

Also.. aren't some of these register accesses likely to need memory
barriers?

[...]

> +void kvmppc_xive_get_queue_config(sPAPRXive *xive, uint8_t end_blk,
> +                                  uint32_t end_idx, XiveEND *end,
> +                                  Error **errp)
> +{
> +    struct kvm_ppc_xive_eq kvm_eq = { 0 };
> +    uint64_t kvm_eq_idx;
> +    uint8_t priority;
> +    uint32_t server;
> +    Error *local_err = NULL;
> +
> +    if (!xive_end_is_valid(end)) {

This should set an error, shouldn't it?

> +        return;
> +    }

[...]
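Taken together, the three comments on the ESB helpers suggest a reworked
accessor along the following lines. This is a sketch against the patch's
context (XiveSource, xive_source_esb_mgmt()), not the actual respin:

/*
 * Illustrative sketch only. The volatile access stops the compiler
 * from eliding or reordering the MMIO load, and the byteswap is
 * centralized so callers deal in host-endian values.
 */
static uint64_t xive_esb_rw(XiveSource *xsrc, int srcno, uint32_t offset,
                            uint64_t data, bool write)
{
    /* Plain pointer arithmetic on the mmap'ed ESB pages, no integer cast */
    uint64_t *addr = xsrc->esb_mmap + xive_source_esb_mgmt(xsrc, srcno) +
        offset;

    if (write) {
        *addr = cpu_to_be64(data);
        return -1;
    } else {
        /* Prevent the compiler from optimizing away the load */
        volatile uint64_t value = *addr;
        return be64_to_cpu(value);
    }
}

With the byteswap and volatile access inside the helper, xive_esb_read()
would reduce to masking the returned PQ bits, with no byteswap at the
call sites.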
On 2/26/19 12:22 AM, David Gibson wrote:
> On Fri, Feb 22, 2019 at 02:13:11PM +0100, Cédric Le Goater wrote:
>> XIVE hcalls are all redirected to QEMU as none are on a fast path.
>> When necessary, QEMU invokes KVM through specific ioctls to perform
>> host operations. QEMU should have done the necessary checks before
>> calling KVM and, in case of failure, H_HARDWARE is simply returned.

[...]

>> +int spapr_xive_end_to_target(uint8_t end_blk, uint32_t end_idx,
>> +                             uint32_t *out_server, uint8_t *out_prio)
>> +{
>
> Since you don't support irq blocks as yet, should this error out
> rather than ignoring if end_blk != 0?

yes we could. I will add a test against SPAPR_XIVE_BLOCK which is the
value of the sPAPR block ID. I would like to be able to track where it
is used even if constant.

[...]

>> +    /*
>> +     * No need to set a MASKED source, this is the default state after
>> +     * reset.
>
> I don't quite follow this comment, why is there no need to call a
> MASKED source?

because MASKED is the default state in which KVM initializes the IRQ.
I will clarify.

[...]

>> +static uint64_t xive_esb_rw(XiveSource *xsrc, int srcno, uint32_t offset,
>> +                            uint64_t data, bool write)
>> +{
>> +    unsigned long addr = (unsigned long) xsrc->esb_mmap +
>> +        xive_source_esb_mgmt(xsrc, srcno) + offset;
>
> Casting the esb_mmap into unsigned long then back to a pointer looks
> unnecessary. You should be able to do this with pointer arithmetic.

yes.

>> +    if (write) {
>> +        *((uint64_t *) addr) = data;
>> +        return -1;
>> +    } else {
>> +        return *((uint64_t *) addr);
>> +    }
>
> Since this is always dealing with 64-bit values, couldn't you put the
> byteswaps in here rather than in all the callers?

indeed.

>> +static uint8_t xive_esb_read(XiveSource *xsrc, int srcno, uint32_t offset)
>> +{
>> +    /* Prevent the compiler from optimizing away the load */
>> +    volatile uint64_t value = xive_esb_rw(xsrc, srcno, offset, 0, 0);
>
> Wouldn't the volatile magic be better inside xive_esb_rw()?

sure. I will rework these helpers.

>> +static void xive_esb_trigger(XiveSource *xsrc, int srcno)
>> +{
>> +    unsigned long addr = (unsigned long) xsrc->esb_mmap +
>> +        xive_source_esb_page(xsrc, srcno);
>> +
>> +    *((uint64_t *) addr) = 0x0;
>> +}
>
> Also.. aren't some of these register accesses likely to need memory
> barriers?

AIUI, these are CI pages. So we shouldn't need barriers.

[...]

>> +    if (!xive_end_is_valid(end)) {
>
> This should set an error, shouldn't it?

Hmm, this helper is used in the hcall h_int_get_queue_config() and,
later, in kvmppc_xive_get_queues() to synchronize the state from KVM.

I should probably move the test outside this routine, return H_HARDWARE
in the hcall and skip invalid ENDs in kvmppc_xive_get_queues()

Thanks,

C.

[...]
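A sketch of the end_blk check promised in the first reply above, with
SPAPR_XIVE_BLOCK standing in for the constant named there (name and
value are assumptions, not the final respin):

#include <assert.h>
#include <stdint.h>

/* Hypothetical: the single block ID used by sPAPR, per the reply */
#define SPAPR_XIVE_BLOCK 0x0

int spapr_xive_end_to_target(uint8_t end_blk, uint32_t end_idx,
                             uint32_t *out_server, uint8_t *out_prio)
{
    /* sPAPR uses a single END block for now; catch anything else */
    assert(end_blk == SPAPR_XIVE_BLOCK);

    if (out_server) {
        *out_server = end_idx >> 3;   /* END index is vcpu_id * 8 + prio */
    }
    if (out_prio) {
        *out_prio = end_idx & 0x7;
    }
    return 0;
}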
On Mon, Mar 11, 2019 at 06:32:05PM +0100, Cédric Le Goater wrote:
> On 2/26/19 12:22 AM, David Gibson wrote:
> > On Fri, Feb 22, 2019 at 02:13:11PM +0100, Cédric Le Goater wrote:

[snip]

> >> +void kvmppc_xive_set_source_config(sPAPRXive *xive, uint32_t lisn, XiveEAS *eas,
> >> +                                   Error **errp)
> >> +{
> >> +    uint32_t end_idx;
> >> +    uint32_t end_blk;
> >> +    uint32_t eisn;
> >> +    uint8_t priority;
> >> +    uint32_t server;
> >> +    uint64_t kvm_src;
> >> +    Error *local_err = NULL;
> >> +
> >> +    /*
> >> +     * No need to set a MASKED source, this is the default state after
> >> +     * reset.
> >
> > I don't quite follow this comment, why is there no need to call a
> > MASKED source?
>
> because MASKED is the default state in which KVM initializes the IRQ.
> I will clarify.

I believe it's possible - though rare - to process an incoming
migration on an established VM which isn't in fresh reset state. So
it's best not to rely on that.

> >> +static void xive_esb_trigger(XiveSource *xsrc, int srcno)
> >> +{
> >> +    unsigned long addr = (unsigned long) xsrc->esb_mmap +
> >> +        xive_source_esb_page(xsrc, srcno);
> >> +
> >> +    *((uint64_t *) addr) = 0x0;
> >> +}
> >
> > Also.. aren't some of these register accesses likely to need memory
> > barriers?
>
> AIUI, these are CI pages. So we shouldn't need barriers.

CI doesn't negate the need for barriers, although it might change the
type you need. At the very least you need a compiler barrier to stop
it re-ordering the access, but you can also have in-cpu store and load
queues.
On 3/12/19 11:26 AM, David Gibson wrote:
> On Mon, Mar 11, 2019 at 06:32:05PM +0100, Cédric Le Goater wrote:
>> On 2/26/19 12:22 AM, David Gibson wrote:
>>> On Fri, Feb 22, 2019 at 02:13:11PM +0100, Cédric Le Goater wrote:
> [snip]
>>>> +void kvmppc_xive_set_source_config(sPAPRXive *xive, uint32_t lisn, XiveEAS *eas,
>>>> +                                   Error **errp)
>>>> +{
>>>> +    uint32_t end_idx;
>>>> +    uint32_t end_blk;
>>>> +    uint32_t eisn;
>>>> +    uint8_t priority;
>>>> +    uint32_t server;
>>>> +    uint64_t kvm_src;
>>>> +    Error *local_err = NULL;
>>>> +
>>>> +    /*
>>>> +     * No need to set a MASKED source, this is the default state after
>>>> +     * reset.
>>>
>>> I don't quite follow this comment, why is there no need to call a
>>> MASKED source?
>>
>> because MASKED is the default state in which KVM initializes the IRQ.
>> I will clarify.
>
> I believe it's possible - though rare - to process an incoming
> migration on an established VM which isn't in fresh reset state. So
> it's best not to rely on that.
>
>>>> +static void xive_esb_trigger(XiveSource *xsrc, int srcno)
>>>> +{
>>>> +    unsigned long addr = (unsigned long) xsrc->esb_mmap +
>>>> +        xive_source_esb_page(xsrc, srcno);
>>>> +
>>>> +    *((uint64_t *) addr) = 0x0;
>>>> +}
>>>
>>> Also.. aren't some of these register accesses likely to need memory
>>> barriers?
>>
>> AIUI, these are CI pages. So we shouldn't need barriers.
>
> CI doesn't negate the need for barriers, although it might change the
> type you need. At the very least you need a compiler barrier to stop
> it re-ordering the access, but you can also have in-cpu store and load
> queues.
>

ok. So I will need to add some smp_r/wmb()

Thanks,

C.
On Wed, Mar 13, 2019 at 11:43:54AM +0100, Cédric Le Goater wrote:
> On 3/12/19 11:26 AM, David Gibson wrote:
> > On Mon, Mar 11, 2019 at 06:32:05PM +0100, Cédric Le Goater wrote:
> >> On 2/26/19 12:22 AM, David Gibson wrote:
> >>> On Fri, Feb 22, 2019 at 02:13:11PM +0100, Cédric Le Goater wrote:
> > [snip]

[...]

> >>>> +static void xive_esb_trigger(XiveSource *xsrc, int srcno)
> >>>> +{
> >>>> +    unsigned long addr = (unsigned long) xsrc->esb_mmap +
> >>>> +        xive_source_esb_page(xsrc, srcno);
> >>>> +
> >>>> +    *((uint64_t *) addr) = 0x0;
> >>>> +}
> >>>
> >>> Also.. aren't some of these register accesses likely to need memory
> >>> barriers?
> >>
> >> AIUI, these are CI pages. So we shouldn't need barriers.
> >
> > CI doesn't negate the need for barriers, although it might change the
> > type you need. At the very least you need a compiler barrier to stop
> > it re-ordering the access, but you can also have in-cpu store and load
> > queues.
>
> ok. So I will need to add some smp_r/wmb()

No, smp_[rw]mb() is for cases where it's strictly about cpu vs. cpu
ordering. Here it's cpu vs. IO ordering so you need plain [rw]mb().
On 3/14/19 3:11 AM, David Gibson wrote:
> On Wed, Mar 13, 2019 at 11:43:54AM +0100, Cédric Le Goater wrote:
>> On 3/12/19 11:26 AM, David Gibson wrote:
>>> On Mon, Mar 11, 2019 at 06:32:05PM +0100, Cédric Le Goater wrote:
>>>> On 2/26/19 12:22 AM, David Gibson wrote:
>>>>> On Fri, Feb 22, 2019 at 02:13:11PM +0100, Cédric Le Goater wrote:
>>> [snip]

[...]

>>>>>> +static void xive_esb_trigger(XiveSource *xsrc, int srcno)
>>>>>> +{
>>>>>> +    unsigned long addr = (unsigned long) xsrc->esb_mmap +
>>>>>> +        xive_source_esb_page(xsrc, srcno);
>>>>>> +
>>>>>> +    *((uint64_t *) addr) = 0x0;
>>>>>> +}
>>>>>
>>>>> Also.. aren't some of these register accesses likely to need memory
>>>>> barriers?
>>>>
>>>> AIUI, these are CI pages. So we shouldn't need barriers.
>>>
>>> CI doesn't negate the need for barriers, although it might change the
>>> type you need. At the very least you need a compiler barrier to stop
>>> it re-ordering the access, but you can also have in-cpu store and load
>>> queues.
>>
>> ok. So I will need to add some smp_r/wmb()
>
> No, smp_[rw]mb() is for cases where it's strictly about cpu vs. cpu
> ordering. Here it's cpu vs. IO ordering so you need plain [rw]mb().

I don't see any in QEMU ?

C.
On Thu, Mar 14, 2019 at 10:24:49PM +0100, Cédric Le Goater wrote:
> On 3/14/19 3:11 AM, David Gibson wrote:
> > On Wed, Mar 13, 2019 at 11:43:54AM +0100, Cédric Le Goater wrote:
> >> On 3/12/19 11:26 AM, David Gibson wrote:
> >>> On Mon, Mar 11, 2019 at 06:32:05PM +0100, Cédric Le Goater wrote:
> >>>> On 2/26/19 12:22 AM, David Gibson wrote:
> >>>>> On Fri, Feb 22, 2019 at 02:13:11PM +0100, Cédric Le Goater wrote:
> >>> [snip]

[...]

> >> ok. So I will need to add some smp_r/wmb()
> >
> > No, smp_[rw]mb() is for cases where it's strictly about cpu vs. cpu
> > ordering. Here it's cpu vs. IO ordering so you need plain [rw]mb().
>
> I don't see any in QEMU ?

Ah, my mistake. I was mixing up the kernel atomics and the qemu
atomics.
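For illustration, barrier placement in the trigger path could look like
the sketch below. QEMU's qemu/atomic.h only provides the smp_*() family
plus barrier(); on POWER, smp_mb() expands to a full sync, which also
orders cache-inhibited accesses. Whether both barriers are needed here
was left open in the thread, so treat this as an assumption, not the
posted fix:

#include "qemu/atomic.h"

/*
 * Sketch only: order the cache-inhibited ESB store with QEMU's
 * portable barriers (there is no bare rmb()/wmb() in QEMU).
 */
static void xive_esb_trigger(XiveSource *xsrc, int srcno)
{
    volatile uint64_t *addr = xsrc->esb_mmap +
        xive_source_esb_page(xsrc, srcno);

    smp_mb();     /* order prior source-state updates before the trigger */
    *addr = 0x0;  /* any store to the trigger page fires the interrupt */
    smp_mb();     /* keep subsequent ESB loads behind the trigger */
}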
diff --git a/include/hw/ppc/spapr_xive.h b/include/hw/ppc/spapr_xive.h
index ab6732b14a02..749c6cbc2c56 100644
--- a/include/hw/ppc/spapr_xive.h
+++ b/include/hw/ppc/spapr_xive.h
@@ -55,9 +55,24 @@ void spapr_xive_set_tctx_os_cam(XiveTCTX *tctx);
 void spapr_xive_mmio_set_enabled(sPAPRXive *xive, bool enable);
 void spapr_xive_map_mmio(sPAPRXive *xive);
 
+int spapr_xive_end_to_target(uint8_t end_blk, uint32_t end_idx,
+                             uint32_t *out_server, uint8_t *out_prio);
+
 /*
  * KVM XIVE device helpers
  */
 void kvmppc_xive_connect(sPAPRXive *xive, Error **errp);
+void kvmppc_xive_reset(sPAPRXive *xive, Error **errp);
+void kvmppc_xive_set_source_config(sPAPRXive *xive, uint32_t lisn, XiveEAS *eas,
+                                   Error **errp);
+void kvmppc_xive_sync_source(sPAPRXive *xive, uint32_t lisn, Error **errp);
+uint64_t kvmppc_xive_esb_rw(XiveSource *xsrc, int srcno, uint32_t offset,
+                            uint64_t data, bool write);
+void kvmppc_xive_set_queue_config(sPAPRXive *xive, uint8_t end_blk,
+                                  uint32_t end_idx, XiveEND *end,
+                                  Error **errp);
+void kvmppc_xive_get_queue_config(sPAPRXive *xive, uint8_t end_blk,
+                                  uint32_t end_idx, XiveEND *end,
+                                  Error **errp);
 
 #endif /* PPC_SPAPR_XIVE_H */
diff --git a/hw/intc/spapr_xive.c b/hw/intc/spapr_xive.c
index c24d649e3668..3db24391e31c 100644
--- a/hw/intc/spapr_xive.c
+++ b/hw/intc/spapr_xive.c
@@ -86,6 +86,19 @@ static int spapr_xive_target_to_nvt(uint32_t target,
  * sPAPR END indexing uses a simple mapping of the CPU vcpu_id, 8
  * priorities per CPU
  */
+int spapr_xive_end_to_target(uint8_t end_blk, uint32_t end_idx,
+                             uint32_t *out_server, uint8_t *out_prio)
+{
+    if (out_server) {
+        *out_server = end_idx >> 3;
+    }
+
+    if (out_prio) {
+        *out_prio = end_idx & 0x7;
+    }
+    return 0;
+}
+
 static void spapr_xive_cpu_to_end(PowerPCCPU *cpu, uint8_t prio,
                                   uint8_t *out_end_blk, uint32_t *out_end_idx)
 {
@@ -792,6 +805,16 @@ static target_ulong h_int_set_source_config(PowerPCCPU *cpu,
         new_eas.w = xive_set_field64(EAS_END_DATA, new_eas.w, eisn);
     }
 
+    if (kvm_irqchip_in_kernel()) {
+        Error *local_err = NULL;
+
+        kvmppc_xive_set_source_config(xive, lisn, &new_eas, &local_err);
+        if (local_err) {
+            error_report_err(local_err);
+            return H_HARDWARE;
+        }
+    }
+
 out:
     xive->eat[lisn] = new_eas;
     return H_SUCCESS;
@@ -1097,6 +1120,16 @@ static target_ulong h_int_set_queue_config(PowerPCCPU *cpu,
      */
 
 out:
+    if (kvm_irqchip_in_kernel()) {
+        Error *local_err = NULL;
+
+        kvmppc_xive_set_queue_config(xive, end_blk, end_idx, &end, &local_err);
+        if (local_err) {
+            error_report_err(local_err);
+            return H_HARDWARE;
+        }
+    }
+
     /* Update END */
     memcpy(&xive->endt[end_idx], &end, sizeof(XiveEND));
     return H_SUCCESS;
@@ -1189,6 +1222,16 @@ static target_ulong h_int_get_queue_config(PowerPCCPU *cpu,
         args[2] = 0;
     }
 
+    if (kvm_irqchip_in_kernel()) {
+        Error *local_err = NULL;
+
+        kvmppc_xive_get_queue_config(xive, end_blk, end_idx, end, &local_err);
+        if (local_err) {
+            error_report_err(local_err);
+            return H_HARDWARE;
+        }
+    }
+
     /* TODO: do we need any locking on the END ? */
     if (flags & SPAPR_XIVE_END_DEBUG) {
         /* Load the event queue generation number into the return flags */
@@ -1341,15 +1384,20 @@ static target_ulong h_int_esb(PowerPCCPU *cpu,
         return H_P3;
     }
 
-    mmio_addr = xive->vc_base + xive_source_esb_mgmt(xsrc, lisn) + offset;
+    if (kvm_irqchip_in_kernel()) {
+        args[0] = kvmppc_xive_esb_rw(xsrc, lisn, offset, data,
+                                     flags & SPAPR_XIVE_ESB_STORE);
+    } else {
+        mmio_addr = xive->vc_base + xive_source_esb_mgmt(xsrc, lisn) + offset;
 
-    if (dma_memory_rw(&address_space_memory, mmio_addr, &data, 8,
-                      (flags & SPAPR_XIVE_ESB_STORE))) {
-        qemu_log_mask(LOG_GUEST_ERROR, "XIVE: failed to access ESB @0x%"
-                      HWADDR_PRIx "\n", mmio_addr);
-        return H_HARDWARE;
+        if (dma_memory_rw(&address_space_memory, mmio_addr, &data, 8,
+                          (flags & SPAPR_XIVE_ESB_STORE))) {
+            qemu_log_mask(LOG_GUEST_ERROR, "XIVE: failed to access ESB @0x%"
+                          HWADDR_PRIx "\n", mmio_addr);
+            return H_HARDWARE;
+        }
+        args[0] = (flags & SPAPR_XIVE_ESB_STORE) ? -1 : data;
     }
-    args[0] = (flags & SPAPR_XIVE_ESB_STORE) ? -1 : data;
     return H_SUCCESS;
 }
 
@@ -1406,7 +1454,20 @@ static target_ulong h_int_sync(PowerPCCPU *cpu,
      * This is not needed when running the emulation under QEMU
      */
 
-    /* This is not real hardware. Nothing to be done */
+    /*
+     * This is not real hardware. Nothing to be done unless when
+     * under KVM
+     */
+
+    if (kvm_irqchip_in_kernel()) {
+        Error *local_err = NULL;
+
+        kvmppc_xive_sync_source(xive, lisn, &local_err);
+        if (local_err) {
+            error_report_err(local_err);
+            return H_HARDWARE;
+        }
+    }
     return H_SUCCESS;
 }
 
@@ -1441,6 +1502,16 @@ static target_ulong h_int_reset(PowerPCCPU *cpu,
     }
 
     device_reset(DEVICE(xive));
+
+    if (kvm_irqchip_in_kernel()) {
+        Error *local_err = NULL;
+
+        kvmppc_xive_reset(xive, &local_err);
+        if (local_err) {
+            error_report_err(local_err);
+            return H_HARDWARE;
+        }
+    }
     return H_SUCCESS;
 }
 
diff --git a/hw/intc/spapr_xive_kvm.c b/hw/intc/spapr_xive_kvm.c
index 623fbf74f23e..6b50451b4f85 100644
--- a/hw/intc/spapr_xive_kvm.c
+++ b/hw/intc/spapr_xive_kvm.c
@@ -89,6 +89,52 @@ void kvmppc_xive_cpu_connect(XiveTCTX *tctx, Error **errp)
  * XIVE Interrupt Source (KVM)
  */
 
+void kvmppc_xive_set_source_config(sPAPRXive *xive, uint32_t lisn, XiveEAS *eas,
+                                   Error **errp)
+{
+    uint32_t end_idx;
+    uint32_t end_blk;
+    uint32_t eisn;
+    uint8_t priority;
+    uint32_t server;
+    uint64_t kvm_src;
+    Error *local_err = NULL;
+
+    /*
+     * No need to set a MASKED source, this is the default state after
+     * reset.
+     */
+    if (!xive_eas_is_valid(eas) || xive_eas_is_masked(eas)) {
+        return;
+    }
+
+    end_idx = xive_get_field64(EAS_END_INDEX, eas->w);
+    end_blk = xive_get_field64(EAS_END_BLOCK, eas->w);
+    eisn = xive_get_field64(EAS_END_DATA, eas->w);
+
+    spapr_xive_end_to_target(end_blk, end_idx, &server, &priority);
+
+    kvm_src = priority << KVM_XIVE_SOURCE_PRIORITY_SHIFT &
+              KVM_XIVE_SOURCE_PRIORITY_MASK;
+    kvm_src |= server << KVM_XIVE_SOURCE_SERVER_SHIFT &
+               KVM_XIVE_SOURCE_SERVER_MASK;
+    kvm_src |= ((uint64_t)eisn << KVM_XIVE_SOURCE_EISN_SHIFT) &
+               KVM_XIVE_SOURCE_EISN_MASK;
+
+    kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_SOURCE_CONFIG, lisn,
+                      &kvm_src, true, &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        return;
+    }
+}
+
+void kvmppc_xive_sync_source(sPAPRXive *xive, uint32_t lisn, Error **errp)
+{
+    kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_SOURCE_SYNC, lisn,
+                      NULL, true, errp);
+}
+
 /*
  * At reset, the interrupt sources are simply created and MASKED. We
  * only need to inform the KVM XIVE device about their type: LSI or
@@ -125,6 +171,64 @@ void kvmppc_xive_source_reset(XiveSource *xsrc, Error **errp)
     }
 }
 
+/*
+ * This is used to perform the magic loads on the ESB pages, described
+ * in xive.h.
+ */
+static uint64_t xive_esb_rw(XiveSource *xsrc, int srcno, uint32_t offset,
+                            uint64_t data, bool write)
+{
+    unsigned long addr = (unsigned long) xsrc->esb_mmap +
+        xive_source_esb_mgmt(xsrc, srcno) + offset;
+
+    if (write) {
+        *((uint64_t *) addr) = data;
+        return -1;
+    } else {
+        return *((uint64_t *) addr);
+    }
+}
+
+static uint8_t xive_esb_read(XiveSource *xsrc, int srcno, uint32_t offset)
+{
+    /* Prevent the compiler from optimizing away the load */
+    volatile uint64_t value = xive_esb_rw(xsrc, srcno, offset, 0, 0);
+
+    return be64_to_cpu(value) & 0x3;
+}
+
+static void xive_esb_trigger(XiveSource *xsrc, int srcno)
+{
+    unsigned long addr = (unsigned long) xsrc->esb_mmap +
+        xive_source_esb_page(xsrc, srcno);
+
+    *((uint64_t *) addr) = 0x0;
+}
+
+uint64_t kvmppc_xive_esb_rw(XiveSource *xsrc, int srcno, uint32_t offset,
+                            uint64_t data, bool write)
+{
+    if (write) {
+        return xive_esb_rw(xsrc, srcno, offset, data, 1);
+    }
+
+    /*
+     * Special Load EOI handling for LSI sources. Q bit is never set
+     * and the interrupt should be re-triggered if the level is still
+     * asserted.
+     */
+    if (xive_source_irq_is_lsi(xsrc, srcno) &&
+        offset == XIVE_ESB_LOAD_EOI) {
+        xive_esb_read(xsrc, srcno, XIVE_ESB_SET_PQ_00);
+        if (xsrc->status[srcno] & XIVE_STATUS_ASSERTED) {
+            xive_esb_trigger(xsrc, srcno);
+        }
+        return 0;
+    } else {
+        return xive_esb_rw(xsrc, srcno, offset, 0, 0);
+    }
+}
+
 void kvmppc_xive_source_set_irq(void *opaque, int srcno, int val)
 {
     XiveSource *xsrc = opaque;
@@ -155,6 +259,86 @@ void kvmppc_xive_source_set_irq(void *opaque, int srcno, int val)
 /*
  * sPAPR XIVE interrupt controller (KVM)
  */
+void kvmppc_xive_get_queue_config(sPAPRXive *xive, uint8_t end_blk,
+                                  uint32_t end_idx, XiveEND *end,
+                                  Error **errp)
+{
+    struct kvm_ppc_xive_eq kvm_eq = { 0 };
+    uint64_t kvm_eq_idx;
+    uint8_t priority;
+    uint32_t server;
+    Error *local_err = NULL;
+
+    if (!xive_end_is_valid(end)) {
+        return;
+    }
+
+    /* Encode the tuple (server, prio) as a KVM EQ index */
+    spapr_xive_end_to_target(end_blk, end_idx, &server, &priority);
+
+    kvm_eq_idx = priority << KVM_XIVE_EQ_PRIORITY_SHIFT &
+                 KVM_XIVE_EQ_PRIORITY_MASK;
+    kvm_eq_idx |= server << KVM_XIVE_EQ_SERVER_SHIFT &
+                  KVM_XIVE_EQ_SERVER_MASK;
+
+    kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_EQ_CONFIG, kvm_eq_idx,
+                      &kvm_eq, false, &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        return;
+    }
+
+    /*
+     * The EQ index and toggle bit are updated by HW. These are the
+     * only fields we want to return.
+     */
+    end->w1 = xive_set_field32(END_W1_GENERATION, 0ul, kvm_eq.qtoggle) |
+              xive_set_field32(END_W1_PAGE_OFF, 0ul, kvm_eq.qindex);
+}
+
+void kvmppc_xive_set_queue_config(sPAPRXive *xive, uint8_t end_blk,
+                                  uint32_t end_idx, XiveEND *end,
+                                  Error **errp)
+{
+    struct kvm_ppc_xive_eq kvm_eq = { 0 };
+    uint64_t kvm_eq_idx;
+    uint8_t priority;
+    uint32_t server;
+    Error *local_err = NULL;
+
+    if (!xive_end_is_valid(end)) {
+        return;
+    }
+
+    /* Build the KVM state from the local END structure */
+    kvm_eq.flags = KVM_XIVE_EQ_FLAG_ALWAYS_NOTIFY;
+    kvm_eq.qsize = xive_get_field32(END_W0_QSIZE, end->w0) + 12;
+    kvm_eq.qpage = (uint64_t) be32_to_cpu(end->w2 & 0x0fffffff) << 32 |
+                   be32_to_cpu(end->w3);
+    kvm_eq.qtoggle = xive_get_field32(END_W1_GENERATION, end->w1);
+    kvm_eq.qindex = xive_get_field32(END_W1_PAGE_OFF, end->w1);
+
+    /* Encode the tuple (server, prio) as a KVM EQ index */
+    spapr_xive_end_to_target(end_blk, end_idx, &server, &priority);
+
+    kvm_eq_idx = priority << KVM_XIVE_EQ_PRIORITY_SHIFT &
+                 KVM_XIVE_EQ_PRIORITY_MASK;
+    kvm_eq_idx |= server << KVM_XIVE_EQ_SERVER_SHIFT &
+                  KVM_XIVE_EQ_SERVER_MASK;
+
+    kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_EQ_CONFIG, kvm_eq_idx,
+                      &kvm_eq, true, &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        return;
+    }
+}
+
+void kvmppc_xive_reset(sPAPRXive *xive, Error **errp)
+{
+    kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_CTRL, KVM_DEV_XIVE_RESET,
+                      NULL, true, errp);
+}
 
 static void *kvmppc_xive_mmap(sPAPRXive *xive, int pgoff, size_t len,
                               Error **errp)
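As a worked example of the (server, priority) to EQ-index encoding used
by kvmppc_xive_set_queue_config() and kvmppc_xive_get_queue_config() in
the diff above. The shift and mask values are taken from the proposed
kernel uAPI (priority in bits 0..2, server from bit 3 up) and should be
read as assumptions here:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Assumed layout from the proposed kernel uAPI headers */
#define KVM_XIVE_EQ_PRIORITY_SHIFT  0
#define KVM_XIVE_EQ_PRIORITY_MASK   0x7ULL
#define KVM_XIVE_EQ_SERVER_SHIFT    3
#define KVM_XIVE_EQ_SERVER_MASK     0xfffffff8ULL

int main(void)
{
    uint32_t server = 5;  /* vCPU id, as from spapr_xive_end_to_target() */
    uint8_t priority = 6;

    uint64_t kvm_eq_idx =
        (((uint64_t)priority << KVM_XIVE_EQ_PRIORITY_SHIFT) &
         KVM_XIVE_EQ_PRIORITY_MASK) |
        (((uint64_t)server << KVM_XIVE_EQ_SERVER_SHIFT) &
         KVM_XIVE_EQ_SERVER_MASK);

    /* 5 << 3 | 6 == 0x2e, the inverse of end_idx = server * 8 + prio */
    printf("kvm_eq_idx = 0x%" PRIx64 "\n", kvm_eq_idx);
    return 0;
}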
XIVE hcalls are all redirected to QEMU as none are on a fast path.
When necessary, QEMU invokes KVM through specific ioctls to perform
host operations. QEMU should have done the necessary checks before
calling KVM and, in case of failure, H_HARDWARE is simply returned.

H_INT_ESB is a special case that could have been handled under KVM
but the impact on performance was low when under QEMU. Here are some
figures :

  kernel irqchip     OFF        ON
  H_INT_ESB                 KVM     QEMU

  rtl8139 (LSI )    1.19    1.24    1.23   Gbits/sec
  virtio           31.80   42.30      --   Gbits/sec

Signed-off-by: Cédric Le Goater <clg@kaod.org>
---
 include/hw/ppc/spapr_xive.h |  15 +++
 hw/intc/spapr_xive.c        |  87 +++++++++++++++--
 hw/intc/spapr_xive_kvm.c    | 184 ++++++++++++++++++++++++++++++++++++
 3 files changed, 278 insertions(+), 8 deletions(-)