Message ID | 20171123132955.1261-14-clg@kaod.org |
---|---|
State | New |
Headers | show |
Series | spapr: Guest exploitation of the XIVE interrupt controller (POWER9) | expand |
On Thu, 2017-11-23 at 14:29 +0100, Cédric Le Goater wrote: > The Event Queue Descriptor (EQD) table, also known as Event Notification > Descriptor (END), is one of the internal tables the XIVE interrupt > controller uses to redirect exception from event sources to CPU > threads. Keep in mind tha we want to only expose to the guest priorities 0..6 as priority 7 will not be available with KVM on DD2.0 chips. > The EQD specifies on which Event Queue the event data should be posted > when an exception occurs (later on pulled by the OS) and which server > (VPD in XIVE terminology) to notify. The Event Queue is a much more > complex structure but we start with a simple model for the sPAPR > machine. > > There is one XiveEQ per priority and the model chooses to store them > under the Xive Interrupt presenter model. It will be retrieved, just > like for XICS, through the 'intc' object pointer of the CPU. > > The EQ indexing follows a simple pattern: > > (server << 3) | (priority & 0x7) > > Signed-off-by: Cédric Le Goater <clg@kaod.org> > --- > hw/intc/spapr_xive.c | 56 +++++++++++++++++++++++++++++++++++++++++++++++++ > hw/intc/xive-internal.h | 50 +++++++++++++++++++++++++++++++++++++++++++ > 2 files changed, 106 insertions(+) > > diff --git a/hw/intc/spapr_xive.c b/hw/intc/spapr_xive.c > index 554b25e0884c..983317a6b3f6 100644 > --- a/hw/intc/spapr_xive.c > +++ b/hw/intc/spapr_xive.c > @@ -23,6 +23,7 @@ > #include "sysemu/dma.h" > #include "monitor/monitor.h" > #include "hw/ppc/spapr_xive.h" > +#include "hw/ppc/spapr.h" > #include "hw/ppc/xics.h" > > #include "xive-internal.h" > @@ -34,6 +35,8 @@ struct sPAPRXiveICP { > uint8_t tima[TM_RING_COUNT * 0x10]; > uint8_t *tima_os; > qemu_irq output; > + > + XiveEQ eqt[XIVE_PRIORITY_MAX + 1]; > }; > > static uint64_t spapr_xive_icp_accept(sPAPRXiveICP *icp) > @@ -183,6 +186,13 @@ static const MemoryRegionOps spapr_xive_tm_ops = { > }, > }; > > +static sPAPRXiveICP *spapr_xive_icp_get(sPAPRXive *xive, int server) > +{ > + PowerPCCPU *cpu = spapr_find_cpu(server); > + > + return cpu ? SPAPR_XIVE_ICP(cpu->intc) : NULL; > +} > + > static void spapr_xive_irq(sPAPRXive *xive, int lisn) > { > > @@ -632,6 +642,8 @@ static void spapr_xive_icp_reset(void *dev) > sPAPRXiveICP *xicp = SPAPR_XIVE_ICP(dev); > > memset(xicp->tima, 0, sizeof(xicp->tima)); > + > + memset(xicp->eqt, 0, sizeof(xicp->eqt)); > } > > static void spapr_xive_icp_realize(DeviceState *dev, Error **errp) > @@ -683,6 +695,23 @@ static void spapr_xive_icp_init(Object *obj) > xicp->tima_os = &xicp->tima[TM_QW1_OS]; > } > > +static const VMStateDescription vmstate_spapr_xive_icp_eq = { > + .name = TYPE_SPAPR_XIVE_ICP "/eq", > + .version_id = 1, > + .minimum_version_id = 1, > + .fields = (VMStateField []) { > + VMSTATE_UINT32(w0, XiveEQ), > + VMSTATE_UINT32(w1, XiveEQ), > + VMSTATE_UINT32(w2, XiveEQ), > + VMSTATE_UINT32(w3, XiveEQ), > + VMSTATE_UINT32(w4, XiveEQ), > + VMSTATE_UINT32(w5, XiveEQ), > + VMSTATE_UINT32(w6, XiveEQ), > + VMSTATE_UINT32(w7, XiveEQ), > + VMSTATE_END_OF_LIST() > + }, > +}; > + > static bool vmstate_spapr_xive_icp_needed(void *opaque) > { > /* TODO check machine XIVE support */ > @@ -696,6 +725,8 @@ static const VMStateDescription vmstate_spapr_xive_icp = { > .needed = vmstate_spapr_xive_icp_needed, > .fields = (VMStateField[]) { > VMSTATE_BUFFER(tima, sPAPRXiveICP), > + VMSTATE_STRUCT_ARRAY(eqt, sPAPRXiveICP, (XIVE_PRIORITY_MAX + 1), 1, > + vmstate_spapr_xive_icp_eq, XiveEQ), > VMSTATE_END_OF_LIST() > }, > }; > @@ -755,3 +786,28 @@ bool spapr_xive_irq_unset(sPAPRXive *xive, uint32_t lisn) > ive->w &= ~IVE_VALID; > return true; > } > + > +/* > + * Use a simple indexing for the EQs. > + */ > +XiveEQ *spapr_xive_get_eq(sPAPRXive *xive, uint32_t eq_idx) > +{ > + int priority = eq_idx & 0x7; > + sPAPRXiveICP *xicp = spapr_xive_icp_get(xive, eq_idx >> 3); > + > + return xicp ? &xicp->eqt[priority] : NULL; > +} > + > +bool spapr_xive_eq_for_server(sPAPRXive *xive, uint32_t server, > + uint8_t priority, uint32_t *out_eq_idx) > +{ > + if (priority > XIVE_PRIORITY_MAX) { > + return false; > + } > + > + if (out_eq_idx) { > + *out_eq_idx = (server << 3) | (priority & 0x7); > + } > + > + return true; > +} > diff --git a/hw/intc/xive-internal.h b/hw/intc/xive-internal.h > index 7d329f203a9b..c3949671aa03 100644 > --- a/hw/intc/xive-internal.h > +++ b/hw/intc/xive-internal.h > @@ -131,9 +131,59 @@ typedef struct XiveIVE { > #define IVE_EQ_DATA PPC_BITMASK(33, 63) /* Data written to the EQ */ > } XiveIVE; > > +/* EQ */ > +typedef struct XiveEQ { > + uint32_t w0; > +#define EQ_W0_VALID PPC_BIT32(0) > +#define EQ_W0_ENQUEUE PPC_BIT32(1) > +#define EQ_W0_UCOND_NOTIFY PPC_BIT32(2) > +#define EQ_W0_BACKLOG PPC_BIT32(3) > +#define EQ_W0_PRECL_ESC_CTL PPC_BIT32(4) > +#define EQ_W0_ESCALATE_CTL PPC_BIT32(5) > +#define EQ_W0_END_OF_INTR PPC_BIT32(6) > +#define EQ_W0_QSIZE PPC_BITMASK32(12, 15) > +#define EQ_W0_SW0 PPC_BIT32(16) > +#define EQ_W0_FIRMWARE EQ_W0_SW0 /* Owned by FW */ > +#define EQ_QSIZE_4K 0 > +#define EQ_QSIZE_64K 4 > +#define EQ_W0_HWDEP PPC_BITMASK32(24, 31) > + uint32_t w1; > +#define EQ_W1_ESn PPC_BITMASK32(0, 1) > +#define EQ_W1_ESn_P PPC_BIT32(0) > +#define EQ_W1_ESn_Q PPC_BIT32(1) > +#define EQ_W1_ESe PPC_BITMASK32(2, 3) > +#define EQ_W1_ESe_P PPC_BIT32(2) > +#define EQ_W1_ESe_Q PPC_BIT32(3) > +#define EQ_W1_GENERATION PPC_BIT32(9) > +#define EQ_W1_PAGE_OFF PPC_BITMASK32(10, 31) > + uint32_t w2; > +#define EQ_W2_MIGRATION_REG PPC_BITMASK32(0, 3) > +#define EQ_W2_OP_DESC_HI PPC_BITMASK32(4, 31) > + uint32_t w3; > +#define EQ_W3_OP_DESC_LO PPC_BITMASK32(0, 31) > + uint32_t w4; > +#define EQ_W4_ESC_EQ_BLOCK PPC_BITMASK32(4, 7) > +#define EQ_W4_ESC_EQ_INDEX PPC_BITMASK32(8, 31) > + uint32_t w5; > +#define EQ_W5_ESC_EQ_DATA PPC_BITMASK32(1, 31) > + uint32_t w6; > +#define EQ_W6_FORMAT_BIT PPC_BIT32(8) > +#define EQ_W6_NVT_BLOCK PPC_BITMASK32(9, 12) > +#define EQ_W6_NVT_INDEX PPC_BITMASK32(13, 31) > + uint32_t w7; > +#define EQ_W7_F0_IGNORE PPC_BIT32(0) > +#define EQ_W7_F0_BLK_GROUPING PPC_BIT32(1) > +#define EQ_W7_F0_PRIORITY PPC_BITMASK32(8, 15) > +#define EQ_W7_F1_WAKEZ PPC_BIT32(0) > +#define EQ_W7_F1_LOG_SERVER_ID PPC_BITMASK32(1, 31) > +} XiveEQ; > + > #define XIVE_PRIORITY_MAX 7 > > void spapr_xive_reset(void *dev); > XiveIVE *spapr_xive_get_ive(sPAPRXive *xive, uint32_t lisn); > +XiveEQ *spapr_xive_get_eq(sPAPRXive *xive, uint32_t idx); > +bool spapr_xive_eq_for_server(sPAPRXive *xive, uint32_t server, uint8_t prio, > + uint32_t *out_eq_idx); > > #endif /* _INTC_XIVE_INTERNAL_H */
On 11/23/2017 09:31 PM, Benjamin Herrenschmidt wrote: > On Thu, 2017-11-23 at 14:29 +0100, Cédric Le Goater wrote: >> The Event Queue Descriptor (EQD) table, also known as Event Notification >> Descriptor (END), is one of the internal tables the XIVE interrupt >> controller uses to redirect exception from event sources to CPU >> threads. > > Keep in mind tha we want to only expose to the guest priorities 0..6 > as priority 7 will not be available with KVM on DD2.0 chips. The hcall patch 19 introduces the "ibm,plat-res-int-priorities" property. The priority ranges reserved by the hypervisor are the following : static const uint32_t reserved_priorities[] = { 7, /* start */ 0xf8, /* count */ }; So The Linux driver is expected to choose priority 6. The priority validity is then checked in each hcall returning H_P4/H_P3 in case of failure. But it is true that we scale the arrays with : #define XIVE_PRIORITY_MAX 7 Do you want QEMU to completely remove prio 7 ? C. >> The EQD specifies on which Event Queue the event data should be posted >> when an exception occurs (later on pulled by the OS) and which server >> (VPD in XIVE terminology) to notify. The Event Queue is a much more >> complex structure but we start with a simple model for the sPAPR >> machine. >> >> There is one XiveEQ per priority and the model chooses to store them >> under the Xive Interrupt presenter model. It will be retrieved, just >> like for XICS, through the 'intc' object pointer of the CPU. >> >> The EQ indexing follows a simple pattern: >> >> (server << 3) | (priority & 0x7) >> >> Signed-off-by: Cédric Le Goater <clg@kaod.org> >> --- >> hw/intc/spapr_xive.c | 56 +++++++++++++++++++++++++++++++++++++++++++++++++ >> hw/intc/xive-internal.h | 50 +++++++++++++++++++++++++++++++++++++++++++ >> 2 files changed, 106 insertions(+) >> >> diff --git a/hw/intc/spapr_xive.c b/hw/intc/spapr_xive.c >> index 554b25e0884c..983317a6b3f6 100644 >> --- a/hw/intc/spapr_xive.c >> +++ b/hw/intc/spapr_xive.c >> @@ -23,6 +23,7 @@ >> #include "sysemu/dma.h" >> #include "monitor/monitor.h" >> #include "hw/ppc/spapr_xive.h" >> +#include "hw/ppc/spapr.h" >> #include "hw/ppc/xics.h" >> >> #include "xive-internal.h" >> @@ -34,6 +35,8 @@ struct sPAPRXiveICP { >> uint8_t tima[TM_RING_COUNT * 0x10]; >> uint8_t *tima_os; >> qemu_irq output; >> + >> + XiveEQ eqt[XIVE_PRIORITY_MAX + 1]; >> }; >> >> static uint64_t spapr_xive_icp_accept(sPAPRXiveICP *icp) >> @@ -183,6 +186,13 @@ static const MemoryRegionOps spapr_xive_tm_ops = { >> }, >> }; >> >> +static sPAPRXiveICP *spapr_xive_icp_get(sPAPRXive *xive, int server) >> +{ >> + PowerPCCPU *cpu = spapr_find_cpu(server); >> + >> + return cpu ? SPAPR_XIVE_ICP(cpu->intc) : NULL; >> +} >> + >> static void spapr_xive_irq(sPAPRXive *xive, int lisn) >> { >> >> @@ -632,6 +642,8 @@ static void spapr_xive_icp_reset(void *dev) >> sPAPRXiveICP *xicp = SPAPR_XIVE_ICP(dev); >> >> memset(xicp->tima, 0, sizeof(xicp->tima)); >> + >> + memset(xicp->eqt, 0, sizeof(xicp->eqt)); >> } >> >> static void spapr_xive_icp_realize(DeviceState *dev, Error **errp) >> @@ -683,6 +695,23 @@ static void spapr_xive_icp_init(Object *obj) >> xicp->tima_os = &xicp->tima[TM_QW1_OS]; >> } >> >> +static const VMStateDescription vmstate_spapr_xive_icp_eq = { >> + .name = TYPE_SPAPR_XIVE_ICP "/eq", >> + .version_id = 1, >> + .minimum_version_id = 1, >> + .fields = (VMStateField []) { >> + VMSTATE_UINT32(w0, XiveEQ), >> + VMSTATE_UINT32(w1, XiveEQ), >> + VMSTATE_UINT32(w2, XiveEQ), >> + VMSTATE_UINT32(w3, XiveEQ), >> + VMSTATE_UINT32(w4, XiveEQ), >> + VMSTATE_UINT32(w5, XiveEQ), >> + VMSTATE_UINT32(w6, XiveEQ), >> + VMSTATE_UINT32(w7, XiveEQ), >> + VMSTATE_END_OF_LIST() >> + }, >> +}; >> + >> static bool vmstate_spapr_xive_icp_needed(void *opaque) >> { >> /* TODO check machine XIVE support */ >> @@ -696,6 +725,8 @@ static const VMStateDescription vmstate_spapr_xive_icp = { >> .needed = vmstate_spapr_xive_icp_needed, >> .fields = (VMStateField[]) { >> VMSTATE_BUFFER(tima, sPAPRXiveICP), >> + VMSTATE_STRUCT_ARRAY(eqt, sPAPRXiveICP, (XIVE_PRIORITY_MAX + 1), 1, >> + vmstate_spapr_xive_icp_eq, XiveEQ), >> VMSTATE_END_OF_LIST() >> }, >> }; >> @@ -755,3 +786,28 @@ bool spapr_xive_irq_unset(sPAPRXive *xive, uint32_t lisn) >> ive->w &= ~IVE_VALID; >> return true; >> } >> + >> +/* >> + * Use a simple indexing for the EQs. >> + */ >> +XiveEQ *spapr_xive_get_eq(sPAPRXive *xive, uint32_t eq_idx) >> +{ >> + int priority = eq_idx & 0x7; >> + sPAPRXiveICP *xicp = spapr_xive_icp_get(xive, eq_idx >> 3); >> + >> + return xicp ? &xicp->eqt[priority] : NULL; >> +} >> + >> +bool spapr_xive_eq_for_server(sPAPRXive *xive, uint32_t server, >> + uint8_t priority, uint32_t *out_eq_idx) >> +{ >> + if (priority > XIVE_PRIORITY_MAX) { >> + return false; >> + } >> + >> + if (out_eq_idx) { >> + *out_eq_idx = (server << 3) | (priority & 0x7); >> + } >> + >> + return true; >> +} >> diff --git a/hw/intc/xive-internal.h b/hw/intc/xive-internal.h >> index 7d329f203a9b..c3949671aa03 100644 >> --- a/hw/intc/xive-internal.h >> +++ b/hw/intc/xive-internal.h >> @@ -131,9 +131,59 @@ typedef struct XiveIVE { >> #define IVE_EQ_DATA PPC_BITMASK(33, 63) /* Data written to the EQ */ >> } XiveIVE; >> >> +/* EQ */ >> +typedef struct XiveEQ { >> + uint32_t w0; >> +#define EQ_W0_VALID PPC_BIT32(0) >> +#define EQ_W0_ENQUEUE PPC_BIT32(1) >> +#define EQ_W0_UCOND_NOTIFY PPC_BIT32(2) >> +#define EQ_W0_BACKLOG PPC_BIT32(3) >> +#define EQ_W0_PRECL_ESC_CTL PPC_BIT32(4) >> +#define EQ_W0_ESCALATE_CTL PPC_BIT32(5) >> +#define EQ_W0_END_OF_INTR PPC_BIT32(6) >> +#define EQ_W0_QSIZE PPC_BITMASK32(12, 15) >> +#define EQ_W0_SW0 PPC_BIT32(16) >> +#define EQ_W0_FIRMWARE EQ_W0_SW0 /* Owned by FW */ >> +#define EQ_QSIZE_4K 0 >> +#define EQ_QSIZE_64K 4 >> +#define EQ_W0_HWDEP PPC_BITMASK32(24, 31) >> + uint32_t w1; >> +#define EQ_W1_ESn PPC_BITMASK32(0, 1) >> +#define EQ_W1_ESn_P PPC_BIT32(0) >> +#define EQ_W1_ESn_Q PPC_BIT32(1) >> +#define EQ_W1_ESe PPC_BITMASK32(2, 3) >> +#define EQ_W1_ESe_P PPC_BIT32(2) >> +#define EQ_W1_ESe_Q PPC_BIT32(3) >> +#define EQ_W1_GENERATION PPC_BIT32(9) >> +#define EQ_W1_PAGE_OFF PPC_BITMASK32(10, 31) >> + uint32_t w2; >> +#define EQ_W2_MIGRATION_REG PPC_BITMASK32(0, 3) >> +#define EQ_W2_OP_DESC_HI PPC_BITMASK32(4, 31) >> + uint32_t w3; >> +#define EQ_W3_OP_DESC_LO PPC_BITMASK32(0, 31) >> + uint32_t w4; >> +#define EQ_W4_ESC_EQ_BLOCK PPC_BITMASK32(4, 7) >> +#define EQ_W4_ESC_EQ_INDEX PPC_BITMASK32(8, 31) >> + uint32_t w5; >> +#define EQ_W5_ESC_EQ_DATA PPC_BITMASK32(1, 31) >> + uint32_t w6; >> +#define EQ_W6_FORMAT_BIT PPC_BIT32(8) >> +#define EQ_W6_NVT_BLOCK PPC_BITMASK32(9, 12) >> +#define EQ_W6_NVT_INDEX PPC_BITMASK32(13, 31) >> + uint32_t w7; >> +#define EQ_W7_F0_IGNORE PPC_BIT32(0) >> +#define EQ_W7_F0_BLK_GROUPING PPC_BIT32(1) >> +#define EQ_W7_F0_PRIORITY PPC_BITMASK32(8, 15) >> +#define EQ_W7_F1_WAKEZ PPC_BIT32(0) >> +#define EQ_W7_F1_LOG_SERVER_ID PPC_BITMASK32(1, 31) >> +} XiveEQ; >> + >> #define XIVE_PRIORITY_MAX 7 >> >> void spapr_xive_reset(void *dev); >> XiveIVE *spapr_xive_get_ive(sPAPRXive *xive, uint32_t lisn); >> +XiveEQ *spapr_xive_get_eq(sPAPRXive *xive, uint32_t idx); >> +bool spapr_xive_eq_for_server(sPAPRXive *xive, uint32_t server, uint8_t prio, >> + uint32_t *out_eq_idx); >> >> #endif /* _INTC_XIVE_INTERNAL_H */
On Fri, 2017-11-24 at 09:15 +0100, Cédric Le Goater wrote: > So The Linux driver is expected to choose priority 6. The priority > validity is then checked in each hcall returning H_P4/H_P3 in case of > failure. > > But it is true that we scale the arrays with : > > #define XIVE_PRIORITY_MAX 7 > > Do you want QEMU to completely remove prio 7 ? I'd like qemu to be consistent, at least make sure it errors out if the OS tries to configue prio 7 or route an irq to it. Cheers, Ben.
On Thu, Nov 23, 2017 at 02:29:43PM +0100, Cédric Le Goater wrote: > The Event Queue Descriptor (EQD) table, also known as Event Notification > Descriptor (END), is one of the internal tables the XIVE interrupt > controller uses to redirect exception from event sources to CPU > threads. > > The EQD specifies on which Event Queue the event data should be posted > when an exception occurs (later on pulled by the OS) and which server > (VPD in XIVE terminology) to notify. The Event Queue is a much more > complex structure but we start with a simple model for the sPAPR > machine. Just to clarify my understanding a server / VPD in XIVE would typically correspond to a cpu - either real or virtual, yes? > There is one XiveEQ per priority and the model chooses to store them > under the Xive Interrupt presenter model. It will be retrieved, just > like for XICS, through the 'intc' object pointer of the CPU. > > The EQ indexing follows a simple pattern: > > (server << 3) | (priority & 0x7) > > Signed-off-by: Cédric Le Goater <clg@kaod.org> > --- > hw/intc/spapr_xive.c | 56 +++++++++++++++++++++++++++++++++++++++++++++++++ > hw/intc/xive-internal.h | 50 +++++++++++++++++++++++++++++++++++++++++++ > 2 files changed, 106 insertions(+) > > diff --git a/hw/intc/spapr_xive.c b/hw/intc/spapr_xive.c > index 554b25e0884c..983317a6b3f6 100644 > --- a/hw/intc/spapr_xive.c > +++ b/hw/intc/spapr_xive.c > @@ -23,6 +23,7 @@ > #include "sysemu/dma.h" > #include "monitor/monitor.h" > #include "hw/ppc/spapr_xive.h" > +#include "hw/ppc/spapr.h" > #include "hw/ppc/xics.h" > > #include "xive-internal.h" > @@ -34,6 +35,8 @@ struct sPAPRXiveICP { > uint8_t tima[TM_RING_COUNT * 0x10]; > uint8_t *tima_os; > qemu_irq output; > + > + XiveEQ eqt[XIVE_PRIORITY_MAX + 1]; > }; > > static uint64_t spapr_xive_icp_accept(sPAPRXiveICP *icp) > @@ -183,6 +186,13 @@ static const MemoryRegionOps spapr_xive_tm_ops = { > }, > }; > > +static sPAPRXiveICP *spapr_xive_icp_get(sPAPRXive *xive, int server) > +{ > + PowerPCCPU *cpu = spapr_find_cpu(server); > + > + return cpu ? SPAPR_XIVE_ICP(cpu->intc) : NULL; > +} > + > static void spapr_xive_irq(sPAPRXive *xive, int lisn) > { > > @@ -632,6 +642,8 @@ static void spapr_xive_icp_reset(void *dev) > sPAPRXiveICP *xicp = SPAPR_XIVE_ICP(dev); > > memset(xicp->tima, 0, sizeof(xicp->tima)); > + > + memset(xicp->eqt, 0, sizeof(xicp->eqt)); > } > > static void spapr_xive_icp_realize(DeviceState *dev, Error **errp) > @@ -683,6 +695,23 @@ static void spapr_xive_icp_init(Object *obj) > xicp->tima_os = &xicp->tima[TM_QW1_OS]; > } > > +static const VMStateDescription vmstate_spapr_xive_icp_eq = { > + .name = TYPE_SPAPR_XIVE_ICP "/eq", > + .version_id = 1, > + .minimum_version_id = 1, > + .fields = (VMStateField []) { > + VMSTATE_UINT32(w0, XiveEQ), > + VMSTATE_UINT32(w1, XiveEQ), > + VMSTATE_UINT32(w2, XiveEQ), > + VMSTATE_UINT32(w3, XiveEQ), > + VMSTATE_UINT32(w4, XiveEQ), > + VMSTATE_UINT32(w5, XiveEQ), > + VMSTATE_UINT32(w6, XiveEQ), > + VMSTATE_UINT32(w7, XiveEQ), Wow. Super descriptive field names there, but I guess that's not your fault. > + VMSTATE_END_OF_LIST() > + }, > +}; > + > static bool vmstate_spapr_xive_icp_needed(void *opaque) > { > /* TODO check machine XIVE support */ > @@ -696,6 +725,8 @@ static const VMStateDescription vmstate_spapr_xive_icp = { > .needed = vmstate_spapr_xive_icp_needed, > .fields = (VMStateField[]) { > VMSTATE_BUFFER(tima, sPAPRXiveICP), > + VMSTATE_STRUCT_ARRAY(eqt, sPAPRXiveICP, (XIVE_PRIORITY_MAX + 1), 1, > + vmstate_spapr_xive_icp_eq, XiveEQ), > VMSTATE_END_OF_LIST() > }, > }; > @@ -755,3 +786,28 @@ bool spapr_xive_irq_unset(sPAPRXive *xive, uint32_t lisn) > ive->w &= ~IVE_VALID; > return true; > } > + > +/* > + * Use a simple indexing for the EQs. Is this server+priority encoding architected anywhere? Otherwise, why not use separate parameters? > + */ > +XiveEQ *spapr_xive_get_eq(sPAPRXive *xive, uint32_t eq_idx) > +{ > + int priority = eq_idx & 0x7; > + sPAPRXiveICP *xicp = spapr_xive_icp_get(xive, eq_idx >> 3); > + > + return xicp ? &xicp->eqt[priority] : NULL; > +} > + > +bool spapr_xive_eq_for_server(sPAPRXive *xive, uint32_t server, > + uint8_t priority, uint32_t *out_eq_idx) > +{ > + if (priority > XIVE_PRIORITY_MAX) { > + return false; > + } > + > + if (out_eq_idx) { > + *out_eq_idx = (server << 3) | (priority & 0x7); > + } > + > + return true; > +} > diff --git a/hw/intc/xive-internal.h b/hw/intc/xive-internal.h > index 7d329f203a9b..c3949671aa03 100644 > --- a/hw/intc/xive-internal.h > +++ b/hw/intc/xive-internal.h > @@ -131,9 +131,59 @@ typedef struct XiveIVE { > #define IVE_EQ_DATA PPC_BITMASK(33, 63) /* Data written to the EQ */ > } XiveIVE; > > +/* EQ */ > +typedef struct XiveEQ { > + uint32_t w0; > +#define EQ_W0_VALID PPC_BIT32(0) > +#define EQ_W0_ENQUEUE PPC_BIT32(1) > +#define EQ_W0_UCOND_NOTIFY PPC_BIT32(2) > +#define EQ_W0_BACKLOG PPC_BIT32(3) > +#define EQ_W0_PRECL_ESC_CTL PPC_BIT32(4) > +#define EQ_W0_ESCALATE_CTL PPC_BIT32(5) > +#define EQ_W0_END_OF_INTR PPC_BIT32(6) > +#define EQ_W0_QSIZE PPC_BITMASK32(12, 15) > +#define EQ_W0_SW0 PPC_BIT32(16) > +#define EQ_W0_FIRMWARE EQ_W0_SW0 /* Owned by FW */ > +#define EQ_QSIZE_4K 0 > +#define EQ_QSIZE_64K 4 > +#define EQ_W0_HWDEP PPC_BITMASK32(24, 31) > + uint32_t w1; > +#define EQ_W1_ESn PPC_BITMASK32(0, 1) > +#define EQ_W1_ESn_P PPC_BIT32(0) > +#define EQ_W1_ESn_Q PPC_BIT32(1) > +#define EQ_W1_ESe PPC_BITMASK32(2, 3) > +#define EQ_W1_ESe_P PPC_BIT32(2) > +#define EQ_W1_ESe_Q PPC_BIT32(3) > +#define EQ_W1_GENERATION PPC_BIT32(9) > +#define EQ_W1_PAGE_OFF PPC_BITMASK32(10, 31) > + uint32_t w2; > +#define EQ_W2_MIGRATION_REG PPC_BITMASK32(0, 3) > +#define EQ_W2_OP_DESC_HI PPC_BITMASK32(4, 31) > + uint32_t w3; > +#define EQ_W3_OP_DESC_LO PPC_BITMASK32(0, 31) > + uint32_t w4; > +#define EQ_W4_ESC_EQ_BLOCK PPC_BITMASK32(4, 7) > +#define EQ_W4_ESC_EQ_INDEX PPC_BITMASK32(8, 31) > + uint32_t w5; > +#define EQ_W5_ESC_EQ_DATA PPC_BITMASK32(1, 31) > + uint32_t w6; > +#define EQ_W6_FORMAT_BIT PPC_BIT32(8) > +#define EQ_W6_NVT_BLOCK PPC_BITMASK32(9, 12) > +#define EQ_W6_NVT_INDEX PPC_BITMASK32(13, 31) > + uint32_t w7; > +#define EQ_W7_F0_IGNORE PPC_BIT32(0) > +#define EQ_W7_F0_BLK_GROUPING PPC_BIT32(1) > +#define EQ_W7_F0_PRIORITY PPC_BITMASK32(8, 15) > +#define EQ_W7_F1_WAKEZ PPC_BIT32(0) > +#define EQ_W7_F1_LOG_SERVER_ID PPC_BITMASK32(1, 31) > +} XiveEQ; > + > #define XIVE_PRIORITY_MAX 7 > > void spapr_xive_reset(void *dev); > XiveIVE *spapr_xive_get_ive(sPAPRXive *xive, uint32_t lisn); > +XiveEQ *spapr_xive_get_eq(sPAPRXive *xive, uint32_t idx); > +bool spapr_xive_eq_for_server(sPAPRXive *xive, uint32_t server, uint8_t prio, > + uint32_t *out_eq_idx); > > #endif /* _INTC_XIVE_INTERNAL_H */
On 11/30/2017 04:38 AM, David Gibson wrote: > On Thu, Nov 23, 2017 at 02:29:43PM +0100, Cédric Le Goater wrote: >> The Event Queue Descriptor (EQD) table, also known as Event Notification >> Descriptor (END), is one of the internal tables the XIVE interrupt >> controller uses to redirect exception from event sources to CPU >> threads. >> >> The EQD specifies on which Event Queue the event data should be posted >> when an exception occurs (later on pulled by the OS) and which server >> (VPD in XIVE terminology) to notify. The Event Queue is a much more >> complex structure but we start with a simple model for the sPAPR >> machine. > > Just to clarify my understanding a server / VPD in XIVE would > typically correspond to a cpu - either real or virtual, yes? yes. VP for "virtual processor" and VPD for "virtual processor descriptor" which contains the XIVE interrupt state of the VP when not dispatched. It is still described in some documentation as an NVT : Notification Virtual Target. XIVE concepts were renamed at some time but the old name perdured. I am still struggling my way through all the names. >> There is one XiveEQ per priority and the model chooses to store them >> under the Xive Interrupt presenter model. It will be retrieved, just >> like for XICS, through the 'intc' object pointer of the CPU. >> >> The EQ indexing follows a simple pattern: >> >> (server << 3) | (priority & 0x7) >> >> Signed-off-by: Cédric Le Goater <clg@kaod.org> >> --- >> hw/intc/spapr_xive.c | 56 +++++++++++++++++++++++++++++++++++++++++++++++++ >> hw/intc/xive-internal.h | 50 +++++++++++++++++++++++++++++++++++++++++++ >> 2 files changed, 106 insertions(+) >> >> diff --git a/hw/intc/spapr_xive.c b/hw/intc/spapr_xive.c >> index 554b25e0884c..983317a6b3f6 100644 >> --- a/hw/intc/spapr_xive.c >> +++ b/hw/intc/spapr_xive.c >> @@ -23,6 +23,7 @@ >> #include "sysemu/dma.h" >> #include "monitor/monitor.h" >> #include "hw/ppc/spapr_xive.h" >> +#include "hw/ppc/spapr.h" >> #include "hw/ppc/xics.h" >> >> #include "xive-internal.h" >> @@ -34,6 +35,8 @@ struct sPAPRXiveICP { >> uint8_t tima[TM_RING_COUNT * 0x10]; >> uint8_t *tima_os; >> qemu_irq output; >> + >> + XiveEQ eqt[XIVE_PRIORITY_MAX + 1]; >> }; >> >> static uint64_t spapr_xive_icp_accept(sPAPRXiveICP *icp) >> @@ -183,6 +186,13 @@ static const MemoryRegionOps spapr_xive_tm_ops = { >> }, >> }; >> >> +static sPAPRXiveICP *spapr_xive_icp_get(sPAPRXive *xive, int server) >> +{ >> + PowerPCCPU *cpu = spapr_find_cpu(server); >> + >> + return cpu ? SPAPR_XIVE_ICP(cpu->intc) : NULL; >> +} >> + >> static void spapr_xive_irq(sPAPRXive *xive, int lisn) >> { >> >> @@ -632,6 +642,8 @@ static void spapr_xive_icp_reset(void *dev) >> sPAPRXiveICP *xicp = SPAPR_XIVE_ICP(dev); >> >> memset(xicp->tima, 0, sizeof(xicp->tima)); >> + >> + memset(xicp->eqt, 0, sizeof(xicp->eqt)); >> } >> >> static void spapr_xive_icp_realize(DeviceState *dev, Error **errp) >> @@ -683,6 +695,23 @@ static void spapr_xive_icp_init(Object *obj) >> xicp->tima_os = &xicp->tima[TM_QW1_OS]; >> } >> >> +static const VMStateDescription vmstate_spapr_xive_icp_eq = { >> + .name = TYPE_SPAPR_XIVE_ICP "/eq", >> + .version_id = 1, >> + .minimum_version_id = 1, >> + .fields = (VMStateField []) { >> + VMSTATE_UINT32(w0, XiveEQ), >> + VMSTATE_UINT32(w1, XiveEQ), >> + VMSTATE_UINT32(w2, XiveEQ), >> + VMSTATE_UINT32(w3, XiveEQ), >> + VMSTATE_UINT32(w4, XiveEQ), >> + VMSTATE_UINT32(w5, XiveEQ), >> + VMSTATE_UINT32(w6, XiveEQ), >> + VMSTATE_UINT32(w7, XiveEQ), > > Wow. Super descriptive field names there, but I guess that's not your fault. The defines in the "xive-internal.h" give a better view ... >> + VMSTATE_END_OF_LIST() >> + }, >> +}; >> + >> static bool vmstate_spapr_xive_icp_needed(void *opaque) >> { >> /* TODO check machine XIVE support */ >> @@ -696,6 +725,8 @@ static const VMStateDescription vmstate_spapr_xive_icp = { >> .needed = vmstate_spapr_xive_icp_needed, >> .fields = (VMStateField[]) { >> VMSTATE_BUFFER(tima, sPAPRXiveICP), >> + VMSTATE_STRUCT_ARRAY(eqt, sPAPRXiveICP, (XIVE_PRIORITY_MAX + 1), 1, >> + vmstate_spapr_xive_icp_eq, XiveEQ), >> VMSTATE_END_OF_LIST() >> }, >> }; >> @@ -755,3 +786,28 @@ bool spapr_xive_irq_unset(sPAPRXive *xive, uint32_t lisn) >> ive->w &= ~IVE_VALID; >> return true; >> } >> + >> +/* >> + * Use a simple indexing for the EQs. > > Is this server+priority encoding architected anywhere? no. This is a model shortcut. > Otherwise, why not use separate parameters? yes. spapr_xive_get_eq() could use separate parameters and it would shorten the some of the hcalls. The result is stored in a single field of the IVE, EQ_INDEX. So I will still need mangle/demangle routines but these could be simple macros. I will look at it. >> + */ >> +XiveEQ *spapr_xive_get_eq(sPAPRXive *xive, uint32_t eq_idx) >> +{ >> + int priority = eq_idx & 0x7; >> + sPAPRXiveICP *xicp = spapr_xive_icp_get(xive, eq_idx >> 3); >> + >> + return xicp ? &xicp->eqt[priority] : NULL; >> +} >> + >> +bool spapr_xive_eq_for_server(sPAPRXive *xive, uint32_t server, >> + uint8_t priority, uint32_t *out_eq_idx) >> +{ >> + if (priority > XIVE_PRIORITY_MAX) { >> + return false; >> + } >> + >> + if (out_eq_idx) { >> + *out_eq_idx = (server << 3) | (priority & 0x7); >> + } >> + >> + return true; >> +} >> diff --git a/hw/intc/xive-internal.h b/hw/intc/xive-internal.h >> index 7d329f203a9b..c3949671aa03 100644 >> --- a/hw/intc/xive-internal.h >> +++ b/hw/intc/xive-internal.h >> @@ -131,9 +131,59 @@ typedef struct XiveIVE { >> #define IVE_EQ_DATA PPC_BITMASK(33, 63) /* Data written to the EQ */ >> } XiveIVE; >> >> +/* EQ */ >> +typedef struct XiveEQ { >> + uint32_t w0; >> +#define EQ_W0_VALID PPC_BIT32(0) >> +#define EQ_W0_ENQUEUE PPC_BIT32(1) >> +#define EQ_W0_UCOND_NOTIFY PPC_BIT32(2) >> +#define EQ_W0_BACKLOG PPC_BIT32(3) >> +#define EQ_W0_PRECL_ESC_CTL PPC_BIT32(4) >> +#define EQ_W0_ESCALATE_CTL PPC_BIT32(5) >> +#define EQ_W0_END_OF_INTR PPC_BIT32(6) >> +#define EQ_W0_QSIZE PPC_BITMASK32(12, 15) >> +#define EQ_W0_SW0 PPC_BIT32(16) >> +#define EQ_W0_FIRMWARE EQ_W0_SW0 /* Owned by FW */ >> +#define EQ_QSIZE_4K 0 >> +#define EQ_QSIZE_64K 4 >> +#define EQ_W0_HWDEP PPC_BITMASK32(24, 31) >> + uint32_t w1; >> +#define EQ_W1_ESn PPC_BITMASK32(0, 1) >> +#define EQ_W1_ESn_P PPC_BIT32(0) >> +#define EQ_W1_ESn_Q PPC_BIT32(1) >> +#define EQ_W1_ESe PPC_BITMASK32(2, 3) >> +#define EQ_W1_ESe_P PPC_BIT32(2) >> +#define EQ_W1_ESe_Q PPC_BIT32(3) >> +#define EQ_W1_GENERATION PPC_BIT32(9) >> +#define EQ_W1_PAGE_OFF PPC_BITMASK32(10, 31) >> + uint32_t w2; >> +#define EQ_W2_MIGRATION_REG PPC_BITMASK32(0, 3) >> +#define EQ_W2_OP_DESC_HI PPC_BITMASK32(4, 31) >> + uint32_t w3; >> +#define EQ_W3_OP_DESC_LO PPC_BITMASK32(0, 31) >> + uint32_t w4; >> +#define EQ_W4_ESC_EQ_BLOCK PPC_BITMASK32(4, 7) >> +#define EQ_W4_ESC_EQ_INDEX PPC_BITMASK32(8, 31) >> + uint32_t w5; >> +#define EQ_W5_ESC_EQ_DATA PPC_BITMASK32(1, 31) >> + uint32_t w6; >> +#define EQ_W6_FORMAT_BIT PPC_BIT32(8) >> +#define EQ_W6_NVT_BLOCK PPC_BITMASK32(9, 12) >> +#define EQ_W6_NVT_INDEX PPC_BITMASK32(13, 31) >> + uint32_t w7; >> +#define EQ_W7_F0_IGNORE PPC_BIT32(0) >> +#define EQ_W7_F0_BLK_GROUPING PPC_BIT32(1) >> +#define EQ_W7_F0_PRIORITY PPC_BITMASK32(8, 15) >> +#define EQ_W7_F1_WAKEZ PPC_BIT32(0) >> +#define EQ_W7_F1_LOG_SERVER_ID PPC_BITMASK32(1, 31) >> +} XiveEQ; >> + >> #define XIVE_PRIORITY_MAX 7 >> >> void spapr_xive_reset(void *dev); >> XiveIVE *spapr_xive_get_ive(sPAPRXive *xive, uint32_t lisn); >> +XiveEQ *spapr_xive_get_eq(sPAPRXive *xive, uint32_t idx); >> +bool spapr_xive_eq_for_server(sPAPRXive *xive, uint32_t server, uint8_t prio, >> + uint32_t *out_eq_idx); >> >> #endif /* _INTC_XIVE_INTERNAL_H */ >
On Thu, Nov 30, 2017 at 02:06:27PM +0000, Cédric Le Goater wrote: > On 11/30/2017 04:38 AM, David Gibson wrote: > > On Thu, Nov 23, 2017 at 02:29:43PM +0100, Cédric Le Goater wrote: > >> The Event Queue Descriptor (EQD) table, also known as Event Notification > >> Descriptor (END), is one of the internal tables the XIVE interrupt > >> controller uses to redirect exception from event sources to CPU > >> threads. > >> > >> The EQD specifies on which Event Queue the event data should be posted > >> when an exception occurs (later on pulled by the OS) and which server > >> (VPD in XIVE terminology) to notify. The Event Queue is a much more > >> complex structure but we start with a simple model for the sPAPR > >> machine. > > > > Just to clarify my understanding a server / VPD in XIVE would > > typically correspond to a cpu - either real or virtual, yes? > > yes. VP for "virtual processor" and VPD for "virtual processor > descriptor" which contains the XIVE interrupt state of the VP > when not dispatched. It is still described in some documentation > as an NVT : Notification Virtual Target. > > XIVE concepts were renamed at some time but the old name perdured. > I am still struggling my way through all the names. > > > >> There is one XiveEQ per priority and the model chooses to store them > >> under the Xive Interrupt presenter model. It will be retrieved, just > >> like for XICS, through the 'intc' object pointer of the CPU. > >> > >> The EQ indexing follows a simple pattern: > >> > >> (server << 3) | (priority & 0x7) > >> > >> Signed-off-by: Cédric Le Goater <clg@kaod.org> > >> --- > >> hw/intc/spapr_xive.c | 56 +++++++++++++++++++++++++++++++++++++++++++++++++ > >> hw/intc/xive-internal.h | 50 +++++++++++++++++++++++++++++++++++++++++++ > >> 2 files changed, 106 insertions(+) > >> > >> diff --git a/hw/intc/spapr_xive.c b/hw/intc/spapr_xive.c > >> index 554b25e0884c..983317a6b3f6 100644 > >> --- a/hw/intc/spapr_xive.c > >> +++ b/hw/intc/spapr_xive.c > >> @@ -23,6 +23,7 @@ > >> #include "sysemu/dma.h" > >> #include "monitor/monitor.h" > >> #include "hw/ppc/spapr_xive.h" > >> +#include "hw/ppc/spapr.h" > >> #include "hw/ppc/xics.h" > >> > >> #include "xive-internal.h" > >> @@ -34,6 +35,8 @@ struct sPAPRXiveICP { > >> uint8_t tima[TM_RING_COUNT * 0x10]; > >> uint8_t *tima_os; > >> qemu_irq output; > >> + > >> + XiveEQ eqt[XIVE_PRIORITY_MAX + 1]; > >> }; > >> > >> static uint64_t spapr_xive_icp_accept(sPAPRXiveICP *icp) > >> @@ -183,6 +186,13 @@ static const MemoryRegionOps spapr_xive_tm_ops = { > >> }, > >> }; > >> > >> +static sPAPRXiveICP *spapr_xive_icp_get(sPAPRXive *xive, int server) > >> +{ > >> + PowerPCCPU *cpu = spapr_find_cpu(server); > >> + > >> + return cpu ? SPAPR_XIVE_ICP(cpu->intc) : NULL; > >> +} > >> + > >> static void spapr_xive_irq(sPAPRXive *xive, int lisn) > >> { > >> > >> @@ -632,6 +642,8 @@ static void spapr_xive_icp_reset(void *dev) > >> sPAPRXiveICP *xicp = SPAPR_XIVE_ICP(dev); > >> > >> memset(xicp->tima, 0, sizeof(xicp->tima)); > >> + > >> + memset(xicp->eqt, 0, sizeof(xicp->eqt)); > >> } > >> > >> static void spapr_xive_icp_realize(DeviceState *dev, Error **errp) > >> @@ -683,6 +695,23 @@ static void spapr_xive_icp_init(Object *obj) > >> xicp->tima_os = &xicp->tima[TM_QW1_OS]; > >> } > >> > >> +static const VMStateDescription vmstate_spapr_xive_icp_eq = { > >> + .name = TYPE_SPAPR_XIVE_ICP "/eq", > >> + .version_id = 1, > >> + .minimum_version_id = 1, > >> + .fields = (VMStateField []) { > >> + VMSTATE_UINT32(w0, XiveEQ), > >> + VMSTATE_UINT32(w1, XiveEQ), > >> + VMSTATE_UINT32(w2, XiveEQ), > >> + VMSTATE_UINT32(w3, XiveEQ), > >> + VMSTATE_UINT32(w4, XiveEQ), > >> + VMSTATE_UINT32(w5, XiveEQ), > >> + VMSTATE_UINT32(w6, XiveEQ), > >> + VMSTATE_UINT32(w7, XiveEQ), > > > > Wow. Super descriptive field names there, but I guess that's not your fault. > > The defines in the "xive-internal.h" give a better view ... > > >> + VMSTATE_END_OF_LIST() > >> + }, > >> +}; > >> + > >> static bool vmstate_spapr_xive_icp_needed(void *opaque) > >> { > >> /* TODO check machine XIVE support */ > >> @@ -696,6 +725,8 @@ static const VMStateDescription vmstate_spapr_xive_icp = { > >> .needed = vmstate_spapr_xive_icp_needed, > >> .fields = (VMStateField[]) { > >> VMSTATE_BUFFER(tima, sPAPRXiveICP), > >> + VMSTATE_STRUCT_ARRAY(eqt, sPAPRXiveICP, (XIVE_PRIORITY_MAX + 1), 1, > >> + vmstate_spapr_xive_icp_eq, XiveEQ), > >> VMSTATE_END_OF_LIST() > >> }, > >> }; > >> @@ -755,3 +786,28 @@ bool spapr_xive_irq_unset(sPAPRXive *xive, uint32_t lisn) > >> ive->w &= ~IVE_VALID; > >> return true; > >> } > >> + > >> +/* > >> + * Use a simple indexing for the EQs. > > > > Is this server+priority encoding architected anywhere? > > no. This is a model shortcut. > > > Otherwise, why not use separate parameters? > > yes. spapr_xive_get_eq() could use separate parameters and it would > shorten the some of the hcalls. > > The result is stored in a single field of the IVE, EQ_INDEX. So I will > still need mangle/demangle routines but these could be simple macros. > I will look at it. Hm, ok. So it's architected in the sense that you're using the encoding from the EQ_INDEX field throughout. That's could be a reasonable choice, I can't really tell yet. On the other hand, it might be easier to read if we use server and priority as separate parameters until the point we actually encode into the EQ_INDEX field. > > >> + */ > >> +XiveEQ *spapr_xive_get_eq(sPAPRXive *xive, uint32_t eq_idx) > >> +{ > >> + int priority = eq_idx & 0x7; > >> + sPAPRXiveICP *xicp = spapr_xive_icp_get(xive, eq_idx >> 3); > >> + > >> + return xicp ? &xicp->eqt[priority] : NULL; > >> +} > >> + > >> +bool spapr_xive_eq_for_server(sPAPRXive *xive, uint32_t server, > >> + uint8_t priority, uint32_t *out_eq_idx) > >> +{ > >> + if (priority > XIVE_PRIORITY_MAX) { > >> + return false; > >> + } > >> + > >> + if (out_eq_idx) { > >> + *out_eq_idx = (server << 3) | (priority & 0x7); > >> + } > >> + > >> + return true; > >> +} > >> diff --git a/hw/intc/xive-internal.h b/hw/intc/xive-internal.h > >> index 7d329f203a9b..c3949671aa03 100644 > >> --- a/hw/intc/xive-internal.h > >> +++ b/hw/intc/xive-internal.h > >> @@ -131,9 +131,59 @@ typedef struct XiveIVE { > >> #define IVE_EQ_DATA PPC_BITMASK(33, 63) /* Data written to the EQ */ > >> } XiveIVE; > >> > >> +/* EQ */ > >> +typedef struct XiveEQ { > >> + uint32_t w0; > >> +#define EQ_W0_VALID PPC_BIT32(0) > >> +#define EQ_W0_ENQUEUE PPC_BIT32(1) > >> +#define EQ_W0_UCOND_NOTIFY PPC_BIT32(2) > >> +#define EQ_W0_BACKLOG PPC_BIT32(3) > >> +#define EQ_W0_PRECL_ESC_CTL PPC_BIT32(4) > >> +#define EQ_W0_ESCALATE_CTL PPC_BIT32(5) > >> +#define EQ_W0_END_OF_INTR PPC_BIT32(6) > >> +#define EQ_W0_QSIZE PPC_BITMASK32(12, 15) > >> +#define EQ_W0_SW0 PPC_BIT32(16) > >> +#define EQ_W0_FIRMWARE EQ_W0_SW0 /* Owned by FW */ > >> +#define EQ_QSIZE_4K 0 > >> +#define EQ_QSIZE_64K 4 > >> +#define EQ_W0_HWDEP PPC_BITMASK32(24, 31) > >> + uint32_t w1; > >> +#define EQ_W1_ESn PPC_BITMASK32(0, 1) > >> +#define EQ_W1_ESn_P PPC_BIT32(0) > >> +#define EQ_W1_ESn_Q PPC_BIT32(1) > >> +#define EQ_W1_ESe PPC_BITMASK32(2, 3) > >> +#define EQ_W1_ESe_P PPC_BIT32(2) > >> +#define EQ_W1_ESe_Q PPC_BIT32(3) > >> +#define EQ_W1_GENERATION PPC_BIT32(9) > >> +#define EQ_W1_PAGE_OFF PPC_BITMASK32(10, 31) > >> + uint32_t w2; > >> +#define EQ_W2_MIGRATION_REG PPC_BITMASK32(0, 3) > >> +#define EQ_W2_OP_DESC_HI PPC_BITMASK32(4, 31) > >> + uint32_t w3; > >> +#define EQ_W3_OP_DESC_LO PPC_BITMASK32(0, 31) > >> + uint32_t w4; > >> +#define EQ_W4_ESC_EQ_BLOCK PPC_BITMASK32(4, 7) > >> +#define EQ_W4_ESC_EQ_INDEX PPC_BITMASK32(8, 31) > >> + uint32_t w5; > >> +#define EQ_W5_ESC_EQ_DATA PPC_BITMASK32(1, 31) > >> + uint32_t w6; > >> +#define EQ_W6_FORMAT_BIT PPC_BIT32(8) > >> +#define EQ_W6_NVT_BLOCK PPC_BITMASK32(9, 12) > >> +#define EQ_W6_NVT_INDEX PPC_BITMASK32(13, 31) > >> + uint32_t w7; > >> +#define EQ_W7_F0_IGNORE PPC_BIT32(0) > >> +#define EQ_W7_F0_BLK_GROUPING PPC_BIT32(1) > >> +#define EQ_W7_F0_PRIORITY PPC_BITMASK32(8, 15) > >> +#define EQ_W7_F1_WAKEZ PPC_BIT32(0) > >> +#define EQ_W7_F1_LOG_SERVER_ID PPC_BITMASK32(1, 31) > >> +} XiveEQ; > >> + > >> #define XIVE_PRIORITY_MAX 7 > >> > >> void spapr_xive_reset(void *dev); > >> XiveIVE *spapr_xive_get_ive(sPAPRXive *xive, uint32_t lisn); > >> +XiveEQ *spapr_xive_get_eq(sPAPRXive *xive, uint32_t idx); > >> +bool spapr_xive_eq_for_server(sPAPRXive *xive, uint32_t server, uint8_t prio, > >> + uint32_t *out_eq_idx); > >> > >> #endif /* _INTC_XIVE_INTERNAL_H */ > > >
On 12/01/2017 12:35 AM, David Gibson wrote: > On Thu, Nov 30, 2017 at 02:06:27PM +0000, Cédric Le Goater wrote: >> On 11/30/2017 04:38 AM, David Gibson wrote: >>> On Thu, Nov 23, 2017 at 02:29:43PM +0100, Cédric Le Goater wrote: >>>> The Event Queue Descriptor (EQD) table, also known as Event Notification >>>> Descriptor (END), is one of the internal tables the XIVE interrupt >>>> controller uses to redirect exception from event sources to CPU >>>> threads. >>>> >>>> The EQD specifies on which Event Queue the event data should be posted >>>> when an exception occurs (later on pulled by the OS) and which server >>>> (VPD in XIVE terminology) to notify. The Event Queue is a much more >>>> complex structure but we start with a simple model for the sPAPR >>>> machine. >>> >>> Just to clarify my understanding a server / VPD in XIVE would >>> typically correspond to a cpu - either real or virtual, yes? >> >> yes. VP for "virtual processor" and VPD for "virtual processor >> descriptor" which contains the XIVE interrupt state of the VP >> when not dispatched. It is still described in some documentation >> as an NVT : Notification Virtual Target. >> >> XIVE concepts were renamed at some time but the old name perdured. >> I am still struggling my way through all the names. >> >> >>>> There is one XiveEQ per priority and the model chooses to store them >>>> under the Xive Interrupt presenter model. It will be retrieved, just >>>> like for XICS, through the 'intc' object pointer of the CPU. >>>> >>>> The EQ indexing follows a simple pattern: >>>> >>>> (server << 3) | (priority & 0x7) >>>> >>>> Signed-off-by: Cédric Le Goater <clg@kaod.org> >>>> --- >>>> hw/intc/spapr_xive.c | 56 +++++++++++++++++++++++++++++++++++++++++++++++++ >>>> hw/intc/xive-internal.h | 50 +++++++++++++++++++++++++++++++++++++++++++ >>>> 2 files changed, 106 insertions(+) >>>> >>>> diff --git a/hw/intc/spapr_xive.c b/hw/intc/spapr_xive.c >>>> index 554b25e0884c..983317a6b3f6 100644 >>>> --- a/hw/intc/spapr_xive.c >>>> +++ b/hw/intc/spapr_xive.c >>>> @@ -23,6 +23,7 @@ >>>> #include "sysemu/dma.h" >>>> #include "monitor/monitor.h" >>>> #include "hw/ppc/spapr_xive.h" >>>> +#include "hw/ppc/spapr.h" >>>> #include "hw/ppc/xics.h" >>>> >>>> #include "xive-internal.h" >>>> @@ -34,6 +35,8 @@ struct sPAPRXiveICP { >>>> uint8_t tima[TM_RING_COUNT * 0x10]; >>>> uint8_t *tima_os; >>>> qemu_irq output; >>>> + >>>> + XiveEQ eqt[XIVE_PRIORITY_MAX + 1]; >>>> }; >>>> >>>> static uint64_t spapr_xive_icp_accept(sPAPRXiveICP *icp) >>>> @@ -183,6 +186,13 @@ static const MemoryRegionOps spapr_xive_tm_ops = { >>>> }, >>>> }; >>>> >>>> +static sPAPRXiveICP *spapr_xive_icp_get(sPAPRXive *xive, int server) >>>> +{ >>>> + PowerPCCPU *cpu = spapr_find_cpu(server); >>>> + >>>> + return cpu ? SPAPR_XIVE_ICP(cpu->intc) : NULL; >>>> +} >>>> + >>>> static void spapr_xive_irq(sPAPRXive *xive, int lisn) >>>> { >>>> >>>> @@ -632,6 +642,8 @@ static void spapr_xive_icp_reset(void *dev) >>>> sPAPRXiveICP *xicp = SPAPR_XIVE_ICP(dev); >>>> >>>> memset(xicp->tima, 0, sizeof(xicp->tima)); >>>> + >>>> + memset(xicp->eqt, 0, sizeof(xicp->eqt)); >>>> } >>>> >>>> static void spapr_xive_icp_realize(DeviceState *dev, Error **errp) >>>> @@ -683,6 +695,23 @@ static void spapr_xive_icp_init(Object *obj) >>>> xicp->tima_os = &xicp->tima[TM_QW1_OS]; >>>> } >>>> >>>> +static const VMStateDescription vmstate_spapr_xive_icp_eq = { >>>> + .name = TYPE_SPAPR_XIVE_ICP "/eq", >>>> + .version_id = 1, >>>> + .minimum_version_id = 1, >>>> + .fields = (VMStateField []) { >>>> + VMSTATE_UINT32(w0, XiveEQ), >>>> + VMSTATE_UINT32(w1, XiveEQ), >>>> + VMSTATE_UINT32(w2, XiveEQ), >>>> + VMSTATE_UINT32(w3, XiveEQ), >>>> + VMSTATE_UINT32(w4, XiveEQ), >>>> + VMSTATE_UINT32(w5, XiveEQ), >>>> + VMSTATE_UINT32(w6, XiveEQ), >>>> + VMSTATE_UINT32(w7, XiveEQ), >>> >>> Wow. Super descriptive field names there, but I guess that's not your fault. >> >> The defines in the "xive-internal.h" give a better view ... >> >>>> + VMSTATE_END_OF_LIST() >>>> + }, >>>> +}; >>>> + >>>> static bool vmstate_spapr_xive_icp_needed(void *opaque) >>>> { >>>> /* TODO check machine XIVE support */ >>>> @@ -696,6 +725,8 @@ static const VMStateDescription vmstate_spapr_xive_icp = { >>>> .needed = vmstate_spapr_xive_icp_needed, >>>> .fields = (VMStateField[]) { >>>> VMSTATE_BUFFER(tima, sPAPRXiveICP), >>>> + VMSTATE_STRUCT_ARRAY(eqt, sPAPRXiveICP, (XIVE_PRIORITY_MAX + 1), 1, >>>> + vmstate_spapr_xive_icp_eq, XiveEQ), >>>> VMSTATE_END_OF_LIST() >>>> }, >>>> }; >>>> @@ -755,3 +786,28 @@ bool spapr_xive_irq_unset(sPAPRXive *xive, uint32_t lisn) >>>> ive->w &= ~IVE_VALID; >>>> return true; >>>> } >>>> + >>>> +/* >>>> + * Use a simple indexing for the EQs. >>> >>> Is this server+priority encoding architected anywhere? >> >> no. This is a model shortcut. >> >>> Otherwise, why not use separate parameters? >> >> yes. spapr_xive_get_eq() could use separate parameters and it would >> shorten the some of the hcalls. >> >> The result is stored in a single field of the IVE, EQ_INDEX. So I will >> still need mangle/demangle routines but these could be simple macros. >> I will look at it. > > Hm, ok. So it's architected in the sense that you're using the > encoding from the EQ_INDEX field throughout. That's could be a > reasonable choice, I can't really tell yet. > > On the other hand, it might be easier to read if we use server and > priority as separate parameters until the point we actually encode > into the EQ_INDEX field. In the architecture, the EQ_INDEX field contains an index to an Event Queue Descriptor and the Event Queue Descriptor has a EQ_W6_NVT_INDEX field pointing to an Notification Virtual Target. So there are two extra tables for the EQs and for the NVTs used by the HW. In the sPAPR model, an EQ array is stored under the sPAPRXiveNVT object which is stored under the ->intc pointer of the CPUState object So the EQ_INDEX field is really taking a shortcut, encoding the cpu number and the priority to find an EQ, and the EQ_W6_NVT_INDEX field holds a value which is the cpu number. But at the end, we save two tables. C. > >> >>>> + */ >>>> +XiveEQ *spapr_xive_get_eq(sPAPRXive *xive, uint32_t eq_idx) >>>> +{ >>>> + int priority = eq_idx & 0x7; >>>> + sPAPRXiveICP *xicp = spapr_xive_icp_get(xive, eq_idx >> 3); >>>> + >>>> + return xicp ? &xicp->eqt[priority] : NULL; >>>> +} >>>> + >>>> +bool spapr_xive_eq_for_server(sPAPRXive *xive, uint32_t server, >>>> + uint8_t priority, uint32_t *out_eq_idx) >>>> +{ >>>> + if (priority > XIVE_PRIORITY_MAX) { >>>> + return false; >>>> + } >>>> + >>>> + if (out_eq_idx) { >>>> + *out_eq_idx = (server << 3) | (priority & 0x7); >>>> + } >>>> + >>>> + return true; >>>> +} >>>> diff --git a/hw/intc/xive-internal.h b/hw/intc/xive-internal.h >>>> index 7d329f203a9b..c3949671aa03 100644 >>>> --- a/hw/intc/xive-internal.h >>>> +++ b/hw/intc/xive-internal.h >>>> @@ -131,9 +131,59 @@ typedef struct XiveIVE { >>>> #define IVE_EQ_DATA PPC_BITMASK(33, 63) /* Data written to the EQ */ >>>> } XiveIVE; >>>> >>>> +/* EQ */ >>>> +typedef struct XiveEQ { >>>> + uint32_t w0; >>>> +#define EQ_W0_VALID PPC_BIT32(0) >>>> +#define EQ_W0_ENQUEUE PPC_BIT32(1) >>>> +#define EQ_W0_UCOND_NOTIFY PPC_BIT32(2) >>>> +#define EQ_W0_BACKLOG PPC_BIT32(3) >>>> +#define EQ_W0_PRECL_ESC_CTL PPC_BIT32(4) >>>> +#define EQ_W0_ESCALATE_CTL PPC_BIT32(5) >>>> +#define EQ_W0_END_OF_INTR PPC_BIT32(6) >>>> +#define EQ_W0_QSIZE PPC_BITMASK32(12, 15) >>>> +#define EQ_W0_SW0 PPC_BIT32(16) >>>> +#define EQ_W0_FIRMWARE EQ_W0_SW0 /* Owned by FW */ >>>> +#define EQ_QSIZE_4K 0 >>>> +#define EQ_QSIZE_64K 4 >>>> +#define EQ_W0_HWDEP PPC_BITMASK32(24, 31) >>>> + uint32_t w1; >>>> +#define EQ_W1_ESn PPC_BITMASK32(0, 1) >>>> +#define EQ_W1_ESn_P PPC_BIT32(0) >>>> +#define EQ_W1_ESn_Q PPC_BIT32(1) >>>> +#define EQ_W1_ESe PPC_BITMASK32(2, 3) >>>> +#define EQ_W1_ESe_P PPC_BIT32(2) >>>> +#define EQ_W1_ESe_Q PPC_BIT32(3) >>>> +#define EQ_W1_GENERATION PPC_BIT32(9) >>>> +#define EQ_W1_PAGE_OFF PPC_BITMASK32(10, 31) >>>> + uint32_t w2; >>>> +#define EQ_W2_MIGRATION_REG PPC_BITMASK32(0, 3) >>>> +#define EQ_W2_OP_DESC_HI PPC_BITMASK32(4, 31) >>>> + uint32_t w3; >>>> +#define EQ_W3_OP_DESC_LO PPC_BITMASK32(0, 31) >>>> + uint32_t w4; >>>> +#define EQ_W4_ESC_EQ_BLOCK PPC_BITMASK32(4, 7) >>>> +#define EQ_W4_ESC_EQ_INDEX PPC_BITMASK32(8, 31) >>>> + uint32_t w5; >>>> +#define EQ_W5_ESC_EQ_DATA PPC_BITMASK32(1, 31) >>>> + uint32_t w6; >>>> +#define EQ_W6_FORMAT_BIT PPC_BIT32(8) >>>> +#define EQ_W6_NVT_BLOCK PPC_BITMASK32(9, 12) >>>> +#define EQ_W6_NVT_INDEX PPC_BITMASK32(13, 31) >>>> + uint32_t w7; >>>> +#define EQ_W7_F0_IGNORE PPC_BIT32(0) >>>> +#define EQ_W7_F0_BLK_GROUPING PPC_BIT32(1) >>>> +#define EQ_W7_F0_PRIORITY PPC_BITMASK32(8, 15) >>>> +#define EQ_W7_F1_WAKEZ PPC_BIT32(0) >>>> +#define EQ_W7_F1_LOG_SERVER_ID PPC_BITMASK32(1, 31) >>>> +} XiveEQ; >>>> + >>>> #define XIVE_PRIORITY_MAX 7 >>>> >>>> void spapr_xive_reset(void *dev); >>>> XiveIVE *spapr_xive_get_ive(sPAPRXive *xive, uint32_t lisn); >>>> +XiveEQ *spapr_xive_get_eq(sPAPRXive *xive, uint32_t idx); >>>> +bool spapr_xive_eq_for_server(sPAPRXive *xive, uint32_t server, uint8_t prio, >>>> + uint32_t *out_eq_idx); >>>> >>>> #endif /* _INTC_XIVE_INTERNAL_H */ >>> >> >
On Thu, 2017-11-30 at 15:38 +1100, David Gibson wrote: > On Thu, Nov 23, 2017 at 02:29:43PM +0100, Cédric Le Goater wrote: > > The Event Queue Descriptor (EQD) table, also known as Event Notification > > Descriptor (END), is one of the internal tables the XIVE interrupt > > controller uses to redirect exception from event sources to CPU > > threads. > > > > The EQD specifies on which Event Queue the event data should be posted > > when an exception occurs (later on pulled by the OS) and which server > > (VPD in XIVE terminology) to notify. The Event Queue is a much more > > complex structure but we start with a simple model for the sPAPR > > machine. > > Just to clarify my understanding a server / VPD in XIVE would > typically correspond to a cpu - either real or virtual, yes? The IVEs and EQs are managed by the virtualization controller. The VPs (aka ENDs) are managed by the presentation controller. There's a VP per real and virtual CPU. You can think of the XIVE as having 3 main component types: - Source controller(s). There are some in the PHBs, one generic in the XIVE itself, and one in the PSI bridge. Those contain the PQ bits and thus the trigger & coalescing logic. They effectively shoot an MMIO to the virtualization controller on events. - Virtualization controller (one per chip). This receives the above MMIOs from the sources, manages the IVEs to get the target queue and remap the number, and manages the queues. When a queue is enabled for notification (or escalation) and such an event occurs, an MMIO goes to the corresponding presentation controller. - Presentation controller (one per chip). This receives the above notifications and sets as a result the IPB bits for one of the 8 priorities. Basically this guy tracks a single pending bit per priority for each VP indicating whether there's something in the queue for that priority and delivers interrupts to the core accordingly. Now this is a simplified view. The PC supports groups but we don't handle that yet, there are escalation interrupts, there are redistribution mechanisms etc... but for now you get the basic idea. Cheers, Ben.
On Sat, 2017-12-02 at 08:39 -0600, Benjamin Herrenschmidt wrote: > The IVEs and EQs are managed by the virtualization controller. The VPs > (aka ENDs) typo. aka NVTs > are managed by the presentation controller. There's a VP per > real and virtual CPU.
On Fri, Dec 01, 2017 at 05:36:39PM +0100, Cédric Le Goater wrote: > On 12/01/2017 12:35 AM, David Gibson wrote: > > On Thu, Nov 30, 2017 at 02:06:27PM +0000, Cédric Le Goater wrote: > >> On 11/30/2017 04:38 AM, David Gibson wrote: > >>> On Thu, Nov 23, 2017 at 02:29:43PM +0100, Cédric Le Goater wrote: > >>>> The Event Queue Descriptor (EQD) table, also known as Event Notification > >>>> Descriptor (END), is one of the internal tables the XIVE interrupt > >>>> controller uses to redirect exception from event sources to CPU > >>>> threads. > >>>> > >>>> The EQD specifies on which Event Queue the event data should be posted > >>>> when an exception occurs (later on pulled by the OS) and which server > >>>> (VPD in XIVE terminology) to notify. The Event Queue is a much more > >>>> complex structure but we start with a simple model for the sPAPR > >>>> machine. > >>> > >>> Just to clarify my understanding a server / VPD in XIVE would > >>> typically correspond to a cpu - either real or virtual, yes? > >> > >> yes. VP for "virtual processor" and VPD for "virtual processor > >> descriptor" which contains the XIVE interrupt state of the VP > >> when not dispatched. It is still described in some documentation > >> as an NVT : Notification Virtual Target. > >> > >> XIVE concepts were renamed at some time but the old name perdured. > >> I am still struggling my way through all the names. > >> > >> > >>>> There is one XiveEQ per priority and the model chooses to store them > >>>> under the Xive Interrupt presenter model. It will be retrieved, just > >>>> like for XICS, through the 'intc' object pointer of the CPU. > >>>> > >>>> The EQ indexing follows a simple pattern: > >>>> > >>>> (server << 3) | (priority & 0x7) > >>>> > >>>> Signed-off-by: Cédric Le Goater <clg@kaod.org> > >>>> --- > >>>> hw/intc/spapr_xive.c | 56 +++++++++++++++++++++++++++++++++++++++++++++++++ > >>>> hw/intc/xive-internal.h | 50 +++++++++++++++++++++++++++++++++++++++++++ > >>>> 2 files changed, 106 insertions(+) > >>>> > >>>> diff --git a/hw/intc/spapr_xive.c b/hw/intc/spapr_xive.c > >>>> index 554b25e0884c..983317a6b3f6 100644 > >>>> --- a/hw/intc/spapr_xive.c > >>>> +++ b/hw/intc/spapr_xive.c > >>>> @@ -23,6 +23,7 @@ > >>>> #include "sysemu/dma.h" > >>>> #include "monitor/monitor.h" > >>>> #include "hw/ppc/spapr_xive.h" > >>>> +#include "hw/ppc/spapr.h" > >>>> #include "hw/ppc/xics.h" > >>>> > >>>> #include "xive-internal.h" > >>>> @@ -34,6 +35,8 @@ struct sPAPRXiveICP { > >>>> uint8_t tima[TM_RING_COUNT * 0x10]; > >>>> uint8_t *tima_os; > >>>> qemu_irq output; > >>>> + > >>>> + XiveEQ eqt[XIVE_PRIORITY_MAX + 1]; > >>>> }; > >>>> > >>>> static uint64_t spapr_xive_icp_accept(sPAPRXiveICP *icp) > >>>> @@ -183,6 +186,13 @@ static const MemoryRegionOps spapr_xive_tm_ops = { > >>>> }, > >>>> }; > >>>> > >>>> +static sPAPRXiveICP *spapr_xive_icp_get(sPAPRXive *xive, int server) > >>>> +{ > >>>> + PowerPCCPU *cpu = spapr_find_cpu(server); > >>>> + > >>>> + return cpu ? SPAPR_XIVE_ICP(cpu->intc) : NULL; > >>>> +} > >>>> + > >>>> static void spapr_xive_irq(sPAPRXive *xive, int lisn) > >>>> { > >>>> > >>>> @@ -632,6 +642,8 @@ static void spapr_xive_icp_reset(void *dev) > >>>> sPAPRXiveICP *xicp = SPAPR_XIVE_ICP(dev); > >>>> > >>>> memset(xicp->tima, 0, sizeof(xicp->tima)); > >>>> + > >>>> + memset(xicp->eqt, 0, sizeof(xicp->eqt)); > >>>> } > >>>> > >>>> static void spapr_xive_icp_realize(DeviceState *dev, Error **errp) > >>>> @@ -683,6 +695,23 @@ static void spapr_xive_icp_init(Object *obj) > >>>> xicp->tima_os = &xicp->tima[TM_QW1_OS]; > >>>> } > >>>> > >>>> +static const VMStateDescription vmstate_spapr_xive_icp_eq = { > >>>> + .name = TYPE_SPAPR_XIVE_ICP "/eq", > >>>> + .version_id = 1, > >>>> + .minimum_version_id = 1, > >>>> + .fields = (VMStateField []) { > >>>> + VMSTATE_UINT32(w0, XiveEQ), > >>>> + VMSTATE_UINT32(w1, XiveEQ), > >>>> + VMSTATE_UINT32(w2, XiveEQ), > >>>> + VMSTATE_UINT32(w3, XiveEQ), > >>>> + VMSTATE_UINT32(w4, XiveEQ), > >>>> + VMSTATE_UINT32(w5, XiveEQ), > >>>> + VMSTATE_UINT32(w6, XiveEQ), > >>>> + VMSTATE_UINT32(w7, XiveEQ), > >>> > >>> Wow. Super descriptive field names there, but I guess that's not your fault. > >> > >> The defines in the "xive-internal.h" give a better view ... > >> > >>>> + VMSTATE_END_OF_LIST() > >>>> + }, > >>>> +}; > >>>> + > >>>> static bool vmstate_spapr_xive_icp_needed(void *opaque) > >>>> { > >>>> /* TODO check machine XIVE support */ > >>>> @@ -696,6 +725,8 @@ static const VMStateDescription vmstate_spapr_xive_icp = { > >>>> .needed = vmstate_spapr_xive_icp_needed, > >>>> .fields = (VMStateField[]) { > >>>> VMSTATE_BUFFER(tima, sPAPRXiveICP), > >>>> + VMSTATE_STRUCT_ARRAY(eqt, sPAPRXiveICP, (XIVE_PRIORITY_MAX + 1), 1, > >>>> + vmstate_spapr_xive_icp_eq, XiveEQ), > >>>> VMSTATE_END_OF_LIST() > >>>> }, > >>>> }; > >>>> @@ -755,3 +786,28 @@ bool spapr_xive_irq_unset(sPAPRXive *xive, uint32_t lisn) > >>>> ive->w &= ~IVE_VALID; > >>>> return true; > >>>> } > >>>> + > >>>> +/* > >>>> + * Use a simple indexing for the EQs. > >>> > >>> Is this server+priority encoding architected anywhere? > >> > >> no. This is a model shortcut. > >> > >>> Otherwise, why not use separate parameters? > >> > >> yes. spapr_xive_get_eq() could use separate parameters and it would > >> shorten the some of the hcalls. > >> > >> The result is stored in a single field of the IVE, EQ_INDEX. So I will > >> still need mangle/demangle routines but these could be simple macros. > >> I will look at it. > > > > Hm, ok. So it's architected in the sense that you're using the > > encoding from the EQ_INDEX field throughout. That's could be a > > reasonable choice, I can't really tell yet. > > > > On the other hand, it might be easier to read if we use server and > > priority as separate parameters until the point we actually encode > > into the EQ_INDEX field. > > In the architecture, the EQ_INDEX field contains an index to an > Event Queue Descriptor and the Event Queue Descriptor has a > EQ_W6_NVT_INDEX field pointing to an Notification Virtual Target. > So there are two extra tables for the EQs and for the NVTs > used by the HW. Ok. In the PAPR interface is the EQ_INDEX ever exposed to the guest? Or does it just supply target/priority numbers and the hypervisor manages the mapping to queues internally? > In the sPAPR model, an EQ array is stored under the sPAPRXiveNVT > object which is stored under the ->intc pointer of the CPUState > object > > So the EQ_INDEX field is really taking a shortcut, encoding > the cpu number and the priority to find an EQ, and the > EQ_W6_NVT_INDEX field holds a value which is the cpu number. > But at the end, we save two tables. > > C. > > > > > >> > >>>> + */ > >>>> +XiveEQ *spapr_xive_get_eq(sPAPRXive *xive, uint32_t eq_idx) > >>>> +{ > >>>> + int priority = eq_idx & 0x7; > >>>> + sPAPRXiveICP *xicp = spapr_xive_icp_get(xive, eq_idx >> 3); > >>>> + > >>>> + return xicp ? &xicp->eqt[priority] : NULL; > >>>> +} > >>>> + > >>>> +bool spapr_xive_eq_for_server(sPAPRXive *xive, uint32_t server, > >>>> + uint8_t priority, uint32_t *out_eq_idx) > >>>> +{ > >>>> + if (priority > XIVE_PRIORITY_MAX) { > >>>> + return false; > >>>> + } > >>>> + > >>>> + if (out_eq_idx) { > >>>> + *out_eq_idx = (server << 3) | (priority & 0x7); > >>>> + } > >>>> + > >>>> + return true; > >>>> +} > >>>> diff --git a/hw/intc/xive-internal.h b/hw/intc/xive-internal.h > >>>> index 7d329f203a9b..c3949671aa03 100644 > >>>> --- a/hw/intc/xive-internal.h > >>>> +++ b/hw/intc/xive-internal.h > >>>> @@ -131,9 +131,59 @@ typedef struct XiveIVE { > >>>> #define IVE_EQ_DATA PPC_BITMASK(33, 63) /* Data written to the EQ */ > >>>> } XiveIVE; > >>>> > >>>> +/* EQ */ > >>>> +typedef struct XiveEQ { > >>>> + uint32_t w0; > >>>> +#define EQ_W0_VALID PPC_BIT32(0) > >>>> +#define EQ_W0_ENQUEUE PPC_BIT32(1) > >>>> +#define EQ_W0_UCOND_NOTIFY PPC_BIT32(2) > >>>> +#define EQ_W0_BACKLOG PPC_BIT32(3) > >>>> +#define EQ_W0_PRECL_ESC_CTL PPC_BIT32(4) > >>>> +#define EQ_W0_ESCALATE_CTL PPC_BIT32(5) > >>>> +#define EQ_W0_END_OF_INTR PPC_BIT32(6) > >>>> +#define EQ_W0_QSIZE PPC_BITMASK32(12, 15) > >>>> +#define EQ_W0_SW0 PPC_BIT32(16) > >>>> +#define EQ_W0_FIRMWARE EQ_W0_SW0 /* Owned by FW */ > >>>> +#define EQ_QSIZE_4K 0 > >>>> +#define EQ_QSIZE_64K 4 > >>>> +#define EQ_W0_HWDEP PPC_BITMASK32(24, 31) > >>>> + uint32_t w1; > >>>> +#define EQ_W1_ESn PPC_BITMASK32(0, 1) > >>>> +#define EQ_W1_ESn_P PPC_BIT32(0) > >>>> +#define EQ_W1_ESn_Q PPC_BIT32(1) > >>>> +#define EQ_W1_ESe PPC_BITMASK32(2, 3) > >>>> +#define EQ_W1_ESe_P PPC_BIT32(2) > >>>> +#define EQ_W1_ESe_Q PPC_BIT32(3) > >>>> +#define EQ_W1_GENERATION PPC_BIT32(9) > >>>> +#define EQ_W1_PAGE_OFF PPC_BITMASK32(10, 31) > >>>> + uint32_t w2; > >>>> +#define EQ_W2_MIGRATION_REG PPC_BITMASK32(0, 3) > >>>> +#define EQ_W2_OP_DESC_HI PPC_BITMASK32(4, 31) > >>>> + uint32_t w3; > >>>> +#define EQ_W3_OP_DESC_LO PPC_BITMASK32(0, 31) > >>>> + uint32_t w4; > >>>> +#define EQ_W4_ESC_EQ_BLOCK PPC_BITMASK32(4, 7) > >>>> +#define EQ_W4_ESC_EQ_INDEX PPC_BITMASK32(8, 31) > >>>> + uint32_t w5; > >>>> +#define EQ_W5_ESC_EQ_DATA PPC_BITMASK32(1, 31) > >>>> + uint32_t w6; > >>>> +#define EQ_W6_FORMAT_BIT PPC_BIT32(8) > >>>> +#define EQ_W6_NVT_BLOCK PPC_BITMASK32(9, 12) > >>>> +#define EQ_W6_NVT_INDEX PPC_BITMASK32(13, 31) > >>>> + uint32_t w7; > >>>> +#define EQ_W7_F0_IGNORE PPC_BIT32(0) > >>>> +#define EQ_W7_F0_BLK_GROUPING PPC_BIT32(1) > >>>> +#define EQ_W7_F0_PRIORITY PPC_BITMASK32(8, 15) > >>>> +#define EQ_W7_F1_WAKEZ PPC_BIT32(0) > >>>> +#define EQ_W7_F1_LOG_SERVER_ID PPC_BITMASK32(1, 31) > >>>> +} XiveEQ; > >>>> + > >>>> #define XIVE_PRIORITY_MAX 7 > >>>> > >>>> void spapr_xive_reset(void *dev); > >>>> XiveIVE *spapr_xive_get_ive(sPAPRXive *xive, uint32_t lisn); > >>>> +XiveEQ *spapr_xive_get_eq(sPAPRXive *xive, uint32_t idx); > >>>> +bool spapr_xive_eq_for_server(sPAPRXive *xive, uint32_t server, uint8_t prio, > >>>> + uint32_t *out_eq_idx); > >>>> > >>>> #endif /* _INTC_XIVE_INTERNAL_H */ > >>> > >> > > >
On 12/04/2017 02:09 AM, David Gibson wrote: > On Fri, Dec 01, 2017 at 05:36:39PM +0100, Cédric Le Goater wrote: >> On 12/01/2017 12:35 AM, David Gibson wrote: >>> On Thu, Nov 30, 2017 at 02:06:27PM +0000, Cédric Le Goater wrote: >>>> On 11/30/2017 04:38 AM, David Gibson wrote: >>>>> On Thu, Nov 23, 2017 at 02:29:43PM +0100, Cédric Le Goater wrote: >>>>>> The Event Queue Descriptor (EQD) table, also known as Event Notification >>>>>> Descriptor (END), is one of the internal tables the XIVE interrupt >>>>>> controller uses to redirect exception from event sources to CPU >>>>>> threads. >>>>>> >>>>>> The EQD specifies on which Event Queue the event data should be posted >>>>>> when an exception occurs (later on pulled by the OS) and which server >>>>>> (VPD in XIVE terminology) to notify. The Event Queue is a much more >>>>>> complex structure but we start with a simple model for the sPAPR >>>>>> machine. >>>>> >>>>> Just to clarify my understanding a server / VPD in XIVE would >>>>> typically correspond to a cpu - either real or virtual, yes? >>>> >>>> yes. VP for "virtual processor" and VPD for "virtual processor >>>> descriptor" which contains the XIVE interrupt state of the VP >>>> when not dispatched. It is still described in some documentation >>>> as an NVT : Notification Virtual Target. >>>> >>>> XIVE concepts were renamed at some time but the old name perdured. >>>> I am still struggling my way through all the names. >>>> >>>> >>>>>> There is one XiveEQ per priority and the model chooses to store them >>>>>> under the Xive Interrupt presenter model. It will be retrieved, just >>>>>> like for XICS, through the 'intc' object pointer of the CPU. >>>>>> >>>>>> The EQ indexing follows a simple pattern: >>>>>> >>>>>> (server << 3) | (priority & 0x7) >>>>>> >>>>>> Signed-off-by: Cédric Le Goater <clg@kaod.org> >>>>>> --- >>>>>> hw/intc/spapr_xive.c | 56 +++++++++++++++++++++++++++++++++++++++++++++++++ >>>>>> hw/intc/xive-internal.h | 50 +++++++++++++++++++++++++++++++++++++++++++ >>>>>> 2 files changed, 106 insertions(+) >>>>>> >>>>>> diff --git a/hw/intc/spapr_xive.c b/hw/intc/spapr_xive.c >>>>>> index 554b25e0884c..983317a6b3f6 100644 >>>>>> --- a/hw/intc/spapr_xive.c >>>>>> +++ b/hw/intc/spapr_xive.c >>>>>> @@ -23,6 +23,7 @@ >>>>>> #include "sysemu/dma.h" >>>>>> #include "monitor/monitor.h" >>>>>> #include "hw/ppc/spapr_xive.h" >>>>>> +#include "hw/ppc/spapr.h" >>>>>> #include "hw/ppc/xics.h" >>>>>> >>>>>> #include "xive-internal.h" >>>>>> @@ -34,6 +35,8 @@ struct sPAPRXiveICP { >>>>>> uint8_t tima[TM_RING_COUNT * 0x10]; >>>>>> uint8_t *tima_os; >>>>>> qemu_irq output; >>>>>> + >>>>>> + XiveEQ eqt[XIVE_PRIORITY_MAX + 1]; >>>>>> }; >>>>>> >>>>>> static uint64_t spapr_xive_icp_accept(sPAPRXiveICP *icp) >>>>>> @@ -183,6 +186,13 @@ static const MemoryRegionOps spapr_xive_tm_ops = { >>>>>> }, >>>>>> }; >>>>>> >>>>>> +static sPAPRXiveICP *spapr_xive_icp_get(sPAPRXive *xive, int server) >>>>>> +{ >>>>>> + PowerPCCPU *cpu = spapr_find_cpu(server); >>>>>> + >>>>>> + return cpu ? SPAPR_XIVE_ICP(cpu->intc) : NULL; >>>>>> +} >>>>>> + >>>>>> static void spapr_xive_irq(sPAPRXive *xive, int lisn) >>>>>> { >>>>>> >>>>>> @@ -632,6 +642,8 @@ static void spapr_xive_icp_reset(void *dev) >>>>>> sPAPRXiveICP *xicp = SPAPR_XIVE_ICP(dev); >>>>>> >>>>>> memset(xicp->tima, 0, sizeof(xicp->tima)); >>>>>> + >>>>>> + memset(xicp->eqt, 0, sizeof(xicp->eqt)); >>>>>> } >>>>>> >>>>>> static void spapr_xive_icp_realize(DeviceState *dev, Error **errp) >>>>>> @@ -683,6 +695,23 @@ static void spapr_xive_icp_init(Object *obj) >>>>>> xicp->tima_os = &xicp->tima[TM_QW1_OS]; >>>>>> } >>>>>> >>>>>> +static const VMStateDescription vmstate_spapr_xive_icp_eq = { >>>>>> + .name = TYPE_SPAPR_XIVE_ICP "/eq", >>>>>> + .version_id = 1, >>>>>> + .minimum_version_id = 1, >>>>>> + .fields = (VMStateField []) { >>>>>> + VMSTATE_UINT32(w0, XiveEQ), >>>>>> + VMSTATE_UINT32(w1, XiveEQ), >>>>>> + VMSTATE_UINT32(w2, XiveEQ), >>>>>> + VMSTATE_UINT32(w3, XiveEQ), >>>>>> + VMSTATE_UINT32(w4, XiveEQ), >>>>>> + VMSTATE_UINT32(w5, XiveEQ), >>>>>> + VMSTATE_UINT32(w6, XiveEQ), >>>>>> + VMSTATE_UINT32(w7, XiveEQ), >>>>> >>>>> Wow. Super descriptive field names there, but I guess that's not your fault. >>>> >>>> The defines in the "xive-internal.h" give a better view ... >>>> >>>>>> + VMSTATE_END_OF_LIST() >>>>>> + }, >>>>>> +}; >>>>>> + >>>>>> static bool vmstate_spapr_xive_icp_needed(void *opaque) >>>>>> { >>>>>> /* TODO check machine XIVE support */ >>>>>> @@ -696,6 +725,8 @@ static const VMStateDescription vmstate_spapr_xive_icp = { >>>>>> .needed = vmstate_spapr_xive_icp_needed, >>>>>> .fields = (VMStateField[]) { >>>>>> VMSTATE_BUFFER(tima, sPAPRXiveICP), >>>>>> + VMSTATE_STRUCT_ARRAY(eqt, sPAPRXiveICP, (XIVE_PRIORITY_MAX + 1), 1, >>>>>> + vmstate_spapr_xive_icp_eq, XiveEQ), >>>>>> VMSTATE_END_OF_LIST() >>>>>> }, >>>>>> }; >>>>>> @@ -755,3 +786,28 @@ bool spapr_xive_irq_unset(sPAPRXive *xive, uint32_t lisn) >>>>>> ive->w &= ~IVE_VALID; >>>>>> return true; >>>>>> } >>>>>> + >>>>>> +/* >>>>>> + * Use a simple indexing for the EQs. >>>>> >>>>> Is this server+priority encoding architected anywhere? >>>> >>>> no. This is a model shortcut. >>>> >>>>> Otherwise, why not use separate parameters? >>>> >>>> yes. spapr_xive_get_eq() could use separate parameters and it would >>>> shorten the some of the hcalls. >>>> >>>> The result is stored in a single field of the IVE, EQ_INDEX. So I will >>>> still need mangle/demangle routines but these could be simple macros. >>>> I will look at it. >>> >>> Hm, ok. So it's architected in the sense that you're using the >>> encoding from the EQ_INDEX field throughout. That's could be a >>> reasonable choice, I can't really tell yet. >>> >>> On the other hand, it might be easier to read if we use server and >>> priority as separate parameters until the point we actually encode >>> into the EQ_INDEX field. >> >> In the architecture, the EQ_INDEX field contains an index to an >> Event Queue Descriptor and the Event Queue Descriptor has a >> EQ_W6_NVT_INDEX field pointing to an Notification Virtual Target. >> So there are two extra tables for the EQs and for the NVTs >> used by the HW. > > Ok. In the PAPR interface is the EQ_INDEX ever exposed to the guest? never. > Or does it just supply target/priority numbers and the hypervisor > manages the mapping to queues internally? Yes. target/priority numbers is the interface used by the hcalls. Same for baremetal. OPAL handles the EQ indexing because it creates the EQ table and register it in the controller C.
diff --git a/hw/intc/spapr_xive.c b/hw/intc/spapr_xive.c index 554b25e0884c..983317a6b3f6 100644 --- a/hw/intc/spapr_xive.c +++ b/hw/intc/spapr_xive.c @@ -23,6 +23,7 @@ #include "sysemu/dma.h" #include "monitor/monitor.h" #include "hw/ppc/spapr_xive.h" +#include "hw/ppc/spapr.h" #include "hw/ppc/xics.h" #include "xive-internal.h" @@ -34,6 +35,8 @@ struct sPAPRXiveICP { uint8_t tima[TM_RING_COUNT * 0x10]; uint8_t *tima_os; qemu_irq output; + + XiveEQ eqt[XIVE_PRIORITY_MAX + 1]; }; static uint64_t spapr_xive_icp_accept(sPAPRXiveICP *icp) @@ -183,6 +186,13 @@ static const MemoryRegionOps spapr_xive_tm_ops = { }, }; +static sPAPRXiveICP *spapr_xive_icp_get(sPAPRXive *xive, int server) +{ + PowerPCCPU *cpu = spapr_find_cpu(server); + + return cpu ? SPAPR_XIVE_ICP(cpu->intc) : NULL; +} + static void spapr_xive_irq(sPAPRXive *xive, int lisn) { @@ -632,6 +642,8 @@ static void spapr_xive_icp_reset(void *dev) sPAPRXiveICP *xicp = SPAPR_XIVE_ICP(dev); memset(xicp->tima, 0, sizeof(xicp->tima)); + + memset(xicp->eqt, 0, sizeof(xicp->eqt)); } static void spapr_xive_icp_realize(DeviceState *dev, Error **errp) @@ -683,6 +695,23 @@ static void spapr_xive_icp_init(Object *obj) xicp->tima_os = &xicp->tima[TM_QW1_OS]; } +static const VMStateDescription vmstate_spapr_xive_icp_eq = { + .name = TYPE_SPAPR_XIVE_ICP "/eq", + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField []) { + VMSTATE_UINT32(w0, XiveEQ), + VMSTATE_UINT32(w1, XiveEQ), + VMSTATE_UINT32(w2, XiveEQ), + VMSTATE_UINT32(w3, XiveEQ), + VMSTATE_UINT32(w4, XiveEQ), + VMSTATE_UINT32(w5, XiveEQ), + VMSTATE_UINT32(w6, XiveEQ), + VMSTATE_UINT32(w7, XiveEQ), + VMSTATE_END_OF_LIST() + }, +}; + static bool vmstate_spapr_xive_icp_needed(void *opaque) { /* TODO check machine XIVE support */ @@ -696,6 +725,8 @@ static const VMStateDescription vmstate_spapr_xive_icp = { .needed = vmstate_spapr_xive_icp_needed, .fields = (VMStateField[]) { VMSTATE_BUFFER(tima, sPAPRXiveICP), + VMSTATE_STRUCT_ARRAY(eqt, sPAPRXiveICP, (XIVE_PRIORITY_MAX + 1), 1, + vmstate_spapr_xive_icp_eq, XiveEQ), VMSTATE_END_OF_LIST() }, }; @@ -755,3 +786,28 @@ bool spapr_xive_irq_unset(sPAPRXive *xive, uint32_t lisn) ive->w &= ~IVE_VALID; return true; } + +/* + * Use a simple indexing for the EQs. + */ +XiveEQ *spapr_xive_get_eq(sPAPRXive *xive, uint32_t eq_idx) +{ + int priority = eq_idx & 0x7; + sPAPRXiveICP *xicp = spapr_xive_icp_get(xive, eq_idx >> 3); + + return xicp ? &xicp->eqt[priority] : NULL; +} + +bool spapr_xive_eq_for_server(sPAPRXive *xive, uint32_t server, + uint8_t priority, uint32_t *out_eq_idx) +{ + if (priority > XIVE_PRIORITY_MAX) { + return false; + } + + if (out_eq_idx) { + *out_eq_idx = (server << 3) | (priority & 0x7); + } + + return true; +} diff --git a/hw/intc/xive-internal.h b/hw/intc/xive-internal.h index 7d329f203a9b..c3949671aa03 100644 --- a/hw/intc/xive-internal.h +++ b/hw/intc/xive-internal.h @@ -131,9 +131,59 @@ typedef struct XiveIVE { #define IVE_EQ_DATA PPC_BITMASK(33, 63) /* Data written to the EQ */ } XiveIVE; +/* EQ */ +typedef struct XiveEQ { + uint32_t w0; +#define EQ_W0_VALID PPC_BIT32(0) +#define EQ_W0_ENQUEUE PPC_BIT32(1) +#define EQ_W0_UCOND_NOTIFY PPC_BIT32(2) +#define EQ_W0_BACKLOG PPC_BIT32(3) +#define EQ_W0_PRECL_ESC_CTL PPC_BIT32(4) +#define EQ_W0_ESCALATE_CTL PPC_BIT32(5) +#define EQ_W0_END_OF_INTR PPC_BIT32(6) +#define EQ_W0_QSIZE PPC_BITMASK32(12, 15) +#define EQ_W0_SW0 PPC_BIT32(16) +#define EQ_W0_FIRMWARE EQ_W0_SW0 /* Owned by FW */ +#define EQ_QSIZE_4K 0 +#define EQ_QSIZE_64K 4 +#define EQ_W0_HWDEP PPC_BITMASK32(24, 31) + uint32_t w1; +#define EQ_W1_ESn PPC_BITMASK32(0, 1) +#define EQ_W1_ESn_P PPC_BIT32(0) +#define EQ_W1_ESn_Q PPC_BIT32(1) +#define EQ_W1_ESe PPC_BITMASK32(2, 3) +#define EQ_W1_ESe_P PPC_BIT32(2) +#define EQ_W1_ESe_Q PPC_BIT32(3) +#define EQ_W1_GENERATION PPC_BIT32(9) +#define EQ_W1_PAGE_OFF PPC_BITMASK32(10, 31) + uint32_t w2; +#define EQ_W2_MIGRATION_REG PPC_BITMASK32(0, 3) +#define EQ_W2_OP_DESC_HI PPC_BITMASK32(4, 31) + uint32_t w3; +#define EQ_W3_OP_DESC_LO PPC_BITMASK32(0, 31) + uint32_t w4; +#define EQ_W4_ESC_EQ_BLOCK PPC_BITMASK32(4, 7) +#define EQ_W4_ESC_EQ_INDEX PPC_BITMASK32(8, 31) + uint32_t w5; +#define EQ_W5_ESC_EQ_DATA PPC_BITMASK32(1, 31) + uint32_t w6; +#define EQ_W6_FORMAT_BIT PPC_BIT32(8) +#define EQ_W6_NVT_BLOCK PPC_BITMASK32(9, 12) +#define EQ_W6_NVT_INDEX PPC_BITMASK32(13, 31) + uint32_t w7; +#define EQ_W7_F0_IGNORE PPC_BIT32(0) +#define EQ_W7_F0_BLK_GROUPING PPC_BIT32(1) +#define EQ_W7_F0_PRIORITY PPC_BITMASK32(8, 15) +#define EQ_W7_F1_WAKEZ PPC_BIT32(0) +#define EQ_W7_F1_LOG_SERVER_ID PPC_BITMASK32(1, 31) +} XiveEQ; + #define XIVE_PRIORITY_MAX 7 void spapr_xive_reset(void *dev); XiveIVE *spapr_xive_get_ive(sPAPRXive *xive, uint32_t lisn); +XiveEQ *spapr_xive_get_eq(sPAPRXive *xive, uint32_t idx); +bool spapr_xive_eq_for_server(sPAPRXive *xive, uint32_t server, uint8_t prio, + uint32_t *out_eq_idx); #endif /* _INTC_XIVE_INTERNAL_H */
The Event Queue Descriptor (EQD) table, also known as Event Notification Descriptor (END), is one of the internal tables the XIVE interrupt controller uses to redirect exception from event sources to CPU threads. The EQD specifies on which Event Queue the event data should be posted when an exception occurs (later on pulled by the OS) and which server (VPD in XIVE terminology) to notify. The Event Queue is a much more complex structure but we start with a simple model for the sPAPR machine. There is one XiveEQ per priority and the model chooses to store them under the Xive Interrupt presenter model. It will be retrieved, just like for XICS, through the 'intc' object pointer of the CPU. The EQ indexing follows a simple pattern: (server << 3) | (priority & 0x7) Signed-off-by: Cédric Le Goater <clg@kaod.org> --- hw/intc/spapr_xive.c | 56 +++++++++++++++++++++++++++++++++++++++++++++++++ hw/intc/xive-internal.h | 50 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 106 insertions(+)