diff mbox series

[v5,09/36] ppc/xive: notify the CPU when the interrupt priority is more privileged

Message ID 20181116105729.23240-10-clg@kaod.org
State New
Headers show
Series ppc: support for the XIVE interrupt controller (POWER9) | expand

Commit Message

Cédric Le Goater Nov. 16, 2018, 10:57 a.m. UTC
After the event data was pushed in the O/S Event Queue, the IVPE
raises the bit corresponding to the priority of the pending interrupt
in the register IBP (Interrupt Pending Buffer) to indicate there is an
event pending in one of the 8 priority queues. The Pending Interrupt
Priority Register (PIPR) is also updated using the IPB. This register
represent the priority of the most favored pending notification.

The PIPR is then compared to the the Current Processor Priority
Register (CPPR). If it is more favored (numerically less than), the
CPU interrupt line is raised and the EO bit of the Notification Source
Register (NSR) is updated to notify the presence of an exception for
the O/S. The check needs to be done whenever the PIPR or the CPPR are
changed.

The O/S acknowledges the interrupt with a special load in the Thread
Interrupt Management Area. If the EO bit of the NSR is set, the CPPR
takes the value of PIPR. The bit number in the IBP corresponding to
the priority of the pending interrupt is reseted and so is the EO bit
of the NSR.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
---
 hw/intc/xive.c | 94 +++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 93 insertions(+), 1 deletion(-)

Comments

David Gibson Nov. 28, 2018, 12:13 a.m. UTC | #1
On Fri, Nov 16, 2018 at 11:57:02AM +0100, Cédric Le Goater wrote:
> After the event data was pushed in the O/S Event Queue, the IVPE
> raises the bit corresponding to the priority of the pending interrupt
> in the register IBP (Interrupt Pending Buffer) to indicate there is an
> event pending in one of the 8 priority queues. The Pending Interrupt
> Priority Register (PIPR) is also updated using the IPB. This register
> represent the priority of the most favored pending notification.
> 
> The PIPR is then compared to the the Current Processor Priority
> Register (CPPR). If it is more favored (numerically less than), the
> CPU interrupt line is raised and the EO bit of the Notification Source
> Register (NSR) is updated to notify the presence of an exception for
> the O/S. The check needs to be done whenever the PIPR or the CPPR are
> changed.
> 
> The O/S acknowledges the interrupt with a special load in the Thread
> Interrupt Management Area. If the EO bit of the NSR is set, the CPPR
> takes the value of PIPR. The bit number in the IBP corresponding to
> the priority of the pending interrupt is reseted and so is the EO bit
> of the NSR.
> 
> Signed-off-by: Cédric Le Goater <clg@kaod.org>
> ---
>  hw/intc/xive.c | 94 +++++++++++++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 93 insertions(+), 1 deletion(-)
> 
> diff --git a/hw/intc/xive.c b/hw/intc/xive.c
> index 5ba3b06e6e25..c49932d2b799 100644
> --- a/hw/intc/xive.c
> +++ b/hw/intc/xive.c
> @@ -21,9 +21,73 @@
>   * XIVE Thread Interrupt Management context
>   */
>  
> +/* Convert a priority number to an Interrupt Pending Buffer (IPB)
> + * register, which indicates a pending interrupt at the priority
> + * corresponding to the bit number
> + */
> +static uint8_t priority_to_ipb(uint8_t priority)
> +{
> +    return priority > XIVE_PRIORITY_MAX ?
> +        0 : 1 << (XIVE_PRIORITY_MAX - priority);
> +}
> +
> +/* Convert an Interrupt Pending Buffer (IPB) register to a Pending
> + * Interrupt Priority Register (PIPR), which contains the priority of
> + * the most favored pending notification.
> + */
> +static uint8_t ipb_to_pipr(uint8_t ibp)
> +{
> +    return ibp ? clz32((uint32_t)ibp << 24) : 0xff;
> +}
> +
> +static void ipb_update(uint8_t *regs, uint8_t priority)
> +{
> +    regs[TM_IPB] |= priority_to_ipb(priority);
> +    regs[TM_PIPR] = ipb_to_pipr(regs[TM_IPB]);
> +}
> +
> +static uint8_t exception_mask(uint8_t ring)
> +{
> +    switch (ring) {
> +    case TM_QW1_OS:
> +        return TM_QW1_NSR_EO;
> +    default:
> +        g_assert_not_reached();
> +    }
> +}
> +
>  static uint64_t xive_tctx_accept(XiveTCTX *tctx, uint8_t ring)
>  {
> -    return 0;
> +    uint8_t *regs = &tctx->regs[ring];
> +    uint8_t nsr = regs[TM_NSR];
> +    uint8_t mask = exception_mask(ring);
> +
> +    qemu_irq_lower(tctx->output);
> +
> +    if (regs[TM_NSR] & mask) {
> +        uint8_t cppr = regs[TM_PIPR];
> +
> +        regs[TM_CPPR] = cppr;
> +
> +        /* Reset the pending buffer bit */
> +        regs[TM_IPB] &= ~priority_to_ipb(cppr);
> +        regs[TM_PIPR] = ipb_to_pipr(regs[TM_IPB]);
> +
> +        /* Drop Exception bit */
> +        regs[TM_NSR] &= ~mask;
> +    }
> +
> +    return (nsr << 8) | regs[TM_CPPR];

Don't you need a cast to avoid (nsr << 8) being a shift-wider-than-size?

> +}
> +
> +static void xive_tctx_notify(XiveTCTX *tctx, uint8_t ring)
> +{
> +    uint8_t *regs = &tctx->regs[ring];
> +
> +    if (regs[TM_PIPR] < regs[TM_CPPR]) {
> +        regs[TM_NSR] |= exception_mask(ring);
> +        qemu_irq_raise(tctx->output);
> +    }
>  }
>  
>  static void xive_tctx_set_cppr(XiveTCTX *tctx, uint8_t ring, uint8_t cppr)
> @@ -33,6 +97,9 @@ static void xive_tctx_set_cppr(XiveTCTX *tctx, uint8_t ring, uint8_t cppr)
>      }
>  
>      tctx->regs[ring + TM_CPPR] = cppr;
> +
> +    /* CPPR has changed, check if we need to raise a pending exception */
> +    xive_tctx_notify(tctx, ring);
>  }
>  
>  /*
> @@ -198,6 +265,17 @@ static void xive_tm_set_os_cppr(XiveTCTX *tctx, hwaddr offset,
>      xive_tctx_set_cppr(tctx, TM_QW1_OS, value & 0xff);
>  }
>  
> +/*
> + * Adjust the IPB to allow a CPU to process event queues of other
> + * priorities during one physical interrupt cycle.
> + */
> +static void xive_tm_set_os_pending(XiveTCTX *tctx, hwaddr offset,
> +                                   uint64_t value, unsigned size)
> +{
> +    ipb_update(&tctx->regs[TM_QW1_OS], value & 0xff);
> +    xive_tctx_notify(tctx, TM_QW1_OS);
> +}
> +
>  /*
>   * Define a mapping of "special" operations depending on the TIMA page
>   * offset and the size of the operation.
> @@ -220,6 +298,7 @@ static const XiveTmOp xive_tm_operations[] = {
>  
>      /* MMIOs above 2K : special operations with side effects */
>      { XIVE_TM_OS_PAGE, TM_SPC_ACK_OS_REG,     2, NULL, xive_tm_ack_os_reg },
> +    { XIVE_TM_OS_PAGE, TM_SPC_SET_OS_PENDING, 1, xive_tm_set_os_pending, NULL },
>  };
>  
>  static const XiveTmOp *xive_tm_find_op(hwaddr offset, unsigned size, bool write)
> @@ -409,6 +488,13 @@ static void xive_tctx_reset(void *dev)
>      tctx->regs[TM_QW1_OS + TM_LSMFB] = 0xFF;
>      tctx->regs[TM_QW1_OS + TM_ACK_CNT] = 0xFF;
>      tctx->regs[TM_QW1_OS + TM_AGE] = 0xFF;
> +
> +    /*
> +     * Initialize PIPR to 0xFF to avoid phantom interrupts when the
> +     * CPPR is first set.
> +     */
> +    tctx->regs[TM_QW1_OS + TM_PIPR] =
> +        ipb_to_pipr(tctx->regs[TM_QW1_OS + TM_IPB]);
>  }
>  
>  static void xive_tctx_realize(DeviceState *dev, Error **errp)
> @@ -1218,9 +1304,15 @@ static void xive_presenter_notify(XiveRouter *xrtr, uint8_t format,
>      found = xive_presenter_match(xrtr, format, nvt_blk, nvt_idx, cam_ignore,
>                                   priority, logic_serv, &match);
>      if (found) {
> +        ipb_update(&match.tctx->regs[match.ring], priority);
> +        xive_tctx_notify(match.tctx, match.ring);
>          return;
>      }
>  
> +    /* Record the IPB in the associated NVT structure */
> +    ipb_update((uint8_t *) &nvt.w4, priority);
> +    xive_router_set_nvt(xrtr, nvt_blk, nvt_idx, &nvt);

You're only writing back the NVT in the !found case.  Don't you still
need to update it in the found case?

>      /* If no matching NVT is dispatched on a HW thread :
>       * - update the NVT structure if backlog is activated
>       * - escalate (ESe PQ bits and EAS in w4-5) if escalation is
Benjamin Herrenschmidt Nov. 28, 2018, 2:32 a.m. UTC | #2
On Wed, 2018-11-28 at 11:13 +1100, David Gibson wrote:
> Don't you need a cast to avoid (nsr << 8) being a shift-wider-than-size?

Shouldn't be a problem as long as it fits in an int, no ?

Cheers,
Ben.
David Gibson Nov. 28, 2018, 2:41 a.m. UTC | #3
On Wed, Nov 28, 2018 at 01:32:21PM +1100, Benjamin Herrenschmidt wrote:
> On Wed, 2018-11-28 at 11:13 +1100, David Gibson wrote:
> > Don't you need a cast to avoid (nsr << 8) being a shift-wider-than-size?
> 
> Shouldn't be a problem as long as it fits in an int, no ?

I dunno, I can never remember the rules about when C extends and when
it doesn't.  I'm not sure anybody does, which is kind of the point.
Eric Blake Nov. 28, 2018, 3 a.m. UTC | #4
On 11/27/18 8:41 PM, David Gibson wrote:
> On Wed, Nov 28, 2018 at 01:32:21PM +1100, Benjamin Herrenschmidt wrote:
>> On Wed, 2018-11-28 at 11:13 +1100, David Gibson wrote:
>>> Don't you need a cast to avoid (nsr << 8) being a shift-wider-than-size?
>>
>> Shouldn't be a problem as long as it fits in an int, no ?
> 
> I dunno, I can never remember the rules about when C extends and when
> it doesn't.  I'm not sure anybody does, which is kind of the point.

As soon as you perform arithmetic on a type narrower than int, it is 
first promoted to at least int (possibly wider, depending on the type of 
the other operand to the binary operator).  And, since C guarantees that 
int is larger than 8 bits, 'uint8_t << 8' and 'int8_t << 8' are both 
well-defined, without needing a cast of nsr.
Cédric Le Goater Nov. 28, 2018, 11:30 a.m. UTC | #5
On 11/28/18 1:13 AM, David Gibson wrote:
> On Fri, Nov 16, 2018 at 11:57:02AM +0100, Cédric Le Goater wrote:
>> After the event data was pushed in the O/S Event Queue, the IVPE
>> raises the bit corresponding to the priority of the pending interrupt
>> in the register IBP (Interrupt Pending Buffer) to indicate there is an
>> event pending in one of the 8 priority queues. The Pending Interrupt
>> Priority Register (PIPR) is also updated using the IPB. This register
>> represent the priority of the most favored pending notification.
>>
>> The PIPR is then compared to the the Current Processor Priority
>> Register (CPPR). If it is more favored (numerically less than), the
>> CPU interrupt line is raised and the EO bit of the Notification Source
>> Register (NSR) is updated to notify the presence of an exception for
>> the O/S. The check needs to be done whenever the PIPR or the CPPR are
>> changed.
>>
>> The O/S acknowledges the interrupt with a special load in the Thread
>> Interrupt Management Area. If the EO bit of the NSR is set, the CPPR
>> takes the value of PIPR. The bit number in the IBP corresponding to
>> the priority of the pending interrupt is reseted and so is the EO bit
>> of the NSR.
>>
>> Signed-off-by: Cédric Le Goater <clg@kaod.org>
>> ---
>>  hw/intc/xive.c | 94 +++++++++++++++++++++++++++++++++++++++++++++++++-
>>  1 file changed, 93 insertions(+), 1 deletion(-)
>>
>> diff --git a/hw/intc/xive.c b/hw/intc/xive.c
>> index 5ba3b06e6e25..c49932d2b799 100644
>> --- a/hw/intc/xive.c
>> +++ b/hw/intc/xive.c
>> @@ -21,9 +21,73 @@
>>   * XIVE Thread Interrupt Management context
>>   */
>>  
>> +/* Convert a priority number to an Interrupt Pending Buffer (IPB)
>> + * register, which indicates a pending interrupt at the priority
>> + * corresponding to the bit number
>> + */
>> +static uint8_t priority_to_ipb(uint8_t priority)
>> +{
>> +    return priority > XIVE_PRIORITY_MAX ?
>> +        0 : 1 << (XIVE_PRIORITY_MAX - priority);
>> +}
>> +
>> +/* Convert an Interrupt Pending Buffer (IPB) register to a Pending
>> + * Interrupt Priority Register (PIPR), which contains the priority of
>> + * the most favored pending notification.
>> + */
>> +static uint8_t ipb_to_pipr(uint8_t ibp)
>> +{
>> +    return ibp ? clz32((uint32_t)ibp << 24) : 0xff;
>> +}
>> +
>> +static void ipb_update(uint8_t *regs, uint8_t priority)
>> +{
>> +    regs[TM_IPB] |= priority_to_ipb(priority);
>> +    regs[TM_PIPR] = ipb_to_pipr(regs[TM_IPB]);
>> +}
>> +
>> +static uint8_t exception_mask(uint8_t ring)
>> +{
>> +    switch (ring) {
>> +    case TM_QW1_OS:
>> +        return TM_QW1_NSR_EO;
>> +    default:
>> +        g_assert_not_reached();
>> +    }
>> +}
>> +
>>  static uint64_t xive_tctx_accept(XiveTCTX *tctx, uint8_t ring)
>>  {
>> -    return 0;
>> +    uint8_t *regs = &tctx->regs[ring];
>> +    uint8_t nsr = regs[TM_NSR];
>> +    uint8_t mask = exception_mask(ring);
>> +
>> +    qemu_irq_lower(tctx->output);
>> +
>> +    if (regs[TM_NSR] & mask) {
>> +        uint8_t cppr = regs[TM_PIPR];
>> +
>> +        regs[TM_CPPR] = cppr;
>> +
>> +        /* Reset the pending buffer bit */
>> +        regs[TM_IPB] &= ~priority_to_ipb(cppr);
>> +        regs[TM_PIPR] = ipb_to_pipr(regs[TM_IPB]);
>> +
>> +        /* Drop Exception bit */
>> +        regs[TM_NSR] &= ~mask;
>> +    }
>> +
>> +    return (nsr << 8) | regs[TM_CPPR];
> 
> Don't you need a cast to avoid (nsr << 8) being a shift-wider-than-size?

I will check. 

> 
>> +}
>> +
>> +static void xive_tctx_notify(XiveTCTX *tctx, uint8_t ring)
>> +{
>> +    uint8_t *regs = &tctx->regs[ring];
>> +
>> +    if (regs[TM_PIPR] < regs[TM_CPPR]) {
>> +        regs[TM_NSR] |= exception_mask(ring);
>> +        qemu_irq_raise(tctx->output);
>> +    }
>>  }
>>  
>>  static void xive_tctx_set_cppr(XiveTCTX *tctx, uint8_t ring, uint8_t cppr)
>> @@ -33,6 +97,9 @@ static void xive_tctx_set_cppr(XiveTCTX *tctx, uint8_t ring, uint8_t cppr)
>>      }
>>  
>>      tctx->regs[ring + TM_CPPR] = cppr;
>> +
>> +    /* CPPR has changed, check if we need to raise a pending exception */
>> +    xive_tctx_notify(tctx, ring);
>>  }
>>  
>>  /*
>> @@ -198,6 +265,17 @@ static void xive_tm_set_os_cppr(XiveTCTX *tctx, hwaddr offset,
>>      xive_tctx_set_cppr(tctx, TM_QW1_OS, value & 0xff);
>>  }
>>  
>> +/*
>> + * Adjust the IPB to allow a CPU to process event queues of other
>> + * priorities during one physical interrupt cycle.
>> + */
>> +static void xive_tm_set_os_pending(XiveTCTX *tctx, hwaddr offset,
>> +                                   uint64_t value, unsigned size)
>> +{
>> +    ipb_update(&tctx->regs[TM_QW1_OS], value & 0xff);
>> +    xive_tctx_notify(tctx, TM_QW1_OS);
>> +}
>> +
>>  /*
>>   * Define a mapping of "special" operations depending on the TIMA page
>>   * offset and the size of the operation.
>> @@ -220,6 +298,7 @@ static const XiveTmOp xive_tm_operations[] = {
>>  
>>      /* MMIOs above 2K : special operations with side effects */
>>      { XIVE_TM_OS_PAGE, TM_SPC_ACK_OS_REG,     2, NULL, xive_tm_ack_os_reg },
>> +    { XIVE_TM_OS_PAGE, TM_SPC_SET_OS_PENDING, 1, xive_tm_set_os_pending, NULL },
>>  };
>>  
>>  static const XiveTmOp *xive_tm_find_op(hwaddr offset, unsigned size, bool write)
>> @@ -409,6 +488,13 @@ static void xive_tctx_reset(void *dev)
>>      tctx->regs[TM_QW1_OS + TM_LSMFB] = 0xFF;
>>      tctx->regs[TM_QW1_OS + TM_ACK_CNT] = 0xFF;
>>      tctx->regs[TM_QW1_OS + TM_AGE] = 0xFF;
>> +
>> +    /*
>> +     * Initialize PIPR to 0xFF to avoid phantom interrupts when the
>> +     * CPPR is first set.
>> +     */
>> +    tctx->regs[TM_QW1_OS + TM_PIPR] =
>> +        ipb_to_pipr(tctx->regs[TM_QW1_OS + TM_IPB]);
>>  }
>>  
>>  static void xive_tctx_realize(DeviceState *dev, Error **errp)
>> @@ -1218,9 +1304,15 @@ static void xive_presenter_notify(XiveRouter *xrtr, uint8_t format,
>>      found = xive_presenter_match(xrtr, format, nvt_blk, nvt_idx, cam_ignore,
>>                                   priority, logic_serv, &match);
>>      if (found) {
>> +        ipb_update(&match.tctx->regs[match.ring], priority);
>> +        xive_tctx_notify(match.tctx, match.ring);
>>          return;
>>      }
>>  
>> +    /* Record the IPB in the associated NVT structure */
>> +    ipb_update((uint8_t *) &nvt.w4, priority);
>> +    xive_router_set_nvt(xrtr, nvt_blk, nvt_idx, &nvt);
> 
> You're only writing back the NVT in the !found case.  Don't you still
> need to update it in the found case?

I would say no unless we add support for redistribution which would
mean the model supports logical servers. 

These are much more complex scenarios in which the IPVE returns multiple 
matching targets, the IVRE selects one but then the context changes.
 

C.

> 
>>      /* If no matching NVT is dispatched on a HW thread :
>>       * - update the NVT structure if backlog is activated
>>       * - escalate (ESe PQ bits and EAS in w4-5) if escalation is
>
David Gibson Nov. 29, 2018, 12:49 a.m. UTC | #6
On Wed, Nov 28, 2018 at 12:30:45PM +0100, Cédric Le Goater wrote:
> On 11/28/18 1:13 AM, David Gibson wrote:
> > On Fri, Nov 16, 2018 at 11:57:02AM +0100, Cédric Le Goater wrote:
> >> After the event data was pushed in the O/S Event Queue, the IVPE
> >> raises the bit corresponding to the priority of the pending interrupt
> >> in the register IBP (Interrupt Pending Buffer) to indicate there is an
> >> event pending in one of the 8 priority queues. The Pending Interrupt
> >> Priority Register (PIPR) is also updated using the IPB. This register
> >> represent the priority of the most favored pending notification.
> >>
> >> The PIPR is then compared to the the Current Processor Priority
> >> Register (CPPR). If it is more favored (numerically less than), the
> >> CPU interrupt line is raised and the EO bit of the Notification Source
> >> Register (NSR) is updated to notify the presence of an exception for
> >> the O/S. The check needs to be done whenever the PIPR or the CPPR are
> >> changed.
> >>
> >> The O/S acknowledges the interrupt with a special load in the Thread
> >> Interrupt Management Area. If the EO bit of the NSR is set, the CPPR
> >> takes the value of PIPR. The bit number in the IBP corresponding to
> >> the priority of the pending interrupt is reseted and so is the EO bit
> >> of the NSR.
> >>
> >> Signed-off-by: Cédric Le Goater <clg@kaod.org>
> >> ---
> >>  hw/intc/xive.c | 94 +++++++++++++++++++++++++++++++++++++++++++++++++-
> >>  1 file changed, 93 insertions(+), 1 deletion(-)
> >>
> >> diff --git a/hw/intc/xive.c b/hw/intc/xive.c
> >> index 5ba3b06e6e25..c49932d2b799 100644
> >> --- a/hw/intc/xive.c
> >> +++ b/hw/intc/xive.c
> >> @@ -21,9 +21,73 @@
> >>   * XIVE Thread Interrupt Management context
> >>   */
> >>  
> >> +/* Convert a priority number to an Interrupt Pending Buffer (IPB)
> >> + * register, which indicates a pending interrupt at the priority
> >> + * corresponding to the bit number
> >> + */
> >> +static uint8_t priority_to_ipb(uint8_t priority)
> >> +{
> >> +    return priority > XIVE_PRIORITY_MAX ?
> >> +        0 : 1 << (XIVE_PRIORITY_MAX - priority);
> >> +}
> >> +
> >> +/* Convert an Interrupt Pending Buffer (IPB) register to a Pending
> >> + * Interrupt Priority Register (PIPR), which contains the priority of
> >> + * the most favored pending notification.
> >> + */
> >> +static uint8_t ipb_to_pipr(uint8_t ibp)
> >> +{
> >> +    return ibp ? clz32((uint32_t)ibp << 24) : 0xff;
> >> +}
> >> +
> >> +static void ipb_update(uint8_t *regs, uint8_t priority)
> >> +{
> >> +    regs[TM_IPB] |= priority_to_ipb(priority);
> >> +    regs[TM_PIPR] = ipb_to_pipr(regs[TM_IPB]);
> >> +}
> >> +
> >> +static uint8_t exception_mask(uint8_t ring)
> >> +{
> >> +    switch (ring) {
> >> +    case TM_QW1_OS:
> >> +        return TM_QW1_NSR_EO;
> >> +    default:
> >> +        g_assert_not_reached();
> >> +    }
> >> +}
> >> +
> >>  static uint64_t xive_tctx_accept(XiveTCTX *tctx, uint8_t ring)
> >>  {
> >> -    return 0;
> >> +    uint8_t *regs = &tctx->regs[ring];
> >> +    uint8_t nsr = regs[TM_NSR];
> >> +    uint8_t mask = exception_mask(ring);
> >> +
> >> +    qemu_irq_lower(tctx->output);
> >> +
> >> +    if (regs[TM_NSR] & mask) {
> >> +        uint8_t cppr = regs[TM_PIPR];
> >> +
> >> +        regs[TM_CPPR] = cppr;
> >> +
> >> +        /* Reset the pending buffer bit */
> >> +        regs[TM_IPB] &= ~priority_to_ipb(cppr);
> >> +        regs[TM_PIPR] = ipb_to_pipr(regs[TM_IPB]);
> >> +
> >> +        /* Drop Exception bit */
> >> +        regs[TM_NSR] &= ~mask;
> >> +    }
> >> +
> >> +    return (nsr << 8) | regs[TM_CPPR];
> > 
> > Don't you need a cast to avoid (nsr << 8) being a shift-wider-than-size?
> 
> I will check.

According to Eric, it doesn't, and given the compiler isn't
complaining I'm pretty sure that's right.  Makes me a bit nervous
though.

> 
> > 
> >> +}
> >> +
> >> +static void xive_tctx_notify(XiveTCTX *tctx, uint8_t ring)
> >> +{
> >> +    uint8_t *regs = &tctx->regs[ring];
> >> +
> >> +    if (regs[TM_PIPR] < regs[TM_CPPR]) {
> >> +        regs[TM_NSR] |= exception_mask(ring);
> >> +        qemu_irq_raise(tctx->output);
> >> +    }
> >>  }
> >>  
> >>  static void xive_tctx_set_cppr(XiveTCTX *tctx, uint8_t ring, uint8_t cppr)
> >> @@ -33,6 +97,9 @@ static void xive_tctx_set_cppr(XiveTCTX *tctx, uint8_t ring, uint8_t cppr)
> >>      }
> >>  
> >>      tctx->regs[ring + TM_CPPR] = cppr;
> >> +
> >> +    /* CPPR has changed, check if we need to raise a pending exception */
> >> +    xive_tctx_notify(tctx, ring);
> >>  }
> >>  
> >>  /*
> >> @@ -198,6 +265,17 @@ static void xive_tm_set_os_cppr(XiveTCTX *tctx, hwaddr offset,
> >>      xive_tctx_set_cppr(tctx, TM_QW1_OS, value & 0xff);
> >>  }
> >>  
> >> +/*
> >> + * Adjust the IPB to allow a CPU to process event queues of other
> >> + * priorities during one physical interrupt cycle.
> >> + */
> >> +static void xive_tm_set_os_pending(XiveTCTX *tctx, hwaddr offset,
> >> +                                   uint64_t value, unsigned size)
> >> +{
> >> +    ipb_update(&tctx->regs[TM_QW1_OS], value & 0xff);
> >> +    xive_tctx_notify(tctx, TM_QW1_OS);
> >> +}
> >> +
> >>  /*
> >>   * Define a mapping of "special" operations depending on the TIMA page
> >>   * offset and the size of the operation.
> >> @@ -220,6 +298,7 @@ static const XiveTmOp xive_tm_operations[] = {
> >>  
> >>      /* MMIOs above 2K : special operations with side effects */
> >>      { XIVE_TM_OS_PAGE, TM_SPC_ACK_OS_REG,     2, NULL, xive_tm_ack_os_reg },
> >> +    { XIVE_TM_OS_PAGE, TM_SPC_SET_OS_PENDING, 1, xive_tm_set_os_pending, NULL },
> >>  };
> >>  
> >>  static const XiveTmOp *xive_tm_find_op(hwaddr offset, unsigned size, bool write)
> >> @@ -409,6 +488,13 @@ static void xive_tctx_reset(void *dev)
> >>      tctx->regs[TM_QW1_OS + TM_LSMFB] = 0xFF;
> >>      tctx->regs[TM_QW1_OS + TM_ACK_CNT] = 0xFF;
> >>      tctx->regs[TM_QW1_OS + TM_AGE] = 0xFF;
> >> +
> >> +    /*
> >> +     * Initialize PIPR to 0xFF to avoid phantom interrupts when the
> >> +     * CPPR is first set.
> >> +     */
> >> +    tctx->regs[TM_QW1_OS + TM_PIPR] =
> >> +        ipb_to_pipr(tctx->regs[TM_QW1_OS + TM_IPB]);
> >>  }
> >>  
> >>  static void xive_tctx_realize(DeviceState *dev, Error **errp)
> >> @@ -1218,9 +1304,15 @@ static void xive_presenter_notify(XiveRouter *xrtr, uint8_t format,
> >>      found = xive_presenter_match(xrtr, format, nvt_blk, nvt_idx, cam_ignore,
> >>                                   priority, logic_serv, &match);
> >>      if (found) {
> >> +        ipb_update(&match.tctx->regs[match.ring], priority);
> >> +        xive_tctx_notify(match.tctx, match.ring);
> >>          return;
> >>      }
> >>  
> >> +    /* Record the IPB in the associated NVT structure */
> >> +    ipb_update((uint8_t *) &nvt.w4, priority);
> >> +    xive_router_set_nvt(xrtr, nvt_blk, nvt_idx, &nvt);
> > 
> > You're only writing back the NVT in the !found case.  Don't you still
> > need to update it in the found case?
> 
> I would say no unless we add support for redistribution which would
> mean the model supports logical servers. 

Oh, sorry, I think I missed that the ipb_update() was only touching
the NVT in the !found case.

> These are much more complex scenarios in which the IPVE returns multiple 
> matching targets, the IVRE selects one but then the context changes.
>  
> 
> C.
> 
> > 
> >>      /* If no matching NVT is dispatched on a HW thread :
> >>       * - update the NVT structure if backlog is activated
> >>       * - escalate (ESe PQ bits and EAS in w4-5) if escalation is
> > 
>
diff mbox series

Patch

diff --git a/hw/intc/xive.c b/hw/intc/xive.c
index 5ba3b06e6e25..c49932d2b799 100644
--- a/hw/intc/xive.c
+++ b/hw/intc/xive.c
@@ -21,9 +21,73 @@ 
  * XIVE Thread Interrupt Management context
  */
 
+/* Convert a priority number to an Interrupt Pending Buffer (IPB)
+ * register, which indicates a pending interrupt at the priority
+ * corresponding to the bit number
+ */
+static uint8_t priority_to_ipb(uint8_t priority)
+{
+    return priority > XIVE_PRIORITY_MAX ?
+        0 : 1 << (XIVE_PRIORITY_MAX - priority);
+}
+
+/* Convert an Interrupt Pending Buffer (IPB) register to a Pending
+ * Interrupt Priority Register (PIPR), which contains the priority of
+ * the most favored pending notification.
+ */
+static uint8_t ipb_to_pipr(uint8_t ibp)
+{
+    return ibp ? clz32((uint32_t)ibp << 24) : 0xff;
+}
+
+static void ipb_update(uint8_t *regs, uint8_t priority)
+{
+    regs[TM_IPB] |= priority_to_ipb(priority);
+    regs[TM_PIPR] = ipb_to_pipr(regs[TM_IPB]);
+}
+
+static uint8_t exception_mask(uint8_t ring)
+{
+    switch (ring) {
+    case TM_QW1_OS:
+        return TM_QW1_NSR_EO;
+    default:
+        g_assert_not_reached();
+    }
+}
+
 static uint64_t xive_tctx_accept(XiveTCTX *tctx, uint8_t ring)
 {
-    return 0;
+    uint8_t *regs = &tctx->regs[ring];
+    uint8_t nsr = regs[TM_NSR];
+    uint8_t mask = exception_mask(ring);
+
+    qemu_irq_lower(tctx->output);
+
+    if (regs[TM_NSR] & mask) {
+        uint8_t cppr = regs[TM_PIPR];
+
+        regs[TM_CPPR] = cppr;
+
+        /* Reset the pending buffer bit */
+        regs[TM_IPB] &= ~priority_to_ipb(cppr);
+        regs[TM_PIPR] = ipb_to_pipr(regs[TM_IPB]);
+
+        /* Drop Exception bit */
+        regs[TM_NSR] &= ~mask;
+    }
+
+    return (nsr << 8) | regs[TM_CPPR];
+}
+
+static void xive_tctx_notify(XiveTCTX *tctx, uint8_t ring)
+{
+    uint8_t *regs = &tctx->regs[ring];
+
+    if (regs[TM_PIPR] < regs[TM_CPPR]) {
+        regs[TM_NSR] |= exception_mask(ring);
+        qemu_irq_raise(tctx->output);
+    }
 }
 
 static void xive_tctx_set_cppr(XiveTCTX *tctx, uint8_t ring, uint8_t cppr)
@@ -33,6 +97,9 @@  static void xive_tctx_set_cppr(XiveTCTX *tctx, uint8_t ring, uint8_t cppr)
     }
 
     tctx->regs[ring + TM_CPPR] = cppr;
+
+    /* CPPR has changed, check if we need to raise a pending exception */
+    xive_tctx_notify(tctx, ring);
 }
 
 /*
@@ -198,6 +265,17 @@  static void xive_tm_set_os_cppr(XiveTCTX *tctx, hwaddr offset,
     xive_tctx_set_cppr(tctx, TM_QW1_OS, value & 0xff);
 }
 
+/*
+ * Adjust the IPB to allow a CPU to process event queues of other
+ * priorities during one physical interrupt cycle.
+ */
+static void xive_tm_set_os_pending(XiveTCTX *tctx, hwaddr offset,
+                                   uint64_t value, unsigned size)
+{
+    ipb_update(&tctx->regs[TM_QW1_OS], value & 0xff);
+    xive_tctx_notify(tctx, TM_QW1_OS);
+}
+
 /*
  * Define a mapping of "special" operations depending on the TIMA page
  * offset and the size of the operation.
@@ -220,6 +298,7 @@  static const XiveTmOp xive_tm_operations[] = {
 
     /* MMIOs above 2K : special operations with side effects */
     { XIVE_TM_OS_PAGE, TM_SPC_ACK_OS_REG,     2, NULL, xive_tm_ack_os_reg },
+    { XIVE_TM_OS_PAGE, TM_SPC_SET_OS_PENDING, 1, xive_tm_set_os_pending, NULL },
 };
 
 static const XiveTmOp *xive_tm_find_op(hwaddr offset, unsigned size, bool write)
@@ -409,6 +488,13 @@  static void xive_tctx_reset(void *dev)
     tctx->regs[TM_QW1_OS + TM_LSMFB] = 0xFF;
     tctx->regs[TM_QW1_OS + TM_ACK_CNT] = 0xFF;
     tctx->regs[TM_QW1_OS + TM_AGE] = 0xFF;
+
+    /*
+     * Initialize PIPR to 0xFF to avoid phantom interrupts when the
+     * CPPR is first set.
+     */
+    tctx->regs[TM_QW1_OS + TM_PIPR] =
+        ipb_to_pipr(tctx->regs[TM_QW1_OS + TM_IPB]);
 }
 
 static void xive_tctx_realize(DeviceState *dev, Error **errp)
@@ -1218,9 +1304,15 @@  static void xive_presenter_notify(XiveRouter *xrtr, uint8_t format,
     found = xive_presenter_match(xrtr, format, nvt_blk, nvt_idx, cam_ignore,
                                  priority, logic_serv, &match);
     if (found) {
+        ipb_update(&match.tctx->regs[match.ring], priority);
+        xive_tctx_notify(match.tctx, match.ring);
         return;
     }
 
+    /* Record the IPB in the associated NVT structure */
+    ipb_update((uint8_t *) &nvt.w4, priority);
+    xive_router_set_nvt(xrtr, nvt_blk, nvt_idx, &nvt);
+
     /* If no matching NVT is dispatched on a HW thread :
      * - update the NVT structure if backlog is activated
      * - escalate (ESe PQ bits and EAS in w4-5) if escalation is