diff mbox series

[for-6.0,4/8] spapr/xive: Add "nr-ipis" property

Message ID 20201120174646.619395-5-groug@kaod.org
State New
Headers show
Series spapr: Address the confusion between IPI numbers and vCPU ids | expand

Commit Message

Greg Kurz Nov. 20, 2020, 5:46 p.m. UTC
The sPAPR XIVE device exposes a range of LISNs that the guest uses
for IPIs. This range is currently sized according to the highest
vCPU id, ie. spapr_max_server_number(), as obtained from the machine
through the "nr_servers" argument of the generic spapr_irq_dt() call.

This makes sense for the "ibm,interrupt-server-ranges" property of
sPAPR XICS, but certainly not for "ibm,xive-lisn-ranges". The range
should be sized to the maximum number of possible vCPUs. It happens
that spapr_max_server_number() == smp.max_cpus in practice with the
machine default settings. This might not be the case though when
VSMT is in use : we can end up with a much larger range (up to 8
times bigger) than needed and waste LISNs. But most importantly, this
lures people into thinking that IPI numbers are always equal to
vCPU ids, which is wrong and bit us recently:

https://lists.gnu.org/archive/html/qemu-devel/2020-11/msg01476.html

Introduce an "nr-ipis" property that the machine sets to smp.max_cpus
before realizing the deice. Use that instead of "nr_servers" in
spapr_xive_dt(). Have the machine to claim the same number of IPIs
in spapr_irq_init().

This doesn't cause any guest visible change when using the machine
default settings (ie. VSMT == smp.threads).

Signed-off-by: Greg Kurz <groug@kaod.org>
---
 include/hw/ppc/spapr_xive.h | 8 ++++++++
 hw/intc/spapr_xive.c        | 4 +++-
 hw/ppc/spapr_irq.c          | 4 +++-
 3 files changed, 14 insertions(+), 2 deletions(-)

Comments

David Gibson Nov. 23, 2020, 4:10 a.m. UTC | #1
On Fri, Nov 20, 2020 at 06:46:42PM +0100, Greg Kurz wrote:
> The sPAPR XIVE device exposes a range of LISNs that the guest uses
> for IPIs. This range is currently sized according to the highest
> vCPU id, ie. spapr_max_server_number(), as obtained from the machine
> through the "nr_servers" argument of the generic spapr_irq_dt() call.
> 
> This makes sense for the "ibm,interrupt-server-ranges" property of
> sPAPR XICS, but certainly not for "ibm,xive-lisn-ranges". The range
> should be sized to the maximum number of possible vCPUs. It happens
> that spapr_max_server_number() == smp.max_cpus in practice with the
> machine default settings. This might not be the case though when
> VSMT is in use : we can end up with a much larger range (up to 8
> times bigger) than needed and waste LISNs. But most importantly, this
> lures people into thinking that IPI numbers are always equal to
> vCPU ids, which is wrong and bit us recently:
> 
> https://lists.gnu.org/archive/html/qemu-devel/2020-11/msg01476.html
> 
> Introduce an "nr-ipis" property that the machine sets to smp.max_cpus
> before realizing the deice. Use that instead of "nr_servers" in
> spapr_xive_dt(). Have the machine to claim the same number of IPIs
> in spapr_irq_init().
> 
> This doesn't cause any guest visible change when using the machine
> default settings (ie. VSMT == smp.threads).
> 
> Signed-off-by: Greg Kurz <groug@kaod.org>

Reviewed-by: David Gibson <david@gibson.dropbear.id.au>

> ---
>  include/hw/ppc/spapr_xive.h | 8 ++++++++
>  hw/intc/spapr_xive.c        | 4 +++-
>  hw/ppc/spapr_irq.c          | 4 +++-
>  3 files changed, 14 insertions(+), 2 deletions(-)
> 
> diff --git a/include/hw/ppc/spapr_xive.h b/include/hw/ppc/spapr_xive.h
> index 7123ea07ed78..69b9fbc1b9a5 100644
> --- a/include/hw/ppc/spapr_xive.h
> +++ b/include/hw/ppc/spapr_xive.h
> @@ -43,6 +43,14 @@ typedef struct SpaprXive {
>  
>      /* DT */
>      gchar *nodename;
> +    /*
> +     * The sPAPR XIVE device needs to know how many vCPUs it
> +     * might be exposed to in order to size the IPI range in
> +     * "ibm,interrupt-server-ranges". It is the purpose of the
> +     * "nr-ipis" property which *must* be set to a non-null
> +     * value before realizing the sPAPR XIVE device.
> +     */
> +    uint32_t nr_ipis;
>  
>      /* Routing table */
>      XiveEAS       *eat;
> diff --git a/hw/intc/spapr_xive.c b/hw/intc/spapr_xive.c
> index e4dbf9c2c409..d13a2955ce9b 100644
> --- a/hw/intc/spapr_xive.c
> +++ b/hw/intc/spapr_xive.c
> @@ -311,6 +311,7 @@ static void spapr_xive_realize(DeviceState *dev, Error **errp)
>      /* Set by spapr_irq_init() */
>      g_assert(xive->nr_irqs);
>      g_assert(xive->nr_servers);
> +    g_assert(xive->nr_ipis);
>  
>      sxc->parent_realize(dev, &local_err);
>      if (local_err) {
> @@ -608,6 +609,7 @@ static Property spapr_xive_properties[] = {
>       */
>      DEFINE_PROP_UINT32("nr-ends", SpaprXive, nr_ends_vmstate, 0),
>      DEFINE_PROP_UINT32("nr-servers", SpaprXive, nr_servers, 0),
> +    DEFINE_PROP_UINT32("nr-ipis", SpaprXive, nr_ipis, 0),
>      DEFINE_PROP_UINT64("vc-base", SpaprXive, vc_base, SPAPR_XIVE_VC_BASE),
>      DEFINE_PROP_UINT64("tm-base", SpaprXive, tm_base, SPAPR_XIVE_TM_BASE),
>      DEFINE_PROP_UINT8("hv-prio", SpaprXive, hv_prio, 7),
> @@ -698,7 +700,7 @@ static void spapr_xive_dt(SpaprInterruptController *intc, uint32_t nr_servers,
>      /* Interrupt number ranges for the IPIs */
>      uint32_t lisn_ranges[] = {
>          cpu_to_be32(SPAPR_IRQ_IPI),
> -        cpu_to_be32(SPAPR_IRQ_IPI + nr_servers),
> +        cpu_to_be32(SPAPR_IRQ_IPI + xive->nr_ipis),
>      };
>      /*
>       * EQ size - the sizes of pages supported by the system 4K, 64K,
> diff --git a/hw/ppc/spapr_irq.c b/hw/ppc/spapr_irq.c
> index 8c5627225636..a0fc474ecb06 100644
> --- a/hw/ppc/spapr_irq.c
> +++ b/hw/ppc/spapr_irq.c
> @@ -325,12 +325,14 @@ void spapr_irq_init(SpaprMachineState *spapr, Error **errp)
>  
>      if (spapr->irq->xive) {
>          uint32_t nr_servers = spapr_max_server_number(spapr);
> +        uint32_t max_cpus = MACHINE(spapr)->smp.max_cpus;
>          DeviceState *dev;
>          int i;
>  
>          dev = qdev_new(TYPE_SPAPR_XIVE);
>          qdev_prop_set_uint32(dev, "nr-irqs", smc->nr_xirqs + SPAPR_XIRQ_BASE);
>          qdev_prop_set_uint32(dev, "nr-servers", nr_servers);
> +        qdev_prop_set_uint32(dev, "nr-ipis", max_cpus);
>          object_property_set_link(OBJECT(dev), "xive-fabric", OBJECT(spapr),
>                                   &error_abort);
>          sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
> @@ -338,7 +340,7 @@ void spapr_irq_init(SpaprMachineState *spapr, Error **errp)
>          spapr->xive = SPAPR_XIVE(dev);
>  
>          /* Enable the CPU IPIs */
> -        for (i = 0; i < nr_servers; ++i) {
> +        for (i = 0; i < max_cpus; ++i) {
>              SpaprInterruptControllerClass *sicc
>                  = SPAPR_INTC_GET_CLASS(spapr->xive);
>
C├ędric Le Goater Nov. 23, 2020, 10:13 a.m. UTC | #2
On 11/20/20 6:46 PM, Greg Kurz wrote:
> The sPAPR XIVE device exposes a range of LISNs that the guest uses
> for IPIs. This range is currently sized according to the highest
> vCPU id, ie. spapr_max_server_number(), as obtained from the machine
> through the "nr_servers" argument of the generic spapr_irq_dt() call.
> 
> This makes sense for the "ibm,interrupt-server-ranges" property of
> sPAPR XICS, but certainly not for "ibm,xive-lisn-ranges". The range
> should be sized to the maximum number of possible vCPUs. It happens
> that spapr_max_server_number() == smp.max_cpus in practice with the
> machine default settings. This might not be the case though when
> VSMT is in use : we can end up with a much larger range (up to 8
> times bigger) than needed and waste LISNs. 

will it exceed 4K ? if not, we are fine.

The fact that it is so complex to get the number of vCPUs is a 
problem by it self to me. Can we simplify that ? or introduce a 
spapr_max_server_number_id() ? 

> But most importantly, this
> lures people into thinking that IPI numbers are always equal to
> vCPU ids, which is wrong and bit us recently:

do we have a converting routing vcpu_id_to_ipi ? I think we have
that in KVM.

> https://lists.gnu.org/archive/html/qemu-devel/2020-11/msg01476.html
> 
> Introduce an "nr-ipis" property that the machine sets to smp.max_cpus
> before realizing the deice. Use that instead of "nr_servers" in
> spapr_xive_dt(). Have the machine to claim the same number of IPIs
> in spapr_irq_init().
> 
> This doesn't cause any guest visible change when using the machine
> default settings (ie. VSMT == smp.threads).>
> Signed-off-by: Greg Kurz <groug@kaod.org>
> ---
>  include/hw/ppc/spapr_xive.h | 8 ++++++++
>  hw/intc/spapr_xive.c        | 4 +++-
>  hw/ppc/spapr_irq.c          | 4 +++-
>  3 files changed, 14 insertions(+), 2 deletions(-)
> 
> diff --git a/include/hw/ppc/spapr_xive.h b/include/hw/ppc/spapr_xive.h
> index 7123ea07ed78..69b9fbc1b9a5 100644
> --- a/include/hw/ppc/spapr_xive.h
> +++ b/include/hw/ppc/spapr_xive.h
> @@ -43,6 +43,14 @@ typedef struct SpaprXive {
>  
>      /* DT */
>      gchar *nodename;
> +    /*
> +     * The sPAPR XIVE device needs to know how many vCPUs it
> +     * might be exposed to in order to size the IPI range in
> +     * "ibm,interrupt-server-ranges". It is the purpose of the

There is no "ibm,interrupt-server-ranges"  property in XIVE

> +     * "nr-ipis" property which *must* be set to a non-null
> +     * value before realizing the sPAPR XIVE device.
> +     */
> +    uint32_t nr_ipis;
>  
>      /* Routing table */
>      XiveEAS       *eat;
> diff --git a/hw/intc/spapr_xive.c b/hw/intc/spapr_xive.c
> index e4dbf9c2c409..d13a2955ce9b 100644
> --- a/hw/intc/spapr_xive.c
> +++ b/hw/intc/spapr_xive.c
> @@ -311,6 +311,7 @@ static void spapr_xive_realize(DeviceState *dev, Error **errp)
>      /* Set by spapr_irq_init() */
>      g_assert(xive->nr_irqs);
>      g_assert(xive->nr_servers);
> +    g_assert(xive->nr_ipis);
>  
>      sxc->parent_realize(dev, &local_err);
>      if (local_err) {
> @@ -608,6 +609,7 @@ static Property spapr_xive_properties[] = {
>       */
>      DEFINE_PROP_UINT32("nr-ends", SpaprXive, nr_ends_vmstate, 0),
>      DEFINE_PROP_UINT32("nr-servers", SpaprXive, nr_servers, 0),
> +    DEFINE_PROP_UINT32("nr-ipis", SpaprXive, nr_ipis, 0),
>      DEFINE_PROP_UINT64("vc-base", SpaprXive, vc_base, SPAPR_XIVE_VC_BASE),
>      DEFINE_PROP_UINT64("tm-base", SpaprXive, tm_base, SPAPR_XIVE_TM_BASE),
>      DEFINE_PROP_UINT8("hv-prio", SpaprXive, hv_prio, 7),
> @@ -698,7 +700,7 @@ static void spapr_xive_dt(SpaprInterruptController *intc, uint32_t nr_servers,
>      /* Interrupt number ranges for the IPIs */
>      uint32_t lisn_ranges[] = {
>          cpu_to_be32(SPAPR_IRQ_IPI),
> -        cpu_to_be32(SPAPR_IRQ_IPI + nr_servers),
> +        cpu_to_be32(SPAPR_IRQ_IPI + xive->nr_ipis),

I don't understand why we need nr_ipis. Can't we pass the same value in 
nr_servers from the machine ?

( Conceptually, the first 4K are all IPIs. The first range is for 
  HW threads ) 


>      };
>      /*
>       * EQ size - the sizes of pages supported by the system 4K, 64K,
> diff --git a/hw/ppc/spapr_irq.c b/hw/ppc/spapr_irq.c
> index 8c5627225636..a0fc474ecb06 100644
> --- a/hw/ppc/spapr_irq.c
> +++ b/hw/ppc/spapr_irq.c
> @@ -325,12 +325,14 @@ void spapr_irq_init(SpaprMachineState *spapr, Error **errp)
>  
>      if (spapr->irq->xive) {
>          uint32_t nr_servers = spapr_max_server_number(spapr);
> +        uint32_t max_cpus = MACHINE(spapr)->smp.max_cpus;

So that's the value we should be using in case of VSMT and not 
spapr_max_server_number(). If I am correct, this is a max_cpu_id ?


>          DeviceState *dev;
>          int i;
>  
>          dev = qdev_new(TYPE_SPAPR_XIVE);
>          qdev_prop_set_uint32(dev, "nr-irqs", smc->nr_xirqs + SPAPR_XIRQ_BASE);
>          qdev_prop_set_uint32(dev, "nr-servers", nr_servers);
> +        qdev_prop_set_uint32(dev, "nr-ipis", max_cpus);
>          object_property_set_link(OBJECT(dev), "xive-fabric", OBJECT(spapr),
>                                   &error_abort);
>          sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
> @@ -338,7 +340,7 @@ void spapr_irq_init(SpaprMachineState *spapr, Error **errp)
>          spapr->xive = SPAPR_XIVE(dev);
>  
>          /* Enable the CPU IPIs */
> -        for (i = 0; i < nr_servers; ++i) {
> +        for (i = 0; i < max_cpus; ++i) {
>              SpaprInterruptControllerClass *sicc
>                  = SPAPR_INTC_GET_CLASS(spapr->xive);
diff mbox series

Patch

diff --git a/include/hw/ppc/spapr_xive.h b/include/hw/ppc/spapr_xive.h
index 7123ea07ed78..69b9fbc1b9a5 100644
--- a/include/hw/ppc/spapr_xive.h
+++ b/include/hw/ppc/spapr_xive.h
@@ -43,6 +43,14 @@  typedef struct SpaprXive {
 
     /* DT */
     gchar *nodename;
+    /*
+     * The sPAPR XIVE device needs to know how many vCPUs it
+     * might be exposed to in order to size the IPI range in
+     * "ibm,interrupt-server-ranges". It is the purpose of the
+     * "nr-ipis" property which *must* be set to a non-null
+     * value before realizing the sPAPR XIVE device.
+     */
+    uint32_t nr_ipis;
 
     /* Routing table */
     XiveEAS       *eat;
diff --git a/hw/intc/spapr_xive.c b/hw/intc/spapr_xive.c
index e4dbf9c2c409..d13a2955ce9b 100644
--- a/hw/intc/spapr_xive.c
+++ b/hw/intc/spapr_xive.c
@@ -311,6 +311,7 @@  static void spapr_xive_realize(DeviceState *dev, Error **errp)
     /* Set by spapr_irq_init() */
     g_assert(xive->nr_irqs);
     g_assert(xive->nr_servers);
+    g_assert(xive->nr_ipis);
 
     sxc->parent_realize(dev, &local_err);
     if (local_err) {
@@ -608,6 +609,7 @@  static Property spapr_xive_properties[] = {
      */
     DEFINE_PROP_UINT32("nr-ends", SpaprXive, nr_ends_vmstate, 0),
     DEFINE_PROP_UINT32("nr-servers", SpaprXive, nr_servers, 0),
+    DEFINE_PROP_UINT32("nr-ipis", SpaprXive, nr_ipis, 0),
     DEFINE_PROP_UINT64("vc-base", SpaprXive, vc_base, SPAPR_XIVE_VC_BASE),
     DEFINE_PROP_UINT64("tm-base", SpaprXive, tm_base, SPAPR_XIVE_TM_BASE),
     DEFINE_PROP_UINT8("hv-prio", SpaprXive, hv_prio, 7),
@@ -698,7 +700,7 @@  static void spapr_xive_dt(SpaprInterruptController *intc, uint32_t nr_servers,
     /* Interrupt number ranges for the IPIs */
     uint32_t lisn_ranges[] = {
         cpu_to_be32(SPAPR_IRQ_IPI),
-        cpu_to_be32(SPAPR_IRQ_IPI + nr_servers),
+        cpu_to_be32(SPAPR_IRQ_IPI + xive->nr_ipis),
     };
     /*
      * EQ size - the sizes of pages supported by the system 4K, 64K,
diff --git a/hw/ppc/spapr_irq.c b/hw/ppc/spapr_irq.c
index 8c5627225636..a0fc474ecb06 100644
--- a/hw/ppc/spapr_irq.c
+++ b/hw/ppc/spapr_irq.c
@@ -325,12 +325,14 @@  void spapr_irq_init(SpaprMachineState *spapr, Error **errp)
 
     if (spapr->irq->xive) {
         uint32_t nr_servers = spapr_max_server_number(spapr);
+        uint32_t max_cpus = MACHINE(spapr)->smp.max_cpus;
         DeviceState *dev;
         int i;
 
         dev = qdev_new(TYPE_SPAPR_XIVE);
         qdev_prop_set_uint32(dev, "nr-irqs", smc->nr_xirqs + SPAPR_XIRQ_BASE);
         qdev_prop_set_uint32(dev, "nr-servers", nr_servers);
+        qdev_prop_set_uint32(dev, "nr-ipis", max_cpus);
         object_property_set_link(OBJECT(dev), "xive-fabric", OBJECT(spapr),
                                  &error_abort);
         sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
@@ -338,7 +340,7 @@  void spapr_irq_init(SpaprMachineState *spapr, Error **errp)
         spapr->xive = SPAPR_XIVE(dev);
 
         /* Enable the CPU IPIs */
-        for (i = 0; i < nr_servers; ++i) {
+        for (i = 0; i < max_cpus; ++i) {
             SpaprInterruptControllerClass *sicc
                 = SPAPR_INTC_GET_CLASS(spapr->xive);