
[for-6.0,2/8] spapr/xive: Introduce spapr_xive_nr_ends()

Message ID 20201120174646.619395-3-groug@kaod.org
State New
Series spapr: Address the confusion between IPI numbers and vCPU ids

Commit Message

Greg Kurz Nov. 20, 2020, 5:46 p.m. UTC
We're going to kill the "nr_ends" field in a subsequent patch.
Prepare the ground by using a helper instead of peeking into
the sPAPR XIVE structure directly.

Signed-off-by: Greg Kurz <groug@kaod.org>
---
 include/hw/ppc/spapr_xive.h |  1 +
 hw/intc/spapr_xive.c        | 23 ++++++++++++++---------
 hw/intc/spapr_xive_kvm.c    |  4 ++--
 3 files changed, 17 insertions(+), 11 deletions(-)

Comments

David Gibson Nov. 23, 2020, 3:33 a.m. UTC | #1
On Fri, Nov 20, 2020 at 06:46:40PM +0100, Greg Kurz wrote:
> We're going to kill the "nr_ends" field in a subsequent patch.
> Prepare ground by using an helper instead of peeking into
> the sPAPR XIVE structure directly.
> 
> Signed-off-by: Greg Kurz <groug@kaod.org>

Applied to ppc-for-6.0, thanks.


> ---
>  include/hw/ppc/spapr_xive.h |  1 +
>  hw/intc/spapr_xive.c        | 23 ++++++++++++++---------
>  hw/intc/spapr_xive_kvm.c    |  4 ++--
>  3 files changed, 17 insertions(+), 11 deletions(-)
> 
> diff --git a/include/hw/ppc/spapr_xive.h b/include/hw/ppc/spapr_xive.h
> index 26c8d90d7196..4b967f13c030 100644
> --- a/include/hw/ppc/spapr_xive.h
> +++ b/include/hw/ppc/spapr_xive.h
> @@ -75,6 +75,7 @@ void spapr_xive_map_mmio(SpaprXive *xive);
>  
>  int spapr_xive_end_to_target(uint8_t end_blk, uint32_t end_idx,
>                               uint32_t *out_server, uint8_t *out_prio);
> +uint32_t spapr_xive_nr_ends(const SpaprXive *xive);
>  
>  /*
>   * KVM XIVE device helpers
> diff --git a/hw/intc/spapr_xive.c b/hw/intc/spapr_xive.c
> index 60e0d5769dcc..f473ad9cba47 100644
> --- a/hw/intc/spapr_xive.c
> +++ b/hw/intc/spapr_xive.c
> @@ -192,7 +192,7 @@ void spapr_xive_pic_print_info(SpaprXive *xive, Monitor *mon)
>              uint32_t end_idx = xive_get_field64(EAS_END_INDEX, eas->w);
>              XiveEND *end;
>  
> -            assert(end_idx < xive->nr_ends);
> +            assert(end_idx < spapr_xive_nr_ends(xive));
>              end = &xive->endt[end_idx];
>  
>              if (xive_end_is_valid(end)) {
> @@ -270,7 +270,7 @@ static void spapr_xive_reset(void *dev)
>      }
>  
>      /* Clear all ENDs */
> -    for (i = 0; i < xive->nr_ends; i++) {
> +    for (i = 0; i < spapr_xive_nr_ends(xive); i++) {
>          spapr_xive_end_reset(&xive->endt[i]);
>      }
>  }
> @@ -288,6 +288,11 @@ static void spapr_xive_instance_init(Object *obj)
>      xive->fd = -1;
>  }
>  
> +uint32_t spapr_xive_nr_ends(const SpaprXive *xive)
> +{
> +    return xive->nr_ends;
> +}
> +
>  static void spapr_xive_realize(DeviceState *dev, Error **errp)
>  {
>      SpaprXive *xive = SPAPR_XIVE(dev);
> @@ -336,7 +341,7 @@ static void spapr_xive_realize(DeviceState *dev, Error **errp)
>       * Allocate the routing tables
>       */
>      xive->eat = g_new0(XiveEAS, xive->nr_irqs);
> -    xive->endt = g_new0(XiveEND, xive->nr_ends);
> +    xive->endt = g_new0(XiveEND, spapr_xive_nr_ends(xive));
>  
>      xive->nodename = g_strdup_printf("interrupt-controller@%" PRIx64,
>                             xive->tm_base + XIVE_TM_USER_PAGE * (1 << TM_SHIFT));
> @@ -375,7 +380,7 @@ static int spapr_xive_get_end(XiveRouter *xrtr,
>  {
>      SpaprXive *xive = SPAPR_XIVE(xrtr);
>  
> -    if (end_idx >= xive->nr_ends) {
> +    if (end_idx >= spapr_xive_nr_ends(xive)) {
>          return -1;
>      }
>  
> @@ -389,7 +394,7 @@ static int spapr_xive_write_end(XiveRouter *xrtr, uint8_t end_blk,
>  {
>      SpaprXive *xive = SPAPR_XIVE(xrtr);
>  
> -    if (end_idx >= xive->nr_ends) {
> +    if (end_idx >= spapr_xive_nr_ends(xive)) {
>          return -1;
>      }
>  
> @@ -1138,7 +1143,7 @@ static target_ulong h_int_get_source_config(PowerPCCPU *cpu,
>      /* EAS_END_BLOCK is unused on sPAPR */
>      end_idx = xive_get_field64(EAS_END_INDEX, eas.w);
>  
> -    assert(end_idx < xive->nr_ends);
> +    assert(end_idx < spapr_xive_nr_ends(xive));
>      end = &xive->endt[end_idx];
>  
>      nvt_blk = xive_get_field32(END_W6_NVT_BLOCK, end->w6);
> @@ -1216,7 +1221,7 @@ static target_ulong h_int_get_queue_info(PowerPCCPU *cpu,
>          return H_P2;
>      }
>  
> -    assert(end_idx < xive->nr_ends);
> +    assert(end_idx < spapr_xive_nr_ends(xive));
>      end = &xive->endt[end_idx];
>  
>      args[0] = xive->end_base + (1ull << (end_xsrc->esb_shift + 1)) * end_idx;
> @@ -1304,7 +1309,7 @@ static target_ulong h_int_set_queue_config(PowerPCCPU *cpu,
>          return H_P2;
>      }
>  
> -    assert(end_idx < xive->nr_ends);
> +    assert(end_idx < spapr_xive_nr_ends(xive));
>      memcpy(&end, &xive->endt[end_idx], sizeof(XiveEND));
>  
>      switch (qsize) {
> @@ -1470,7 +1475,7 @@ static target_ulong h_int_get_queue_config(PowerPCCPU *cpu,
>          return H_P2;
>      }
>  
> -    assert(end_idx < xive->nr_ends);
> +    assert(end_idx < spapr_xive_nr_ends(xive));
>      end = &xive->endt[end_idx];
>  
>      args[0] = 0;
> diff --git a/hw/intc/spapr_xive_kvm.c b/hw/intc/spapr_xive_kvm.c
> index 66bf4c06fe55..1566016f0e28 100644
> --- a/hw/intc/spapr_xive_kvm.c
> +++ b/hw/intc/spapr_xive_kvm.c
> @@ -531,7 +531,7 @@ static int kvmppc_xive_get_queues(SpaprXive *xive, Error **errp)
>      int i;
>      int ret;
>  
> -    for (i = 0; i < xive->nr_ends; i++) {
> +    for (i = 0; i < spapr_xive_nr_ends(xive); i++) {
>          if (!xive_end_is_valid(&xive->endt[i])) {
>              continue;
>          }
> @@ -701,7 +701,7 @@ int kvmppc_xive_post_load(SpaprXive *xive, int version_id)
>      assert(xive->fd != -1);
>  
>      /* Restore the ENDT first. The targetting depends on it. */
> -    for (i = 0; i < xive->nr_ends; i++) {
> +    for (i = 0; i < spapr_xive_nr_ends(xive); i++) {
>          if (!xive_end_is_valid(&xive->endt[i])) {
>              continue;
>          }
Cédric Le Goater Nov. 23, 2020, 9:46 a.m. UTC | #2
On 11/20/20 6:46 PM, Greg Kurz wrote:
> We're going to kill the "nr_ends" field in a subsequent patch.

Why? It is one of the tables of the controller and it's part of
the main XIVE concepts. Conceptually, we could let the machine
dimension it with an arbitrary value as OPAL does. The controller
would fail when the table is fully used.

 
> Prepare ground by using an helper instead of peeking into
> the sPAPR XIVE structure directly.


I am not against the helper itself, but we should introduce a
prio_shift value which would let us define the number of
available priorities. To be linked with "hv-prio".
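
Something along these lines maybe, just to sketch the idea (both the
"nr_servers" and "prio_shift" fields are made up for the example, they
don't exist today):

    /* Rough sketch only: derive the ENDT size from the number of
     * servers and a priority shift instead of storing "nr_ends".
     * Both fields below are hypothetical. */
    uint32_t spapr_xive_nr_ends(const SpaprXive *xive)
    {
        /* one END per (server, priority) pair */
        return xive->nr_servers << xive->prio_shift;
    }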

C.


> Signed-off-by: Greg Kurz <groug@kaod.org>
> ---
>  include/hw/ppc/spapr_xive.h |  1 +
>  hw/intc/spapr_xive.c        | 23 ++++++++++++++---------
>  hw/intc/spapr_xive_kvm.c    |  4 ++--
>  3 files changed, 17 insertions(+), 11 deletions(-)
> 
> diff --git a/include/hw/ppc/spapr_xive.h b/include/hw/ppc/spapr_xive.h
> index 26c8d90d7196..4b967f13c030 100644
> --- a/include/hw/ppc/spapr_xive.h
> +++ b/include/hw/ppc/spapr_xive.h
> @@ -75,6 +75,7 @@ void spapr_xive_map_mmio(SpaprXive *xive);
>  
>  int spapr_xive_end_to_target(uint8_t end_blk, uint32_t end_idx,
>                               uint32_t *out_server, uint8_t *out_prio);
> +uint32_t spapr_xive_nr_ends(const SpaprXive *xive);
>  
>  /*
>   * KVM XIVE device helpers
> diff --git a/hw/intc/spapr_xive.c b/hw/intc/spapr_xive.c
> index 60e0d5769dcc..f473ad9cba47 100644
> --- a/hw/intc/spapr_xive.c
> +++ b/hw/intc/spapr_xive.c
> @@ -192,7 +192,7 @@ void spapr_xive_pic_print_info(SpaprXive *xive, Monitor *mon)
>              uint32_t end_idx = xive_get_field64(EAS_END_INDEX, eas->w);
>              XiveEND *end;
>  
> -            assert(end_idx < xive->nr_ends);
> +            assert(end_idx < spapr_xive_nr_ends(xive));
>              end = &xive->endt[end_idx];
>  
>              if (xive_end_is_valid(end)) {
> @@ -270,7 +270,7 @@ static void spapr_xive_reset(void *dev)
>      }
>  
>      /* Clear all ENDs */
> -    for (i = 0; i < xive->nr_ends; i++) {
> +    for (i = 0; i < spapr_xive_nr_ends(xive); i++) {
>          spapr_xive_end_reset(&xive->endt[i]);
>      }
>  }
> @@ -288,6 +288,11 @@ static void spapr_xive_instance_init(Object *obj)
>      xive->fd = -1;
>  }
>  
> +uint32_t spapr_xive_nr_ends(const SpaprXive *xive)
> +{
> +    return xive->nr_ends;
> +}
> +
>  static void spapr_xive_realize(DeviceState *dev, Error **errp)
>  {
>      SpaprXive *xive = SPAPR_XIVE(dev);
> @@ -336,7 +341,7 @@ static void spapr_xive_realize(DeviceState *dev, Error **errp)
>       * Allocate the routing tables
>       */
>      xive->eat = g_new0(XiveEAS, xive->nr_irqs);
> -    xive->endt = g_new0(XiveEND, xive->nr_ends);
> +    xive->endt = g_new0(XiveEND, spapr_xive_nr_ends(xive));
>  
>      xive->nodename = g_strdup_printf("interrupt-controller@%" PRIx64,
>                             xive->tm_base + XIVE_TM_USER_PAGE * (1 << TM_SHIFT));
> @@ -375,7 +380,7 @@ static int spapr_xive_get_end(XiveRouter *xrtr,
>  {
>      SpaprXive *xive = SPAPR_XIVE(xrtr);
>  
> -    if (end_idx >= xive->nr_ends) {
> +    if (end_idx >= spapr_xive_nr_ends(xive)) {
>          return -1;
>      }
>  
> @@ -389,7 +394,7 @@ static int spapr_xive_write_end(XiveRouter *xrtr, uint8_t end_blk,
>  {
>      SpaprXive *xive = SPAPR_XIVE(xrtr);
>  
> -    if (end_idx >= xive->nr_ends) {
> +    if (end_idx >= spapr_xive_nr_ends(xive)) {
>          return -1;
>      }
>  
> @@ -1138,7 +1143,7 @@ static target_ulong h_int_get_source_config(PowerPCCPU *cpu,
>      /* EAS_END_BLOCK is unused on sPAPR */
>      end_idx = xive_get_field64(EAS_END_INDEX, eas.w);
>  
> -    assert(end_idx < xive->nr_ends);
> +    assert(end_idx < spapr_xive_nr_ends(xive));
>      end = &xive->endt[end_idx];
>  
>      nvt_blk = xive_get_field32(END_W6_NVT_BLOCK, end->w6);
> @@ -1216,7 +1221,7 @@ static target_ulong h_int_get_queue_info(PowerPCCPU *cpu,
>          return H_P2;
>      }
>  
> -    assert(end_idx < xive->nr_ends);
> +    assert(end_idx < spapr_xive_nr_ends(xive));
>      end = &xive->endt[end_idx];
>  
>      args[0] = xive->end_base + (1ull << (end_xsrc->esb_shift + 1)) * end_idx;
> @@ -1304,7 +1309,7 @@ static target_ulong h_int_set_queue_config(PowerPCCPU *cpu,
>          return H_P2;
>      }
>  
> -    assert(end_idx < xive->nr_ends);
> +    assert(end_idx < spapr_xive_nr_ends(xive));
>      memcpy(&end, &xive->endt[end_idx], sizeof(XiveEND));
>  
>      switch (qsize) {
> @@ -1470,7 +1475,7 @@ static target_ulong h_int_get_queue_config(PowerPCCPU *cpu,
>          return H_P2;
>      }
>  
> -    assert(end_idx < xive->nr_ends);
> +    assert(end_idx < spapr_xive_nr_ends(xive));
>      end = &xive->endt[end_idx];
>  
>      args[0] = 0;
> diff --git a/hw/intc/spapr_xive_kvm.c b/hw/intc/spapr_xive_kvm.c
> index 66bf4c06fe55..1566016f0e28 100644
> --- a/hw/intc/spapr_xive_kvm.c
> +++ b/hw/intc/spapr_xive_kvm.c
> @@ -531,7 +531,7 @@ static int kvmppc_xive_get_queues(SpaprXive *xive, Error **errp)
>      int i;
>      int ret;
>  
> -    for (i = 0; i < xive->nr_ends; i++) {
> +    for (i = 0; i < spapr_xive_nr_ends(xive); i++) {
>          if (!xive_end_is_valid(&xive->endt[i])) {
>              continue;
>          }
> @@ -701,7 +701,7 @@ int kvmppc_xive_post_load(SpaprXive *xive, int version_id)
>      assert(xive->fd != -1);
>  
>      /* Restore the ENDT first. The targetting depends on it. */
> -    for (i = 0; i < xive->nr_ends; i++) {
> +    for (i = 0; i < spapr_xive_nr_ends(xive); i++) {
>          if (!xive_end_is_valid(&xive->endt[i])) {
>              continue;
>          }
>
Greg Kurz Nov. 23, 2020, 11:16 a.m. UTC | #3
On Mon, 23 Nov 2020 10:46:38 +0100
Cédric Le Goater <clg@kaod.org> wrote:

> On 11/20/20 6:46 PM, Greg Kurz wrote:
> > We're going to kill the "nr_ends" field in a subsequent patch.
> 
> why ? it is one of the tables of the controller and its part of 
> the main XIVE concepts. Conceptually, we could let the machine 
> dimension it with an arbitrary value as OPAL does. The controller
> would fail when the table is fully used. 
> 

The idea is that the sPAPR machine's only true need is to create a
controller that can accommodate up to a certain number of vCPU ids.
It doesn't really need to know about the END itself IMHO.

This being said, if we decide to pass both spapr_max_server_number()
and smp.max_cpus down to the backends as function arguments, we won't
have to change "nr_ends" at all.
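
For instance something like this, prototype only, just to illustrate
what "passing them down" would look like (not the actual signature):

    /* Illustrative prototype: the KVM backend would get the sizing
     * information as arguments instead of reading xive->nr_ends. */
    int kvmppc_xive_connect(SpaprInterruptController *intc,
                            uint32_t nr_servers, /* spapr_max_server_number() */
                            uint32_t max_cpus,   /* machine->smp.max_cpus */
                            Error **errp);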

>  
> > Prepare ground by using an helper instead of peeking into
> > the sPAPR XIVE structure directly.
> 
> 
> I am not against the helper though but we should introduce a 
> prio_shift value which would let us define the number of 
> available priorities. To be linked with "hv-prio"
> 
> C.
> 
> 
> > Signed-off-by: Greg Kurz <groug@kaod.org>
> > ---
> >  include/hw/ppc/spapr_xive.h |  1 +
> >  hw/intc/spapr_xive.c        | 23 ++++++++++++++---------
> >  hw/intc/spapr_xive_kvm.c    |  4 ++--
> >  3 files changed, 17 insertions(+), 11 deletions(-)
> > 
> > diff --git a/include/hw/ppc/spapr_xive.h b/include/hw/ppc/spapr_xive.h
> > index 26c8d90d7196..4b967f13c030 100644
> > --- a/include/hw/ppc/spapr_xive.h
> > +++ b/include/hw/ppc/spapr_xive.h
> > @@ -75,6 +75,7 @@ void spapr_xive_map_mmio(SpaprXive *xive);
> >  
> >  int spapr_xive_end_to_target(uint8_t end_blk, uint32_t end_idx,
> >                               uint32_t *out_server, uint8_t *out_prio);
> > +uint32_t spapr_xive_nr_ends(const SpaprXive *xive);
> >  
> >  /*
> >   * KVM XIVE device helpers
> > diff --git a/hw/intc/spapr_xive.c b/hw/intc/spapr_xive.c
> > index 60e0d5769dcc..f473ad9cba47 100644
> > --- a/hw/intc/spapr_xive.c
> > +++ b/hw/intc/spapr_xive.c
> > @@ -192,7 +192,7 @@ void spapr_xive_pic_print_info(SpaprXive *xive, Monitor *mon)
> >              uint32_t end_idx = xive_get_field64(EAS_END_INDEX, eas->w);
> >              XiveEND *end;
> >  
> > -            assert(end_idx < xive->nr_ends);
> > +            assert(end_idx < spapr_xive_nr_ends(xive));
> >              end = &xive->endt[end_idx];
> >  
> >              if (xive_end_is_valid(end)) {
> > @@ -270,7 +270,7 @@ static void spapr_xive_reset(void *dev)
> >      }
> >  
> >      /* Clear all ENDs */
> > -    for (i = 0; i < xive->nr_ends; i++) {
> > +    for (i = 0; i < spapr_xive_nr_ends(xive); i++) {
> >          spapr_xive_end_reset(&xive->endt[i]);
> >      }
> >  }
> > @@ -288,6 +288,11 @@ static void spapr_xive_instance_init(Object *obj)
> >      xive->fd = -1;
> >  }
> >  
> > +uint32_t spapr_xive_nr_ends(const SpaprXive *xive)
> > +{
> > +    return xive->nr_ends;
> > +}
> > +
> >  static void spapr_xive_realize(DeviceState *dev, Error **errp)
> >  {
> >      SpaprXive *xive = SPAPR_XIVE(dev);
> > @@ -336,7 +341,7 @@ static void spapr_xive_realize(DeviceState *dev, Error **errp)
> >       * Allocate the routing tables
> >       */
> >      xive->eat = g_new0(XiveEAS, xive->nr_irqs);
> > -    xive->endt = g_new0(XiveEND, xive->nr_ends);
> > +    xive->endt = g_new0(XiveEND, spapr_xive_nr_ends(xive));
> >  
> >      xive->nodename = g_strdup_printf("interrupt-controller@%" PRIx64,
> >                             xive->tm_base + XIVE_TM_USER_PAGE * (1 << TM_SHIFT));
> > @@ -375,7 +380,7 @@ static int spapr_xive_get_end(XiveRouter *xrtr,
> >  {
> >      SpaprXive *xive = SPAPR_XIVE(xrtr);
> >  
> > -    if (end_idx >= xive->nr_ends) {
> > +    if (end_idx >= spapr_xive_nr_ends(xive)) {
> >          return -1;
> >      }
> >  
> > @@ -389,7 +394,7 @@ static int spapr_xive_write_end(XiveRouter *xrtr, uint8_t end_blk,
> >  {
> >      SpaprXive *xive = SPAPR_XIVE(xrtr);
> >  
> > -    if (end_idx >= xive->nr_ends) {
> > +    if (end_idx >= spapr_xive_nr_ends(xive)) {
> >          return -1;
> >      }
> >  
> > @@ -1138,7 +1143,7 @@ static target_ulong h_int_get_source_config(PowerPCCPU *cpu,
> >      /* EAS_END_BLOCK is unused on sPAPR */
> >      end_idx = xive_get_field64(EAS_END_INDEX, eas.w);
> >  
> > -    assert(end_idx < xive->nr_ends);
> > +    assert(end_idx < spapr_xive_nr_ends(xive));
> >      end = &xive->endt[end_idx];
> >  
> >      nvt_blk = xive_get_field32(END_W6_NVT_BLOCK, end->w6);
> > @@ -1216,7 +1221,7 @@ static target_ulong h_int_get_queue_info(PowerPCCPU *cpu,
> >          return H_P2;
> >      }
> >  
> > -    assert(end_idx < xive->nr_ends);
> > +    assert(end_idx < spapr_xive_nr_ends(xive));
> >      end = &xive->endt[end_idx];
> >  
> >      args[0] = xive->end_base + (1ull << (end_xsrc->esb_shift + 1)) * end_idx;
> > @@ -1304,7 +1309,7 @@ static target_ulong h_int_set_queue_config(PowerPCCPU *cpu,
> >          return H_P2;
> >      }
> >  
> > -    assert(end_idx < xive->nr_ends);
> > +    assert(end_idx < spapr_xive_nr_ends(xive));
> >      memcpy(&end, &xive->endt[end_idx], sizeof(XiveEND));
> >  
> >      switch (qsize) {
> > @@ -1470,7 +1475,7 @@ static target_ulong h_int_get_queue_config(PowerPCCPU *cpu,
> >          return H_P2;
> >      }
> >  
> > -    assert(end_idx < xive->nr_ends);
> > +    assert(end_idx < spapr_xive_nr_ends(xive));
> >      end = &xive->endt[end_idx];
> >  
> >      args[0] = 0;
> > diff --git a/hw/intc/spapr_xive_kvm.c b/hw/intc/spapr_xive_kvm.c
> > index 66bf4c06fe55..1566016f0e28 100644
> > --- a/hw/intc/spapr_xive_kvm.c
> > +++ b/hw/intc/spapr_xive_kvm.c
> > @@ -531,7 +531,7 @@ static int kvmppc_xive_get_queues(SpaprXive *xive, Error **errp)
> >      int i;
> >      int ret;
> >  
> > -    for (i = 0; i < xive->nr_ends; i++) {
> > +    for (i = 0; i < spapr_xive_nr_ends(xive); i++) {
> >          if (!xive_end_is_valid(&xive->endt[i])) {
> >              continue;
> >          }
> > @@ -701,7 +701,7 @@ int kvmppc_xive_post_load(SpaprXive *xive, int version_id)
> >      assert(xive->fd != -1);
> >  
> >      /* Restore the ENDT first. The targetting depends on it. */
> > -    for (i = 0; i < xive->nr_ends; i++) {
> > +    for (i = 0; i < spapr_xive_nr_ends(xive); i++) {
> >          if (!xive_end_is_valid(&xive->endt[i])) {
> >              continue;
> >          }
> > 
>
Cédric Le Goater Nov. 24, 2020, 1:54 p.m. UTC | #4
On 11/23/20 12:16 PM, Greg Kurz wrote:
> On Mon, 23 Nov 2020 10:46:38 +0100
> Cédric Le Goater <clg@kaod.org> wrote:
> 
>> On 11/20/20 6:46 PM, Greg Kurz wrote:
>>> We're going to kill the "nr_ends" field in a subsequent patch.
>>
>> why ? it is one of the tables of the controller and its part of 
>> the main XIVE concepts. Conceptually, we could let the machine 
>> dimension it with an arbitrary value as OPAL does. The controller
>> would fail when the table is fully used. 
>>
> 
> The idea is that the sPAPR machine only true need is to create a
> controller that can accommodate up to a certain number of vCPU ids.
> It doesn't really to know about the END itself IMHO.> 
> This being said, if we decide to pass both spapr_max_server_number()
> and smp.max_cpus down to the backends as function arguments, we won't
> have to change "nr_ends" at all.

I would prefer that but I am still not sure what they represent. 

Looking at the sPAPR XIVE code, we deal with numbers/ranges in the 
following places today.

 * spapr_xive_dt() 

   It defines a range of interrupt numbers to be used by the guest 
   for the threads/vCPUs IPIs. It's a subset of interrupt numbers 
   in :

		[ SPAPR_IRQ_IPI - SPAPR_IRQ_EPOW [

   These are not vCPU ids.

   Since these interrupt numbers will be considered as free to use
   by the OS, it makes sense to pre-claim them. But claiming an 
   interrupt number in the guest can potentially set up, through 
   the KVM device, a mapping on the host and in HW. See below why
   this can be a problem.

 * kvmppc_xive_cpu_connect()   

   This sizes the NVT tables in OPAL for the guest. This is the  
   max number of vCPUs of the guest (not vCPU ids)

 * spapr_irq_init()

   This is where the IPI interrupt numbers are claimed today. 
   Directly in QEMU and KVM, if the machine is running XIVE only, 
   indirectly if it's dual, first in QEMU and then in KVM when 
   the machine switches interrupt mode in CAS.

   The problem is that the underlying XIVE resources in HW are 
   allocated where the QEMU process is running. Which is not the
   best option when the vCPUs are pinned on different chips.

   My patchset was trying to improve that by claiming the IPI on 
   demand when the vCPU is connected to the KVM device. But it 
   was using the vCPU id as the IPI interrupt number, which is
   utterly wrong: the guest OS could use any number in the range
   exposed in the DT.
   
   The last patch you sent was going in the right direction I think.
   That is, to claim the IPI when the guest OS requests it.

   http://patchwork.ozlabs.org/project/qemu-devel/patch/160528045027.804522.6161091782230763832.stgit@bahia.lan/
   
   But I don't understand why it was so complex. It should be like
   the MSIs claimed by PCI devices.


All this to say that we need to better size the range in the
"ibm,xive-lisn-ranges" property if that's broken for vSMT.

Then, I think the IPIs can be treated just like the PCI MSIs
but they need to be claimed first. That's the ugly part. 

Should we add a special check in h_int_set_source_config to
deal with unclaimed IPIs that are being configured?
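
Something like this maybe (very rough sketch, spapr_xive_irq_is_ipi()
and kvmppc_xive_claim_ipi() are made-up helper names):

    /* Hypothetical check in h_int_set_source_config(): claim an IPI at
     * the KVM level only when the guest configures it, using the vCPU
     * given by "target", so the HW resources land on the right chip. */
    if (spapr_xive_irq_is_ipi(lisn) && kvm_irqchip_in_kernel()) {
        Error *local_err = NULL;

        if (kvmppc_xive_claim_ipi(xive, lisn, target, &local_err) < 0) {
            error_report_err(local_err);
            return H_HARDWARE;
        }
    }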


C.
Greg Kurz Nov. 24, 2020, 5:01 p.m. UTC | #5
On Tue, 24 Nov 2020 14:54:38 +0100
Cédric Le Goater <clg@kaod.org> wrote:

> On 11/23/20 12:16 PM, Greg Kurz wrote:
> > On Mon, 23 Nov 2020 10:46:38 +0100
> > Cédric Le Goater <clg@kaod.org> wrote:
> > 
> >> On 11/20/20 6:46 PM, Greg Kurz wrote:
> >>> We're going to kill the "nr_ends" field in a subsequent patch.
> >>
> >> why ? it is one of the tables of the controller and its part of 
> >> the main XIVE concepts. Conceptually, we could let the machine 
> >> dimension it with an arbitrary value as OPAL does. The controller
> >> would fail when the table is fully used. 
> >>
> > 
> > The idea is that the sPAPR machine only true need is to create a
> > controller that can accommodate up to a certain number of vCPU ids.
> > It doesn't really to know about the END itself IMHO.> 
> > This being said, if we decide to pass both spapr_max_server_number()
> > and smp.max_cpus down to the backends as function arguments, we won't
> > have to change "nr_ends" at all.
> 
> I would prefer that but I am still not sure what they represent. 
> 
> Looking at the sPAPR XIVE code, we deal with numbers/ranges in the 
> following places today.
> 
>  * spapr_xive_dt() 
> 
>    It defines a range of interrupt numbers to be used by the guest 
>    for the threads/vCPUs IPIs. It's a subset of interrupt numbers 
>    in :
> 
> 		[ SPAPR_IRQ_IPI - SPAPR_IRQ_EPOW [
> 
>    These are not vCPU ids.
> 
>    Since these interrupt numbers will be considered as free to use
>    by the OS, it makes sense to pre-claim them. But claiming an 
>    interrupt number in the guest can potentially set up, through 
>    the KVM device, a mapping on the host and in HW. See below why
>    this can be a problem.
> 
>  * kvmppc_xive_cpu_connect()   
> 
>    This sizes the NVT tables in OPAL for the guest. This is the  
>    max number of vCPUs of the guest (not vCPU ids)
> 

I guess you're talking about KVM_DEV_XIVE_NR_SERVERS in
kvmppc_xive_connect() actually. We're currently passing
spapr_max_server_number() (vCPU id) but you might be
right.

I need to re-read the story around VSMT and XIVE.

commit 1e175d2e07c71d9574f5b1c74523abca54e2654f
Author: Sam Bobroff <sam.bobroff@au1.ibm.com>
Date:   Wed Jul 25 16:12:02 2018 +1000

    KVM: PPC: Book3S HV: Pack VCORE IDs to access full VCPU ID space

>  * spapr_irq_init()
> 
>    This is where the IPI interrupt numbers are claimed today. 
>    Directly in QEMU and KVM, if the machine is running XIVE only, 
>    indirectly if it's dual, first in QEMU and then in KVM when 
>    the machine switches of interrupt mode in CAS. 
> 
>    The problem is that the underlying XIVE resources in HW are 
>    allocated where the QEMU process is running. Which is not the
>    best option when the vCPUs are pinned on different chips.
> 
>    My patchset was trying to improve that by claiming the IPI on 
>    demand when the vCPU is connected to the KVM device. But it 
>    was using the vCPU id as the IPI interrupt number which is 
>    utterly wrong, the guest OS could use any number in the range 
>    exposed in the DT.
>    
>    The last patch you sent was going in the right direction I think.
>    That is to claim the IPI when the guest OS is requesting for it. 
> 
>    http://patchwork.ozlabs.org/project/qemu-devel/patch/160528045027.804522.6161091782230763832.stgit@bahia.lan/
>    
>    But I don't understand why it was so complex. It should be like
>    the MSIs claimed by PCI devices.
> 

The difference here is that the guest doesn't claim IPIs. They are
supposedly pre-claimed in "ibm,xive-lisn-ranges". And this is actually
the case in QEMU.

The IPI setup sequence in the guest is basically:
1) grab a free irq from the bitmap, ie. "ibm,xive-lisn-ranges"
2) calls H_INT_GET_SOURCE_INFO, ie. populate_irq_data()
3) calls H_INT_SET_SOURCE_CONFIG, ie, configure_irq())

If we want an IPI to be claimed by the appropriate vCPU, we
can only do this from under H_INT_SET_SOURCE_CONFIG. And
until the guest eventually configures the IPI, KVM and QEMU
are out of sync.

This complexifies migration because we have to guess at
post load if we should claim the IPI in KVM or not. The
simple presence of the vCPU isn't enough: we need to
guess if the guest actually configured the IPI or not.
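
Concretely, the post-load path would need some heuristic along these
lines (sketch only, spapr_xive_irq_is_ipi() is a hypothetical helper
checking the "ibm,xive-lisn-ranges" IPI range):

    /* In kvmppc_xive_post_load(): restore a source in KVM only if the
     * guest appears to have configured it. A valid but still masked EAS
     * is the state of a pre-claimed IPI that was never configured. */
    for (i = 0; i < xsrc->nr_irqs; i++) {
        XiveEAS *eas = &xive->eat[i];

        if (!xive_eas_is_valid(eas)) {
            continue;
        }
        if (spapr_xive_irq_is_ipi(i) && xive_eas_is_masked(eas)) {
            continue;   /* guess: don't claim this IPI in KVM */
        }
        /* ... restore the source and its config in KVM as today ... */
    }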

> 
> All this to say, that we need to size better the range in the 
> "ibm,xive-lisn-ranges" property if that's broken for vSMT. 
> 

Sizing the range to smp.max_cpus as proposed in this series
is fine, no matter what the VSMT is.

> Then, I think the IPIs can be treated just like the PCI MSIs
> but they need to be claimed first. That's the ugly part. 
> 

Yeah that's the big difference. For PCI MSIs, QEMU owns the
bitmap and the guest can claim (or release) a number of
MSIs through the "ibm,change-msi" RTAS interface. There's no
such thing for IPIs: they are supposedly already claimed.

> Should we add a special check in h_int_set_source_config to
> deal with unclaimed IPIs that are being configured ?
> 

This is what my tentative fix does.

> 
> C.
Cédric Le Goater Nov. 24, 2020, 5:56 p.m. UTC | #6
On 11/24/20 6:01 PM, Greg Kurz wrote:
> On Tue, 24 Nov 2020 14:54:38 +0100
> Cédric Le Goater <clg@kaod.org> wrote:
> 
>> On 11/23/20 12:16 PM, Greg Kurz wrote:
>>> On Mon, 23 Nov 2020 10:46:38 +0100
>>> Cédric Le Goater <clg@kaod.org> wrote:
>>>
>>>> On 11/20/20 6:46 PM, Greg Kurz wrote:
>>>>> We're going to kill the "nr_ends" field in a subsequent patch.
>>>>
>>>> why ? it is one of the tables of the controller and its part of 
>>>> the main XIVE concepts. Conceptually, we could let the machine 
>>>> dimension it with an arbitrary value as OPAL does. The controller
>>>> would fail when the table is fully used. 
>>>>
>>>
>>> The idea is that the sPAPR machine only true need is to create a
>>> controller that can accommodate up to a certain number of vCPU ids.
>>> It doesn't really to know about the END itself IMHO.> 
>>> This being said, if we decide to pass both spapr_max_server_number()
>>> and smp.max_cpus down to the backends as function arguments, we won't
>>> have to change "nr_ends" at all.
>>
>> I would prefer that but I am still not sure what they represent. 
>>
>> Looking at the sPAPR XIVE code, we deal with numbers/ranges in the 
>> following places today.
>>
>>  * spapr_xive_dt() 
>>
>>    It defines a range of interrupt numbers to be used by the guest 
>>    for the threads/vCPUs IPIs. It's a subset of interrupt numbers 
>>    in :
>>
>> 		[ SPAPR_IRQ_IPI - SPAPR_IRQ_EPOW [
>>
>>    These are not vCPU ids.
>>
>>    Since these interrupt numbers will be considered as free to use
>>    by the OS, it makes sense to pre-claim them. But claiming an 
>>    interrupt number in the guest can potentially set up, through 
>>    the KVM device, a mapping on the host and in HW. See below why
>>    this can be a problem.
>>
>>  * kvmppc_xive_cpu_connect()   
>>
>>    This sizes the NVT tables in OPAL for the guest. This is the  
>>    max number of vCPUs of the guest (not vCPU ids)
>>
> 
> I guess you're talking about KVM_DEV_XIVE_NR_SERVERS in
> kvmppc_xive_connect() actually. We're currently passing
> spapr_max_server_number() (vCPU id) but you might be
> right.
> 
> I need to re-read the story around VSMT and XIVE.

ok. What we care about here is the size to allocate for the NVT block
representing the vCPUs in HW. NVT ids are pushed in the thread 
contexts when the vCPUs are scheduled to run and looked for by 
the presenter when an interrupt is to be delivered.

 
> commit 1e175d2e07c71d9574f5b1c74523abca54e2654f
> Author: Sam Bobroff <sam.bobroff@au1.ibm.com>
> Date:   Wed Jul 25 16:12:02 2018 +1000
> 
>     KVM: PPC: Book3S HV: Pack VCORE IDs to access full VCPU ID space
> 
>>  * spapr_irq_init()
>>
>>    This is where the IPI interrupt numbers are claimed today. 
>>    Directly in QEMU and KVM, if the machine is running XIVE only, 
>>    indirectly if it's dual, first in QEMU and then in KVM when 
>>    the machine switches of interrupt mode in CAS. 
>>
>>    The problem is that the underlying XIVE resources in HW are 
>>    allocated where the QEMU process is running. Which is not the
>>    best option when the vCPUs are pinned on different chips.
>>
>>    My patchset was trying to improve that by claiming the IPI on 
>>    demand when the vCPU is connected to the KVM device. But it 
>>    was using the vCPU id as the IPI interrupt number which is 
>>    utterly wrong, the guest OS could use any number in the range 
>>    exposed in the DT.
>>    
>>    The last patch you sent was going in the right direction I think.
>>    That is to claim the IPI when the guest OS is requesting for it. 
>>
>>    http://patchwork.ozlabs.org/project/qemu-devel/patch/160528045027.804522.6161091782230763832.stgit@bahia.lan/
>>    
>>    But I don't understand why it was so complex. It should be like
>>    the MSIs claimed by PCI devices.
>>
> 
> The difference here is that the guest doesn't claim IPIs. They are
> supposedly pre-claimed in "ibm,xive-lisn-ranges". And this is actually
> the case in QEMU.

yes. That's what I want to change (for performance)

> The IPI setup sequence in the guest is basically:
> 1) grab a free irq from the bitmap, ie. "ibm,xive-lisn-ranges"
> 2) calls H_INT_GET_SOURCE_INFO, ie. populate_irq_data()
> 3) calls H_INT_SET_SOURCE_CONFIG, ie, configure_irq())
> 
> If we want an IPI to be claimed by the appropriate vCPU, we
> can only do this from under H_INT_SET_SOURCE_CONFIG. And
> until the guest eventually configures the IPI, KVM and QEMU
> are out of sync.

Well, KVM doesn't know either how many PCI MSIs will be claimed.
It all depends on the guest OS. 

I don't think it is a problem to expose unclaimed interrupt
numbers to the guest if they are IPIs. We can detect that
easily with the range and claim the source at KVM level when 
it's configured or in h_int_get_source_info(). Talking of which, 
it might be good to have a KVM command to query the source 
characteristics on the host. I sent a patchset along those lines
a while ago.
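
The detection itself is trivial since the IPI range is fixed (sketch,
the helper name is made up):

    /* An IPI is any source number in the range advertised to the guest
     * for the vCPU IPIs, i.e. [ SPAPR_IRQ_IPI - SPAPR_IRQ_EPOW [ */
    static bool spapr_xive_irq_is_ipi(uint32_t lisn)
    {
        return lisn >= SPAPR_IRQ_IPI && lisn < SPAPR_IRQ_EPOW;
    }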

> This complexifies migration because we have to guess at
> post load if we should claim the IPI in KVM or not. The
> simple presence of the vCPU isn't enough : we need to
> guess if the guest actually configured the IPI or not.

The EAT will be transferred from the source and the call to 
kvmppc_xive_source_reset_one() should initialize the KVM 
device correctly on the target for all interrupts.

>> All this to say, that we need to size better the range in the 
>> "ibm,xive-lisn-ranges" property if that's broken for vSMT. 
>>
> 
> Sizing the range to smp.max_cpus as proposed in this series
> is fine, no matter what the VSMT is.

ok. That's a fix for spapr_irq_dt() then. And possibly, there 
is a similar one for KVM_DEV_XIVE_NR_SERVERS.
 
>> Then, I think the IPIs can be treated just like the PCI MSIs
>> but they need to be claimed first. That's the ugly part. 
>>
> 
> Yeah that's the big difference. For PCI MSIs, QEMU owns the
> bitmap and the guest can claim (or release) a number of
> MSIs the "ibm,change-msi" RTAS interface. There's no
> such thing for IPIs : they are supposedly already claimed.

IPIs are a bit special because there are no I/O devices to
claim them. We could consider the vCPU as being the device.
That was my first attempt but it was wrong since the OS is 
in charge of choosing an interrupt number for the IPI. 

>> Should we add a special check in h_int_set_source_config to
>> deal with unclaimed IPIs that are being configured ?
>>
> 
> This is what my tentative fix does.

I didn't understand the complexity of it, maybe due to my
patchset.

You should try again :)

C.
Greg Kurz Nov. 25, 2020, 9:33 a.m. UTC | #7
On Tue, 24 Nov 2020 18:56:02 +0100
Cédric Le Goater <clg@kaod.org> wrote:

[...]

> > 
> > I guess you're talking about KVM_DEV_XIVE_NR_SERVERS in
> > kvmppc_xive_connect() actually. We're currently passing
> > spapr_max_server_number() (vCPU id) but you might be
> > right.
> > 
> > I need to re-read the story around VSMT and XIVE.
> 
> ok. What we care about here, is a size to allocate the NVT block
> representing the vCPUs in HW. NVT ids are pushed in the thread 
> contexts when the vCPUs are scheduled to run and looked for by 
> the presenter when an interrupt is to be delivered.
> 

Yeah, looking at the code again, I realize there was a confusion
when we added the possibility to size the NVT. This should not
depend on the vCPU id limit as it is done today, it should just
be the maximum number of possible vCPUs (ie. smp.max_cpus).
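
i.e. the gist of the fix would be something like this (illustrative
diff only, not the actual code):

    -    uint32_t nr_servers = spapr_max_server_number(spapr); /* vCPU id limit */
    +    uint32_t nr_servers = MACHINE(spapr)->smp.max_cpus;   /* possible vCPUs */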

[...]
> > 
> > The difference here is that the guest doesn't claim IPIs. They are
> > supposedly pre-claimed in "ibm,xive-lisn-ranges". And this is actually
> > the case in QEMU.
> 
> yes. That's what I want to change (for performance)
> 

I understand the purpose of claiming the IPI from its associated
vCPU context. But this can only be done on a path where we have
both the vCPU id and the IPI; kvmppc_xive_set_source_config()
looks like a good candidate to handle this for runtime and
post-load.

> > The IPI setup sequence in the guest is basically:
> > 1) grab a free irq from the bitmap, ie. "ibm,xive-lisn-ranges"
> > 2) calls H_INT_GET_SOURCE_INFO, ie. populate_irq_data()
> > 3) calls H_INT_SET_SOURCE_CONFIG, ie, configure_irq())
> > 
> > If we want an IPI to be claimed by the appropriate vCPU, we
> > can only do this from under H_INT_SET_SOURCE_CONFIG. And
> > until the guest eventually configures the IPI, KVM and QEMU
> > are out of sync.
> 
> Well, KVM doesn't know either how much PCI MSIs will be claimed.
> It all depends on the guest OS. 
> 

Yes but QEMU and KVM are always in sync for them. When the
guest calls the "ibm,change-msi" RTAS interface to get some
MSIs for a device, they are immediately claimed both in
QEMU and KVM.

> I don't think this is a problem to expose unclaimed interrupt
> numbers to the guest if they are IPIs. We can detect that
> easily with the range and claim the source at KVM level when 
> it's configured or in h_int_get_source_info(). Talking of which, 

We cannot claim the source at the KVM level from
H_INT_GET_SOURCE_INFO because we don't know about
the vCPU id here => we can't do the run_on_cpu()
optimization.

> it might be good to have a KVM command to query the source 
> characteristics on the host. I sent a patchset a while ago in 
> that sense.
> 
> > This complexifies migration because we have to guess at
> > post load if we should claim the IPI in KVM or not. The
> > simple presence of the vCPU isn't enough : we need to
> > guess if the guest actually configured the IPI or not.
> 
> The EAT will be transferred from the source and the call to 
> kvmppc_xive_source_reset_one() should initialize the KVM 
> device correctly on the target for all interrupts.
> 

Except that the EAS appears as valid for all IPIs, even
though the source didn't claim them at the KVM level. It
looks wrong to blindly restore all of them in post-load.

> >> All this to say, that we need to size better the range in the 
> >> "ibm,xive-lisn-ranges" property if that's broken for vSMT. 
> >>
> > 
> > Sizing the range to smp.max_cpus as proposed in this series
> > is fine, no matter what the VSMT is.
> 
> ok. That's a fix for spapr_irq_dt() then. And possibly, there 
> is a similar one for KVM_DEV_XIVE_NR_SERVERS.
>  

Yup.

> >> Then, I think the IPIs can be treated just like the PCI MSIs
> >> but they need to be claimed first. That's the ugly part. 
> >>
> > 
> > Yeah that's the big difference. For PCI MSIs, QEMU owns the
> > bitmap and the guest can claim (or release) a number of
> > MSIs the "ibm,change-msi" RTAS interface. There's no
> > such thing for IPIs : they are supposedly already claimed.
> 
> IPIs are a bit special because there are no I/O devices to
> claim them. We could consider the vCPU has being the device. 
> That was my first attempt but it was wrong since the OS is 
> in charge of choosing an interrupt number for the IPI. 
> 
> >> Should we add a special check in h_int_set_source_config to
> >> deal with unclaimed IPIs that are being configured ?
> >>
> > 
> > This is what my tentative fix does.
> 
> I didn't understand the complexity of it, may be due to my
> patchset.
> 
> You should try again :)
> 

Will do when I've fixed the misuses of spapr_max_server_number().

> C.
Cédric Le Goater Nov. 25, 2020, 11:34 a.m. UTC | #8
>>> This complexifies migration because we have to guess at
>>> post load if we should claim the IPI in KVM or not. The
>>> simple presence of the vCPU isn't enough : we need to
>>> guess if the guest actually configured the IPI or not.
>>
>> The EAT will be transferred from the source and the call to 
>> kvmppc_xive_source_reset_one() should initialize the KVM 
>> device correctly on the target for all interrupts.
>>
> 
> Except that the EAS appears as valid for all IPIs, even
> though the source didn't claim them at the KVM level. 

Why? We would stop claiming IPIs in spapr_irq_init() and so
they won't appear as being valid anymore, at boot time or
restore time.


C.
Greg Kurz Nov. 25, 2020, 12:26 p.m. UTC | #9
On Wed, 25 Nov 2020 12:34:25 +0100
Cédric Le Goater <clg@kaod.org> wrote:

>  
> >>> This complexifies migration because we have to guess at
> >>> post load if we should claim the IPI in KVM or not. The
> >>> simple presence of the vCPU isn't enough : we need to
> >>> guess if the guest actually configured the IPI or not.
> >>
> >> The EAT will be transferred from the source and the call to 
> >> kvmppc_xive_source_reset_one() should initialize the KVM 
> >> device correctly on the target for all interrupts.
> >>
> > 
> > Except that the EAS appears as valid for all IPIs, even
> > though the source didn't claim them at the KVM level. 
> 
> why ? we would stop claiming IPIs in spapr_irq_init() and so
> they won't appear as being valid anymore, at boot time or
> restore time.
> 

If we don't claim the IPIs in spapr_irq_init() anymore, then
we must at least claim them on the path of H_INT_GET_SOURCE_INFO,
otherwise it will fail with H_P2 and the guest won't even
try to set up the IPI. Even if we do that, we still have a
window where the source is valid in QEMU but not yet at
the KVM level.
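
For the record, the check I have in mind on that path looks roughly
like this (sketch only, both helpers are hypothetical names):

    /* In h_int_get_source_info(): instead of failing with H_P2 on an
     * unclaimed LISN, claim it on the fly if it sits in the IPI range.
     * spapr_xive_lazy_claim_ipi() would claim the LISN in QEMU and, if
     * in use, in KVM, returning false on error. */
    if (!xive_eas_is_valid(&xive->eat[lisn])) {
        if (!spapr_xive_irq_is_ipi(lisn) ||
            !spapr_xive_lazy_claim_ipi(xive, lisn)) {
            return H_P2;
        }
    }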

> 
> C.
Greg Kurz Nov. 25, 2020, 10:43 p.m. UTC | #10
On Mon, 23 Nov 2020 14:33:55 +1100
David Gibson <david@gibson.dropbear.id.au> wrote:

> On Fri, Nov 20, 2020 at 06:46:40PM +0100, Greg Kurz wrote:
> > We're going to kill the "nr_ends" field in a subsequent patch.
> > Prepare ground by using an helper instead of peeking into
> > the sPAPR XIVE structure directly.
> > 
> > Signed-off-by: Greg Kurz <groug@kaod.org>
> 
> Applied to ppc-for-6.0, thanks.
> 

I'm working on a new approach that doesn't need this change. In
particular, the new approach doesn't kill the "nr_ends" field, which
makes the changelog of this patch slightly wrong. Since it doesn't
bring much in the end, maybe you can just drop it from ppc-for-6.0?

> 
> > ---
> >  include/hw/ppc/spapr_xive.h |  1 +
> >  hw/intc/spapr_xive.c        | 23 ++++++++++++++---------
> >  hw/intc/spapr_xive_kvm.c    |  4 ++--
> >  3 files changed, 17 insertions(+), 11 deletions(-)
> > 
> > diff --git a/include/hw/ppc/spapr_xive.h b/include/hw/ppc/spapr_xive.h
> > index 26c8d90d7196..4b967f13c030 100644
> > --- a/include/hw/ppc/spapr_xive.h
> > +++ b/include/hw/ppc/spapr_xive.h
> > @@ -75,6 +75,7 @@ void spapr_xive_map_mmio(SpaprXive *xive);
> >  
> >  int spapr_xive_end_to_target(uint8_t end_blk, uint32_t end_idx,
> >                               uint32_t *out_server, uint8_t *out_prio);
> > +uint32_t spapr_xive_nr_ends(const SpaprXive *xive);
> >  
> >  /*
> >   * KVM XIVE device helpers
> > diff --git a/hw/intc/spapr_xive.c b/hw/intc/spapr_xive.c
> > index 60e0d5769dcc..f473ad9cba47 100644
> > --- a/hw/intc/spapr_xive.c
> > +++ b/hw/intc/spapr_xive.c
> > @@ -192,7 +192,7 @@ void spapr_xive_pic_print_info(SpaprXive *xive, Monitor *mon)
> >              uint32_t end_idx = xive_get_field64(EAS_END_INDEX, eas->w);
> >              XiveEND *end;
> >  
> > -            assert(end_idx < xive->nr_ends);
> > +            assert(end_idx < spapr_xive_nr_ends(xive));
> >              end = &xive->endt[end_idx];
> >  
> >              if (xive_end_is_valid(end)) {
> > @@ -270,7 +270,7 @@ static void spapr_xive_reset(void *dev)
> >      }
> >  
> >      /* Clear all ENDs */
> > -    for (i = 0; i < xive->nr_ends; i++) {
> > +    for (i = 0; i < spapr_xive_nr_ends(xive); i++) {
> >          spapr_xive_end_reset(&xive->endt[i]);
> >      }
> >  }
> > @@ -288,6 +288,11 @@ static void spapr_xive_instance_init(Object *obj)
> >      xive->fd = -1;
> >  }
> >  
> > +uint32_t spapr_xive_nr_ends(const SpaprXive *xive)
> > +{
> > +    return xive->nr_ends;
> > +}
> > +
> >  static void spapr_xive_realize(DeviceState *dev, Error **errp)
> >  {
> >      SpaprXive *xive = SPAPR_XIVE(dev);
> > @@ -336,7 +341,7 @@ static void spapr_xive_realize(DeviceState *dev, Error **errp)
> >       * Allocate the routing tables
> >       */
> >      xive->eat = g_new0(XiveEAS, xive->nr_irqs);
> > -    xive->endt = g_new0(XiveEND, xive->nr_ends);
> > +    xive->endt = g_new0(XiveEND, spapr_xive_nr_ends(xive));
> >  
> >      xive->nodename = g_strdup_printf("interrupt-controller@%" PRIx64,
> >                             xive->tm_base + XIVE_TM_USER_PAGE * (1 << TM_SHIFT));
> > @@ -375,7 +380,7 @@ static int spapr_xive_get_end(XiveRouter *xrtr,
> >  {
> >      SpaprXive *xive = SPAPR_XIVE(xrtr);
> >  
> > -    if (end_idx >= xive->nr_ends) {
> > +    if (end_idx >= spapr_xive_nr_ends(xive)) {
> >          return -1;
> >      }
> >  
> > @@ -389,7 +394,7 @@ static int spapr_xive_write_end(XiveRouter *xrtr, uint8_t end_blk,
> >  {
> >      SpaprXive *xive = SPAPR_XIVE(xrtr);
> >  
> > -    if (end_idx >= xive->nr_ends) {
> > +    if (end_idx >= spapr_xive_nr_ends(xive)) {
> >          return -1;
> >      }
> >  
> > @@ -1138,7 +1143,7 @@ static target_ulong h_int_get_source_config(PowerPCCPU *cpu,
> >      /* EAS_END_BLOCK is unused on sPAPR */
> >      end_idx = xive_get_field64(EAS_END_INDEX, eas.w);
> >  
> > -    assert(end_idx < xive->nr_ends);
> > +    assert(end_idx < spapr_xive_nr_ends(xive));
> >      end = &xive->endt[end_idx];
> >  
> >      nvt_blk = xive_get_field32(END_W6_NVT_BLOCK, end->w6);
> > @@ -1216,7 +1221,7 @@ static target_ulong h_int_get_queue_info(PowerPCCPU *cpu,
> >          return H_P2;
> >      }
> >  
> > -    assert(end_idx < xive->nr_ends);
> > +    assert(end_idx < spapr_xive_nr_ends(xive));
> >      end = &xive->endt[end_idx];
> >  
> >      args[0] = xive->end_base + (1ull << (end_xsrc->esb_shift + 1)) * end_idx;
> > @@ -1304,7 +1309,7 @@ static target_ulong h_int_set_queue_config(PowerPCCPU *cpu,
> >          return H_P2;
> >      }
> >  
> > -    assert(end_idx < xive->nr_ends);
> > +    assert(end_idx < spapr_xive_nr_ends(xive));
> >      memcpy(&end, &xive->endt[end_idx], sizeof(XiveEND));
> >  
> >      switch (qsize) {
> > @@ -1470,7 +1475,7 @@ static target_ulong h_int_get_queue_config(PowerPCCPU *cpu,
> >          return H_P2;
> >      }
> >  
> > -    assert(end_idx < xive->nr_ends);
> > +    assert(end_idx < spapr_xive_nr_ends(xive));
> >      end = &xive->endt[end_idx];
> >  
> >      args[0] = 0;
> > diff --git a/hw/intc/spapr_xive_kvm.c b/hw/intc/spapr_xive_kvm.c
> > index 66bf4c06fe55..1566016f0e28 100644
> > --- a/hw/intc/spapr_xive_kvm.c
> > +++ b/hw/intc/spapr_xive_kvm.c
> > @@ -531,7 +531,7 @@ static int kvmppc_xive_get_queues(SpaprXive *xive, Error **errp)
> >      int i;
> >      int ret;
> >  
> > -    for (i = 0; i < xive->nr_ends; i++) {
> > +    for (i = 0; i < spapr_xive_nr_ends(xive); i++) {
> >          if (!xive_end_is_valid(&xive->endt[i])) {
> >              continue;
> >          }
> > @@ -701,7 +701,7 @@ int kvmppc_xive_post_load(SpaprXive *xive, int version_id)
> >      assert(xive->fd != -1);
> >  
> >      /* Restore the ENDT first. The targetting depends on it. */
> > -    for (i = 0; i < xive->nr_ends; i++) {
> > +    for (i = 0; i < spapr_xive_nr_ends(xive); i++) {
> >          if (!xive_end_is_valid(&xive->endt[i])) {
> >              continue;
> >          }
>
David Gibson Nov. 26, 2020, 12:06 a.m. UTC | #11
On Wed, Nov 25, 2020 at 11:43:26PM +0100, Greg Kurz wrote:
> On Mon, 23 Nov 2020 14:33:55 +1100
> David Gibson <david@gibson.dropbear.id.au> wrote:
> 
> > On Fri, Nov 20, 2020 at 06:46:40PM +0100, Greg Kurz wrote:
> > > We're going to kill the "nr_ends" field in a subsequent patch.
> > > Prepare ground by using an helper instead of peeking into
> > > the sPAPR XIVE structure directly.
> > > 
> > > Signed-off-by: Greg Kurz <groug@kaod.org>
> > 
> > Applied to ppc-for-6.0, thanks.
> > 
> 
> I'm working on a new approach that doesn't need this change. Especially the
> new approach doesn't kill the "nr_ends" fields, which makes the changelog of
> this patch slightly wrong. Since it doesn't bring much in the end, maybe you
> can just drop it from ppc-for-6.0 ?

Done.

> 
> > 
> > > ---
> > >  include/hw/ppc/spapr_xive.h |  1 +
> > >  hw/intc/spapr_xive.c        | 23 ++++++++++++++---------
> > >  hw/intc/spapr_xive_kvm.c    |  4 ++--
> > >  3 files changed, 17 insertions(+), 11 deletions(-)
> > > 
> > > diff --git a/include/hw/ppc/spapr_xive.h b/include/hw/ppc/spapr_xive.h
> > > index 26c8d90d7196..4b967f13c030 100644
> > > --- a/include/hw/ppc/spapr_xive.h
> > > +++ b/include/hw/ppc/spapr_xive.h
> > > @@ -75,6 +75,7 @@ void spapr_xive_map_mmio(SpaprXive *xive);
> > >  
> > >  int spapr_xive_end_to_target(uint8_t end_blk, uint32_t end_idx,
> > >                               uint32_t *out_server, uint8_t *out_prio);
> > > +uint32_t spapr_xive_nr_ends(const SpaprXive *xive);
> > >  
> > >  /*
> > >   * KVM XIVE device helpers
> > > diff --git a/hw/intc/spapr_xive.c b/hw/intc/spapr_xive.c
> > > index 60e0d5769dcc..f473ad9cba47 100644
> > > --- a/hw/intc/spapr_xive.c
> > > +++ b/hw/intc/spapr_xive.c
> > > @@ -192,7 +192,7 @@ void spapr_xive_pic_print_info(SpaprXive *xive, Monitor *mon)
> > >              uint32_t end_idx = xive_get_field64(EAS_END_INDEX, eas->w);
> > >              XiveEND *end;
> > >  
> > > -            assert(end_idx < xive->nr_ends);
> > > +            assert(end_idx < spapr_xive_nr_ends(xive));
> > >              end = &xive->endt[end_idx];
> > >  
> > >              if (xive_end_is_valid(end)) {
> > > @@ -270,7 +270,7 @@ static void spapr_xive_reset(void *dev)
> > >      }
> > >  
> > >      /* Clear all ENDs */
> > > -    for (i = 0; i < xive->nr_ends; i++) {
> > > +    for (i = 0; i < spapr_xive_nr_ends(xive); i++) {
> > >          spapr_xive_end_reset(&xive->endt[i]);
> > >      }
> > >  }
> > > @@ -288,6 +288,11 @@ static void spapr_xive_instance_init(Object *obj)
> > >      xive->fd = -1;
> > >  }
> > >  
> > > +uint32_t spapr_xive_nr_ends(const SpaprXive *xive)
> > > +{
> > > +    return xive->nr_ends;
> > > +}
> > > +
> > >  static void spapr_xive_realize(DeviceState *dev, Error **errp)
> > >  {
> > >      SpaprXive *xive = SPAPR_XIVE(dev);
> > > @@ -336,7 +341,7 @@ static void spapr_xive_realize(DeviceState *dev, Error **errp)
> > >       * Allocate the routing tables
> > >       */
> > >      xive->eat = g_new0(XiveEAS, xive->nr_irqs);
> > > -    xive->endt = g_new0(XiveEND, xive->nr_ends);
> > > +    xive->endt = g_new0(XiveEND, spapr_xive_nr_ends(xive));
> > >  
> > >      xive->nodename = g_strdup_printf("interrupt-controller@%" PRIx64,
> > >                             xive->tm_base + XIVE_TM_USER_PAGE * (1 << TM_SHIFT));
> > > @@ -375,7 +380,7 @@ static int spapr_xive_get_end(XiveRouter *xrtr,
> > >  {
> > >      SpaprXive *xive = SPAPR_XIVE(xrtr);
> > >  
> > > -    if (end_idx >= xive->nr_ends) {
> > > +    if (end_idx >= spapr_xive_nr_ends(xive)) {
> > >          return -1;
> > >      }
> > >  
> > > @@ -389,7 +394,7 @@ static int spapr_xive_write_end(XiveRouter *xrtr, uint8_t end_blk,
> > >  {
> > >      SpaprXive *xive = SPAPR_XIVE(xrtr);
> > >  
> > > -    if (end_idx >= xive->nr_ends) {
> > > +    if (end_idx >= spapr_xive_nr_ends(xive)) {
> > >          return -1;
> > >      }
> > >  
> > > @@ -1138,7 +1143,7 @@ static target_ulong h_int_get_source_config(PowerPCCPU *cpu,
> > >      /* EAS_END_BLOCK is unused on sPAPR */
> > >      end_idx = xive_get_field64(EAS_END_INDEX, eas.w);
> > >  
> > > -    assert(end_idx < xive->nr_ends);
> > > +    assert(end_idx < spapr_xive_nr_ends(xive));
> > >      end = &xive->endt[end_idx];
> > >  
> > >      nvt_blk = xive_get_field32(END_W6_NVT_BLOCK, end->w6);
> > > @@ -1216,7 +1221,7 @@ static target_ulong h_int_get_queue_info(PowerPCCPU *cpu,
> > >          return H_P2;
> > >      }
> > >  
> > > -    assert(end_idx < xive->nr_ends);
> > > +    assert(end_idx < spapr_xive_nr_ends(xive));
> > >      end = &xive->endt[end_idx];
> > >  
> > >      args[0] = xive->end_base + (1ull << (end_xsrc->esb_shift + 1)) * end_idx;
> > > @@ -1304,7 +1309,7 @@ static target_ulong h_int_set_queue_config(PowerPCCPU *cpu,
> > >          return H_P2;
> > >      }
> > >  
> > > -    assert(end_idx < xive->nr_ends);
> > > +    assert(end_idx < spapr_xive_nr_ends(xive));
> > >      memcpy(&end, &xive->endt[end_idx], sizeof(XiveEND));
> > >  
> > >      switch (qsize) {
> > > @@ -1470,7 +1475,7 @@ static target_ulong h_int_get_queue_config(PowerPCCPU *cpu,
> > >          return H_P2;
> > >      }
> > >  
> > > -    assert(end_idx < xive->nr_ends);
> > > +    assert(end_idx < spapr_xive_nr_ends(xive));
> > >      end = &xive->endt[end_idx];
> > >  
> > >      args[0] = 0;
> > > diff --git a/hw/intc/spapr_xive_kvm.c b/hw/intc/spapr_xive_kvm.c
> > > index 66bf4c06fe55..1566016f0e28 100644
> > > --- a/hw/intc/spapr_xive_kvm.c
> > > +++ b/hw/intc/spapr_xive_kvm.c
> > > @@ -531,7 +531,7 @@ static int kvmppc_xive_get_queues(SpaprXive *xive, Error **errp)
> > >      int i;
> > >      int ret;
> > >  
> > > -    for (i = 0; i < xive->nr_ends; i++) {
> > > +    for (i = 0; i < spapr_xive_nr_ends(xive); i++) {
> > >          if (!xive_end_is_valid(&xive->endt[i])) {
> > >              continue;
> > >          }
> > > @@ -701,7 +701,7 @@ int kvmppc_xive_post_load(SpaprXive *xive, int version_id)
> > >      assert(xive->fd != -1);
> > >  
> > >      /* Restore the ENDT first. The targetting depends on it. */
> > > -    for (i = 0; i < xive->nr_ends; i++) {
> > > +    for (i = 0; i < spapr_xive_nr_ends(xive); i++) {
> > >          if (!xive_end_is_valid(&xive->endt[i])) {
> > >              continue;
> > >          }
> > 
>
Cédric Le Goater Nov. 26, 2020, 7:06 a.m. UTC | #12
>> why ? we would stop claiming IPIs in spapr_irq_init() and so
>> they won't appear as being valid anymore, at boot time or
>> restore time.
>>
> 
> If we don't claim the IPIs in spapr_irq_init() anymore then
> we must at least claim them on the path of H_INT_GET_SOURCE_INFO
> otherwise it will fail with H_P2 and the guest won't even
> try to setup the IPI. 

IPIs could be handled as a special case since we know which range
they are in.

> Even if we do that, we still have a
> window where the source is valid in QEMU but not yet at
> the KVM level.

Maybe some kind of lazy binding with KVM would help us.

The problem is that the guest ESB pages won't be backed by HW
pages, which could lead to a crash. We could overlap the emulated
one until we are bound to KVM. Just a thought.

C.

Patch

diff --git a/include/hw/ppc/spapr_xive.h b/include/hw/ppc/spapr_xive.h
index 26c8d90d7196..4b967f13c030 100644
--- a/include/hw/ppc/spapr_xive.h
+++ b/include/hw/ppc/spapr_xive.h
@@ -75,6 +75,7 @@  void spapr_xive_map_mmio(SpaprXive *xive);
 
 int spapr_xive_end_to_target(uint8_t end_blk, uint32_t end_idx,
                              uint32_t *out_server, uint8_t *out_prio);
+uint32_t spapr_xive_nr_ends(const SpaprXive *xive);
 
 /*
  * KVM XIVE device helpers
diff --git a/hw/intc/spapr_xive.c b/hw/intc/spapr_xive.c
index 60e0d5769dcc..f473ad9cba47 100644
--- a/hw/intc/spapr_xive.c
+++ b/hw/intc/spapr_xive.c
@@ -192,7 +192,7 @@  void spapr_xive_pic_print_info(SpaprXive *xive, Monitor *mon)
             uint32_t end_idx = xive_get_field64(EAS_END_INDEX, eas->w);
             XiveEND *end;
 
-            assert(end_idx < xive->nr_ends);
+            assert(end_idx < spapr_xive_nr_ends(xive));
             end = &xive->endt[end_idx];
 
             if (xive_end_is_valid(end)) {
@@ -270,7 +270,7 @@  static void spapr_xive_reset(void *dev)
     }
 
     /* Clear all ENDs */
-    for (i = 0; i < xive->nr_ends; i++) {
+    for (i = 0; i < spapr_xive_nr_ends(xive); i++) {
         spapr_xive_end_reset(&xive->endt[i]);
     }
 }
@@ -288,6 +288,11 @@  static void spapr_xive_instance_init(Object *obj)
     xive->fd = -1;
 }
 
+uint32_t spapr_xive_nr_ends(const SpaprXive *xive)
+{
+    return xive->nr_ends;
+}
+
 static void spapr_xive_realize(DeviceState *dev, Error **errp)
 {
     SpaprXive *xive = SPAPR_XIVE(dev);
@@ -336,7 +341,7 @@  static void spapr_xive_realize(DeviceState *dev, Error **errp)
      * Allocate the routing tables
      */
     xive->eat = g_new0(XiveEAS, xive->nr_irqs);
-    xive->endt = g_new0(XiveEND, xive->nr_ends);
+    xive->endt = g_new0(XiveEND, spapr_xive_nr_ends(xive));
 
     xive->nodename = g_strdup_printf("interrupt-controller@%" PRIx64,
                            xive->tm_base + XIVE_TM_USER_PAGE * (1 << TM_SHIFT));
@@ -375,7 +380,7 @@  static int spapr_xive_get_end(XiveRouter *xrtr,
 {
     SpaprXive *xive = SPAPR_XIVE(xrtr);
 
-    if (end_idx >= xive->nr_ends) {
+    if (end_idx >= spapr_xive_nr_ends(xive)) {
         return -1;
     }
 
@@ -389,7 +394,7 @@  static int spapr_xive_write_end(XiveRouter *xrtr, uint8_t end_blk,
 {
     SpaprXive *xive = SPAPR_XIVE(xrtr);
 
-    if (end_idx >= xive->nr_ends) {
+    if (end_idx >= spapr_xive_nr_ends(xive)) {
         return -1;
     }
 
@@ -1138,7 +1143,7 @@  static target_ulong h_int_get_source_config(PowerPCCPU *cpu,
     /* EAS_END_BLOCK is unused on sPAPR */
     end_idx = xive_get_field64(EAS_END_INDEX, eas.w);
 
-    assert(end_idx < xive->nr_ends);
+    assert(end_idx < spapr_xive_nr_ends(xive));
     end = &xive->endt[end_idx];
 
     nvt_blk = xive_get_field32(END_W6_NVT_BLOCK, end->w6);
@@ -1216,7 +1221,7 @@  static target_ulong h_int_get_queue_info(PowerPCCPU *cpu,
         return H_P2;
     }
 
-    assert(end_idx < xive->nr_ends);
+    assert(end_idx < spapr_xive_nr_ends(xive));
     end = &xive->endt[end_idx];
 
     args[0] = xive->end_base + (1ull << (end_xsrc->esb_shift + 1)) * end_idx;
@@ -1304,7 +1309,7 @@  static target_ulong h_int_set_queue_config(PowerPCCPU *cpu,
         return H_P2;
     }
 
-    assert(end_idx < xive->nr_ends);
+    assert(end_idx < spapr_xive_nr_ends(xive));
     memcpy(&end, &xive->endt[end_idx], sizeof(XiveEND));
 
     switch (qsize) {
@@ -1470,7 +1475,7 @@  static target_ulong h_int_get_queue_config(PowerPCCPU *cpu,
         return H_P2;
     }
 
-    assert(end_idx < xive->nr_ends);
+    assert(end_idx < spapr_xive_nr_ends(xive));
     end = &xive->endt[end_idx];
 
     args[0] = 0;
diff --git a/hw/intc/spapr_xive_kvm.c b/hw/intc/spapr_xive_kvm.c
index 66bf4c06fe55..1566016f0e28 100644
--- a/hw/intc/spapr_xive_kvm.c
+++ b/hw/intc/spapr_xive_kvm.c
@@ -531,7 +531,7 @@  static int kvmppc_xive_get_queues(SpaprXive *xive, Error **errp)
     int i;
     int ret;
 
-    for (i = 0; i < xive->nr_ends; i++) {
+    for (i = 0; i < spapr_xive_nr_ends(xive); i++) {
         if (!xive_end_is_valid(&xive->endt[i])) {
             continue;
         }
@@ -701,7 +701,7 @@  int kvmppc_xive_post_load(SpaprXive *xive, int version_id)
     assert(xive->fd != -1);
 
     /* Restore the ENDT first. The targetting depends on it. */
-    for (i = 0; i < xive->nr_ends; i++) {
+    for (i = 0; i < spapr_xive_nr_ends(xive); i++) {
         if (!xive_end_is_valid(&xive->endt[i])) {
             continue;
         }