[6/6] xive: Don't use CPU_FOREACH() to perform CAM line matching

Message ID 157184234731.3053790.18369348907304339634.stgit@bahia.lan
State New
Series ppc: Reparent the interrupt presenter

Commit Message

Greg Kurz Oct. 23, 2019, 2:52 p.m. UTC
Now that the TCTX objects are children of the XIVE router, stop
using CPU_FOREACH() when looking for a matching VCPU target.

Signed-off-by: Greg Kurz <groug@kaod.org>
---
 hw/intc/xive.c |  100 +++++++++++++++++++++++++++++++++++---------------------
 1 file changed, 62 insertions(+), 38 deletions(-)

Comments

David Gibson Oct. 24, 2019, 3:05 a.m. UTC | #1
On Wed, Oct 23, 2019 at 04:52:27PM +0200, Greg Kurz wrote:
> Now that the TCTX objects are children of the XIVE router, stop
> using CPU_FOREACH() when looking for a matching VCPU target.
> 
> Signed-off-by: Greg Kurz <groug@kaod.org>
> ---
>  hw/intc/xive.c |  100 +++++++++++++++++++++++++++++++++++---------------------
>  1 file changed, 62 insertions(+), 38 deletions(-)
> 
> diff --git a/hw/intc/xive.c b/hw/intc/xive.c
> index 40ce43152456..ec5e7d0ee39a 100644
> --- a/hw/intc/xive.c
> +++ b/hw/intc/xive.c
> @@ -1403,55 +1403,79 @@ typedef struct XiveTCTXMatch {
>      uint8_t ring;
>  } XiveTCTXMatch;
>  
> -static bool xive_presenter_match(XiveRouter *xrtr, uint8_t format,
> -                                 uint8_t nvt_blk, uint32_t nvt_idx,
> -                                 bool cam_ignore, uint8_t priority,
> -                                 uint32_t logic_serv, XiveTCTXMatch *match)
> +typedef struct XivePresenterMatch {
> +    uint8_t format;
> +    uint8_t nvt_blk;
> +    uint32_t nvt_idx;
> +    bool cam_ignore;
> +    uint8_t priority;
> +    uint32_t logic_serv;
> +    XiveTCTXMatch *match;
> +    int count;
> +} XivePresenterMatch;
> +
> +static int do_xive_presenter_match(Object *child, void *opaque)
>  {
> -    CPUState *cs;
> +    XiveTCTX *tctx = XIVE_TCTX(child);
> +    XivePresenterMatch *xpm = opaque;
> +    int ring;
>  
>      /*
>       * TODO (PowerNV): handle chip_id overwrite of block field for
>       * hardwired CAM compares
>       */
>  
> -    CPU_FOREACH(cs) {
> -        XiveTCTX *tctx = xive_router_get_tctx(xrtr, cs);
> -        int ring;
> +    /*
> +     * HW checks that the CPU is enabled in the Physical Thread
> +     * Enable Register (PTER).
> +     */
>  
> -        /*
> -         * Skip partially initialized vCPUs. This can happen when
> -         * vCPUs are hotplugged.
> -         */
> -        if (!tctx) {
> -            continue;
> +    /*
> +     * Check the thread context CAM lines and record matches. We
> +     * will handle CPU exception delivery later
> +     */
> +    ring = xive_presenter_tctx_match(tctx, xpm->format, xpm->nvt_blk,
> +                                     xpm->nvt_idx, xpm->cam_ignore,
> +                                     xpm->logic_serv);
> +
> +    /*
> +     * Save the context and follow on to catch duplicates, that we
> +     * don't support yet.
> +     */
> +    if (ring != -1) {
> +        if (xpm->match->tctx) {
> +            qemu_log_mask(LOG_GUEST_ERROR, "XIVE: already found a thread "
> +                          "context NVT %x/%x\n", xpm->nvt_blk, xpm->nvt_idx);
> +            return -1;
>          }
>  
> -        /*
> -         * HW checks that the CPU is enabled in the Physical Thread
> -         * Enable Register (PTER).
> -         */
> +        xpm->match->ring = ring;
> +        xpm->match->tctx = tctx;
> +        xpm->count++;
> +    }
>  
> -        /*
> -         * Check the thread context CAM lines and record matches. We
> -         * will handle CPU exception delivery later
> -         */
> -        ring = xive_presenter_tctx_match(tctx, format, nvt_blk, nvt_idx,
> -                                         cam_ignore, logic_serv);
> -        /*
> -         * Save the context and follow on to catch duplicates, that we
> -         * don't support yet.
> -         */
> -        if (ring != -1) {
> -            if (match->tctx) {
> -                qemu_log_mask(LOG_GUEST_ERROR, "XIVE: already found a thread "
> -                              "context NVT %x/%x\n", nvt_blk, nvt_idx);
> -                return false;
> -            }
> -
> -            match->ring = ring;
> -            match->tctx = tctx;
> -        }
> +    return 0;
> +}
> +
> +static bool xive_presenter_match(XiveRouter *xrtr, uint8_t format,
> +                                 uint8_t nvt_blk, uint32_t nvt_idx,
> +                                 bool cam_ignore, uint8_t priority,
> +                                 uint32_t logic_serv, XiveTCTXMatch *match)
> +{
> +    XivePresenterMatch xpm = {
> +        .format     = format,
> +        .nvt_blk    = nvt_blk,
> +        .nvt_idx    = nvt_idx,
> +        .cam_ignore = cam_ignore,
> +        .priority   = priority,
> +        .logic_serv = logic_serv,
> +        .match      = match,
> +        .count      = 0,
> +    };
> +
> +    if (object_child_foreach_type(OBJECT(xrtr), TYPE_XIVE_TCTX,
> +                                  do_xive_presenter_match, &xpm) < 0) {
> +        return false;

Hrm... xive_presenter_match() is potentially a pretty hot path; it's
called on every interrupt delivery - especially since we don't have a
usable KVM irq chip for powernv machines.  I'm concerned that using
something as heavyweight as object_child_foreach() might have a
noticeable performance impact.
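
To make the cost difference concrete, here is a small standalone C sketch
(toy code, not QEMU; all names are made up for the example) contrasting the
two iteration styles: walking an intrusive tail queue, which is essentially
what CPU_FOREACH() does over the global CPU list, versus scanning a table of
typed child properties with a per-entry type check, which is roughly the kind
of work a QOM child walk has to do before the callback even runs.

#include <stdio.h>
#include <string.h>
#include <sys/queue.h>

/* Hypothetical per-thread context, standing in for XiveTCTX. */
struct tctx {
    int id;
    TAILQ_ENTRY(tctx) node;            /* intrusive link, like QTAILQ_ENTRY */
};

TAILQ_HEAD(tctx_list, tctx);

/* Stand-in for a QOM child property: a name, a type string and a pointer. */
struct child_prop {
    const char *name;
    const char *type;
    void *child;
};

int main(void)
{
    struct tctx threads[4] = { { .id = 0 }, { .id = 1 },
                               { .id = 2 }, { .id = 3 } };
    struct tctx_list cpus = TAILQ_HEAD_INITIALIZER(cpus);
    struct child_prop props[] = {
        { "xive-source",   "child<xive-source>",     NULL },
        { "tctx[0]",       "child<xive-tctx>",       &threads[0] },
        { "tctx[1]",       "child<xive-tctx>",       &threads[1] },
        { "end-source",    "child<xive-end-source>", NULL },
        { "tctx[2]",       "child<xive-tctx>",       &threads[2] },
        { "tctx[3]",       "child<xive-tctx>",       &threads[3] },
    };
    struct tctx *t;
    size_t i;

    for (i = 0; i < 4; i++) {
        TAILQ_INSERT_TAIL(&cpus, &threads[i], node);
    }

    /* Style 1: intrusive list walk -- one pointer chase per element. */
    TAILQ_FOREACH(t, &cpus, node) {
        printf("list walk: tctx %d\n", t->id);
    }

    /*
     * Style 2: property table walk -- every child is visited and its type
     * string checked before the callback can even run.
     */
    for (i = 0; i < sizeof(props) / sizeof(props[0]); i++) {
        if (strcmp(props[i].type, "child<xive-tctx>") == 0) {
            t = props[i].child;
            printf("property walk: tctx %d\n", t->id);
        }
    }
    return 0;
}

The point is not the exact numbers but that the second style visits unrelated
children and does string work per entry, which adds up on a per-interrupt path.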

>      }
>  
>      if (!match->tctx) {
>
Greg Kurz Oct. 24, 2019, 12:33 p.m. UTC | #2
On Thu, 24 Oct 2019 14:05:36 +1100
David Gibson <david@gibson.dropbear.id.au> wrote:

> On Wed, Oct 23, 2019 at 04:52:27PM +0200, Greg Kurz wrote:
> > Now that the TCTX objects are children of the XIVE router, stop
> > using CPU_FOREACH() when looking for a matching VCPU target.
> > 
> > Signed-off-by: Greg Kurz <groug@kaod.org>
> > [...]
> > +    if (object_child_foreach_type(OBJECT(xrtr), TYPE_XIVE_TCTX,
> > +                                  do_xive_presenter_match, &xpm) < 0) {
> > +        return false;
> 
> Hrm... xive_presenter_match() is potentially a pretty hot path; it's
> called on every interrupt delivery - especially since we don't have a
> usable KVM irq chip for powernv machines.  I'm concerned that using
> something as heavyweight as object_child_foreach() might have a
> noticeable performance impact.
> 

Well, the XiveRouter _only_ has 3 extra children (XiveSource,
XiveENDSource and TIMA), but indeed object_child_foreach() might
cost more than QTAILQ_FOREACH_RCU(). A possible option could be
to have a QTAILQ of presenters under the machine for sPAPR or
under the chip for PNV, in order to avoid the need to filter out
VCPUs that we don't want to consider, i.e. partly realized ones
with sPAPR and ones from another chip with PNV.

But as said in another mail, the safer option for 4.2 is probably
to fix the CPU_FOREACH() users, which is already the case here for
sPAPR.
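
A minimal standalone sketch of that list-of-presenters idea (hypothetical
names throughout; it uses the sys/queue.h TAILQ macros, of which QEMU's
QTAILQ macros in include/qemu/queue.h are the direct analogue): presenters
would register on a per-machine or per-chip list when they are realized, so
the match loop only ever sees fully realized candidates from the right chip
and needs no extra filtering.

#include <stdio.h>
#include <stdint.h>
#include <sys/queue.h>

/* Hypothetical presenter entry, standing in for a XiveTCTX. */
struct presenter {
    uint8_t chip_id;
    uint32_t cam_line;
    TAILQ_ENTRY(presenter) node;
};

TAILQ_HEAD(presenter_list, presenter);

/* Would be called from the presenter's realize path (hypothetical). */
static void presenter_register(struct presenter_list *head, struct presenter *p)
{
    TAILQ_INSERT_TAIL(head, p, node);
}

/* Would be called from the presenter's unrealize path (hypothetical). */
static void presenter_unregister(struct presenter_list *head, struct presenter *p)
{
    TAILQ_REMOVE(head, p, node);
}

/*
 * Match loop: every element on the list is a valid candidate, so no
 * "skip partially initialized vCPU" test is needed any more.
 */
static struct presenter *presenter_match(struct presenter_list *head,
                                         uint8_t chip_id, uint32_t cam_line)
{
    struct presenter *p;

    TAILQ_FOREACH(p, head, node) {
        if (p->chip_id == chip_id && p->cam_line == cam_line) {
            return p;
        }
    }
    return NULL;
}

int main(void)
{
    struct presenter_list presenters = TAILQ_HEAD_INITIALIZER(presenters);
    struct presenter t0 = { .chip_id = 0, .cam_line = 0x10 };
    struct presenter t1 = { .chip_id = 0, .cam_line = 0x11 };

    presenter_register(&presenters, &t0);
    presenter_register(&presenters, &t1);
    printf("match: %p\n", (void *)presenter_match(&presenters, 0, 0x11));
    presenter_unregister(&presenters, &t1);
    presenter_unregister(&presenters, &t0);
    return 0;
}

Registering in realize and removing in unrealize would also cover vCPU
hotplug and unplug naturally.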

> >      }
> >  
> >      if (!match->tctx) {
> > 
>
David Gibson Oct. 27, 2019, 5:03 p.m. UTC | #3
On Thu, Oct 24, 2019 at 02:33:27PM +0200, Greg Kurz wrote:
> On Thu, 24 Oct 2019 14:05:36 +1100
> David Gibson <david@gibson.dropbear.id.au> wrote:
> 
> > On Wed, Oct 23, 2019 at 04:52:27PM +0200, Greg Kurz wrote:
> > > Now that the TCTX objects are children of the XIVE router, stop
> > > using CPU_FOREACH() when looking for a matching VCPU target.
> > > 
> > > Signed-off-by: Greg Kurz <groug@kaod.org>
> > > [...]
> > > +    if (object_child_foreach_type(OBJECT(xrtr), TYPE_XIVE_TCTX,
> > > +                                  do_xive_presenter_match, &xpm) < 0) {
> > > +        return false;
> > 
> > Hrm... xive_presenter_match() is potentially a pretty hot path; it's
> > called on every interrupt delivery - especially since we don't have a
> > usable KVM irq chip for powernv machines.  I'm concerned that using
> > something as heavyweight as object_child_foreach() might have a
> > noticeable performance impact.
> > 
> 
> Well, the XiveRouter _only_ has 3 extra children (XiveSource,
> XiveENDSource and TIMA) but indeed object_child_foreach() might
> cost more than QTAILQ_FOREACH_RCU().

Right, it's not so much the redundant children as whatever we have
to do to delve into the children data structure that I'm concerned
about here.

> A possible option could be
> to have a QTAILQ of presenters under the machine for sPAPR or
> under the chip for PNV, in order to avoid the need to filter out
> VCPUs that we don't want to consider, ie. partly realized with
> sPAPR and from another chip with PNV.

If we can manage it, I'd actually suggest a plain old array/vector of
them, rather than a QTAILQ.
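
For comparison, here is a sketch of the flat-array variant (again standalone
toy code with hypothetical names; in QEMU this could just as well be a
g_new()-allocated array sized from the machine's CPU count, or a GPtrArray):
registration appends to the table and the hot path becomes a plain indexed
loop over contiguous pointers.

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>

/* Hypothetical presenter, standing in for a XiveTCTX. */
typedef struct Presenter {
    uint8_t chip_id;
    uint32_t cam_line;
} Presenter;

/* Flat table of presenters owned by the machine (hypothetical). */
typedef struct PresenterTable {
    Presenter **entries;
    size_t count;
    size_t capacity;
} PresenterTable;

static void presenter_table_add(PresenterTable *tbl, Presenter *p)
{
    if (tbl->count == tbl->capacity) {
        /* Grow geometrically; error handling omitted for brevity. */
        tbl->capacity = tbl->capacity ? tbl->capacity * 2 : 8;
        tbl->entries = realloc(tbl->entries,
                               tbl->capacity * sizeof(*tbl->entries));
    }
    tbl->entries[tbl->count++] = p;
}

/* The hot path: a linear scan over an array of pointers. */
static Presenter *presenter_table_match(PresenterTable *tbl,
                                        uint8_t chip_id, uint32_t cam_line)
{
    for (size_t i = 0; i < tbl->count; i++) {
        Presenter *p = tbl->entries[i];

        if (p->chip_id == chip_id && p->cam_line == cam_line) {
            return p;
        }
    }
    return NULL;
}

int main(void)
{
    PresenterTable tbl = { 0 };
    Presenter t0 = { .chip_id = 0, .cam_line = 0x10 };
    Presenter t1 = { .chip_id = 0, .cam_line = 0x11 };

    presenter_table_add(&tbl, &t0);
    presenter_table_add(&tbl, &t1);
    printf("match: %p\n", (void *)presenter_table_match(&tbl, 0, 0x11));
    free(tbl.entries);
    return 0;
}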

> But as said in another mail, the safer for 4.2 is probably to
> fix the CPU_FOREACH() users, which is already the case here for
> sPAPR.
> 
> > >      }
> > >  
> > >      if (!match->tctx) {
> > > 
> > 
>

Patch

diff --git a/hw/intc/xive.c b/hw/intc/xive.c
index 40ce43152456..ec5e7d0ee39a 100644
--- a/hw/intc/xive.c
+++ b/hw/intc/xive.c
@@ -1403,55 +1403,79 @@  typedef struct XiveTCTXMatch {
     uint8_t ring;
 } XiveTCTXMatch;
 
-static bool xive_presenter_match(XiveRouter *xrtr, uint8_t format,
-                                 uint8_t nvt_blk, uint32_t nvt_idx,
-                                 bool cam_ignore, uint8_t priority,
-                                 uint32_t logic_serv, XiveTCTXMatch *match)
+typedef struct XivePresenterMatch {
+    uint8_t format;
+    uint8_t nvt_blk;
+    uint32_t nvt_idx;
+    bool cam_ignore;
+    uint8_t priority;
+    uint32_t logic_serv;
+    XiveTCTXMatch *match;
+    int count;
+} XivePresenterMatch;
+
+static int do_xive_presenter_match(Object *child, void *opaque)
 {
-    CPUState *cs;
+    XiveTCTX *tctx = XIVE_TCTX(child);
+    XivePresenterMatch *xpm = opaque;
+    int ring;
 
     /*
      * TODO (PowerNV): handle chip_id overwrite of block field for
      * hardwired CAM compares
      */
 
-    CPU_FOREACH(cs) {
-        XiveTCTX *tctx = xive_router_get_tctx(xrtr, cs);
-        int ring;
+    /*
+     * HW checks that the CPU is enabled in the Physical Thread
+     * Enable Register (PTER).
+     */
 
-        /*
-         * Skip partially initialized vCPUs. This can happen when
-         * vCPUs are hotplugged.
-         */
-        if (!tctx) {
-            continue;
+    /*
+     * Check the thread context CAM lines and record matches. We
+     * will handle CPU exception delivery later
+     */
+    ring = xive_presenter_tctx_match(tctx, xpm->format, xpm->nvt_blk,
+                                     xpm->nvt_idx, xpm->cam_ignore,
+                                     xpm->logic_serv);
+
+    /*
+     * Save the context and follow on to catch duplicates, that we
+     * don't support yet.
+     */
+    if (ring != -1) {
+        if (xpm->match->tctx) {
+            qemu_log_mask(LOG_GUEST_ERROR, "XIVE: already found a thread "
+                          "context NVT %x/%x\n", xpm->nvt_blk, xpm->nvt_idx);
+            return -1;
         }
 
-        /*
-         * HW checks that the CPU is enabled in the Physical Thread
-         * Enable Register (PTER).
-         */
+        xpm->match->ring = ring;
+        xpm->match->tctx = tctx;
+        xpm->count++;
+    }
 
-        /*
-         * Check the thread context CAM lines and record matches. We
-         * will handle CPU exception delivery later
-         */
-        ring = xive_presenter_tctx_match(tctx, format, nvt_blk, nvt_idx,
-                                         cam_ignore, logic_serv);
-        /*
-         * Save the context and follow on to catch duplicates, that we
-         * don't support yet.
-         */
-        if (ring != -1) {
-            if (match->tctx) {
-                qemu_log_mask(LOG_GUEST_ERROR, "XIVE: already found a thread "
-                              "context NVT %x/%x\n", nvt_blk, nvt_idx);
-                return false;
-            }
-
-            match->ring = ring;
-            match->tctx = tctx;
-        }
+    return 0;
+}
+
+static bool xive_presenter_match(XiveRouter *xrtr, uint8_t format,
+                                 uint8_t nvt_blk, uint32_t nvt_idx,
+                                 bool cam_ignore, uint8_t priority,
+                                 uint32_t logic_serv, XiveTCTXMatch *match)
+{
+    XivePresenterMatch xpm = {
+        .format     = format,
+        .nvt_blk    = nvt_blk,
+        .nvt_idx    = nvt_idx,
+        .cam_ignore = cam_ignore,
+        .priority   = priority,
+        .logic_serv = logic_serv,
+        .match      = match,
+        .count      = 0,
+    };
+
+    if (object_child_foreach_type(OBJECT(xrtr), TYPE_XIVE_TCTX,
+                                  do_xive_presenter_match, &xpm) < 0) {
+        return false;
     }
 
     if (!match->tctx) {