diff mbox series

[v2,3/4] spapr/xive: Allocate IPIs independently from the other sources

Message ID 20200820134547.2355743-4-clg@kaod.org
State New
Headers show
Series spapr/xive: Allocate vCPU IPIs from the vCPU contexts | expand

Commit Message

Cédric Le Goater Aug. 20, 2020, 1:45 p.m. UTC
The vCPU IPIs are now allocated in kvmppc_xive_cpu_connect() when the
vCPU connects to the KVM device and not when all the sources are reset
in kvmppc_xive_source_reset()

This requires extra care for hotplug vCPUs and VM restore.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
---
 hw/intc/spapr_xive_kvm.c | 47 +++++++++++++++++++++++++++++++++++-----
 1 file changed, 42 insertions(+), 5 deletions(-)

Comments

Cédric Le Goater Nov. 5, 2020, 8:37 a.m. UTC | #1
On 8/20/20 3:45 PM, Cédric Le Goater wrote:
> The vCPU IPIs are now allocated in kvmppc_xive_cpu_connect() when the
> vCPU connects to the KVM device and not when all the sources are reset
> in kvmppc_xive_source_reset()

This patch is introducing a regression when vsmt is in used.

  https://bugs.launchpad.net/qemu/+bug/1900241

when the OS boots, H_INT_SET_SOURCE_CONFIG fails with EINVAL, which 
should mean that the IPI is not created at the host/KVM level.

> This requires extra care for hotplug vCPUs and VM restore.
>
> Signed-off-by: Cédric Le Goater <clg@kaod.org>
> ---
>  hw/intc/spapr_xive_kvm.c | 47 +++++++++++++++++++++++++++++++++++-----
>  1 file changed, 42 insertions(+), 5 deletions(-)
> 
> diff --git a/hw/intc/spapr_xive_kvm.c b/hw/intc/spapr_xive_kvm.c
> index 4ea687c93ecd..f838c208b559 100644
> --- a/hw/intc/spapr_xive_kvm.c
> +++ b/hw/intc/spapr_xive_kvm.c
> @@ -146,6 +146,15 @@ int kvmppc_xive_cpu_synchronize_state(XiveTCTX *tctx, Error **errp)
>      return s.ret;
>  }
>  
> +static int kvmppc_xive_reset_ipi(SpaprXive *xive, CPUState *cs, Error **errp)
> +{
> +    unsigned long ipi = kvm_arch_vcpu_id(cs);

( I am wondering if this is the correct id to use ? )

> +    uint64_t state = 0;
> +
> +    return kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_SOURCE, ipi,
> +                              &state, true, errp);
> +}
> +
>  int kvmppc_xive_cpu_connect(XiveTCTX *tctx, Error **errp)
>  {
>      ERRP_GUARD();
> @@ -175,6 +184,12 @@ int kvmppc_xive_cpu_connect(XiveTCTX *tctx, Error **errp)
>          return ret;
>      }
>  
> +    /* Create/reset the vCPU IPI */
> +    ret = kvmppc_xive_reset_ipi(xive, tctx->cs, errp);
> +    if (ret < 0) {
> +        return ret;
> +    }
> +
>      kvm_cpu_enable(tctx->cs);
>      return 0;
>  }
> @@ -260,6 +275,12 @@ int kvmppc_xive_source_reset_one(XiveSource *xsrc, int srcno, Error **errp)
>  
>      assert(xive->fd != -1);
>  
> +    /*
> +     * The vCPU IPIs are now allocated in kvmppc_xive_cpu_connect()
> +     * and not with all sources in kvmppc_xive_source_reset()
> +     */
> +    assert(srcno >= SPAPR_XIRQ_BASE);
> +
>      if (xive_source_irq_is_lsi(xsrc, srcno)) {
>          state |= KVM_XIVE_LEVEL_SENSITIVE;
>          if (xsrc->status[srcno] & XIVE_STATUS_ASSERTED) {
> @@ -271,12 +292,28 @@ int kvmppc_xive_source_reset_one(XiveSource *xsrc, int srcno, Error **errp)
>                               true, errp);
>  }
>  
> +/*
> + * To be valid, a source must have been claimed by the machine (valid
> + * entry in the EAS table) and if it is a vCPU IPI, the vCPU should
> + * have been enabled, which means the IPI has been allocated in
> + * kvmppc_xive_cpu_connect().
> + */
> +static bool xive_source_is_valid(SpaprXive *xive, int i)
> +{
> +    return xive_eas_is_valid(&xive->eat[i]) &&
> +        (i >= SPAPR_XIRQ_BASE || kvm_cpu_is_enabled(i));
> +}
> +
>  static int kvmppc_xive_source_reset(XiveSource *xsrc, Error **errp)
>  {
>      SpaprXive *xive = SPAPR_XIVE(xsrc->xive);
>      int i;
>  
> -    for (i = 0; i < xsrc->nr_irqs; i++) {
> +    /*
> +     * Skip the vCPU IPIs. These are created/reset when the vCPUs are
> +     * connected in kvmppc_xive_cpu_connect()
> +     */
> +    for (i = SPAPR_XIRQ_BASE; i < xsrc->nr_irqs; i++) {

This change skips the range [ 0x0 ... 0x1000 ] and relies on the presenter
to create the vCPU IPIs at the KVM level. But spapr_irq_init() could have 
claimed more in : 

        /* Enable the CPU IPIs */
        for (i = 0; i < nr_servers; ++i) {
            SpaprInterruptControllerClass *sicc
                = SPAPR_INTC_GET_CLASS(spapr->xive);

            if (sicc->claim_irq(SPAPR_INTC(spapr->xive), SPAPR_IRQ_IPI + i,
                                false, errp) < 0) {
                return;
            }
        }

I think this is what is happening with vsmt. However, I don't know how to
fix it :/

Thanks,

C.

>          int ret;
>  
>          if (!xive_eas_is_valid(&xive->eat[i])) {
> @@ -370,7 +407,7 @@ static void kvmppc_xive_source_get_state(XiveSource *xsrc)
>      for (i = 0; i < xsrc->nr_irqs; i++) {
>          uint8_t pq;
>  
> -        if (!xive_eas_is_valid(&xive->eat[i])) {
> +        if (!xive_source_is_valid(xive, i)) {
>              continue;
>          }
>  
> @@ -553,7 +590,7 @@ static void kvmppc_xive_change_state_handler(void *opaque, int running,
>              uint8_t pq;
>              uint8_t old_pq;
>  
> -            if (!xive_eas_is_valid(&xive->eat[i])) {
> +            if (!xive_source_is_valid(xive, i)) {
>                  continue;
>              }
>  
> @@ -581,7 +618,7 @@ static void kvmppc_xive_change_state_handler(void *opaque, int running,
>      for (i = 0; i < xsrc->nr_irqs; i++) {
>          uint8_t pq;
>  
> -        if (!xive_eas_is_valid(&xive->eat[i])) {
> +        if (!xive_source_is_valid(xive, i)) {
>              continue;
>          }
>  
> @@ -696,7 +733,7 @@ int kvmppc_xive_post_load(SpaprXive *xive, int version_id)
>  
>      /* Restore the EAT */
>      for (i = 0; i < xive->nr_irqs; i++) {
> -        if (!xive_eas_is_valid(&xive->eat[i])) {
> +        if (!xive_source_is_valid(xive, i)) {
>              continue;
>          }
>  
>
Greg Kurz Nov. 5, 2020, 11:39 a.m. UTC | #2
On Thu, 5 Nov 2020 09:37:27 +0100
Cédric Le Goater <clg@kaod.org> wrote:

> On 8/20/20 3:45 PM, Cédric Le Goater wrote:
> > The vCPU IPIs are now allocated in kvmppc_xive_cpu_connect() when the
> > vCPU connects to the KVM device and not when all the sources are reset
> > in kvmppc_xive_source_reset()
> 
> This patch is introducing a regression when vsmt is in used.
> 

Well, it isn't exactly when vsmt is used, it is when vsmt is set
to a different value than the one which is passed to -smp threads,
otherwise you always get consecutive vcpu ids.

>   https://bugs.launchpad.net/qemu/+bug/1900241
> 

In this report we have threads=1, so depending on vsmt this gives
the following vcpu ids:

-M vsmt=1 -smp 8,cores=8,threads=1
kvmppc_xive_reset_ipi_on_cpu: cpu_index=0 vcpu_id=0
kvmppc_xive_reset_ipi_on_cpu: cpu_index=1 vcpu_id=1
kvmppc_xive_reset_ipi_on_cpu: cpu_index=2 vcpu_id=2
kvmppc_xive_reset_ipi_on_cpu: cpu_index=3 vcpu_id=3
kvmppc_xive_reset_ipi_on_cpu: cpu_index=4 vcpu_id=4
kvmppc_xive_reset_ipi_on_cpu: cpu_index=5 vcpu_id=5
kvmppc_xive_reset_ipi_on_cpu: cpu_index=6 vcpu_id=6
kvmppc_xive_reset_ipi_on_cpu: cpu_index=7 vcpu_id=7

-M vsmt=2 -smp 8,cores=8,threads=1
kvmppc_xive_reset_ipi_on_cpu: cpu_index=0 vcpu_id=0
kvmppc_xive_reset_ipi_on_cpu: cpu_index=1 vcpu_id=2
kvmppc_xive_reset_ipi_on_cpu: cpu_index=2 vcpu_id=4
kvmppc_xive_reset_ipi_on_cpu: cpu_index=3 vcpu_id=6
kvmppc_xive_reset_ipi_on_cpu: cpu_index=4 vcpu_id=8
kvmppc_xive_reset_ipi_on_cpu: cpu_index=5 vcpu_id=10
kvmppc_xive_reset_ipi_on_cpu: cpu_index=6 vcpu_id=12
kvmppc_xive_reset_ipi_on_cpu: cpu_index=7 vcpu_id=14

-M vsmt=4 -smp 8,cores=8,threads=1 
kvmppc_xive_reset_ipi_on_cpu: cpu_index=0 vcpu_id=0
kvmppc_xive_reset_ipi_on_cpu: cpu_index=1 vcpu_id=4
kvmppc_xive_reset_ipi_on_cpu: cpu_index=2 vcpu_id=8
kvmppc_xive_reset_ipi_on_cpu: cpu_index=3 vcpu_id=12
kvmppc_xive_reset_ipi_on_cpu: cpu_index=4 vcpu_id=16
kvmppc_xive_reset_ipi_on_cpu: cpu_index=5 vcpu_id=20
kvmppc_xive_reset_ipi_on_cpu: cpu_index=6 vcpu_id=24
kvmppc_xive_reset_ipi_on_cpu: cpu_index=7 vcpu_id=28

-M vsmt=8 -smp 8,cores=8,threads=1 
kvmppc_xive_reset_ipi_on_cpu: cpu_index=0 vcpu_id=0
kvmppc_xive_reset_ipi_on_cpu: cpu_index=1 vcpu_id=8
kvmppc_xive_reset_ipi_on_cpu: cpu_index=2 vcpu_id=16
kvmppc_xive_reset_ipi_on_cpu: cpu_index=3 vcpu_id=24
kvmppc_xive_reset_ipi_on_cpu: cpu_index=4 vcpu_id=32
kvmppc_xive_reset_ipi_on_cpu: cpu_index=5 vcpu_id=40
kvmppc_xive_reset_ipi_on_cpu: cpu_index=6 vcpu_id=48
kvmppc_xive_reset_ipi_on_cpu: cpu_index=7 vcpu_id=56

> when the OS boots, H_INT_SET_SOURCE_CONFIG fails with EINVAL, which 
> should mean that the IPI is not created at the host/KVM level.
> 

[...]

> > +static int kvmppc_xive_reset_ipi(SpaprXive *xive, CPUState *cs, Error **errp)
> > +{
> > +    unsigned long ipi = kvm_arch_vcpu_id(cs);
> 
> ( I am wondering if this is the correct id to use ? )
> 

Setting the ipi to the vcpu id seems to assume that the vcpu ids are
consecutive, which is definitely not the case when vsmt != threads
as explained above.

Passing cs->cpu_index would provide consecutive ids but I'm not
sure this is a correct fix. I gave a try : all the vCPUs get
online in the guest as expected but something goes wrong when
terminating QEMU:

[ 5557.599881] WARNING: CPU: 40 PID: 59101 at arch/powerpc/kvm/book3s_xive_native.c:259 xive_native_esb_fault+0xe4/0x240 [kvm]
[ 5557.599897] Modules linked in: xt_CHECKSUM ipt_MASQUERADE xt_conntrack ipt_REJECT nf_reject_ipv4 nft_compat nft_counter nft_chain_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 nf_tables nfnetlink tun bridge stp llc nfsv3 nfs_acl rpcsec_gss_krb5 auth_rpcgss nfsv4 dns_resolver nfs lockd grace fscache kvm_hv kvm i2c_dev sunrpc ext4 mbcache jbd2 xts vmx_crypto ofpart ipmi_powernv ipmi_devintf powernv_flash ipmi_msghandler mtd ibmpowernv opal_prd at24 uio_pdrv_genirq uio ip_tables xfs libcrc32c sd_mod sg ast i2c_algo_bit drm_vram_helper drm_ttm_helper ttm drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops drm ahci libahci tg3 libata drm_panel_orientation_quirks dm_mirror dm_region_hash dm_log dm_mod
[ 5557.600010] CPU: 40 PID: 59101 Comm: qemu-system-ppc Kdump: loaded Tainted: G        W        --------- -  - 4.18.0-240.el8.ppc64le #1
[ 5557.600041] NIP:  c00800000e949fac LR: c00000000044b164 CTR: c00800000e949ec8
[ 5557.600060] REGS: c000001f69617840 TRAP: 0700   Tainted: G        W        --------- -  -  (4.18.0-240.el8.ppc64le)
[ 5557.600089] MSR:  9000000000029033 <SF,HV,EE,ME,IR,DR,RI,LE>  CR: 44044282  XER: 00000000
[ 5557.600110] CFAR: c00000000044b160 IRQMASK: 0 
[ 5557.600110] GPR00: c00000000044b164 c000001f69617ac0 c00800000e96e000 c000001f69617c10 
[ 5557.600110] GPR04: 05faa2b21e000080 0000000000000000 0000000000000005 ffffffffffffffff 
[ 5557.600110] GPR08: 0000000000000000 0000000000000001 0000000000000000 0000000000000001 
[ 5557.600110] GPR12: c00800000e949ec8 c000001ffffd3400 0000000000000000 0000000000000000 
[ 5557.600110] GPR16: 0000000000000000 0000000000000000 0000000000000000 0000000000000000 
[ 5557.600110] GPR20: 0000000000000000 0000000000000000 c000001f5c065160 c000000001c76f90 
[ 5557.600110] GPR24: c000001f06f20000 c000001f5c065100 0000000000000008 c000001f0eb98c78 
[ 5557.600110] GPR28: c000001dcab40000 c000001dcab403d8 c000001f69617c10 0000000000000011 
[ 5557.600255] NIP [c00800000e949fac] xive_native_esb_fault+0xe4/0x240 [kvm]
[ 5557.600274] LR [c00000000044b164] __do_fault+0x64/0x220
[ 5557.600298] Call Trace:
[ 5557.600302] [c000001f69617ac0] [0000000137a5dc20] 0x137a5dc20 (unreliable)
[ 5557.600320] [c000001f69617b50] [c00000000044b164] __do_fault+0x64/0x220
[ 5557.600337] [c000001f69617b90] [c000000000453838] do_fault+0x218/0x930
[ 5557.600355] [c000001f69617bf0] [c000000000456f50] __handle_mm_fault+0x350/0xdf0
[ 5557.600373] [c000001f69617cd0] [c000000000457b1c] handle_mm_fault+0x12c/0x310
[ 5557.600393] [c000001f69617d10] [c00000000007ef44] __do_page_fault+0x264/0xbb0
[ 5557.600411] [c000001f69617df0] [c00000000007f8c8] do_page_fault+0x38/0xd0
[ 5557.600429] [c000001f69617e30] [c00000000000a714] handle_page_fault+0x18/0x38
[ 5557.600438] Instruction dump:
[ 5557.600444] 40c2fff0 7c2004ac 2fa90000 409e0118 73e90001 41820080 e8bd0008 7c2004ac 
[ 5557.600455] 7ca90074 39400000 915c0000 7929d182 <0b090000> 2fa50000 419e0080 e89e0018 
[ 5557.600485] ---[ end trace 66c6ff034c53f64f ]---
[ 5557.600509] xive-kvm: xive_native_esb_fault: accessing invalid ESB page for source 8 !

So it looks like something needs to be done in the XIVE KVM device anyway.

[...]

> >  static int kvmppc_xive_source_reset(XiveSource *xsrc, Error **errp)
> >  {
> >      SpaprXive *xive = SPAPR_XIVE(xsrc->xive);
> >      int i;
> >  
> > -    for (i = 0; i < xsrc->nr_irqs; i++) {
> > +    /*
> > +     * Skip the vCPU IPIs. These are created/reset when the vCPUs are
> > +     * connected in kvmppc_xive_cpu_connect()
> > +     */
> > +    for (i = SPAPR_XIRQ_BASE; i < xsrc->nr_irqs; i++) {
> 
> This change skips the range [ 0x0 ... 0x1000 ] and relies on the presenter
> to create the vCPU IPIs at the KVM level. But spapr_irq_init() could have 
> claimed more in : 
> 
>         /* Enable the CPU IPIs */
>         for (i = 0; i < nr_servers; ++i) {
>             SpaprInterruptControllerClass *sicc
>                 = SPAPR_INTC_GET_CLASS(spapr->xive);
> 
>             if (sicc->claim_irq(SPAPR_INTC(spapr->xive), SPAPR_IRQ_IPI + i,
>                                 false, errp) < 0) {
>                 return;
>             }
>         }
> 

This doesn't reach the XIVE KVM device when running in dual mode because
it doesn't exist yet.

> I think this is what is happening with vsmt. However, I don't know how to
> fix it :/
> 
> Thanks,
> 
> C.
>
diff mbox series

Patch

diff --git a/hw/intc/spapr_xive_kvm.c b/hw/intc/spapr_xive_kvm.c
index 4ea687c93ecd..f838c208b559 100644
--- a/hw/intc/spapr_xive_kvm.c
+++ b/hw/intc/spapr_xive_kvm.c
@@ -146,6 +146,15 @@  int kvmppc_xive_cpu_synchronize_state(XiveTCTX *tctx, Error **errp)
     return s.ret;
 }
 
+static int kvmppc_xive_reset_ipi(SpaprXive *xive, CPUState *cs, Error **errp)
+{
+    unsigned long ipi = kvm_arch_vcpu_id(cs);
+    uint64_t state = 0;
+
+    return kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_SOURCE, ipi,
+                              &state, true, errp);
+}
+
 int kvmppc_xive_cpu_connect(XiveTCTX *tctx, Error **errp)
 {
     ERRP_GUARD();
@@ -175,6 +184,12 @@  int kvmppc_xive_cpu_connect(XiveTCTX *tctx, Error **errp)
         return ret;
     }
 
+    /* Create/reset the vCPU IPI */
+    ret = kvmppc_xive_reset_ipi(xive, tctx->cs, errp);
+    if (ret < 0) {
+        return ret;
+    }
+
     kvm_cpu_enable(tctx->cs);
     return 0;
 }
@@ -260,6 +275,12 @@  int kvmppc_xive_source_reset_one(XiveSource *xsrc, int srcno, Error **errp)
 
     assert(xive->fd != -1);
 
+    /*
+     * The vCPU IPIs are now allocated in kvmppc_xive_cpu_connect()
+     * and not with all sources in kvmppc_xive_source_reset()
+     */
+    assert(srcno >= SPAPR_XIRQ_BASE);
+
     if (xive_source_irq_is_lsi(xsrc, srcno)) {
         state |= KVM_XIVE_LEVEL_SENSITIVE;
         if (xsrc->status[srcno] & XIVE_STATUS_ASSERTED) {
@@ -271,12 +292,28 @@  int kvmppc_xive_source_reset_one(XiveSource *xsrc, int srcno, Error **errp)
                              true, errp);
 }
 
+/*
+ * To be valid, a source must have been claimed by the machine (valid
+ * entry in the EAS table) and if it is a vCPU IPI, the vCPU should
+ * have been enabled, which means the IPI has been allocated in
+ * kvmppc_xive_cpu_connect().
+ */
+static bool xive_source_is_valid(SpaprXive *xive, int i)
+{
+    return xive_eas_is_valid(&xive->eat[i]) &&
+        (i >= SPAPR_XIRQ_BASE || kvm_cpu_is_enabled(i));
+}
+
 static int kvmppc_xive_source_reset(XiveSource *xsrc, Error **errp)
 {
     SpaprXive *xive = SPAPR_XIVE(xsrc->xive);
     int i;
 
-    for (i = 0; i < xsrc->nr_irqs; i++) {
+    /*
+     * Skip the vCPU IPIs. These are created/reset when the vCPUs are
+     * connected in kvmppc_xive_cpu_connect()
+     */
+    for (i = SPAPR_XIRQ_BASE; i < xsrc->nr_irqs; i++) {
         int ret;
 
         if (!xive_eas_is_valid(&xive->eat[i])) {
@@ -370,7 +407,7 @@  static void kvmppc_xive_source_get_state(XiveSource *xsrc)
     for (i = 0; i < xsrc->nr_irqs; i++) {
         uint8_t pq;
 
-        if (!xive_eas_is_valid(&xive->eat[i])) {
+        if (!xive_source_is_valid(xive, i)) {
             continue;
         }
 
@@ -553,7 +590,7 @@  static void kvmppc_xive_change_state_handler(void *opaque, int running,
             uint8_t pq;
             uint8_t old_pq;
 
-            if (!xive_eas_is_valid(&xive->eat[i])) {
+            if (!xive_source_is_valid(xive, i)) {
                 continue;
             }
 
@@ -581,7 +618,7 @@  static void kvmppc_xive_change_state_handler(void *opaque, int running,
     for (i = 0; i < xsrc->nr_irqs; i++) {
         uint8_t pq;
 
-        if (!xive_eas_is_valid(&xive->eat[i])) {
+        if (!xive_source_is_valid(xive, i)) {
             continue;
         }
 
@@ -696,7 +733,7 @@  int kvmppc_xive_post_load(SpaprXive *xive, int version_id)
 
     /* Restore the EAT */
     for (i = 0; i < xive->nr_irqs; i++) {
-        if (!xive_eas_is_valid(&xive->eat[i])) {
+        if (!xive_source_is_valid(xive, i)) {
             continue;
         }