diff mbox series

[v2,04/13] spapr/xive: add state synchronization with KVM

Message ID 20190222131322.26079-5-clg@kaod.org
State New
Headers show
Series spapr: add KVM support to the XIVE interrupt mode | expand

Commit Message

Cédric Le Goater Feb. 22, 2019, 1:13 p.m. UTC
This extends the KVM XIVE device backend with 'synchronize_state'
methods used to retrieve the state from KVM. The HW state of the
sources, the KVM device and the thread interrupt contexts are
collected for the monitor usage and also migration.

These get operations rely on their KVM counterpart in the host kernel
which acts as a proxy for OPAL, the host firmware. The set operations
will be added for migration support later.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
---
 include/hw/ppc/spapr_xive.h |  8 ++++
 include/hw/ppc/xive.h       |  1 +
 hw/intc/spapr_xive.c        | 17 ++++---
 hw/intc/spapr_xive_kvm.c    | 89 +++++++++++++++++++++++++++++++++++++
 hw/intc/xive.c              | 10 +++++
 5 files changed, 118 insertions(+), 7 deletions(-)

Comments

David Gibson Feb. 26, 2019, 12:01 a.m. UTC | #1
On Fri, Feb 22, 2019 at 02:13:13PM +0100, Cédric Le Goater wrote:
> This extends the KVM XIVE device backend with 'synchronize_state'
> methods used to retrieve the state from KVM. The HW state of the
> sources, the KVM device and the thread interrupt contexts are
> collected for the monitor usage and also migration.
> 
> These get operations rely on their KVM counterpart in the host kernel
> which acts as a proxy for OPAL, the host firmware. The set operations
> will be added for migration support later.
> 
> Signed-off-by: Cédric Le Goater <clg@kaod.org>
> ---
>  include/hw/ppc/spapr_xive.h |  8 ++++
>  include/hw/ppc/xive.h       |  1 +
>  hw/intc/spapr_xive.c        | 17 ++++---
>  hw/intc/spapr_xive_kvm.c    | 89 +++++++++++++++++++++++++++++++++++++
>  hw/intc/xive.c              | 10 +++++
>  5 files changed, 118 insertions(+), 7 deletions(-)
> 
> diff --git a/include/hw/ppc/spapr_xive.h b/include/hw/ppc/spapr_xive.h
> index 749c6cbc2c56..ebd65e7fe36b 100644
> --- a/include/hw/ppc/spapr_xive.h
> +++ b/include/hw/ppc/spapr_xive.h
> @@ -44,6 +44,13 @@ typedef struct sPAPRXive {
>      void          *tm_mmap;
>  } sPAPRXive;
>  
> +/*
> + * The sPAPR machine has a unique XIVE IC device. Assign a fixed value
> + * to the controller block id value. It can nevertheless be changed
> + * for testing purpose.
> + */
> +#define SPAPR_XIVE_BLOCK_ID 0x0
> +
>  bool spapr_xive_irq_claim(sPAPRXive *xive, uint32_t lisn, bool lsi);
>  bool spapr_xive_irq_free(sPAPRXive *xive, uint32_t lisn);
>  void spapr_xive_pic_print_info(sPAPRXive *xive, Monitor *mon);
> @@ -74,5 +81,6 @@ void kvmppc_xive_set_queue_config(sPAPRXive *xive, uint8_t end_blk,
>  void kvmppc_xive_get_queue_config(sPAPRXive *xive, uint8_t end_blk,
>                                   uint32_t end_idx, XiveEND *end,
>                                   Error **errp);
> +void kvmppc_xive_synchronize_state(sPAPRXive *xive, Error **errp);
>  
>  #endif /* PPC_SPAPR_XIVE_H */
> diff --git a/include/hw/ppc/xive.h b/include/hw/ppc/xive.h
> index 061d43fea24d..f3766fd881a2 100644
> --- a/include/hw/ppc/xive.h
> +++ b/include/hw/ppc/xive.h
> @@ -431,5 +431,6 @@ void kvmppc_xive_source_reset_one(XiveSource *xsrc, int srcno, Error **errp);
>  void kvmppc_xive_source_reset(XiveSource *xsrc, Error **errp);
>  void kvmppc_xive_source_set_irq(void *opaque, int srcno, int val);
>  void kvmppc_xive_cpu_connect(XiveTCTX *tctx, Error **errp);
> +void kvmppc_xive_cpu_synchronize_state(XiveTCTX *tctx, Error **errp);
>  
>  #endif /* PPC_XIVE_H */
> diff --git a/hw/intc/spapr_xive.c b/hw/intc/spapr_xive.c
> index 3db24391e31c..9f07567f4d78 100644
> --- a/hw/intc/spapr_xive.c
> +++ b/hw/intc/spapr_xive.c
> @@ -40,13 +40,6 @@
>  
>  #define SPAPR_XIVE_NVT_BASE 0x400
>  
> -/*
> - * The sPAPR machine has a unique XIVE IC device. Assign a fixed value
> - * to the controller block id value. It can nevertheless be changed
> - * for testing purpose.
> - */
> -#define SPAPR_XIVE_BLOCK_ID 0x0
> -
>  /*
>   * sPAPR NVT and END indexing helpers
>   */
> @@ -153,6 +146,16 @@ void spapr_xive_pic_print_info(sPAPRXive *xive, Monitor *mon)
>      XiveSource *xsrc = &xive->source;
>      int i;
>  
> +    if (kvm_irqchip_in_kernel()) {
> +        Error *local_err = NULL;
> +
> +        kvmppc_xive_synchronize_state(xive, &local_err);
> +        if (local_err) {
> +            error_report_err(local_err);
> +            return;
> +        }
> +    }
> +
>      monitor_printf(mon, "  LSIN         PQ    EISN     CPU/PRIO EQ\n");
>  
>      for (i = 0; i < xive->nr_irqs; i++) {
> diff --git a/hw/intc/spapr_xive_kvm.c b/hw/intc/spapr_xive_kvm.c
> index 6b50451b4f85..4b1ffb9835f9 100644
> --- a/hw/intc/spapr_xive_kvm.c
> +++ b/hw/intc/spapr_xive_kvm.c
> @@ -60,6 +60,57 @@ static void kvm_cpu_enable(CPUState *cs)
>  /*
>   * XIVE Thread Interrupt Management context (KVM)
>   */
> +static void kvmppc_xive_cpu_get_state(XiveTCTX *tctx, Error **errp)
> +{
> +    uint64_t state[4] = { 0 };
> +    int ret;
> +
> +    ret = kvm_get_one_reg(tctx->cs, KVM_REG_PPC_NVT_STATE, state);
> +    if (ret != 0) {
> +        error_setg_errno(errp, errno,
> +                         "XIVE: could not capture KVM state of CPU %ld",
> +                         kvm_arch_vcpu_id(tctx->cs));
> +        return;
> +    }
> +
> +    /* word0 and word1 of the OS ring. */
> +    *((uint64_t *) &tctx->regs[TM_QW1_OS]) = state[0];
> +
> +    /*
> +     * KVM also returns word2 containing the OS CAM line which is
> +     * interesting to print out in the QEMU monitor.
> +     */
> +    *((uint64_t *) &tctx->regs[TM_QW1_OS + TM_WORD2]) = state[1];

As mentioned elsewhere, it is interesting for debugging, but doesn't
seem to really match the guest visible CAM state, so I'm not convinced
it's a good idea to put it into the regs[] structure.

> +}
> +
> +typedef struct {
> +    XiveTCTX *tctx;
> +    Error *err;
> +} XiveCpuGetState;
> +
> +static void kvmppc_xive_cpu_do_synchronize_state(CPUState *cpu,
> +                                                 run_on_cpu_data arg)
> +{
> +    XiveCpuGetState *s = arg.host_ptr;
> +
> +    kvmppc_xive_cpu_get_state(s->tctx, &s->err);
> +}
> +
> +void kvmppc_xive_cpu_synchronize_state(XiveTCTX *tctx, Error **errp)
> +{
> +    XiveCpuGetState s = {
> +        .tctx = tctx,
> +        .err = NULL,
> +    };
> +
> +    run_on_cpu(tctx->cs, kvmppc_xive_cpu_do_synchronize_state,
> +               RUN_ON_CPU_HOST_PTR(&s));

Why does this need a run_on_cpu() ?  The KVM call which is getting the
actual info takes a cpu parameter.

> +
> +    if (s.err) {
> +        error_propagate(errp, s.err);
> +        return;
> +    }
> +}
>  
>  void kvmppc_xive_cpu_connect(XiveTCTX *tctx, Error **errp)
>  {
> @@ -229,6 +280,19 @@ uint64_t kvmppc_xive_esb_rw(XiveSource *xsrc, int srcno, uint32_t offset,
>      }
>  }
>  
> +static void kvmppc_xive_source_get_state(XiveSource *xsrc)
> +{
> +    int i;
> +
> +    for (i = 0; i < xsrc->nr_irqs; i++) {
> +        /* Perform a load without side effect to retrieve the PQ bits */
> +        uint8_t pq = xive_esb_read(xsrc, i, XIVE_ESB_GET);
> +
> +        /* and save PQ locally */
> +        xive_source_esb_set(xsrc, i, pq);
> +    }
> +}
> +
>  void kvmppc_xive_source_set_irq(void *opaque, int srcno, int val)
>  {
>      XiveSource *xsrc = opaque;
> @@ -340,6 +404,31 @@ void kvmppc_xive_reset(sPAPRXive *xive, Error **errp)
>                        NULL, true, errp);
>  }
>  
> +static void kvmppc_xive_get_queues(sPAPRXive *xive, Error **errp)
> +{
> +    Error *local_err = NULL;
> +    int i;
> +
> +    for (i = 0; i < xive->nr_ends; i++) {
> +        kvmppc_xive_get_queue_config(xive, SPAPR_XIVE_BLOCK_ID, i,
> +                                     &xive->endt[i], &local_err);
> +        if (local_err) {
> +            error_propagate(errp, local_err);
> +            return;
> +        }
> +    }
> +}
> +
> +void kvmppc_xive_synchronize_state(sPAPRXive *xive, Error **errp)
> +{
> +    kvmppc_xive_source_get_state(&xive->source);
> +
> +    /* EAT: there is no extra state to query from KVM */
> +
> +    /* ENDT */
> +    kvmppc_xive_get_queues(xive, errp);
> +}
> +
>  static void *kvmppc_xive_mmap(sPAPRXive *xive, int pgoff, size_t len,
>                                Error **errp)
>  {
> diff --git a/hw/intc/xive.c b/hw/intc/xive.c
> index 0284b5803551..f478c52ab2a0 100644
> --- a/hw/intc/xive.c
> +++ b/hw/intc/xive.c
> @@ -431,6 +431,16 @@ void xive_tctx_pic_print_info(XiveTCTX *tctx, Monitor *mon)
>      int cpu_index = tctx->cs ? tctx->cs->cpu_index : -1;
>      int i;
>  
> +    if (kvm_irqchip_in_kernel()) {
> +        Error *local_err = NULL;
> +
> +        kvmppc_xive_cpu_synchronize_state(tctx, &local_err);
> +        if (local_err) {
> +            error_report_err(local_err);
> +            return;
> +        }
> +    }
> +
>      monitor_printf(mon, "CPU[%04x]:   QW   NSR CPPR IPB LSMFB ACK# INC AGE PIPR"
>                     "  W2\n", cpu_index);
>
Cédric Le Goater March 11, 2019, 8:41 p.m. UTC | #2
On 2/26/19 1:01 AM, David Gibson wrote:
> On Fri, Feb 22, 2019 at 02:13:13PM +0100, Cédric Le Goater wrote:
>> This extends the KVM XIVE device backend with 'synchronize_state'
>> methods used to retrieve the state from KVM. The HW state of the
>> sources, the KVM device and the thread interrupt contexts are
>> collected for the monitor usage and also migration.
>>
>> These get operations rely on their KVM counterpart in the host kernel
>> which acts as a proxy for OPAL, the host firmware. The set operations
>> will be added for migration support later.
>>
>> Signed-off-by: Cédric Le Goater <clg@kaod.org>
>> ---
>>  include/hw/ppc/spapr_xive.h |  8 ++++
>>  include/hw/ppc/xive.h       |  1 +
>>  hw/intc/spapr_xive.c        | 17 ++++---
>>  hw/intc/spapr_xive_kvm.c    | 89 +++++++++++++++++++++++++++++++++++++
>>  hw/intc/xive.c              | 10 +++++
>>  5 files changed, 118 insertions(+), 7 deletions(-)
>>
>> diff --git a/include/hw/ppc/spapr_xive.h b/include/hw/ppc/spapr_xive.h
>> index 749c6cbc2c56..ebd65e7fe36b 100644
>> --- a/include/hw/ppc/spapr_xive.h
>> +++ b/include/hw/ppc/spapr_xive.h
>> @@ -44,6 +44,13 @@ typedef struct sPAPRXive {
>>      void          *tm_mmap;
>>  } sPAPRXive;
>>  
>> +/*
>> + * The sPAPR machine has a unique XIVE IC device. Assign a fixed value
>> + * to the controller block id value. It can nevertheless be changed
>> + * for testing purpose.
>> + */
>> +#define SPAPR_XIVE_BLOCK_ID 0x0
>> +
>>  bool spapr_xive_irq_claim(sPAPRXive *xive, uint32_t lisn, bool lsi);
>>  bool spapr_xive_irq_free(sPAPRXive *xive, uint32_t lisn);
>>  void spapr_xive_pic_print_info(sPAPRXive *xive, Monitor *mon);
>> @@ -74,5 +81,6 @@ void kvmppc_xive_set_queue_config(sPAPRXive *xive, uint8_t end_blk,
>>  void kvmppc_xive_get_queue_config(sPAPRXive *xive, uint8_t end_blk,
>>                                   uint32_t end_idx, XiveEND *end,
>>                                   Error **errp);
>> +void kvmppc_xive_synchronize_state(sPAPRXive *xive, Error **errp);
>>  
>>  #endif /* PPC_SPAPR_XIVE_H */
>> diff --git a/include/hw/ppc/xive.h b/include/hw/ppc/xive.h
>> index 061d43fea24d..f3766fd881a2 100644
>> --- a/include/hw/ppc/xive.h
>> +++ b/include/hw/ppc/xive.h
>> @@ -431,5 +431,6 @@ void kvmppc_xive_source_reset_one(XiveSource *xsrc, int srcno, Error **errp);
>>  void kvmppc_xive_source_reset(XiveSource *xsrc, Error **errp);
>>  void kvmppc_xive_source_set_irq(void *opaque, int srcno, int val);
>>  void kvmppc_xive_cpu_connect(XiveTCTX *tctx, Error **errp);
>> +void kvmppc_xive_cpu_synchronize_state(XiveTCTX *tctx, Error **errp);
>>  
>>  #endif /* PPC_XIVE_H */
>> diff --git a/hw/intc/spapr_xive.c b/hw/intc/spapr_xive.c
>> index 3db24391e31c..9f07567f4d78 100644
>> --- a/hw/intc/spapr_xive.c
>> +++ b/hw/intc/spapr_xive.c
>> @@ -40,13 +40,6 @@
>>  
>>  #define SPAPR_XIVE_NVT_BASE 0x400
>>  
>> -/*
>> - * The sPAPR machine has a unique XIVE IC device. Assign a fixed value
>> - * to the controller block id value. It can nevertheless be changed
>> - * for testing purpose.
>> - */
>> -#define SPAPR_XIVE_BLOCK_ID 0x0
>> -
>>  /*
>>   * sPAPR NVT and END indexing helpers
>>   */
>> @@ -153,6 +146,16 @@ void spapr_xive_pic_print_info(sPAPRXive *xive, Monitor *mon)
>>      XiveSource *xsrc = &xive->source;
>>      int i;
>>  
>> +    if (kvm_irqchip_in_kernel()) {
>> +        Error *local_err = NULL;
>> +
>> +        kvmppc_xive_synchronize_state(xive, &local_err);
>> +        if (local_err) {
>> +            error_report_err(local_err);
>> +            return;
>> +        }
>> +    }
>> +
>>      monitor_printf(mon, "  LSIN         PQ    EISN     CPU/PRIO EQ\n");
>>  
>>      for (i = 0; i < xive->nr_irqs; i++) {
>> diff --git a/hw/intc/spapr_xive_kvm.c b/hw/intc/spapr_xive_kvm.c
>> index 6b50451b4f85..4b1ffb9835f9 100644
>> --- a/hw/intc/spapr_xive_kvm.c
>> +++ b/hw/intc/spapr_xive_kvm.c
>> @@ -60,6 +60,57 @@ static void kvm_cpu_enable(CPUState *cs)
>>  /*
>>   * XIVE Thread Interrupt Management context (KVM)
>>   */
>> +static void kvmppc_xive_cpu_get_state(XiveTCTX *tctx, Error **errp)
>> +{
>> +    uint64_t state[4] = { 0 };
>> +    int ret;
>> +
>> +    ret = kvm_get_one_reg(tctx->cs, KVM_REG_PPC_NVT_STATE, state);
>> +    if (ret != 0) {
>> +        error_setg_errno(errp, errno,
>> +                         "XIVE: could not capture KVM state of CPU %ld",
>> +                         kvm_arch_vcpu_id(tctx->cs));
>> +        return;
>> +    }
>> +
>> +    /* word0 and word1 of the OS ring. */
>> +    *((uint64_t *) &tctx->regs[TM_QW1_OS]) = state[0];
>> +
>> +    /*
>> +     * KVM also returns word2 containing the OS CAM line which is
>> +     * interesting to print out in the QEMU monitor.
>> +     */
>> +    *((uint64_t *) &tctx->regs[TM_QW1_OS + TM_WORD2]) = state[1];
> 
> As mentioned elsewhere, it is interesting for debugging, but doesn't
> seem to really match the guest visible CAM state, 

The guest is not allowed to see these registers in the TIMA OS page 
and we are not violating the XIVE architecture. That is where the 
CAM value belong in HW. The exact same place. I was even thinking 
to propagate the POOL value which could be useful for nested.

> so I'm not convinced it's a good idea to put it into the regs[] 
> structure.

I understand it is problematic in case of a KVM->QEMU migration 
because we need to force a XiveTCTX reset to update the registers 
with the QEMU CAM line which has been overridden with the KVM CAM 
line. 

Another solution could be to add a 'nvt_base' property to SpaprXive 
and a KVM control to get its value (xive->vp_base in the KVM XIVE 
device). It would be migrated and used by the QEMU XIVE device 
after migration. 
 
>> +}
>> +
>> +typedef struct {
>> +    XiveTCTX *tctx;
>> +    Error *err;
>> +} XiveCpuGetState;
>> +
>> +static void kvmppc_xive_cpu_do_synchronize_state(CPUState *cpu,
>> +                                                 run_on_cpu_data arg)
>> +{
>> +    XiveCpuGetState *s = arg.host_ptr;
>> +
>> +    kvmppc_xive_cpu_get_state(s->tctx, &s->err);
>> +}
>> +
>> +void kvmppc_xive_cpu_synchronize_state(XiveTCTX *tctx, Error **errp)
>> +{
>> +    XiveCpuGetState s = {
>> +        .tctx = tctx,
>> +        .err = NULL,
>> +    };
>> +
>> +    run_on_cpu(tctx->cs, kvmppc_xive_cpu_do_synchronize_state,
>> +               RUN_ON_CPU_HOST_PTR(&s));
> 
> Why does this need a run_on_cpu() ?  The KVM call which is getting the
> actual info takes a cpu parameter.

Don't we need to kick the vCPU ? 

Thanks,

C. 


> 
>> +
>> +    if (s.err) {
>> +        error_propagate(errp, s.err);
>> +        return;
>> +    }
>> +}
>>  
>>  void kvmppc_xive_cpu_connect(XiveTCTX *tctx, Error **errp)
>>  {
>> @@ -229,6 +280,19 @@ uint64_t kvmppc_xive_esb_rw(XiveSource *xsrc, int srcno, uint32_t offset,
>>      }
>>  }
>>  
>> +static void kvmppc_xive_source_get_state(XiveSource *xsrc)
>> +{
>> +    int i;
>> +
>> +    for (i = 0; i < xsrc->nr_irqs; i++) {
>> +        /* Perform a load without side effect to retrieve the PQ bits */
>> +        uint8_t pq = xive_esb_read(xsrc, i, XIVE_ESB_GET);
>> +
>> +        /* and save PQ locally */
>> +        xive_source_esb_set(xsrc, i, pq);
>> +    }
>> +}
>> +
>>  void kvmppc_xive_source_set_irq(void *opaque, int srcno, int val)
>>  {
>>      XiveSource *xsrc = opaque;
>> @@ -340,6 +404,31 @@ void kvmppc_xive_reset(sPAPRXive *xive, Error **errp)
>>                        NULL, true, errp);
>>  }
>>  
>> +static void kvmppc_xive_get_queues(sPAPRXive *xive, Error **errp)
>> +{
>> +    Error *local_err = NULL;
>> +    int i;
>> +
>> +    for (i = 0; i < xive->nr_ends; i++) {
>> +        kvmppc_xive_get_queue_config(xive, SPAPR_XIVE_BLOCK_ID, i,
>> +                                     &xive->endt[i], &local_err);
>> +        if (local_err) {
>> +            error_propagate(errp, local_err);
>> +            return;
>> +        }
>> +    }
>> +}
>> +
>> +void kvmppc_xive_synchronize_state(sPAPRXive *xive, Error **errp)
>> +{
>> +    kvmppc_xive_source_get_state(&xive->source);
>> +
>> +    /* EAT: there is no extra state to query from KVM */
>> +
>> +    /* ENDT */
>> +    kvmppc_xive_get_queues(xive, errp);
>> +}
>> +
>>  static void *kvmppc_xive_mmap(sPAPRXive *xive, int pgoff, size_t len,
>>                                Error **errp)
>>  {
>> diff --git a/hw/intc/xive.c b/hw/intc/xive.c
>> index 0284b5803551..f478c52ab2a0 100644
>> --- a/hw/intc/xive.c
>> +++ b/hw/intc/xive.c
>> @@ -431,6 +431,16 @@ void xive_tctx_pic_print_info(XiveTCTX *tctx, Monitor *mon)
>>      int cpu_index = tctx->cs ? tctx->cs->cpu_index : -1;
>>      int i;
>>  
>> +    if (kvm_irqchip_in_kernel()) {
>> +        Error *local_err = NULL;
>> +
>> +        kvmppc_xive_cpu_synchronize_state(tctx, &local_err);
>> +        if (local_err) {
>> +            error_report_err(local_err);
>> +            return;
>> +        }
>> +    }
>> +
>>      monitor_printf(mon, "CPU[%04x]:   QW   NSR CPPR IPB LSMFB ACK# INC AGE PIPR"
>>                     "  W2\n", cpu_index);
>>  
>
David Gibson March 14, 2019, 2:10 a.m. UTC | #3
On Mon, Mar 11, 2019 at 09:41:12PM +0100, Cédric Le Goater wrote:
> On 2/26/19 1:01 AM, David Gibson wrote:
> > On Fri, Feb 22, 2019 at 02:13:13PM +0100, Cédric Le Goater wrote:
> >> This extends the KVM XIVE device backend with 'synchronize_state'
> >> methods used to retrieve the state from KVM. The HW state of the
> >> sources, the KVM device and the thread interrupt contexts are
> >> collected for the monitor usage and also migration.
> >>
> >> These get operations rely on their KVM counterpart in the host kernel
> >> which acts as a proxy for OPAL, the host firmware. The set operations
> >> will be added for migration support later.
> >>
> >> Signed-off-by: Cédric Le Goater <clg@kaod.org>
> >> ---
> >>  include/hw/ppc/spapr_xive.h |  8 ++++
> >>  include/hw/ppc/xive.h       |  1 +
> >>  hw/intc/spapr_xive.c        | 17 ++++---
> >>  hw/intc/spapr_xive_kvm.c    | 89 +++++++++++++++++++++++++++++++++++++
> >>  hw/intc/xive.c              | 10 +++++
> >>  5 files changed, 118 insertions(+), 7 deletions(-)
> >>
> >> diff --git a/include/hw/ppc/spapr_xive.h b/include/hw/ppc/spapr_xive.h
> >> index 749c6cbc2c56..ebd65e7fe36b 100644
> >> --- a/include/hw/ppc/spapr_xive.h
> >> +++ b/include/hw/ppc/spapr_xive.h
> >> @@ -44,6 +44,13 @@ typedef struct sPAPRXive {
> >>      void          *tm_mmap;
> >>  } sPAPRXive;
> >>  
> >> +/*
> >> + * The sPAPR machine has a unique XIVE IC device. Assign a fixed value
> >> + * to the controller block id value. It can nevertheless be changed
> >> + * for testing purpose.
> >> + */
> >> +#define SPAPR_XIVE_BLOCK_ID 0x0
> >> +
> >>  bool spapr_xive_irq_claim(sPAPRXive *xive, uint32_t lisn, bool lsi);
> >>  bool spapr_xive_irq_free(sPAPRXive *xive, uint32_t lisn);
> >>  void spapr_xive_pic_print_info(sPAPRXive *xive, Monitor *mon);
> >> @@ -74,5 +81,6 @@ void kvmppc_xive_set_queue_config(sPAPRXive *xive, uint8_t end_blk,
> >>  void kvmppc_xive_get_queue_config(sPAPRXive *xive, uint8_t end_blk,
> >>                                   uint32_t end_idx, XiveEND *end,
> >>                                   Error **errp);
> >> +void kvmppc_xive_synchronize_state(sPAPRXive *xive, Error **errp);
> >>  
> >>  #endif /* PPC_SPAPR_XIVE_H */
> >> diff --git a/include/hw/ppc/xive.h b/include/hw/ppc/xive.h
> >> index 061d43fea24d..f3766fd881a2 100644
> >> --- a/include/hw/ppc/xive.h
> >> +++ b/include/hw/ppc/xive.h
> >> @@ -431,5 +431,6 @@ void kvmppc_xive_source_reset_one(XiveSource *xsrc, int srcno, Error **errp);
> >>  void kvmppc_xive_source_reset(XiveSource *xsrc, Error **errp);
> >>  void kvmppc_xive_source_set_irq(void *opaque, int srcno, int val);
> >>  void kvmppc_xive_cpu_connect(XiveTCTX *tctx, Error **errp);
> >> +void kvmppc_xive_cpu_synchronize_state(XiveTCTX *tctx, Error **errp);
> >>  
> >>  #endif /* PPC_XIVE_H */
> >> diff --git a/hw/intc/spapr_xive.c b/hw/intc/spapr_xive.c
> >> index 3db24391e31c..9f07567f4d78 100644
> >> --- a/hw/intc/spapr_xive.c
> >> +++ b/hw/intc/spapr_xive.c
> >> @@ -40,13 +40,6 @@
> >>  
> >>  #define SPAPR_XIVE_NVT_BASE 0x400
> >>  
> >> -/*
> >> - * The sPAPR machine has a unique XIVE IC device. Assign a fixed value
> >> - * to the controller block id value. It can nevertheless be changed
> >> - * for testing purpose.
> >> - */
> >> -#define SPAPR_XIVE_BLOCK_ID 0x0
> >> -
> >>  /*
> >>   * sPAPR NVT and END indexing helpers
> >>   */
> >> @@ -153,6 +146,16 @@ void spapr_xive_pic_print_info(sPAPRXive *xive, Monitor *mon)
> >>      XiveSource *xsrc = &xive->source;
> >>      int i;
> >>  
> >> +    if (kvm_irqchip_in_kernel()) {
> >> +        Error *local_err = NULL;
> >> +
> >> +        kvmppc_xive_synchronize_state(xive, &local_err);
> >> +        if (local_err) {
> >> +            error_report_err(local_err);
> >> +            return;
> >> +        }
> >> +    }
> >> +
> >>      monitor_printf(mon, "  LSIN         PQ    EISN     CPU/PRIO EQ\n");
> >>  
> >>      for (i = 0; i < xive->nr_irqs; i++) {
> >> diff --git a/hw/intc/spapr_xive_kvm.c b/hw/intc/spapr_xive_kvm.c
> >> index 6b50451b4f85..4b1ffb9835f9 100644
> >> --- a/hw/intc/spapr_xive_kvm.c
> >> +++ b/hw/intc/spapr_xive_kvm.c
> >> @@ -60,6 +60,57 @@ static void kvm_cpu_enable(CPUState *cs)
> >>  /*
> >>   * XIVE Thread Interrupt Management context (KVM)
> >>   */
> >> +static void kvmppc_xive_cpu_get_state(XiveTCTX *tctx, Error **errp)
> >> +{
> >> +    uint64_t state[4] = { 0 };
> >> +    int ret;
> >> +
> >> +    ret = kvm_get_one_reg(tctx->cs, KVM_REG_PPC_NVT_STATE, state);
> >> +    if (ret != 0) {
> >> +        error_setg_errno(errp, errno,
> >> +                         "XIVE: could not capture KVM state of CPU %ld",
> >> +                         kvm_arch_vcpu_id(tctx->cs));
> >> +        return;
> >> +    }
> >> +
> >> +    /* word0 and word1 of the OS ring. */
> >> +    *((uint64_t *) &tctx->regs[TM_QW1_OS]) = state[0];
> >> +
> >> +    /*
> >> +     * KVM also returns word2 containing the OS CAM line which is
> >> +     * interesting to print out in the QEMU monitor.
> >> +     */
> >> +    *((uint64_t *) &tctx->regs[TM_QW1_OS + TM_WORD2]) = state[1];
> > 
> > As mentioned elsewhere, it is interesting for debugging, but doesn't
> > seem to really match the guest visible CAM state, 
> 
> The guest is not allowed to see these registers in the TIMA OS page 

Ah.. right.  IIRC, roughly speaking the first doubleword in each ring
belongs to that level, but the second doubleword belongs to the right
above (since that's what manages the scheduling of what's in this
ring).

Makes it a big bogus to carry that as migrated state then.

> and we are not violating the XIVE architecture. That is where the 
> CAM value belong in HW. The exact same place. I was even thinking 
> to propagate the POOL value which could be useful for nested.
> 
> > so I'm not convinced it's a good idea to put it into the regs[] 
> > structure.
> 
> I understand it is problematic in case of a KVM->QEMU migration 
> because we need to force a XiveTCTX reset to update the registers 
> with the QEMU CAM line which has been overridden with the KVM CAM 
> line. 
> 
> Another solution could be to add a 'nvt_base' property to SpaprXive 
> and a KVM control to get its value (xive->vp_base in the KVM XIVE 
> device). It would be migrated and used by the QEMU XIVE device 
> after migration. 
>  
> >> +}
> >> +
> >> +typedef struct {
> >> +    XiveTCTX *tctx;
> >> +    Error *err;
> >> +} XiveCpuGetState;
> >> +
> >> +static void kvmppc_xive_cpu_do_synchronize_state(CPUState *cpu,
> >> +                                                 run_on_cpu_data arg)
> >> +{
> >> +    XiveCpuGetState *s = arg.host_ptr;
> >> +
> >> +    kvmppc_xive_cpu_get_state(s->tctx, &s->err);
> >> +}
> >> +
> >> +void kvmppc_xive_cpu_synchronize_state(XiveTCTX *tctx, Error **errp)
> >> +{
> >> +    XiveCpuGetState s = {
> >> +        .tctx = tctx,
> >> +        .err = NULL,
> >> +    };
> >> +
> >> +    run_on_cpu(tctx->cs, kvmppc_xive_cpu_do_synchronize_state,
> >> +               RUN_ON_CPU_HOST_PTR(&s));
> > 
> > Why does this need a run_on_cpu() ?  The KVM call which is getting the
> > actual info takes a cpu parameter.
> 
> Don't we need to kick the vCPU ?

Um.. you tell me?  If it's not safe to call on a vcpu other than one
owned by the calling thread, I'm not sure if the cpu parameter makes
sense.
Cédric Le Goater March 14, 2019, 6:55 a.m. UTC | #4
On 3/14/19 3:10 AM, David Gibson wrote:
> On Mon, Mar 11, 2019 at 09:41:12PM +0100, Cédric Le Goater wrote:
>> On 2/26/19 1:01 AM, David Gibson wrote:
>>> On Fri, Feb 22, 2019 at 02:13:13PM +0100, Cédric Le Goater wrote:
>>>> This extends the KVM XIVE device backend with 'synchronize_state'
>>>> methods used to retrieve the state from KVM. The HW state of the
>>>> sources, the KVM device and the thread interrupt contexts are
>>>> collected for the monitor usage and also migration.
>>>>
>>>> These get operations rely on their KVM counterpart in the host kernel
>>>> which acts as a proxy for OPAL, the host firmware. The set operations
>>>> will be added for migration support later.
>>>>
>>>> Signed-off-by: Cédric Le Goater <clg@kaod.org>
>>>> ---
>>>>  include/hw/ppc/spapr_xive.h |  8 ++++
>>>>  include/hw/ppc/xive.h       |  1 +
>>>>  hw/intc/spapr_xive.c        | 17 ++++---
>>>>  hw/intc/spapr_xive_kvm.c    | 89 +++++++++++++++++++++++++++++++++++++
>>>>  hw/intc/xive.c              | 10 +++++
>>>>  5 files changed, 118 insertions(+), 7 deletions(-)
>>>>
>>>> diff --git a/include/hw/ppc/spapr_xive.h b/include/hw/ppc/spapr_xive.h
>>>> index 749c6cbc2c56..ebd65e7fe36b 100644
>>>> --- a/include/hw/ppc/spapr_xive.h
>>>> +++ b/include/hw/ppc/spapr_xive.h
>>>> @@ -44,6 +44,13 @@ typedef struct sPAPRXive {
>>>>      void          *tm_mmap;
>>>>  } sPAPRXive;
>>>>  
>>>> +/*
>>>> + * The sPAPR machine has a unique XIVE IC device. Assign a fixed value
>>>> + * to the controller block id value. It can nevertheless be changed
>>>> + * for testing purpose.
>>>> + */
>>>> +#define SPAPR_XIVE_BLOCK_ID 0x0
>>>> +
>>>>  bool spapr_xive_irq_claim(sPAPRXive *xive, uint32_t lisn, bool lsi);
>>>>  bool spapr_xive_irq_free(sPAPRXive *xive, uint32_t lisn);
>>>>  void spapr_xive_pic_print_info(sPAPRXive *xive, Monitor *mon);
>>>> @@ -74,5 +81,6 @@ void kvmppc_xive_set_queue_config(sPAPRXive *xive, uint8_t end_blk,
>>>>  void kvmppc_xive_get_queue_config(sPAPRXive *xive, uint8_t end_blk,
>>>>                                   uint32_t end_idx, XiveEND *end,
>>>>                                   Error **errp);
>>>> +void kvmppc_xive_synchronize_state(sPAPRXive *xive, Error **errp);
>>>>  
>>>>  #endif /* PPC_SPAPR_XIVE_H */
>>>> diff --git a/include/hw/ppc/xive.h b/include/hw/ppc/xive.h
>>>> index 061d43fea24d..f3766fd881a2 100644
>>>> --- a/include/hw/ppc/xive.h
>>>> +++ b/include/hw/ppc/xive.h
>>>> @@ -431,5 +431,6 @@ void kvmppc_xive_source_reset_one(XiveSource *xsrc, int srcno, Error **errp);
>>>>  void kvmppc_xive_source_reset(XiveSource *xsrc, Error **errp);
>>>>  void kvmppc_xive_source_set_irq(void *opaque, int srcno, int val);
>>>>  void kvmppc_xive_cpu_connect(XiveTCTX *tctx, Error **errp);
>>>> +void kvmppc_xive_cpu_synchronize_state(XiveTCTX *tctx, Error **errp);
>>>>  
>>>>  #endif /* PPC_XIVE_H */
>>>> diff --git a/hw/intc/spapr_xive.c b/hw/intc/spapr_xive.c
>>>> index 3db24391e31c..9f07567f4d78 100644
>>>> --- a/hw/intc/spapr_xive.c
>>>> +++ b/hw/intc/spapr_xive.c
>>>> @@ -40,13 +40,6 @@
>>>>  
>>>>  #define SPAPR_XIVE_NVT_BASE 0x400
>>>>  
>>>> -/*
>>>> - * The sPAPR machine has a unique XIVE IC device. Assign a fixed value
>>>> - * to the controller block id value. It can nevertheless be changed
>>>> - * for testing purpose.
>>>> - */
>>>> -#define SPAPR_XIVE_BLOCK_ID 0x0
>>>> -
>>>>  /*
>>>>   * sPAPR NVT and END indexing helpers
>>>>   */
>>>> @@ -153,6 +146,16 @@ void spapr_xive_pic_print_info(sPAPRXive *xive, Monitor *mon)
>>>>      XiveSource *xsrc = &xive->source;
>>>>      int i;
>>>>  
>>>> +    if (kvm_irqchip_in_kernel()) {
>>>> +        Error *local_err = NULL;
>>>> +
>>>> +        kvmppc_xive_synchronize_state(xive, &local_err);
>>>> +        if (local_err) {
>>>> +            error_report_err(local_err);
>>>> +            return;
>>>> +        }
>>>> +    }
>>>> +
>>>>      monitor_printf(mon, "  LSIN         PQ    EISN     CPU/PRIO EQ\n");
>>>>  
>>>>      for (i = 0; i < xive->nr_irqs; i++) {
>>>> diff --git a/hw/intc/spapr_xive_kvm.c b/hw/intc/spapr_xive_kvm.c
>>>> index 6b50451b4f85..4b1ffb9835f9 100644
>>>> --- a/hw/intc/spapr_xive_kvm.c
>>>> +++ b/hw/intc/spapr_xive_kvm.c
>>>> @@ -60,6 +60,57 @@ static void kvm_cpu_enable(CPUState *cs)
>>>>  /*
>>>>   * XIVE Thread Interrupt Management context (KVM)
>>>>   */
>>>> +static void kvmppc_xive_cpu_get_state(XiveTCTX *tctx, Error **errp)
>>>> +{
>>>> +    uint64_t state[4] = { 0 };
>>>> +    int ret;
>>>> +
>>>> +    ret = kvm_get_one_reg(tctx->cs, KVM_REG_PPC_NVT_STATE, state);
>>>> +    if (ret != 0) {
>>>> +        error_setg_errno(errp, errno,
>>>> +                         "XIVE: could not capture KVM state of CPU %ld",
>>>> +                         kvm_arch_vcpu_id(tctx->cs));
>>>> +        return;
>>>> +    }
>>>> +
>>>> +    /* word0 and word1 of the OS ring. */
>>>> +    *((uint64_t *) &tctx->regs[TM_QW1_OS]) = state[0];
>>>> +
>>>> +    /*
>>>> +     * KVM also returns word2 containing the OS CAM line which is
>>>> +     * interesting to print out in the QEMU monitor.
>>>> +     */
>>>> +    *((uint64_t *) &tctx->regs[TM_QW1_OS + TM_WORD2]) = state[1];
>>>
>>> As mentioned elsewhere, it is interesting for debugging, but doesn't
>>> seem to really match the guest visible CAM state, 
>>
>> The guest is not allowed to see these registers in the TIMA OS page 
> 
> Ah.. right.  IIRC, roughly speaking the first doubleword in each ring
> belongs to that level, but the second doubleword belongs to the right
> above (since that's what manages the scheduling of what's in this
> ring).

yes. It is the hypervisor (KVM, QEMU) that sets the CAM line value
in the OS ring.

> Makes it a big bogus to carry that as migrated state then.

It is true that the OS CAM line value does not need to be migrated
as it will be different on the destination.

We still want to print out this state and the only place where we can 
put it is in the OS CAM line of the OS ring. I see that we don't 
have a satisfying solution for now. So I will remove these changes 
from the proposal and keep them in my tree nevertheless.

>> and we are not violating the XIVE architecture. That is where the 
>> CAM value belong in HW. The exact same place. I was even thinking 
>> to propagate the POOL value which could be useful for nested.
>>
>>> so I'm not convinced it's a good idea to put it into the regs[] 
>>> structure.
>>
>> I understand it is problematic in case of a KVM->QEMU migration 
>> because we need to force a XiveTCTX reset to update the registers 
>> with the QEMU CAM line which has been overridden with the KVM CAM 
>> line. 
>>
>> Another solution could be to add a 'nvt_base' property to SpaprXive 
>> and a KVM control to get its value (xive->vp_base in the KVM XIVE 
>> device). It would be migrated and used by the QEMU XIVE device 
>> after migration. 
>>  
>>>> +}
>>>> +
>>>> +typedef struct {
>>>> +    XiveTCTX *tctx;
>>>> +    Error *err;
>>>> +} XiveCpuGetState;
>>>> +
>>>> +static void kvmppc_xive_cpu_do_synchronize_state(CPUState *cpu,
>>>> +                                                 run_on_cpu_data arg)
>>>> +{
>>>> +    XiveCpuGetState *s = arg.host_ptr;
>>>> +
>>>> +    kvmppc_xive_cpu_get_state(s->tctx, &s->err);
>>>> +}
>>>> +
>>>> +void kvmppc_xive_cpu_synchronize_state(XiveTCTX *tctx, Error **errp)
>>>> +{
>>>> +    XiveCpuGetState s = {
>>>> +        .tctx = tctx,
>>>> +        .err = NULL,
>>>> +    };
>>>> +
>>>> +    run_on_cpu(tctx->cs, kvmppc_xive_cpu_do_synchronize_state,
>>>> +               RUN_ON_CPU_HOST_PTR(&s));
>>>
>>> Why does this need a run_on_cpu() ?  The KVM call which is getting the
>>> actual info takes a cpu parameter.
>>
>> Don't we need to kick the vCPU ?
> 
> Um.. you tell me?  If it's not safe to call on a vcpu other than one
> owned by the calling thread, I'm not sure if the cpu parameter makes
> sense.

OK. I will check if we can do anything better.

Thanks,

C.
Cédric Le Goater March 14, 2019, 7:56 a.m. UTC | #5
On 3/14/19 3:10 AM, David Gibson wrote:
> On Mon, Mar 11, 2019 at 09:41:12PM +0100, Cédric Le Goater wrote:
>> On 2/26/19 1:01 AM, David Gibson wrote:
>>> On Fri, Feb 22, 2019 at 02:13:13PM +0100, Cédric Le Goater wrote:
>>>> This extends the KVM XIVE device backend with 'synchronize_state'
>>>> methods used to retrieve the state from KVM. The HW state of the
>>>> sources, the KVM device and the thread interrupt contexts are
>>>> collected for the monitor usage and also migration.
>>>>
>>>> These get operations rely on their KVM counterpart in the host kernel
>>>> which acts as a proxy for OPAL, the host firmware. The set operations
>>>> will be added for migration support later.
>>>>
>>>> Signed-off-by: Cédric Le Goater <clg@kaod.org>
>>>> ---
>>>>  include/hw/ppc/spapr_xive.h |  8 ++++
>>>>  include/hw/ppc/xive.h       |  1 +
>>>>  hw/intc/spapr_xive.c        | 17 ++++---
>>>>  hw/intc/spapr_xive_kvm.c    | 89 +++++++++++++++++++++++++++++++++++++
>>>>  hw/intc/xive.c              | 10 +++++
>>>>  5 files changed, 118 insertions(+), 7 deletions(-)
>>>>
>>>> diff --git a/include/hw/ppc/spapr_xive.h b/include/hw/ppc/spapr_xive.h
>>>> index 749c6cbc2c56..ebd65e7fe36b 100644
>>>> --- a/include/hw/ppc/spapr_xive.h
>>>> +++ b/include/hw/ppc/spapr_xive.h
>>>> @@ -44,6 +44,13 @@ typedef struct sPAPRXive {
>>>>      void          *tm_mmap;
>>>>  } sPAPRXive;
>>>>  
>>>> +/*
>>>> + * The sPAPR machine has a unique XIVE IC device. Assign a fixed value
>>>> + * to the controller block id value. It can nevertheless be changed
>>>> + * for testing purpose.
>>>> + */
>>>> +#define SPAPR_XIVE_BLOCK_ID 0x0
>>>> +
>>>>  bool spapr_xive_irq_claim(sPAPRXive *xive, uint32_t lisn, bool lsi);
>>>>  bool spapr_xive_irq_free(sPAPRXive *xive, uint32_t lisn);
>>>>  void spapr_xive_pic_print_info(sPAPRXive *xive, Monitor *mon);
>>>> @@ -74,5 +81,6 @@ void kvmppc_xive_set_queue_config(sPAPRXive *xive, uint8_t end_blk,
>>>>  void kvmppc_xive_get_queue_config(sPAPRXive *xive, uint8_t end_blk,
>>>>                                   uint32_t end_idx, XiveEND *end,
>>>>                                   Error **errp);
>>>> +void kvmppc_xive_synchronize_state(sPAPRXive *xive, Error **errp);
>>>>  
>>>>  #endif /* PPC_SPAPR_XIVE_H */
>>>> diff --git a/include/hw/ppc/xive.h b/include/hw/ppc/xive.h
>>>> index 061d43fea24d..f3766fd881a2 100644
>>>> --- a/include/hw/ppc/xive.h
>>>> +++ b/include/hw/ppc/xive.h
>>>> @@ -431,5 +431,6 @@ void kvmppc_xive_source_reset_one(XiveSource *xsrc, int srcno, Error **errp);
>>>>  void kvmppc_xive_source_reset(XiveSource *xsrc, Error **errp);
>>>>  void kvmppc_xive_source_set_irq(void *opaque, int srcno, int val);
>>>>  void kvmppc_xive_cpu_connect(XiveTCTX *tctx, Error **errp);
>>>> +void kvmppc_xive_cpu_synchronize_state(XiveTCTX *tctx, Error **errp);
>>>>  
>>>>  #endif /* PPC_XIVE_H */
>>>> diff --git a/hw/intc/spapr_xive.c b/hw/intc/spapr_xive.c
>>>> index 3db24391e31c..9f07567f4d78 100644
>>>> --- a/hw/intc/spapr_xive.c
>>>> +++ b/hw/intc/spapr_xive.c
>>>> @@ -40,13 +40,6 @@
>>>>  
>>>>  #define SPAPR_XIVE_NVT_BASE 0x400
>>>>  
>>>> -/*
>>>> - * The sPAPR machine has a unique XIVE IC device. Assign a fixed value
>>>> - * to the controller block id value. It can nevertheless be changed
>>>> - * for testing purpose.
>>>> - */
>>>> -#define SPAPR_XIVE_BLOCK_ID 0x0
>>>> -
>>>>  /*
>>>>   * sPAPR NVT and END indexing helpers
>>>>   */
>>>> @@ -153,6 +146,16 @@ void spapr_xive_pic_print_info(sPAPRXive *xive, Monitor *mon)
>>>>      XiveSource *xsrc = &xive->source;
>>>>      int i;
>>>>  
>>>> +    if (kvm_irqchip_in_kernel()) {
>>>> +        Error *local_err = NULL;
>>>> +
>>>> +        kvmppc_xive_synchronize_state(xive, &local_err);
>>>> +        if (local_err) {
>>>> +            error_report_err(local_err);
>>>> +            return;
>>>> +        }
>>>> +    }
>>>> +
>>>>      monitor_printf(mon, "  LSIN         PQ    EISN     CPU/PRIO EQ\n");
>>>>  
>>>>      for (i = 0; i < xive->nr_irqs; i++) {
>>>> diff --git a/hw/intc/spapr_xive_kvm.c b/hw/intc/spapr_xive_kvm.c
>>>> index 6b50451b4f85..4b1ffb9835f9 100644
>>>> --- a/hw/intc/spapr_xive_kvm.c
>>>> +++ b/hw/intc/spapr_xive_kvm.c
>>>> @@ -60,6 +60,57 @@ static void kvm_cpu_enable(CPUState *cs)
>>>>  /*
>>>>   * XIVE Thread Interrupt Management context (KVM)
>>>>   */
>>>> +static void kvmppc_xive_cpu_get_state(XiveTCTX *tctx, Error **errp)
>>>> +{
>>>> +    uint64_t state[4] = { 0 };
>>>> +    int ret;
>>>> +
>>>> +    ret = kvm_get_one_reg(tctx->cs, KVM_REG_PPC_NVT_STATE, state);
>>>> +    if (ret != 0) {
>>>> +        error_setg_errno(errp, errno,
>>>> +                         "XIVE: could not capture KVM state of CPU %ld",
>>>> +                         kvm_arch_vcpu_id(tctx->cs));
>>>> +        return;
>>>> +    }
>>>> +
>>>> +    /* word0 and word1 of the OS ring. */
>>>> +    *((uint64_t *) &tctx->regs[TM_QW1_OS]) = state[0];
>>>> +
>>>> +    /*
>>>> +     * KVM also returns word2 containing the OS CAM line which is
>>>> +     * interesting to print out in the QEMU monitor.
>>>> +     */
>>>> +    *((uint64_t *) &tctx->regs[TM_QW1_OS + TM_WORD2]) = state[1];
>>>
>>> As mentioned elsewhere, it is interesting for debugging, but doesn't
>>> seem to really match the guest visible CAM state, 
>>
>> The guest is not allowed to see these registers in the TIMA OS page 
> 
> Ah.. right.  IIRC, roughly speaking the first doubleword in each ring
> belongs to that level, but the second doubleword belongs to the right
> above (since that's what manages the scheduling of what's in this
> ring).
> 
> Makes it a big bogus to carry that as migrated state then.
> 
>> and we are not violating the XIVE architecture. That is where the 
>> CAM value belong in HW. The exact same place. I was even thinking 
>> to propagate the POOL value which could be useful for nested.
>>
>>> so I'm not convinced it's a good idea to put it into the regs[] 
>>> structure.
>>
>> I understand it is problematic in case of a KVM->QEMU migration 
>> because we need to force a XiveTCTX reset to update the registers 
>> with the QEMU CAM line which has been overridden with the KVM CAM 
>> line. 
>>
>> Another solution could be to add a 'nvt_base' property to SpaprXive 
>> and a KVM control to get its value (xive->vp_base in the KVM XIVE 
>> device). It would be migrated and used by the QEMU XIVE device 
>> after migration. 
>>  
>>>> +}
>>>> +
>>>> +typedef struct {
>>>> +    XiveTCTX *tctx;
>>>> +    Error *err;
>>>> +} XiveCpuGetState;
>>>> +
>>>> +static void kvmppc_xive_cpu_do_synchronize_state(CPUState *cpu,
>>>> +                                                 run_on_cpu_data arg)
>>>> +{
>>>> +    XiveCpuGetState *s = arg.host_ptr;
>>>> +
>>>> +    kvmppc_xive_cpu_get_state(s->tctx, &s->err);
>>>> +}
>>>> +
>>>> +void kvmppc_xive_cpu_synchronize_state(XiveTCTX *tctx, Error **errp)
>>>> +{
>>>> +    XiveCpuGetState s = {
>>>> +        .tctx = tctx,
>>>> +        .err = NULL,
>>>> +    };
>>>> +
>>>> +    run_on_cpu(tctx->cs, kvmppc_xive_cpu_do_synchronize_state,
>>>> +               RUN_ON_CPU_HOST_PTR(&s));
>>>
>>> Why does this need a run_on_cpu() ?  The KVM call which is getting the
>>> actual info takes a cpu parameter.
>>
>> Don't we need to kick the vCPU ?
> 
> Um.. you tell me?  

we do need to kick the vCPU. 

> If it's not safe to call on a vcpu other than one
> owned by the calling thread, I'm not sure if the cpu parameter makes
> sense.

That's the typedef we need to use with run_on_cpu() :

	typedef void (*run_on_cpu_func)(CPUState *cpu, run_on_cpu_data data);

I am not sure where you are getting at with this cpu parameter.

C.
David Gibson March 15, 2019, 12:23 a.m. UTC | #6
On Thu, Mar 14, 2019 at 08:56:25AM +0100, Cédric Le Goater wrote:
> On 3/14/19 3:10 AM, David Gibson wrote:
> > On Mon, Mar 11, 2019 at 09:41:12PM +0100, Cédric Le Goater wrote:
> >> On 2/26/19 1:01 AM, David Gibson wrote:
> >>> On Fri, Feb 22, 2019 at 02:13:13PM +0100, Cédric Le Goater wrote:
> >>>> This extends the KVM XIVE device backend with 'synchronize_state'
> >>>> methods used to retrieve the state from KVM. The HW state of the
> >>>> sources, the KVM device and the thread interrupt contexts are
> >>>> collected for the monitor usage and also migration.
> >>>>
> >>>> These get operations rely on their KVM counterpart in the host kernel
> >>>> which acts as a proxy for OPAL, the host firmware. The set operations
> >>>> will be added for migration support later.
> >>>>
> >>>> Signed-off-by: Cédric Le Goater <clg@kaod.org>
> >>>> ---
> >>>>  include/hw/ppc/spapr_xive.h |  8 ++++
> >>>>  include/hw/ppc/xive.h       |  1 +
> >>>>  hw/intc/spapr_xive.c        | 17 ++++---
> >>>>  hw/intc/spapr_xive_kvm.c    | 89 +++++++++++++++++++++++++++++++++++++
> >>>>  hw/intc/xive.c              | 10 +++++
> >>>>  5 files changed, 118 insertions(+), 7 deletions(-)
> >>>>
> >>>> diff --git a/include/hw/ppc/spapr_xive.h b/include/hw/ppc/spapr_xive.h
> >>>> index 749c6cbc2c56..ebd65e7fe36b 100644
> >>>> --- a/include/hw/ppc/spapr_xive.h
> >>>> +++ b/include/hw/ppc/spapr_xive.h
> >>>> @@ -44,6 +44,13 @@ typedef struct sPAPRXive {
> >>>>      void          *tm_mmap;
> >>>>  } sPAPRXive;
> >>>>  
> >>>> +/*
> >>>> + * The sPAPR machine has a unique XIVE IC device. Assign a fixed value
> >>>> + * to the controller block id value. It can nevertheless be changed
> >>>> + * for testing purpose.
> >>>> + */
> >>>> +#define SPAPR_XIVE_BLOCK_ID 0x0
> >>>> +
> >>>>  bool spapr_xive_irq_claim(sPAPRXive *xive, uint32_t lisn, bool lsi);
> >>>>  bool spapr_xive_irq_free(sPAPRXive *xive, uint32_t lisn);
> >>>>  void spapr_xive_pic_print_info(sPAPRXive *xive, Monitor *mon);
> >>>> @@ -74,5 +81,6 @@ void kvmppc_xive_set_queue_config(sPAPRXive *xive, uint8_t end_blk,
> >>>>  void kvmppc_xive_get_queue_config(sPAPRXive *xive, uint8_t end_blk,
> >>>>                                   uint32_t end_idx, XiveEND *end,
> >>>>                                   Error **errp);
> >>>> +void kvmppc_xive_synchronize_state(sPAPRXive *xive, Error **errp);
> >>>>  
> >>>>  #endif /* PPC_SPAPR_XIVE_H */
> >>>> diff --git a/include/hw/ppc/xive.h b/include/hw/ppc/xive.h
> >>>> index 061d43fea24d..f3766fd881a2 100644
> >>>> --- a/include/hw/ppc/xive.h
> >>>> +++ b/include/hw/ppc/xive.h
> >>>> @@ -431,5 +431,6 @@ void kvmppc_xive_source_reset_one(XiveSource *xsrc, int srcno, Error **errp);
> >>>>  void kvmppc_xive_source_reset(XiveSource *xsrc, Error **errp);
> >>>>  void kvmppc_xive_source_set_irq(void *opaque, int srcno, int val);
> >>>>  void kvmppc_xive_cpu_connect(XiveTCTX *tctx, Error **errp);
> >>>> +void kvmppc_xive_cpu_synchronize_state(XiveTCTX *tctx, Error **errp);
> >>>>  
> >>>>  #endif /* PPC_XIVE_H */
> >>>> diff --git a/hw/intc/spapr_xive.c b/hw/intc/spapr_xive.c
> >>>> index 3db24391e31c..9f07567f4d78 100644
> >>>> --- a/hw/intc/spapr_xive.c
> >>>> +++ b/hw/intc/spapr_xive.c
> >>>> @@ -40,13 +40,6 @@
> >>>>  
> >>>>  #define SPAPR_XIVE_NVT_BASE 0x400
> >>>>  
> >>>> -/*
> >>>> - * The sPAPR machine has a unique XIVE IC device. Assign a fixed value
> >>>> - * to the controller block id value. It can nevertheless be changed
> >>>> - * for testing purpose.
> >>>> - */
> >>>> -#define SPAPR_XIVE_BLOCK_ID 0x0
> >>>> -
> >>>>  /*
> >>>>   * sPAPR NVT and END indexing helpers
> >>>>   */
> >>>> @@ -153,6 +146,16 @@ void spapr_xive_pic_print_info(sPAPRXive *xive, Monitor *mon)
> >>>>      XiveSource *xsrc = &xive->source;
> >>>>      int i;
> >>>>  
> >>>> +    if (kvm_irqchip_in_kernel()) {
> >>>> +        Error *local_err = NULL;
> >>>> +
> >>>> +        kvmppc_xive_synchronize_state(xive, &local_err);
> >>>> +        if (local_err) {
> >>>> +            error_report_err(local_err);
> >>>> +            return;
> >>>> +        }
> >>>> +    }
> >>>> +
> >>>>      monitor_printf(mon, "  LSIN         PQ    EISN     CPU/PRIO EQ\n");
> >>>>  
> >>>>      for (i = 0; i < xive->nr_irqs; i++) {
> >>>> diff --git a/hw/intc/spapr_xive_kvm.c b/hw/intc/spapr_xive_kvm.c
> >>>> index 6b50451b4f85..4b1ffb9835f9 100644
> >>>> --- a/hw/intc/spapr_xive_kvm.c
> >>>> +++ b/hw/intc/spapr_xive_kvm.c
> >>>> @@ -60,6 +60,57 @@ static void kvm_cpu_enable(CPUState *cs)
> >>>>  /*
> >>>>   * XIVE Thread Interrupt Management context (KVM)
> >>>>   */
> >>>> +static void kvmppc_xive_cpu_get_state(XiveTCTX *tctx, Error **errp)
> >>>> +{
> >>>> +    uint64_t state[4] = { 0 };
> >>>> +    int ret;
> >>>> +
> >>>> +    ret = kvm_get_one_reg(tctx->cs, KVM_REG_PPC_NVT_STATE, state);
> >>>> +    if (ret != 0) {
> >>>> +        error_setg_errno(errp, errno,
> >>>> +                         "XIVE: could not capture KVM state of CPU %ld",
> >>>> +                         kvm_arch_vcpu_id(tctx->cs));
> >>>> +        return;
> >>>> +    }
> >>>> +
> >>>> +    /* word0 and word1 of the OS ring. */
> >>>> +    *((uint64_t *) &tctx->regs[TM_QW1_OS]) = state[0];
> >>>> +
> >>>> +    /*
> >>>> +     * KVM also returns word2 containing the OS CAM line which is
> >>>> +     * interesting to print out in the QEMU monitor.
> >>>> +     */
> >>>> +    *((uint64_t *) &tctx->regs[TM_QW1_OS + TM_WORD2]) = state[1];
> >>>
> >>> As mentioned elsewhere, it is interesting for debugging, but doesn't
> >>> seem to really match the guest visible CAM state, 
> >>
> >> The guest is not allowed to see these registers in the TIMA OS page 
> > 
> > Ah.. right.  IIRC, roughly speaking the first doubleword in each ring
> > belongs to that level, but the second doubleword belongs to the right
> > above (since that's what manages the scheduling of what's in this
> > ring).
> > 
> > Makes it a big bogus to carry that as migrated state then.
> > 
> >> and we are not violating the XIVE architecture. That is where the 
> >> CAM value belong in HW. The exact same place. I was even thinking 
> >> to propagate the POOL value which could be useful for nested.
> >>
> >>> so I'm not convinced it's a good idea to put it into the regs[] 
> >>> structure.
> >>
> >> I understand it is problematic in case of a KVM->QEMU migration 
> >> because we need to force a XiveTCTX reset to update the registers 
> >> with the QEMU CAM line which has been overridden with the KVM CAM 
> >> line. 
> >>
> >> Another solution could be to add a 'nvt_base' property to SpaprXive 
> >> and a KVM control to get its value (xive->vp_base in the KVM XIVE 
> >> device). It would be migrated and used by the QEMU XIVE device 
> >> after migration. 
> >>  
> >>>> +}
> >>>> +
> >>>> +typedef struct {
> >>>> +    XiveTCTX *tctx;
> >>>> +    Error *err;
> >>>> +} XiveCpuGetState;
> >>>> +
> >>>> +static void kvmppc_xive_cpu_do_synchronize_state(CPUState *cpu,
> >>>> +                                                 run_on_cpu_data arg)
> >>>> +{
> >>>> +    XiveCpuGetState *s = arg.host_ptr;
> >>>> +
> >>>> +    kvmppc_xive_cpu_get_state(s->tctx, &s->err);
> >>>> +}
> >>>> +
> >>>> +void kvmppc_xive_cpu_synchronize_state(XiveTCTX *tctx, Error **errp)
> >>>> +{
> >>>> +    XiveCpuGetState s = {
> >>>> +        .tctx = tctx,
> >>>> +        .err = NULL,
> >>>> +    };
> >>>> +
> >>>> +    run_on_cpu(tctx->cs, kvmppc_xive_cpu_do_synchronize_state,
> >>>> +               RUN_ON_CPU_HOST_PTR(&s));
> >>>
> >>> Why does this need a run_on_cpu() ?  The KVM call which is getting the
> >>> actual info takes a cpu parameter.
> >>
> >> Don't we need to kick the vCPU ?
> > 
> > Um.. you tell me?  
> 
> we do need to kick the vCPU. 
> 
> > If it's not safe to call on a vcpu other than one
> > owned by the calling thread, I'm not sure if the cpu parameter makes
> > sense.
> 
> That's the typedef we need to use with run_on_cpu() :
> 
> 	typedef void (*run_on_cpu_func)(CPUState *cpu, run_on_cpu_data data);
> 
> I am not sure where you are getting at with this cpu parameter.

Uh.. I was getting confused, never mind.

A brief comment here saying why this needs to be run on the vcpu
thread would be helpful, though.
Cédric Le Goater March 15, 2019, 6:40 a.m. UTC | #7
On 3/15/19 1:23 AM, David Gibson wrote:
> On Thu, Mar 14, 2019 at 08:56:25AM +0100, Cédric Le Goater wrote:
>> On 3/14/19 3:10 AM, David Gibson wrote:
>>> On Mon, Mar 11, 2019 at 09:41:12PM +0100, Cédric Le Goater wrote:
>>>> On 2/26/19 1:01 AM, David Gibson wrote:
>>>>> On Fri, Feb 22, 2019 at 02:13:13PM +0100, Cédric Le Goater wrote:
>>>>>> This extends the KVM XIVE device backend with 'synchronize_state'
>>>>>> methods used to retrieve the state from KVM. The HW state of the
>>>>>> sources, the KVM device and the thread interrupt contexts are
>>>>>> collected for the monitor usage and also migration.
>>>>>>
>>>>>> These get operations rely on their KVM counterpart in the host kernel
>>>>>> which acts as a proxy for OPAL, the host firmware. The set operations
>>>>>> will be added for migration support later.
>>>>>>
>>>>>> Signed-off-by: Cédric Le Goater <clg@kaod.org>
>>>>>> ---
>>>>>>  include/hw/ppc/spapr_xive.h |  8 ++++
>>>>>>  include/hw/ppc/xive.h       |  1 +
>>>>>>  hw/intc/spapr_xive.c        | 17 ++++---
>>>>>>  hw/intc/spapr_xive_kvm.c    | 89 +++++++++++++++++++++++++++++++++++++
>>>>>>  hw/intc/xive.c              | 10 +++++
>>>>>>  5 files changed, 118 insertions(+), 7 deletions(-)
>>>>>>
>>>>>> diff --git a/include/hw/ppc/spapr_xive.h b/include/hw/ppc/spapr_xive.h
>>>>>> index 749c6cbc2c56..ebd65e7fe36b 100644
>>>>>> --- a/include/hw/ppc/spapr_xive.h
>>>>>> +++ b/include/hw/ppc/spapr_xive.h
>>>>>> @@ -44,6 +44,13 @@ typedef struct sPAPRXive {
>>>>>>      void          *tm_mmap;
>>>>>>  } sPAPRXive;
>>>>>>  
>>>>>> +/*
>>>>>> + * The sPAPR machine has a unique XIVE IC device. Assign a fixed value
>>>>>> + * to the controller block id value. It can nevertheless be changed
>>>>>> + * for testing purpose.
>>>>>> + */
>>>>>> +#define SPAPR_XIVE_BLOCK_ID 0x0
>>>>>> +
>>>>>>  bool spapr_xive_irq_claim(sPAPRXive *xive, uint32_t lisn, bool lsi);
>>>>>>  bool spapr_xive_irq_free(sPAPRXive *xive, uint32_t lisn);
>>>>>>  void spapr_xive_pic_print_info(sPAPRXive *xive, Monitor *mon);
>>>>>> @@ -74,5 +81,6 @@ void kvmppc_xive_set_queue_config(sPAPRXive *xive, uint8_t end_blk,
>>>>>>  void kvmppc_xive_get_queue_config(sPAPRXive *xive, uint8_t end_blk,
>>>>>>                                   uint32_t end_idx, XiveEND *end,
>>>>>>                                   Error **errp);
>>>>>> +void kvmppc_xive_synchronize_state(sPAPRXive *xive, Error **errp);
>>>>>>  
>>>>>>  #endif /* PPC_SPAPR_XIVE_H */
>>>>>> diff --git a/include/hw/ppc/xive.h b/include/hw/ppc/xive.h
>>>>>> index 061d43fea24d..f3766fd881a2 100644
>>>>>> --- a/include/hw/ppc/xive.h
>>>>>> +++ b/include/hw/ppc/xive.h
>>>>>> @@ -431,5 +431,6 @@ void kvmppc_xive_source_reset_one(XiveSource *xsrc, int srcno, Error **errp);
>>>>>>  void kvmppc_xive_source_reset(XiveSource *xsrc, Error **errp);
>>>>>>  void kvmppc_xive_source_set_irq(void *opaque, int srcno, int val);
>>>>>>  void kvmppc_xive_cpu_connect(XiveTCTX *tctx, Error **errp);
>>>>>> +void kvmppc_xive_cpu_synchronize_state(XiveTCTX *tctx, Error **errp);
>>>>>>  
>>>>>>  #endif /* PPC_XIVE_H */
>>>>>> diff --git a/hw/intc/spapr_xive.c b/hw/intc/spapr_xive.c
>>>>>> index 3db24391e31c..9f07567f4d78 100644
>>>>>> --- a/hw/intc/spapr_xive.c
>>>>>> +++ b/hw/intc/spapr_xive.c
>>>>>> @@ -40,13 +40,6 @@
>>>>>>  
>>>>>>  #define SPAPR_XIVE_NVT_BASE 0x400
>>>>>>  
>>>>>> -/*
>>>>>> - * The sPAPR machine has a unique XIVE IC device. Assign a fixed value
>>>>>> - * to the controller block id value. It can nevertheless be changed
>>>>>> - * for testing purpose.
>>>>>> - */
>>>>>> -#define SPAPR_XIVE_BLOCK_ID 0x0
>>>>>> -
>>>>>>  /*
>>>>>>   * sPAPR NVT and END indexing helpers
>>>>>>   */
>>>>>> @@ -153,6 +146,16 @@ void spapr_xive_pic_print_info(sPAPRXive *xive, Monitor *mon)
>>>>>>      XiveSource *xsrc = &xive->source;
>>>>>>      int i;
>>>>>>  
>>>>>> +    if (kvm_irqchip_in_kernel()) {
>>>>>> +        Error *local_err = NULL;
>>>>>> +
>>>>>> +        kvmppc_xive_synchronize_state(xive, &local_err);
>>>>>> +        if (local_err) {
>>>>>> +            error_report_err(local_err);
>>>>>> +            return;
>>>>>> +        }
>>>>>> +    }
>>>>>> +
>>>>>>      monitor_printf(mon, "  LSIN         PQ    EISN     CPU/PRIO EQ\n");
>>>>>>  
>>>>>>      for (i = 0; i < xive->nr_irqs; i++) {
>>>>>> diff --git a/hw/intc/spapr_xive_kvm.c b/hw/intc/spapr_xive_kvm.c
>>>>>> index 6b50451b4f85..4b1ffb9835f9 100644
>>>>>> --- a/hw/intc/spapr_xive_kvm.c
>>>>>> +++ b/hw/intc/spapr_xive_kvm.c
>>>>>> @@ -60,6 +60,57 @@ static void kvm_cpu_enable(CPUState *cs)
>>>>>>  /*
>>>>>>   * XIVE Thread Interrupt Management context (KVM)
>>>>>>   */
>>>>>> +static void kvmppc_xive_cpu_get_state(XiveTCTX *tctx, Error **errp)
>>>>>> +{
>>>>>> +    uint64_t state[4] = { 0 };
>>>>>> +    int ret;
>>>>>> +
>>>>>> +    ret = kvm_get_one_reg(tctx->cs, KVM_REG_PPC_NVT_STATE, state);
>>>>>> +    if (ret != 0) {
>>>>>> +        error_setg_errno(errp, errno,
>>>>>> +                         "XIVE: could not capture KVM state of CPU %ld",
>>>>>> +                         kvm_arch_vcpu_id(tctx->cs));
>>>>>> +        return;
>>>>>> +    }
>>>>>> +
>>>>>> +    /* word0 and word1 of the OS ring. */
>>>>>> +    *((uint64_t *) &tctx->regs[TM_QW1_OS]) = state[0];
>>>>>> +
>>>>>> +    /*
>>>>>> +     * KVM also returns word2 containing the OS CAM line which is
>>>>>> +     * interesting to print out in the QEMU monitor.
>>>>>> +     */
>>>>>> +    *((uint64_t *) &tctx->regs[TM_QW1_OS + TM_WORD2]) = state[1];
>>>>>
>>>>> As mentioned elsewhere, it is interesting for debugging, but doesn't
>>>>> seem to really match the guest visible CAM state, 
>>>>
>>>> The guest is not allowed to see these registers in the TIMA OS page 
>>>
>>> Ah.. right.  IIRC, roughly speaking the first doubleword in each ring
>>> belongs to that level, but the second doubleword belongs to the right
>>> above (since that's what manages the scheduling of what's in this
>>> ring).
>>>
>>> Makes it a big bogus to carry that as migrated state then.
>>>
>>>> and we are not violating the XIVE architecture. That is where the 
>>>> CAM value belong in HW. The exact same place. I was even thinking 
>>>> to propagate the POOL value which could be useful for nested.
>>>>
>>>>> so I'm not convinced it's a good idea to put it into the regs[] 
>>>>> structure.
>>>>
>>>> I understand it is problematic in case of a KVM->QEMU migration 
>>>> because we need to force a XiveTCTX reset to update the registers 
>>>> with the QEMU CAM line which has been overridden with the KVM CAM 
>>>> line. 
>>>>
>>>> Another solution could be to add a 'nvt_base' property to SpaprXive 
>>>> and a KVM control to get its value (xive->vp_base in the KVM XIVE 
>>>> device). It would be migrated and used by the QEMU XIVE device 
>>>> after migration. 
>>>>  
>>>>>> +}
>>>>>> +
>>>>>> +typedef struct {
>>>>>> +    XiveTCTX *tctx;
>>>>>> +    Error *err;
>>>>>> +} XiveCpuGetState;
>>>>>> +
>>>>>> +static void kvmppc_xive_cpu_do_synchronize_state(CPUState *cpu,
>>>>>> +                                                 run_on_cpu_data arg)
>>>>>> +{
>>>>>> +    XiveCpuGetState *s = arg.host_ptr;
>>>>>> +
>>>>>> +    kvmppc_xive_cpu_get_state(s->tctx, &s->err);
>>>>>> +}
>>>>>> +
>>>>>> +void kvmppc_xive_cpu_synchronize_state(XiveTCTX *tctx, Error **errp)
>>>>>> +{
>>>>>> +    XiveCpuGetState s = {
>>>>>> +        .tctx = tctx,
>>>>>> +        .err = NULL,
>>>>>> +    };
>>>>>> +
>>>>>> +    run_on_cpu(tctx->cs, kvmppc_xive_cpu_do_synchronize_state,
>>>>>> +               RUN_ON_CPU_HOST_PTR(&s));
>>>>>
>>>>> Why does this need a run_on_cpu() ?  The KVM call which is getting the
>>>>> actual info takes a cpu parameter.
>>>>
>>>> Don't we need to kick the vCPU ?
>>>
>>> Um.. you tell me?  
>>
>> we do need to kick the vCPU. 
>>
>>> If it's not safe to call on a vcpu other than one
>>> owned by the calling thread, I'm not sure if the cpu parameter makes
>>> sense.
>>
>> That's the typedef we need to use with run_on_cpu() :
>>
>> 	typedef void (*run_on_cpu_func)(CPUState *cpu, run_on_cpu_data data);
>>
>> I am not sure where you are getting at with this cpu parameter.
> 
> Uh.. I was getting confused, never mind.
> 
> A brief comment here saying why this needs to be run on the vcpu
> thread would be helpful, though.

yes. Sure.

Thanks,

C.
diff mbox series

Patch

diff --git a/include/hw/ppc/spapr_xive.h b/include/hw/ppc/spapr_xive.h
index 749c6cbc2c56..ebd65e7fe36b 100644
--- a/include/hw/ppc/spapr_xive.h
+++ b/include/hw/ppc/spapr_xive.h
@@ -44,6 +44,13 @@  typedef struct sPAPRXive {
     void          *tm_mmap;
 } sPAPRXive;
 
+/*
+ * The sPAPR machine has a unique XIVE IC device. Assign a fixed value
+ * to the controller block id value. It can nevertheless be changed
+ * for testing purpose.
+ */
+#define SPAPR_XIVE_BLOCK_ID 0x0
+
 bool spapr_xive_irq_claim(sPAPRXive *xive, uint32_t lisn, bool lsi);
 bool spapr_xive_irq_free(sPAPRXive *xive, uint32_t lisn);
 void spapr_xive_pic_print_info(sPAPRXive *xive, Monitor *mon);
@@ -74,5 +81,6 @@  void kvmppc_xive_set_queue_config(sPAPRXive *xive, uint8_t end_blk,
 void kvmppc_xive_get_queue_config(sPAPRXive *xive, uint8_t end_blk,
                                  uint32_t end_idx, XiveEND *end,
                                  Error **errp);
+void kvmppc_xive_synchronize_state(sPAPRXive *xive, Error **errp);
 
 #endif /* PPC_SPAPR_XIVE_H */
diff --git a/include/hw/ppc/xive.h b/include/hw/ppc/xive.h
index 061d43fea24d..f3766fd881a2 100644
--- a/include/hw/ppc/xive.h
+++ b/include/hw/ppc/xive.h
@@ -431,5 +431,6 @@  void kvmppc_xive_source_reset_one(XiveSource *xsrc, int srcno, Error **errp);
 void kvmppc_xive_source_reset(XiveSource *xsrc, Error **errp);
 void kvmppc_xive_source_set_irq(void *opaque, int srcno, int val);
 void kvmppc_xive_cpu_connect(XiveTCTX *tctx, Error **errp);
+void kvmppc_xive_cpu_synchronize_state(XiveTCTX *tctx, Error **errp);
 
 #endif /* PPC_XIVE_H */
diff --git a/hw/intc/spapr_xive.c b/hw/intc/spapr_xive.c
index 3db24391e31c..9f07567f4d78 100644
--- a/hw/intc/spapr_xive.c
+++ b/hw/intc/spapr_xive.c
@@ -40,13 +40,6 @@ 
 
 #define SPAPR_XIVE_NVT_BASE 0x400
 
-/*
- * The sPAPR machine has a unique XIVE IC device. Assign a fixed value
- * to the controller block id value. It can nevertheless be changed
- * for testing purpose.
- */
-#define SPAPR_XIVE_BLOCK_ID 0x0
-
 /*
  * sPAPR NVT and END indexing helpers
  */
@@ -153,6 +146,16 @@  void spapr_xive_pic_print_info(sPAPRXive *xive, Monitor *mon)
     XiveSource *xsrc = &xive->source;
     int i;
 
+    if (kvm_irqchip_in_kernel()) {
+        Error *local_err = NULL;
+
+        kvmppc_xive_synchronize_state(xive, &local_err);
+        if (local_err) {
+            error_report_err(local_err);
+            return;
+        }
+    }
+
     monitor_printf(mon, "  LSIN         PQ    EISN     CPU/PRIO EQ\n");
 
     for (i = 0; i < xive->nr_irqs; i++) {
diff --git a/hw/intc/spapr_xive_kvm.c b/hw/intc/spapr_xive_kvm.c
index 6b50451b4f85..4b1ffb9835f9 100644
--- a/hw/intc/spapr_xive_kvm.c
+++ b/hw/intc/spapr_xive_kvm.c
@@ -60,6 +60,57 @@  static void kvm_cpu_enable(CPUState *cs)
 /*
  * XIVE Thread Interrupt Management context (KVM)
  */
+static void kvmppc_xive_cpu_get_state(XiveTCTX *tctx, Error **errp)
+{
+    uint64_t state[4] = { 0 };
+    int ret;
+
+    ret = kvm_get_one_reg(tctx->cs, KVM_REG_PPC_NVT_STATE, state);
+    if (ret != 0) {
+        error_setg_errno(errp, errno,
+                         "XIVE: could not capture KVM state of CPU %ld",
+                         kvm_arch_vcpu_id(tctx->cs));
+        return;
+    }
+
+    /* word0 and word1 of the OS ring. */
+    *((uint64_t *) &tctx->regs[TM_QW1_OS]) = state[0];
+
+    /*
+     * KVM also returns word2 containing the OS CAM line which is
+     * interesting to print out in the QEMU monitor.
+     */
+    *((uint64_t *) &tctx->regs[TM_QW1_OS + TM_WORD2]) = state[1];
+}
+
+typedef struct {
+    XiveTCTX *tctx;
+    Error *err;
+} XiveCpuGetState;
+
+static void kvmppc_xive_cpu_do_synchronize_state(CPUState *cpu,
+                                                 run_on_cpu_data arg)
+{
+    XiveCpuGetState *s = arg.host_ptr;
+
+    kvmppc_xive_cpu_get_state(s->tctx, &s->err);
+}
+
+void kvmppc_xive_cpu_synchronize_state(XiveTCTX *tctx, Error **errp)
+{
+    XiveCpuGetState s = {
+        .tctx = tctx,
+        .err = NULL,
+    };
+
+    run_on_cpu(tctx->cs, kvmppc_xive_cpu_do_synchronize_state,
+               RUN_ON_CPU_HOST_PTR(&s));
+
+    if (s.err) {
+        error_propagate(errp, s.err);
+        return;
+    }
+}
 
 void kvmppc_xive_cpu_connect(XiveTCTX *tctx, Error **errp)
 {
@@ -229,6 +280,19 @@  uint64_t kvmppc_xive_esb_rw(XiveSource *xsrc, int srcno, uint32_t offset,
     }
 }
 
+static void kvmppc_xive_source_get_state(XiveSource *xsrc)
+{
+    int i;
+
+    for (i = 0; i < xsrc->nr_irqs; i++) {
+        /* Perform a load without side effect to retrieve the PQ bits */
+        uint8_t pq = xive_esb_read(xsrc, i, XIVE_ESB_GET);
+
+        /* and save PQ locally */
+        xive_source_esb_set(xsrc, i, pq);
+    }
+}
+
 void kvmppc_xive_source_set_irq(void *opaque, int srcno, int val)
 {
     XiveSource *xsrc = opaque;
@@ -340,6 +404,31 @@  void kvmppc_xive_reset(sPAPRXive *xive, Error **errp)
                       NULL, true, errp);
 }
 
+static void kvmppc_xive_get_queues(sPAPRXive *xive, Error **errp)
+{
+    Error *local_err = NULL;
+    int i;
+
+    for (i = 0; i < xive->nr_ends; i++) {
+        kvmppc_xive_get_queue_config(xive, SPAPR_XIVE_BLOCK_ID, i,
+                                     &xive->endt[i], &local_err);
+        if (local_err) {
+            error_propagate(errp, local_err);
+            return;
+        }
+    }
+}
+
+void kvmppc_xive_synchronize_state(sPAPRXive *xive, Error **errp)
+{
+    kvmppc_xive_source_get_state(&xive->source);
+
+    /* EAT: there is no extra state to query from KVM */
+
+    /* ENDT */
+    kvmppc_xive_get_queues(xive, errp);
+}
+
 static void *kvmppc_xive_mmap(sPAPRXive *xive, int pgoff, size_t len,
                               Error **errp)
 {
diff --git a/hw/intc/xive.c b/hw/intc/xive.c
index 0284b5803551..f478c52ab2a0 100644
--- a/hw/intc/xive.c
+++ b/hw/intc/xive.c
@@ -431,6 +431,16 @@  void xive_tctx_pic_print_info(XiveTCTX *tctx, Monitor *mon)
     int cpu_index = tctx->cs ? tctx->cs->cpu_index : -1;
     int i;
 
+    if (kvm_irqchip_in_kernel()) {
+        Error *local_err = NULL;
+
+        kvmppc_xive_cpu_synchronize_state(tctx, &local_err);
+        if (local_err) {
+            error_report_err(local_err);
+            return;
+        }
+    }
+
     monitor_printf(mon, "CPU[%04x]:   QW   NSR CPPR IPB LSMFB ACK# INC AGE PIPR"
                    "  W2\n", cpu_index);