[17/19] KVM: PPC: Book3S HV: add get/set accessors for the VP XIVE state

Message ID 20190107191006.10648-1-clg@kaod.org
State Changes Requested
Headers show
Series
  • KVM: PPC: Book3S HV: add XIVE native exploitation mode
Related show

Commit Message

Cédric Le Goater Jan. 7, 2019, 7:10 p.m.
At a VCPU level, the state of the thread context interrupt management
registers needs to be collected. These registers are cached under the
'xive_saved_state.w01' field of the VCPU when the VCPU context is
pulled from the HW thread. An OPAL call retrieves the backup of the
IPB register in the NVT structure and merges it in the KVM state.

The structures of the interface between QEMU and KVM provision some
extra room (two u64) for further extensions if more state needs to be
transferred back to QEMU.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
---
 arch/powerpc/include/asm/kvm_ppc.h    |  5 ++
 arch/powerpc/include/uapi/asm/kvm.h   |  2 +
 arch/powerpc/kvm/book3s.c             | 24 +++++++++
 arch/powerpc/kvm/book3s_xive_native.c | 78 +++++++++++++++++++++++++++
 4 files changed, 109 insertions(+)

Comments

David Gibson Feb. 4, 2019, 5:26 a.m. | #1
On Mon, Jan 07, 2019 at 08:10:04PM +0100, Cédric Le Goater wrote:
> At a VCPU level, the state of the thread context interrupt management
> registers needs to be collected. These registers are cached under the
> 'xive_saved_state.w01' field of the VCPU when the VPCU context is
> pulled from the HW thread. An OPAL call retrieves the backup of the
> IPB register in the NVT structure and merges it in the KVM state.
> 
> The structures of the interface between QEMU and KVM provisions some
> extra room (two u64) for further extensions if more state needs to be
> transferred back to QEMU.
> 
> Signed-off-by: Cédric Le Goater <clg@kaod.org>
> ---
>  arch/powerpc/include/asm/kvm_ppc.h    |  5 ++
>  arch/powerpc/include/uapi/asm/kvm.h   |  2 +
>  arch/powerpc/kvm/book3s.c             | 24 +++++++++
>  arch/powerpc/kvm/book3s_xive_native.c | 78 +++++++++++++++++++++++++++
>  4 files changed, 109 insertions(+)
> 
> diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
> index 4cc897039485..49c488af168c 100644
> --- a/arch/powerpc/include/asm/kvm_ppc.h
> +++ b/arch/powerpc/include/asm/kvm_ppc.h
> @@ -270,6 +270,7 @@ union kvmppc_one_reg {
>  		u64	addr;
>  		u64	length;
>  	}	vpaval;
> +	u64	xive_timaval[4];
>  };
>  
>  struct kvmppc_ops {
> @@ -603,6 +604,8 @@ extern void kvmppc_xive_native_cleanup_vcpu(struct kvm_vcpu *vcpu);
>  extern void kvmppc_xive_native_init_module(void);
>  extern void kvmppc_xive_native_exit_module(void);
>  extern int kvmppc_xive_native_hcall(struct kvm_vcpu *vcpu, u32 cmd);
> +extern int kvmppc_xive_native_get_vp(struct kvm_vcpu *vcpu, union kvmppc_one_reg *val);
> +extern int kvmppc_xive_native_set_vp(struct kvm_vcpu *vcpu, union kvmppc_one_reg *val);
>  
>  #else
>  static inline int kvmppc_xive_set_xive(struct kvm *kvm, u32 irq, u32 server,
> @@ -637,6 +640,8 @@ static inline void kvmppc_xive_native_init_module(void) { }
>  static inline void kvmppc_xive_native_exit_module(void) { }
>  static inline int kvmppc_xive_native_hcall(struct kvm_vcpu *vcpu, u32 cmd)
>  	{ return 0; }
> +static inline int kvmppc_xive_native_get_vp(struct kvm_vcpu *vcpu, union kvmppc_one_reg *val) { return 0; }
> +static inline int kvmppc_xive_native_set_vp(struct kvm_vcpu *vcpu, union kvmppc_one_reg *val) { return -ENOENT; }

IIRC "VP" is the old name for "TCTX".  Since we're using tctx in the
rest of the XIVE code, can we use it here as well?

>  #endif /* CONFIG_KVM_XIVE */
>  
> diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h
> index 95302558ce10..3c958c39a782 100644
> --- a/arch/powerpc/include/uapi/asm/kvm.h
> +++ b/arch/powerpc/include/uapi/asm/kvm.h
> @@ -480,6 +480,8 @@ struct kvm_ppc_cpu_char {
>  #define  KVM_REG_PPC_ICP_PPRI_SHIFT	16	/* pending irq priority */
>  #define  KVM_REG_PPC_ICP_PPRI_MASK	0xff
>  
> +#define KVM_REG_PPC_VP_STATE	(KVM_REG_PPC | KVM_REG_SIZE_U256 | 0x8d)
> +
>  /* Device control API: PPC-specific devices */
>  #define KVM_DEV_MPIC_GRP_MISC		1
>  #define   KVM_DEV_MPIC_BASE_ADDR	0	/* 64-bit */
> diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
> index de7eed191107..5ad658077a35 100644
> --- a/arch/powerpc/kvm/book3s.c
> +++ b/arch/powerpc/kvm/book3s.c
> @@ -641,6 +641,18 @@ int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id,
>  				*val = get_reg_val(id, kvmppc_xics_get_icp(vcpu));
>  			break;
>  #endif /* CONFIG_KVM_XICS */
> +#ifdef CONFIG_KVM_XIVE
> +		case KVM_REG_PPC_VP_STATE:
> +			if (!vcpu->arch.xive_vcpu) {
> +				r = -ENXIO;
> +				break;
> +			}
> +			if (xive_enabled())
> +				r = kvmppc_xive_native_get_vp(vcpu, val);
> +			else
> +				r = -ENXIO;
> +			break;
> +#endif /* CONFIG_KVM_XIVE */
>  		case KVM_REG_PPC_FSCR:
>  			*val = get_reg_val(id, vcpu->arch.fscr);
>  			break;
> @@ -714,6 +726,18 @@ int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id,
>  				r = kvmppc_xics_set_icp(vcpu, set_reg_val(id, *val));
>  			break;
>  #endif /* CONFIG_KVM_XICS */
> +#ifdef CONFIG_KVM_XIVE
> +		case KVM_REG_PPC_VP_STATE:
> +			if (!vcpu->arch.xive_vcpu) {
> +				r = -ENXIO;
> +				break;
> +			}
> +			if (xive_enabled())
> +				r = kvmppc_xive_native_set_vp(vcpu, val);
> +			else
> +				r = -ENXIO;
> +			break;
> +#endif /* CONFIG_KVM_XIVE */
>  		case KVM_REG_PPC_FSCR:
>  			vcpu->arch.fscr = set_reg_val(id, *val);
>  			break;
> diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c
> index f4eb71eafc57..1aefb366df0b 100644
> --- a/arch/powerpc/kvm/book3s_xive_native.c
> +++ b/arch/powerpc/kvm/book3s_xive_native.c
> @@ -424,6 +424,84 @@ static int xive_native_validate_queue_size(u32 qsize)
>  	}
>  }
>  
> +#define TM_IPB_SHIFT 40
> +#define TM_IPB_MASK  (((u64) 0xFF) << TM_IPB_SHIFT)
> +
> +int kvmppc_xive_native_get_vp(struct kvm_vcpu *vcpu, union kvmppc_one_reg *val)
> +{
> +	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
> +	u64 opal_state;
> +	int rc;
> +
> +	if (!kvmppc_xive_enabled(vcpu))
> +		return -EPERM;
> +
> +	if (!xc)
> +		return -ENOENT;
> +
> +	/* Thread context registers. We only care about IPB and CPPR */
> +	val->xive_timaval[0] = vcpu->arch.xive_saved_state.w01;
> +
> +	/*
> +	 * Return the OS CAM line to print out the VP identifier in
> +	 * the QEMU monitor. This is not restored.
> +	 */
> +	val->xive_timaval[1] = vcpu->arch.xive_cam_word;
> +
> +	/* Get the VP state from OPAL */
> +	rc = xive_native_get_vp_state(xc->vp_id, &opal_state);
> +	if (rc)
> +		return rc;
> +
> +	/*
> +	 * Capture the backup of IPB register in the NVT structure and
> +	 * merge it in our KVM VP state.
> +	 *
> +	 * TODO: P10 support.
> +	 */
> +	val->xive_timaval[0] |= cpu_to_be64(opal_state & TM_IPB_MASK);
> +
> +	pr_devel("%s NSR=%02x CPPR=%02x IBP=%02x PIPR=%02x w01=%016llx w2=%08x opal=%016llx\n",
> +		 __func__,
> +		 vcpu->arch.xive_saved_state.nsr,
> +		 vcpu->arch.xive_saved_state.cppr,
> +		 vcpu->arch.xive_saved_state.ipb,
> +		 vcpu->arch.xive_saved_state.pipr,
> +		 vcpu->arch.xive_saved_state.w01,
> +		 (u32) vcpu->arch.xive_cam_word, opal_state);
> +
> +	return 0;
> +}
> +
> +int kvmppc_xive_native_set_vp(struct kvm_vcpu *vcpu, union kvmppc_one_reg *val)
> +{
> +	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
> +	struct kvmppc_xive *xive = vcpu->kvm->arch.xive;
> +
> +	pr_devel("%s w01=%016llx vp=%016llx\n", __func__,
> +		 val->xive_timaval[0], val->xive_timaval[1]);
> +
> +	if (!kvmppc_xive_enabled(vcpu))
> +		return -EPERM;
> +
> +	if (!xc || !xive)
> +		return -ENOENT;
> +
> +	/* We can't update the state of a "pushed" VCPU	 */
> +	if (WARN_ON(vcpu->arch.xive_pushed))
> +		return -EIO;
> +
> +	/* Thread context registers. only restore IPB and CPPR ? */
> +	vcpu->arch.xive_saved_state.w01 = val->xive_timaval[0];
> +
> +	/*
> +	 * There is no need to restore the XIVE internal state (IPB
> +	 * stored in the NVT) as the IPB register was merged in KVM VP
> +	 * state.
> +	 */
> +	return 0;
> +}
> +
>  static int kvmppc_xive_native_set_source(struct kvmppc_xive *xive, long irq,
>  					 u64 addr)
>  {
Cédric Le Goater Feb. 4, 2019, 6:57 p.m. | #2
On 2/4/19 6:26 AM, David Gibson wrote:
> On Mon, Jan 07, 2019 at 08:10:04PM +0100, Cédric Le Goater wrote:
>> At a VCPU level, the state of the thread context interrupt management
>> registers needs to be collected. These registers are cached under the
>> 'xive_saved_state.w01' field of the VCPU when the VPCU context is
>> pulled from the HW thread. An OPAL call retrieves the backup of the
>> IPB register in the NVT structure and merges it in the KVM state.
>>
>> The structures of the interface between QEMU and KVM provisions some
>> extra room (two u64) for further extensions if more state needs to be
>> transferred back to QEMU.
>>
>> Signed-off-by: Cédric Le Goater <clg@kaod.org>
>> ---
>>  arch/powerpc/include/asm/kvm_ppc.h    |  5 ++
>>  arch/powerpc/include/uapi/asm/kvm.h   |  2 +
>>  arch/powerpc/kvm/book3s.c             | 24 +++++++++
>>  arch/powerpc/kvm/book3s_xive_native.c | 78 +++++++++++++++++++++++++++
>>  4 files changed, 109 insertions(+)
>>
>> diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
>> index 4cc897039485..49c488af168c 100644
>> --- a/arch/powerpc/include/asm/kvm_ppc.h
>> +++ b/arch/powerpc/include/asm/kvm_ppc.h
>> @@ -270,6 +270,7 @@ union kvmppc_one_reg {
>>  		u64	addr;
>>  		u64	length;
>>  	}	vpaval;
>> +	u64	xive_timaval[4];
>>  };
>>  
>>  struct kvmppc_ops {
>> @@ -603,6 +604,8 @@ extern void kvmppc_xive_native_cleanup_vcpu(struct kvm_vcpu *vcpu);
>>  extern void kvmppc_xive_native_init_module(void);
>>  extern void kvmppc_xive_native_exit_module(void);
>>  extern int kvmppc_xive_native_hcall(struct kvm_vcpu *vcpu, u32 cmd);
>> +extern int kvmppc_xive_native_get_vp(struct kvm_vcpu *vcpu, union kvmppc_one_reg *val);
>> +extern int kvmppc_xive_native_set_vp(struct kvm_vcpu *vcpu, union kvmppc_one_reg *val);
>>  
>>  #else
>>  static inline int kvmppc_xive_set_xive(struct kvm *kvm, u32 irq, u32 server,
>> @@ -637,6 +640,8 @@ static inline void kvmppc_xive_native_init_module(void) { }
>>  static inline void kvmppc_xive_native_exit_module(void) { }
>>  static inline int kvmppc_xive_native_hcall(struct kvm_vcpu *vcpu, u32 cmd)
>>  	{ return 0; }
>> +static inline int kvmppc_xive_native_get_vp(struct kvm_vcpu *vcpu, union kvmppc_one_reg *val) { return 0; }
>> +static inline int kvmppc_xive_native_set_vp(struct kvm_vcpu *vcpu, union kvmppc_one_reg *val) { return -ENOENT; }
> 
> IIRC "VP" is the old name for "TCTX".  Since we're using tctx in the
> rest of the XIVE code, can we use it here as well.

OK. The state we are getting or setting is indeed related to the thread
interrupt context registers.

The name VP is related to an identifier of some interrupt context under
OPAL (NVT in HW to be precise).

C.

> 
>>  #endif /* CONFIG_KVM_XIVE */
>>  
>> diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h
>> index 95302558ce10..3c958c39a782 100644
>> --- a/arch/powerpc/include/uapi/asm/kvm.h
>> +++ b/arch/powerpc/include/uapi/asm/kvm.h
>> @@ -480,6 +480,8 @@ struct kvm_ppc_cpu_char {
>>  #define  KVM_REG_PPC_ICP_PPRI_SHIFT	16	/* pending irq priority */
>>  #define  KVM_REG_PPC_ICP_PPRI_MASK	0xff
>>  
>> +#define KVM_REG_PPC_VP_STATE	(KVM_REG_PPC | KVM_REG_SIZE_U256 | 0x8d)
>> +
>>  /* Device control API: PPC-specific devices */
>>  #define KVM_DEV_MPIC_GRP_MISC		1
>>  #define   KVM_DEV_MPIC_BASE_ADDR	0	/* 64-bit */
>> diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
>> index de7eed191107..5ad658077a35 100644
>> --- a/arch/powerpc/kvm/book3s.c
>> +++ b/arch/powerpc/kvm/book3s.c
>> @@ -641,6 +641,18 @@ int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id,
>>  				*val = get_reg_val(id, kvmppc_xics_get_icp(vcpu));
>>  			break;
>>  #endif /* CONFIG_KVM_XICS */
>> +#ifdef CONFIG_KVM_XIVE
>> +		case KVM_REG_PPC_VP_STATE:
>> +			if (!vcpu->arch.xive_vcpu) {
>> +				r = -ENXIO;
>> +				break;
>> +			}
>> +			if (xive_enabled())
>> +				r = kvmppc_xive_native_get_vp(vcpu, val);
>> +			else
>> +				r = -ENXIO;
>> +			break;
>> +#endif /* CONFIG_KVM_XIVE */
>>  		case KVM_REG_PPC_FSCR:
>>  			*val = get_reg_val(id, vcpu->arch.fscr);
>>  			break;
>> @@ -714,6 +726,18 @@ int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id,
>>  				r = kvmppc_xics_set_icp(vcpu, set_reg_val(id, *val));
>>  			break;
>>  #endif /* CONFIG_KVM_XICS */
>> +#ifdef CONFIG_KVM_XIVE
>> +		case KVM_REG_PPC_VP_STATE:
>> +			if (!vcpu->arch.xive_vcpu) {
>> +				r = -ENXIO;
>> +				break;
>> +			}
>> +			if (xive_enabled())
>> +				r = kvmppc_xive_native_set_vp(vcpu, val);
>> +			else
>> +				r = -ENXIO;
>> +			break;
>> +#endif /* CONFIG_KVM_XIVE */
>>  		case KVM_REG_PPC_FSCR:
>>  			vcpu->arch.fscr = set_reg_val(id, *val);
>>  			break;
>> diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c
>> index f4eb71eafc57..1aefb366df0b 100644
>> --- a/arch/powerpc/kvm/book3s_xive_native.c
>> +++ b/arch/powerpc/kvm/book3s_xive_native.c
>> @@ -424,6 +424,84 @@ static int xive_native_validate_queue_size(u32 qsize)
>>  	}
>>  }
>>  
>> +#define TM_IPB_SHIFT 40
>> +#define TM_IPB_MASK  (((u64) 0xFF) << TM_IPB_SHIFT)
>> +
>> +int kvmppc_xive_native_get_vp(struct kvm_vcpu *vcpu, union kvmppc_one_reg *val)
>> +{
>> +	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
>> +	u64 opal_state;
>> +	int rc;
>> +
>> +	if (!kvmppc_xive_enabled(vcpu))
>> +		return -EPERM;
>> +
>> +	if (!xc)
>> +		return -ENOENT;
>> +
>> +	/* Thread context registers. We only care about IPB and CPPR */
>> +	val->xive_timaval[0] = vcpu->arch.xive_saved_state.w01;
>> +
>> +	/*
>> +	 * Return the OS CAM line to print out the VP identifier in
>> +	 * the QEMU monitor. This is not restored.
>> +	 */
>> +	val->xive_timaval[1] = vcpu->arch.xive_cam_word;
>> +
>> +	/* Get the VP state from OPAL */
>> +	rc = xive_native_get_vp_state(xc->vp_id, &opal_state);
>> +	if (rc)
>> +		return rc;
>> +
>> +	/*
>> +	 * Capture the backup of IPB register in the NVT structure and
>> +	 * merge it in our KVM VP state.
>> +	 *
>> +	 * TODO: P10 support.
>> +	 */
>> +	val->xive_timaval[0] |= cpu_to_be64(opal_state & TM_IPB_MASK);
>> +
>> +	pr_devel("%s NSR=%02x CPPR=%02x IBP=%02x PIPR=%02x w01=%016llx w2=%08x opal=%016llx\n",
>> +		 __func__,
>> +		 vcpu->arch.xive_saved_state.nsr,
>> +		 vcpu->arch.xive_saved_state.cppr,
>> +		 vcpu->arch.xive_saved_state.ipb,
>> +		 vcpu->arch.xive_saved_state.pipr,
>> +		 vcpu->arch.xive_saved_state.w01,
>> +		 (u32) vcpu->arch.xive_cam_word, opal_state);
>> +
>> +	return 0;
>> +}
>> +
>> +int kvmppc_xive_native_set_vp(struct kvm_vcpu *vcpu, union kvmppc_one_reg *val)
>> +{
>> +	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
>> +	struct kvmppc_xive *xive = vcpu->kvm->arch.xive;
>> +
>> +	pr_devel("%s w01=%016llx vp=%016llx\n", __func__,
>> +		 val->xive_timaval[0], val->xive_timaval[1]);
>> +
>> +	if (!kvmppc_xive_enabled(vcpu))
>> +		return -EPERM;
>> +
>> +	if (!xc || !xive)
>> +		return -ENOENT;
>> +
>> +	/* We can't update the state of a "pushed" VCPU	 */
>> +	if (WARN_ON(vcpu->arch.xive_pushed))
>> +		return -EIO;
>> +
>> +	/* Thread context registers. only restore IPB and CPPR ? */
>> +	vcpu->arch.xive_saved_state.w01 = val->xive_timaval[0];
>> +
>> +	/*
>> +	 * There is no need to restore the XIVE internal state (IPB
>> +	 * stored in the NVT) as the IPB register was merged in KVM VP
>> +	 * state.
>> +	 */
>> +	return 0;
>> +}
>> +
>>  static int kvmppc_xive_native_set_source(struct kvmppc_xive *xive, long irq,
>>  					 u64 addr)
>>  {
>
David Gibson Feb. 5, 2019, 5:33 a.m. | #3
On Mon, Feb 04, 2019 at 07:57:26PM +0100, Cédric Le Goater wrote:
> On 2/4/19 6:26 AM, David Gibson wrote:
> > On Mon, Jan 07, 2019 at 08:10:04PM +0100, Cédric Le Goater wrote:
> >> At a VCPU level, the state of the thread context interrupt management
> >> registers needs to be collected. These registers are cached under the
> >> 'xive_saved_state.w01' field of the VCPU when the VPCU context is
> >> pulled from the HW thread. An OPAL call retrieves the backup of the
> >> IPB register in the NVT structure and merges it in the KVM state.
> >>
> >> The structures of the interface between QEMU and KVM provisions some
> >> extra room (two u64) for further extensions if more state needs to be
> >> transferred back to QEMU.
> >>
> >> Signed-off-by: Cédric Le Goater <clg@kaod.org>
> >> ---
> >>  arch/powerpc/include/asm/kvm_ppc.h    |  5 ++
> >>  arch/powerpc/include/uapi/asm/kvm.h   |  2 +
> >>  arch/powerpc/kvm/book3s.c             | 24 +++++++++
> >>  arch/powerpc/kvm/book3s_xive_native.c | 78 +++++++++++++++++++++++++++
> >>  4 files changed, 109 insertions(+)
> >>
> >> diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
> >> index 4cc897039485..49c488af168c 100644
> >> --- a/arch/powerpc/include/asm/kvm_ppc.h
> >> +++ b/arch/powerpc/include/asm/kvm_ppc.h
> >> @@ -270,6 +270,7 @@ union kvmppc_one_reg {
> >>  		u64	addr;
> >>  		u64	length;
> >>  	}	vpaval;
> >> +	u64	xive_timaval[4];
> >>  };
> >>  
> >>  struct kvmppc_ops {
> >> @@ -603,6 +604,8 @@ extern void kvmppc_xive_native_cleanup_vcpu(struct kvm_vcpu *vcpu);
> >>  extern void kvmppc_xive_native_init_module(void);
> >>  extern void kvmppc_xive_native_exit_module(void);
> >>  extern int kvmppc_xive_native_hcall(struct kvm_vcpu *vcpu, u32 cmd);
> >> +extern int kvmppc_xive_native_get_vp(struct kvm_vcpu *vcpu, union kvmppc_one_reg *val);
> >> +extern int kvmppc_xive_native_set_vp(struct kvm_vcpu *vcpu, union kvmppc_one_reg *val);
> >>  
> >>  #else
> >>  static inline int kvmppc_xive_set_xive(struct kvm *kvm, u32 irq, u32 server,
> >> @@ -637,6 +640,8 @@ static inline void kvmppc_xive_native_init_module(void) { }
> >>  static inline void kvmppc_xive_native_exit_module(void) { }
> >>  static inline int kvmppc_xive_native_hcall(struct kvm_vcpu *vcpu, u32 cmd)
> >>  	{ return 0; }
> >> +static inline int kvmppc_xive_native_get_vp(struct kvm_vcpu *vcpu, union kvmppc_one_reg *val) { return 0; }
> >> +static inline int kvmppc_xive_native_set_vp(struct kvm_vcpu *vcpu, union kvmppc_one_reg *val) { return -ENOENT; }
> > 
> > IIRC "VP" is the old name for "TCTX".  Since we're using tctx in the
> > rest of the XIVE code, can we use it here as well.
> 
> OK. The state we are getting or setting is indeed related to the thread 
> interrupt  context registers. 
> 
> The name VP is related to an identifier to some interrupt context under 
> OPAL (NVT in HW to be precise).

Oh, sorry, "NVT" was the name I was looking for, not "TCTX".  But in
any case, please let's standardize on one.
Cédric Le Goater Feb. 5, 2019, 11:58 a.m. | #4
On 2/5/19 6:33 AM, David Gibson wrote:
> On Mon, Feb 04, 2019 at 07:57:26PM +0100, Cédric Le Goater wrote:
>> On 2/4/19 6:26 AM, David Gibson wrote:
>>> On Mon, Jan 07, 2019 at 08:10:04PM +0100, Cédric Le Goater wrote:
>>>> At a VCPU level, the state of the thread context interrupt management
>>>> registers needs to be collected. These registers are cached under the
>>>> 'xive_saved_state.w01' field of the VCPU when the VPCU context is
>>>> pulled from the HW thread. An OPAL call retrieves the backup of the
>>>> IPB register in the NVT structure and merges it in the KVM state.
>>>>
>>>> The structures of the interface between QEMU and KVM provisions some
>>>> extra room (two u64) for further extensions if more state needs to be
>>>> transferred back to QEMU.
>>>>
>>>> Signed-off-by: Cédric Le Goater <clg@kaod.org>
>>>> ---
>>>>  arch/powerpc/include/asm/kvm_ppc.h    |  5 ++
>>>>  arch/powerpc/include/uapi/asm/kvm.h   |  2 +
>>>>  arch/powerpc/kvm/book3s.c             | 24 +++++++++
>>>>  arch/powerpc/kvm/book3s_xive_native.c | 78 +++++++++++++++++++++++++++
>>>>  4 files changed, 109 insertions(+)
>>>>
>>>> diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
>>>> index 4cc897039485..49c488af168c 100644
>>>> --- a/arch/powerpc/include/asm/kvm_ppc.h
>>>> +++ b/arch/powerpc/include/asm/kvm_ppc.h
>>>> @@ -270,6 +270,7 @@ union kvmppc_one_reg {
>>>>  		u64	addr;
>>>>  		u64	length;
>>>>  	}	vpaval;
>>>> +	u64	xive_timaval[4];
>>>>  };
>>>>  
>>>>  struct kvmppc_ops {
>>>> @@ -603,6 +604,8 @@ extern void kvmppc_xive_native_cleanup_vcpu(struct kvm_vcpu *vcpu);
>>>>  extern void kvmppc_xive_native_init_module(void);
>>>>  extern void kvmppc_xive_native_exit_module(void);
>>>>  extern int kvmppc_xive_native_hcall(struct kvm_vcpu *vcpu, u32 cmd);
>>>> +extern int kvmppc_xive_native_get_vp(struct kvm_vcpu *vcpu, union kvmppc_one_reg *val);
>>>> +extern int kvmppc_xive_native_set_vp(struct kvm_vcpu *vcpu, union kvmppc_one_reg *val);
>>>>  
>>>>  #else
>>>>  static inline int kvmppc_xive_set_xive(struct kvm *kvm, u32 irq, u32 server,
>>>> @@ -637,6 +640,8 @@ static inline void kvmppc_xive_native_init_module(void) { }
>>>>  static inline void kvmppc_xive_native_exit_module(void) { }
>>>>  static inline int kvmppc_xive_native_hcall(struct kvm_vcpu *vcpu, u32 cmd)
>>>>  	{ return 0; }
>>>> +static inline int kvmppc_xive_native_get_vp(struct kvm_vcpu *vcpu, union kvmppc_one_reg *val) { return 0; }
>>>> +static inline int kvmppc_xive_native_set_vp(struct kvm_vcpu *vcpu, union kvmppc_one_reg *val) { return -ENOENT; }
>>>
>>> IIRC "VP" is the old name for "TCTX".  Since we're using tctx in the
>>> rest of the XIVE code, can we use it here as well.
>>
>> OK. The state we are getting or setting is indeed related to the thread 
>> interrupt  context registers. 
>>
>> The name VP is related to an identifier to some interrupt context under 
>> OPAL (NVT in HW to be precise).
> 
> Oh, sorry, "NVT" was the name I was looking for, not "TCTX".  But in
> any case, please lets standardize on one.

There is some confusion in the naming for:

 - VP    Virtual Processor (XIVE 1)
 - VPD   Virtual Processor Descriptor (XIVE 1)
 - TCTX  Thread interrupt context registers
 - NVT   Notify Virtual Target. Former VP. 
 - NVTS  Notify Virtual Target Structure. Where the TCTX regs are cached.


I am fine with using NVT because this is indeed the name of the XIVE 
structure where the HW caches the thread interrupt context registers.

But the XIVE native layer and the XICS-over-XIVE KVM device use the
name VP (the old one). I don't think we want to change these now.

C.
David Gibson Feb. 6, 2019, 1:19 a.m. | #5
On Tue, Feb 05, 2019 at 12:58:54PM +0100, Cédric Le Goater wrote:
> On 2/5/19 6:33 AM, David Gibson wrote:
> > On Mon, Feb 04, 2019 at 07:57:26PM +0100, Cédric Le Goater wrote:
> >> On 2/4/19 6:26 AM, David Gibson wrote:
> >>> On Mon, Jan 07, 2019 at 08:10:04PM +0100, Cédric Le Goater wrote:
> >>>> At a VCPU level, the state of the thread context interrupt management
> >>>> registers needs to be collected. These registers are cached under the
> >>>> 'xive_saved_state.w01' field of the VCPU when the VPCU context is
> >>>> pulled from the HW thread. An OPAL call retrieves the backup of the
> >>>> IPB register in the NVT structure and merges it in the KVM state.
> >>>>
> >>>> The structures of the interface between QEMU and KVM provisions some
> >>>> extra room (two u64) for further extensions if more state needs to be
> >>>> transferred back to QEMU.
> >>>>
> >>>> Signed-off-by: Cédric Le Goater <clg@kaod.org>
> >>>> ---
> >>>>  arch/powerpc/include/asm/kvm_ppc.h    |  5 ++
> >>>>  arch/powerpc/include/uapi/asm/kvm.h   |  2 +
> >>>>  arch/powerpc/kvm/book3s.c             | 24 +++++++++
> >>>>  arch/powerpc/kvm/book3s_xive_native.c | 78 +++++++++++++++++++++++++++
> >>>>  4 files changed, 109 insertions(+)
> >>>>
> >>>> diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
> >>>> index 4cc897039485..49c488af168c 100644
> >>>> --- a/arch/powerpc/include/asm/kvm_ppc.h
> >>>> +++ b/arch/powerpc/include/asm/kvm_ppc.h
> >>>> @@ -270,6 +270,7 @@ union kvmppc_one_reg {
> >>>>  		u64	addr;
> >>>>  		u64	length;
> >>>>  	}	vpaval;
> >>>> +	u64	xive_timaval[4];
> >>>>  };
> >>>>  
> >>>>  struct kvmppc_ops {
> >>>> @@ -603,6 +604,8 @@ extern void kvmppc_xive_native_cleanup_vcpu(struct kvm_vcpu *vcpu);
> >>>>  extern void kvmppc_xive_native_init_module(void);
> >>>>  extern void kvmppc_xive_native_exit_module(void);
> >>>>  extern int kvmppc_xive_native_hcall(struct kvm_vcpu *vcpu, u32 cmd);
> >>>> +extern int kvmppc_xive_native_get_vp(struct kvm_vcpu *vcpu, union kvmppc_one_reg *val);
> >>>> +extern int kvmppc_xive_native_set_vp(struct kvm_vcpu *vcpu, union kvmppc_one_reg *val);
> >>>>  
> >>>>  #else
> >>>>  static inline int kvmppc_xive_set_xive(struct kvm *kvm, u32 irq, u32 server,
> >>>> @@ -637,6 +640,8 @@ static inline void kvmppc_xive_native_init_module(void) { }
> >>>>  static inline void kvmppc_xive_native_exit_module(void) { }
> >>>>  static inline int kvmppc_xive_native_hcall(struct kvm_vcpu *vcpu, u32 cmd)
> >>>>  	{ return 0; }
> >>>> +static inline int kvmppc_xive_native_get_vp(struct kvm_vcpu *vcpu, union kvmppc_one_reg *val) { return 0; }
> >>>> +static inline int kvmppc_xive_native_set_vp(struct kvm_vcpu *vcpu, union kvmppc_one_reg *val) { return -ENOENT; }
> >>>
> >>> IIRC "VP" is the old name for "TCTX".  Since we're using tctx in the
> >>> rest of the XIVE code, can we use it here as well.
> >>
> >> OK. The state we are getting or setting is indeed related to the thread 
> >> interrupt  context registers. 
> >>
> >> The name VP is related to an identifier to some interrupt context under 
> >> OPAL (NVT in HW to be precise).
> > 
> > Oh, sorry, "NVT" was the name I was looking for, not "TCTX".  But in
> > any case, please lets standardize on one.
> 
> There is some confusion in the naming for :
> 
>  - VP    Virtual Processor (XIVE 1)
>  - VPD   Virtual Processor Descriptor (XIVE 1)
>  - TCTX  Thread interrupt context registers
>  - NVT   Notify Virtual Target. Former VP. 
>  - NVTS  Notify Virtual Target Structure. Where the TCTX regs are cached.
> 
> 
> I am fine with using NVT because this is indeed the name of the XIVE 
> structure where the HW caches the thread interrupt context registers.
> 
> But the XIVE native layer and the XICS-over-XIVE KVM device use the
> name VP (the old one). I don't think we want to change these now.

Ah, right.  It now occurs to me that the place I've already seen NVT
used is in the qemu code, whereas this is kernel.  In that case
sticking to VP here makes sense.

Patch

diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 4cc897039485..49c488af168c 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -270,6 +270,7 @@  union kvmppc_one_reg {
 		u64	addr;
 		u64	length;
 	}	vpaval;
+	u64	xive_timaval[4];
 };
 
 struct kvmppc_ops {
@@ -603,6 +604,8 @@  extern void kvmppc_xive_native_cleanup_vcpu(struct kvm_vcpu *vcpu);
 extern void kvmppc_xive_native_init_module(void);
 extern void kvmppc_xive_native_exit_module(void);
 extern int kvmppc_xive_native_hcall(struct kvm_vcpu *vcpu, u32 cmd);
+extern int kvmppc_xive_native_get_vp(struct kvm_vcpu *vcpu, union kvmppc_one_reg *val);
+extern int kvmppc_xive_native_set_vp(struct kvm_vcpu *vcpu, union kvmppc_one_reg *val);
 
 #else
 static inline int kvmppc_xive_set_xive(struct kvm *kvm, u32 irq, u32 server,
@@ -637,6 +640,8 @@  static inline void kvmppc_xive_native_init_module(void) { }
 static inline void kvmppc_xive_native_exit_module(void) { }
 static inline int kvmppc_xive_native_hcall(struct kvm_vcpu *vcpu, u32 cmd)
 	{ return 0; }
+static inline int kvmppc_xive_native_get_vp(struct kvm_vcpu *vcpu, union kvmppc_one_reg *val) { return 0; }
+static inline int kvmppc_xive_native_set_vp(struct kvm_vcpu *vcpu, union kvmppc_one_reg *val) { return -ENOENT; }
 
 #endif /* CONFIG_KVM_XIVE */
 
diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h
index 95302558ce10..3c958c39a782 100644
--- a/arch/powerpc/include/uapi/asm/kvm.h
+++ b/arch/powerpc/include/uapi/asm/kvm.h
@@ -480,6 +480,8 @@  struct kvm_ppc_cpu_char {
 #define  KVM_REG_PPC_ICP_PPRI_SHIFT	16	/* pending irq priority */
 #define  KVM_REG_PPC_ICP_PPRI_MASK	0xff
 
+#define KVM_REG_PPC_VP_STATE	(KVM_REG_PPC | KVM_REG_SIZE_U256 | 0x8d)
+
 /* Device control API: PPC-specific devices */
 #define KVM_DEV_MPIC_GRP_MISC		1
 #define   KVM_DEV_MPIC_BASE_ADDR	0	/* 64-bit */
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index de7eed191107..5ad658077a35 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -641,6 +641,18 @@  int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id,
 				*val = get_reg_val(id, kvmppc_xics_get_icp(vcpu));
 			break;
 #endif /* CONFIG_KVM_XICS */
+#ifdef CONFIG_KVM_XIVE
+		case KVM_REG_PPC_VP_STATE:
+			if (!vcpu->arch.xive_vcpu) {
+				r = -ENXIO;
+				break;
+			}
+			if (xive_enabled())
+				r = kvmppc_xive_native_get_vp(vcpu, val);
+			else
+				r = -ENXIO;
+			break;
+#endif /* CONFIG_KVM_XIVE */
 		case KVM_REG_PPC_FSCR:
 			*val = get_reg_val(id, vcpu->arch.fscr);
 			break;
@@ -714,6 +726,18 @@  int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id,
 				r = kvmppc_xics_set_icp(vcpu, set_reg_val(id, *val));
 			break;
 #endif /* CONFIG_KVM_XICS */
+#ifdef CONFIG_KVM_XIVE
+		case KVM_REG_PPC_VP_STATE:
+			if (!vcpu->arch.xive_vcpu) {
+				r = -ENXIO;
+				break;
+			}
+			if (xive_enabled())
+				r = kvmppc_xive_native_set_vp(vcpu, val);
+			else
+				r = -ENXIO;
+			break;
+#endif /* CONFIG_KVM_XIVE */
 		case KVM_REG_PPC_FSCR:
 			vcpu->arch.fscr = set_reg_val(id, *val);
 			break;
diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c
index f4eb71eafc57..1aefb366df0b 100644
--- a/arch/powerpc/kvm/book3s_xive_native.c
+++ b/arch/powerpc/kvm/book3s_xive_native.c
@@ -424,6 +424,84 @@  static int xive_native_validate_queue_size(u32 qsize)
 	}
 }
 
+#define TM_IPB_SHIFT 40
+#define TM_IPB_MASK  (((u64) 0xFF) << TM_IPB_SHIFT)
+
+int kvmppc_xive_native_get_vp(struct kvm_vcpu *vcpu, union kvmppc_one_reg *val)
+{
+	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+	u64 opal_state;
+	int rc;
+
+	if (!kvmppc_xive_enabled(vcpu))
+		return -EPERM;
+
+	if (!xc)
+		return -ENOENT;
+
+	/* Thread context registers. We only care about IPB and CPPR */
+	val->xive_timaval[0] = vcpu->arch.xive_saved_state.w01;
+
+	/*
+	 * Return the OS CAM line to print out the VP identifier in
+	 * the QEMU monitor. This is not restored.
+	 */
+	val->xive_timaval[1] = vcpu->arch.xive_cam_word;
+
+	/* Get the VP state from OPAL */
+	rc = xive_native_get_vp_state(xc->vp_id, &opal_state);
+	if (rc)
+		return rc;
+
+	/*
+	 * Capture the backup of IPB register in the NVT structure and
+	 * merge it in our KVM VP state.
+	 *
+	 * TODO: P10 support.
+	 */
+	val->xive_timaval[0] |= cpu_to_be64(opal_state & TM_IPB_MASK);
+
+	pr_devel("%s NSR=%02x CPPR=%02x IBP=%02x PIPR=%02x w01=%016llx w2=%08x opal=%016llx\n",
+		 __func__,
+		 vcpu->arch.xive_saved_state.nsr,
+		 vcpu->arch.xive_saved_state.cppr,
+		 vcpu->arch.xive_saved_state.ipb,
+		 vcpu->arch.xive_saved_state.pipr,
+		 vcpu->arch.xive_saved_state.w01,
+		 (u32) vcpu->arch.xive_cam_word, opal_state);
+
+	return 0;
+}
+
+int kvmppc_xive_native_set_vp(struct kvm_vcpu *vcpu, union kvmppc_one_reg *val)
+{
+	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+	struct kvmppc_xive *xive = vcpu->kvm->arch.xive;
+
+	pr_devel("%s w01=%016llx vp=%016llx\n", __func__,
+		 val->xive_timaval[0], val->xive_timaval[1]);
+
+	if (!kvmppc_xive_enabled(vcpu))
+		return -EPERM;
+
+	if (!xc || !xive)
+		return -ENOENT;
+
+	/* We can't update the state of a "pushed" VCPU	 */
+	if (WARN_ON(vcpu->arch.xive_pushed))
+		return -EIO;
+
+	/* Thread context registers. only restore IPB and CPPR ? */
+	vcpu->arch.xive_saved_state.w01 = val->xive_timaval[0];
+
+	/*
+	 * There is no need to restore the XIVE internal state (IPB
+	 * stored in the NVT) as the IPB register was merged in KVM VP
+	 * state.
+	 */
+	return 0;
+}
+
 static int kvmppc_xive_native_set_source(struct kvmppc_xive *xive, long irq,
 					 u64 addr)
 {