[15/19] KVM: PPC: Book3S HV: add get/set accessors for the source configuration
diff mbox series

Message ID 20190107184331.8429-16-clg@kaod.org
State Changes Requested
Headers show
Series
  • KVM: PPC: Book3S HV: add XIVE native exploitation mode
Related show

Commit Message

Cédric Le Goater Jan. 7, 2019, 6:43 p.m. UTC
These are used to capture the XIVE EAS table of the KVM device, the
configuration of the source targets.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
---
 arch/powerpc/include/uapi/asm/kvm.h   | 11 ++++
 arch/powerpc/kvm/book3s_xive_native.c | 87 +++++++++++++++++++++++++++
 2 files changed, 98 insertions(+)

Comments

David Gibson Feb. 4, 2019, 5:21 a.m. UTC | #1
On Mon, Jan 07, 2019 at 07:43:27PM +0100, Cédric Le Goater wrote:
> Theses are use to capure the XIVE EAS table of the KVM device, the
> configuration of the source targets.
> 
> Signed-off-by: Cédric Le Goater <clg@kaod.org>
> ---
>  arch/powerpc/include/uapi/asm/kvm.h   | 11 ++++
>  arch/powerpc/kvm/book3s_xive_native.c | 87 +++++++++++++++++++++++++++
>  2 files changed, 98 insertions(+)
> 
> diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h
> index 1a8740629acf..faf024f39858 100644
> --- a/arch/powerpc/include/uapi/asm/kvm.h
> +++ b/arch/powerpc/include/uapi/asm/kvm.h
> @@ -683,9 +683,20 @@ struct kvm_ppc_cpu_char {
>  #define   KVM_DEV_XIVE_SAVE_EQ_PAGES	4
>  #define KVM_DEV_XIVE_GRP_SOURCES	2	/* 64-bit source attributes */
>  #define KVM_DEV_XIVE_GRP_SYNC		3	/* 64-bit source attributes */
> +#define KVM_DEV_XIVE_GRP_EAS		4	/* 64-bit eas attributes */
>  
>  /* Layout of 64-bit XIVE source attribute values */
>  #define KVM_XIVE_LEVEL_SENSITIVE	(1ULL << 0)
>  #define KVM_XIVE_LEVEL_ASSERTED		(1ULL << 1)
>  
> +/* Layout of 64-bit eas attribute values */
> +#define KVM_XIVE_EAS_PRIORITY_SHIFT	0
> +#define KVM_XIVE_EAS_PRIORITY_MASK	0x7
> +#define KVM_XIVE_EAS_SERVER_SHIFT	3
> +#define KVM_XIVE_EAS_SERVER_MASK	0xfffffff8ULL
> +#define KVM_XIVE_EAS_MASK_SHIFT		32
> +#define KVM_XIVE_EAS_MASK_MASK		0x100000000ULL
> +#define KVM_XIVE_EAS_EISN_SHIFT		33
> +#define KVM_XIVE_EAS_EISN_MASK		0xfffffffe00000000ULL
> +
>  #endif /* __LINUX_KVM_POWERPC_H */
> diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c
> index f2de1bcf3b35..0468b605baa7 100644
> --- a/arch/powerpc/kvm/book3s_xive_native.c
> +++ b/arch/powerpc/kvm/book3s_xive_native.c
> @@ -525,6 +525,88 @@ static int kvmppc_xive_native_sync(struct kvmppc_xive *xive, long irq, u64 addr)
>  	return 0;
>  }
>  
> +static int kvmppc_xive_native_set_eas(struct kvmppc_xive *xive, long irq,
> +				      u64 addr)

I'd prefer to avoid the name "EAS" here.  IIUC these aren't "raw" EAS
values, but rather essentially the "source config" in the terminology
of the PAPR hcalls.  Which, yes, is basically implemented by setting
the EAS, but since it's the PAPR architected state that we need to
preserve across migration, I'd prefer to stick as close as we can to
the PAPR terminology.

> +{
> +	struct kvmppc_xive_src_block *sb;
> +	struct kvmppc_xive_irq_state *state;
> +	u64 __user *ubufp = (u64 __user *) addr;
> +	u16 src;
> +	u64 kvm_eas;
> +	u32 server;
> +	u8 priority;
> +	u32 eisn;
> +
> +	sb = kvmppc_xive_find_source(xive, irq, &src);
> +	if (!sb)
> +		return -ENOENT;
> +
> +	state = &sb->irq_state[src];
> +
> +	if (!state->valid)
> +		return -EINVAL;
> +
> +	if (get_user(kvm_eas, ubufp))
> +		return -EFAULT;
> +
> +	pr_devel("%s irq=0x%lx eas=%016llx\n", __func__, irq, kvm_eas);
> +
> +	priority = (kvm_eas & KVM_XIVE_EAS_PRIORITY_MASK) >>
> +		KVM_XIVE_EAS_PRIORITY_SHIFT;
> +	server = (kvm_eas & KVM_XIVE_EAS_SERVER_MASK) >>
> +		KVM_XIVE_EAS_SERVER_SHIFT;
> +	eisn = (kvm_eas & KVM_XIVE_EAS_EISN_MASK) >> KVM_XIVE_EAS_EISN_SHIFT;
> +
> +	if (priority != xive_prio_from_guest(priority)) {
> +		pr_err("invalid priority for queue %d for VCPU %d\n",
> +		       priority, server);
> +		return -EINVAL;
> +	}
> +
> +	return kvmppc_xive_native_set_source_config(xive, sb, state, server,
> +						    priority, eisn);
> +}
> +
> +static int kvmppc_xive_native_get_eas(struct kvmppc_xive *xive, long irq,
> +				      u64 addr)
> +{
> +	struct kvmppc_xive_src_block *sb;
> +	struct kvmppc_xive_irq_state *state;
> +	u64 __user *ubufp = (u64 __user *) addr;
> +	u16 src;
> +	u64 kvm_eas;
> +
> +	sb = kvmppc_xive_find_source(xive, irq, &src);
> +	if (!sb)
> +		return -ENOENT;
> +
> +	state = &sb->irq_state[src];
> +
> +	if (!state->valid)
> +		return -EINVAL;
> +
> +	arch_spin_lock(&sb->lock);
> +
> +	if (state->act_priority == MASKED)
> +		kvm_eas = KVM_XIVE_EAS_MASK_MASK;
> +	else {
> +		kvm_eas = (state->act_priority << KVM_XIVE_EAS_PRIORITY_SHIFT) &
> +			KVM_XIVE_EAS_PRIORITY_MASK;
> +		kvm_eas |= (state->act_server << KVM_XIVE_EAS_SERVER_SHIFT) &
> +			KVM_XIVE_EAS_SERVER_MASK;
> +		kvm_eas |= ((u64) state->eisn << KVM_XIVE_EAS_EISN_SHIFT) &
> +			KVM_XIVE_EAS_EISN_MASK;
> +	}
> +	arch_spin_unlock(&sb->lock);
> +
> +	pr_devel("%s irq=0x%lx eas=%016llx\n", __func__, irq, kvm_eas);
> +
> +	if (put_user(kvm_eas, ubufp))
> +		return -EFAULT;
> +
> +	return 0;
> +}
> +
>  static int kvmppc_xive_native_set_attr(struct kvm_device *dev,
>  				       struct kvm_device_attr *attr)
>  {
> @@ -544,6 +626,8 @@ static int kvmppc_xive_native_set_attr(struct kvm_device *dev,
>  						     attr->addr);
>  	case KVM_DEV_XIVE_GRP_SYNC:
>  		return kvmppc_xive_native_sync(xive, attr->attr, attr->addr);
> +	case KVM_DEV_XIVE_GRP_EAS:
> +		return kvmppc_xive_native_set_eas(xive, attr->attr, attr->addr);
>  	}
>  	return -ENXIO;
>  }
> @@ -564,6 +648,8 @@ static int kvmppc_xive_native_get_attr(struct kvm_device *dev,
>  			return kvmppc_xive_native_get_vc_base(xive, attr->addr);
>  		}
>  		break;
> +	case KVM_DEV_XIVE_GRP_EAS:
> +		return kvmppc_xive_native_get_eas(xive, attr->attr, attr->addr);
>  	}
>  	return -ENXIO;
>  }
> @@ -583,6 +669,7 @@ static int kvmppc_xive_native_has_attr(struct kvm_device *dev,
>  		break;
>  	case KVM_DEV_XIVE_GRP_SOURCES:
>  	case KVM_DEV_XIVE_GRP_SYNC:
> +	case KVM_DEV_XIVE_GRP_EAS:
>  		if (attr->attr >= KVMPPC_XIVE_FIRST_IRQ &&
>  		    attr->attr < KVMPPC_XIVE_NR_IRQS)
>  			return 0;
Cédric Le Goater Feb. 4, 2019, 4:07 p.m. UTC | #2
On 2/4/19 6:21 AM, David Gibson wrote:
> On Mon, Jan 07, 2019 at 07:43:27PM +0100, Cédric Le Goater wrote:
>> Theses are use to capure the XIVE EAS table of the KVM device, the
>> configuration of the source targets.
>>
>> Signed-off-by: Cédric Le Goater <clg@kaod.org>
>> ---
>>  arch/powerpc/include/uapi/asm/kvm.h   | 11 ++++
>>  arch/powerpc/kvm/book3s_xive_native.c | 87 +++++++++++++++++++++++++++
>>  2 files changed, 98 insertions(+)
>>
>> diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h
>> index 1a8740629acf..faf024f39858 100644
>> --- a/arch/powerpc/include/uapi/asm/kvm.h
>> +++ b/arch/powerpc/include/uapi/asm/kvm.h
>> @@ -683,9 +683,20 @@ struct kvm_ppc_cpu_char {
>>  #define   KVM_DEV_XIVE_SAVE_EQ_PAGES	4
>>  #define KVM_DEV_XIVE_GRP_SOURCES	2	/* 64-bit source attributes */
>>  #define KVM_DEV_XIVE_GRP_SYNC		3	/* 64-bit source attributes */
>> +#define KVM_DEV_XIVE_GRP_EAS		4	/* 64-bit eas attributes */
>>  
>>  /* Layout of 64-bit XIVE source attribute values */
>>  #define KVM_XIVE_LEVEL_SENSITIVE	(1ULL << 0)
>>  #define KVM_XIVE_LEVEL_ASSERTED		(1ULL << 1)
>>  
>> +/* Layout of 64-bit eas attribute values */
>> +#define KVM_XIVE_EAS_PRIORITY_SHIFT	0
>> +#define KVM_XIVE_EAS_PRIORITY_MASK	0x7
>> +#define KVM_XIVE_EAS_SERVER_SHIFT	3
>> +#define KVM_XIVE_EAS_SERVER_MASK	0xfffffff8ULL
>> +#define KVM_XIVE_EAS_MASK_SHIFT		32
>> +#define KVM_XIVE_EAS_MASK_MASK		0x100000000ULL
>> +#define KVM_XIVE_EAS_EISN_SHIFT		33
>> +#define KVM_XIVE_EAS_EISN_MASK		0xfffffffe00000000ULL
>> +
>>  #endif /* __LINUX_KVM_POWERPC_H */
>> diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c
>> index f2de1bcf3b35..0468b605baa7 100644
>> --- a/arch/powerpc/kvm/book3s_xive_native.c
>> +++ b/arch/powerpc/kvm/book3s_xive_native.c
>> @@ -525,6 +525,88 @@ static int kvmppc_xive_native_sync(struct kvmppc_xive *xive, long irq, u64 addr)
>>  	return 0;
>>  }
>>  
>> +static int kvmppc_xive_native_set_eas(struct kvmppc_xive *xive, long irq,
>> +				      u64 addr)
> 
> I'd prefer to avoid the name "EAS" here.  IIUC these aren't "raw" EAS
> values, but rather essentially the "source config" in the terminology
> of the PAPR hcalls.  Which, yes, is basically implemented by setting
> the EAS, but since it's the PAPR architected state that we need to
> preserve across migration, I'd prefer to stick as close as we can to
> the PAPR terminology.

But we don't have an equivalent name in the PAPR specs for the tuple
(prio, server). We could use the generic 'target' name, maybe? Even
though this usually refers to a CPU number.

Or IVE (Interrupt Vector Entry)? That makes some sense.
This was the former name in HW. I think we can recycle it for KVM.
 
C.  

> 
>> +{
>> +	struct kvmppc_xive_src_block *sb;
>> +	struct kvmppc_xive_irq_state *state;
>> +	u64 __user *ubufp = (u64 __user *) addr;
>> +	u16 src;
>> +	u64 kvm_eas;
>> +	u32 server;
>> +	u8 priority;
>> +	u32 eisn;
>> +
>> +	sb = kvmppc_xive_find_source(xive, irq, &src);
>> +	if (!sb)
>> +		return -ENOENT;
>> +
>> +	state = &sb->irq_state[src];
>> +
>> +	if (!state->valid)
>> +		return -EINVAL;
>> +
>> +	if (get_user(kvm_eas, ubufp))
>> +		return -EFAULT;
>> +
>> +	pr_devel("%s irq=0x%lx eas=%016llx\n", __func__, irq, kvm_eas);
>> +
>> +	priority = (kvm_eas & KVM_XIVE_EAS_PRIORITY_MASK) >>
>> +		KVM_XIVE_EAS_PRIORITY_SHIFT;
>> +	server = (kvm_eas & KVM_XIVE_EAS_SERVER_MASK) >>
>> +		KVM_XIVE_EAS_SERVER_SHIFT;
>> +	eisn = (kvm_eas & KVM_XIVE_EAS_EISN_MASK) >> KVM_XIVE_EAS_EISN_SHIFT;
>> +
>> +	if (priority != xive_prio_from_guest(priority)) {
>> +		pr_err("invalid priority for queue %d for VCPU %d\n",
>> +		       priority, server);
>> +		return -EINVAL;
>> +	}
>> +
>> +	return kvmppc_xive_native_set_source_config(xive, sb, state, server,
>> +						    priority, eisn);
>> +}
>> +
>> +static int kvmppc_xive_native_get_eas(struct kvmppc_xive *xive, long irq,
>> +				      u64 addr)
>> +{
>> +	struct kvmppc_xive_src_block *sb;
>> +	struct kvmppc_xive_irq_state *state;
>> +	u64 __user *ubufp = (u64 __user *) addr;
>> +	u16 src;
>> +	u64 kvm_eas;
>> +
>> +	sb = kvmppc_xive_find_source(xive, irq, &src);
>> +	if (!sb)
>> +		return -ENOENT;
>> +
>> +	state = &sb->irq_state[src];
>> +
>> +	if (!state->valid)
>> +		return -EINVAL;
>> +
>> +	arch_spin_lock(&sb->lock);
>> +
>> +	if (state->act_priority == MASKED)
>> +		kvm_eas = KVM_XIVE_EAS_MASK_MASK;
>> +	else {
>> +		kvm_eas = (state->act_priority << KVM_XIVE_EAS_PRIORITY_SHIFT) &
>> +			KVM_XIVE_EAS_PRIORITY_MASK;
>> +		kvm_eas |= (state->act_server << KVM_XIVE_EAS_SERVER_SHIFT) &
>> +			KVM_XIVE_EAS_SERVER_MASK;
>> +		kvm_eas |= ((u64) state->eisn << KVM_XIVE_EAS_EISN_SHIFT) &
>> +			KVM_XIVE_EAS_EISN_MASK;
>> +	}
>> +	arch_spin_unlock(&sb->lock);
>> +
>> +	pr_devel("%s irq=0x%lx eas=%016llx\n", __func__, irq, kvm_eas);
>> +
>> +	if (put_user(kvm_eas, ubufp))
>> +		return -EFAULT;
>> +
>> +	return 0;
>> +}
>> +
>>  static int kvmppc_xive_native_set_attr(struct kvm_device *dev,
>>  				       struct kvm_device_attr *attr)
>>  {
>> @@ -544,6 +626,8 @@ static int kvmppc_xive_native_set_attr(struct kvm_device *dev,
>>  						     attr->addr);
>>  	case KVM_DEV_XIVE_GRP_SYNC:
>>  		return kvmppc_xive_native_sync(xive, attr->attr, attr->addr);
>> +	case KVM_DEV_XIVE_GRP_EAS:
>> +		return kvmppc_xive_native_set_eas(xive, attr->attr, attr->addr);
>>  	}
>>  	return -ENXIO;
>>  }
>> @@ -564,6 +648,8 @@ static int kvmppc_xive_native_get_attr(struct kvm_device *dev,
>>  			return kvmppc_xive_native_get_vc_base(xive, attr->addr);
>>  		}
>>  		break;
>> +	case KVM_DEV_XIVE_GRP_EAS:
>> +		return kvmppc_xive_native_get_eas(xive, attr->attr, attr->addr);
>>  	}
>>  	return -ENXIO;
>>  }
>> @@ -583,6 +669,7 @@ static int kvmppc_xive_native_has_attr(struct kvm_device *dev,
>>  		break;
>>  	case KVM_DEV_XIVE_GRP_SOURCES:
>>  	case KVM_DEV_XIVE_GRP_SYNC:
>> +	case KVM_DEV_XIVE_GRP_EAS:
>>  		if (attr->attr >= KVMPPC_XIVE_FIRST_IRQ &&
>>  		    attr->attr < KVMPPC_XIVE_NR_IRQS)
>>  			return 0;
>
David Gibson Feb. 5, 2019, 5:32 a.m. UTC | #3
On Mon, Feb 04, 2019 at 05:07:28PM +0100, Cédric Le Goater wrote:
> On 2/4/19 6:21 AM, David Gibson wrote:
> > On Mon, Jan 07, 2019 at 07:43:27PM +0100, Cédric Le Goater wrote:
> >> Theses are use to capure the XIVE EAS table of the KVM device, the
> >> configuration of the source targets.
> >>
> >> Signed-off-by: Cédric Le Goater <clg@kaod.org>
> >> ---
> >>  arch/powerpc/include/uapi/asm/kvm.h   | 11 ++++
> >>  arch/powerpc/kvm/book3s_xive_native.c | 87 +++++++++++++++++++++++++++
> >>  2 files changed, 98 insertions(+)
> >>
> >> diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h
> >> index 1a8740629acf..faf024f39858 100644
> >> --- a/arch/powerpc/include/uapi/asm/kvm.h
> >> +++ b/arch/powerpc/include/uapi/asm/kvm.h
> >> @@ -683,9 +683,20 @@ struct kvm_ppc_cpu_char {
> >>  #define   KVM_DEV_XIVE_SAVE_EQ_PAGES	4
> >>  #define KVM_DEV_XIVE_GRP_SOURCES	2	/* 64-bit source attributes */
> >>  #define KVM_DEV_XIVE_GRP_SYNC		3	/* 64-bit source attributes */
> >> +#define KVM_DEV_XIVE_GRP_EAS		4	/* 64-bit eas attributes */
> >>  
> >>  /* Layout of 64-bit XIVE source attribute values */
> >>  #define KVM_XIVE_LEVEL_SENSITIVE	(1ULL << 0)
> >>  #define KVM_XIVE_LEVEL_ASSERTED		(1ULL << 1)
> >>  
> >> +/* Layout of 64-bit eas attribute values */
> >> +#define KVM_XIVE_EAS_PRIORITY_SHIFT	0
> >> +#define KVM_XIVE_EAS_PRIORITY_MASK	0x7
> >> +#define KVM_XIVE_EAS_SERVER_SHIFT	3
> >> +#define KVM_XIVE_EAS_SERVER_MASK	0xfffffff8ULL
> >> +#define KVM_XIVE_EAS_MASK_SHIFT		32
> >> +#define KVM_XIVE_EAS_MASK_MASK		0x100000000ULL
> >> +#define KVM_XIVE_EAS_EISN_SHIFT		33
> >> +#define KVM_XIVE_EAS_EISN_MASK		0xfffffffe00000000ULL
> >> +
> >>  #endif /* __LINUX_KVM_POWERPC_H */
> >> diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c
> >> index f2de1bcf3b35..0468b605baa7 100644
> >> --- a/arch/powerpc/kvm/book3s_xive_native.c
> >> +++ b/arch/powerpc/kvm/book3s_xive_native.c
> >> @@ -525,6 +525,88 @@ static int kvmppc_xive_native_sync(struct kvmppc_xive *xive, long irq, u64 addr)
> >>  	return 0;
> >>  }
> >>  
> >> +static int kvmppc_xive_native_set_eas(struct kvmppc_xive *xive, long irq,
> >> +				      u64 addr)
> > 
> > I'd prefer to avoid the name "EAS" here.  IIUC these aren't "raw" EAS
> > values, but rather essentially the "source config" in the terminology
> > of the PAPR hcalls.  Which, yes, is basically implemented by setting
> > the EAS, but since it's the PAPR architected state that we need to
> > preserve across migration, I'd prefer to stick as close as we can to
> > the PAPR terminology.
> 
> But we don't have an equivalent name in the PAPR specs for the tuple 
> (prio, server). We could use the generic 'target' name may be ? even 
> if this is usually referring to a CPU number.

Um.. what?  That's about terminology for one of the fields in this
thing, not about the name for the thing itself.

> Or, IVE (Interrupt Vector Entry) ? which makes some sense. 
> This is was the former name in HW. I think we recycle it for KVM.

That's a terrible idea, which will make a confusing situation even
more confusing.
Cédric Le Goater Feb. 5, 2019, 1:03 p.m. UTC | #4
On 2/5/19 6:32 AM, David Gibson wrote:
> On Mon, Feb 04, 2019 at 05:07:28PM +0100, Cédric Le Goater wrote:
>> On 2/4/19 6:21 AM, David Gibson wrote:
>>> On Mon, Jan 07, 2019 at 07:43:27PM +0100, Cédric Le Goater wrote:
>>>> Theses are use to capure the XIVE EAS table of the KVM device, the
>>>> configuration of the source targets.
>>>>
>>>> Signed-off-by: Cédric Le Goater <clg@kaod.org>
>>>> ---
>>>>  arch/powerpc/include/uapi/asm/kvm.h   | 11 ++++
>>>>  arch/powerpc/kvm/book3s_xive_native.c | 87 +++++++++++++++++++++++++++
>>>>  2 files changed, 98 insertions(+)
>>>>
>>>> diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h
>>>> index 1a8740629acf..faf024f39858 100644
>>>> --- a/arch/powerpc/include/uapi/asm/kvm.h
>>>> +++ b/arch/powerpc/include/uapi/asm/kvm.h
>>>> @@ -683,9 +683,20 @@ struct kvm_ppc_cpu_char {
>>>>  #define   KVM_DEV_XIVE_SAVE_EQ_PAGES	4
>>>>  #define KVM_DEV_XIVE_GRP_SOURCES	2	/* 64-bit source attributes */
>>>>  #define KVM_DEV_XIVE_GRP_SYNC		3	/* 64-bit source attributes */
>>>> +#define KVM_DEV_XIVE_GRP_EAS		4	/* 64-bit eas attributes */
>>>>  
>>>>  /* Layout of 64-bit XIVE source attribute values */
>>>>  #define KVM_XIVE_LEVEL_SENSITIVE	(1ULL << 0)
>>>>  #define KVM_XIVE_LEVEL_ASSERTED		(1ULL << 1)
>>>>  
>>>> +/* Layout of 64-bit eas attribute values */
>>>> +#define KVM_XIVE_EAS_PRIORITY_SHIFT	0
>>>> +#define KVM_XIVE_EAS_PRIORITY_MASK	0x7
>>>> +#define KVM_XIVE_EAS_SERVER_SHIFT	3
>>>> +#define KVM_XIVE_EAS_SERVER_MASK	0xfffffff8ULL
>>>> +#define KVM_XIVE_EAS_MASK_SHIFT		32
>>>> +#define KVM_XIVE_EAS_MASK_MASK		0x100000000ULL
>>>> +#define KVM_XIVE_EAS_EISN_SHIFT		33
>>>> +#define KVM_XIVE_EAS_EISN_MASK		0xfffffffe00000000ULL
>>>> +
>>>>  #endif /* __LINUX_KVM_POWERPC_H */
>>>> diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c
>>>> index f2de1bcf3b35..0468b605baa7 100644
>>>> --- a/arch/powerpc/kvm/book3s_xive_native.c
>>>> +++ b/arch/powerpc/kvm/book3s_xive_native.c
>>>> @@ -525,6 +525,88 @@ static int kvmppc_xive_native_sync(struct kvmppc_xive *xive, long irq, u64 addr)
>>>>  	return 0;
>>>>  }
>>>>  
>>>> +static int kvmppc_xive_native_set_eas(struct kvmppc_xive *xive, long irq,
>>>> +				      u64 addr)
>>>
>>> I'd prefer to avoid the name "EAS" here.  IIUC these aren't "raw" EAS
>>> values, but rather essentially the "source config" in the terminology
>>> of the PAPR hcalls.  Which, yes, is basically implemented by setting
>>> the EAS, but since it's the PAPR architected state that we need to
>>> preserve across migration, I'd prefer to stick as close as we can to
>>> the PAPR terminology.
>>
>> But we don't have an equivalent name in the PAPR specs for the tuple 
>> (prio, server). We could use the generic 'target' name may be ? even 
>> if this is usually referring to a CPU number.
> 
> Um.. what?  That's about terminology for one of the fields in this
> thing, not about the name for the thing itself.
> 
>> Or, IVE (Interrupt Vector Entry) ? which makes some sense. 
>> This is was the former name in HW. I think we recycle it for KVM.
> 
> That's a terrible idea, which will make a confusing situation even
> more confusing.

Let's use SOURCE_CONFIG and QUEUE_CONFIG. The KVM ioctls are very 
similar to the hcalls anyhow.

C.
David Gibson Feb. 6, 2019, 1:23 a.m. UTC | #5
On Tue, Feb 05, 2019 at 02:03:11PM +0100, Cédric Le Goater wrote:
> On 2/5/19 6:32 AM, David Gibson wrote:
> > On Mon, Feb 04, 2019 at 05:07:28PM +0100, Cédric Le Goater wrote:
> >> On 2/4/19 6:21 AM, David Gibson wrote:
> >>> On Mon, Jan 07, 2019 at 07:43:27PM +0100, Cédric Le Goater wrote:
> >>>> Theses are use to capure the XIVE EAS table of the KVM device, the
> >>>> configuration of the source targets.
> >>>>
> >>>> Signed-off-by: Cédric Le Goater <clg@kaod.org>
> >>>> ---
> >>>>  arch/powerpc/include/uapi/asm/kvm.h   | 11 ++++
> >>>>  arch/powerpc/kvm/book3s_xive_native.c | 87 +++++++++++++++++++++++++++
> >>>>  2 files changed, 98 insertions(+)
> >>>>
> >>>> diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h
> >>>> index 1a8740629acf..faf024f39858 100644
> >>>> --- a/arch/powerpc/include/uapi/asm/kvm.h
> >>>> +++ b/arch/powerpc/include/uapi/asm/kvm.h
> >>>> @@ -683,9 +683,20 @@ struct kvm_ppc_cpu_char {
> >>>>  #define   KVM_DEV_XIVE_SAVE_EQ_PAGES	4
> >>>>  #define KVM_DEV_XIVE_GRP_SOURCES	2	/* 64-bit source attributes */
> >>>>  #define KVM_DEV_XIVE_GRP_SYNC		3	/* 64-bit source attributes */
> >>>> +#define KVM_DEV_XIVE_GRP_EAS		4	/* 64-bit eas attributes */
> >>>>  
> >>>>  /* Layout of 64-bit XIVE source attribute values */
> >>>>  #define KVM_XIVE_LEVEL_SENSITIVE	(1ULL << 0)
> >>>>  #define KVM_XIVE_LEVEL_ASSERTED		(1ULL << 1)
> >>>>  
> >>>> +/* Layout of 64-bit eas attribute values */
> >>>> +#define KVM_XIVE_EAS_PRIORITY_SHIFT	0
> >>>> +#define KVM_XIVE_EAS_PRIORITY_MASK	0x7
> >>>> +#define KVM_XIVE_EAS_SERVER_SHIFT	3
> >>>> +#define KVM_XIVE_EAS_SERVER_MASK	0xfffffff8ULL
> >>>> +#define KVM_XIVE_EAS_MASK_SHIFT		32
> >>>> +#define KVM_XIVE_EAS_MASK_MASK		0x100000000ULL
> >>>> +#define KVM_XIVE_EAS_EISN_SHIFT		33
> >>>> +#define KVM_XIVE_EAS_EISN_MASK		0xfffffffe00000000ULL
> >>>> +
> >>>>  #endif /* __LINUX_KVM_POWERPC_H */
> >>>> diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c
> >>>> index f2de1bcf3b35..0468b605baa7 100644
> >>>> --- a/arch/powerpc/kvm/book3s_xive_native.c
> >>>> +++ b/arch/powerpc/kvm/book3s_xive_native.c
> >>>> @@ -525,6 +525,88 @@ static int kvmppc_xive_native_sync(struct kvmppc_xive *xive, long irq, u64 addr)
> >>>>  	return 0;
> >>>>  }
> >>>>  
> >>>> +static int kvmppc_xive_native_set_eas(struct kvmppc_xive *xive, long irq,
> >>>> +				      u64 addr)
> >>>
> >>> I'd prefer to avoid the name "EAS" here.  IIUC these aren't "raw" EAS
> >>> values, but rather essentially the "source config" in the terminology
> >>> of the PAPR hcalls.  Which, yes, is basically implemented by setting
> >>> the EAS, but since it's the PAPR architected state that we need to
> >>> preserve across migration, I'd prefer to stick as close as we can to
> >>> the PAPR terminology.
> >>
> >> But we don't have an equivalent name in the PAPR specs for the tuple 
> >> (prio, server). We could use the generic 'target' name may be ? even 
> >> if this is usually referring to a CPU number.
> > 
> > Um.. what?  That's about terminology for one of the fields in this
> > thing, not about the name for the thing itself.
> > 
> >> Or, IVE (Interrupt Vector Entry) ? which makes some sense. 
> >> This is was the former name in HW. I think we recycle it for KVM.
> > 
> > That's a terrible idea, which will make a confusing situation even
> > more confusing.
> 
> Let's use SOURCE_CONFIG and QUEUE_CONFIG. The KVM ioctls are very 
> similar to the hcalls anyhow.

Yes, I think that's a good idea.
David Gibson Feb. 6, 2019, 1:24 a.m. UTC | #6
On Wed, Feb 06, 2019 at 12:23:29PM +1100, David Gibson wrote:
> On Tue, Feb 05, 2019 at 02:03:11PM +0100, Cédric Le Goater wrote:
> > On 2/5/19 6:32 AM, David Gibson wrote:
> > > On Mon, Feb 04, 2019 at 05:07:28PM +0100, Cédric Le Goater wrote:
> > >> On 2/4/19 6:21 AM, David Gibson wrote:
> > >>> On Mon, Jan 07, 2019 at 07:43:27PM +0100, Cédric Le Goater wrote:
> > >>>> Theses are use to capure the XIVE EAS table of the KVM device, the
> > >>>> configuration of the source targets.
> > >>>>
> > >>>> Signed-off-by: Cédric Le Goater <clg@kaod.org>
> > >>>> ---
> > >>>>  arch/powerpc/include/uapi/asm/kvm.h   | 11 ++++
> > >>>>  arch/powerpc/kvm/book3s_xive_native.c | 87 +++++++++++++++++++++++++++
> > >>>>  2 files changed, 98 insertions(+)
> > >>>>
> > >>>> diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h
> > >>>> index 1a8740629acf..faf024f39858 100644
> > >>>> --- a/arch/powerpc/include/uapi/asm/kvm.h
> > >>>> +++ b/arch/powerpc/include/uapi/asm/kvm.h
> > >>>> @@ -683,9 +683,20 @@ struct kvm_ppc_cpu_char {
> > >>>>  #define   KVM_DEV_XIVE_SAVE_EQ_PAGES	4
> > >>>>  #define KVM_DEV_XIVE_GRP_SOURCES	2	/* 64-bit source attributes */
> > >>>>  #define KVM_DEV_XIVE_GRP_SYNC		3	/* 64-bit source attributes */
> > >>>> +#define KVM_DEV_XIVE_GRP_EAS		4	/* 64-bit eas attributes */
> > >>>>  
> > >>>>  /* Layout of 64-bit XIVE source attribute values */
> > >>>>  #define KVM_XIVE_LEVEL_SENSITIVE	(1ULL << 0)
> > >>>>  #define KVM_XIVE_LEVEL_ASSERTED		(1ULL << 1)
> > >>>>  
> > >>>> +/* Layout of 64-bit eas attribute values */
> > >>>> +#define KVM_XIVE_EAS_PRIORITY_SHIFT	0
> > >>>> +#define KVM_XIVE_EAS_PRIORITY_MASK	0x7
> > >>>> +#define KVM_XIVE_EAS_SERVER_SHIFT	3
> > >>>> +#define KVM_XIVE_EAS_SERVER_MASK	0xfffffff8ULL
> > >>>> +#define KVM_XIVE_EAS_MASK_SHIFT		32
> > >>>> +#define KVM_XIVE_EAS_MASK_MASK		0x100000000ULL
> > >>>> +#define KVM_XIVE_EAS_EISN_SHIFT		33
> > >>>> +#define KVM_XIVE_EAS_EISN_MASK		0xfffffffe00000000ULL
> > >>>> +
> > >>>>  #endif /* __LINUX_KVM_POWERPC_H */
> > >>>> diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c
> > >>>> index f2de1bcf3b35..0468b605baa7 100644
> > >>>> --- a/arch/powerpc/kvm/book3s_xive_native.c
> > >>>> +++ b/arch/powerpc/kvm/book3s_xive_native.c
> > >>>> @@ -525,6 +525,88 @@ static int kvmppc_xive_native_sync(struct kvmppc_xive *xive, long irq, u64 addr)
> > >>>>  	return 0;
> > >>>>  }
> > >>>>  
> > >>>> +static int kvmppc_xive_native_set_eas(struct kvmppc_xive *xive, long irq,
> > >>>> +				      u64 addr)
> > >>>
> > >>> I'd prefer to avoid the name "EAS" here.  IIUC these aren't "raw" EAS
> > >>> values, but rather essentially the "source config" in the terminology
> > >>> of the PAPR hcalls.  Which, yes, is basically implemented by setting
> > >>> the EAS, but since it's the PAPR architected state that we need to
> > >>> preserve across migration, I'd prefer to stick as close as we can to
> > >>> the PAPR terminology.
> > >>
> > >> But we don't have an equivalent name in the PAPR specs for the tuple 
> > >> (prio, server). We could use the generic 'target' name may be ? even 
> > >> if this is usually referring to a CPU number.
> > > 
> > > Um.. what?  That's about terminology for one of the fields in this
> > > thing, not about the name for the thing itself.
> > > 
> > >> Or, IVE (Interrupt Vector Entry) ? which makes some sense. 
> > >> This is was the former name in HW. I think we recycle it for KVM.
> > > 
> > > That's a terrible idea, which will make a confusing situation even
> > > more confusing.
> > 
> > Let's use SOURCE_CONFIG and QUEUE_CONFIG. The KVM ioctls are very 
> > similar to the hcalls anyhow.
> 
> Yes, I think that's a good idea.

Actually... AIUI the SET_CONFIG hcalls shouldn't be a fast path.  Can
we simplify things further by removing the hcall implementation from
the kernel entirely, and have qemu implement them by basically just
forwarding them to the appropriate SET_CONFIG ioctl()?
Cédric Le Goater Feb. 6, 2019, 7:07 a.m. UTC | #7
On 2/6/19 2:24 AM, David Gibson wrote:
> On Wed, Feb 06, 2019 at 12:23:29PM +1100, David Gibson wrote:
>> On Tue, Feb 05, 2019 at 02:03:11PM +0100, Cédric Le Goater wrote:
>>> On 2/5/19 6:32 AM, David Gibson wrote:
>>>> On Mon, Feb 04, 2019 at 05:07:28PM +0100, Cédric Le Goater wrote:
>>>>> On 2/4/19 6:21 AM, David Gibson wrote:
>>>>>> On Mon, Jan 07, 2019 at 07:43:27PM +0100, Cédric Le Goater wrote:
>>>>>>> Theses are use to capure the XIVE EAS table of the KVM device, the
>>>>>>> configuration of the source targets.
>>>>>>>
>>>>>>> Signed-off-by: Cédric Le Goater <clg@kaod.org>
>>>>>>> ---
>>>>>>>  arch/powerpc/include/uapi/asm/kvm.h   | 11 ++++
>>>>>>>  arch/powerpc/kvm/book3s_xive_native.c | 87 +++++++++++++++++++++++++++
>>>>>>>  2 files changed, 98 insertions(+)
>>>>>>>
>>>>>>> diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h
>>>>>>> index 1a8740629acf..faf024f39858 100644
>>>>>>> --- a/arch/powerpc/include/uapi/asm/kvm.h
>>>>>>> +++ b/arch/powerpc/include/uapi/asm/kvm.h
>>>>>>> @@ -683,9 +683,20 @@ struct kvm_ppc_cpu_char {
>>>>>>>  #define   KVM_DEV_XIVE_SAVE_EQ_PAGES	4
>>>>>>>  #define KVM_DEV_XIVE_GRP_SOURCES	2	/* 64-bit source attributes */
>>>>>>>  #define KVM_DEV_XIVE_GRP_SYNC		3	/* 64-bit source attributes */
>>>>>>> +#define KVM_DEV_XIVE_GRP_EAS		4	/* 64-bit eas attributes */
>>>>>>>  
>>>>>>>  /* Layout of 64-bit XIVE source attribute values */
>>>>>>>  #define KVM_XIVE_LEVEL_SENSITIVE	(1ULL << 0)
>>>>>>>  #define KVM_XIVE_LEVEL_ASSERTED		(1ULL << 1)
>>>>>>>  
>>>>>>> +/* Layout of 64-bit eas attribute values */
>>>>>>> +#define KVM_XIVE_EAS_PRIORITY_SHIFT	0
>>>>>>> +#define KVM_XIVE_EAS_PRIORITY_MASK	0x7
>>>>>>> +#define KVM_XIVE_EAS_SERVER_SHIFT	3
>>>>>>> +#define KVM_XIVE_EAS_SERVER_MASK	0xfffffff8ULL
>>>>>>> +#define KVM_XIVE_EAS_MASK_SHIFT		32
>>>>>>> +#define KVM_XIVE_EAS_MASK_MASK		0x100000000ULL
>>>>>>> +#define KVM_XIVE_EAS_EISN_SHIFT		33
>>>>>>> +#define KVM_XIVE_EAS_EISN_MASK		0xfffffffe00000000ULL
>>>>>>> +
>>>>>>>  #endif /* __LINUX_KVM_POWERPC_H */
>>>>>>> diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c
>>>>>>> index f2de1bcf3b35..0468b605baa7 100644
>>>>>>> --- a/arch/powerpc/kvm/book3s_xive_native.c
>>>>>>> +++ b/arch/powerpc/kvm/book3s_xive_native.c
>>>>>>> @@ -525,6 +525,88 @@ static int kvmppc_xive_native_sync(struct kvmppc_xive *xive, long irq, u64 addr)
>>>>>>>  	return 0;
>>>>>>>  }
>>>>>>>  
>>>>>>> +static int kvmppc_xive_native_set_eas(struct kvmppc_xive *xive, long irq,
>>>>>>> +				      u64 addr)
>>>>>>
>>>>>> I'd prefer to avoid the name "EAS" here.  IIUC these aren't "raw" EAS
>>>>>> values, but rather essentially the "source config" in the terminology
>>>>>> of the PAPR hcalls.  Which, yes, is basically implemented by setting
>>>>>> the EAS, but since it's the PAPR architected state that we need to
>>>>>> preserve across migration, I'd prefer to stick as close as we can to
>>>>>> the PAPR terminology.
>>>>>
>>>>> But we don't have an equivalent name in the PAPR specs for the tuple 
>>>>> (prio, server). We could use the generic 'target' name may be ? even 
>>>>> if this is usually referring to a CPU number.
>>>>
>>>> Um.. what?  That's about terminology for one of the fields in this
>>>> thing, not about the name for the thing itself.
>>>>
>>>>> Or, IVE (Interrupt Vector Entry) ? which makes some sense. 
>>>>> This is was the former name in HW. I think we recycle it for KVM.
>>>>
>>>> That's a terrible idea, which will make a confusing situation even
>>>> more confusing.
>>>
>>> Let's use SOURCE_CONFIG and QUEUE_CONFIG. The KVM ioctls are very 
>>> similar to the hcalls anyhow.
>>
>> Yes, I think that's a good idea.
> 
> Actually... AIUI the SET_CONFIG hcalls shouldn't be a fast path.  

No, indeed. I have moved them to standard hcalls in the current version.

> Can
> we simplify things further by removing the hcall implementation from
> the kernel entirely, and have qemu implement them by basically just
> forwarding them to the appropriate SET_CONFIG ioctl()?

Yes. I think we could. 

The hcalls H_INT_SET_SOURCE_CONFIG and H_INT_SET_QUEUE_CONFIG and 
the KVM ioctls to set the EQ and the SOURCE configuration have a 
lot in common. I need to look at how we can plug the KVM ioctl in 
the hcalls under QEMU.

We will have to convert the returned error to respect the PAPR 
specs or have the ioctls return H_* errors.


Let's dig into that idea. If we choose that path, QEMU will have an
up-to-date EAT and so we won't need to synchronize its state anymore
for migration.
 
H_INT_GET_SOURCE_CONFIG can be implemented in QEMU without any KVM 
ioctl.

H_INT_GET_QUEUE_INFO could be implemented in QEMU. I need to check 
how we return the address of the END ESB in sPAPR. We haven't paid 
much attention to these pages because they are not used under Linux
and today the address is returned by OPAL. 

H_INT_GET_QUEUE_CONFIG is a little more problematic because we need
to query the XIVE HW for the EQ index and toggle bit. OPAL support
is required for that. But we could reduce the KVM support to the
ioctl querying this EQ information.

H_INT_ESB could be entirely done under QEMU.

H_INT_SYNC and H_INT_RESET can not.

H_INT_GET_OS_REPORTING_LINE and H_INT_SET_OS_REPORTING_LINE are not
implemented.

C.
David Gibson Feb. 7, 2019, 2:48 a.m. UTC | #8
On Wed, Feb 06, 2019 at 08:07:36AM +0100, Cédric Le Goater wrote:
> On 2/6/19 2:24 AM, David Gibson wrote:
> > On Wed, Feb 06, 2019 at 12:23:29PM +1100, David Gibson wrote:
> >> On Tue, Feb 05, 2019 at 02:03:11PM +0100, Cédric Le Goater wrote:
> >>> On 2/5/19 6:32 AM, David Gibson wrote:
> >>>> On Mon, Feb 04, 2019 at 05:07:28PM +0100, Cédric Le Goater wrote:
> >>>>> On 2/4/19 6:21 AM, David Gibson wrote:
> >>>>>> On Mon, Jan 07, 2019 at 07:43:27PM +0100, Cédric Le Goater wrote:
> >>>>>>> Theses are use to capure the XIVE EAS table of the KVM device, the
> >>>>>>> configuration of the source targets.
> >>>>>>>
> >>>>>>> Signed-off-by: Cédric Le Goater <clg@kaod.org>
> >>>>>>> ---
> >>>>>>>  arch/powerpc/include/uapi/asm/kvm.h   | 11 ++++
> >>>>>>>  arch/powerpc/kvm/book3s_xive_native.c | 87 +++++++++++++++++++++++++++
> >>>>>>>  2 files changed, 98 insertions(+)
> >>>>>>>
> >>>>>>> diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h
> >>>>>>> index 1a8740629acf..faf024f39858 100644
> >>>>>>> --- a/arch/powerpc/include/uapi/asm/kvm.h
> >>>>>>> +++ b/arch/powerpc/include/uapi/asm/kvm.h
> >>>>>>> @@ -683,9 +683,20 @@ struct kvm_ppc_cpu_char {
> >>>>>>>  #define   KVM_DEV_XIVE_SAVE_EQ_PAGES	4
> >>>>>>>  #define KVM_DEV_XIVE_GRP_SOURCES	2	/* 64-bit source attributes */
> >>>>>>>  #define KVM_DEV_XIVE_GRP_SYNC		3	/* 64-bit source attributes */
> >>>>>>> +#define KVM_DEV_XIVE_GRP_EAS		4	/* 64-bit eas attributes */
> >>>>>>>  
> >>>>>>>  /* Layout of 64-bit XIVE source attribute values */
> >>>>>>>  #define KVM_XIVE_LEVEL_SENSITIVE	(1ULL << 0)
> >>>>>>>  #define KVM_XIVE_LEVEL_ASSERTED		(1ULL << 1)
> >>>>>>>  
> >>>>>>> +/* Layout of 64-bit eas attribute values */
> >>>>>>> +#define KVM_XIVE_EAS_PRIORITY_SHIFT	0
> >>>>>>> +#define KVM_XIVE_EAS_PRIORITY_MASK	0x7
> >>>>>>> +#define KVM_XIVE_EAS_SERVER_SHIFT	3
> >>>>>>> +#define KVM_XIVE_EAS_SERVER_MASK	0xfffffff8ULL
> >>>>>>> +#define KVM_XIVE_EAS_MASK_SHIFT		32
> >>>>>>> +#define KVM_XIVE_EAS_MASK_MASK		0x100000000ULL
> >>>>>>> +#define KVM_XIVE_EAS_EISN_SHIFT		33
> >>>>>>> +#define KVM_XIVE_EAS_EISN_MASK		0xfffffffe00000000ULL
> >>>>>>> +
> >>>>>>>  #endif /* __LINUX_KVM_POWERPC_H */
> >>>>>>> diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c
> >>>>>>> index f2de1bcf3b35..0468b605baa7 100644
> >>>>>>> --- a/arch/powerpc/kvm/book3s_xive_native.c
> >>>>>>> +++ b/arch/powerpc/kvm/book3s_xive_native.c
> >>>>>>> @@ -525,6 +525,88 @@ static int kvmppc_xive_native_sync(struct kvmppc_xive *xive, long irq, u64 addr)
> >>>>>>>  	return 0;
> >>>>>>>  }
> >>>>>>>  
> >>>>>>> +static int kvmppc_xive_native_set_eas(struct kvmppc_xive *xive, long irq,
> >>>>>>> +				      u64 addr)
> >>>>>>
> >>>>>> I'd prefer to avoid the name "EAS" here.  IIUC these aren't "raw" EAS
> >>>>>> values, but rather essentially the "source config" in the terminology
> >>>>>> of the PAPR hcalls.  Which, yes, is basically implemented by setting
> >>>>>> the EAS, but since it's the PAPR architected state that we need to
> >>>>>> preserve across migration, I'd prefer to stick as close as we can to
> >>>>>> the PAPR terminology.
> >>>>>
> >>>>> But we don't have an equivalent name in the PAPR specs for the tuple 
> >>>>> (prio, server). We could use the generic 'target' name may be ? even 
> >>>>> if this is usually referring to a CPU number.
> >>>>
> >>>> Um.. what?  That's about terminology for one of the fields in this
> >>>> thing, not about the name for the thing itself.
> >>>>
> >>>>> Or, IVE (Interrupt Vector Entry) ? which makes some sense. 
> >>>>> This is was the former name in HW. I think we recycle it for KVM.
> >>>>
> >>>> That's a terrible idea, which will make a confusing situation even
> >>>> more confusing.
> >>>
> >>> Let's use SOURCE_CONFIG and QUEUE_CONFIG. The KVM ioctls are very 
> >>> similar to the hcalls anyhow.
> >>
> >> Yes, I think that's a good idea.
> > 
> > Actually... AIUI the SET_CONFIG hcalls shouldn't be a fast path.  
> 
> No indeed. I have move them to standard hcalls in the current version.
> 
> > Can
> > we simplify things further by removing the hcall implementation from
> > the kernel entirely, and have qemu implement them by basically just
> > forwarding them to the appropriate SET_CONFIG ioctl()?
> 
> Yes. I think we could. 

Great!

> The hcalls H_INT_SET_SOURCE_CONFIG and H_INT_SET_QUEUE_CONFIG and 
> the KVM ioctls to set the EQ and the SOURCE configuration have a 
> lot in common. I need to look at how we can plug the KVM ioctl in 
> the hcalls under QEMU.
> 
> We will have to convert the returned error to respect the PAPR 
> specs or have the ioctls return H_* errors.

I don't think returning H_* values from a kernel call is a good idea.
Converting errors is kinda ugly, but I still think it's the better
option.  Note that we already have something like this for the HPT
resizing hcalls.

> Let's dig that idea. If we choose that path, QEMU will have an 
> up-to-date EAT and so we won't need to synchronize its state anymore 
> for migration.

I guess so, though I don't see that as essential.

> H_INT_GET_SOURCE_CONFIG can be implemented in QEMU without any KVM 
> ioctl.
> 
> H_INT_GET_QUEUE_INFO could be implemented in QEMU. I need to check 
> how we return the address of the END ESB in sPAPR. We haven't paid 
> much attention to these pages because they are not used under Linux
> and today the address is returned by OPAL. 
> 
> H_INT_GET_QUEUE_CONFIG is a little more problematic because we need
> to query into the XIVE HW the EQ index and toggle bit. OPAL support
> is required for that. But we could reduce the KVM support to the 
> ioctl querying these EQ information.

Right, and we'd need an ioctl() like that for migration anyway, yes?

> H_INT_ESB could be entirely done under QEMU.

This one can actually happen on fairly hot paths, so I think doing
that in qemu probably isn't a good idea.

> H_INT_SYNC and H_INT_RESET can not.
> 
> H_INT_GET_OS_REPORTING_LINE and H_INT_SET_OS_REPORTING_LINE are not
> implemented.
> 
> C.
>
Cédric Le Goater Feb. 7, 2019, 9:13 a.m. UTC | #9
On 2/7/19 3:48 AM, David Gibson wrote:
> On Wed, Feb 06, 2019 at 08:07:36AM +0100, Cédric Le Goater wrote:
>> On 2/6/19 2:24 AM, David Gibson wrote:
>>> On Wed, Feb 06, 2019 at 12:23:29PM +1100, David Gibson wrote:
>>>> On Tue, Feb 05, 2019 at 02:03:11PM +0100, Cédric Le Goater wrote:
>>>>> On 2/5/19 6:32 AM, David Gibson wrote:
>>>>>> On Mon, Feb 04, 2019 at 05:07:28PM +0100, Cédric Le Goater wrote:
>>>>>>> On 2/4/19 6:21 AM, David Gibson wrote:
>>>>>>>> On Mon, Jan 07, 2019 at 07:43:27PM +0100, Cédric Le Goater wrote:
>>>>>>>>> Theses are use to capure the XIVE EAS table of the KVM device, the
>>>>>>>>> configuration of the source targets.
>>>>>>>>>
>>>>>>>>> Signed-off-by: Cédric Le Goater <clg@kaod.org>
>>>>>>>>> ---
>>>>>>>>>  arch/powerpc/include/uapi/asm/kvm.h   | 11 ++++
>>>>>>>>>  arch/powerpc/kvm/book3s_xive_native.c | 87 +++++++++++++++++++++++++++
>>>>>>>>>  2 files changed, 98 insertions(+)
>>>>>>>>>
>>>>>>>>> diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h
>>>>>>>>> index 1a8740629acf..faf024f39858 100644
>>>>>>>>> --- a/arch/powerpc/include/uapi/asm/kvm.h
>>>>>>>>> +++ b/arch/powerpc/include/uapi/asm/kvm.h
>>>>>>>>> @@ -683,9 +683,20 @@ struct kvm_ppc_cpu_char {
>>>>>>>>>  #define   KVM_DEV_XIVE_SAVE_EQ_PAGES	4
>>>>>>>>>  #define KVM_DEV_XIVE_GRP_SOURCES	2	/* 64-bit source attributes */
>>>>>>>>>  #define KVM_DEV_XIVE_GRP_SYNC		3	/* 64-bit source attributes */
>>>>>>>>> +#define KVM_DEV_XIVE_GRP_EAS		4	/* 64-bit eas attributes */
>>>>>>>>>  
>>>>>>>>>  /* Layout of 64-bit XIVE source attribute values */
>>>>>>>>>  #define KVM_XIVE_LEVEL_SENSITIVE	(1ULL << 0)
>>>>>>>>>  #define KVM_XIVE_LEVEL_ASSERTED		(1ULL << 1)
>>>>>>>>>  
>>>>>>>>> +/* Layout of 64-bit eas attribute values */
>>>>>>>>> +#define KVM_XIVE_EAS_PRIORITY_SHIFT	0
>>>>>>>>> +#define KVM_XIVE_EAS_PRIORITY_MASK	0x7
>>>>>>>>> +#define KVM_XIVE_EAS_SERVER_SHIFT	3
>>>>>>>>> +#define KVM_XIVE_EAS_SERVER_MASK	0xfffffff8ULL
>>>>>>>>> +#define KVM_XIVE_EAS_MASK_SHIFT		32
>>>>>>>>> +#define KVM_XIVE_EAS_MASK_MASK		0x100000000ULL
>>>>>>>>> +#define KVM_XIVE_EAS_EISN_SHIFT		33
>>>>>>>>> +#define KVM_XIVE_EAS_EISN_MASK		0xfffffffe00000000ULL
>>>>>>>>> +
>>>>>>>>>  #endif /* __LINUX_KVM_POWERPC_H */
>>>>>>>>> diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c
>>>>>>>>> index f2de1bcf3b35..0468b605baa7 100644
>>>>>>>>> --- a/arch/powerpc/kvm/book3s_xive_native.c
>>>>>>>>> +++ b/arch/powerpc/kvm/book3s_xive_native.c
>>>>>>>>> @@ -525,6 +525,88 @@ static int kvmppc_xive_native_sync(struct kvmppc_xive *xive, long irq, u64 addr)
>>>>>>>>>  	return 0;
>>>>>>>>>  }
>>>>>>>>>  
>>>>>>>>> +static int kvmppc_xive_native_set_eas(struct kvmppc_xive *xive, long irq,
>>>>>>>>> +				      u64 addr)
>>>>>>>>
>>>>>>>> I'd prefer to avoid the name "EAS" here.  IIUC these aren't "raw" EAS
>>>>>>>> values, but rather essentially the "source config" in the terminology
>>>>>>>> of the PAPR hcalls.  Which, yes, is basically implemented by setting
>>>>>>>> the EAS, but since it's the PAPR architected state that we need to
>>>>>>>> preserve across migration, I'd prefer to stick as close as we can to
>>>>>>>> the PAPR terminology.
>>>>>>>
>>>>>>> But we don't have an equivalent name in the PAPR specs for the tuple 
>>>>>>> (prio, server). We could use the generic 'target' name may be ? even 
>>>>>>> if this is usually referring to a CPU number.
>>>>>>
>>>>>> Um.. what?  That's about terminology for one of the fields in this
>>>>>> thing, not about the name for the thing itself.
>>>>>>
>>>>>>> Or, IVE (Interrupt Vector Entry) ? which makes some sense. 
>>>>>>> This is was the former name in HW. I think we recycle it for KVM.
>>>>>>
>>>>>> That's a terrible idea, which will make a confusing situation even
>>>>>> more confusing.
>>>>>
>>>>> Let's use SOURCE_CONFIG and QUEUE_CONFIG. The KVM ioctls are very 
>>>>> similar to the hcalls anyhow.
>>>>
>>>> Yes, I think that's a good idea.
>>>
>>> Actually... AIUI the SET_CONFIG hcalls shouldn't be a fast path.  
>>
>> No indeed. I have move them to standard hcalls in the current version.
>>
>>> Can
>>> we simplify things further by removing the hcall implementation from
>>> the kernel entirely, and have qemu implement them by basically just
>>> forwarding them to the appropriate SET_CONFIG ioctl()?
>>
>> Yes. I think we could. 
> 
> Great!
> 
>> The hcalls H_INT_SET_SOURCE_CONFIG and H_INT_SET_QUEUE_CONFIG and 
>> the KVM ioctls to set the EQ and the SOURCE configuration have a 
>> lot in common. I need to look at how we can plug the KVM ioctl in 
>> the hcalls under QEMU.
>>
>> We will have to convert the returned error to respect the PAPR 
>> specs or have the ioctls return H_* errors.
> 
> I don't think returning H_* values from a kernel call is a good idea.
> Converting errors is kinda ugly, but I still think it's the better
> option.  Note that we already have something like this for the HPT
> resizing hcalls.

ok.
 
>> Let's dig that idea. If we choose that path, QEMU will have an 
>> up-to-date EAT and so we won't need to synchronize its state anymore 
>> for migration.
> 
> I guess so, though I don't see that as essential.
> 
>> H_INT_GET_SOURCE_CONFIG can be implemented in QEMU without any KVM 
>> ioctl.
>>
>> H_INT_GET_QUEUE_INFO could be implemented in QEMU. I need to check 
>> how we return the address of the END ESB in sPAPR. We haven't paid 
>> much attention to these pages because they are not used under Linux
>> and today the address is returned by OPAL. 
>>
>> H_INT_GET_QUEUE_CONFIG is a little more problematic because we need
>> to query into the XIVE HW the EQ index and toggle bit. OPAL support
>> is required for that. But we could reduce the KVM support to the 
>> ioctl querying these EQ information.
> 
> Right, and we'd need an ioctl() like that for migration anyway, yes?

Yes. it is the same need.

>> H_INT_ESB could be entirely done under QEMU.
> 
> This one can actually happen on fairly hot paths, so I think doing
> that in qemu probably isn't a good idea.

I agree. It would be nice to have some performance.

This hcall is used when LSIs are involved, which is not really a common 
configuration. There are no OPAL calls involved. And we are duplicating 
code at the KVM level to retrigger the interrupt when the level is still
asserted.

I will benchmark the two options before making a choice. 

C.


>> H_INT_SYNC and H_INT_RESET can not.
>>
>> H_INT_GET_OS_REPORTING_LINE and H_INT_SET_OS_REPORTING_LINE are not
>> implemented.
>>
>> C.
>>
>
David Gibson Feb. 8, 2019, 5:15 a.m. UTC | #10
On Thu, Feb 07, 2019 at 10:13:48AM +0100, Cédric Le Goater wrote:
> On 2/7/19 3:48 AM, David Gibson wrote:
> > On Wed, Feb 06, 2019 at 08:07:36AM +0100, Cédric Le Goater wrote:
> >> On 2/6/19 2:24 AM, David Gibson wrote:
> >>> On Wed, Feb 06, 2019 at 12:23:29PM +1100, David Gibson wrote:
> >>>> On Tue, Feb 05, 2019 at 02:03:11PM +0100, Cédric Le Goater wrote:
> >>>>> On 2/5/19 6:32 AM, David Gibson wrote:
> >>>>>> On Mon, Feb 04, 2019 at 05:07:28PM +0100, Cédric Le Goater wrote:
> >>>>>>> On 2/4/19 6:21 AM, David Gibson wrote:
> >>>>>>>> On Mon, Jan 07, 2019 at 07:43:27PM +0100, Cédric Le Goater wrote:
> >>>>>>>>> Theses are use to capure the XIVE EAS table of the KVM device, the
> >>>>>>>>> configuration of the source targets.
> >>>>>>>>>
> >>>>>>>>> Signed-off-by: Cédric Le Goater <clg@kaod.org>
> >>>>>>>>> ---
> >>>>>>>>>  arch/powerpc/include/uapi/asm/kvm.h   | 11 ++++
> >>>>>>>>>  arch/powerpc/kvm/book3s_xive_native.c | 87 +++++++++++++++++++++++++++
> >>>>>>>>>  2 files changed, 98 insertions(+)
> >>>>>>>>>
> >>>>>>>>> diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h
> >>>>>>>>> index 1a8740629acf..faf024f39858 100644
> >>>>>>>>> --- a/arch/powerpc/include/uapi/asm/kvm.h
> >>>>>>>>> +++ b/arch/powerpc/include/uapi/asm/kvm.h
> >>>>>>>>> @@ -683,9 +683,20 @@ struct kvm_ppc_cpu_char {
> >>>>>>>>>  #define   KVM_DEV_XIVE_SAVE_EQ_PAGES	4
> >>>>>>>>>  #define KVM_DEV_XIVE_GRP_SOURCES	2	/* 64-bit source attributes */
> >>>>>>>>>  #define KVM_DEV_XIVE_GRP_SYNC		3	/* 64-bit source attributes */
> >>>>>>>>> +#define KVM_DEV_XIVE_GRP_EAS		4	/* 64-bit eas attributes */
> >>>>>>>>>  
> >>>>>>>>>  /* Layout of 64-bit XIVE source attribute values */
> >>>>>>>>>  #define KVM_XIVE_LEVEL_SENSITIVE	(1ULL << 0)
> >>>>>>>>>  #define KVM_XIVE_LEVEL_ASSERTED		(1ULL << 1)
> >>>>>>>>>  
> >>>>>>>>> +/* Layout of 64-bit eas attribute values */
> >>>>>>>>> +#define KVM_XIVE_EAS_PRIORITY_SHIFT	0
> >>>>>>>>> +#define KVM_XIVE_EAS_PRIORITY_MASK	0x7
> >>>>>>>>> +#define KVM_XIVE_EAS_SERVER_SHIFT	3
> >>>>>>>>> +#define KVM_XIVE_EAS_SERVER_MASK	0xfffffff8ULL
> >>>>>>>>> +#define KVM_XIVE_EAS_MASK_SHIFT		32
> >>>>>>>>> +#define KVM_XIVE_EAS_MASK_MASK		0x100000000ULL
> >>>>>>>>> +#define KVM_XIVE_EAS_EISN_SHIFT		33
> >>>>>>>>> +#define KVM_XIVE_EAS_EISN_MASK		0xfffffffe00000000ULL
> >>>>>>>>> +
> >>>>>>>>>  #endif /* __LINUX_KVM_POWERPC_H */
> >>>>>>>>> diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c
> >>>>>>>>> index f2de1bcf3b35..0468b605baa7 100644
> >>>>>>>>> --- a/arch/powerpc/kvm/book3s_xive_native.c
> >>>>>>>>> +++ b/arch/powerpc/kvm/book3s_xive_native.c
> >>>>>>>>> @@ -525,6 +525,88 @@ static int kvmppc_xive_native_sync(struct kvmppc_xive *xive, long irq, u64 addr)
> >>>>>>>>>  	return 0;
> >>>>>>>>>  }
> >>>>>>>>>  
> >>>>>>>>> +static int kvmppc_xive_native_set_eas(struct kvmppc_xive *xive, long irq,
> >>>>>>>>> +				      u64 addr)
> >>>>>>>>
> >>>>>>>> I'd prefer to avoid the name "EAS" here.  IIUC these aren't "raw" EAS
> >>>>>>>> values, but rather essentially the "source config" in the terminology
> >>>>>>>> of the PAPR hcalls.  Which, yes, is basically implemented by setting
> >>>>>>>> the EAS, but since it's the PAPR architected state that we need to
> >>>>>>>> preserve across migration, I'd prefer to stick as close as we can to
> >>>>>>>> the PAPR terminology.
> >>>>>>>
> >>>>>>> But we don't have an equivalent name in the PAPR specs for the tuple 
> >>>>>>> (prio, server). We could use the generic 'target' name may be ? even 
> >>>>>>> if this is usually referring to a CPU number.
> >>>>>>
> >>>>>> Um.. what?  That's about terminology for one of the fields in this
> >>>>>> thing, not about the name for the thing itself.
> >>>>>>
> >>>>>>> Or, IVE (Interrupt Vector Entry) ? which makes some sense. 
> >>>>>>> This is was the former name in HW. I think we recycle it for KVM.
> >>>>>>
> >>>>>> That's a terrible idea, which will make a confusing situation even
> >>>>>> more confusing.
> >>>>>
> >>>>> Let's use SOURCE_CONFIG and QUEUE_CONFIG. The KVM ioctls are very 
> >>>>> similar to the hcalls anyhow.
> >>>>
> >>>> Yes, I think that's a good idea.
> >>>
> >>> Actually... AIUI the SET_CONFIG hcalls shouldn't be a fast path.  
> >>
> >> No indeed. I have move them to standard hcalls in the current version.
> >>
> >>> Can
> >>> we simplify things further by removing the hcall implementation from
> >>> the kernel entirely, and have qemu implement them by basically just
> >>> forwarding them to the appropriate SET_CONFIG ioctl()?
> >>
> >> Yes. I think we could. 
> > 
> > Great!
> > 
> >> The hcalls H_INT_SET_SOURCE_CONFIG and H_INT_SET_QUEUE_CONFIG and 
> >> the KVM ioctls to set the EQ and the SOURCE configuration have a 
> >> lot in common. I need to look at how we can plug the KVM ioctl in 
> >> the hcalls under QEMU.
> >>
> >> We will have to convert the returned error to respect the PAPR 
> >> specs or have the ioctls return H_* errors.
> > 
> > I don't think returning H_* values from a kernel call is a good idea.
> > Converting errors is kinda ugly, but I still think it's the better
> > option.  Note that we already have something like this for the HPT
> > resizing hcalls.
> 
> ok.
>  
> >> Let's dig that idea. If we choose that path, QEMU will have an 
> >> up-to-date EAT and so we won't need to synchronize its state anymore 
> >> for migration.
> > 
> > I guess so, though I don't see that as essential.
> > 
> >> H_INT_GET_SOURCE_CONFIG can be implemented in QEMU without any KVM 
> >> ioctl.
> >>
> >> H_INT_GET_QUEUE_INFO could be implemented in QEMU. I need to check 
> >> how we return the address of the END ESB in sPAPR. We haven't paid 
> >> much attention to these pages because they are not used under Linux
> >> and today the address is returned by OPAL. 
> >>
> >> H_INT_GET_QUEUE_CONFIG is a little more problematic because we need
> >> to query into the XIVE HW the EQ index and toggle bit. OPAL support
> >> is required for that. But we could reduce the KVM support to the 
> >> ioctl querying these EQ information.
> > 
> > Right, and we'd need an ioctl() like that for migration anyway, yes?
> 
> Yes. it is the same need.
> 
> >> H_INT_ESB could be entirely done under QEMU.
> > 
> > This one can actually happen on fairly hot paths, so I think doing
> > that in qemu probably isn't a good idea.
> 
> I agree It would nice to have some performance.
> 
> This hcall is used when LSIs are involved, which is not really a common 
> configuration. There are no OPAL calls involved. And we are duplicating 
> code at the KVM level to retrigger the interrupt when the level is still
> asserted.
> 
> I will benchmark the two options before making a choice.

Ok.
Cédric Le Goater Feb. 14, 2019, 4:50 p.m. UTC | #11
On 2/8/19 6:15 AM, David Gibson wrote:
> On Thu, Feb 07, 2019 at 10:13:48AM +0100, Cédric Le Goater wrote:
>> On 2/7/19 3:48 AM, David Gibson wrote:
>>> On Wed, Feb 06, 2019 at 08:07:36AM +0100, Cédric Le Goater wrote:
>>>> On 2/6/19 2:24 AM, David Gibson wrote:
>>>>> On Wed, Feb 06, 2019 at 12:23:29PM +1100, David Gibson wrote:
>>>>>> On Tue, Feb 05, 2019 at 02:03:11PM +0100, Cédric Le Goater wrote:
>>>>>>> On 2/5/19 6:32 AM, David Gibson wrote:
>>>>>>>> On Mon, Feb 04, 2019 at 05:07:28PM +0100, Cédric Le Goater wrote:
>>>>>>>>> On 2/4/19 6:21 AM, David Gibson wrote:
>>>>>>>>>> On Mon, Jan 07, 2019 at 07:43:27PM +0100, Cédric Le Goater wrote:
>>>>>>>>>>> Theses are use to capure the XIVE EAS table of the KVM device, the
>>>>>>>>>>> configuration of the source targets.
>>>>>>>>>>>
>>>>>>>>>>> Signed-off-by: Cédric Le Goater <clg@kaod.org>
>>>>>>>>>>> ---
>>>>>>>>>>>  arch/powerpc/include/uapi/asm/kvm.h   | 11 ++++
>>>>>>>>>>>  arch/powerpc/kvm/book3s_xive_native.c | 87 +++++++++++++++++++++++++++
>>>>>>>>>>>  2 files changed, 98 insertions(+)
>>>>>>>>>>>
>>>>>>>>>>> diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h
>>>>>>>>>>> index 1a8740629acf..faf024f39858 100644
>>>>>>>>>>> --- a/arch/powerpc/include/uapi/asm/kvm.h
>>>>>>>>>>> +++ b/arch/powerpc/include/uapi/asm/kvm.h
>>>>>>>>>>> @@ -683,9 +683,20 @@ struct kvm_ppc_cpu_char {
>>>>>>>>>>>  #define   KVM_DEV_XIVE_SAVE_EQ_PAGES	4
>>>>>>>>>>>  #define KVM_DEV_XIVE_GRP_SOURCES	2	/* 64-bit source attributes */
>>>>>>>>>>>  #define KVM_DEV_XIVE_GRP_SYNC		3	/* 64-bit source attributes */
>>>>>>>>>>> +#define KVM_DEV_XIVE_GRP_EAS		4	/* 64-bit eas attributes */
>>>>>>>>>>>  
>>>>>>>>>>>  /* Layout of 64-bit XIVE source attribute values */
>>>>>>>>>>>  #define KVM_XIVE_LEVEL_SENSITIVE	(1ULL << 0)
>>>>>>>>>>>  #define KVM_XIVE_LEVEL_ASSERTED		(1ULL << 1)
>>>>>>>>>>>  
>>>>>>>>>>> +/* Layout of 64-bit eas attribute values */
>>>>>>>>>>> +#define KVM_XIVE_EAS_PRIORITY_SHIFT	0
>>>>>>>>>>> +#define KVM_XIVE_EAS_PRIORITY_MASK	0x7
>>>>>>>>>>> +#define KVM_XIVE_EAS_SERVER_SHIFT	3
>>>>>>>>>>> +#define KVM_XIVE_EAS_SERVER_MASK	0xfffffff8ULL
>>>>>>>>>>> +#define KVM_XIVE_EAS_MASK_SHIFT		32
>>>>>>>>>>> +#define KVM_XIVE_EAS_MASK_MASK		0x100000000ULL
>>>>>>>>>>> +#define KVM_XIVE_EAS_EISN_SHIFT		33
>>>>>>>>>>> +#define KVM_XIVE_EAS_EISN_MASK		0xfffffffe00000000ULL
>>>>>>>>>>> +
>>>>>>>>>>>  #endif /* __LINUX_KVM_POWERPC_H */
>>>>>>>>>>> diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c
>>>>>>>>>>> index f2de1bcf3b35..0468b605baa7 100644
>>>>>>>>>>> --- a/arch/powerpc/kvm/book3s_xive_native.c
>>>>>>>>>>> +++ b/arch/powerpc/kvm/book3s_xive_native.c
>>>>>>>>>>> @@ -525,6 +525,88 @@ static int kvmppc_xive_native_sync(struct kvmppc_xive *xive, long irq, u64 addr)
>>>>>>>>>>>  	return 0;
>>>>>>>>>>>  }
>>>>>>>>>>>  
>>>>>>>>>>> +static int kvmppc_xive_native_set_eas(struct kvmppc_xive *xive, long irq,
>>>>>>>>>>> +				      u64 addr)
>>>>>>>>>>
>>>>>>>>>> I'd prefer to avoid the name "EAS" here.  IIUC these aren't "raw" EAS
>>>>>>>>>> values, but rather essentially the "source config" in the terminology
>>>>>>>>>> of the PAPR hcalls.  Which, yes, is basically implemented by setting
>>>>>>>>>> the EAS, but since it's the PAPR architected state that we need to
>>>>>>>>>> preserve across migration, I'd prefer to stick as close as we can to
>>>>>>>>>> the PAPR terminology.
>>>>>>>>>
>>>>>>>>> But we don't have an equivalent name in the PAPR specs for the tuple 
>>>>>>>>> (prio, server). We could use the generic 'target' name may be ? even 
>>>>>>>>> if this is usually referring to a CPU number.
>>>>>>>>
>>>>>>>> Um.. what?  That's about terminology for one of the fields in this
>>>>>>>> thing, not about the name for the thing itself.
>>>>>>>>
>>>>>>>>> Or, IVE (Interrupt Vector Entry) ? which makes some sense. 
>>>>>>>>> This is was the former name in HW. I think we recycle it for KVM.
>>>>>>>>
>>>>>>>> That's a terrible idea, which will make a confusing situation even
>>>>>>>> more confusing.
>>>>>>>
>>>>>>> Let's use SOURCE_CONFIG and QUEUE_CONFIG. The KVM ioctls are very 
>>>>>>> similar to the hcalls anyhow.
>>>>>>
>>>>>> Yes, I think that's a good idea.
>>>>>
>>>>> Actually... AIUI the SET_CONFIG hcalls shouldn't be a fast path.  
>>>>
>>>> No indeed. I have move them to standard hcalls in the current version.
>>>>
>>>>> Can
>>>>> we simplify things further by removing the hcall implementation from
>>>>> the kernel entirely, and have qemu implement them by basically just
>>>>> forwarding them to the appropriate SET_CONFIG ioctl()?
>>>>
>>>> Yes. I think we could. 
>>>
>>> Great!
>>>
>>>> The hcalls H_INT_SET_SOURCE_CONFIG and H_INT_SET_QUEUE_CONFIG and 
>>>> the KVM ioctls to set the EQ and the SOURCE configuration have a 
>>>> lot in common. I need to look at how we can plug the KVM ioctl in 
>>>> the hcalls under QEMU.
>>>>
>>>> We will have to convert the returned error to respect the PAPR 
>>>> specs or have the ioctls return H_* errors.
>>>
>>> I don't think returning H_* values from a kernel call is a good idea.
>>> Converting errors is kinda ugly, but I still think it's the better
>>> option.  Note that we already have something like this for the HPT
>>> resizing hcalls.
>>
>> ok.
>>  
>>>> Let's dig that idea. If we choose that path, QEMU will have an 
>>>> up-to-date EAT and so we won't need to synchronize its state anymore 
>>>> for migration.
>>>
>>> I guess so, though I don't see that as essential.
>>>
>>>> H_INT_GET_SOURCE_CONFIG can be implemented in QEMU without any KVM 
>>>> ioctl.
>>>>
>>>> H_INT_GET_QUEUE_INFO could be implemented in QEMU. I need to check 
>>>> how we return the address of the END ESB in sPAPR. We haven't paid 
>>>> much attention to these pages because they are not used under Linux
>>>> and today the address is returned by OPAL. 
>>>>
>>>> H_INT_GET_QUEUE_CONFIG is a little more problematic because we need
>>>> to query into the XIVE HW the EQ index and toggle bit. OPAL support
>>>> is required for that. But we could reduce the KVM support to the 
>>>> ioctl querying these EQ information.
>>>
>>> Right, and we'd need an ioctl() like that for migration anyway, yes?
>>
>> Yes. it is the same need.
>>
>>>> H_INT_ESB could be entirely done under QEMU.
>>>
>>> This one can actually happen on fairly hot paths, so I think doing
>>> that in qemu probably isn't a good idea.
>>
>> I agree It would nice to have some performance.
>>
>> This hcall is used when LSIs are involved, which is not really a common 
>> configuration. There are no OPAL calls involved. And we are duplicating 
>> code at the KVM level to retrigger the interrupt when the level is still
>> asserted.
>>
>> I will benchmark the two options before making a choice.
> 
> Ok.
 

Here are some iperf results for a 4-vCPU guest running a 5.0.0 kernel
on a small initrd image. I didn't do any kind of tuning like CPU pinning.
So these are really rough figures:


  kernel irqchip            OFF      ON    ON (*)

  rtl8139  (LSI)           1.19    1.24   1.23    Gbits/sec
  VIRTIO                  31.80   42.30    --     Gbits/sec


There is not much benefit in handling the H_INT_ESB hcall under KVM it seems. 
I think we can leave it under QEMU.  


C.

Patch
diff mbox series

diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h
index 1a8740629acf..faf024f39858 100644
--- a/arch/powerpc/include/uapi/asm/kvm.h
+++ b/arch/powerpc/include/uapi/asm/kvm.h
@@ -683,9 +683,20 @@  struct kvm_ppc_cpu_char {
 #define   KVM_DEV_XIVE_SAVE_EQ_PAGES	4
 #define KVM_DEV_XIVE_GRP_SOURCES	2	/* 64-bit source attributes */
 #define KVM_DEV_XIVE_GRP_SYNC		3	/* 64-bit source attributes */
+#define KVM_DEV_XIVE_GRP_EAS		4	/* 64-bit eas attributes */
 
 /* Layout of 64-bit XIVE source attribute values */
 #define KVM_XIVE_LEVEL_SENSITIVE	(1ULL << 0)
 #define KVM_XIVE_LEVEL_ASSERTED		(1ULL << 1)
 
+/* Layout of 64-bit eas attribute values */
+#define KVM_XIVE_EAS_PRIORITY_SHIFT	0
+#define KVM_XIVE_EAS_PRIORITY_MASK	0x7
+#define KVM_XIVE_EAS_SERVER_SHIFT	3
+#define KVM_XIVE_EAS_SERVER_MASK	0xfffffff8ULL
+#define KVM_XIVE_EAS_MASK_SHIFT		32
+#define KVM_XIVE_EAS_MASK_MASK		0x100000000ULL
+#define KVM_XIVE_EAS_EISN_SHIFT		33
+#define KVM_XIVE_EAS_EISN_MASK		0xfffffffe00000000ULL
+
 #endif /* __LINUX_KVM_POWERPC_H */
diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c
index f2de1bcf3b35..0468b605baa7 100644
--- a/arch/powerpc/kvm/book3s_xive_native.c
+++ b/arch/powerpc/kvm/book3s_xive_native.c
@@ -525,6 +525,88 @@  static int kvmppc_xive_native_sync(struct kvmppc_xive *xive, long irq, u64 addr)
 	return 0;
 }
 
+static int kvmppc_xive_native_set_eas(struct kvmppc_xive *xive, long irq,
+				      u64 addr)
+{
+	struct kvmppc_xive_src_block *sb;
+	struct kvmppc_xive_irq_state *state;
+	u64 __user *ubufp = (u64 __user *) addr;
+	u16 src;
+	u64 kvm_eas;
+	u32 server;
+	u8 priority;
+	u32 eisn;
+	/* Configure source @irq from the u64 attribute at user address @addr. */
+	sb = kvmppc_xive_find_source(xive, irq, &src);
+	if (!sb)
+		return -ENOENT;	/* no source block found for @irq */
+
+	state = &sb->irq_state[src];
+
+	if (!state->valid)
+		return -EINVAL;	/* source state is not marked valid */
+
+	if (get_user(kvm_eas, ubufp))
+		return -EFAULT;	/* could not read the value from userspace */
+
+	pr_devel("%s irq=0x%lx eas=%016llx\n", __func__, irq, kvm_eas);
+	/* Split the value into its fields per the KVM_XIVE_EAS_* layout. */
+	priority = (kvm_eas & KVM_XIVE_EAS_PRIORITY_MASK) >>
+		KVM_XIVE_EAS_PRIORITY_SHIFT;
+	server = (kvm_eas & KVM_XIVE_EAS_SERVER_MASK) >>
+		KVM_XIVE_EAS_SERVER_SHIFT;
+	eisn = (kvm_eas & KVM_XIVE_EAS_EISN_MASK) >> KVM_XIVE_EAS_EISN_SHIFT;
+	/* Reject priorities that xive_prio_from_guest() does not return unchanged. */
+	if (priority != xive_prio_from_guest(priority)) {
+		pr_err("invalid priority for queue %d for VCPU %d\n",
+		       priority, server);
+		return -EINVAL;
+	}
+	/* NOTE(review): the KVM_XIVE_EAS_MASK_MASK bit is never decoded here - confirm. */
+	return kvmppc_xive_native_set_source_config(xive, sb, state, server,
+						    priority, eisn);
+}
+
+static int kvmppc_xive_native_get_eas(struct kvmppc_xive *xive, long irq,
+				      u64 addr)
+{
+	struct kvmppc_xive_src_block *sb;
+	struct kvmppc_xive_irq_state *state;
+	u64 __user *ubufp = (u64 __user *) addr;
+	u16 src;
+	u64 kvm_eas;
+	/* Report the configuration of source @irq as a u64 at user address @addr. */
+	sb = kvmppc_xive_find_source(xive, irq, &src);
+	if (!sb)
+		return -ENOENT;	/* no source block found for @irq */
+
+	state = &sb->irq_state[src];
+
+	if (!state->valid)
+		return -EINVAL;	/* source state is not marked valid */
+	/* Encode the state fields while holding sb->lock. */
+	arch_spin_lock(&sb->lock);
+	/* A masked source reports only the MASK bit; server and EISN are left out. */
+	if (state->act_priority == MASKED)
+		kvm_eas = KVM_XIVE_EAS_MASK_MASK;
+	else {
+		kvm_eas = (state->act_priority << KVM_XIVE_EAS_PRIORITY_SHIFT) &
+			KVM_XIVE_EAS_PRIORITY_MASK;
+		kvm_eas |= (state->act_server << KVM_XIVE_EAS_SERVER_SHIFT) &
+			KVM_XIVE_EAS_SERVER_MASK;
+		kvm_eas |= ((u64) state->eisn << KVM_XIVE_EAS_EISN_SHIFT) &
+			KVM_XIVE_EAS_EISN_MASK;
+	}
+	arch_spin_unlock(&sb->lock);
+
+	pr_devel("%s irq=0x%lx eas=%016llx\n", __func__, irq, kvm_eas);
+	/* The value is copied out to userspace only after the lock is released. */
+	if (put_user(kvm_eas, ubufp))
+		return -EFAULT;	/* could not write the value to userspace */
+
+	return 0;
+}
+
 static int kvmppc_xive_native_set_attr(struct kvm_device *dev,
 				       struct kvm_device_attr *attr)
 {
@@ -544,6 +626,8 @@  static int kvmppc_xive_native_set_attr(struct kvm_device *dev,
 						     attr->addr);
 	case KVM_DEV_XIVE_GRP_SYNC:
 		return kvmppc_xive_native_sync(xive, attr->attr, attr->addr);
+	case KVM_DEV_XIVE_GRP_EAS:
+		return kvmppc_xive_native_set_eas(xive, attr->attr, attr->addr);
 	}
 	return -ENXIO;
 }
@@ -564,6 +648,8 @@  static int kvmppc_xive_native_get_attr(struct kvm_device *dev,
 			return kvmppc_xive_native_get_vc_base(xive, attr->addr);
 		}
 		break;
+	case KVM_DEV_XIVE_GRP_EAS:
+		return kvmppc_xive_native_get_eas(xive, attr->attr, attr->addr);
 	}
 	return -ENXIO;
 }
@@ -583,6 +669,7 @@  static int kvmppc_xive_native_has_attr(struct kvm_device *dev,
 		break;
 	case KVM_DEV_XIVE_GRP_SOURCES:
 	case KVM_DEV_XIVE_GRP_SYNC:
+	case KVM_DEV_XIVE_GRP_EAS:
 		if (attr->attr >= KVMPPC_XIVE_FIRST_IRQ &&
 		    attr->attr < KVMPPC_XIVE_NR_IRQS)
 			return 0;