[09/19] KVM: PPC: Book3S HV: add a SET_SOURCE control to the XIVE native device

Message ID 20190107184331.8429-10-clg@kaod.org
State Changes Requested
Headers show
Series
  • KVM: PPC: Book3S HV: add XIVE native exploitation mode
Related show

Commit Message

Cédric Le Goater Jan. 7, 2019, 6:43 p.m.
Interrupt sources are simply created at the OPAL level and then
MASKED. KVM only needs to know about their type: LSI or MSI.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
---
 arch/powerpc/include/uapi/asm/kvm.h           |  5 +
 arch/powerpc/kvm/book3s_xive_native.c         | 98 +++++++++++++++++++
 .../powerpc/kvm/book3s_xive_native_template.c | 27 +++++
 3 files changed, 130 insertions(+)
 create mode 100644 arch/powerpc/kvm/book3s_xive_native_template.c

Comments

David Gibson Feb. 4, 2019, 4:57 a.m. | #1
On Mon, Jan 07, 2019 at 07:43:21PM +0100, Cédric Le Goater wrote:
> Interrupt sources are simply created at the OPAL level and then
> MASKED. KVM only needs to know about their type: LSI or MSI.

This commit message isn't very illuminating.

> 
> Signed-off-by: Cédric Le Goater <clg@kaod.org>
> ---
>  arch/powerpc/include/uapi/asm/kvm.h           |  5 +
>  arch/powerpc/kvm/book3s_xive_native.c         | 98 +++++++++++++++++++
>  .../powerpc/kvm/book3s_xive_native_template.c | 27 +++++
>  3 files changed, 130 insertions(+)
>  create mode 100644 arch/powerpc/kvm/book3s_xive_native_template.c
> 
> diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h
> index 8b78b12aa118..6fc9660c5aec 100644
> --- a/arch/powerpc/include/uapi/asm/kvm.h
> +++ b/arch/powerpc/include/uapi/asm/kvm.h
> @@ -680,5 +680,10 @@ struct kvm_ppc_cpu_char {
>  #define   KVM_DEV_XIVE_GET_ESB_FD	1
>  #define   KVM_DEV_XIVE_GET_TIMA_FD	2
>  #define   KVM_DEV_XIVE_VC_BASE		3
> +#define KVM_DEV_XIVE_GRP_SOURCES	2	/* 64-bit source attributes */
> +
> +/* Layout of 64-bit XIVE source attribute values */
> +#define KVM_XIVE_LEVEL_SENSITIVE	(1ULL << 0)
> +#define KVM_XIVE_LEVEL_ASSERTED		(1ULL << 1)
>  
>  #endif /* __LINUX_KVM_POWERPC_H */
> diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c
> index 29a62914de55..2518640d4a58 100644
> --- a/arch/powerpc/kvm/book3s_xive_native.c
> +++ b/arch/powerpc/kvm/book3s_xive_native.c
> @@ -31,6 +31,24 @@
>  
>  #include "book3s_xive.h"
>  
> +/*
> + * We still instantiate them here because we use some of the
> + * generated utility functions as well in this file.

And this comment is downright cryptic.

> + */
> +#define XIVE_RUNTIME_CHECKS
> +#define X_PFX xive_vm_
> +#define X_STATIC static
> +#define X_STAT_PFX stat_vm_
> +#define __x_tima		xive_tima
> +#define __x_eoi_page(xd)	((void __iomem *)((xd)->eoi_mmio))
> +#define __x_trig_page(xd)	((void __iomem *)((xd)->trig_mmio))
> +#define __x_writeb	__raw_writeb
> +#define __x_readw	__raw_readw
> +#define __x_readq	__raw_readq
> +#define __x_writeq	__raw_writeq
> +
> +#include "book3s_xive_native_template.c"
> +
>  static void xive_native_cleanup_queue(struct kvm_vcpu *vcpu, int prio)
>  {
>  	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
> @@ -305,6 +323,78 @@ static int kvmppc_xive_native_get_tima_fd(struct kvmppc_xive *xive, u64 addr)
>  	return put_user(ret, ubufp);
>  }
>  
> +static int kvmppc_xive_native_set_source(struct kvmppc_xive *xive, long irq,
> +					 u64 addr)
> +{
> +	struct kvmppc_xive_src_block *sb;
> +	struct kvmppc_xive_irq_state *state;
> +	u64 __user *ubufp = (u64 __user *) addr;
> +	u64 val;
> +	u16 idx;
> +
> +	pr_devel("%s irq=0x%lx\n", __func__, irq);
> +
> +	if (irq < KVMPPC_XIVE_FIRST_IRQ || irq >= KVMPPC_XIVE_NR_IRQS)
> +		return -ENOENT;
> +
> +	sb = kvmppc_xive_find_source(xive, irq, &idx);
> +	if (!sb) {
> +		pr_debug("No source, creating source block...\n");

Doesn't this need to be protected by some lock?

> +		sb = kvmppc_xive_create_src_block(xive, irq);
> +		if (!sb) {
> +			pr_err("Failed to create block...\n");
> +			return -ENOMEM;
> +		}
> +	}
> +	state = &sb->irq_state[idx];
> +
> +	if (get_user(val, ubufp)) {
> +		pr_err("fault getting user info !\n");
> +		return -EFAULT;
> +	}
> +
> +	/*
> +	 * If the source doesn't already have an IPI, allocate
> +	 * one and get the corresponding data
> +	 */
> +	if (!state->ipi_number) {
> +		state->ipi_number = xive_native_alloc_irq();
> +		if (state->ipi_number == 0) {
> +			pr_err("Failed to allocate IRQ !\n");
> +			return -ENOMEM;
> +		}

Am I right in thinking this is the point at which a specific guest irq
number gets bound to a specific host irq number?

> +		xive_native_populate_irq_data(state->ipi_number,
> +					      &state->ipi_data);
> +		pr_debug("%s allocated hw_irq=0x%x for irq=0x%lx\n", __func__,
> +			 state->ipi_number, irq);
> +	}
> +
> +	arch_spin_lock(&sb->lock);
> +
> +	/* Restore LSI state */
> +	if (val & KVM_XIVE_LEVEL_SENSITIVE) {
> +		state->lsi = true;
> +		if (val & KVM_XIVE_LEVEL_ASSERTED)
> +			state->asserted = true;
> +		pr_devel("  LSI ! Asserted=%d\n", state->asserted);
> +	}
> +
> +	/* Mask IRQ to start with */
> +	state->act_server = 0;
> +	state->act_priority = MASKED;
> +	xive_vm_esb_load(&state->ipi_data, XIVE_ESB_SET_PQ_01);
> +	xive_native_configure_irq(state->ipi_number, 0, MASKED, 0);
> +
> +	/* Increment the number of valid sources and mark this one valid */
> +	if (!state->valid)
> +		xive->src_count++;
> +	state->valid = true;
> +
> +	arch_spin_unlock(&sb->lock);
> +
> +	return 0;
> +}
> +
>  static int kvmppc_xive_native_set_attr(struct kvm_device *dev,
>  				       struct kvm_device_attr *attr)
>  {
> @@ -317,6 +407,9 @@ static int kvmppc_xive_native_set_attr(struct kvm_device *dev,
>  			return kvmppc_xive_native_set_vc_base(xive, attr->addr);
>  		}
>  		break;
> +	case KVM_DEV_XIVE_GRP_SOURCES:
> +		return kvmppc_xive_native_set_source(xive, attr->attr,
> +						     attr->addr);
>  	}
>  	return -ENXIO;
>  }
> @@ -353,6 +446,11 @@ static int kvmppc_xive_native_has_attr(struct kvm_device *dev,
>  			return 0;
>  		}
>  		break;
> +	case KVM_DEV_XIVE_GRP_SOURCES:
> +		if (attr->attr >= KVMPPC_XIVE_FIRST_IRQ &&
> +		    attr->attr < KVMPPC_XIVE_NR_IRQS)
> +			return 0;
> +		break;
>  	}
>  	return -ENXIO;
>  }
> diff --git a/arch/powerpc/kvm/book3s_xive_native_template.c b/arch/powerpc/kvm/book3s_xive_native_template.c
> new file mode 100644
> index 000000000000..e7260da4a596
> --- /dev/null
> +++ b/arch/powerpc/kvm/book3s_xive_native_template.c
> @@ -0,0 +1,27 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Copyright (c) 2017-2019, IBM Corporation.
> + */
> +
> +/* File to be included by other .c files */
> +
> +#define XGLUE(a, b) a##b
> +#define GLUE(a, b) XGLUE(a, b)
> +
> +/*
> + * TODO: introduce a common template file with the XIVE native layer
> + * and the XICS-on-XIVE glue for the utility functions
> + */
> +static u8 GLUE(X_PFX, esb_load)(struct xive_irq_data *xd, u32 offset)
> +{
> +	u64 val;
> +
> +	if (xd->flags & XIVE_IRQ_FLAG_SHIFT_BUG)
> +		offset |= offset << 4;
> +
> +	val = __x_readq(__x_eoi_page(xd) + offset);
> +#ifdef __LITTLE_ENDIAN__
> +	val >>= 64-8;
> +#endif
> +	return (u8)val;
> +}
Cédric Le Goater Feb. 4, 2019, 7:07 p.m. | #2
On 2/4/19 5:57 AM, David Gibson wrote:
> On Mon, Jan 07, 2019 at 07:43:21PM +0100, Cédric Le Goater wrote:
>> Interrupt sources are simply created at the OPAL level and then
>> MASKED. KVM only needs to know about their type: LSI or MSI.
> 
> This commit message isn't very illuminating.

There is room for improvement certainly.
 
>>
>> Signed-off-by: Cédric Le Goater <clg@kaod.org>
>> ---
>>  arch/powerpc/include/uapi/asm/kvm.h           |  5 +
>>  arch/powerpc/kvm/book3s_xive_native.c         | 98 +++++++++++++++++++
>>  .../powerpc/kvm/book3s_xive_native_template.c | 27 +++++
>>  3 files changed, 130 insertions(+)
>>  create mode 100644 arch/powerpc/kvm/book3s_xive_native_template.c
>>
>> diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h
>> index 8b78b12aa118..6fc9660c5aec 100644
>> --- a/arch/powerpc/include/uapi/asm/kvm.h
>> +++ b/arch/powerpc/include/uapi/asm/kvm.h
>> @@ -680,5 +680,10 @@ struct kvm_ppc_cpu_char {
>>  #define   KVM_DEV_XIVE_GET_ESB_FD	1
>>  #define   KVM_DEV_XIVE_GET_TIMA_FD	2
>>  #define   KVM_DEV_XIVE_VC_BASE		3
>> +#define KVM_DEV_XIVE_GRP_SOURCES	2	/* 64-bit source attributes */
>> +
>> +/* Layout of 64-bit XIVE source attribute values */
>> +#define KVM_XIVE_LEVEL_SENSITIVE	(1ULL << 0)
>> +#define KVM_XIVE_LEVEL_ASSERTED		(1ULL << 1)
>>  
>>  #endif /* __LINUX_KVM_POWERPC_H */
>> diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c
>> index 29a62914de55..2518640d4a58 100644
>> --- a/arch/powerpc/kvm/book3s_xive_native.c
>> +++ b/arch/powerpc/kvm/book3s_xive_native.c
>> @@ -31,6 +31,24 @@
>>  
>>  #include "book3s_xive.h"
>>  
>> +/*
>> + * We still instantiate them here because we use some of the
>> + * generated utility functions as well in this file.
> 
> And this comment is downright cryptic.

I have removed this part now that the hcalls are not done under
real mode anymore.
 
> 
>> + */
>> +#define XIVE_RUNTIME_CHECKS
>> +#define X_PFX xive_vm_
>> +#define X_STATIC static
>> +#define X_STAT_PFX stat_vm_
>> +#define __x_tima		xive_tima
>> +#define __x_eoi_page(xd)	((void __iomem *)((xd)->eoi_mmio))
>> +#define __x_trig_page(xd)	((void __iomem *)((xd)->trig_mmio))
>> +#define __x_writeb	__raw_writeb
>> +#define __x_readw	__raw_readw
>> +#define __x_readq	__raw_readq
>> +#define __x_writeq	__raw_writeq
>> +
>> +#include "book3s_xive_native_template.c"
>> +
>>  static void xive_native_cleanup_queue(struct kvm_vcpu *vcpu, int prio)
>>  {
>>  	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
>> @@ -305,6 +323,78 @@ static int kvmppc_xive_native_get_tima_fd(struct kvmppc_xive *xive, u64 addr)
>>  	return put_user(ret, ubufp);
>>  }
>>  
>> +static int kvmppc_xive_native_set_source(struct kvmppc_xive *xive, long irq,
>> +					 u64 addr)
>> +{
>> +	struct kvmppc_xive_src_block *sb;
>> +	struct kvmppc_xive_irq_state *state;
>> +	u64 __user *ubufp = (u64 __user *) addr;
>> +	u64 val;
>> +	u16 idx;
>> +
>> +	pr_devel("%s irq=0x%lx\n", __func__, irq);
>> +
>> +	if (irq < KVMPPC_XIVE_FIRST_IRQ || irq >= KVMPPC_XIVE_NR_IRQS)
>> +		return -ENOENT;
>> +
>> +	sb = kvmppc_xive_find_source(xive, irq, &idx);
>> +	if (!sb) {
>> +		pr_debug("No source, creating source block...\n");
> 
> Doesn't this need to be protected by some lock?
> 
>> +		sb = kvmppc_xive_create_src_block(xive, irq);
>> +		if (!sb) {
>> +			pr_err("Failed to create block...\n");
>> +			return -ENOMEM;
>> +		}
>> +	}
>> +	state = &sb->irq_state[idx];
>> +
>> +	if (get_user(val, ubufp)) {
>> +		pr_err("fault getting user info !\n");
>> +		return -EFAULT;
>> +	}
>> +
>> +	/*
>> +	 * If the source doesn't already have an IPI, allocate
>> +	 * one and get the corresponding data
>> +	 */
>> +	if (!state->ipi_number) {
>> +		state->ipi_number = xive_native_alloc_irq();
>> +		if (state->ipi_number == 0) {
>> +			pr_err("Failed to allocate IRQ !\n");
>> +			return -ENOMEM;
>> +		}
> 
> Am I right in thinking this is the point at which a specific guest irq
> number gets bound to a specific host irq number?

Yes. The XIVE IRQ state caches this information, and 'state' should
indeed be protected before being assigned. The XICS-over-XIVE device
also has the same race issue.

It's not showing because we're initializing the KVM device sequentially
from QEMU and only once.

Thanks,

C. 
 

> 
>> +		xive_native_populate_irq_data(state->ipi_number,
>> +					      &state->ipi_data);
>> +		pr_debug("%s allocated hw_irq=0x%x for irq=0x%lx\n", __func__,
>> +			 state->ipi_number, irq);
>> +	}
>> +
>> +	arch_spin_lock(&sb->lock);
>> +
>> +	/* Restore LSI state */
>> +	if (val & KVM_XIVE_LEVEL_SENSITIVE) {
>> +		state->lsi = true;
>> +		if (val & KVM_XIVE_LEVEL_ASSERTED)
>> +			state->asserted = true;
>> +		pr_devel("  LSI ! Asserted=%d\n", state->asserted);
>> +	}
>> +
>> +	/* Mask IRQ to start with */
>> +	state->act_server = 0;
>> +	state->act_priority = MASKED;
>> +	xive_vm_esb_load(&state->ipi_data, XIVE_ESB_SET_PQ_01);
>> +	xive_native_configure_irq(state->ipi_number, 0, MASKED, 0);
>> +
>> +	/* Increment the number of valid sources and mark this one valid */
>> +	if (!state->valid)
>> +		xive->src_count++;
>> +	state->valid = true;
>> +
>> +	arch_spin_unlock(&sb->lock);
>> +
>> +	return 0;
>> +}
>> +
>>  static int kvmppc_xive_native_set_attr(struct kvm_device *dev,
>>  				       struct kvm_device_attr *attr)
>>  {
>> @@ -317,6 +407,9 @@ static int kvmppc_xive_native_set_attr(struct kvm_device *dev,
>>  			return kvmppc_xive_native_set_vc_base(xive, attr->addr);
>>  		}
>>  		break;
>> +	case KVM_DEV_XIVE_GRP_SOURCES:
>> +		return kvmppc_xive_native_set_source(xive, attr->attr,
>> +						     attr->addr);
>>  	}
>>  	return -ENXIO;
>>  }
>> @@ -353,6 +446,11 @@ static int kvmppc_xive_native_has_attr(struct kvm_device *dev,
>>  			return 0;
>>  		}
>>  		break;
>> +	case KVM_DEV_XIVE_GRP_SOURCES:
>> +		if (attr->attr >= KVMPPC_XIVE_FIRST_IRQ &&
>> +		    attr->attr < KVMPPC_XIVE_NR_IRQS)
>> +			return 0;
>> +		break;
>>  	}
>>  	return -ENXIO;
>>  }
>> diff --git a/arch/powerpc/kvm/book3s_xive_native_template.c b/arch/powerpc/kvm/book3s_xive_native_template.c
>> new file mode 100644
>> index 000000000000..e7260da4a596
>> --- /dev/null
>> +++ b/arch/powerpc/kvm/book3s_xive_native_template.c
>> @@ -0,0 +1,27 @@
>> +// SPDX-License-Identifier: GPL-2.0
>> +/*
>> + * Copyright (c) 2017-2019, IBM Corporation.
>> + */
>> +
>> +/* File to be included by other .c files */
>> +
>> +#define XGLUE(a, b) a##b
>> +#define GLUE(a, b) XGLUE(a, b)
>> +
>> +/*
>> + * TODO: introduce a common template file with the XIVE native layer
>> + * and the XICS-on-XIVE glue for the utility functions
>> + */
>> +static u8 GLUE(X_PFX, esb_load)(struct xive_irq_data *xd, u32 offset)
>> +{
>> +	u64 val;
>> +
>> +	if (xd->flags & XIVE_IRQ_FLAG_SHIFT_BUG)
>> +		offset |= offset << 4;
>> +
>> +	val = __x_readq(__x_eoi_page(xd) + offset);
>> +#ifdef __LITTLE_ENDIAN__
>> +	val >>= 64-8;
>> +#endif
>> +	return (u8)val;
>> +}
>
David Gibson Feb. 5, 2019, 5:35 a.m. | #3
On Mon, Feb 04, 2019 at 08:07:20PM +0100, Cédric Le Goater wrote:
> On 2/4/19 5:57 AM, David Gibson wrote:
> > On Mon, Jan 07, 2019 at 07:43:21PM +0100, Cédric Le Goater wrote:
[snip]
> >> +		sb = kvmppc_xive_create_src_block(xive, irq);
> >> +		if (!sb) {
> >> +			pr_err("Failed to create block...\n");
> >> +			return -ENOMEM;
> >> +		}
> >> +	}
> >> +	state = &sb->irq_state[idx];
> >> +
> >> +	if (get_user(val, ubufp)) {
> >> +		pr_err("fault getting user info !\n");
> >> +		return -EFAULT;
> >> +	}
> >> +
> >> +	/*
> >> +	 * If the source doesn't already have an IPI, allocate
> >> +	 * one and get the corresponding data
> >> +	 */
> >> +	if (!state->ipi_number) {
> >> +		state->ipi_number = xive_native_alloc_irq();
> >> +		if (state->ipi_number == 0) {
> >> +			pr_err("Failed to allocate IRQ !\n");
> >> +			return -ENOMEM;
> >> +		}
> > 
> > Am I right in thinking this is the point at which a specific guest irq
> > number gets bound to a specific host irq number?
> 
> yes. the XIVE IRQ state caches this information and 'state' should be 
> protected before being assigned, indeed ... The XICS-over-XIVE device
> also has the same race issue.
> 
> It's not showing because we're initializing the KVM device sequentially
> from QEMU and only once.

Ok.

So, for the passthrough case, what's the point at which we know that a
particular guest interrupt needs to be bound to a specific real
hardware interrupt, rather than a generic IPI?
Cédric Le Goater Feb. 5, 2019, 1:39 p.m. | #4
On 2/5/19 6:35 AM, David Gibson wrote:
> On Mon, Feb 04, 2019 at 08:07:20PM +0100, Cédric Le Goater wrote:
>> On 2/4/19 5:57 AM, David Gibson wrote:
>>> On Mon, Jan 07, 2019 at 07:43:21PM +0100, Cédric Le Goater wrote:
> [snip]
>>>> +		sb = kvmppc_xive_create_src_block(xive, irq);
>>>> +		if (!sb) {
>>>> +			pr_err("Failed to create block...\n");
>>>> +			return -ENOMEM;
>>>> +		}
>>>> +	}
>>>> +	state = &sb->irq_state[idx];
>>>> +
>>>> +	if (get_user(val, ubufp)) {
>>>> +		pr_err("fault getting user info !\n");
>>>> +		return -EFAULT;
>>>> +	}
>>>> +
>>>> +	/*
>>>> +	 * If the source doesn't already have an IPI, allocate
>>>> +	 * one and get the corresponding data
>>>> +	 */
>>>> +	if (!state->ipi_number) {
>>>> +		state->ipi_number = xive_native_alloc_irq();
>>>> +		if (state->ipi_number == 0) {
>>>> +			pr_err("Failed to allocate IRQ !\n");
>>>> +			return -ENOMEM;
>>>> +		}
>>>
>>> Am I right in thinking this is the point at which a specific guest irq
>>> number gets bound to a specific host irq number?
>>
>> yes. the XIVE IRQ state caches this information and 'state' should be 
>> protected before being assigned, indeed ... The XICS-over-XIVE device
>> also has the same race issue.
>>
>> It's not showing because we're initializing the KVM device sequentially
>> from QEMU and only once.
> 
> Ok.
> 
> So, for the passthrough case, what's the point at which we know that a
> particular guest interrupt needs to be bound to a specific real
> hardware interrupt, rather than a generic IPI?

When the guest driver requests MSIs, VFIO requests a mapping of the
HW IRQs into the guest IRQ space. That is only a very brief summary,
as VFIO is a huge framework.

Patch 18 adds some initial support to handle the ESB pages but this 
should be done at the QEMU level.

C.

Patch

diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h
index 8b78b12aa118..6fc9660c5aec 100644
--- a/arch/powerpc/include/uapi/asm/kvm.h
+++ b/arch/powerpc/include/uapi/asm/kvm.h
@@ -680,5 +680,10 @@  struct kvm_ppc_cpu_char {
 #define   KVM_DEV_XIVE_GET_ESB_FD	1
 #define   KVM_DEV_XIVE_GET_TIMA_FD	2
 #define   KVM_DEV_XIVE_VC_BASE		3
+#define KVM_DEV_XIVE_GRP_SOURCES	2	/* 64-bit source attributes */
+
+/* Layout of 64-bit XIVE source attribute values */
+#define KVM_XIVE_LEVEL_SENSITIVE	(1ULL << 0)
+#define KVM_XIVE_LEVEL_ASSERTED		(1ULL << 1)
 
 #endif /* __LINUX_KVM_POWERPC_H */
diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c
index 29a62914de55..2518640d4a58 100644
--- a/arch/powerpc/kvm/book3s_xive_native.c
+++ b/arch/powerpc/kvm/book3s_xive_native.c
@@ -31,6 +31,24 @@ 
 
 #include "book3s_xive.h"
 
+/*
+ * We still instantiate them here because we use some of the
+ * generated utility functions as well in this file.
+ */
+#define XIVE_RUNTIME_CHECKS
+#define X_PFX xive_vm_
+#define X_STATIC static
+#define X_STAT_PFX stat_vm_
+#define __x_tima		xive_tima
+#define __x_eoi_page(xd)	((void __iomem *)((xd)->eoi_mmio))
+#define __x_trig_page(xd)	((void __iomem *)((xd)->trig_mmio))
+#define __x_writeb	__raw_writeb
+#define __x_readw	__raw_readw
+#define __x_readq	__raw_readq
+#define __x_writeq	__raw_writeq
+
+#include "book3s_xive_native_template.c"
+
 static void xive_native_cleanup_queue(struct kvm_vcpu *vcpu, int prio)
 {
 	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
@@ -305,6 +323,78 @@  static int kvmppc_xive_native_get_tima_fd(struct kvmppc_xive *xive, u64 addr)
 	return put_user(ret, ubufp);
 }
 
+static int kvmppc_xive_native_set_source(struct kvmppc_xive *xive, long irq,
+					 u64 addr)
+{
+	struct kvmppc_xive_src_block *sb;
+	struct kvmppc_xive_irq_state *state;
+	u64 __user *ubufp = (u64 __user *) addr;
+	u64 val;
+	u16 idx;
+
+	pr_devel("%s irq=0x%lx\n", __func__, irq);
+
+	if (irq < KVMPPC_XIVE_FIRST_IRQ || irq >= KVMPPC_XIVE_NR_IRQS)
+		return -ENOENT;
+
+	sb = kvmppc_xive_find_source(xive, irq, &idx);
+	if (!sb) {
+		pr_debug("No source, creating source block...\n");
+		sb = kvmppc_xive_create_src_block(xive, irq);
+		if (!sb) {
+			pr_err("Failed to create block...\n");
+			return -ENOMEM;
+		}
+	}
+	state = &sb->irq_state[idx];
+
+	if (get_user(val, ubufp)) {
+		pr_err("fault getting user info !\n");
+		return -EFAULT;
+	}
+
+	/*
+	 * If the source doesn't already have an IPI, allocate
+	 * one and get the corresponding data
+	 */
+	if (!state->ipi_number) {
+		state->ipi_number = xive_native_alloc_irq();
+		if (state->ipi_number == 0) {
+			pr_err("Failed to allocate IRQ !\n");
+			return -ENOMEM;
+		}
+		xive_native_populate_irq_data(state->ipi_number,
+					      &state->ipi_data);
+		pr_debug("%s allocated hw_irq=0x%x for irq=0x%lx\n", __func__,
+			 state->ipi_number, irq);
+	}
+
+	arch_spin_lock(&sb->lock);
+
+	/* Restore LSI state */
+	if (val & KVM_XIVE_LEVEL_SENSITIVE) {
+		state->lsi = true;
+		if (val & KVM_XIVE_LEVEL_ASSERTED)
+			state->asserted = true;
+		pr_devel("  LSI ! Asserted=%d\n", state->asserted);
+	}
+
+	/* Mask IRQ to start with */
+	state->act_server = 0;
+	state->act_priority = MASKED;
+	xive_vm_esb_load(&state->ipi_data, XIVE_ESB_SET_PQ_01);
+	xive_native_configure_irq(state->ipi_number, 0, MASKED, 0);
+
+	/* Increment the number of valid sources and mark this one valid */
+	if (!state->valid)
+		xive->src_count++;
+	state->valid = true;
+
+	arch_spin_unlock(&sb->lock);
+
+	return 0;
+}
+
 static int kvmppc_xive_native_set_attr(struct kvm_device *dev,
 				       struct kvm_device_attr *attr)
 {
@@ -317,6 +407,9 @@  static int kvmppc_xive_native_set_attr(struct kvm_device *dev,
 			return kvmppc_xive_native_set_vc_base(xive, attr->addr);
 		}
 		break;
+	case KVM_DEV_XIVE_GRP_SOURCES:
+		return kvmppc_xive_native_set_source(xive, attr->attr,
+						     attr->addr);
 	}
 	return -ENXIO;
 }
@@ -353,6 +446,11 @@  static int kvmppc_xive_native_has_attr(struct kvm_device *dev,
 			return 0;
 		}
 		break;
+	case KVM_DEV_XIVE_GRP_SOURCES:
+		if (attr->attr >= KVMPPC_XIVE_FIRST_IRQ &&
+		    attr->attr < KVMPPC_XIVE_NR_IRQS)
+			return 0;
+		break;
 	}
 	return -ENXIO;
 }
diff --git a/arch/powerpc/kvm/book3s_xive_native_template.c b/arch/powerpc/kvm/book3s_xive_native_template.c
new file mode 100644
index 000000000000..e7260da4a596
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_xive_native_template.c
@@ -0,0 +1,27 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2017-2019, IBM Corporation.
+ */
+
+/* File to be included by other .c files */
+
+#define XGLUE(a, b) a##b
+#define GLUE(a, b) XGLUE(a, b)
+
+/*
+ * TODO: introduce a common template file with the XIVE native layer
+ * and the XICS-on-XIVE glue for the utility functions
+ */
+static u8 GLUE(X_PFX, esb_load)(struct xive_irq_data *xd, u32 offset)
+{
+	u64 val;
+
+	if (xd->flags & XIVE_IRQ_FLAG_SHIFT_BUG)
+		offset |= offset << 4;
+
+	val = __x_readq(__x_eoi_page(xd) + offset);
+#ifdef __LITTLE_ENDIAN__
+	val >>= 64-8;
+#endif
+	return (u8)val;
+}