[14/19] KVM: PPC: Book3S HV: add a control to make the XIVE EQ pages dirty

Message ID 20190107184331.8429-15-clg@kaod.org
State Changes Requested
Headers show
Series
  • KVM: PPC: Book3S HV: add XIVE native exploitation mode
Related show

Commit Message

Cédric Le Goater Jan. 7, 2019, 6:43 p.m.
When the VM is stopped in a migration sequence, the sources are masked
and the XIVE IC is synced to stabilize the EQs. When done, the KVM
ioctl KVM_DEV_XIVE_SAVE_EQ_PAGES is called to mark dirty the EQ pages.

The migration can then transfer the remaining dirty pages to the
destination and start collecting the state of the devices.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
---
 arch/powerpc/include/uapi/asm/kvm.h   |  1 +
 arch/powerpc/kvm/book3s_xive_native.c | 40 +++++++++++++++++++++++++++
 2 files changed, 41 insertions(+)

Comments

David Gibson Feb. 4, 2019, 5:18 a.m. | #1
On Mon, Jan 07, 2019 at 07:43:26PM +0100, Cédric Le Goater wrote:
> When the VM is stopped in a migration sequence, the sources are masked
> and the XIVE IC is synced to stabilize the EQs. When done, the KVM
> ioctl KVM_DEV_XIVE_SAVE_EQ_PAGES is called to mark dirty the EQ pages.
> 
> The migration can then transfer the remaining dirty pages to the
> destination and start collecting the state of the devices.

Is there a reason to make this a separate step from the SYNC
operation?

> 
> Signed-off-by: Cédric Le Goater <clg@kaod.org>
> ---
>  arch/powerpc/include/uapi/asm/kvm.h   |  1 +
>  arch/powerpc/kvm/book3s_xive_native.c | 40 +++++++++++++++++++++++++++
>  2 files changed, 41 insertions(+)
> 
> diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h
> index f3b859223b80..1a8740629acf 100644
> --- a/arch/powerpc/include/uapi/asm/kvm.h
> +++ b/arch/powerpc/include/uapi/asm/kvm.h
> @@ -680,6 +680,7 @@ struct kvm_ppc_cpu_char {
>  #define   KVM_DEV_XIVE_GET_ESB_FD	1
>  #define   KVM_DEV_XIVE_GET_TIMA_FD	2
>  #define   KVM_DEV_XIVE_VC_BASE		3
> +#define   KVM_DEV_XIVE_SAVE_EQ_PAGES	4
>  #define KVM_DEV_XIVE_GRP_SOURCES	2	/* 64-bit source attributes */
>  #define KVM_DEV_XIVE_GRP_SYNC		3	/* 64-bit source attributes */
>  
> diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c
> index a8052867afc1..f2de1bcf3b35 100644
> --- a/arch/powerpc/kvm/book3s_xive_native.c
> +++ b/arch/powerpc/kvm/book3s_xive_native.c
> @@ -373,6 +373,43 @@ static int kvmppc_xive_native_get_tima_fd(struct kvmppc_xive *xive, u64 addr)
>  	return put_user(ret, ubufp);
>  }
>  
> +static int kvmppc_xive_native_vcpu_save_eq_pages(struct kvm_vcpu *vcpu)
> +{
> +	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
> +	unsigned int prio;
> +
> +	if (!xc)
> +		return -ENOENT;
> +
> +	for (prio = 0; prio < KVMPPC_XIVE_Q_COUNT; prio++) {
> +		struct xive_q *q = &xc->queues[prio];
> +
> +		if (!q->qpage)
> +			continue;
> +
> +		/* Mark EQ page dirty for migration */
> +		mark_page_dirty(vcpu->kvm, gpa_to_gfn(q->guest_qpage));
> +	}
> +	return 0;
> +}
> +
> +static int kvmppc_xive_native_save_eq_pages(struct kvmppc_xive *xive)
> +{
> +	struct kvm *kvm = xive->kvm;
> +	struct kvm_vcpu *vcpu;
> +	unsigned int i;
> +
> +	pr_devel("%s\n", __func__);
> +
> +	mutex_lock(&kvm->lock);
> +	kvm_for_each_vcpu(i, vcpu, kvm) {
> +		kvmppc_xive_native_vcpu_save_eq_pages(vcpu);
> +	}
> +	mutex_unlock(&kvm->lock);
> +
> +	return 0;
> +}
> +
>  static int xive_native_validate_queue_size(u32 qsize)
>  {
>  	switch (qsize) {
> @@ -498,6 +535,8 @@ static int kvmppc_xive_native_set_attr(struct kvm_device *dev,
>  		switch (attr->attr) {
>  		case KVM_DEV_XIVE_VC_BASE:
>  			return kvmppc_xive_native_set_vc_base(xive, attr->addr);
> +		case KVM_DEV_XIVE_SAVE_EQ_PAGES:
> +			return kvmppc_xive_native_save_eq_pages(xive);
>  		}
>  		break;
>  	case KVM_DEV_XIVE_GRP_SOURCES:
> @@ -538,6 +577,7 @@ static int kvmppc_xive_native_has_attr(struct kvm_device *dev,
>  		case KVM_DEV_XIVE_GET_ESB_FD:
>  		case KVM_DEV_XIVE_GET_TIMA_FD:
>  		case KVM_DEV_XIVE_VC_BASE:
> +		case KVM_DEV_XIVE_SAVE_EQ_PAGES:
>  			return 0;
>  		}
>  		break;
Cédric Le Goater Feb. 4, 2019, 3:46 p.m. | #2
On 2/4/19 6:18 AM, David Gibson wrote:
> On Mon, Jan 07, 2019 at 07:43:26PM +0100, Cédric Le Goater wrote:
>> When the VM is stopped in a migration sequence, the sources are masked
>> and the XIVE IC is synced to stabilize the EQs. When done, the KVM
>> ioctl KVM_DEV_XIVE_SAVE_EQ_PAGES is called to mark dirty the EQ pages.
>>
>> The migration can then transfer the remaining dirty pages to the
>> destination and start collecting the state of the devices.
> 
> Is there a reason to make this a separate step from the SYNC
> operation?

Hmm, apart from letting QEMU orchestrate the migration step by step, no.

We could merge the SYNC and the SAVE_EQ_PAGES in a single KVM operation. 
I think that should be fine. 

However, it does not make sense to call this operation without the VM 
being stopped. I wonder how this can be checked from KVM. Maybe we can't. 

C. 

> 
>>
>> Signed-off-by: Cédric Le Goater <clg@kaod.org>
>> ---
>>  arch/powerpc/include/uapi/asm/kvm.h   |  1 +
>>  arch/powerpc/kvm/book3s_xive_native.c | 40 +++++++++++++++++++++++++++
>>  2 files changed, 41 insertions(+)
>>
>> diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h
>> index f3b859223b80..1a8740629acf 100644
>> --- a/arch/powerpc/include/uapi/asm/kvm.h
>> +++ b/arch/powerpc/include/uapi/asm/kvm.h
>> @@ -680,6 +680,7 @@ struct kvm_ppc_cpu_char {
>>  #define   KVM_DEV_XIVE_GET_ESB_FD	1
>>  #define   KVM_DEV_XIVE_GET_TIMA_FD	2
>>  #define   KVM_DEV_XIVE_VC_BASE		3
>> +#define   KVM_DEV_XIVE_SAVE_EQ_PAGES	4
>>  #define KVM_DEV_XIVE_GRP_SOURCES	2	/* 64-bit source attributes */
>>  #define KVM_DEV_XIVE_GRP_SYNC		3	/* 64-bit source attributes */
>>  
>> diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c
>> index a8052867afc1..f2de1bcf3b35 100644
>> --- a/arch/powerpc/kvm/book3s_xive_native.c
>> +++ b/arch/powerpc/kvm/book3s_xive_native.c
>> @@ -373,6 +373,43 @@ static int kvmppc_xive_native_get_tima_fd(struct kvmppc_xive *xive, u64 addr)
>>  	return put_user(ret, ubufp);
>>  }
>>  
>> +static int kvmppc_xive_native_vcpu_save_eq_pages(struct kvm_vcpu *vcpu)
>> +{
>> +	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
>> +	unsigned int prio;
>> +
>> +	if (!xc)
>> +		return -ENOENT;
>> +
>> +	for (prio = 0; prio < KVMPPC_XIVE_Q_COUNT; prio++) {
>> +		struct xive_q *q = &xc->queues[prio];
>> +
>> +		if (!q->qpage)
>> +			continue;
>> +
>> +		/* Mark EQ page dirty for migration */
>> +		mark_page_dirty(vcpu->kvm, gpa_to_gfn(q->guest_qpage));
>> +	}
>> +	return 0;
>> +}
>> +
>> +static int kvmppc_xive_native_save_eq_pages(struct kvmppc_xive *xive)
>> +{
>> +	struct kvm *kvm = xive->kvm;
>> +	struct kvm_vcpu *vcpu;
>> +	unsigned int i;
>> +
>> +	pr_devel("%s\n", __func__);
>> +
>> +	mutex_lock(&kvm->lock);
>> +	kvm_for_each_vcpu(i, vcpu, kvm) {
>> +		kvmppc_xive_native_vcpu_save_eq_pages(vcpu);
>> +	}
>> +	mutex_unlock(&kvm->lock);
>> +
>> +	return 0;
>> +}
>> +
>>  static int xive_native_validate_queue_size(u32 qsize)
>>  {
>>  	switch (qsize) {
>> @@ -498,6 +535,8 @@ static int kvmppc_xive_native_set_attr(struct kvm_device *dev,
>>  		switch (attr->attr) {
>>  		case KVM_DEV_XIVE_VC_BASE:
>>  			return kvmppc_xive_native_set_vc_base(xive, attr->addr);
>> +		case KVM_DEV_XIVE_SAVE_EQ_PAGES:
>> +			return kvmppc_xive_native_save_eq_pages(xive);
>>  		}
>>  		break;
>>  	case KVM_DEV_XIVE_GRP_SOURCES:
>> @@ -538,6 +577,7 @@ static int kvmppc_xive_native_has_attr(struct kvm_device *dev,
>>  		case KVM_DEV_XIVE_GET_ESB_FD:
>>  		case KVM_DEV_XIVE_GET_TIMA_FD:
>>  		case KVM_DEV_XIVE_VC_BASE:
>> +		case KVM_DEV_XIVE_SAVE_EQ_PAGES:
>>  			return 0;
>>  		}
>>  		break;
>
David Gibson Feb. 5, 2019, 5:30 a.m. | #3
On Mon, Feb 04, 2019 at 04:46:00PM +0100, Cédric Le Goater wrote:
> On 2/4/19 6:18 AM, David Gibson wrote:
> > On Mon, Jan 07, 2019 at 07:43:26PM +0100, Cédric Le Goater wrote:
> >> When the VM is stopped in a migration sequence, the sources are masked
> >> and the XIVE IC is synced to stabilize the EQs. When done, the KVM
> >> ioctl KVM_DEV_XIVE_SAVE_EQ_PAGES is called to mark dirty the EQ pages.
> >>
> >> The migration can then transfer the remaining dirty pages to the
> >> destination and start collecting the state of the devices.
> > 
> > Is there a reason to make this a separate step from the SYNC
> > operation?
> 
> Hmm, apart from letting QEMU orchestrate the migration step by step, no.
> 
> We could merge the SYNC and the SAVE_EQ_PAGES in a single KVM operation. 
> I think that should be fine.

I think that makes sense.  SYNC is supposed to complete delivery of
any in-flight interrupts, and to me writing to the queue page and
marking it dirty as a result is a logical part of that.

> However, it does not make sense to call this operation without the VM 
> being stopped. I wonder how this can be checked from KVM. Maybe we
> can't.

I don't think it matters.  qemu is allowed to shoot itself in the
foot.

Patch

diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h
index f3b859223b80..1a8740629acf 100644
--- a/arch/powerpc/include/uapi/asm/kvm.h
+++ b/arch/powerpc/include/uapi/asm/kvm.h
@@ -680,6 +680,7 @@  struct kvm_ppc_cpu_char {
 #define   KVM_DEV_XIVE_GET_ESB_FD	1
 #define   KVM_DEV_XIVE_GET_TIMA_FD	2
 #define   KVM_DEV_XIVE_VC_BASE		3
+#define   KVM_DEV_XIVE_SAVE_EQ_PAGES	4
 #define KVM_DEV_XIVE_GRP_SOURCES	2	/* 64-bit source attributes */
 #define KVM_DEV_XIVE_GRP_SYNC		3	/* 64-bit source attributes */
 
diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c
index a8052867afc1..f2de1bcf3b35 100644
--- a/arch/powerpc/kvm/book3s_xive_native.c
+++ b/arch/powerpc/kvm/book3s_xive_native.c
@@ -373,6 +373,43 @@  static int kvmppc_xive_native_get_tima_fd(struct kvmppc_xive *xive, u64 addr)
 	return put_user(ret, ubufp);
 }
 
+static int kvmppc_xive_native_vcpu_save_eq_pages(struct kvm_vcpu *vcpu)
+{
+	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+	unsigned int prio;
+
+	if (!xc)
+		return -ENOENT;
+
+	for (prio = 0; prio < KVMPPC_XIVE_Q_COUNT; prio++) {
+		struct xive_q *q = &xc->queues[prio];
+
+		if (!q->qpage)
+			continue;
+
+		/* Mark EQ page dirty for migration */
+		mark_page_dirty(vcpu->kvm, gpa_to_gfn(q->guest_qpage));
+	}
+	return 0;
+}
+
+static int kvmppc_xive_native_save_eq_pages(struct kvmppc_xive *xive)
+{
+	struct kvm *kvm = xive->kvm;
+	struct kvm_vcpu *vcpu;
+	unsigned int i;
+
+	pr_devel("%s\n", __func__);
+
+	mutex_lock(&kvm->lock);
+	kvm_for_each_vcpu(i, vcpu, kvm) {
+		kvmppc_xive_native_vcpu_save_eq_pages(vcpu);
+	}
+	mutex_unlock(&kvm->lock);
+
+	return 0;
+}
+
 static int xive_native_validate_queue_size(u32 qsize)
 {
 	switch (qsize) {
@@ -498,6 +535,8 @@  static int kvmppc_xive_native_set_attr(struct kvm_device *dev,
 		switch (attr->attr) {
 		case KVM_DEV_XIVE_VC_BASE:
 			return kvmppc_xive_native_set_vc_base(xive, attr->addr);
+		case KVM_DEV_XIVE_SAVE_EQ_PAGES:
+			return kvmppc_xive_native_save_eq_pages(xive);
 		}
 		break;
 	case KVM_DEV_XIVE_GRP_SOURCES:
@@ -538,6 +577,7 @@  static int kvmppc_xive_native_has_attr(struct kvm_device *dev,
 		case KVM_DEV_XIVE_GET_ESB_FD:
 		case KVM_DEV_XIVE_GET_TIMA_FD:
 		case KVM_DEV_XIVE_VC_BASE:
+		case KVM_DEV_XIVE_SAVE_EQ_PAGES:
 			return 0;
 		}
 		break;