Message ID | 20190107184331.8429-15-clg@kaod.org (mailing list archive) |
---|---|
State | Changes Requested |
Headers | show |
Series | KVM: PPC: Book3S HV: add XIVE native exploitation mode | expand |
Context | Check | Description |
---|---|---|
snowpatch_ozlabs/apply_patch | success | next/apply_patch Successfully applied |
snowpatch_ozlabs/checkpatch | success | total: 0 errors, 0 warnings, 0 checks, 65 lines checked |
On Mon, Jan 07, 2019 at 07:43:26PM +0100, Cédric Le Goater wrote: > When the VM is stopped in a migration sequence, the sources are masked > and the XIVE IC is synced to stabilize the EQs. When done, the KVM > ioctl KVM_DEV_XIVE_SAVE_EQ_PAGES is called to mark dirty the EQ pages. > > The migration can then transfer the remaining dirty pages to the > destination and start collecting the state of the devices. Is there a reason to make this a separate step from the SYNC operation? > > Signed-off-by: Cédric Le Goater <clg@kaod.org> > --- > arch/powerpc/include/uapi/asm/kvm.h | 1 + > arch/powerpc/kvm/book3s_xive_native.c | 40 +++++++++++++++++++++++++++ > 2 files changed, 41 insertions(+) > > diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h > index f3b859223b80..1a8740629acf 100644 > --- a/arch/powerpc/include/uapi/asm/kvm.h > +++ b/arch/powerpc/include/uapi/asm/kvm.h > @@ -680,6 +680,7 @@ struct kvm_ppc_cpu_char { > #define KVM_DEV_XIVE_GET_ESB_FD 1 > #define KVM_DEV_XIVE_GET_TIMA_FD 2 > #define KVM_DEV_XIVE_VC_BASE 3 > +#define KVM_DEV_XIVE_SAVE_EQ_PAGES 4 > #define KVM_DEV_XIVE_GRP_SOURCES 2 /* 64-bit source attributes */ > #define KVM_DEV_XIVE_GRP_SYNC 3 /* 64-bit source attributes */ > > diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c > index a8052867afc1..f2de1bcf3b35 100644 > --- a/arch/powerpc/kvm/book3s_xive_native.c > +++ b/arch/powerpc/kvm/book3s_xive_native.c > @@ -373,6 +373,43 @@ static int kvmppc_xive_native_get_tima_fd(struct kvmppc_xive *xive, u64 addr) > return put_user(ret, ubufp); > } > > +static int kvmppc_xive_native_vcpu_save_eq_pages(struct kvm_vcpu *vcpu) > +{ > + struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu; > + unsigned int prio; > + > + if (!xc) > + return -ENOENT; > + > + for (prio = 0; prio < KVMPPC_XIVE_Q_COUNT; prio++) { > + struct xive_q *q = &xc->queues[prio]; > + > + if (!q->qpage) > + continue; > + > + /* Mark EQ page dirty for migration */ > + 
mark_page_dirty(vcpu->kvm, gpa_to_gfn(q->guest_qpage)); > + } > + return 0; > +} > + > +static int kvmppc_xive_native_save_eq_pages(struct kvmppc_xive *xive) > +{ > + struct kvm *kvm = xive->kvm; > + struct kvm_vcpu *vcpu; > + unsigned int i; > + > + pr_devel("%s\n", __func__); > + > + mutex_lock(&kvm->lock); > + kvm_for_each_vcpu(i, vcpu, kvm) { > + kvmppc_xive_native_vcpu_save_eq_pages(vcpu); > + } > + mutex_unlock(&kvm->lock); > + > + return 0; > +} > + > static int xive_native_validate_queue_size(u32 qsize) > { > switch (qsize) { > @@ -498,6 +535,8 @@ static int kvmppc_xive_native_set_attr(struct kvm_device *dev, > switch (attr->attr) { > case KVM_DEV_XIVE_VC_BASE: > return kvmppc_xive_native_set_vc_base(xive, attr->addr); > + case KVM_DEV_XIVE_SAVE_EQ_PAGES: > + return kvmppc_xive_native_save_eq_pages(xive); > } > break; > case KVM_DEV_XIVE_GRP_SOURCES: > @@ -538,6 +577,7 @@ static int kvmppc_xive_native_has_attr(struct kvm_device *dev, > case KVM_DEV_XIVE_GET_ESB_FD: > case KVM_DEV_XIVE_GET_TIMA_FD: > case KVM_DEV_XIVE_VC_BASE: > + case KVM_DEV_XIVE_SAVE_EQ_PAGES: > return 0; > } > break;
On 2/4/19 6:18 AM, David Gibson wrote: > On Mon, Jan 07, 2019 at 07:43:26PM +0100, Cédric Le Goater wrote: >> When the VM is stopped in a migration sequence, the sources are masked >> and the XIVE IC is synced to stabilize the EQs. When done, the KVM >> ioctl KVM_DEV_XIVE_SAVE_EQ_PAGES is called to mark dirty the EQ pages. >> >> The migration can then transfer the remaining dirty pages to the >> destination and start collecting the state of the devices. > > Is there a reason to make this a separate step from the SYNC > operation? Hmm, apart from letting QEMU orchestrate the migration step by step, no. We could merge the SYNC and the SAVE_EQ_PAGES in a single KVM operation. I think that should be fine. However, it does not make sense to call this operation without the VM being stopped. I wonder how this can be checked from KVM. Maybe we can't. C. > >> >> Signed-off-by: Cédric Le Goater <clg@kaod.org> >> --- >> arch/powerpc/include/uapi/asm/kvm.h | 1 + >> arch/powerpc/kvm/book3s_xive_native.c | 40 +++++++++++++++++++++++++++ >> 2 files changed, 41 insertions(+) >> >> diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h >> index f3b859223b80..1a8740629acf 100644 >> --- a/arch/powerpc/include/uapi/asm/kvm.h >> +++ b/arch/powerpc/include/uapi/asm/kvm.h >> @@ -680,6 +680,7 @@ struct kvm_ppc_cpu_char { >> #define KVM_DEV_XIVE_GET_ESB_FD 1 >> #define KVM_DEV_XIVE_GET_TIMA_FD 2 >> #define KVM_DEV_XIVE_VC_BASE 3 >> +#define KVM_DEV_XIVE_SAVE_EQ_PAGES 4 >> #define KVM_DEV_XIVE_GRP_SOURCES 2 /* 64-bit source attributes */ >> #define KVM_DEV_XIVE_GRP_SYNC 3 /* 64-bit source attributes */ >> >> diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c >> index a8052867afc1..f2de1bcf3b35 100644 >> --- a/arch/powerpc/kvm/book3s_xive_native.c >> +++ b/arch/powerpc/kvm/book3s_xive_native.c >> @@ -373,6 +373,43 @@ static int kvmppc_xive_native_get_tima_fd(struct kvmppc_xive *xive, u64 addr) >> return put_user(ret, 
ubufp); >> } >> >> +static int kvmppc_xive_native_vcpu_save_eq_pages(struct kvm_vcpu *vcpu) >> +{ >> + struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu; >> + unsigned int prio; >> + >> + if (!xc) >> + return -ENOENT; >> + >> + for (prio = 0; prio < KVMPPC_XIVE_Q_COUNT; prio++) { >> + struct xive_q *q = &xc->queues[prio]; >> + >> + if (!q->qpage) >> + continue; >> + >> + /* Mark EQ page dirty for migration */ >> + mark_page_dirty(vcpu->kvm, gpa_to_gfn(q->guest_qpage)); >> + } >> + return 0; >> +} >> + >> +static int kvmppc_xive_native_save_eq_pages(struct kvmppc_xive *xive) >> +{ >> + struct kvm *kvm = xive->kvm; >> + struct kvm_vcpu *vcpu; >> + unsigned int i; >> + >> + pr_devel("%s\n", __func__); >> + >> + mutex_lock(&kvm->lock); >> + kvm_for_each_vcpu(i, vcpu, kvm) { >> + kvmppc_xive_native_vcpu_save_eq_pages(vcpu); >> + } >> + mutex_unlock(&kvm->lock); >> + >> + return 0; >> +} >> + >> static int xive_native_validate_queue_size(u32 qsize) >> { >> switch (qsize) { >> @@ -498,6 +535,8 @@ static int kvmppc_xive_native_set_attr(struct kvm_device *dev, >> switch (attr->attr) { >> case KVM_DEV_XIVE_VC_BASE: >> return kvmppc_xive_native_set_vc_base(xive, attr->addr); >> + case KVM_DEV_XIVE_SAVE_EQ_PAGES: >> + return kvmppc_xive_native_save_eq_pages(xive); >> } >> break; >> case KVM_DEV_XIVE_GRP_SOURCES: >> @@ -538,6 +577,7 @@ static int kvmppc_xive_native_has_attr(struct kvm_device *dev, >> case KVM_DEV_XIVE_GET_ESB_FD: >> case KVM_DEV_XIVE_GET_TIMA_FD: >> case KVM_DEV_XIVE_VC_BASE: >> + case KVM_DEV_XIVE_SAVE_EQ_PAGES: >> return 0; >> } >> break; >
On Mon, Feb 04, 2019 at 04:46:00PM +0100, Cédric Le Goater wrote: > On 2/4/19 6:18 AM, David Gibson wrote: > > On Mon, Jan 07, 2019 at 07:43:26PM +0100, Cédric Le Goater wrote: > >> When the VM is stopped in a migration sequence, the sources are masked > >> and the XIVE IC is synced to stabilize the EQs. When done, the KVM > >> ioctl KVM_DEV_XIVE_SAVE_EQ_PAGES is called to mark dirty the EQ pages. > >> > >> The migration can then transfer the remaining dirty pages to the > >> destination and start collecting the state of the devices. > > > > Is there a reason to make this a separate step from the SYNC > > operation? > > Hmm, apart from letting QEMU orchestrate the migration step by step, no. > > We could merge the SYNC and the SAVE_EQ_PAGES in a single KVM operation. > I think that should be fine. I think that makes sense. SYNC is supposed to complete delivery of any in-flight interrupts, and to me writing to the queue page and marking it dirty as a result is a logical part of that. > However, it does not make sense to call this operation without the VM > being stopped. I wonder how this can checked from KVM. May be we > can't. I don't think it matters. qemu is allowed to shoot itself in the foot.
diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h index f3b859223b80..1a8740629acf 100644 --- a/arch/powerpc/include/uapi/asm/kvm.h +++ b/arch/powerpc/include/uapi/asm/kvm.h @@ -680,6 +680,7 @@ struct kvm_ppc_cpu_char { #define KVM_DEV_XIVE_GET_ESB_FD 1 #define KVM_DEV_XIVE_GET_TIMA_FD 2 #define KVM_DEV_XIVE_VC_BASE 3 +#define KVM_DEV_XIVE_SAVE_EQ_PAGES 4 #define KVM_DEV_XIVE_GRP_SOURCES 2 /* 64-bit source attributes */ #define KVM_DEV_XIVE_GRP_SYNC 3 /* 64-bit source attributes */ diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c index a8052867afc1..f2de1bcf3b35 100644 --- a/arch/powerpc/kvm/book3s_xive_native.c +++ b/arch/powerpc/kvm/book3s_xive_native.c @@ -373,6 +373,43 @@ static int kvmppc_xive_native_get_tima_fd(struct kvmppc_xive *xive, u64 addr) return put_user(ret, ubufp); } +static int kvmppc_xive_native_vcpu_save_eq_pages(struct kvm_vcpu *vcpu) +{ + struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu; + unsigned int prio; + + if (!xc) + return -ENOENT; + + for (prio = 0; prio < KVMPPC_XIVE_Q_COUNT; prio++) { + struct xive_q *q = &xc->queues[prio]; + + if (!q->qpage) + continue; + + /* Mark EQ page dirty for migration */ + mark_page_dirty(vcpu->kvm, gpa_to_gfn(q->guest_qpage)); + } + return 0; +} + +static int kvmppc_xive_native_save_eq_pages(struct kvmppc_xive *xive) +{ + struct kvm *kvm = xive->kvm; + struct kvm_vcpu *vcpu; + unsigned int i; + + pr_devel("%s\n", __func__); + + mutex_lock(&kvm->lock); + kvm_for_each_vcpu(i, vcpu, kvm) { + kvmppc_xive_native_vcpu_save_eq_pages(vcpu); + } + mutex_unlock(&kvm->lock); + + return 0; +} + static int xive_native_validate_queue_size(u32 qsize) { switch (qsize) { @@ -498,6 +535,8 @@ static int kvmppc_xive_native_set_attr(struct kvm_device *dev, switch (attr->attr) { case KVM_DEV_XIVE_VC_BASE: return kvmppc_xive_native_set_vc_base(xive, attr->addr); + case KVM_DEV_XIVE_SAVE_EQ_PAGES: + return kvmppc_xive_native_save_eq_pages(xive); } 
break; case KVM_DEV_XIVE_GRP_SOURCES: @@ -538,6 +577,7 @@ static int kvmppc_xive_native_has_attr(struct kvm_device *dev, case KVM_DEV_XIVE_GET_ESB_FD: case KVM_DEV_XIVE_GET_TIMA_FD: case KVM_DEV_XIVE_VC_BASE: + case KVM_DEV_XIVE_SAVE_EQ_PAGES: return 0; } break;
When the VM is stopped in a migration sequence, the sources are masked and the XIVE IC is synced to stabilize the EQs. When done, the KVM ioctl KVM_DEV_XIVE_SAVE_EQ_PAGES is called to mark the EQ pages dirty. The migration can then transfer the remaining dirty pages to the destination and start collecting the state of the devices. Signed-off-by: Cédric Le Goater <clg@kaod.org> --- arch/powerpc/include/uapi/asm/kvm.h | 1 + arch/powerpc/kvm/book3s_xive_native.c | 40 +++++++++++++++++++++++++++ 2 files changed, 41 insertions(+)