[RFC,14/16] KVM: PPC: Book3S HV: add support for XIVE native migration

Message ID 20180423164341.15767-15-clg@kaod.org
State RFC
Headers show
Series
  • KVM: PPC: Book3S HV: add XIVE native exploitation mode
Related show

Commit Message

Cédric Le Goater April 23, 2018, 4:43 p.m.
The states we need to capture are:

 - the IVE table defining the source targeting
 - the main interrupt management registers for each vCPU
 - the EQs. Also mark the EQ page dirty to make sure it is transferred.

This is work in progress. We need to make sure the HW has reached a
quiescence point.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
---
 arch/powerpc/include/asm/kvm_ppc.h    |  10 ++
 arch/powerpc/include/uapi/asm/kvm.h   |  11 ++
 arch/powerpc/kvm/book3s.c             |  46 +++++
 arch/powerpc/kvm/book3s_xive_native.c | 320 ++++++++++++++++++++++++++++++++++
 4 files changed, 387 insertions(+)

Patch

diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index b5fceb4d7776..748518c7bf70 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -269,6 +269,8 @@  union kvmppc_one_reg {
 		u64	addr;
 		u64	length;
 	}	vpaval;
+	u32	xeqval[8];
+	u64	vpval[2];
 };
 
 struct kvmppc_ops {
@@ -594,6 +596,10 @@  extern int kvmppc_xive_native_cleanup_vcpu(struct kvm_vcpu *vcpu);
 extern void kvmppc_xive_native_init_module(void);
 extern void kvmppc_xive_native_exit_module(void);
 extern int kvmppc_xive_hcall(struct kvm_vcpu *vcpu, u32 cmd);
+extern int kvmppc_xive_get_vp(struct kvm_vcpu *vcpu, union kvmppc_one_reg *val);
+extern int kvmppc_xive_set_vp(struct kvm_vcpu *vcpu, union kvmppc_one_reg *val);
+extern int kvmppc_xive_get_vp_queue(struct kvm_vcpu *vcpu, int priority, union kvmppc_one_reg *val);
+extern int kvmppc_xive_set_vp_queue(struct kvm_vcpu *vcpu, int priority, union kvmppc_one_reg *val);
 
 #else
 static inline int kvmppc_xive_set_xive(struct kvm *kvm, u32 irq, u32 server,
@@ -627,6 +633,10 @@  static inline void kvmppc_xive_native_init_module(void) { }
 static inline void kvmppc_xive_native_exit_module(void) { }
 static inline int kvmppc_xive_hcall(struct kvm_vcpu *vcpu, u32 cmd)
 	{ return 0; }
+static inline int kvmppc_xive_get_vp(struct kvm_vcpu *vcpu, union kvmppc_one_reg *val) { return 0; }
+static inline int kvmppc_xive_set_vp(struct kvm_vcpu *vcpu, union kvmppc_one_reg *val) { return -ENOENT; }
+static inline int kvmppc_xive_get_vp_queue(struct kvm_vcpu *vcpu, int priority, union kvmppc_one_reg *val) { return 0; }
+static inline int kvmppc_xive_set_vp_queue(struct kvm_vcpu *vcpu, int priority, union kvmppc_one_reg *val) { return -ENOENT; }
 
 #endif /* CONFIG_KVM_XIVE */
 
diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h
index 6e120129dfe6..8a2be937a98e 100644
--- a/arch/powerpc/include/uapi/asm/kvm.h
+++ b/arch/powerpc/include/uapi/asm/kvm.h
@@ -480,6 +480,16 @@  struct kvm_ppc_cpu_char {
 #define  KVM_REG_PPC_ICP_PPRI_SHIFT	16	/* pending irq priority */
 #define  KVM_REG_PPC_ICP_PPRI_MASK	0xff
 
+#define KVM_REG_PPC_VP_STATE	(KVM_REG_PPC | KVM_REG_SIZE_U128 | 0x8d)
+#define KVM_REG_PPC_VP_EQ0	(KVM_REG_PPC | KVM_REG_SIZE_U256 | 0x8e)
+#define KVM_REG_PPC_VP_EQ1	(KVM_REG_PPC | KVM_REG_SIZE_U256 | 0x8f)
+#define KVM_REG_PPC_VP_EQ2	(KVM_REG_PPC | KVM_REG_SIZE_U256 | 0x90)
+#define KVM_REG_PPC_VP_EQ3	(KVM_REG_PPC | KVM_REG_SIZE_U256 | 0x91)
+#define KVM_REG_PPC_VP_EQ4	(KVM_REG_PPC | KVM_REG_SIZE_U256 | 0x92)
+#define KVM_REG_PPC_VP_EQ5	(KVM_REG_PPC | KVM_REG_SIZE_U256 | 0x93)
+#define KVM_REG_PPC_VP_EQ6	(KVM_REG_PPC | KVM_REG_SIZE_U256 | 0x94)
+#define KVM_REG_PPC_VP_EQ7	(KVM_REG_PPC | KVM_REG_SIZE_U256 | 0x95)
+
 /* Device control API: PPC-specific devices */
 #define KVM_DEV_MPIC_GRP_MISC		1
 #define   KVM_DEV_MPIC_BASE_ADDR	0	/* 64-bit */
@@ -679,6 +689,7 @@  struct kvm_ppc_cpu_char {
 #define   KVM_DEV_XIVE_GET_ESB_FD	1
 #define   KVM_DEV_XIVE_GET_TIMA_FD	2
 #define   KVM_DEV_XIVE_VC_BASE		3
+#define KVM_DEV_XIVE_GRP_IVE		3
 
 /* Layout of 64-bit XIVE source attribute values */
 #define  KVM_XIVE_LEVEL_SENSITIVE	(1ULL << 0)
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index 19c0187cada3..fc745233b2d9 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -625,6 +625,29 @@  int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id,
 				*val = get_reg_val(id, kvmppc_xics_get_icp(vcpu));
 			break;
 #endif /* CONFIG_KVM_XICS */
+#ifdef CONFIG_KVM_XIVE
+		case KVM_REG_PPC_VP_STATE:
+			if (!vcpu->arch.xive_vcpu) {
+				r = -ENXIO;
+				break;
+			}
+			if (xive_enabled())
+				r = kvmppc_xive_get_vp(vcpu, val);
+			else
+				r = -ENXIO;
+			break;
+		case KVM_REG_PPC_VP_EQ0 ... KVM_REG_PPC_VP_EQ7:
+			if (!vcpu->arch.xive_vcpu) {
+				r = -ENXIO;
+				break;
+			}
+			if (xive_enabled()) {
+				i = id - KVM_REG_PPC_VP_EQ0;
+				r = kvmppc_xive_get_vp_queue(vcpu, i, val);
+			} else
+				r = -ENXIO;
+			break;
+#endif /* CONFIG_KVM_XIVE */
 		case KVM_REG_PPC_FSCR:
 			*val = get_reg_val(id, vcpu->arch.fscr);
 			break;
@@ -698,6 +721,29 @@  int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id,
 				r = kvmppc_xics_set_icp(vcpu, set_reg_val(id, *val));
 			break;
 #endif /* CONFIG_KVM_XICS */
+#ifdef CONFIG_KVM_XIVE
+		case KVM_REG_PPC_VP_STATE:
+			if (!vcpu->arch.xive_vcpu) {
+				r = -ENXIO;
+				break;
+			}
+			if (xive_enabled())
+				r = kvmppc_xive_set_vp(vcpu, val);
+			else
+				r = -ENXIO;
+			break;
+		case KVM_REG_PPC_VP_EQ0 ... KVM_REG_PPC_VP_EQ7:
+			if (!vcpu->arch.xive_vcpu) {
+				r = -ENXIO;
+				break;
+			}
+			if (xive_enabled()) {
+				i = id - KVM_REG_PPC_VP_EQ0;
+				r = kvmppc_xive_set_vp_queue(vcpu, i, val);
+			} else
+				r = -ENXIO;
+			break;
+#endif /* CONFIG_KVM_XIVE */
 		case KVM_REG_PPC_FSCR:
 			vcpu->arch.fscr = set_reg_val(id, *val);
 			break;
diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c
index d705de3c5d65..056d4669a506 100644
--- a/arch/powerpc/kvm/book3s_xive_native.c
+++ b/arch/powerpc/kvm/book3s_xive_native.c
@@ -189,6 +189,233 @@  static int xive_native_validate_queue_size(u32 qsize)
 	}
 }
 
+int kvmppc_xive_get_vp(struct kvm_vcpu *vcpu, union kvmppc_one_reg *val)
+{
+	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+	u32 version;
+	int rc;
+
+	if (!kvmppc_xive_enabled(vcpu))
+		return -EPERM;
+
+	if (!xc)
+		return -ENOENT;
+
+	val->vpval[0] = vcpu->arch.xive_saved_state.w01;
+
+	rc = xive_native_get_vp_state(xc->vp_id, &version, &val->vpval[1]);
+	if (rc)
+		return rc;
+
+	if (XIVE_STATE_COMPAT(version) > 1) {
+		pr_err("invalid OPAL state version %08x\n", version);
+		return -EIO;
+	}
+
+	pr_devel("%s NSR=%02x CPPR=%02x IBP=%02x PIPR=%02x w01=%016llx w2=%08x opal=%016llx\n",
+		 __func__,
+		 vcpu->arch.xive_saved_state.nsr,
+		 vcpu->arch.xive_saved_state.cppr,
+		 vcpu->arch.xive_saved_state.ipb,
+		 vcpu->arch.xive_saved_state.pipr,
+		 vcpu->arch.xive_saved_state.w01,
+		 (u32) vcpu->arch.xive_cam_word, val->vpval[1]);
+
+	return 0;
+}
+
+int kvmppc_xive_set_vp(struct kvm_vcpu *vcpu, union kvmppc_one_reg *val)
+{
+	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+	struct kvmppc_xive *xive = vcpu->kvm->arch.xive;
+	u32 version = XIVE_STATE_VERSION;
+	int rc;
+
+	pr_devel("%s w01=%016llx vp=%016llx\n", __func__, val->vpval[0],
+		 val->vpval[1]);
+
+	if (!kvmppc_xive_enabled(vcpu))
+		return -EPERM;
+
+	if (!xc || !xive)
+		return -ENOENT;
+
+	/* We can't update the state of a "pushed" VCPU	 */
+	if (WARN_ON(vcpu->arch.xive_pushed))
+		return -EIO;
+
+	/* TODO: only restore IPB and CPPR ? */
+	vcpu->arch.xive_saved_state.w01 = val->vpval[0];
+
+	rc = xive_native_set_vp_state(xc->vp_id, version, val->vpval[1]);
+	if (rc)
+		return rc;
+
+	return 0;
+}
+
+int kvmppc_xive_get_vp_queue(struct kvm_vcpu *vcpu, int priority,
+			     union kvmppc_one_reg *val)
+{
+	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+	struct xive_q *q;
+	u64 qpage;
+	u64 qsize;
+	u64 qeoi_page;
+	u32 escalate_irq;
+	u64 qflags;
+	u32 version;
+	u64 qw1;
+	int rc;
+
+	if (!kvmppc_xive_enabled(vcpu))
+		return -EPERM;
+
+	if (!xc)
+		return -ENOENT;
+
+	pr_debug("%s vcpu %d priority %d\n", __func__, xc->server_num,
+		 priority);
+
+	if (priority != xive_prio_from_guest(priority) || priority == MASKED) {
+		pr_err("Trying to retrieve info from queue %d for VCPU %d\n",
+		       priority, xc->server_num);
+		return -EINVAL;
+	}
+	q = &xc->queues[priority];
+
+	memset(val->xeqval, 0, sizeof(val->xeqval));
+
+	if (!q->qpage)
+		return 0;
+
+	rc = xive_native_get_queue_info(xc->vp_id, priority, &qpage, &qsize,
+					&qeoi_page, &escalate_irq, &qflags);
+	if (rc)
+		return rc;
+
+	rc = xive_native_get_queue_state(xc->vp_id, priority, &version, &qw1);
+	if (rc)
+		return rc;
+
+	if (XIVE_STATE_COMPAT(version) > 1) {
+		pr_err("invalid OPAL state version %08x\n", version);
+		return -EIO;
+	}
+
+	val->xeqval[0] = 0;
+	if (qflags & OPAL_XIVE_EQ_ENABLED)
+		val->xeqval[0] |= EQ_W0_VALID|EQ_W0_ENQUEUE;
+	if (qflags & OPAL_XIVE_EQ_ALWAYS_NOTIFY)
+		val->xeqval[0] |= EQ_W0_UCOND_NOTIFY;
+	if (qflags & OPAL_XIVE_EQ_ESCALATE)
+		val->xeqval[0] |= EQ_W0_ESCALATE_CTL;
+	val->xeqval[0] |= SETFIELD(EQ_W0_QSIZE, 0ul, qsize - 12);
+
+	val->xeqval[1] = qw1 & 0xffffffff;
+	val->xeqval[2] = (q->guest_qpage >> 32) & 0x0fffffff;
+	val->xeqval[3] = q->guest_qpage & 0xffffffff;
+	val->xeqval[4] = 0;
+	val->xeqval[5] = 0;
+	val->xeqval[6] = SETFIELD(EQ_W6_NVT_BLOCK, 0ul, 0ul) |
+		SETFIELD(EQ_W6_NVT_INDEX, 0ul, xc->server_num);
+	val->xeqval[7] = SETFIELD(EQ_W7_F0_PRIORITY, 0ul, priority);
+
+	/* Mark EQ page dirty for migration */
+	mark_page_dirty(vcpu->kvm, gpa_to_gfn(q->guest_qpage));
+
+	return 0;
+}
+
+int kvmppc_xive_set_vp_queue(struct kvm_vcpu *vcpu, int priority,
+			     union kvmppc_one_reg *val)
+{
+	struct kvm *kvm = vcpu->kvm;
+	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+	struct kvmppc_xive *xive = vcpu->kvm->arch.xive;
+	u32 qsize;
+	u64 qpage;
+	u32 server;
+	u8 prio;
+	int rc;
+	__be32 *qaddr = NULL;
+	struct page *page;
+	struct xive_q *q;
+	u32 version = XIVE_STATE_VERSION;
+
+	if (!xc || !xive)
+		return -ENOENT;
+
+	pr_devel("%s vcpu %d priority %d\n", __func__, xc->server_num,
+		 priority);
+
+	/*
+	 * Check that we are not trying to configure queues reserved
+	 * for the hypervisor
+	 */
+	if (priority != xive_prio_from_guest(priority) || priority == MASKED) {
+		pr_err("Trying to restore invalid queue %d for VCPU %d\n",
+		       priority, xc->server_num);
+		return -EINVAL;
+	}
+
+	qsize = GETFIELD(EQ_W0_QSIZE, val->xeqval[0]) + 12;
+	qpage = (((u64)(val->xeqval[2] & 0x0fffffff)) << 32) | val->xeqval[3];
+	server = GETFIELD(EQ_W6_NVT_INDEX, val->xeqval[6]);
+	prio = GETFIELD(EQ_W7_F0_PRIORITY, val->xeqval[7]);
+
+	if (xc->server_num != server) {
+		vcpu = kvmppc_xive_find_server(kvm, server);
+		if (!vcpu) {
+			pr_debug("Can't find server %d\n", server);
+			return -EINVAL;
+		}
+		xc = vcpu->arch.xive_vcpu;
+	}
+
+	if (priority != prio) {
+		pr_err("invalid state for queue %d for VCPU %d\n",
+		       priority, xc->server_num);
+		return -EIO;
+	}
+	q = &xc->queues[prio];
+
+	rc = xive_native_validate_queue_size(qsize);
+	if (rc || !qsize) {
+		pr_err("invalid queue size %d\n", qsize);
+		return rc ? rc : -EINVAL;
+	}
+
+	page = gfn_to_page(kvm, gpa_to_gfn(qpage));
+	if (is_error_page(page)) {
+		pr_debug("Couldn't get guest page for %llx!\n", qpage);
+		return -ENOMEM;
+	}
+	qaddr = page_to_virt(page) + (qpage & ~PAGE_MASK);
+	q->guest_qpage = qpage;
+
+	rc = xive_native_configure_queue(xc->vp_id, q, prio, (__be32 *) qaddr,
+					 qsize, true);
+	if (rc) {
+		pr_err("Failed to configure queue %d for VCPU %d: %d\n",
+		       prio, xc->server_num, rc);
+		put_page(page);
+		return rc;
+	}
+
+	rc = xive_native_set_queue_state(xc->vp_id, prio, version,
+					 val->xeqval[1]);
+	if (rc)
+		goto error;
+
+	rc = kvmppc_xive_attach_escalation(vcpu, prio);
+error:
+	if (rc)
+		xive_native_cleanup_queue(vcpu, prio);
+	return rc;
+}
+
+
 static int kvmppc_xive_native_set_source(struct kvmppc_xive *xive, long irq,
 					 u64 addr)
 {
@@ -328,6 +555,94 @@  static int kvmppc_xive_native_set_source_config(struct kvmppc_xive *xive,
 	return rc;
 }
 
+static int kvmppc_xive_native_set_ive(struct kvmppc_xive *xive, long irq,
+				      u64 addr)
+{
+	struct kvmppc_xive_src_block *sb;
+	struct kvmppc_xive_irq_state *state;
+	u64 __user *ubufp = (u64 __user *) addr;
+	u16 src;
+	u64 ive;
+	u32 eq_idx;
+	u32 server;
+	u8 priority;
+	u32 eisn;
+
+	pr_devel("%s irq=0x%lx\n", __func__, irq);
+
+	sb = kvmppc_xive_find_source(xive, irq, &src);
+	if (!sb)
+		return -ENOENT;
+
+	state = &sb->irq_state[src];
+
+	if (!state->valid)
+		return -ENOENT;
+
+	if (get_user(ive, ubufp)) {
+		pr_err("fault getting user info !\n");
+		return -EFAULT;
+	}
+
+	if (!(ive & IVE_VALID) || ive & IVE_MASKED) {
+		pr_err("invalid IVE %016llx for IRQ %lx\n", ive, irq);
+		return -EPERM;
+	}
+
+	/* QEMU encoding of EQ index */
+	eq_idx = GETFIELD(IVE_EQ_INDEX, ive);
+	server = eq_idx >> 3;
+	priority = eq_idx & 0x7;
+
+	eisn = GETFIELD(IVE_EQ_DATA, ive);
+
+	return kvmppc_xive_native_set_source_config(xive, sb, state, server,
+						    priority, eisn);
+}
+
+static int kvmppc_xive_native_get_ive(struct kvmppc_xive *xive, long irq,
+				      u64 addr)
+{
+	struct kvmppc_xive_src_block *sb;
+	struct kvmppc_xive_irq_state *state;
+	u64 __user *ubufp = (u64 __user *) addr;
+	u16 src;
+	u64 ive;
+	u32 eq_idx;
+
+	pr_devel("%s irq=0x%lx\n", __func__, irq);
+
+	sb = kvmppc_xive_find_source(xive, irq, &src);
+	if (!sb)
+		return -ENOENT;
+
+	state = &sb->irq_state[src];
+
+	if (!state->valid)
+		return -ENOENT;
+
+	ive = IVE_VALID;
+
+	arch_spin_lock(&sb->lock);
+
+	if (state->act_priority == MASKED)
+		ive |= IVE_MASKED;
+	else {
+		/* QEMU encoding of EQ index */
+		eq_idx = ((state->act_server) << 3) |
+			((state->act_priority) & 0x7);
+		ive |= SETFIELD(IVE_EQ_BLOCK, 0ul, 0ul) |
+			SETFIELD(IVE_EQ_INDEX, 0ul, eq_idx) |
+			SETFIELD(IVE_EQ_DATA, 0ul, state->eisn);
+	}
+	arch_spin_unlock(&sb->lock);
+
+	if (put_user(ive, ubufp))
+		return -EFAULT;
+
+	return 0;
+}
+
 static int xive_native_esb_fault(struct vm_fault *vmf)
 {
 	struct vm_area_struct *vma = vmf->vma;
@@ -455,6 +770,8 @@  static int kvmppc_xive_native_set_attr(struct kvm_device *dev,
 	case KVM_DEV_XIVE_GRP_SOURCES:
 		return kvmppc_xive_native_set_source(xive, attr->attr,
 						     attr->addr);
+	case KVM_DEV_XIVE_GRP_IVE:
+		return kvmppc_xive_native_set_ive(xive, attr->attr, attr->addr);
 	case KVM_DEV_XIVE_GRP_CTRL:
 		switch (attr->attr) {
 		case KVM_DEV_XIVE_VC_BASE:
@@ -471,6 +788,8 @@  static int kvmppc_xive_native_get_attr(struct kvm_device *dev,
 	struct kvmppc_xive *xive = dev->private;
 
 	switch (attr->group) {
+	case KVM_DEV_XIVE_GRP_IVE:
+		return kvmppc_xive_native_get_ive(xive, attr->attr, attr->addr);
 	case KVM_DEV_XIVE_GRP_CTRL:
 		switch (attr->attr) {
 		case KVM_DEV_XIVE_GET_ESB_FD:
@@ -490,6 +809,7 @@  static int kvmppc_xive_native_has_attr(struct kvm_device *dev,
 {
 	switch (attr->group) {
 	case KVM_DEV_XIVE_GRP_SOURCES:
+	case KVM_DEV_XIVE_GRP_IVE:
 		if (attr->attr >= KVMPPC_XIVE_FIRST_IRQ &&
 		    attr->attr < KVMPPC_XIVE_NR_IRQS)
 			return 0;