@@ -269,6 +269,8 @@ union kvmppc_one_reg {
u64 addr;
u64 length;
} vpaval;
+ u32 xeqval[8];
+ u64 vpval[2];
};
struct kvmppc_ops {
@@ -594,6 +596,10 @@ extern int kvmppc_xive_native_cleanup_vcpu(struct kvm_vcpu *vcpu);
extern void kvmppc_xive_native_init_module(void);
extern void kvmppc_xive_native_exit_module(void);
extern int kvmppc_xive_hcall(struct kvm_vcpu *vcpu, u32 cmd);
+extern int kvmppc_xive_get_vp(struct kvm_vcpu *vcpu, union kvmppc_one_reg *val);
+extern int kvmppc_xive_set_vp(struct kvm_vcpu *vcpu, union kvmppc_one_reg *val);
+extern int kvmppc_xive_get_vp_queue(struct kvm_vcpu *vcpu, int priority, union kvmppc_one_reg *val);
+extern int kvmppc_xive_set_vp_queue(struct kvm_vcpu *vcpu, int priority, union kvmppc_one_reg *val);
#else
static inline int kvmppc_xive_set_xive(struct kvm *kvm, u32 irq, u32 server,
@@ -627,6 +633,10 @@ static inline void kvmppc_xive_native_init_module(void) { }
static inline void kvmppc_xive_native_exit_module(void) { }
static inline int kvmppc_xive_hcall(struct kvm_vcpu *vcpu, u32 cmd)
{ return 0; }
+static inline int kvmppc_xive_get_vp(struct kvm_vcpu *vcpu, union kvmppc_one_reg *val) { return 0; }
+static inline int kvmppc_xive_set_vp(struct kvm_vcpu *vcpu, union kvmppc_one_reg *val) { return -ENOENT; }
+static inline int kvmppc_xive_get_vp_queue(struct kvm_vcpu *vcpu, int priority, union kvmppc_one_reg *val) { return 0; }
+static inline int kvmppc_xive_set_vp_queue(struct kvm_vcpu *vcpu, int priority, union kvmppc_one_reg *val) { return -ENOENT; }
#endif /* CONFIG_KVM_XIVE */
@@ -480,6 +480,16 @@ struct kvm_ppc_cpu_char {
#define KVM_REG_PPC_ICP_PPRI_SHIFT 16 /* pending irq priority */
#define KVM_REG_PPC_ICP_PPRI_MASK 0xff
+#define KVM_REG_PPC_VP_STATE (KVM_REG_PPC | KVM_REG_SIZE_U128 | 0x8d)
+#define KVM_REG_PPC_VP_EQ0 (KVM_REG_PPC | KVM_REG_SIZE_U256 | 0x8e)
+#define KVM_REG_PPC_VP_EQ1 (KVM_REG_PPC | KVM_REG_SIZE_U256 | 0x8f)
+#define KVM_REG_PPC_VP_EQ2 (KVM_REG_PPC | KVM_REG_SIZE_U256 | 0x90)
+#define KVM_REG_PPC_VP_EQ3 (KVM_REG_PPC | KVM_REG_SIZE_U256 | 0x91)
+#define KVM_REG_PPC_VP_EQ4 (KVM_REG_PPC | KVM_REG_SIZE_U256 | 0x92)
+#define KVM_REG_PPC_VP_EQ5 (KVM_REG_PPC | KVM_REG_SIZE_U256 | 0x93)
+#define KVM_REG_PPC_VP_EQ6 (KVM_REG_PPC | KVM_REG_SIZE_U256 | 0x94)
+#define KVM_REG_PPC_VP_EQ7 (KVM_REG_PPC | KVM_REG_SIZE_U256 | 0x95)
+
/* Device control API: PPC-specific devices */
#define KVM_DEV_MPIC_GRP_MISC 1
#define KVM_DEV_MPIC_BASE_ADDR 0 /* 64-bit */
@@ -679,6 +689,7 @@ struct kvm_ppc_cpu_char {
#define KVM_DEV_XIVE_GET_ESB_FD 1
#define KVM_DEV_XIVE_GET_TIMA_FD 2
#define KVM_DEV_XIVE_VC_BASE 3
+#define KVM_DEV_XIVE_GRP_IVE 3
/* Layout of 64-bit XIVE source attribute values */
#define KVM_XIVE_LEVEL_SENSITIVE (1ULL << 0)
@@ -625,6 +625,29 @@ int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id,
*val = get_reg_val(id, kvmppc_xics_get_icp(vcpu));
break;
#endif /* CONFIG_KVM_XICS */
+#ifdef CONFIG_KVM_XIVE
+	case KVM_REG_PPC_VP_STATE:	/* vCPU interrupt state (2 x u64, see vpval[]) */
+		if (!vcpu->arch.xive_vcpu) {
+			r = -ENXIO;
+			break;
+		}
+		if (xive_enabled())
+			r = kvmppc_xive_get_vp(vcpu, val);
+		else
+			r = -ENXIO;	/* host is not using the XIVE controller */
+		break;
+	case KVM_REG_PPC_VP_EQ0 ... KVM_REG_PPC_VP_EQ7:	/* one event queue per priority */
+		if (!vcpu->arch.xive_vcpu) {
+			r = -ENXIO;
+			break;
+		}
+		if (xive_enabled()) {
+			i = id - KVM_REG_PPC_VP_EQ0;	/* queue priority 0..7 */
+			r = kvmppc_xive_get_vp_queue(vcpu, i, val);
+		} else
+			r = -ENXIO;
+		break;
+#endif /* CONFIG_KVM_XIVE */
case KVM_REG_PPC_FSCR:
*val = get_reg_val(id, vcpu->arch.fscr);
break;
@@ -698,6 +721,29 @@ int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id,
r = kvmppc_xics_set_icp(vcpu, set_reg_val(id, *val));
break;
#endif /* CONFIG_KVM_XICS */
+#ifdef CONFIG_KVM_XIVE
+	case KVM_REG_PPC_VP_STATE:
+		if (!vcpu->arch.xive_vcpu) {
+			r = -ENXIO;
+			break;
+		}
+		if (xive_enabled())
+			r = kvmppc_xive_set_vp(vcpu, val);
+		else
+			r = -ENXIO;
+		break;
+	case KVM_REG_PPC_VP_EQ0 ... KVM_REG_PPC_VP_EQ7:
+		if (!vcpu->arch.xive_vcpu) {
+			r = -ENXIO;
+			break;
+		}
+		if (xive_enabled()) {
+			i = id - KVM_REG_PPC_VP_EQ0;
+			r = kvmppc_xive_set_vp_queue(vcpu, i, val); /* propagate failure to user space */
+		} else
+			r = -ENXIO;
+		break;
+#endif /* CONFIG_KVM_XIVE */
case KVM_REG_PPC_FSCR:
vcpu->arch.fscr = set_reg_val(id, *val);
break;
@@ -189,6 +189,233 @@ static int xive_native_validate_queue_size(u32 qsize)
}
}
+int kvmppc_xive_get_vp(struct kvm_vcpu *vcpu, union kvmppc_one_reg *val)
+{
+	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+	u32 version;
+	int rc;
+
+	if (!kvmppc_xive_enabled(vcpu))
+		return -EPERM;	/* vCPU is not using the XIVE interrupt mode */
+
+	if (!xc)
+		return -ENOENT;	/* no XIVE vCPU state attached */
+
+	val->vpval[0] = vcpu->arch.xive_saved_state.w01;	/* software-saved thread context word */
+
+	rc = xive_native_get_vp_state(xc->vp_id, &version, &val->vpval[1]);	/* OPAL-held VP state */
+	if (rc)
+		return rc;
+
+	if (XIVE_STATE_COMPAT(version) > 1) {	/* only compat level <= 1 is understood here */
+		pr_err("invalid OPAL state version %08x\n", version);
+		return -EIO;
+	}
+
+	pr_devel("%s NSR=%02x CPPR=%02x IBP=%02x PIPR=%02x w01=%016llx w2=%08x opal=%016llx\n",
+		 __func__,
+		 vcpu->arch.xive_saved_state.nsr,
+		 vcpu->arch.xive_saved_state.cppr,
+		 vcpu->arch.xive_saved_state.ipb,
+		 vcpu->arch.xive_saved_state.pipr,
+		 vcpu->arch.xive_saved_state.w01,
+		 (u32) vcpu->arch.xive_cam_word, val->vpval[1]);
+
+	return 0;
+}
+
+int kvmppc_xive_set_vp(struct kvm_vcpu *vcpu, union kvmppc_one_reg *val)
+{
+	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+	struct kvmppc_xive *xive = vcpu->kvm->arch.xive;
+	u32 version = XIVE_STATE_VERSION;
+	int rc;
+
+	pr_devel("%s w01=%016llx vp=%016llx\n", __func__, val->vpval[0],
+		 val->vpval[1]);
+
+	if (!kvmppc_xive_enabled(vcpu))
+		return -EPERM;	/* vCPU is not using the XIVE interrupt mode */
+
+	if (!xc || !xive)
+		return -ENOENT;	/* device or vCPU state not created */
+
+	/* We can't update the state of a "pushed" VCPU */
+	if (WARN_ON(vcpu->arch.xive_pushed))
+		return -EIO;
+
+	/* TODO: restore only IPB and CPPR instead of the whole word? */
+	vcpu->arch.xive_saved_state.w01 = val->vpval[0];
+
+	rc = xive_native_set_vp_state(xc->vp_id, version, val->vpval[1]);	/* push VP state back to OPAL */
+	if (rc)
+		return rc;
+
+	return 0;
+}
+
+int kvmppc_xive_get_vp_queue(struct kvm_vcpu *vcpu, int priority,
+			     union kvmppc_one_reg *val)
+{
+	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+	struct xive_q *q;
+	u64 qpage;
+	u64 qsize;
+	u64 qeoi_page;
+	u32 escalate_irq;
+	u64 qflags;
+	u32 version;
+	u64 qw1;
+	int rc;
+
+	if (!kvmppc_xive_enabled(vcpu))
+		return -EPERM;
+
+	if (!xc)
+		return -ENOENT;	/* checked before any xc dereference */
+
+	pr_devel("%s vcpu %d priority %d\n", __func__, xc->server_num,
+		 priority);
+
+	if (priority != xive_prio_from_guest(priority) || priority == MASKED) {
+		pr_err("Trying to retrieve info from queue %d for VCPU %d\n",
+		       priority, xc->server_num);
+		return -EINVAL;
+	}
+	q = &xc->queues[priority];
+
+	memset(val->xeqval, 0, sizeof(val->xeqval));
+
+	if (!q->qpage)	/* an unconfigured queue reads back as all zeroes */
+		return 0;
+
+	rc = xive_native_get_queue_info(xc->vp_id, priority, &qpage, &qsize,
+					&qeoi_page, &escalate_irq, &qflags);
+	if (rc)
+		return rc;
+
+	rc = xive_native_get_queue_state(xc->vp_id, priority, &version, &qw1);
+	if (rc)
+		return rc;
+
+	if (XIVE_STATE_COMPAT(version) > 1) {	/* only compat level <= 1 is understood here */
+		pr_err("invalid OPAL state version %08x\n", version);
+		return -EIO;
+	}
+
+	val->xeqval[0] = 0;
+	if (qflags & OPAL_XIVE_EQ_ENABLED)
+		val->xeqval[0] |= EQ_W0_VALID|EQ_W0_ENQUEUE;
+	if (qflags & OPAL_XIVE_EQ_ALWAYS_NOTIFY)
+		val->xeqval[0] |= EQ_W0_UCOND_NOTIFY;
+	if (qflags & OPAL_XIVE_EQ_ESCALATE)
+		val->xeqval[0] |= EQ_W0_ESCALATE_CTL;
+	val->xeqval[0] |= SETFIELD(EQ_W0_QSIZE, 0ul, qsize - 12);	/* encoded as log2(size) - 12 */
+
+	val->xeqval[1] = qw1 & 0xffffffff;
+	val->xeqval[2] = (q->guest_qpage >> 32) & 0x0fffffff;
+	val->xeqval[3] = q->guest_qpage & 0xffffffff;
+	val->xeqval[4] = 0;
+	val->xeqval[5] = 0;
+	val->xeqval[6] = SETFIELD(EQ_W6_NVT_BLOCK, 0ul, 0ul) |
+		SETFIELD(EQ_W6_NVT_INDEX, 0ul, xc->server_num);
+	val->xeqval[7] = SETFIELD(EQ_W7_F0_PRIORITY, 0ul, priority);
+
+	/* Mark EQ page dirty for migration */
+	mark_page_dirty(vcpu->kvm, gpa_to_gfn(q->guest_qpage));
+
+	return 0;
+}
+
+int kvmppc_xive_set_vp_queue(struct kvm_vcpu *vcpu, int priority,
+			     union kvmppc_one_reg *val)
+{
+	struct kvm *kvm = vcpu->kvm;
+	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+	struct kvmppc_xive *xive = vcpu->kvm->arch.xive;
+	u32 qsize;
+	u64 qpage;
+	u32 server;
+	u8 prio;
+	int rc;
+	__be32 *qaddr = NULL;
+	struct page *page;
+	struct xive_q *q;
+	u32 version = XIVE_STATE_VERSION;
+
+	if (!xc || !xive)	/* NOTE(review): no kvmppc_xive_enabled() check, unlike the get side -- confirm */
+		return -ENOENT;
+
+	pr_devel("%s vcpu %d priority %d\n", __func__, xc->server_num,
+		 priority);
+
+	/*
+	 * Check that we are not trying to configure queues reserved
+	 * for the hypervisor
+	 */
+	if (priority != xive_prio_from_guest(priority) || priority == MASKED) {
+		pr_err("Trying to restore invalid queue %d for VCPU %d\n",
+		       priority, xc->server_num);
+		return -EINVAL;
+	}
+
+	qsize = GETFIELD(EQ_W0_QSIZE, val->xeqval[0]) + 12;
+	qpage = (((u64)(val->xeqval[2] & 0x0fffffff)) << 32) | val->xeqval[3];
+	server = GETFIELD(EQ_W6_NVT_INDEX, val->xeqval[6]);
+	prio = GETFIELD(EQ_W7_F0_PRIORITY, val->xeqval[7]);
+
+	if (xc->server_num != server) {	/* EQ may belong to another vCPU; retarget */
+		vcpu = kvmppc_xive_find_server(kvm, server);
+		if (!vcpu) {
+			pr_debug("Can't find server %d\n", server);
+			return -EINVAL;
+		}
+		xc = vcpu->arch.xive_vcpu;
+	}
+
+	if (priority != prio) {
+		pr_err("invalid state for queue %d for VCPU %d\n",
+		       priority, xc->server_num);
+		return -EIO;
+	}
+	q = &xc->queues[prio];
+
+	rc = xive_native_validate_queue_size(qsize);
+	if (rc || !qsize) {
+		pr_err("invalid queue size %d\n", qsize);
+		return rc;
+	}
+
+	page = gfn_to_page(kvm, gpa_to_gfn(qpage));
+	if (is_error_page(page)) {
+		pr_debug("Couldn't get guest page for %llx!\n", qpage);
+		return -ENOMEM;
+	}
+	qaddr = page_to_virt(page) + (qpage & ~PAGE_MASK);
+	q->guest_qpage = qpage;
+
+	rc = xive_native_configure_queue(xc->vp_id, q, prio, (__be32 *) qaddr,
+					 qsize, true);
+	if (rc) {
+		pr_err("Failed to configure queue %d for VCPU %d: %d\n",
+		       prio, xc->server_num, rc);
+		put_page(page);	/* drop the reference taken by gfn_to_page() */
+		return rc;
+	}
+
+	rc = xive_native_set_queue_state(xc->vp_id, prio, version,
+					 val->xeqval[1]);
+	if (rc)
+		goto error;
+
+	rc = kvmppc_xive_attach_escalation(vcpu, prio);
+error:
+	if (rc)
+		xive_native_cleanup_queue(vcpu, prio);	/* NOTE(review): confirm this drops the page ref */
+	return rc;
+}
+
+
static int kvmppc_xive_native_set_source(struct kvmppc_xive *xive, long irq,
u64 addr)
{
@@ -328,6 +555,94 @@ static int kvmppc_xive_native_set_source_config(struct kvmppc_xive *xive,
return rc;
}
+static int kvmppc_xive_native_set_ive(struct kvmppc_xive *xive, long irq,
+				      u64 addr)	/* restore one source's targeting from a user IVE word */
+{
+	struct kvmppc_xive_src_block *sb;
+	struct kvmppc_xive_irq_state *state;
+	u64 __user *ubufp = (u64 __user *) addr;
+	u16 src;
+	u64 ive;
+	u32 eq_idx;
+	u32 server;
+	u8 priority;
+	u32 eisn;
+
+	pr_devel("%s irq=0x%lx\n", __func__, irq);
+
+	sb = kvmppc_xive_find_source(xive, irq, &src);
+	if (!sb)
+		return -ENOENT;	/* unknown IRQ number */
+
+	state = &sb->irq_state[src];
+
+	if (!state->valid)
+		return -ENOENT;
+
+	if (get_user(ive, ubufp)) {
+		pr_err("fault getting user info !\n");
+		return -EFAULT;
+	}
+
+	if (!(ive & IVE_VALID) || ive & IVE_MASKED) {	/* NOTE(review): -EINVAL may be more apt than -EPERM -- confirm */
+		pr_err("invalid IVE %016llx for IRQ %lx\n", ive, irq);
+		return -EPERM;
+	}
+
+	/* QEMU encoding of EQ index */
+	eq_idx = GETFIELD(IVE_EQ_INDEX, ive);
+	server = eq_idx >> 3;	/* upper bits: target vCPU server */
+	priority = eq_idx & 0x7;	/* low 3 bits: priority */
+
+	eisn = GETFIELD(IVE_EQ_DATA, ive);	/* EQ data word (eisn) */
+
+	return kvmppc_xive_native_set_source_config(xive, sb, state, server,
+						    priority, eisn);
+}
+
+static int kvmppc_xive_native_get_ive(struct kvmppc_xive *xive, long irq,
+				      u64 addr)	/* report one source's targeting as a user IVE word */
+{
+	struct kvmppc_xive_src_block *sb;
+	struct kvmppc_xive_irq_state *state;
+	u64 __user *ubufp = (u64 __user *) addr;
+	u16 src;
+	u64 ive;
+	u32 eq_idx;
+
+	pr_devel("%s irq=0x%lx\n", __func__, irq);
+
+	sb = kvmppc_xive_find_source(xive, irq, &src);
+	if (!sb)
+		return -ENOENT;	/* unknown IRQ number */
+
+	state = &sb->irq_state[src];
+
+	if (!state->valid)
+		return -ENOENT;
+
+	ive = IVE_VALID;
+
+	arch_spin_lock(&sb->lock);	/* act_server/act_priority/eisn read under sb->lock */
+
+	if (state->act_priority == MASKED)
+		ive |= IVE_MASKED;
+	else {
+		/* QEMU encoding of EQ index */
+		eq_idx = ((state->act_server) << 3) |
+			((state->act_priority) & 0x7);
+		ive |= SETFIELD(IVE_EQ_BLOCK, 0ul, 0ul) |
+			SETFIELD(IVE_EQ_INDEX, 0ul, eq_idx) |
+			SETFIELD(IVE_EQ_DATA, 0ul, state->eisn);
+	}
+	arch_spin_unlock(&sb->lock);
+
+	if (put_user(ive, ubufp))
+		return -EFAULT;
+
+	return 0;
+}
+
static int xive_native_esb_fault(struct vm_fault *vmf)
{
struct vm_area_struct *vma = vmf->vma;
@@ -455,6 +770,8 @@ static int kvmppc_xive_native_set_attr(struct kvm_device *dev,
case KVM_DEV_XIVE_GRP_SOURCES:
return kvmppc_xive_native_set_source(xive, attr->attr,
attr->addr);
+ case KVM_DEV_XIVE_GRP_IVE:
+ return kvmppc_xive_native_set_ive(xive, attr->attr, attr->addr);
case KVM_DEV_XIVE_GRP_CTRL:
switch (attr->attr) {
case KVM_DEV_XIVE_VC_BASE:
@@ -471,6 +788,8 @@ static int kvmppc_xive_native_get_attr(struct kvm_device *dev,
struct kvmppc_xive *xive = dev->private;
switch (attr->group) {
+ case KVM_DEV_XIVE_GRP_IVE:
+ return kvmppc_xive_native_get_ive(xive, attr->attr, attr->addr);
case KVM_DEV_XIVE_GRP_CTRL:
switch (attr->attr) {
case KVM_DEV_XIVE_GET_ESB_FD:
@@ -490,6 +809,7 @@ static int kvmppc_xive_native_has_attr(struct kvm_device *dev,
{
switch (attr->group) {
case KVM_DEV_XIVE_GRP_SOURCES:
+ case KVM_DEV_XIVE_GRP_IVE:
if (attr->attr >= KVMPPC_XIVE_FIRST_IRQ &&
attr->attr < KVMPPC_XIVE_NR_IRQS)
return 0;
The states we need to capture are: the IVE table defining the source targeting, the main interrupt management registers of each vCPU, and the EQs. The EQ page is also marked dirty to make sure it is transferred. This is work in progress: we still need to make sure the HW has reached a quiescence point. Signed-off-by: Cédric Le Goater <clg@kaod.org> --- arch/powerpc/include/asm/kvm_ppc.h | 10 ++ arch/powerpc/include/uapi/asm/kvm.h | 11 ++ arch/powerpc/kvm/book3s.c | 46 +++++ arch/powerpc/kvm/book3s_xive_native.c | 320 ++++++++++++++++++++++++++++++++++ 4 files changed, 387 insertions(+)