[RFC,05/16] KVM: PPC: Book3S HV: add a new KVM device for the XIVE native exploitation mode

Message ID 20180423164341.15767-6-clg@kaod.org
State New
Headers show
Series
  • KVM: PPC: Book3S HV: add XIVE native exploitation mode
Related show

Commit Message

Cédric Le Goater April 23, 2018, 4:43 p.m.
This is the basic framework for the new KVM device supporting the XIVE
native exploitation mode. The user interface exposes a new capability
and a new KVM device defines to be used by QEMU.

Internally, the interface to the new KVM device is protected with a
new interrupt mode: KVMPPC_IRQ_XIVE.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
---
 arch/powerpc/include/asm/kvm_host.h   |   2 +
 arch/powerpc/include/asm/kvm_ppc.h    |  21 +++
 arch/powerpc/kvm/Makefile             |   2 +-
 arch/powerpc/kvm/book3s.c             |   7 +-
 arch/powerpc/kvm/book3s_xive.h        |   3 +
 arch/powerpc/kvm/book3s_xive_native.c | 338 ++++++++++++++++++++++++++++++++++
 arch/powerpc/kvm/powerpc.c            |  30 +++
 include/uapi/linux/kvm.h              |   3 +
 8 files changed, 404 insertions(+), 2 deletions(-)
 create mode 100644 arch/powerpc/kvm/book3s_xive_native.c

Patch

diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 6b69d7999381..ab9268ca4ca3 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -211,6 +211,7 @@  extern struct kvm_device_ops kvm_xics_ops;
 struct kvmppc_xive;
 struct kvmppc_xive_vcpu;
 extern struct kvm_device_ops kvm_xive_ops;
+extern struct kvm_device_ops kvm_xive_native_ops;
 
 struct kvmppc_passthru_irqmap;
 
@@ -431,6 +432,7 @@  struct kvmppc_passthru_irqmap {
 #define KVMPPC_IRQ_DEFAULT	0
 #define KVMPPC_IRQ_MPIC		1
 #define KVMPPC_IRQ_XICS		2 /* Includes a XIVE option */
+#define KVMPPC_IRQ_XIVE		3 /* XIVE native exploitation mode */
 
 #define MMIO_HPTE_CACHE_SIZE	4
 
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 23cfaef9544e..bacbe4aaa357 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -582,6 +582,18 @@  extern int kvmppc_xive_set_icp(struct kvm_vcpu *vcpu, u64 icpval);
 
 extern int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 irq,
 			       int level, bool line_status);
+
+static inline int kvmppc_xive_enabled(struct kvm_vcpu *vcpu)
+{
+	return vcpu->arch.irq_type == KVMPPC_IRQ_XIVE;
+}
+
+extern int kvmppc_xive_native_connect_vcpu(struct kvm_device *dev,
+				    struct kvm_vcpu *vcpu, u32 cpu);
+extern int kvmppc_xive_native_cleanup_vcpu(struct kvm_vcpu *vcpu);
+extern void kvmppc_xive_native_init_module(void);
+extern void kvmppc_xive_native_exit_module(void);
+
 #else
 static inline int kvmppc_xive_set_xive(struct kvm *kvm, u32 irq, u32 server,
 				       u32 priority) { return -1; }
@@ -604,6 +616,15 @@  static inline int kvmppc_xive_set_icp(struct kvm_vcpu *vcpu, u64 icpval) { retur
 
 static inline int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 irq,
 				      int level, bool line_status) { return -ENODEV; }
+
+static inline int kvmppc_xive_enabled(struct kvm_vcpu *vcpu)
+	{ return 0; }
+static inline int kvmppc_xive_native_connect_vcpu(struct kvm_device *dev,
+						  struct kvm_vcpu *vcpu, u32 cpu) { return -EBUSY}
+static inline int kvmppc_xive_native_cleanup_vcpu(struct kvm_vcpu *vcpu) { return 0; }
+static inline void kvmppc_xive_native_init_module(void) { }
+static inline void kvmppc_xive_native_exit_module(void) { }
+
 #endif /* CONFIG_KVM_XIVE */
 
 /*
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
index 85ba80de7133..a9cc91b528bf 100644
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -90,7 +90,7 @@  endif
 kvm-book3s_64-objs-$(CONFIG_KVM_XICS) += \
 	book3s_xics.o
 
-kvm-book3s_64-objs-$(CONFIG_KVM_XIVE) += book3s_xive.o
+kvm-book3s_64-objs-$(CONFIG_KVM_XIVE) += book3s_xive.o book3s_xive_native.o
 kvm-book3s_64-objs-$(CONFIG_SPAPR_TCE_IOMMU) += book3s_64_vio.o
 
 kvm-book3s_64-module-objs := \
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index 97d4a112648f..19c0187cada3 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -1021,7 +1021,10 @@  static int kvmppc_book3s_init(void)
 #ifdef CONFIG_KVM_XIVE
 	if (xive_enabled()) {
 		kvmppc_xive_init_module();
+		kvmppc_xive_native_init_module();
 		kvm_register_device_ops(&kvm_xive_ops, KVM_DEV_TYPE_XICS);
+		kvm_register_device_ops(&kvm_xive_native_ops,
+					KVM_DEV_TYPE_XIVE);
 	} else
 #endif
 		kvm_register_device_ops(&kvm_xics_ops, KVM_DEV_TYPE_XICS);
@@ -1032,8 +1035,10 @@  static int kvmppc_book3s_init(void)
 static void kvmppc_book3s_exit(void)
 {
 #ifdef CONFIG_KVM_XICS
-	if (xive_enabled())
+	if (xive_enabled()) {
 		kvmppc_xive_exit_module();
+		kvmppc_xive_native_exit_module();
+	}
 #endif
 #ifdef CONFIG_KVM_BOOK3S_32_HANDLER
 	kvmppc_book3s_exit_pr();
diff --git a/arch/powerpc/kvm/book3s_xive.h b/arch/powerpc/kvm/book3s_xive.h
index 10c4aa5cd010..5f22415520b4 100644
--- a/arch/powerpc/kvm/book3s_xive.h
+++ b/arch/powerpc/kvm/book3s_xive.h
@@ -12,6 +12,9 @@ 
 #ifdef CONFIG_KVM_XICS
 #include "book3s_xics.h"
 
+#define KVMPPC_XIVE_FIRST_IRQ	0
+#define KVMPPC_XIVE_NR_IRQS	KVMPPC_XICS_NR_IRQS
+
 /*
  * State for one guest irq source.
  *
diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c
new file mode 100644
index 000000000000..58f272f56c99
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_xive_native.c
@@ -0,0 +1,338 @@ 
+/*
+ * Copyright (c) 2017-2018, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ */
+
+#define pr_fmt(fmt) "xive-kvm: " fmt
+
+#include <linux/anon_inodes.h>
+#include <linux/kernel.h>
+#include <linux/kvm_host.h>
+#include <linux/err.h>
+#include <linux/gfp.h>
+#include <linux/spinlock.h>
+#include <linux/delay.h>
+#include <linux/percpu.h>
+#include <linux/cpumask.h>
+#include <asm/uaccess.h>
+#include <asm/kvm_book3s.h>
+#include <asm/kvm_ppc.h>
+#include <asm/hvcall.h>
+#include <asm/xics.h>
+#include <asm/xive.h>
+#include <asm/xive-regs.h>
+#include <asm/debug.h>
+#include <asm/debugfs.h>
+#include <asm/time.h>
+#include <asm/opal.h>
+
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+#include "book3s_xive.h"
+
+static void xive_native_cleanup_queue(struct kvm_vcpu *vcpu, int prio)
+{
+	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+	struct xive_q *q = &xc->queues[prio];
+
+	xive_native_disable_queue(xc->vp_id, q, prio);
+	if (q->qpage) {
+		put_page(virt_to_page(q->qpage));
+		q->qpage = NULL;
+	}
+}
+
+int kvmppc_xive_native_cleanup_vcpu(struct kvm_vcpu *vcpu)
+{
+	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+	int i;
+
+	pr_devel("native_cleanup_vcpu(cpu=%d)\n", xc->server_num);
+
+	if (!kvmppc_xive_enabled(vcpu))
+		return -EPERM;
+
+	if (!xc)
+		return -ENOENT;
+
+	/* Ensure no interrupt is still routed to that VP */
+	xc->valid = false;
+	kvmppc_xive_disable_vcpu_interrupts(vcpu);
+
+	/* Disable the VP */
+	xive_native_disable_vp(xc->vp_id);
+
+	/* Free the queues & associated interrupts */
+	for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
+		/* Free the escalation irq */
+		if (xc->esc_virq[i]) {
+			free_irq(xc->esc_virq[i], vcpu);
+			irq_dispose_mapping(xc->esc_virq[i]);
+			kfree(xc->esc_virq_names[i]);
+			xc->esc_virq[i] = 0;
+		}
+
+		/* Free the queue */
+		xive_native_cleanup_queue(vcpu, i);
+	}
+
+	/* Free the VP */
+	kfree(xc);
+
+	/* Cleanup the vcpu */
+	vcpu->arch.irq_type = KVMPPC_IRQ_DEFAULT;
+	vcpu->arch.xive_vcpu = NULL;
+	return 0;
+}
+
+int kvmppc_xive_native_connect_vcpu(struct kvm_device *dev,
+				    struct kvm_vcpu *vcpu, u32 cpu)
+{
+	struct kvmppc_xive *xive = dev->private;
+	struct kvmppc_xive_vcpu *xc;
+	int rc;
+
+	pr_devel("native_connect_vcpu(cpu=%d)\n", cpu);
+
+	if (dev->ops != &kvm_xive_native_ops) {
+		pr_devel("Wrong ops !\n");
+		return -EPERM;
+	}
+	if (xive->kvm != vcpu->kvm)
+		return -EPERM;
+	if (vcpu->arch.irq_type)
+		return -EBUSY;
+	if (kvmppc_xive_find_server(vcpu->kvm, cpu)) {
+		pr_devel("Duplicate !\n");
+		return -EEXIST;
+	}
+	if (cpu >= KVM_MAX_VCPUS) {
+		pr_devel("Out of bounds !\n");
+		return -EINVAL;
+	}
+	xc = kzalloc(sizeof(*xc), GFP_KERNEL);
+	if (!xc)
+		return -ENOMEM;
+
+	mutex_lock(&vcpu->kvm->lock);
+	vcpu->arch.xive_vcpu = xc;
+	xc->xive = xive;
+	xc->vcpu = vcpu;
+	xc->server_num = cpu;
+	xc->vp_id = xive->vp_base + cpu;
+	xc->valid = true;
+
+	rc = xive_native_get_vp_info(xc->vp_id, &xc->vp_cam, &xc->vp_chip_id);
+	if (rc) {
+		pr_err("Failed to get VP info from OPAL: %d\n", rc);
+		goto bail;
+	}
+
+	/*
+	 * Enable the VP first as the single escalation mode will
+	 * affect escalation interrupts numbering
+	 */
+	rc = xive_native_enable_vp(xc->vp_id, xive->single_escalation);
+	if (rc) {
+		pr_err("Failed to enable VP in OPAL: %d\n", rc);
+		goto bail;
+	}
+
+	/* Configure VCPU fields for use by assembly push/pull */
+	vcpu->arch.xive_saved_state.w01 = cpu_to_be64(0xff000000);
+	vcpu->arch.xive_cam_word = cpu_to_be32(xc->vp_cam | TM_QW1W2_VO);
+
+	/* TODO: reset queues ? */
+
+bail:
+	vcpu->arch.irq_type = KVMPPC_IRQ_XIVE;
+	mutex_unlock(&vcpu->kvm->lock);
+	if (rc)
+		kvmppc_xive_native_cleanup_vcpu(vcpu);
+
+	return rc;
+}
+
+static int kvmppc_xive_native_set_attr(struct kvm_device *dev,
+				       struct kvm_device_attr *attr)
+{
+	return -ENXIO;
+}
+
+static int kvmppc_xive_native_get_attr(struct kvm_device *dev,
+				       struct kvm_device_attr *attr)
+{
+	return -ENXIO;
+}
+
+static int kvmppc_xive_native_has_attr(struct kvm_device *dev,
+				       struct kvm_device_attr *attr)
+{
+	return -ENXIO;
+}
+
+static void kvmppc_xive_native_free(struct kvm_device *dev)
+{
+	struct kvmppc_xive *xive = dev->private;
+	struct kvm *kvm = xive->kvm;
+	int i;
+
+	debugfs_remove(xive->dentry);
+
+	pr_devel("Destroying xive native for partition\n");
+
+	if (kvm)
+		kvm->arch.xive = NULL;
+
+	/* Mask and free interrupts */
+	for (i = 0; i <= xive->max_sbid; i++) {
+		if (xive->src_blocks[i])
+			kvmppc_xive_free_sources(xive->src_blocks[i]);
+		kfree(xive->src_blocks[i]);
+		xive->src_blocks[i] = NULL;
+	}
+
+	if (xive->vp_base != XIVE_INVALID_VP)
+		xive_native_free_vp_block(xive->vp_base);
+
+	kfree(xive);
+	kfree(dev);
+}
+
+static int kvmppc_xive_native_create(struct kvm_device *dev, u32 type)
+{
+	struct kvmppc_xive *xive;
+	struct kvm *kvm = dev->kvm;
+	int ret = 0;
+
+	pr_devel("Creating xive native for partition\n");
+
+	if (kvm->arch.xive)
+		return -EEXIST;
+
+	xive = kzalloc(sizeof(*xive), GFP_KERNEL);
+	if (!xive)
+		return -ENOMEM;
+
+	dev->private = xive;
+	xive->dev = dev;
+	xive->kvm = kvm;
+	kvm->arch.xive = xive;
+
+	/* We use the default queue size set by the host */
+	xive->q_order = xive_native_default_eq_shift();
+	if (xive->q_order < PAGE_SHIFT)
+		xive->q_page_order = 0;
+	else
+		xive->q_page_order = xive->q_order - PAGE_SHIFT;
+
+	/* Allocate a bunch of VPs */
+	xive->vp_base = xive_native_alloc_vp_block(KVM_MAX_VCPUS);
+	pr_devel("VP_Base=%x\n", xive->vp_base);
+
+	if (xive->vp_base == XIVE_INVALID_VP)
+		ret = -ENOMEM;
+
+	xive->single_escalation = xive_native_has_single_escalation();
+
+	if (ret)
+		kfree(xive);
+
+	return ret;
+}
+
+static int xive_native_debug_show(struct seq_file *m, void *private)
+{
+	struct kvmppc_xive *xive = m->private;
+	struct kvm *kvm = xive->kvm;
+	struct kvm_vcpu *vcpu;
+	unsigned int i;
+
+	if (!kvm)
+		return 0;
+
+	seq_puts(m, "=========\nVCPU state\n=========\n");
+
+	kvm_for_each_vcpu(i, vcpu, kvm) {
+		struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+
+		if (!xc)
+			continue;
+
+		seq_printf(m, "cpu server %#x NSR=%02x CPPR=%02x IBP=%02x PIPR=%02x w01=%016llx w2=%08x\n",
+			   xc->server_num,
+			   vcpu->arch.xive_saved_state.nsr,
+			   vcpu->arch.xive_saved_state.cppr,
+			   vcpu->arch.xive_saved_state.ipb,
+			   vcpu->arch.xive_saved_state.pipr,
+			   vcpu->arch.xive_saved_state.w01,
+			   (u32) vcpu->arch.xive_cam_word);
+
+		kvmppc_xive_debug_show_queues(m, vcpu);
+
+		/* TODO: hcalls statistics */
+	}
+
+	return 0;
+}
+
+static int xive_native_debug_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, xive_native_debug_show, inode->i_private);
+}
+
+static const struct file_operations xive_native_debug_fops = {
+	.open = xive_native_debug_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+static void xive_native_debugfs_init(struct kvmppc_xive *xive)
+{
+	char *name;
+
+	name = kasprintf(GFP_KERNEL, "kvm-xive-%p", xive);
+	if (!name) {
+		pr_err("%s: no memory for name\n", __func__);
+		return;
+	}
+
+	xive->dentry = debugfs_create_file(name, 0444, powerpc_debugfs_root,
+					   xive, &xive_native_debug_fops);
+
+	pr_debug("%s: created %s\n", __func__, name);
+	kfree(name);
+}
+
+static void kvmppc_xive_native_init(struct kvm_device *dev)
+{
+	struct kvmppc_xive *xive = (struct kvmppc_xive *)dev->private;
+
+	/* Register some debug interfaces */
+	xive_native_debugfs_init(xive);
+}
+
+struct kvm_device_ops kvm_xive_native_ops = {
+	.name = "kvm-xive-native",
+	.create = kvmppc_xive_native_create,
+	.init = kvmppc_xive_native_init,
+	.destroy = kvmppc_xive_native_free,
+	.set_attr = kvmppc_xive_native_set_attr,
+	.get_attr = kvmppc_xive_native_get_attr,
+	.has_attr = kvmppc_xive_native_has_attr,
+};
+
+void kvmppc_xive_native_init_module(void)
+{
+	;
+}
+
+void kvmppc_xive_native_exit_module(void)
+{
+	;
+}
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 52c205373986..42eb75e43d6e 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -552,6 +552,9 @@  int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 #ifdef CONFIG_KVM_XICS
 	case KVM_CAP_IRQ_XICS:
 #endif
+#ifdef CONFIG_KVM_XIVE
+	case KVM_CAP_PPC_IRQ_XIVE:
+#endif
 	case KVM_CAP_PPC_GET_CPU_CHAR:
 		r = 1;
 		break;
@@ -731,6 +734,9 @@  void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
 		else
 			kvmppc_xics_free_icp(vcpu);
 		break;
+	case KVMPPC_IRQ_XIVE:
+		kvmppc_xive_native_cleanup_vcpu(vcpu);
+		break;
 	}
 
 	kvmppc_core_vcpu_free(vcpu);
@@ -1714,6 +1720,30 @@  static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
 		break;
 	}
 #endif /* CONFIG_KVM_XICS */
+#ifdef CONFIG_KVM_XIVE
+	case KVM_CAP_PPC_IRQ_XIVE: {
+		struct fd f;
+		struct kvm_device *dev;
+
+		r = -EBADF;
+		f = fdget(cap->args[0]);
+		if (!f.file)
+			break;
+
+		r = -ENXIO;
+		if (!xive_enabled())
+			break;
+
+		r = -EPERM;
+		dev = kvm_device_from_filp(f.file);
+		if (dev)
+			r = kvmppc_xive_native_connect_vcpu(dev, vcpu,
+							    cap->args[1]);
+
+		fdput(f);
+		break;
+	}
+#endif /* CONFIG_KVM_XIVE */
 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
 	case KVM_CAP_PPC_FWNMI:
 		r = -EINVAL;
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 7b26d4b0b052..cda8b85f4849 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -936,6 +936,7 @@  struct kvm_ppc_resize_hpt {
 #define KVM_CAP_PPC_GET_CPU_CHAR 151
 #define KVM_CAP_S390_BPB 152
 #define KVM_CAP_GET_MSR_FEATURES 153
+#define KVM_CAP_PPC_IRQ_XIVE 154
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -1159,6 +1160,8 @@  enum kvm_device_type {
 #define KVM_DEV_TYPE_ARM_VGIC_V3	KVM_DEV_TYPE_ARM_VGIC_V3
 	KVM_DEV_TYPE_ARM_VGIC_ITS,
 #define KVM_DEV_TYPE_ARM_VGIC_ITS	KVM_DEV_TYPE_ARM_VGIC_ITS
+	KVM_DEV_TYPE_XIVE,
+#define KVM_DEV_TYPE_XIVE		KVM_DEV_TYPE_XIVE
 	KVM_DEV_TYPE_MAX,
 };