Patchwork [V2,4/6] kvm tools: Add PPC64 XICS interrupt controller support

login
register
mail settings
Submitter Matt Evans
Date Dec. 13, 2011, 7:10 a.m.
Message ID <1323760250-13237-5-git-send-email-matt@ozlabs.org>
Download mbox | patch
Permalink /patch/131029/
State New
Headers show

Comments

Matt Evans - Dec. 13, 2011, 7:10 a.m.
This patch adds XICS emulation code (heavily borrowed from QEMU), and wires
this into kvm_cpu__irq() to fire a CPU IRQ via KVM.  A device tree entry is
also added.  IPIs work, xics_alloc_irqnum() is added to allocate an external
IRQ (which will later be used by the PHB PCI code) and finally, kvm__irq_line()
can be called to raise an IRQ on XICS.

Signed-off-by: Matt Evans <matt@ozlabs.org>
---
 tools/kvm/Makefile                           |    1 +
 tools/kvm/powerpc/include/kvm/kvm-arch.h     |    1 +
 tools/kvm/powerpc/include/kvm/kvm-cpu-arch.h |    2 +
 tools/kvm/powerpc/irq.c                      |   17 +-
 tools/kvm/powerpc/kvm-cpu.c                  |   11 +
 tools/kvm/powerpc/kvm.c                      |   26 +-
 tools/kvm/powerpc/xics.c                     |  514 ++++++++++++++++++++++++++
 tools/kvm/powerpc/xics.h                     |   23 ++
 8 files changed, 589 insertions(+), 6 deletions(-)
 create mode 100644 tools/kvm/powerpc/xics.c
 create mode 100644 tools/kvm/powerpc/xics.h
David Gibson - Dec. 14, 2011, 2:35 a.m.
On Tue, Dec 13, 2011 at 06:10:48PM +1100, Matt Evans wrote:
> This patch adds XICS emulation code (heavily borrowed from QEMU), and wires
> this into kvm_cpu__irq() to fire a CPU IRQ via KVM.  A device tree entry is
> also added.  IPIs work, xics_alloc_irqnum() is added to allocate an external
> IRQ (which will later be used by the PHB PCI code) and finally, kvm__irq_line()
> can be called to raise an IRQ on XICS.

Hrm, looks like you took a somewhat old version of xics.c from qemu.
It dangerously uses the same variable names for global irq numbers and
numbers local to one ics unit.  It used to have at least one bug
caused by confusing the two, which I'm not sure if you've also copied.
Matt Evans - Dec. 20, 2011, 1:16 a.m.
Hi David,

On 14/12/11 13:35, David Gibson wrote:
> On Tue, Dec 13, 2011 at 06:10:48PM +1100, Matt Evans wrote:
>> This patch adds XICS emulation code (heavily borrowed from QEMU), and wires
>> this into kvm_cpu__irq() to fire a CPU IRQ via KVM.  A device tree entry is
>> also added.  IPIs work, xics_alloc_irqnum() is added to allocate an external
>> IRQ (which will later be used by the PHB PCI code) and finally, kvm__irq_line()
>> can be called to raise an IRQ on XICS.
> 
> Hrm, looks like you took a somewhat old version of xics.c from qemu.
> It dangerously uses the same variable names for global irq numbers and
> numbers local to one ics unit.  It used to have at least one bug
> caused by confusing the two, which I'm not sure if you've also copied.

Just had a look at the diffs between this and hw/xics.c from the master branch
in your qemu-impreza.git (which I based the kvmtool stuff on) and I can't see
anything standing out.

Is there a particular commit/patch/variable name you have in mind that I can
search for?


Thanks!


Matt

--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
David Gibson - Dec. 21, 2011, 12:39 a.m.
On Tue, Dec 20, 2011 at 12:16:40PM +1100, Matt Evans wrote:
> Hi David,
> 
> On 14/12/11 13:35, David Gibson wrote:
> > On Tue, Dec 13, 2011 at 06:10:48PM +1100, Matt Evans wrote:
> >> This patch adds XICS emulation code (heavily borrowed from QEMU), and wires
> >> this into kvm_cpu__irq() to fire a CPU IRQ via KVM.  A device tree entry is
> >> also added.  IPIs work, xics_alloc_irqnum() is added to allocate an external
> >> IRQ (which will later be used by the PHB PCI code) and finally, kvm__irq_line()
> >> can be called to raise an IRQ on XICS.
> > 
> > Hrm, looks like you took a somewhat old version of xics.c from qemu.
> > It dangerously uses the same variable names for global irq numbers and
> > numbers local to one ics unit.  It used to have at least one bug
> > caused by confusing the two, which I'm not sure if you've also copied.
> 
> Just had a look at the diffs between this and hw/xics.c from the master branch
> in your qemu-impreza.git (which I based the kvmtool stuff on) and I can't see
> anything standing out.
> 
> Is there a particular commit/patch/variable name you have in mind that I can
> search for?

Sorry, my mistake, I was looking in the wrong place.

Patch

diff --git a/tools/kvm/Makefile b/tools/kvm/Makefile
index 0a576be..0d42acf 100644
--- a/tools/kvm/Makefile
+++ b/tools/kvm/Makefile
@@ -134,6 +134,7 @@  ifeq ($(uname_M), ppc64)
 	OBJS	+= powerpc/spapr_hcall.o
 	OBJS	+= powerpc/spapr_rtas.o
 	OBJS	+= powerpc/spapr_hvcons.o
+	OBJS	+= powerpc/xics.o
 	ARCH_INCLUDE := powerpc/include
 	CFLAGS 	+= -m64
 	LIBS 	+= -lfdt
diff --git a/tools/kvm/powerpc/include/kvm/kvm-arch.h b/tools/kvm/powerpc/include/kvm/kvm-arch.h
index 33a3827..e070c3f 100644
--- a/tools/kvm/powerpc/include/kvm/kvm-arch.h
+++ b/tools/kvm/powerpc/include/kvm/kvm-arch.h
@@ -67,6 +67,7 @@  struct kvm {
 	unsigned long		initrd_gra;
 	unsigned long		initrd_size;
 	const char		*name;
+	struct icp_state	*icp;
 };
 
 /* Helper for the various bits of code that generate FDT nodes */
diff --git a/tools/kvm/powerpc/include/kvm/kvm-cpu-arch.h b/tools/kvm/powerpc/include/kvm/kvm-cpu-arch.h
index 64e4510..c1c6539 100644
--- a/tools/kvm/powerpc/include/kvm/kvm-cpu-arch.h
+++ b/tools/kvm/powerpc/include/kvm/kvm-cpu-arch.h
@@ -36,6 +36,8 @@ 
 #define MSR_RI		(1UL<<1)
 #define MSR_LE		(1UL<<0)
 
+#define POWER7_EXT_IRQ	0
+
 struct kvm;
 
 struct kvm_cpu {
diff --git a/tools/kvm/powerpc/irq.c b/tools/kvm/powerpc/irq.c
index 46aa64f..a1047d4 100644
--- a/tools/kvm/powerpc/irq.c
+++ b/tools/kvm/powerpc/irq.c
@@ -21,6 +21,15 @@ 
 #include <stddef.h>
 #include <stdlib.h>
 
+#include "xics.h"
+
+#define XICS_IRQS               1024
+
+/*
+ * FIXME: The code in this file assumes an SPAPR guest, using XICS.  Make
+ * generic & cope with multiple PPC platform types.
+ */
+
 int irq__register_device(u32 dev, u8 *num, u8 *pin, u8 *line)
 {
 	fprintf(stderr, "irq__register_device(%d, [%d], [%d], [%d]\n",
@@ -30,7 +39,13 @@  int irq__register_device(u32 dev, u8 *num, u8 *pin, u8 *line)
 
 void irq__init(struct kvm *kvm)
 {
-	fprintf(stderr, __func__);
+	/*
+	 * kvm->nr_cpus is now valid; for /now/, pass
+	 * this to xics_system_init(), which assumes servers
+	 * are numbered 0..nrcpus.  This may not really be true,
+	 * but it is OK currently.
+	 */
+	kvm->icp = xics_system_init(XICS_IRQS, kvm->nrcpus);
 }
 
 int irq__add_msix_route(struct kvm *kvm, struct msi_msg *msg)
diff --git a/tools/kvm/powerpc/kvm-cpu.c b/tools/kvm/powerpc/kvm-cpu.c
index ef3db4d..5ef1cbf 100644
--- a/tools/kvm/powerpc/kvm-cpu.c
+++ b/tools/kvm/powerpc/kvm-cpu.c
@@ -15,6 +15,7 @@ 
 #include "kvm/kvm.h"
 
 #include "spapr.h"
+#include "xics.h"
 
 #include <sys/ioctl.h>
 #include <sys/mman.h>
@@ -89,6 +90,9 @@  struct kvm_cpu *kvm_cpu__init(struct kvm *kvm, unsigned long cpu_id)
 	 */
 	vcpu->is_running = true;
 
+	/* Register with IRQ controller (FIXME, assumes XICS) */
+	xics_cpu_register(vcpu);
+
 	return vcpu;
 }
 
@@ -141,6 +145,13 @@  void kvm_cpu__reset_vcpu(struct kvm_cpu *vcpu)
 /* kvm_cpu__irq - set KVM's IRQ flag on this vcpu */
 void kvm_cpu__irq(struct kvm_cpu *vcpu, int pin, int level)
 {
+	unsigned int virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
+
+	/* FIXME: POWER-specific */
+	if (pin != POWER7_EXT_IRQ)
+		return;
+	if (ioctl(vcpu->vcpu_fd, KVM_INTERRUPT, &virq) < 0)
+		pr_warning("Could not KVM_INTERRUPT.");
 }
 
 void kvm_cpu__arch_nmi(struct kvm_cpu *cpu)
diff --git a/tools/kvm/powerpc/kvm.c b/tools/kvm/powerpc/kvm.c
index 06fccef..30443c7 100644
--- a/tools/kvm/powerpc/kvm.c
+++ b/tools/kvm/powerpc/kvm.c
@@ -39,9 +39,13 @@ 
 
 #define HUGETLBFS_PATH "/var/lib/hugetlbfs/global/pagesize-16MB/"
 
+#define PHANDLE_XICP		0x00001111
+
 static char kern_cmdline[2048];
 
 struct kvm_ext kvm_req_ext[] = {
+	{ DEFINE_KVM_EXT(KVM_CAP_PPC_UNSET_IRQ) },
+	{ DEFINE_KVM_EXT(KVM_CAP_PPC_IRQ_LEVEL) },
 	{ 0, 0 }
 };
 
@@ -118,11 +122,6 @@  void kvm__arch_init(struct kvm *kvm, const char *kvm_dev, const char *hugetlbfs_
 	spapr_hvcons_init();
 }
 
-void kvm__irq_line(struct kvm *kvm, int irq, int level)
-{
-	fprintf(stderr, "irq_line(%d, %d)\n", irq, level);
-}
-
 void kvm__irq_trigger(struct kvm *kvm, int irq)
 {
 	kvm__irq_line(kvm, irq, 1);
@@ -207,6 +206,7 @@  static void setup_fdt(struct kvm *kvm)
 {
 	uint64_t 	mem_reg_property[] = { 0, cpu_to_be64(kvm->ram_size) };
 	int 		smp_cpus = kvm->nrcpus;
+	uint32_t	int_server_ranges_prop[] = {0, cpu_to_be32(smp_cpus)};
 	char 		hypertas_prop_kvm[] = "hcall-pft\0hcall-term\0"
 		"hcall-dabr\0hcall-interrupt\0hcall-tce\0hcall-vio\0"
 		"hcall-splpar\0hcall-bulk";
@@ -347,6 +347,22 @@  static void setup_fdt(struct kvm *kvm)
 	}
 	_FDT(fdt_end_node(fdt));
 
+	/* IRQ controller */
+	_FDT(fdt_begin_node(fdt, "interrupt-controller@0"));
+
+	_FDT(fdt_property_string(fdt, "device_type",
+				 "PowerPC-External-Interrupt-Presentation"));
+	_FDT(fdt_property_string(fdt, "compatible", "IBM,ppc-xicp"));
+	_FDT(fdt_property_cell(fdt, "reg", 0));
+	_FDT(fdt_property(fdt, "interrupt-controller", NULL, 0));
+	_FDT(fdt_property(fdt, "ibm,interrupt-server-ranges",
+			   int_server_ranges_prop,
+			   sizeof(int_server_ranges_prop)));
+	_FDT(fdt_property_cell(fdt, "#interrupt-cells", 2));
+	_FDT(fdt_property_cell(fdt, "linux,phandle", PHANDLE_XICP));
+	_FDT(fdt_property_cell(fdt, "phandle", PHANDLE_XICP));
+	_FDT(fdt_end_node(fdt));
+
 	/*
 	 * VIO: See comment in linux,stdout-path; we don't yet represent a VIO
 	 * bus/address allocation so addresses are hardwired here.
diff --git a/tools/kvm/powerpc/xics.c b/tools/kvm/powerpc/xics.c
new file mode 100644
index 0000000..2d70d3c
--- /dev/null
+++ b/tools/kvm/powerpc/xics.c
@@ -0,0 +1,514 @@ 
+/*
+ * PAPR Virtualized Interrupt System, aka ICS/ICP aka xics
+ *
+ * Borrowed heavily from QEMU's xics.c,
+ * Copyright (c) 2010,2011 David Gibson, IBM Corporation.
+ *
+ * Modifications copyright 2011 Matt Evans <matt@ozlabs.org>, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+
+#include "spapr.h"
+#include "xics.h"
+#include "kvm/util.h"
+
+#include <stdio.h>
+#include <malloc.h>
+
+
+/* #define DEBUG_XICS yes */
+#ifdef DEBUG_XICS
+#define xics_dprintf(fmt, ...)					\
+	do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
+#else
+#define xics_dprintf(fmt, ...)			\
+	do { } while (0)
+#endif
+
+/*
+ * ICP: Presentation layer
+ */
+
+/*
+ * Per-server (per-vcpu) presentation state: the XIRR register plus the
+ * priority of the currently presented interrupt and the MFRR (IPI)
+ * register.
+ */
+struct icp_server_state {
+	uint32_t xirr;			/* CPPR in top byte | XISR in low 24 bits */
+	uint8_t pending_priority;	/* Priority of the IRQ currently in XISR */
+	uint8_t mfrr;			/* Most Favoured Request Register; 0xff = no IPI */
+	struct kvm_cpu *cpu;		/* vcpu backing this server (xics_cpu_register) */
+};
+
+/* External IRQ numbers start at 16; lower values are reserved (XICS_IPI is 2). */
+#define XICS_IRQ_OFFSET 16
+#define XISR_MASK	0x00ffffff
+#define CPPR_MASK	0xff000000
+
+/* Extract the XISR (pending source) / CPPR (current priority) fields of XIRR. */
+#define XISR(ss)   (((ss)->xirr) & XISR_MASK)
+#define CPPR(ss)   (((ss)->xirr) >> 24)
+
+struct ics_state;
+
+/*
+ * Top-level XICS state: one presentation server per possible vcpu plus
+ * a single interrupt-source (ICS) unit.
+ */
+struct icp_state {
+	unsigned long nr_servers;	/* Number of entries in ss[] */
+	struct icp_server_state *ss;	/* Indexed by server number (== cpu id) */
+	struct ics_state *ics;		/* The single source unit */
+};
+
+/* Source-layer entry points used by the presentation layer below. */
+static void ics_reject(struct ics_state *ics, int nr);
+static void ics_resend(struct ics_state *ics);
+static void ics_eoi(struct ics_state *ics, int nr);
+
+/* Raise the POWER7 external-interrupt line on a vcpu (via kvm_cpu__irq). */
+static inline void cpu_irq_raise(struct kvm_cpu *vcpu)
+{
+	xics_dprintf("INT1[%p]\n", vcpu);
+	kvm_cpu__irq(vcpu, POWER7_EXT_IRQ, 1);
+}
+
+/* Lower the external-interrupt line again. */
+static inline void cpu_irq_lower(struct kvm_cpu *vcpu)
+{
+	xics_dprintf("INT0[%p]\n", vcpu);
+	kvm_cpu__irq(vcpu, POWER7_EXT_IRQ, 0);
+}
+
+/*
+ * Check whether the IPI (MFRR) now beats whatever is in XISR and, if so,
+ * make the IPI the presented interrupt (rejecting any displaced IRQ back
+ * to the source layer) and raise the CPU's external interrupt line.
+ * Note: lower numeric priority == more favoured.
+ */
+static void icp_check_ipi(struct icp_state *icp, int server)
+{
+	struct icp_server_state *ss = icp->ss + server;
+
+	/* Pending IRQ is at least as favoured as the IPI: nothing to do. */
+	if (XISR(ss) && (ss->pending_priority <= ss->mfrr)) {
+		return;
+	}
+
+	/* The IPI displaces a less-favoured pending IRQ; hand it back. */
+	if (XISR(ss)) {
+		ics_reject(icp->ics, XISR(ss));
+	}
+
+	ss->xirr = (ss->xirr & ~XISR_MASK) | XICS_IPI;
+	ss->pending_priority = ss->mfrr;
+	cpu_irq_raise(ss->cpu);
+}
+
+static void icp_resend(struct icp_state *icp, int server)
+{
+	struct icp_server_state *ss = icp->ss + server;
+
+	if (ss->mfrr < CPPR(ss)) {
+		icp_check_ipi(icp, server);
+	}
+	ics_resend(icp->ics);
+}
+
+/*
+ * Set the Current Processor Priority Register for a server.  Raising the
+ * priority (numerically lowering CPPR) may force rejection of the
+ * currently presented IRQ; lowering it may allow a previously rejected
+ * IRQ to be re-presented.
+ */
+static void icp_set_cppr(struct icp_state *icp, int server, uint8_t cppr)
+{
+	struct icp_server_state *ss = icp->ss + server;
+	uint8_t old_cppr;
+	uint32_t old_xisr;
+
+	old_cppr = CPPR(ss);
+	ss->xirr = (ss->xirr & ~CPPR_MASK) | (cppr << 24);
+
+	if (cppr < old_cppr) {
+		/* Priority raised: pending IRQ may no longer qualify. */
+		if (XISR(ss) && (cppr <= ss->pending_priority)) {
+			old_xisr = XISR(ss);
+			ss->xirr &= ~XISR_MASK; /* Clear XISR */
+			cpu_irq_lower(ss->cpu);
+			ics_reject(icp->ics, old_xisr);
+		}
+	} else {
+		/* Priority lowered: give rejected IRQs another chance. */
+		if (!XISR(ss)) {
+			icp_resend(icp, server);
+		}
+	}
+}
+
+static void icp_set_mfrr(struct icp_state *icp, int nr, uint8_t mfrr)
+{
+	struct icp_server_state *ss = icp->ss + nr;
+
+	ss->mfrr = mfrr;
+	if (mfrr < CPPR(ss)) {
+		icp_check_ipi(icp, nr);
+	}
+}
+
+/*
+ * Guest accepts the presented interrupt (H_XIRR): lower the CPU IRQ
+ * line, return the old XIRR, and raise CPPR to the accepted priority so
+ * only more-favoured interrupts can be presented until EOI.
+ */
+static uint32_t icp_accept(struct icp_server_state *ss)
+{
+	uint32_t xirr;
+
+	cpu_irq_lower(ss->cpu);
+	xirr = ss->xirr;
+	ss->xirr = ss->pending_priority << 24;
+	return xirr;
+}
+
+/*
+ * Guest end-of-interrupt (H_EOI): notify the source layer, restore CPPR
+ * from the XIRR value the guest wrote, and retry delivery of anything
+ * still pending.
+ */
+static void icp_eoi(struct icp_state *icp, int server, uint32_t xirr)
+{
+	struct icp_server_state *ss = icp->ss + server;
+
+	ics_eoi(icp->ics, xirr & XISR_MASK);
+	/* Send EOI -> ICS */
+	ss->xirr = (ss->xirr & ~CPPR_MASK) | (xirr & CPPR_MASK);
+	if (!XISR(ss)) {
+		icp_resend(icp, server);
+	}
+}
+
+/*
+ * Present global IRQ nr to a server at the given priority.  If it is not
+ * favoured enough (CPPR masks it, or something at least as favoured is
+ * already pending) it is rejected back to the ICS for a later retry;
+ * otherwise it displaces any currently pending IRQ and raises the CPU
+ * interrupt line.
+ */
+static void icp_irq(struct icp_state *icp, int server, int nr, uint8_t priority)
+{
+	struct icp_server_state *ss = icp->ss + server;
+	xics_dprintf("icp_irq(nr %d, server %d, prio 0x%x)\n", nr, server, priority);
+	if ((priority >= CPPR(ss))
+	    || (XISR(ss) && (ss->pending_priority <= priority))) {
+		xics_dprintf("reject %d, CPPR 0x%x, XISR 0x%x, pprio 0x%x, prio 0x%x\n",
+			     nr, CPPR(ss), XISR(ss), ss->pending_priority, priority);
+		ics_reject(icp->ics, nr);
+	} else {
+		/* New IRQ wins; the displaced one goes back to the source. */
+		if (XISR(ss)) {
+			xics_dprintf("reject %d, CPPR 0x%x, XISR 0x%x, pprio 0x%x, prio 0x%x\n",
+				     nr, CPPR(ss), XISR(ss), ss->pending_priority, priority);
+			ics_reject(icp->ics, XISR(ss));
+		}
+		ss->xirr = (ss->xirr & ~XISR_MASK) | (nr & XISR_MASK);
+		ss->pending_priority = priority;
+		cpu_irq_raise(ss->cpu);
+	}
+}
+
+/*
+ * ICS: Source layer
+ */
+
/*
 * Per-source state.  priority 0xff means the source is masked.
 */
struct ics_irq_state {
	int server;		/* Presentation server this source is routed to */
	uint8_t priority;	/* Current delivery priority; 0xff = masked */
	uint8_t saved_priority;	/* Priority saved across mask (ibm,int-off) */
	/*
	 * One-bit flags.  Declared unsigned: the signedness of a plain
	 * 'int' bit-field is implementation-defined (C11 6.7.2.1), and a
	 * signed 1-bit field cannot portably store the value 1 -- it
	 * would read back as -1.  The code assigns 0/1 and also compares
	 * these for truth, so make the representation well-defined.
	 */
	unsigned int rejected:1;	/* Rejected by ICP; retry on resend */
	unsigned int masked_pending:1;	/* Event latched while masked */
};
+
+/*
+ * One interrupt-source unit handling nr_irqs sources; global IRQ number
+ * (offset + i) maps to local source irqs[i].
+ */
+struct ics_state {
+	unsigned int nr_irqs;		/* Number of sources in irqs[] */
+	unsigned int offset;		/* First global IRQ number of this unit */
+	struct ics_irq_state *irqs;	/* Per-source state, locally indexed */
+	struct icp_state *icp;		/* Back-pointer to the presentation layer */
+};
+
+static int ics_valid_irq(struct ics_state *ics, uint32_t nr)
+{
+	return (nr >= ics->offset)
+		&& (nr < (ics->offset + ics->nr_irqs));
+}
+
+/*
+ * Trigger local source srcno.  MSI-style edge semantics: only a non-zero
+ * 'val' does anything.  A masked source (priority 0xff) latches the
+ * event for delivery when it is later unmasked via set-xive.
+ */
+static void ics_set_irq_msi(struct ics_state *ics, int srcno, int val)
+{
+	struct ics_irq_state *irq = ics->irqs + srcno;
+
+	if (val) {
+		if (irq->priority == 0xff) {
+			xics_dprintf(" irq pri ff, masked pending\n");
+			irq->masked_pending = 1;
+		} else	{
+			/* srcno + offset converts local -> global IRQ number */
+			icp_irq(ics->icp, irq->server, srcno + ics->offset, irq->priority);
+		}
+	}
+}
+
+/* Mark global IRQ nr rejected; ics_resend_msi() will retry it later. */
+static void ics_reject_msi(struct ics_state *ics, int nr)
+{
+	struct ics_irq_state *irq = ics->irqs + nr - ics->offset;
+
+	irq->rejected = 1;
+}
+
+/* Re-present every rejected, unmasked source to the presentation layer. */
+static void ics_resend_msi(struct ics_state *ics)
+{
+	unsigned int i;
+
+	for (i = 0; i < ics->nr_irqs; i++) {
+		struct ics_irq_state *irq = ics->irqs + i;
+
+		/* FIXME: filter by server#? */
+		if (irq->rejected) {
+			irq->rejected = 0;
+			if (irq->priority != 0xff) {
+				icp_irq(ics->icp, irq->server, i + ics->offset, irq->priority);
+			}
+		}
+	}
+}
+
+/*
+ * RTAS set-xive backend: route global IRQ nr to 'server' at 'priority',
+ * then deliver any event that was latched while the source was masked.
+ *
+ * NOTE(review): 'nr' is a global IRQ number here (ics->offset is
+ * subtracted only for the irqs[] index) and is passed unadjusted to
+ * icp_irq() -- consistent with ics_set_irq_msi(), which passes
+ * srcno + ics->offset.  Upstream QEMU once had a local/global numbering
+ * bug in this area; worth confirming against current QEMU.
+ */
+static void ics_write_xive_msi(struct ics_state *ics, int nr, int server,
+			       uint8_t priority)
+{
+	struct ics_irq_state *irq = ics->irqs + nr - ics->offset;
+
+	irq->server = server;
+	irq->priority = priority;
+	xics_dprintf("ics_write_xive_msi(nr %d, server %d, pri 0x%x)\n", nr, server, priority);
+
+	/* Nothing latched, or still masked: just record the new routing. */
+	if (!irq->masked_pending || (priority == 0xff)) {
+		return;
+	}
+
+	irq->masked_pending = 0;
+	icp_irq(ics->icp, server, nr, priority);
+}
+
+/* Dispatch wrappers: this ICS only implements MSI-style (edge) sources. */
+static void ics_reject(struct ics_state *ics, int nr)
+{
+	ics_reject_msi(ics, nr);
+}
+
+static void ics_resend(struct ics_state *ics)
+{
+	ics_resend_msi(ics);
+}
+
+/* EOI is a no-op for MSI (edge-triggered) sources. */
+static void ics_eoi(struct ics_state *ics, int nr)
+{
+}
+
+/*
+ * Exported functions
+ */
+
+static int allocated_irqnum = XICS_IRQ_OFFSET;
+
+/*
+ * xics_alloc_irqnum(): This is hacky.  The problem boils down to the PCI device
+ * code which just calls kvm__irq_line( .. pcidev->pci_hdr.irq_line ..) at will.
+ * Each PCI device's IRQ line is allocated by irq__register_device() (which
+ * allocates an IRQ AND allocates a.. PCI device num..).
+ *
+ * In future I'd like to at least mimic some kind of 'upstream IRQ controller'
+ * whereby PCI devices let their PHB know when they want to IRQ, and that
+ * percolates up.
+ *
+ * For now, allocate a REAL xics irq number and (via irq__register_device) push
+ * that into the config space.	8 bits only though!
+ */
+int xics_alloc_irqnum(void)
+{
+	int irq = allocated_irqnum++;
+
+	if (irq > 255)
+		die("Huge numbers of IRQs aren't supported with the daft kvmtool IRQ system.");
+
+	return irq;
+}
+
+static target_ulong h_cppr(struct kvm_cpu *vcpu,
+			   target_ulong opcode, target_ulong *args)
+{
+	target_ulong cppr = args[0];
+
+	xics_dprintf("h_cppr(%lx)\n", cppr);
+	icp_set_cppr(vcpu->kvm->icp, vcpu->cpu_id, cppr);
+	return H_SUCCESS;
+}
+
+/*
+ * H_IPI hypercall: args[0] = target server, args[1] = MFRR value to
+ * write (0xff cancels a pending IPI).  Fails for out-of-range servers.
+ */
+static target_ulong h_ipi(struct kvm_cpu *vcpu,
+			  target_ulong opcode, target_ulong *args)
+{
+	target_ulong server = args[0];
+	target_ulong mfrr = args[1];
+
+	xics_dprintf("h_ipi(%lx, %lx)\n", server, mfrr);
+	if (server >= vcpu->kvm->icp->nr_servers) {
+		return H_PARAMETER;
+	}
+
+	icp_set_mfrr(vcpu->kvm->icp, server, mfrr);
+	return H_SUCCESS;
+}
+
+/* H_XIRR hypercall: accept the pending interrupt; XIRR returned in args[0]. */
+static target_ulong h_xirr(struct kvm_cpu *vcpu,
+			   target_ulong opcode, target_ulong *args)
+{
+	uint32_t xirr = icp_accept(vcpu->kvm->icp->ss + vcpu->cpu_id);
+
+	xics_dprintf("h_xirr() = %x\n", xirr);
+	args[0] = xirr;
+	return H_SUCCESS;
+}
+
+static target_ulong h_eoi(struct kvm_cpu *vcpu,
+			  target_ulong opcode, target_ulong *args)
+{
+	target_ulong xirr = args[0];
+
+	xics_dprintf("h_eoi(%lx)\n", xirr);
+	icp_eoi(vcpu->kvm->icp, vcpu->cpu_id, xirr);
+	return H_SUCCESS;
+}
+
+/*
+ * RTAS ibm,set-xive: args = { irq, server, priority }, one status word
+ * returned.  Status -3 is the RTAS "parameter error" code.
+ */
+static void rtas_set_xive(struct kvm_cpu *vcpu, uint32_t token,
+			  uint32_t nargs, target_ulong args,
+			  uint32_t nret, target_ulong rets)
+{
+	struct ics_state *ics = vcpu->kvm->icp->ics;
+	uint32_t nr, server, priority;
+
+	if ((nargs != 3) || (nret != 1)) {
+		rtas_st(vcpu->kvm, rets, 0, -3);
+		return;
+	}
+
+	nr = rtas_ld(vcpu->kvm, args, 0);
+	server = rtas_ld(vcpu->kvm, args, 1);
+	priority = rtas_ld(vcpu->kvm, args, 2);
+
+	xics_dprintf("rtas_set_xive(%x,%x,%x)\n", nr, server, priority);
+	if (!ics_valid_irq(ics, nr) || (server >= ics->icp->nr_servers)
+	    || (priority > 0xff)) {
+		rtas_st(vcpu->kvm, rets, 0, -3);
+		return;
+	}
+
+	ics_write_xive_msi(ics, nr, server, priority);
+
+	rtas_st(vcpu->kvm, rets, 0, 0); /* Success */
+}
+
+/*
+ * RTAS ibm,get-xive: arg = { irq }; returns { status, server, priority }
+ * for the queried source.
+ */
+static void rtas_get_xive(struct kvm_cpu *vcpu, uint32_t token,
+			  uint32_t nargs, target_ulong args,
+			  uint32_t nret, target_ulong rets)
+{
+	struct ics_state *ics = vcpu->kvm->icp->ics;
+	uint32_t nr;
+
+	if ((nargs != 1) || (nret != 3)) {
+		rtas_st(vcpu->kvm, rets, 0, -3);
+		return;
+	}
+
+	nr = rtas_ld(vcpu->kvm, args, 0);
+
+	if (!ics_valid_irq(ics, nr)) {
+		rtas_st(vcpu->kvm, rets, 0, -3);
+		return;
+	}
+
+	rtas_st(vcpu->kvm, rets, 0, 0); /* Success */
+	rtas_st(vcpu->kvm, rets, 1, ics->irqs[nr - ics->offset].server);
+	rtas_st(vcpu->kvm, rets, 2, ics->irqs[nr - ics->offset].priority);
+}
+
+/*
+ * RTAS ibm,int-off: mask an interrupt source.
+ * NOTE(review): like the QEMU code this was borrowed from, the actual
+ * masking step (save priority, set it to 0xff) is omitted -- see the
+ * comment below.  The call only validates its arguments and reports
+ * success; confirm whether guests rely on the masking side-effect.
+ */
+static void rtas_int_off(struct kvm_cpu *vcpu, uint32_t token,
+			 uint32_t nargs, target_ulong args,
+			 uint32_t nret, target_ulong rets)
+{
+	struct ics_state *ics = vcpu->kvm->icp->ics;
+	uint32_t nr;
+
+	if ((nargs != 1) || (nret != 1)) {
+		rtas_st(vcpu->kvm, rets, 0, -3);
+		return;
+	}
+
+	nr = rtas_ld(vcpu->kvm, args, 0);
+
+	if (!ics_valid_irq(ics, nr)) {
+		rtas_st(vcpu->kvm, rets, 0, -3);
+		return;
+	}
+
+	/* ME: QEMU wrote xive_msi here, in #if 0.  Deleted. */
+
+	rtas_st(vcpu->kvm, rets, 0, 0); /* Success */
+}
+
+/*
+ * RTAS ibm,int-on: unmask an interrupt source.
+ * NOTE(review): the restore of saved_priority is omitted here, matching
+ * rtas_int_off() above which never saves it -- see the comment below.
+ * Arguments are validated and success is reported without side-effects.
+ */
+static void rtas_int_on(struct kvm_cpu *vcpu, uint32_t token,
+			uint32_t nargs, target_ulong args,
+			uint32_t nret, target_ulong rets)
+{
+	struct ics_state *ics = vcpu->kvm->icp->ics;
+	uint32_t nr;
+
+	if ((nargs != 1) || (nret != 1)) {
+		rtas_st(vcpu->kvm, rets, 0, -3);
+		return;
+	}
+
+	nr = rtas_ld(vcpu->kvm, args, 0);
+
+	if (!ics_valid_irq(ics, nr)) {
+		rtas_st(vcpu->kvm, rets, 0, -3);
+		return;
+	}
+
+	/* ME: QEMU wrote xive_msi here, in #if 0.  Deleted. */
+
+	rtas_st(vcpu->kvm, rets, 0, 0); /* Success */
+}
+
+void xics_cpu_register(struct kvm_cpu *vcpu)
+{
+	if (vcpu->cpu_id < vcpu->kvm->icp->nr_servers)
+		vcpu->kvm->icp->ss[vcpu->cpu_id].cpu = vcpu;
+	else
+		die("Setting invalid server for cpuid %ld\n", vcpu->cpu_id);
+}
+
+struct icp_state *xics_system_init(unsigned int nr_irqs, unsigned int nr_cpus)
+{
+	int max_server_num;
+	unsigned int i;
+	struct icp_state *icp;
+	struct ics_state *ics;
+
+	max_server_num = nr_cpus;
+
+	icp = malloc(sizeof(*icp));
+	icp->nr_servers = max_server_num + 1;
+	icp->ss = malloc(icp->nr_servers*sizeof(struct icp_server_state));
+
+	for (i = 0; i < icp->nr_servers; i++) {
+		icp->ss[i].xirr = 0;
+		icp->ss[i].pending_priority = 0;
+		icp->ss[i].cpu = 0;
+		icp->ss[i].mfrr = 0xff;
+	}
+
+	/*
+	 * icp->ss[env->cpu_index].cpu is set by CPUs calling in to
+	 * xics_cpu_register().
+	 */
+
+	ics = malloc(sizeof(*ics));
+	ics->nr_irqs = nr_irqs;
+	ics->offset = XICS_IRQ_OFFSET;
+	ics->irqs = malloc(nr_irqs * sizeof(struct ics_irq_state));
+
+	icp->ics = ics;
+	ics->icp = icp;
+
+	for (i = 0; i < nr_irqs; i++) {
+		ics->irqs[i].server = 0;
+		ics->irqs[i].priority = 0xff;
+		ics->irqs[i].saved_priority = 0xff;
+		ics->irqs[i].rejected = 0;
+		ics->irqs[i].masked_pending = 0;
+	}
+
+	spapr_register_hypercall(H_CPPR, h_cppr);
+	spapr_register_hypercall(H_IPI, h_ipi);
+	spapr_register_hypercall(H_XIRR, h_xirr);
+	spapr_register_hypercall(H_EOI, h_eoi);
+
+	spapr_rtas_register("ibm,set-xive", rtas_set_xive);
+	spapr_rtas_register("ibm,get-xive", rtas_get_xive);
+	spapr_rtas_register("ibm,int-off", rtas_int_off);
+	spapr_rtas_register("ibm,int-on", rtas_int_on);
+
+	return icp;
+}
+
+/*
+ * kvmtool generic IRQ-line entry point.  'irq' is a global XICS IRQ
+ * number (e.g. handed out by xics_alloc_irqnum()), converted here to a
+ * source-local number for the ICS.  ics_set_irq_msi() treats it as
+ * edge-triggered, so level == 0 is a no-op.
+ */
+void kvm__irq_line(struct kvm *kvm, int irq, int level)
+{
+	/*
+	 * Route event to ICS, which routes to ICP, which eventually does a
+	 * kvm_cpu__irq(vcpu, POWER7_EXT_IRQ, 1)
+	 */
+	xics_dprintf("Raising IRQ %d -> %d\n", irq, level);
+	ics_set_irq_msi(kvm->icp->ics, irq - kvm->icp->ics->offset, level);
+}
diff --git a/tools/kvm/powerpc/xics.h b/tools/kvm/powerpc/xics.h
new file mode 100644
index 0000000..144915b
--- /dev/null
+++ b/tools/kvm/powerpc/xics.h
@@ -0,0 +1,23 @@ 
+/*
+ * PAPR Virtualized Interrupt System, aka ICS/ICP aka xics
+ *
+ * Copyright 2011 Matt Evans <matt@ozlabs.org>, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+
+#ifndef XICS_H
+#define XICS_H
+
+/* Global IRQ number used for inter-processor interrupts. */
+#define XICS_IPI        0x2
+
+struct kvm_cpu;
+struct icp_state;
+
+/* Create the ICP/ICS state and register the XICS hypercall/RTAS handlers. */
+struct icp_state *xics_system_init(unsigned int nr_irqs, unsigned int nr_cpus);
+/* Bind a vcpu to its presentation server; dies on an out-of-range cpu_id. */
+void xics_cpu_register(struct kvm_cpu *vcpu);
+/* Allocate a global IRQ number for an external (e.g. PCI) source. */
+int xics_alloc_irqnum(void);
+
+#endif