Patchwork [5/8] kvm tools: Add PPC64 XICS interrupt controller support

login
register
mail settings
Submitter Matt Evans
Date Dec. 6, 2011, 4:06 a.m.
Message ID <4EDD94C9.2010402@ozlabs.org>
Download mbox | patch
Permalink /patch/129528/
State New
Headers show

Comments

Matt Evans - Dec. 6, 2011, 4:06 a.m.
This patch adds XICS emulation code (heavily borrowed from QEMU), and wires
this into kvm_cpu__irq() to fire a CPU IRQ via KVM.  A device tree entry is
also added.  IPIs work, xics_alloc_irqnum() is added to allocate an external
IRQ (which will later be used by the PHB PCI code) and finally, kvm__irq_line()
can be called to raise an IRQ on XICS.

Signed-off-by: Matt Evans <matt@ozlabs.org>
---
 tools/kvm/Makefile                           |    1 +
 tools/kvm/powerpc/include/kvm/kvm-arch.h     |    1 +
 tools/kvm/powerpc/include/kvm/kvm-cpu-arch.h |    2 +
 tools/kvm/powerpc/irq.c                      |   11 +-
 tools/kvm/powerpc/kvm-cpu.c                  |   10 +
 tools/kvm/powerpc/kvm.c                      |   25 +-
 tools/kvm/powerpc/xics.c                     |  529 ++++++++++++++++++++++++++
 tools/kvm/powerpc/xics.h                     |   23 ++
 8 files changed, 596 insertions(+), 6 deletions(-)
 create mode 100644 tools/kvm/powerpc/xics.c
 create mode 100644 tools/kvm/powerpc/xics.h

--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Patch

diff --git a/tools/kvm/Makefile b/tools/kvm/Makefile
index 76cce3a..6ffffc8 100644
--- a/tools/kvm/Makefile
+++ b/tools/kvm/Makefile
@@ -131,6 +131,7 @@  ifeq ($(uname_M), ppc64)
 	OBJS	+= powerpc/spapr_hcall.o
 	OBJS	+= powerpc/spapr_rtas.o
 	OBJS	+= powerpc/spapr_hvcons.o
+	OBJS	+= powerpc/xics.o
 	ARCH_INCLUDE := powerpc/include
 	CFLAGS 	+= -m64
 	LIBS 	+= -lfdt
diff --git a/tools/kvm/powerpc/include/kvm/kvm-arch.h b/tools/kvm/powerpc/include/kvm/kvm-arch.h
index 722d01c..ae811e9 100644
--- a/tools/kvm/powerpc/include/kvm/kvm-arch.h
+++ b/tools/kvm/powerpc/include/kvm/kvm-arch.h
@@ -65,6 +65,7 @@  struct kvm {
 	unsigned long		initrd_gra;
 	unsigned long		initrd_size;
 	const char		*name;
+	struct icp_state	*icp;
 };
 
 #endif /* KVM__KVM_ARCH_H */
diff --git a/tools/kvm/powerpc/include/kvm/kvm-cpu-arch.h b/tools/kvm/powerpc/include/kvm/kvm-cpu-arch.h
index dbabc57..551307e 100644
--- a/tools/kvm/powerpc/include/kvm/kvm-cpu-arch.h
+++ b/tools/kvm/powerpc/include/kvm/kvm-cpu-arch.h
@@ -17,6 +17,8 @@ 
 
 #include <pthread.h>
 
+#define POWER7_EXT_IRQ	0
+
 struct kvm;
 
 struct kvm_cpu {
diff --git a/tools/kvm/powerpc/irq.c b/tools/kvm/powerpc/irq.c
index 46aa64f..80c972a 100644
--- a/tools/kvm/powerpc/irq.c
+++ b/tools/kvm/powerpc/irq.c
@@ -21,6 +21,10 @@ 
 #include <stddef.h>
 #include <stdlib.h>
 
+#include "xics.h"
+
+#define XICS_IRQS               1024
+
 int irq__register_device(u32 dev, u8 *num, u8 *pin, u8 *line)
 {
 	fprintf(stderr, "irq__register_device(%d, [%d], [%d], [%d]\n",
@@ -30,7 +34,12 @@  int irq__register_device(u32 dev, u8 *num, u8 *pin, u8 *line)
 
 void irq__init(struct kvm *kvm)
 {
-	fprintf(stderr, __func__);
+	/* kvm->nr_cpus is now valid; for /now/, pass
+	 * this to xics_system_init(), which assumes servers
+	 * are numbered 0..nrcpus.  This may not really be true,
+	 * but it is OK currently.
+	 */
+	kvm->icp = xics_system_init(XICS_IRQS, kvm->nrcpus);
 }
 
 int irq__add_msix_route(struct kvm *kvm, struct msi_msg *msg)
diff --git a/tools/kvm/powerpc/kvm-cpu.c b/tools/kvm/powerpc/kvm-cpu.c
index 71c648e..63cd106 100644
--- a/tools/kvm/powerpc/kvm-cpu.c
+++ b/tools/kvm/powerpc/kvm-cpu.c
@@ -15,6 +15,7 @@ 
 #include "kvm/kvm.h"
 
 #include "spapr.h"
+#include "xics.h"
 
 #include <sys/ioctl.h>
 #include <sys/mman.h>
@@ -107,6 +108,9 @@  struct kvm_cpu *kvm_cpu__init(struct kvm *kvm, unsigned long cpu_id)
 	 */
 	vcpu->is_running = true;
 
+	/* Register with IRQ controller */
+	xics_cpu_register(vcpu);
+
 	return vcpu;
 }
 
@@ -151,6 +155,12 @@  void kvm_cpu__reset_vcpu(struct kvm_cpu *vcpu)
 /* kvm_cpu__irq - set KVM's IRQ flag on this vcpu */
 void kvm_cpu__irq(struct kvm_cpu *vcpu, int pin, int level)
 {
+	unsigned int virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
+
+	if (pin != POWER7_EXT_IRQ)
+		return;
+	if (ioctl(vcpu->vcpu_fd, KVM_INTERRUPT, &virq) < 0)
+		pr_warning("Could not KVM_INTERRUPT.");
 }
 
 bool kvm_cpu__handle_exit(struct kvm_cpu *vcpu)
diff --git a/tools/kvm/powerpc/kvm.c b/tools/kvm/powerpc/kvm.c
index 8614538..bfd7c3a 100644
--- a/tools/kvm/powerpc/kvm.c
+++ b/tools/kvm/powerpc/kvm.c
@@ -41,9 +41,13 @@ 
 
 #define HUGETLBFS_PATH "/var/lib/hugetlbfs/global/pagesize-16MB/"
 
+#define PHANDLE_XICP		0x00001111
+
 static char kern_cmdline[2048];
 
 struct kvm_ext kvm_req_ext[] = {
+	{ DEFINE_KVM_EXT(KVM_CAP_PPC_UNSET_IRQ) },
+	{ DEFINE_KVM_EXT(KVM_CAP_PPC_IRQ_LEVEL) },
 	{ 0, 0 }
 };
 
@@ -164,11 +168,6 @@  void kvm__arch_init(struct kvm *kvm, const char *kvm_dev, const char *hugetlbfs_
 	spapr_hvcons_init();
 }
 
-void kvm__irq_line(struct kvm *kvm, int irq, int level)
-{
-	fprintf(stderr, "irq_line(%d, %d)\n", irq, level);
-}
-
 void kvm__irq_trigger(struct kvm *kvm, int irq)
 {
 	kvm__irq_line(kvm, irq, 1);
@@ -384,6 +383,22 @@  static void setup_fdt(struct kvm *kvm)
 	}
 	_FDT(fdt_end_node(fdt));
 
+	/* IRQ controller */
+	_FDT(fdt_begin_node(fdt, "interrupt-controller@0"));
+
+	_FDT(fdt_property_string(fdt, "device_type",
+				 "PowerPC-External-Interrupt-Presentation"));
+	_FDT(fdt_property_string(fdt, "compatible", "IBM,ppc-xicp"));
+	_FDT(fdt_property_cell(fdt, "reg", 0));
+	_FDT(fdt_property(fdt, "interrupt-controller", NULL, 0));
+	_FDT(fdt_property(fdt, "ibm,interrupt-server-ranges",
+			   interrupt_server_ranges_prop,
+			   sizeof(interrupt_server_ranges_prop)));
+	_FDT(fdt_property_cell(fdt, "#interrupt-cells", 2));
+	_FDT(fdt_property_cell(fdt, "linux,phandle", PHANDLE_XICP));
+	_FDT(fdt_property_cell(fdt, "phandle", PHANDLE_XICP));
+	_FDT(fdt_end_node(fdt));
+
 	/* VIO: See comment in linux,stdout-path; we don't yet represent a VIO
 	 * bus/address allocation so addresses are hardwired here.
 	 */
diff --git a/tools/kvm/powerpc/xics.c b/tools/kvm/powerpc/xics.c
new file mode 100644
index 0000000..4154ca8
--- /dev/null
+++ b/tools/kvm/powerpc/xics.c
@@ -0,0 +1,529 @@ 
+/*
+ * PAPR Virtualized Interrupt System, aka ICS/ICP aka xics
+ *
+ * Borrowed heavily from QEMU's xics.c,
+ * Copyright (c) 2010,2011 David Gibson, IBM Corporation.
+ *
+ * Modifications copyright 2011 Matt Evans <matt@ozlabs.org>, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+
+#include "spapr.h"
+#include "xics.h"
+#include "kvm/util.h"
+
+#include <stdio.h>
+#include <malloc.h>
+
+
+//#define DEBUG_XICS yes
+#ifdef DEBUG_XICS
+#define xics_dprintf(fmt, ...)					\
+	do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
+#else
+#define xics_dprintf(fmt, ...)			\
+	do { } while (0)
+#endif
+
+/*
+ * ICP: Presentation layer
+ */
+
+struct icp_server_state {
+	uint32_t xirr;
+	uint8_t pending_priority;
+	uint8_t mfrr;
+	struct kvm_cpu *cpu;
+};
+
+#define XICS_IRQ_OFFSET 16
+#define XISR_MASK	0x00ffffff
+#define CPPR_MASK	0xff000000
+
+#define XISR(ss)   (((ss)->xirr) & XISR_MASK)
+#define CPPR(ss)   (((ss)->xirr) >> 24)
+
+struct ics_state;
+
+struct icp_state {
+	unsigned long nr_servers;
+	struct icp_server_state *ss;
+	struct ics_state *ics;
+};
+
+static void ics_reject(struct ics_state *ics, int nr);
+static void ics_resend(struct ics_state *ics);
+static void ics_eoi(struct ics_state *ics, int nr);
+
+static inline void cpu_irq_raise(struct kvm_cpu *vcpu)
+{
+	xics_dprintf("INT1[%p]\n", vcpu);
+	kvm_cpu__irq(vcpu, POWER7_EXT_IRQ, 1);
+}
+
+static inline void cpu_irq_lower(struct kvm_cpu *vcpu)
+{
+	xics_dprintf("INT0[%p]\n", vcpu);
+	kvm_cpu__irq(vcpu, POWER7_EXT_IRQ, 0);
+}
+
+static void icp_check_ipi(struct icp_state *icp, int server)
+{
+	struct icp_server_state *ss = icp->ss + server;
+
+	if (XISR(ss) && (ss->pending_priority <= ss->mfrr)) {
+		return;
+	}
+
+	if (XISR(ss)) {
+		ics_reject(icp->ics, XISR(ss));
+	}
+
+	ss->xirr = (ss->xirr & ~XISR_MASK) | XICS_IPI;
+	ss->pending_priority = ss->mfrr;
+	cpu_irq_raise(ss->cpu);
+}
+
+static void icp_resend(struct icp_state *icp, int server)
+{
+	struct icp_server_state *ss = icp->ss + server;
+
+	if (ss->mfrr < CPPR(ss)) {
+		icp_check_ipi(icp, server);
+	}
+	ics_resend(icp->ics);
+}
+
+static void icp_set_cppr(struct icp_state *icp, int server, uint8_t cppr)
+{
+	struct icp_server_state *ss = icp->ss + server;
+	uint8_t old_cppr;
+	uint32_t old_xisr;
+
+	old_cppr = CPPR(ss);
+	ss->xirr = (ss->xirr & ~CPPR_MASK) | (cppr << 24);
+
+	if (cppr < old_cppr) {
+		if (XISR(ss) && (cppr <= ss->pending_priority)) {
+			old_xisr = XISR(ss);
+			ss->xirr &= ~XISR_MASK; /* Clear XISR */
+			cpu_irq_lower(ss->cpu);
+			ics_reject(icp->ics, old_xisr);
+		}
+	} else {
+		if (!XISR(ss)) {
+			icp_resend(icp, server);
+		}
+	}
+}
+
+static void icp_set_mfrr(struct icp_state *icp, int nr, uint8_t mfrr)
+{
+	struct icp_server_state *ss = icp->ss + nr;
+
+	ss->mfrr = mfrr;
+	if (mfrr < CPPR(ss)) {
+		icp_check_ipi(icp, nr);
+	}
+}
+
+static uint32_t icp_accept(struct icp_server_state *ss)
+{
+	uint32_t xirr;
+
+	cpu_irq_lower(ss->cpu);
+	xirr = ss->xirr;
+	ss->xirr = ss->pending_priority << 24;
+	return xirr;
+}
+
+static void icp_eoi(struct icp_state *icp, int server, uint32_t xirr)
+{
+	struct icp_server_state *ss = icp->ss + server;
+
+	ics_eoi(icp->ics, xirr & XISR_MASK);
+	/* Send EOI -> ICS */
+	ss->xirr = (ss->xirr & ~CPPR_MASK) | (xirr & CPPR_MASK);
+	if (!XISR(ss)) {
+		icp_resend(icp, server);
+	}
+}
+
+static void icp_irq(struct icp_state *icp, int server, int nr, uint8_t priority)
+{
+	struct icp_server_state *ss = icp->ss + server;
+	xics_dprintf("icp_irq(nr %d, server %d, prio 0x%x)\n", nr, server, priority);
+	if ((priority >= CPPR(ss))
+	    || (XISR(ss) && (ss->pending_priority <= priority))) {
+		xics_dprintf("reject %d, CPPR 0x%x, XISR 0x%x, pprio 0x%x, prio 0x%x\n",
+			     nr, CPPR(ss), XISR(ss), ss->pending_priority, priority);
+		ics_reject(icp->ics, nr);
+	} else {
+		if (XISR(ss)) {
+			xics_dprintf("reject %d, CPPR 0x%x, XISR 0x%x, pprio 0x%x, prio 0x%x\n",
+				     nr, CPPR(ss), XISR(ss), ss->pending_priority, priority);
+			ics_reject(icp->ics, XISR(ss));
+		}
+		ss->xirr = (ss->xirr & ~XISR_MASK) | (nr & XISR_MASK);
+		ss->pending_priority = priority;
+		cpu_irq_raise(ss->cpu);
+	}
+}
+
+/*
+ * ICS: Source layer
+ */
+
+struct ics_irq_state {
+	int server;
+	uint8_t priority;
+	uint8_t saved_priority;
+	/* int pending:1; */
+	/* int presented:1; */
+	int rejected:1;
+	int masked_pending:1;
+};
+
+struct ics_state {
+	unsigned int nr_irqs;
+	unsigned int offset;
+	struct ics_irq_state *irqs;
+	struct icp_state *icp;
+};
+
+static int ics_valid_irq(struct ics_state *ics, uint32_t nr)
+{
+	return (nr >= ics->offset)
+		&& (nr < (ics->offset + ics->nr_irqs));
+}
+
+static void ics_set_irq_msi(struct ics_state *ics, int srcno, int val)
+{
+	struct ics_irq_state *irq = ics->irqs + srcno;
+
+	if (val) {
+		if (irq->priority == 0xff) {
+			xics_dprintf(" irq pri ff, masked pending\n");
+			irq->masked_pending = 1;
+			/* masked pending */ ;
+		} else	{
+			icp_irq(ics->icp, irq->server, srcno + ics->offset, irq->priority);
+		}
+	}
+}
+
+static void ics_reject_msi(struct ics_state *ics, int nr)
+{
+	struct ics_irq_state *irq = ics->irqs + nr - ics->offset;
+
+	irq->rejected = 1;
+}
+
+static void ics_resend_msi(struct ics_state *ics)
+{
+	unsigned int i;
+
+	for (i = 0; i < ics->nr_irqs; i++) {
+		struct ics_irq_state *irq = ics->irqs + i;
+
+		/* FIXME: filter by server#? */
+		if (irq->rejected) {
+			irq->rejected = 0;
+			if (irq->priority != 0xff) {
+				icp_irq(ics->icp, irq->server, i + ics->offset, irq->priority);
+			}
+		}
+	}
+}
+
+static void ics_write_xive_msi(struct ics_state *ics, int nr, int server,
+			       uint8_t priority)
+{
+	struct ics_irq_state *irq = ics->irqs + nr - ics->offset;
+
+	irq->server = server;
+	irq->priority = priority;
+	xics_dprintf("ics_write_xive_msi(nr %d, server %d, pri 0x%x)\n", nr, server, priority);
+
+	if (!irq->masked_pending || (priority == 0xff)) {
+		return;
+	}
+
+	irq->masked_pending = 0;
+	icp_irq(ics->icp, server, nr, priority);
+}
+
+static void ics_reject(struct ics_state *ics, int nr)
+{
+	ics_reject_msi(ics, nr);
+}
+
+static void ics_resend(struct ics_state *ics)
+{
+	ics_resend_msi(ics);
+}
+
+static void ics_eoi(struct ics_state *ics, int nr)
+{
+}
+
+/*
+ * Exported functions
+ */
+
+static int allocated_irqnum = XICS_IRQ_OFFSET;
+
+/* This is hacky.  The problem boils down to the PCI device code which just
+ * calls kvm__irq_line( .. pcidev->pci_hdr.irq_line ..) at will.  Each PCI
+ * device's IRQ line is allocated by irq__register_device() (which allocates an
+ * IRQ AND allocates a.. PCI device num..).
+ *
+ * In future I'd like to at least mimic some kind of 'upstream IRQ controller'
+ * whereby PCI devices let their PHB know when they want to IRQ, and that
+ * percolates up.
+ *
+ * For now, allocate a REAL xics irq number and (via irq__register_device) push
+ * that into the config space.	8 bits only though!
+ */
+int	xics_alloc_irqnum(void)
+{
+	int irq = allocated_irqnum++;
+
+	if (irq > 255)
+		die("Huge numbers of IRQs aren't supported with the daft kvmtool IRQ system.");
+
+	return irq;
+}
+
+static target_ulong h_cppr(struct kvm_cpu *vcpu,
+			   target_ulong opcode, target_ulong *args)
+{
+	target_ulong cppr = args[0];
+
+	xics_dprintf("h_cppr(%lx)\n", cppr);
+	icp_set_cppr(vcpu->kvm->icp, vcpu->cpu_id, cppr);
+	return H_SUCCESS;
+}
+
+static target_ulong h_ipi(struct kvm_cpu *vcpu,
+			  target_ulong opcode, target_ulong *args)
+{
+	target_ulong server = args[0];
+	target_ulong mfrr = args[1];
+
+	xics_dprintf("h_ipi(%lx, %lx)\n", server, mfrr);
+	if (server >= vcpu->kvm->icp->nr_servers) {
+		return H_PARAMETER;
+	}
+
+	icp_set_mfrr(vcpu->kvm->icp, server, mfrr);
+	return H_SUCCESS;
+}
+
+static target_ulong h_xirr(struct kvm_cpu *vcpu,
+			   target_ulong opcode, target_ulong *args)
+{
+	uint32_t xirr = icp_accept(vcpu->kvm->icp->ss + vcpu->cpu_id);
+
+	xics_dprintf("h_xirr() = %x\n", xirr);
+	args[0] = xirr;
+	return H_SUCCESS;
+}
+
+static target_ulong h_eoi(struct kvm_cpu *vcpu,
+			  target_ulong opcode, target_ulong *args)
+{
+	target_ulong xirr = args[0];
+
+	xics_dprintf("h_eoi(%lx)\n", xirr);
+	icp_eoi(vcpu->kvm->icp, vcpu->cpu_id, xirr);
+	return H_SUCCESS;
+}
+
+static void rtas_set_xive(struct kvm_cpu *vcpu, uint32_t token,
+			  uint32_t nargs, target_ulong args,
+			  uint32_t nret, target_ulong rets)
+{
+	struct ics_state *ics = vcpu->kvm->icp->ics;
+	uint32_t nr, server, priority;
+
+	if ((nargs != 3) || (nret != 1)) {
+		rtas_st(vcpu->kvm, rets, 0, -3);
+		return;
+	}
+
+	nr = rtas_ld(vcpu->kvm, args, 0);
+	server = rtas_ld(vcpu->kvm, args, 1);
+	priority = rtas_ld(vcpu->kvm, args, 2);
+
+	xics_dprintf("rtas_set_xive(%x,%x,%x)\n", nr, server, priority);
+	if (!ics_valid_irq(ics, nr) || (server >= ics->icp->nr_servers)
+	    || (priority > 0xff)) {
+		rtas_st(vcpu->kvm, rets, 0, -3);
+		return;
+	}
+
+	ics_write_xive_msi(ics, nr, server, priority);
+
+	rtas_st(vcpu->kvm, rets, 0, 0); /* Success */
+}
+
+static void rtas_get_xive(struct kvm_cpu *vcpu, uint32_t token,
+			  uint32_t nargs, target_ulong args,
+			  uint32_t nret, target_ulong rets)
+{
+	struct ics_state *ics = vcpu->kvm->icp->ics;
+	uint32_t nr;
+
+	if ((nargs != 1) || (nret != 3)) {
+		rtas_st(vcpu->kvm, rets, 0, -3);
+		return;
+	}
+
+	nr = rtas_ld(vcpu->kvm, args, 0);
+
+	if (!ics_valid_irq(ics, nr)) {
+		rtas_st(vcpu->kvm, rets, 0, -3);
+		return;
+	}
+
+	rtas_st(vcpu->kvm, rets, 0, 0); /* Success */
+	rtas_st(vcpu->kvm, rets, 1, ics->irqs[nr - ics->offset].server);
+	rtas_st(vcpu->kvm, rets, 2, ics->irqs[nr - ics->offset].priority);
+}
+
+static void rtas_int_off(struct kvm_cpu *vcpu, uint32_t token,
+			 uint32_t nargs, target_ulong args,
+			 uint32_t nret, target_ulong rets)
+{
+	struct ics_state *ics = vcpu->kvm->icp->ics;
+	uint32_t nr;
+
+	if ((nargs != 1) || (nret != 1)) {
+		rtas_st(vcpu->kvm, rets, 0, -3);
+		return;
+	}
+
+	nr = rtas_ld(vcpu->kvm, args, 0);
+
+	if (!ics_valid_irq(ics, nr)) {
+		rtas_st(vcpu->kvm, rets, 0, -3);
+		return;
+	}
+
+	/* This is a NOP for now, since the described PAPR semantics don't
+	 * seem to gel with what Linux does (ME: borrowed from QEMU)
+	 */
+#if 0
+	struct ics_irq_state *irq = xics->irqs + (nr - xics->offset);
+
+	irq->saved_priority = irq->priority;
+	ics_write_xive_msi(xics, nr, irq->server, 0xff);
+#endif
+
+	rtas_st(vcpu->kvm, rets, 0, 0); /* Success */
+}
+
+static void rtas_int_on(struct kvm_cpu *vcpu, uint32_t token,
+			uint32_t nargs, target_ulong args,
+			uint32_t nret, target_ulong rets)
+{
+	struct ics_state *ics = vcpu->kvm->icp->ics;
+	uint32_t nr;
+
+	if ((nargs != 1) || (nret != 1)) {
+		rtas_st(vcpu->kvm, rets, 0, -3);
+		return;
+	}
+
+	nr = rtas_ld(vcpu->kvm, args, 0);
+
+	if (!ics_valid_irq(ics, nr)) {
+		rtas_st(vcpu->kvm, rets, 0, -3);
+		return;
+	}
+
+	/* This is a NOP for now, since the described PAPR semantics don't
+	 * seem to gel with what Linux does (ME: borrowed from QEMU)
+	 */
+#if 0
+	struct ics_irq_state *irq = xics->irqs + (nr - xics->offset);
+
+	ics_write_xive_msi(xics, nr, irq->server, irq->saved_priority);
+#endif
+
+	rtas_st(vcpu->kvm, rets, 0, 0); /* Success */
+}
+
+void xics_cpu_register(struct kvm_cpu *vcpu)
+{
+	if (vcpu->cpu_id < vcpu->kvm->icp->nr_servers)
+		vcpu->kvm->icp->ss[vcpu->cpu_id].cpu = vcpu;
+	else
+		die("Setting invalid server for cpuid %ld\n", vcpu->cpu_id);
+}
+
+struct icp_state *xics_system_init(unsigned int nr_irqs, unsigned int nr_cpus)
+{
+	int max_server_num;
+	unsigned int i;
+	struct icp_state *icp;
+	struct ics_state *ics;
+
+	max_server_num = nr_cpus;
+
+	icp = malloc(sizeof(*icp));
+	icp->nr_servers = max_server_num + 1;
+	icp->ss = malloc(icp->nr_servers*sizeof(struct icp_server_state));
+
+	for (i = 0; i < icp->nr_servers; i++) {
+		icp->ss[i].xirr = 0;
+		icp->ss[i].pending_priority = 0;
+		icp->ss[i].cpu = 0;
+		icp->ss[i].mfrr = 0xff;
+	}
+
+	/* icp->ss[env->cpu_index].cpu is set by CPUs calling in to
+	 * xics_cpu_register().
+	 */
+
+	ics = malloc(sizeof(*ics));
+	ics->nr_irqs = nr_irqs;
+	ics->offset = XICS_IRQ_OFFSET;
+	ics->irqs = malloc(nr_irqs * sizeof(struct ics_irq_state));
+
+	icp->ics = ics;
+	ics->icp = icp;
+
+	for (i = 0; i < nr_irqs; i++) {
+		ics->irqs[i].server = 0;
+		ics->irqs[i].priority = 0xff;
+		ics->irqs[i].saved_priority = 0xff;
+		ics->irqs[i].rejected = 0;
+		ics->irqs[i].masked_pending = 0;
+	}
+
+	spapr_register_hypercall(H_CPPR, h_cppr);
+	spapr_register_hypercall(H_IPI, h_ipi);
+	spapr_register_hypercall(H_XIRR, h_xirr);
+	spapr_register_hypercall(H_EOI, h_eoi);
+
+	spapr_rtas_register("ibm,set-xive", rtas_set_xive);
+	spapr_rtas_register("ibm,get-xive", rtas_get_xive);
+	spapr_rtas_register("ibm,int-off", rtas_int_off);
+	spapr_rtas_register("ibm,int-on", rtas_int_on);
+
+	return icp;
+}
+
+void kvm__irq_line(struct kvm *kvm, int irq, int level)
+{
+	/* Route me to ICS, which routes to ICP, which eventually
+	 * does a kvm_cpu__irq(vcpu, POWER7_EXT_IRQ, 1)
+	 */
+	xics_dprintf("Raising IRQ %d -> %d\n", irq, level);
+	ics_set_irq_msi(kvm->icp->ics, irq - kvm->icp->ics->offset, level);
+}
diff --git a/tools/kvm/powerpc/xics.h b/tools/kvm/powerpc/xics.h
new file mode 100644
index 0000000..144915b
--- /dev/null
+++ b/tools/kvm/powerpc/xics.h
@@ -0,0 +1,23 @@ 
+/*
+ * PAPR Virtualized Interrupt System, aka ICS/ICP aka xics
+ *
+ * Copyright 2011 Matt Evans <matt@ozlabs.org>, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+
+#ifndef XICS_H
+#define XICS_H
+
+#define XICS_IPI        0x2
+
+struct kvm_cpu;
+struct icp_state;
+
+struct icp_state *xics_system_init(unsigned int nr_irqs, unsigned int nr_cpus);
+void xics_cpu_register(struct kvm_cpu *vcpu);
+int xics_alloc_irqnum(void);
+
+#endif