diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index d3e2d60..0ff9dcf 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -2141,6 +2141,57 @@ associated with the service will be forgotten, and subsequent RTAS
 calls by the guest for that service will be passed to userspace to be
 handled.
 
+4.80 KVM_CREATE_IRQCHIP_ARGS
+
+Capability: KVM_CAP_IRQCHIP_ARGS
+Architectures: ppc
+Type: vm ioctl
+Parameters: struct kvm_irqchip_args
+Returns: 0 on success, -1 on error
+
+Creates an interrupt controller model in the kernel.  The type field
+of the argument struct indicates the interrupt controller architecture
+of the virtual machine.  Currently the only value permitted for the
+type field is 1, indicating the XICS (eXternal Interrupt Controller
+Specification) model defined in PAPR.  For XICS, this ioctl indicates
+to the kernel that an interrupt controller presentation (ICP) entity
+should be created for every vcpu, and interrupt controller source
+(ICS) entities should be created to accommodate the sources that are
+configured with the KVM_IRQCHIP_SET_SOURCES ioctl.
+
+4.81 KVM_IRQCHIP_GET_SOURCES
+
+Capability: KVM_CAP_IRQCHIP_ARGS
+Architectures: ppc
+Type: vm ioctl
+Parameters: struct kvm_irq_sources
+Returns: 0 on success, -1 on error
+
+Copies configuration and status information about a range of interrupt
+sources into a user-supplied buffer.  The argument struct gives the
+starting interrupt source number and the number of interrupt sources.
+The user buffer is an array of 64-bit quantities, one per interrupt
+source, with (from the least- significant bit) 32 bits of interrupt
+server number, 8 bits of priority, and 1 bit each for a
+level-sensitive indicator, a masked indicator, and a pending
+indicator.  If some of the sources in the range don't exist, that is,
+have not yet been created with the KVM_IRQCHIP_SET_SOURCES ioctl,
+this returns an ENODEV error.
+
+4.82 KVM_IRQCHIP_SET_SOURCES
+
+Capability: KVM_CAP_IRQCHIP_ARGS
+Architectures: ppc
+Type: vm ioctl
+Parameters: struct kvm_irq_sources
+Returns: 0 on success, -1 on error
+
+Sets the configuration and status for a range of interrupt sources
+from information supplied in a user-supplied buffer, creating the
+sources if they don't already exist.  The argument struct gives the
+starting interrupt source number and the number of interrupt sources.
+The user buffer is formatted as for KVM_IRQCHIP_GET_SOURCES.
+
 
 5. The kvm_run structure
 ------------------------
diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index 5a56e1c..17c9a15 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -142,6 +142,7 @@ extern int kvmppc_mmu_hv_init(void);
 extern int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, bool data);
 extern int kvmppc_st(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, bool data);
 extern void kvmppc_book3s_queue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec);
+extern void kvmppc_book3s_dequeue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec);
 extern void kvmppc_inject_interrupt(struct kvm_vcpu *vcpu, int vec, u64 flags);
 extern void kvmppc_set_bat(struct kvm_vcpu *vcpu, struct kvmppc_bat *bat,
 			   bool upper, u32 val);
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 8295dc7..b05e7cd 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -188,6 +188,10 @@ struct kvmppc_linear_info {
 	int		 type;
 };
 
+/* XICS components, defined in boo3s_xics.c */
+struct kvmppc_xics;
+struct kvmppc_icp;
+
 /*
  * The reverse mapping array has one entry for each HPTE,
  * which stores the guest's view of the second word of the HPTE
@@ -256,6 +260,7 @@ struct kvm_arch {
 #ifdef CONFIG_PPC_BOOK3S_64
 	struct list_head spapr_tce_tables;
 	struct list_head rtas_tokens;
+	struct kvmppc_xics *xics;
 #endif
 };
 
@@ -572,6 +577,9 @@ struct kvm_vcpu_arch {
 	u64 busy_stolen;
 	u64 busy_preempt;
 #endif
+#ifdef CONFIG_PPC_BOOK3S_64
+	struct kvmppc_icp *icp; /* XICS presentation controller */
+#endif
 };
 
 /* Values for vcpu->arch.state */
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index be611f6..f0fd22b 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -130,6 +130,13 @@ extern long kvmppc_prepare_vrma(struct kvm *kvm,
 extern void kvmppc_map_vrma(struct kvm_vcpu *vcpu,
 			struct kvm_memory_slot *memslot, unsigned long porder);
 extern int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu);
+extern int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd);
+extern int kvmppc_xics_ioctl(struct kvm *kvm, unsigned ioctl, unsigned long arg);
+extern int kvmppc_xics_create(struct kvm *kvm, struct kvm_irqchip_args *args);
+extern void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu);
+extern int kvmppc_xics_create_icp(struct kvm_vcpu *vcpu);
+extern void kvmppc_xics_free(struct kvm *kvm);
+
 extern long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
 				struct kvm_create_spapr_tce *args);
 extern long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
@@ -167,6 +174,8 @@ extern int kvm_vm_ioctl_get_htab_fd(struct kvm *kvm, struct kvm_get_htab_fd *);
 extern int kvm_vm_ioctl_rtas_define_token(struct kvm *kvm, void __user *argp);
 extern int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu);
 extern void kvmppc_rtas_tokens_free(struct kvm *kvm);
+extern int kvmppc_xics_set_xive(struct kvm *kvm, u32 irq, u32 server, u32 priority);
+extern int kvmppc_xics_get_xive(struct kvm *kvm, u32 irq, u32 *server, u32 *priority);
 
 /*
  * Cuts out inst bits with ordering according to spec.
@@ -263,6 +272,16 @@ static inline void kvmppc_set_xics_phys(int cpu, unsigned long addr)
 
 static inline void kvm_linear_init(void)
 {}
+
+#endif
+
+#ifdef CONFIG_PPC_BOOK3S_64
+static inline int kvmppc_xics_enabled(struct kvm *kvm)
+{
+	return kvm->arch.xics != NULL;
+}
+#else
+static inline int kvmppc_xics_enabled(struct kvm *kvm) { return 0; }
 #endif
 
 static inline void kvmppc_set_epr(struct kvm_vcpu *vcpu, u32 epr)
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
index 432132c..e2eb04c 100644
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -87,6 +87,7 @@ kvm-book3s_64-module-objs := \
 	book3s.o \
 	book3s_64_vio.o \
 	book3s_rtas.o \
+	book3s_xics.o \
 	$(kvm-book3s_64-objs-y)
 
 kvm-objs-$(CONFIG_KVM_BOOK3S_64) := $(kvm-book3s_64-module-objs)
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index 6548445..c5a4478 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -104,7 +104,7 @@ static int kvmppc_book3s_vec2irqprio(unsigned int vec)
 	return prio;
 }
 
-static void kvmppc_book3s_dequeue_irqprio(struct kvm_vcpu *vcpu,
+void kvmppc_book3s_dequeue_irqprio(struct kvm_vcpu *vcpu,
 					  unsigned int vec)
 {
 	unsigned long old_pending = vcpu->arch.pending_exceptions;
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 567c264..aa3a0db 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -528,6 +528,14 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
 
 		/* Send the error out to userspace via KVM_RUN */
 		return rc;
+	case H_XIRR:
+	case H_CPPR:
+	case H_EOI:
+	case H_IPI:
+		if (kvmppc_xics_enabled(vcpu->kvm)) {
+			ret = kvmppc_xics_hcall(vcpu, req);
+			break;
+		} /* fallthrough */
 	default:
 		return RESUME_HOST;
 	}
@@ -886,6 +894,13 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
 	spin_lock_init(&vcpu->arch.tbacct_lock);
 	vcpu->arch.busy_preempt = TB_NIL;
 
+	/* Create the XICS */
+	if (kvmppc_xics_enabled(kvm)) {
+		err = kvmppc_xics_create_icp(vcpu);
+		if (err < 0)
+			goto free_vcpu;
+	}
+
 	kvmppc_mmu_book3s_hv_init(vcpu);
 
 	vcpu->arch.state = KVMPPC_VCPU_NOTREADY;
@@ -937,6 +952,8 @@ void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
 		kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.vpa.pinned_addr);
 	spin_unlock(&vcpu->arch.vpa_update_lock);
 	kvm_vcpu_uninit(vcpu);
+	if (kvmppc_xics_enabled(vcpu->kvm))
+		kvmppc_xics_free_icp(vcpu);
 	kmem_cache_free(kvm_vcpu_cache, vcpu);
 }
 
@@ -1882,6 +1899,9 @@ void kvmppc_core_destroy_vm(struct kvm *kvm)
 
 	kvmppc_rtas_tokens_free(kvm);
 
+	if (kvmppc_xics_enabled(kvm))
+		kvmppc_xics_free(kvm);
+
 	kvmppc_free_hpt(kvm);
 	WARN_ON(!list_empty(&kvm->arch.spapr_tce_tables));
 }
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index 73ed11c..9b2237f 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -1069,6 +1069,13 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
 	if (err < 0)
 		goto uninit_vcpu;
 
+	/* Create the XICS */
+	if (kvmppc_xics_enabled(kvm)) {
+		err = kvmppc_xics_create_icp(vcpu);
+		if (err < 0)
+			goto free_vcpu;
+	}
+
 	return vcpu;
 
 uninit_vcpu:
@@ -1085,6 +1092,8 @@ void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
 {
 	struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu);
 
+	if (kvmppc_xics_enabled(vcpu->kvm))
+		kvmppc_xics_free_icp(vcpu);
 	free_page((unsigned long)vcpu->arch.shared & PAGE_MASK);
 	kvm_vcpu_uninit(vcpu);
 	kfree(vcpu_book3s->shadow_vcpu);
@@ -1293,6 +1302,7 @@ int kvmppc_core_init_vm(struct kvm *kvm)
 {
 #ifdef CONFIG_PPC64
 	INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables);
+	INIT_LIST_HEAD(&kvm->arch.rtas_tokens);
 #endif
 
 	return 0;
@@ -1303,6 +1313,9 @@ void kvmppc_core_destroy_vm(struct kvm *kvm)
 #ifdef CONFIG_PPC64
 	WARN_ON(!list_empty(&kvm->arch.spapr_tce_tables));
 #endif
+	if (kvmppc_xics_enabled(kvm))
+		kvmppc_xics_free(kvm);
+
 }
 
 static int kvmppc_book3s_init(void)
diff --git a/arch/powerpc/kvm/book3s_pr_papr.c b/arch/powerpc/kvm/book3s_pr_papr.c
index 4efa4a4..94cec5b 100644
--- a/arch/powerpc/kvm/book3s_pr_papr.c
+++ b/arch/powerpc/kvm/book3s_pr_papr.c
@@ -227,6 +227,15 @@ static int kvmppc_h_pr_put_tce(struct kvm_vcpu *vcpu)
 	return EMULATE_DONE;
 }
 
+static int kvmppc_h_pr_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd)
+{
+	long rc = kvmppc_xics_hcall(vcpu, cmd);
+	if (rc == H_TOO_HARD)
+		return EMULATE_FAIL;
+	kvmppc_set_gpr(vcpu, 3, rc);
+	return EMULATE_DONE;
+}
+
 int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd)
 {
 	switch (cmd) {
@@ -246,6 +255,13 @@ int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd)
 		clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
 		vcpu->stat.halt_wakeup++;
 		return EMULATE_DONE;
+	case H_XIRR:
+	case H_CPPR:
+	case H_EOI:
+	case H_IPI:
+		if (kvmppc_xics_enabled(vcpu->kvm))
+			return kvmppc_h_pr_xics_hcall(vcpu, cmd);
+		break;
 	case H_RTAS:
 		if (list_empty(&vcpu->kvm->arch.rtas_tokens))
 			return RESUME_HOST;
diff --git a/arch/powerpc/kvm/book3s_rtas.c b/arch/powerpc/kvm/book3s_rtas.c
index 8a324e8..6a6c1fe 100644
--- a/arch/powerpc/kvm/book3s_rtas.c
+++ b/arch/powerpc/kvm/book3s_rtas.c
@@ -18,12 +18,61 @@
 #include <asm/rtas.h>
 
 
+static void kvm_rtas_set_xive(struct kvm_vcpu *vcpu, struct rtas_args *args)
+{
+	u32 irq, server, priority;
+	int rc;
+
+	if (args->nargs != 3 || args->nret != 1) {
+		rc = -3;
+		goto out;
+	}
+
+	irq = args->args[0];
+	server = args->args[1];
+	priority = args->args[2];
+
+	rc = kvmppc_xics_set_xive(vcpu->kvm, irq, server, priority);
+	if (rc)
+		rc = -3;
+out:
+	args->rets[0] = rc;
+}
+
+static void kvm_rtas_get_xive(struct kvm_vcpu *vcpu, struct rtas_args *args)
+{
+	u32 irq, server, priority;
+	int rc;
+
+	if (args->nargs != 1 || args->nret != 3) {
+		rc = -3;
+		goto out;
+	}
+
+	irq = args->args[0];
+
+	server = priority = 0;
+	rc = kvmppc_xics_get_xive(vcpu->kvm, irq, &server, &priority);
+	if (rc) {
+		rc = -3;
+		goto out;
+	}
+
+	args->rets[1] = server;
+	args->rets[2] = priority;
+out:
+	args->rets[0] = rc;
+}
+
 struct rtas_handler {
 	void (*handler)(struct kvm_vcpu *vcpu, struct rtas_args *args);
 	char *name;
 };
 
-static struct rtas_handler rtas_handlers[] = { };
+static struct rtas_handler rtas_handlers[] = {
+	{ .name = "ibm,set-xive", .handler = kvm_rtas_set_xive },
+	{ .name = "ibm,get-xive", .handler = kvm_rtas_get_xive },
+};
 
 struct rtas_token_definition {
 	struct list_head list;
diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c
new file mode 100644
index 0000000..7749060
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_xics.c
@@ -0,0 +1,1101 @@
+/*
+ * Copyright 2012 Michael Ellerman, IBM Corporation.
+ * Copyright 2012 Benjamin Herrenschmidt, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/kvm_host.h>
+#include <linux/err.h>
+#include <linux/gfp.h>
+
+#include <asm/uaccess.h>
+#include <asm/kvm_book3s.h>
+#include <asm/kvm_ppc.h>
+#include <asm/hvcall.h>
+#include <asm/xics.h>
+#include <asm/debug.h>
+
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+#include "book3s_xics.h"
+
+#define XICS_DBG(fmt...) do { } while (0)
+//#define XICS_DBG(fmt...) do { trace_printk(fmt); } while (0)
+
+/*
+ * LOCKING
+ * =======
+ *
+ * Each ICS has a mutex protecting the information about the IRQ
+ * sources and avoiding simultaneous deliveries if the same interrupt.
+ *
+ * ICP operations are done via a single compare & swap transaction
+ * (most ICP state fits in the union kvmppc_icp_state)
+ */
+
+/*
+ * TODO
+ * ====
+ *
+ * - To speed up resends, keep a bitmap of "resend" set bits in the
+ *   ICS
+ *
+ * - Speed up server# -> ICP lookup (array ? hash table ?)
+ *
+ * - Make ICS lockless as well, or at least a per-interrupt lock or hashed
+ *   locks array to improve scalability
+ *
+ * - ioctl's to save/restore the entire state for snapshot & migration
+ */
+
+/* -- ICS routines -- */
+
+static void icp_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
+			    u32 new_irq);
+
+static void ics_deliver_irq(struct kvmppc_xics *xics, u32 irq, u32 level)
+{
+	struct ics_irq_state *state;
+	struct kvmppc_ics *ics;	
+	u16 src;
+
+	XICS_DBG("ics deliver %#x (level: %d)\n", irq, level);
+
+	ics = kvmppc_xics_find_ics(xics, irq, &src);
+	if (!ics) {
+		XICS_DBG("ics_deliver_irq: IRQ 0x%06x not found !\n", irq);
+		return;
+	}
+	state = &ics->irq_state[src];
+
+	/*
+	 * We set state->asserted locklessly. This should be fine as
+	 * we are the only setter, thus concurrent access is undefined
+	 * to begin with.
+	 */
+	if (level == KVM_INTERRUPT_SET_LEVEL)
+		state->asserted = 1;
+	else if (level == KVM_INTERRUPT_UNSET) {
+		state->asserted = 0;
+		return;
+	}
+
+	/* Attempt delivery */
+	icp_deliver_irq(xics, NULL, irq);
+}
+
+static void ics_check_resend(struct kvmppc_xics *xics, struct kvmppc_ics *ics,
+			     struct kvmppc_icp *icp)
+{
+	int i;
+
+	mutex_lock(&ics->lock);
+
+	for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) {
+		struct ics_irq_state *state = &ics->irq_state[i];
+
+		if (!state->resend)
+			continue;
+
+		XICS_DBG("resend %#x prio %#x\n", state->number,
+			      state->priority);
+
+		mutex_unlock(&ics->lock);
+		icp_deliver_irq(xics, icp, state->number);
+		mutex_lock(&ics->lock);
+	}
+
+	mutex_unlock(&ics->lock);
+}
+
+int kvmppc_xics_set_xive(struct kvm *kvm, u32 irq, u32 server, u32 priority)
+{
+	struct kvmppc_xics *xics = kvm->arch.xics;
+	struct kvmppc_icp *icp;
+	struct kvmppc_ics *ics;
+	struct ics_irq_state *state;
+	u16 src;
+	bool deliver;
+
+	if (!xics)
+		return -ENODEV;
+
+	ics = kvmppc_xics_find_ics(xics, irq, &src);
+	if (!ics)
+		return -EINVAL;
+	state = &ics->irq_state[src];
+
+	icp = kvmppc_xics_find_server(kvm, server);
+	if (!icp)
+		return -EINVAL;
+
+	mutex_lock(&ics->lock);
+
+	XICS_DBG("set_xive %#x server %#x prio %#x MP:%d RS:%d\n",
+		 irq, server, priority,
+		 state->masked_pending, state->resend);
+
+	state->server = server;
+	state->priority = priority;
+	deliver = false;
+	if ((state->masked_pending || state->resend) && priority != MASKED) {
+		state->masked_pending = 0;
+		deliver = true;
+	}
+
+	mutex_unlock(&ics->lock);
+
+	if (deliver)
+		icp_deliver_irq(xics, icp, irq);
+
+	return 0;
+}
+
+int kvmppc_xics_get_xive(struct kvm *kvm, u32 irq, u32 *server, u32 *priority)
+{
+	struct kvmppc_xics *xics = kvm->arch.xics;
+	struct kvmppc_ics *ics;
+	struct ics_irq_state *state;
+	u16 src;
+
+	if (!xics)
+		return -ENODEV;
+
+	ics = kvmppc_xics_find_ics(xics, irq, &src);
+	if (!ics)
+		return -EINVAL;
+	state = &ics->irq_state[src];
+
+	mutex_lock(&ics->lock);
+	*server = state->server;
+	*priority = state->priority;
+	mutex_unlock(&ics->lock);
+
+	return 0;
+}
+
+/* -- ICP routines, including hcalls -- */
+
+static inline bool icp_try_update(struct kvmppc_icp *icp,
+				  union kvmppc_icp_state old,
+				  union kvmppc_icp_state new,
+				  bool change_self)
+{
+	bool success;
+
+	/* Calculate new output value */
+	new.out_ee = (new.xisr && (new.pending_pri < new.cppr));
+
+	/* Attempt atomic update */
+	success = cmpxchg64(&icp->state.raw, old.raw, new.raw) == old.raw;
+	if (!success)
+		goto bail;
+
+	XICS_DBG("UPD [%04x] - C:%02x M:%02x PP: %02x PI:%06x R:%d O:%d\n",
+		 icp->vcpu->vcpu_id,
+		 old.cppr, old.mfrr, old.pending_pri, old.xisr,
+		 old.need_resend, old.out_ee);
+	XICS_DBG("UPD        - C:%02x M:%02x PP: %02x PI:%06x R:%d O:%d\n",
+		 new.cppr, new.mfrr, new.pending_pri, new.xisr,
+		 new.need_resend, new.out_ee);
+	/*
+	 * Check for output state update
+	 *
+	 * Note that this is racy since another processor could be updating
+	 * the state already. This is why we never clear the interrupt output
+	 * here, we only ever set it. The clear only happens prior to doing
+	 * an update and only by the processor itself. Currently we do it
+	 * in Accept (H_XIRR) and Up_Cppr (H_XPPR).
+	 *
+	 * We also do not try to figure out whether the EE state has changed,
+	 * we unconditionally set it if the new state calls for it for the
+	 * same reason.
+	 */
+	if (new.out_ee) {
+		kvmppc_book3s_queue_irqprio(icp->vcpu,
+					    BOOK3S_INTERRUPT_EXTERNAL_LEVEL);
+		if (!change_self)
+			kvm_vcpu_kick(icp->vcpu);
+	}
+ bail:
+	return success;
+}
+
+static void icp_check_resend(struct kvmppc_xics *xics,
+			     struct kvmppc_icp *icp)
+{
+	u32 icsid;
+	
+	/* Order this load with the test for need_resend in the caller */
+	smp_rmb();
+	for_each_set_bit(icsid, icp->resend_map, xics->max_icsid + 1) {
+		struct kvmppc_ics *ics = xics->ics[icsid];
+
+		if (!test_and_clear_bit(icsid, icp->resend_map))
+			continue;
+		if (!ics)
+			continue;
+		ics_check_resend(xics, ics, icp);
+	}
+}
+
+static bool icp_try_to_deliver(struct kvmppc_icp *icp, u32 irq, u8 priority,
+			       u32 *reject)
+{
+	union kvmppc_icp_state old_state, new_state;
+	bool success;
+
+	XICS_DBG("try deliver %#x(P:%#x) to server %#x\n", irq, priority,
+		 icp->vcpu->vcpu_id);
+
+	do {
+		old_state = new_state = ACCESS_ONCE(icp->state);
+
+		*reject = 0;
+
+		/* See if we can deliver */
+		success = new_state.cppr > priority &&
+			new_state.mfrr > priority &&
+			new_state.pending_pri > priority;
+
+		/*
+		 * If we can, check for a rejection and perform the
+		 * delivery
+		 */
+		if (success) {
+			*reject = new_state.xisr;
+			new_state.xisr = irq;
+			new_state.pending_pri = priority;
+		} else {
+			/*
+			 * If we failed to deliver we set need_resend
+			 * so a subsequent CPPR state change causes us
+			 * to try a new delivery.
+			 */
+			new_state.need_resend = true;
+		}
+
+	} while (!icp_try_update(icp, old_state, new_state, false));
+
+	return success;
+}
+
+static void icp_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
+			    u32 new_irq)
+{
+	struct ics_irq_state *state;
+	struct kvmppc_ics *ics;
+	u32 reject;
+	u16 src;	
+
+	/*
+	 * This is used both for initial delivery of an interrupt and
+	 * for subsequent rejection.
+	 *
+	 * Rejection can be racy vs. resends. We have evaluated the
+	 * rejection in an atomic ICP transaction which is now complete,
+	 * so potentially the ICP can already accept the interrupt again.
+	 *
+	 * So we need to retry the delivery. Essentially the reject path
+	 * boils down to a failed delivery. Always.
+	 *
+	 * Now the interrupt could also have moved to a different target,
+	 * thus we may need to re-do the ICP lookup as well
+	 */
+	 
+ again:
+	/* Get the ICS state and lock it */
+	ics = kvmppc_xics_find_ics(xics, new_irq, &src);
+	if (!ics) {
+		XICS_DBG("icp_deliver_irq: IRQ 0x%06x not found !\n", new_irq);
+		return;
+	}
+	state = &ics->irq_state[src];
+
+	/* Get a lock on the ICS */
+	mutex_lock(&ics->lock);
+
+	/* Get our server */
+	if (!icp || state->server != icp->vcpu->vcpu_id) {
+		icp = kvmppc_xics_find_server(xics->kvm, state->server);
+		if (!icp) {
+			pr_warning("icp_deliver_irq: IRQ 0x%06x server 0x%x"
+				   " not found !\n", new_irq, state->server);
+			goto out;
+		}
+	}
+
+	/* Clear the resend bit of that interrupt */
+	state->resend = 0;
+
+	/*
+	 * If masked, bail out
+	 *
+	 * Note: PAPR doesn't mention anything about masked pending
+	 * when doing a resend, only when doing a delivery.
+	 *
+	 * However that would have the effect of losing a masked
+	 * interrupt that was rejected and isn't consistent with
+	 * the whole masked_pending business which is about not
+	 * losing interrupts that occur while masked.
+	 *
+	 * I don't differenciate normal deliveries and resends, this
+	 * implementation will differ from PAPR and not lose such
+	 * interrupts.
+	 */
+	if (state->priority == MASKED) {
+		XICS_DBG("irq %#x masked pending\n", new_irq);
+		state->masked_pending = 1;
+		goto out;
+	}
+
+	/*
+	 * Try the delivery, this will set the need_resend flag
+	 * in the ICP as part of the atomic transaction if the
+	 * delivery is not possible.
+	 *
+	 * Note that if successful, the new delivery might have itself
+	 * rejected an interrupt that was "delivered" before we took the
+	 * icp mutex.
+	 *
+	 * In this case we do the whole sequence all over again for the
+	 * new guy. We cannot assume that the rejected interrupt is less
+	 * favored than the new one, and thus doesn't need to be delivered,
+	 * because by the time we exit icp_try_to_deliver() the target
+	 * processor may well have alrady consumed & completed it, and thus
+	 * the rejected interrupt might actually be already acceptable.
+	 */
+	if (icp_try_to_deliver(icp, new_irq, state->priority, &reject)) {
+		/*
+		 * Delivery was successful, did we reject somebody else ?
+		 */
+		if (reject && reject != XICS_IPI) {
+			mutex_unlock(&ics->lock);
+			new_irq = reject;
+			goto again;
+		}
+	} else {
+		/*
+		 * We failed to deliver the interrupt we need to set the
+		 * resend map bit and mark the ICS state as needing a resend
+		 */
+		set_bit(ics->icsid, icp->resend_map);
+		state->resend = 1;
+
+		/*
+		 * If the need_resend flag got cleared in the ICP some time
+		 * between icp_try_to_deliver() atomic update and now, then
+		 * we know it might have missed the resend_map bit. So we
+		 * retry
+		 */
+		smp_mb();
+		if (!icp->state.need_resend) {
+			mutex_unlock(&ics->lock);
+			goto again;
+		}
+	}
+ out:
+	mutex_unlock(&ics->lock);
+}
+
+static void icp_down_cppr(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
+			  u8 new_cppr)
+{
+	union kvmppc_icp_state old_state, new_state;
+	bool resend;
+
+	/*
+	 * This handles several related states in one operation:
+	 *
+	 * ICP State: Down_CPPR
+	 *
+	 * Load CPPR with new value and if the XISR is 0
+	 * then check for resends:
+	 *
+	 * ICP State: Resend
+	 *
+	 * If MFRR is more favored than CPPR, check for IPIs
+	 * and notify ICS of a potential resend. This is done
+	 * asynchronously (when used in real mode, we will have
+	 * to exit here).
+	 *
+	 * We do not handle the complete Check_IPI as documented
+	 * here. In the PAPR, this state will be used for both
+	 * Set_MFRR and Down_CPPR. However, we know that we aren't
+	 * changing the MFRR state here so we don't need to handle
+	 * the case of an MFRR causing a reject of a pending irq,
+	 * this will have been handled when the MFRR was set in the
+	 * first place.
+	 *
+	 * Thus we don't have to handle rejects, only resends.
+	 *
+	 * When implementing real mode for HV KVM, resend will lead to
+	 * a H_TOO_HARD return and the whole transaction will be handled
+	 * in virtual mode.
+	 */
+	do {
+		old_state = new_state = ACCESS_ONCE(icp->state);
+
+		/* Down_CPPR */
+		new_state.cppr = new_cppr;
+
+		/*
+		 * Cut down Resend / Check_IPI / IPI
+		 *
+		 * The logic is that we cannot have a pending interrupt
+		 * trumped by an IPI at this point (see above), so we
+		 * know that either the pending interrupt is already an
+		 * IPI (in which case we don't care to override it) or
+		 * it's either more favored than us or non existent
+		 */
+		if (new_state.mfrr < new_cppr &&
+		    new_state.mfrr <= new_state.pending_pri) {
+			WARN_ON(new_state.xisr != XICS_IPI &&
+				new_state.xisr != 0);
+			new_state.pending_pri = new_state.mfrr;
+			new_state.xisr = XICS_IPI;
+		}
+
+		/* Latch/clear resend bit */
+		resend = new_state.need_resend;
+		new_state.need_resend = 0;
+
+	} while (!icp_try_update(icp, old_state, new_state, true));
+
+	/*
+	 * Now handle resend checks. Those are asynchronous to the ICP
+	 * state update in HW (ie bus transactions) so we can handle them
+	 * separately here too
+	 */
+	if (resend)
+		icp_check_resend(xics, icp);
+}
+
+static noinline unsigned long h_xirr(struct kvm_vcpu *vcpu)
+{
+	union kvmppc_icp_state old_state, new_state;
+	struct kvmppc_icp *icp = vcpu->arch.icp;
+	u32 xirr;
+
+	/* First, remove EE from the processor */
+	kvmppc_book3s_dequeue_irqprio(icp->vcpu,
+				      BOOK3S_INTERRUPT_EXTERNAL_LEVEL);
+
+	/*
+	 * ICP State: Accept_Interrupt
+	 *
+	 * Return the pending interrupt (if any) along with the
+	 * current CPPR, then clear the XISR & set CPPR to the
+	 * pending priority
+	 */
+	do {
+		old_state = new_state = ACCESS_ONCE(icp->state);
+
+		xirr = old_state.xisr | (((u32)old_state.cppr) << 24);
+		if (!old_state.xisr)
+			break;
+		new_state.cppr = new_state.pending_pri;
+		new_state.pending_pri = 0xff;
+		new_state.xisr = 0;
+
+	} while (!icp_try_update(icp, old_state, new_state, true));
+
+	XICS_DBG("h_xirr vcpu %d xirr %#x\n", vcpu->vcpu_id, xirr);
+
+	return xirr;
+}
+
+static noinline int h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
+			  unsigned long mfrr)
+{
+        union kvmppc_icp_state old_state, new_state;
+	struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
+	struct kvmppc_icp *icp;
+	u32 reject;
+	bool resend;
+	bool local;
+
+	XICS_DBG("h_ipi vcpu %d to server %lu mfrr %#lx\n",
+			vcpu->vcpu_id, server, mfrr);
+
+	local = vcpu->vcpu_id == server;
+	if (local)
+		icp = vcpu->arch.icp;
+	else
+		icp = kvmppc_xics_find_server(vcpu->kvm, server);
+	if (!icp)
+		return H_PARAMETER;
+
+	/*
+	 * ICP state: Set_MFRR
+	 *
+	 * If the CPPR is more favored than the new MFRR, then
+	 * nothing needs to be rejected as there can be no XISR to
+	 * reject.  If the MFRR is being made less favored then
+	 * there might be a previously-rejected interrupt needing
+	 * to be resent.
+	 *
+	 * If the CPPR is less favored, then we might be replacing
+	 * an interrupt, and thus need to possibly reject it as in
+	 *
+	 * ICP state: Check_IPI
+	 */
+	do {
+		old_state = new_state = ACCESS_ONCE(icp->state);
+
+		/* Set_MFRR */
+		new_state.mfrr = mfrr;
+
+		/* Check_IPI */
+		reject = 0;
+		resend = false;
+		if (mfrr < new_state.cppr) {
+			/* Reject a pending interrupt if not an IPI */
+			if (mfrr <= new_state.pending_pri)
+				reject = new_state.xisr;
+			new_state.pending_pri = mfrr;
+			new_state.xisr = XICS_IPI;
+		}
+
+		if (mfrr > old_state.mfrr && mfrr > new_state.cppr) {
+			resend = new_state.need_resend;
+			new_state.need_resend = 0;
+		}
+	} while (!icp_try_update(icp, old_state, new_state, local));
+
+	/* Handle reject */
+	if (reject && reject != XICS_IPI)
+		icp_deliver_irq(xics, icp, reject);
+		
+	/* Handle resend */
+	if (resend)
+		icp_check_resend(xics, icp);
+
+	return H_SUCCESS;
+}
+
+static noinline void h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr)
+{
+	union kvmppc_icp_state old_state, new_state;
+	struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
+	struct kvmppc_icp *icp = vcpu->arch.icp;
+	u32 reject;
+
+	XICS_DBG("h_cppr vcpu %d cppr %#lx\n", vcpu->vcpu_id, cppr);
+
+	/*
+	 * ICP State: Set_CPPR
+	 *
+	 * We can safely compare the new value with the current
+	 * value outside of the transaction as the CPPR is only
+	 * ever changed by the processor on itself
+	 */
+	if (cppr > icp->state.cppr)
+		icp_down_cppr(xics, icp, cppr);
+	else if (cppr == icp->state.cppr)
+		return;
+
+	/*
+	 * ICP State: Up_CPPR
+	 *
+	 * The processor is raising its priority, this can result
+	 * in a rejection of a pending interrupt:
+	 *
+	 * ICP State: Reject_Current
+	 *
+	 * We can remove EE from the current processor, the update
+	 * transaction will set it again if needed
+	 */
+	kvmppc_book3s_dequeue_irqprio(icp->vcpu,
+				      BOOK3S_INTERRUPT_EXTERNAL_LEVEL);
+
+	do {
+		old_state = new_state = ACCESS_ONCE(icp->state);
+
+		reject = 0;
+		new_state.cppr = cppr;
+
+		if (cppr <= new_state.pending_pri) {
+			reject = new_state.xisr;
+			new_state.xisr = 0;
+			new_state.pending_pri = 0xff;
+		}
+
+	} while (!icp_try_update(icp, old_state, new_state, true));
+
+	/*
+	 * Check for rejects. They are handled by doing a new delivery
+	 * attempt (see comments in icp_deliver_irq).
+	 */
+	if (reject && reject != XICS_IPI)
+		icp_deliver_irq(xics, icp, reject);
+}
+
+static noinline int h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
+{
+	struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
+	struct kvmppc_icp *icp = vcpu->arch.icp;
+	struct kvmppc_ics *ics;
+	struct ics_irq_state *state;
+	u32 irq = xirr & 0x00ffffff;
+	u16 src;
+
+	XICS_DBG("h_eoi vcpu %d eoi %#lx\n", vcpu->vcpu_id, xirr);
+
+	/*
+	 * ICP State: EOI
+	 *
+	 * Note: If EOI is incorrectly used by SW to lower the CPPR
+	 * value (ie more favored), we do not check for rejection of
+	 * a pending interrupt, this is a SW error and PAPR sepcifies
+	 * that we don't have to deal with it.
+	 *
+	 * The sending of an EOI to the ICS is handled after the
+	 * CPPR update
+	 *
+	 * ICP State: Down_CPPR which we handle
+	 * in a separate function as it's shared with H_CPPR.
+	 */
+	icp_down_cppr(xics, icp, xirr >> 24);
+
+	/* IPIs have no EOI */
+	if (irq == XICS_IPI)
+		return H_SUCCESS;
+	/*
+	 * EOI handling: If the interrupt is still asserted, we need to
+	 * resend it. We can take a lockless "peek" at the ICS state here.
+	 *
+	 * "Message" interrupts will never have "asserted" set
+	 */
+	ics = kvmppc_xics_find_ics(xics, irq, &src);
+	if (!ics) {
+		XICS_DBG("h_eoi: IRQ 0x%06x not found !\n", irq);
+		return H_PARAMETER;
+	}
+	state = &ics->irq_state[src];
+
+	/* Still asserted, resend it */
+	if (state->asserted)
+		icp_deliver_irq(xics, icp, irq);
+
+	return H_SUCCESS;
+}
+
+int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 req)
+{
+	unsigned long res;
+	int rc = H_SUCCESS;
+
+	/* Check if we have an ICP */
+	if (!vcpu->arch.icp || !vcpu->kvm->arch.xics)
+		return H_HARDWARE;
+
+	switch (req) {
+	case H_XIRR:
+		res = h_xirr(vcpu);
+		kvmppc_set_gpr(vcpu, 4, res);
+		break;
+	case H_CPPR:
+		h_cppr(vcpu, kvmppc_get_gpr(vcpu, 4));
+		break;
+	case H_EOI:
+		rc = h_eoi(vcpu, kvmppc_get_gpr(vcpu, 4));
+		break;
+	case H_IPI:
+		rc = h_ipi(vcpu, kvmppc_get_gpr(vcpu, 4),
+			   kvmppc_get_gpr(vcpu, 5));
+		break;
+	}
+
+	return rc;
+}
+
+
+/* -- Initialisation code etc. -- */
+
+static int xics_debug_show(struct seq_file *m, void *private)
+{
+	struct kvmppc_xics *xics = m->private;
+	struct kvm *kvm = xics->kvm;
+	struct kvm_vcpu *vcpu;
+	int icsid, i;
+
+	if (!kvm)
+		return 0;
+
+	seq_printf(m, "=========\nICP state\n=========\n");
+
+	kvm_for_each_vcpu(i, vcpu, kvm) {
+		struct kvmppc_icp *icp = vcpu->arch.icp;
+		union kvmppc_icp_state state;
+
+		if (!icp)
+			continue;
+
+		state.raw = ACCESS_ONCE(icp->state.raw);
+		seq_printf(m, "cpu server %#x XIRR:%#x PPRI:%#x CPPR:%#x "
+			   "MFRR:%#x OUT:%d NR:%d\n", vcpu->vcpu_id, state.xisr,
+			   state.pending_pri, state.cppr, state.mfrr,
+			   state.out_ee, state.need_resend);
+	}
+
+	for (icsid = 0; icsid <= KVMPPC_XICS_MAX_ICS_ID; icsid++) {
+		struct kvmppc_ics *ics = xics->ics[icsid];
+
+		if (!ics)
+			continue;
+
+		seq_printf(m, "=========\nICS state for ICS 0x%x\n=========\n",
+			   icsid);
+
+		mutex_lock(&ics->lock);
+
+		for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) {
+			struct ics_irq_state *irq = &ics->irq_state[i];
+
+			seq_printf(m, "irq 0x%06x: server %#x prio %#x save"
+				   " prio %#x asserted %d resend %d masked"
+				   " pending %d\n",
+				   irq->number, irq->server, irq->priority,
+				   irq->saved_priority, irq->asserted,
+				   irq->resend, irq->masked_pending);
+
+		}
+		mutex_unlock(&ics->lock);
+	}
+	return 0;
+}
+
+static int xics_debug_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, xics_debug_show, inode->i_private);
+}
+
+static const struct file_operations xics_debug_fops = {
+	.open = xics_debug_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+static void xics_debugfs_init(struct kvmppc_xics *xics)
+{
+	char *name;
+
+	name = kasprintf(GFP_KERNEL, "kvm-xics-%p", xics);
+	if (!name) {
+		pr_err("%s: no memory for name\n", __func__);
+		return;
+	}
+
+	xics->dentry = debugfs_create_file(name, S_IRUGO, powerpc_debugfs_root,
+					   xics, &xics_debug_fops);
+
+	pr_debug("%s: created %s\n", __func__, name);
+	kfree(name);
+}
+
+static struct kvmppc_ics *kvmppc_xics_create_ics(struct kvmppc_xics *xics,
+						 int irq)
+{
+	struct kvmppc_ics *ics;
+	int i, icsid;
+
+	icsid = irq >> KVMPPC_XICS_ICS_SHIFT;
+
+	mutex_lock(&xics->kvm->lock);
+
+	/* ICS already exists - somebody else got here first */
+	if (xics->ics[icsid])
+		goto out;
+
+	/* Create the ICS */
+	ics = kzalloc(sizeof(struct kvmppc_ics), GFP_KERNEL);
+	if (!ics)
+		goto out;
+
+	mutex_init(&ics->lock);
+	ics->icsid = icsid;
+
+	for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) {
+		ics->irq_state[i].number = (icsid << KVMPPC_XICS_ICS_SHIFT) | i;
+		ics->irq_state[i].priority = MASKED;
+		ics->irq_state[i].saved_priority = MASKED;
+	}
+	smp_wmb();
+	xics->ics[icsid] = ics;
+
+	if (icsid > xics->max_icsid)
+		xics->max_icsid = icsid;
+
+ out:
+	mutex_unlock(&xics->kvm->lock);
+	return xics->ics[icsid];
+}
+
+int kvmppc_xics_create_icp(struct kvm_vcpu *vcpu)
+{
+	struct kvmppc_icp *icp;
+
+	icp = kzalloc(sizeof(struct kvmppc_icp), GFP_KERNEL);
+	if (!icp)
+		return -ENOMEM;
+
+	icp->vcpu = vcpu;
+	icp->state.mfrr = MASKED;
+	icp->state.pending_pri = MASKED;
+	vcpu->arch.icp = icp;
+
+	XICS_DBG("created server for vcpu %d\n", vcpu->vcpu_id);
+
+	return 0;
+}
+
+void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu)
+{
+	if (!vcpu->arch.icp)
+		return;
+	kfree(vcpu->arch.icp);
+	vcpu->arch.icp = NULL;
+}
+
+void kvmppc_xics_free(struct kvm *kvm)
+{
+	struct kvmppc_xics *xics = kvm->arch.xics;
+	int i;
+
+	if (!xics)
+		return;
+
+	lockdep_assert_held(&kvm->lock);
+
+	debugfs_remove(xics->dentry);
+
+	if (xics->kvm) {
+		xics->kvm->arch.xics = NULL;
+		xics->kvm = NULL;
+	}
+
+	for (i = 0; i <= xics->max_icsid; i++) {
+		if (xics->ics[i])
+			kfree(xics->ics[i]);
+	}
+	kfree(xics);
+}
+
+static int kvm_xics_get_sources(struct kvm *kvm, struct kvm_irq_sources *srcs)
+{
+	int ret = 0;
+	struct kvmppc_xics *xics = kvm->arch.xics;
+	struct kvmppc_ics *ics;
+	struct ics_irq_state *irqp;
+	u64 __user *ubufp;
+	u16 idx;
+	u64 val;
+	long int i, irq, nirq;
+
+	irq = srcs->irq;
+	ubufp = srcs->irqbuf;
+
+	while (srcs->nr_irqs > 0 && !ret) {
+		ics = kvmppc_xics_find_ics(xics, irq, &idx);
+		if (!ics)
+			return -ENOENT;
+		nirq = KVMPPC_XICS_IRQ_PER_ICS - idx;
+		if (nirq > srcs->nr_irqs)
+			nirq = srcs->nr_irqs;
+		srcs->nr_irqs -= nirq;
+		irq += nirq;
+
+		irqp = &ics->irq_state[idx];
+		mutex_lock(&ics->lock);
+		for (i = 0; i < nirq; ++i, ++irqp, ++ubufp) {
+			ret = -ENOENT;
+			if (!irqp->exists)
+				break;
+			val = irqp->server;
+			val |= ((u64)irqp->priority << KVM_IRQ_PRIORITY_SHIFT);
+			if (irqp->priority == MASKED)
+				val |= KVM_IRQ_MASKED;
+			if (irqp->asserted)
+				val |= KVM_IRQ_LEVEL_SENSITIVE |
+					KVM_IRQ_PENDING;
+			else if (irqp->masked_pending || irqp->resend)
+				val |= KVM_IRQ_PENDING;
+			ret = -EFAULT;
+			if (__put_user(val, ubufp))
+				break;
+			ret = 0;
+		}
+		mutex_unlock(&ics->lock);
+	}
+
+	return ret;
+}
+
+static int kvm_xics_set_sources(struct kvm *kvm, struct kvm_irq_sources *srcs)
+{
+	int ret = 0;
+	struct kvmppc_xics *xics = kvm->arch.xics;
+	struct kvmppc_ics *ics;
+	struct ics_irq_state *irqp;
+	u64 __user *ubufp;
+	u16 idx;
+	u64 val;
+	long int i, irq, nirq;
+
+	irq = srcs->irq;
+	ubufp = srcs->irqbuf;
+
+	if (irq < KVMPPC_XICS_FIRST_IRQ ||
+	    irq + srcs->nr_irqs > KVMPPC_XICS_NR_IRQS)
+		return -ENOENT;
+
+	while (srcs->nr_irqs > 0 && !ret) {
+		ics = kvmppc_xics_find_ics(xics, irq, &idx);
+		if (!ics) {
+			ics = kvmppc_xics_create_ics(xics, irq);
+			if (!ics)
+				return -ENOMEM;
+		}
+		nirq = KVMPPC_XICS_IRQ_PER_ICS - idx;
+		if (nirq > srcs->nr_irqs)
+			nirq = srcs->nr_irqs;
+		srcs->nr_irqs -= nirq;
+		irq += nirq;
+
+		irqp = &ics->irq_state[idx];
+		ubufp = srcs->irqbuf;
+		for (i = 0; i < nirq; ++i, ++irqp, ++ubufp) {
+			ret = -EFAULT;
+			if (__get_user(val, ubufp))
+				break;
+			ret = 0;
+
+			mutex_lock(&ics->lock);
+			irqp->server = val & KVM_IRQ_SERVER_MASK;
+			irqp->priority = val >> KVM_IRQ_PRIORITY_SHIFT;
+			irqp->resend = 0;
+			irqp->masked_pending = 0;
+			irqp->asserted = 0;
+			if ((val & KVM_IRQ_PENDING) &&
+			    (val & KVM_IRQ_LEVEL_SENSITIVE))
+				irqp->asserted = 1;
+			irqp->exists = 1;
+			mutex_unlock(&ics->lock);
+
+			if (val & KVM_IRQ_PENDING)
+				icp_deliver_irq(xics, NULL, irqp->number);
+		}
+	}
+
+	return ret;
+}
+
+/* -- ioctls -- */
+
+int kvmppc_xics_create(struct kvm *kvm, struct kvm_irqchip_args *args)
+{
+	struct kvmppc_xics *xics;
+	int rc = 0;
+
+	mutex_lock(&kvm->lock);
+
+	/* Already there ? */
+	if (kvm->arch.xics)
+		return -EEXIST;
+
+	xics = kzalloc(sizeof(*xics), GFP_KERNEL);
+	if (!xics) {
+		rc = -ENOMEM;
+		goto out;
+	}
+
+	xics->kvm = kvm;
+	kvm->arch.xics = xics;
+	xics_debugfs_init(xics);
+
+out:
+	mutex_unlock(&kvm->lock);
+	return rc;
+}
+
+static int kvm_vm_ioctl_xics_irq(struct kvm *kvm, struct kvm_irq_level *args)
+{
+	struct kvmppc_xics *xics;
+
+	/* locking against multiple callers? */
+
+	xics = kvm->arch.xics;
+	if (!xics)
+		return -ENODEV;
+
+	switch (args->level) {
+	case KVM_INTERRUPT_SET:
+	case KVM_INTERRUPT_SET_LEVEL:
+	case KVM_INTERRUPT_UNSET:
+		ics_deliver_irq(xics, args->irq, args->level);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+int kvmppc_xics_ioctl(struct kvm *kvm, unsigned ioctl, unsigned long arg)
+{
+	void __user *argp = (void __user *)arg;
+	int rc;
+
+	BUILD_BUG_ON(sizeof(union kvmppc_icp_state) != sizeof(unsigned long));
+
+	switch (ioctl) {
+	case KVM_IRQ_LINE: {
+		struct kvm_irq_level args;
+
+		rc = -EFAULT;
+		if (copy_from_user(&args, argp, sizeof(args)))
+			break;
+		rc = kvm_vm_ioctl_xics_irq(kvm, &args);
+		break;
+	}
+
+	case KVM_IRQCHIP_GET_SOURCES: {
+		struct kvm_irq_sources sources;
+
+		rc = -EFAULT;
+		if (copy_from_user(&sources, argp, sizeof(sources)))
+			break;
+		if (!access_ok(VERIFY_WRITE, sources.irqbuf,
+			       sources.nr_irqs * sizeof(u64)))
+			break;
+		rc = kvm_xics_get_sources(kvm, &sources);
+		break;
+	}
+
+	case KVM_IRQCHIP_SET_SOURCES: {
+		struct kvm_irq_sources sources;
+
+		rc = -EFAULT;
+		if (copy_from_user(&sources, argp, sizeof(sources)))
+			break;
+		if (!access_ok(VERIFY_READ, sources.irqbuf,
+			       sources.nr_irqs * sizeof(u64)))
+			break;
+		rc = kvm_xics_set_sources(kvm, &sources);
+		break;
+	}
+
+	default:
+		rc = -ENOTTY;
+		break;
+	}
+
+	return rc;
+}
diff --git a/arch/powerpc/kvm/book3s_xics.h b/arch/powerpc/kvm/book3s_xics.h
new file mode 100644
index 0000000..0e20a51
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_xics.h
@@ -0,0 +1,111 @@
+/*
+ * Copyright 2012 Michael Ellerman, IBM Corporation.
+ * Copyright 2012 Benjamin Herrenschmidt, IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef _KVM_PPC_BOOK3S_XICS_H
+#define _KVM_PPC_BOOK3S_XICS_H
+
+/*
+ * We use a two-level tree to store interrupt source information.
+ * There are up to 1024 ICS nodes, each of which can represent
+ * 1024 sources.
+ */
+#define KVMPPC_XICS_MAX_ICS_ID	1023
+#define KVMPPC_XICS_ICS_SHIFT	10
+#define KVMPPC_XICS_IRQ_PER_ICS	(1 << KVMPPC_XICS_ICS_SHIFT)
+#define KVMPPC_XICS_SRC_MASK	(KVMPPC_XICS_IRQ_PER_ICS - 1)
+
+/*
+ * Interrupt source numbers below this are reserved, for example
+ * 0 is "no interrupt", and 2 is used for IPIs.
+ */
+#define KVMPPC_XICS_FIRST_IRQ	16
+#define KVMPPC_XICS_NR_IRQS	((KVMPPC_XICS_MAX_ICS_ID + 1) * KVMPPC_XICS_IRQ_PER_ICS)
+
+/* Priority value to use for disabling an interrupt */
+#define MASKED	0xff
+
+/* State for one irq source */
+struct ics_irq_state {
+	u32 number;
+	u32 server;
+	u8  priority;
+	u8  saved_priority; /* currently unused */
+	u8  resend;
+	u8  masked_pending;
+	u8  asserted; /* Only for LSI */
+	u8  exists;
+};
+
+/* Atomic ICP state, updated with a single compare & swap */
+union kvmppc_icp_state {
+	unsigned long raw;
+	struct {
+		u8 out_ee : 1;
+		u8 need_resend : 1;
+		u8 cppr;
+		u8 mfrr;
+		u8 pending_pri;
+		u32 xisr;
+	};
+};
+
+/* One bit per ICS */
+#define ICP_RESEND_MAP_SIZE	(KVMPPC_XICS_MAX_ICS_ID / BITS_PER_LONG + 1)
+
+struct kvmppc_icp {
+	struct kvm_vcpu *vcpu;
+	union kvmppc_icp_state state;
+	unsigned long resend_map[ICP_RESEND_MAP_SIZE];
+};
+
+struct kvmppc_ics {
+	struct mutex lock;
+	u16 icsid;
+	struct ics_irq_state irq_state[KVMPPC_XICS_IRQ_PER_ICS];
+};
+
+struct kvmppc_xics {
+	struct kvm *kvm;
+	struct dentry *dentry;
+	u32 max_icsid;
+	struct kvmppc_ics *ics[KVMPPC_XICS_MAX_ICS_ID + 1];
+};
+
+static inline struct kvmppc_icp *kvmppc_xics_find_server(struct kvm *kvm,
+							 u32 nr)
+{
+	struct kvm_vcpu *vcpu = NULL;
+	int i;
+
+	kvm_for_each_vcpu(i, vcpu, kvm) {
+		if (nr == vcpu->vcpu_id)
+			return vcpu->arch.icp;
+	}
+	return NULL;
+}
+
+static inline struct kvmppc_ics *kvmppc_xics_find_ics(struct kvmppc_xics *xics,
+						      u32 irq, u16 *source)
+{
+	u32 icsid = irq >> KVMPPC_XICS_ICS_SHIFT;
+	u16 src = irq & KVMPPC_XICS_SRC_MASK;
+	struct kvmppc_ics *ics;
+
+	if (source)
+		*source = src;
+	if (icsid > KVMPPC_XICS_MAX_ICS_ID)
+		return NULL;
+	ics = xics->ics[icsid];
+	if (!ics)
+		return NULL;
+	return ics;
+}
+
+
+#endif /* _KVM_PPC_BOOK3S_XICS_H */
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 1772883..3bcc030 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -383,6 +383,7 @@ int kvm_dev_ioctl_check_extension(long ext)
 		break;
 #ifdef CONFIG_PPC_BOOK3S_64
 	case KVM_CAP_PPC_GET_SMMU_INFO:
+	case KVM_CAP_IRQCHIP_ARGS:
 		r = 1;
 		break;
 #endif
@@ -1002,6 +1003,28 @@ long kvm_arch_vm_ioctl(struct file *filp,
 		r = kvm_vm_ioctl_rtas_define_token(kvm, argp);
 		break;
 	}
+	case KVM_IRQ_LINE:
+	case KVM_IRQCHIP_GET_SOURCES:
+	case KVM_IRQCHIP_SET_SOURCES: {
+		struct kvm *kvm = filp->private_data;
+
+		r = -ENOTTY;
+		if (kvmppc_xics_enabled(kvm))
+			r = kvmppc_xics_ioctl(kvm, ioctl, arg);
+		break;
+	}
+	case KVM_CREATE_IRQCHIP_ARGS: {
+		struct kvm *kvm = filp->private_data;
+		struct kvm_irqchip_args args;
+
+		r = -EFAULT;
+		if (copy_from_user(&args, argp, sizeof(args)))
+			break;
+		r = -EINVAL;
+		if (args.type == KVM_IRQCHIP_TYPE_XICS)
+			r = kvmppc_xics_create(kvm, &args);
+		break;
+	}
 #endif /* CONFIG_PPC_BOOK3S_64 */
 	default:
 		r = -ENOTTY;
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 1e2fda0..25d73c0 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -115,6 +115,7 @@ struct kvm_irq_level {
 	 * ACPI gsi notion of irq.
 	 * For IA-64 (APIC model) IOAPIC0: irq 0-23; IOAPIC1: irq 24-47..
 	 * For X86 (standard AT mode) PIC0/1: irq 0-15. IOAPIC0: 0-23..
+	 * On powerpc SPAPR, the ICS source number, level is ignored.
 	 */
 	union {
 		__u32 irq;
@@ -146,6 +147,15 @@ struct kvm_pit_config {
 
 #define KVM_PIT_SPEAKER_DUMMY     1
 
+/* for KVM_CREATE_IRQCHIP_ARGS */
+struct kvm_irqchip_args {
+	__u64 type;
+	__u64 param;
+};
+
+/* values for type */
+#define KVM_IRQCHIP_TYPE_XICS	1	/* Power server external intr ctrler */
+
 #define KVM_EXIT_UNKNOWN          0
 #define KVM_EXIT_EXCEPTION        1
 #define KVM_EXIT_IO               2
@@ -663,6 +673,7 @@ struct kvm_ppc_smmu_info {
 #define KVM_CAP_S390_CSS_SUPPORT 85
 #define KVM_CAP_PPC_EPR 86
 #define KVM_CAP_PPC_RTAS 87
+#define KVM_CAP_IRQCHIP_ARGS 88
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -805,6 +816,21 @@ struct kvm_msi {
 	__u8  pad[16];
 };
 
+struct kvm_irq_sources {
+	__u32 irq;
+	__u32 nr_irqs;
+	__u64 __user *irqbuf;
+};
+
+/* irqbuf entries are laid out like this: */
+#define KVM_IRQ_SERVER_SHIFT	0
+#define KVM_IRQ_SERVER_MASK	0xffffffffULL
+#define KVM_IRQ_PRIORITY_SHIFT	32
+#define KVM_IRQ_PRIORITY_MASK	0xff
+#define KVM_IRQ_LEVEL_SENSITIVE	(1ULL << 40)
+#define KVM_IRQ_MASKED		(1ULL << 41)
+#define KVM_IRQ_PENDING		(1ULL << 42)
+
 /*
  * ioctls for VM fds
  */
@@ -892,6 +918,9 @@ struct kvm_s390_ucas_mapping {
 #define KVM_PPC_GET_HTAB_FD	  _IOW(KVMIO,  0xaa, struct kvm_get_htab_fd)
 /* Available with KVM_CAP_PPC_RTAS */
 #define KVM_PPC_RTAS_DEFINE_TOKEN _IOW(KVMIO,  0xab, struct kvm_rtas_token_args)
+#define KVM_CREATE_IRQCHIP_ARGS   _IOW(KVMIO,  0xac, struct kvm_irqchip_args)
+#define KVM_IRQCHIP_GET_SOURCES	  _IOW(KVMIO,  0xad, struct kvm_irq_sources)
+#define KVM_IRQCHIP_SET_SOURCES	  _IOW(KVMIO,  0xae, struct kvm_irq_sources)
 
 /*
  * ioctls for vcpu fds
