diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index f0e0c6a..0221996 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -140,6 +140,7 @@ extern int kvmppc_mmu_hv_init(void);
 extern int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, bool data);
 extern int kvmppc_st(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, bool data);
 extern void kvmppc_book3s_queue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec);
+extern void kvmppc_book3s_dequeue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec);
 extern void kvmppc_inject_interrupt(struct kvm_vcpu *vcpu, int vec, u64 flags);
 extern void kvmppc_set_bat(struct kvm_vcpu *vcpu, struct kvmppc_bat *bat,
 			   bool upper, u32 val);
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index 5c631e4..b57fb3ae 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -104,7 +104,7 @@ static int kvmppc_book3s_vec2irqprio(unsigned int vec)
 	return prio;
 }
 
-static void kvmppc_book3s_dequeue_irqprio(struct kvm_vcpu *vcpu,
+void kvmppc_book3s_dequeue_irqprio(struct kvm_vcpu *vcpu,
 					  unsigned int vec)
 {
 	unsigned long old_pending = vcpu->arch.pending_exceptions;
diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c
index 5638e21..254d0fe 100644
--- a/arch/powerpc/kvm/book3s_xics.c
+++ b/arch/powerpc/kvm/book3s_xics.c
@@ -25,6 +25,7 @@
 #define MASKED	0xff
 
 #define XICS_DBG(fmt...) do { } while(0)
+//#define XICS_DBG(fmt...) do { trace_printk(fmt); } while(0)
 
 #undef DEBUG_REALMODE
 
@@ -32,11 +33,11 @@
  * LOCKING
  * =======
  *
- * Each ICP has its own lock, and there is one lock for the ICS (ie. all
- * information about irq sources).
+ * Each ICS has a mutex protecting the information about the IRQ
+ * sources and avoiding simultaneous deliveries if the same interrupt.
  *
- * The ICS lock nests inside any of the ICP locks. ie. you are allowed
- * to take the ICS lock while holding an ICP lock, but not vice versa.
+ * ICP operations are done via a single compare & swap transaction
+ * (most ICP state fits in the union kvmppc_icp_state)
  */
 
 /*
@@ -48,6 +49,21 @@
  * 10 bit
  */
 
+/*
+ * TODO
+ * ====
+ *
+ * - To speed up resends, keep a bitmap of "resend" set bits in the
+ *   ICS
+ *
+ * - Speed up server# -> ICP lookup (array ? hash table ?)
+ *
+ * - Make ICS lockless as well, or at least a per-interrupt lock or hashed
+ *   locks array to improve scalability
+ *
+ * - ioctl's to save/restore the entire state for snapshot & migration
+ */
+
 #define KVMPPC_XICS_MAX_BUID	0xfff
 #define KVMPPC_XICS_IRQ_COUNT	0x1000
 #define KVMPPC_XICS_BUID_SHIFT	12
@@ -67,18 +83,25 @@ struct ics_irq_state {
 #define ICP_RESEND_MAP_SIZE	\
 	((KVMPPC_XICS_MAX_BUID + BITS_PER_LONG - 1) / BITS_PER_LONG)
 
+/* Atomic ICP state, updated with a single compare & swap */
+union kvmppc_icp_state {
+	unsigned long raw;
+	struct {
+		u8 out_ee : 1;
+		u8 need_resend : 1;
+		u8 cppr;
+		u8 mfrr;
+		u8 pending_pri;
+		u32 xisr;
+	};
+};
+
 struct kvmppc_icp {
-	struct mutex lock;
 	struct kvm_vcpu *vcpu;
-	u32 pending_irq;	/* XISR */
-	u8  pending_priority;
-	u8  current_priority;	/* CPPR */
-	u8  mfrr;		/* MFRR */
-	bool need_resend;
+	union kvmppc_icp_state state;
 	unsigned long resend_map[ICP_RESEND_MAP_SIZE];
 };
 
-
 struct kvmppc_ics {
 	struct mutex lock;
 	u16 buid;
@@ -112,6 +135,11 @@ static struct kvmppc_ics *kvmppc_xics_find_ics(struct kvmppc_xics *xics,
 	u16 src = irq & KVMPPC_XICS_SRC_MASK;
 	struct kvmppc_ics *ics;
 
+	if (WARN_ON_ONCE(!buid || buid > KVMPPC_XICS_MAX_BUID)) {
+		XICS_DBG("kvmppc_xics_find_ics: irq %#x BUID out of range !\n",
+			 irq);
+		return NULL;
+	}
 	ics = xics->ics[buid - 1];
 	if (!ics)
 		return NULL;
@@ -125,123 +153,43 @@ static struct kvmppc_ics *kvmppc_xics_find_ics(struct kvmppc_xics *xics,
 
 /* -- ICS routines -- */
 
-static void icp_deliver_irq(struct kvmppc_xics *xics,
-			    struct kvmppc_icp *icp,
-			    struct kvmppc_ics *ics, u16 src);
-
-static void __ics_reject_irq(struct kvmppc_icp *icp,
-			     struct kvmppc_ics *ics, u16 src)
-{
-	struct ics_irq_state *state = &ics->irq_state[src];
-
-	XICS_DBG("server %d reject src %#x\n", icp->vcpu->vcpu_id, src);
-
-	/* XXX check if it still level & asserted ? */
-	state->resend = 1;
-	set_bit(ics->buid, icp->resend_map);
-	icp->need_resend = true;
-}
-
-static void ics_reject_irq(struct kvmppc_xics *xics,
-			   struct kvmppc_icp *icp, u32 irq)
-{
-	struct kvmppc_ics *ics;
-	u16 src;
-
-	lockdep_assert_held(&icp->lock);
-
-	ics = kvmppc_xics_find_ics(xics, irq, &src);
-	if (!ics) {
-		pr_warning("ics_reject_irq: IRQ 0x%06x not found !\n", irq);
-		return;
-	}
-
-	mutex_lock(&ics->lock);
-	__ics_reject_irq(icp, ics, src);
-	mutex_unlock(&ics->lock);
-}
-
-static void ics_eoi(struct kvmppc_xics *xics, struct kvmppc_icp *icp, 
-		    u32 irq)
-{
-	struct ics_irq_state *state;
-	struct kvmppc_ics *ics;
-	u16 src;
-
-	XICS_DBG("ics_eoi 0x%06x\n", irq);
-
-	lockdep_assert_held(&icp->lock);
-
-	ics = kvmppc_xics_find_ics(xics, irq, &src);
-	if (!ics) {
-		pr_warning("ics_eoi: IRQ 0x%06x not found !\n", irq);
-		return;
-	}
-	state = &ics->irq_state[src];
-
-	mutex_lock(&ics->lock);
-
-	/* If it's an LSI and still asserted we resend */
-	if (state->asserted) {
-		state->resend = 1;
-		set_bit(ics->buid, icp->resend_map);
-		icp->need_resend = true;
-	}
-
-	mutex_unlock(&ics->lock);
-}
+static void icp_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
+			    u32 new_irq);
 
-static void ics_deliver_irq(struct kvmppc_xics *xics,
-			    u32 irq, u32 level)
+static void ics_deliver_irq(struct kvmppc_xics *xics, u32 irq, u32 level)
 {
-	struct kvmppc_icp *icp;
 	struct ics_irq_state *state;
 	struct kvmppc_ics *ics;	
-	bool deliver = false;
-	u32 server;
 	u16 src;
 
-	XICS_DBG("ics deliver 0x%06x (level: %d)\n", irq, level);
+	XICS_DBG("ics deliver %#x (level: %d)\n", irq, level);
 
 	ics = kvmppc_xics_find_ics(xics, irq, &src);
 	if (!ics) {
-		pr_warning("ics_deliver_irq: IRQ 0x%06x not found !\n", irq);
+		XICS_DBG("ics_deliver_irq: IRQ 0x%06x not found !\n", irq);
 		return;
 	}
 	state = &ics->irq_state[src];
 
-	mutex_lock(&ics->lock);
-
+	/*
+	 * We set state->asserted locklessly. This should be fine as
+	 * we are the only setter, thus concurrent access is undefined
+	 * to begin with.
+	 */
 	if (level == KVM_INTERRUPT_SET_LEVEL)
 		state->asserted = 1;
 	else if (level == KVM_INTERRUPT_UNSET) {
 		state->asserted = 0;
-		goto unlock;
+		return;
 	}
 
-	if (state->priority != MASKED) {
-		deliver = true;
-		server = state->server;
-	} else {
-		XICS_DBG("masked pending\n");
-		state->masked_pending = 1;
-	}	
-
-unlock:
-	mutex_unlock(&ics->lock);
-
-	if (deliver) {
-		 icp = kvmppc_xics_find_server(xics->kvm, server);
-		 /* Configured server not found... XXX FALLBACK */
-		 if (icp) 
-			 icp_deliver_irq(xics, icp, ics, src);
-	}
+	/* Attempt delivery */
+	icp_deliver_irq(xics, NULL, irq);
 }
 
 static void ics_check_resend(struct kvmppc_xics *xics, struct kvmppc_ics *ics,
 			     struct kvmppc_icp *icp)
 {
-	u32 server = icp->vcpu->vcpu_id;
 	int i;
 
 	mutex_lock(&ics->lock);
@@ -249,40 +197,20 @@ static void ics_check_resend(struct kvmppc_xics *xics, struct kvmppc_ics *ics,
 	for (i = 0; i < ics->nr_irqs; i++) {
 		struct ics_irq_state *state = &ics->irq_state[i];
 
-		if (!state->resend || state->server != server)
+		if (!state->resend)
 			continue;
 
-		XICS_DBG("resend 0x%06x prio %d\n", state->number,
+		XICS_DBG("resend %#x prio %#x\n", state->number,
 			      state->priority);
 
-		state->resend = 0;
-		if (state->priority == MASKED)
-			continue;
-
 		mutex_unlock(&ics->lock);
-		icp_deliver_irq(xics, icp, ics, i);
+		icp_deliver_irq(xics, icp, state->number);
 		mutex_lock(&ics->lock);
 	}
 
 	mutex_unlock(&ics->lock);
 }
 
-static void icp_check_resend(struct kvmppc_xics *xics,
-			     struct kvmppc_icp *icp)
-{
-	u32 buid;
-	
-	for_each_set_bit(buid, icp->resend_map, xics->max_buid + 1) {
-		struct kvmppc_ics *ics = xics->ics[buid - 1];
-
-		if (!test_and_clear_bit(buid, icp->resend_map))
-			continue;
-		if (!ics)
-			continue;
-		ics_check_resend(xics, ics, icp);
-	}
-}
-
 int kvmppc_xics_set_xive(struct kvm *kvm, u32 irq, u32 server, u32 priority)
 {
 	struct kvmppc_xics *xics = kvm->arch.xics;
@@ -306,20 +234,22 @@ int kvmppc_xics_set_xive(struct kvm *kvm, u32 irq, u32 server, u32 priority)
 
 	mutex_lock(&ics->lock);
 
+	XICS_DBG("set_xive %#x server %#x prio %#x MP:%d RS:%d\n",
+		 irq, server, priority,
+		 state->masked_pending, state->resend);
+
 	state->server = server;
 	state->priority = priority;
 	deliver = false;
-	if (state->masked_pending && state->priority != MASKED) {
+	if ((state->masked_pending || state->resend) && priority != MASKED) {
 		state->masked_pending = 0;
 		deliver = true;
 	}
 
 	mutex_unlock(&ics->lock);
 
-	XICS_DBG("irq 0x%06x server %d prio %#x\n", irq, server, priority);
-
 	if (deliver)
-		icp_deliver_irq(xics, icp, ics, src);
+		icp_deliver_irq(xics, icp, irq);
 
 	return 0;
 }
@@ -344,218 +274,501 @@ int kvmppc_xics_get_xive(struct kvm *kvm, u32 irq, u32 *server, u32 *priority)
 	*priority = state->priority;
 	mutex_unlock(&ics->lock);
 
-	XICS_DBG("irq 0x%06x server %d prio %#x\n",
-		      irq, state->server, state->priority);
-
 	return 0;
 }
 
 /* -- ICP routines, including hcalls -- */
 
-static void icp_external_interrupt(struct kvmppc_icp *icp)
+static inline bool icp_try_update(struct kvmppc_icp *icp,
+				  union kvmppc_icp_state old,
+				  union kvmppc_icp_state new,
+				  bool change_self)
 {
-	unsigned int vec = BOOK3S_INTERRUPT_EXTERNAL_LEVEL;
-
-	lockdep_assert_held(&icp->lock);
-
-	kvmppc_book3s_queue_irqprio(icp->vcpu, vec);
-	kvm_vcpu_kick(icp->vcpu);
+	bool success;
+
+	/* Calculate new output value */
+	new.out_ee = (new.xisr && (new.pending_pri < new.cppr));
+
+	/* Attempt atomic update */
+	success = cmpxchg64(&icp->state.raw, old.raw, new.raw) == old.raw;
+	if (!success)
+		goto bail;
+
+	XICS_DBG("UPD [%04x] - C:%02x M:%02x PP: %02x PI:%06x R:%d O:%d\n",
+		 icp->vcpu->vcpu_id,
+		 old.cppr, old.mfrr, old.pending_pri, old.xisr,
+		 old.need_resend, old.out_ee);
+	XICS_DBG("UPD        - C:%02x M:%02x PP: %02x PI:%06x R:%d O:%d\n",
+		 new.cppr, new.mfrr, new.pending_pri, new.xisr,
+		 new.need_resend, new.out_ee);
+	/*
+	 * Check for output state update
+	 *
+	 * Note that this is racy since another processor could be updating
+	 * the state already. This is why we never clear the interrupt output
+	 * here, we only ever set it. The clear only happens prior to doing
+	 * an update and only by the processor itself. Currently we do it
+	 * in Accept (H_XIRR) and Up_Cppr (H_XPPR).
+	 *
+	 * We also do not try to figure out whether the EE state has changed,
+	 * we unconditionally set it if the new state calls for it for the
+	 * same reason.
+	 */
+	if (new.out_ee) {
+		kvmppc_book3s_queue_irqprio(icp->vcpu,
+					    BOOK3S_INTERRUPT_EXTERNAL_LEVEL);
+		if (!change_self)
+			kvm_vcpu_kick(icp->vcpu);
+	}
+ bail:
+	return success;
 }
 
-static void icp_deliver_irq(struct kvmppc_xics *xics,
-			    struct kvmppc_icp *icp,
-			    struct kvmppc_ics *ics, u16 src)
+static void icp_check_resend(struct kvmppc_xics *xics,
+			     struct kvmppc_icp *icp)
 {
-	struct ics_irq_state state_copy;
-
-	mutex_lock(&icp->lock);
-
-	/* Snapshot irq state */
-	mutex_lock(&ics->lock);
-	state_copy = ics->irq_state[src];
+	u32 buid;
+	
+	/* Order this load with the test for need_resend in the caller */
+	smp_rmb();
+	for_each_set_bit(buid, icp->resend_map, xics->max_buid + 1) {
+		struct kvmppc_ics *ics = xics->ics[buid - 1];
 
-	if (state_copy.priority > icp->current_priority) {
-		/* CPU is not interested in us */
-		__ics_reject_irq(icp, ics, src);
-		mutex_unlock(&ics->lock);
-		goto out;
+		if (!test_and_clear_bit(buid, icp->resend_map))
+			continue;
+		if (!ics)
+			continue;
+		ics_check_resend(xics, ics, icp);
 	}
+}
 
-	if (icp->pending_irq) {
-		/* An interrupt is pending */
-		if (icp->pending_priority <= state_copy.priority) {
-			/* pending irq is equally or more favoured */
-			__ics_reject_irq(icp, ics, src);
-			mutex_unlock(&ics->lock);
-			goto out;
+static bool icp_try_to_deliver(struct kvmppc_icp *icp, u32 irq, u8 priority,
+			       u32 *reject)
+{
+	union kvmppc_icp_state old_state, new_state;
+	bool success;
+
+	XICS_DBG("try deliver %#x(P:%#x) to server %#x\n", irq, priority,
+		 icp->vcpu->vcpu_id);
+
+	do {
+		old_state = new_state = ACCESS_ONCE(icp->state);
+
+		*reject = 0;
+
+		/* See if we can deliver */
+		success = new_state.cppr > priority &&
+			new_state.mfrr > priority &&
+			new_state.pending_pri > priority;
+
+		/*
+		 * If we can, check for a rejection and perform the
+		 * delivery
+		 */
+		if (success) {
+			*reject = new_state.xisr;
+			new_state.xisr = irq;
+			new_state.pending_pri = priority;
+		} else {
+			/*
+			 * If we failed to deliver we set need_resend
+			 * so a subsequent CPPR state change causes us
+			 * to try a new delivery.
+			 */
+			new_state.need_resend = true;
 		}
-	}
-	mutex_unlock(&ics->lock);
-
-	/* We are more favoured, reject pending irq */
-	if (icp->pending_irq)
-		ics_reject_irq(xics, icp, icp->pending_irq);
-
-	icp->pending_irq = state_copy.number;
-	icp->pending_priority = state_copy.priority;
 
-	XICS_DBG("irq 0x%06x pending on %d prio %#x\n",
-		     state_copy.number, state_copy.server, state_copy.priority);
+	} while(!icp_try_update(icp, old_state, new_state, false));
 
-	icp_external_interrupt(icp);
-
-out:
-	mutex_unlock(&icp->lock);
+	return success;
 }
 
-static void icp_check_ipi(struct kvmppc_xics *xics, struct kvmppc_icp *icp)
+static void icp_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
+			    u32 new_irq)
 {
-	lockdep_assert_held(&icp->lock);
-
-	if (icp->mfrr >= icp->current_priority)
+	struct ics_irq_state *state;
+	struct kvmppc_ics *ics;
+	u32 reject;
+	u16 src;	
+
+	/*
+	 * This is used both for initial delivery of an interrupt and
+	 * for subsequent rejection.
+	 *
+	 * Rejection can be racy vs. resends. We have evaluated the
+	 * rejection in an atomic ICP transaction which is now complete,
+	 * so potentially the ICP can already accept the interrupt again.
+	 *
+	 * So we need to retry the delivery. Essentially the reject path
+	 * boils down to a failed delivery. Always.
+	 *
+	 * Now the interrupt could also have moved to a different target,
+	 * thus we may need to re-do the ICP lookup as well
+	 */
+	 
+ again:
+	/* Get the ICS state and lock it */
+	ics = kvmppc_xics_find_ics(xics, new_irq, &src);
+	if (!ics) {
+		XICS_DBG("icp_deliver_irq: IRQ 0x%06x not found !\n", new_irq);
 		return;
+	}
+	state = &ics->irq_state[src];
 
-	XICS_DBG("cpu %d can take IPI mfrr=%#x\n",
-		     icp->vcpu->vcpu_id, icp->mfrr);
+	/* Get a lock on the ICS */
+	mutex_lock(&ics->lock);
 
-	if (icp->pending_irq) {
-		/* IPI is less favoured */
-		if (icp->pending_priority <= icp->mfrr) {
-			XICS_DBG("ODD: pending_prio=%#x pending_irq=%#x\n",
-				     icp->pending_priority, icp->pending_irq);
-			return;
+	/* Get our server */
+	if (!icp || state->server != icp->vcpu->vcpu_id) {
+		icp = kvmppc_xics_find_server(xics->kvm, state->server);
+		if (!icp) {
+			pr_warning("icp_deliver_irq: IRQ 0x%06x server 0x%x"
+				   " not found !\n", new_irq, state->server);
+			goto out;
 		}
+	}
 
-		/* IPI is more favoured, reject the other interrupt */
-		ics_reject_irq(xics, icp, icp->pending_irq);
+	/* Clear the resend bit of that interrupt */
+	state->resend = 0;
+
+	/*
+	 * If masked, bail out
+	 *
+	 * Note: PAPR doesn't mention anything about masked pending
+	 * when doing a resend, only when doing a delivery.
+	 *
+	 * However that would have the effect of losing a masked
+	 * interrupt that was rejected and isn't consistent with
+	 * the whole masked_pending business which is about not
+	 * losing interrupts that occur while masked.
+	 *
+	 * I don't differenciate normal deliveries and resends, this
+	 * implementation will differ from PAPR and not lose such
+	 * interrupts.
+	 */
+	if (state->priority == MASKED) {
+		XICS_DBG("irq %#x masked pending\n", new_irq);
+		state->masked_pending = 1;
+		goto out;
 	}
 
-	icp->pending_irq = XICS_IPI;
-	icp->pending_priority = icp->mfrr;
-	icp_external_interrupt(icp);
+	/*
+	 * Try the delivery, this will set the need_resend flag
+	 * in the ICP as part of the atomic transaction if the
+	 * delivery is not possible.
+	 *
+	 * Note that if successful, the new delivery might have itself
+	 * rejected an interrupt that was "delivered" before we took the
+	 * icp mutex.
+	 *
+	 * In this case we do the whole sequence all over again for the
+	 * new guy. We cannot assume that the rejected interrupt is less
+	 * favored than the new one, and thus doesn't need to be delivered,
+	 * because by the time we exit icp_try_to_deliver() the target
+	 * processor may well have alrady consumed & completed it, and thus
+	 * the rejected interrupt might actually be already acceptable.
+	 */
+	if (icp_try_to_deliver(icp, new_irq, state->priority, &reject)) {
+		/*
+		 * Delivery was successful, did we reject somebody else ?
+		 */
+		if (reject && reject != XICS_IPI) {
+			mutex_unlock(&ics->lock);
+			new_irq = reject;
+			goto again;
+		}
+	} else {
+		/*
+		 * We failed to deliver the interrupt we need to set the
+		 * resend map bit and mark the ICS state as needing a resend
+		 */
+		set_bit(ics->buid, icp->resend_map);
+		state->resend = 1;
+
+		/*
+		 * If the need_resend flag got cleared in the ICP some time
+		 * between icp_try_to_deliver() atomic update and now, then
+		 * we know it might have missed the resend_map bit. So we
+		 * retry
+		 */
+		smp_mb();
+		if (!icp->state.need_resend) {
+			mutex_unlock(&ics->lock);
+			goto again;
+		}
+	}
+ out:
+	mutex_unlock(&ics->lock);
 }
 
-static u32 icp_accept(struct kvm_vcpu *vcpu, struct kvmppc_icp *icp)
+static void icp_down_cppr(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
+			  u8 new_cppr)
 {
-	u32 xirr;
-
-	mutex_lock(&icp->lock);
-
-	kvmppc_core_dequeue_external(vcpu);
-
-	/* The XIRR is the pending interrupt & current priority */
-	xirr = icp->pending_irq | (icp->current_priority << 24);
-
-	/* The pending priority becomes current */
-	icp->current_priority = icp->pending_priority;
+	union kvmppc_icp_state old_state, new_state;
+	bool resend;
+
+	/*
+	 * This handles several related states in one operation:
+	 *
+	 * ICP State: Down_CPPR
+	 *
+	 * Load CPPR with new value and if the XISR is 0
+	 * then check for resends:
+	 *
+	 * ICP State: Resend
+	 *
+	 * If MFRR is more favored than CPPR, check for IPIs
+	 * and notify ICS of a potential resend. This is done
+	 * asynchronously (when used in real mode, we will have
+	 * to exit here).
+	 *
+	 * We do not handle the complete Check_IPI as documented
+	 * here. In the PAPR, this state will be used for both
+	 * Set_MFRR and Down_CPPR. However, we know that we aren't
+	 * changing the MFRR state here so we don't need to handle
+	 * the case of an MFRR causing a reject of a pending irq,
+	 * this will have been handled when the MFRR was set in the
+	 * first place.
+	 *
+	 * Thus we don't have to handle rejects, only resends.
+	 *
+	 * When implementing real mode for HV KVM, resend will lead to
+	 * a H_TOO_HARD return and the whole transaction will be handled
+	 * in virtual mode.
+	 */
+	do {
+		old_state = new_state = ACCESS_ONCE(icp->state);
+
+		/* Down_CPPR */
+		new_state.cppr = new_cppr;
+
+		/*
+		 * Cut down Resend / Check_IPI / IPI
+		 *
+		 * The logic is that we cannot have a pending interrupt
+		 * trumped by an IPI at this point (see above), so we
+		 * know that either the pending interrupt is already an
+		 * IPI (in which case we don't care to override it) or
+		 * it's either more favored than us or non existent
+		 */
+		if (new_state.mfrr < new_cppr &&
+		    new_state.mfrr <= new_state.pending_pri) {
+			WARN_ON(new_state.xisr != XICS_IPI &&
+				new_state.xisr != 0);
+			new_state.pending_pri = new_state.mfrr;
+			new_state.xisr = XICS_IPI;
+		}
 
-	/* Clear the pending interrupt */
-	icp->pending_irq = 0;
+		/* Latch/clear resend bit */
+		resend = new_state.need_resend;
+		new_state.need_resend = 0;
 
-	mutex_unlock(&icp->lock);
+	} while(!icp_try_update(icp, old_state, new_state, true));
 
-	return xirr;
+	/*
+	 * Now handle resend checks. Those are asynchronous to the ICP
+	 * state update in HW (ie bus transactions) so we can handle them
+	 * separately here too
+	 */
+	if (resend)
+		icp_check_resend(xics, icp);
 }
 
-static unsigned long h_xirr(struct kvm_vcpu *vcpu)
+static noinline unsigned long h_xirr(struct kvm_vcpu *vcpu)
 {
+	union kvmppc_icp_state old_state, new_state;
 	struct kvmppc_icp *icp = vcpu->arch.icp;
 	u32 xirr;
 
-	xirr = icp_accept(vcpu, icp);
+	/* First, remove EE from the processor */
+	kvmppc_book3s_dequeue_irqprio(icp->vcpu,
+				      BOOK3S_INTERRUPT_EXTERNAL_LEVEL);
+
+	/*
+	 * ICP State: Accept_Interrupt
+	 *
+	 * Return the pending interrupt (if any) along with the
+	 * current CPPR, then clear the XISR & set CPPR to the
+	 * pending priority
+	 */
+	do {
+		old_state = new_state = ACCESS_ONCE(icp->state);
+
+		xirr = old_state.xisr | (((u32)old_state.cppr) << 24);
+		new_state.cppr = new_state.pending_pri;
+		new_state.pending_pri = 0xff;
+		new_state.xisr = 0;
+
+	} while(!icp_try_update(icp, old_state, new_state, true));
 
 	XICS_DBG("h_xirr vcpu %d xirr %#x\n", vcpu->vcpu_id, xirr);
 
 	return xirr;
 }
 
-static int h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
-		 unsigned long mfrr)
+static noinline int h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
+			  unsigned long mfrr)
 {
+        union kvmppc_icp_state old_state, new_state;
 	struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
 	struct kvmppc_icp *icp;
+	u32 reject;
+	bool local;
 
 	XICS_DBG("h_ipi vcpu %d to server %lu mfrr %#lx\n",
 			vcpu->vcpu_id, server, mfrr);
 
-	icp = kvmppc_xics_find_server(vcpu->kvm, server);
+	local = vcpu->vcpu_id == server;
+	if (local)
+		icp = vcpu->arch.icp;
+	else
+		icp = kvmppc_xics_find_server(vcpu->kvm, server);
 	if (!icp)
 		return H_PARAMETER;
 
-	mutex_lock(&icp->lock);
+	/*
+	 * ICP state: Set_MFRR
+	 *
+	 * If the CPPR is more favored than the new MFRR, then
+	 * nothing needs to be done as there can be no XISR to
+	 * reject.
+	 *
+	 * If the CPPR is less favored, then we might be replacing
+	 * an interrupt, and thus need to possibly reject it as in
+	 *
+	 * ICP state: Check_IPI
+	 */
+	do {
+		old_state = new_state = ACCESS_ONCE(icp->state);
+
+		/* Set_MFRR */
+		new_state.mfrr = mfrr;
+
+		/* Check_IPI */
+		reject = 0;
+		if (mfrr < new_state.cppr) {
+			/* Reject a pending interrupt if not an IPI */
+			if (mfrr <= new_state.pending_pri)
+				reject = new_state.xisr;
+			new_state.pending_pri = mfrr;
+			new_state.xisr = XICS_IPI;
+		}
 
-	icp->mfrr = mfrr;
-	icp_check_ipi(xics, icp);
+	} while(!icp_try_update(icp, old_state, new_state, local));
 
-	mutex_unlock(&icp->lock);
+	/* Handle reject */
+	if (reject && reject != XICS_IPI)
+		icp_deliver_irq(xics, icp, reject);
+		
 
 	return H_SUCCESS;
 }
 
-static void h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr)
+static noinline void h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr)
 {
+	union kvmppc_icp_state old_state, new_state;
 	struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
 	struct kvmppc_icp *icp = vcpu->arch.icp;
-	u8 old_priority;
-	bool check_resend = false;
+	u32 reject;
 
 	XICS_DBG("h_cppr vcpu %d cppr %#lx\n", vcpu->vcpu_id, cppr);
 
-	mutex_lock(&icp->lock);
-
-	old_priority = icp->current_priority;
-	icp->current_priority = cppr;
+	/*
+	 * ICP State: Set_CPPR
+	 *
+	 * We can safely compare the new value with the current
+	 * value outside of the transaction as the CPPR is only
+	 * ever changed by the processor on itself
+	 */
+	if (cppr > icp->state.cppr)
+		icp_down_cppr(xics, icp, cppr);
+	else if (cppr == icp->state.cppr)
+		return;
 
-	if (icp->pending_irq &&
-	    icp->current_priority < icp->pending_priority) {
-		u32 pending = icp->pending_irq;
-		/* Pending irq is less favoured than our new priority */
-		icp->pending_irq = 0;
-		kvmppc_core_dequeue_external(vcpu);
-		ics_reject_irq(xics, icp, pending);
-	}
+	/*
+	 * ICP State: Up_CPPR
+	 *
+	 * The processor is raising its priority, this can result
+	 * in a rejection of a pending interrupt:
+	 *
+	 * ICP State: Reject_Current
+	 *
+	 * We can remove EE from the current processor, the update
+	 * transaction will set it again if needed
+	 */
+	kvmppc_book3s_dequeue_irqprio(icp->vcpu,
+				      BOOK3S_INTERRUPT_EXTERNAL_LEVEL);
+
+	do {
+		old_state = new_state = ACCESS_ONCE(icp->state);
+
+		reject = 0;
+		new_state.cppr = cppr;
+
+		if (cppr <= new_state.pending_pri) {
+			reject = new_state.xisr;
+			new_state.xisr = 0;
+			new_state.pending_pri = 0xff;
+		}
 
-	/* Check if there is anything we can accept now */
-	if (!icp->pending_irq)
-		icp_check_ipi(xics, icp);
-	if (!icp->pending_irq && icp->need_resend) {
-		check_resend = true;
-		icp->need_resend = false;
-	}
-	
-	mutex_unlock(&icp->lock);
+	} while(!icp_try_update(icp, old_state, new_state, true));
 
-	if (check_resend)
-		icp_check_resend(xics, icp);
+	/*
+	 * Check for rejects. They are handled by doing a new delivery
+	 * attempt (see comments in icp_deliver_irq).
+	 */
+	if (reject && reject != XICS_IPI)
+		icp_deliver_irq(xics, icp, reject);
 }
 
-static void h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
+static noinline int h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
 {
 	struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
 	struct kvmppc_icp *icp = vcpu->arch.icp;
-	bool check_resend = false;
+	struct kvmppc_ics *ics;
+	struct ics_irq_state *state;
+	u32 irq = xirr & 0x00ffffff;
+	u16 src;
 
 	XICS_DBG("h_eoi vcpu %d eoi %#lx\n", vcpu->vcpu_id, xirr);
 
-	mutex_lock(&icp->lock);
-
-	icp->current_priority = xirr >> 24;
-
-	/* If nothing is pending since accept, check for an IPI */
-	if (!icp->pending_irq)
-		icp_check_ipi(xics, icp);
-
-	if (!icp->pending_irq && icp->need_resend) {
-		check_resend = true;
-		icp->need_resend = false;
+	/*
+	 * ICP State: EOI
+	 *
+	 * Note: If EOI is incorrectly used by SW to lower the CPPR
+	 * value (ie more favored), we do not check for rejection of
+	 * a pending interrupt, this is a SW error and PAPR sepcifies
+	 * that we don't have to deal with it.
+	 *
+	 * The sending of an EOI to the ICS is handled after the
+	 * CPPR update
+	 *
+	 * ICP State: Down_CPPR which we handle
+	 * in a separate function as it's shared with H_CPPR.
+	 */
+	icp_down_cppr(xics, icp, xirr >> 24);
+
+	/* IPIs have no EOI */
+	if (irq == XICS_IPI)
+		return H_SUCCESS;
+	/*
+	 * EOI handling: If the interrupt is still asserted, we need to
+	 * resend it. We can take a lockless "peek" at the ICS state here.
+	 *
+	 * "Message" interrupts will never have "asserted" set
+	 */
+	ics = kvmppc_xics_find_ics(xics, irq, &src);
+	if (!ics) {
+		XICS_DBG("h_eoi: IRQ 0x%06x not found !\n", irq);
+		return H_PARAMETER;
 	}
+	state = &ics->irq_state[src];
 
-	ics_eoi(xics, icp, xirr & 0xFFFFFF);
-
-	mutex_unlock(&icp->lock);
+	/* Still asserted, resend it */
+	if (state->asserted)
+		icp_deliver_irq(xics, icp, irq);
 
-	if (check_resend)
-		icp_check_resend(xics, icp);
+	return H_SUCCESS;
 }
 
 int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 req)
@@ -576,7 +789,7 @@ int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 req)
 		h_cppr(vcpu, kvmppc_get_gpr(vcpu, 4));
 		break;
 	case H_EOI:
-		h_eoi(vcpu, kvmppc_get_gpr(vcpu, 4));
+		rc = h_eoi(vcpu, kvmppc_get_gpr(vcpu, 4));
 		break;
 	case H_IPI:
 		rc = h_ipi(vcpu, kvmppc_get_gpr(vcpu, 4),
@@ -604,18 +817,16 @@ static int xics_debug_show(struct seq_file *m, void *private)
 
 	kvm_for_each_vcpu(i, vcpu, kvm) {
 		struct kvmppc_icp *icp = vcpu->arch.icp;
+		union kvmppc_icp_state state;
 
 		if (!icp)
 			continue;
 
-		mutex_lock(&icp->lock);
-
-		seq_printf(m, "cpu server %#x pending %#x pending prio %#x cppr %#x "
-			   "mfrr %#x\n", vcpu->vcpu_id, icp->pending_irq,
-			   icp->pending_priority, icp->current_priority,
-			   icp->mfrr);
-
-		mutex_unlock(&icp->lock);
+		state.raw = ACCESS_ONCE(icp->state.raw);
+		seq_printf(m, "cpu server %#x XIRR:%#x PPRI:%#x CPPR:%#x "
+			   "MFRR:%#x OUT:%d NR:%d\n", vcpu->vcpu_id, state.xisr,
+			   state.pending_pri, state.cppr, state.mfrr,
+			   state.out_ee, state.need_resend);
 	}
 
 	for (buid = 1; buid <= KVMPPC_XICS_MAX_BUID; buid++) {
@@ -624,18 +835,20 @@ static int xics_debug_show(struct seq_file *m, void *private)
 		if (!ics)
 			continue;
 
-		seq_printf(m, "=========\nICS state for BUID 0x%x\n=========\n", buid);
+		seq_printf(m, "=========\nICS state for BUID 0x%x\n=========\n",
+			   buid);
 
 		mutex_lock(&ics->lock);
 
 		for (i = 0; i < ics->nr_irqs; i++) {
 			struct ics_irq_state *irq = &ics->irq_state[i];
 
-			seq_printf(m, "irq 0x%06x: server %#x prio %#x save prio %#x "
-				   "asserted %d resend %d masked pending %d\n",
+			seq_printf(m, "irq 0x%06x: server %#x prio %#x save"
+				   " prio %#x asserted %d resend %d masked"
+				   " pending %d\n",
 				   irq->number, irq->server, irq->priority,
-				   irq->saved_priority, irq->asserted, irq->resend,
-				   irq->masked_pending);
+				   irq->saved_priority, irq->asserted,
+				   irq->resend, irq->masked_pending);
 
 		}
 		mutex_unlock(&ics->lock);
@@ -672,14 +885,16 @@ static void xics_debugfs_init(struct kvmppc_xics *xics)
 	kfree(name);
 }
 
-static int kvmppc_xics_create_ics(struct kvmppc_xics *xics, u16 buid, u16 nr_irq)
+static int kvmppc_xics_create_ics(struct kvmppc_xics *xics, u16 buid,
+				  u16 nr_irq)
 {
 	struct kvmppc_ics *ics;
 	int i, size;
 
 
 	/* Create the ICS */
-	size = sizeof(struct kvmppc_ics) + sizeof(struct ics_irq_state) * nr_irqs;
+	size = sizeof(struct kvmppc_ics) +
+		sizeof(struct ics_irq_state) * nr_irqs;
 	ics = kzalloc(size, GFP_KERNEL);
 	if (!ics)
 		return -ENOMEM;
@@ -710,9 +925,9 @@ int kvmppc_xics_create_icp(struct kvm_vcpu *vcpu)
 	if (!icp)
 		return -ENOMEM;
 
-	mutex_init(&icp->lock);
 	icp->vcpu = vcpu;
-	icp->mfrr = MASKED;
+	icp->state.mfrr = MASKED;
+	icp->state.pending_pri = MASKED;
 	vcpu->arch.icp = icp;
 
 	XICS_DBG("created server for vcpu %d\n", vcpu->vcpu_id);
@@ -848,6 +1063,8 @@ int kvmppc_xics_ioctl(struct kvm *kvm, unsigned ioctl, unsigned long arg)
 	void __user *argp = (void __user *)arg;
 	int rc;
 
+	BUILD_BUG_ON(sizeof(union kvmppc_icp_state) != sizeof(unsigned long));
+
 	switch (ioctl) {
 	case KVM_CREATE_IRQCHIP: {
 		struct kvm_irqchip_args args;
