diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h
index 82e3e8f..e85d015 100644
--- a/arch/x86/include/asm/hardirq.h
+++ b/arch/x86/include/asm/hardirq.h
@@ -18,7 +18,11 @@ typedef struct {
 #ifdef CONFIG_SMP
 	unsigned int irq_resched_count;
 	unsigned int irq_call_count;
+#ifndef CONFIG_XEN
 	unsigned int irq_tlb_count;
+#else
+	unsigned int irq_lock_count;
+#endif
 #endif
 #ifdef CONFIG_X86_MCE
 	unsigned int irq_thermal_count;
diff --git a/arch/x86/include/mach-xen/asm/hypervisor.h b/arch/x86/include/mach-xen/asm/hypervisor.h
index edd1b82..fb728c6 100644
--- a/arch/x86/include/mach-xen/asm/hypervisor.h
+++ b/arch/x86/include/mach-xen/asm/hypervisor.h
@@ -86,6 +86,8 @@ extern start_info_t *xen_start_info;
 #define init_hypervisor(c) ((void)((c)->x86_hyper_vendor = X86_HYPER_VENDOR_XEN))
 #define init_hypervisor_platform() init_hypervisor(&boot_cpu_data)
 
+struct vcpu_runstate_info *setup_runstate_area(unsigned int cpu);
+
 /* arch/xen/kernel/evtchn.c */
 /* Force a proper event-channel callback from Xen. */
 void force_evtchn_callback(void);
diff --git a/arch/x86/include/mach-xen/asm/irq_vectors.h b/arch/x86/include/mach-xen/asm/irq_vectors.h
index 8d696f5..8e18c9a 100644
--- a/arch/x86/include/mach-xen/asm/irq_vectors.h
+++ b/arch/x86/include/mach-xen/asm/irq_vectors.h
@@ -13,9 +13,8 @@
 #define RESCHEDULE_VECTOR		0
 #define CALL_FUNCTION_VECTOR		1
 #define CALL_FUNC_SINGLE_VECTOR		2
-#define SPIN_UNLOCK_VECTOR		3
-#define REBOOT_VECTOR			4
-#define NR_IPIS				5
+#define REBOOT_VECTOR			3
+#define NR_IPIS				4
 
 /*
  * The maximum number of vectors supported by i386 processors
diff --git a/arch/x86/include/mach-xen/asm/spinlock.h b/arch/x86/include/mach-xen/asm/spinlock.h
index 336b1a6..7b8548d 100644
--- a/arch/x86/include/mach-xen/asm/spinlock.h
+++ b/arch/x86/include/mach-xen/asm/spinlock.h
@@ -38,13 +38,20 @@
 # define UNLOCK_LOCK_PREFIX
 #endif
 
+#ifdef TICKET_SHIFT
+
+#include <asm/irqflags.h>
+#include <asm/smp-processor-id.h>
+#include <xen/interface/vcpu.h>
+
+DECLARE_PER_CPU(struct vcpu_runstate_info, runstate);
+
 int xen_spinlock_init(unsigned int cpu);
 void xen_spinlock_cleanup(unsigned int cpu);
-extern int xen_spin_wait(raw_spinlock_t *, unsigned int token);
-extern int xen_spin_wait_flags(raw_spinlock_t *, unsigned int *token,
-			       unsigned int flags);
-extern unsigned int xen_spin_adjust(raw_spinlock_t *, unsigned int token);
-extern void xen_spin_kick(raw_spinlock_t *, unsigned int token);
+bool xen_spin_wait(raw_spinlock_t *, unsigned int *token,
+		   unsigned int flags);
+unsigned int xen_spin_adjust(const raw_spinlock_t *, unsigned int token);
+void xen_spin_kick(raw_spinlock_t *, unsigned int token);
 
 /*
  * Ticket locks are conceptually two parts, one indicating the current head of
@@ -63,8 +70,7 @@ extern void xen_spin_kick(raw_spinlock_t *, unsigned int token);
  * save some instructions and make the code more elegant. There really isn't
  * much between them in performance though, especially as locks are out of line.
  */
-#if (NR_CPUS < 256)
-#define TICKET_SHIFT 8
+#if TICKET_SHIFT == 8
 #define __ticket_spin_lock_preamble \
 	asm(LOCK_PREFIX "xaddw %w0, %2\n\t" \
 	    "cmpb %h0, %b0\n\t" \
@@ -86,7 +92,14 @@ extern void xen_spin_kick(raw_spinlock_t *, unsigned int token);
 	    : "+Q" (token), "+g" (count) \
 	    : "m" (lock->slock) \
 	    : "memory", "cc")
-
+#define __ticket_spin_unlock_body \
+	asm(UNLOCK_LOCK_PREFIX "incb %2\n\t" \
+	    "movzwl %2, %0\n\t" \
+	    "cmpb %h0, %b0\n\t" \
+	    "setne %1" \
+	    : "=&Q" (token), "=qm" (kick), "+m" (lock->slock) \
+	    : \
+	    : "memory", "cc")
 
 static __always_inline int __ticket_spin_trylock(raw_spinlock_t *lock)
 {
@@ -104,26 +117,12 @@ static __always_inline int __ticket_spin_trylock(raw_spinlock_t *lock)
 	    :
 	    : "memory", "cc");
 
-	return tmp;
-}
-
-static __always_inline void __ticket_spin_unlock(raw_spinlock_t *lock)
-{
-	unsigned int token;
-	unsigned char kick;
+	if (tmp)
+		lock->owner = raw_smp_processor_id();
 
-	asm(UNLOCK_LOCK_PREFIX "incb %2\n\t"
-	    "movzwl %2, %0\n\t"
-	    "cmpb %h0, %b0\n\t"
-	    "setne %1"
-	    : "=&Q" (token), "=qm" (kick), "+m" (lock->slock)
-	    :
-	    : "memory", "cc");
-	if (kick)
-		xen_spin_kick(lock, token);
+	return tmp;
 }
-#else
-#define TICKET_SHIFT 16
+#elif TICKET_SHIFT == 16
 #define __ticket_spin_lock_preamble \
 	do { \
 		unsigned int tmp; \
@@ -154,6 +153,19 @@ static __always_inline void __ticket_spin_unlock(raw_spinlock_t *lock)
 		    : "m" (lock->slock) \
 		    : "memory", "cc"); \
 	} while (0)
+#define __ticket_spin_unlock_body \
+	do { \
+		unsigned int tmp; \
+		asm(UNLOCK_LOCK_PREFIX "incw %2\n\t" \
+		    "movl %2, %0\n\t" \
+		    "shldl $16, %0, %3\n\t" \
+		    "cmpw %w3, %w0\n\t" \
+		    "setne %1" \
+		    : "=&r" (token), "=qm" (kick), "+m" (lock->slock), \
+		      "=&r" (tmp) \
+		    : \
+		    : "memory", "cc"); \
+	} while (0)
 
 static __always_inline int __ticket_spin_trylock(raw_spinlock_t *lock)
 {
@@ -174,27 +186,17 @@ static __always_inline int __ticket_spin_trylock(raw_spinlock_t *lock)
 	    :
 	    : "memory", "cc");
 
-	return tmp;
-}
-
-static __always_inline void __ticket_spin_unlock(raw_spinlock_t *lock)
-{
-	unsigned int token, tmp;
-	bool kick;
+	if (tmp)
+		lock->owner = raw_smp_processor_id();
 
-	asm(UNLOCK_LOCK_PREFIX "incw %2\n\t"
-	    "movl %2, %0\n\t"
-	    "shldl $16, %0, %3\n\t"
-	    "cmpw %w3, %w0\n\t"
-	    "setne %1"
-	    : "=&r" (token), "=qm" (kick), "+m" (lock->slock), "=&r" (tmp)
-	    :
-	    : "memory", "cc");
-	if (kick)
-		xen_spin_kick(lock, token);
+	return tmp;
 }
 #endif
 
+#define __ticket_spin_count(lock) /* value loaded into `count` before each spin round */ \
+	(per_cpu(runstate.state, (lock)->owner) == RUNSTATE_running \
+	 ? 1 << 10 /* recorded owner's vCPU is running */ : 2 /* it is not */)
+
 static inline int __ticket_spin_is_locked(raw_spinlock_t *lock)
 {
 	int tmp = ACCESS_ONCE(lock->slock);
@@ -212,16 +214,22 @@ static inline int __ticket_spin_is_contended(raw_spinlock_t *lock)
 static __always_inline void __ticket_spin_lock(raw_spinlock_t *lock)
 {
 	unsigned int token, count;
+	unsigned int flags = __raw_local_irq_save(); /* irqs stay off until the ticket is sorted out */
 	bool free;
 
 	__ticket_spin_lock_preamble;
 	if (likely(free))
-		return;
-	token = xen_spin_adjust(lock, token);
-	do {
-		count = 1 << 10;
-		__ticket_spin_lock_body;
-	} while (unlikely(!count) && !xen_spin_wait(lock, token));
+		raw_local_irq_restore(flags);
+	else {
+		token = xen_spin_adjust(lock, token); /* may swap tickets with an interrupted wait on this lock */
+		raw_local_irq_restore(flags);
+		do {
+			count = __ticket_spin_count(lock);
+			__ticket_spin_lock_body;
+		} while (unlikely(!count)
+			 && !xen_spin_wait(lock, &token, flags)); /* count hit zero: poll in the hypervisor */
+	}
+	lock->owner = raw_smp_processor_id(); /* consulted by __ticket_spin_count() */
 }
 
 static __always_inline void __ticket_spin_lock_flags(raw_spinlock_t *lock,
@@ -231,50 +239,123 @@ static __always_inline void __ticket_spin_lock_flags(raw_spinlock_t *lock,
 	bool free;
 
 	__ticket_spin_lock_preamble;
-	if (likely(free))
-		return;
-	token = xen_spin_adjust(lock, token);
-	do {
-		count = 1 << 10;
-		__ticket_spin_lock_body;
-	} while (unlikely(!count) && !xen_spin_wait_flags(lock, &token, flags));
+	if (unlikely(!free)) {
+		token = xen_spin_adjust(lock, token);
+		do {
+			count = __ticket_spin_count(lock);
+			__ticket_spin_lock_body;
+		} while (unlikely(!count)
+			 && !xen_spin_wait(lock, &token, flags));
+	}
+	lock->owner = raw_smp_processor_id();
+}
+
+static __always_inline void __ticket_spin_unlock(raw_spinlock_t *lock)
+{
+	unsigned int token;	/* lock word as re-read right after the release */
+	bool kick;		/* true when its two ticket halves still differ */
+
+	__ticket_spin_unlock_body;	/* sets token and kick by name */
+	if (kick)
+		xen_spin_kick(lock, token);
 }
 
-#ifndef CONFIG_PARAVIRT_SPINLOCKS
+#ifndef XEN_SPINLOCK_SOURCE
+#undef __ticket_spin_lock_preamble
+#undef __ticket_spin_lock_body
+#undef __ticket_spin_unlock_body
+#undef __ticket_spin_count
+#endif
+
+#define __raw_spin(n) __ticket_spin_##n
+
+#else /* TICKET_SHIFT */
+
+static inline int xen_spinlock_init(unsigned int cpu) { return 0; }
+static inline void xen_spinlock_cleanup(unsigned int cpu) {}
+
+static inline int __byte_spin_is_locked(raw_spinlock_t *lock)
+{
+	return lock->lock != 0;	/* lockers store 1 here, __byte_spin_unlock() stores 0 */
+}
+
+static inline int __byte_spin_is_contended(raw_spinlock_t *lock)
+{
+	return lock->spinners != 0;	/* bumped by CPUs inside __byte_spin_lock()'s wait loop */
+}
+
+static inline void __byte_spin_lock(raw_spinlock_t *lock)
+{
+	s8 val = 1;	/* swapped into the lock byte; receives the previous byte */
+
+	asm("1: xchgb %1, %0\n"		/* store val, fetch the previous lock byte */
+	    "   test %1,%1\n"
+	    "   jz 3f\n"			/* previous byte was 0: the lock is ours */
+	    "   " LOCK_PREFIX "incb %2\n"	/* count ourselves in ->spinners */
+	    "2: rep;nop\n"			/* pause while ... */
+	    "   cmpb $1, %0\n"
+	    "   je 2b\n"			/* ... the lock byte still reads 1 */
+	    "   " LOCK_PREFIX "decb %2\n"	/* no longer counted as a spinner */
+	    "   jmp 1b\n"			/* retry the exchange */
+	    "3:"
+	    : "+m" (lock->lock), "+q" (val), "+m" (lock->spinners): : "memory");
+}
+
+#define __byte_spin_lock_flags(lock, flags) __byte_spin_lock(lock)
+
+static inline int __byte_spin_trylock(raw_spinlock_t *lock)
+{
+	u8 old = 1;	/* value to store; replaced by the previous lock byte */
+
+	asm("xchgb %1,%0"
+	    : "+m" (lock->lock), "+q" (old) : : "memory");
+
+	return old == 0;	/* non-zero iff the byte was 0 before the swap */
+}
+
+static inline void __byte_spin_unlock(raw_spinlock_t *lock)
+{
+	smp_wmb();	/* write barrier ahead of the releasing store below */
+	lock->lock = 0;
+}
+
+#define __raw_spin(n) __byte_spin_##n
+
+#endif /* TICKET_SHIFT */
 
 static inline int __raw_spin_is_locked(raw_spinlock_t *lock)
 {
-	return __ticket_spin_is_locked(lock);
+	return __raw_spin(is_locked)(lock);
 }
 
 static inline int __raw_spin_is_contended(raw_spinlock_t *lock)
 {
-	return __ticket_spin_is_contended(lock);
+	return __raw_spin(is_contended)(lock);
 }
 #define __raw_spin_is_contended	__raw_spin_is_contended
 
 static __always_inline void __raw_spin_lock(raw_spinlock_t *lock)
 {
-	__ticket_spin_lock(lock);
+	__raw_spin(lock)(lock);
 }
 
 static __always_inline int __raw_spin_trylock(raw_spinlock_t *lock)
 {
-	return __ticket_spin_trylock(lock);
+	return __raw_spin(trylock)(lock);
 }
 
 static __always_inline void __raw_spin_unlock(raw_spinlock_t *lock)
 {
-	__ticket_spin_unlock(lock);
+	__raw_spin(unlock)(lock);
 }
 
 static __always_inline void __raw_spin_lock_flags(raw_spinlock_t *lock,
 						  unsigned long flags)
 {
-	__ticket_spin_lock_flags(lock, flags);
+	__raw_spin(lock_flags)(lock, flags);
 }
 
-#endif	/* CONFIG_PARAVIRT_SPINLOCKS */
+#undef __raw_spin
 
 static inline void __raw_spin_unlock_wait(raw_spinlock_t *lock)
 {
diff --git a/arch/x86/include/mach-xen/asm/spinlock_types.h b/arch/x86/include/mach-xen/asm/spinlock_types.h
new file mode 100644
index 0000000..7d01ae1
--- /dev/null
+++ b/arch/x86/include/mach-xen/asm/spinlock_types.h
@@ -0,0 +1,60 @@
+#ifndef _ASM_X86_SPINLOCK_TYPES_H
+#define _ASM_X86_SPINLOCK_TYPES_H
+
+#ifndef __LINUX_SPINLOCK_TYPES_H
+# error "please don't include this file directly"
+#endif
+
+#include <asm/types.h>
+
+typedef union {
+	unsigned int slock;	/* the lock as one word; memory operand of the ticket asm */
+	struct {
+/*
+ * Xen versions prior to 3.2.x have a race condition with HYPERVISOR_poll().
+ */
+#if CONFIG_XEN_COMPAT >= 0x030200	/* ticket lock variant */
+/*
+ * On Xen we support a single level of interrupt re-enabling per lock. Hence
+ * we can have twice as many outstanding tickets. Thus the cut-off for using
+ * byte register pairs must be at half the number of CPUs.
+ */
+#if 2 * CONFIG_NR_CPUS < 256
+# define TICKET_SHIFT 8
+		u8 cur, seq;	/* cur: ticket compared against a waiter's own; seq: presumably next ticket handed out */
+#else
+# define TICKET_SHIFT 16
+		u16 cur, seq;	/* same roles as above, 16 bits each */
+#endif
+#if CONFIG_NR_CPUS <= 256
+		u8 owner;	/* CPU number written by whoever took the lock */
+#else
+		u16 owner;	/* CPU number written by whoever took the lock */
+#endif
+#else	/* CONFIG_XEN_COMPAT < 0x030200: byte lock variant, TICKET_SHIFT stays undefined */
+/*
+ * This differs from the pre-2.6.24 spinlock by always using xchgb
+ * rather than decb to take the lock; this allows it to use a
+ * zero-initialized lock structure.  It also maintains a 1-byte
+ * contention counter, so that we can implement
+ * __byte_spin_is_contended.
+ */
+		u8 lock;	/* non-zero while held */
+#if CONFIG_NR_CPUS < 256
+		u8 spinners;	/* the contention counter mentioned above */
+#else
+# error NR_CPUS >= 256 not implemented
+#endif
+#endif	/* CONFIG_XEN_COMPAT >= 0x030200 */
+	};
+} raw_spinlock_t;
+
+#define __RAW_SPIN_LOCK_UNLOCKED	{ 0 }
+
+typedef struct {
+	unsigned int lock;
+} raw_rwlock_t;
+
+#define __RAW_RW_LOCK_UNLOCKED		{ RW_LOCK_BIAS }
+
+#endif /* _ASM_X86_SPINLOCK_TYPES_H */
diff --git a/arch/x86/kernel/irq-xen.c b/arch/x86/kernel/irq-xen.c
index cc77647..a3dbf16 100644
--- a/arch/x86/kernel/irq-xen.c
+++ b/arch/x86/kernel/irq-xen.c
@@ -96,6 +96,11 @@ static int show_other_interrupts(struct seq_file *p, int prec)
 	for_each_online_cpu(j)
 		seq_printf(p, "%10u ", irq_stats(j)->irq_tlb_count);
 	seq_printf(p, "  TLB shootdowns\n");
+#else
+	seq_printf(p, "LCK: ");
+	for_each_online_cpu(j)
+		seq_printf(p, "%10u ", irq_stats(j)->irq_lock_count);
+	seq_printf(p, "  Spinlock wakeups\n");
 #endif
 #endif
 #ifdef CONFIG_X86_THERMAL_VECTOR
diff --git a/arch/x86/kernel/time-xen.c b/arch/x86/kernel/time-xen.c
index 7bab3e4..3e9b37b 100644
--- a/arch/x86/kernel/time-xen.c
+++ b/arch/x86/kernel/time-xen.c
@@ -65,7 +65,7 @@ static DEFINE_PER_CPU(u64, processed_stolen_time);
 static DEFINE_PER_CPU(u64, processed_blocked_time);
 
 /* Current runstate of each CPU (updated automatically by the hypervisor). */
-static DEFINE_PER_CPU(struct vcpu_runstate_info, runstate);
+DEFINE_PER_CPU(struct vcpu_runstate_info, runstate);
 
 /* Must be signed, as it's compared with s64 quantities which can be -ve. */
 #define NS_PER_TICK (1000000000LL/HZ)
@@ -550,15 +550,7 @@ EXPORT_SYMBOL_GPL(mark_tsc_unstable);
 
 static void init_missing_ticks_accounting(unsigned int cpu)
 {
-	struct vcpu_register_runstate_memory_area area;
-	struct vcpu_runstate_info *runstate = &per_cpu(runstate, cpu);
-	int rc;
-
-	memset(runstate, 0, sizeof(*runstate));
-
-	area.addr.v = runstate;
-	rc = HYPERVISOR_vcpu_op(VCPUOP_register_runstate_memory_area, cpu, &area);
-	WARN_ON(rc && rc != -ENOSYS);
+	struct vcpu_runstate_info *runstate = setup_runstate_area(cpu);
 
 	per_cpu(processed_blocked_time, cpu) =
 		runstate->time[RUNSTATE_blocked];
@@ -643,6 +635,23 @@ static struct clocksource clocksource_xen = {
 	.resume			= xen_clocksource_resume,
 };
 
+struct vcpu_runstate_info *setup_runstate_area(unsigned int cpu)
+{
+	struct vcpu_runstate_info *info = &per_cpu(runstate, cpu);
+	struct vcpu_register_runstate_memory_area area;
+	int err;
+
+	/* Register this CPU's per-CPU runstate record with the hypervisor. */
+	set_xen_guest_handle(area.addr.h, info);
+	err = HYPERVISOR_vcpu_op(VCPUOP_register_runstate_memory_area, cpu, &area);
+	if (!err)
+		return info;
+	BUILD_BUG_ON(RUNSTATE_running);	/* a zeroed record must read as "running" */
+	memset(info, 0, sizeof(*info));
+	WARN_ON(err != -ENOSYS);
+	return info;
+}
+
 void xen_read_persistent_clock(struct timespec *ts)
 {
 	const shared_info_t *s = HYPERVISOR_shared_info;
diff --git a/drivers/xen/core/evtchn.c b/drivers/xen/core/evtchn.c
index 421aa86..25e1199 100644
--- a/drivers/xen/core/evtchn.c
+++ b/drivers/xen/core/evtchn.c
@@ -1414,43 +1414,6 @@ void disable_all_local_evtchn(void)
 			synch_set_bit(i, &s->evtchn_mask[0]);
 }
 
-/* Clear an irq's pending state, in preparation for polling on it. */
-void xen_clear_irq_pending(int irq)
-{
-	int evtchn = evtchn_from_irq(irq);
-
-	if (VALID_EVTCHN(evtchn))
-		clear_evtchn(evtchn);
-}
-
-/* Set an irq's pending state, to avoid blocking on it. */
-void xen_set_irq_pending(int irq)
-{
-	int evtchn = evtchn_from_irq(irq);
-
-	if (VALID_EVTCHN(evtchn))
-		set_evtchn(evtchn);
-}
-
-/* Test an irq's pending state. */
-int xen_test_irq_pending(int irq)
-{
-	int evtchn = evtchn_from_irq(irq);
-
-	return VALID_EVTCHN(evtchn) && test_evtchn(evtchn);
-}
-
-/* Poll waiting for an irq to become pending.  In the usual case, the
-   irq will be disabled so it won't deliver an interrupt. */
-void xen_poll_irq(int irq)
-{
-	evtchn_port_t evtchn = evtchn_from_irq(irq);
-
-	if (VALID_EVTCHN(evtchn)
-	    && HYPERVISOR_poll_no_timeout(&evtchn, 1))
-		BUG();
-}
-
 #ifdef CONFIG_PM_SLEEP
 static void restore_cpu_virqs(unsigned int cpu)
 {
diff --git a/drivers/xen/core/spinlock.c b/drivers/xen/core/spinlock.c
index 07dda38..fc343a4 100644
--- a/drivers/xen/core/spinlock.c
+++ b/drivers/xen/core/spinlock.c
@@ -4,24 +4,24 @@
  *	See arch/x86/xen/smp.c for copyright and credits for derived
  *	portions of this file.
  */
+#define XEN_SPINLOCK_SOURCE
+#include <linux/spinlock_types.h>
+
+#ifdef TICKET_SHIFT
 
 #include <linux/init.h>
-#include <linux/irq.h>
 #include <linux/kernel.h>
-#include <linux/kernel_stat.h>
 #include <linux/module.h>
+#include <asm/hardirq.h>
 #include <xen/evtchn.h>
 
-#ifdef TICKET_SHIFT
-
-static int __read_mostly spinlock_irq = -1;
-
 struct spinning {
 	raw_spinlock_t *lock;
 	unsigned int ticket;
 	struct spinning *prev;
 };
 static DEFINE_PER_CPU(struct spinning *, spinning);
+static DEFINE_PER_CPU(evtchn_port_t, poll_evtchn);
 /*
  * Protect removal of objects: Addition can be done lockless, and even
  * removal itself doesn't need protection - what needs to be prevented is
@@ -31,98 +31,229 @@ static DEFINE_PER_CPU(raw_rwlock_t, spinning_rm_lock) = __RAW_RW_LOCK_UNLOCKED;
 
 int __cpuinit xen_spinlock_init(unsigned int cpu)
 {
-	static struct irqaction spinlock_action = {
-		.handler = smp_reschedule_interrupt,
-		.flags   = IRQF_DISABLED,
-		.name    = "spinlock"
-	};
+	struct evtchn_bind_ipi bind_ipi;
 	int rc;
 
-	rc = bind_ipi_to_irqaction(SPIN_UNLOCK_VECTOR,
-				   cpu,
-				   &spinlock_action);
- 	if (rc < 0)
- 		return rc;
+	setup_runstate_area(cpu);	/* __ticket_spin_count() reads per-CPU runstate */
 
-	if (spinlock_irq < 0) {
-		disable_irq(rc); /* make sure it's never delivered */
-		spinlock_irq = rc;
-	} else
-		BUG_ON(spinlock_irq != rc);
+	WARN_ON(per_cpu(poll_evtchn, cpu));	/* non-zero: a port is already recorded */
+	bind_ipi.vcpu = cpu;
+	rc = HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi, &bind_ipi);
+	if (!rc)
+		per_cpu(poll_evtchn, cpu) = bind_ipi.port;
+	else	/* not fatal: xen_spin_wait() just spins while the port is 0 */
+		printk(KERN_WARNING
+		       "No spinlock poll event channel for CPU#%u (%d)\n",
+		       cpu, rc);
 
 	return 0;
 }
 
 void __cpuinit xen_spinlock_cleanup(unsigned int cpu)
 {
-	unbind_from_per_cpu_irq(spinlock_irq, cpu, NULL);
+	struct evtchn_close close = { .port = per_cpu(poll_evtchn, cpu) };
+
+	per_cpu(poll_evtchn, cpu) = 0;	/* 0: xen_spin_wait() just spins */
+	if (close.port)	/* nothing was bound if xen_spinlock_init() failed */
+		WARN_ON(HYPERVISOR_event_channel_op(EVTCHNOP_close, &close));
 }
 
-int xen_spin_wait(raw_spinlock_t *lock, unsigned int token)
+#ifdef CONFIG_PM_SLEEP
+#include <linux/sysdev.h>
+
+static int __cpuinit spinlock_resume(struct sys_device *dev) /* .resume hook of spinlock_sysclass */
 {
-	int rc = 0, irq = spinlock_irq;
-	raw_rwlock_t *rm_lock;
-	unsigned long flags;
-	struct spinning spinning;
+	unsigned int cpu;
 
-	/* If kicker interrupt not initialized yet, just spin. */
-	if (unlikely(irq < 0) || unlikely(!cpu_online(raw_smp_processor_id())))
+	for_each_online_cpu(cpu) {
+		per_cpu(poll_evtchn, cpu) = 0;	/* drop the old port so init's WARN_ON stays quiet */
+		xen_spinlock_init(cpu);		/* bind a fresh poll channel for this CPU */
+	}
+
+	return 0;
+}
+
+static struct sysdev_class __cpuinitdata spinlock_sysclass = {
+	.name	= "spinlock",
+	.resume	= spinlock_resume
+};
+
+static struct sys_device __cpuinitdata device_spinlock = {
+	.id		= 0,
+	.cls		= &spinlock_sysclass
+};
+
+static int __init spinlock_register(void)
+{
+	int rc;
+
+	if (is_initial_xendomain())	/* no resume hook is registered in the initial domain */
 		return 0;
 
-	token >>= TICKET_SHIFT;
+	rc = sysdev_class_register(&spinlock_sysclass);	/* class carrying spinlock_resume() */
+	if (!rc)
+		rc = sysdev_register(&device_spinlock);	/* the single device of that class */
+	return rc;
+}
+core_initcall(spinlock_register);
+#endif
+
+static unsigned int spin_adjust(struct spinning *spinning,
+				const raw_spinlock_t *lock,
+				unsigned int token)
+{
+	for (; spinning; spinning = spinning->prev)	/* walk the chain of interrupted waits */
+		if (spinning->lock == lock) {
+			unsigned int ticket = spinning->ticket;
+
+			if (unlikely(!(ticket + 1)))	/* ticket == -1: that wait has given its ticket back */
+				break;
+			spinning->ticket = token >> TICKET_SHIFT;	/* interrupted wait takes the caller's ticket */
+			token = (token & ((1 << TICKET_SHIFT) - 1))
+				| (ticket << TICKET_SHIFT);	/* caller continues with the one that wait held */
+			break;
+		}
+
+	return token;	/* unchanged when no swap took place */
+}
+
+unsigned int xen_spin_adjust(const raw_spinlock_t *lock, unsigned int token)
+{
+	return spin_adjust(percpu_read(spinning), lock, token);	/* start from this CPU's most recent wait record */
+}
+
+bool xen_spin_wait(raw_spinlock_t *lock, unsigned int *ptok,
+                   unsigned int flags)
+{
+	unsigned int cpu = raw_smp_processor_id();
+	bool rc;
+	typeof(vcpu_info(0)->evtchn_upcall_mask) upcall_mask;
+	raw_rwlock_t *rm_lock;
+	struct spinning spinning, *other;
+
+	/* CPU offline or no poll event channel bound yet: just spin. */
+	if (unlikely(!cpu_online(cpu)) || unlikely(!percpu_read(poll_evtchn)))
+		return false;
 
 	/* announce we're spinning */
-	spinning.ticket = token;
+	spinning.ticket = *ptok >> TICKET_SHIFT;
 	spinning.lock = lock;
 	spinning.prev = percpu_read(spinning);
 	smp_wmb();
 	percpu_write(spinning, &spinning);
-
-	/* clear pending */
-	xen_clear_irq_pending(irq);
+	upcall_mask = vcpu_info_read(evtchn_upcall_mask);
 
 	do {
-		/* Check again to make sure it didn't become free while
-		 * we weren't looking. */
-		if ((lock->slock & ((1U << TICKET_SHIFT) - 1)) == token) {
-			/* If we interrupted another spinlock while it was
+		bool nested = false;
+
+		clear_evtchn(percpu_read(poll_evtchn));
+
+		/*
+		 * Check again to make sure it didn't become free while
+		 * we weren't looking.
+		 */
+		if (lock->cur == spinning.ticket) {
+			lock->owner = cpu;
+			/*
+			 * If we interrupted another spinlock while it was
 			 * blocking, make sure it doesn't block (again)
-			 * without rechecking the lock. */
+			 * without rechecking the lock.
+			 */
 			if (spinning.prev)
-				xen_set_irq_pending(irq);
-			rc = 1;
+				set_evtchn(percpu_read(poll_evtchn));
+			rc = true;
 			break;
 		}
 
-		/* block until irq becomes pending */
-		xen_poll_irq(irq);
-	} while (!xen_test_irq_pending(irq));
+		for (other = spinning.prev; other; other = other->prev) {
+			if (other->lock == lock)
+				nested = true;
+			else {
+				/*
+				 * Return the ticket if we now own the lock.
+				 * While just being desirable generally (to
+				 * reduce latency on other CPUs), this is
+				 * essential in the case where interrupts
+				 * get re-enabled below.
+				 * Try to get a new ticket right away (to
+				 * reduce latency after the current lock was
+				 * released), but don't acquire the lock.
+				 */
+				raw_spinlock_t *lock = other->lock;
 
-	/* Leave the irq pending so that any interrupted blocker will
-	 * re-check. */
-	if (!rc)
-		kstat_incr_irqs_this_cpu(irq, irq_to_desc(irq));
+				raw_local_irq_disable();
+				while (lock->cur == other->ticket) {
+					unsigned int token;
+					bool kick, free;
+
+					other->ticket = -1;
+					__ticket_spin_unlock_body;
+					if (!kick)
+						break;
+					xen_spin_kick(lock, token);
+					__ticket_spin_lock_preamble;
+					if (!free)
+						token = spin_adjust(
+							other->prev, lock,
+							token);
+					other->ticket = token >> TICKET_SHIFT;
+					smp_mb();
+				}
+			}
+		}
+
+		/*
+		 * No need to use raw_local_irq_restore() here, as the
+		 * intended event processing will happen with the poll
+		 * call.
+		 */
+		vcpu_info_write(evtchn_upcall_mask,
+				nested ? upcall_mask : flags);
+
+		if (HYPERVISOR_poll_no_timeout(&__get_cpu_var(poll_evtchn), 1))
+			BUG();
+
+		vcpu_info_write(evtchn_upcall_mask, upcall_mask);
+
+		rc = !test_evtchn(percpu_read(poll_evtchn));
+		if (!rc)
+			inc_irq_stat(irq_lock_count);
+	} while (spinning.prev || rc);
+
+	/*
+	 * Leave the poll event channel pending so that any interrupted
+	 * blocker will re-check.
+	 */
 
 	/* announce we're done */
-	percpu_write(spinning, spinning.prev);
+	other = spinning.prev;
+	percpu_write(spinning, other);
 	rm_lock = &__get_cpu_var(spinning_rm_lock);
-	raw_local_irq_save(flags);
+	raw_local_irq_disable();
 	__raw_write_lock(rm_lock);
 	__raw_write_unlock(rm_lock);
-	raw_local_irq_restore(flags);
+	*ptok = lock->cur | (spinning.ticket << TICKET_SHIFT);
 
-	return rc;
-}
+	/*
+	 * Obtain new tickets for (or acquire) all those locks where
+	 * above we avoided acquiring them.
+	 */
+	for (; other; other = other->prev)
+		if (!(other->ticket + 1)) {
+			unsigned int token;
+			bool free;
 
-unsigned int xen_spin_adjust(raw_spinlock_t *lock, unsigned int token)
-{
-	return token;//todo
-}
+			lock = other->lock;
+			__ticket_spin_lock_preamble;
+			if (!free)
+				token = spin_adjust(other->prev, lock, token);
+			other->ticket = token >> TICKET_SHIFT;
+			if (lock->cur == other->ticket)
+				lock->owner = cpu;
+		}
+	raw_local_irq_restore(upcall_mask);
 
-int xen_spin_wait_flags(raw_spinlock_t *lock, unsigned int *token,
-			  unsigned int flags)
-{
-	return xen_spin_wait(lock, *token);//todo
+	return rc;
 }
 
 void xen_spin_kick(raw_spinlock_t *lock, unsigned int token)
@@ -135,7 +266,7 @@ void xen_spin_kick(raw_spinlock_t *lock, unsigned int token)
 		unsigned long flags;
 		struct spinning *spinning;
 
-		if (cpu == raw_smp_processor_id() || !per_cpu(spinning, cpu))
+		if (cpu == raw_smp_processor_id())
 			continue;
 
 		rm_lock = &per_cpu(spinning_rm_lock, cpu);
@@ -144,15 +275,17 @@ void xen_spin_kick(raw_spinlock_t *lock, unsigned int token)
 
 		spinning = per_cpu(spinning, cpu);
 		smp_rmb();
-		if (spinning
-		    && (spinning->lock != lock || spinning->ticket != token))
-			spinning = NULL;
+		while (spinning) {
+			if (spinning->lock == lock && spinning->ticket == token)
+				break;
+			spinning = spinning->prev;
+		}
 
 		__raw_read_unlock(rm_lock);
 		raw_local_irq_restore(flags);
 
 		if (unlikely(spinning)) {
-			notify_remote_via_ipi(SPIN_UNLOCK_VECTOR, cpu);
+			notify_remote_via_evtchn(per_cpu(poll_evtchn, cpu));
 			return;
 		}
 	}
diff --git a/include/xen/events.h b/include/xen/events.h
index e68d59a..1812ea6 100644
--- a/include/xen/events.h
+++ b/include/xen/events.h
@@ -44,15 +44,6 @@ extern void notify_remote_via_irq(int irq);
 
 extern void xen_irq_resume(void);
 
-/* Clear an irq's pending state, in preparation for polling on it */
-void xen_clear_irq_pending(int irq);
-void xen_set_irq_pending(int irq);
-bool xen_test_irq_pending(int irq);
-
-/* Poll waiting for an irq to become pending.  In the usual case, the
-   irq will be disabled so it won't deliver an interrupt. */
-void xen_poll_irq(int irq);
-
 /* Determine the IRQ which is bound to an event channel */
 unsigned irq_from_evtchn(unsigned int evtchn);
 
