Patchwork [RFC] powerpc: Implement atomic64_t for 32-bit processors

login
register
mail settings
Submitter Paul Mackerras
Date June 12, 2009, 12:02 p.m.
Message ID <18994.17381.129339.162342@cargo.ozlabs.ibm.com>
Download mbox | patch
Permalink /patch/28635/
State Superseded, archived
Headers show

Comments

Paul Mackerras - June 12, 2009, 12:02 p.m.
32-bit powerpc processors have no 64-bit atomic instructions, but we will
need atomic64_t in order to support the perf_counter subsystem on 32-bit
processors.

This adds an implementation of 64-bit atomic operations using hashed
spinlocks to provide atomicity.  For each atomic operation, the address
of the atomic64_t variable is hashed to an index into an array of 16
spinlocks.  That spinlock is taken (with interrupts disabled) around the
operation, which can then be coded non-atomically within the lock.

On UP, all the spinlock manipulation goes away and we simply disable
interrupts around each operation.  In fact gcc eliminates the whole
atomic64_lock variable as well.

Signed-off-by: Paul Mackerras <paulus@samba.org>
---
Compile-tested only at this stage, which is why it's [RFC].

 arch/powerpc/include/asm/atomic.h |   29 ++++++
 arch/powerpc/lib/Makefile         |    2 +-
 arch/powerpc/lib/atomic64_32.c    |  173 +++++++++++++++++++++++++++++++++++++
 3 files changed, 203 insertions(+), 1 deletions(-)
 create mode 100644 arch/powerpc/lib/atomic64_32.c
Kumar Gala - June 12, 2009, 2:04 p.m.
On Jun 12, 2009, at 7:02 AM, Paul Mackerras wrote:

> 32-bit powerpc processors have no 64-bit atomic instructions, but we  
> will
> need atomic64_t in order to support the perf_counter subsystem on 32- 
> bit
> processors.
>
> This adds an implementation of 64-bit atomic operations using hashed
> spinlocks to provide atomicity.  For each atomic operation, the  
> address
> of the atomic64_t variable is hashed to an index into an array of 16
> spinlocks.  That spinlock is taken (with interrupts disabled) around  
> the
> operation, which can then be coded non-atomically within the lock.
>
> On UP, all the spinlock manipulation goes away and we simply disable
> interrupts around each operation.  In fact gcc eliminates the whole
> atomic64_lock variable as well.
>
> Signed-off-by: Paul Mackerras <paulus@samba.org>
> ---
> Compile-tested only at this stage, which is why it's [RFC].

any reason not to make this lib/asm generic?  Seems like it isn't ppc  
specific.

- k
Kyle McMartin - June 12, 2009, 5:40 p.m.
On Fri, Jun 12, 2009 at 09:04:52AM -0500, Kumar Gala wrote:
>> On UP, all the spinlock manipulation goes away and we simply disable
>> interrupts around each operation.  In fact gcc eliminates the whole
>> atomic64_lock variable as well.
>>
>> Signed-off-by: Paul Mackerras <paulus@samba.org>
>> ---
>> Compile-tested only at this stage, which is why it's [RFC].
>
> any reason not to make this lib/asm generic?  Seems like it isn't ppc  
> specific.
>

Indeed... PA-RISC uses these for all atomics, since we have no useful
atomic insns to implement them... I'd be happy to submit a cleanup after
you merge this, if you don't have time to ahead of time.

regards, Kyle

Patch

diff --git a/arch/powerpc/include/asm/atomic.h b/arch/powerpc/include/asm/atomic.h
index b401950..45356d6 100644
--- a/arch/powerpc/include/asm/atomic.h
+++ b/arch/powerpc/include/asm/atomic.h
@@ -470,6 +470,35 @@  static __inline__ int atomic64_add_unless(atomic64_t *v, long a, long u)
 
 #define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0)
 
+#else /* not __powerpc64__ */
+
+typedef struct {
+	long long counter;
+} atomic64_t;
+
+#define ATOMIC64_INIT(i)	{ (i) }
+
+extern long long atomic64_read(const atomic64_t *v);
+extern void atomic64_set(atomic64_t *v, long long i);
+extern void atomic64_add(long long a, atomic64_t *v);
+extern long long atomic64_add_return(long long a, atomic64_t *v);
+extern void atomic64_sub(long long a, atomic64_t *v);
+extern long long atomic64_sub_return(long long a, atomic64_t *v);
+extern long long atomic64_dec_if_positive(atomic64_t *v);
+extern long long atomic64_cmpxchg(atomic64_t *v, long long o, long long n);
+extern long long atomic64_xchg(atomic64_t *v, long long new);
+extern int atomic64_add_unless(atomic64_t *v, long long a, long long u);
+
+#define atomic64_add_negative(a, v)	(atomic64_add_return((a), (v)) < 0)
+#define atomic64_inc(v)			(atomic64_add(1LL, (v))
+#define atomic64_inc_return(v)		(atomic64_add_return(1LL, (v))
+#define atomic64_inc_and_test(v) 	(atomic64_inc_return(v) == 0)
+#define atomic64_sub_and_test(a, v)	(atomic64_sub_return((a), (v)) == 0)
+#define atomic64_dec(v)			(atomic64_sub(1LL, (v))
+#define atomic64_dec_return(v)		(atomic64_sub_return(1LL, (v))
+#define atomic64_dec_and_test(v)	(atomic64_dec_return((v)) == 0)
+#define atomic64_inc_not_zero(v) 	atomic64_add_unless((v), 1, 0)
+
 #endif /* __powerpc64__ */
 
 #include <asm-generic/atomic.h>
diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
index 29b742b..1537f13 100644
--- a/arch/powerpc/lib/Makefile
+++ b/arch/powerpc/lib/Makefile
@@ -11,7 +11,7 @@  CFLAGS_REMOVE_feature-fixups.o = -pg
 
 obj-y			:= string.o alloc.o \
 			   checksum_$(CONFIG_WORD_SIZE).o
-obj-$(CONFIG_PPC32)	+= div64.o copy_32.o crtsavres.o
+obj-$(CONFIG_PPC32)	+= div64.o copy_32.o crtsavres.o atomic64_32.o
 obj-$(CONFIG_HAS_IOMEM)	+= devres.o
 
 obj-$(CONFIG_PPC64)	+= copypage_64.o copyuser_64.o \
diff --git a/arch/powerpc/lib/atomic64_32.c b/arch/powerpc/lib/atomic64_32.c
new file mode 100644
index 0000000..4c24b8a
--- /dev/null
+++ b/arch/powerpc/lib/atomic64_32.c
@@ -0,0 +1,173 @@ 
+/*
+ * Implementation of 64-bit atomics on 32-bit PowerPC processors.
+ *
+ * Copyright © 2009 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <linux/types.h>
+#include <linux/cache.h>
+#include <linux/spinlock.h>
+#include <linux/init.h>
+#include <asm/atomic.h>
+
+/*
+ * We use a hashed array of spinlocks to provide exclusive access
+ * to each atomic64_t variable.  Since this is expected to used on
+ * systems with at most 4 processors, we use a relatively small
+ * array of 16 spinlocks.
+ */
+#define NR_LOCKS	16
+
+/*
+ * Ensure each lock is in a separate cacheline on SMP.
+ */
+static union {
+	spinlock_t lock;
+	char pad[L1_CACHE_BYTES];
+} atomic64_lock[NR_LOCKS] __cacheline_aligned_in_smp;
+
+static inline spinlock_t *lock_addr(const atomic64_t *v)
+{
+	unsigned long addr = (unsigned long) v;
+
+	addr >>= L1_CACHE_SHIFT;
+	addr ^= (addr >> 8) ^ (addr >> 16);
+	return &atomic64_lock[addr & (NR_LOCKS - 1)].lock;
+}
+
+long long atomic64_read(const atomic64_t *v)
+{
+	unsigned long flags;
+	spinlock_t *lock = lock_addr(v);
+	long long val;
+
+	spin_lock_irqsave(lock, flags);
+	val = v->counter;
+	spin_unlock_irqrestore(lock, flags);
+	return val;
+}
+
+void atomic64_set(atomic64_t *v, long long i)
+{
+	unsigned long flags;
+	spinlock_t *lock = lock_addr(v);
+
+	spin_lock_irqsave(lock, flags);
+	v->counter = i;
+	spin_unlock_irqrestore(lock, flags);
+}
+
+void atomic64_add(long long a, atomic64_t *v)
+{
+	unsigned long flags;
+	spinlock_t *lock = lock_addr(v);
+
+	spin_lock_irqsave(lock, flags);
+	v->counter += a;
+	spin_unlock_irqrestore(lock, flags);
+}
+
+long long atomic64_add_return(long long a, atomic64_t *v)
+{
+	unsigned long flags;
+	spinlock_t *lock = lock_addr(v);
+	long long val;
+
+	spin_lock_irqsave(lock, flags);
+	val = v->counter += a;
+	spin_unlock_irqrestore(lock, flags);
+	return val;
+}
+
+void atomic64_sub(long long a, atomic64_t *v)
+{
+	unsigned long flags;
+	spinlock_t *lock = lock_addr(v);
+
+	spin_lock_irqsave(lock, flags);
+	v->counter -= a;
+	spin_unlock_irqrestore(lock, flags);
+}
+
+long long atomic64_sub_return(long long a, atomic64_t *v)
+{
+	unsigned long flags;
+	spinlock_t *lock = lock_addr(v);
+	long long val;
+
+	spin_lock_irqsave(lock, flags);
+	val = v->counter -= a;
+	spin_unlock_irqrestore(lock, flags);
+	return val;
+}
+
+long long atomic64_dec_if_positive(atomic64_t *v)
+{
+	unsigned long flags;
+	spinlock_t *lock = lock_addr(v);
+	long long val;
+
+	spin_lock_irqsave(lock, flags);
+	val = v->counter - 1;
+	if (val >= 0)
+		v->counter = val;
+	spin_unlock_irqrestore(lock, flags);
+	return val;
+}
+
+long long atomic64_cmpxchg(atomic64_t *v, long long o, long long n)
+{
+	unsigned long flags;
+	spinlock_t *lock = lock_addr(v);
+	long long val;
+
+	spin_lock_irqsave(lock, flags);
+	val = v->counter;
+	if (val == o)
+		v->counter = n;
+	spin_unlock_irqrestore(lock, flags);
+	return val;
+}
+
+long long atomic64_xchg(atomic64_t *v, long long new)
+{
+	unsigned long flags;
+	spinlock_t *lock = lock_addr(v);
+	long long val;
+
+	spin_lock_irqsave(lock, flags);
+	val = v->counter;
+	v->counter = new;
+	spin_unlock_irqrestore(lock, flags);
+	return val;
+}
+
+int atomic64_add_unless(atomic64_t *v, long long a, long long u)
+{
+	unsigned long flags;
+	spinlock_t *lock = lock_addr(v);
+	int ret = 1;
+
+	spin_lock_irqsave(lock, flags);
+	if (v->counter != u) {
+		v->counter += a;
+		ret = 0;
+	}
+	spin_unlock_irqrestore(lock, flags);
+	return ret;
+}
+
+static int init_atomic64_lock(void)
+{
+	int i;
+
+	for (i = 0; i < NR_LOCKS; ++i)
+		spin_lock_init(&atomic64_lock[i].lock);
+	return 0;
+}
+
+pure_initcall(init_atomic64_lock);