
spinlocks: Replace test-and-set locks by ticket locks

Message ID: 20210403162315.1595501-1-cmuellner@linux.com
State: Superseded
Series: spinlocks: Replace test-and-set locks by ticket locks

Commit Message

Christoph Müllner April 3, 2021, 4:23 p.m. UTC
Test-and-set locks don't provide fairness and are non-deterministic
with respect to which hart will be the next holder of a contended lock.
This can be a performance problem under lock contention.

Let's change the implementation to use ticket locks, which guarantee
a fair locking order (FIFO ordering).

Ticket locks have two counters 'owner' and 'next'.
The counter 'owner' holds the ticket number of the current lock owner.
The counter 'next' holds the number of the next free ticket.

The lock is free if both counters are equal. To acquire the lock, the
'next' counter is atomically incremented to obtain a new ticket.
The counter 'owner' is then polled until it becomes equal to the new
ticket. To release a lock, one atomically increments the 'owner'
counter.

The implementation uses a 32-bit wide struct, which consists of
two 16-bit counters (owner and next). This is inspired by similar
spinlock implementations on other architectures.

Note that RISC-V lacks sub-word atomics, so we need to work around
this limitation with additional instructions.
This implementation aims to reduce the number of instructions between
the LR/SC pairs to minimize the chances of failing SCs. The cost of
this approach is that we clobber more registers than necessary.

Signed-off-by: Christoph Muellner <cmuellner@linux.com>
---
 include/sbi/riscv_locks.h | 33 ++++++++++++-------
 lib/sbi/riscv_locks.c     | 67 +++++++++++++++++++++++++++++----------
 2 files changed, 72 insertions(+), 28 deletions(-)
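
For illustration only (not part of the patch): the algorithm described in
the commit message, written as a plain-C sketch using GCC's __atomic
builtins instead of the patch's hand-written LR/SC assembly. The union
layout assumes little-endian; all names here are made up for the example.

	#include <stdint.h>

	#define TICKET_SHIFT 16

	typedef union {
		uint32_t word;		/* both counters in one 32-bit word */
		struct {
			uint16_t owner;	/* ticket currently being served */
			uint16_t next;	/* next free ticket */
		};
	} ticketlock_t;

	static void ticket_lock(ticketlock_t *l)
	{
		/* Grab a ticket; the carry out of bit 31 is discarded, so
		 * the 16-bit 'next' field wraps without touching 'owner'. */
		uint32_t old = __atomic_fetch_add(&l->word, 1u << TICKET_SHIFT,
						  __ATOMIC_RELAXED);
		uint16_t ticket = old >> TICKET_SHIFT;

		/* Poll 'owner' until our ticket comes up; the acquire load
		 * orders the critical section after the lock acquisition. */
		while (__atomic_load_n(&l->owner, __ATOMIC_ACQUIRE) != ticket)
			;
	}

	static void ticket_unlock(ticketlock_t *l)
	{
		/* Serve the next waiter; release publishes our writes. */
		__atomic_store_n(&l->owner, l->owner + 1, __ATOMIC_RELEASE);
	}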

Comments

Jessica Clarke April 3, 2021, 4:43 p.m. UTC | #1
Jess

> On 3 Apr 2021, at 17:23, Christoph Muellner <cmuellner@linux.com> wrote:
> 
> Test-and-set locks don't provide fairness and are non-deterministic
> (w.r.t. the prediction of the future holder of a lock).
> This can be a performance problem in case of lock contention.
> 
> Let's change the implementation to use ticket locks, which guarantee
> a fair locking order (FIFO ordering).
> 
> Ticket locks have two counters 'owner' and 'next'.
> The counter 'owner' holds the ticket number of the current lock owner.
> The counter 'next' holds the latest free ticket number.
> 
> The lock is free if both counters are equal. To acquire the lock, the
> 'next' counter is atomically incremented to obtain a new ticket.
> The counter 'owner' is then polled until it becomes equal to the new
> ticket. To release a lock, one atomically increments the 'owner'
> counter.
> 
> The implementation uses a 32-bit wide struct, which consists of
> two 16-bit counters (owner and next). This is inspired by similar
> spinlock implementations on other architectures.
> 
> Note, that RISC-V lacks sub-word atomics, therefore we need to
> circumvent this limitation with additional instructions.
> This implementation aims to reduce the instructions between the
> LR/SC pairs to minimize the chances of failing SCs. The cost of
> this approach is, that we spill more registers than necessary.
> 
> Signed-off-by: Christoph Muellner <cmuellner@linux.com>
> ---
> include/sbi/riscv_locks.h | 33 ++++++++++++-------
> lib/sbi/riscv_locks.c     | 67 +++++++++++++++++++++++++++++----------
> 2 files changed, 72 insertions(+), 28 deletions(-)
> 
> diff --git a/include/sbi/riscv_locks.h b/include/sbi/riscv_locks.h
> index faa9676..59e6af9 100644
> --- a/include/sbi/riscv_locks.h
> +++ b/include/sbi/riscv_locks.h
> @@ -2,26 +2,37 @@
>  * SPDX-License-Identifier: BSD-2-Clause
>  *
>  * Copyright (c) 2019 Western Digital Corporation or its affiliates.
> - *
> - * Authors:
> - *   Anup Patel <anup.patel@wdc.com>
> + * Copyright (c) 2021 Christoph Müllner <cmuellner@linux.com>
>  */
> 
> #ifndef __RISCV_LOCKS_H__
> #define __RISCV_LOCKS_H__
> 
> +#include <sbi/sbi_types.h>
> +
> +#define TICKET_SHIFT	16
> +
> typedef struct {
> -	volatile long lock;
> -} spinlock_t;
> +#ifdef __BIG_ENDIAN


This looks wrong. Neither GCC nor Clang define that, and sometimes you can end
up with __BIG_ENDIAN being 4321, __LITTLE_ENDIAN being 1234 and __BYTE_ORDER
being one of the two (or I guess also 3412 for __PDP_ENDIAN). __BYTE_ORDER__ ==
__ORDER_BIG_ENDIAN__ is the portable way to check this across Clang and GCC
without relying on anything other than built-in defines.
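
With that, the struct would read (a sketch of the suggested check; the
fields themselves unchanged):

	#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
		u16 next;
		u16 owner;
	#else
		u16 owner;
		u16 next;
	#endif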

> +       u16 next;
> +       u16 owner;
> +#else
> +       u16 owner;
> +       u16 next;
> +#endif
> +} __aligned(4) spinlock_t;
> +
> +#define __SPIN_LOCK_UNLOCKED	\
> +	(spinlock_t) { 0, 0 }
> 
> -#define __RISCV_SPIN_UNLOCKED 0
> +#define SPIN_LOCK_INIT(x)	\
> +	x = __SPIN_LOCK_UNLOCKED
> 
> -#define SPIN_LOCK_INIT(x) (x).lock = __RISCV_SPIN_UNLOCKED
> +#define SPIN_LOCK_INITIALIZER	\
> +	__SPIN_LOCK_UNLOCKED
> 
> -#define SPIN_LOCK_INITIALIZER                  \
> -	{                                      \
> -		.lock = __RISCV_SPIN_UNLOCKED, \
> -	}
> +#define DEFINE_SPIN_LOCK(x)	\
> +	spinlock_t SPIN_LOCK_INIT(x)
> 
> int spin_lock_check(spinlock_t *lock);
> 
> diff --git a/lib/sbi/riscv_locks.c b/lib/sbi/riscv_locks.c
> index 4d1d9c0..7d86742 100644
> --- a/lib/sbi/riscv_locks.c
> +++ b/lib/sbi/riscv_locks.c
> @@ -2,44 +2,77 @@
>  * SPDX-License-Identifier: BSD-2-Clause
>  *
>  * Copyright (c) 2019 Western Digital Corporation or its affiliates.
> - *
> - * Authors:
> - *   Anup Patel <anup.patel@wdc.com>
> + * Copyright (c) 2021 Christoph Müllner <cmuellner@linux.com>
>  */
> 
> #include <sbi/riscv_barrier.h>
> #include <sbi/riscv_locks.h>
> 
> -int spin_lock_check(spinlock_t *lock)
> +static inline int spin_lock_unlocked(spinlock_t lock)
> {
> -	return (lock->lock == __RISCV_SPIN_UNLOCKED) ? 0 : 1;
> +        return lock.owner == lock.next;
> +}
> +
> +bool spin_lock_check(spinlock_t *lock)
> +{
> +	RISCV_FENCE(r, rw);
> +	return !spin_lock_unlocked(*lock);
> }
> 
> int spin_trylock(spinlock_t *lock)
> {
> -	int tmp = 1, busy;
> +	u32 inc = 1u << TICKET_SHIFT;
> +	u32 mask = 0xffffu << TICKET_SHIFT;
> +	u32 l0, tmp1, tmp2;
> 
> 	__asm__ __volatile__(
> -		"	amoswap.w %0, %2, %1\n" RISCV_ACQUIRE_BARRIER
> -		: "=r"(busy), "+A"(lock->lock)
> -		: "r"(tmp)
> +		/* Get the current lock counters. */
> +		"1:	lr.w.aq	%0, %3\n"
> +		"	slliw	%2, %0, %6\n"

This, and addw below, require RV64.
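
One way to make the sequence XLEN-neutral (an untested sketch: on RV64,
slliw/addw operate on the low 32 bits and sign-extend the result, while on
RV32 the plain base ops are already 32-bit) is to pick the mnemonics at
preprocessing time:

	#if __riscv_xlen == 64
	#define SLLI32	"slliw"	/* 32-bit shift, result sign-extended */
	#define ADD32	"addw"	/* 32-bit add, result sign-extended */
	#else
	#define SLLI32	"slli"	/* RV32 base ops are already 32-bit */
	#define ADD32	"add"
	#endif

	/* used via string concatenation in the asm template, e.g.:
	 *	"	" SLLI32 "	%2, %0, %6\n"
	 */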

> +		"	and	%1, %0, %5\n"
> +		/* Is the lock free right now? */
> +		"	bne	%1, %2, 2f\n"
> +		"	addw	%0, %0, %4\n"
> +		/* Acquire the lock. */
> +		"	sc.w.rl	%0, %0, %3\n"
> +		"	bnez	%0, 1b\n"
> +		"2:"
> +		: "=&r"(l0), "=&r"(tmp1), "=&r"(tmp2), "+A"(*lock)
> +		: "r"(inc), "r"(mask), "I"(TICKET_SHIFT)
> 		: "memory");
> 
> -	return !busy;
> +	return !l0;
> }
> 
> void spin_lock(spinlock_t *lock)
> {
> -	while (1) {
> -		if (spin_lock_check(lock))
> -			continue;
> +	u32 inc = 1u << TICKET_SHIFT;
> +	u32 mask = 0xffffu << TICKET_SHIFT;
> +	u32 l0, tmp1, tmp2;
> 
> -		if (spin_trylock(lock))
> -			break;
> -	}
> +	__asm__ __volatile__(
> +		/* Atomically increment the next ticket. */
> +		"0:	lr.w.aq	%0, %3\n"
> +		"	addw	%0, %0, %4\n"
> +		"	sc.w.rl	%0, %0, %3\n"
> +		"	bnez	%0, 0b\n"
> +
> +		/* Did we get the lock? */
> +		"1:	slliw	%2, %0, %6\n"

Do you mean to be reading the success code from sc, which is guaranteed 0 the
first time round due to the above loop?

> +		"	and	%1, %0, %5\n"
> +		"	beq	%1, %2, 2f\n"
> +
> +		/* No, let's spin on the lock. */
> +		"	lr.w.aq	%0, %3\n"

Why lr? This is just an acquire load, no reservation needed, so just lw then
fence, otherwise I imagine this will hurt performance on Rocket due to the way
it implements the forward progress guarantee.
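
A sketch of the plain acquire load Jess describes, i.e. the usual
fence-based load-acquire mapping (placement inside the existing asm block
assumed; untested):

	/* No, let's spin on the lock -- no reservation needed. */
	"	lw	%0, %3\n"
	"	fence	r, rw\n"	/* makes the load an acquire */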

> +		"	j	1b\n"
> +		"2:"
> +		: "=&r"(l0), "=&r"(tmp1), "=&r"(tmp2), "+A"(*lock)
> +		: "r"(inc), "r"(mask), "I"(TICKET_SHIFT)
> +		: "memory");
> }
> 
> void spin_unlock(spinlock_t *lock)
> {
> -	__smp_store_release(&lock->lock, __RISCV_SPIN_UNLOCKED);
> +	__smp_store_release(&lock->owner, lock->owner + 1);
> }
> +
> -- 
> 2.30.2

Jess
Christoph Müllner April 3, 2021, 9:48 p.m. UTC | #2
On Sat, Apr 3, 2021 at 6:43 PM Jessica Clarke <jrtc27@jrtc27.com> wrote:
>
>
>
> Jess
>
> > On 3 Apr 2021, at 17:23, Christoph Muellner <cmuellner@linux.com> wrote:
> >
> > Test-and-set locks don't provide fairness and are non-deterministic
> > (w.r.t. the prediction of the future holder of a lock).
> > This can be a performance problem in case of lock contention.
> >
> > Let's change the implementation to use ticket locks, which guarantee
> > a fair locking order (FIFO ordering).
> >
> > Ticket locks have two counters 'owner' and 'next'.
> > The counter 'owner' holds the ticket number of the current lock owner.
> > The counter 'next' holds the latest free ticket number.
> >
> > The lock is free if both counters are equal. To acquire the lock, the
> > 'next' counter is atomically incremented to obtain a new ticket.
> > The counter 'owner' is then polled until it becomes equal to the new
> > ticket. To release a lock, one atomically increments the 'owner'
> > counter.
> >
> > The implementation uses a 32-bit wide struct, which consists of
> > two 16-bit counters (owner and next). This is inspired by similar
> > spinlock implementations on other architectures.
> >
> > Note, that RISC-V lacks sub-word atomics, therefore we need to
> > circumvent this limitation with additional instructions.
> > This implementation aims to reduce the instructions between the
> > LR/SC pairs to minimize the chances of failing SCs. The cost of
> > this approach is, that we spill more registers than necessary.
> >
> > Signed-off-by: Christoph Muellner <cmuellner@linux.com>
> > ---
> > include/sbi/riscv_locks.h | 33 ++++++++++++-------
> > lib/sbi/riscv_locks.c     | 67 +++++++++++++++++++++++++++++----------
> > 2 files changed, 72 insertions(+), 28 deletions(-)
> >
> > diff --git a/include/sbi/riscv_locks.h b/include/sbi/riscv_locks.h
> > index faa9676..59e6af9 100644
> > --- a/include/sbi/riscv_locks.h
> > +++ b/include/sbi/riscv_locks.h
> > @@ -2,26 +2,37 @@
> >  * SPDX-License-Identifier: BSD-2-Clause
> >  *
> >  * Copyright (c) 2019 Western Digital Corporation or its affiliates.
> > - *
> > - * Authors:
> > - *   Anup Patel <anup.patel@wdc.com>
> > + * Copyright (c) 2021 Christoph Müllner <cmuellner@linux.com>
> >  */
> >
> > #ifndef __RISCV_LOCKS_H__
> > #define __RISCV_LOCKS_H__
> >
> > +#include <sbi/sbi_types.h>
> > +
> > +#define TICKET_SHIFT 16
> > +
> > typedef struct {
> > -     volatile long lock;
> > -} spinlock_t;
> > +#ifdef __BIG_ENDIAN
>
>
> This looks wrong. Neither GCC nor Clang define that, and sometimes you can end
> up with __BIG_ENDIAN being 4321, __LITTLE_ENDIAN being 1234 and __BYTE_ORDER
> being one of the two (or I guess also 3421 for __PDP_ENDIAN). __BYTE_ORDER__ ==
> __ORDER_BIG_ENDIAN__ is the portable way to check this across Clang and GCC
> without relying on anything other than built-in defines.

Ah, yes.

> > +       u16 next;
> > +       u16 owner;
> > +#else
> > +       u16 owner;
> > +       u16 next;
> > +#endif
> > +} __aligned(4) spinlock_t;
> > +
> > +#define __SPIN_LOCK_UNLOCKED \
> > +     (spinlock_t) { 0, 0 }
> >
> > -#define __RISCV_SPIN_UNLOCKED 0
> > +#define SPIN_LOCK_INIT(x)    \
> > +     x = __SPIN_LOCK_UNLOCKED
> >
> > -#define SPIN_LOCK_INIT(x) (x).lock = __RISCV_SPIN_UNLOCKED
> > +#define SPIN_LOCK_INITIALIZER        \
> > +     __SPIN_LOCK_UNLOCKED
> >
> > -#define SPIN_LOCK_INITIALIZER                  \
> > -     {                                      \
> > -             .lock = __RISCV_SPIN_UNLOCKED, \
> > -     }
> > +#define DEFINE_SPIN_LOCK(x)  \
> > +     spinlock_t SPIN_LOCK_INIT(x)
> >
> > int spin_lock_check(spinlock_t *lock);
> >
> > diff --git a/lib/sbi/riscv_locks.c b/lib/sbi/riscv_locks.c
> > index 4d1d9c0..7d86742 100644
> > --- a/lib/sbi/riscv_locks.c
> > +++ b/lib/sbi/riscv_locks.c
> > @@ -2,44 +2,77 @@
> >  * SPDX-License-Identifier: BSD-2-Clause
> >  *
> >  * Copyright (c) 2019 Western Digital Corporation or its affiliates.
> > - *
> > - * Authors:
> > - *   Anup Patel <anup.patel@wdc.com>
> > + * Copyright (c) 2021 Christoph Müllner <cmuellner@linux.com>
> >  */
> >
> > #include <sbi/riscv_barrier.h>
> > #include <sbi/riscv_locks.h>
> >
> > -int spin_lock_check(spinlock_t *lock)
> > +static inline int spin_lock_unlocked(spinlock_t lock)
> > {
> > -     return (lock->lock == __RISCV_SPIN_UNLOCKED) ? 0 : 1;
> > +        return lock.owner == lock.next;
> > +}
> > +
> > +bool spin_lock_check(spinlock_t *lock)
> > +{
> > +     RISCV_FENCE(r, rw);
> > +     return !spin_lock_unlocked(*lock);
> > }
> >
> > int spin_trylock(spinlock_t *lock)
> > {
> > -     int tmp = 1, busy;
> > +     u32 inc = 1u << TICKET_SHIFT;
> > +     u32 mask = 0xffffu << TICKET_SHIFT;
> > +     u32 l0, tmp1, tmp2;
> >
> >       __asm__ __volatile__(
> > -             "       amoswap.w %0, %2, %1\n" RISCV_ACQUIRE_BARRIER
> > -             : "=r"(busy), "+A"(lock->lock)
> > -             : "r"(tmp)
> > +             /* Get the current lock counters. */
> > +             "1:     lr.w.aq %0, %3\n"
> > +             "       slliw   %2, %0, %6\n"
>
> This, and addw below, require RV64.

I admit that I have not tested RV32.
Will do this as well.

> > +             "       and     %1, %0, %5\n"
> > +             /* Is the lock free right now? */
> > +             "       bne     %1, %2, 2f\n"
> > +             "       addw    %0, %0, %4\n"
> > +             /* Acquire the lock. */
> > +             "       sc.w.rl %0, %0, %3\n"
> > +             "       bnez    %0, 1b\n"
> > +             "2:"
> > +             : "=&r"(l0), "=&r"(tmp1), "=&r"(tmp2), "+A"(*lock)
> > +             : "r"(inc), "r"(mask), "I"(TICKET_SHIFT)
> >               : "memory");
> >
> > -     return !busy;
> > +     return !l0;
> > }
> >
> > void spin_lock(spinlock_t *lock)
> > {
> > -     while (1) {
> > -             if (spin_lock_check(lock))
> > -                     continue;
> > +     u32 inc = 1u << TICKET_SHIFT;
> > +     u32 mask = 0xffffu << TICKET_SHIFT;
> > +     u32 l0, tmp1, tmp2;
> >
> > -             if (spin_trylock(lock))
> > -                     break;
> > -     }
> > +     __asm__ __volatile__(
> > +             /* Atomically increment the next ticket. */
> > +             "0:     lr.w.aq %0, %3\n"
> > +             "       addw    %0, %0, %4\n"
> > +             "       sc.w.rl %0, %0, %3\n"
> > +             "       bnez    %0, 0b\n"
> > +
> > +             /* Did we get the lock? */
> > +             "1:     slliw   %2, %0, %6\n"
>
> Do you mean to be reading the success code from sc, which is guaranteed 0 the
> first time round due to the above loop?

Plus, the sequence is wrong.
I messed up when I tried to make lock() and trylock() more similar,
but my tests did not trigger this issue.

> > +             "       and     %1, %0, %5\n"
> > +             "       beq     %1, %2, 2f\n"
> > +
> > +             /* No, let's spin on the lock. */
> > +             "       lr.w.aq %0, %3\n"
>
> Why lr? This is just an acquire load, no reservation needed, so just lw then
> fence, otherwise I imagine this will hurt performance on Rocket due to the way
> it implements the forward progress guarantee.

Good point.

Thanks for the review!
I will go back to do more testing, to not embarrass myself again.

Christoph

Xiang W April 4, 2021, 1:58 a.m. UTC | #3
On Sat, 2021-04-03 at 18:23 +0200, Christoph Muellner wrote:
> Test-and-set locks don't provide fairness and are non-deterministic
> (w.r.t. the prediction of the future holder of a lock).
> This can be a performance problem in case of lock contention.
Why?
> 
> Let's change the implementation to use ticket locks, which guarantee
> a fair locking order (FIFO ordering).
> 
> Ticket locks have two counters 'owner' and 'next'.
> The counter 'owner' holds the ticket number of the current lock
> owner.
> The counter 'next' holds the latest free ticket number.
> 
> The lock is free if both counters are equal. To acquire the lock, the
> 'next' counter is atomically incremented to obtain a new ticket.
> The counter 'owner' is then polled until it becomes equal to the new
> ticket. To release a lock, one atomically increments the 'owner'
> counter.
> 
> The implementation uses a 32-bit wide struct, which consists of
> two 16-bit counters (owner and next). This is inspired by similar
> spinlock implementations on other architectures.
> 
> Note, that RISC-V lacks sub-word atomics, therefore we need to
> circumvent this limitation with additional instructions.
> This implementation aims to reduce the instructions between the
> LR/SC pairs to minimize the chances of failing SCs. The cost of
> this approach is, that we spill more registers than necessary.
> 
> Signed-off-by: Christoph Muellner <cmuellner@linux.com>
> ---
>  include/sbi/riscv_locks.h | 33 ++++++++++++-------
>  lib/sbi/riscv_locks.c     | 67 +++++++++++++++++++++++++++++------
> ----
>  2 files changed, 72 insertions(+), 28 deletions(-)
> 
> diff --git a/include/sbi/riscv_locks.h b/include/sbi/riscv_locks.h
> index faa9676..59e6af9 100644
> --- a/include/sbi/riscv_locks.h
> +++ b/include/sbi/riscv_locks.h
> @@ -2,26 +2,37 @@
>   * SPDX-License-Identifier: BSD-2-Clause
>   *
>   * Copyright (c) 2019 Western Digital Corporation or its affiliates.
> - *
> - * Authors:
> - *   Anup Patel <anup.patel@wdc.com>
> + * Copyright (c) 2021 Christoph Müllner <cmuellner@linux.com>
>   */
>  
>  #ifndef __RISCV_LOCKS_H__
>  #define __RISCV_LOCKS_H__
>  
> +#include <sbi/sbi_types.h>
> +
> +#define TICKET_SHIFT	16
> +
>  typedef struct {
> -	volatile long lock;
> -} spinlock_t;
> +#ifdef __BIG_ENDIAN
> +       u16 next;
> +       u16 owner;
> +#else
> +       u16 owner;
> +       u16 next;
> +#endif
I recommend using two u32 counters, which would streamline part of the
code and make it easier to understand.
> +} __aligned(4) spinlock_t;
Anup Patel April 4, 2021, 6:04 a.m. UTC | #4
> -----Original Message-----
> From: Christoph Muellner <cmuellner@linux.com>
> Sent: 03 April 2021 21:53
> To: opensbi@lists.infradead.org; Anup Patel <Anup.Patel@wdc.com>
> Cc: Christoph Muellner <cmuellner@linux.com>
> Subject: [PATCH] spinlocks: Replace test-and-set locks by ticket locks
> 
> Test-and-set locks don't provide fairness and are non-deterministic (w.r.t.
> the prediction of the future holder of a lock).
> This can be a performance problem in case of lock contention.
> 
> Let's change the implementation to use ticket locks, which guarantee a fair
> locking order (FIFO ordering).
> 
> Ticket locks have two counters 'owner' and 'next'.
> The counter 'owner' holds the ticket number of the current lock owner.
> The counter 'next' holds the latest free ticket number.
> 
> The lock is free if both counters are equal. To acquire the lock, the 'next'
> counter is atomically incremented to obtain a new ticket.
> The counter 'owner' is then polled until it becomes equal to the new ticket.
> To release a lock, one atomically increments the 'owner'
> counter.
> 
> The implementation uses a 32-bit wide struct, which consists of two 16-bit
> counters (owner and next). This is inspired by similar spinlock
> implementations on other architectures.
> 
> Note, that RISC-V lacks sub-word atomics, therefore we need to circumvent
> this limitation with additional instructions.
> This implementation aims to reduce the instructions between the LR/SC pairs
> to minimize the chances of failing SCs. The cost of this approach is, that we
> spill more registers than necessary.

The spin_trylock() can be tricky to implement with the ticket approach.
Since spin_trylock() is currently not used anywhere in OpenSBI, I suggest:

1) We can drop the spin_trylock() API as a separate patch
2) Implement the classic ticket spinlock approach as described on
    Wikipedia, using AMOADD instructions and barriers (see the sketch
    after this list)
3) We don't need to pack next_ticket and owner into one word because
     these can be separate "unsigned long" in spinlock_t. I believe that
     this packing only helps in implementing spin_trylock(), right?
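
A sketch of what points 2 and 3 could look like together (illustrative
only, not from the thread; the names mirror the OpenSBI API for
readability). With word-sized counters, the ticket fetch is a single
amoadd.w/amoadd.d and no masking or shifting is needed:

	typedef struct {
		unsigned long next;	/* next free ticket */
		unsigned long owner;	/* ticket currently being served */
	} spinlock_t;

	void spin_lock(spinlock_t *lock)
	{
		/* One AMOADD hands out the ticket. */
		unsigned long ticket =
			__atomic_fetch_add(&lock->next, 1, __ATOMIC_RELAXED);

		/* Wait for our turn; acquire pairs with the release below. */
		while (__atomic_load_n(&lock->owner, __ATOMIC_ACQUIRE) != ticket)
			;
	}

	void spin_unlock(spinlock_t *lock)
	{
		__atomic_store_n(&lock->owner, lock->owner + 1, __ATOMIC_RELEASE);
	}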

Regards,
Anup

Xiang W April 4, 2021, 6:54 a.m. UTC | #5
On Sun, 2021-04-04 at 06:04 +0000, Anup Patel wrote:
> > -----Original Message-----
> > From: Christoph Muellner <cmuellner@linux.com>
> > Sent: 03 April 2021 21:53
> > To: opensbi@lists.infradead.org; Anup Patel <Anup.Patel@wdc.com>
> > Cc: Christoph Muellner <cmuellner@linux.com>
> > Subject: [PATCH] spinlocks: Replace test-and-set locks by ticket
> > locks
> > 
> > Test-and-set locks don't provide fairness and are non-deterministic 
> > (w.r.t.
> > the prediction of the future holder of a lock).
> > This can be a performance problem in case of lock contention.
> > 
> > Let's change the implementation to use ticket locks, which
> > guarantee a fair
> > locking order (FIFO ordering).
> > 
> > Ticket locks have two counters 'owner' and 'next'.
> > The counter 'owner' holds the ticket number of the current lock
> > owner.
> > The counter 'next' holds the latest free ticket number.
> > 
> > The lock is free if both counters are equal. To acquire the lock,
> > the 'next'
> > counter is atomically incremented to obtain a new ticket.
> > The counter 'owner' is then polled until it becomes equal to the
> > new ticket.
> > To release a lock, one atomically increments the 'owner'
> > counter.
> > 
> > The implementation uses a 32-bit wide struct, which consists of two
> > 16-bit
> > counters (owner and next). This is inspired by similar spinlock
> > implementations on other architectures.
> > 
> > Note, that RISC-V lacks sub-word atomics, therefore we need to
> > circumvent
> > this limitation with additional instructions.
> > This implementation aims to reduce the instructions between the
> > LR/SC pairs
> > to minimize the chances of failing SCs. The cost of this approach
> > is, that we
> > spill more registers than necessary.
> 
> The spin_trylock() can be tricky to implement with ticket approach.
> Since the
> spin_trylock() is currently not used anywhere in OpenSBI, I suggest:
> 
> 1) We can drop the spin_trylock() API as separate patch
> 2) Implement classic ticket spinlock approach as described in
> Wikipedia
>     using AMOADD instructions and barriers.
> 3) We don't need to pack next_ticket and owner into one word because
>      these can be separate "unsigned long" in spinlock_t. I believe
> that
Under RV32 long is 4 bytes, and under RV64 long is 8 bytes, so different
instructions would be used for the atomic operations. I therefore
recommend using u32.

Regards,
Xiang W
>      this packing only helps in implementing spin_trylock(), Right ??
> 
> Regards,
> Anup
> 
> > Signed-off-by: Christoph Muellner <cmuellner@linux.com>
> > ---
> >  include/sbi/riscv_locks.h | 33 ++++++++++++-------
> >  lib/sbi/riscv_locks.c     | 67 +++++++++++++++++++++++++++++----
> > ------
> >  2 files changed, 72 insertions(+), 28 deletions(-)
> > 
> > diff --git a/include/sbi/riscv_locks.h b/include/sbi/riscv_locks.h
> > index
> > faa9676..59e6af9 100644
> > --- a/include/sbi/riscv_locks.h
> > +++ b/include/sbi/riscv_locks.h
> > @@ -2,26 +2,37 @@
> >   * SPDX-License-Identifier: BSD-2-Clause
> >   *
> >   * Copyright (c) 2019 Western Digital Corporation or its
> > affiliates.
> > - *
> > - * Authors:
> > - *   Anup Patel <anup.patel@wdc.com>
> > + * Copyright (c) 2021 Christoph Müllner <cmuellner@linux.com>
> >   */
> > 
> >  #ifndef __RISCV_LOCKS_H__
> >  #define __RISCV_LOCKS_H__
> > 
> > +#include <sbi/sbi_types.h>
> > +
> > +#define TICKET_SHIFT	16
> > +
> >  typedef struct {
> > -	volatile long lock;
> > -} spinlock_t;
> > +#ifdef __BIG_ENDIAN
> > +       u16 next;
> > +       u16 owner;
> > +#else
> > +       u16 owner;
> > +       u16 next;
> > +#endif
> > +} __aligned(4) spinlock_t;
> > +
> > +#define __SPIN_LOCK_UNLOCKED	\
> > +	(spinlock_t) { 0, 0 }
> > 
> > -#define __RISCV_SPIN_UNLOCKED 0
> > +#define SPIN_LOCK_INIT(x)	\
> > +	x = __SPIN_LOCK_UNLOCKED
> > 
> > -#define SPIN_LOCK_INIT(x) (x).lock = __RISCV_SPIN_UNLOCKED
> > +#define SPIN_LOCK_INITIALIZER	\
> > +	__SPIN_LOCK_UNLOCKED
> > 
> > -#define SPIN_LOCK_INITIALIZER                  \
> > -	{                                      \
> > -		.lock = __RISCV_SPIN_UNLOCKED, \
> > -	}
> > +#define DEFINE_SPIN_LOCK(x)	\
> > +	spinlock_t SPIN_LOCK_INIT(x)
> > 
> >  int spin_lock_check(spinlock_t *lock);
> > 
> > diff --git a/lib/sbi/riscv_locks.c b/lib/sbi/riscv_locks.c index
> > 4d1d9c0..7d86742
> > 100644
> > --- a/lib/sbi/riscv_locks.c
> > +++ b/lib/sbi/riscv_locks.c
> > @@ -2,44 +2,77 @@
> >   * SPDX-License-Identifier: BSD-2-Clause
> >   *
> >   * Copyright (c) 2019 Western Digital Corporation or its
> > affiliates.
> > - *
> > - * Authors:
> > - *   Anup Patel <anup.patel@wdc.com>
> > + * Copyright (c) 2021 Christoph Müllner <cmuellner@linux.com>
> >   */
> > 
> >  #include <sbi/riscv_barrier.h>
> >  #include <sbi/riscv_locks.h>
> > 
> > -int spin_lock_check(spinlock_t *lock)
> > +static inline int spin_lock_unlocked(spinlock_t lock)
> >  {
> > -	return (lock->lock == __RISCV_SPIN_UNLOCKED) ? 0 : 1;
> > +        return lock.owner == lock.next; }
> > +
> > +bool spin_lock_check(spinlock_t *lock)
> > +{
> > +	RISCV_FENCE(r, rw);
> > +	return !spin_lock_unlocked(*lock);
> >  }
> > 
> >  int spin_trylock(spinlock_t *lock)
> >  {
> > -	int tmp = 1, busy;
> > +	u32 inc = 1u << TICKET_SHIFT;
> > +	u32 mask = 0xffffu << TICKET_SHIFT;
> > +	u32 l0, tmp1, tmp2;
> > 
> >  	__asm__ __volatile__(
> > -		"	amoswap.w %0, %2, %1\n"
> > RISCV_ACQUIRE_BARRIER
> > -		: "=r"(busy), "+A"(lock->lock)
> > -		: "r"(tmp)
> > +		/* Get the current lock counters. */
> > +		"1:	lr.w.aq	%0, %3\n"
> > +		"	slliw	%2, %0, %6\n"
> > +		"	and	%1, %0, %5\n"
> > +		/* Is the lock free right now? */
> > +		"	bne	%1, %2, 2f\n"
> > +		"	addw	%0, %0, %4\n"
> > +		/* Acquire the lock. */
> > +		"	sc.w.rl	%0, %0, %3\n"
> > +		"	bnez	%0, 1b\n"
> > +		"2:"
> > +		: "=&r"(l0), "=&r"(tmp1), "=&r"(tmp2), "+A"(*lock)
> > +		: "r"(inc), "r"(mask), "I"(TICKET_SHIFT)
> >  		: "memory");
> > 
> > -	return !busy;
> > +	return !l0;
> >  }
> > 
> >  void spin_lock(spinlock_t *lock)
> >  {
> > -	while (1) {
> > -		if (spin_lock_check(lock))
> > -			continue;
> > +	u32 inc = 1u << TICKET_SHIFT;
> > +	u32 mask = 0xffffu << TICKET_SHIFT;
> > +	u32 l0, tmp1, tmp2;
> > 
> > -		if (spin_trylock(lock))
> > -			break;
> > -	}
> > +	__asm__ __volatile__(
> > +		/* Atomically increment the next ticket. */
> > +		"0:	lr.w.aq	%0, %3\n"
> > +		"	addw	%0, %0, %4\n"
> > +		"	sc.w.rl	%0, %0, %3\n"
> > +		"	bnez	%0, 0b\n"
> > +
> > +		/* Did we get the lock? */
> > +		"1:	slliw	%2, %0, %6\n"
> > +		"	and	%1, %0, %5\n"
> > +		"	beq	%1, %2, 2f\n"
> > +
> > +		/* No, let's spin on the lock. */
> > +		"	lr.w.aq	%0, %3\n"
> > +		"	j	1b\n"
> > +		"2:"
> > +		: "=&r"(l0), "=&r"(tmp1), "=&r"(tmp2), "+A"(*lock)
> > +		: "r"(inc), "r"(mask), "I"(TICKET_SHIFT)
> > +		: "memory");
> >  }
> > 
> >  void spin_unlock(spinlock_t *lock)
> >  {
> > -	__smp_store_release(&lock->lock, __RISCV_SPIN_UNLOCKED);
> > +	__smp_store_release(&lock->owner, lock->owner + 1);
> >  }
> > +
> > --
> > 2.30.2
Guo Ren April 5, 2021, 5:05 a.m. UTC | #6
On Sun, Apr 4, 2021 at 12:23 AM Christoph Muellner <cmuellner@linux.com> wrote:
>
> Test-and-set locks don't provide fairness and are non-deterministic
> (w.r.t. the prediction of the future holder of a lock).
> This can be a performance problem in case of lock contention.
>
> Let's change the implementation to use ticket locks, which guarantee
> a fair locking order (FIFO ordering).
>
> Ticket locks have two counters 'owner' and 'next'.
> The counter 'owner' holds the ticket number of the current lock owner.
> The counter 'next' holds the latest free ticket number.
>
> The lock is free if both counters are equal. To acquire the lock, the
> 'next' counter is atomically incremented to obtain a new ticket.
> The counter 'owner' is then polled until it becomes equal to the new
> ticket. To release a lock, one atomically increments the 'owner'
> counter.
>
> The implementation uses a 32-bit wide struct, which consists of
> two 16-bit counters (owner and next). This is inspired by similar
> spinlock implementations on other architectures.
>
> Note, that RISC-V lacks sub-word atomics, therefore we need to
> circumvent this limitation with additional instructions.
> This implementation aims to reduce the instructions between the
> LR/SC pairs to minimize the chances of failing SCs. The cost of
> this approach is, that we spill more registers than necessary.
>
> Signed-off-by: Christoph Muellner <cmuellner@linux.com>
> ---
>  include/sbi/riscv_locks.h | 33 ++++++++++++-------
>  lib/sbi/riscv_locks.c     | 67 +++++++++++++++++++++++++++++----------
>  2 files changed, 72 insertions(+), 28 deletions(-)
>
> diff --git a/include/sbi/riscv_locks.h b/include/sbi/riscv_locks.h
> index faa9676..59e6af9 100644
> --- a/include/sbi/riscv_locks.h
> +++ b/include/sbi/riscv_locks.h
> @@ -2,26 +2,37 @@
>   * SPDX-License-Identifier: BSD-2-Clause
>   *
>   * Copyright (c) 2019 Western Digital Corporation or its affiliates.
> - *
> - * Authors:
> - *   Anup Patel <anup.patel@wdc.com>
> + * Copyright (c) 2021 Christoph Müllner <cmuellner@linux.com>
>   */
>
>  #ifndef __RISCV_LOCKS_H__
>  #define __RISCV_LOCKS_H__
>
> +#include <sbi/sbi_types.h>
> +
> +#define TICKET_SHIFT   16
> +
>  typedef struct {
> -       volatile long lock;
> -} spinlock_t;
> +#ifdef __BIG_ENDIAN
> +       u16 next;
> +       u16 owner;
> +#else
> +       u16 owner;
> +       u16 next;
> +#endif
> +} __aligned(4) spinlock_t;
> +
> +#define __SPIN_LOCK_UNLOCKED   \
> +       (spinlock_t) { 0, 0 }
>
> -#define __RISCV_SPIN_UNLOCKED 0
> +#define SPIN_LOCK_INIT(x)      \
> +       x = __SPIN_LOCK_UNLOCKED
>
> -#define SPIN_LOCK_INIT(x) (x).lock = __RISCV_SPIN_UNLOCKED
> +#define SPIN_LOCK_INITIALIZER  \
> +       __SPIN_LOCK_UNLOCKED
>
> -#define SPIN_LOCK_INITIALIZER                  \
> -       {                                      \
> -               .lock = __RISCV_SPIN_UNLOCKED, \
> -       }
> +#define DEFINE_SPIN_LOCK(x)    \
> +       spinlock_t SPIN_LOCK_INIT(x)
>
>  int spin_lock_check(spinlock_t *lock);
>
> diff --git a/lib/sbi/riscv_locks.c b/lib/sbi/riscv_locks.c
> index 4d1d9c0..7d86742 100644
> --- a/lib/sbi/riscv_locks.c
> +++ b/lib/sbi/riscv_locks.c
> @@ -2,44 +2,77 @@
>   * SPDX-License-Identifier: BSD-2-Clause
>   *
>   * Copyright (c) 2019 Western Digital Corporation or its affiliates.
> - *
> - * Authors:
> - *   Anup Patel <anup.patel@wdc.com>
> + * Copyright (c) 2021 Christoph Müllner <cmuellner@linux.com>
>   */
>
>  #include <sbi/riscv_barrier.h>
>  #include <sbi/riscv_locks.h>
>
> -int spin_lock_check(spinlock_t *lock)
> +static inline int spin_lock_unlocked(spinlock_t lock)
>  {
> -       return (lock->lock == __RISCV_SPIN_UNLOCKED) ? 0 : 1;
> +        return lock.owner == lock.next;
> +}
> +
> +bool spin_lock_check(spinlock_t *lock)
> +{
> +       RISCV_FENCE(r, rw);
> +       return !spin_lock_unlocked(*lock);
>  }
>
>  int spin_trylock(spinlock_t *lock)
>  {
> -       int tmp = 1, busy;
> +       u32 inc = 1u << TICKET_SHIFT;
> +       u32 mask = 0xffffu << TICKET_SHIFT;
> +       u32 l0, tmp1, tmp2;
>
>         __asm__ __volatile__(
> -               "       amoswap.w %0, %2, %1\n" RISCV_ACQUIRE_BARRIER
> -               : "=r"(busy), "+A"(lock->lock)
> -               : "r"(tmp)
> +               /* Get the current lock counters. */
> +               "1:     lr.w.aq %0, %3\n"
> +               "       slliw   %2, %0, %6\n"
> +               "       and     %1, %0, %5\n"
> +               /* Is the lock free right now? */
> +               "       bne     %1, %2, 2f\n"
> +               "       addw    %0, %0, %4\n"
> +               /* Acquire the lock. */
> +               "       sc.w.rl %0, %0, %3\n"
> +               "       bnez    %0, 1b\n"
> +               "2:"
> +               : "=&r"(l0), "=&r"(tmp1), "=&r"(tmp2), "+A"(*lock)
> +               : "r"(inc), "r"(mask), "I"(TICKET_SHIFT)
>                 : "memory");
>
> -       return !busy;
> +       return !l0;
>  }
>
>  void spin_lock(spinlock_t *lock)
Please use inline for spin_lock(), spin_trylock(), and spin_unlock().

>  {
> -       while (1) {
> -               if (spin_lock_check(lock))
> -                       continue;
> +       u32 inc = 1u << TICKET_SHIFT;
> +       u32 mask = 0xffffu << TICKET_SHIFT;
> +       u32 l0, tmp1, tmp2;
>
> -               if (spin_trylock(lock))
> -                       break;
> -       }
> +       __asm__ __volatile__(
> +               /* Atomically increment the next ticket. */
> +               "0:     lr.w.aq %0, %3\n"
> +               "       addw    %0, %0, %4\n"
> +               "       sc.w.rl %0, %0, %3\n"
I think the acquire & release placement is wrong here. It should be:
lock - acquire
unlock - release
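
For illustration, an untested sketch of the placement Guo Ren describes,
assuming the little-endian field layout: the ticket increment carries no
ordering, the acquire sits on the load that observes 'owner', and the
release stays in spin_unlock() (__smp_store_release):

	u32 inc = 1u << TICKET_SHIFT;
	u32 mask = 0xffffu;
	u32 tmp, ticket, owner;

	__asm__ __volatile__(
		/* Take a ticket; this publishes nothing, so no .aq/.rl. */
		"0:	lr.w	%1, %3\n"
		"	add	%0, %1, %4\n"
		"	sc.w	%0, %0, %3\n"
		"	bnez	%0, 0b\n"
		/* Our ticket is the old 'next' field. */
		"	srli	%1, %1, %6\n"
		"	and	%1, %1, %5\n"
		/* Spin with plain loads; acquire once we own the lock. */
		"1:	lw	%2, %3\n"
		"	and	%2, %2, %5\n"
		"	bne	%2, %1, 1b\n"
		"	fence	r, rw\n"
		: "=&r"(tmp), "=&r"(ticket), "=&r"(owner), "+A"(*lock)
		: "r"(inc), "r"(mask), "I"(TICKET_SHIFT)
		: "memory");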

> +               "       bnez    %0, 0b\n"
> +
> +               /* Did we get the lock? */
> +               "1:     slliw   %2, %0, %6\n"
slliw is only supported on RV64, so you need to use slli on RV32. Ref:
https://lore.kernel.org/linux-riscv/1617201040-83905-4-git-send-email-guoren@kernel.org/T/#u

Guo Ren April 5, 2021, 5:39 a.m. UTC | #7
On Sun, Apr 4, 2021 at 2:05 PM Anup Patel <Anup.Patel@wdc.com> wrote:
>
>
>
> > -----Original Message-----
> > From: Christoph Muellner <cmuellner@linux.com>
> > Sent: 03 April 2021 21:53
> > To: opensbi@lists.infradead.org; Anup Patel <Anup.Patel@wdc.com>
> > Cc: Christoph Muellner <cmuellner@linux.com>
> > Subject: [PATCH] spinlocks: Replace test-and-set locks by ticket locks
> >
> > Test-and-set locks don't provide fairness and are non-deterministic (w.r.t.
> > the prediction of the future holder of a lock).
> > This can be a performance problem in case of lock contention.
> >
> > Let's change the implementation to use ticket locks, which guarantee a fair
> > locking order (FIFO ordering).
> >
> > Ticket locks have two counters 'owner' and 'next'.
> > The counter 'owner' holds the ticket number of the current lock owner.
> > The counter 'next' holds the latest free ticket number.
> >
> > The lock is free if both counters are equal. To acquire the lock, the 'next'
> > counter is atomically incremented to obtain a new ticket.
> > The counter 'owner' is then polled until it becomes equal to the new ticket.
> > To release a lock, one atomically increments the 'owner'
> > counter.
> >
> > The implementation uses a 32-bit wide struct, which consists of two 16-bit
> > counters (owner and next). This is inspired by similar spinlock
> > implementations on other architectures.
> >
> > Note, that RISC-V lacks sub-word atomics, therefore we need to circumvent
> > this limitation with additional instructions.
> > This implementation aims to reduce the instructions between the LR/SC pairs
> > to minimize the chances of failing SCs. The cost of this approach is, that we
> > spill more registers than necessary.
>
> The spin_trylock() can be tricky to implement with ticket approach. Since the
> spin_trylock() is currently not used anywhere in OpenSBI, I suggest:
>
> 1) We can drop the spin_trylock() API as separate patch
> 2) Implement classic ticket spinlock approach as described in Wikipedia
>     using AMOADD instructions and barriers.
Do you prefer AMOADD? I think it's a better idea (csky only has
lr.w/sc.w, so LR/SC is my habit :P). I will update the Linux ticket-lock
patch:
https://lore.kernel.org/linux-riscv/1617201040-83905-4-git-send-email-guoren@kernel.org/T/#u

with:

diff --git a/arch/riscv/include/asm/spinlock.h
b/arch/riscv/include/asm/spinlock.h
index 90b7eaa950cf..435286ad342b 100644
--- a/arch/riscv/include/asm/spinlock.h
+++ b/arch/riscv/include/asm/spinlock.h
@@ -22,15 +22,10 @@
 static inline void arch_spin_lock(arch_spinlock_t *lock)
 {
        arch_spinlock_t lockval;
-       u32 tmp;

        asm volatile (
-               "1:     lr.w    %0, %2          \n"
-               "       mv      %1, %0          \n"
-               "       addw    %0, %0, %3      \n"
-               "       sc.w    %0, %0, %2      \n"
-               "       bnez    %0, 1b          \n"
-               : "=&r" (tmp), "=&r" (lockval), "+A" (lock->lock)
+               "   amoadd.w    %0, %2, %1      \n"
+               : "=&r" (lockval), "+A" (lock->lock)
                : "r" (1 << TICKET_NEXT)
                : "memory");

Thx for the tip :)

> 3) We don't need to pack next_ticket and owner into one word because
>      these can be separate "unsigned long" in spinlock_t. I believe that
>      this packing only helps in implementing spin_trylock(), Right ??
>
> Regards,
> Anup
>
> >
> > Signed-off-by: Christoph Muellner <cmuellner@linux.com>
> > ---
> >  include/sbi/riscv_locks.h | 33 ++++++++++++-------
> >  lib/sbi/riscv_locks.c     | 67 +++++++++++++++++++++++++++++----------
> >  2 files changed, 72 insertions(+), 28 deletions(-)
> >
> > diff --git a/include/sbi/riscv_locks.h b/include/sbi/riscv_locks.h index
> > faa9676..59e6af9 100644
> > --- a/include/sbi/riscv_locks.h
> > +++ b/include/sbi/riscv_locks.h
> > @@ -2,26 +2,37 @@
> >   * SPDX-License-Identifier: BSD-2-Clause
> >   *
> >   * Copyright (c) 2019 Western Digital Corporation or its affiliates.
> > - *
> > - * Authors:
> > - *   Anup Patel <anup.patel@wdc.com>
> > + * Copyright (c) 2021 Christoph Müllner <cmuellner@linux.com>
> >   */
> >
> >  #ifndef __RISCV_LOCKS_H__
> >  #define __RISCV_LOCKS_H__
> >
> > +#include <sbi/sbi_types.h>
> > +
> > +#define TICKET_SHIFT 16
> > +
> >  typedef struct {
> > -     volatile long lock;
> > -} spinlock_t;
> > +#ifdef __BIG_ENDIAN
> > +       u16 next;
> > +       u16 owner;
> > +#else
> > +       u16 owner;
> > +       u16 next;
> > +#endif
> > +} __aligned(4) spinlock_t;
> > +
> > +#define __SPIN_LOCK_UNLOCKED \
> > +     (spinlock_t) { 0, 0 }
> >
> > -#define __RISCV_SPIN_UNLOCKED 0
> > +#define SPIN_LOCK_INIT(x)    \
> > +     x = __SPIN_LOCK_UNLOCKED
> >
> > -#define SPIN_LOCK_INIT(x) (x).lock = __RISCV_SPIN_UNLOCKED
> > +#define SPIN_LOCK_INITIALIZER        \
> > +     __SPIN_LOCK_UNLOCKED
> >
> > -#define SPIN_LOCK_INITIALIZER                  \
> > -     {                                      \
> > -             .lock = __RISCV_SPIN_UNLOCKED, \
> > -     }
> > +#define DEFINE_SPIN_LOCK(x)  \
> > +     spinlock_t SPIN_LOCK_INIT(x)
> >
> >  int spin_lock_check(spinlock_t *lock);
> >
> > diff --git a/lib/sbi/riscv_locks.c b/lib/sbi/riscv_locks.c index 4d1d9c0..7d86742
> > 100644
> > --- a/lib/sbi/riscv_locks.c
> > +++ b/lib/sbi/riscv_locks.c
> > @@ -2,44 +2,77 @@
> >   * SPDX-License-Identifier: BSD-2-Clause
> >   *
> >   * Copyright (c) 2019 Western Digital Corporation or its affiliates.
> > - *
> > - * Authors:
> > - *   Anup Patel <anup.patel@wdc.com>
> > + * Copyright (c) 2021 Christoph Müllner <cmuellner@linux.com>
> >   */
> >
> >  #include <sbi/riscv_barrier.h>
> >  #include <sbi/riscv_locks.h>
> >
> > -int spin_lock_check(spinlock_t *lock)
> > +static inline int spin_lock_unlocked(spinlock_t lock)
> >  {
> > -     return (lock->lock == __RISCV_SPIN_UNLOCKED) ? 0 : 1;
> > +        return lock.owner == lock.next; }
> > +
> > +bool spin_lock_check(spinlock_t *lock)
> > +{
> > +     RISCV_FENCE(r, rw);
> > +     return !spin_lock_unlocked(*lock);
> >  }
> >
> >  int spin_trylock(spinlock_t *lock)
> >  {
> > -     int tmp = 1, busy;
> > +     u32 inc = 1u << TICKET_SHIFT;
> > +     u32 mask = 0xffffu << TICKET_SHIFT;
> > +     u32 l0, tmp1, tmp2;
> >
> >       __asm__ __volatile__(
> > -             "       amoswap.w %0, %2, %1\n"
> > RISCV_ACQUIRE_BARRIER
> > -             : "=r"(busy), "+A"(lock->lock)
> > -             : "r"(tmp)
> > +             /* Get the current lock counters. */
> > +             "1:     lr.w.aq %0, %3\n"
> > +             "       slliw   %2, %0, %6\n"
> > +             "       and     %1, %0, %5\n"
> > +             /* Is the lock free right now? */
> > +             "       bne     %1, %2, 2f\n"
> > +             "       addw    %0, %0, %4\n"
> > +             /* Acquire the lock. */
> > +             "       sc.w.rl %0, %0, %3\n"
> > +             "       bnez    %0, 1b\n"
> > +             "2:"
> > +             : "=&r"(l0), "=&r"(tmp1), "=&r"(tmp2), "+A"(*lock)
> > +             : "r"(inc), "r"(mask), "I"(TICKET_SHIFT)
> >               : "memory");
> >
> > -     return !busy;
> > +     return !l0;
> >  }
> >
> >  void spin_lock(spinlock_t *lock)
> >  {
> > -     while (1) {
> > -             if (spin_lock_check(lock))
> > -                     continue;
> > +     u32 inc = 1u << TICKET_SHIFT;
> > +     u32 mask = 0xffffu << TICKET_SHIFT;
> > +     u32 l0, tmp1, tmp2;
> >
> > -             if (spin_trylock(lock))
> > -                     break;
> > -     }
> > +     __asm__ __volatile__(
> > +             /* Atomically increment the next ticket. */
> > +             "0:     lr.w.aq %0, %3\n"
> > +             "       addw    %0, %0, %4\n"
> > +             "       sc.w.rl %0, %0, %3\n"
> > +             "       bnez    %0, 0b\n"
> > +
> > +             /* Did we get the lock? */
> > +             "1:     slliw   %2, %0, %6\n"
> > +             "       and     %1, %0, %5\n"
> > +             "       beq     %1, %2, 2f\n"
> > +
> > +             /* No, let's spin on the lock. */
> > +             "       lr.w.aq %0, %3\n"
> > +             "       j       1b\n"
> > +             "2:"
> > +             : "=&r"(l0), "=&r"(tmp1), "=&r"(tmp2), "+A"(*lock)
> > +             : "r"(inc), "r"(mask), "I"(TICKET_SHIFT)
> > +             : "memory");
> >  }
> >
> >  void spin_unlock(spinlock_t *lock)
> >  {
> > -     __smp_store_release(&lock->lock, __RISCV_SPIN_UNLOCKED);
> > +     __smp_store_release(&lock->owner, lock->owner + 1);
> >  }
> > +
> > --
> > 2.30.2
>
> --
> opensbi mailing list
> opensbi@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/opensbi
Anup Patel April 5, 2021, 6:03 a.m. UTC | #8
> -----Original Message-----
> From: Guo Ren <guoren@kernel.org>
> Sent: 05 April 2021 11:09
> To: Anup Patel <Anup.Patel@wdc.com>
> Cc: Christoph Muellner <cmuellner@linux.com>; opensbi@lists.infradead.org
> Subject: Re: [PATCH] spinlocks: Replace test-and-set locks by ticket locks
> 
> On Sun, Apr 4, 2021 at 2:05 PM Anup Patel <Anup.Patel@wdc.com> wrote:
> >
> >
> >
> > > -----Original Message-----
> > > From: Christoph Muellner <cmuellner@linux.com>
> > > Sent: 03 April 2021 21:53
> > > To: opensbi@lists.infradead.org; Anup Patel <Anup.Patel@wdc.com>
> > > Cc: Christoph Muellner <cmuellner@linux.com>
> > > Subject: [PATCH] spinlocks: Replace test-and-set locks by ticket
> > > locks
> > >
> > > Test-and-set locks don't provide fairness and are non-deterministic
> (w.r.t.
> > > the prediction of the future holder of a lock).
> > > This can be a performance problem in case of lock contention.
> > >
> > > Let's change the implementation to use ticket locks, which guarantee
> > > a fair locking order (FIFO ordering).
> > >
> > > Ticket locks have two counters 'owner' and 'next'.
> > > The counter 'owner' holds the ticket number of the current lock owner.
> > > The counter 'next' holds the latest free ticket number.
> > >
> > > The lock is free if both counters are equal. To acquire the lock, the 'next'
> > > counter is atomically incremented to obtain a new ticket.
> > > The counter 'owner' is then polled until it becomes equal to the new
> ticket.
> > > To release a lock, one atomically increments the 'owner'
> > > counter.
> > >
> > > The implementation uses a 32-bit wide struct, which consists of two
> > > 16-bit counters (owner and next). This is inspired by similar
> > > spinlock implementations on other architectures.
> > >
> > > Note, that RISC-V lacks sub-word atomics, therefore we need to
> > > circumvent this limitation with additional instructions.
> > > This implementation aims to reduce the instructions between the
> > > LR/SC pairs to minimize the chances of failing SCs. The cost of this
> > > approach is, that we spill more registers than necessary.
> >
> > The spin_trylock() can be tricky to implement with ticket approach.
> > Since the
> > spin_trylock() is currently not used anywhere in OpenSBI, I suggest:
> >
> > 1) We can drop the spin_trylock() API as separate patch
> > 2) Implement classic ticket spinlock approach as described in Wikipedia
> >     using AMOADD instructions and barriers.
> Do you prefer AMOADD? I think it's a better idea (csky only has lr.w/sc.w, so
> it's my habit :P), I will update the Linux ticket-lock
> patch:
> https://lore.kernel.org/linux-riscv/1617201040-83905-4-git-send-email-
> guoren@kernel.org/T/#u

It's generally easier to think in terms of LR/SC on any architecture, but
the AMO instructions always scale better in terms of HW implementation.

> 
> with:
> 
> diff --git a/arch/riscv/include/asm/spinlock.h
> b/arch/riscv/include/asm/spinlock.h
> index 90b7eaa950cf..435286ad342b 100644
> --- a/arch/riscv/include/asm/spinlock.h
> +++ b/arch/riscv/include/asm/spinlock.h
> @@ -22,15 +22,10 @@
>  static inline void arch_spin_lock(arch_spinlock_t *lock)  {
>         arch_spinlock_t lockval;
> -       u32 tmp;
> 
>         asm volatile (
> -               "1:     lr.w    %0, %2          \n"
> -               "       mv      %1, %0          \n"
> -               "       addw    %0, %0, %3      \n"
> -               "       sc.w    %0, %0, %2      \n"
> -               "       bnez    %0, 1b          \n"
> -               : "=&r" (tmp), "=&r" (lockval), "+A" (lock->lock)
> +               "   amoadd.w    %0, %2, %1      \n"
> +               : "=&r" (lockval), "+A" (lock->lock)
>                 : "r" (1 << TICKET_NEXT)
>                 : "memory");
> 
> Thx for the tip :)

I was going to comment on LKML after we had it working here, but it's good
that you already noticed it.

I did not think much about implementing spin_trylock() with atomic
instructions, but it would be nice if you could attempt that as well in
your Linux patches.
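
For what it's worth, the packed layout is what makes a simple trylock
possible: both counters can be examined and updated with one word-sized
compare-and-swap. A hedged sketch (GCC builtins; little-endian layout and
all names assumed for illustration):

	#include <stdint.h>

	#define TICKET_SHIFT 16

	typedef struct {
		uint32_t word;	/* owner = low 16 bits, next = high 16 bits */
	} ticketlock_t;

	static int ticket_trylock(ticketlock_t *l)
	{
		uint32_t old = __atomic_load_n(&l->word, __ATOMIC_RELAXED);

		/* Only try if the lock looks free (owner == next). */
		if ((old & 0xffffu) != (old >> TICKET_SHIFT))
			return 0;

		/* Claim the free ticket iff nobody raced us; maps to one
		 * LR/SC pair on RISC-V. */
		return __atomic_compare_exchange_n(&l->word, &old,
						   old + (1u << TICKET_SHIFT),
						   false, __ATOMIC_ACQUIRE,
						   __ATOMIC_RELAXED);
	}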

> 
> > 3) We don't need to pack next_ticket and owner into one word because
> >      these can be separate "unsigned long" in spinlock_t. I believe that
> >      this packing only helps in implementing spin_trylock(), Right ??
> >
> > Regards,
> > Anup
> >
> > >
> > > Signed-off-by: Christoph Muellner <cmuellner@linux.com>
> > > ---
> > >  include/sbi/riscv_locks.h | 33 ++++++++++++-------
> > >  lib/sbi/riscv_locks.c     | 67 +++++++++++++++++++++++++++++----------
> > >  2 files changed, 72 insertions(+), 28 deletions(-)
> > >
> > > diff --git a/include/sbi/riscv_locks.h b/include/sbi/riscv_locks.h
> > > index
> > > faa9676..59e6af9 100644
> > > --- a/include/sbi/riscv_locks.h
> > > +++ b/include/sbi/riscv_locks.h
> > > @@ -2,26 +2,37 @@
> > >   * SPDX-License-Identifier: BSD-2-Clause
> > >   *
> > >   * Copyright (c) 2019 Western Digital Corporation or its affiliates.
> > > - *
> > > - * Authors:
> > > - *   Anup Patel <anup.patel@wdc.com>
> > > + * Copyright (c) 2021 Christoph Müllner <cmuellner@linux.com>
> > >   */
> > >
> > >  #ifndef __RISCV_LOCKS_H__
> > >  #define __RISCV_LOCKS_H__
> > >
> > > +#include <sbi/sbi_types.h>
> > > +
> > > +#define TICKET_SHIFT 16
> > > +
> > >  typedef struct {
> > > -     volatile long lock;
> > > -} spinlock_t;
> > > +#ifdef __BIG_ENDIAN
> > > +       u16 next;
> > > +       u16 owner;
> > > +#else
> > > +       u16 owner;
> > > +       u16 next;
> > > +#endif
> > > +} __aligned(4) spinlock_t;
> > > +
> > > +#define __SPIN_LOCK_UNLOCKED \
> > > +     (spinlock_t) { 0, 0 }
> > >
> > > -#define __RISCV_SPIN_UNLOCKED 0
> > > +#define SPIN_LOCK_INIT(x)    \
> > > +     x = __SPIN_LOCK_UNLOCKED
> > >
> > > -#define SPIN_LOCK_INIT(x) (x).lock = __RISCV_SPIN_UNLOCKED
> > > +#define SPIN_LOCK_INITIALIZER        \
> > > +     __SPIN_LOCK_UNLOCKED
> > >
> > > -#define SPIN_LOCK_INITIALIZER                  \
> > > -     {                                      \
> > > -             .lock = __RISCV_SPIN_UNLOCKED, \
> > > -     }
> > > +#define DEFINE_SPIN_LOCK(x)  \
> > > +     spinlock_t SPIN_LOCK_INIT(x)
> > >
> > >  int spin_lock_check(spinlock_t *lock);
> > >
> > > diff --git a/lib/sbi/riscv_locks.c b/lib/sbi/riscv_locks.c index
> > > 4d1d9c0..7d86742
> > > 100644
> > > --- a/lib/sbi/riscv_locks.c
> > > +++ b/lib/sbi/riscv_locks.c
> > > @@ -2,44 +2,77 @@
> > >   * SPDX-License-Identifier: BSD-2-Clause
> > >   *
> > >   * Copyright (c) 2019 Western Digital Corporation or its affiliates.
> > > - *
> > > - * Authors:
> > > - *   Anup Patel <anup.patel@wdc.com>
> > > + * Copyright (c) 2021 Christoph Müllner <cmuellner@linux.com>
> > >   */
> > >
> > >  #include <sbi/riscv_barrier.h>
> > >  #include <sbi/riscv_locks.h>
> > >
> > > -int spin_lock_check(spinlock_t *lock)
> > > +static inline int spin_lock_unlocked(spinlock_t lock)
> > >  {
> > > -     return (lock->lock == __RISCV_SPIN_UNLOCKED) ? 0 : 1;
> > > +        return lock.owner == lock.next; }
> > > +
> > > +bool spin_lock_check(spinlock_t *lock) {
> > > +     RISCV_FENCE(r, rw);
> > > +     return !spin_lock_unlocked(*lock);
> > >  }
> > >
> > >  int spin_trylock(spinlock_t *lock)
> > >  {
> > > -     int tmp = 1, busy;
> > > +     u32 inc = 1u << TICKET_SHIFT;
> > > +     u32 mask = 0xffffu << TICKET_SHIFT;
> > > +     u32 l0, tmp1, tmp2;
> > >
> > >       __asm__ __volatile__(
> > > -             "       amoswap.w %0, %2, %1\n"
> > > RISCV_ACQUIRE_BARRIER
> > > -             : "=r"(busy), "+A"(lock->lock)
> > > -             : "r"(tmp)
> > > +             /* Get the current lock counters. */
> > > +             "1:     lr.w.aq %0, %3\n"
> > > +             "       slliw   %2, %0, %6\n"
> > > +             "       and     %1, %0, %5\n"
> > > +             /* Is the lock free right now? */
> > > +             "       bne     %1, %2, 2f\n"
> > > +             "       addw    %0, %0, %4\n"
> > > +             /* Acquire the lock. */
> > > +             "       sc.w.rl %0, %0, %3\n"
> > > +             "       bnez    %0, 1b\n"
> > > +             "2:"
> > > +             : "=&r"(l0), "=&r"(tmp1), "=&r"(tmp2), "+A"(*lock)
> > > +             : "r"(inc), "r"(mask), "I"(TICKET_SHIFT)
> > >               : "memory");
> > >
> > > -     return !busy;
> > > +     return !l0;
> > >  }
> > >
> > >  void spin_lock(spinlock_t *lock)
> > >  {
> > > -     while (1) {
> > > -             if (spin_lock_check(lock))
> > > -                     continue;
> > > +     u32 inc = 1u << TICKET_SHIFT;
> > > +     u32 mask = 0xffffu << TICKET_SHIFT;
> > > +     u32 l0, tmp1, tmp2;
> > >
> > > -             if (spin_trylock(lock))
> > > -                     break;
> > > -     }
> > > +     __asm__ __volatile__(
> > > +             /* Atomically increment the next ticket. */
> > > +             "0:     lr.w.aq %0, %3\n"
> > > +             "       addw    %0, %0, %4\n"
> > > +             "       sc.w.rl %0, %0, %3\n"
> > > +             "       bnez    %0, 0b\n"
> > > +
> > > +             /* Did we get the lock? */
> > > +             "1:     slliw   %2, %0, %6\n"
> > > +             "       and     %1, %0, %5\n"
> > > +             "       beq     %1, %2, 2f\n"
> > > +
> > > +             /* No, let's spin on the lock. */
> > > +             "       lr.w.aq %0, %3\n"
> > > +             "       j       1b\n"
> > > +             "2:"
> > > +             : "=&r"(l0), "=&r"(tmp1), "=&r"(tmp2), "+A"(*lock)
> > > +             : "r"(inc), "r"(mask), "I"(TICKET_SHIFT)
> > > +             : "memory");
> > >  }
> > >
> > >  void spin_unlock(spinlock_t *lock)
> > >  {
> > > -     __smp_store_release(&lock->lock, __RISCV_SPIN_UNLOCKED);
> > > +     __smp_store_release(&lock->owner, lock->owner + 1);
> > >  }
> > > +
> > > --
> > > 2.30.2
> >
> > --
> > opensbi mailing list
> > opensbi@lists.infradead.org
> > http://lists.infradead.org/mailman/listinfo/opensbi
> 
> 
> 
> --
> Best Regards
>  Guo Ren
> 
> ML: https://lore.kernel.org/linux-csky/

Regards,
Anup
Guo Ren April 5, 2021, 6:52 a.m. UTC | #9
On Mon, Apr 5, 2021 at 2:03 PM Anup Patel <Anup.Patel@wdc.com> wrote:
>
>
>
> > -----Original Message-----
> > From: Guo Ren <guoren@kernel.org>
> > Sent: 05 April 2021 11:09
> > To: Anup Patel <Anup.Patel@wdc.com>
> > Cc: Christoph Muellner <cmuellner@linux.com>; opensbi@lists.infradead.org
> > Subject: Re: [PATCH] spinlocks: Replace test-and-set locks by ticket locks
> >
> > On Sun, Apr 4, 2021 at 2:05 PM Anup Patel <Anup.Patel@wdc.com> wrote:
> > >
> > >
> > >
> > > > -----Original Message-----
> > > > From: Christoph Muellner <cmuellner@linux.com>
> > > > Sent: 03 April 2021 21:53
> > > > To: opensbi@lists.infradead.org; Anup Patel <Anup.Patel@wdc.com>
> > > > Cc: Christoph Muellner <cmuellner@linux.com>
> > > > Subject: [PATCH] spinlocks: Replace test-and-set locks by ticket
> > > > locks
> > > >
> > > > Test-and-set locks don't provide fairness and are non-deterministic
> > (w.r.t.
> > > > the prediction of the future holder of a lock).
> > > > This can be a performance problem in case of lock contention.
> > > >
> > > > Let's change the implementation to use ticket locks, which guarantee
> > > > a fair locking order (FIFO ordering).
> > > >
> > > > Ticket locks have two counters 'owner' and 'next'.
> > > > The counter 'owner' holds the ticket number of the current lock owner.
> > > > The counter 'next' holds the latest free ticket number.
> > > >
> > > > The lock is free if both counters are equal. To acquire the lock, the 'next'
> > > > counter is atomically incremented to obtain a new ticket.
> > > > The counter 'owner' is then polled until it becomes equal to the new
> > ticket.
> > > > To release a lock, one atomically increments the 'owner'
> > > > counter.
> > > >
> > > > The implementation uses a 32-bit wide struct, which consists of two
> > > > 16-bit counters (owner and next). This is inspired by similar
> > > > spinlock implementations on other architectures.
> > > >
> > > > Note, that RISC-V lacks sub-word atomics, therefore we need to
> > > > circumvent this limitation with additional instructions.
> > > > This implementation aims to reduce the instructions between the
> > > > LR/SC pairs to minimize the chances of failing SCs. The cost of this
> > > > approach is, that we spill more registers than necessary.
> > >
> > > The spin_trylock() can be tricky to implement with ticket approach.
> > > Since the
> > > spin_trylock() is currently not used anywhere in OpenSBI, I suggest:
> > >
> > > 1) We can drop the spin_trylock() API as separate patch
> > > 2) Implement classic ticket spinlock approach as described in Wikipedia
> > >     using AMOADD instructions and barriers.
> > Do you prefer AMOADD? I think it's a better idea (csky only has lr.w/sc.w, so
> > it's my habit :P), I will update the Linux ticket-lock
> > patch:
> > https://lore.kernel.org/linux-riscv/1617201040-83905-4-git-send-email-
> > guoren@kernel.org/T/#u
>
> It's generally easier to think in-terms of LR/SC on any architecture but the
> AMO instructions always scale better in-terms of HW implementation.
>
> >
> > with:
> >
> > diff --git a/arch/riscv/include/asm/spinlock.h
> > b/arch/riscv/include/asm/spinlock.h
> > index 90b7eaa950cf..435286ad342b 100644
> > --- a/arch/riscv/include/asm/spinlock.h
> > +++ b/arch/riscv/include/asm/spinlock.h
> > @@ -22,15 +22,10 @@
> >  static inline void arch_spin_lock(arch_spinlock_t *lock)  {
> >         arch_spinlock_t lockval;
> > -       u32 tmp;
> >
> >         asm volatile (
> > -               "1:     lr.w    %0, %2          \n"
> > -               "       mv      %1, %0          \n"
> > -               "       addw    %0, %0, %3      \n"
> > -               "       sc.w    %0, %0, %2      \n"
> > -               "       bnez    %0, 1b          \n"
> > -               : "=&r" (tmp), "=&r" (lockval), "+A" (lock->lock)
> > +               "   amoadd.w    %0, %2, %1      \n"
> > +               : "=&r" (lockval), "+A" (lock->lock)
> >                 : "r" (1 << TICKET_NEXT)
> >                 : "memory");
> >
> > Thx for the tip :)
>
> I was going to comment in LKML after we had it working here but it's good
> that you already noticed it.
>
> I did not think much about spin_trylock() using atomic instructions but it
> will be nice if you can attempt that as well in your Linux patches.
All RISC-V AMO instructions write to the target address unconditionally,
and there is no CAS instruction in the RISC-V A-extension.
So LR/SC is the only way for RISC-V to implement trylock.

Look at the code below:
static inline int arch_spin_trylock(arch_spinlock_t *lock)
{
        u32 tmp, contended, res;

        do {
                asm volatile (
                /* Load the packed next:owner word. */
                "       lr.w    %0, %3          \n"
                /* Build the word rotated by TICKET_NEXT bits. */
                __ASM_SRLIW    "%1, %0, %5      \n"
                __ASM_SLLIW    "%2, %0, %5      \n"
                "       or      %1, %2, %1      \n"
                /* Preset the SC result so a contended exit stops the loop. */
                "       li      %2, 0           \n"
                /* Rotated == original iff owner == next, i.e. lock free. */
                "       sub     %1, %1, %0      \n"
                "       bnez    %1, 1f          \n"
                /* Free: take the next ticket. */
                "       addw    %0, %0, %4      \n"
                "       sc.w    %2, %0, %3      \n"
                "1:                             \n"
                : "=&r" (tmp), "=&r" (contended), "=&r" (res),
                  "+A" (lock->lock)
                : "r" (1 << TICKET_NEXT), "I" (TICKET_NEXT)
                : "memory");
        } while (res);

        if (!contended)
                __atomic_acquire_fence();

        return !contended;
}

When the lock is held, trylock will fail and the executed instruction
sequence is:
                "       lr.w    %0, %3          \n"
                "       srliw   %1, %0, %5      \n"
                "       slliw   %2, %0, %5      \n"
                "       or      %1, %2, %1      \n"
                "       li      %2, 0           \n"
                "       sub     %1, %1, %0      \n"
                "       bnez    %1, 1f          \n"
                "1:"
                return !contended;

1 lr.w + 5 ALU instructions + 1 branch = 7 instructions.
The most inconvenient part of RISC-V is that there is no rotate-shift
instruction, so I must use more instructions and registers to implement
the comparison between 'owner' and 'next'.
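For comparison, here is a rough C-level sketch of the same trylock
(an illustration only, not code from this thread; it assumes GCC's
__atomic builtins and the little-endian packed layout, and the
compare-exchange below is lowered to just such an LR/SC retry loop on
RISC-V, since the A-extension has no CAS instruction):

#include <stdint.h>

#define TICKET_NEXT 16

typedef struct { uint32_t lock; } arch_spinlock_t;

static inline int ticket_trylock(arch_spinlock_t *lock)
{
        uint32_t old = __atomic_load_n(&lock->lock, __ATOMIC_RELAXED);

        /* owner is the low half, next the high half; unequal means held. */
        if ((uint16_t)old != (uint16_t)(old >> TICKET_NEXT))
                return 0;

        /* Take the next ticket if the word is still unchanged. */
        return __atomic_compare_exchange_n(&lock->lock, &old,
                                           old + (1u << TICKET_NEXT),
                                           0, __ATOMIC_ACQUIRE,
                                           __ATOMIC_RELAXED);
}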

>
> >
> > > 3) We don't need to pack next_ticket and owner into one word because
> > >      these can be separate "unsigned long" in spinlock_t. I believe that
> > >      this packing only helps in implementing spin_trylock(), Right ??
> > >
> > > Regards,
> > > Anup
> > >
> > > >
> > > > Signed-off-by: Christoph Muellner <cmuellner@linux.com>
> > > > ---
> > > >  include/sbi/riscv_locks.h | 33 ++++++++++++-------
> > > >  lib/sbi/riscv_locks.c     | 67 +++++++++++++++++++++++++++++----------
> > > >  2 files changed, 72 insertions(+), 28 deletions(-)
> > > >
> > > > diff --git a/include/sbi/riscv_locks.h b/include/sbi/riscv_locks.h
> > > > index
> > > > faa9676..59e6af9 100644
> > > > --- a/include/sbi/riscv_locks.h
> > > > +++ b/include/sbi/riscv_locks.h
> > > > @@ -2,26 +2,37 @@
> > > >   * SPDX-License-Identifier: BSD-2-Clause
> > > >   *
> > > >   * Copyright (c) 2019 Western Digital Corporation or its affiliates.
> > > > - *
> > > > - * Authors:
> > > > - *   Anup Patel <anup.patel@wdc.com>
> > > > + * Copyright (c) 2021 Christoph Müllner <cmuellner@linux.com>
> > > >   */
> > > >
> > > >  #ifndef __RISCV_LOCKS_H__
> > > >  #define __RISCV_LOCKS_H__
> > > >
> > > > +#include <sbi/sbi_types.h>
> > > > +
> > > > +#define TICKET_SHIFT 16
> > > > +
> > > >  typedef struct {
> > > > -     volatile long lock;
> > > > -} spinlock_t;
> > > > +#ifdef __BIG_ENDIAN
> > > > +       u16 next;
> > > > +       u16 owner;
> > > > +#else
> > > > +       u16 owner;
> > > > +       u16 next;
> > > > +#endif
> > > > +} __aligned(4) spinlock_t;
> > > > +
> > > > +#define __SPIN_LOCK_UNLOCKED \
> > > > +     (spinlock_t) { 0, 0 }
> > > >
> > > > -#define __RISCV_SPIN_UNLOCKED 0
> > > > +#define SPIN_LOCK_INIT(x)    \
> > > > +     x = __SPIN_LOCK_UNLOCKED
> > > >
> > > > -#define SPIN_LOCK_INIT(x) (x).lock = __RISCV_SPIN_UNLOCKED
> > > > +#define SPIN_LOCK_INITIALIZER        \
> > > > +     __SPIN_LOCK_UNLOCKED
> > > >
> > > > -#define SPIN_LOCK_INITIALIZER                  \
> > > > -     {                                      \
> > > > -             .lock = __RISCV_SPIN_UNLOCKED, \
> > > > -     }
> > > > +#define DEFINE_SPIN_LOCK(x)  \
> > > > +     spinlock_t SPIN_LOCK_INIT(x)
> > > >
> > > >  int spin_lock_check(spinlock_t *lock);
> > > >
> > > > diff --git a/lib/sbi/riscv_locks.c b/lib/sbi/riscv_locks.c index
> > > > 4d1d9c0..7d86742
> > > > 100644
> > > > --- a/lib/sbi/riscv_locks.c
> > > > +++ b/lib/sbi/riscv_locks.c
> > > > @@ -2,44 +2,77 @@
> > > >   * SPDX-License-Identifier: BSD-2-Clause
> > > >   *
> > > >   * Copyright (c) 2019 Western Digital Corporation or its affiliates.
> > > > - *
> > > > - * Authors:
> > > > - *   Anup Patel <anup.patel@wdc.com>
> > > > + * Copyright (c) 2021 Christoph Müllner <cmuellner@linux.com>
> > > >   */
> > > >
> > > >  #include <sbi/riscv_barrier.h>
> > > >  #include <sbi/riscv_locks.h>
> > > >
> > > > -int spin_lock_check(spinlock_t *lock)
> > > > +static inline int spin_lock_unlocked(spinlock_t lock)
> > > >  {
> > > > -     return (lock->lock == __RISCV_SPIN_UNLOCKED) ? 0 : 1;
> > > > +        return lock.owner == lock.next; }
> > > > +
> > > > +bool spin_lock_check(spinlock_t *lock) {
> > > > +     RISCV_FENCE(r, rw);
> > > > +     return !spin_lock_unlocked(*lock);
> > > >  }
> > > >
> > > >  int spin_trylock(spinlock_t *lock)
> > > >  {
> > > > -     int tmp = 1, busy;
> > > > +     u32 inc = 1u << TICKET_SHIFT;
> > > > +     u32 mask = 0xffffu << TICKET_SHIFT;
> > > > +     u32 l0, tmp1, tmp2;
> > > >
> > > >       __asm__ __volatile__(
> > > > -             "       amoswap.w %0, %2, %1\n"
> > > > RISCV_ACQUIRE_BARRIER
> > > > -             : "=r"(busy), "+A"(lock->lock)
> > > > -             : "r"(tmp)
> > > > +             /* Get the current lock counters. */
> > > > +             "1:     lr.w.aq %0, %3\n"
> > > > +             "       slliw   %2, %0, %6\n"
> > > > +             "       and     %1, %0, %5\n"
> > > > +             /* Is the lock free right now? */
> > > > +             "       bne     %1, %2, 2f\n"
> > > > +             "       addw    %0, %0, %4\n"
> > > > +             /* Acquire the lock. */
> > > > +             "       sc.w.rl %0, %0, %3\n"
> > > > +             "       bnez    %0, 1b\n"
> > > > +             "2:"
> > > > +             : "=&r"(l0), "=&r"(tmp1), "=&r"(tmp2), "+A"(*lock)
> > > > +             : "r"(inc), "r"(mask), "I"(TICKET_SHIFT)
> > > >               : "memory");
> > > >
> > > > -     return !busy;
> > > > +     return !l0;
> > > >  }
> > > >
> > > >  void spin_lock(spinlock_t *lock)
> > > >  {
> > > > -     while (1) {
> > > > -             if (spin_lock_check(lock))
> > > > -                     continue;
> > > > +     u32 inc = 1u << TICKET_SHIFT;
> > > > +     u32 mask = 0xffffu << TICKET_SHIFT;
> > > > +     u32 l0, tmp1, tmp2;
> > > >
> > > > -             if (spin_trylock(lock))
> > > > -                     break;
> > > > -     }
> > > > +     __asm__ __volatile__(
> > > > +             /* Atomically increment the next ticket. */
> > > > +             "0:     lr.w.aq %0, %3\n"
> > > > +             "       addw    %0, %0, %4\n"
> > > > +             "       sc.w.rl %0, %0, %3\n"
> > > > +             "       bnez    %0, 0b\n"
> > > > +
> > > > +             /* Did we get the lock? */
> > > > +             "1:     slliw   %2, %0, %6\n"
> > > > +             "       and     %1, %0, %5\n"
> > > > +             "       beq     %1, %2, 2f\n"
> > > > +
> > > > +             /* No, let's spin on the lock. */
> > > > +             "       lr.w.aq %0, %3\n"
> > > > +             "       j       1b\n"
> > > > +             "2:"
> > > > +             : "=&r"(l0), "=&r"(tmp1), "=&r"(tmp2), "+A"(*lock)
> > > > +             : "r"(inc), "r"(mask), "I"(TICKET_SHIFT)
> > > > +             : "memory");
> > > >  }
> > > >
> > > >  void spin_unlock(spinlock_t *lock)
> > > >  {
> > > > -     __smp_store_release(&lock->lock, __RISCV_SPIN_UNLOCKED);
> > > > +     __smp_store_release(&lock->owner, lock->owner + 1);
> > > >  }
> > > > +
> > > > --
> > > > 2.30.2
> > >
> > > --
> > > opensbi mailing list
> > > opensbi@lists.infradead.org
> > > http://lists.infradead.org/mailman/listinfo/opensbi
> >
> >
> >
> > --
> > Best Regards
> >  Guo Ren
> >
> > ML: https://lore.kernel.org/linux-csky/
>
> Regards,
> Anup
Anup Patel April 5, 2021, 7:45 a.m. UTC | #10
> -----Original Message-----
> From: Guo Ren <guoren@kernel.org>
> Sent: 05 April 2021 12:23
> To: Anup Patel <Anup.Patel@wdc.com>
> Cc: Christoph Muellner <cmuellner@linux.com>; opensbi@lists.infradead.org
> Subject: Re: [PATCH] spinlocks: Replace test-and-set locks by ticket locks
> 
> On Mon, Apr 5, 2021 at 2:03 PM Anup Patel <Anup.Patel@wdc.com> wrote:
> >
> >
> >
> > > -----Original Message-----
> > > From: Guo Ren <guoren@kernel.org>
> > > Sent: 05 April 2021 11:09
> > > To: Anup Patel <Anup.Patel@wdc.com>
> > > Cc: Christoph Muellner <cmuellner@linux.com>;
> > > opensbi@lists.infradead.org
> > > Subject: Re: [PATCH] spinlocks: Replace test-and-set locks by ticket
> > > locks
> > >
> > > On Sun, Apr 4, 2021 at 2:05 PM Anup Patel <Anup.Patel@wdc.com>
> wrote:
> > > >
> > > >
> > > >
> > > > > -----Original Message-----
> > > > > From: Christoph Muellner <cmuellner@linux.com>
> > > > > Sent: 03 April 2021 21:53
> > > > > To: opensbi@lists.infradead.org; Anup Patel <Anup.Patel@wdc.com>
> > > > > Cc: Christoph Muellner <cmuellner@linux.com>
> > > > > Subject: [PATCH] spinlocks: Replace test-and-set locks by ticket
> > > > > locks
> > > > >
> > > > > Test-and-set locks don't provide fairness and are
> > > > > non-deterministic
> > > (w.r.t.
> > > > > the prediction of the future holder of a lock).
> > > > > This can be a performance problem in case of lock contention.
> > > > >
> > > > > Let's change the implementation to use ticket locks, which
> > > > > guarantee a fair locking order (FIFO ordering).
> > > > >
> > > > > Ticket locks have two counters 'owner' and 'next'.
> > > > > The counter 'owner' holds the ticket number of the current lock
> owner.
> > > > > The counter 'next' holds the latest free ticket number.
> > > > >
> > > > > The lock is free if both counters are equal. To acquire the lock, the
> 'next'
> > > > > counter is atomically incremented to obtain a new ticket.
> > > > > The counter 'owner' is then polled until it becomes equal to the
> > > > > new
> > > ticket.
> > > > > To release a lock, one atomically increments the 'owner'
> > > > > counter.
> > > > >
> > > > > The implementation uses a 32-bit wide struct, which consists of
> > > > > two 16-bit counters (owner and next). This is inspired by
> > > > > similar spinlock implementations on other architectures.
> > > > >
> > > > > Note, that RISC-V lacks sub-word atomics, therefore we need to
> > > > > circumvent this limitation with additional instructions.
> > > > > This implementation aims to reduce the instructions between the
> > > > > LR/SC pairs to minimize the chances of failing SCs. The cost of
> > > > > this approach is, that we spill more registers than necessary.
> > > >
> > > > The spin_trylock() can be tricky to implement with ticket approach.
> > > > Since the
> > > > spin_trylock() is currently not used anywhere in OpenSBI, I suggest:
> > > >
> > > > 1) We can drop the spin_trylock() API as separate patch
> > > > 2) Implement classic ticket spinlock approach as described in Wikipedia
> > > >     using AMOADD instructions and barriers.
> > > Do you prefer AMOADD? I think it's a better idea (csky only has
> > > lr.w/sc.w, so it's my habit :P), I will update the Linux ticket-lock
> > > patch:
> > > https://lore.kernel.org/linux-riscv/1617201040-83905-4-git-send-emai
> > > l-
> > > guoren@kernel.org/T/#u
> >
> > It's generally easier to think in-terms of LR/SC on any architecture
> > but the AMO instructions always scale better in-terms of HW
> implementation.
> >
> > >
> > > with:
> > >
> > > diff --git a/arch/riscv/include/asm/spinlock.h
> > > b/arch/riscv/include/asm/spinlock.h
> > > index 90b7eaa950cf..435286ad342b 100644
> > > --- a/arch/riscv/include/asm/spinlock.h
> > > +++ b/arch/riscv/include/asm/spinlock.h
> > > @@ -22,15 +22,10 @@
> > >  static inline void arch_spin_lock(arch_spinlock_t *lock)  {
> > >         arch_spinlock_t lockval;
> > > -       u32 tmp;
> > >
> > >         asm volatile (
> > > -               "1:     lr.w    %0, %2          \n"
> > > -               "       mv      %1, %0          \n"
> > > -               "       addw    %0, %0, %3      \n"
> > > -               "       sc.w    %0, %0, %2      \n"
> > > -               "       bnez    %0, 1b          \n"
> > > -               : "=&r" (tmp), "=&r" (lockval), "+A" (lock->lock)
> > > +               "   amoadd.w    %0, %2, %1      \n"
> > > +               : "=&r" (lockval), "+A" (lock->lock)
> > >                 : "r" (1 << TICKET_NEXT)
> > >                 : "memory");
> > >
> > > Thx for the tip :)
> >
> > I was going to comment in LKML after we had it working here but it's
> > good that you already noticed it.
> >
> > I did not think much about spin_trylock() using atomic instructions
> > but it will be nice if you can attempt that as well in your Linux patches.
> All RISC-V AMO instructions write to the target address unconditionally,
> and there is no CAS instruction in the RISC-V A-extension.
> So LR/SC is the only way for RISC-V to implement trylock.

spin_trylock() is not so common in Linux, so I guess using LR/SC just
for spin_trylock() should be fine. For the other arch_spin_xyz() functions,
we should prefer AMO instructions as much as possible.
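To make that concrete, here is a minimal sketch of the classic ticket
lock built on fetch-and-add (an illustration only, not part of the
patch; GCC compiles the __atomic_fetch_add below to a single
amoadd.w.aq on RISC-V, and the layout assumes little-endian so 'owner'
is the low half of the packed word):

#include <stdint.h>

#define TICKET_SHIFT 16

typedef union {
        uint32_t lock;
        struct {
                uint16_t owner; /* low half on little-endian */
                uint16_t next;
        };
} arch_spinlock_t;

static inline void ticket_lock(arch_spinlock_t *l)
{
        /* Take a ticket: one fetch-and-add, i.e. a single amoadd.w.aq. */
        uint32_t old = __atomic_fetch_add(&l->lock, 1u << TICKET_SHIFT,
                                          __ATOMIC_ACQUIRE);
        uint16_t ticket = old >> TICKET_SHIFT;

        /* Spin until the owner counter reaches our ticket. */
        while (__atomic_load_n(&l->owner, __ATOMIC_ACQUIRE) != ticket)
                ;
}

static inline void ticket_unlock(arch_spinlock_t *l)
{
        /* Hand the lock to the next waiter with a release store. */
        __atomic_store_n(&l->owner, l->owner + 1, __ATOMIC_RELEASE);
}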

> 
> Look at the code below:
> static inline int arch_spin_trylock(arch_spinlock_t *lock) {
>         u32 tmp, contended, res;
> 
>         do {
>                 asm volatile (
>                 "       lr.w    %0, %3          \n"
>                 __ASM_SRLIW    "%1, %0, %5      \n"
>                 __ASM_SLLIW    "%2, %0, %5      \n"
>                 "       or      %1, %2, %1      \n"
>                 "       li      %2, 0           \n"
>                 "       sub     %1, %1, %0      \n"
>                 "       bnez    %1, 1f          \n"
>                 "       addw    %0, %0, %4      \n"
>                 "       sc.w    %2, %0, %3      \n"
>                 "1:                             \n"
>                 : "=&r" (tmp), "=&r" (contended), "=&r" (res),
>                   "+A" (lock->lock)
>                 : "r" (1 << TICKET_NEXT), "I" (TICKET_NEXT)
>                 : "memory");
>         } while (res);
> 
>         if (!contended)
>                 __atomic_acquire_fence();
> 
>         return !contended;
> }
> 
> When the lock is held, trylock will fail and the executed instruction
> sequence is:
>                 "       lr.w    %0, %3          \n"
>                 "       srliw   %1, %0, %5      \n"
>                 "       slliw   %2, %0, %5      \n"
>                 "       or      %1, %2, %1      \n"
>                 "       li      %2, 0           \n"
>                 "       sub     %1, %1, %0      \n"
>                 "       bnez    %1, 1f          \n"
>                 "1:"
>                 return !contended;
> 
> 1 lr.w + 5 ALU instructions + 1 branch = 7 instructions.
> The most inconvenient part of RISC-V is that there is no rotate-shift
> instruction, so I must use more instructions and registers to implement
> the comparison between 'owner' and 'next'.

You will not require a shift instruction if "next_ticket" and "owner" are
not packed into one word.
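
A minimal sketch of that unpacked variant (an illustration only, using
GCC __atomic builtins; the fetch-add maps to a single AMO):

typedef struct {
        unsigned long next;
        unsigned long owner;
} spinlock_t;

static inline void spin_lock_unpacked(spinlock_t *l)
{
        /* One amoadd per acquisition; no shifts or masks needed. */
        unsigned long ticket = __atomic_fetch_add(&l->next, 1,
                                                  __ATOMIC_RELAXED);
        while (__atomic_load_n(&l->owner, __ATOMIC_ACQUIRE) != ticket)
                ;
}

static inline void spin_unlock_unpacked(spinlock_t *l)
{
        __atomic_store_n(&l->owner, l->owner + 1, __ATOMIC_RELEASE);
}

The trade-off is the one already noted in this thread: with two separate
words there is no single compare-exchange that covers both counters, so
spin_trylock() becomes harder to implement.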

Regards,
Anup

> 
> >
> > >
> > > > 3) We don't need to pack next_ticket and owner into one word
> because
> > > >      these can be separate "unsigned long" in spinlock_t. I believe that
> > > >      this packing only helps in implementing spin_trylock(), Right ??
> > > >
> > > > Regards,
> > > > Anup
> > > >
> > > > >
> > > > > Signed-off-by: Christoph Muellner <cmuellner@linux.com>
> > > > > ---
> > > > >  include/sbi/riscv_locks.h | 33 ++++++++++++-------
> > > > >  lib/sbi/riscv_locks.c     | 67 +++++++++++++++++++++++++++++------
> ----
> > > > >  2 files changed, 72 insertions(+), 28 deletions(-)
> > > > >
> > > > > diff --git a/include/sbi/riscv_locks.h
> > > > > b/include/sbi/riscv_locks.h index
> > > > > faa9676..59e6af9 100644
> > > > > --- a/include/sbi/riscv_locks.h
> > > > > +++ b/include/sbi/riscv_locks.h
> > > > > @@ -2,26 +2,37 @@
> > > > >   * SPDX-License-Identifier: BSD-2-Clause
> > > > >   *
> > > > >   * Copyright (c) 2019 Western Digital Corporation or its affiliates.
> > > > > - *
> > > > > - * Authors:
> > > > > - *   Anup Patel <anup.patel@wdc.com>
> > > > > + * Copyright (c) 2021 Christoph Müllner <cmuellner@linux.com>
> > > > >   */
> > > > >
> > > > >  #ifndef __RISCV_LOCKS_H__
> > > > >  #define __RISCV_LOCKS_H__
> > > > >
> > > > > +#include <sbi/sbi_types.h>
> > > > > +
> > > > > +#define TICKET_SHIFT 16
> > > > > +
> > > > >  typedef struct {
> > > > > -     volatile long lock;
> > > > > -} spinlock_t;
> > > > > +#ifdef __BIG_ENDIAN
> > > > > +       u16 next;
> > > > > +       u16 owner;
> > > > > +#else
> > > > > +       u16 owner;
> > > > > +       u16 next;
> > > > > +#endif
> > > > > +} __aligned(4) spinlock_t;
> > > > > +
> > > > > +#define __SPIN_LOCK_UNLOCKED \
> > > > > +     (spinlock_t) { 0, 0 }
> > > > >
> > > > > -#define __RISCV_SPIN_UNLOCKED 0
> > > > > +#define SPIN_LOCK_INIT(x)    \
> > > > > +     x = __SPIN_LOCK_UNLOCKED
> > > > >
> > > > > -#define SPIN_LOCK_INIT(x) (x).lock = __RISCV_SPIN_UNLOCKED
> > > > > +#define SPIN_LOCK_INITIALIZER        \
> > > > > +     __SPIN_LOCK_UNLOCKED
> > > > >
> > > > > -#define SPIN_LOCK_INITIALIZER                  \
> > > > > -     {                                      \
> > > > > -             .lock = __RISCV_SPIN_UNLOCKED, \
> > > > > -     }
> > > > > +#define DEFINE_SPIN_LOCK(x)  \
> > > > > +     spinlock_t SPIN_LOCK_INIT(x)
> > > > >
> > > > >  int spin_lock_check(spinlock_t *lock);
> > > > >
> > > > > diff --git a/lib/sbi/riscv_locks.c b/lib/sbi/riscv_locks.c index
> > > > > 4d1d9c0..7d86742
> > > > > 100644
> > > > > --- a/lib/sbi/riscv_locks.c
> > > > > +++ b/lib/sbi/riscv_locks.c
> > > > > @@ -2,44 +2,77 @@
> > > > >   * SPDX-License-Identifier: BSD-2-Clause
> > > > >   *
> > > > >   * Copyright (c) 2019 Western Digital Corporation or its affiliates.
> > > > > - *
> > > > > - * Authors:
> > > > > - *   Anup Patel <anup.patel@wdc.com>
> > > > > + * Copyright (c) 2021 Christoph Müllner <cmuellner@linux.com>
> > > > >   */
> > > > >
> > > > >  #include <sbi/riscv_barrier.h>
> > > > >  #include <sbi/riscv_locks.h>
> > > > >
> > > > > -int spin_lock_check(spinlock_t *lock)
> > > > > +static inline int spin_lock_unlocked(spinlock_t lock)
> > > > >  {
> > > > > -     return (lock->lock == __RISCV_SPIN_UNLOCKED) ? 0 : 1;
> > > > > +        return lock.owner == lock.next; }
> > > > > +
> > > > > +bool spin_lock_check(spinlock_t *lock) {
> > > > > +     RISCV_FENCE(r, rw);
> > > > > +     return !spin_lock_unlocked(*lock);
> > > > >  }
> > > > >
> > > > >  int spin_trylock(spinlock_t *lock)  {
> > > > > -     int tmp = 1, busy;
> > > > > +     u32 inc = 1u << TICKET_SHIFT;
> > > > > +     u32 mask = 0xffffu << TICKET_SHIFT;
> > > > > +     u32 l0, tmp1, tmp2;
> > > > >
> > > > >       __asm__ __volatile__(
> > > > > -             "       amoswap.w %0, %2, %1\n"
> > > > > RISCV_ACQUIRE_BARRIER
> > > > > -             : "=r"(busy), "+A"(lock->lock)
> > > > > -             : "r"(tmp)
> > > > > +             /* Get the current lock counters. */
> > > > > +             "1:     lr.w.aq %0, %3\n"
> > > > > +             "       slliw   %2, %0, %6\n"
> > > > > +             "       and     %1, %0, %5\n"
> > > > > +             /* Is the lock free right now? */
> > > > > +             "       bne     %1, %2, 2f\n"
> > > > > +             "       addw    %0, %0, %4\n"
> > > > > +             /* Acquire the lock. */
> > > > > +             "       sc.w.rl %0, %0, %3\n"
> > > > > +             "       bnez    %0, 1b\n"
> > > > > +             "2:"
> > > > > +             : "=&r"(l0), "=&r"(tmp1), "=&r"(tmp2), "+A"(*lock)
> > > > > +             : "r"(inc), "r"(mask), "I"(TICKET_SHIFT)
> > > > >               : "memory");
> > > > >
> > > > > -     return !busy;
> > > > > +     return !l0;
> > > > >  }
> > > > >
> > > > >  void spin_lock(spinlock_t *lock)  {
> > > > > -     while (1) {
> > > > > -             if (spin_lock_check(lock))
> > > > > -                     continue;
> > > > > +     u32 inc = 1u << TICKET_SHIFT;
> > > > > +     u32 mask = 0xffffu << TICKET_SHIFT;
> > > > > +     u32 l0, tmp1, tmp2;
> > > > >
> > > > > -             if (spin_trylock(lock))
> > > > > -                     break;
> > > > > -     }
> > > > > +     __asm__ __volatile__(
> > > > > +             /* Atomically increment the next ticket. */
> > > > > +             "0:     lr.w.aq %0, %3\n"
> > > > > +             "       addw    %0, %0, %4\n"
> > > > > +             "       sc.w.rl %0, %0, %3\n"
> > > > > +             "       bnez    %0, 0b\n"
> > > > > +
> > > > > +             /* Did we get the lock? */
> > > > > +             "1:     slliw   %2, %0, %6\n"
> > > > > +             "       and     %1, %0, %5\n"
> > > > > +             "       beq     %1, %2, 2f\n"
> > > > > +
> > > > > +             /* No, let's spin on the lock. */
> > > > > +             "       lr.w.aq %0, %3\n"
> > > > > +             "       j       1b\n"
> > > > > +             "2:"
> > > > > +             : "=&r"(l0), "=&r"(tmp1), "=&r"(tmp2), "+A"(*lock)
> > > > > +             : "r"(inc), "r"(mask), "I"(TICKET_SHIFT)
> > > > > +             : "memory");
> > > > >  }
> > > > >
> > > > >  void spin_unlock(spinlock_t *lock)  {
> > > > > -     __smp_store_release(&lock->lock, __RISCV_SPIN_UNLOCKED);
> > > > > +     __smp_store_release(&lock->owner, lock->owner + 1);
> > > > >  }
> > > > > +
> > > > > --
> > > > > 2.30.2
> > > >
> > > > --
> > > > opensbi mailing list
> > > > opensbi@lists.infradead.org
> > > > http://lists.infradead.org/mailman/listinfo/opensbi
> > >
> > >
> > >
> > > --
> > > Best Regards
> > >  Guo Ren
> > >
> > > ML: https://lore.kernel.org/linux-csky/
> >
> > Regards,
> > Anup
> 
> 
> 
> --
> Best Regards
>  Guo Ren
> 
> ML: https://lore.kernel.org/linux-csky/
Christoph Müllner April 5, 2021, 2:54 p.m. UTC | #11
Thanks for all the input!

I've just sent out a v2, which got proper testing and thus meets the
minimum requirement of being correct on RV32/RV64 under QEMU:
http://lists.infradead.org/pipermail/opensbi/2021-April/000783.html

Regarding the trylock() question: I'd keep it, because having it
available is a valid expectation for a programmer.

The idea of using AMOADD for spin_lock() is good. I will use that in a v3.
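
As a rough sketch of that idea (not the actual v3; it keeps the patch's
packed layout and the RV64 word ops used above, and it is untested),
spin_lock() could take its ticket with a single AMO and then poll the
owner half:

void spin_lock(spinlock_t *lock)
{
	u32 inc = 1u << TICKET_SHIFT;
	u32 ticket, owner;

	__asm__ __volatile__(
		/* Take the next ticket with one AMO instead of LR/SC. */
		"	amoadd.w %0, %3, %2\n"
		"	srliw	%0, %0, %4\n"
		/* Poll the owner half until it matches our ticket. */
		"1:	lw	%1, %2\n"
		"	slliw	%1, %1, %4\n"
		"	srliw	%1, %1, %4\n"
		"	bne	%1, %0, 1b\n"
		: "=&r"(ticket), "=&r"(owner), "+A"(*lock)
		: "r"(inc), "I"(TICKET_SHIFT)
		: "memory");
	/* Acquire ordering once we observe our turn. */
	RISCV_FENCE(r, rw);
}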

Thanks,
Christoph




On Mon, Apr 5, 2021 at 9:45 AM Anup Patel <Anup.Patel@wdc.com> wrote:
>
>
>
> > -----Original Message-----
> > From: Guo Ren <guoren@kernel.org>
> > Sent: 05 April 2021 12:23
> > To: Anup Patel <Anup.Patel@wdc.com>
> > Cc: Christoph Muellner <cmuellner@linux.com>; opensbi@lists.infradead.org
> > Subject: Re: [PATCH] spinlocks: Replace test-and-set locks by ticket locks
> >
> > On Mon, Apr 5, 2021 at 2:03 PM Anup Patel <Anup.Patel@wdc.com> wrote:
> > >
> > >
> > >
> > > > -----Original Message-----
> > > > From: Guo Ren <guoren@kernel.org>
> > > > Sent: 05 April 2021 11:09
> > > > To: Anup Patel <Anup.Patel@wdc.com>
> > > > Cc: Christoph Muellner <cmuellner@linux.com>;
> > > > opensbi@lists.infradead.org
> > > > Subject: Re: [PATCH] spinlocks: Replace test-and-set locks by ticket
> > > > locks
> > > >
> > > > On Sun, Apr 4, 2021 at 2:05 PM Anup Patel <Anup.Patel@wdc.com>
> > wrote:
> > > > >
> > > > >
> > > > >
> > > > > > -----Original Message-----
> > > > > > From: Christoph Muellner <cmuellner@linux.com>
> > > > > > Sent: 03 April 2021 21:53
> > > > > > To: opensbi@lists.infradead.org; Anup Patel <Anup.Patel@wdc.com>
> > > > > > Cc: Christoph Muellner <cmuellner@linux.com>
> > > > > > Subject: [PATCH] spinlocks: Replace test-and-set locks by ticket
> > > > > > locks
> > > > > >
> > > > > > Test-and-set locks don't provide fairness and are
> > > > > > non-deterministic
> > > > (w.r.t.
> > > > > > the prediction of the future holder of a lock).
> > > > > > This can be a performance problem in case of lock contention.
> > > > > >
> > > > > > Let's change the implementation to use ticket locks, which
> > > > > > guarantee a fair locking order (FIFO ordering).
> > > > > >
> > > > > > Ticket locks have two counters 'owner' and 'next'.
> > > > > > The counter 'owner' holds the ticket number of the current lock
> > owner.
> > > > > > The counter 'next' holds the latest free ticket number.
> > > > > >
> > > > > > The lock is free if both counters are equal. To acquire the lock, the
> > 'next'
> > > > > > counter is atomically incremented to obtain a new ticket.
> > > > > > The counter 'owner' is then polled until it becomes equal to the
> > > > > > new
> > > > ticket.
> > > > > > To release a lock, one atomically increments the 'owner'
> > > > > > counter.
> > > > > >
> > > > > > The implementation uses a 32-bit wide struct, which consists of
> > > > > > two 16-bit counters (owner and next). This is inspired by
> > > > > > similar spinlock implementations on other architectures.
> > > > > >
> > > > > > Note, that RISC-V lacks sub-word atomics, therefore we need to
> > > > > > circumvent this limitation with additional instructions.
> > > > > > This implementation aims to reduce the instructions between the
> > > > > > LR/SC pairs to minimize the chances of failing SCs. The cost of
> > > > > > this approach is, that we spill more registers than necessary.
> > > > >
> > > > > The spin_trylock() can be tricky to implement with ticket approach.
> > > > > Since the
> > > > > spin_trylock() is currently not used anywhere in OpenSBI, I suggest:
> > > > >
> > > > > 1) We can drop the spin_trylock() API as separate patch
> > > > > 2) Implement classic ticket spinlock approach as described in Wikipedia
> > > > >     using AMOADD instructions and barriers.
> > > > Do you prefer AMOADD? I think it's a better idea (csky only has
> > > > lr.w/sc.w, so it's my habit :P), I will update the Linux ticket-lock
> > > > patch:
> > > > https://lore.kernel.org/linux-riscv/1617201040-83905-4-git-send-emai
> > > > l-
> > > > guoren@kernel.org/T/#u
> > >
> > > It's generally easier to think in-terms of LR/SC on any architecture
> > > but the AMO instructions always scale better in-terms of HW
> > implementation.
> > >
> > > >
> > > > with:
> > > >
> > > > diff --git a/arch/riscv/include/asm/spinlock.h
> > > > b/arch/riscv/include/asm/spinlock.h
> > > > index 90b7eaa950cf..435286ad342b 100644
> > > > --- a/arch/riscv/include/asm/spinlock.h
> > > > +++ b/arch/riscv/include/asm/spinlock.h
> > > > @@ -22,15 +22,10 @@
> > > >  static inline void arch_spin_lock(arch_spinlock_t *lock)  {
> > > >         arch_spinlock_t lockval;
> > > > -       u32 tmp;
> > > >
> > > >         asm volatile (
> > > > -               "1:     lr.w    %0, %2          \n"
> > > > -               "       mv      %1, %0          \n"
> > > > -               "       addw    %0, %0, %3      \n"
> > > > -               "       sc.w    %0, %0, %2      \n"
> > > > -               "       bnez    %0, 1b          \n"
> > > > -               : "=&r" (tmp), "=&r" (lockval), "+A" (lock->lock)
> > > > +               "   amoadd.w    %0, %2, %1      \n"
> > > > +               : "=&r" (lockval), "+A" (lock->lock)
> > > >                 : "r" (1 << TICKET_NEXT)
> > > >                 : "memory");
> > > >
> > > > Thx for the tip :)
> > >
> > > I was going to comment in LKML after we had it working here but it's
> > > good that you already noticed it.
> > >
> > > I did not think much about spin_trylock() using atomic instructions
> > > but it will be nice if you can attempt that as well in your Linux patches.
> > All RISC-V AMO instructions write to the target address unconditionally,
> > and there is no CAS instruction in the RISC-V A-extension.
> > So LR/SC is the only way for RISC-V to implement trylock.
>
> spin_trylock() is not so common in Linux, so I guess using LR/SC just
> for spin_trylock() should be fine. For the other arch_spin_xyz() functions,
> we should prefer AMO instructions as much as possible.
>
> >
> > Look at the code below:
> > static inline int arch_spin_trylock(arch_spinlock_t *lock) {
> >         u32 tmp, contended, res;
> >
> >         do {
> >                 asm volatile (
> >                 "       lr.w    %0, %3          \n"
> >                 __ASM_SRLIW    "%1, %0, %5      \n"
> >                 __ASM_SLLIW    "%2, %0, %5      \n"
> >                 "       or      %1, %2, %1      \n"
> >                 "       li      %2, 0           \n"
> >                 "       sub     %1, %1, %0      \n"
> >                 "       bnez    %1, 1f          \n"
> >                 "       addw    %0, %0, %4      \n"
> >                 "       sc.w    %2, %0, %3      \n"
> >                 "1:                             \n"
> >                 : "=&r" (tmp), "=&r" (contended), "=&r" (res),
> >                   "+A" (lock->lock)
> >                 : "r" (1 << TICKET_NEXT), "I" (TICKET_NEXT)
> >                 : "memory");
> >         } while (res);
> >
> >         if (!contended)
> >                 __atomic_acquire_fence();
> >
> >         return !contended;
> > }
> >
> > When the lock is held, trylock will fail and the executed instruction
> > sequence is:
> >                 "       lr.w    %0, %3          \n"
> >                 "       srliw   %1, %0, %5      \n"
> >                 "       slliw   %2, %0, %5      \n"
> >                 "       or      %1, %2, %1      \n"
> >                 "       li      %2, 0           \n"
> >                 "       sub     %1, %1, %0      \n"
> >                 "       bnez    %1, 1f          \n"
> >                 "1:"
> >                 return !contended;
> >
> > 1 lr.w + 5 ALU instructions + 1 branch = 7 instructions.
> > The most inconvenient part of RISC-V is that there is no rotate-shift
> > instruction, so I must use more instructions and registers to implement
> > the comparison between 'owner' and 'next'.
>
> You will not require a shift instruction if "next_ticket" and "owner" are
> not packed into one word.
>
> Regards,
> Anup
>
> >
> > >
> > > >
> > > > > 3) We don't need to pack next_ticket and owner into one word
> > because
> > > > >      these can be separate "unsigned long" in spinlock_t. I believe that
> > > > >      this packing only helps in implementing spin_trylock(), Right ??
> > > > >
> > > > > Regards,
> > > > > Anup
> > > > >
> > > > > >
> > > > > > Signed-off-by: Christoph Muellner <cmuellner@linux.com>
> > > > > > ---
> > > > > >  include/sbi/riscv_locks.h | 33 ++++++++++++-------
> > > > > >  lib/sbi/riscv_locks.c     | 67 +++++++++++++++++++++++++++++------
> > ----
> > > > > >  2 files changed, 72 insertions(+), 28 deletions(-)
> > > > > >
> > > > > > diff --git a/include/sbi/riscv_locks.h
> > > > > > b/include/sbi/riscv_locks.h index
> > > > > > faa9676..59e6af9 100644
> > > > > > --- a/include/sbi/riscv_locks.h
> > > > > > +++ b/include/sbi/riscv_locks.h
> > > > > > @@ -2,26 +2,37 @@
> > > > > >   * SPDX-License-Identifier: BSD-2-Clause
> > > > > >   *
> > > > > >   * Copyright (c) 2019 Western Digital Corporation or its affiliates.
> > > > > > - *
> > > > > > - * Authors:
> > > > > > - *   Anup Patel <anup.patel@wdc.com>
> > > > > > + * Copyright (c) 2021 Christoph Müllner <cmuellner@linux.com>
> > > > > >   */
> > > > > >
> > > > > >  #ifndef __RISCV_LOCKS_H__
> > > > > >  #define __RISCV_LOCKS_H__
> > > > > >
> > > > > > +#include <sbi/sbi_types.h>
> > > > > > +
> > > > > > +#define TICKET_SHIFT 16
> > > > > > +
> > > > > >  typedef struct {
> > > > > > -     volatile long lock;
> > > > > > -} spinlock_t;
> > > > > > +#ifdef __BIG_ENDIAN
> > > > > > +       u16 next;
> > > > > > +       u16 owner;
> > > > > > +#else
> > > > > > +       u16 owner;
> > > > > > +       u16 next;
> > > > > > +#endif
> > > > > > +} __aligned(4) spinlock_t;
> > > > > > +
> > > > > > +#define __SPIN_LOCK_UNLOCKED \
> > > > > > +     (spinlock_t) { 0, 0 }
> > > > > >
> > > > > > -#define __RISCV_SPIN_UNLOCKED 0
> > > > > > +#define SPIN_LOCK_INIT(x)    \
> > > > > > +     x = __SPIN_LOCK_UNLOCKED
> > > > > >
> > > > > > -#define SPIN_LOCK_INIT(x) (x).lock = __RISCV_SPIN_UNLOCKED
> > > > > > +#define SPIN_LOCK_INITIALIZER        \
> > > > > > +     __SPIN_LOCK_UNLOCKED
> > > > > >
> > > > > > -#define SPIN_LOCK_INITIALIZER                  \
> > > > > > -     {                                      \
> > > > > > -             .lock = __RISCV_SPIN_UNLOCKED, \
> > > > > > -     }
> > > > > > +#define DEFINE_SPIN_LOCK(x)  \
> > > > > > +     spinlock_t SPIN_LOCK_INIT(x)
> > > > > >
> > > > > >  int spin_lock_check(spinlock_t *lock);
> > > > > >
> > > > > > diff --git a/lib/sbi/riscv_locks.c b/lib/sbi/riscv_locks.c index
> > > > > > 4d1d9c0..7d86742
> > > > > > 100644
> > > > > > --- a/lib/sbi/riscv_locks.c
> > > > > > +++ b/lib/sbi/riscv_locks.c
> > > > > > @@ -2,44 +2,77 @@
> > > > > >   * SPDX-License-Identifier: BSD-2-Clause
> > > > > >   *
> > > > > >   * Copyright (c) 2019 Western Digital Corporation or its affiliates.
> > > > > > - *
> > > > > > - * Authors:
> > > > > > - *   Anup Patel <anup.patel@wdc.com>
> > > > > > + * Copyright (c) 2021 Christoph Müllner <cmuellner@linux.com>
> > > > > >   */
> > > > > >
> > > > > >  #include <sbi/riscv_barrier.h>
> > > > > >  #include <sbi/riscv_locks.h>
> > > > > >
> > > > > > -int spin_lock_check(spinlock_t *lock)
> > > > > > +static inline int spin_lock_unlocked(spinlock_t lock)
> > > > > >  {
> > > > > > -     return (lock->lock == __RISCV_SPIN_UNLOCKED) ? 0 : 1;
> > > > > > +        return lock.owner == lock.next; }
> > > > > > +
> > > > > > +bool spin_lock_check(spinlock_t *lock) {
> > > > > > +     RISCV_FENCE(r, rw);
> > > > > > +     return !spin_lock_unlocked(*lock);
> > > > > >  }
> > > > > >
> > > > > >  int spin_trylock(spinlock_t *lock)  {
> > > > > > -     int tmp = 1, busy;
> > > > > > +     u32 inc = 1u << TICKET_SHIFT;
> > > > > > +     u32 mask = 0xffffu << TICKET_SHIFT;
> > > > > > +     u32 l0, tmp1, tmp2;
> > > > > >
> > > > > >       __asm__ __volatile__(
> > > > > > -             "       amoswap.w %0, %2, %1\n"
> > > > > > RISCV_ACQUIRE_BARRIER
> > > > > > -             : "=r"(busy), "+A"(lock->lock)
> > > > > > -             : "r"(tmp)
> > > > > > +             /* Get the current lock counters. */
> > > > > > +             "1:     lr.w.aq %0, %3\n"
> > > > > > +             "       slliw   %2, %0, %6\n"
> > > > > > +             "       and     %1, %0, %5\n"
> > > > > > +             /* Is the lock free right now? */
> > > > > > +             "       bne     %1, %2, 2f\n"
> > > > > > +             "       addw    %0, %0, %4\n"
> > > > > > +             /* Acquire the lock. */
> > > > > > +             "       sc.w.rl %0, %0, %3\n"
> > > > > > +             "       bnez    %0, 1b\n"
> > > > > > +             "2:"
> > > > > > +             : "=&r"(l0), "=&r"(tmp1), "=&r"(tmp2), "+A"(*lock)
> > > > > > +             : "r"(inc), "r"(mask), "I"(TICKET_SHIFT)
> > > > > >               : "memory");
> > > > > >
> > > > > > -     return !busy;
> > > > > > +     return !l0;
> > > > > >  }
> > > > > >
> > > > > >  void spin_lock(spinlock_t *lock)  {
> > > > > > -     while (1) {
> > > > > > -             if (spin_lock_check(lock))
> > > > > > -                     continue;
> > > > > > +     u32 inc = 1u << TICKET_SHIFT;
> > > > > > +     u32 mask = 0xffffu << TICKET_SHIFT;
> > > > > > +     u32 l0, tmp1, tmp2;
> > > > > >
> > > > > > -             if (spin_trylock(lock))
> > > > > > -                     break;
> > > > > > -     }
> > > > > > +     __asm__ __volatile__(
> > > > > > +             /* Atomically increment the next ticket. */
> > > > > > +             "0:     lr.w.aq %0, %3\n"
> > > > > > +             "       addw    %0, %0, %4\n"
> > > > > > +             "       sc.w.rl %0, %0, %3\n"
> > > > > > +             "       bnez    %0, 0b\n"
> > > > > > +
> > > > > > +             /* Did we get the lock? */
> > > > > > +             "1:     slliw   %2, %0, %6\n"
> > > > > > +             "       and     %1, %0, %5\n"
> > > > > > +             "       beq     %1, %2, 2f\n"
> > > > > > +
> > > > > > +             /* No, let's spin on the lock. */
> > > > > > +             "       lr.w.aq %0, %3\n"
> > > > > > +             "       j       1b\n"
> > > > > > +             "2:"
> > > > > > +             : "=&r"(l0), "=&r"(tmp1), "=&r"(tmp2), "+A"(*lock)
> > > > > > +             : "r"(inc), "r"(mask), "I"(TICKET_SHIFT)
> > > > > > +             : "memory");
> > > > > >  }
> > > > > >
> > > > > >  void spin_unlock(spinlock_t *lock)  {
> > > > > > -     __smp_store_release(&lock->lock, __RISCV_SPIN_UNLOCKED);
> > > > > > +     __smp_store_release(&lock->owner, lock->owner + 1);
> > > > > >  }
> > > > > > +
> > > > > > --
> > > > > > 2.30.2
> > > > >
> > > > > --
> > > > > opensbi mailing list
> > > > > opensbi@lists.infradead.org
> > > > > http://lists.infradead.org/mailman/listinfo/opensbi
> > > >
> > > >
> > > >
> > > > --
> > > > Best Regards
> > > >  Guo Ren
> > > >
> > > > ML: https://lore.kernel.org/linux-csky/
> > >
> > > Regards,
> > > Anup
> >
> >
> >
> > --
> > Best Regards
> >  Guo Ren
> >
> > ML: https://lore.kernel.org/linux-csky/
diff mbox series

Patch

diff --git a/include/sbi/riscv_locks.h b/include/sbi/riscv_locks.h
index faa9676..59e6af9 100644
--- a/include/sbi/riscv_locks.h
+++ b/include/sbi/riscv_locks.h
@@ -2,26 +2,37 @@ 
  * SPDX-License-Identifier: BSD-2-Clause
  *
  * Copyright (c) 2019 Western Digital Corporation or its affiliates.
- *
- * Authors:
- *   Anup Patel <anup.patel@wdc.com>
+ * Copyright (c) 2021 Christoph Müllner <cmuellner@linux.com>
  */
 
 #ifndef __RISCV_LOCKS_H__
 #define __RISCV_LOCKS_H__
 
+#include <sbi/sbi_types.h>
+
+#define TICKET_SHIFT	16
+
 typedef struct {
-	volatile long lock;
-} spinlock_t;
+#ifdef __BIG_ENDIAN
+       u16 next;
+       u16 owner;
+#else
+       u16 owner;
+       u16 next;
+#endif
+} __aligned(4) spinlock_t;
+
+#define __SPIN_LOCK_UNLOCKED	\
+	(spinlock_t) { 0, 0 }
 
-#define __RISCV_SPIN_UNLOCKED 0
+#define SPIN_LOCK_INIT(x)	\
+	x = __SPIN_LOCK_UNLOCKED
 
-#define SPIN_LOCK_INIT(x) (x).lock = __RISCV_SPIN_UNLOCKED
+#define SPIN_LOCK_INITIALIZER	\
+	__SPIN_LOCK_UNLOCKED
 
-#define SPIN_LOCK_INITIALIZER                  \
-	{                                      \
-		.lock = __RISCV_SPIN_UNLOCKED, \
-	}
+#define DEFINE_SPIN_LOCK(x)	\
+	spinlock_t SPIN_LOCK_INIT(x)
 
 int spin_lock_check(spinlock_t *lock);
 
diff --git a/lib/sbi/riscv_locks.c b/lib/sbi/riscv_locks.c
index 4d1d9c0..7d86742 100644
--- a/lib/sbi/riscv_locks.c
+++ b/lib/sbi/riscv_locks.c
@@ -2,44 +2,77 @@ 
  * SPDX-License-Identifier: BSD-2-Clause
  *
  * Copyright (c) 2019 Western Digital Corporation or its affiliates.
- *
- * Authors:
- *   Anup Patel <anup.patel@wdc.com>
+ * Copyright (c) 2021 Christoph Müllner <cmuellner@linux.com>
  */
 
 #include <sbi/riscv_barrier.h>
 #include <sbi/riscv_locks.h>
 
-int spin_lock_check(spinlock_t *lock)
+static inline int spin_lock_unlocked(spinlock_t lock)
 {
-	return (lock->lock == __RISCV_SPIN_UNLOCKED) ? 0 : 1;
+        return lock.owner == lock.next;
+}
+
+bool spin_lock_check(spinlock_t *lock)
+{
+	RISCV_FENCE(r, rw);
+	return !spin_lock_unlocked(*lock);
 }
 
 int spin_trylock(spinlock_t *lock)
 {
-	int tmp = 1, busy;
+	u32 inc = 1u << TICKET_SHIFT;
+	u32 mask = 0xffffu << TICKET_SHIFT;
+	u32 l0, tmp1, tmp2;
 
 	__asm__ __volatile__(
-		"	amoswap.w %0, %2, %1\n" RISCV_ACQUIRE_BARRIER
-		: "=r"(busy), "+A"(lock->lock)
-		: "r"(tmp)
+		/* Get the current lock counters. */
+		"1:	lr.w.aq	%0, %3\n"
+		"	slliw	%2, %0, %6\n"
+		"	and	%1, %0, %5\n"
+		/* Is the lock free right now? */
+		"	bne	%1, %2, 2f\n"
+		"	addw	%0, %0, %4\n"
+		/* Acquire the lock. */
+		"	sc.w.rl	%0, %0, %3\n"
+		"	bnez	%0, 1b\n"
+		"2:"
+		: "=&r"(l0), "=&r"(tmp1), "=&r"(tmp2), "+A"(*lock)
+		: "r"(inc), "r"(mask), "I"(TICKET_SHIFT)
 		: "memory");
 
-	return !busy;
+	return !l0;
 }
 
 void spin_lock(spinlock_t *lock)
 {
-	while (1) {
-		if (spin_lock_check(lock))
-			continue;
+	u32 inc = 1u << TICKET_SHIFT;
+	u32 mask = 0xffffu << TICKET_SHIFT;
+	u32 l0, tmp1, tmp2;
 
-		if (spin_trylock(lock))
-			break;
-	}
+	__asm__ __volatile__(
+		/* Atomically increment the next ticket. */
+		"0:	lr.w.aq	%0, %3\n"
+		"	addw	%0, %0, %4\n"
+		"	sc.w.rl	%0, %0, %3\n"
+		"	bnez	%0, 0b\n"
+
+		/* Did we get the lock? */
+		"1:	slliw	%2, %0, %6\n"
+		"	and	%1, %0, %5\n"
+		"	beq	%1, %2, 2f\n"
+
+		/* No, let's spin on the lock. */
+		"	lr.w.aq	%0, %3\n"
+		"	j	1b\n"
+		"2:"
+		: "=&r"(l0), "=&r"(tmp1), "=&r"(tmp2), "+A"(*lock)
+		: "r"(inc), "r"(mask), "I"(TICKET_SHIFT)
+		: "memory");
 }
 
 void spin_unlock(spinlock_t *lock)
 {
-	__smp_store_release(&lock->lock, __RISCV_SPIN_UNLOCKED);
+	__smp_store_release(&lock->owner, lock->owner + 1);
 }
+
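
For reference, a minimal usage sketch of the resulting API (the lock and
function names below are illustrative only, not part of the patch):

#include <sbi/riscv_locks.h>

static spinlock_t example_lock = SPIN_LOCK_INITIALIZER;

void update_shared_state(void)
{
	spin_lock(&example_lock);
	/* ... critical section ... */
	spin_unlock(&example_lock);
}

int try_update_shared_state(void)
{
	/* spin_trylock() returns non-zero on success. */
	if (!spin_trylock(&example_lock))
		return 0;	/* contended; the caller may retry later */
	/* ... critical section ... */
	spin_unlock(&example_lock);
	return 1;
}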