[07/17] powerpc/qspinlock: store owner CPU in lock word

Message ID: 20220728063120.2867508-9-npiggin@gmail.com
State: Changes Requested
Series: powerpc: alternate queued spinlock implementation

Commit Message

Nicholas Piggin July 28, 2022, 6:31 a.m. UTC
Store the owner CPU number in the lock word so it may be yielded to,
as powerpc's paravirtualised simple spinlocks do.
---
 arch/powerpc/include/asm/qspinlock.h       |  8 +++++++-
 arch/powerpc/include/asm/qspinlock_types.h | 10 ++++++++++
 arch/powerpc/lib/qspinlock.c               |  6 +++---
 3 files changed, 20 insertions(+), 4 deletions(-)
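
For context, the point of recording the owner is that a waiter spinning on a
held lock can direct a hypervisor yield at the holder's vCPU instead of
burning cycles. A minimal sketch of roughly how a later patch in the series
consumes the field (yield_count_of() and yield_to_preempted() are the
existing paravirt helpers; the re-check of the lock word is elided):

	static __always_inline void yield_to_locked_owner(struct qspinlock *lock, u32 val)
	{
		int owner = (val & _Q_OWNER_CPU_MASK) >> _Q_OWNER_CPU_OFFSET;
		u32 yield_count = yield_count_of(owner);

		/* An even yield count means the owner vCPU is running. */
		if ((yield_count & 1) == 0)
			return;
		/* Re-read lock->val and confirm the same owner still holds it... */
		yield_to_preempted(owner, yield_count);
	}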

Comments

Jordan Niethe Nov. 10, 2022, 12:40 a.m. UTC | #2
On Thu, 2022-07-28 at 16:31 +1000, Nicholas Piggin wrote:
[resend as utf-8, not utf-7]
> Store the owner CPU number in the lock word so it may be yielded to,
> as powerpc's paravirtualised simple spinlocks do.
> ---
>  arch/powerpc/include/asm/qspinlock.h       |  8 +++++++-
>  arch/powerpc/include/asm/qspinlock_types.h | 10 ++++++++++
>  arch/powerpc/lib/qspinlock.c               |  6 +++---
>  3 files changed, 20 insertions(+), 4 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/qspinlock.h b/arch/powerpc/include/asm/qspinlock.h
> index 3ab354159e5e..44601b261e08 100644
> --- a/arch/powerpc/include/asm/qspinlock.h
> +++ b/arch/powerpc/include/asm/qspinlock.h
> @@ -20,9 +20,15 @@ static __always_inline int queued_spin_is_contended(struct qspinlock *lock)
>  	return !!(READ_ONCE(lock->val) & _Q_TAIL_CPU_MASK);
>  }
>  
> +static __always_inline u32 queued_spin_get_locked_val(void)

Maybe this function should have "encode" in the name to match with
encode_tail_cpu().


> +{
> +	/* XXX: make this use lock value in paca like simple spinlocks? */

Is that the paca's lock_token which is 0x8000?


> +	return _Q_LOCKED_VAL | (smp_processor_id() << _Q_OWNER_CPU_OFFSET);
> +}
> +
>  static __always_inline int queued_spin_trylock(struct qspinlock *lock)
>  {
> -	u32 new = _Q_LOCKED_VAL;
> +	u32 new = queued_spin_get_locked_val();
>  	u32 prev;
>  
>  	asm volatile(
> diff --git a/arch/powerpc/include/asm/qspinlock_types.h b/arch/powerpc/include/asm/qspinlock_types.h
> index 8b20f5e22bba..35f9525381e6 100644
> --- a/arch/powerpc/include/asm/qspinlock_types.h
> +++ b/arch/powerpc/include/asm/qspinlock_types.h
> @@ -29,6 +29,8 @@ typedef struct qspinlock {
>   * Bitfields in the lock word:
>   *
>   *     0: locked bit
> + *  1-14: lock holder cpu
> + *    15: unused bit
>   *    16: must queue bit
>   * 17-31: tail cpu (+1)

So there is one more bit to store the tail cpu vs the lock holder cpu?

>   */
> @@ -39,6 +41,14 @@ typedef struct qspinlock {
>  #define _Q_LOCKED_MASK		_Q_SET_MASK(LOCKED)
>  #define _Q_LOCKED_VAL		(1U << _Q_LOCKED_OFFSET)
>  
> +#define _Q_OWNER_CPU_OFFSET	1
> +#define _Q_OWNER_CPU_BITS	14
> +#define _Q_OWNER_CPU_MASK	_Q_SET_MASK(OWNER_CPU)
> +
> +#if CONFIG_NR_CPUS > (1U << _Q_OWNER_CPU_BITS)
> +#error "qspinlock does not support such large CONFIG_NR_CPUS"
> +#endif
> +
>  #define _Q_MUST_Q_OFFSET	16
>  #define _Q_MUST_Q_BITS		1
>  #define _Q_MUST_Q_MASK		_Q_SET_MASK(MUST_Q)
> diff --git a/arch/powerpc/lib/qspinlock.c b/arch/powerpc/lib/qspinlock.c
> index a906cc8f15fa..aa26cfe21f18 100644
> --- a/arch/powerpc/lib/qspinlock.c
> +++ b/arch/powerpc/lib/qspinlock.c
> @@ -50,7 +50,7 @@ static inline int get_tail_cpu(u32 val)
>  /* Take the lock by setting the lock bit, no other CPUs will touch it. */
>  static __always_inline void lock_set_locked(struct qspinlock *lock)
>  {
> -	u32 new = _Q_LOCKED_VAL;
> +	u32 new = queued_spin_get_locked_val();
>  	u32 prev;
>  
>  	asm volatile(
> @@ -68,7 +68,7 @@ static __always_inline void lock_set_locked(struct qspinlock *lock)
>  /* Take lock, clearing tail, cmpxchg with old (which must not be locked) */
>  static __always_inline int trylock_clear_tail_cpu(struct qspinlock *lock, u32 old)
>  {
> -	u32 new = _Q_LOCKED_VAL;
> +	u32 new = queued_spin_get_locked_val();
>  	u32 prev;
>  
>  	BUG_ON(old & _Q_LOCKED_VAL);
> @@ -116,7 +116,7 @@ static __always_inline u32 __trylock_cmpxchg(struct qspinlock *lock, u32 old, u3
>  /* Take lock, preserving tail, cmpxchg with val (which must not be locked) */
>  static __always_inline int trylock_with_tail_cpu(struct qspinlock *lock, u32 val)
>  {
> -	u32 newval = _Q_LOCKED_VAL | (val & _Q_TAIL_CPU_MASK);
> +	u32 newval = queued_spin_get_locked_val() | (val & _Q_TAIL_CPU_MASK);
>  
>  	if (__trylock_cmpxchg(lock, val, newval) == val)
>  		return 1;
Nicholas Piggin Nov. 10, 2022, 10:59 a.m. UTC | #3
On Thu Nov 10, 2022 at 10:40 AM AEST, Jordan Niethe wrote:
> On Thu, 2022-07-28 at 16:31 +1000, Nicholas Piggin wrote:
> [resend as utf-8, not utf-7]
> > Store the owner CPU number in the lock word so it may be yielded to,
> > as powerpc's paravirtualised simple spinlocks do.
> > ---
> >  arch/powerpc/include/asm/qspinlock.h       |  8 +++++++-
> >  arch/powerpc/include/asm/qspinlock_types.h | 10 ++++++++++
> >  arch/powerpc/lib/qspinlock.c               |  6 +++---
> >  3 files changed, 20 insertions(+), 4 deletions(-)
> > 
> > diff --git a/arch/powerpc/include/asm/qspinlock.h b/arch/powerpc/include/asm/qspinlock.h
> > index 3ab354159e5e..44601b261e08 100644
> > --- a/arch/powerpc/include/asm/qspinlock.h
> > +++ b/arch/powerpc/include/asm/qspinlock.h
> > @@ -20,9 +20,15 @@ static __always_inline int queued_spin_is_contended(struct qspinlock *lock)
> >  	return !!(READ_ONCE(lock->val) & _Q_TAIL_CPU_MASK);
> >  }
> >  
> > +static __always_inline u32 queued_spin_get_locked_val(void)
>
> Maybe this function should have "encode" in the name to match with
> encode_tail_cpu().

Yep.

> > +{
> > +	/* XXX: make this use lock value in paca like simple spinlocks? */
>
> Is that the paca's lock_token which is 0x8000?

Yes, which AFAIKS is actually unused now with queued spinlocks.
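
For reference, simple spinlocks get the owner encoding for free: LOCK_TOKEN
reads a u32 spanning paca->lock_token (the constant 0x8000) and the adjacent
paca->paca_index, so the stored lock word is both nonzero and names the
holder in a single load. The analogue the XXX comment hints at would
precompute the qspinlock locked value per CPU at boot (qspinlock_token is a
hypothetical paca field, used here only for illustration):

	static __always_inline u32 queued_spin_encode_locked_val(void)
	{
		/* hypothetical field, set at boot to
		 * _Q_LOCKED_VAL | (cpu << _Q_OWNER_CPU_OFFSET) */
		return local_paca->qspinlock_token;
	}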

> > +	return _Q_LOCKED_VAL | (smp_processor_id() << _Q_OWNER_CPU_OFFSET);
> > +}
> > +
> >  static __always_inline int queued_spin_trylock(struct qspinlock *lock)
> >  {
> > -	u32 new = _Q_LOCKED_VAL;
> > +	u32 new = queued_spin_get_locked_val();
> >  	u32 prev;
> >  
> >  	asm volatile(
> > diff --git a/arch/powerpc/include/asm/qspinlock_types.h b/arch/powerpc/include/asm/qspinlock_types.h
> > index 8b20f5e22bba..35f9525381e6 100644
> > --- a/arch/powerpc/include/asm/qspinlock_types.h
> > +++ b/arch/powerpc/include/asm/qspinlock_types.h
> > @@ -29,6 +29,8 @@ typedef struct qspinlock {
> >   * Bitfields in the lock word:
> >   *
> >   *     0: locked bit
> > + *  1-14: lock holder cpu
> > + *    15: unused bit
> >   *    16: must queue bit
> >   * 17-31: tail cpu (+1)
>
> So there is one more bit to store the tail cpu vs the lock holder cpu?

Yeah but the tail has to encode it as CPU+1.

Thanks,
Nick
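
The asymmetry behind that answer: a tail value of zero must mean "no queue",
so the tail field stores CPU+1 and has to represent NR_CPUS+1 distinct
values, while the owner field is only meaningful when the locked bit is set
and can store the CPU number directly. With 14 owner bits this caps NR_CPUS
at 16384, which the #error above enforces. The tail helpers elsewhere in the
series look roughly like:

	static inline u32 encode_tail_cpu(void)
	{
		/* +1 so that a zero tail field means "no queue" */
		return (smp_processor_id() + 1) << _Q_TAIL_CPU_OFFSET;
	}

	static inline int get_tail_cpu(u32 val)
	{
		/* tail occupies the top bits, so no mask is needed */
		return (val >> _Q_TAIL_CPU_OFFSET) - 1;
	}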

Patch

diff --git a/arch/powerpc/include/asm/qspinlock.h b/arch/powerpc/include/asm/qspinlock.h
index 3ab354159e5e..44601b261e08 100644
--- a/arch/powerpc/include/asm/qspinlock.h
+++ b/arch/powerpc/include/asm/qspinlock.h
@@ -20,9 +20,15 @@ static __always_inline int queued_spin_is_contended(struct qspinlock *lock)
 	return !!(READ_ONCE(lock->val) & _Q_TAIL_CPU_MASK);
 }
 
+static __always_inline u32 queued_spin_get_locked_val(void)
+{
+	/* XXX: make this use lock value in paca like simple spinlocks? */
+	return _Q_LOCKED_VAL | (smp_processor_id() << _Q_OWNER_CPU_OFFSET);
+}
+
 static __always_inline int queued_spin_trylock(struct qspinlock *lock)
 {
-	u32 new = _Q_LOCKED_VAL;
+	u32 new = queued_spin_get_locked_val();
 	u32 prev;
 
 	asm volatile(
diff --git a/arch/powerpc/include/asm/qspinlock_types.h b/arch/powerpc/include/asm/qspinlock_types.h
index 8b20f5e22bba..35f9525381e6 100644
--- a/arch/powerpc/include/asm/qspinlock_types.h
+++ b/arch/powerpc/include/asm/qspinlock_types.h
@@ -29,6 +29,8 @@ typedef struct qspinlock {
  * Bitfields in the lock word:
  *
  *     0: locked bit
+ *  1-14: lock holder cpu
+ *    15: unused bit
  *    16: must queue bit
  * 17-31: tail cpu (+1)
  */
@@ -39,6 +41,14 @@ typedef struct qspinlock {
 #define _Q_LOCKED_MASK		_Q_SET_MASK(LOCKED)
 #define _Q_LOCKED_VAL		(1U << _Q_LOCKED_OFFSET)
 
+#define _Q_OWNER_CPU_OFFSET	1
+#define _Q_OWNER_CPU_BITS	14
+#define _Q_OWNER_CPU_MASK	_Q_SET_MASK(OWNER_CPU)
+
+#if CONFIG_NR_CPUS > (1U << _Q_OWNER_CPU_BITS)
+#error "qspinlock does not support such large CONFIG_NR_CPUS"
+#endif
+
 #define _Q_MUST_Q_OFFSET	16
 #define _Q_MUST_Q_BITS		1
 #define _Q_MUST_Q_MASK		_Q_SET_MASK(MUST_Q)
diff --git a/arch/powerpc/lib/qspinlock.c b/arch/powerpc/lib/qspinlock.c
index a906cc8f15fa..aa26cfe21f18 100644
--- a/arch/powerpc/lib/qspinlock.c
+++ b/arch/powerpc/lib/qspinlock.c
@@ -50,7 +50,7 @@ static inline int get_tail_cpu(u32 val)
 /* Take the lock by setting the lock bit, no other CPUs will touch it. */
 static __always_inline void lock_set_locked(struct qspinlock *lock)
 {
-	u32 new = _Q_LOCKED_VAL;
+	u32 new = queued_spin_get_locked_val();
 	u32 prev;
 
 	asm volatile(
@@ -68,7 +68,7 @@ static __always_inline void lock_set_locked(struct qspinlock *lock)
 /* Take lock, clearing tail, cmpxchg with old (which must not be locked) */
 static __always_inline int trylock_clear_tail_cpu(struct qspinlock *lock, u32 old)
 {
-	u32 new = _Q_LOCKED_VAL;
+	u32 new = queued_spin_get_locked_val();
 	u32 prev;
 
 	BUG_ON(old & _Q_LOCKED_VAL);
@@ -116,7 +116,7 @@ static __always_inline u32 __trylock_cmpxchg(struct qspinlock *lock, u32 old, u3
 /* Take lock, preserving tail, cmpxchg with val (which must not be locked) */
 static __always_inline int trylock_with_tail_cpu(struct qspinlock *lock, u32 val)
 {
-	u32 newval = _Q_LOCKED_VAL | (val & _Q_TAIL_CPU_MASK);
+	u32 newval = queued_spin_get_locked_val() | (val & _Q_TAIL_CPU_MASK);
 
 	if (__trylock_cmpxchg(lock, val, newval) == val)
 		return 1;
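
To make the encoding concrete: with this patch, CPU 5 taking an uncontended
lock stores _Q_LOCKED_VAL | (5 << _Q_OWNER_CPU_OFFSET) = 0xb. If CPU 9 then
queues behind it, the tail bits encode 9+1 at bit 17, so the lock word
becomes 0xb | (10 << 17) = 0x14000b (sketch arithmetic, assuming the tail
encoding from earlier in the series).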