diff mbox series

[07/12] powerpc: powernv: Fix KCSAN datarace warnings on idle_state contention

Message ID 20230508020120.218494-8-rmclure@linux.ibm.com (mailing list archive)
State Superseded
Headers show
Series powerpc: KCSAN fix warnings and mark accesses | expand

Commit Message

Rohan McLure May 8, 2023, 2:01 a.m. UTC
The idle_state entry in the PACA on PowerNV features a bit which is
atomically tested and set through ldarx/stdcx. to be used as a spinlock.
This lock then guards access to other bit fields of idle_state. KCSAN
cannot differentiate between any of these bitfield accesses as they all
are implemented by 8-byte store/load instructions, thus cores contending
on the bit-lock appear to data race with modifications to idle_state.

Separate the bit-lock entry from the data guarded by the lock to avoid
the possibility of data races being detected by KCSAN.

Suggested-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Rohan McLure <rmclure@ibm.com>
---
 arch/powerpc/include/asm/paca.h       |  1 +
 arch/powerpc/platforms/powernv/idle.c | 20 +++++++++++---------
 2 files changed, 12 insertions(+), 9 deletions(-)

Comments

Nicholas Piggin May 9, 2023, 2:26 a.m. UTC | #1
On Mon May 8, 2023 at 12:01 PM AEST, Rohan McLure wrote:
> The idle_state entry in the PACA on PowerNV features a bit which is
> atomically tested and set through ldarx/stdcx. to be used as a spinlock.
> This lock then guards access to other bit fields of idle_state. KCSAN
> cannot differentiate between any of these bitfield accesses as they all
> are implemented by 8-byte store/load instructions, thus cores contending
> on the bit-lock appear to data race with modifications to idle_state.
>
> Separate the bit-lock entry from the data guarded by the lock to avoid
> the possibility of data races being detected by KCSAN.
>
> Suggested-by: Nicholas Piggin <npiggin@gmail.com>
> Signed-off-by: Rohan McLure <rmclure@ibm.com>
> ---
>  arch/powerpc/include/asm/paca.h       |  1 +
>  arch/powerpc/platforms/powernv/idle.c | 20 +++++++++++---------
>  2 files changed, 12 insertions(+), 9 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
> index da0377f46597..cb325938766a 100644
> --- a/arch/powerpc/include/asm/paca.h
> +++ b/arch/powerpc/include/asm/paca.h
> @@ -191,6 +191,7 @@ struct paca_struct {
>  #ifdef CONFIG_PPC_POWERNV
>  	/* PowerNV idle fields */
>  	/* PNV_CORE_IDLE_* bits, all siblings work on thread 0 paca */
> +	unsigned long idle_lock; /* A value of 1 means acquired */
>  	unsigned long idle_state;
>  	union {
>  		/* P7/P8 specific fields */
> diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c
> index 841cb7f31f4f..97dbb7bc2b00 100644
> --- a/arch/powerpc/platforms/powernv/idle.c
> +++ b/arch/powerpc/platforms/powernv/idle.c
> @@ -246,9 +246,9 @@ static inline void atomic_lock_thread_idle(void)
>  {
>  	int cpu = raw_smp_processor_id();
>  	int first = cpu_first_thread_sibling(cpu);
> -	unsigned long *state = &paca_ptrs[first]->idle_state;
> +	unsigned long *lock = &paca_ptrs[first]->idle_lock;
>  
> -	while (unlikely(test_and_set_bit_lock(NR_PNV_CORE_IDLE_LOCK_BIT, state)))
> +	while (unlikely(test_and_set_bit_lock(NR_PNV_CORE_IDLE_LOCK_BIT, lock)))
>  		barrier();
>  }
>  
> @@ -258,29 +258,31 @@ static inline void atomic_unlock_and_stop_thread_idle(void)
>  	int first = cpu_first_thread_sibling(cpu);
>  	unsigned long thread = 1UL << cpu_thread_in_core(cpu);
>  	unsigned long *state = &paca_ptrs[first]->idle_state;
> +	unsigned long *lock = &paca_ptrs[first]->idle_lock;
>  	u64 s = READ_ONCE(*state);
>  	u64 new, tmp;
>  
> -	BUG_ON(!(s & PNV_CORE_IDLE_LOCK_BIT));
> +	BUG_ON(!(READ_ONCE(*lock) & PNV_CORE_IDLE_LOCK_BIT));
>  	BUG_ON(s & thread);
>  
>  again:
> -	new = (s | thread) & ~PNV_CORE_IDLE_LOCK_BIT;
> +	new = s | thread;
>  	tmp = cmpxchg(state, s, new);
>  	if (unlikely(tmp != s)) {
>  		s = tmp;
>  		goto again;
>  	}
> +	clear_bit_unlock(NR_PNV_CORE_IDLE_LOCK_BIT, lock);

Sigh, another atomic. It's in a slow path though so I won't get too
upset. Would be nice to add a comment here and revert it when KCSAN
can be taught about this pattern though, so we don't lose it.

>  }
>  
>  static inline void atomic_unlock_thread_idle(void)
>  {
>  	int cpu = raw_smp_processor_id();
>  	int first = cpu_first_thread_sibling(cpu);
> -	unsigned long *state = &paca_ptrs[first]->idle_state;
> +	unsigned long *lock = &paca_ptrs[first]->idle_lock;
>  
> -	BUG_ON(!test_bit(NR_PNV_CORE_IDLE_LOCK_BIT, state));
> -	clear_bit_unlock(NR_PNV_CORE_IDLE_LOCK_BIT, state);
> +	BUG_ON(!test_bit(NR_PNV_CORE_IDLE_LOCK_BIT, lock));
> +	clear_bit_unlock(NR_PNV_CORE_IDLE_LOCK_BIT, lock);
>  }
>  
>  /* P7 and P8 */
> @@ -380,9 +382,9 @@ static unsigned long power7_idle_insn(unsigned long type)
>  		sprs.uamor	= mfspr(SPRN_UAMOR);
>  	}
>  
> -	local_paca->thread_idle_state = type;
> +	WRITE_ONCE(local_paca->thread_idle_state, type);
>  	srr1 = isa206_idle_insn_mayloss(type);		/* go idle */
> -	local_paca->thread_idle_state = PNV_THREAD_RUNNING;
> +	WRITE_ONCE(local_paca->thread_idle_state, PNV_THREAD_RUNNING);

Where is the thread_idle_state concurrency coming from?

Thanks,
Nick
Rohan McLure May 10, 2023, 2 a.m. UTC | #2
> On 9 May 2023, at 12:26 pm, Nicholas Piggin <npiggin@gmail.com> wrote:
> 
> On Mon May 8, 2023 at 12:01 PM AEST, Rohan McLure wrote:
>> The idle_state entry in the PACA on PowerNV features a bit which is
>> atomically tested and set through ldarx/stdcx. to be used as a spinlock.
>> This lock then guards access to other bit fields of idle_state. KCSAN
>> cannot differentiate between any of these bitfield accesses as they all
>> are implemented by 8-byte store/load instructions, thus cores contending
>> on the bit-lock appear to data race with modifications to idle_state.
>> 
>> Separate the bit-lock entry from the data guarded by the lock to avoid
>> the possibility of data races being detected by KCSAN.
>> 
>> Suggested-by: Nicholas Piggin <npiggin@gmail.com>
>> Signed-off-by: Rohan McLure <rmclure@ibm.com>
>> ---
>> arch/powerpc/include/asm/paca.h       |  1 +
>> arch/powerpc/platforms/powernv/idle.c | 20 +++++++++++---------
>> 2 files changed, 12 insertions(+), 9 deletions(-)
>> 
>> diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
>> index da0377f46597..cb325938766a 100644
>> --- a/arch/powerpc/include/asm/paca.h
>> +++ b/arch/powerpc/include/asm/paca.h
>> @@ -191,6 +191,7 @@ struct paca_struct {
>> #ifdef CONFIG_PPC_POWERNV
>> /* PowerNV idle fields */
>> /* PNV_CORE_IDLE_* bits, all siblings work on thread 0 paca */
>> + unsigned long idle_lock; /* A value of 1 means acquired */
>> unsigned long idle_state;
>> union {
>> /* P7/P8 specific fields */
>> diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c
>> index 841cb7f31f4f..97dbb7bc2b00 100644
>> --- a/arch/powerpc/platforms/powernv/idle.c
>> +++ b/arch/powerpc/platforms/powernv/idle.c
>> @@ -246,9 +246,9 @@ static inline void atomic_lock_thread_idle(void)
>> {
>> int cpu = raw_smp_processor_id();
>> int first = cpu_first_thread_sibling(cpu);
>> - unsigned long *state = &paca_ptrs[first]->idle_state;
>> + unsigned long *lock = &paca_ptrs[first]->idle_lock;
>> 
>> - while (unlikely(test_and_set_bit_lock(NR_PNV_CORE_IDLE_LOCK_BIT, state)))
>> + while (unlikely(test_and_set_bit_lock(NR_PNV_CORE_IDLE_LOCK_BIT, lock)))
>> barrier();
>> }
>> 
>> @@ -258,29 +258,31 @@ static inline void atomic_unlock_and_stop_thread_idle(void)
>> int first = cpu_first_thread_sibling(cpu);
>> unsigned long thread = 1UL << cpu_thread_in_core(cpu);
>> unsigned long *state = &paca_ptrs[first]->idle_state;
>> + unsigned long *lock = &paca_ptrs[first]->idle_lock;
>> u64 s = READ_ONCE(*state);
>> u64 new, tmp;
>> 
>> - BUG_ON(!(s & PNV_CORE_IDLE_LOCK_BIT));
>> + BUG_ON(!(READ_ONCE(*lock) & PNV_CORE_IDLE_LOCK_BIT));
>> BUG_ON(s & thread);
>> 
>> again:
>> - new = (s | thread) & ~PNV_CORE_IDLE_LOCK_BIT;
>> + new = s | thread;
>> tmp = cmpxchg(state, s, new);
>> if (unlikely(tmp != s)) {
>> s = tmp;
>> goto again;
>> }
>> + clear_bit_unlock(NR_PNV_CORE_IDLE_LOCK_BIT, lock);
> 
> Sigh, another atomic. It's in a slow path though so I won't get too
> upset. Would be nice to add a comment here and revert it when KCSAN
> can be taught about this pattern though, so we don't lose it.
> 
>> }
>> 
>> static inline void atomic_unlock_thread_idle(void)
>> {
>> int cpu = raw_smp_processor_id();
>> int first = cpu_first_thread_sibling(cpu);
>> - unsigned long *state = &paca_ptrs[first]->idle_state;
>> + unsigned long *lock = &paca_ptrs[first]->idle_lock;
>> 
>> - BUG_ON(!test_bit(NR_PNV_CORE_IDLE_LOCK_BIT, state));
>> - clear_bit_unlock(NR_PNV_CORE_IDLE_LOCK_BIT, state);
>> + BUG_ON(!test_bit(NR_PNV_CORE_IDLE_LOCK_BIT, lock));
>> + clear_bit_unlock(NR_PNV_CORE_IDLE_LOCK_BIT, lock);
>> }
>> 
>> /* P7 and P8 */
>> @@ -380,9 +382,9 @@ static unsigned long power7_idle_insn(unsigned long type)
>> sprs.uamor = mfspr(SPRN_UAMOR);
>> }
>> 
>> - local_paca->thread_idle_state = type;
>> + WRITE_ONCE(local_paca->thread_idle_state, type);
>> srr1 = isa206_idle_insn_mayloss(type); /* go idle */
>> - local_paca->thread_idle_state = PNV_THREAD_RUNNING;
>> + WRITE_ONCE(local_paca->thread_idle_state, PNV_THREAD_RUNNING);
> 
> Where is the thread_idle_state concurrency coming from?

Yeah, I agree, WRITE_ONCE isn’t necessary here: all reads of this variable
by xmon are purely diagnostic (data races permitted), and the
isa206_idle_insn_mayloss() call is a compiler barrier, so the store
instructions will be emitted on each side of the call regardless.

> 
> Thanks,
> Nick
diff mbox series

Patch

diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index da0377f46597..cb325938766a 100644
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -191,6 +191,7 @@  struct paca_struct {
 #ifdef CONFIG_PPC_POWERNV
 	/* PowerNV idle fields */
 	/* PNV_CORE_IDLE_* bits, all siblings work on thread 0 paca */
+	unsigned long idle_lock; /* A value of 1 means acquired */
 	unsigned long idle_state;
 	union {
 		/* P7/P8 specific fields */
diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c
index 841cb7f31f4f..97dbb7bc2b00 100644
--- a/arch/powerpc/platforms/powernv/idle.c
+++ b/arch/powerpc/platforms/powernv/idle.c
@@ -246,9 +246,9 @@  static inline void atomic_lock_thread_idle(void)
 {
 	int cpu = raw_smp_processor_id();
 	int first = cpu_first_thread_sibling(cpu);
-	unsigned long *state = &paca_ptrs[first]->idle_state;
+	unsigned long *lock = &paca_ptrs[first]->idle_lock;
 
-	while (unlikely(test_and_set_bit_lock(NR_PNV_CORE_IDLE_LOCK_BIT, state)))
+	while (unlikely(test_and_set_bit_lock(NR_PNV_CORE_IDLE_LOCK_BIT, lock)))
 		barrier();
 }
 
@@ -258,29 +258,31 @@  static inline void atomic_unlock_and_stop_thread_idle(void)
 	int first = cpu_first_thread_sibling(cpu);
 	unsigned long thread = 1UL << cpu_thread_in_core(cpu);
 	unsigned long *state = &paca_ptrs[first]->idle_state;
+	unsigned long *lock = &paca_ptrs[first]->idle_lock;
 	u64 s = READ_ONCE(*state);
 	u64 new, tmp;
 
-	BUG_ON(!(s & PNV_CORE_IDLE_LOCK_BIT));
+	BUG_ON(!(READ_ONCE(*lock) & PNV_CORE_IDLE_LOCK_BIT));
 	BUG_ON(s & thread);
 
 again:
-	new = (s | thread) & ~PNV_CORE_IDLE_LOCK_BIT;
+	new = s | thread;
 	tmp = cmpxchg(state, s, new);
 	if (unlikely(tmp != s)) {
 		s = tmp;
 		goto again;
 	}
+	clear_bit_unlock(NR_PNV_CORE_IDLE_LOCK_BIT, lock);
 }
 
 static inline void atomic_unlock_thread_idle(void)
 {
 	int cpu = raw_smp_processor_id();
 	int first = cpu_first_thread_sibling(cpu);
-	unsigned long *state = &paca_ptrs[first]->idle_state;
+	unsigned long *lock = &paca_ptrs[first]->idle_lock;
 
-	BUG_ON(!test_bit(NR_PNV_CORE_IDLE_LOCK_BIT, state));
-	clear_bit_unlock(NR_PNV_CORE_IDLE_LOCK_BIT, state);
+	BUG_ON(!test_bit(NR_PNV_CORE_IDLE_LOCK_BIT, lock));
+	clear_bit_unlock(NR_PNV_CORE_IDLE_LOCK_BIT, lock);
 }
 
 /* P7 and P8 */
@@ -380,9 +382,9 @@  static unsigned long power7_idle_insn(unsigned long type)
 		sprs.uamor	= mfspr(SPRN_UAMOR);
 	}
 
-	local_paca->thread_idle_state = type;
+	WRITE_ONCE(local_paca->thread_idle_state, type);
 	srr1 = isa206_idle_insn_mayloss(type);		/* go idle */
-	local_paca->thread_idle_state = PNV_THREAD_RUNNING;
+	WRITE_ONCE(local_paca->thread_idle_state, PNV_THREAD_RUNNING);
 
 	WARN_ON_ONCE(!srr1);
 	WARN_ON_ONCE(mfmsr() & (MSR_IR|MSR_DR));