[v2,3/3] powerpc/spinlocks: Fix oops in shared-processor spinlocks

Message ID 20190802042233.20835-4-cmr@informatik.wtf (mailing list archive)
State Superseded
Series: Fix oops in shared-processor spinlocks

Checks

Context Check Description
snowpatch_ozlabs/apply_patch success Successfully applied on branch next (f3365d1a959d5c6527efe3d38276acc9b58e3f3f)
snowpatch_ozlabs/build-ppc64le success Build succeeded
snowpatch_ozlabs/build-ppc64be success Build succeeded
snowpatch_ozlabs/build-ppc64e success Build succeeded
snowpatch_ozlabs/build-pmac32 success Build succeeded
snowpatch_ozlabs/checkpatch success total: 0 errors, 0 warnings, 0 checks, 79 lines checked

Commit Message

Christopher M. Riedl Aug. 2, 2019, 4:22 a.m. UTC
Booting with ppc64le_defconfig + CONFIG_PREEMPT results in the kernel trace
below, caused by calling the shared-processor spinlock yield code while not
running in an SPLPAR. Previously, the out-of-line spinlock implementations
were selected based on CONFIG_PPC_SPLPAR at compile time, without a runtime
shared-processor LPAR check.

To fix this, call the actual spinlock implementations from a set of common
functions, spin_yield() and rw_yield(), which check for a shared-processor
LPAR at runtime and select the appropriate lock implementation.
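
For context, the yield path being reached here, splpar_spin_yield() (called
__spin_yield() before this series), lives in arch/powerpc/lib/locks.c and
dereferences the lock holder's lppaca to read its yield count. A paraphrased
sketch of that function (recalled from the source of that era, so treat the
names and offsets as assumptions):

	/* Sketch, paraphrased from arch/powerpc/lib/locks.c (CONFIG_PPC_SPLPAR) */
	void splpar_spin_yield(arch_spinlock_t *lock)
	{
		unsigned int lock_value, holder_cpu, yield_count;

		lock_value = lock->slock;
		if (lock_value == 0)
			return;
		holder_cpu = lock_value & 0xffff;
		BUG_ON(holder_cpu >= NR_CPUS);
		/* On bare metal the holder's lppaca pointer is presumably NULL;
		 * yield_count sits at offset 0x100 in struct lppaca, which
		 * matches the DAR 0x100 in the trace below. */
		yield_count = be32_to_cpu(lppaca_of(holder_cpu).yield_count);
		if ((yield_count & 1) == 0)
			return;		/* holder vcpu is currently running */
		rmb();
		if (lock->slock != lock_value)
			return;		/* something has changed */
		/* confer our cycles to the vcpu holding the lock */
		plpar_hcall_norets(H_CONFER,
			get_hard_smp_processor_id(holder_cpu), yield_count);
	}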

[    0.430878] BUG: Kernel NULL pointer dereference at 0x00000100
[    0.431991] Faulting instruction address: 0xc000000000097f88
[    0.432934] Oops: Kernel access of bad area, sig: 7 [#1]
[    0.433448] LE PAGE_SIZE=64K MMU=Radix MMU=Hash PREEMPT SMP NR_CPUS=2048 NUMA PowerNV
[    0.434479] Modules linked in:
[    0.435055] CPU: 0 PID: 2 Comm: kthreadd Not tainted 5.2.0-rc6-00491-g249155c20f9b #28
[    0.435730] NIP:  c000000000097f88 LR: c000000000c07a88 CTR: c00000000015ca10
[    0.436383] REGS: c0000000727079f0 TRAP: 0300   Not tainted  (5.2.0-rc6-00491-g249155c20f9b)
[    0.437004] MSR:  9000000002009033 <SF,HV,VEC,EE,ME,IR,DR,RI,LE>  CR: 84000424  XER: 20040000
[    0.437874] CFAR: c000000000c07a84 DAR: 0000000000000100 DSISR: 00080000 IRQMASK: 1
[    0.437874] GPR00: c000000000c07a88 c000000072707c80 c000000001546300 c00000007be38a80
[    0.437874] GPR04: c0000000726f0c00 0000000000000002 c00000007279c980 0000000000000100
[    0.437874] GPR08: c000000001581b78 0000000080000001 0000000000000008 c00000007279c9b0
[    0.437874] GPR12: 0000000000000000 c000000001730000 c000000000142558 0000000000000000
[    0.437874] GPR16: 0000000000000000 0000000000000000 0000000000000000 0000000000000000
[    0.437874] GPR20: 0000000000000000 0000000000000000 0000000000000000 0000000000000000
[    0.437874] GPR24: c00000007be38a80 c000000000c002f4 0000000000000000 0000000000000000
[    0.437874] GPR28: c000000072221a00 c0000000726c2600 c00000007be38a80 c00000007be38a80
[    0.443992] NIP [c000000000097f88] __spin_yield+0x48/0xa0
[    0.444523] LR [c000000000c07a88] __raw_spin_lock+0xb8/0xc0
[    0.445080] Call Trace:
[    0.445670] [c000000072707c80] [c000000072221a00] 0xc000000072221a00 (unreliable)
[    0.446425] [c000000072707cb0] [c000000000bffb0c] __schedule+0xbc/0x850
[    0.447078] [c000000072707d70] [c000000000c002f4] schedule+0x54/0x130
[    0.447694] [c000000072707da0] [c0000000001427dc] kthreadd+0x28c/0x2b0
[    0.448389] [c000000072707e20] [c00000000000c1cc] ret_from_kernel_thread+0x5c/0x70
[    0.449143] Instruction dump:
[    0.449821] 4d9e0020 552a043e 210a07ff 79080fe0 0b080000 3d020004 3908b878 794a1f24
[    0.450587] e8e80000 7ce7502a e8e70000 38e70100 <7ca03c2c> 70a70001 78a50020 4d820020
[    0.452808] ---[ end trace 474d6b2b8fc5cb7e ]---

Signed-off-by: Christopher M. Riedl <cmr@informatik.wtf>
---
 arch/powerpc/include/asm/spinlock.h | 36 ++++++++++++++++++++---------
 1 file changed, 25 insertions(+), 11 deletions(-)

Comments

Michael Ellerman Aug. 2, 2019, 11:38 a.m. UTC | #1
"Christopher M. Riedl" <cmr@informatik.wtf> writes:
> diff --git a/arch/powerpc/include/asm/spinlock.h b/arch/powerpc/include/asm/spinlock.h
> index 0a8270183770..6aed8a83b180 100644
> --- a/arch/powerpc/include/asm/spinlock.h
> +++ b/arch/powerpc/include/asm/spinlock.h
> @@ -124,6 +122,22 @@ static inline bool is_shared_processor(void)
>  #endif
>  }
>  
> +static inline void spin_yield(arch_spinlock_t *lock)
> +{
> +	if (is_shared_processor())
> +		splpar_spin_yield(lock);
> +	else
> +		barrier();
> +}
...
>  static inline void arch_spin_lock(arch_spinlock_t *lock)
>  {
>  	while (1) {
> @@ -132,7 +146,7 @@ static inline void arch_spin_lock(arch_spinlock_t *lock)
>  		do {
>  			HMT_low();
>  			if (is_shared_processor())
> -				__spin_yield(lock);
> +				spin_yield(lock);

This leaves us with a double test of is_shared_processor() doesn't it?

cheers
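
To make the double test concrete: with this patch applied, inlining
spin_yield() into the arch_spin_lock() spin loop gives roughly (illustrative
expansion, not actual source):

	do {
		HMT_low();
		if (is_shared_processor()) {		/* check #1, in the loop */
			if (is_shared_processor())	/* check #2, inside spin_yield() */
				splpar_spin_yield(lock);
			else
				barrier();		/* unreachable in this context */
		}
	} while (unlikely(lock->slock != 0));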
Christopher M. Riedl Aug. 2, 2019, 3:12 p.m. UTC | #2
> On August 2, 2019 at 6:38 AM Michael Ellerman <mpe@ellerman.id.au> wrote:
> 
> 
> "Christopher M. Riedl" <cmr@informatik.wtf> writes:
> > diff --git a/arch/powerpc/include/asm/spinlock.h b/arch/powerpc/include/asm/spinlock.h
> > index 0a8270183770..6aed8a83b180 100644
> > --- a/arch/powerpc/include/asm/spinlock.h
> > +++ b/arch/powerpc/include/asm/spinlock.h
> > @@ -124,6 +122,22 @@ static inline bool is_shared_processor(void)
> >  #endif
> >  }
> >  
> > +static inline void spin_yield(arch_spinlock_t *lock)
> > +{
> > +	if (is_shared_processor())
> > +		splpar_spin_yield(lock);
> > +	else
> > +		barrier();
> > +}
> ...
> >  static inline void arch_spin_lock(arch_spinlock_t *lock)
> >  {
> >  	while (1) {
> > @@ -132,7 +146,7 @@ static inline void arch_spin_lock(arch_spinlock_t *lock)
> >  		do {
> >  			HMT_low();
> >  			if (is_shared_processor())
> > -				__spin_yield(lock);
> > +				spin_yield(lock);
> 
> This leaves us with a double test of is_shared_processor() doesn't it?

Yep, and that's no good. Hmm, executing the barrier() in the non-shared-processor
case probably hurts performance here?
Michael Ellerman Aug. 6, 2019, 12:14 p.m. UTC | #3
Christopher M Riedl <cmr@informatik.wtf> writes:
>> On August 2, 2019 at 6:38 AM Michael Ellerman <mpe@ellerman.id.au> wrote:
>> "Christopher M. Riedl" <cmr@informatik.wtf> writes:
>> > diff --git a/arch/powerpc/include/asm/spinlock.h b/arch/powerpc/include/asm/spinlock.h
>> > index 0a8270183770..6aed8a83b180 100644
>> > --- a/arch/powerpc/include/asm/spinlock.h
>> > +++ b/arch/powerpc/include/asm/spinlock.h
>> > @@ -124,6 +122,22 @@ static inline bool is_shared_processor(void)
>> >  #endif
>> >  }
>> >  
>> > +static inline void spin_yield(arch_spinlock_t *lock)
>> > +{
>> > +	if (is_shared_processor())
>> > +		splpar_spin_yield(lock);
>> > +	else
>> > +		barrier();
>> > +}
>> ...
>> >  static inline void arch_spin_lock(arch_spinlock_t *lock)
>> >  {
>> >  	while (1) {
>> > @@ -132,7 +146,7 @@ static inline void arch_spin_lock(arch_spinlock_t *lock)
>> >  		do {
>> >  			HMT_low();
>> >  			if (is_shared_processor())
>> > -				__spin_yield(lock);
>> > +				spin_yield(lock);
>> 
>> This leaves us with a double test of is_shared_processor() doesn't it?
>
> Yep, and that's no good. Hmm, executing the barrier() in the non-shared-processor
> case probably hurts performance here?

It's only a "compiler barrier", so it shouldn't generate any code.

But it does have the effect of telling the compiler it can't optimise
across that barrier, which can be important.

In those spin loops all we're doing is checking lock->slock which is
already marked volatile in the definition of arch_spinlock_t, so the
extra barrier shouldn't really make any difference.

But still the current code doesn't have a barrier() there, so we should
make sure we don't introduce one as part of this refactor.

So I think you just want to change the call to spin_yield() above to
splpar_spin_yield(), which avoids the double check, and also avoids the
barrier() in the SPLPAR=n case.

And then arch_spin_relax() calls spin_yield() etc.

cheers
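
As a general illustration of the point about the compiler barrier and
volatile (hypothetical code, not from the kernel): without volatile or a
barrier(), the compiler is free to hoist the load out of a spin loop and
spin on a stale register value.

	static int flag;

	void wait_unsafe(void)
	{
		while (flag == 0)	/* load may be hoisted; can spin forever */
			;
	}

	void wait_safe(void)
	{
		while (flag == 0)
			__asm__ __volatile__("" ::: "memory");	/* forces a re-read of flag */
	}

In the powerpc spin loops the volatile on lock->slock already forces the
re-read, which is why the extra barrier() makes no difference there.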
Christopher M. Riedl Aug. 6, 2019, 12:31 p.m. UTC | #4
> On August 6, 2019 at 7:14 AM Michael Ellerman <mpe@ellerman.id.au> wrote:
> 
> 
> Christopher M Riedl <cmr@informatik.wtf> writes:
> >> On August 2, 2019 at 6:38 AM Michael Ellerman <mpe@ellerman.id.au> wrote:
> >> "Christopher M. Riedl" <cmr@informatik.wtf> writes:
> >> 
> >> This leaves us with a double test of is_shared_processor() doesn't it?
> >
> > Yep, and that's no good. Hmm, executing the barrier() in the non-shared-processor
> > case probably hurts performance here?
> 
> It's only a "compiler barrier", so it shouldn't generate any code.
> 
> But it does have the effect of telling the compiler it can't optimise
> across that barrier, which can be important.
> 
> In those spin loops all we're doing is checking lock->slock which is
> already marked volatile in the definition of arch_spinlock_t, so the
> extra barrier shouldn't really make any difference.
> 
> But still the current code doesn't have a barrier() there, so we should
> make sure we don't introduce one as part of this refactor.

Thank you for taking the time to explain this. I have some more reading to
do about compiler barriers, it seems :)

> 
> So I think you just want to change the call to spin_yield() above to
> splpar_spin_yield(), which avoids the double check, and also avoids the
> barrier() in the SPLPAR=n case.
> 
> And then arch_spin_relax() calls spin_yield() etc.

I submitted a v3 before your reply with this change already - figured this
is the best way to avoid the double check and maintain legacy behavior.

> 
> cheers
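
For reference, the shape suggested above (and presumably what v3 does) calls
splpar_spin_yield() directly in the spin loop, keeping the spin_yield()
wrapper only for arch_spin_relax(). A sketch based on the patch on this page:

	static inline void arch_spin_lock(arch_spinlock_t *lock)
	{
		while (1) {
			if (likely(__arch_spin_trylock(lock) == 0))
				break;
			do {
				HMT_low();
				if (is_shared_processor())
					/* direct call: single check, no barrier() */
					splpar_spin_yield(lock);
			} while (unlikely(lock->slock != 0));
			HMT_medium();
		}
	}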
Segher Boessenkool Aug. 6, 2019, 3:57 p.m. UTC | #5
On Tue, Aug 06, 2019 at 10:14:27PM +1000, Michael Ellerman wrote:
> Christopher M Riedl <cmr@informatik.wtf> writes:
> > Yep, and that's no good. Hmm, executing the barrier() in the non-shared-processor
> > case probably hurts performance here?
> 
> It's only a "compiler barrier", so it shouldn't generate any code.
> 
> But it does have the effect of telling the compiler it can't optimise
> across that barrier, which can be important.

This is

#define barrier() __asm__ __volatile__("": : :"memory")

It doesn't tell the compiler "not to optimise" across the barrier.  It
tells the compiler that all memory accesses before the barrier should
stay before it, and all accesses after the barrier should stay after it,
because it says the "barrier" can access and/or change any memory.

This does not tell the hardware not to move those accesses around.  It
also doesn't say anything about things that are not in memory.  Not
everything you think is in memory, is.  What is and isn't in memory can
change during compilation.


[ This message brought to you by the "Stamp Out Optimisation Barrier"
  campaign. ]


Segher
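
An illustrative sketch of the distinction (hypothetical code): the memory
clobber keeps memory accesses on their side of the barrier and forces
re-reads, but it says nothing about values the compiler holds in registers,
and nothing about what the hardware reorders.

	int sum_twice(int *p)
	{
		int a = *p;	/* this load must stay before the barrier */
		__asm__ __volatile__("" ::: "memory");
		int b = *p;	/* must be re-loaded: *p may have changed */
		return a + b;	/* 'a' may live in a register throughout;
				   the barrier says nothing about it */
	}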

Patch

diff --git a/arch/powerpc/include/asm/spinlock.h b/arch/powerpc/include/asm/spinlock.h
index 0a8270183770..6aed8a83b180 100644
--- a/arch/powerpc/include/asm/spinlock.h
+++ b/arch/powerpc/include/asm/spinlock.h
@@ -103,11 +103,9 @@  static inline int arch_spin_trylock(arch_spinlock_t *lock)
 /* We only yield to the hypervisor if we are in shared processor mode */
 void splpar_spin_yield(arch_spinlock_t *lock);
 void splpar_rw_yield(arch_rwlock_t *lock);
-#define __spin_yield(x) splpar_spin_yield(x)
-#define __rw_yield(x) splpar_rw_yield(x)
 #else /* SPLPAR */
-#define __spin_yield(x)	barrier()
-#define __rw_yield(x)	barrier()
+static inline void splpar_spin_yield(arch_spinlock_t *lock) {};
+static inline void splpar_rw_yield(arch_rwlock_t *lock) {};
 #endif
 
 static inline bool is_shared_processor(void)
@@ -124,6 +122,22 @@  static inline bool is_shared_processor(void)
 #endif
 }
 
+static inline void spin_yield(arch_spinlock_t *lock)
+{
+	if (is_shared_processor())
+		splpar_spin_yield(lock);
+	else
+		barrier();
+}
+
+static inline void rw_yield(arch_rwlock_t *lock)
+{
+	if (is_shared_processor())
+		splpar_rw_yield(lock);
+	else
+		barrier();
+}
+
 static inline void arch_spin_lock(arch_spinlock_t *lock)
 {
 	while (1) {
@@ -132,7 +146,7 @@  static inline void arch_spin_lock(arch_spinlock_t *lock)
 		do {
 			HMT_low();
 			if (is_shared_processor())
-				__spin_yield(lock);
+				spin_yield(lock);
 		} while (unlikely(lock->slock != 0));
 		HMT_medium();
 	}
@@ -151,7 +165,7 @@  void arch_spin_lock_flags(arch_spinlock_t *lock, unsigned long flags)
 		do {
 			HMT_low();
 			if (is_shared_processor())
-				__spin_yield(lock);
+				spin_yield(lock);
 		} while (unlikely(lock->slock != 0));
 		HMT_medium();
 		local_irq_restore(flags_dis);
@@ -241,7 +255,7 @@  static inline void arch_read_lock(arch_rwlock_t *rw)
 		do {
 			HMT_low();
 			if (is_shared_processor())
-				__rw_yield(rw);
+				rw_yield(rw);
 		} while (unlikely(rw->lock < 0));
 		HMT_medium();
 	}
@@ -255,7 +269,7 @@  static inline void arch_write_lock(arch_rwlock_t *rw)
 		do {
 			HMT_low();
 			if (is_shared_processor())
-				__rw_yield(rw);
+				rw_yield(rw);
 		} while (unlikely(rw->lock != 0));
 		HMT_medium();
 	}
@@ -295,9 +309,9 @@  static inline void arch_write_unlock(arch_rwlock_t *rw)
 	rw->lock = 0;
 }
 
-#define arch_spin_relax(lock)	__spin_yield(lock)
-#define arch_read_relax(lock)	__rw_yield(lock)
-#define arch_write_relax(lock)	__rw_yield(lock)
+#define arch_spin_relax(lock)	spin_yield(lock)
+#define arch_read_relax(lock)	rw_yield(lock)
+#define arch_write_relax(lock)	rw_yield(lock)
 
 /* See include/linux/spinlock.h */
 #define smp_mb__after_spinlock()   smp_mb()