diff mbox series

[2/3] powerpc/tm: P9 disabled suspend mode workaround

Message ID 20171006074643.25269-2-cyrilbur@gmail.com (mailing list archive)
State Superseded
Headers show
Series [1/3] powerpc/tm: Add commandline option to disable hardware transactional memory | expand

Commit Message

Cyril Bur Oct. 6, 2017, 7:46 a.m. UTC
[from Michael Neulings original patch]
Each POWER9 core is made of two super slices. Each super slice can
only have one thread at a time in TM suspend mode. The super slice
restricts ever entering a state where both threads are in suspend by
aborting transactions on tsuspend or exceptions into the kernel.

Unfortunately for context switch we need trechkpt which forces suspend
mode. If a thread is already in suspend and a second thread needs to
be restored that was suspended, the trechkpt must be executed.
Currently the trechkpt will hang in this case until the other thread
exits suspend. This causes problems for Linux resulting in hang and
RCU stall detectors going off.

To workaround this, we disable suspend in the core. This is done via a
firmware change which stops the hardware ever getting into suspend.
The hardware will always rollback a transaction on any tsuspend or
entry into the kernel.

[added by Cyril Bur]
As the no-suspend firmware change is novel and untested using it should
be opt in by users. Furthumore, currently the kernel has no method to
know if the firmware has applied the no-suspend workaround. This patch
extends the ppc_tm commandline option to allow users to opt-in if they
are sure that their firmware has been updated and they understand the
risks involed.

Signed-off-by: Cyril Bur <cyrilbur@gmail.com>
---
 Documentation/admin-guide/kernel-parameters.txt |  7 +++++--
 arch/powerpc/include/asm/cputable.h             |  6 ++++++
 arch/powerpc/include/asm/tm.h                   |  6 ++++--
 arch/powerpc/kernel/cputable.c                  | 12 ++++++++++++
 arch/powerpc/kernel/setup_64.c                  | 16 ++++++++++------
 5 files changed, 37 insertions(+), 10 deletions(-)

Comments

Benjamin Herrenschmidt Oct. 6, 2017, 8:10 a.m. UTC | #1
On Fri, 2017-10-06 at 18:46 +1100, Cyril Bur wrote:
> [from Michael Neulings original patch]
> Each POWER9 core is made of two super slices. Each super slice can
> only have one thread at a time in TM suspend mode. The super slice
> restricts ever entering a state where both threads are in suspend by
> aborting transactions on tsuspend or exceptions into the kernel.
> 
> Unfortunately for context switch we need trechkpt which forces suspend
> mode. If a thread is already in suspend and a second thread needs to
> be restored that was suspended, the trechkpt must be executed.
> Currently the trechkpt will hang in this case until the other thread
> exits suspend. This causes problems for Linux resulting in hang and
> RCU stall detectors going off.
> 
> To workaround this, we disable suspend in the core. This is done via a
> firmware change which stops the hardware ever getting into suspend.
> The hardware will always rollback a transaction on any tsuspend or
> entry into the kernel.
> 
> [added by Cyril Bur]
> As the no-suspend firmware change is novel and untested using it should
> be opt in by users. Furthumore, currently the kernel has no method to
> know if the firmware has applied the no-suspend workaround. This patch
> extends the ppc_tm commandline option to allow users to opt-in if they
> are sure that their firmware has been updated and they understand the
> risks involed.

Is this what the patch actually does ? ...

> Signed-off-by: Cyril Bur <cyrilbur@gmail.com>
> ---
>  Documentation/admin-guide/kernel-parameters.txt |  7 +++++--
>  arch/powerpc/include/asm/cputable.h             |  6 ++++++
>  arch/powerpc/include/asm/tm.h                   |  6 ++++--
>  arch/powerpc/kernel/cputable.c                  | 12 ++++++++++++
>  arch/powerpc/kernel/setup_64.c                  | 16 ++++++++++------
>  5 files changed, 37 insertions(+), 10 deletions(-)
> 
> diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
> index 4e2b5d9078a0..a0f757f749cf 100644
> --- a/Documentation/admin-guide/kernel-parameters.txt
> +++ b/Documentation/admin-guide/kernel-parameters.txt
> @@ -805,8 +805,11 @@
>  			Disable RADIX MMU mode on POWER9
>  
>  	ppc_tm=		[PPC]
> -			Format: {"off"}
> -			Disable Hardware Transactional Memory
> +			Format: {"off" | "no-suspend"}
> +			"Off" Will disable Hardware Transactional Memory.
> +			"no-suspend" Informs the kernel that the
> +			hardware will not transition into the kernel
> +			with a suspended transaction.
>  
>  	disable_cpu_apicid= [X86,APIC,SMP]
>  			Format: <int>
> diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h
> index a9bf921f4efc..e66101830af2 100644
> --- a/arch/powerpc/include/asm/cputable.h
> +++ b/arch/powerpc/include/asm/cputable.h
> @@ -124,6 +124,12 @@ extern void identify_cpu_name(unsigned int pvr);
>  extern void do_feature_fixups(unsigned long value, void *fixup_start,
>  			      void *fixup_end);
>  
> +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
> +extern bool tm_suspend_supported(void);
> +#else
> +static inline bool tm_suspend_supported(void) { return false; }
> +#endif
> +
>  extern const char *powerpc_base_platform;
>  
>  #ifdef CONFIG_JUMP_LABEL_FEATURE_CHECKS
> diff --git a/arch/powerpc/include/asm/tm.h b/arch/powerpc/include/asm/tm.h
> index eca1c866ca97..1fd0b5f72861 100644
> --- a/arch/powerpc/include/asm/tm.h
> +++ b/arch/powerpc/include/asm/tm.h
> @@ -9,9 +9,11 @@
>  
>  #ifndef __ASSEMBLY__
>  
> -#define TM_STATE_ON	0
> -#define TM_STATE_OFF	1
> +#define TM_STATE_ON		0
> +#define TM_STATE_OFF		1
> +#define TM_STATE_NO_SUSPEND	2
>  
> +extern int ppc_tm_state;
>  extern void tm_enable(void);
>  extern void tm_reclaim(struct thread_struct *thread,
>  		       unsigned long orig_msr, uint8_t cause);
> diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
> index 760872916013..2cb01b48123a 100644
> --- a/arch/powerpc/kernel/cputable.c
> +++ b/arch/powerpc/kernel/cputable.c
> @@ -22,6 +22,7 @@
>  #include <asm/prom.h>		/* for PTRRELOC on ARCH=ppc */
>  #include <asm/mmu.h>
>  #include <asm/setup.h>
> +#include <asm/tm.h>
>  
>  static struct cpu_spec the_cpu_spec __read_mostly;
>  
> @@ -2301,6 +2302,17 @@ void __init identify_cpu_name(unsigned int pvr)
>  	}
>  }
>  
> +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
> +bool tm_suspend_supported(void)
> +{
> +	if (cpu_has_feature(CPU_FTR_TM)) {
> +		if (pvr_version_is(PVR_POWER9) && ppc_tm_state != TM_STATE_NO_SUSPEND)
> +			return false;
> +		return true;
> +	}

Hrm... so if state is "NO SUSPEND" you return "true" ? Isn't this
backward ? Or I don't understand what this is about...

> +	return false;
> +}
> +#endif
>  
>  #ifdef CONFIG_JUMP_LABEL_FEATURE_CHECKS
>  struct static_key_true cpu_feature_keys[NUM_CPU_FTR_KEYS] = {
> diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
> index e37c26d2e54b..227ac600a1b7 100644
> --- a/arch/powerpc/kernel/setup_64.c
> +++ b/arch/powerpc/kernel/setup_64.c
> @@ -251,12 +251,14 @@ static void cpu_ready_for_interrupts(void)
>  	get_paca()->kernel_msr = MSR_KERNEL;
>  }
>  
> +int ppc_tm_state;
>  #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
> -static int ppc_tm_state;
>  static int __init parse_ppc_tm(char *p)
>  {
>  	if (strcmp(p, "off") == 0)
>  		ppc_tm_state = TM_STATE_OFF;
> +	else if (strcmp(p, "no-suspend") == 0)
> +		ppc_tm_state = TM_STATE_NO_SUSPEND;
>  	else
>  		printk(KERN_NOTICE "Unknown value to cmdline ppc_tm '%s'\n", p);
>  	return 0;
> @@ -265,11 +267,13 @@ early_param("ppc_tm", parse_ppc_tm);
>  
>  static void check_disable_tm(void)
>  {
> -	if (cpu_has_feature(CPU_FTR_TM) && ppc_tm_state == TM_STATE_OFF) {
> -		printk(KERN_NOTICE "Disabling hardware transactional memory (HTM)\n");
> -		cur_cpu_spec->cpu_user_features2 &=
> -			~(PPC_FEATURE2_HTM_NOSC | PPC_FEATURE2_HTM);
> -		cur_cpu_spec->cpu_features &= ~CPU_FTR_TM;
> +	if (cpu_has_feature(CPU_FTR_TM)) {
> +		if (ppc_tm_state == TM_STATE_OFF || (!tm_suspend_supported())) {
> +			printk(KERN_NOTICE "Disabling hardware transactional memory (HTM)\n");
> +			cur_cpu_spec->cpu_user_features2 &=
> +				~(PPC_FEATURE2_HTM_NOSC | PPC_FEATURE2_HTM);
> +			cur_cpu_spec->cpu_features &= ~CPU_FTR_TM;
> +		}

So that code translates to if TM is off or doesn't support suspend,
disable TM. Are we sure that's really what we meant here ?

I suspect this makes more sense if that function was called
tm_supported() ...

>  	}
>  }
>  #else
Michael Ellerman Oct. 6, 2017, 10:29 a.m. UTC | #2
Benjamin Herrenschmidt <benh@kernel.crashing.org> writes:
> On Fri, 2017-10-06 at 18:46 +1100, Cyril Bur wrote:
...
>> diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
>> index 760872916013..2cb01b48123a 100644
>> --- a/arch/powerpc/kernel/cputable.c
>> +++ b/arch/powerpc/kernel/cputable.c
>> @@ -2301,6 +2302,17 @@ void __init identify_cpu_name(unsigned int pvr)
>>  	}
>>  }
>>  
>> +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
>> +bool tm_suspend_supported(void)
>> +{
>> +	if (cpu_has_feature(CPU_FTR_TM)) {
>> +		if (pvr_version_is(PVR_POWER9) && ppc_tm_state != TM_STATE_NO_SUSPEND)
>> +			return false;
>> +		return true;
>> +	}
>
> Hrm... so if state is "NO SUSPEND" you return "true" ? Isn't this
> backward ? Or I don't understand what this is about...

Yeah it's a bit confuzzled.

I literally wrote it for you on a post-it Cyril! >:D

tm_suspend_supported() should be called tm_no_suspend_mode(). Where "no
suspend mode" is the new "mode" we're adding where suspend is not supported.

Then tm_no_suspend_mode() should be:

+	if (pvr_version_is(PVR_POWER9) && ppc_tm_state == TM_STATE_NO_SUSPEND)
+			return true;
+	return false;
  
And then all the extra checks and warnings in patch 3 just use it like:

	WARN_ON(tm_no_suspend_mode());

Because they're in paths where we shouldn't get to if suspend is
disabled.

I don't think we need to check CPU_FTR_TM because it's only called from
TM paths anyway. But we could add that to be paranoid. Or probably
better, when TM is forced off (below) we set ppc_tm_state to off.

>> diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
>> index e37c26d2e54b..227ac600a1b7 100644
>> --- a/arch/powerpc/kernel/setup_64.c
>> +++ b/arch/powerpc/kernel/setup_64.c
>> @@ -265,11 +267,13 @@ early_param("ppc_tm", parse_ppc_tm);
>>  
>>  static void check_disable_tm(void)
>>  {
>> -	if (cpu_has_feature(CPU_FTR_TM) && ppc_tm_state == TM_STATE_OFF) {
>> -		printk(KERN_NOTICE "Disabling hardware transactional memory (HTM)\n");
>> -		cur_cpu_spec->cpu_user_features2 &=
>> -			~(PPC_FEATURE2_HTM_NOSC | PPC_FEATURE2_HTM);
>> -		cur_cpu_spec->cpu_features &= ~CPU_FTR_TM;
>> +	if (cpu_has_feature(CPU_FTR_TM)) {
>> +		if (ppc_tm_state == TM_STATE_OFF || (!tm_suspend_supported())) {
>> +			printk(KERN_NOTICE "Disabling hardware transactional memory (HTM)\n");
>> +			cur_cpu_spec->cpu_user_features2 &=
>> +				~(PPC_FEATURE2_HTM_NOSC | PPC_FEATURE2_HTM);
>> +			cur_cpu_spec->cpu_features &= ~CPU_FTR_TM;
>> +		}
>
> So that code translates to if TM is off or doesn't support suspend,
> disable TM. Are we sure that's really what we meant here ?

It should be:

+	if (!cpu_has_feature(CPU_FTR_TM))
+		return;
+
+	if (ppc_tm_state == TM_STATE_OFF || \
+	    (pvr_version_is(PVR_POWER9) && ppc_tm_state != TM_STATE_NO_SUSPEND)) {
+		printk(KERN_NOTICE "Disabling hardware transactional memory (HTM)\n");
+		cur_cpu_spec->cpu_user_features2 &= ~(PPC_FEATURE2_HTM_NOSC | PPC_FEATURE2_HTM);
+		cur_cpu_spec->cpu_features &= ~CPU_FTR_TM;
+	}

And as I mentioned above perhaps we should also do:
+		ppc_tm_state = TM_STATE_OFF;

cheers
Gustavo Romero Oct. 6, 2017, 11:22 a.m. UTC | #3
Hi Cyril,

On 06-10-2017 04:46, Cyril Bur wrote:
> [added by Cyril Bur]
> As the no-suspend firmware change is novel and untested using it should
> be opt in by users. Furthumore, currently the kernel has no method to

I forgot to mention on my last reply, but should s/Furthumore/Furthermore/ ?

Regards,
Gustavo
diff mbox series

Patch

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 4e2b5d9078a0..a0f757f749cf 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -805,8 +805,11 @@ 
 			Disable RADIX MMU mode on POWER9
 
 	ppc_tm=		[PPC]
-			Format: {"off"}
-			Disable Hardware Transactional Memory
+			Format: {"off" | "no-suspend"}
+			"Off" Will disable Hardware Transactional Memory.
+			"no-suspend" Informs the kernel that the
+			hardware will not transition into the kernel
+			with a suspended transaction.
 
 	disable_cpu_apicid= [X86,APIC,SMP]
 			Format: <int>
diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h
index a9bf921f4efc..e66101830af2 100644
--- a/arch/powerpc/include/asm/cputable.h
+++ b/arch/powerpc/include/asm/cputable.h
@@ -124,6 +124,12 @@  extern void identify_cpu_name(unsigned int pvr);
 extern void do_feature_fixups(unsigned long value, void *fixup_start,
 			      void *fixup_end);
 
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+extern bool tm_suspend_supported(void);
+#else
+static inline bool tm_suspend_supported(void) { return false; }
+#endif
+
 extern const char *powerpc_base_platform;
 
 #ifdef CONFIG_JUMP_LABEL_FEATURE_CHECKS
diff --git a/arch/powerpc/include/asm/tm.h b/arch/powerpc/include/asm/tm.h
index eca1c866ca97..1fd0b5f72861 100644
--- a/arch/powerpc/include/asm/tm.h
+++ b/arch/powerpc/include/asm/tm.h
@@ -9,9 +9,11 @@ 
 
 #ifndef __ASSEMBLY__
 
-#define TM_STATE_ON	0
-#define TM_STATE_OFF	1
+#define TM_STATE_ON		0
+#define TM_STATE_OFF		1
+#define TM_STATE_NO_SUSPEND	2
 
+extern int ppc_tm_state;
 extern void tm_enable(void);
 extern void tm_reclaim(struct thread_struct *thread,
 		       unsigned long orig_msr, uint8_t cause);
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index 760872916013..2cb01b48123a 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -22,6 +22,7 @@ 
 #include <asm/prom.h>		/* for PTRRELOC on ARCH=ppc */
 #include <asm/mmu.h>
 #include <asm/setup.h>
+#include <asm/tm.h>
 
 static struct cpu_spec the_cpu_spec __read_mostly;
 
@@ -2301,6 +2302,17 @@  void __init identify_cpu_name(unsigned int pvr)
 	}
 }
 
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+bool tm_suspend_supported(void)
+{
+	if (cpu_has_feature(CPU_FTR_TM)) {
+		if (pvr_version_is(PVR_POWER9) && ppc_tm_state != TM_STATE_NO_SUSPEND)
+			return false;
+		return true;
+	}
+	return false;
+}
+#endif
 
 #ifdef CONFIG_JUMP_LABEL_FEATURE_CHECKS
 struct static_key_true cpu_feature_keys[NUM_CPU_FTR_KEYS] = {
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index e37c26d2e54b..227ac600a1b7 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -251,12 +251,14 @@  static void cpu_ready_for_interrupts(void)
 	get_paca()->kernel_msr = MSR_KERNEL;
 }
 
+int ppc_tm_state;
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-static int ppc_tm_state;
 static int __init parse_ppc_tm(char *p)
 {
 	if (strcmp(p, "off") == 0)
 		ppc_tm_state = TM_STATE_OFF;
+	else if (strcmp(p, "no-suspend") == 0)
+		ppc_tm_state = TM_STATE_NO_SUSPEND;
 	else
 		printk(KERN_NOTICE "Unknown value to cmdline ppc_tm '%s'\n", p);
 	return 0;
@@ -265,11 +267,13 @@  early_param("ppc_tm", parse_ppc_tm);
 
 static void check_disable_tm(void)
 {
-	if (cpu_has_feature(CPU_FTR_TM) && ppc_tm_state == TM_STATE_OFF) {
-		printk(KERN_NOTICE "Disabling hardware transactional memory (HTM)\n");
-		cur_cpu_spec->cpu_user_features2 &=
-			~(PPC_FEATURE2_HTM_NOSC | PPC_FEATURE2_HTM);
-		cur_cpu_spec->cpu_features &= ~CPU_FTR_TM;
+	if (cpu_has_feature(CPU_FTR_TM)) {
+		if (ppc_tm_state == TM_STATE_OFF || (!tm_suspend_supported())) {
+			printk(KERN_NOTICE "Disabling hardware transactional memory (HTM)\n");
+			cur_cpu_spec->cpu_user_features2 &=
+				~(PPC_FEATURE2_HTM_NOSC | PPC_FEATURE2_HTM);
+			cur_cpu_spec->cpu_features &= ~CPU_FTR_TM;
+		}
 	}
 }
 #else