diff mbox series

powerpc/pseries: Fix xive=off command line

Message ID 155791470178.432724.8008395673479905061.stgit@bahia.lan (mailing list archive)
State Accepted
Commit a3bf9fbdad600b1e4335dd90979f8d6072e4f602
Headers show
Series powerpc/pseries: Fix xive=off command line | expand

Checks

Context Check Description
snowpatch_ozlabs/apply_patch success Successfully applied on branch next (8150a153c013aa2dd1ffae43370b89ac1347a7fb)
snowpatch_ozlabs/build-ppc64le success Build succeeded
snowpatch_ozlabs/build-ppc64be success Build succeeded
snowpatch_ozlabs/build-ppc64e success Build succeeded
snowpatch_ozlabs/build-pmac32 success Build succeeded
snowpatch_ozlabs/checkpatch success total: 0 errors, 0 warnings, 0 checks, 107 lines checked

Commit Message

Greg Kurz May 15, 2019, 10:05 a.m. UTC
On POWER9, if the hypervisor supports XIVE exploitation mode, the guest OS
will unconditionally requests for the XIVE interrupt mode even if XIVE was
deactivated with the kernel command line xive=off. Later on, when the spapr
XIVE init code handles xive=off, it disables XIVE and tries to fall back on
the legacy mode XICS.

This discrepency causes a kernel panic because the hypervisor is configured
to provide the XIVE interrupt mode to the guest :

[    0.008837] kernel BUG at arch/powerpc/sysdev/xics/xics-common.c:135!
[    0.008877] Oops: Exception in kernel mode, sig: 5 [#1]
[    0.008908] LE SMP NR_CPUS=1024 NUMA pSeries
[    0.008939] Modules linked in:
[    0.008964] CPU: 0 PID: 1 Comm: swapper/0 Tainted: G        W         5.0.13-200.fc29.ppc64le #1
[    0.009018] NIP:  c000000001029ab8 LR: c000000001029aac CTR: c0000000018e0000
[    0.009065] REGS: c0000007f96d7900 TRAP: 0700   Tainted: G        W          (5.0.13-200.fc29.ppc64le)
[    0.009119] MSR:  8000000002029033 <SF,VEC,EE,ME,IR,DR,RI,LE>  CR: 28000222  XER: 20040000
[    0.009168] CFAR: c0000000001b1e28 IRQMASK: 0
[    0.009168] GPR00: c000000001029aac c0000007f96d7b90 c0000000015e8600 0000000000000000
[    0.009168] GPR04: 0000000000000001 0000000000000000 0000000000000061 646f6d61696e0d0a
[    0.009168] GPR08: 00000007fd8f0000 0000000000000001 c0000000014c44c0 c0000007f96d76cf
[    0.009168] GPR12: 0000000000000000 c0000000018e0000 0000000000000001 0000000000000000
[    0.009168] GPR16: 0000000000000000 0000000000000001 c0000007f96d7c08 c0000000016903d0
[    0.009168] GPR20: c0000007fffe04e8 ffffffffffffffea c000000001620164 c00000000161fe58
[    0.009168] GPR24: c000000000ea6c88 c0000000011151a8 00000000006000c0 c0000007f96d7c34
[    0.009168] GPR28: 0000000000000000 c0000000014b286c c000000001115180 c00000000161dc70
[    0.009558] NIP [c000000001029ab8] xics_smp_probe+0x38/0x98
[    0.009590] LR [c000000001029aac] xics_smp_probe+0x2c/0x98
[    0.009622] Call Trace:
[    0.009639] [c0000007f96d7b90] [c000000001029aac] xics_smp_probe+0x2c/0x98 (unreliable)
[    0.009687] [c0000007f96d7bb0] [c000000001033404] pSeries_smp_probe+0x40/0xa0
[    0.009734] [c0000007f96d7bd0] [c0000000010212a4] smp_prepare_cpus+0x62c/0x6ec
[    0.009782] [c0000007f96d7cf0] [c0000000010141b8] kernel_init_freeable+0x148/0x448
[    0.009829] [c0000007f96d7db0] [c000000000010ba4] kernel_init+0x2c/0x148
[    0.009870] [c0000007f96d7e20] [c00000000000bdd4] ret_from_kernel_thread+0x5c/0x68
[    0.009916] Instruction dump:
[    0.009940] 7c0802a6 60000000 7c0802a6 38800002 f8010010 f821ffe1 3c62001c e863b9a0
[    0.009988] 4b1882d1 60000000 7c690034 5529d97e <0b090000> 3d22001c e929b998 3ce2ff8f

Look for xive=off during prom_init and don't ask for XIVE in this case. One
exception though: if the host only supports XIVE, we still want to boot so
we ignore xive=off.

Similarly, have the spapr XIVE init code to looking at the interrupt mode
negociated during CAS, and ignore xive=off if the hypervisor only supports
XIVE.

Fixes: eac1e731b59e ("powerpc/xive: guest exploitation of the XIVE interrupt controller")
Cc: stable@vger.kernel.org # v4.20
Reported-by: Pavithra R. Prakash <pavrampu@in.ibm.com>
Signed-off-by: Greg Kurz <groug@kaod.org>
---
eac1e731b59e is a v4.16 commit actually but this patch only applies
cleanly to v4.20 and newer. If needed I can send a backport for
older versions.
---
 arch/powerpc/kernel/prom_init.c  |   16 +++++++++++-
 arch/powerpc/sysdev/xive/spapr.c |   52 +++++++++++++++++++++++++++++++++++++-
 2 files changed, 66 insertions(+), 2 deletions(-)

Comments

Cédric Le Goater May 15, 2019, 10:13 a.m. UTC | #1
On 5/15/19 12:05 PM, Greg Kurz wrote:
> On POWER9, if the hypervisor supports XIVE exploitation mode, the guest OS
> will unconditionally requests for the XIVE interrupt mode even if XIVE was
> deactivated with the kernel command line xive=off. Later on, when the spapr
> XIVE init code handles xive=off, it disables XIVE and tries to fall back on
> the legacy mode XICS.
> 
> This discrepency causes a kernel panic because the hypervisor is configured
> to provide the XIVE interrupt mode to the guest :
> 
> [    0.008837] kernel BUG at arch/powerpc/sysdev/xics/xics-common.c:135!
> [    0.008877] Oops: Exception in kernel mode, sig: 5 [#1]
> [    0.008908] LE SMP NR_CPUS=1024 NUMA pSeries
> [    0.008939] Modules linked in:
> [    0.008964] CPU: 0 PID: 1 Comm: swapper/0 Tainted: G        W         5.0.13-200.fc29.ppc64le #1
> [    0.009018] NIP:  c000000001029ab8 LR: c000000001029aac CTR: c0000000018e0000
> [    0.009065] REGS: c0000007f96d7900 TRAP: 0700   Tainted: G        W          (5.0.13-200.fc29.ppc64le)
> [    0.009119] MSR:  8000000002029033 <SF,VEC,EE,ME,IR,DR,RI,LE>  CR: 28000222  XER: 20040000
> [    0.009168] CFAR: c0000000001b1e28 IRQMASK: 0
> [    0.009168] GPR00: c000000001029aac c0000007f96d7b90 c0000000015e8600 0000000000000000
> [    0.009168] GPR04: 0000000000000001 0000000000000000 0000000000000061 646f6d61696e0d0a
> [    0.009168] GPR08: 00000007fd8f0000 0000000000000001 c0000000014c44c0 c0000007f96d76cf
> [    0.009168] GPR12: 0000000000000000 c0000000018e0000 0000000000000001 0000000000000000
> [    0.009168] GPR16: 0000000000000000 0000000000000001 c0000007f96d7c08 c0000000016903d0
> [    0.009168] GPR20: c0000007fffe04e8 ffffffffffffffea c000000001620164 c00000000161fe58
> [    0.009168] GPR24: c000000000ea6c88 c0000000011151a8 00000000006000c0 c0000007f96d7c34
> [    0.009168] GPR28: 0000000000000000 c0000000014b286c c000000001115180 c00000000161dc70
> [    0.009558] NIP [c000000001029ab8] xics_smp_probe+0x38/0x98
> [    0.009590] LR [c000000001029aac] xics_smp_probe+0x2c/0x98
> [    0.009622] Call Trace:
> [    0.009639] [c0000007f96d7b90] [c000000001029aac] xics_smp_probe+0x2c/0x98 (unreliable)
> [    0.009687] [c0000007f96d7bb0] [c000000001033404] pSeries_smp_probe+0x40/0xa0
> [    0.009734] [c0000007f96d7bd0] [c0000000010212a4] smp_prepare_cpus+0x62c/0x6ec
> [    0.009782] [c0000007f96d7cf0] [c0000000010141b8] kernel_init_freeable+0x148/0x448
> [    0.009829] [c0000007f96d7db0] [c000000000010ba4] kernel_init+0x2c/0x148
> [    0.009870] [c0000007f96d7e20] [c00000000000bdd4] ret_from_kernel_thread+0x5c/0x68
> [    0.009916] Instruction dump:
> [    0.009940] 7c0802a6 60000000 7c0802a6 38800002 f8010010 f821ffe1 3c62001c e863b9a0
> [    0.009988] 4b1882d1 60000000 7c690034 5529d97e <0b090000> 3d22001c e929b998 3ce2ff8f
> 
> Look for xive=off during prom_init and don't ask for XIVE in this case. One
> exception though: if the host only supports XIVE, we still want to boot so
> we ignore xive=off.
> 
> Similarly, have the spapr XIVE init code to looking at the interrupt mode
> negociated during CAS, and ignore xive=off if the hypervisor only supports
> XIVE.
> 
> Fixes: eac1e731b59e ("powerpc/xive: guest exploitation of the XIVE interrupt controller")
> Cc: stable@vger.kernel.org # v4.20
> Reported-by: Pavithra R. Prakash <pavrampu@in.ibm.com>
> Signed-off-by: Greg Kurz <groug@kaod.org>


Reviewed-by: Cédric Le Goater <clg@kaod.org>

Thanks,

C.

> ---
> eac1e731b59e is a v4.16 commit actually but this patch only applies
> cleanly to v4.20 and newer. If needed I can send a backport for
> older versions.
> ---
>  arch/powerpc/kernel/prom_init.c  |   16 +++++++++++-
>  arch/powerpc/sysdev/xive/spapr.c |   52 +++++++++++++++++++++++++++++++++++++-
>  2 files changed, 66 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
> index 523bb99d7676..c8f7eb845927 100644
> --- a/arch/powerpc/kernel/prom_init.c
> +++ b/arch/powerpc/kernel/prom_init.c
> @@ -172,6 +172,7 @@ static unsigned long __prombss prom_tce_alloc_end;
>  
>  #ifdef CONFIG_PPC_PSERIES
>  static bool __prombss prom_radix_disable;
> +static bool __prombss prom_xive_disable;
>  #endif
>  
>  struct platform_support {
> @@ -808,6 +809,12 @@ static void __init early_cmdline_parse(void)
>  	}
>  	if (prom_radix_disable)
>  		prom_debug("Radix disabled from cmdline\n");
> +
> +	opt = prom_strstr(prom_cmd_line, "xive=off");
> +	if (opt) {
> +		prom_xive_disable = true;
> +		prom_debug("XIVE disabled from cmdline\n");
> +	}
>  #endif /* CONFIG_PPC_PSERIES */
>  }
>  
> @@ -1216,10 +1223,17 @@ static void __init prom_parse_xive_model(u8 val,
>  	switch (val) {
>  	case OV5_FEAT(OV5_XIVE_EITHER): /* Either Available */
>  		prom_debug("XIVE - either mode supported\n");
> -		support->xive = true;
> +		support->xive = !prom_xive_disable;
>  		break;
>  	case OV5_FEAT(OV5_XIVE_EXPLOIT): /* Only Exploitation mode */
>  		prom_debug("XIVE - exploitation mode supported\n");
> +		if (prom_xive_disable) {
> +			/*
> +			 * If we __have__ to do XIVE, we're better off ignoring
> +			 * the command line rather than not booting.
> +			 */
> +			prom_printf("WARNING: Ignoring cmdline option xive=off\n");
> +		}
>  		support->xive = true;
>  		break;
>  	case OV5_FEAT(OV5_XIVE_LEGACY): /* Only Legacy mode */
> diff --git a/arch/powerpc/sysdev/xive/spapr.c b/arch/powerpc/sysdev/xive/spapr.c
> index 575db3b06a6b..2e2d1b8f810f 100644
> --- a/arch/powerpc/sysdev/xive/spapr.c
> +++ b/arch/powerpc/sysdev/xive/spapr.c
> @@ -20,6 +20,7 @@
>  #include <linux/cpumask.h>
>  #include <linux/mm.h>
>  #include <linux/delay.h>
> +#include <linux/libfdt.h>
>  
>  #include <asm/prom.h>
>  #include <asm/io.h>
> @@ -663,6 +664,55 @@ static bool xive_get_max_prio(u8 *max_prio)
>  	return true;
>  }
>  
> +static const u8 *get_vec5_feature(unsigned int index)
> +{
> +	unsigned long root, chosen;
> +	int size;
> +	const u8 *vec5;
> +
> +	root = of_get_flat_dt_root();
> +	chosen = of_get_flat_dt_subnode_by_name(root, "chosen");
> +	if (chosen == -FDT_ERR_NOTFOUND)
> +		return NULL;
> +
> +	vec5 = of_get_flat_dt_prop(chosen, "ibm,architecture-vec-5", &size);
> +	if (!vec5)
> +		return NULL;
> +
> +	if (size <= index)
> +		return NULL;
> +
> +	return vec5 + index;
> +}
> +
> +static bool xive_spapr_disabled(void)
> +{
> +	const u8 *vec5_xive;
> +
> +	vec5_xive = get_vec5_feature(OV5_INDX(OV5_XIVE_SUPPORT));
> +	if (vec5_xive) {
> +		u8 val;
> +
> +		val = *vec5_xive & OV5_FEAT(OV5_XIVE_SUPPORT);
> +		switch (val) {
> +		case OV5_FEAT(OV5_XIVE_EITHER):
> +		case OV5_FEAT(OV5_XIVE_LEGACY):
> +			break;
> +		case OV5_FEAT(OV5_XIVE_EXPLOIT):
> +			/* Hypervisor only supports XIVE */
> +			if (xive_cmdline_disabled)
> +				pr_warn("WARNING: Ignoring cmdline option xive=off\n");
> +			return false;
> +		default:
> +			pr_warn("%s: Unknown xive support option: 0x%x\n",
> +				__func__, val);
> +			break;
> +		}
> +	}
> +
> +	return xive_cmdline_disabled;
> +}
> +
>  bool __init xive_spapr_init(void)
>  {
>  	struct device_node *np;
> @@ -675,7 +725,7 @@ bool __init xive_spapr_init(void)
>  	const __be32 *reg;
>  	int i;
>  
> -	if (xive_cmdline_disabled)
> +	if (xive_spapr_disabled())
>  		return false;
>  
>  	pr_devel("%s()\n", __func__);
>
Sasha Levin May 15, 2019, 10:54 a.m. UTC | #2
Hi,

[This is an automated email]

This commit has been processed because it contains a "Fixes:" tag,
fixing commit: eac1e731b59e powerpc/xive: guest exploitation of the XIVE interrupt controller.

The bot has tested the following trees: v5.1.1, v5.0.15, v4.19.42, v4.14.118.

v5.1.1: Build OK!
v5.0.15: Build OK!
v4.19.42: Failed to apply! Possible dependencies:
    8ca2d5151e7f ("powerpc/prom_init: Move a few remaining statics to appropriate sections")
    c886087caee7 ("powerpc/prom_init: Move prom_radix_disable to __prombss")

v4.14.118: Failed to apply! Possible dependencies:
    028555a590d6 ("powerpc/xive: fix hcall H_INT_RESET to support long busy delays")
    7a22d6321c3d ("powerpc/mm/radix: Update command line parsing for disable_radix")
    8ca2d5151e7f ("powerpc/prom_init: Move a few remaining statics to appropriate sections")
    c886087caee7 ("powerpc/prom_init: Move prom_radix_disable to __prombss")


How should we proceed with this patch?

--
Thanks,
Sasha
Greg Kurz May 17, 2019, 10:57 a.m. UTC | #3
On Wed, 15 May 2019 10:54:42 +0000
Sasha Levin <sashal@kernel.org> wrote:

> Hi,
> 

Hi,

> [This is an automated email]
> 
> This commit has been processed because it contains a "Fixes:" tag,
> fixing commit: eac1e731b59e powerpc/xive: guest exploitation of the XIVE interrupt controller.
> 
> The bot has tested the following trees: v5.1.1, v5.0.15, v4.19.42, v4.14.118.
> 
> v5.1.1: Build OK!
> v5.0.15: Build OK!
> v4.19.42: Failed to apply! Possible dependencies:
>     8ca2d5151e7f ("powerpc/prom_init: Move a few remaining statics to appropriate sections")
>     c886087caee7 ("powerpc/prom_init: Move prom_radix_disable to __prombss")
> 

Dependencies are:

3bad719b4954 ("powerpc/prom_init: Make of_workarounds static")
e63334e556d9 ("powerpc/prom_init: Replace __initdata with __prombss when applicable")
11fdb309341c ("powerpc/prom_init: Remove support for OPAL v2")
c886087caee7 ("powerpc/prom_init: Move prom_radix_disable to __prombss")
8ca2d5151e7f ("powerpc/prom_init: Move a few remaining statics to appropriate sections")
f1f208e54d08 ("powerpc/prom_init: Generate "phandle" instead of "linux, phandle"")
cbe46bd4f510 ("powerpc: remove CONFIG_CMDLINE #ifdef mess")
450e7dd4001f ("powerpc/prom_init: don't use string functions from lib/")

The patches apply flawlessly and allow the build to succeed.

> v4.14.118: Failed to apply! Possible dependencies:
>     028555a590d6 ("powerpc/xive: fix hcall H_INT_RESET to support long busy delays")
>     7a22d6321c3d ("powerpc/mm/radix: Update command line parsing for disable_radix")
>     8ca2d5151e7f ("powerpc/prom_init: Move a few remaining statics to appropriate sections")
>     c886087caee7 ("powerpc/prom_init: Move prom_radix_disable to __prombss")
> 

Dependencies are:

7a22d6321c3d ("powerpc/mm/radix: Update command line parsing for disable_radix")
028555a590d6 ("powerpc/xive: fix hcall H_INT_RESET to support long busy delays")
3bad719b4954 ("powerpc/prom_init: Make of_workarounds static")
e63334e556d9 ("powerpc/prom_init: Replace __initdata with __prombss when applicable")
11fdb309341c ("powerpc/prom_init: Remove support for OPAL v2")
c886087caee7 ("powerpc/prom_init: Move prom_radix_disable to __prombss")
8ca2d5151e7f ("powerpc/prom_init: Move a few remaining statics to appropriate sections")
f1f208e54d08 ("powerpc/prom_init: Generate "phandle" instead of "linux, phandle"")
cbe46bd4f510 ("powerpc: remove CONFIG_CMDLINE #ifdef mess")
450e7dd4001f ("powerpc/prom_init: don't use string functions from lib/")

The patches apply flawlessly and allow the build to succeed.

> 
> How should we proceed with this patch?
> 

xive=off allows the kernel to use the legacy XICS interrupt controller
interface on POWER9, definitely not a recommended setting. A typical
usage for this would be to workaround some issue that would only pop
up when using XIVE. Note also that this only affects the pseries platform,
ie. running under an hypervisor (KVM or pHyp).

I cannot state right now whether it is worth the pain to cherry-pick all
the dependencies to fix this or not in older kernels...

Cheers,

--
Greg

> --
> Thanks,
> Sasha
diff mbox series

Patch

diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index 523bb99d7676..c8f7eb845927 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -172,6 +172,7 @@  static unsigned long __prombss prom_tce_alloc_end;
 
 #ifdef CONFIG_PPC_PSERIES
 static bool __prombss prom_radix_disable;
+static bool __prombss prom_xive_disable;
 #endif
 
 struct platform_support {
@@ -808,6 +809,12 @@  static void __init early_cmdline_parse(void)
 	}
 	if (prom_radix_disable)
 		prom_debug("Radix disabled from cmdline\n");
+
+	opt = prom_strstr(prom_cmd_line, "xive=off");
+	if (opt) {
+		prom_xive_disable = true;
+		prom_debug("XIVE disabled from cmdline\n");
+	}
 #endif /* CONFIG_PPC_PSERIES */
 }
 
@@ -1216,10 +1223,17 @@  static void __init prom_parse_xive_model(u8 val,
 	switch (val) {
 	case OV5_FEAT(OV5_XIVE_EITHER): /* Either Available */
 		prom_debug("XIVE - either mode supported\n");
-		support->xive = true;
+		support->xive = !prom_xive_disable;
 		break;
 	case OV5_FEAT(OV5_XIVE_EXPLOIT): /* Only Exploitation mode */
 		prom_debug("XIVE - exploitation mode supported\n");
+		if (prom_xive_disable) {
+			/*
+			 * If we __have__ to do XIVE, we're better off ignoring
+			 * the command line rather than not booting.
+			 */
+			prom_printf("WARNING: Ignoring cmdline option xive=off\n");
+		}
 		support->xive = true;
 		break;
 	case OV5_FEAT(OV5_XIVE_LEGACY): /* Only Legacy mode */
diff --git a/arch/powerpc/sysdev/xive/spapr.c b/arch/powerpc/sysdev/xive/spapr.c
index 575db3b06a6b..2e2d1b8f810f 100644
--- a/arch/powerpc/sysdev/xive/spapr.c
+++ b/arch/powerpc/sysdev/xive/spapr.c
@@ -20,6 +20,7 @@ 
 #include <linux/cpumask.h>
 #include <linux/mm.h>
 #include <linux/delay.h>
+#include <linux/libfdt.h>
 
 #include <asm/prom.h>
 #include <asm/io.h>
@@ -663,6 +664,55 @@  static bool xive_get_max_prio(u8 *max_prio)
 	return true;
 }
 
+static const u8 *get_vec5_feature(unsigned int index)
+{
+	unsigned long root, chosen;
+	int size;
+	const u8 *vec5;
+
+	root = of_get_flat_dt_root();
+	chosen = of_get_flat_dt_subnode_by_name(root, "chosen");
+	if (chosen == -FDT_ERR_NOTFOUND)
+		return NULL;
+
+	vec5 = of_get_flat_dt_prop(chosen, "ibm,architecture-vec-5", &size);
+	if (!vec5)
+		return NULL;
+
+	if (size <= index)
+		return NULL;
+
+	return vec5 + index;
+}
+
+static bool xive_spapr_disabled(void)
+{
+	const u8 *vec5_xive;
+
+	vec5_xive = get_vec5_feature(OV5_INDX(OV5_XIVE_SUPPORT));
+	if (vec5_xive) {
+		u8 val;
+
+		val = *vec5_xive & OV5_FEAT(OV5_XIVE_SUPPORT);
+		switch (val) {
+		case OV5_FEAT(OV5_XIVE_EITHER):
+		case OV5_FEAT(OV5_XIVE_LEGACY):
+			break;
+		case OV5_FEAT(OV5_XIVE_EXPLOIT):
+			/* Hypervisor only supports XIVE */
+			if (xive_cmdline_disabled)
+				pr_warn("WARNING: Ignoring cmdline option xive=off\n");
+			return false;
+		default:
+			pr_warn("%s: Unknown xive support option: 0x%x\n",
+				__func__, val);
+			break;
+		}
+	}
+
+	return xive_cmdline_disabled;
+}
+
 bool __init xive_spapr_init(void)
 {
 	struct device_node *np;
@@ -675,7 +725,7 @@  bool __init xive_spapr_init(void)
 	const __be32 *reg;
 	int i;
 
-	if (xive_cmdline_disabled)
+	if (xive_spapr_disabled())
 		return false;
 
 	pr_devel("%s()\n", __func__);