diff mbox

[RFC,v2,12/12] spapr: Workaround for broken radix guests

Message ID e2f52ac4317e8caff0cc4ed696e550d6843013ad.1487829585.git.sam.bobroff@au1.ibm.com
State New
Headers show

Commit Message

Sam Bobroff Feb. 23, 2017, 6 a.m. UTC
For a little while around 4.9, Linux kernels that saw the radix bit in
ibm,pa-features would attempt to set up the MMU as if they were a
hypervisor, even if they were a guest, which would cause them to
crash.

Work around this by detecting pre-ISA 3.0 guests by the absence of the
PowerPC 3.00 support bit in option vector 1, and then removing the radix
bit from ibm,pa-features. Note: This now requires regeneration of that
property after CAS negotiation.

Signed-off-by: Sam Bobroff <sam.bobroff@au1.ibm.com>
---
 hw/ppc/spapr.c              | 15 +++++++++++++--
 hw/ppc/spapr_hcall.c        |  5 +++--
 include/hw/ppc/spapr.h      |  1 +
 include/hw/ppc/spapr_ovec.h |  3 +++
 4 files changed, 20 insertions(+), 4 deletions(-)

Comments

David Gibson Feb. 28, 2017, 12:36 a.m. UTC | #1
On Thu, Feb 23, 2017 at 05:00:05PM +1100, Sam Bobroff wrote:
> For a little while around 4.9, Linux kernels that saw the radix bit in
> ibm,pa-features would attempt to set up the MMU as if they were a
> hypervisor, even if they were a guest, which would cause them to
> crash.
> 
> Work around this by detecting pre-ISA 3.0 guests by their lack of that
> bit in option vector 1, and then removing the radix bit from
> ibm,pa-features. Note: This now requires regeneration of that node
> after CAS negotiation.
> 
> Signed-off-by: Sam Bobroff <sam.bobroff@au1.ibm.com>

A bit ugly, but not any more so than it needs to be, given what we're
dealing with, AFAICT.

I'll save more detailed review until the rebase in conjunction with the
TCG bits.

> ---
>  hw/ppc/spapr.c              | 15 +++++++++++++--
>  hw/ppc/spapr_hcall.c        |  5 +++--
>  include/hw/ppc/spapr.h      |  1 +
>  include/hw/ppc/spapr_ovec.h |  3 +++
>  4 files changed, 20 insertions(+), 4 deletions(-)
> 
> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> index e83468a8d3..c47600b8ee 100644
> --- a/hw/ppc/spapr.c
> +++ b/hw/ppc/spapr.c
> @@ -195,7 +195,8 @@ static int spapr_fixup_cpu_numa_dt(void *fdt, int offset, CPUState *cs)
>  }
>  
>  /* Populate the "ibm,pa-features" property */
> -static void spapr_populate_pa_features(CPUPPCState *env, void *fdt, int offset)
> +static void spapr_populate_pa_features(CPUPPCState *env, void *fdt, int offset,
> +                                      bool legacy_guest)
>  {
>      uint8_t pa_features_206[] = { 6, 0,
>          0xf6, 0x1f, 0xc7, 0x00, 0x80, 0xc0 };
> @@ -251,6 +252,12 @@ static void spapr_populate_pa_features(CPUPPCState *env, void *fdt, int offset)
>      if (kvmppc_has_cap_htm() && pa_size > 24) {
>          pa_features[24] |= 0x80;    /* Transactional memory support */
>      }
> +    if (legacy_guest && pa_size > 40) {
> +        /* Workaround for broken kernels that attempt (guest) radix
> +         * mode when they can't handle it, if they see the radix bit set
> +         * in pa-features. So hide it from them. */
> +        pa_features[40 + 2] &= ~0x80; /* Radix MMU */
> +    }
>  
>      _FDT((fdt_setprop(fdt, offset, "ibm,pa-features", pa_features, pa_size)));
>  }
> @@ -265,6 +272,7 @@ static int spapr_fixup_cpu_dt(void *fdt, sPAPRMachineState *spapr)
>  
>      CPU_FOREACH(cs) {
>          PowerPCCPU *cpu = POWERPC_CPU(cs);
> +        CPUPPCState *env = &cpu->env;
>          DeviceClass *dc = DEVICE_GET_CLASS(cs);
>          int index = ppc_get_vcpu_dt_id(cpu);
>          int compat_smt = MIN(smp_threads, ppc_compat_max_threads(cpu));
> @@ -306,6 +314,9 @@ static int spapr_fixup_cpu_dt(void *fdt, sPAPRMachineState *spapr)
>          if (ret < 0) {
>              return ret;
>          }
> +
> +        spapr_populate_pa_features(env, fdt, offset,
> +                                         spapr->cas_legacy_guest_workaround);
>      }
>      return ret;
>  }
> @@ -503,7 +514,7 @@ static void spapr_populate_cpu_dt(CPUState *cs, void *fdt, int offset,
>                            page_sizes_prop, page_sizes_prop_size)));
>      }
>  
> -    spapr_populate_pa_features(env, fdt, offset);
> +    spapr_populate_pa_features(env, fdt, offset, false);
>  
>      _FDT((fdt_setprop_cell(fdt, offset, "ibm,chip-id",
>                             cs->cpu_index / vcpus_per_socket)));
> diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c
> index efaa1a1b19..7660cd7d64 100644
> --- a/hw/ppc/spapr_hcall.c
> +++ b/hw/ppc/spapr_hcall.c
> @@ -933,7 +933,7 @@ static target_ulong h_client_architecture_support(PowerPCCPU *cpu,
>      uint32_t max_compat = cpu->max_compat;
>      uint32_t best_compat = 0;
>      int i;
> -    sPAPROptionVector *ov5_guest, *ov5_cas_old, *ov5_updates;
> +    sPAPROptionVector *ov1_guest, *ov5_guest, *ov5_cas_old, *ov5_updates;
>      bool guest_radix;
>  
>      /*
> @@ -985,6 +985,7 @@ static target_ulong h_client_architecture_support(PowerPCCPU *cpu,
>      /* For the future use: here @ov_table points to the first option vector */
>      ov_table = list;
>  
> +    ov1_guest = spapr_ovec_parse_vector(ov_table, 1);
>      ov5_guest = spapr_ovec_parse_vector(ov_table, 5);
>      if (spapr_ovec_test(ov5_guest, OV5_MMU_BOTH)) {
>          error_report("qemu: guest requested hash and radix MMU, which is invalid.");
> @@ -1025,7 +1026,7 @@ static target_ulong h_client_architecture_support(PowerPCCPU *cpu,
>              exit(EXIT_FAILURE);
>          }
>      }
> -
> +    spapr->cas_legacy_guest_workaround = !spapr_ovec_test(ov1_guest, OV1_PPC_3_00);
>      if (!spapr->cas_reboot) {
>          spapr->cas_reboot =
>              (spapr_h_cas_compose_response(spapr, args[1], args[2],
> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
> index d523db3b4a..1e64e3ada8 100644
> --- a/include/hw/ppc/spapr.h
> +++ b/include/hw/ppc/spapr.h
> @@ -77,6 +77,7 @@ struct sPAPRMachineState {
>      sPAPROptionVector *ov5;         /* QEMU-supported option vectors */
>      sPAPROptionVector *ov5_cas;     /* negotiated (via CAS) option vectors */
>      bool cas_reboot;
> +    bool cas_legacy_guest_workaround;
>  
>      Notifier epow_notifier;
>      QTAILQ_HEAD(, sPAPREventLogEntry) pending_events;
> diff --git a/include/hw/ppc/spapr_ovec.h b/include/hw/ppc/spapr_ovec.h
> index e2dfbac558..8807c753e0 100644
> --- a/include/hw/ppc/spapr_ovec.h
> +++ b/include/hw/ppc/spapr_ovec.h
> @@ -43,6 +43,9 @@ typedef struct sPAPROptionVector sPAPROptionVector;
>  
>  #define OV_BIT(byte, bit) ((byte - 1) * BITS_PER_BYTE + bit)
>  
> +/* option vector 1 */
> +#define OV1_PPC_3_00            OV_BIT(3, 0) /* set if we support PowerPC 3.00 */
> +
>  /* option vector 5 */
>  #define OV5_DRCONF_MEMORY       OV_BIT(2, 2)
>  #define OV5_FORM1_AFFINITY      OV_BIT(5, 0)
diff mbox

Patch

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index e83468a8d3..c47600b8ee 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -195,7 +195,8 @@  static int spapr_fixup_cpu_numa_dt(void *fdt, int offset, CPUState *cs)
 }
 
 /* Populate the "ibm,pa-features" property */
-static void spapr_populate_pa_features(CPUPPCState *env, void *fdt, int offset)
+static void spapr_populate_pa_features(CPUPPCState *env, void *fdt, int offset,
+                                      bool legacy_guest)
 {
     uint8_t pa_features_206[] = { 6, 0,
         0xf6, 0x1f, 0xc7, 0x00, 0x80, 0xc0 };
@@ -251,6 +252,12 @@  static void spapr_populate_pa_features(CPUPPCState *env, void *fdt, int offset)
     if (kvmppc_has_cap_htm() && pa_size > 24) {
         pa_features[24] |= 0x80;    /* Transactional memory support */
     }
+    if (legacy_guest && pa_size > 40) {
+        /* Workaround for broken kernels that attempt (guest) radix
+         * mode when they can't handle it, if they see the radix bit set
+         * in pa-features. So hide it from them. */
+        pa_features[40 + 2] &= ~0x80; /* Radix MMU */
+    }
 
     _FDT((fdt_setprop(fdt, offset, "ibm,pa-features", pa_features, pa_size)));
 }
@@ -265,6 +272,7 @@  static int spapr_fixup_cpu_dt(void *fdt, sPAPRMachineState *spapr)
 
     CPU_FOREACH(cs) {
         PowerPCCPU *cpu = POWERPC_CPU(cs);
+        CPUPPCState *env = &cpu->env;
         DeviceClass *dc = DEVICE_GET_CLASS(cs);
         int index = ppc_get_vcpu_dt_id(cpu);
         int compat_smt = MIN(smp_threads, ppc_compat_max_threads(cpu));
@@ -306,6 +314,9 @@  static int spapr_fixup_cpu_dt(void *fdt, sPAPRMachineState *spapr)
         if (ret < 0) {
             return ret;
         }
+
+        spapr_populate_pa_features(env, fdt, offset,
+                                         spapr->cas_legacy_guest_workaround);
     }
     return ret;
 }
@@ -503,7 +514,7 @@  static void spapr_populate_cpu_dt(CPUState *cs, void *fdt, int offset,
                           page_sizes_prop, page_sizes_prop_size)));
     }
 
-    spapr_populate_pa_features(env, fdt, offset);
+    spapr_populate_pa_features(env, fdt, offset, false);
 
     _FDT((fdt_setprop_cell(fdt, offset, "ibm,chip-id",
                            cs->cpu_index / vcpus_per_socket)));
diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c
index efaa1a1b19..7660cd7d64 100644
--- a/hw/ppc/spapr_hcall.c
+++ b/hw/ppc/spapr_hcall.c
@@ -933,7 +933,7 @@  static target_ulong h_client_architecture_support(PowerPCCPU *cpu,
     uint32_t max_compat = cpu->max_compat;
     uint32_t best_compat = 0;
     int i;
-    sPAPROptionVector *ov5_guest, *ov5_cas_old, *ov5_updates;
+    sPAPROptionVector *ov1_guest, *ov5_guest, *ov5_cas_old, *ov5_updates;
     bool guest_radix;
 
     /*
@@ -985,6 +985,7 @@  static target_ulong h_client_architecture_support(PowerPCCPU *cpu,
     /* For the future use: here @ov_table points to the first option vector */
     ov_table = list;
 
+    ov1_guest = spapr_ovec_parse_vector(ov_table, 1);
     ov5_guest = spapr_ovec_parse_vector(ov_table, 5);
     if (spapr_ovec_test(ov5_guest, OV5_MMU_BOTH)) {
         error_report("qemu: guest requested hash and radix MMU, which is invalid.");
@@ -1025,7 +1026,7 @@  static target_ulong h_client_architecture_support(PowerPCCPU *cpu,
             exit(EXIT_FAILURE);
         }
     }
-
+    spapr->cas_legacy_guest_workaround = !spapr_ovec_test(ov1_guest, OV1_PPC_3_00);
     if (!spapr->cas_reboot) {
         spapr->cas_reboot =
             (spapr_h_cas_compose_response(spapr, args[1], args[2],
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index d523db3b4a..1e64e3ada8 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -77,6 +77,7 @@  struct sPAPRMachineState {
     sPAPROptionVector *ov5;         /* QEMU-supported option vectors */
     sPAPROptionVector *ov5_cas;     /* negotiated (via CAS) option vectors */
     bool cas_reboot;
+    bool cas_legacy_guest_workaround;
 
     Notifier epow_notifier;
     QTAILQ_HEAD(, sPAPREventLogEntry) pending_events;
diff --git a/include/hw/ppc/spapr_ovec.h b/include/hw/ppc/spapr_ovec.h
index e2dfbac558..8807c753e0 100644
--- a/include/hw/ppc/spapr_ovec.h
+++ b/include/hw/ppc/spapr_ovec.h
@@ -43,6 +43,9 @@  typedef struct sPAPROptionVector sPAPROptionVector;
 
 #define OV_BIT(byte, bit) ((byte - 1) * BITS_PER_BYTE + bit)
 
+/* option vector 1 */
+#define OV1_PPC_3_00            OV_BIT(3, 0) /* set if we support PowerPC 3.00 */
+
 /* option vector 5 */
 #define OV5_DRCONF_MEMORY       OV_BIT(2, 2)
 #define OV5_FORM1_AFFINITY      OV_BIT(5, 0)