diff mbox

[1/3] pseries: Support SMT systems for KVM Book3S-HV

Message ID 1317278706-16105-2-git-send-email-david@gibson.dropbear.id.au
State New
Headers show

Commit Message

David Gibson Sept. 29, 2011, 6:45 a.m. UTC
Alex Graf has already made qemu support KVM for the pseries machine
when using the Book3S-PR KVM variant (which runs the guest in
usermode, emulating supervisor operations).  This code gets us
very close to also working with KVM Book3S-HV (using the hypervisor
capabilities of recent POWER CPUs).

This patch moves us another step towards Book3S-HV support by
correctly handling SMT (multithreaded) POWER CPUs.  There are two
parts to this:

 * Querying KVM to check SMT capability, and if present, adjusting the
   cpu numbers that qemu assigns to cause KVM to assign guest threads
   to cores in the right way (this isn't automatic, because the POWER
   HV support has a limitation that different threads on a single core
   cannot be in different guests at the same time).

 * Correctly informing the guest OS of the SMT thread to core mappings
   via the device tree.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
 hw/spapr.c           |   23 ++++++++++++++++++++---
 target-ppc/helper.c  |   11 +++++++++++
 target-ppc/kvm.c     |   10 ++++++++++
 target-ppc/kvm_ppc.h |    6 ++++++
 4 files changed, 47 insertions(+), 3 deletions(-)

Comments

Jan Kiszka Sept. 29, 2011, 7:27 a.m. UTC | #1
On 2011-09-29 08:45, David Gibson wrote:
> diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c
> index 35a6f10..2c1bc7a 100644
> --- a/target-ppc/kvm.c
> +++ b/target-ppc/kvm.c

...

> @@ -76,6 +78,9 @@ int kvm_arch_init(KVMState *s)
>      cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
>      cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
>      cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
> +#ifdef KVM_CAP_PPC_SMT
> +    cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
> +#endif

Just use update-linux-headers.sh if the CAP is missing and drop the
#ifdef. Same for patch 2 & 3.

Jan
Alexander Graf Sept. 29, 2011, 1:17 p.m. UTC | #2
On 29.09.2011, at 08:45, David Gibson wrote:

> Alex Graf has already made qemu support KVM for the pseries machine
> when using the Book3S-PR KVM variant (which runs the guest in
> usermode, emulating supervisor operations).  This code gets us
> very close to also working with KVM Book3S-HV (using the hypervisor
> capabilities of recent POWER CPUs).
> 
> This patch moves us another step towards Book3S-HV support by
> correctly handling SMT (multithreaded) POWER CPUs.  There are two
> parts to this:
> 
> * Querying KVM to check SMT capability, and if present, adjusting the
>   cpu numbers that qemu assigns to cause KVM to assign guest threads
>   to cores in the right way (this isn't automatic, because the POWER
>   HV support has a limitation that different threads on a single core
>   cannot be in different guests at the same time).
> 
> * Correctly informing the guest OS of the SMT thread to core mappings
>   via the device tree.
> 
> Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
> ---
> hw/spapr.c           |   23 ++++++++++++++++++++---
> target-ppc/helper.c  |   11 +++++++++++
> target-ppc/kvm.c     |   10 ++++++++++
> target-ppc/kvm_ppc.h |    6 ++++++
> 4 files changed, 47 insertions(+), 3 deletions(-)
> 
> diff --git a/hw/spapr.c b/hw/spapr.c
> index b118975..ba9ae1c 100644
> --- a/hw/spapr.c
> +++ b/hw/spapr.c
> @@ -29,6 +29,9 @@
> #include "elf.h"
> #include "net.h"
> #include "blockdev.h"
> +#include "cpus.h"
> +#include "kvm.h"
> +#include "kvm_ppc.h"
> 
> #include "hw/boards.h"
> #include "hw/ppc.h"
> @@ -103,6 +106,7 @@ static void *spapr_create_fdt_skel(const char *cpu_model,
>     uint32_t interrupt_server_ranges_prop[] = {0, cpu_to_be32(smp_cpus)};
>     int i;
>     char *modelname;
> +    int smt = kvmppc_smt_threads();
> 
> #define _FDT(exp) \
>     do { \
> @@ -162,13 +166,17 @@ static void *spapr_create_fdt_skel(const char *cpu_model,
> 
>     for (env = first_cpu; env != NULL; env = env->next_cpu) {
>         int index = env->cpu_index;
> -        uint32_t gserver_prop[] = {cpu_to_be32(index), 0}; /* HACK! */
> +        uint32_t servers_prop[smp_threads];
> +        uint32_t gservers_prop[smp_threads * 2];
>         char *nodename;
>         uint32_t segs[] = {cpu_to_be32(28), cpu_to_be32(40),
>                            0xffffffff, 0xffffffff};
>         uint32_t tbfreq = kvm_enabled() ? kvmppc_get_tbfreq() : TIMEBASE_FREQ;
>         uint32_t cpufreq = kvm_enabled() ? kvmppc_get_clockfreq() : 1000000000;
> 
> +        if ((index % smt) != 0)
> +            continue;

Please run through checkpatch.pl

Alex
David Gibson Sept. 30, 2011, 1:02 a.m. UTC | #3
On Thu, Sep 29, 2011 at 03:17:11PM +0200, Alexander Graf wrote:
> On 29.09.2011, at 08:45, David Gibson wrote:
[snip]
> > diff --git a/hw/spapr.c b/hw/spapr.c
> > index b118975..ba9ae1c 100644
> > --- a/hw/spapr.c
> > +++ b/hw/spapr.c
> > @@ -29,6 +29,9 @@
> > #include "elf.h"
> > #include "net.h"
> > #include "blockdev.h"
> > +#include "cpus.h"
> > +#include "kvm.h"
> > +#include "kvm_ppc.h"
> > 
> > #include "hw/boards.h"
> > #include "hw/ppc.h"
> > @@ -103,6 +106,7 @@ static void *spapr_create_fdt_skel(const char *cpu_model,
> >     uint32_t interrupt_server_ranges_prop[] = {0, cpu_to_be32(smp_cpus)};
> >     int i;
> >     char *modelname;
> > +    int smt = kvmppc_smt_threads();
> > 
> > #define _FDT(exp) \
> >     do { \
> > @@ -162,13 +166,17 @@ static void *spapr_create_fdt_skel(const char *cpu_model,
> > 
> >     for (env = first_cpu; env != NULL; env = env->next_cpu) {
> >         int index = env->cpu_index;
> > -        uint32_t gserver_prop[] = {cpu_to_be32(index), 0}; /* HACK! */
> > +        uint32_t servers_prop[smp_threads];
> > +        uint32_t gservers_prop[smp_threads * 2];
> >         char *nodename;
> >         uint32_t segs[] = {cpu_to_be32(28), cpu_to_be32(40),
> >                            0xffffffff, 0xffffffff};
> >         uint32_t tbfreq = kvm_enabled() ? kvmppc_get_tbfreq() : TIMEBASE_FREQ;
> >         uint32_t cpufreq = kvm_enabled() ? kvmppc_get_clockfreq() : 1000000000;
> > 
> > +        if ((index % smt) != 0)
> > +            continue;
> 
> Please run through checkpatch.pl

Actually, checkpatch.pl didn't spot that one, for some bizarre
reason.  Fixed, nonetheless.
diff mbox

Patch

diff --git a/hw/spapr.c b/hw/spapr.c
index b118975..ba9ae1c 100644
--- a/hw/spapr.c
+++ b/hw/spapr.c
@@ -29,6 +29,9 @@ 
 #include "elf.h"
 #include "net.h"
 #include "blockdev.h"
+#include "cpus.h"
+#include "kvm.h"
+#include "kvm_ppc.h"
 
 #include "hw/boards.h"
 #include "hw/ppc.h"
@@ -103,6 +106,7 @@  static void *spapr_create_fdt_skel(const char *cpu_model,
     uint32_t interrupt_server_ranges_prop[] = {0, cpu_to_be32(smp_cpus)};
     int i;
     char *modelname;
+    int smt = kvmppc_smt_threads();
 
 #define _FDT(exp) \
     do { \
@@ -162,13 +166,17 @@  static void *spapr_create_fdt_skel(const char *cpu_model,
 
     for (env = first_cpu; env != NULL; env = env->next_cpu) {
         int index = env->cpu_index;
-        uint32_t gserver_prop[] = {cpu_to_be32(index), 0}; /* HACK! */
+        uint32_t servers_prop[smp_threads];
+        uint32_t gservers_prop[smp_threads * 2];
         char *nodename;
         uint32_t segs[] = {cpu_to_be32(28), cpu_to_be32(40),
                            0xffffffff, 0xffffffff};
         uint32_t tbfreq = kvm_enabled() ? kvmppc_get_tbfreq() : TIMEBASE_FREQ;
         uint32_t cpufreq = kvm_enabled() ? kvmppc_get_clockfreq() : 1000000000;
 
+        if ((index % smt) != 0)
+            continue;
+
         if (asprintf(&nodename, "%s@%x", modelname, index) < 0) {
             fprintf(stderr, "Allocation failure\n");
             exit(1);
@@ -193,9 +201,18 @@  static void *spapr_create_fdt_skel(const char *cpu_model,
                            pft_size_prop, sizeof(pft_size_prop))));
         _FDT((fdt_property_string(fdt, "status", "okay")));
         _FDT((fdt_property(fdt, "64-bit", NULL, 0)));
-        _FDT((fdt_property_cell(fdt, "ibm,ppc-interrupt-server#s", index)));
+
+        /* Build interrupt servers and gservers properties */
+        for (i = 0; i < smp_threads; i++) {
+            servers_prop[i] = cpu_to_be32(index + i);
+            /* Hack, direct the group queues back to cpu 0 */
+            gservers_prop[i*2] = cpu_to_be32(index + i);
+            gservers_prop[i*2 + 1] = 0;
+        }
+        _FDT((fdt_property(fdt, "ibm,ppc-interrupt-server#s",
+                           servers_prop, sizeof(servers_prop))));
         _FDT((fdt_property(fdt, "ibm,ppc-interrupt-gserver#s",
-                           gserver_prop, sizeof(gserver_prop))));
+                           gservers_prop, sizeof(gservers_prop))));
 
         if (env->mmu_model & POWERPC_MMU_1TSEG) {
             _FDT((fdt_property(fdt, "ibm,processor-segment-sizes",
diff --git a/target-ppc/helper.c b/target-ppc/helper.c
index 6339be3..137a494 100644
--- a/target-ppc/helper.c
+++ b/target-ppc/helper.c
@@ -26,6 +26,8 @@ 
 #include "helper_regs.h"
 #include "qemu-common.h"
 #include "kvm.h"
+#include "kvm_ppc.h"
+#include "cpus.h"
 
 //#define DEBUG_MMU
 //#define DEBUG_BATS
@@ -3189,6 +3191,15 @@  CPUPPCState *cpu_ppc_init (const char *cpu_model)
     if (tcg_enabled()) {
         ppc_translate_init();
     }
+    /* Adjust cpu index for SMT */
+#if !defined(CONFIG_USER_ONLY)
+    if (kvm_enabled()) {
+        int smt = kvmppc_smt_threads();
+
+        env->cpu_index = (env->cpu_index / smp_threads)*smt
+            + (env->cpu_index % smp_threads);
+    }
+#endif /* !CONFIG_USER_ONLY */
     env->cpu_model_str = cpu_model;
     cpu_ppc_register_internal(env, def);
 
diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c
index 35a6f10..2c1bc7a 100644
--- a/target-ppc/kvm.c
+++ b/target-ppc/kvm.c
@@ -28,6 +28,7 @@ 
 #include "kvm_ppc.h"
 #include "cpu.h"
 #include "device_tree.h"
+#include "hw/spapr.h"
 
 #include "hw/sysbus.h"
 #include "hw/spapr.h"
@@ -53,6 +54,7 @@  static int cap_interrupt_unset = false;
 static int cap_interrupt_level = false;
 static int cap_segstate;
 static int cap_booke_sregs;
+static int cap_ppc_smt = 0;
 
 /* XXX We have a race condition where we actually have a level triggered
  *     interrupt, but the infrastructure can't expose that yet, so the guest
@@ -76,6 +78,9 @@  int kvm_arch_init(KVMState *s)
     cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
     cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
     cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
+#ifdef KVM_CAP_PPC_SMT
+    cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
+#endif
 
     if (!cap_interrupt_level) {
         fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
@@ -731,6 +736,11 @@  fail:
     cpu_abort(env, "This KVM version does not support PAPR\n");
 }
 
+int kvmppc_smt_threads(void)
+{
+    return cap_ppc_smt ? cap_ppc_smt : 1;
+}
+
 bool kvm_arch_stop_on_emulation_error(CPUState *env)
 {
     return true;
diff --git a/target-ppc/kvm_ppc.h b/target-ppc/kvm_ppc.h
index c484e60..c298411 100644
--- a/target-ppc/kvm_ppc.h
+++ b/target-ppc/kvm_ppc.h
@@ -18,6 +18,7 @@  uint64_t kvmppc_get_clockfreq(void);
 int kvmppc_get_hypercall(CPUState *env, uint8_t *buf, int buf_len);
 int kvmppc_set_interrupt(CPUState *env, int irq, int level);
 void kvmppc_set_papr(CPUState *env);
+int kvmppc_smt_threads(void);
 
 #else
 
@@ -45,6 +46,11 @@  static inline void kvmppc_set_papr(CPUState *env)
 {
 }
 
+static inline int kvmppc_smt_threads(void)
+{
+    return 1;
+}
+
 #endif
 
 #ifndef CONFIG_KVM