diff mbox

[RFC] make cpu creation happen inside the right thread.

Message ID 1257258908-16541-1-git-send-email-glommer@redhat.com
State New
Headers show

Commit Message

Glauber Costa Nov. 3, 2009, 2:35 p.m. UTC
Right now, we issue cpu creation from the i/o thread, and then shoot a thread
from inside that code. Over the last months, a lot of subtle bugs were reported,
usually arising from the very fragile order of that initialization.

I propose we rethink that a little. This is a patch that received basic testing
only, and I'd like to hear about the overall direction. The idea is to issue the new
thread as early as possible. The first direct benefits I can identify are that
we no longer have to rely on on_vcpu-like schemes for issuing vcpu ioctls, since
we are already on the right thread. Apic creation has far less spots for race
conditions as well.

I am implementing this on qemu-kvm first, because we can show the benefits of it
a bit better there (since we already support smp).

Let me know what you guys think

Signed-off-by: Glauber Costa <glommer@redhat.com>
CC: Marcelo Tosatti <mtosatti@redhat.com>
CC: Avi Kivity <avi@redhat.com>
CC: Jan Kiszka <jan.kiszka@siemens.com>
CC: Anthony Liguori <aliguori@us.ibm.com>
---
 cpu-defs.h     |    2 +-
 hw/acpi.c      |    2 +-
 hw/pc.c        |   26 ++++++++++++--------------
 hw/pc.h        |    2 +-
 qemu-kvm-x86.c |    5 +++++
 qemu-kvm.c     |   44 +++++++++++++++++++++++++++++++-------------
 qemu-kvm.h     |    2 ++
 vl.c           |    2 --
 8 files changed, 53 insertions(+), 32 deletions(-)

Comments

Marcelo Tosatti Nov. 3, 2009, 5:46 p.m. UTC | #1
On Tue, Nov 03, 2009 at 12:35:08PM -0200, Glauber Costa wrote:
> Right now, we issue cpu creation from the i/o thread, and then shoot a thread
> from inside that code. Over the last months, a lot of subtle bugs were reported,
> usually arising from the very fragile order of that initialization.
> 
> I propose we rethink that a little. This is a patch that received basic testing
> only, and I'd  like to hear on the overall direction. The idea is to issue the new
> thread as early as possible. The first direct benefits I can identify are that
> we no longer have to rely at on_vcpu-like schemes for issuing vcpu ioctls, since
> we are already on the right thread. Apic creation has far less spots for race
> conditions as well.
> 
> I am implementing this on qemu-kvm first, since we can show the benefits of it
> a bit better in there (since we already support smp)
> 
> Let me know what you guys think

Makes sense to me. You still need on_vcpu for issuing vcpu ioctls though
(after initialization).
Glauber Costa Nov. 3, 2009, 5:56 p.m. UTC | #2
On Tue, Nov 03, 2009 at 03:46:07PM -0200, Marcelo Tosatti wrote:
> On Tue, Nov 03, 2009 at 12:35:08PM -0200, Glauber Costa wrote:
> > Right now, we issue cpu creation from the i/o thread, and then shoot a thread
> > from inside that code. Over the last months, a lot of subtle bugs were reported,
> > usually arising from the very fragile order of that initialization.
> > 
> > I propose we rethink that a little. This is a patch that received basic testing
> > only, and I'd  like to hear on the overall direction. The idea is to issue the new
> > thread as early as possible. The first direct benefits I can identify are that
> > we no longer have to rely at on_vcpu-like schemes for issuing vcpu ioctls, since
> > we are already on the right thread. Apic creation has far less spots for race
> > conditions as well.
> > 
> > I am implementing this on qemu-kvm first, since we can show the benefits of it
> > a bit better in there (since we already support smp)
> > 
> > Let me know what you guys think
> 
> Makes sense to me. You still need on_vcpu for issuing vcpu ioctls though
> (after initialization).

Yes, but I believe we can avoid most of them. There is a performance hit of using
it, but I am not so concerned with that. The nasty races that arise from it are
more of a concern to me.
diff mbox

Patch

diff --git a/cpu-defs.h b/cpu-defs.h
index cf502e9..6d026e0 100644
--- a/cpu-defs.h
+++ b/cpu-defs.h
@@ -139,7 +139,7 @@  typedef struct CPUWatchpoint {
 struct qemu_work_item;
 
 struct KVMCPUState {
-    pthread_t thread;
+    pthread_t *thread;
     int signalled;
     struct qemu_work_item *queued_work_first, *queued_work_last;
     int regs_modified;
diff --git a/hw/acpi.c b/hw/acpi.c
index 7564abf..cc68188 100644
--- a/hw/acpi.c
+++ b/hw/acpi.c
@@ -781,7 +781,7 @@  void qemu_system_cpu_hot_add(int cpu, int state)
     CPUState *env;
 
     if (state && !qemu_get_cpu(cpu)) {
-        env = pc_new_cpu(model);
+        pc_new_cpu(model, &env);
         if (!env) {
             fprintf(stderr, "cpu %d creation failed\n", cpu);
             return;
diff --git a/hw/pc.c b/hw/pc.c
index 83012a9..53e7273 100644
--- a/hw/pc.c
+++ b/hw/pc.c
@@ -1013,29 +1013,26 @@  int cpu_is_bsp(CPUState *env)
     return env->cpuid_apic_id == 0;
 }
 
-CPUState *pc_new_cpu(const char *cpu_model)
+void pc_new_cpu(const char *cpu_model, CPUState **env)
 {
-    CPUState *env;
-
-    env = cpu_init(cpu_model);
-    if (!env) {
+    *env = cpu_init(cpu_model);
+    if (!*env) {
         fprintf(stderr, "Unable to find x86 CPU definition\n");
         exit(1);
     }
-    env->kvm_cpu_state.regs_modified = 1;
-    if ((env->cpuid_features & CPUID_APIC) || smp_cpus > 1) {
-        env->cpuid_apic_id = env->cpu_index;
+    (*env)->kvm_cpu_state.regs_modified = 1;
+    if (((*env)->cpuid_features & CPUID_APIC) || smp_cpus > 1) {
+        (*env)->cpuid_apic_id = (*env)->cpu_index;
         /* APIC reset callback resets cpu */
-        apic_init(env);
+        apic_init(*env);
     } else {
-        qemu_register_reset((QEMUResetHandler*)cpu_reset, env);
+        qemu_register_reset((QEMUResetHandler*)cpu_reset, *env);
     }
 
     /* kvm needs this to run after the apic is initialized. Otherwise,
      * it can access invalid state and crash.
      */
-    qemu_init_vcpu(env);
-    return env;
+    qemu_init_vcpu(*env);
 }
 
 /* PC hardware initialisation */
@@ -1055,7 +1052,6 @@  static void pc_init1(ram_addr_t ram_size,
     PCIBus *pci_bus;
     ISADevice *isa_dev;
     int piix3_devfn = -1;
-    CPUState *env;
     qemu_irq *cpu_irq;
     qemu_irq *isa_irq;
     qemu_irq *i8259;
@@ -1086,8 +1082,10 @@  static void pc_init1(ram_addr_t ram_size,
     if (kvm_enabled()) {
         kvm_set_boot_cpu_id(0);
     }
+
     for (i = 0; i < smp_cpus; i++) {
-        env = pc_new_cpu(cpu_model);
+//        pc_new_cpu(cpu_model, &env);
+	  kvm_init_vcpu(NULL);
     }
 
     vmport_init();
diff --git a/hw/pc.h b/hw/pc.h
index 93eb34d..f931380 100644
--- a/hw/pc.h
+++ b/hw/pc.h
@@ -100,7 +100,7 @@  extern int fd_bootchk;
 
 void ioport_set_a20(int enable);
 int ioport_get_a20(void);
-CPUState *pc_new_cpu(const char *cpu_model);
+void pc_new_cpu(const char *cpu_model, CPUState **env);
 
 /* acpi.c */
 extern int acpi_enabled;
diff --git a/qemu-kvm-x86.c b/qemu-kvm-x86.c
index 0d263ca..4084312 100644
--- a/qemu-kvm-x86.c
+++ b/qemu-kvm-x86.c
@@ -160,6 +160,11 @@  int kvm_arch_create(kvm_context_t kvm, unsigned long phys_mem_bytes,
 	return 0;
 }
 
+void kvm_arch_create_vcpu(const char *model, CPUState **env)
+{
+    pc_new_cpu("qemu64", env);
+}
+
 #ifdef KVM_EXIT_TPR_ACCESS
 
 static int kvm_handle_tpr_access(CPUState *env)
diff --git a/qemu-kvm.c b/qemu-kvm.c
index b58a457..f83d19a 100644
--- a/qemu-kvm.c
+++ b/qemu-kvm.c
@@ -436,10 +436,18 @@  void kvm_disable_pit_creation(kvm_context_t kvm)
     kvm->no_pit_creation = 1;
 }
 
-static void kvm_create_vcpu(CPUState *env, int id)
+
+static void kvm_create_vcpu(CPUState **_env)
 {
     long mmap_size;
     int r;
+    int id;
+    CPUState *env;
+
+    kvm_arch_create_vcpu("qemu64", _env);
+    env = *(CPUState **)_env;
+   
+    id = env->cpu_index;
 
     r = kvm_vm_ioctl(kvm_state, KVM_CREATE_VCPU, id);
     if (r < 0) {
@@ -1549,11 +1557,13 @@  static void sigbus_handler(int n, struct qemu_signalfd_siginfo *siginfo,
 static void on_vcpu(CPUState *env, void (*func)(void *data), void *data)
 {
     struct qemu_work_item wi;
+    static int remotes;
 
     if (env == current_env) {
         func(data);
         return;
     }
+    printf("remotes: %d\n", remotes++);
 
     wi.func = func;
     wi.data = data;
@@ -1565,7 +1575,7 @@  static void on_vcpu(CPUState *env, void (*func)(void *data), void *data)
     wi.next = NULL;
     wi.done = false;
 
-    pthread_kill(env->kvm_cpu_state.thread, SIG_IPI);
+    pthread_kill(*env->kvm_cpu_state.thread, SIG_IPI);
     while (!wi.done)
         qemu_cond_wait(&qemu_work_cond);
 }
@@ -1616,8 +1626,8 @@  void kvm_update_interrupt_request(CPUState *env)
 
         if (signal) {
             env->kvm_cpu_state.signalled = 1;
-            if (env->kvm_cpu_state.thread)
-                pthread_kill(env->kvm_cpu_state.thread, SIG_IPI);
+            if (*env->kvm_cpu_state.thread)
+                pthread_kill(*env->kvm_cpu_state.thread, SIG_IPI);
         }
     }
 }
@@ -1840,7 +1850,7 @@  static void pause_all_threads(void)
     while (penv) {
         if (penv != cpu_single_env) {
             penv->stop = 1;
-            pthread_kill(penv->kvm_cpu_state.thread, SIG_IPI);
+            pthread_kill(*penv->kvm_cpu_state.thread, SIG_IPI);
         } else {
             penv->stop = 0;
             penv->stopped = 1;
@@ -1862,7 +1872,7 @@  static void resume_all_threads(void)
     while (penv) {
         penv->stop = 0;
         penv->stopped = 0;
-        pthread_kill(penv->kvm_cpu_state.thread, SIG_IPI);
+        pthread_kill(*penv->kvm_cpu_state.thread, SIG_IPI);
         penv = (CPUState *) penv->next_cpu;
     }
 }
@@ -1934,19 +1944,23 @@  static int kvm_main_loop_cpu(CPUState *env)
     return 0;
 }
 
+extern void pc_new_cpu(const char *c, CPUState **env);
+
 static void *ap_main_loop(void *_env)
 {
-    CPUState *env = _env;
+    CPUState *env;
     sigset_t signals;
 #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
     struct ioperm_data *data = NULL;
 #endif
 
-    current_env = env;
-    env->thread_id = kvm_get_thread_id();
     sigfillset(&signals);
     sigprocmask(SIG_BLOCK, &signals, NULL);
-    kvm_create_vcpu(env, env->cpu_index);
+    kvm_create_vcpu(&env);
+
+    *(CPUState **)_env = env;
+    current_env = env;
+    env->thread_id = kvm_get_thread_id();
 
 #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
     /* do ioperm for io ports of assigned devices */
@@ -1981,12 +1995,16 @@  static void *ap_main_loop(void *_env)
     return NULL;
 }
 
-void kvm_init_vcpu(CPUState *env)
+void kvm_init_vcpu(CPUState *_env)
 {
-    pthread_create(&env->kvm_cpu_state.thread, NULL, ap_main_loop, env);
+    CPUState *env = NULL;
+    pthread_t *thread = qemu_malloc(sizeof(*thread));
+
+    pthread_create(thread, NULL, ap_main_loop, &env);
 
-    while (env->created == 0)
+    while (env == NULL)
         qemu_cond_wait(&qemu_vcpu_cond);
+    env->kvm_cpu_state.thread = thread;
 }
 
 int kvm_vcpu_inited(CPUState *env)
diff --git a/qemu-kvm.h b/qemu-kvm.h
index 525d78d..4dd50f8 100644
--- a/qemu-kvm.h
+++ b/qemu-kvm.h
@@ -87,6 +87,8 @@  int kvm_alloc_userspace_memory(kvm_context_t kvm, unsigned long memory,
 int kvm_arch_create(kvm_context_t kvm, unsigned long phys_mem_bytes,
                     void **vm_mem);
 
+void kvm_arch_create_vcpu(const char *model, CPUState **env);
+
 int kvm_arch_run(CPUState *env);
 
 
diff --git a/vl.c b/vl.c
index 5dc7b2b..8dfae43 100644
--- a/vl.c
+++ b/vl.c
@@ -3570,8 +3570,6 @@  void qemu_init_vcpu(void *_env)
 {
     CPUState *env = _env;
 
-    if (kvm_enabled())
-        kvm_init_vcpu(env);
     env->nr_cores = smp_cores;
     env->nr_threads = smp_threads;
     return;