[RFC,v2] queue_work proposal

Message ID: 1252000886-20611-1-git-send-email-glommer@redhat.com
State: Superseded

Commit Message

Glauber Costa Sept. 3, 2009, 6:01 p.m. UTC
Hi guys

In this patch, I am attaching an early version of a new "on_vcpu" mechanism (after
making it generic, I saw no reason to keep that name). It allows us to guarantee
that a piece of code will be executed on a certain vcpu, indicated by a CPUState.
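As an illustration, a caller would do something like the following (the worker
function here is hypothetical, not part of the patch):

    /* Hypothetical worker: must run on env's own vcpu thread. */
    static void do_flush(void *data)
    {
        CPUState *env = data;
        tlb_flush(env, 1);
    }

    /* From some caller, possibly another thread: runs do_flush() on env's
     * thread and returns only once it has completed. */
    qemu_queue_work(env, do_flush, env);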

I am sorry for the big patch; I just dumped what I had so we can agree on early directions.
When it reaches submission state, I'll split it accordingly.

As we discussed recently on qemu-devel, I am using pthread_set/get_specific for
dealing with thread-local variables. Note that they are not used from signal handlers.
A first optimization would be to use TLS variables where available.
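For reference, a minimal sketch of that TLS variant (assuming the toolchain
supports __thread) would be:

    static __thread CPUState *current_env;

    static CPUState *qemu_get_current_env(void)
    {
        return current_env;
    }

    static void qemu_set_current_env(CPUState *env)
    {
        current_env = env;
    }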

In vl.c, I am providing one version of queue_work for the IO-thread, and another for normal
operation. The "normal" one should fix the problems Jan is having, since it does nothing
more than call the function we want to execute directly.

The io-thread version is tested with both tcg and kvm, and works (to the extent they were
working before, which in the kvm case is not much).

Changes from v1:
 * Don't open the possibility of calling queue_work asynchronously, suggested by
   Avi "Peter Parker" Kivity
 * Use a local mutex, suggested by Paolo Bonzini

Signed-off-by: Glauber Costa <glommer@redhat.com>
---
 cpu-all.h  |    3 ++
 cpu-defs.h |   15 ++++++++++++
 exec.c     |    1 +
 kvm-all.c  |   58 +++++++++++++++++++---------------------------
 kvm.h      |    7 +++++
 vl.c       |   75 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 6 files changed, 125 insertions(+), 34 deletions(-)

Comments

Glauber Costa Sept. 3, 2009, 7:28 p.m. UTC | #1
On Thu, Sep 03, 2009 at 08:48:53PM +0200, Juan Quintela wrote:
> Glauber Costa <glommer@redhat.com> wrote:
> 
> Hi Glauber
> 
> > +int kvm_vcpu_ioctl(CPUState *env, int type, ...)
> > +{
> >      void *arg;
> >      va_list ap;
> > +    KVMIoctl data;
> >  
> >      va_start(ap, type);
> >      arg = va_arg(ap, void *);
> >      va_end(ap);
> >  
> > -    ret = ioctl(env->kvm_fd, type, arg);
> > -    if (ret == -1)
> > -        ret = -errno;
> > +    data.type = type;
> > +    data.data = arg;
> > +    data.fd = env->kvm_fd;
> >  
> > -    return ret;
> > +    qemu_queue_work(env, kvm_remote_ioctl, (void *)&data);
> > +
> > +    return data.ret;
> 
> Does this work? Giving the address of a local variable is a no-no.
Yes, because it is synchronous. The stack frame keeps existing until queue_work returns.

Actually, this was one of the reasons Avi pushed so hard against the async mechanism.
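
In other words, re-sketching the code quoted above, the local variable is safe
precisely because the call blocks (the comments are mine, not in the patch):

    int kvm_vcpu_ioctl(CPUState *env, int type, ...)
    {
        KVMIoctl data;                     /* lives in this stack frame */

        /* ... fill data.type, data.data and data.fd as in the patch ... */

        /* Blocks until kvm_remote_ioctl() has run on env's thread and
         * filled data.ret, so &data stays valid for the whole exchange. */
        qemu_queue_work(env, kvm_remote_ioctl, (void *)&data);
        return data.ret;
    }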
Marcelo Tosatti Oct. 22, 2009, 5:37 p.m. UTC | #2
On Thu, Sep 03, 2009 at 02:01:26PM -0400, Glauber Costa wrote:
> [...]
> 
> @@ -3808,6 +3835,50 @@ void qemu_cpu_kick(void *_env)
>          qemu_thread_signal(env->thread, SIGUSR1);
>  }
>  
> +void qemu_queue_work(CPUState *env, void (*func)(void *data), void *data)
> +{
> +    QemuWorkItem wii;
> +
> +    env->queued_total++;
> +
> +    if (env == qemu_get_current_env()) {
> +        env->queued_local++;
> +        func(data);
> +        return;
> +    }
> +
> +    wii.func = func;
> +    wii.data = data;
> +    qemu_mutex_lock(&env->queue_lock);
> +    TAILQ_INSERT_TAIL(&env->queued_work, &wii, entry);
> +    qemu_mutex_unlock(&env->queue_lock);
> +
> +    qemu_thread_signal(env->thread, SIGUSR1);
> +
> +    qemu_mutex_lock(&env->queue_lock);
> +    while (!wii.done) {
> +        qemu_cond_wait(&env->work_cond, &qemu_global_mutex);
> +    }
> +    qemu_mutex_unlock(&env->queue_lock);

How's qemu_flush_work supposed to execute if env->queue_lock is held
here?

qemu_cond_wait() should work with env->queue_lock, and qemu_global_mutex
should be dropped before waiting and reacquired on return.
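
A sketch of the locking Marcelo describes, assuming the wait loop in
qemu_queue_work() is reworked to pair the condition variable with
env->queue_lock:

    /* Drop the global mutex while waiting, so the target vcpu thread can
     * take it and reach qemu_flush_work(); wait on queue_lock instead. */
    qemu_mutex_unlock(&qemu_global_mutex);
    qemu_mutex_lock(&env->queue_lock);
    while (!wii.done) {
        qemu_cond_wait(&env->work_cond, &env->queue_lock);
    }
    qemu_mutex_unlock(&env->queue_lock);
    qemu_mutex_lock(&qemu_global_mutex);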
Glauber Costa Oct. 22, 2009, 6:57 p.m. UTC | #3
On Thu, Oct 22, 2009 at 03:37:05PM -0200, Marcelo Tosatti wrote:
> On Thu, Sep 03, 2009 at 02:01:26PM -0400, Glauber Costa wrote:
> > [...]
> 
> How's qemu_flush_work supposed to execute if env->queue_lock is held
> here?
> 
> qemu_cond_wait() should work with env->queue_lock, and qemu_global_mutex
> should be dropped before waiting and reacquired on return.
After some thinking, I don't plan to introduce this until it is absolutely needed.
I believe we can refactor a lot of code to actually run on the vcpu it should,
instead of triggering a remote event.

Patch

diff --git a/cpu-all.h b/cpu-all.h
index 1a6a812..529479e 100644
--- a/cpu-all.h
+++ b/cpu-all.h
@@ -763,6 +763,9 @@  extern CPUState *cpu_single_env;
 extern int64_t qemu_icount;
 extern int use_icount;
 
+void qemu_queue_work(CPUState *env, void (*func)(void *data), void *data);
+void qemu_flush_work(CPUState *env);
+
 #define CPU_INTERRUPT_HARD   0x02 /* hardware interrupt pending */
 #define CPU_INTERRUPT_EXITTB 0x04 /* exit the current TB (use for x86 a20 case) */
 #define CPU_INTERRUPT_TIMER  0x08 /* internal timer exception pending */
diff --git a/cpu-defs.h b/cpu-defs.h
index b6c8b95..20f83e3 100644
--- a/cpu-defs.h
+++ b/cpu-defs.h
@@ -31,6 +31,8 @@ 
 #include "sys-queue.h"
 #include "targphys.h"
 
+#include "qemu-thread.h"
+
 #ifndef TARGET_LONG_BITS
 #error TARGET_LONG_BITS must be defined before including this header
 #endif
@@ -134,6 +136,14 @@  typedef struct CPUWatchpoint {
     TAILQ_ENTRY(CPUWatchpoint) entry;
 } CPUWatchpoint;
 
+typedef struct QemuWorkItem {
+    void (*func)(void *data);
+    void *data;
+    int done;
+    TAILQ_ENTRY(QemuWorkItem) entry;
+} QemuWorkItem;
+
+
 #define CPU_TEMP_BUF_NLONGS 128
 #define CPU_COMMON                                                      \
     struct TranslationBlock *current_tb; /* currently executing TB  */  \
@@ -175,6 +185,10 @@  typedef struct CPUWatchpoint {
     TAILQ_HEAD(watchpoints_head, CPUWatchpoint) watchpoints;            \
     CPUWatchpoint *watchpoint_hit;                                      \
                                                                         \
+    TAILQ_HEAD(queued_work_head, QemuWorkItem) queued_work;             \
+    uint64_t queued_local, queued_total;                                \
+    struct QemuMutex queue_lock;                                        \
+                                                                        \
     struct GDBRegisterState *gdb_regs;                                  \
                                                                         \
     /* Core interrupt code */                                           \
@@ -194,6 +208,7 @@  typedef struct CPUWatchpoint {
     uint32_t created;                                                   \
     struct QemuThread *thread;                                          \
     struct QemuCond *halt_cond;                                         \
+    struct QemuCond work_cond;                                          \
     const char *cpu_model_str;                                          \
     struct KVMState *kvm_state;                                         \
     struct kvm_run *kvm_run;                                            \
diff --git a/exec.c b/exec.c
index 21f10b9..2db5e5b 100644
--- a/exec.c
+++ b/exec.c
@@ -577,6 +577,7 @@  void cpu_exec_init(CPUState *env)
     env->numa_node = 0;
     TAILQ_INIT(&env->breakpoints);
     TAILQ_INIT(&env->watchpoints);
+    TAILQ_INIT(&env->queued_work);
     *penv = env;
 #if defined(CONFIG_USER_ONLY)
     cpu_list_unlock();
diff --git a/kvm-all.c b/kvm-all.c
index 03a0ed6..ed0a852 100644
--- a/kvm-all.c
+++ b/kvm-all.c
@@ -860,21 +860,34 @@  int kvm_vm_ioctl(KVMState *s, int type, ...)
     return ret;
 }
 
-int kvm_vcpu_ioctl(CPUState *env, int type, ...)
+static void kvm_remote_ioctl(void *data)
 {
+    KVMIoctl *arg = data;
     int ret;
+
+    ret = ioctl(arg->fd, arg->type, arg->data);
+    if (ret == -1)
+        ret = -errno;
+    arg->ret = ret;
+}
+
+int kvm_vcpu_ioctl(CPUState *env, int type, ...)
+{
     void *arg;
     va_list ap;
+    KVMIoctl data;
 
     va_start(ap, type);
     arg = va_arg(ap, void *);
     va_end(ap);
 
-    ret = ioctl(env->kvm_fd, type, arg);
-    if (ret == -1)
-        ret = -errno;
+    data.type = type;
+    data.data = arg;
+    data.fd = env->kvm_fd;
 
-    return ret;
+    qemu_queue_work(env, kvm_remote_ioctl, (void *)&data);
+
+    return data.ret;
 }
 
 int kvm_has_sync_mmu(void)
@@ -907,15 +920,6 @@  void kvm_setup_guest_memory(void *start, size_t size)
 }
 
 #ifdef KVM_CAP_SET_GUEST_DEBUG
-static void on_vcpu(CPUState *env, void (*func)(void *data), void *data)
-{
-    if (env == cpu_single_env) {
-        func(data);
-        return;
-    }
-    abort();
-}
-
 struct kvm_sw_breakpoint *kvm_find_sw_breakpoint(CPUState *env,
                                                  target_ulong pc)
 {
@@ -933,32 +937,18 @@  int kvm_sw_breakpoints_active(CPUState *env)
     return !TAILQ_EMPTY(&env->kvm_state->kvm_sw_breakpoints);
 }
 
-struct kvm_set_guest_debug_data {
-    struct kvm_guest_debug dbg;
-    CPUState *env;
-    int err;
-};
-
-static void kvm_invoke_set_guest_debug(void *data)
-{
-    struct kvm_set_guest_debug_data *dbg_data = data;
-    dbg_data->err = kvm_vcpu_ioctl(dbg_data->env, KVM_SET_GUEST_DEBUG, &dbg_data->dbg);
-}
-
 int kvm_update_guest_debug(CPUState *env, unsigned long reinject_trap)
 {
-    struct kvm_set_guest_debug_data data;
+    struct kvm_guest_debug dbg;
 
-    data.dbg.control = 0;
+    dbg.control = 0;
     if (env->singlestep_enabled)
-        data.dbg.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP;
+        dbg.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP;
 
-    kvm_arch_update_guest_debug(env, &data.dbg);
-    data.dbg.control |= reinject_trap;
-    data.env = env;
+    kvm_arch_update_guest_debug(env, &dbg);
+    dbg.control |= reinject_trap;
 
-    on_vcpu(env, kvm_invoke_set_guest_debug, &data);
-    return data.err;
+    return kvm_vcpu_ioctl(env, KVM_SET_GUEST_DEBUG, &dbg);
 }
 
 int kvm_insert_breakpoint(CPUState *current_env, target_ulong addr,
diff --git a/kvm.h b/kvm.h
index dbe825f..7595141 100644
--- a/kvm.h
+++ b/kvm.h
@@ -139,4 +139,11 @@  static inline void cpu_synchronize_state(CPUState *env)
     }
 }
 
+typedef struct KVMIoctl {
+    int fd;
+    int type;
+    int ret;
+    void *data;
+} KVMIoctl;
+
 #endif
diff --git a/vl.c b/vl.c
index ff6a597..21f3d69 100644
--- a/vl.c
+++ b/vl.c
@@ -3673,6 +3673,11 @@  void vm_stop(int reason)
     do_vm_stop(reason);
 }
 
+void qemu_queue_work(CPUState *env, void (*func)(void *data), void *data, int wait)
+{
+        func(data);
+}
+
 void qemu_mutex_lock_iothread(void) {}
 void qemu_mutex_unlock_iothread(void) {}
 #else /* CONFIG_IOTHREAD */
@@ -3698,6 +3703,23 @@  static void block_io_signals(void);
 static void unblock_io_signals(void);
 static int tcg_has_work(void);
 
+static pthread_key_t current_env;
+
+static CPUState *qemu_get_current_env(void)
+{
+    return pthread_getspecific(current_env);
+}
+
+static void qemu_set_current_env(CPUState *env)
+{
+    pthread_setspecific(current_env, env);
+}
+
+static void qemu_init_current_env(void)
+{
+    pthread_key_create(&current_env, NULL);
+}
+
 static int qemu_init_main_loop(void)
 {
     int ret;
@@ -3710,6 +3732,7 @@  static int qemu_init_main_loop(void)
     qemu_mutex_init(&qemu_fair_mutex);
     qemu_mutex_init(&qemu_global_mutex);
     qemu_mutex_lock(&qemu_global_mutex);
+    qemu_init_current_env();
 
     unblock_io_signals();
     qemu_thread_self(&io_thread);
@@ -3733,6 +3756,8 @@  static void qemu_wait_io_event(CPUState *env)
     qemu_mutex_unlock(&qemu_fair_mutex);
 
     qemu_mutex_lock(&qemu_global_mutex);
+    qemu_flush_work(env);
+
     if (env->stop) {
         env->stop = 0;
         env->stopped = 1;
@@ -3748,6 +3773,8 @@  static void *kvm_cpu_thread_fn(void *arg)
 
     block_io_signals();
     qemu_thread_self(env->thread);
+    qemu_set_current_env(env);
+
     kvm_init_vcpu(env);
 
     /* signal CPU creation */
@@ -3808,6 +3835,50 @@  void qemu_cpu_kick(void *_env)
         qemu_thread_signal(env->thread, SIGUSR1);
 }
 
+void qemu_queue_work(CPUState *env, void (*func)(void *data), void *data)
+{
+    QemuWorkItem wii;
+
+    env->queued_total++;
+
+    if (env == qemu_get_current_env()) {
+        env->queued_local++;
+        func(data);
+        return;
+    }
+
+    wii.func = func;
+    wii.data = data;
+    qemu_mutex_lock(&env->queue_lock);
+    TAILQ_INSERT_TAIL(&env->queued_work, &wii, entry);
+    qemu_mutex_unlock(&env->queue_lock);
+
+    qemu_thread_signal(env->thread, SIGUSR1);
+
+    qemu_mutex_lock(&env->queue_lock);
+    while (!wii.done) {
+        qemu_cond_wait(&env->work_cond, &qemu_global_mutex);
+    }
+    qemu_mutex_unlock(&env->queue_lock);
+}
+
+void qemu_flush_work(CPUState *env)
+{
+    QemuWorkItem *wi;
+
+    qemu_mutex_lock(&env->queue_lock);
+    TAILQ_FOREACH(wi, &env->queued_work, entry) {
+        wi->func(wi->data);
+        wi->done = 1;
+        qemu_mutex_unlock(&env->queue_lock);
+        qemu_cond_broadcast(&env->work_cond);
+        qemu_mutex_lock(&env->queue_lock);
+        TAILQ_REMOVE(&env->queued_work, wi, entry);
+    }
+    qemu_mutex_unlock(&env->queue_lock);
+
+}
+
 int qemu_cpu_self(void *env)
 {
     return (cpu_single_env != NULL);
@@ -3961,6 +4032,10 @@  void qemu_init_vcpu(void *_env)
 {
     CPUState *env = _env;
 
+    qemu_cond_init(&env->work_cond);
+
+    qemu_mutex_init(&env->queue_lock);
+
     if (kvm_enabled())
         kvm_start_vcpu(env);
     else