Patchwork Re: [PATCH] fix smp with tcg mode and --enable-io-thread

login
register
mail settings
Submitter Jan Kiszka
Date June 21, 2010, 8:25 p.m.
Message ID <4C1FCABA.3060207@web.de>
Download mbox | patch
Permalink /patch/56344/
State New
Headers show

Comments

Jan Kiszka - June 21, 2010, 8:25 p.m.
Marcelo Tosatti wrote:
> Clear exit_request when iothread grabs the global lock. 
> 
> Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
> 
> diff --git a/cpu-exec.c b/cpu-exec.c
> index 026980a..74cb973 100644
> --- a/cpu-exec.c
> +++ b/cpu-exec.c
> @@ -236,10 +236,8 @@ int cpu_exec(CPUState *env1)
>      asm("");
>      env = env1;
>  
> -    if (exit_request) {
> +    if (exit_request)
>          env->exit_request = 1;
> -        exit_request = 0;
> -    }

Coding style...

>  
>  #if defined(TARGET_I386)
>      if (!kvm_enabled()) {
> diff --git a/cpus.c b/cpus.c
> index fcd0f09..ef1ab22 100644
> --- a/cpus.c
> +++ b/cpus.c
> @@ -598,6 +598,7 @@ void qemu_mutex_lock_iothread(void)
>          }
>          qemu_mutex_unlock(&qemu_fair_mutex);
>      }
> +   exit_request = 0;
>  }
>  
>  void qemu_mutex_unlock_iothread(void)
> 
> 

I looked into this a bit as well, and that's what I also have in my
queue.

But things are still widely broken: pause_all_vcpus and run_on_cpu as
there is no guarantee that all VCPUs regularly call into
qemu_wait_io_event. Also breakpoints don't work, not only in SMP mode.

The former issue is mostly fine with this patch:


I haven't looked into the breakpoint bug yet as performance (likely
VCPU scheduling) in iothread mode is still suffering compared to classic
mode, specifically when you go up with the number of VCPUs (try -smp 8).
And there is some race that causes a lock-up in qemu_mutex_lock_iothread
after a while (the cpu_unlink_tb seems to race with the linking - just a
guess so far).

Anyway, all this is getting terribly complex, hard to grasp, and maybe
therefore fragile as well. Specifically the SMP round-robin scheduling
looks like it requires a redesign for iothread mode (my feeling is it
only works by chance ATM). Part of the issue may not be new, but could
have been shadowed by the frequently interrupting I/O load when using a
single thread. The rest seems to lack a bit of a user base...

Jan
Jan Kiszka - June 21, 2010, 8:58 p.m.
Jan Kiszka wrote:
> Marcelo Tosatti wrote:
>> Clear exit_request when iothread grabs the global lock. 
>>
>> Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
>>
>> diff --git a/cpu-exec.c b/cpu-exec.c
>> index 026980a..74cb973 100644
>> --- a/cpu-exec.c
>> +++ b/cpu-exec.c
>> @@ -236,10 +236,8 @@ int cpu_exec(CPUState *env1)
>>      asm("");
>>      env = env1;
>>  
>> -    if (exit_request) {
>> +    if (exit_request)
>>          env->exit_request = 1;
>> -        exit_request = 0;
>> -    }
> 
> Coding style...
> 
>>  
>>  #if defined(TARGET_I386)
>>      if (!kvm_enabled()) {
>> diff --git a/cpus.c b/cpus.c
>> index fcd0f09..ef1ab22 100644
>> --- a/cpus.c
>> +++ b/cpus.c
>> @@ -598,6 +598,7 @@ void qemu_mutex_lock_iothread(void)
>>          }
>>          qemu_mutex_unlock(&qemu_fair_mutex);
>>      }
>> +   exit_request = 0;
>>  }
>>  
>>  void qemu_mutex_unlock_iothread(void)
>>
>>
> 
> I looked into this a bit as well, and that's what I also have in my
> queue.
> 
> But things are still widely broken: pause_all_vcpus and run_on_cpu as
> there is no guarantee that all VCPUs regularly call into
> qemu_wait_io_event. Also breakpoints don't work, not only in SMP mode.
> 
> The former issue is mostly fine with this patch:
> 
> diff --git a/cpu-exec.c b/cpu-exec.c
> index 026980a..9f4191d 100644
> --- a/cpu-exec.c
> +++ b/cpu-exec.c
> @@ -236,9 +236,8 @@ int cpu_exec(CPUState *env1)
>      asm("");
>      env = env1;
>  
> -    if (exit_request) {
> +    if (unlikely(exit_request)) {
>          env->exit_request = 1;
> -        exit_request = 0;
>      }
>  
>  #if defined(TARGET_I386)
> diff --git a/cpus.c b/cpus.c
> index fcd0f09..2ce839d 100644
> --- a/cpus.c
> +++ b/cpus.c
> @@ -39,7 +39,6 @@
>  #define SIG_IPI SIGUSR1
>  #endif
>  
> -static CPUState *cur_cpu;
>  static CPUState *next_cpu;
>  
>  /***********************************************************/
> @@ -331,6 +330,7 @@ int qemu_init_main_loop(void)
>          return ret;
>  
>      qemu_cond_init(&qemu_pause_cond);
> +    qemu_cond_init(&qemu_system_cond);
>      qemu_mutex_init(&qemu_fair_mutex);
>      qemu_mutex_init(&qemu_global_mutex);
>      qemu_mutex_lock(&qemu_global_mutex);
> @@ -401,10 +401,12 @@ static void qemu_wait_io_event_common(CPUState *env)
>      flush_queued_work(env);
>  }
>  
> -static void qemu_wait_io_event(CPUState *env)
> +static void qemu_tcg_wait_io_event(void)
>  {
> +    CPUState *env;
> +
>      while (!tcg_has_work())
> -        qemu_cond_timedwait(env->halt_cond, &qemu_global_mutex, 1000);
> +        qemu_cond_timedwait(tcg_halt_cond, &qemu_global_mutex, 1000);
>  
>      qemu_mutex_unlock(&qemu_global_mutex);
>  
> @@ -417,7 +419,10 @@ static void qemu_wait_io_event(CPUState *env)
>      qemu_mutex_unlock(&qemu_fair_mutex);
>  
>      qemu_mutex_lock(&qemu_global_mutex);
> -    qemu_wait_io_event_common(env);
> +
> +    for (env = first_cpu; env != NULL; env = env->next_cpu) {
> +        qemu_wait_io_event_common(env);
> +    }
>  }
>  
>  static void qemu_kvm_eat_signal(CPUState *env, int timeout)
> @@ -502,7 +507,7 @@ static void *tcg_cpu_thread_fn(void *arg)
>  
>      while (1) {
>          tcg_cpu_exec();
> -        qemu_wait_io_event(cur_cpu);
> +        qemu_tcg_wait_io_event();
>      }
>  
>      return NULL;
> @@ -768,11 +773,11 @@ bool tcg_cpu_exec(void)
>  
>      if (next_cpu == NULL)
>          next_cpu = first_cpu;
> -    for (; next_cpu != NULL; next_cpu = next_cpu->next_cpu) {
> -        CPUState *env = cur_cpu = next_cpu;
> +    for (; next_cpu != NULL && !exit_request; next_cpu = next_cpu->next_cpu) {
> +        CPUState *env = next_cpu;
>  
>          qemu_clock_enable(vm_clock,
> -                          (cur_cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);
> +                          (env->singlestep_enabled & SSTEP_NOTIMER) == 0);
>  
>          if (qemu_alarm_pending())
>              break;
> @@ -787,6 +792,7 @@ bool tcg_cpu_exec(void)
>              break;
>          }
>      }
> +    exit_request = 0;
>      return tcg_has_work();
>  }
>  
> 
> I haven't looked into the breakpoint bug yet as performance (likely
> VCPU scheduling) in iothread mode is still suffering compared to classic
> mode, specifically when you go up with the number of VCPUs (try -smp 8).

Hmm, retesting this, I cannot reproduce the performance regression
anymore. Likely I confused something or had instrumentations applied.

Locking up still works "reliably". :(

Jan

> And there is some race that causes a lock-up in qemu_mutex_lock_iothread
> after a while (the cpu_unlink_tb seems to race with the linking - just a
> guess so far).
> 
> Anyway, all this is getting terribly complex, hard to grasp, and maybe
> therefore fragile as well. Specifically the SMP round-robin scheduling
> looks like it requires a redesign for iothread mode (my feeling is it
> only works by chance ATM). Part of the issue may not be new, but could
> have been shadowed by the frequently interrupting I/O load when using a
> single thread. The rest seems to lack a bit of a user base...
> 
> Jan
>

Patch

diff --git a/cpu-exec.c b/cpu-exec.c
index 026980a..9f4191d 100644
--- a/cpu-exec.c
+++ b/cpu-exec.c
@@ -236,9 +236,8 @@  int cpu_exec(CPUState *env1)
     asm("");
     env = env1;
 
-    if (exit_request) {
+    if (unlikely(exit_request)) {
         env->exit_request = 1;
-        exit_request = 0;
     }
 
 #if defined(TARGET_I386)
diff --git a/cpus.c b/cpus.c
index fcd0f09..2ce839d 100644
--- a/cpus.c
+++ b/cpus.c
@@ -39,7 +39,6 @@ 
 #define SIG_IPI SIGUSR1
 #endif
 
-static CPUState *cur_cpu;
 static CPUState *next_cpu;
 
 /***********************************************************/
@@ -331,6 +330,7 @@  int qemu_init_main_loop(void)
         return ret;
 
     qemu_cond_init(&qemu_pause_cond);
+    qemu_cond_init(&qemu_system_cond);
     qemu_mutex_init(&qemu_fair_mutex);
     qemu_mutex_init(&qemu_global_mutex);
     qemu_mutex_lock(&qemu_global_mutex);
@@ -401,10 +401,12 @@  static void qemu_wait_io_event_common(CPUState *env)
     flush_queued_work(env);
 }
 
-static void qemu_wait_io_event(CPUState *env)
+static void qemu_tcg_wait_io_event(void)
 {
+    CPUState *env;
+
     while (!tcg_has_work())
-        qemu_cond_timedwait(env->halt_cond, &qemu_global_mutex, 1000);
+        qemu_cond_timedwait(tcg_halt_cond, &qemu_global_mutex, 1000);
 
     qemu_mutex_unlock(&qemu_global_mutex);
 
@@ -417,7 +419,10 @@  static void qemu_wait_io_event(CPUState *env)
     qemu_mutex_unlock(&qemu_fair_mutex);
 
     qemu_mutex_lock(&qemu_global_mutex);
-    qemu_wait_io_event_common(env);
+
+    for (env = first_cpu; env != NULL; env = env->next_cpu) {
+        qemu_wait_io_event_common(env);
+    }
 }
 
 static void qemu_kvm_eat_signal(CPUState *env, int timeout)
@@ -502,7 +507,7 @@  static void *tcg_cpu_thread_fn(void *arg)
 
     while (1) {
         tcg_cpu_exec();
-        qemu_wait_io_event(cur_cpu);
+        qemu_tcg_wait_io_event();
     }
 
     return NULL;
@@ -768,11 +773,11 @@  bool tcg_cpu_exec(void)
 
     if (next_cpu == NULL)
         next_cpu = first_cpu;
-    for (; next_cpu != NULL; next_cpu = next_cpu->next_cpu) {
-        CPUState *env = cur_cpu = next_cpu;
+    for (; next_cpu != NULL && !exit_request; next_cpu = next_cpu->next_cpu) {
+        CPUState *env = next_cpu;
 
         qemu_clock_enable(vm_clock,
-                          (cur_cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);
+                          (env->singlestep_enabled & SSTEP_NOTIMER) == 0);
 
         if (qemu_alarm_pending())
             break;
@@ -787,6 +792,7 @@  bool tcg_cpu_exec(void)
             break;
         }
     }
+    exit_request = 0;
     return tcg_has_work();
 }