diff mbox

kvm: Prevent dynticks race condition for !CONFIG_IOTHREAD

Message ID 1296034784-6513-1-git-send-email-stefanha@linux.vnet.ibm.com
State New
Headers show

Commit Message

Stefan Hajnoczi Jan. 26, 2011, 9:39 a.m. UTC
The dynticks timer arranges for SIGALRM to be raised when the next
pending timer expires.  When building with !CONFIG_IOTHREAD, we need to
check whether a request to exit the vcpu is pending before re-entering
the guest.

Unfortunately there is a race condition here because SIGALRM may be
raised after we check for an exit request but before re-entering the
guest.  In that case the guest is re-entered without the dynticks timer
being rearmed.

This results in temporary loss of timers until some other event forces a
vmexit.  In the case of a CPU-bound guest it can cause softlockups.

This patch blocks SIGALRM before checking for an exit request and uses
KVM's sigmask support to atomically unblock it when entering the guest,
thereby making the exit request check safe.

Signed-off-by: Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
---
 cpus.c    |   17 ++++++++++++++++-
 kvm-all.c |   16 ++++++++++++++++
 2 files changed, 32 insertions(+), 1 deletions(-)

Does not affect qemu-kvm.git.  Still worth having in qemu.git so we don't get
odd behavior when building without --enable-io-thread.

Comments

Jan Kiszka Jan. 26, 2011, 1:12 p.m. UTC | #1
On 2011-01-26 10:39, Stefan Hajnoczi wrote:
> The dynticks timer arranges for SIGALRM to be raised when the next
> pending timer expires.  When building with !CONFIG_IOTHREAD, we need to
> check whether a request to exit the vcpu is pending before re-entering
> the guest.
> 
> Unfortunately there is a race condition here because SIGALRM may be
> raised after we check for an exit request but before re-entering the
> guest.  In that case the guest is re-entered without the dynticks timer
> being rearmed.
> 
> This results in temporary loss of timers until some other event forces a
> vmexit.  In the case of a CPU-bound guest it can cause softlockups.
> 
> This patch blocks SIGALRM before checking for an exit request and uses
> KVM's sigmask support to atomically unblock it when entering the guest,
> thereby making the exit request check safe.
> 
> Signed-off-by: Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
> ---
>  cpus.c    |   17 ++++++++++++++++-
>  kvm-all.c |   16 ++++++++++++++++
>  2 files changed, 32 insertions(+), 1 deletions(-)
> 
> Does not affect qemu-kvm.git.  Still worth having in qemu.git so we don't get
> odd behavior when building without --enable-io-thread.
> 
> diff --git a/cpus.c b/cpus.c
> index 0309189..59dbfab 100644
> --- a/cpus.c
> +++ b/cpus.c
> @@ -262,14 +262,29 @@ void qemu_main_loop_start(void)
>  {
>  }
>  
> +static void kvm_init_sigmask(CPUState *env)
> +{
> +    int r;
> +    sigset_t set;
> +
> +    pthread_sigmask(SIG_SETMASK, NULL, &set);
> +    r = kvm_set_signal_mask(env, &set);
> +    if (r) {
> +        fprintf(stderr, "kvm_set_signal_mask: %s\n", strerror(r));
> +        exit(1);
> +    }
> +}
> +
>  void qemu_init_vcpu(void *_env)
>  {
>      CPUState *env = _env;
>  
>      env->nr_cores = smp_cores;
>      env->nr_threads = smp_threads;
> -    if (kvm_enabled())
> +    if (kvm_enabled()) {
>          kvm_init_vcpu(env);
> +        kvm_init_sigmask(env);
> +    }
>      return;
>  }
>  
> diff --git a/kvm-all.c b/kvm-all.c
> index 255b6fa..9cc2553 100644
> --- a/kvm-all.c
> +++ b/kvm-all.c
> @@ -890,19 +890,31 @@ int kvm_cpu_exec(CPUState *env)
>  {
>      struct kvm_run *run = env->kvm_run;
>      int ret;
> +#ifndef CONFIG_IOTHREAD
> +    sigset_t set, old_set;
> +
> +    sigemptyset(&set);
> +    sigaddset(&set, SIGALRM);
> +#endif
>  
>      DPRINTF("kvm_cpu_exec()\n");
>  
>      do {
>  #ifndef CONFIG_IOTHREAD
> +        pthread_sigmask(SIG_BLOCK, &set, &old_set);
> +
>          if (env->exit_request) {
>              DPRINTF("interrupt exit requested\n");
> +            pthread_sigmask(SIG_SETMASK, &old_set, NULL);
>              ret = 0;
>              break;
>          }
>  #endif
>  
>          if (kvm_arch_process_irqchip_events(env)) {
> +#ifndef CONFIG_IOTHREAD
> +            pthread_sigmask(SIG_SETMASK, &old_set, NULL);
> +#endif
>              ret = 0;
>              break;
>          }
> @@ -920,6 +932,10 @@ int kvm_cpu_exec(CPUState *env)
>          cpu_single_env = env;
>          kvm_arch_post_run(env, run);
>  
> +#ifndef CONFIG_IOTHREAD
> +        pthread_sigmask(SIG_SETMASK, &old_set, NULL);
> +#endif
> +
>          if (ret == -EINTR || ret == -EAGAIN) {
>              cpu_exit(env);
>              DPRINTF("io window exit\n");

Good catch, but the code should be organized differently, probably with
the help of signalfd which we need for SIGBUS anyway. I'm currently
reworking signaling bits. I will pick this up and integrate a
corresponding solution in my queue. Will keep you posted!

Jan
Jan Kiszka Jan. 26, 2011, 1:56 p.m. UTC | #2
On 2011-01-26 14:12, Jan Kiszka wrote:
> On 2011-01-26 10:39, Stefan Hajnoczi wrote:
>> The dynticks timer arranges for SIGALRM to be raised when the next
>> pending timer expires.  When building with !CONFIG_IOTHREAD, we need to
>> check whether a request to exit the vcpu is pending before re-entering
>> the guest.
>>
>> Unfortunately there is a race condition here because SIGALRM may be
>> raised after we check for an exit request but before re-entering the
>> guest.  In that case the guest is re-entered without the dynticks timer
>> being rearmed.
>>
>> This results in temporary loss of timers until some other event forces a
>> vmexit.  In the case of a CPU-bound guest it can cause softlockups.
>>
>> This patch blocks SIGALRM before checking for an exit request and uses
>> KVM's sigmask support to atomically unblock it when entering the guest,
>> thereby making the exit request check safe.
>>
>> Signed-off-by: Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
>> ---
>>  cpus.c    |   17 ++++++++++++++++-
>>  kvm-all.c |   16 ++++++++++++++++
>>  2 files changed, 32 insertions(+), 1 deletions(-)
>>
>> Does not affect qemu-kvm.git.  Still worth having in qemu.git so we don't get
>> odd behavior when building without --enable-io-thread.
>>
>> diff --git a/cpus.c b/cpus.c
>> index 0309189..59dbfab 100644
>> --- a/cpus.c
>> +++ b/cpus.c
>> @@ -262,14 +262,29 @@ void qemu_main_loop_start(void)
>>  {
>>  }
>>  
>> +static void kvm_init_sigmask(CPUState *env)
>> +{
>> +    int r;
>> +    sigset_t set;
>> +
>> +    pthread_sigmask(SIG_SETMASK, NULL, &set);
>> +    r = kvm_set_signal_mask(env, &set);
>> +    if (r) {
>> +        fprintf(stderr, "kvm_set_signal_mask: %s\n", strerror(r));
>> +        exit(1);
>> +    }
>> +}
>> +
>>  void qemu_init_vcpu(void *_env)
>>  {
>>      CPUState *env = _env;
>>  
>>      env->nr_cores = smp_cores;
>>      env->nr_threads = smp_threads;
>> -    if (kvm_enabled())
>> +    if (kvm_enabled()) {
>>          kvm_init_vcpu(env);
>> +        kvm_init_sigmask(env);
>> +    }
>>      return;
>>  }
>>  
>> diff --git a/kvm-all.c b/kvm-all.c
>> index 255b6fa..9cc2553 100644
>> --- a/kvm-all.c
>> +++ b/kvm-all.c
>> @@ -890,19 +890,31 @@ int kvm_cpu_exec(CPUState *env)
>>  {
>>      struct kvm_run *run = env->kvm_run;
>>      int ret;
>> +#ifndef CONFIG_IOTHREAD
>> +    sigset_t set, old_set;
>> +
>> +    sigemptyset(&set);
>> +    sigaddset(&set, SIGALRM);
>> +#endif
>>  
>>      DPRINTF("kvm_cpu_exec()\n");
>>  
>>      do {
>>  #ifndef CONFIG_IOTHREAD
>> +        pthread_sigmask(SIG_BLOCK, &set, &old_set);
>> +
>>          if (env->exit_request) {
>>              DPRINTF("interrupt exit requested\n");
>> +            pthread_sigmask(SIG_SETMASK, &old_set, NULL);
>>              ret = 0;
>>              break;
>>          }
>>  #endif
>>  
>>          if (kvm_arch_process_irqchip_events(env)) {
>> +#ifndef CONFIG_IOTHREAD
>> +            pthread_sigmask(SIG_SETMASK, &old_set, NULL);
>> +#endif
>>              ret = 0;
>>              break;
>>          }
>> @@ -920,6 +932,10 @@ int kvm_cpu_exec(CPUState *env)
>>          cpu_single_env = env;
>>          kvm_arch_post_run(env, run);
>>  
>> +#ifndef CONFIG_IOTHREAD
>> +        pthread_sigmask(SIG_SETMASK, &old_set, NULL);
>> +#endif
>> +
>>          if (ret == -EINTR || ret == -EAGAIN) {
>>              cpu_exit(env);
>>              DPRINTF("io window exit\n");
> 
> Good catch, but the code should be organized differently, probably with
> the help of signalfd which we need for SIGBUS anyway. I'm currently
> reworking signaling bits. I will pick this up and integrate a
> corresponding solution in my queue. Will keep you posted!
> 

Looks like this should also include SIGIO in case hpet is selected as
host timer.

Jan
Stefan Hajnoczi Jan. 26, 2011, 2:01 p.m. UTC | #3
On Wed, Jan 26, 2011 at 1:56 PM, Jan Kiszka <jan.kiszka@siemens.com> wrote:
> Looks like this should also include SIGIO in case hpet is selected as
> host timer.

Yeah that can't hurt although since hpet is periodic we don't risk
forgetting to rearm the timer.

Can you explain the signalfd idea a little more?  I'm not sure that
model would work for !CONFIG_IOTHREAD.  Maybe if QEMU itself uses
signalfd but KVM sigmask is used to unblock the signals during guest
code execution (and a solution for TCG too).

Stefan
Jan Kiszka Jan. 26, 2011, 2:25 p.m. UTC | #4
On 2011-01-26 15:01, Stefan Hajnoczi wrote:
> On Wed, Jan 26, 2011 at 1:56 PM, Jan Kiszka <jan.kiszka@siemens.com> wrote:
>> Looks like this should also include SIGIO in case hpet is selected as
>> host timer.
> 
> Yeah that can't hurt although since hpet is periodic we don't risk
> forgetting to rearm the timer.
> 
> Can you explain the signalfd idea a little more?  I'm not sure that
> model would work for !CONFIG_IOTHREAD.  Maybe if QEMU itself uses
> signalfd but KVM sigmask is used to unblock the signals during guest
> code execution (and a solution for TCG too).

Will push a version I would like you to test as well in a few minutes.
This is already the concept: As under IOTHREAD, we will block SIGIO and
SIGALARM the whole time - except during KVM_RUN - and catch those
signals via qemu_signalfd in the main select loop. Of course, this
includes setting up the kvm signal mask properly as well (was already
part of my previously posted series for other reasons).

Jan
diff mbox

Patch

diff --git a/cpus.c b/cpus.c
index 0309189..59dbfab 100644
--- a/cpus.c
+++ b/cpus.c
@@ -262,14 +262,29 @@  void qemu_main_loop_start(void)
 {
 }
 
+static void kvm_init_sigmask(CPUState *env)
+{
+    int r;
+    sigset_t set;
+
+    pthread_sigmask(SIG_SETMASK, NULL, &set);
+    r = kvm_set_signal_mask(env, &set);
+    if (r) {
+        fprintf(stderr, "kvm_set_signal_mask: %s\n", strerror(r));
+        exit(1);
+    }
+}
+
 void qemu_init_vcpu(void *_env)
 {
     CPUState *env = _env;
 
     env->nr_cores = smp_cores;
     env->nr_threads = smp_threads;
-    if (kvm_enabled())
+    if (kvm_enabled()) {
         kvm_init_vcpu(env);
+        kvm_init_sigmask(env);
+    }
     return;
 }
 
diff --git a/kvm-all.c b/kvm-all.c
index 255b6fa..9cc2553 100644
--- a/kvm-all.c
+++ b/kvm-all.c
@@ -890,19 +890,31 @@  int kvm_cpu_exec(CPUState *env)
 {
     struct kvm_run *run = env->kvm_run;
     int ret;
+#ifndef CONFIG_IOTHREAD
+    sigset_t set, old_set;
+
+    sigemptyset(&set);
+    sigaddset(&set, SIGALRM);
+#endif
 
     DPRINTF("kvm_cpu_exec()\n");
 
     do {
 #ifndef CONFIG_IOTHREAD
+        pthread_sigmask(SIG_BLOCK, &set, &old_set);
+
         if (env->exit_request) {
             DPRINTF("interrupt exit requested\n");
+            pthread_sigmask(SIG_SETMASK, &old_set, NULL);
             ret = 0;
             break;
         }
 #endif
 
         if (kvm_arch_process_irqchip_events(env)) {
+#ifndef CONFIG_IOTHREAD
+            pthread_sigmask(SIG_SETMASK, &old_set, NULL);
+#endif
             ret = 0;
             break;
         }
@@ -920,6 +932,10 @@  int kvm_cpu_exec(CPUState *env)
         cpu_single_env = env;
         kvm_arch_post_run(env, run);
 
+#ifndef CONFIG_IOTHREAD
+        pthread_sigmask(SIG_SETMASK, &old_set, NULL);
+#endif
+
         if (ret == -EINTR || ret == -EAGAIN) {
             cpu_exit(env);
             DPRINTF("io window exit\n");