[v2] vnc: threaded server depends on io-thread

Message ID: 1299676892-19246-1-git-send-email-corentin.chary@gmail.com
State: New

Commit Message

Corentin Chary March 9, 2011, 1:21 p.m. UTC
The threaded VNC server messes with QEMU fd handlers without
any kind of locking, and that can cause some nasty race conditions.

The IO-Thread provides appropriate locking primitives to avoid that.
This patch makes CONFIG_VNC_THREAD depend on CONFIG_IO_THREAD,
and adds lock and unlock calls around the two faulty calls.

Thanks to Jan Kiszka for helping me solve this issue.

Cc: Jan Kiszka <jan.kiszka@web.de>
Signed-off-by: Corentin Chary <corentin.chary@gmail.com>
---
The previous patch was total crap: it introduced race conditions,
and probably crashed on client disconnections.

 configure           |    9 +++++++++
 ui/vnc-jobs-async.c |   24 +++++++++++++++++++-----
 2 files changed, 28 insertions(+), 5 deletions(-)
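
With this change, a threaded-VNC build must also enable the I/O thread at
configure time, e.g. (assuming the usual configure flag names of this era):

    ./configure --enable-io-thread --enable-vnc-thread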

Comments

Corentin Chary March 9, 2011, 1:42 p.m. UTC | #1
On Wed, Mar 9, 2011 at 1:21 PM, Corentin Chary <corentin.chary@gmail.com> wrote:
> The threaded VNC server messes with QEMU fd handlers without
> any kind of locking, and that can cause some nasty race conditions.
>
> The IO-Thread provides appropriate locking primitives to avoid that.
> This patch makes CONFIG_VNC_THREAD depend on CONFIG_IO_THREAD,
> and adds lock and unlock calls around the two faulty calls.
>
> Thanks to Jan Kiszka for helping me solve this issue.
>
> Cc: Jan Kiszka <jan.kiszka@web.de>
> Signed-off-by: Corentin Chary <corentin.chary@gmail.com>
> ---
> The previous patch was total crap: it introduced race conditions,
> and probably crashed on client disconnections.

Forget this one too; it also deadlocks on vnc_jobs_join(). I'll need some
more time to fix that.
Paolo Bonzini March 9, 2011, 1:51 p.m. UTC | #2
On 03/09/2011 02:21 PM, Corentin Chary wrote:
> The threaded VNC server messes with QEMU fd handlers without
> any kind of locking, and that can cause some nasty race conditions.
>
> The IO-Thread provides appropriate locking primitives to avoid that.
> This patch makes CONFIG_VNC_THREAD depend on CONFIG_IO_THREAD,
> and adds lock and unlock calls around the two faulty calls.
>
> Thanks to Jan Kiszka for helping me solve this issue.
>
> Cc: Jan Kiszka <jan.kiszka@web.de>
> Signed-off-by: Corentin Chary <corentin.chary@gmail.com>
> ---
> The previous patch was total crap: it introduced race conditions,
> and probably crashed on client disconnections.
>
> [...]

Acked-by: Paolo Bonzini <pbonzini@redhat.com> for stable.

For 0.15, I believe an iohandler-list lock is a better solution.

Paolo
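
A minimal sketch of the iohandler-list-lock idea: a narrow mutex guarding
only the fd-handler list, so a worker thread can update its handlers
without ever taking the global iothread lock. All names and types below
are hypothetical simplifications, not QEMU's actual code:

    #include <pthread.h>
    #include <stddef.h>

    /* Hypothetical, simplified fd handler entry. */
    typedef struct IOHandler {
        int fd;
        void (*fd_read)(void *opaque);
        void *opaque;
        struct IOHandler *next;
    } IOHandler;

    static IOHandler *io_handlers;
    /* Dedicated lock for the handler list only, so callers such as the
     * VNC worker never need the big qemu_mutex_lock_iothread(). */
    static pthread_mutex_t io_handlers_lock = PTHREAD_MUTEX_INITIALIZER;

    void set_fd_read_handler(int fd, void (*fd_read)(void *), void *opaque)
    {
        IOHandler *h;

        pthread_mutex_lock(&io_handlers_lock);
        for (h = io_handlers; h; h = h->next) {
            if (h->fd == fd) {
                h->fd_read = fd_read;
                h->opaque = opaque;
                break;
            }
        }
        /* (insertion and removal elided for brevity) */
        pthread_mutex_unlock(&io_handlers_lock);
    }

The main loop would take the same narrow lock while walking the list, so
neither side ever needs to hold the global lock just to touch handlers.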
Corentin Chary March 9, 2011, 1:59 p.m. UTC | #3
On Wed, Mar 9, 2011 at 1:51 PM, Paolo Bonzini <pbonzini@redhat.com> wrote:
> On 03/09/2011 02:21 PM, Corentin Chary wrote:
>>
>> The threaded VNC server messes with QEMU fd handlers without
>> any kind of locking, and that can cause some nasty race conditions.
>>
>> The IO-Thread provides appropriate locking primitives to avoid that.
>> This patch makes CONFIG_VNC_THREAD depend on CONFIG_IO_THREAD,
>> and adds lock and unlock calls around the two faulty calls.
>>
>> Thanks to Jan Kiszka for helping me solve this issue.
>>
>> Cc: Jan Kiszka <jan.kiszka@web.de>
>> Signed-off-by: Corentin Chary <corentin.chary@gmail.com>
>> ---
>> The previous patch was total crap: it introduced race conditions,
>> and probably crashed on client disconnections.
>>
>> [...]
>
> Acked-by: Paolo Bonzini <pbonzini@redhat.com> for stable.

Nacked-by: Corentin Chary <corentin.chary@gmail.com>

> For 0.15, I believe an iohandler-list lock is a better solution.

I believe that's the only solution. Look at this deadlock:

(gdb) bt
#0  0x00007f70c067ad74 in __lll_lock_wait () from /lib/libpthread.so.0
#1  0x00007f70c0676219 in _L_lock_508 () from /lib/libpthread.so.0
#2  0x00007f70c067603b in pthread_mutex_lock () from /lib/libpthread.so.0
#3  0x00000000004c7559 in qemu_mutex_lock (mutex=0x10f4340) at qemu-thread.c:50
#4  0x00000000005644ef in main_loop_wait (nonblocking=<value optimized
out>) at /home/iksaif/dev/qemu/vl.c:1374
#5  0x00000000005653a8 in main_loop (argc=<value optimized out>,
argv=<value optimized out>, envp=<value optimized out>) at
/home/iksaif/dev/qemu/vl.c:1437
#6  main (argc=<value optimized out>, argv=<value optimized out>,
envp=<value optimized out>) at /home/iksaif/dev/qemu/vl.c:3148
(gdb) t 2
[Switching to thread 2 (Thread 0x7f709262b710 (LWP 19546))]#0
0x00007f70c067ad74 in __lll_lock_wait () from /lib/libpthread.so.0
(gdb) bt
#0  0x00007f70c067ad74 in __lll_lock_wait () from /lib/libpthread.so.0
#1  0x00007f70c0676219 in _L_lock_508 () from /lib/libpthread.so.0
#2  0x00007f70c067603b in pthread_mutex_lock () from /lib/libpthread.so.0
#3  0x00000000004c7559 in qemu_mutex_lock (mutex=0x10f4340) at qemu-thread.c:50
#4  0x00000000004c65de in vnc_worker_thread_loop (queue=0x33561d0) at
ui/vnc-jobs-async.c:254
#5  0x00000000004c6730 in vnc_worker_thread (arg=0x33561d0) at
ui/vnc-jobs-async.c:306
#6  0x00007f70c0673904 in start_thread () from /lib/libpthread.so.0
#7  0x00007f70be5cf1dd in clone () from /lib/libc.so.6
(gdb) t 3
[Switching to thread 3 (Thread 0x7f70b38ff710 (LWP 19545))]#0
0x00007f70c067829c in pthread_cond_wait () from /lib/libpthread.so.0
(gdb) bt
#0  0x00007f70c067829c in pthread_cond_wait () from /lib/libpthread.so.0
#1  0x00000000004c7239 in qemu_cond_wait (cond=0x33561d4, mutex=0x80)
at qemu-thread.c:133
#2  0x00000000004c617b in vnc_jobs_join (vs=0x35d9c10) at
ui/vnc-jobs-async.c:155
#3  0x00000000004afefa in vnc_update_client_sync (ds=<value optimized
out>, src_x=<value optimized out>, src_y=<value optimized out>,
dst_x=<value optimized out>, dst_y=<value optimized out>, w=<value
optimized out>, h=1) at ui/vnc.c:819
#4  vnc_dpy_copy (ds=<value optimized out>, src_x=<value optimized
out>, src_y=<value optimized out>, dst_x=<value optimized out>,
dst_y=<value optimized out>, w=<value optimized out>, h=1) at
ui/vnc.c:692
#5  0x000000000046b5e1 in dpy_copy (ds=0x3329d40, src_x=<value
optimized out>, src_y=<value optimized out>, dst_x=129, dst_y=377,
w=2, h=1) at console.h:273
#6  qemu_console_copy (ds=0x3329d40, src_x=<value optimized out>,
src_y=<value optimized out>, dst_x=129, dst_y=377, w=2, h=1) at
console.c:1579
#7  0x00000000005d6159 in cirrus_do_copy (s=0x3316ea8) at
/home/iksaif/dev/qemu/hw/cirrus_vga.c:729
#8  cirrus_bitblt_videotovideo_copy (s=0x3316ea8) at
/home/iksaif/dev/qemu/hw/cirrus_vga.c:747
#9  cirrus_bitblt_videotovideo (s=0x3316ea8) at
/home/iksaif/dev/qemu/hw/cirrus_vga.c:869
#10 cirrus_bitblt_start (s=0x3316ea8) at
/home/iksaif/dev/qemu/hw/cirrus_vga.c:1010
#11 0x00000000005d8b67 in cirrus_mmio_writel (opaque=0x33561d4,
addr=128, val=4294967042) at
/home/iksaif/dev/qemu/hw/cirrus_vga.c:2819
#12 0x00000000004e04d8 in cpu_physical_memory_rw (addr=4060086592,
buf=0x7f70b30fc028 "\002\377\377\377", len=4, is_write=1) at
/home/iksaif/dev/qemu/exec.c:3670
#13 0x000000000042c845 in kvm_cpu_exec (env=0x30f8dd0) at
/home/iksaif/dev/qemu/kvm-all.c:954
#14 0x000000000040c75e in qemu_kvm_cpu_thread_fn (arg=0x30f8dd0) at
/home/iksaif/dev/qemu/cpus.c:832
#15 0x00007f70c0673904 in start_thread () from /lib/libpthread.so.0
#16 0x00007f70be5cf1dd in clone () from /lib/libc.so.6
(gdb) t 4
[Switching to thread 4 (Thread 0x7f70b4103710 (LWP 19544))]#0
0x00007f70c067ad74 in __lll_lock_wait () from /lib/libpthread.so.0
(gdb) bt
#0  0x00007f70c067ad74 in __lll_lock_wait () from /lib/libpthread.so.0
#1  0x00007f70c0676219 in _L_lock_508 () from /lib/libpthread.so.0
#2  0x00007f70c067603b in pthread_mutex_lock () from /lib/libpthread.so.0
#3  0x00000000004c7559 in qemu_mutex_lock (mutex=0x10f4340) at qemu-thread.c:50
#4  0x000000000042c703 in kvm_cpu_exec (env=0x30e15f0) at
/home/iksaif/dev/qemu/kvm-all.c:924
#5  0x000000000040c75e in qemu_kvm_cpu_thread_fn (arg=0x30e15f0) at
/home/iksaif/dev/qemu/cpus.c:832
#6  0x00007f70c0673904 in start_thread () from /lib/libpthread.so.0
#7  0x00007f70be5cf1dd in clone () from /lib/libc.so.6

I currently don't see any solution for that one using the iothread lock:
- vnc_dpy_copy can be called with the iothread lock held
- vnc_dpy_copy needs to wait for the VNC thread to finish its work
before being able to do anything
- the VNC thread needs to take the iothread lock to finish its work
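
In short, the cycle is: a vCPU thread holds the iothread lock (here taken
on the kvm_cpu_exec path into cirrus_mmio_writel) and blocks in
vnc_jobs_join() waiting for the worker, while the worker blocks waiting
for the iothread lock. A compilable toy reproduction of that pattern,
with made-up names (plain pthreads, not QEMU code):

    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t iothread_lock = PTHREAD_MUTEX_INITIALIZER;
    static pthread_mutex_t job_mutex = PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t job_done = PTHREAD_COND_INITIALIZER;
    static int job_pending = 1;

    /* Worker: cannot finish its job without the iothread lock. */
    static void *worker(void *arg)
    {
        pthread_mutex_lock(&iothread_lock);  /* blocks: main holds it */
        /* ... encode and flush the VNC update ... */
        pthread_mutex_unlock(&iothread_lock);

        pthread_mutex_lock(&job_mutex);
        job_pending = 0;
        pthread_cond_signal(&job_done);      /* never reached */
        pthread_mutex_unlock(&job_mutex);
        return NULL;
    }

    int main(void)
    {
        pthread_t t;

        pthread_mutex_lock(&iothread_lock);  /* e.g. vCPU thread in mmio */
        pthread_create(&t, NULL, worker, NULL);

        /* Like vnc_jobs_join(): wait for the worker while still
         * holding the iothread lock -> deadlock. */
        pthread_mutex_lock(&job_mutex);
        while (job_pending) {
            pthread_cond_wait(&job_done, &job_mutex);
        }
        pthread_mutex_unlock(&job_mutex);

        pthread_mutex_unlock(&iothread_lock);
        pthread_join(t, NULL);
        printf("never reached\n");
        return 0;
    }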

Patch

diff --git a/configure b/configure
index 5513d3e..c8c1ac1 100755
--- a/configure
+++ b/configure
@@ -2455,6 +2455,15 @@  if test \( "$cpu" = "i386" -o "$cpu" = "x86_64" \) -a \
   roms="optionrom"
 fi
 
+# VNC Thread depends on IO Thread
+if test "$vnc_thread" = "yes" -a "$io_thread" != "yes"; then
+  echo
+  echo "ERROR: VNC thread depends on IO thread which isn't enabled."
+  echo "Please use --enable-io-thread if you want to enable it."
+  echo
+  exit 1
+fi
+
 
 echo "Install prefix    $prefix"
 echo "BIOS directory    `eval echo $datadir`"
diff --git a/ui/vnc-jobs-async.c b/ui/vnc-jobs-async.c
index f596247..d0c6f61 100644
--- a/ui/vnc-jobs-async.c
+++ b/ui/vnc-jobs-async.c
@@ -190,6 +190,18 @@  static void vnc_async_encoding_end(VncState *orig, VncState *local)
     queue->buffer = local->output;
 }
 
+static void vnc_worker_lock_output(VncState *vs)
+{
+    qemu_mutex_lock_iothread();
+    vnc_lock_output(vs);
+}
+
+static void vnc_worker_unlock_output(VncState *vs)
+{
+    vnc_unlock_output(vs);
+    qemu_mutex_unlock_iothread();
+}
+
 static int vnc_worker_thread_loop(VncJobQueue *queue)
 {
     VncJob *job;
@@ -211,11 +223,11 @@  static int vnc_worker_thread_loop(VncJobQueue *queue)
         return -1;
     }
 
-    vnc_lock_output(job->vs);
+    vnc_worker_lock_output(job->vs);
     if (job->vs->csock == -1 || job->vs->abort == true) {
         goto disconnected;
     }
-    vnc_unlock_output(job->vs);
+    vnc_worker_unlock_output(job->vs);
 
     /* Make a local copy of vs and switch output buffers */
     vnc_async_encoding_start(job->vs, &vs);
@@ -236,7 +248,7 @@  static int vnc_worker_thread_loop(VncJobQueue *queue)
             /* output mutex must be locked before going to
              * disconnected:
              */
-            vnc_lock_output(job->vs);
+            vnc_worker_lock_output(job->vs);
             goto disconnected;
         }
 
@@ -255,7 +267,7 @@  static int vnc_worker_thread_loop(VncJobQueue *queue)
     vs.output.buffer[saved_offset + 1] = n_rectangles & 0xFF;
 
     /* Switch back buffers */
-    vnc_lock_output(job->vs);
+    vnc_worker_lock_output(job->vs);
     if (job->vs->csock == -1) {
         goto disconnected;
     }
@@ -266,10 +278,12 @@  disconnected:
     /* Copy persistent encoding data */
     vnc_async_encoding_end(job->vs, &vs);
     flush = (job->vs->csock != -1 && job->vs->abort != true);
-    vnc_unlock_output(job->vs);
+    vnc_worker_unlock_output(job->vs);
 
     if (flush) {
+        qemu_mutex_lock_iothread();
         vnc_flush(job->vs);
+        qemu_mutex_unlock_iothread();
     }
 
     vnc_lock_queue(queue);