Patchwork [0/3] another round of qemu-char fixes

login
register
mail settings
Submitter Paolo Bonzini
Date April 18, 2013, 1:11 p.m.
Message ID <516FF0E9.9080108@redhat.com>
Download mbox | patch
Permalink /patch/237638/
State New
Headers show

Comments

Paolo Bonzini - April 18, 2013, 1:11 p.m.
Il 18/04/2013 14:38, Paolo Bonzini ha scritto:
> Il 18/04/2013 08:14, Gerd Hoffmann ha scritto:
>> Thread 1 (Thread 0x7f9038188980 (LWP 27849)):
>> ---Type <return> to continue, or q <return> to quit---
>> #0  __lll_lock_wait () at
>> ../nptl/sysdeps/unix/sysv/linux/x86_64/lowlevellock.S:136
>> #1  0x00007f90366a9388 in _L_lock_854 () from /lib64/libpthread.so.0
>> #2  0x00007f90366a9257 in __pthread_mutex_lock (mutex=0x7f903abb1538) at
>> pthread_mutex_lock.c:61
>> #3  0x00007f9037903c37 in ?? () from /lib64/libglib-2.0.so.0
>> #4  0x00007f90383c5d96 in io_watch_poll_finalize (source=<value
>> optimized out>)
>>     at /home/kraxel/projects/qemu/qemu-char.c:648
> 
> Hmm, this seems to be recursive locking.  It sounded unlikely, but then
> googling for "glib gsource finalize unlock" led to this:
> 
> https://bugs.launchpad.net/ubuntu/+source/glib2.0/+bug/887946
> 
> and this:
> 
> https://mail.gnome.org/archives/commits-list/2010-November/msg01816.html
> 
> and this:
> 
> https://bugzilla.gnome.org/show_bug.cgi?id=586432
> https://bugzilla.gnome.org/show_bug.cgi?id=626702
> 
> Comment 6 of the latter bug says that finalize-inside-finalize is in
> fact broken without this fix.  If this is RHEL6, you or Amit should file
> a bug.

Ok, anyway this is not caused by this series.  It is pre-existing and
likely it's what Amit was observing as well.

Filing a RHEL6 bug is probably the right thing to do, but we have to
work around it.  Can you test this:

---------------- 8< --------------------

From 2db7ad3c07b048493b1bc38a5cc644076c0f68c1 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Thu, 18 Apr 2013 15:05:35 +0200
Subject: [PATCH] qemu-char: do not operate on sources from finalize callbacks

Due to a glib bug, the finalize callback is called with the GMainContext
lock held.  Thus, any operation on the context from the callback will
cause recursive locking and a deadlock.

The fix for this is somewhat ugly, because we need to forego polymorphism
and implement our own function to destroy IOWatchPoll sources.  The
right thing to do here would be child sources, but we support older
glib versions that do not have them.  Not coincidentially, glib developers
found and fixed the deadlock as part of implementing child sources.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 qemu-char.c | 54 +++++++++++++++++++++++++++++++++++++++---------------
 1 file changed, 39 insertions(+), 15 deletions(-)

Patch

diff --git a/qemu-char.c b/qemu-char.c
index 6e897da..ffd08e6 100644
--- a/qemu-char.c
+++ b/qemu-char.c
@@ -643,12 +643,17 @@  static gboolean io_watch_poll_dispatch(GSource *source, GSourceFunc callback,
 
 static void io_watch_poll_finalize(GSource *source)
 {
+    /* Due to a glib bug, removing the last reference to a source
+     * inside a finalize callback causes recursive locking (and a
+     * deadlock).  For this reason, we call io_remove_watch_poll
+     * to remove this source.  At this point, iwp->src must
+     * be NULL, or we would leak it.
+     *
+     * This would be solved much more elegantly by child sources,
+     * but we support older glib versions that do not have them.
+     */
     IOWatchPoll *iwp = io_watch_poll_from_source(source);
-    if (iwp->src) {
-        g_source_destroy(iwp->src);
-        g_source_unref(iwp->src);
-        iwp->src = NULL;
-    }
+    assert(iwp->src == NULL);
 }
 
 static GSourceFuncs io_watch_poll_funcs = {
@@ -679,6 +684,25 @@  static guint io_add_watch_poll(GIOChannel *channel,
     return tag;
 }
 
+static void io_remove_watch_poll(guint tag)
+{
+    GSource *source;
+    IOWatchPoll *iwp;
+
+    g_return_if_fail (tag > 0);
+
+    source = g_main_context_find_source_by_id(NULL, tag);
+    g_return_if_fail (source != NULL);
+
+    iwp = io_watch_poll_from_source(source);
+    if (iwp->src) {
+        g_source_destroy(iwp->src);
+        g_source_unref(iwp->src);
+        iwp->src = NULL;
+    }
+    g_source_destroy(&iwp->parent);
+}
+
 #ifndef _WIN32
 static GIOChannel *io_channel_from_fd(int fd)
 {
@@ -788,7 +812,7 @@  static gboolean fd_chr_read(GIOChannel *chan, GIOCondition cond, void *opaque)
                                      len, &bytes_read, NULL);
     if (status == G_IO_STATUS_EOF) {
         if (s->fd_in_tag) {
-            g_source_remove(s->fd_in_tag);
+            io_remove_watch_poll(s->fd_in_tag);
             s->fd_in_tag = 0;
         }
         qemu_chr_be_event(chr, CHR_EVENT_CLOSED);
@@ -821,7 +845,7 @@  static void fd_chr_update_read_handler(CharDriverState *chr)
     FDCharDriver *s = chr->opaque;
 
     if (s->fd_in_tag) {
-        g_source_remove(s->fd_in_tag);
+        io_remove_watch_poll(s->fd_in_tag);
         s->fd_in_tag = 0;
     }
 
@@ -835,7 +859,7 @@  static void fd_chr_close(struct CharDriverState *chr)
     FDCharDriver *s = chr->opaque;
 
     if (s->fd_in_tag) {
-        g_source_remove(s->fd_in_tag);
+        io_remove_watch_poll(s->fd_in_tag);
         s->fd_in_tag = 0;
     }
 
@@ -1145,7 +1169,7 @@  static void pty_chr_state(CharDriverState *chr, int connected)
 
     if (!connected) {
         if (s->fd_tag) {
-            g_source_remove(s->fd_tag);
+            io_remove_watch_poll(s->fd_tag);
             s->fd_tag = 0;
         }
         s->connected = 0;
@@ -1173,7 +1197,7 @@  static void pty_chr_close(struct CharDriverState *chr)
     int fd;
 
     if (s->fd_tag) {
-        g_source_remove(s->fd_tag);
+        io_remove_watch_poll(s->fd_tag);
         s->fd_tag = 0;
     }
     fd = g_io_channel_unix_get_fd(s->fd);
@@ -2252,7 +2276,7 @@  static gboolean udp_chr_read(GIOChannel *chan, GIOCondition cond, void *opaque)
     s->bufptr = s->bufcnt;
     if (status != G_IO_STATUS_NORMAL) {
         if (s->tag) {
-            g_source_remove(s->tag);
+            io_remove_watch_poll(s->tag);
             s->tag = 0;
         }
         return FALSE;
@@ -2273,7 +2297,7 @@  static void udp_chr_update_read_handler(CharDriverState *chr)
     NetCharDriver *s = chr->opaque;
 
     if (s->tag) {
-        g_source_remove(s->tag);
+        io_remove_watch_poll(s->tag);
         s->tag = 0;
     }
 
@@ -2286,7 +2310,7 @@  static void udp_chr_close(CharDriverState *chr)
 {
     NetCharDriver *s = chr->opaque;
     if (s->tag) {
-        g_source_remove(s->tag);
+        io_remove_watch_poll(s->tag);
         s->tag = 0;
     }
     if (s->chan) {
@@ -2520,7 +2544,7 @@  static gboolean tcp_chr_read(GIOChannel *chan, GIOCondition cond, void *opaque)
             s->listen_tag = g_io_add_watch(s->listen_chan, G_IO_IN, tcp_chr_accept, chr);
         }
         if (s->tag) {
-            g_source_remove(s->tag);
+            io_remove_watch_poll(s->tag);
             s->tag = 0;
         }
         g_io_channel_unref(s->chan);
@@ -2635,7 +2659,7 @@  static void tcp_chr_close(CharDriverState *chr)
     TCPCharDriver *s = chr->opaque;
     if (s->fd >= 0) {
         if (s->tag) {
-            g_source_remove(s->tag);
+            io_remove_watch_poll(s->tag);
             s->tag = 0;
         }
         if (s->chan) {