Patchwork Re: kvm crashes with spice while loading qxl

login
register
mail settings
Submitter Alon Levy
Date Feb. 27, 2011, 7:03 p.m.
Message ID <20110227190312.GA14445@playa.redhat.com>
Download mbox | patch
Permalink /patch/84701/
State New
Headers show

Comments

Alon Levy - Feb. 27, 2011, 7:03 p.m.
On Sat, Feb 26, 2011 at 01:29:01PM +0100, Jan Kiszka wrote:
> On 2011-02-26 12:43, xming wrote:
> > When trying to start X (and it loads qxl driver) the kvm process just crashes.

This is fixed by Gerd's attached patch (taken from rhel repository, don't know
why it wasn't pushed to qemu-kvm upstream). I'll send it to kvm list as well (separate email).

> > 
> > qemu-kvm 0.14
> > 
> > startup line
> > 
> > /usr/bin/kvm -name spaceball,process=spaceball -m 1024 -kernel
> > /boot/bzImage-2.6.37.2-guest -append "root=/dev/vda ro" -smp 1 -netdev
> > type=tap,id=spaceball0,script=kvm-ifup-brloc,vhost=on -device
> > virtio-net-pci,netdev=spaceball0,mac=00:16:3e:00:08:01 -drive
> > file=/dev/volume01/G-spaceball,if=virtio -vga qxl -spice
> > port=5957,disable-ticketing -monitor
> > telnet:192.168.0.254:10007,server,nowait,nodelay -pidfile
> > /var/run/kvm/spaceball.pid
> > 
> > host is running vanilla 2.6.37.1 on amd64.
> > 
> > Here is the bt
> > 
> > # gdb /usr/bin/qemu-system-x86_64
> > GNU gdb (Gentoo 7.2 p1) 7.2
> > Copyright (C) 2010 Free Software Foundation, Inc.
> > License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>
> > This is free software: you are free to change and redistribute it.
> > There is NO WARRANTY, to the extent permitted by law.  Type "show copying"
> > and "show warranty" for details.
> > This GDB was configured as "x86_64-pc-linux-gnu".
> > For bug reporting instructions, please see:
> > <http://bugs.gentoo.org/>...
> > Reading symbols from /usr/bin/qemu-system-x86_64...done.
> > (gdb) set args -name spaceball,process=spaceball -m 1024 -kernel
> > /boot/bzImage-2.6.37.2-guest -append "root=/dev/vda ro" -smp 1 -netdev
> > type=tap,id=spaceball0,script=kvm-ifup-brloc,vhost=on -device
> > virtio-net-pci,netdev=spaceball0,mac=00:16:3e:00:08:01 -drive
> > file=/dev/volume01/G-spaceball,if=virtio -vga qxl -spice
> > port=5957,disable-ticketing -monitor
> > telnet:192.168.0.254:10007,server,nowait,nodelay -pidfile
> > /var/run/kvm/spaceball.pid
> > (gdb) run
> > Starting program: /usr/bin/qemu-system-x86_64 -name
> > spaceball,process=spaceball -m 1024 -kernel
> > /boot/bzImage-2.6.37.2-guest -append "root=/dev/vda ro" -smp 1 -netdev
> > type=tap,id=spaceball0,script=kvm-ifup-brloc,vhost=on -device
> > virtio-net-pci,netdev=spaceball0,mac=00:16:3e:00:08:01 -drive
> > file=/dev/volume01/G-spaceball,if=virtio -vga qxl -spice
> > port=5957,disable-ticketing -monitor
> > telnet:192.168.0.254:10007,server,nowait,nodelay -pidfile
> > /var/run/kvm/spaceball.pid
> > [Thread debugging using libthread_db enabled]
> > do_spice_init: starting 0.6.0
> > spice_server_add_interface: SPICE_INTERFACE_KEYBOARD
> > spice_server_add_interface: SPICE_INTERFACE_MOUSE
> > [New Thread 0x7ffff4802710 (LWP 30294)]
> > spice_server_add_interface: SPICE_INTERFACE_QXL
> > [New Thread 0x7fffaacae710 (LWP 30295)]
> > red_worker_main: begin
> > handle_dev_destroy_surfaces:
> > handle_dev_destroy_surfaces:
> > handle_dev_input: start
> > [New Thread 0x7fffaa4ad710 (LWP 30298)]
> > [New Thread 0x7fffa9cac710 (LWP 30299)]
> > [New Thread 0x7fffa94ab710 (LWP 30300)]
> > [New Thread 0x7fffa8caa710 (LWP 30301)]
> > [New Thread 0x7fffa3fff710 (LWP 30302)]
> > [New Thread 0x7fffa37fe710 (LWP 30303)]
> > [New Thread 0x7fffa2ffd710 (LWP 30304)]
> > [New Thread 0x7fffa27fc710 (LWP 30305)]
> > [New Thread 0x7fffa1ffb710 (LWP 30306)]
> > [New Thread 0x7fffa17fa710 (LWP 30307)]
> > reds_handle_main_link:
> > reds_show_new_channel: channel 1:0, connected successfully, over Non Secure link
> > reds_main_handle_message: net test: latency 5.636000 ms, bitrate
> > 11027768 bps (10.516899 Mbps)
> > reds_show_new_channel: channel 2:0, connected successfully, over Non Secure link
> > red_dispatcher_set_peer:
> > handle_dev_input: connect
> > handle_new_display_channel: jpeg disabled
> > handle_new_display_channel: zlib-over-glz disabled
> > reds_show_new_channel: channel 4:0, connected successfully, over Non Secure link
> > red_dispatcher_set_cursor_peer:
> > handle_dev_input: cursor connect
> > reds_show_new_channel: channel 3:0, connected successfully, over Non Secure link
> > inputs_link:
> > [New Thread 0x7fffa07f8710 (LWP 30312)]
> > [New Thread 0x7fff9fff7710 (LWP 30313)]
> > [New Thread 0x7fff9f7f6710 (LWP 30314)]
> > [New Thread 0x7fff9eff5710 (LWP 30315)]
> > [New Thread 0x7fff9e7f4710 (LWP 30316)]
> > [New Thread 0x7fff9dff3710 (LWP 30317)]
> > [New Thread 0x7fff9d7f2710 (LWP 30318)]
> > qemu-system-x86_64:
> > /var/tmp/portage/app-emulation/qemu-kvm-0.14.0/work/qemu-kvm-0.14.0/qemu-kvm.c:1724:
> > kvm_mutex_unlock: Assertion `!cpu_single_env' failed.
> > 
> > Program received signal SIGABRT, Aborted.
> > [Switching to Thread 0x7ffff4802710 (LWP 30294)]
> > 0x00007ffff5daa165 in raise () from /lib/libc.so.6
> > (gdb)
> > (gdb)
> > (gdb)
> > (gdb)
> > (gdb) bt
> > #0  0x00007ffff5daa165 in raise () from /lib/libc.so.6
> > #1  0x00007ffff5dab580 in abort () from /lib/libc.so.6
> > #2  0x00007ffff5da3201 in __assert_fail () from /lib/libc.so.6
> > #3  0x0000000000436f7e in kvm_mutex_unlock ()
> >     at /var/tmp/portage/app-emulation/qemu-kvm-0.14.0/work/qemu-kvm-0.14.0/qemu-kvm.c:1724
> > #4  qemu_mutex_unlock_iothread ()
> >     at /var/tmp/portage/app-emulation/qemu-kvm-0.14.0/work/qemu-kvm-0.14.0/qemu-kvm.c:1737
> > #5  0x00000000005e84ee in qxl_hard_reset (d=0x15d3080, loadvm=0)
> >     at /var/tmp/portage/app-emulation/qemu-kvm-0.14.0/work/qemu-kvm-0.14.0/hw/qxl.c:665
> > #6  0x00000000005e9f9a in ioport_write (opaque=0x15d3080, addr=<value
> > optimized out>, val=0)
> >     at /var/tmp/portage/app-emulation/qemu-kvm-0.14.0/work/qemu-kvm-0.14.0/hw/qxl.c:979
> > #7  0x0000000000439d4e in kvm_handle_io (env=0x11a3e00)
> >     at /var/tmp/portage/app-emulation/qemu-kvm-0.14.0/work/qemu-kvm-0.14.0/kvm-all.c:818
> > #8  kvm_run (env=0x11a3e00)
> >     at /var/tmp/portage/app-emulation/qemu-kvm-0.14.0/work/qemu-kvm-0.14.0/qemu-kvm.c:617
> > #9  0x0000000000439f79 in kvm_cpu_exec (env=0x764b)
> >     at /var/tmp/portage/app-emulation/qemu-kvm-0.14.0/work/qemu-kvm-0.14.0/qemu-kvm.c:1233
> > #10 0x000000000043b2d7 in kvm_main_loop_cpu (_env=0x11a3e00)
> >     at /var/tmp/portage/app-emulation/qemu-kvm-0.14.0/work/qemu-kvm-0.14.0/qemu-kvm.c:1419
> > #11 ap_main_loop (_env=0x11a3e00)
> >     at /var/tmp/portage/app-emulation/qemu-kvm-0.14.0/work/qemu-kvm-0.14.0/qemu-kvm.c:1466
> > #12 0x00007ffff77bb944 in start_thread () from /lib/libpthread.so.0
> > #13 0x00007ffff5e491dd in clone () from /lib/libc.so.6
> > (gdb)
> 
> That's a spice bug. In fact, there are a lot of
> qemu_mutex_lock/unlock_iothread in that subsystem. I bet at least a few
> of them can cause even more subtle problems.
> 
> Two general issues with dropping the global mutex like this:
>  - The caller of mutex_unlock is responsible for maintaining
>    cpu_single_env across the unlocked phase (that's related to the
>    abort above).
>  - Dropping the lock in the middle of a callback is risky. That may
>    enable re-entrances of code sections that weren't designed for this
>    (I'm skeptic about the side effects of
>    qemu_spice_vm_change_state_handler - why dropping the lock here?).
> 
> Spice requires a careful review regarding such issues. Or it should
> pioneer with introducing its own lock so that we can handle at least
> related I/O activities over the VCPUs without holding the global mutex
> (but I bet it's not the simplest candidate for such a new scheme).
> 
> Jan
>
From bef7336f79e3325451d2ead120b958a401b0ccc4 Mon Sep 17 00:00:00 2001
From: Gerd Hoffmann <kraxel@redhat.com>
Date: Mon, 24 Jan 2011 09:18:32 -0200
Subject: [PATCH] spice/qxl: locking fix for qemu-kvm

qxl needs to release the qemu lock before calling some libspice
functions (and re-aquire it later).  In upstream qemu qxl can just
use qemu_mutex_{unlock,lock}_iothread.  In qemu-kvm this doesn't
work, qxl needs additionally save+restore the cpu_single_env pointer
on unlock+lock.

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
---
 hw/qxl.c           |   37 +++++++++++++++++++++++++++++--------
 ui/spice-display.c |   12 ++++++------
 ui/spice-display.h |    6 ++++++
 3 files changed, 41 insertions(+), 14 deletions(-)
Jan Kiszka - Feb. 27, 2011, 7:11 p.m.
On 2011-02-27 20:03, Alon Levy wrote:
> On Sat, Feb 26, 2011 at 01:29:01PM +0100, Jan Kiszka wrote:
>> On 2011-02-26 12:43, xming wrote:
>>> When trying to start X (and it loads qxl driver) the kvm process just crashes.
> 
> This is fixed by Gerd's attached patch (taken from rhel repository, don't know
> why it wasn't pushed to qemu-kvm upstream). I'll send it to kvm list as well (separate email).

Patch looks OK on first glance, but the changelog is misleading: This
was broken for _both_ trees, but upstream didn't detect the bug.

My concerns regarding other side effects of juggling with global mutex
in spice code remain.

Jan
Alon Levy - Feb. 27, 2011, 7:16 p.m.
On Sun, Feb 27, 2011 at 08:11:26PM +0100, Jan Kiszka wrote:
> On 2011-02-27 20:03, Alon Levy wrote:
> > On Sat, Feb 26, 2011 at 01:29:01PM +0100, Jan Kiszka wrote:
> >> On 2011-02-26 12:43, xming wrote:
> >>> When trying to start X (and it loads qxl driver) the kvm process just crashes.
> > 
> > This is fixed by Gerd's attached patch (taken from rhel repository, don't know
> > why it wasn't pushed to qemu-kvm upstream). I'll send it to kvm list as well (separate email).
> 
> Patch looks OK on first glance, but the changelog is misleading: This
> was broken for _both_ trees, but upstream didn't detect the bug.
> 

The trees the patch commit message refers to are qemu and qemu-kvm. qemu doesn't even
have cpu_single_env. It didn't talk about two qemu-kvm trees.

> My concerns regarding other side effects of juggling with global mutex
> in spice code remain.

I know there used to be a mutex in spice code and during the upstreaming process it
got ditched in favor of the qemu global io mutex. I would have rather deferred this
to Gerd since he wrote this, but he is not available atm.

> 
> Jan
>
Jan Kiszka - Feb. 27, 2011, 7:27 p.m.
On 2011-02-27 20:16, Alon Levy wrote:
> On Sun, Feb 27, 2011 at 08:11:26PM +0100, Jan Kiszka wrote:
>> On 2011-02-27 20:03, Alon Levy wrote:
>>> On Sat, Feb 26, 2011 at 01:29:01PM +0100, Jan Kiszka wrote:
>>>> On 2011-02-26 12:43, xming wrote:
>>>>> When trying to start X (and it loads qxl driver) the kvm process just crashes.
>>>
>>> This is fixed by Gerd's attached patch (taken from rhel repository, don't know
>>> why it wasn't pushed to qemu-kvm upstream). I'll send it to kvm list as well (separate email).
>>
>> Patch looks OK on first glance, but the changelog is misleading: This
>> was broken for _both_ trees, but upstream didn't detect the bug.
>>
> 
> The trees the patch commit message refers to are qemu and qemu-kvm.

The same did I.

> qemu doesn't even have cpu_single_env.

Really? Check again. :)

> It didn't talk about two qemu-kvm trees.
> 
>> My concerns regarding other side effects of juggling with global mutex
>> in spice code remain.
> 
> I know there used to be a mutex in spice code and during the upstreaming process it
> got ditched in favor of the qemu global io mutex. I would have rather deferred this
> to Gerd since he wrote this, but he is not available atm.

It's not necessarily bad to drop the io mutex, but it is more tricky
than it may appear on first glance.

Jan
Alon Levy - Feb. 27, 2011, 7:29 p.m.
On Sun, Feb 27, 2011 at 08:27:01PM +0100, Jan Kiszka wrote:
> On 2011-02-27 20:16, Alon Levy wrote:
> > On Sun, Feb 27, 2011 at 08:11:26PM +0100, Jan Kiszka wrote:
> >> On 2011-02-27 20:03, Alon Levy wrote:
> >>> On Sat, Feb 26, 2011 at 01:29:01PM +0100, Jan Kiszka wrote:
> >>>> On 2011-02-26 12:43, xming wrote:
> >>>>> When trying to start X (and it loads qxl driver) the kvm process just crashes.
> >>>
> >>> This is fixed by Gerd's attached patch (taken from rhel repository, don't know
> >>> why it wasn't pushed to qemu-kvm upstream). I'll send it to kvm list as well (separate email).
> >>
> >> Patch looks OK on first glance, but the changelog is misleading: This
> >> was broken for _both_ trees, but upstream didn't detect the bug.
> >>
> > 
> > The trees the patch commit message refers to are qemu and qemu-kvm.
> 
> The same did I.
> 
> > qemu doesn't even have cpu_single_env.
> 
> Really? Check again. :)

Sorry, grepped the wrong repo. I'll send this to qemu-devel too then.

> 
> > It didn't talk about two qemu-kvm trees.
> > 
> >> My concerns regarding other side effects of juggling with global mutex
> >> in spice code remain.
> > 
> > I know there used to be a mutex in spice code and during the upstreaming process it
> > got ditched in favor of the qemu global io mutex. I would have rather deferred this
> > to Gerd since he wrote this, but he is not available atm.
> 
> It's not necessarily bad to drop the io mutex, but it is more tricky
> than it may appear on first glance.
> 
> Jan
>
Alon Levy - Feb. 27, 2011, 7:32 p.m.
On Sun, Feb 27, 2011 at 08:27:01PM +0100, Jan Kiszka wrote:
> On 2011-02-27 20:16, Alon Levy wrote:
> > On Sun, Feb 27, 2011 at 08:11:26PM +0100, Jan Kiszka wrote:
> >> On 2011-02-27 20:03, Alon Levy wrote:
> >>> On Sat, Feb 26, 2011 at 01:29:01PM +0100, Jan Kiszka wrote:
> >>>> On 2011-02-26 12:43, xming wrote:
> >>>>> When trying to start X (and it loads qxl driver) the kvm process just crashes.
> >>>
> >>> This is fixed by Gerd's attached patch (taken from rhel repository, don't know
> >>> why it wasn't pushed to qemu-kvm upstream). I'll send it to kvm list as well (separate email).
> >>
> >> Patch looks OK on first glance, but the changelog is misleading: This
> >> was broken for _both_ trees, but upstream didn't detect the bug.
> >>
> > 
> > The trees the patch commit message refers to are qemu and qemu-kvm.
> 
> The same did I.
> 
> > qemu doesn't even have cpu_single_env.
> 
> Really? Check again. :)
> 
> > It didn't talk about two qemu-kvm trees.
> > 
> >> My concerns regarding other side effects of juggling with global mutex
> >> in spice code remain.
> > 
> > I know there used to be a mutex in spice code and during the upstreaming process it
> > got ditched in favor of the qemu global io mutex. I would have rather deferred this
> > to Gerd since he wrote this, but he is not available atm.
> 
> It's not necessarily bad to drop the io mutex, but it is more tricky
> than it may appear on first glance.

The problem with not dropping it is that we may be in vga mode and create
updates synthtically (i.e. qemu created and not driver created) that access the
framebuffer and need to be locked so the framebuffer isn't updated at the same
time. We drop the mutex only when we are about to call the dispatcher, which basically
waits on red_worker (a libspice-server thread) to do some work. red_worker may
in turn callback into qxl in qemu, which may try to acquire the lock. (the
many may's here are just reflections of the codepaths).

> 
> Jan
>
xming - Feb. 28, 2011, 12:56 p.m.
On Sun, Feb 27, 2011 at 8:03 PM, Alon Levy <alevy@redhat.com> wrote:
> On Sat, Feb 26, 2011 at 01:29:01PM +0100, Jan Kiszka wrote:
>> On 2011-02-26 12:43, xming wrote:
>> > When trying to start X (and it loads qxl driver) the kvm process just crashes.
>
> This is fixed by Gerd's attached patch (taken from rhel repository, don't know
> why it wasn't pushed to qemu-kvm upstream). I'll send it to kvm list as well (separate email).

I can confirm that this patch fixes the issue, thanks a lot

cheers
Rick Vernam - March 1, 2011, 3:56 a.m.
On Sunday 27 February 2011 13:03:14 Alon Levy wrote:
> On Sat, Feb 26, 2011 at 01:29:01PM +0100, Jan Kiszka wrote:
> > On 2011-02-26 12:43, xming wrote:
> > > When trying to start X (and it loads qxl driver) the kvm process just
> > > crashes.
> 
> This is fixed by Gerd's attached patch (taken from rhel repository, don't
> know why it wasn't pushed to qemu-kvm upstream). I'll send it to kvm list
> as well (separate email).
> 

This patch also fixed
https://bugs.launchpad.net/bugs/723871
I created the bug report on launchpad, but I suppose it should be left open 
until the patch hits qemu-kvm?

-Rick
Alon Levy - March 1, 2011, 12:58 p.m.
On Sun, Feb 27, 2011 at 08:11:26PM +0100, Jan Kiszka wrote:
> On 2011-02-27 20:03, Alon Levy wrote:
> > On Sat, Feb 26, 2011 at 01:29:01PM +0100, Jan Kiszka wrote:
> >> On 2011-02-26 12:43, xming wrote:
> >>> When trying to start X (and it loads qxl driver) the kvm process just crashes.
> > 
> > This is fixed by Gerd's attached patch (taken from rhel repository, don't know
> > why it wasn't pushed to qemu-kvm upstream). I'll send it to kvm list as well (separate email).
> 
> Patch looks OK on first glance, but the changelog is misleading: This
> was broken for _both_ trees, but upstream didn't detect the bug.

So I didn't test with qemu not having this patch, but according to the discussion in the
launchpad bug the problem only happens with qemu-kvm. This doesn't rule out it being a
bug, perhaps it is just triggered much less frequently I guess.

> 
> My concerns regarding other side effects of juggling with global mutex
> in spice code remain.
> 
> Jan
>
Jan Kiszka - March 2, 2011, 8:22 a.m.
On 2011-03-01 13:58, Alon Levy wrote:
> On Sun, Feb 27, 2011 at 08:11:26PM +0100, Jan Kiszka wrote:
>> On 2011-02-27 20:03, Alon Levy wrote:
>>> On Sat, Feb 26, 2011 at 01:29:01PM +0100, Jan Kiszka wrote:
>>>> On 2011-02-26 12:43, xming wrote:
>>>>> When trying to start X (and it loads qxl driver) the kvm process just crashes.
>>>
>>> This is fixed by Gerd's attached patch (taken from rhel repository, don't know
>>> why it wasn't pushed to qemu-kvm upstream). I'll send it to kvm list as well (separate email).
>>
>> Patch looks OK on first glance, but the changelog is misleading: This
>> was broken for _both_ trees, but upstream didn't detect the bug.
> 
> So I didn't test with qemu not having this patch, but according to the discussion in the
> launchpad bug the problem only happens with qemu-kvm. This doesn't rule out it being a
> bug, perhaps it is just triggered much less frequently I guess.

Again: qemu-kvm has the instrumentation to detect the bug, qemu is
lacking this, but both trees will break subtly if cpu_current_env is not
properly restored.

Jan
Alon Levy - March 2, 2011, 10:56 a.m.
On Wed, Mar 02, 2011 at 09:22:35AM +0100, Jan Kiszka wrote:
> On 2011-03-01 13:58, Alon Levy wrote:
> > On Sun, Feb 27, 2011 at 08:11:26PM +0100, Jan Kiszka wrote:
> >> On 2011-02-27 20:03, Alon Levy wrote:
> >>> On Sat, Feb 26, 2011 at 01:29:01PM +0100, Jan Kiszka wrote:
> >>>> On 2011-02-26 12:43, xming wrote:
> >>>>> When trying to start X (and it loads qxl driver) the kvm process just crashes.
> >>>
> >>> This is fixed by Gerd's attached patch (taken from rhel repository, don't know
> >>> why it wasn't pushed to qemu-kvm upstream). I'll send it to kvm list as well (separate email).
> >>
> >> Patch looks OK on first glance, but the changelog is misleading: This
> >> was broken for _both_ trees, but upstream didn't detect the bug.
> > 
> > So I didn't test with qemu not having this patch, but according to the discussion in the
> > launchpad bug the problem only happens with qemu-kvm. This doesn't rule out it being a
> > bug, perhaps it is just triggered much less frequently I guess.
> 
> Again: qemu-kvm has the instrumentation to detect the bug, qemu is
> lacking this, but both trees will break subtly if cpu_current_env is not
> properly restored.

ok, so what do you want to be done further before this patch is applied?

> 
> Jan
>
Jan Kiszka - March 2, 2011, 11:34 a.m.
On 2011-03-02 11:56, Alon Levy wrote:
> On Wed, Mar 02, 2011 at 09:22:35AM +0100, Jan Kiszka wrote:
>> On 2011-03-01 13:58, Alon Levy wrote:
>>> On Sun, Feb 27, 2011 at 08:11:26PM +0100, Jan Kiszka wrote:
>>>> On 2011-02-27 20:03, Alon Levy wrote:
>>>>> On Sat, Feb 26, 2011 at 01:29:01PM +0100, Jan Kiszka wrote:
>>>>>> On 2011-02-26 12:43, xming wrote:
>>>>>>> When trying to start X (and it loads qxl driver) the kvm process just crashes.
>>>>>
>>>>> This is fixed by Gerd's attached patch (taken from rhel repository, don't know
>>>>> why it wasn't pushed to qemu-kvm upstream). I'll send it to kvm list as well (separate email).
>>>>
>>>> Patch looks OK on first glance, but the changelog is misleading: This
>>>> was broken for _both_ trees, but upstream didn't detect the bug.
>>>
>>> So I didn't test with qemu not having this patch, but according to the discussion in the
>>> launchpad bug the problem only happens with qemu-kvm. This doesn't rule out it being a
>>> bug, perhaps it is just triggered much less frequently I guess.
>>
>> Again: qemu-kvm has the instrumentation to detect the bug, qemu is
>> lacking this, but both trees will break subtly if cpu_current_env is not
>> properly restored.
> 
> ok, so what do you want to be done further before this patch is applied?

The patch posted to qemu-devel just requires a changelog that correctly
reflects what it addresses (and where).

Jan
Alon Levy - March 2, 2011, 12:32 p.m.
On Wed, Mar 02, 2011 at 12:34:24PM +0100, Jan Kiszka wrote:
> On 2011-03-02 11:56, Alon Levy wrote:
> > On Wed, Mar 02, 2011 at 09:22:35AM +0100, Jan Kiszka wrote:
> >> On 2011-03-01 13:58, Alon Levy wrote:
> >>> On Sun, Feb 27, 2011 at 08:11:26PM +0100, Jan Kiszka wrote:
> >>>> On 2011-02-27 20:03, Alon Levy wrote:
> >>>>> On Sat, Feb 26, 2011 at 01:29:01PM +0100, Jan Kiszka wrote:
> >>>>>> On 2011-02-26 12:43, xming wrote:
> >>>>>>> When trying to start X (and it loads qxl driver) the kvm process just crashes.
> >>>>>
> >>>>> This is fixed by Gerd's attached patch (taken from rhel repository, don't know
> >>>>> why it wasn't pushed to qemu-kvm upstream). I'll send it to kvm list as well (separate email).
> >>>>
> >>>> Patch looks OK on first glance, but the changelog is misleading: This
> >>>> was broken for _both_ trees, but upstream didn't detect the bug.
> >>>
> >>> So I didn't test with qemu not having this patch, but according to the discussion in the
> >>> launchpad bug the problem only happens with qemu-kvm. This doesn't rule out it being a
> >>> bug, perhaps it is just triggered much less frequently I guess.
> >>
> >> Again: qemu-kvm has the instrumentation to detect the bug, qemu is
> >> lacking this, but both trees will break subtly if cpu_current_env is not
> >> properly restored.
> > 
> > ok, so what do you want to be done further before this patch is applied?
> 
> The patch posted to qemu-devel just requires a changelog that correctly
> reflects what it addresses (and where).

Just sent,

Alon

> 
> Jan
>

Patch

diff --git a/hw/qxl.c b/hw/qxl.c
index fe4212b..117f7c8 100644
--- a/hw/qxl.c
+++ b/hw/qxl.c
@@ -125,6 +125,27 @@  static void qxl_reset_memslots(PCIQXLDevice *d);
 static void qxl_reset_surfaces(PCIQXLDevice *d);
 static void qxl_ring_set_dirty(PCIQXLDevice *qxl);
 
+/* qemu-kvm locking ... */
+void qxl_unlock_iothread(SimpleSpiceDisplay *ssd)
+{
+    if (cpu_single_env) {
+        assert(ssd->env == NULL);
+        ssd->env = cpu_single_env;
+        cpu_single_env = NULL;
+    }
+    qemu_mutex_unlock_iothread();
+}
+
+void qxl_lock_iothread(SimpleSpiceDisplay *ssd)
+{
+    qemu_mutex_lock_iothread();
+    if (ssd->env) {
+        assert(cpu_single_env == NULL);
+        cpu_single_env = ssd->env;
+        ssd->env = NULL;
+    }
+}
+
 static inline uint32_t msb_mask(uint32_t val)
 {
     uint32_t mask;
@@ -662,10 +683,10 @@  static void qxl_hard_reset(PCIQXLDevice *d, int loadvm)
     dprint(d, 1, "%s: start%s\n", __FUNCTION__,
            loadvm ? " (loadvm)" : "");
 
-    qemu_mutex_unlock_iothread();
+    qxl_unlock_iothread(&d->ssd);
     d->ssd.worker->reset_cursor(d->ssd.worker);
     d->ssd.worker->reset_image_cache(d->ssd.worker);
-    qemu_mutex_lock_iothread();
+    qxl_lock_iothread(&d->ssd);
     qxl_reset_surfaces(d);
     qxl_reset_memslots(d);
 
@@ -795,9 +816,9 @@  static void qxl_reset_surfaces(PCIQXLDevice *d)
 {
     dprint(d, 1, "%s:\n", __FUNCTION__);
     d->mode = QXL_MODE_UNDEFINED;
-    qemu_mutex_unlock_iothread();
+    qxl_unlock_iothread(&d->ssd);
     d->ssd.worker->destroy_surfaces(d->ssd.worker);
-    qemu_mutex_lock_iothread();
+    qxl_lock_iothread(&d->ssd);
     memset(&d->guest_surfaces.cmds, 0, sizeof(d->guest_surfaces.cmds));
 }
 
@@ -866,9 +887,9 @@  static void qxl_destroy_primary(PCIQXLDevice *d)
     dprint(d, 1, "%s\n", __FUNCTION__);
 
     d->mode = QXL_MODE_UNDEFINED;
-    qemu_mutex_unlock_iothread();
+    qxl_unlock_iothread(&d->ssd);
     d->ssd.worker->destroy_primary_surface(d->ssd.worker, 0);
-    qemu_mutex_lock_iothread();
+    qxl_lock_iothread(&d->ssd);
 }
 
 static void qxl_set_mode(PCIQXLDevice *d, int modenr, int loadvm)
@@ -938,10 +959,10 @@  static void ioport_write(void *opaque, uint32_t addr, uint32_t val)
     case QXL_IO_UPDATE_AREA:
     {
         QXLRect update = d->ram->update_area;
-        qemu_mutex_unlock_iothread();
+        qxl_unlock_iothread(&d->ssd);
         d->ssd.worker->update_area(d->ssd.worker, d->ram->update_surface,
                                    &update, NULL, 0, 0);
-        qemu_mutex_lock_iothread();
+        qxl_lock_iothread(&d->ssd);
         break;
     }
     case QXL_IO_NOTIFY_CMD:
diff --git a/ui/spice-display.c b/ui/spice-display.c
index 020b423..defe652 100644
--- a/ui/spice-display.c
+++ b/ui/spice-display.c
@@ -186,18 +186,18 @@  void qemu_spice_create_host_primary(SimpleSpiceDisplay *ssd)
     surface.mem        = (intptr_t)ssd->buf;
     surface.group_id   = MEMSLOT_GROUP_HOST;
 
-    qemu_mutex_unlock_iothread();
+    qxl_unlock_iothread(ssd);
     ssd->worker->create_primary_surface(ssd->worker, 0, &surface);
-    qemu_mutex_lock_iothread();
+    qxl_lock_iothread(ssd);
 }
 
 void qemu_spice_destroy_host_primary(SimpleSpiceDisplay *ssd)
 {
     dprint(1, "%s:\n", __FUNCTION__);
 
-    qemu_mutex_unlock_iothread();
+    qxl_unlock_iothread(ssd);
     ssd->worker->destroy_primary_surface(ssd->worker, 0);
-    qemu_mutex_lock_iothread();
+    qxl_lock_iothread(ssd);
 }
 
 void qemu_spice_vm_change_state_handler(void *opaque, int running, int reason)
@@ -207,9 +207,9 @@  void qemu_spice_vm_change_state_handler(void *opaque, int running, int reason)
     if (running) {
         ssd->worker->start(ssd->worker);
     } else {
-        qemu_mutex_unlock_iothread();
+        qxl_unlock_iothread(ssd);
         ssd->worker->stop(ssd->worker);
-        qemu_mutex_lock_iothread();
+        qxl_lock_iothread(ssd);
     }
     ssd->running = running;
 }
diff --git a/ui/spice-display.h b/ui/spice-display.h
index aef0464..df74828 100644
--- a/ui/spice-display.h
+++ b/ui/spice-display.h
@@ -43,6 +43,9 @@  typedef struct SimpleSpiceDisplay {
     QXLRect dirty;
     int notify;
     int running;
+
+    /* qemu-kvm locking ... */
+    void *env;
 } SimpleSpiceDisplay;
 
 typedef struct SimpleSpiceUpdate {
@@ -52,6 +55,9 @@  typedef struct SimpleSpiceUpdate {
     uint8_t *bitmap;
 } SimpleSpiceUpdate;
 
+void qxl_unlock_iothread(SimpleSpiceDisplay *ssd);
+void qxl_lock_iothread(SimpleSpiceDisplay *ssd);
+
 int qemu_spice_rect_is_empty(const QXLRect* r);
 void qemu_spice_rect_union(QXLRect *dest, const QXLRect *r);