diff mbox

coroutine: add ./configure --disable-coroutine-pool

Message ID 1378910555-24753-1-git-send-email-stefanha@redhat.com
State New
Headers show

Commit Message

Stefan Hajnoczi Sept. 11, 2013, 2:42 p.m. UTC
The 'gthread' coroutine backend was written before the freelist (aka
pool) existed in qemu-coroutine.c.

This means that every thread is expected to exit when its coroutine
terminates.  It is not possible to reuse threads from a pool.

This patch automatically disables the pool when 'gthread' is used.  This
allows the 'gthread' backend to work again (for example,
tests/test-coroutine completes successfully instead of hanging).

I considered implementing thread reuse but I don't want quirks like CPU
affinity differences due to coroutine threads being recycled.  The
'gthread' backend is a reference backend and it's therefore okay to skip
the pool optimization.

Note this patch also makes it easy to toggle the pool for benchmarking
purposes:

  ./configure --with-coroutine-backend=ucontext \
              --disable-coroutine-pool

Reported-by: Gabriel Kerneis <gabriel@kerneis.info>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 configure        | 24 ++++++++++++++++++++++++
 qemu-coroutine.c | 34 +++++++++++++++++++---------------
 2 files changed, 43 insertions(+), 15 deletions(-)

Comments

Gabriel Kerneis Sept. 11, 2013, 2:56 p.m. UTC | #1
On Wed, Sep 11, 2013 at 04:42:35PM +0200, Stefan Hajnoczi wrote:
> This patch automatically disables the pool when 'gthread' is used.  This
> allows the 'gthread' backend to work again (for example,
> tests/test-coroutine completes successfully instead of hanging).
> 
> Reported-by: Gabriel Kerneis <gabriel@kerneis.info>
> Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>

Reviewed-by: Gabriel Kerneis <gabriel@kerneis.info>
Kevin Wolf Sept. 11, 2013, 3:24 p.m. UTC | #2
Am 11.09.2013 um 16:42 hat Stefan Hajnoczi geschrieben:
> The 'gthread' coroutine backend was written before the freelist (aka
> pool) existed in qemu-coroutine.c.
> 
> This means that every thread is expected to exit when its coroutine
> terminates.  It is not possible to reuse threads from a pool.
> 
> This patch automatically disables the pool when 'gthread' is used.  This
> allows the 'gthread' backend to work again (for example,
> tests/test-coroutine completes successfully instead of hanging).
> 
> I considered implementing thread reuse but I don't want quirks like CPU
> affinity differences due to coroutine threads being recycled.  The
> 'gthread' backend is a reference backend and it's therefore okay to skip
> the pool optimization.
> 
> Note this patch also makes it easy to toggle the pool for benchmarking
> purposes:
> 
>   ./configure --with-coroutine-backend=ucontext \
>               --disable-coroutine-pool
> 
> Reported-by: Gabriel Kerneis <gabriel@kerneis.info>
> Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
> ---
>  configure        | 24 ++++++++++++++++++++++++
>  qemu-coroutine.c | 34 +++++++++++++++++++---------------
>  2 files changed, 43 insertions(+), 15 deletions(-)
> 
> diff --git a/configure b/configure
> index e989609..5cf6341 100755
> --- a/configure
> +++ b/configure
> @@ -235,6 +235,7 @@ guest_agent=""
>  want_tools="yes"
>  libiscsi=""
>  coroutine=""
> +coroutine_pool=""
>  seccomp=""
>  glusterfs=""
>  glusterfs_discard="no"
> @@ -875,6 +876,10 @@ for opt do
>    ;;
>    --with-coroutine=*) coroutine="$optarg"
>    ;;
> +  --disable-coroutine-pool) coroutine_pool="no"
> +  ;;
> +  --enable-coroutine-pool) coroutine_pool="yes"
> +  ;;
>    --disable-docs) docs="no"
>    ;;
>    --enable-docs) docs="yes"
> @@ -1161,6 +1166,8 @@ echo "  --disable-seccomp        disable seccomp support"
>  echo "  --enable-seccomp         enables seccomp support"
>  echo "  --with-coroutine=BACKEND coroutine backend. Supported options:"
>  echo "                           gthread, ucontext, sigaltstack, windows"
> +echo "  --disable-coroutine-pool disable coroutine freelist (worse performance)"
> +echo "  --enable-coroutine-pool  enable coroutine freelist (better performance)"
>  echo "  --enable-glusterfs       enable GlusterFS backend"
>  echo "  --disable-glusterfs      disable GlusterFS backend"
>  echo "  --enable-gcov            enable test coverage analysis with gcov"
> @@ -3279,6 +3286,17 @@ else
>    esac
>  fi
>  
> +if test "$coroutine_pool" = ""; then
> +  if test "$coroutine" = "gthread"; then
> +    coroutine_pool=no
> +  else
> +    coroutine_pool=yes
> +  fi
> +fi
> +if test "$coroutine" = "gthread" -a "$coroutine_pool" = "yes"; then
> +  error_exit "'gthread' coroutine backend does not support pool (use --disable-coroutine-pool)"
> +fi
> +
>  ##########################################
>  # check if we have open_by_handle_at
>  
> @@ -3644,6 +3662,7 @@ echo "libiscsi support  $libiscsi"
>  echo "build guest agent $guest_agent"
>  echo "seccomp support   $seccomp"
>  echo "coroutine backend $coroutine"
> +echo "coroutine pool    $coroutine_pool"
>  echo "GlusterFS support $glusterfs"
>  echo "virtio-blk-data-plane $virtio_blk_data_plane"
>  echo "gcov              $gcov_tool"
> @@ -3999,6 +4018,11 @@ if test "$rbd" = "yes" ; then
>  fi
>  
>  echo "CONFIG_COROUTINE_BACKEND=$coroutine" >> $config_host_mak
> +if test "$coroutine_pool" = "yes" ; then
> +  echo "CONFIG_COROUTINE_POOL=1" >> $config_host_mak
> +else
> +  echo "CONFIG_COROUTINE_POOL=0" >> $config_host_mak
> +fi

Doesn't work.

In config-host.make we do get:

    CONFIG_COROUTINE_POOL=0

But when config-host.h is generated from it, I assume it's only checked
if the variable is defined, so we end up with:

    #define CONFIG_COROUTINE_POOL 1

That is, you never really disable the pool.

Kevin
Gabriel Kerneis Sept. 11, 2013, 3:32 p.m. UTC | #3
On Wed, Sep 11, 2013 at 05:24:55PM +0200, Kevin Wolf wrote:
> In config-host.make we do get:
> 
>     CONFIG_COROUTINE_POOL=0
> 
> But when config-host.h is generated from it, I assume it's only checked
> if the variable is defined, so we end up with:
> 
>     #define CONFIG_COROUTINE_POOL 1

Did you clean your tree?

$ rm -rf *
$ ../../configure --disable-coroutine-pool
$ make config-host.h
$ grep COROUTINE_POOL config-host.h
#define CONFIG_COROUTINE_POOL 0

And scripts/create_config seems to take the value into account:

 CONFIG_*=*) # configuration
    name=${line%=*}
    value=${line#*=}
    echo "#define $name $value"
    ;;
Kevin Wolf Sept. 11, 2013, 3:38 p.m. UTC | #4
Am 11.09.2013 um 17:32 hat Gabriel Kerneis geschrieben:
> On Wed, Sep 11, 2013 at 05:24:55PM +0200, Kevin Wolf wrote:
> > In config-host.make we do get:
> > 
> >     CONFIG_COROUTINE_POOL=0
> > 
> > But when config-host.h is generated from it, I assume it's only checked
> > if the variable is defined, so we end up with:
> > 
> >     #define CONFIG_COROUTINE_POOL 1
> 
> Did you clean your tree?
> 
> $ rm -rf *
> $ ../../configure --disable-coroutine-pool
> $ make config-host.h
> $ grep COROUTINE_POOL config-host.h
> #define CONFIG_COROUTINE_POOL 0

My bad, I checked after configure, but before running make. We must have
a weird build system that this isn't created during configure. :-)

Thanks, applied to the block branch.

Kevin
Stefan Hajnoczi Sept. 12, 2013, 8:42 a.m. UTC | #5
On Wed, Sep 11, 2013 at 05:38:07PM +0200, Kevin Wolf wrote:
> Am 11.09.2013 um 17:32 hat Gabriel Kerneis geschrieben:
> > On Wed, Sep 11, 2013 at 05:24:55PM +0200, Kevin Wolf wrote:
> > > In config-host.make we do get:
> > > 
> > >     CONFIG_COROUTINE_POOL=0
> > > 
> > > But when config-host.h is generated from it, I assume it's only checked
> > > if the variable is defined, so we end up with:
> > > 
> > >     #define CONFIG_COROUTINE_POOL 1
> > 
> > Did you clean your tree?
> > 
> > $ rm -rf *
> > $ ../../configure --disable-coroutine-pool
> > $ make config-host.h
> > $ grep COROUTINE_POOL config-host.h
> > #define CONFIG_COROUTINE_POOL 0
> 
> My bad, I checked after configure, but before running make. We must have
> a weird build system that this isn't created during configure. :-)

Yes, this confused by too once or twice while writing this patch :).

The relevant code in scripts/create_config is:

 CONFIG_*=*) # configuration
    name=${line%=*}
    value=${line#*=}
    echo "#define $name $value"
    ;;

Stefan
Stefan Weil Sept. 27, 2013, 5:20 a.m. UTC | #6
Am 11.09.2013 16:42, schrieb Stefan Hajnoczi:
> The 'gthread' coroutine backend was written before the freelist (aka
> pool) existed in qemu-coroutine.c.
>
> This means that every thread is expected to exit when its coroutine
> terminates.  It is not possible to reuse threads from a pool.
>
> This patch automatically disables the pool when 'gthread' is used.  This
> allows the 'gthread' backend to work again (for example,
> tests/test-coroutine completes successfully instead of hanging).
>
> I considered implementing thread reuse but I don't want quirks like CPU
> affinity differences due to coroutine threads being recycled.  The
> 'gthread' backend is a reference backend and it's therefore okay to skip
> the pool optimization.
>
> Note this patch also makes it easy to toggle the pool for benchmarking
> purposes:
>
>   ./configure --with-coroutine-backend=ucontext \
>               --disable-coroutine-pool
>
> Reported-by: Gabriel Kerneis <gabriel@kerneis.info>
> Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
> ---
>  configure        | 24 ++++++++++++++++++++++++
>  qemu-coroutine.c | 34 +++++++++++++++++++---------------
>  2 files changed, 43 insertions(+), 15 deletions(-)
>

This patch is important for QEMU 1.5 as well, but needs some
modifications there.
A recent bug report for MinGW shows that the win32 coroutine needs it, too.

Stefan, could you please prepare a patch for stable-1.5?

Regards
Stefan W.
Stefan Hajnoczi Sept. 27, 2013, 9:11 a.m. UTC | #7
On Fri, Sep 27, 2013 at 07:20:21AM +0200, Stefan Weil wrote:
> Am 11.09.2013 16:42, schrieb Stefan Hajnoczi:
> > The 'gthread' coroutine backend was written before the freelist (aka
> > pool) existed in qemu-coroutine.c.
> >
> > This means that every thread is expected to exit when its coroutine
> > terminates.  It is not possible to reuse threads from a pool.
> >
> > This patch automatically disables the pool when 'gthread' is used.  This
> > allows the 'gthread' backend to work again (for example,
> > tests/test-coroutine completes successfully instead of hanging).
> >
> > I considered implementing thread reuse but I don't want quirks like CPU
> > affinity differences due to coroutine threads being recycled.  The
> > 'gthread' backend is a reference backend and it's therefore okay to skip
> > the pool optimization.
> >
> > Note this patch also makes it easy to toggle the pool for benchmarking
> > purposes:
> >
> >   ./configure --with-coroutine-backend=ucontext \
> >               --disable-coroutine-pool
> >
> > Reported-by: Gabriel Kerneis <gabriel@kerneis.info>
> > Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
> > ---
> >  configure        | 24 ++++++++++++++++++++++++
> >  qemu-coroutine.c | 34 +++++++++++++++++++---------------
> >  2 files changed, 43 insertions(+), 15 deletions(-)
> >
> 
> This patch is important for QEMU 1.5 as well, but needs some
> modifications there.
> A recent bug report for MinGW shows that the win32 coroutine needs it, too.

coroutine-win32.c is designed to support reuse:

static void CALLBACK coroutine_trampoline(void *co_)
{
    Coroutine *co = co_;

    while (true) {
        co->entry(co->entry_arg);
        qemu_coroutine_switch(co, co->caller, COROUTINE_TERMINATE);
    }
}

We return from qemu_coroutine_switch() when the fiber is reused and
simply run another iteration of the while loop.

Why do you say win32 coroutines should disable the pool?

Stefan
Stefan Weil Sept. 27, 2013, 4:49 p.m. UTC | #8
Am 27.09.2013 11:11, schrieb Stefan Hajnoczi:
> On Fri, Sep 27, 2013 at 07:20:21AM +0200, Stefan Weil wrote:
>> Am 11.09.2013 16:42, schrieb Stefan Hajnoczi:
>>> The 'gthread' coroutine backend was written before the freelist (aka
>>> pool) existed in qemu-coroutine.c.
>>>
>>> This means that every thread is expected to exit when its coroutine
>>> terminates.  It is not possible to reuse threads from a pool.
>>>
>>> This patch automatically disables the pool when 'gthread' is used.  This
>>> allows the 'gthread' backend to work again (for example,
>>> tests/test-coroutine completes successfully instead of hanging).
>>>
>>> I considered implementing thread reuse but I don't want quirks like CPU
>>> affinity differences due to coroutine threads being recycled.  The
>>> 'gthread' backend is a reference backend and it's therefore okay to skip
>>> the pool optimization.
>>>
>>> Note this patch also makes it easy to toggle the pool for benchmarking
>>> purposes:
>>>
>>>   ./configure --with-coroutine-backend=ucontext \
>>>               --disable-coroutine-pool
>>>
>>> Reported-by: Gabriel Kerneis <gabriel@kerneis.info>
>>> Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
>>> ---
>>>  configure        | 24 ++++++++++++++++++++++++
>>>  qemu-coroutine.c | 34 +++++++++++++++++++---------------
>>>  2 files changed, 43 insertions(+), 15 deletions(-)
>>>
>> This patch is important for QEMU 1.5 as well, but needs some
>> modifications there.
>> A recent bug report for MinGW shows that the win32 coroutine needs it, too.
> coroutine-win32.c is designed to support reuse:
>
> static void CALLBACK coroutine_trampoline(void *co_)
> {
>     Coroutine *co = co_;
>
>     while (true) {
>         co->entry(co->entry_arg);
>         qemu_coroutine_switch(co, co->caller, COROUTINE_TERMINATE);
>     }
> }
>
> We return from qemu_coroutine_switch() when the fiber is reused and
> simply run another iteration of the while loop.
>
> Why do you say win32 coroutines should disable the pool?
>
> Stefan

QEMU MinGW binaries with coroutine pool simply crash, and disabling the
pool helps
for the moment until we have a better fix. That's a regression which was
introduced
with a patch which added the pool for win32.

See this discussion thread for more details:
http://lists.nongnu.org/archive/html/qemu-devel/2013-09/msg04195.html

Regards,
Stefan
Stefan Hajnoczi Sept. 30, 2013, 9:06 a.m. UTC | #9
On Fri, Sep 27, 2013 at 06:49:18PM +0200, Stefan Weil wrote:
> Am 27.09.2013 11:11, schrieb Stefan Hajnoczi:
> > On Fri, Sep 27, 2013 at 07:20:21AM +0200, Stefan Weil wrote:
> >> Am 11.09.2013 16:42, schrieb Stefan Hajnoczi:
> >>> The 'gthread' coroutine backend was written before the freelist (aka
> >>> pool) existed in qemu-coroutine.c.
> >>>
> >>> This means that every thread is expected to exit when its coroutine
> >>> terminates.  It is not possible to reuse threads from a pool.
> >>>
> >>> This patch automatically disables the pool when 'gthread' is used.  This
> >>> allows the 'gthread' backend to work again (for example,
> >>> tests/test-coroutine completes successfully instead of hanging).
> >>>
> >>> I considered implementing thread reuse but I don't want quirks like CPU
> >>> affinity differences due to coroutine threads being recycled.  The
> >>> 'gthread' backend is a reference backend and it's therefore okay to skip
> >>> the pool optimization.
> >>>
> >>> Note this patch also makes it easy to toggle the pool for benchmarking
> >>> purposes:
> >>>
> >>>   ./configure --with-coroutine-backend=ucontext \
> >>>               --disable-coroutine-pool
> >>>
> >>> Reported-by: Gabriel Kerneis <gabriel@kerneis.info>
> >>> Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
> >>> ---
> >>>  configure        | 24 ++++++++++++++++++++++++
> >>>  qemu-coroutine.c | 34 +++++++++++++++++++---------------
> >>>  2 files changed, 43 insertions(+), 15 deletions(-)
> >>>
> >> This patch is important for QEMU 1.5 as well, but needs some
> >> modifications there.
> >> A recent bug report for MinGW shows that the win32 coroutine needs it, too.
> > coroutine-win32.c is designed to support reuse:
> >
> > static void CALLBACK coroutine_trampoline(void *co_)
> > {
> >     Coroutine *co = co_;
> >
> >     while (true) {
> >         co->entry(co->entry_arg);
> >         qemu_coroutine_switch(co, co->caller, COROUTINE_TERMINATE);
> >     }
> > }
> >
> > We return from qemu_coroutine_switch() when the fiber is reused and
> > simply run another iteration of the while loop.
> >
> > Why do you say win32 coroutines should disable the pool?
> >
> > Stefan
> 
> QEMU MinGW binaries with coroutine pool simply crash, and disabling the
> pool helps
> for the moment until we have a better fix. That's a regression which was
> introduced
> with a patch which added the pool for win32.
> 
> See this discussion thread for more details:
> http://lists.nongnu.org/archive/html/qemu-devel/2013-09/msg04195.html

I'd like to fix the win32 coroutine backend.

In your email you said it can be reproduced under wine on Linux.  I
wasn't able to reproduce it with this command-line:

  $ wine qemu-system-x86_64.exe -m 1024 -vnc :1 -L ../pc-bios -hda ../test.img

Stefan: Do you have a ./configure and QEMU command-line which reproduces
the assertion failure under wine?

Stefan
Stefan Weil Oct. 1, 2013, 5:51 a.m. UTC | #10
Am 30.09.2013 11:06, schrieb Stefan Hajnoczi:
> On Fri, Sep 27, 2013 at 06:49:18PM +0200, Stefan Weil wrote:
>> Am 27.09.2013 11:11, schrieb Stefan Hajnoczi:
>>> On Fri, Sep 27, 2013 at 07:20:21AM +0200, Stefan Weil wrote:
>>>> Am 11.09.2013 16:42, schrieb Stefan Hajnoczi:
>>>>> The 'gthread' coroutine backend was written before the freelist (aka
>>>>> pool) existed in qemu-coroutine.c.
>>>>>
>>>>> This means that every thread is expected to exit when its coroutine
>>>>> terminates.  It is not possible to reuse threads from a pool.
>>>>>
>>>>> This patch automatically disables the pool when 'gthread' is used.  This
>>>>> allows the 'gthread' backend to work again (for example,
>>>>> tests/test-coroutine completes successfully instead of hanging).
>>>>>
>>>>> I considered implementing thread reuse but I don't want quirks like CPU
>>>>> affinity differences due to coroutine threads being recycled.  The
>>>>> 'gthread' backend is a reference backend and it's therefore okay to skip
>>>>> the pool optimization.
>>>>>
>>>>> Note this patch also makes it easy to toggle the pool for benchmarking
>>>>> purposes:
>>>>>
>>>>>   ./configure --with-coroutine-backend=ucontext \
>>>>>               --disable-coroutine-pool
>>>>>
>>>>> Reported-by: Gabriel Kerneis <gabriel@kerneis.info>
>>>>> Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
>>>>> ---
>>>>>  configure        | 24 ++++++++++++++++++++++++
>>>>>  qemu-coroutine.c | 34 +++++++++++++++++++---------------
>>>>>  2 files changed, 43 insertions(+), 15 deletions(-)
>>>>>
>>>> This patch is important for QEMU 1.5 as well, but needs some
>>>> modifications there.
>>>> A recent bug report for MinGW shows that the win32 coroutine needs it, too.
>>> coroutine-win32.c is designed to support reuse:
>>>
>>> static void CALLBACK coroutine_trampoline(void *co_)
>>> {
>>>     Coroutine *co = co_;
>>>
>>>     while (true) {
>>>         co->entry(co->entry_arg);
>>>         qemu_coroutine_switch(co, co->caller, COROUTINE_TERMINATE);
>>>     }
>>> }
>>>
>>> We return from qemu_coroutine_switch() when the fiber is reused and
>>> simply run another iteration of the while loop.
>>>
>>> Why do you say win32 coroutines should disable the pool?
>>>
>>> Stefan
>> QEMU MinGW binaries with coroutine pool simply crash, and disabling the
>> pool helps
>> for the moment until we have a better fix. That's a regression which was
>> introduced
>> with a patch which added the pool for win32.
>>
>> See this discussion thread for more details:
>> http://lists.nongnu.org/archive/html/qemu-devel/2013-09/msg04195.html
> I'd like to fix the win32 coroutine backend.
>
> In your email you said it can be reproduced under wine on Linux.  I
> wasn't able to reproduce it with this command-line:
>
>   $ wine qemu-system-x86_64.exe -m 1024 -vnc :1 -L ../pc-bios -hda ../test.img
>
> Stefan: Do you have a ./configure and QEMU command-line which reproduces
> the assertion failure under wine?
>
> Stefan

Here is my current test environment:

* Debian wheezy x86_64 host (running on a KVM server)
* latest QEMU sources

$ ./configure' '--cross-prefix=i686-w64-mingw32-'
'--enable-trace-backend=stderr' --target-list=i386-softmmu && make
# Get Debian installer CDROM mini.iso.
$ wine i386-softmmu/qemu-system-i386 -L pc-bios -cdrom /var/tmp/mini.iso
-sdl # or -vnc :1

=> Assertion in qemu-coroutine-lock.c:99.

I also tested a similar scenario on an ASUS netbook with Ubuntu some
days ago and got the same result.

Regards,
Stefan
Paolo Bonzini Oct. 1, 2013, 7:21 a.m. UTC | #11
Il 30/09/2013 11:06, Stefan Hajnoczi ha scritto:
> I'd like to fix the win32 coroutine backend.
> 
> In your email you said it can be reproduced under wine on Linux.  I
> wasn't able to reproduce it with this command-line:
> 
>   $ wine qemu-system-x86_64.exe -m 1024 -vnc :1 -L ../pc-bios -hda ../test.img
> 
> Stefan: Do you have a ./configure and QEMU command-line which reproduces
> the assertion failure under wine?

Neither could I.  The patch was definitely tested under Wine.

Paolo
Gabriel Kerneis Oct. 1, 2013, 7:29 a.m. UTC | #12
On Tue, Oct 01, 2013 at 07:51:24AM +0200, Stefan Weil wrote:
> $ wine i386-softmmu/qemu-system-i386 -L pc-bios -cdrom /var/tmp/mini.iso
> -sdl # or -vnc :1
> 
> => Assertion in qemu-coroutine-lock.c:99.

Could you please provide the backtrace for this?  I am curious which of the uses
of qemu_co_queue_restart_all() fails.  I wonder if it is a bug in the
implementation of qemu_in_coroutine() on your platform, or really a rare
interleaving which exhibits a bug in the logic of coroutine functions.
Stefan Weil Oct. 1, 2013, 4:44 p.m. UTC | #13
Am 01.10.2013 09:29, schrieb Gabriel Kerneis:
> On Tue, Oct 01, 2013 at 07:51:24AM +0200, Stefan Weil wrote:
>> $ wine i386-softmmu/qemu-system-i386 -L pc-bios -cdrom /var/tmp/mini.iso
>> -sdl # or -vnc :1
>>
>> => Assertion in qemu-coroutine-lock.c:99.
> Could you please provide the backtrace for this?  I am curious which of the uses
> of qemu_co_queue_restart_all() fails.  I wonder if it is a bug in the
> implementation of qemu_in_coroutine() on your platform, or really a rare
> interleaving which exhibits a bug in the logic of coroutine functions.
>

Here is a GDB protocol. Build environment:

Windows 7 (64 bit) host
MinGW toolchain (not MinGW-w64 which I usually prefer)
./configure && make

$ gdb --args bin/ndebug/mingw32/i386-softmmu/qemu-system-i386 -L pc-bios
-cdrom mini.iso
GNU gdb (GDB) 7.5
Copyright (C) 2012 Free Software Foundation, Inc.
License GPLv3+: GNU GPL version 3 or later
<http://gnu.org/licenses/gpl.html>
This is free software: you are free to change and redistribute it.
There is NO WARRANTY, to the extent permitted by law.  Type "show copying"
and "show warranty" for details.
This GDB was configured as "i686-pc-mingw32".
For bug reporting instructions, please see:
<http://www.gnu.org/software/gdb/bugs/>...
Reading symbols from
c:\home\stefan\src\qemu\qemu.org\qemu\bin\ndebug\mingw32\i386-softmmu\qemu-system-i386.exe...done.
(gdb) b abort
Breakpoint 1 at 0x6b0070
(gdb) r
Starting program:
c:\home\stefan\src\qemu\qemu.org\qemu\bin\ndebug\mingw32\i386-softmmu\qemu-system-i386.exe
-L pc-bios -cdrom mini.iso
[New Thread 4120.0x19c4]
[New Thread 4120.0x1724]
[New Thread 4120.0x1a98]
[New Thread 4120.0x1854]
[New Thread 4120.0x1bf8]
VNC server running on `::1:5900'
Assertion failed: qemu_in_coroutine(), file
c:/home/stefan/src/qemu/qemu.org/qemu/qemu-coroutine-lock.c, line 99
[Switching to Thread 4120.0x1a98]

Breakpoint 1, 0x76118e76 in msvcrt!abort ()
   from C:\Windows\syswow64\msvcrt.dll
(gdb) i s
#0  0x76118e76 in msvcrt!abort () from C:\Windows\syswow64\msvcrt.dll
#1  0x7611680c in msvcrt!_assert () from C:\Windows\syswow64\msvcrt.dll
#2  0x00518f2d in qemu_co_queue_restart_all (queue=queue@entry=0x6d3fe90)
    at c:/home/stefan/src/qemu/qemu.org/qemu/qemu-coroutine-lock.c:99
#3  0x0040ee81 in tracked_request_end (req=0x6d3fe6c)
    at c:/home/stefan/src/qemu/qemu.org/qemu/block.c:1963
#4  bdrv_co_do_readv (bs=0x3057658, sector_num=<optimized out>,
nb_sectors=4,
    qiov=0x733f9d0, flags=<optimized out>)
    at c:/home/stefan/src/qemu/qemu.org/qemu/block.c:2675
#5  0x0040ee52 in bdrv_co_do_readv (bs=0x3055c10, sector_num=<optimized
out>,
    nb_sectors=4, qiov=0x733f9d0, flags=<optimized out>)
    at c:/home/stefan/src/qemu/qemu.org/qemu/block.c:2645
#6  0x0040ffbc in bdrv_rw_co_entry (opaque=0x733f968)
    at c:/home/stefan/src/qemu/qemu.org/qemu/block.c:2276
#7  0x00441be8 in coroutine_trampoline (co_=0x3058958)
    at c:/home/stefan/src/qemu/qemu.org/qemu/coroutine-win32.c:57
#8  0x7549bfa2 in KERNEL32!GetQueuedCompletionStatus ()
   from C:\Windows\syswow64\kernel32.dll
#9  0x03058958 in ?? ()
#10 0x7549bf5a in KERNEL32!GetQueuedCompletionStatus ()
   from C:\Windows\syswow64\kernel32.dll
#11 0x014feff0 in ?? ()
Stefan Hajnoczi Oct. 2, 2013, 8:54 a.m. UTC | #14
On Tue, Oct 01, 2013 at 06:44:54PM +0200, Stefan Weil wrote:
> Am 01.10.2013 09:29, schrieb Gabriel Kerneis:
> > On Tue, Oct 01, 2013 at 07:51:24AM +0200, Stefan Weil wrote:
> >> $ wine i386-softmmu/qemu-system-i386 -L pc-bios -cdrom /var/tmp/mini.iso
> >> -sdl # or -vnc :1
> >>
> >> => Assertion in qemu-coroutine-lock.c:99.
> > Could you please provide the backtrace for this?  I am curious which of the uses
> > of qemu_co_queue_restart_all() fails.  I wonder if it is a bug in the
> > implementation of qemu_in_coroutine() on your platform, or really a rare
> > interleaving which exhibits a bug in the logic of coroutine functions.
> >
> 
> Here is a GDB protocol. Build environment:
> 
> Windows 7 (64 bit) host
> MinGW toolchain (not MinGW-w64 which I usually prefer)
> ./configure && make
> 
> $ gdb --args bin/ndebug/mingw32/i386-softmmu/qemu-system-i386 -L pc-bios
> -cdrom mini.iso
> GNU gdb (GDB) 7.5
> Copyright (C) 2012 Free Software Foundation, Inc.
> License GPLv3+: GNU GPL version 3 or later
> <http://gnu.org/licenses/gpl.html>
> This is free software: you are free to change and redistribute it.
> There is NO WARRANTY, to the extent permitted by law.  Type "show copying"
> and "show warranty" for details.
> This GDB was configured as "i686-pc-mingw32".
> For bug reporting instructions, please see:
> <http://www.gnu.org/software/gdb/bugs/>...
> Reading symbols from
> c:\home\stefan\src\qemu\qemu.org\qemu\bin\ndebug\mingw32\i386-softmmu\qemu-system-i386.exe...done.
> (gdb) b abort
> Breakpoint 1 at 0x6b0070
> (gdb) r
> Starting program:
> c:\home\stefan\src\qemu\qemu.org\qemu\bin\ndebug\mingw32\i386-softmmu\qemu-system-i386.exe
> -L pc-bios -cdrom mini.iso
> [New Thread 4120.0x19c4]
> [New Thread 4120.0x1724]
> [New Thread 4120.0x1a98]
> [New Thread 4120.0x1854]
> [New Thread 4120.0x1bf8]
> VNC server running on `::1:5900'
> Assertion failed: qemu_in_coroutine(), file
> c:/home/stefan/src/qemu/qemu.org/qemu/qemu-coroutine-lock.c, line 99
> [Switching to Thread 4120.0x1a98]
> 
> Breakpoint 1, 0x76118e76 in msvcrt!abort ()
>    from C:\Windows\syswow64\msvcrt.dll
> (gdb) i s
> #0  0x76118e76 in msvcrt!abort () from C:\Windows\syswow64\msvcrt.dll
> #1  0x7611680c in msvcrt!_assert () from C:\Windows\syswow64\msvcrt.dll
> #2  0x00518f2d in qemu_co_queue_restart_all (queue=queue@entry=0x6d3fe90)
>     at c:/home/stefan/src/qemu/qemu.org/qemu/qemu-coroutine-lock.c:99
> #3  0x0040ee81 in tracked_request_end (req=0x6d3fe6c)
>     at c:/home/stefan/src/qemu/qemu.org/qemu/block.c:1963
> #4  bdrv_co_do_readv (bs=0x3057658, sector_num=<optimized out>,
> nb_sectors=4,
>     qiov=0x733f9d0, flags=<optimized out>)
>     at c:/home/stefan/src/qemu/qemu.org/qemu/block.c:2675
> #5  0x0040ee52 in bdrv_co_do_readv (bs=0x3055c10, sector_num=<optimized
> out>,
>     nb_sectors=4, qiov=0x733f9d0, flags=<optimized out>)
>     at c:/home/stefan/src/qemu/qemu.org/qemu/block.c:2645
> #6  0x0040ffbc in bdrv_rw_co_entry (opaque=0x733f968)
>     at c:/home/stefan/src/qemu/qemu.org/qemu/block.c:2276
> #7  0x00441be8 in coroutine_trampoline (co_=0x3058958)
>     at c:/home/stefan/src/qemu/qemu.org/qemu/coroutine-win32.c:57
> #8  0x7549bfa2 in KERNEL32!GetQueuedCompletionStatus ()
>    from C:\Windows\syswow64\kernel32.dll
> #9  0x03058958 in ?? ()
> #10 0x7549bf5a in KERNEL32!GetQueuedCompletionStatus ()
>    from C:\Windows\syswow64\kernel32.dll
> #11 0x014feff0 in ?? ()

This is an interesting backtrace.  The 'current' thread-local variable
from coroutine-win32.c is NULL or doesn't have a caller assigned.

Please post 'thread apply all bt' so we can identify the other threads.

Stefan
Stefan Weil Oct. 2, 2013, 9:15 p.m. UTC | #15
Am 02.10.2013 10:54, schrieb Stefan Hajnoczi:
> This is an interesting backtrace.  The 'current' thread-local variable
> from coroutine-win32.c is NULL or doesn't have a caller assigned.
>
> Please post 'thread apply all bt' so we can identify the other threads.
>
> Stefan

The other threads don't have a reasonable backtrace:

(gdb) thread apply all bt

Thread 3 (Thread 5708.0x15bc):
#0  0x76118e76 in msvcrt!abort () from C:\Windows\syswow64\msvcrt.dll
#1  0x7611680c in msvcrt!_assert () from C:\Windows\syswow64\msvcrt.dll
#2  0x0051957d in qemu_co_queue_restart_all (queue=queue@entry=0x6e5fe90)
    at c:/cygwin/home/stweil/src/qemu/qemu.org/qemu/qemu-coroutine-lock.c:99
#3  0x0040f4d1 in tracked_request_end (req=0x6e5fe6c)
    at c:/cygwin/home/stweil/src/qemu/qemu.org/qemu/block.c:1963
#4  bdrv_co_do_readv (bs=0x31e78f0, sector_num=<optimized out>,
nb_sectors=4,
    qiov=0x745f9d0, flags=<optimized out>)
    at c:/cygwin/home/stweil/src/qemu/qemu.org/qemu/block.c:2675
#5  0x0040f4a2 in bdrv_co_do_readv (bs=0x31e5ea8, sector_num=<optimized
out>,
    nb_sectors=4, qiov=0x745f9d0, flags=<optimized out>)
    at c:/cygwin/home/stweil/src/qemu/qemu.org/qemu/block.c:2645
#6  0x0041060c in bdrv_rw_co_entry (opaque=0x745f968)
    at c:/cygwin/home/stweil/src/qemu/qemu.org/qemu/block.c:2276
#7  0x00442238 in coroutine_trampoline (co_=0x31e8bf0)
    at c:/cygwin/home/stweil/src/qemu/qemu.org/qemu/coroutine-win32.c:57
#8  0x7549bfa2 in KERNEL32!GetQueuedCompletionStatus ()
   from C:\Windows\syswow64\kernel32.dll
#9  0x031e8bf0 in ?? ()
#10 0x7549bf5a in KERNEL32!GetQueuedCompletionStatus ()
   from C:\Windows\syswow64\kernel32.dll
#11 0x016d24e0 in ?? ()

Thread 2 (Thread 5708.0xe1c):
#0  0x7749f8d1 in ntdll!RtlUpdateClonedSRWLock ()
   from C:\Windows\system32\ntdll.dll
#1  0x7749f8d1 in ntdll!RtlUpdateClonedSRWLock ()
   from C:\Windows\system32\ntdll.dll
#2  0x759c149d in WaitForSingleObjectEx ()
   from C:\Windows\syswow64\KernelBase.dll
#3  0x00000150 in ?? ()
#4  0x00000000 in ?? ()

Thread 1 (Thread 5708.0x1600):
#0  0x7749f8d1 in ntdll!RtlUpdateClonedSRWLock ()
   from C:\Windows\system32\ntdll.dll
#1  0x7749f8d1 in ntdll!RtlUpdateClonedSRWLock ()
   from C:\Windows\system32\ntdll.dll
#2  0x774b8e44 in ntdll!TpCallbackMayRunLong ()
   from C:\Windows\system32\ntdll.dll
#3  0x00000160 in ?? ()
#4  0x00000000 in ?? ()
Stefan Hajnoczi Oct. 4, 2013, 7:30 a.m. UTC | #16
On Wed, Oct 02, 2013 at 11:15:56PM +0200, Stefan Weil wrote:
> Am 02.10.2013 10:54, schrieb Stefan Hajnoczi:
> > This is an interesting backtrace.  The 'current' thread-local variable
> > from coroutine-win32.c is NULL or doesn't have a caller assigned.
> >
> > Please post 'thread apply all bt' so we can identify the other threads.
> >
> > Stefan
> 
> The other threads don't have a reasonable backtrace:

Okay.  I've been able to reproduce this with the Debian netinst i386 ISO
and mingw32.

I'll take a look at fixing this.

Stefan
diff mbox

Patch

diff --git a/configure b/configure
index e989609..5cf6341 100755
--- a/configure
+++ b/configure
@@ -235,6 +235,7 @@  guest_agent=""
 want_tools="yes"
 libiscsi=""
 coroutine=""
+coroutine_pool=""
 seccomp=""
 glusterfs=""
 glusterfs_discard="no"
@@ -875,6 +876,10 @@  for opt do
   ;;
   --with-coroutine=*) coroutine="$optarg"
   ;;
+  --disable-coroutine-pool) coroutine_pool="no"
+  ;;
+  --enable-coroutine-pool) coroutine_pool="yes"
+  ;;
   --disable-docs) docs="no"
   ;;
   --enable-docs) docs="yes"
@@ -1161,6 +1166,8 @@  echo "  --disable-seccomp        disable seccomp support"
 echo "  --enable-seccomp         enables seccomp support"
 echo "  --with-coroutine=BACKEND coroutine backend. Supported options:"
 echo "                           gthread, ucontext, sigaltstack, windows"
+echo "  --disable-coroutine-pool disable coroutine freelist (worse performance)"
+echo "  --enable-coroutine-pool  enable coroutine freelist (better performance)"
 echo "  --enable-glusterfs       enable GlusterFS backend"
 echo "  --disable-glusterfs      disable GlusterFS backend"
 echo "  --enable-gcov            enable test coverage analysis with gcov"
@@ -3279,6 +3286,17 @@  else
   esac
 fi
 
+if test "$coroutine_pool" = ""; then
+  if test "$coroutine" = "gthread"; then
+    coroutine_pool=no
+  else
+    coroutine_pool=yes
+  fi
+fi
+if test "$coroutine" = "gthread" -a "$coroutine_pool" = "yes"; then
+  error_exit "'gthread' coroutine backend does not support pool (use --disable-coroutine-pool)"
+fi
+
 ##########################################
 # check if we have open_by_handle_at
 
@@ -3644,6 +3662,7 @@  echo "libiscsi support  $libiscsi"
 echo "build guest agent $guest_agent"
 echo "seccomp support   $seccomp"
 echo "coroutine backend $coroutine"
+echo "coroutine pool    $coroutine_pool"
 echo "GlusterFS support $glusterfs"
 echo "virtio-blk-data-plane $virtio_blk_data_plane"
 echo "gcov              $gcov_tool"
@@ -3999,6 +4018,11 @@  if test "$rbd" = "yes" ; then
 fi
 
 echo "CONFIG_COROUTINE_BACKEND=$coroutine" >> $config_host_mak
+if test "$coroutine_pool" = "yes" ; then
+  echo "CONFIG_COROUTINE_POOL=1" >> $config_host_mak
+else
+  echo "CONFIG_COROUTINE_POOL=0" >> $config_host_mak
+fi
 
 if test "$open_by_handle_at" = "yes" ; then
   echo "CONFIG_OPEN_BY_HANDLE=y" >> $config_host_mak
diff --git a/qemu-coroutine.c b/qemu-coroutine.c
index 423430d..4708521 100644
--- a/qemu-coroutine.c
+++ b/qemu-coroutine.c
@@ -30,15 +30,17 @@  static unsigned int pool_size;
 
 Coroutine *qemu_coroutine_create(CoroutineEntry *entry)
 {
-    Coroutine *co;
-
-    qemu_mutex_lock(&pool_lock);
-    co = QSLIST_FIRST(&pool);
-    if (co) {
-        QSLIST_REMOVE_HEAD(&pool, pool_next);
-        pool_size--;
+    Coroutine *co = NULL;
+
+    if (CONFIG_COROUTINE_POOL) {
+        qemu_mutex_lock(&pool_lock);
+        co = QSLIST_FIRST(&pool);
+        if (co) {
+            QSLIST_REMOVE_HEAD(&pool, pool_next);
+            pool_size--;
+        }
+        qemu_mutex_unlock(&pool_lock);
     }
-    qemu_mutex_unlock(&pool_lock);
 
     if (!co) {
         co = qemu_coroutine_new();
@@ -51,15 +53,17 @@  Coroutine *qemu_coroutine_create(CoroutineEntry *entry)
 
 static void coroutine_delete(Coroutine *co)
 {
-    qemu_mutex_lock(&pool_lock);
-    if (pool_size < POOL_MAX_SIZE) {
-        QSLIST_INSERT_HEAD(&pool, co, pool_next);
-        co->caller = NULL;
-        pool_size++;
+    if (CONFIG_COROUTINE_POOL) {
+        qemu_mutex_lock(&pool_lock);
+        if (pool_size < POOL_MAX_SIZE) {
+            QSLIST_INSERT_HEAD(&pool, co, pool_next);
+            co->caller = NULL;
+            pool_size++;
+            qemu_mutex_unlock(&pool_lock);
+            return;
+        }
         qemu_mutex_unlock(&pool_lock);
-        return;
     }
-    qemu_mutex_unlock(&pool_lock);
 
     qemu_coroutine_delete(co);
 }