Patchwork [1/3] coroutine: adding sigaltstack method (.c source)

login
register
mail settings
Submitter Alex Barcelo
Date Feb. 13, 2012, 2:42 p.m.
Message ID <1329144150-7720-2-git-send-email-abarcelo@ac.upc.edu>
Download mbox | patch
Permalink /patch/140911/
State New
Headers show

Comments

Alex Barcelo - Feb. 13, 2012, 2:42 p.m.
This file is based in both coroutine-ucontext.c and
pth_mctx.c (from the GNU Portable Threads library).

The mechanism used to change stacks is the sigaltstack
function (variant 2 of the pth library).

Signed-off-by: Alex Barcelo <abarcelo@ac.upc.edu>
---
 coroutine-sigaltstack.c |  337 +++++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 337 insertions(+), 0 deletions(-)
 create mode 100644 coroutine-sigaltstack.c
Paolo Bonzini - Feb. 13, 2012, 2:57 p.m.
On 02/13/2012 03:42 PM, Alex Barcelo wrote:
> This file is based in both coroutine-ucontext.c and
> pth_mctx.c (from the GNU Portable Threads library).
>
> The mechanism used to change stacks is the sigaltstack
> function (variant 2 of the pth library).
>
> Signed-off-by: Alex Barcelo <abarcelo@ac.upc.edu>
> ---
>  coroutine-sigaltstack.c |  337 +++++++++++++++++++++++++++++++++++++++++++++++
>  1 files changed, 337 insertions(+), 0 deletions(-)
>  create mode 100644 coroutine-sigaltstack.c
>
> diff --git a/coroutine-sigaltstack.c b/coroutine-sigaltstack.c
> new file mode 100644
> index 0000000..1d4f26d
> --- /dev/null
> +++ b/coroutine-sigaltstack.c
> @@ -0,0 +1,337 @@
> +/*
> + * sigaltstack coroutine initialization code
> + *
> + * Copyright (C) 2006  Anthony Liguori <anthony@codemonkey.ws>
> + * Copyright (C) 2011  Kevin Wolf <kwolf@redhat.com>
> + * Copyright (C) 2012  Alex Barcelo <abarcelo@ac.upc.edu>
> + *
> + * This library is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.0 of the License, or (at your option) any later version.
> + *
> + * This library is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with this library; if not, see <http://www.gnu.org/licenses/>.
> + */
> +
> +/*
> +** This file is partly based on pth_mctx.c, from the GNU Portable Threads
> +**  Copyright (c) 1999-2006 Ralf S. Engelschall <rse@engelschall.com>
> +**  Same license (version 2.1 or later)
> +*/
> +
> +/* XXX Is there a nicer way to disable glibc's stack check for longjmp? */
> +#ifdef _FORTIFY_SOURCE
> +#undef _FORTIFY_SOURCE
> +#endif
> +#include <stdlib.h>
> +#include <setjmp.h>
> +#include <stdint.h>
> +#include <pthread.h>
> +#include <signal.h>
> +#include "qemu-common.h"
> +#include "qemu-coroutine-int.h"
> +
> +enum {
> +    /* Maximum free pool size prevents holding too many freed coroutines */
> +    POOL_MAX_SIZE = 64,
> +};
> +
> +/** Free list to speed up creation */
> +static QLIST_HEAD(, Coroutine) pool = QLIST_HEAD_INITIALIZER(pool);
> +static unsigned int pool_size;
> +
> +typedef struct {
> +    Coroutine base;
> +    void *stack;
> +    jmp_buf env;
> +} CoroutineUContext;
> +
> +/**
> + * Per-thread coroutine bookkeeping
> + */
> +typedef struct {
> +    /** Currently executing coroutine */
> +    Coroutine *current;
> +
> +    /** The default coroutine */
> +    CoroutineUContext leader;
> +} CoroutineThreadState;
> +
> +static pthread_key_t thread_state_key;
> +
> +/*
> + * the way to pass information to the signal handler (trampoline)
> + * It's not thread-safe, as can be seen, but there is no other simple way.
> + */
> +static volatile jmp_buf      tr_reenter;
> +static volatile sig_atomic_t tr_called;

Unlike pth, we can assume thread-local storage:   these should be placed 
in CoroutineThreadState and coroutine_get_thread_state() used to access 
them.

> +    /*
> +     * Preserve the SIGUSR1 signal state, block SIGUSR1,
> +     * and establish our signal handler. The signal will
> +     * later transfer control onto the signal stack.
> +     */

We're already using SIGUSR1.  Can you switch to SIGUSR2?

> +    sigemptyset(&sigs);
> +    sigaddset(&sigs, SIGUSR1);
> +    sigprocmask(SIG_BLOCK, &sigs, &osigs);

This should be pthread_sigmask.

> +    /*
> +     * Restore the old SIGUSR1 signal handler and mask
> +     */
> +    sigaction(SIGUSR1, &osa, NULL);
> +    sigprocmask(SIG_SETMASK, &osigs, NULL);
> +
> +    /*
> +     * Now enter the trampoline again, but this time not as a signal
> +     * handler. Instead we jump into it directly. The functionally
> +     * redundant ping-pong pointer arithmentic is neccessary to avoid
> +     * type-conversion warnings related to the `volatile' qualifier and
> +     * the fact that `jmp_buf' usually is an array type.
> +     */
> +    if (!setjmp(old_env)) {
> +        longjmp(*((jmp_buf *)&tr_reenter), 1);
> +    }

Use thread-local storage and you'll be able to remove this ugliness,
too.

Overall it looks good, however I think if it is good it should replace 
coroutine-ucontext.c altogether.  Other thoughts?

Paolo
Andreas Färber - Feb. 13, 2012, 3:57 p.m.
Am 13.02.2012 15:42, schrieb Alex Barcelo:
> This file is based in both coroutine-ucontext.c and
> pth_mctx.c (from the GNU Portable Threads library).
> 
> The mechanism used to change stacks is the sigaltstack
> function (variant 2 of the pth library).
> 
> Signed-off-by: Alex Barcelo <abarcelo@ac.upc.edu>
> ---
>  coroutine-sigaltstack.c |  337 +++++++++++++++++++++++++++++++++++++++++++++++
>  1 files changed, 337 insertions(+), 0 deletions(-)
>  create mode 100644 coroutine-sigaltstack.c
> 
> diff --git a/coroutine-sigaltstack.c b/coroutine-sigaltstack.c
> new file mode 100644
> index 0000000..1d4f26d
> --- /dev/null
> +++ b/coroutine-sigaltstack.c
> @@ -0,0 +1,337 @@
> +/*
> + * sigaltstack coroutine initialization code
> + *
> + * Copyright (C) 2006  Anthony Liguori <anthony@codemonkey.ws>
> + * Copyright (C) 2011  Kevin Wolf <kwolf@redhat.com>
> + * Copyright (C) 2012  Alex Barcelo <abarcelo@ac.upc.edu>
> + *
> + * This library is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.0 of the License, or (at your option) any later version.
> + *
> + * This library is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with this library; if not, see <http://www.gnu.org/licenses/>.
> + */
> +
> +/*
> +** This file is partly based on pth_mctx.c, from the GNU Portable Threads
> +**  Copyright (c) 1999-2006 Ralf S. Engelschall <rse@engelschall.com>
> +**  Same license (version 2.1 or later)
> +*/

You should (need to?) use version 2.1 or later above then, too. You can
then simply move this snippet up and drop the "Same license ..." line.

Andreas
Alex Barcelo - Feb. 13, 2012, 4:11 p.m.
On Mon, Feb 13, 2012 at 16:57, Andreas Färber <afaerber@suse.de> wrote:
> You should (need to?) use version 2.1 or later above then, too. You can
> then simply move this snippet up and drop the "Same license ..." line.

I wanted to ask this, but it slipped my mind. So it's ok to change the
header to a newer GNU version. I will update it in the PATCH v2.
Andreas Färber - Feb. 13, 2012, 4:31 p.m.
Am 13.02.2012 17:11, schrieb Alex Barcelo:
> On Mon, Feb 13, 2012 at 16:57, Andreas Färber <afaerber@suse.de> wrote:
>> You should (need to?) use version 2.1 or later above then, too. You can
>> then simply move this snippet up and drop the "Same license ..." line.
> 
> I wanted to ask this, but it slipped my mind. So it's ok to change the
> header to a newer GNU version. I will update it in the PATCH v2.

IANAL. It looked like you were adding a new file, so you can choose any
Open Source license that's compatible with the code that gets linked
together. Coroutines need to be compatible with parts of QEMU under
GPLv2, so LGPLv2.1 is the newest we can go AFAIU.

Further, there is no GNU Lesser General Public License 2.0, only 2.1:
http://www.gnu.org/licenses/lgpl-2.1.html

2.0 was the GNU Library General Public License:
http://www.gnu.org/licenses/old-licenses/lgpl-2.0.html

So this may even just be a typo somewhere.

Andreas
Andreas Färber - Feb. 13, 2012, 10:20 p.m.
Am 13.02.2012 17:31, schrieb Andreas Färber:
> Further, there is no GNU Lesser General Public License 2.0, only 2.1:
> http://www.gnu.org/licenses/lgpl-2.1.html
> 
> 2.0 was the GNU Library General Public License:
> http://www.gnu.org/licenses/old-licenses/lgpl-2.0.html
> 
> So this may even just be a typo somewhere.

Actually FWIW, I noticed that the German version of the license page has
the s/Lesser/Library/ error while the English and Japanese ones do not:

http://www.gnu.org/licenses/old-licenses/lgpl-2.1.de.html

http://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html
http://www.gnu.org/licenses/old-licenses/lgpl-2.1.ja.html

Andreas
Stefan Hajnoczi - Feb. 14, 2012, 9:24 a.m.
On Mon, Feb 13, 2012 at 03:42:28PM +0100, Alex Barcelo wrote:
> +/*
> + * This is used as the signal handler. This is called with the brand new stack
> + * (thanks to sigaltstack). We have to return, given that this is a signal
> + * handler and the sigmask and some other things are changed.
> + */
> +static void coroutine_trampoline(int signal)
> +{
> +    CoroutineUContext *self;
> +    Coroutine *co;
> +
> +    /* This will break on multithread or in any race condition */
> +    self = ptr_for_handler;
> +    tr_called = 1;
> +    co = &self->base;
> +
> +    /*
> +     * Here we have to do a bit of a ping pong between the caller, given that
> +     * this is a signal handler and we have to do a return "soon". Then the
> +     * caller can reestablish everything and do a longjmp here again.
> +     */
> +    if (!setjmp(*((jmp_buf *)&tr_reenter))) {
> +        return;
> +    }

setjmp() followed by return is usually bad.  We're relying on the fact
that the return code path here does not clobber local variables 'self'
and 'co'.  Can't we longjmp out back to the coroutine_new() function
instead?

> +static Coroutine *coroutine_new(void)
> +{
> +    const size_t stack_size = 1 << 20;

This reminds me of a recent observation that QEMU is using a lot of
memory in a bursty pattern.  I wonder if coroutine stacks are the cause
for this behavior - they are pretty big.  Not a specific problem with
your implementation since we do the same for other coroutine
implementations.

> +    /*
> +     * Preserve the SIGUSR1 signal state, block SIGUSR1,
> +     * and establish our signal handler. The signal will
> +     * later transfer control onto the signal stack.
> +     */
> +    sigemptyset(&sigs);
> +    sigaddset(&sigs, SIGUSR1);
> +    sigprocmask(SIG_BLOCK, &sigs, &osigs);
> +    sa.sa_handler = coroutine_trampoline;
> +    sigfillset(&sa.sa_mask);
> +    sa.sa_flags = SA_ONSTACK;
> +    if (sigaction(SIGUSR1, &sa, &osa) != 0) {
> +        abort();
> +    }
> +
> +    /*
> +     * Set the new stack.
> +     */
> +    ss.ss_sp = co->stack;
> +    ss.ss_size = stack_size;
> +    ss.ss_flags = 0;
> +    if (sigaltstack(&ss, &oss) < 0) {
> +        abort();
> +    }
> +
> +    /*
> +     * Now transfer control onto the signal stack and set it up.
> +     * It will return immediately via "return" after the setjmp()
> +     * was performed. Be careful here with race conditions.  The
> +     * signal can be delivered the first time sigsuspend() is
> +     * called.
> +     */
> +    tr_called = 0;
> +    kill(getpid(), SIGUSR1);
> +    sigfillset(&sigs);
> +    sigdelset(&sigs, SIGUSR1);
> +    while (!tr_called) {
> +        sigsuspend(&sigs);
> +    }
> +
> +    /*
> +     * Inform the system that we are back off the signal stack by
> +     * removing the alternative signal stack. Be careful here: It
> +     * first has to be disabled, before it can be removed.
> +     */
> +    sigaltstack(NULL, &ss);

What happens when a vcpu thread creates a coroutine while another QEMU
thread raises SIG_IPI?  The SIG_IPI will be handled on the alternate
signal stack and could corrupt the coroutine if the signal is handled
between sigsuspend(2) and sigaltstack(2).

Stefan
Paolo Bonzini - Feb. 14, 2012, 9:50 a.m.
On 02/14/2012 10:24 AM, Stefan Hajnoczi wrote:
> setjmp() followed by return is usually bad.  We're relying on the fact
> that the return code path here does not clobber local variables 'self'
> and 'co'.  Can't we longjmp out back to the coroutine_new() function
> instead?

http://www.gnu.org/software/pth/rse-pmt.ps covers this.  Basically, this 
turned out to be more portable than longjmp from a signal handler.

Paolo
Alex Barcelo - Feb. 14, 2012, 11:53 a.m.
On Tue, Feb 14, 2012 at 10:24, Stefan Hajnoczi <stefanha@gmail.com> wrote:
> On Mon, Feb 13, 2012 at 03:42:28PM +0100, Alex Barcelo wrote:
>> +    if (!setjmp(*((jmp_buf *)&tr_reenter))) {
>> +        return;
>> +    }
>
> setjmp() followed by return is usually bad.  We're relying on the fact
> that the return code path here does not clobber local variables 'self'
> and 'co'.  Can't we longjmp out back to the coroutine_new() function
> instead?

Paolo has answered this question. But some lighter reading: man sigreturn

Somebody has to call sigreturn. The easiest way is to do a return from
the signal handler. Calling manually sigreturn it's... tricky (and "It
should *never* be called directly.", man's page dixit). Knowing that
qemu has to work in lots of platforms, I wouldn't bet on that. Or on
any clever procmask direct manipulation.

>> +static Coroutine *coroutine_new(void)
>> +{
>> +    const size_t stack_size = 1 << 20;
>
> This reminds me of a recent observation that QEMU is using a lot of
> memory in a bursty pattern.  I wonder if coroutine stacks are the cause
> for this behavior - they are pretty big.  Not a specific problem with
> your implementation since we do the same for other coroutine
> implementations.

Agree. But few coroutines are created (one or two in an average
qemu-system execution, as I have checked). So... well, not sure if
this is our guilty malloc. Anyway, as you say, the behaviour is the
same in ucontext than sigaltstack, so it should not be a matter for
this series of patches.

>> +    /*
>> +     * Preserve the SIGUSR1 signal state, block SIGUSR1,
>> +     * and establish our signal handler. The signal will
>> +     * later transfer control onto the signal stack.
>> +     */
>> +    sigemptyset(&sigs);
>> +    sigaddset(&sigs, SIGUSR1);
>> +    sigprocmask(SIG_BLOCK, &sigs, &osigs);
>> +    sa.sa_handler = coroutine_trampoline;
>> +    sigfillset(&sa.sa_mask);
>> +    sa.sa_flags = SA_ONSTACK;
>> +    if (sigaction(SIGUSR1, &sa, &osa) != 0) {
>> +        abort();
>> +    }
>> +
>> +    /*
>> +     * Set the new stack.
>> +     */
>> +    ss.ss_sp = co->stack;
>> +    ss.ss_size = stack_size;
>> +    ss.ss_flags = 0;
>> +    if (sigaltstack(&ss, &oss) < 0) {
>> +        abort();
>> +    }
>> +
>> +    /*
>> +     * Now transfer control onto the signal stack and set it up.
>> +     * It will return immediately via "return" after the setjmp()
>> +     * was performed. Be careful here with race conditions.  The
>> +     * signal can be delivered the first time sigsuspend() is
>> +     * called.
>> +     */
>> +    tr_called = 0;
>> +    kill(getpid(), SIGUSR1);
>> +    sigfillset(&sigs);
>> +    sigdelset(&sigs, SIGUSR1);
>> +    while (!tr_called) {
>> +        sigsuspend(&sigs);
>> +    }
>> +
>> +    /*
>> +     * Inform the system that we are back off the signal stack by
>> +     * removing the alternative signal stack. Be careful here: It
>> +     * first has to be disabled, before it can be removed.
>> +     */
>> +    sigaltstack(NULL, &ss);
>
> What happens when a vcpu thread creates a coroutine while another QEMU
> thread raises SIG_IPI?  The SIG_IPI will be handled on the alternate
> signal stack

mmm no, it won't. The sigaction is set for the SIGUSR1 only (yes I
have to change it to sigusr2, the V2 will have this changed). And only
this signal will be handled on an alternate stack (the sa.sa_flags is
the responsible).

I'm not really sure about that, did I miss something?

> and could corrupt the coroutine if the signal is handled
> between sigsuspend(2) and sigaltstack(2).
> Stefan
Stefan Hajnoczi - Feb. 14, 2012, 12:20 p.m.
On Tue, Feb 14, 2012 at 11:53 AM, Alex Barcelo <abarcelo@ac.upc.edu> wrote:
> On Tue, Feb 14, 2012 at 10:24, Stefan Hajnoczi <stefanha@gmail.com> wrote:
>> On Mon, Feb 13, 2012 at 03:42:28PM +0100, Alex Barcelo wrote:
>>> +    /*
>>> +     * Preserve the SIGUSR1 signal state, block SIGUSR1,
>>> +     * and establish our signal handler. The signal will
>>> +     * later transfer control onto the signal stack.
>>> +     */
>>> +    sigemptyset(&sigs);
>>> +    sigaddset(&sigs, SIGUSR1);
>>> +    sigprocmask(SIG_BLOCK, &sigs, &osigs);
>>> +    sa.sa_handler = coroutine_trampoline;
>>> +    sigfillset(&sa.sa_mask);
>>> +    sa.sa_flags = SA_ONSTACK;
>>> +    if (sigaction(SIGUSR1, &sa, &osa) != 0) {
>>> +        abort();
>>> +    }
>>> +
>>> +    /*
>>> +     * Set the new stack.
>>> +     */
>>> +    ss.ss_sp = co->stack;
>>> +    ss.ss_size = stack_size;
>>> +    ss.ss_flags = 0;
>>> +    if (sigaltstack(&ss, &oss) < 0) {
>>> +        abort();
>>> +    }
>>> +
>>> +    /*
>>> +     * Now transfer control onto the signal stack and set it up.
>>> +     * It will return immediately via "return" after the setjmp()
>>> +     * was performed. Be careful here with race conditions.  The
>>> +     * signal can be delivered the first time sigsuspend() is
>>> +     * called.
>>> +     */
>>> +    tr_called = 0;
>>> +    kill(getpid(), SIGUSR1);
>>> +    sigfillset(&sigs);
>>> +    sigdelset(&sigs, SIGUSR1);
>>> +    while (!tr_called) {
>>> +        sigsuspend(&sigs);
>>> +    }
>>> +
>>> +    /*
>>> +     * Inform the system that we are back off the signal stack by
>>> +     * removing the alternative signal stack. Be careful here: It
>>> +     * first has to be disabled, before it can be removed.
>>> +     */
>>> +    sigaltstack(NULL, &ss);
>>
>> What happens when a vcpu thread creates a coroutine while another QEMU
>> thread raises SIG_IPI?  The SIG_IPI will be handled on the alternate
>> signal stack
>
> mmm no, it won't. The sigaction is set for the SIGUSR1 only (yes I
> have to change it to sigusr2, the V2 will have this changed). And only
> this signal will be handled on an alternate stack (the sa.sa_flags is
> the responsible).
>
> I'm not really sure about that, did I miss something?

What I meant is that there are other signals handlers installed and
the signals will be unblocked between the time when sigsuspend() has
returned and before sigaltstack(NULL, &ss) is executed.  This seems
like a race condition to me.

Stefan
Stefan Hajnoczi - Feb. 14, 2012, 12:25 p.m.
On Tue, Feb 14, 2012 at 9:50 AM, Paolo Bonzini <pbonzini@redhat.com> wrote:
> On 02/14/2012 10:24 AM, Stefan Hajnoczi wrote:
>>
>> setjmp() followed by return is usually bad.  We're relying on the fact
>> that the return code path here does not clobber local variables 'self'
>> and 'co'.  Can't we longjmp out back to the coroutine_new() function
>> instead?
>
>
> http://www.gnu.org/software/pth/rse-pmt.ps covers this.  Basically, this
> turned out to be more portable than longjmp from a signal handler.

I suggest adding a comment explaining this, since this is normally not
an okay thing to do.

Stefan
Alex Barcelo - Feb. 14, 2012, 1:21 p.m.
On Tue, Feb 14, 2012 at 13:20, Stefan Hajnoczi <stefanha@gmail.com> wrote:
> On Tue, Feb 14, 2012 at 11:53 AM, Alex Barcelo <abarcelo@ac.upc.edu> wrote:
>> On Tue, Feb 14, 2012 at 10:24, Stefan Hajnoczi <stefanha@gmail.com> wrote:
>>> (...)
>>> What happens when a vcpu thread creates a coroutine while another QEMU
>>> thread raises SIG_IPI?  The SIG_IPI will be handled on the alternate
>>> signal stack
>>
>> mmm no, it won't. The sigaction is set for the SIGUSR1 only (yes I
>> have to change it to sigusr2, the V2 will have this changed). And only
>> this signal will be handled on an alternate stack (the sa.sa_flags is
>> the responsible).
>>
>> I'm not really sure about that, did I miss something?
>
> What I meant is that there are other signals handlers installed and
> the signals will be unblocked between the time when sigsuspend() has
> returned and before sigaltstack(NULL, &ss) is executed.  This seems
> like a race condition to me.

But nobody (in qemu) uses sa.sa_flags ONSTACK, so I see no problem. If
a signal is delivered, it will be attended as it should. If there is
some other sigaltstack (I looked for it, and found nothing) then you
are right. But if no other signal uses sigaltstack, then there is no
race condition between the sigaltstack and the sigsuspend. And we only
use a signal that should not be used anywhere else (I have to change
that, seems that SIGUSR1 is being used in some point). So no conflict
here.

Have I understood you? I'm not sure if this is what you were talking
about... if not, please, explain a bit more the race condition and the
exact problem.
Stefan Hajnoczi - Feb. 14, 2012, 3:12 p.m.
On Tue, Feb 14, 2012 at 1:21 PM, Alex Barcelo <abarcelo@ac.upc.edu> wrote:
> On Tue, Feb 14, 2012 at 13:20, Stefan Hajnoczi <stefanha@gmail.com> wrote:
>> On Tue, Feb 14, 2012 at 11:53 AM, Alex Barcelo <abarcelo@ac.upc.edu> wrote:
>>> On Tue, Feb 14, 2012 at 10:24, Stefan Hajnoczi <stefanha@gmail.com> wrote:
>>>> (...)
>>>> What happens when a vcpu thread creates a coroutine while another QEMU
>>>> thread raises SIG_IPI?  The SIG_IPI will be handled on the alternate
>>>> signal stack
>>>
>>> mmm no, it won't. The sigaction is set for the SIGUSR1 only (yes I
>>> have to change it to sigusr2, the V2 will have this changed). And only
>>> this signal will be handled on an alternate stack (the sa.sa_flags is
>>> the responsible).
>>>
>>> I'm not really sure about that, did I miss something?
>>
>> What I meant is that there are other signals handlers installed and
>> the signals will be unblocked between the time when sigsuspend() has
>> returned and before sigaltstack(NULL, &ss) is executed.  This seems
>> like a race condition to me.
>
> But nobody (in qemu) uses sa.sa_flags ONSTACK, so I see no problem. If
> a signal is delivered, it will be attended as it should. If there is
> some other sigaltstack (I looked for it, and found nothing) then you
> are right. But if no other signal uses sigaltstack, then there is no
> race condition between the sigaltstack and the sigsuspend. And we only
> use a signal that should not be used anywhere else (I have to change
> that, seems that SIGUSR1 is being used in some point). So no conflict
> here.
>
> Have I understood you? I'm not sure if this is what you were talking
> about... if not, please, explain a bit more the race condition and the
> exact problem.

No, you are right.  It's not a problem because SA_ONSTACK isn't used
elsewhere.  Sorry, I missed that you need to set it explicitly on a
per-signal basis.

There's no issue with other signals here.

Stefan
Peter Maydell - March 6, 2012, 9:14 p.m.
On 14 February 2012 09:50, Paolo Bonzini <pbonzini@redhat.com> wrote:
> On 02/14/2012 10:24 AM, Stefan Hajnoczi wrote:
>>
>> setjmp() followed by return is usually bad.  We're relying on the fact
>> that the return code path here does not clobber local variables 'self'
>> and 'co'.  Can't we longjmp out back to the coroutine_new() function
>> instead?
>
> http://www.gnu.org/software/pth/rse-pmt.ps covers this.  Basically, this
> turned out to be more portable than longjmp from a signal handler.

...but still not as portable as you might like. See
https://bugs.launchpad.net/ubuntu/+source/gnupg2/+bug/599862
 -- if you compile with a newer glibc and FORTIFY_SOURCE (which
is default in ubuntu IIRC) then it will complain about the
longjump-down-the-stack that pth and this code both do.

-- PMM

Patch

diff --git a/coroutine-sigaltstack.c b/coroutine-sigaltstack.c
new file mode 100644
index 0000000..1d4f26d
--- /dev/null
+++ b/coroutine-sigaltstack.c
@@ -0,0 +1,337 @@ 
+/*
+ * sigaltstack coroutine initialization code
+ *
+ * Copyright (C) 2006  Anthony Liguori <anthony@codemonkey.ws>
+ * Copyright (C) 2011  Kevin Wolf <kwolf@redhat.com>
+ * Copyright (C) 2012  Alex Barcelo <abarcelo@ac.upc.edu>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.0 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+** This file is partly based on pth_mctx.c, from the GNU Portable Threads
+**  Copyright (c) 1999-2006 Ralf S. Engelschall <rse@engelschall.com>
+**  Same license (version 2.1 or later)
+*/
+
+/* XXX Is there a nicer way to disable glibc's stack check for longjmp? */
+#ifdef _FORTIFY_SOURCE
+#undef _FORTIFY_SOURCE
+#endif
+#include <stdlib.h>
+#include <setjmp.h>
+#include <stdint.h>
+#include <pthread.h>
+#include <signal.h>
+#include "qemu-common.h"
+#include "qemu-coroutine-int.h"
+
+enum {
+    /* Maximum free pool size prevents holding too many freed coroutines */
+    POOL_MAX_SIZE = 64,
+};
+
+/** Free list to speed up creation */
+static QLIST_HEAD(, Coroutine) pool = QLIST_HEAD_INITIALIZER(pool);
+static unsigned int pool_size;
+
+typedef struct {
+    Coroutine base;
+    void *stack;
+    jmp_buf env;
+} CoroutineUContext;
+
+/**
+ * Per-thread coroutine bookkeeping
+ */
+typedef struct {
+    /** Currently executing coroutine */
+    Coroutine *current;
+
+    /** The default coroutine */
+    CoroutineUContext leader;
+} CoroutineThreadState;
+
+static pthread_key_t thread_state_key;
+
+/*
+ * the way to pass information to the signal handler (trampoline)
+ * It's not thread-safe, as can be seen, but there is no other simple way.
+ */
+static volatile jmp_buf      tr_reenter;
+static volatile sig_atomic_t tr_called;
+static void *ptr_for_handler;
+
+static CoroutineThreadState *coroutine_get_thread_state(void)
+{
+    CoroutineThreadState *s = pthread_getspecific(thread_state_key);
+
+    if (!s) {
+        s = g_malloc0(sizeof(*s));
+        s->current = &s->leader.base;
+        pthread_setspecific(thread_state_key, s);
+    }
+    return s;
+}
+
+static void qemu_coroutine_thread_cleanup(void *opaque)
+{
+    CoroutineThreadState *s = opaque;
+
+    g_free(s);
+}
+
+static void __attribute__((destructor)) coroutine_cleanup(void)
+{
+    Coroutine *co;
+    Coroutine *tmp;
+
+    QLIST_FOREACH_SAFE(co, &pool, pool_next, tmp) {
+        g_free(DO_UPCAST(CoroutineUContext, base, co)->stack);
+        g_free(co);
+    }
+}
+
+static void __attribute__((constructor)) coroutine_init(void)
+{
+    int ret;
+
+    ret = pthread_key_create(&thread_state_key, qemu_coroutine_thread_cleanup);
+    if (ret != 0) {
+        fprintf(stderr, "unable to create leader key: %s\n", strerror(errno));
+        abort();
+    }
+}
+
+/* "boot" function
+ * This is what starts the coroutine, is called from the trampoline
+ * (from the signal handler when it is not signal handling, read ahead
+ * for more information).
+ */
+static void coroutine_bootstrap(CoroutineUContext *self, Coroutine *co)
+{
+    /* Initialize longjmp environment and switch back the caller */
+    if (!setjmp(self->env)) {
+        longjmp(*(jmp_buf *)co->entry_arg, 1);
+    }
+
+    while (true) {
+        co->entry(co->entry_arg);
+        qemu_coroutine_switch(co, co->caller, COROUTINE_TERMINATE);
+    }
+}
+
+/*
+ * This is used as the signal handler. This is called with the brand new stack
+ * (thanks to sigaltstack). We have to return, given that this is a signal
+ * handler and the sigmask and some other things are changed.
+ */
+static void coroutine_trampoline(int signal)
+{
+    CoroutineUContext *self;
+    Coroutine *co;
+
+    /* This will break on multithread or in any race condition */
+    self = ptr_for_handler;
+    tr_called = 1;
+    co = &self->base;
+
+    /*
+     * Here we have to do a bit of a ping pong between the caller, given that
+     * this is a signal handler and we have to do a return "soon". Then the
+     * caller can reestablish everything and do a longjmp here again.
+     */
+    if (!setjmp(*((jmp_buf *)&tr_reenter))) {
+        return;
+    }
+
+    /*
+     * Ok, the caller has longjmp'ed back to us, so now prepare
+     * us for the real machine state switching. We have to jump
+     * into another function here to get a new stack context for
+     * the auto variables (which have to be auto-variables
+     * because the start of the thread happens later). Else with
+     * PIC (i.e. Position Independent Code which is used when PTH
+     * is built as a shared library) most platforms would
+     * horrible core dump as experience showed.
+     */
+    coroutine_bootstrap(self, co);
+}
+
+static Coroutine *coroutine_new(void)
+{
+    const size_t stack_size = 1 << 20;
+    CoroutineUContext *co;
+    struct sigaction sa;
+    struct sigaction osa;
+    struct sigaltstack ss;
+    struct sigaltstack oss;
+    sigset_t sigs;
+    sigset_t osigs;
+    jmp_buf old_env;
+
+    /* The way to manipulate stack is with the sigaltstack function. We
+     * prepare a stack, with it delivering a signal to ourselves and then
+     * put setjmp/longjmp where needed.
+     * This has been done keeping coroutine-ucontext as a model and with the
+     * pth ideas (GNU Portable Threads). See coroutine-ucontext for the basics
+     * of the coroutines and see pth_mctx.c (from the pth project) for the
+     * sigaltstack way of manipulating stacks.
+     */
+
+    co = g_malloc0(sizeof(*co));
+    co->stack = g_malloc(stack_size);
+    co->base.entry_arg = &old_env; /* stash away our jmp_buf */
+
+    ptr_for_handler = co;
+
+    /*
+     * Preserve the SIGUSR1 signal state, block SIGUSR1,
+     * and establish our signal handler. The signal will
+     * later transfer control onto the signal stack.
+     */
+    sigemptyset(&sigs);
+    sigaddset(&sigs, SIGUSR1);
+    sigprocmask(SIG_BLOCK, &sigs, &osigs);
+    sa.sa_handler = coroutine_trampoline;
+    sigfillset(&sa.sa_mask);
+    sa.sa_flags = SA_ONSTACK;
+    if (sigaction(SIGUSR1, &sa, &osa) != 0) {
+        abort();
+    }
+
+    /*
+     * Set the new stack.
+     */
+    ss.ss_sp = co->stack;
+    ss.ss_size = stack_size;
+    ss.ss_flags = 0;
+    if (sigaltstack(&ss, &oss) < 0) {
+        abort();
+    }
+
+    /*
+     * Now transfer control onto the signal stack and set it up.
+     * It will return immediately via "return" after the setjmp()
+     * was performed. Be careful here with race conditions.  The
+     * signal can be delivered the first time sigsuspend() is
+     * called.
+     */
+    tr_called = 0;
+    kill(getpid(), SIGUSR1);
+    sigfillset(&sigs);
+    sigdelset(&sigs, SIGUSR1);
+    while (!tr_called) {
+        sigsuspend(&sigs);
+    }
+
+    /*
+     * Inform the system that we are back off the signal stack by
+     * removing the alternative signal stack. Be careful here: It
+     * first has to be disabled, before it can be removed.
+     */
+    sigaltstack(NULL, &ss);
+    ss.ss_flags = SS_DISABLE;
+    if (sigaltstack(&ss, NULL) < 0) {
+        abort();
+    }
+    sigaltstack(NULL, &ss);
+    if (!(oss.ss_flags & SS_DISABLE)) {
+        sigaltstack(&oss, NULL);
+    }
+
+    /*
+     * Restore the old SIGUSR1 signal handler and mask
+     */
+    sigaction(SIGUSR1, &osa, NULL);
+    sigprocmask(SIG_SETMASK, &osigs, NULL);
+
+    /*
+     * Now enter the trampoline again, but this time not as a signal
+     * handler. Instead we jump into it directly. The functionally
+     * redundant ping-pong pointer arithmentic is neccessary to avoid
+     * type-conversion warnings related to the `volatile' qualifier and
+     * the fact that `jmp_buf' usually is an array type.
+     */
+    if (!setjmp(old_env)) {
+        longjmp(*((jmp_buf *)&tr_reenter), 1);
+    }
+
+    /*
+     * Ok, we returned again, so now we're finished
+     */
+
+    return &co->base;
+}
+
+Coroutine *qemu_coroutine_new(void)
+{
+    Coroutine *co;
+
+    co = QLIST_FIRST(&pool);
+    if (co) {
+        QLIST_REMOVE(co, pool_next);
+        pool_size--;
+    } else {
+        co = coroutine_new();
+    }
+    return co;
+}
+
+void qemu_coroutine_delete(Coroutine *co_)
+{
+    CoroutineUContext *co = DO_UPCAST(CoroutineUContext, base, co_);
+
+    if (pool_size < POOL_MAX_SIZE) {
+        QLIST_INSERT_HEAD(&pool, &co->base, pool_next);
+        co->base.caller = NULL;
+        pool_size++;
+        return;
+    }
+
+    g_free(co->stack);
+    g_free(co);
+}
+
+CoroutineAction qemu_coroutine_switch(Coroutine *from_, Coroutine *to_,
+                                      CoroutineAction action)
+{
+    CoroutineUContext *from = DO_UPCAST(CoroutineUContext, base, from_);
+    CoroutineUContext *to = DO_UPCAST(CoroutineUContext, base, to_);
+    CoroutineThreadState *s = coroutine_get_thread_state();
+    int ret;
+
+    s->current = to_;
+
+    ret = setjmp(from->env);
+    if (ret == 0) {
+        longjmp(to->env, action);
+    }
+    return ret;
+}
+
+Coroutine *qemu_coroutine_self(void)
+{
+    CoroutineThreadState *s = coroutine_get_thread_state();
+
+    return s->current;
+}
+
+bool qemu_in_coroutine(void)
+{
+    CoroutineThreadState *s = pthread_getspecific(thread_state_key);
+
+    return s && s->current->caller;
+}
+