Message ID | 20171128154350.21504-2-kwolf@redhat.com |
---|---|
State | New |
Headers | show |
Series | Fix qemu-iotests failures | expand |
On Tue, Nov 28, 2017 at 04:43:47PM +0100, Kevin Wolf wrote: > This reverts commit 6133b39f3c36623425a6ede9e89d93175fde15cd. > > The commit checked conditions that would expose a bug, but there is no > real reason to forbid them apart from the bug, which we'll fix in a > minute. > > In particular, reentering a coroutine during co_aio_sleep_ns() is fine; > the function is explicitly written to allow this. > > aio_co_schedule() can indeed conflict with direct coroutine invocations, > but this is exactky what we want to fix, so remove that check again, s/exactky/exactly/ Reviewed-by: Jeff Cody <jcody@redhat.com> > too. > > Signed-off-by: Kevin Wolf <kwolf@redhat.com> > --- > include/qemu/coroutine_int.h | 13 +++---------- > util/async.c | 13 ------------- > util/qemu-coroutine-sleep.c | 12 ------------ > util/qemu-coroutine.c | 14 -------------- > 4 files changed, 3 insertions(+), 49 deletions(-) > > diff --git a/include/qemu/coroutine_int.h b/include/qemu/coroutine_int.h > index 59e8406398..cb98892bba 100644 > --- a/include/qemu/coroutine_int.h > +++ b/include/qemu/coroutine_int.h > @@ -46,21 +46,14 @@ struct Coroutine { > > size_t locks_held; > > - /* Only used when the coroutine has yielded. */ > - AioContext *ctx; > - > - /* Used to catch and abort on illegal co-routine entry. > - * Will contain the name of the function that had first > - * scheduled the coroutine. */ > - const char *scheduled; > - > - QSIMPLEQ_ENTRY(Coroutine) co_queue_next; > - > /* Coroutines that should be woken up when we yield or terminate. > * Only used when the coroutine is running. > */ > QSIMPLEQ_HEAD(, Coroutine) co_queue_wakeup; > > + /* Only used when the coroutine has yielded. 
*/ > + AioContext *ctx; > + QSIMPLEQ_ENTRY(Coroutine) co_queue_next; > QSLIST_ENTRY(Coroutine) co_scheduled_next; > }; > > diff --git a/util/async.c b/util/async.c > index 4dd9d95a9e..0e1bd8780a 100644 > --- a/util/async.c > +++ b/util/async.c > @@ -388,9 +388,6 @@ static void co_schedule_bh_cb(void *opaque) > QSLIST_REMOVE_HEAD(&straight, co_scheduled_next); > trace_aio_co_schedule_bh_cb(ctx, co); > aio_context_acquire(ctx); > - > - /* Protected by write barrier in qemu_aio_coroutine_enter */ > - atomic_set(&co->scheduled, NULL); > qemu_coroutine_enter(co); > aio_context_release(ctx); > } > @@ -441,16 +438,6 @@ fail: > void aio_co_schedule(AioContext *ctx, Coroutine *co) > { > trace_aio_co_schedule(ctx, co); > - const char *scheduled = atomic_cmpxchg(&co->scheduled, NULL, > - __func__); > - > - if (scheduled) { > - fprintf(stderr, > - "%s: Co-routine was already scheduled in '%s'\n", > - __func__, scheduled); > - abort(); > - } > - > QSLIST_INSERT_HEAD_ATOMIC(&ctx->scheduled_coroutines, > co, co_scheduled_next); > qemu_bh_schedule(ctx->co_schedule_bh); > diff --git a/util/qemu-coroutine-sleep.c b/util/qemu-coroutine-sleep.c > index 254349cdbb..9c5655041b 100644 > --- a/util/qemu-coroutine-sleep.c > +++ b/util/qemu-coroutine-sleep.c > @@ -13,7 +13,6 @@ > > #include "qemu/osdep.h" > #include "qemu/coroutine.h" > -#include "qemu/coroutine_int.h" > #include "qemu/timer.h" > #include "block/aio.h" > > @@ -26,8 +25,6 @@ static void co_sleep_cb(void *opaque) > { > CoSleepCB *sleep_cb = opaque; > > - /* Write of schedule protected by barrier write in aio_co_schedule */ > - atomic_set(&sleep_cb->co->scheduled, NULL); > aio_co_wake(sleep_cb->co); > } > > @@ -37,15 +34,6 @@ void coroutine_fn co_aio_sleep_ns(AioContext *ctx, QEMUClockType type, > CoSleepCB sleep_cb = { > .co = qemu_coroutine_self(), > }; > - > - const char *scheduled = atomic_cmpxchg(&sleep_cb.co->scheduled, NULL, > - __func__); > - if (scheduled) { > - fprintf(stderr, > - "%s: Co-routine was already 
scheduled in '%s'\n", > - __func__, scheduled); > - abort(); > - } > sleep_cb.ts = aio_timer_new(ctx, type, SCALE_NS, co_sleep_cb, &sleep_cb); > timer_mod(sleep_cb.ts, qemu_clock_get_ns(type) + ns); > qemu_coroutine_yield(); > diff --git a/util/qemu-coroutine.c b/util/qemu-coroutine.c > index 9eff7fd450..d6095c1d5a 100644 > --- a/util/qemu-coroutine.c > +++ b/util/qemu-coroutine.c > @@ -107,22 +107,8 @@ void qemu_aio_coroutine_enter(AioContext *ctx, Coroutine *co) > Coroutine *self = qemu_coroutine_self(); > CoroutineAction ret; > > - /* Cannot rely on the read barrier for co in aio_co_wake(), as there are > - * callers outside of aio_co_wake() */ > - const char *scheduled = atomic_mb_read(&co->scheduled); > - > trace_qemu_aio_coroutine_enter(ctx, self, co, co->entry_arg); > > - /* if the Coroutine has already been scheduled, entering it again will > - * cause us to enter it twice, potentially even after the coroutine has > - * been deleted */ > - if (scheduled) { > - fprintf(stderr, > - "%s: Co-routine was already scheduled in '%s'\n", > - __func__, scheduled); > - abort(); > - } > - > if (co->caller) { > fprintf(stderr, "Co-routine re-entered recursively\n"); > abort(); > -- > 2.13.6 >
On 28/11/2017 16:43, Kevin Wolf wrote: > This reverts commit 6133b39f3c36623425a6ede9e89d93175fde15cd. > > The commit checked conditions that would expose a bug, but there is no > real reason to forbid them apart from the bug, which we'll fix in a > minute. > > In particular, reentering a coroutine during co_aio_sleep_ns() is fine; > the function is explicitly written to allow this. This is true. > aio_co_schedule() can indeed conflict with direct coroutine invocations, > but this is exactky what we want to fix, so remove that check again, > too. I'm not sure this is a good idea, as I answered in patch 3. It can also conflict badly with another aio_co_schedule(). Your patch here removes the assertion in this case, and patch 3 makes it easier to get into the situation where two aio_co_schedule()s conflict with each other. For example, say you have a coroutine that calls aio_co_schedule on itself, like while (true) { aio_co_schedule(qemu_get_current_aio_context(), qemu_coroutine_self()); } If somebody else calls qemu_coroutine_enter on this coroutine, *that* is the bug. These patches would just cause some random corruption or (perhaps worse) hang. Paolo
Am 28.11.2017 um 17:18 hat Paolo Bonzini geschrieben: > On 28/11/2017 16:43, Kevin Wolf wrote: > > This reverts commit 6133b39f3c36623425a6ede9e89d93175fde15cd. > > > > The commit checked conditions that would expose a bug, but there is no > > real reason to forbid them apart from the bug, which we'll fix in a > > minute. > > > > In particular, reentering a coroutine during co_aio_sleep_ns() is fine; > > the function is explicitly written to allow this. > > This is true. > > > aio_co_schedule() can indeed conflict with direct coroutine invocations, > > but this is exactky what we want to fix, so remove that check again, > > too. > > I'm not sure this is a good idea, as I answered in patch 3. > > It can also conflict badly with another aio_co_schedule(). Your patch > here removes the assertion in this case, and patch 3 makes it easier to > get into the situation where two aio_co_schedule()s conflict with each > other. I don't see how they conflict. If the second aio_co_schedule() comes before the coroutine is actually entered, they are effectively simply merged into a single one. Which is exactly what was intended. > For example, say you have a coroutine that calls aio_co_schedule on > itself, like > > while (true) { > aio_co_schedule(qemu_get_current_aio_context(), > qemu_coroutine_self()); > } > > If somebody else calls qemu_coroutine_enter on this coroutine, *that* is > the bug. These patches would just cause some random corruption or > (perhaps worse) hang. Obviously not every coroutine is made to be reentered from multiple places, so for some cases it just might not make a whole lot of sense. Coroutines that are made for it generally are one of the types I explained in the commit message of patch 3. But anyway, how would this cause corruption or a hang (apart from the fact that this example doesn't have any state that could even be corrupted)? 
The external qemu_coroutine_enter() would just replace the scheduled coroutine call, so the coroutine wouldn't even notice that it was called from qemu_coroutine_enter() rather than its own scheduled call. Kevin
On 28/11/2017 17:37, Kevin Wolf wrote: >> >> It can also conflict badly with another aio_co_schedule(). Your patch >> here removes the assertion in this case, and patch 3 makes it easier to >> get into the situation where two aio_co_schedule()s conflict with each >> other. > I don't see how they conflict. If the second aio_co_schedule() comes > before the coroutine is actually entered, they are effectively simply > merged into a single one. Which is exactly what was intended. Suppose you have ctx->scheduled_coroutines | '---> co | '---> NULL Then aio_co_schedule(ctx, co) does QSLIST_INSERT_HEAD_ATOMIC, which is /* On entry, ctx->scheduled_coroutines == co. */ co->next = ctx->scheduled_coroutines change ctx->scheduled_coroutines from co->next to co This results in a loop: ctx->scheduled_coroutines | '---> co <-. | | '----' This is an obvious hang due to list corruption. In other more complicated cases it can skip a wakeup, which is a more subtle kind of hang. You can also get memory corruption if the coroutine terminates (and is freed) before aio_co_schedule executes. Basically, once you do aio_co_schedule or aio_co_wake the coroutine is not any more yours. It's owned by the context that will run it and you should not do anything with it. The same is true for co_aio_sleep_ns. Just because in practice it works (and it seemed like a clever idea at the time) it doesn't mean it *is* a good idea... Paolo
Am 28.11.2017 um 18:01 hat Paolo Bonzini geschrieben: > On 28/11/2017 17:37, Kevin Wolf wrote: > >> > >> It can also conflict badly with another aio_co_schedule(). Your patch > >> here removes the assertion in this case, and patch 3 makes it easier to > >> get into the situation where two aio_co_schedule()s conflict with each > >> other. > > I don't see how they conflict. If the second aio_co_schedule() comes > > before the coroutine is actually entered, they are effectively simply > > merged into a single one. Which is exactly what was intended. > > Suppose you have > > ctx->scheduled_coroutines > | > '---> co > | > '---> NULL > > Then aio_co_schedule(ctx, co) does QSLIST_INSERT_HEAD_ATOMIC, which is > > /* On entry, ctx->scheduled_coroutines == co. */ > co->next = ctx->scheduled_coroutines > change ctx->scheduled_coroutines from co->next to co > > This results in a loop: > > ctx->scheduled_coroutines > | > '---> co <-. > | | > '----' > > This is an obvious hang due to list corruption. In other more > complicated cases it can skip a wakeup, which is a more subtle kind of > hang. You can also get memory corruption if the coroutine terminates > (and is freed) before aio_co_schedule executes. Okay, I can see that. I thought you were talking about the logic introduced by this series, but you're actually talking about preexisting behaviour which limits the usefulness of the patches. > Basically, once you do aio_co_schedule or aio_co_wake the coroutine is > not any more yours. It's owned by the context that will run it and you > should not do anything with it. > > The same is true for co_aio_sleep_ns. Just because in practice it works > (and it seemed like a clever idea at the time) it doesn't mean it *is* a > good idea... Well, but that poses the question how you can implement any coroutine that can be reentered from more than one place. Not being able to do that would be a severe restriction. 
For example, take quorum, which handles requests by spawning a coroutine for every child and then yielding until acb->count == s->num_children. This means that the main request coroutine can be reentered from the child request coroutines in any order and timing. What saves us currently is that they are all in the same AioContext, so we won't actually end up in aio_co_schedule(), but I'm sure that sooner or later we'll find cases where we're waiting for several workers that are spread across different I/O threads. I don't think "don't do that" is a good answer to this. And yes, reentering co_aio_sleep_ns() early is a real requirement, too. The test case that used speed=1 and would have waited a few hours before actually cancelling the job is an extreme example, but even just delaying cancellation for a second is bad if this is a second of migration downtime. Kevin
On Tue, Nov 28, 2017 at 06:19:31PM +0100, Kevin Wolf wrote: > Am 28.11.2017 um 18:01 hat Paolo Bonzini geschrieben: > > On 28/11/2017 17:37, Kevin Wolf wrote: > > >> > > >> It can also conflict badly with another aio_co_schedule(). Your patch > > >> here removes the assertion in this case, and patch 3 makes it easier to > > >> get into the situation where two aio_co_schedule()s conflict with each > > >> other. > > > I don't see how they conflict. If the second aio_co_schedule() comes > > > before the coroutine is actually entered, they are effectively simply > > > merged into a single one. Which is exactly what was intended. > > > > Suppose you have > > > > ctx->scheduled_coroutines > > | > > '---> co > > | > > '---> NULL > > > > Then aio_co_schedule(ctx, co) does QSLIST_INSERT_HEAD_ATOMIC, which is > > > > /* On entry, ctx->scheduled_coroutines == co. */ > > co->next = ctx->scheduled_coroutines > > change ctx->scheduled_coroutines from co->next to co > > > > This results in a loop: > > > > ctx->scheduled_coroutines > > | > > '---> co <-. > > | | > > '----' > > > > This is an obvious hang due to list corruption. In other more > > complicated cases it can skip a wakeup, which is a more subtle kind of > > hang. You can also get memory corruption if the coroutine terminates > > (and is freed) before aio_co_schedule executes. > > Okay, I can see that. I thought you were talking about the logic > introduced by this series, but you're actually talking about preexisting > behaviour which limits the usefulness of the patches. > > > Basically, once you do aio_co_schedule or aio_co_wake the coroutine is > > not any more yours. It's owned by the context that will run it and you > > should not do anything with it. > > > > The same is true for co_aio_sleep_ns. Just because in practice it works > > (and it seemed like a clever idea at the time) it doesn't mean it *is* a > > good idea... 
> > Well, but that poses the question how you can implement any coroutine > that can be reentered from more than one place. Not being able to do > that would be a severe restriction. > > For example, take quorum, which handles requests by spawning a coroutine > for every child and then yielding until acb->count == s->num_children. > This means that the main request coroutine can be reentered from the > child request coroutines in any order and timing. > > What saves us currently is that they are all in the same AioContext, so > we won't actually end up in aio_co_schedule(), but I'm sure that sooner > or later we'll find cases where we're waiting for several workers that > are spread across different I/O threads. > > I don't think "don't do that" is a good answer to this. > > And yes, reentering co_aio_sleep_ns() early is a real requirement, too. > The test case that used speed=1 and would have waited a few hours before > actually cancelling the job is an extreme example, but even just > delaying cancellation for a second is bad if this is a second of > migration downtime. > At least this last part should be doable; reentering co_aio_sleep_ns() really should mean that we want to short-circuit the QEMU timer for the sleep cb. Any reason we can't allow a reenter by removing the timer for that sleep, and then doing an aio_co_wake?
On 28/11/2017 18:19, Kevin Wolf wrote: > Am 28.11.2017 um 18:01 hat Paolo Bonzini geschrieben: >> Basically, once you do aio_co_schedule or aio_co_wake the coroutine is >> not any more yours. It's owned by the context that will run it and you >> should not do anything with it. > > Well, but that poses the question how you can implement any coroutine > that can be reentered from more than one place. Not being able to do > that would be a severe restriction. > > For example, take quorum, which handles requests by spawning a coroutine > for every child and then yielding until acb->count == s->num_children. > This means that the main request coroutine can be reentered from the > child request coroutines in any order and timing. > > What saves us currently is that they are all in the same AioContext, so > we won't actually end up in aio_co_schedule(), but I'm sure that sooner > or later we'll find cases where we're waiting for several workers that > are spread across different I/O threads. That can work as long as you add synchronization among those "more than one place" (quorum doesn't need it because as you say there's only one AioContext involved). Take the previous example of f(); g(); qemu_coroutine_yield(); If f() and g() do lock a mutex x--; wake = (x == 0) unlock a mutex if (wake) aio_co_wake(co) then that's fine, because aio_co_wake is only entered in one place. > I don't think "don't do that" is a good answer to this. > > And yes, reentering co_aio_sleep_ns() early is a real requirement, too. > The test case that used speed=1 and would have waited a few hours before > actually cancelling the job is an extreme example, but even just > delaying cancellation for a second is bad if this is a second of > migration downtime. Ok, that can be fixed along the lines of what Jeff has just written. I'll take a look. Thanks, Paolo
diff --git a/include/qemu/coroutine_int.h b/include/qemu/coroutine_int.h index 59e8406398..cb98892bba 100644 --- a/include/qemu/coroutine_int.h +++ b/include/qemu/coroutine_int.h @@ -46,21 +46,14 @@ struct Coroutine { size_t locks_held; - /* Only used when the coroutine has yielded. */ - AioContext *ctx; - - /* Used to catch and abort on illegal co-routine entry. - * Will contain the name of the function that had first - * scheduled the coroutine. */ - const char *scheduled; - - QSIMPLEQ_ENTRY(Coroutine) co_queue_next; - /* Coroutines that should be woken up when we yield or terminate. * Only used when the coroutine is running. */ QSIMPLEQ_HEAD(, Coroutine) co_queue_wakeup; + /* Only used when the coroutine has yielded. */ + AioContext *ctx; + QSIMPLEQ_ENTRY(Coroutine) co_queue_next; QSLIST_ENTRY(Coroutine) co_scheduled_next; }; diff --git a/util/async.c b/util/async.c index 4dd9d95a9e..0e1bd8780a 100644 --- a/util/async.c +++ b/util/async.c @@ -388,9 +388,6 @@ static void co_schedule_bh_cb(void *opaque) QSLIST_REMOVE_HEAD(&straight, co_scheduled_next); trace_aio_co_schedule_bh_cb(ctx, co); aio_context_acquire(ctx); - - /* Protected by write barrier in qemu_aio_coroutine_enter */ - atomic_set(&co->scheduled, NULL); qemu_coroutine_enter(co); aio_context_release(ctx); } @@ -441,16 +438,6 @@ fail: void aio_co_schedule(AioContext *ctx, Coroutine *co) { trace_aio_co_schedule(ctx, co); - const char *scheduled = atomic_cmpxchg(&co->scheduled, NULL, - __func__); - - if (scheduled) { - fprintf(stderr, - "%s: Co-routine was already scheduled in '%s'\n", - __func__, scheduled); - abort(); - } - QSLIST_INSERT_HEAD_ATOMIC(&ctx->scheduled_coroutines, co, co_scheduled_next); qemu_bh_schedule(ctx->co_schedule_bh); diff --git a/util/qemu-coroutine-sleep.c b/util/qemu-coroutine-sleep.c index 254349cdbb..9c5655041b 100644 --- a/util/qemu-coroutine-sleep.c +++ b/util/qemu-coroutine-sleep.c @@ -13,7 +13,6 @@ #include "qemu/osdep.h" #include "qemu/coroutine.h" -#include 
"qemu/coroutine_int.h" #include "qemu/timer.h" #include "block/aio.h" @@ -26,8 +25,6 @@ static void co_sleep_cb(void *opaque) { CoSleepCB *sleep_cb = opaque; - /* Write of schedule protected by barrier write in aio_co_schedule */ - atomic_set(&sleep_cb->co->scheduled, NULL); aio_co_wake(sleep_cb->co); } @@ -37,15 +34,6 @@ void coroutine_fn co_aio_sleep_ns(AioContext *ctx, QEMUClockType type, CoSleepCB sleep_cb = { .co = qemu_coroutine_self(), }; - - const char *scheduled = atomic_cmpxchg(&sleep_cb.co->scheduled, NULL, - __func__); - if (scheduled) { - fprintf(stderr, - "%s: Co-routine was already scheduled in '%s'\n", - __func__, scheduled); - abort(); - } sleep_cb.ts = aio_timer_new(ctx, type, SCALE_NS, co_sleep_cb, &sleep_cb); timer_mod(sleep_cb.ts, qemu_clock_get_ns(type) + ns); qemu_coroutine_yield(); diff --git a/util/qemu-coroutine.c b/util/qemu-coroutine.c index 9eff7fd450..d6095c1d5a 100644 --- a/util/qemu-coroutine.c +++ b/util/qemu-coroutine.c @@ -107,22 +107,8 @@ void qemu_aio_coroutine_enter(AioContext *ctx, Coroutine *co) Coroutine *self = qemu_coroutine_self(); CoroutineAction ret; - /* Cannot rely on the read barrier for co in aio_co_wake(), as there are - * callers outside of aio_co_wake() */ - const char *scheduled = atomic_mb_read(&co->scheduled); - trace_qemu_aio_coroutine_enter(ctx, self, co, co->entry_arg); - /* if the Coroutine has already been scheduled, entering it again will - * cause us to enter it twice, potentially even after the coroutine has - * been deleted */ - if (scheduled) { - fprintf(stderr, - "%s: Co-routine was already scheduled in '%s'\n", - __func__, scheduled); - abort(); - } - if (co->caller) { fprintf(stderr, "Co-routine re-entered recursively\n"); abort();
This reverts commit 6133b39f3c36623425a6ede9e89d93175fde15cd. The commit checked conditions that would expose a bug, but there is no real reason to forbid them apart from the bug, which we'll fix in a minute. In particular, reentering a coroutine during co_aio_sleep_ns() is fine; the function is explicitly written to allow this. aio_co_schedule() can indeed conflict with direct coroutine invocations, but this is exactly what we want to fix, so remove that check again, too. Signed-off-by: Kevin Wolf <kwolf@redhat.com> --- include/qemu/coroutine_int.h | 13 +++---------- util/async.c | 13 ------------- util/qemu-coroutine-sleep.c | 12 ------------ util/qemu-coroutine.c | 14 -------------- 4 files changed, 3 insertions(+), 49 deletions(-)