| Field | Value |
|---|---|
| Message ID | 1375067768-11342-5-git-send-email-pingfank@linux.vnet.ibm.com |
| State | New |
| Headers | show |
Il 29/07/2013 05:16, Liu Ping Fan ha scritto: > Currently, timers run on iothread inside QBL, this limits the usage > of timers in some case, e.g. virtio-blk-dataplane. In order to run > timers on private thread based on different clocksource, we arm each > AioContext with three timer lists in according to three clocksource > (QemuClock). > > A little later, we will run timers in aio_poll. > > Signed-off-by: Liu Ping Fan <pingfank@linux.vnet.ibm.com> > > ------ issue to fix --- > Note: before this patch, there should be another one to fix the race > issue by qemu_mod_timer() and _run_timers(). Another issue is that deadline computation is not using the AioContext's timer lists. Paolo > --- > async.c | 9 ++++++++ > include/block/aio.h | 13 +++++++++++ > include/qemu/timer.h | 20 ++++++++++++++++ > qemu-timer.c | 65 +++++++++++++++++++++++++++++++--------------------- > 4 files changed, 81 insertions(+), 26 deletions(-) > > diff --git a/async.c b/async.c > index ba4072c..7e2340e 100644 > --- a/async.c > +++ b/async.c > @@ -201,11 +201,15 @@ static void > aio_ctx_finalize(GSource *source) > { > AioContext *ctx = (AioContext *) source; > + int i; > > thread_pool_free(ctx->thread_pool); > aio_set_event_notifier(ctx, &ctx->notifier, NULL, NULL); > event_notifier_cleanup(&ctx->notifier); > g_array_free(ctx->pollfds, TRUE); > + for (i = 0; i < QEMU_CLOCK_MAXCNT; i++) { > + timer_list_finalize(&ctx->timer_list[i]); > + } > } > > static GSourceFuncs aio_source_funcs = { > @@ -237,6 +241,8 @@ void aio_notify(AioContext *ctx) > AioContext *aio_context_new(void) > { > AioContext *ctx; > + int i; > + > ctx = (AioContext *) g_source_new(&aio_source_funcs, sizeof(AioContext)); > ctx->pollfds = g_array_new(FALSE, FALSE, sizeof(GPollFD)); > ctx->thread_pool = NULL; > @@ -245,6 +251,9 @@ AioContext *aio_context_new(void) > aio_set_event_notifier(ctx, &ctx->notifier, > (EventNotifierHandler *) > event_notifier_test_and_clear, NULL); > + for (i = 0; i < QEMU_CLOCK_MAXCNT; i++) { > + 
timer_list_init(&ctx->timer_list[i]); > + } > > return ctx; > } > diff --git a/include/block/aio.h b/include/block/aio.h > index 04598b2..cf41b42 100644 > --- a/include/block/aio.h > +++ b/include/block/aio.h > @@ -43,6 +43,18 @@ typedef struct AioHandler AioHandler; > typedef void QEMUBHFunc(void *opaque); > typedef void IOHandler(void *opaque); > > +/* Related timer with AioContext */ > +typedef struct QEMUTimer QEMUTimer; > +#define QEMU_CLOCK_MAXCNT 3 > + > +typedef struct TimerList { > + QEMUTimer *active_timers; > + QemuMutex active_timers_lock; > +} TimerList; > + > +void timer_list_init(TimerList *tlist); > +void timer_list_finalize(TimerList *tlist); > + > typedef struct AioContext { > GSource source; > > @@ -73,6 +85,7 @@ typedef struct AioContext { > > /* Thread pool for performing work and receiving completion callbacks */ > struct ThreadPool *thread_pool; > + TimerList timer_list[QEMU_CLOCK_MAXCNT]; > } AioContext; > > /* Returns 1 if there are still outstanding AIO requests; 0 otherwise */ > diff --git a/include/qemu/timer.h b/include/qemu/timer.h > index 9dd206c..3e5016b 100644 > --- a/include/qemu/timer.h > +++ b/include/qemu/timer.h > @@ -33,9 +33,14 @@ extern QEMUClock *vm_clock; > extern QEMUClock *host_clock; > > int64_t qemu_get_clock_ns(QEMUClock *clock); > +/* qemu_clock_has_timers, qemu_clock_expired, qemu_clock_deadline > + * run In tcg icount mode. There is only one AioContext i.e. qemu_aio_context. > + * So we only count the timers on qemu_aio_context. 
> + */ > int64_t qemu_clock_has_timers(QEMUClock *clock); > int64_t qemu_clock_expired(QEMUClock *clock); > int64_t qemu_clock_deadline(QEMUClock *clock); > + > void qemu_clock_enable(QEMUClock *clock, bool enabled); > void qemu_clock_warp(QEMUClock *clock); > > @@ -45,6 +50,9 @@ void qemu_unregister_clock_reset_notifier(QEMUClock *clock, > > QEMUTimer *qemu_new_timer(QEMUClock *clock, int scale, > QEMUTimerCB *cb, void *opaque); > +QEMUTimer *aioctx_new_timer(QEMUClock *clock, int scale, > + QEMUTimerCB *cb, void *opaque, AioContext *ctx); > + > void qemu_free_timer(QEMUTimer *ts); > void qemu_del_timer(QEMUTimer *ts); > void qemu_mod_timer_ns(QEMUTimer *ts, int64_t expire_time); > @@ -75,6 +83,18 @@ static inline QEMUTimer *qemu_new_timer_ms(QEMUClock *clock, QEMUTimerCB *cb, > return qemu_new_timer(clock, SCALE_MS, cb, opaque); > } > > +static inline QEMUTimer *aioctx_new_timer_ns(QEMUClock *clock, QEMUTimerCB *cb, > + void *opaque, AioContext *ctx) > +{ > + return aioctx_new_timer(clock, SCALE_NS, cb, opaque, ctx); > +} > + > +static inline QEMUTimer *aioctx_new_timer_ms(QEMUClock *clock, QEMUTimerCB *cb, > + void *opaque, AioContext *ctx) > +{ > + return aioctx_new_timer(clock, SCALE_MS, cb, opaque, ctx); > +} > + > static inline int64_t qemu_get_clock_ms(QEMUClock *clock) > { > return qemu_get_clock_ns(clock) / SCALE_MS; > diff --git a/qemu-timer.c b/qemu-timer.c > index d941a83..f15c3e6 100644 > --- a/qemu-timer.c > +++ b/qemu-timer.c > @@ -45,14 +45,6 @@ > #define QEMU_CLOCK_REALTIME 0 > #define QEMU_CLOCK_VIRTUAL 1 > #define QEMU_CLOCK_HOST 2 > -#define QEMU_CLOCK_MAXCNT 3 > - > -typedef struct TimerList { > - QEMUTimer *active_timers; > - QemuMutex active_timers_lock; > -} TimerList; > - > -static TimerList timer_list[QEMU_CLOCK_MAXCNT]; > > struct QEMUClock { > NotifierList reset_notifiers; > @@ -72,7 +64,9 @@ struct QEMUClock { > > struct QEMUTimer { > int64_t expire_time; /* in nanoseconds */ > + /* quick link to AioContext timer list */ > TimerList 
*list; > + AioContext *ctx; > QEMUTimerCB *cb; > void *opaque; > QEMUTimer *next; > @@ -100,11 +94,12 @@ struct qemu_alarm_timer { > > static struct qemu_alarm_timer *alarm_timer; > > -static TimerList *clock_to_timerlist(QEMUClock *clock) > +static TimerList *clock_to_timerlist(QEMUClock *clock, AioContext *ctx) > { > int type = clock->type; > > - return &timer_list[type]; > + assert(ctx); > + return &ctx->timer_list[type]; > } > > static bool qemu_timer_expired_ns(QEMUTimer *timer_head, int64_t current_time) > @@ -112,7 +107,8 @@ static bool qemu_timer_expired_ns(QEMUTimer *timer_head, int64_t current_time) > return timer_head && (timer_head->expire_time <= current_time); > } > > -static int64_t qemu_next_clock_deadline(QEMUClock *clock, int64_t delta) > +static int64_t qemu_next_clock_deadline(QEMUClock *clock, int64_t delta, > + AioContext *ctx) > { > int64_t expire_time, next; > bool has_timer = false; > @@ -122,7 +118,7 @@ static int64_t qemu_next_clock_deadline(QEMUClock *clock, int64_t delta) > return delta; > } > > - tlist = clock_to_timerlist(clock); > + tlist = clock_to_timerlist(clock, ctx); > qemu_mutex_lock(&tlist->active_timers_lock); > if (tlist->active_timers) { > has_timer = true; > @@ -140,12 +136,13 @@ static int64_t qemu_next_clock_deadline(QEMUClock *clock, int64_t delta) > static int64_t qemu_next_alarm_deadline(void) > { > int64_t delta = INT64_MAX; > + AioContext *ctx = *tls_get_thread_aio_context(); > > if (!use_icount) { > - delta = qemu_next_clock_deadline(vm_clock, delta); > + delta = qemu_next_clock_deadline(vm_clock, delta, ctx); > } > - delta = qemu_next_clock_deadline(host_clock, delta); > - return qemu_next_clock_deadline(rt_clock, delta); > + delta = qemu_next_clock_deadline(host_clock, delta, ctx); > + return qemu_next_clock_deadline(rt_clock, delta, ctx); > } > > static void qemu_rearm_alarm_timer(struct qemu_alarm_timer *t) > @@ -267,16 +264,21 @@ QEMUClock *rt_clock; > QEMUClock *vm_clock; > QEMUClock *host_clock; > > -static 
void timer_list_init(TimerList *tlist) > +void timer_list_init(TimerList *tlist) > { > qemu_mutex_init(&tlist->active_timers_lock); > tlist->active_timers = NULL; > } > > +void timer_list_finalize(TimerList *tlist) > +{ > + qemu_mutex_destroy(&tlist->active_timers_lock); > + assert(!tlist->active_timers); > +} > + > static QEMUClock *qemu_new_clock(int type) > { > QEMUClock *clock; > - TimerList *tlist; > > clock = g_malloc0(sizeof(QEMUClock)); > clock->type = type; > @@ -286,8 +288,6 @@ static QEMUClock *qemu_new_clock(int type) > qemu_cond_init(&clock->wait_using); > qemu_mutex_init(&clock->lock); > notifier_list_init(&clock->reset_notifiers); > - tlist = clock_to_timerlist(clock); > - timer_list_init(tlist); > > return clock; > } > @@ -308,10 +308,14 @@ void qemu_clock_enable(QEMUClock *clock, bool enabled) > } > } > > +/* qemu_clock_has_timers, qemu_clock_expired, qemu_clock_deadline > + * run In tcg icount mode. There is only one AioContext i.e. qemu_aio_context. > + * So we only count the timers on qemu_aio_context. 
> +*/ > int64_t qemu_clock_has_timers(QEMUClock *clock) > { > bool has_timers; > - TimerList *tlist = clock_to_timerlist(clock); > + TimerList *tlist = clock_to_timerlist(clock, qemu_get_aio_context()); > > qemu_mutex_lock(&tlist->active_timers_lock); > has_timers = !!tlist->active_timers; > @@ -323,7 +327,7 @@ int64_t qemu_clock_expired(QEMUClock *clock) > { > bool has_timers; > int64_t expire_time; > - TimerList *tlist = clock_to_timerlist(clock); > + TimerList *tlist = clock_to_timerlist(clock, qemu_get_aio_context()); > > qemu_mutex_lock(&tlist->active_timers_lock); > has_timers = tlist->active_timers; > @@ -339,7 +343,7 @@ int64_t qemu_clock_deadline(QEMUClock *clock) > int64_t delta = INT32_MAX; > bool has_timers; > int64_t expire_time; > - TimerList *tlist = clock_to_timerlist(clock); > + TimerList *tlist = clock_to_timerlist(clock, qemu_get_aio_context()); > > qemu_mutex_lock(&tlist->active_timers_lock); > has_timers = tlist->active_timers; > @@ -355,19 +359,26 @@ int64_t qemu_clock_deadline(QEMUClock *clock) > return delta; > } > > -QEMUTimer *qemu_new_timer(QEMUClock *clock, int scale, > - QEMUTimerCB *cb, void *opaque) > +QEMUTimer *aioctx_new_timer(QEMUClock *clock, int scale, > + QEMUTimerCB *cb, void *opaque, AioContext *ctx) > { > QEMUTimer *ts; > > ts = g_malloc0(sizeof(QEMUTimer)); > - ts->list = clock_to_timerlist(clock); > + ts->list = clock_to_timerlist(clock, ctx); > ts->cb = cb; > ts->opaque = opaque; > ts->scale = scale; > + ts->ctx = ctx; > return ts; > } > > +QEMUTimer *qemu_new_timer(QEMUClock *clock, int scale, > + QEMUTimerCB *cb, void *opaque) > +{ > + return aioctx_new_timer(clock, scale, cb, opaque, qemu_get_aio_context()); > +} > + > void qemu_free_timer(QEMUTimer *ts) > { > g_free(ts); > @@ -457,6 +468,7 @@ void qemu_run_timers(QEMUClock *clock) > QEMUTimer *ts; > int64_t current_time; > TimerList *tlist; > + AioContext *ctx; > > atomic_inc(&clock->using); > if (unlikely(!clock->enabled)) { > @@ -465,7 +477,8 @@ void 
qemu_run_timers(QEMUClock *clock) > > > current_time = qemu_get_clock_ns(clock); > - tlist = clock_to_timerlist(clock); > + ctx = *tls_get_thread_aio_context(); > + tlist = clock_to_timerlist(clock, ctx); > > for(;;) { > qemu_mutex_lock(&tlist->active_timers_lock); >
On Mon, Jul 29, 2013 at 2:32 PM, Paolo Bonzini <pbonzini@redhat.com> wrote: > Il 29/07/2013 05:16, Liu Ping Fan ha scritto: >> Currently, timers run on iothread inside QBL, this limits the usage >> of timers in some case, e.g. virtio-blk-dataplane. In order to run >> timers on private thread based on different clocksource, we arm each >> AioContext with three timer lists in according to three clocksource >> (QemuClock). >> >> A little later, we will run timers in aio_poll. >> >> Signed-off-by: Liu Ping Fan <pingfank@linux.vnet.ibm.com> >> >> ------ issue to fix --- >> Note: before this patch, there should be another one to fix the race >> issue by qemu_mod_timer() and _run_timers(). > > Another issue is that deadline computation is not using the AioContext's > timer lists. > Sorry, can not catch the meaning. When AioContext has its own timer lists, it will have its own deadline(for ppoll timeout). So the computation should be based on AioContext's timer lists, right? Thanks and regards, Pingfan > Paolo > >> --- >> async.c | 9 ++++++++ >> include/block/aio.h | 13 +++++++++++ >> include/qemu/timer.h | 20 ++++++++++++++++ >> qemu-timer.c | 65 +++++++++++++++++++++++++++++++--------------------- >> 4 files changed, 81 insertions(+), 26 deletions(-) >> >> diff --git a/async.c b/async.c >> index ba4072c..7e2340e 100644 >> --- a/async.c >> +++ b/async.c >> @@ -201,11 +201,15 @@ static void >> aio_ctx_finalize(GSource *source) >> { >> AioContext *ctx = (AioContext *) source; >> + int i; >> >> thread_pool_free(ctx->thread_pool); >> aio_set_event_notifier(ctx, &ctx->notifier, NULL, NULL); >> event_notifier_cleanup(&ctx->notifier); >> g_array_free(ctx->pollfds, TRUE); >> + for (i = 0; i < QEMU_CLOCK_MAXCNT; i++) { >> + timer_list_finalize(&ctx->timer_list[i]); >> + } >> } >> >> static GSourceFuncs aio_source_funcs = { >> @@ -237,6 +241,8 @@ void aio_notify(AioContext *ctx) >> AioContext *aio_context_new(void) >> { >> AioContext *ctx; >> + int i; >> + >> ctx = (AioContext *) 
g_source_new(&aio_source_funcs, sizeof(AioContext)); >> ctx->pollfds = g_array_new(FALSE, FALSE, sizeof(GPollFD)); >> ctx->thread_pool = NULL; >> @@ -245,6 +251,9 @@ AioContext *aio_context_new(void) >> aio_set_event_notifier(ctx, &ctx->notifier, >> (EventNotifierHandler *) >> event_notifier_test_and_clear, NULL); >> + for (i = 0; i < QEMU_CLOCK_MAXCNT; i++) { >> + timer_list_init(&ctx->timer_list[i]); >> + } >> >> return ctx; >> } >> diff --git a/include/block/aio.h b/include/block/aio.h >> index 04598b2..cf41b42 100644 >> --- a/include/block/aio.h >> +++ b/include/block/aio.h >> @@ -43,6 +43,18 @@ typedef struct AioHandler AioHandler; >> typedef void QEMUBHFunc(void *opaque); >> typedef void IOHandler(void *opaque); >> >> +/* Related timer with AioContext */ >> +typedef struct QEMUTimer QEMUTimer; >> +#define QEMU_CLOCK_MAXCNT 3 >> + >> +typedef struct TimerList { >> + QEMUTimer *active_timers; >> + QemuMutex active_timers_lock; >> +} TimerList; >> + >> +void timer_list_init(TimerList *tlist); >> +void timer_list_finalize(TimerList *tlist); >> + >> typedef struct AioContext { >> GSource source; >> >> @@ -73,6 +85,7 @@ typedef struct AioContext { >> >> /* Thread pool for performing work and receiving completion callbacks */ >> struct ThreadPool *thread_pool; >> + TimerList timer_list[QEMU_CLOCK_MAXCNT]; >> } AioContext; >> >> /* Returns 1 if there are still outstanding AIO requests; 0 otherwise */ >> diff --git a/include/qemu/timer.h b/include/qemu/timer.h >> index 9dd206c..3e5016b 100644 >> --- a/include/qemu/timer.h >> +++ b/include/qemu/timer.h >> @@ -33,9 +33,14 @@ extern QEMUClock *vm_clock; >> extern QEMUClock *host_clock; >> >> int64_t qemu_get_clock_ns(QEMUClock *clock); >> +/* qemu_clock_has_timers, qemu_clock_expired, qemu_clock_deadline >> + * run In tcg icount mode. There is only one AioContext i.e. qemu_aio_context. >> + * So we only count the timers on qemu_aio_context. 
>> + */ >> int64_t qemu_clock_has_timers(QEMUClock *clock); >> int64_t qemu_clock_expired(QEMUClock *clock); >> int64_t qemu_clock_deadline(QEMUClock *clock); >> + >> void qemu_clock_enable(QEMUClock *clock, bool enabled); >> void qemu_clock_warp(QEMUClock *clock); >> >> @@ -45,6 +50,9 @@ void qemu_unregister_clock_reset_notifier(QEMUClock *clock, >> >> QEMUTimer *qemu_new_timer(QEMUClock *clock, int scale, >> QEMUTimerCB *cb, void *opaque); >> +QEMUTimer *aioctx_new_timer(QEMUClock *clock, int scale, >> + QEMUTimerCB *cb, void *opaque, AioContext *ctx); >> + >> void qemu_free_timer(QEMUTimer *ts); >> void qemu_del_timer(QEMUTimer *ts); >> void qemu_mod_timer_ns(QEMUTimer *ts, int64_t expire_time); >> @@ -75,6 +83,18 @@ static inline QEMUTimer *qemu_new_timer_ms(QEMUClock *clock, QEMUTimerCB *cb, >> return qemu_new_timer(clock, SCALE_MS, cb, opaque); >> } >> >> +static inline QEMUTimer *aioctx_new_timer_ns(QEMUClock *clock, QEMUTimerCB *cb, >> + void *opaque, AioContext *ctx) >> +{ >> + return aioctx_new_timer(clock, SCALE_NS, cb, opaque, ctx); >> +} >> + >> +static inline QEMUTimer *aioctx_new_timer_ms(QEMUClock *clock, QEMUTimerCB *cb, >> + void *opaque, AioContext *ctx) >> +{ >> + return aioctx_new_timer(clock, SCALE_MS, cb, opaque, ctx); >> +} >> + >> static inline int64_t qemu_get_clock_ms(QEMUClock *clock) >> { >> return qemu_get_clock_ns(clock) / SCALE_MS; >> diff --git a/qemu-timer.c b/qemu-timer.c >> index d941a83..f15c3e6 100644 >> --- a/qemu-timer.c >> +++ b/qemu-timer.c >> @@ -45,14 +45,6 @@ >> #define QEMU_CLOCK_REALTIME 0 >> #define QEMU_CLOCK_VIRTUAL 1 >> #define QEMU_CLOCK_HOST 2 >> -#define QEMU_CLOCK_MAXCNT 3 >> - >> -typedef struct TimerList { >> - QEMUTimer *active_timers; >> - QemuMutex active_timers_lock; >> -} TimerList; >> - >> -static TimerList timer_list[QEMU_CLOCK_MAXCNT]; >> >> struct QEMUClock { >> NotifierList reset_notifiers; >> @@ -72,7 +64,9 @@ struct QEMUClock { >> >> struct QEMUTimer { >> int64_t expire_time; /* in nanoseconds */ 
>> + /* quick link to AioContext timer list */ >> TimerList *list; >> + AioContext *ctx; >> QEMUTimerCB *cb; >> void *opaque; >> QEMUTimer *next; >> @@ -100,11 +94,12 @@ struct qemu_alarm_timer { >> >> static struct qemu_alarm_timer *alarm_timer; >> >> -static TimerList *clock_to_timerlist(QEMUClock *clock) >> +static TimerList *clock_to_timerlist(QEMUClock *clock, AioContext *ctx) >> { >> int type = clock->type; >> >> - return &timer_list[type]; >> + assert(ctx); >> + return &ctx->timer_list[type]; >> } >> >> static bool qemu_timer_expired_ns(QEMUTimer *timer_head, int64_t current_time) >> @@ -112,7 +107,8 @@ static bool qemu_timer_expired_ns(QEMUTimer *timer_head, int64_t current_time) >> return timer_head && (timer_head->expire_time <= current_time); >> } >> >> -static int64_t qemu_next_clock_deadline(QEMUClock *clock, int64_t delta) >> +static int64_t qemu_next_clock_deadline(QEMUClock *clock, int64_t delta, >> + AioContext *ctx) >> { >> int64_t expire_time, next; >> bool has_timer = false; >> @@ -122,7 +118,7 @@ static int64_t qemu_next_clock_deadline(QEMUClock *clock, int64_t delta) >> return delta; >> } >> >> - tlist = clock_to_timerlist(clock); >> + tlist = clock_to_timerlist(clock, ctx); >> qemu_mutex_lock(&tlist->active_timers_lock); >> if (tlist->active_timers) { >> has_timer = true; >> @@ -140,12 +136,13 @@ static int64_t qemu_next_clock_deadline(QEMUClock *clock, int64_t delta) >> static int64_t qemu_next_alarm_deadline(void) >> { >> int64_t delta = INT64_MAX; >> + AioContext *ctx = *tls_get_thread_aio_context(); >> >> if (!use_icount) { >> - delta = qemu_next_clock_deadline(vm_clock, delta); >> + delta = qemu_next_clock_deadline(vm_clock, delta, ctx); >> } >> - delta = qemu_next_clock_deadline(host_clock, delta); >> - return qemu_next_clock_deadline(rt_clock, delta); >> + delta = qemu_next_clock_deadline(host_clock, delta, ctx); >> + return qemu_next_clock_deadline(rt_clock, delta, ctx); >> } >> >> static void qemu_rearm_alarm_timer(struct 
qemu_alarm_timer *t) >> @@ -267,16 +264,21 @@ QEMUClock *rt_clock; >> QEMUClock *vm_clock; >> QEMUClock *host_clock; >> >> -static void timer_list_init(TimerList *tlist) >> +void timer_list_init(TimerList *tlist) >> { >> qemu_mutex_init(&tlist->active_timers_lock); >> tlist->active_timers = NULL; >> } >> >> +void timer_list_finalize(TimerList *tlist) >> +{ >> + qemu_mutex_destroy(&tlist->active_timers_lock); >> + assert(!tlist->active_timers); >> +} >> + >> static QEMUClock *qemu_new_clock(int type) >> { >> QEMUClock *clock; >> - TimerList *tlist; >> >> clock = g_malloc0(sizeof(QEMUClock)); >> clock->type = type; >> @@ -286,8 +288,6 @@ static QEMUClock *qemu_new_clock(int type) >> qemu_cond_init(&clock->wait_using); >> qemu_mutex_init(&clock->lock); >> notifier_list_init(&clock->reset_notifiers); >> - tlist = clock_to_timerlist(clock); >> - timer_list_init(tlist); >> >> return clock; >> } >> @@ -308,10 +308,14 @@ void qemu_clock_enable(QEMUClock *clock, bool enabled) >> } >> } >> >> +/* qemu_clock_has_timers, qemu_clock_expired, qemu_clock_deadline >> + * run In tcg icount mode. There is only one AioContext i.e. qemu_aio_context. >> + * So we only count the timers on qemu_aio_context. 
>> +*/ >> int64_t qemu_clock_has_timers(QEMUClock *clock) >> { >> bool has_timers; >> - TimerList *tlist = clock_to_timerlist(clock); >> + TimerList *tlist = clock_to_timerlist(clock, qemu_get_aio_context()); >> >> qemu_mutex_lock(&tlist->active_timers_lock); >> has_timers = !!tlist->active_timers; >> @@ -323,7 +327,7 @@ int64_t qemu_clock_expired(QEMUClock *clock) >> { >> bool has_timers; >> int64_t expire_time; >> - TimerList *tlist = clock_to_timerlist(clock); >> + TimerList *tlist = clock_to_timerlist(clock, qemu_get_aio_context()); >> >> qemu_mutex_lock(&tlist->active_timers_lock); >> has_timers = tlist->active_timers; >> @@ -339,7 +343,7 @@ int64_t qemu_clock_deadline(QEMUClock *clock) >> int64_t delta = INT32_MAX; >> bool has_timers; >> int64_t expire_time; >> - TimerList *tlist = clock_to_timerlist(clock); >> + TimerList *tlist = clock_to_timerlist(clock, qemu_get_aio_context()); >> >> qemu_mutex_lock(&tlist->active_timers_lock); >> has_timers = tlist->active_timers; >> @@ -355,19 +359,26 @@ int64_t qemu_clock_deadline(QEMUClock *clock) >> return delta; >> } >> >> -QEMUTimer *qemu_new_timer(QEMUClock *clock, int scale, >> - QEMUTimerCB *cb, void *opaque) >> +QEMUTimer *aioctx_new_timer(QEMUClock *clock, int scale, >> + QEMUTimerCB *cb, void *opaque, AioContext *ctx) >> { >> QEMUTimer *ts; >> >> ts = g_malloc0(sizeof(QEMUTimer)); >> - ts->list = clock_to_timerlist(clock); >> + ts->list = clock_to_timerlist(clock, ctx); >> ts->cb = cb; >> ts->opaque = opaque; >> ts->scale = scale; >> + ts->ctx = ctx; >> return ts; >> } >> >> +QEMUTimer *qemu_new_timer(QEMUClock *clock, int scale, >> + QEMUTimerCB *cb, void *opaque) >> +{ >> + return aioctx_new_timer(clock, scale, cb, opaque, qemu_get_aio_context()); >> +} >> + >> void qemu_free_timer(QEMUTimer *ts) >> { >> g_free(ts); >> @@ -457,6 +468,7 @@ void qemu_run_timers(QEMUClock *clock) >> QEMUTimer *ts; >> int64_t current_time; >> TimerList *tlist; >> + AioContext *ctx; >> >> atomic_inc(&clock->using); >> if 
(unlikely(!clock->enabled)) { >> @@ -465,7 +477,8 @@ void qemu_run_timers(QEMUClock *clock) >> >> >> current_time = qemu_get_clock_ns(clock); >> - tlist = clock_to_timerlist(clock); >> + ctx = *tls_get_thread_aio_context(); >> + tlist = clock_to_timerlist(clock, ctx); >> >> for(;;) { >> qemu_mutex_lock(&tlist->active_timers_lock); >> >
Il 29/07/2013 10:20, liu ping fan ha scritto: >> > Another issue is that deadline computation is not using the AioContext's >> > timer lists. >> > > Sorry, can not catch the meaning. When AioContext has its own timer > lists, it will have its own deadline(for ppoll timeout). So the > computation should be based on AioContext's timer lists, right? Associating the main loop's timer lists to other AioContexts is really really wrong... It is _already_ wrong to run timers in the main AioContext's aio_poll (because timers may not be ready to run from an arbitrary aio_poll), it is even worse to run them in other threads without taking the BQL. What exactly is the purpose of this series? Paolo
On Mon, Jul 29, 2013 at 9:11 PM, Paolo Bonzini <pbonzini@redhat.com> wrote: > Il 29/07/2013 10:20, liu ping fan ha scritto: >>> > Another issue is that deadline computation is not using the AioContext's >>> > timer lists. >>> > >> Sorry, can not catch the meaning. When AioContext has its own timer >> lists, it will have its own deadline(for ppoll timeout). So the >> computation should be based on AioContext's timer lists, right? > > Associating the main loop's timer lists to other AioContexts is really Main loop's timers will only be associated with the qemu_aio_context, not with other AioContext. (When creating timer, we have bound it with a specified AioContext, so it will only run on that AioContext) > really wrong... It is _already_ wrong to run timers in the main > AioContext's aio_poll (because timers may not be ready to run from an Sorry, but except that timer cb will call aio_poll, any other reason ? > arbitrary aio_poll), it is even worse to run them in other threads > without taking the BQL. > Can be fixed by some method to sync the dedicated thread with vcpus? > What exactly is the purpose of this series? > Enable timers to run on the dedicated threads(using aio as the mini-loop), currently for the following devices, virtio-blk-dataplane for throttling hpet for the best-effort resolution Thanks and regards, Pingfan > Paolo
diff --git a/async.c b/async.c index ba4072c..7e2340e 100644 --- a/async.c +++ b/async.c @@ -201,11 +201,15 @@ static void aio_ctx_finalize(GSource *source) { AioContext *ctx = (AioContext *) source; + int i; thread_pool_free(ctx->thread_pool); aio_set_event_notifier(ctx, &ctx->notifier, NULL, NULL); event_notifier_cleanup(&ctx->notifier); g_array_free(ctx->pollfds, TRUE); + for (i = 0; i < QEMU_CLOCK_MAXCNT; i++) { + timer_list_finalize(&ctx->timer_list[i]); + } } static GSourceFuncs aio_source_funcs = { @@ -237,6 +241,8 @@ void aio_notify(AioContext *ctx) AioContext *aio_context_new(void) { AioContext *ctx; + int i; + ctx = (AioContext *) g_source_new(&aio_source_funcs, sizeof(AioContext)); ctx->pollfds = g_array_new(FALSE, FALSE, sizeof(GPollFD)); ctx->thread_pool = NULL; @@ -245,6 +251,9 @@ AioContext *aio_context_new(void) aio_set_event_notifier(ctx, &ctx->notifier, (EventNotifierHandler *) event_notifier_test_and_clear, NULL); + for (i = 0; i < QEMU_CLOCK_MAXCNT; i++) { + timer_list_init(&ctx->timer_list[i]); + } return ctx; } diff --git a/include/block/aio.h b/include/block/aio.h index 04598b2..cf41b42 100644 --- a/include/block/aio.h +++ b/include/block/aio.h @@ -43,6 +43,18 @@ typedef struct AioHandler AioHandler; typedef void QEMUBHFunc(void *opaque); typedef void IOHandler(void *opaque); +/* Related timer with AioContext */ +typedef struct QEMUTimer QEMUTimer; +#define QEMU_CLOCK_MAXCNT 3 + +typedef struct TimerList { + QEMUTimer *active_timers; + QemuMutex active_timers_lock; +} TimerList; + +void timer_list_init(TimerList *tlist); +void timer_list_finalize(TimerList *tlist); + typedef struct AioContext { GSource source; @@ -73,6 +85,7 @@ typedef struct AioContext { /* Thread pool for performing work and receiving completion callbacks */ struct ThreadPool *thread_pool; + TimerList timer_list[QEMU_CLOCK_MAXCNT]; } AioContext; /* Returns 1 if there are still outstanding AIO requests; 0 otherwise */ diff --git a/include/qemu/timer.h b/include/qemu/timer.h 
index 9dd206c..3e5016b 100644 --- a/include/qemu/timer.h +++ b/include/qemu/timer.h @@ -33,9 +33,14 @@ extern QEMUClock *vm_clock; extern QEMUClock *host_clock; int64_t qemu_get_clock_ns(QEMUClock *clock); +/* qemu_clock_has_timers, qemu_clock_expired, qemu_clock_deadline + * run In tcg icount mode. There is only one AioContext i.e. qemu_aio_context. + * So we only count the timers on qemu_aio_context. + */ int64_t qemu_clock_has_timers(QEMUClock *clock); int64_t qemu_clock_expired(QEMUClock *clock); int64_t qemu_clock_deadline(QEMUClock *clock); + void qemu_clock_enable(QEMUClock *clock, bool enabled); void qemu_clock_warp(QEMUClock *clock); @@ -45,6 +50,9 @@ void qemu_unregister_clock_reset_notifier(QEMUClock *clock, QEMUTimer *qemu_new_timer(QEMUClock *clock, int scale, QEMUTimerCB *cb, void *opaque); +QEMUTimer *aioctx_new_timer(QEMUClock *clock, int scale, + QEMUTimerCB *cb, void *opaque, AioContext *ctx); + void qemu_free_timer(QEMUTimer *ts); void qemu_del_timer(QEMUTimer *ts); void qemu_mod_timer_ns(QEMUTimer *ts, int64_t expire_time); @@ -75,6 +83,18 @@ static inline QEMUTimer *qemu_new_timer_ms(QEMUClock *clock, QEMUTimerCB *cb, return qemu_new_timer(clock, SCALE_MS, cb, opaque); } +static inline QEMUTimer *aioctx_new_timer_ns(QEMUClock *clock, QEMUTimerCB *cb, + void *opaque, AioContext *ctx) +{ + return aioctx_new_timer(clock, SCALE_NS, cb, opaque, ctx); +} + +static inline QEMUTimer *aioctx_new_timer_ms(QEMUClock *clock, QEMUTimerCB *cb, + void *opaque, AioContext *ctx) +{ + return aioctx_new_timer(clock, SCALE_MS, cb, opaque, ctx); +} + static inline int64_t qemu_get_clock_ms(QEMUClock *clock) { return qemu_get_clock_ns(clock) / SCALE_MS; diff --git a/qemu-timer.c b/qemu-timer.c index d941a83..f15c3e6 100644 --- a/qemu-timer.c +++ b/qemu-timer.c @@ -45,14 +45,6 @@ #define QEMU_CLOCK_REALTIME 0 #define QEMU_CLOCK_VIRTUAL 1 #define QEMU_CLOCK_HOST 2 -#define QEMU_CLOCK_MAXCNT 3 - -typedef struct TimerList { - QEMUTimer *active_timers; - QemuMutex 
active_timers_lock; -} TimerList; - -static TimerList timer_list[QEMU_CLOCK_MAXCNT]; struct QEMUClock { NotifierList reset_notifiers; @@ -72,7 +64,9 @@ struct QEMUClock { struct QEMUTimer { int64_t expire_time; /* in nanoseconds */ + /* quick link to AioContext timer list */ TimerList *list; + AioContext *ctx; QEMUTimerCB *cb; void *opaque; QEMUTimer *next; @@ -100,11 +94,12 @@ struct qemu_alarm_timer { static struct qemu_alarm_timer *alarm_timer; -static TimerList *clock_to_timerlist(QEMUClock *clock) +static TimerList *clock_to_timerlist(QEMUClock *clock, AioContext *ctx) { int type = clock->type; - return &timer_list[type]; + assert(ctx); + return &ctx->timer_list[type]; } static bool qemu_timer_expired_ns(QEMUTimer *timer_head, int64_t current_time) @@ -112,7 +107,8 @@ static bool qemu_timer_expired_ns(QEMUTimer *timer_head, int64_t current_time) return timer_head && (timer_head->expire_time <= current_time); } -static int64_t qemu_next_clock_deadline(QEMUClock *clock, int64_t delta) +static int64_t qemu_next_clock_deadline(QEMUClock *clock, int64_t delta, + AioContext *ctx) { int64_t expire_time, next; bool has_timer = false; @@ -122,7 +118,7 @@ static int64_t qemu_next_clock_deadline(QEMUClock *clock, int64_t delta) return delta; } - tlist = clock_to_timerlist(clock); + tlist = clock_to_timerlist(clock, ctx); qemu_mutex_lock(&tlist->active_timers_lock); if (tlist->active_timers) { has_timer = true; @@ -140,12 +136,13 @@ static int64_t qemu_next_clock_deadline(QEMUClock *clock, int64_t delta) static int64_t qemu_next_alarm_deadline(void) { int64_t delta = INT64_MAX; + AioContext *ctx = *tls_get_thread_aio_context(); if (!use_icount) { - delta = qemu_next_clock_deadline(vm_clock, delta); + delta = qemu_next_clock_deadline(vm_clock, delta, ctx); } - delta = qemu_next_clock_deadline(host_clock, delta); - return qemu_next_clock_deadline(rt_clock, delta); + delta = qemu_next_clock_deadline(host_clock, delta, ctx); + return qemu_next_clock_deadline(rt_clock, delta, 
ctx); } static void qemu_rearm_alarm_timer(struct qemu_alarm_timer *t) @@ -267,16 +264,21 @@ QEMUClock *rt_clock; QEMUClock *vm_clock; QEMUClock *host_clock; -static void timer_list_init(TimerList *tlist) +void timer_list_init(TimerList *tlist) { qemu_mutex_init(&tlist->active_timers_lock); tlist->active_timers = NULL; } +void timer_list_finalize(TimerList *tlist) +{ + qemu_mutex_destroy(&tlist->active_timers_lock); + assert(!tlist->active_timers); +} + static QEMUClock *qemu_new_clock(int type) { QEMUClock *clock; - TimerList *tlist; clock = g_malloc0(sizeof(QEMUClock)); clock->type = type; @@ -286,8 +288,6 @@ static QEMUClock *qemu_new_clock(int type) qemu_cond_init(&clock->wait_using); qemu_mutex_init(&clock->lock); notifier_list_init(&clock->reset_notifiers); - tlist = clock_to_timerlist(clock); - timer_list_init(tlist); return clock; } @@ -308,10 +308,14 @@ void qemu_clock_enable(QEMUClock *clock, bool enabled) } } +/* qemu_clock_has_timers, qemu_clock_expired, qemu_clock_deadline + * run In tcg icount mode. There is only one AioContext i.e. qemu_aio_context. + * So we only count the timers on qemu_aio_context. 
+*/ int64_t qemu_clock_has_timers(QEMUClock *clock) { bool has_timers; - TimerList *tlist = clock_to_timerlist(clock); + TimerList *tlist = clock_to_timerlist(clock, qemu_get_aio_context()); qemu_mutex_lock(&tlist->active_timers_lock); has_timers = !!tlist->active_timers; @@ -323,7 +327,7 @@ int64_t qemu_clock_expired(QEMUClock *clock) { bool has_timers; int64_t expire_time; - TimerList *tlist = clock_to_timerlist(clock); + TimerList *tlist = clock_to_timerlist(clock, qemu_get_aio_context()); qemu_mutex_lock(&tlist->active_timers_lock); has_timers = tlist->active_timers; @@ -339,7 +343,7 @@ int64_t qemu_clock_deadline(QEMUClock *clock) int64_t delta = INT32_MAX; bool has_timers; int64_t expire_time; - TimerList *tlist = clock_to_timerlist(clock); + TimerList *tlist = clock_to_timerlist(clock, qemu_get_aio_context()); qemu_mutex_lock(&tlist->active_timers_lock); has_timers = tlist->active_timers; @@ -355,19 +359,26 @@ int64_t qemu_clock_deadline(QEMUClock *clock) return delta; } -QEMUTimer *qemu_new_timer(QEMUClock *clock, int scale, - QEMUTimerCB *cb, void *opaque) +QEMUTimer *aioctx_new_timer(QEMUClock *clock, int scale, + QEMUTimerCB *cb, void *opaque, AioContext *ctx) { QEMUTimer *ts; ts = g_malloc0(sizeof(QEMUTimer)); - ts->list = clock_to_timerlist(clock); + ts->list = clock_to_timerlist(clock, ctx); ts->cb = cb; ts->opaque = opaque; ts->scale = scale; + ts->ctx = ctx; return ts; } +QEMUTimer *qemu_new_timer(QEMUClock *clock, int scale, + QEMUTimerCB *cb, void *opaque) +{ + return aioctx_new_timer(clock, scale, cb, opaque, qemu_get_aio_context()); +} + void qemu_free_timer(QEMUTimer *ts) { g_free(ts); @@ -457,6 +468,7 @@ void qemu_run_timers(QEMUClock *clock) QEMUTimer *ts; int64_t current_time; TimerList *tlist; + AioContext *ctx; atomic_inc(&clock->using); if (unlikely(!clock->enabled)) { @@ -465,7 +477,8 @@ void qemu_run_timers(QEMUClock *clock) current_time = qemu_get_clock_ns(clock); - tlist = clock_to_timerlist(clock); + ctx = 
*tls_get_thread_aio_context(); + tlist = clock_to_timerlist(clock, ctx); for(;;) { qemu_mutex_lock(&tlist->active_timers_lock);
Currently, timers run on the iothread inside the BQL; this limits the usage of timers in some cases, e.g. virtio-blk-dataplane. In order to run timers on a private thread based on a different clocksource, we arm each AioContext with three timer lists corresponding to the three clocksources (QemuClock). A little later, we will run timers in aio_poll. Signed-off-by: Liu Ping Fan <pingfank@linux.vnet.ibm.com> ------ issue to fix --- Note: before this patch, there should be another one to fix the race issue between qemu_mod_timer() and _run_timers(). --- async.c | 9 ++++++++ include/block/aio.h | 13 +++++++++++ include/qemu/timer.h | 20 ++++++++++++++++ qemu-timer.c | 65 +++++++++++++++++++++++++++++++--------------------- 4 files changed, 81 insertions(+), 26 deletions(-)