Patchwork [RFC,v2,4/5] timer: associate three timerlists with AioContext

login
register
mail settings
Submitter pingfan liu
Date July 29, 2013, 3:16 a.m.
Message ID <1375067768-11342-5-git-send-email-pingfank@linux.vnet.ibm.com>
Download mbox | patch
Permalink /patch/262655/
State New
Headers show

Comments

pingfan liu - July 29, 2013, 3:16 a.m.
Currently, timers run on the iothread inside the BQL; this limits the
usage of timers in some cases, e.g. virtio-blk-dataplane. In order to
run timers on a private thread based on different clock sources, we arm
each AioContext with three timer lists corresponding to the three clock
sources (QemuClock).

A little later, we will run timers in aio_poll.

Signed-off-by: Liu Ping Fan <pingfank@linux.vnet.ibm.com>

------ issue to fix ---
Note: before this patch, there should be another one to fix the race
between qemu_mod_timer() and _run_timers().
---
 async.c              |  9 ++++++++
 include/block/aio.h  | 13 +++++++++++
 include/qemu/timer.h | 20 ++++++++++++++++
 qemu-timer.c         | 65 +++++++++++++++++++++++++++++++---------------------
 4 files changed, 81 insertions(+), 26 deletions(-)
Paolo Bonzini - July 29, 2013, 6:32 a.m.
Il 29/07/2013 05:16, Liu Ping Fan ha scritto:
> Currently, timers run on iothread inside QBL, this limits the usage
> of timers in some case, e.g. virtio-blk-dataplane. In order to run
> timers on private thread based on different clocksource, we arm each
> AioContext with three timer lists in according to three clocksource
> (QemuClock).
> 
> A little later, we will run timers in aio_poll.
> 
> Signed-off-by: Liu Ping Fan <pingfank@linux.vnet.ibm.com>
> 
> ------ issue to fix ---
> Note: before this patch, there should be another one to fix the race
> issue by qemu_mod_timer() and _run_timers().

Another issue is that deadline computation is not using the AioContext's
timer lists.

Paolo

> ---
>  async.c              |  9 ++++++++
>  include/block/aio.h  | 13 +++++++++++
>  include/qemu/timer.h | 20 ++++++++++++++++
>  qemu-timer.c         | 65 +++++++++++++++++++++++++++++++---------------------
>  4 files changed, 81 insertions(+), 26 deletions(-)
> 
> diff --git a/async.c b/async.c
> index ba4072c..7e2340e 100644
> --- a/async.c
> +++ b/async.c
> @@ -201,11 +201,15 @@ static void
>  aio_ctx_finalize(GSource     *source)
>  {
>      AioContext *ctx = (AioContext *) source;
> +    int i;
>  
>      thread_pool_free(ctx->thread_pool);
>      aio_set_event_notifier(ctx, &ctx->notifier, NULL, NULL);
>      event_notifier_cleanup(&ctx->notifier);
>      g_array_free(ctx->pollfds, TRUE);
> +    for (i = 0; i < QEMU_CLOCK_MAXCNT; i++) {
> +        timer_list_finalize(&ctx->timer_list[i]);
> +    }
>  }
>  
>  static GSourceFuncs aio_source_funcs = {
> @@ -237,6 +241,8 @@ void aio_notify(AioContext *ctx)
>  AioContext *aio_context_new(void)
>  {
>      AioContext *ctx;
> +    int i;
> +
>      ctx = (AioContext *) g_source_new(&aio_source_funcs, sizeof(AioContext));
>      ctx->pollfds = g_array_new(FALSE, FALSE, sizeof(GPollFD));
>      ctx->thread_pool = NULL;
> @@ -245,6 +251,9 @@ AioContext *aio_context_new(void)
>      aio_set_event_notifier(ctx, &ctx->notifier, 
>                             (EventNotifierHandler *)
>                             event_notifier_test_and_clear, NULL);
> +    for (i = 0; i < QEMU_CLOCK_MAXCNT; i++) {
> +        timer_list_init(&ctx->timer_list[i]);
> +    }
>  
>      return ctx;
>  }
> diff --git a/include/block/aio.h b/include/block/aio.h
> index 04598b2..cf41b42 100644
> --- a/include/block/aio.h
> +++ b/include/block/aio.h
> @@ -43,6 +43,18 @@ typedef struct AioHandler AioHandler;
>  typedef void QEMUBHFunc(void *opaque);
>  typedef void IOHandler(void *opaque);
>  
> +/* Related timer with AioContext */
> +typedef struct QEMUTimer QEMUTimer;
> +#define QEMU_CLOCK_MAXCNT 3
> +
> +typedef struct TimerList {
> +    QEMUTimer *active_timers;
> +    QemuMutex active_timers_lock;
> +} TimerList;
> +
> +void timer_list_init(TimerList *tlist);
> +void timer_list_finalize(TimerList *tlist);
> +
>  typedef struct AioContext {
>      GSource source;
>  
> @@ -73,6 +85,7 @@ typedef struct AioContext {
>  
>      /* Thread pool for performing work and receiving completion callbacks */
>      struct ThreadPool *thread_pool;
> +    TimerList timer_list[QEMU_CLOCK_MAXCNT];
>  } AioContext;
>  
>  /* Returns 1 if there are still outstanding AIO requests; 0 otherwise */
> diff --git a/include/qemu/timer.h b/include/qemu/timer.h
> index 9dd206c..3e5016b 100644
> --- a/include/qemu/timer.h
> +++ b/include/qemu/timer.h
> @@ -33,9 +33,14 @@ extern QEMUClock *vm_clock;
>  extern QEMUClock *host_clock;
>  
>  int64_t qemu_get_clock_ns(QEMUClock *clock);
> +/* qemu_clock_has_timers, qemu_clock_expired, qemu_clock_deadline
> + * run In tcg icount mode. There is only one AioContext i.e. qemu_aio_context.
> + * So we only count the timers on qemu_aio_context.
> + */
>  int64_t qemu_clock_has_timers(QEMUClock *clock);
>  int64_t qemu_clock_expired(QEMUClock *clock);
>  int64_t qemu_clock_deadline(QEMUClock *clock);
> +
>  void qemu_clock_enable(QEMUClock *clock, bool enabled);
>  void qemu_clock_warp(QEMUClock *clock);
>  
> @@ -45,6 +50,9 @@ void qemu_unregister_clock_reset_notifier(QEMUClock *clock,
>  
>  QEMUTimer *qemu_new_timer(QEMUClock *clock, int scale,
>                            QEMUTimerCB *cb, void *opaque);
> +QEMUTimer *aioctx_new_timer(QEMUClock *clock, int scale,
> +                          QEMUTimerCB *cb, void *opaque, AioContext *ctx);
> +
>  void qemu_free_timer(QEMUTimer *ts);
>  void qemu_del_timer(QEMUTimer *ts);
>  void qemu_mod_timer_ns(QEMUTimer *ts, int64_t expire_time);
> @@ -75,6 +83,18 @@ static inline QEMUTimer *qemu_new_timer_ms(QEMUClock *clock, QEMUTimerCB *cb,
>      return qemu_new_timer(clock, SCALE_MS, cb, opaque);
>  }
>  
> +static inline QEMUTimer *aioctx_new_timer_ns(QEMUClock *clock, QEMUTimerCB *cb,
> +                                           void *opaque, AioContext *ctx)
> +{
> +    return aioctx_new_timer(clock, SCALE_NS, cb, opaque, ctx);
> +}
> +
> +static inline QEMUTimer *aioctx_new_timer_ms(QEMUClock *clock, QEMUTimerCB *cb,
> +                                           void *opaque, AioContext *ctx)
> +{
> +    return aioctx_new_timer(clock, SCALE_MS, cb, opaque, ctx);
> +}
> +
>  static inline int64_t qemu_get_clock_ms(QEMUClock *clock)
>  {
>      return qemu_get_clock_ns(clock) / SCALE_MS;
> diff --git a/qemu-timer.c b/qemu-timer.c
> index d941a83..f15c3e6 100644
> --- a/qemu-timer.c
> +++ b/qemu-timer.c
> @@ -45,14 +45,6 @@
>  #define QEMU_CLOCK_REALTIME 0
>  #define QEMU_CLOCK_VIRTUAL  1
>  #define QEMU_CLOCK_HOST     2
> -#define QEMU_CLOCK_MAXCNT 3
> -
> -typedef struct TimerList {
> -    QEMUTimer *active_timers;
> -    QemuMutex active_timers_lock;
> -} TimerList;
> -
> -static TimerList timer_list[QEMU_CLOCK_MAXCNT];
>  
>  struct QEMUClock {
>      NotifierList reset_notifiers;
> @@ -72,7 +64,9 @@ struct QEMUClock {
>  
>  struct QEMUTimer {
>      int64_t expire_time;	/* in nanoseconds */
> +    /* quick link to AioContext timer list */
>      TimerList *list;
> +    AioContext *ctx;
>      QEMUTimerCB *cb;
>      void *opaque;
>      QEMUTimer *next;
> @@ -100,11 +94,12 @@ struct qemu_alarm_timer {
>  
>  static struct qemu_alarm_timer *alarm_timer;
>  
> -static TimerList *clock_to_timerlist(QEMUClock *clock)
> +static TimerList *clock_to_timerlist(QEMUClock *clock, AioContext *ctx)
>  {
>      int type = clock->type;
>  
> -    return  &timer_list[type];
> +    assert(ctx);
> +    return  &ctx->timer_list[type];
>  }
>  
>  static bool qemu_timer_expired_ns(QEMUTimer *timer_head, int64_t current_time)
> @@ -112,7 +107,8 @@ static bool qemu_timer_expired_ns(QEMUTimer *timer_head, int64_t current_time)
>      return timer_head && (timer_head->expire_time <= current_time);
>  }
>  
> -static int64_t qemu_next_clock_deadline(QEMUClock *clock, int64_t delta)
> +static int64_t qemu_next_clock_deadline(QEMUClock *clock, int64_t delta,
> +    AioContext *ctx)
>  {
>      int64_t expire_time, next;
>      bool has_timer = false;
> @@ -122,7 +118,7 @@ static int64_t qemu_next_clock_deadline(QEMUClock *clock, int64_t delta)
>          return delta;
>      }
>  
> -    tlist = clock_to_timerlist(clock);
> +    tlist = clock_to_timerlist(clock, ctx);
>      qemu_mutex_lock(&tlist->active_timers_lock);
>      if (tlist->active_timers) {
>          has_timer = true;
> @@ -140,12 +136,13 @@ static int64_t qemu_next_clock_deadline(QEMUClock *clock, int64_t delta)
>  static int64_t qemu_next_alarm_deadline(void)
>  {
>      int64_t delta = INT64_MAX;
> +    AioContext *ctx = *tls_get_thread_aio_context();
>  
>      if (!use_icount) {
> -        delta = qemu_next_clock_deadline(vm_clock, delta);
> +        delta = qemu_next_clock_deadline(vm_clock, delta, ctx);
>      }
> -    delta = qemu_next_clock_deadline(host_clock, delta);
> -    return qemu_next_clock_deadline(rt_clock, delta);
> +    delta = qemu_next_clock_deadline(host_clock, delta, ctx);
> +    return qemu_next_clock_deadline(rt_clock, delta, ctx);
>  }
>  
>  static void qemu_rearm_alarm_timer(struct qemu_alarm_timer *t)
> @@ -267,16 +264,21 @@ QEMUClock *rt_clock;
>  QEMUClock *vm_clock;
>  QEMUClock *host_clock;
>  
> -static void timer_list_init(TimerList *tlist)
> +void timer_list_init(TimerList *tlist)
>  {
>      qemu_mutex_init(&tlist->active_timers_lock);
>      tlist->active_timers = NULL;
>  }
>  
> +void timer_list_finalize(TimerList *tlist)
> +{
> +    qemu_mutex_destroy(&tlist->active_timers_lock);
> +    assert(!tlist->active_timers);
> +}
> +
>  static QEMUClock *qemu_new_clock(int type)
>  {
>      QEMUClock *clock;
> -    TimerList *tlist;
>  
>      clock = g_malloc0(sizeof(QEMUClock));
>      clock->type = type;
> @@ -286,8 +288,6 @@ static QEMUClock *qemu_new_clock(int type)
>      qemu_cond_init(&clock->wait_using);
>      qemu_mutex_init(&clock->lock);
>      notifier_list_init(&clock->reset_notifiers);
> -    tlist = clock_to_timerlist(clock);
> -    timer_list_init(tlist);
>  
>      return clock;
>  }
> @@ -308,10 +308,14 @@ void qemu_clock_enable(QEMUClock *clock, bool enabled)
>      }
>  }
>  
> +/* qemu_clock_has_timers, qemu_clock_expired, qemu_clock_deadline
> +  * run In tcg icount mode. There is only one AioContext i.e. qemu_aio_context.
> + * So we only count the timers on qemu_aio_context.
> +*/
>  int64_t qemu_clock_has_timers(QEMUClock *clock)
>  {
>      bool has_timers;
> -    TimerList *tlist = clock_to_timerlist(clock);
> +    TimerList *tlist = clock_to_timerlist(clock, qemu_get_aio_context());
>  
>      qemu_mutex_lock(&tlist->active_timers_lock);
>      has_timers = !!tlist->active_timers;
> @@ -323,7 +327,7 @@ int64_t qemu_clock_expired(QEMUClock *clock)
>  {
>      bool has_timers;
>      int64_t expire_time;
> -    TimerList *tlist = clock_to_timerlist(clock);
> +    TimerList *tlist = clock_to_timerlist(clock, qemu_get_aio_context());
>  
>      qemu_mutex_lock(&tlist->active_timers_lock);
>      has_timers = tlist->active_timers;
> @@ -339,7 +343,7 @@ int64_t qemu_clock_deadline(QEMUClock *clock)
>      int64_t delta = INT32_MAX;
>      bool has_timers;
>      int64_t expire_time;
> -    TimerList *tlist = clock_to_timerlist(clock);
> +    TimerList *tlist = clock_to_timerlist(clock, qemu_get_aio_context());
>  
>      qemu_mutex_lock(&tlist->active_timers_lock);
>      has_timers = tlist->active_timers;
> @@ -355,19 +359,26 @@ int64_t qemu_clock_deadline(QEMUClock *clock)
>      return delta;
>  }
>  
> -QEMUTimer *qemu_new_timer(QEMUClock *clock, int scale,
> -                          QEMUTimerCB *cb, void *opaque)
> +QEMUTimer *aioctx_new_timer(QEMUClock *clock, int scale,
> +                          QEMUTimerCB *cb, void *opaque, AioContext *ctx)
>  {
>      QEMUTimer *ts;
>  
>      ts = g_malloc0(sizeof(QEMUTimer));
> -    ts->list = clock_to_timerlist(clock);
> +    ts->list = clock_to_timerlist(clock, ctx);
>      ts->cb = cb;
>      ts->opaque = opaque;
>      ts->scale = scale;
> +    ts->ctx = ctx;
>      return ts;
>  }
>  
> +QEMUTimer *qemu_new_timer(QEMUClock *clock, int scale,
> +                          QEMUTimerCB *cb, void *opaque)
> +{
> +    return aioctx_new_timer(clock, scale, cb, opaque, qemu_get_aio_context());
> +}
> +
>  void qemu_free_timer(QEMUTimer *ts)
>  {
>      g_free(ts);
> @@ -457,6 +468,7 @@ void qemu_run_timers(QEMUClock *clock)
>      QEMUTimer *ts;
>      int64_t current_time;
>      TimerList *tlist;
> +    AioContext *ctx;
>  
>      atomic_inc(&clock->using);
>      if (unlikely(!clock->enabled)) {
> @@ -465,7 +477,8 @@ void qemu_run_timers(QEMUClock *clock)
>  
>  
>      current_time = qemu_get_clock_ns(clock);
> -    tlist = clock_to_timerlist(clock);
> +    ctx = *tls_get_thread_aio_context();
> +    tlist = clock_to_timerlist(clock, ctx);
>  
>      for(;;) {
>          qemu_mutex_lock(&tlist->active_timers_lock);
>
pingfan liu - July 29, 2013, 8:20 a.m.
On Mon, Jul 29, 2013 at 2:32 PM, Paolo Bonzini <pbonzini@redhat.com> wrote:
> Il 29/07/2013 05:16, Liu Ping Fan ha scritto:
>> Currently, timers run on iothread inside QBL, this limits the usage
>> of timers in some case, e.g. virtio-blk-dataplane. In order to run
>> timers on private thread based on different clocksource, we arm each
>> AioContext with three timer lists in according to three clocksource
>> (QemuClock).
>>
>> A little later, we will run timers in aio_poll.
>>
>> Signed-off-by: Liu Ping Fan <pingfank@linux.vnet.ibm.com>
>>
>> ------ issue to fix ---
>> Note: before this patch, there should be another one to fix the race
>> issue by qemu_mod_timer() and _run_timers().
>
> Another issue is that deadline computation is not using the AioContext's
> timer lists.
>
Sorry, can not catch the meaning. When AioContext has its own timer
lists, it will have its own deadline(for ppoll timeout). So the
computation should be based on AioContext's timer lists, right?

Thanks and regards,
Pingfan
> Paolo
>
>> ---
>>  async.c              |  9 ++++++++
>>  include/block/aio.h  | 13 +++++++++++
>>  include/qemu/timer.h | 20 ++++++++++++++++
>>  qemu-timer.c         | 65 +++++++++++++++++++++++++++++++---------------------
>>  4 files changed, 81 insertions(+), 26 deletions(-)
>>
>> diff --git a/async.c b/async.c
>> index ba4072c..7e2340e 100644
>> --- a/async.c
>> +++ b/async.c
>> @@ -201,11 +201,15 @@ static void
>>  aio_ctx_finalize(GSource     *source)
>>  {
>>      AioContext *ctx = (AioContext *) source;
>> +    int i;
>>
>>      thread_pool_free(ctx->thread_pool);
>>      aio_set_event_notifier(ctx, &ctx->notifier, NULL, NULL);
>>      event_notifier_cleanup(&ctx->notifier);
>>      g_array_free(ctx->pollfds, TRUE);
>> +    for (i = 0; i < QEMU_CLOCK_MAXCNT; i++) {
>> +        timer_list_finalize(&ctx->timer_list[i]);
>> +    }
>>  }
>>
>>  static GSourceFuncs aio_source_funcs = {
>> @@ -237,6 +241,8 @@ void aio_notify(AioContext *ctx)
>>  AioContext *aio_context_new(void)
>>  {
>>      AioContext *ctx;
>> +    int i;
>> +
>>      ctx = (AioContext *) g_source_new(&aio_source_funcs, sizeof(AioContext));
>>      ctx->pollfds = g_array_new(FALSE, FALSE, sizeof(GPollFD));
>>      ctx->thread_pool = NULL;
>> @@ -245,6 +251,9 @@ AioContext *aio_context_new(void)
>>      aio_set_event_notifier(ctx, &ctx->notifier,
>>                             (EventNotifierHandler *)
>>                             event_notifier_test_and_clear, NULL);
>> +    for (i = 0; i < QEMU_CLOCK_MAXCNT; i++) {
>> +        timer_list_init(&ctx->timer_list[i]);
>> +    }
>>
>>      return ctx;
>>  }
>> diff --git a/include/block/aio.h b/include/block/aio.h
>> index 04598b2..cf41b42 100644
>> --- a/include/block/aio.h
>> +++ b/include/block/aio.h
>> @@ -43,6 +43,18 @@ typedef struct AioHandler AioHandler;
>>  typedef void QEMUBHFunc(void *opaque);
>>  typedef void IOHandler(void *opaque);
>>
>> +/* Related timer with AioContext */
>> +typedef struct QEMUTimer QEMUTimer;
>> +#define QEMU_CLOCK_MAXCNT 3
>> +
>> +typedef struct TimerList {
>> +    QEMUTimer *active_timers;
>> +    QemuMutex active_timers_lock;
>> +} TimerList;
>> +
>> +void timer_list_init(TimerList *tlist);
>> +void timer_list_finalize(TimerList *tlist);
>> +
>>  typedef struct AioContext {
>>      GSource source;
>>
>> @@ -73,6 +85,7 @@ typedef struct AioContext {
>>
>>      /* Thread pool for performing work and receiving completion callbacks */
>>      struct ThreadPool *thread_pool;
>> +    TimerList timer_list[QEMU_CLOCK_MAXCNT];
>>  } AioContext;
>>
>>  /* Returns 1 if there are still outstanding AIO requests; 0 otherwise */
>> diff --git a/include/qemu/timer.h b/include/qemu/timer.h
>> index 9dd206c..3e5016b 100644
>> --- a/include/qemu/timer.h
>> +++ b/include/qemu/timer.h
>> @@ -33,9 +33,14 @@ extern QEMUClock *vm_clock;
>>  extern QEMUClock *host_clock;
>>
>>  int64_t qemu_get_clock_ns(QEMUClock *clock);
>> +/* qemu_clock_has_timers, qemu_clock_expired, qemu_clock_deadline
>> + * run In tcg icount mode. There is only one AioContext i.e. qemu_aio_context.
>> + * So we only count the timers on qemu_aio_context.
>> + */
>>  int64_t qemu_clock_has_timers(QEMUClock *clock);
>>  int64_t qemu_clock_expired(QEMUClock *clock);
>>  int64_t qemu_clock_deadline(QEMUClock *clock);
>> +
>>  void qemu_clock_enable(QEMUClock *clock, bool enabled);
>>  void qemu_clock_warp(QEMUClock *clock);
>>
>> @@ -45,6 +50,9 @@ void qemu_unregister_clock_reset_notifier(QEMUClock *clock,
>>
>>  QEMUTimer *qemu_new_timer(QEMUClock *clock, int scale,
>>                            QEMUTimerCB *cb, void *opaque);
>> +QEMUTimer *aioctx_new_timer(QEMUClock *clock, int scale,
>> +                          QEMUTimerCB *cb, void *opaque, AioContext *ctx);
>> +
>>  void qemu_free_timer(QEMUTimer *ts);
>>  void qemu_del_timer(QEMUTimer *ts);
>>  void qemu_mod_timer_ns(QEMUTimer *ts, int64_t expire_time);
>> @@ -75,6 +83,18 @@ static inline QEMUTimer *qemu_new_timer_ms(QEMUClock *clock, QEMUTimerCB *cb,
>>      return qemu_new_timer(clock, SCALE_MS, cb, opaque);
>>  }
>>
>> +static inline QEMUTimer *aioctx_new_timer_ns(QEMUClock *clock, QEMUTimerCB *cb,
>> +                                           void *opaque, AioContext *ctx)
>> +{
>> +    return aioctx_new_timer(clock, SCALE_NS, cb, opaque, ctx);
>> +}
>> +
>> +static inline QEMUTimer *aioctx_new_timer_ms(QEMUClock *clock, QEMUTimerCB *cb,
>> +                                           void *opaque, AioContext *ctx)
>> +{
>> +    return aioctx_new_timer(clock, SCALE_MS, cb, opaque, ctx);
>> +}
>> +
>>  static inline int64_t qemu_get_clock_ms(QEMUClock *clock)
>>  {
>>      return qemu_get_clock_ns(clock) / SCALE_MS;
>> diff --git a/qemu-timer.c b/qemu-timer.c
>> index d941a83..f15c3e6 100644
>> --- a/qemu-timer.c
>> +++ b/qemu-timer.c
>> @@ -45,14 +45,6 @@
>>  #define QEMU_CLOCK_REALTIME 0
>>  #define QEMU_CLOCK_VIRTUAL  1
>>  #define QEMU_CLOCK_HOST     2
>> -#define QEMU_CLOCK_MAXCNT 3
>> -
>> -typedef struct TimerList {
>> -    QEMUTimer *active_timers;
>> -    QemuMutex active_timers_lock;
>> -} TimerList;
>> -
>> -static TimerList timer_list[QEMU_CLOCK_MAXCNT];
>>
>>  struct QEMUClock {
>>      NotifierList reset_notifiers;
>> @@ -72,7 +64,9 @@ struct QEMUClock {
>>
>>  struct QEMUTimer {
>>      int64_t expire_time;     /* in nanoseconds */
>> +    /* quick link to AioContext timer list */
>>      TimerList *list;
>> +    AioContext *ctx;
>>      QEMUTimerCB *cb;
>>      void *opaque;
>>      QEMUTimer *next;
>> @@ -100,11 +94,12 @@ struct qemu_alarm_timer {
>>
>>  static struct qemu_alarm_timer *alarm_timer;
>>
>> -static TimerList *clock_to_timerlist(QEMUClock *clock)
>> +static TimerList *clock_to_timerlist(QEMUClock *clock, AioContext *ctx)
>>  {
>>      int type = clock->type;
>>
>> -    return  &timer_list[type];
>> +    assert(ctx);
>> +    return  &ctx->timer_list[type];
>>  }
>>
>>  static bool qemu_timer_expired_ns(QEMUTimer *timer_head, int64_t current_time)
>> @@ -112,7 +107,8 @@ static bool qemu_timer_expired_ns(QEMUTimer *timer_head, int64_t current_time)
>>      return timer_head && (timer_head->expire_time <= current_time);
>>  }
>>
>> -static int64_t qemu_next_clock_deadline(QEMUClock *clock, int64_t delta)
>> +static int64_t qemu_next_clock_deadline(QEMUClock *clock, int64_t delta,
>> +    AioContext *ctx)
>>  {
>>      int64_t expire_time, next;
>>      bool has_timer = false;
>> @@ -122,7 +118,7 @@ static int64_t qemu_next_clock_deadline(QEMUClock *clock, int64_t delta)
>>          return delta;
>>      }
>>
>> -    tlist = clock_to_timerlist(clock);
>> +    tlist = clock_to_timerlist(clock, ctx);
>>      qemu_mutex_lock(&tlist->active_timers_lock);
>>      if (tlist->active_timers) {
>>          has_timer = true;
>> @@ -140,12 +136,13 @@ static int64_t qemu_next_clock_deadline(QEMUClock *clock, int64_t delta)
>>  static int64_t qemu_next_alarm_deadline(void)
>>  {
>>      int64_t delta = INT64_MAX;
>> +    AioContext *ctx = *tls_get_thread_aio_context();
>>
>>      if (!use_icount) {
>> -        delta = qemu_next_clock_deadline(vm_clock, delta);
>> +        delta = qemu_next_clock_deadline(vm_clock, delta, ctx);
>>      }
>> -    delta = qemu_next_clock_deadline(host_clock, delta);
>> -    return qemu_next_clock_deadline(rt_clock, delta);
>> +    delta = qemu_next_clock_deadline(host_clock, delta, ctx);
>> +    return qemu_next_clock_deadline(rt_clock, delta, ctx);
>>  }
>>
>>  static void qemu_rearm_alarm_timer(struct qemu_alarm_timer *t)
>> @@ -267,16 +264,21 @@ QEMUClock *rt_clock;
>>  QEMUClock *vm_clock;
>>  QEMUClock *host_clock;
>>
>> -static void timer_list_init(TimerList *tlist)
>> +void timer_list_init(TimerList *tlist)
>>  {
>>      qemu_mutex_init(&tlist->active_timers_lock);
>>      tlist->active_timers = NULL;
>>  }
>>
>> +void timer_list_finalize(TimerList *tlist)
>> +{
>> +    qemu_mutex_destroy(&tlist->active_timers_lock);
>> +    assert(!tlist->active_timers);
>> +}
>> +
>>  static QEMUClock *qemu_new_clock(int type)
>>  {
>>      QEMUClock *clock;
>> -    TimerList *tlist;
>>
>>      clock = g_malloc0(sizeof(QEMUClock));
>>      clock->type = type;
>> @@ -286,8 +288,6 @@ static QEMUClock *qemu_new_clock(int type)
>>      qemu_cond_init(&clock->wait_using);
>>      qemu_mutex_init(&clock->lock);
>>      notifier_list_init(&clock->reset_notifiers);
>> -    tlist = clock_to_timerlist(clock);
>> -    timer_list_init(tlist);
>>
>>      return clock;
>>  }
>> @@ -308,10 +308,14 @@ void qemu_clock_enable(QEMUClock *clock, bool enabled)
>>      }
>>  }
>>
>> +/* qemu_clock_has_timers, qemu_clock_expired, qemu_clock_deadline
>> +  * run In tcg icount mode. There is only one AioContext i.e. qemu_aio_context.
>> + * So we only count the timers on qemu_aio_context.
>> +*/
>>  int64_t qemu_clock_has_timers(QEMUClock *clock)
>>  {
>>      bool has_timers;
>> -    TimerList *tlist = clock_to_timerlist(clock);
>> +    TimerList *tlist = clock_to_timerlist(clock, qemu_get_aio_context());
>>
>>      qemu_mutex_lock(&tlist->active_timers_lock);
>>      has_timers = !!tlist->active_timers;
>> @@ -323,7 +327,7 @@ int64_t qemu_clock_expired(QEMUClock *clock)
>>  {
>>      bool has_timers;
>>      int64_t expire_time;
>> -    TimerList *tlist = clock_to_timerlist(clock);
>> +    TimerList *tlist = clock_to_timerlist(clock, qemu_get_aio_context());
>>
>>      qemu_mutex_lock(&tlist->active_timers_lock);
>>      has_timers = tlist->active_timers;
>> @@ -339,7 +343,7 @@ int64_t qemu_clock_deadline(QEMUClock *clock)
>>      int64_t delta = INT32_MAX;
>>      bool has_timers;
>>      int64_t expire_time;
>> -    TimerList *tlist = clock_to_timerlist(clock);
>> +    TimerList *tlist = clock_to_timerlist(clock, qemu_get_aio_context());
>>
>>      qemu_mutex_lock(&tlist->active_timers_lock);
>>      has_timers = tlist->active_timers;
>> @@ -355,19 +359,26 @@ int64_t qemu_clock_deadline(QEMUClock *clock)
>>      return delta;
>>  }
>>
>> -QEMUTimer *qemu_new_timer(QEMUClock *clock, int scale,
>> -                          QEMUTimerCB *cb, void *opaque)
>> +QEMUTimer *aioctx_new_timer(QEMUClock *clock, int scale,
>> +                          QEMUTimerCB *cb, void *opaque, AioContext *ctx)
>>  {
>>      QEMUTimer *ts;
>>
>>      ts = g_malloc0(sizeof(QEMUTimer));
>> -    ts->list = clock_to_timerlist(clock);
>> +    ts->list = clock_to_timerlist(clock, ctx);
>>      ts->cb = cb;
>>      ts->opaque = opaque;
>>      ts->scale = scale;
>> +    ts->ctx = ctx;
>>      return ts;
>>  }
>>
>> +QEMUTimer *qemu_new_timer(QEMUClock *clock, int scale,
>> +                          QEMUTimerCB *cb, void *opaque)
>> +{
>> +    return aioctx_new_timer(clock, scale, cb, opaque, qemu_get_aio_context());
>> +}
>> +
>>  void qemu_free_timer(QEMUTimer *ts)
>>  {
>>      g_free(ts);
>> @@ -457,6 +468,7 @@ void qemu_run_timers(QEMUClock *clock)
>>      QEMUTimer *ts;
>>      int64_t current_time;
>>      TimerList *tlist;
>> +    AioContext *ctx;
>>
>>      atomic_inc(&clock->using);
>>      if (unlikely(!clock->enabled)) {
>> @@ -465,7 +477,8 @@ void qemu_run_timers(QEMUClock *clock)
>>
>>
>>      current_time = qemu_get_clock_ns(clock);
>> -    tlist = clock_to_timerlist(clock);
>> +    ctx = *tls_get_thread_aio_context();
>> +    tlist = clock_to_timerlist(clock, ctx);
>>
>>      for(;;) {
>>          qemu_mutex_lock(&tlist->active_timers_lock);
>>
>
Paolo Bonzini - July 29, 2013, 1:11 p.m.
Il 29/07/2013 10:20, liu ping fan ha scritto:
>> > Another issue is that deadline computation is not using the AioContext's
>> > timer lists.
>> >
> Sorry, can not catch the meaning. When AioContext has its own timer
> lists, it will have its own deadline(for ppoll timeout). So the
> computation should be based on AioContext's timer lists, right?

Associating the main loop's timer lists to other AioContexts is really
really wrong...  It is _already_ wrong to run timers in the main
AioContext's aio_poll (because timers may not be ready to run from an
arbitrary aio_poll), it is even worse to run them in other threads
without taking the BQL.

What exactly is the purpose of this series?

Paolo
pingfan liu - July 30, 2013, 2:35 a.m.
On Mon, Jul 29, 2013 at 9:11 PM, Paolo Bonzini <pbonzini@redhat.com> wrote:
> Il 29/07/2013 10:20, liu ping fan ha scritto:
>>> > Another issue is that deadline computation is not using the AioContext's
>>> > timer lists.
>>> >
>> Sorry, can not catch the meaning. When AioContext has its own timer
>> lists, it will have its own deadline(for ppoll timeout). So the
>> computation should be based on AioContext's timer lists, right?
>
> Associating the main loop's timer lists to other AioContexts is really

Main loop's timers will only be associated with the qemu_aio_context,
not with other AioContext. (When creating timer, we have bound it with
a specified AioContext, so it will only run on that AioContext)

> really wrong...  It is _already_ wrong to run timers in the main
> AioContext's aio_poll (because timers may not be ready to run from an

Sorry, but except that timer cb will call aio_poll, any other reason ?
> arbitrary aio_poll), it is even worse to run them in other threads
> without taking the BQL.
>
Can be fixed by some method to sync  the dedicated thread with vcpus?

> What exactly is the purpose of this series?
>
Enable timers to run on the dedicated threads(using aio as the
mini-loop), currently for the following devices,
    virtio-blk-dataplane for throttling
    hpet for the best-effort resolution

Thanks and regards,
Pingfan
> Paolo

Patch

diff --git a/async.c b/async.c
index ba4072c..7e2340e 100644
--- a/async.c
+++ b/async.c
@@ -201,11 +201,15 @@  static void
 aio_ctx_finalize(GSource     *source)
 {
     AioContext *ctx = (AioContext *) source;
+    int i;
 
     thread_pool_free(ctx->thread_pool);
     aio_set_event_notifier(ctx, &ctx->notifier, NULL, NULL);
     event_notifier_cleanup(&ctx->notifier);
     g_array_free(ctx->pollfds, TRUE);
+    for (i = 0; i < QEMU_CLOCK_MAXCNT; i++) {
+        timer_list_finalize(&ctx->timer_list[i]);
+    }
 }
 
 static GSourceFuncs aio_source_funcs = {
@@ -237,6 +241,8 @@  void aio_notify(AioContext *ctx)
 AioContext *aio_context_new(void)
 {
     AioContext *ctx;
+    int i;
+
     ctx = (AioContext *) g_source_new(&aio_source_funcs, sizeof(AioContext));
     ctx->pollfds = g_array_new(FALSE, FALSE, sizeof(GPollFD));
     ctx->thread_pool = NULL;
@@ -245,6 +251,9 @@  AioContext *aio_context_new(void)
     aio_set_event_notifier(ctx, &ctx->notifier, 
                            (EventNotifierHandler *)
                            event_notifier_test_and_clear, NULL);
+    for (i = 0; i < QEMU_CLOCK_MAXCNT; i++) {
+        timer_list_init(&ctx->timer_list[i]);
+    }
 
     return ctx;
 }
diff --git a/include/block/aio.h b/include/block/aio.h
index 04598b2..cf41b42 100644
--- a/include/block/aio.h
+++ b/include/block/aio.h
@@ -43,6 +43,18 @@  typedef struct AioHandler AioHandler;
 typedef void QEMUBHFunc(void *opaque);
 typedef void IOHandler(void *opaque);
 
+/* Related timer with AioContext */
+typedef struct QEMUTimer QEMUTimer;
+#define QEMU_CLOCK_MAXCNT 3
+
+typedef struct TimerList {
+    QEMUTimer *active_timers;
+    QemuMutex active_timers_lock;
+} TimerList;
+
+void timer_list_init(TimerList *tlist);
+void timer_list_finalize(TimerList *tlist);
+
 typedef struct AioContext {
     GSource source;
 
@@ -73,6 +85,7 @@  typedef struct AioContext {
 
     /* Thread pool for performing work and receiving completion callbacks */
     struct ThreadPool *thread_pool;
+    TimerList timer_list[QEMU_CLOCK_MAXCNT];
 } AioContext;
 
 /* Returns 1 if there are still outstanding AIO requests; 0 otherwise */
diff --git a/include/qemu/timer.h b/include/qemu/timer.h
index 9dd206c..3e5016b 100644
--- a/include/qemu/timer.h
+++ b/include/qemu/timer.h
@@ -33,9 +33,14 @@  extern QEMUClock *vm_clock;
 extern QEMUClock *host_clock;
 
 int64_t qemu_get_clock_ns(QEMUClock *clock);
+/* qemu_clock_has_timers, qemu_clock_expired, qemu_clock_deadline
+ * run In tcg icount mode. There is only one AioContext i.e. qemu_aio_context.
+ * So we only count the timers on qemu_aio_context.
+ */
 int64_t qemu_clock_has_timers(QEMUClock *clock);
 int64_t qemu_clock_expired(QEMUClock *clock);
 int64_t qemu_clock_deadline(QEMUClock *clock);
+
 void qemu_clock_enable(QEMUClock *clock, bool enabled);
 void qemu_clock_warp(QEMUClock *clock);
 
@@ -45,6 +50,9 @@  void qemu_unregister_clock_reset_notifier(QEMUClock *clock,
 
 QEMUTimer *qemu_new_timer(QEMUClock *clock, int scale,
                           QEMUTimerCB *cb, void *opaque);
+QEMUTimer *aioctx_new_timer(QEMUClock *clock, int scale,
+                          QEMUTimerCB *cb, void *opaque, AioContext *ctx);
+
 void qemu_free_timer(QEMUTimer *ts);
 void qemu_del_timer(QEMUTimer *ts);
 void qemu_mod_timer_ns(QEMUTimer *ts, int64_t expire_time);
@@ -75,6 +83,18 @@  static inline QEMUTimer *qemu_new_timer_ms(QEMUClock *clock, QEMUTimerCB *cb,
     return qemu_new_timer(clock, SCALE_MS, cb, opaque);
 }
 
+static inline QEMUTimer *aioctx_new_timer_ns(QEMUClock *clock, QEMUTimerCB *cb,
+                                           void *opaque, AioContext *ctx)
+{
+    return aioctx_new_timer(clock, SCALE_NS, cb, opaque, ctx);
+}
+
+static inline QEMUTimer *aioctx_new_timer_ms(QEMUClock *clock, QEMUTimerCB *cb,
+                                           void *opaque, AioContext *ctx)
+{
+    return aioctx_new_timer(clock, SCALE_MS, cb, opaque, ctx);
+}
+
 static inline int64_t qemu_get_clock_ms(QEMUClock *clock)
 {
     return qemu_get_clock_ns(clock) / SCALE_MS;
diff --git a/qemu-timer.c b/qemu-timer.c
index d941a83..f15c3e6 100644
--- a/qemu-timer.c
+++ b/qemu-timer.c
@@ -45,14 +45,6 @@ 
 #define QEMU_CLOCK_REALTIME 0
 #define QEMU_CLOCK_VIRTUAL  1
 #define QEMU_CLOCK_HOST     2
-#define QEMU_CLOCK_MAXCNT 3
-
-typedef struct TimerList {
-    QEMUTimer *active_timers;
-    QemuMutex active_timers_lock;
-} TimerList;
-
-static TimerList timer_list[QEMU_CLOCK_MAXCNT];
 
 struct QEMUClock {
     NotifierList reset_notifiers;
@@ -72,7 +64,9 @@  struct QEMUClock {
 
 struct QEMUTimer {
     int64_t expire_time;	/* in nanoseconds */
+    /* quick link to AioContext timer list */
     TimerList *list;
+    AioContext *ctx;
     QEMUTimerCB *cb;
     void *opaque;
     QEMUTimer *next;
@@ -100,11 +94,12 @@  struct qemu_alarm_timer {
 
 static struct qemu_alarm_timer *alarm_timer;
 
-static TimerList *clock_to_timerlist(QEMUClock *clock)
+static TimerList *clock_to_timerlist(QEMUClock *clock, AioContext *ctx)
 {
     int type = clock->type;
 
-    return  &timer_list[type];
+    assert(ctx);
+    return  &ctx->timer_list[type];
 }
 
 static bool qemu_timer_expired_ns(QEMUTimer *timer_head, int64_t current_time)
@@ -112,7 +107,8 @@  static bool qemu_timer_expired_ns(QEMUTimer *timer_head, int64_t current_time)
     return timer_head && (timer_head->expire_time <= current_time);
 }
 
-static int64_t qemu_next_clock_deadline(QEMUClock *clock, int64_t delta)
+static int64_t qemu_next_clock_deadline(QEMUClock *clock, int64_t delta,
+    AioContext *ctx)
 {
     int64_t expire_time, next;
     bool has_timer = false;
@@ -122,7 +118,7 @@  static int64_t qemu_next_clock_deadline(QEMUClock *clock, int64_t delta)
         return delta;
     }
 
-    tlist = clock_to_timerlist(clock);
+    tlist = clock_to_timerlist(clock, ctx);
     qemu_mutex_lock(&tlist->active_timers_lock);
     if (tlist->active_timers) {
         has_timer = true;
@@ -140,12 +136,13 @@  static int64_t qemu_next_clock_deadline(QEMUClock *clock, int64_t delta)
 static int64_t qemu_next_alarm_deadline(void)
 {
     int64_t delta = INT64_MAX;
+    AioContext *ctx = *tls_get_thread_aio_context();
 
     if (!use_icount) {
-        delta = qemu_next_clock_deadline(vm_clock, delta);
+        delta = qemu_next_clock_deadline(vm_clock, delta, ctx);
     }
-    delta = qemu_next_clock_deadline(host_clock, delta);
-    return qemu_next_clock_deadline(rt_clock, delta);
+    delta = qemu_next_clock_deadline(host_clock, delta, ctx);
+    return qemu_next_clock_deadline(rt_clock, delta, ctx);
 }
 
 static void qemu_rearm_alarm_timer(struct qemu_alarm_timer *t)
@@ -267,16 +264,21 @@  QEMUClock *rt_clock;
 QEMUClock *vm_clock;
 QEMUClock *host_clock;
 
-static void timer_list_init(TimerList *tlist)
+void timer_list_init(TimerList *tlist)
 {
     qemu_mutex_init(&tlist->active_timers_lock);
     tlist->active_timers = NULL;
 }
 
+void timer_list_finalize(TimerList *tlist)
+{
+    qemu_mutex_destroy(&tlist->active_timers_lock);
+    assert(!tlist->active_timers);
+}
+
 static QEMUClock *qemu_new_clock(int type)
 {
     QEMUClock *clock;
-    TimerList *tlist;
 
     clock = g_malloc0(sizeof(QEMUClock));
     clock->type = type;
@@ -286,8 +288,6 @@  static QEMUClock *qemu_new_clock(int type)
     qemu_cond_init(&clock->wait_using);
     qemu_mutex_init(&clock->lock);
     notifier_list_init(&clock->reset_notifiers);
-    tlist = clock_to_timerlist(clock);
-    timer_list_init(tlist);
 
     return clock;
 }
@@ -308,10 +308,14 @@  void qemu_clock_enable(QEMUClock *clock, bool enabled)
     }
 }
 
+/* qemu_clock_has_timers, qemu_clock_expired and qemu_clock_deadline
+ * run in TCG icount mode. There is only one AioContext, i.e. qemu_aio_context.
+ * So we only count the timers on qemu_aio_context.
+ */
 int64_t qemu_clock_has_timers(QEMUClock *clock)
 {
     bool has_timers;
-    TimerList *tlist = clock_to_timerlist(clock);
+    TimerList *tlist = clock_to_timerlist(clock, qemu_get_aio_context());
 
     qemu_mutex_lock(&tlist->active_timers_lock);
     has_timers = !!tlist->active_timers;
@@ -323,7 +327,7 @@  int64_t qemu_clock_expired(QEMUClock *clock)
 {
     bool has_timers;
     int64_t expire_time;
-    TimerList *tlist = clock_to_timerlist(clock);
+    TimerList *tlist = clock_to_timerlist(clock, qemu_get_aio_context());
 
     qemu_mutex_lock(&tlist->active_timers_lock);
     has_timers = tlist->active_timers;
@@ -339,7 +343,7 @@  int64_t qemu_clock_deadline(QEMUClock *clock)
     int64_t delta = INT32_MAX;
     bool has_timers;
     int64_t expire_time;
-    TimerList *tlist = clock_to_timerlist(clock);
+    TimerList *tlist = clock_to_timerlist(clock, qemu_get_aio_context());
 
     qemu_mutex_lock(&tlist->active_timers_lock);
     has_timers = tlist->active_timers;
@@ -355,19 +359,26 @@  int64_t qemu_clock_deadline(QEMUClock *clock)
     return delta;
 }
 
-QEMUTimer *qemu_new_timer(QEMUClock *clock, int scale,
-                          QEMUTimerCB *cb, void *opaque)
+QEMUTimer *aioctx_new_timer(QEMUClock *clock, int scale,
+                          QEMUTimerCB *cb, void *opaque, AioContext *ctx)
 {
     QEMUTimer *ts;
 
     ts = g_malloc0(sizeof(QEMUTimer));
-    ts->list = clock_to_timerlist(clock);
+    ts->list = clock_to_timerlist(clock, ctx);
     ts->cb = cb;
     ts->opaque = opaque;
     ts->scale = scale;
+    ts->ctx = ctx;
     return ts;
 }
 
+QEMUTimer *qemu_new_timer(QEMUClock *clock, int scale,
+                          QEMUTimerCB *cb, void *opaque)
+{
+    return aioctx_new_timer(clock, scale, cb, opaque, qemu_get_aio_context());
+}
+
 void qemu_free_timer(QEMUTimer *ts)
 {
     g_free(ts);
@@ -457,6 +468,7 @@  void qemu_run_timers(QEMUClock *clock)
     QEMUTimer *ts;
     int64_t current_time;
     TimerList *tlist;
+    AioContext *ctx;
 
     atomic_inc(&clock->using);
     if (unlikely(!clock->enabled)) {
@@ -465,7 +477,8 @@  void qemu_run_timers(QEMUClock *clock)
 
 
     current_time = qemu_get_clock_ns(clock);
-    tlist = clock_to_timerlist(clock);
+    ctx = *tls_get_thread_aio_context();
+    tlist = clock_to_timerlist(clock, ctx);
 
     for(;;) {
         qemu_mutex_lock(&tlist->active_timers_lock);