Patchwork qemu-clock: add an alarm timer based on timerfd

login
register
mail settings
Submitter Anthony Liguori
Date Sept. 18, 2012, 8:37 p.m.
Message ID <1348000626-16129-1-git-send-email-aliguori@us.ibm.com>
Download mbox | patch
Permalink /patch/184849/
State New
Headers show

Comments

Anthony Liguori - Sept. 18, 2012, 8:37 p.m.
timerfd is a new(ish) mechanism provided in the 2.6.25+ Linux kernels that
allows for a high resolution event timer signaled through a file descriptor.
This patch adds an alarm timer implementation using timerfd.

timerfd has a couple advantages over dynticks.  Namely, it's fd based instead
of signal based which reduces the likelihood of receiving EINTR failure codes.

Because we can't process timers in a signal handler, we write to a pipe() and
read that in the main loop.  timerfd allows us to avoid the extra write to a
pipe.

Finally, since timerfd is just a file descriptor, it could conceivably used to
implement per-thread alarm timers.  It also would integrate very easily in a
glib main loop.

Unfortunately, there's a lot of Windows code in qemu-timer.c and main-loop.c
right now otherwise the refactoring would be trivial.  I'll leave that for
another day.

Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Jan Kiszka <jan.kiszka@siemens.com>
Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
---
Please note, this is lightly tested.  Since this is such a fundamental change,
I'd like to do some performance analysis before committing but wanted to share
early.
---
 configure    |   14 +++++++++
 qemu-timer.c |   86 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 100 insertions(+), 0 deletions(-)
Peter Portante - Sept. 19, 2012, 2:20 p.m.
On Tue, Sep 18, 2012 at 4:37 PM, Anthony Liguori <aliguori@us.ibm.com>wrote:

> timerfd is a new(ish) mechanism provided in the 2.6.25+ Linux kernels that
> allows for a high resolution event timer signaled through a file
> descriptor.
> This patch adds an alarm timer implementation using timerfd.
>
> timerfd has a couple advantages over dynticks.  Namely, it's fd based
> instead
> of signal based which reduces the likelihood of receiving EINTR failure
> codes.
>
> Because we can't process timers in a signal handler, we write to a pipe()
> and
> read that in the main loop.  timerfd allows us to avoid the extra write to
> a
> pipe.
>
> Finally, since timerfd is just a file descriptor, it could conceivably
> used to
> implement per-thread alarm timers.  It also would integrate very easily in
> a
> glib main loop.
>
> Unfortunately, there's a lot of Windows code in qemu-timer.c and
> main-loop.c
> right now otherwise the refactoring would be trivial.  I'll leave that for
> another day.
>
> Cc: Paolo Bonzini <pbonzini@redhat.com>
> Cc: Jan Kiszka <jan.kiszka@siemens.com>
> Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
> ---
> Please note, this is lightly tested.  Since this is such a fundamental
> change,
> I'd like to do some performance analysis before committing but wanted to
> share
> early.
> ---
>  configure    |   14 +++++++++
>  qemu-timer.c |   86
> ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  2 files changed, 100 insertions(+), 0 deletions(-)
>
> diff --git a/configure b/configure
> index 8564142..8a3135b 100755
> --- a/configure
> +++ b/configure
> @@ -2532,6 +2532,17 @@ if compile_prog "" "" ; then
>    sync_file_range=yes
>  fi
>
> +# check for timerfd
> +timerfd="no"
> +cat > $TMPC << EOF
> +#include <sys/timerfd.h>
> +int main(void) { return timerfd_create(CLOCK_REALTIME, 0); }
> +EOF
> +
> +if compile_prog "" "" ; then
> +  timerfd=yes
> +fi
> +
>  # check for linux/fiemap.h and FS_IOC_FIEMAP
>  fiemap=no
>  cat > $TMPC << EOF
> @@ -3467,6 +3478,9 @@ fi
>  if test "$signalfd" = "yes" ; then
>    echo "CONFIG_SIGNALFD=y" >> $config_host_mak
>  fi
> +if test "$timerfd" = "yes" ; then
> +  echo "CONFIG_TIMERFD=y" >> $config_host_mak
> +fi
>  if test "$tcg_interpreter" = "yes" ; then
>    echo "CONFIG_TCG_INTERPRETER=y" >> $config_host_mak
>  fi
> diff --git a/qemu-timer.c b/qemu-timer.c
> index c7a1551..6f7fa35 100644
> --- a/qemu-timer.c
> +++ b/qemu-timer.c
> @@ -35,6 +35,10 @@
>  #include <mmsystem.h>
>  #endif
>
> +#ifdef CONFIG_TIMERFD
> +#include <sys/timerfd.h>
> +#endif
> +
>  /***********************************************************/
>  /* timers */
>
> @@ -66,6 +70,9 @@ struct qemu_alarm_timer {
>      int (*start)(struct qemu_alarm_timer *t);
>      void (*stop)(struct qemu_alarm_timer *t);
>      void (*rearm)(struct qemu_alarm_timer *t, int64_t nearest_delta_ns);
> +#ifdef CONFIG_TIMERFD
> +    int timerfd;
> +#endif
>  #if defined(__linux__)
>      timer_t timer;
>      int fd;
> @@ -121,6 +128,12 @@ static void qemu_rearm_alarm_timer(struct
> qemu_alarm_timer *t)
>  /* TODO: MIN_TIMER_REARM_NS should be optimized */
>  #define MIN_TIMER_REARM_NS 250000
>
> +#ifdef CONFIG_TIMERFD
> +static int timerfd_start_timer(struct qemu_alarm_timer *t);
> +static void timerfd_stop_timer(struct qemu_alarm_timer *t);
> +static void timerfd_rearm_timer(struct qemu_alarm_timer *t, int64_t
> delta);
> +#endif
> +
>  #ifdef _WIN32
>
>  static int mm_start_timer(struct qemu_alarm_timer *t);
> @@ -148,6 +161,10 @@ static void dynticks_rearm_timer(struct
> qemu_alarm_timer *t, int64_t delta);
>  #endif /* _WIN32 */
>
>  static struct qemu_alarm_timer alarm_timers[] = {
> +#ifdef CONFIG_TIMERFD
> +    {"timerfd", timerfd_start_timer,
> +     timerfd_stop_timer, timerfd_rearm_timer},
> +#endif
>  #ifndef _WIN32
>  #ifdef __linux__
>      {"dynticks", dynticks_start_timer,
> @@ -476,6 +493,75 @@ static void host_alarm_handler(int host_signum)
>
>  #include "compatfd.h"
>
> +static void timerfd_event(void *opaque)
> +{
> +    struct qemu_alarm_timer *t = opaque;
> +    ssize_t len;
> +    uint64_t count;
> +
> +    len = read(t->timerfd, &count, sizeof(count));
> +    g_assert(len == sizeof(count));
> +
> +    t->expired = true;
> +    t->pending = true;
> +
> +    /* We already process pending timers at the end of the main loop
> whereas
> +     * this function is called during I/O processing.  That means we know
> that
> +     * pending timers will be checked before select()'ing again which
> means we
> +     * don't need to explicitly call qemu_notify_event()
> +     */
> +}
> +
> +static int timerfd_start_timer(struct qemu_alarm_timer *t)
> +{
> +    t->timerfd = timerfd_create(CLOCK_REALTIME, TFD_CLOEXEC);
> +    if (t->timerfd == -1) {
> +        return -errno;
> +    }
> +
> +    qemu_set_fd_handler(t->timerfd, timerfd_event, NULL, t);
> +
> +    return 0;
> +}
> +
> +static void timerfd_stop_timer(struct qemu_alarm_timer *t)
> +{
> +    qemu_set_fd_handler(t->timerfd, NULL, NULL, NULL);
> +    close(t->timerfd);
> +    t->timerfd = -1;
> +}
> +
> +static void timerfd_rearm_timer(struct qemu_alarm_timer *t,
> +                                int64_t nearest_delta_ns)
> +{
> +    int ret;
> +    struct itimerspec timeout;
> +    int64_t current_ns;
> +
> +    if (nearest_delta_ns < MIN_TIMER_REARM_NS) {
> +        nearest_delta_ns = MIN_TIMER_REARM_NS;
> +    }
>

It has been a few months, but IIRC, this pattern can lead to spurious
timeouts if these methods are not used correctly. If folks are interested,
I can spend some time to dig up my past work on this to explain more.


> +    /* check whether a timer is already running */
> +    ret = timerfd_gettime(t->timerfd, &timeout);
> +    g_assert(ret == 0);
> +
> +    current_ns = timeout.it_value.tv_sec * 1000000000LL;
> +    current_ns += timeout.it_value.tv_nsec;
> +
> +    if (current_ns && current_ns <= nearest_delta_ns) {
> +        return;
> +    }
> +
> +    timeout.it_interval.tv_sec = 0;
> +    timeout.it_interval.tv_nsec = 0; /* 0 for one-shot timer */
> +    timeout.it_value.tv_sec =  nearest_delta_ns / 1000000000;
> +    timeout.it_value.tv_nsec = nearest_delta_ns % 1000000000;
> +
> +    ret = timerfd_settime(t->timerfd, 0, &timeout, NULL);
> +    g_assert(ret == 0);
> +}
> +
>  static int dynticks_start_timer(struct qemu_alarm_timer *t)
>  {
>      struct sigevent ev;
> --
> 1.7.5.4
>
>
>

Patch

diff --git a/configure b/configure
index 8564142..8a3135b 100755
--- a/configure
+++ b/configure
@@ -2532,6 +2532,17 @@  if compile_prog "" "" ; then
   sync_file_range=yes
 fi
 
+# check for timerfd
+timerfd="no"
+cat > $TMPC << EOF
+#include <sys/timerfd.h>
+int main(void) { return timerfd_create(CLOCK_REALTIME, 0); }
+EOF
+
+if compile_prog "" "" ; then
+  timerfd=yes
+fi
+
 # check for linux/fiemap.h and FS_IOC_FIEMAP
 fiemap=no
 cat > $TMPC << EOF
@@ -3467,6 +3478,9 @@  fi
 if test "$signalfd" = "yes" ; then
   echo "CONFIG_SIGNALFD=y" >> $config_host_mak
 fi
+if test "$timerfd" = "yes" ; then
+  echo "CONFIG_TIMERFD=y" >> $config_host_mak
+fi
 if test "$tcg_interpreter" = "yes" ; then
   echo "CONFIG_TCG_INTERPRETER=y" >> $config_host_mak
 fi
diff --git a/qemu-timer.c b/qemu-timer.c
index c7a1551..6f7fa35 100644
--- a/qemu-timer.c
+++ b/qemu-timer.c
@@ -35,6 +35,10 @@ 
 #include <mmsystem.h>
 #endif
 
+#ifdef CONFIG_TIMERFD
+#include <sys/timerfd.h>
+#endif
+
 /***********************************************************/
 /* timers */
 
@@ -66,6 +70,9 @@  struct qemu_alarm_timer {
     int (*start)(struct qemu_alarm_timer *t);
     void (*stop)(struct qemu_alarm_timer *t);
     void (*rearm)(struct qemu_alarm_timer *t, int64_t nearest_delta_ns);
+#ifdef CONFIG_TIMERFD
+    int timerfd;
+#endif
 #if defined(__linux__)
     timer_t timer;
     int fd;
@@ -121,6 +128,12 @@  static void qemu_rearm_alarm_timer(struct qemu_alarm_timer *t)
 /* TODO: MIN_TIMER_REARM_NS should be optimized */
 #define MIN_TIMER_REARM_NS 250000
 
+#ifdef CONFIG_TIMERFD
+static int timerfd_start_timer(struct qemu_alarm_timer *t);
+static void timerfd_stop_timer(struct qemu_alarm_timer *t);
+static void timerfd_rearm_timer(struct qemu_alarm_timer *t, int64_t delta);
+#endif
+
 #ifdef _WIN32
 
 static int mm_start_timer(struct qemu_alarm_timer *t);
@@ -148,6 +161,10 @@  static void dynticks_rearm_timer(struct qemu_alarm_timer *t, int64_t delta);
 #endif /* _WIN32 */
 
 static struct qemu_alarm_timer alarm_timers[] = {
+#ifdef CONFIG_TIMERFD
+    {"timerfd", timerfd_start_timer,
+     timerfd_stop_timer, timerfd_rearm_timer},
+#endif
 #ifndef _WIN32
 #ifdef __linux__
     {"dynticks", dynticks_start_timer,
@@ -476,6 +493,75 @@  static void host_alarm_handler(int host_signum)
 
 #include "compatfd.h"
 
+static void timerfd_event(void *opaque)
+{
+    struct qemu_alarm_timer *t = opaque;
+    ssize_t len;
+    uint64_t count;
+
+    len = read(t->timerfd, &count, sizeof(count));
+    g_assert(len == sizeof(count));
+
+    t->expired = true;
+    t->pending = true;
+
+    /* We already process pending timers at the end of the main loop whereas
+     * this function is called during I/O processing.  That means we know that
+     * pending timers will be checked before select()'ing again which means we
+     * don't need to explicitly call qemu_notify_event()
+     */
+}
+
+static int timerfd_start_timer(struct qemu_alarm_timer *t)
+{
+    t->timerfd = timerfd_create(CLOCK_REALTIME, TFD_CLOEXEC);
+    if (t->timerfd == -1) {
+        return -errno;
+    }
+
+    qemu_set_fd_handler(t->timerfd, timerfd_event, NULL, t);
+
+    return 0;
+}
+
+static void timerfd_stop_timer(struct qemu_alarm_timer *t)
+{
+    qemu_set_fd_handler(t->timerfd, NULL, NULL, NULL);
+    close(t->timerfd);
+    t->timerfd = -1;
+}
+
+static void timerfd_rearm_timer(struct qemu_alarm_timer *t,
+                                int64_t nearest_delta_ns)
+{
+    int ret;
+    struct itimerspec timeout;
+    int64_t current_ns;
+
+    if (nearest_delta_ns < MIN_TIMER_REARM_NS) {
+        nearest_delta_ns = MIN_TIMER_REARM_NS;
+    }
+
+    /* check whether a timer is already running */
+    ret = timerfd_gettime(t->timerfd, &timeout);
+    g_assert(ret == 0);
+
+    current_ns = timeout.it_value.tv_sec * 1000000000LL;
+    current_ns += timeout.it_value.tv_nsec;
+
+    if (current_ns && current_ns <= nearest_delta_ns) {
+        return;
+    }
+
+    timeout.it_interval.tv_sec = 0;
+    timeout.it_interval.tv_nsec = 0; /* 0 for one-shot timer */
+    timeout.it_value.tv_sec =  nearest_delta_ns / 1000000000;
+    timeout.it_value.tv_nsec = nearest_delta_ns % 1000000000;
+
+    ret = timerfd_settime(t->timerfd, 0, &timeout, NULL);
+    g_assert(ret == 0);
+}
+
 static int dynticks_start_timer(struct qemu_alarm_timer *t)
 {
     struct sigevent ev;