Patchwork coroutine: switch per-thread free pool to a global pool

login
register
mail settings
Submitter Avi Kivity
Date Dec. 5, 2011, 5:20 p.m.
Message ID <1323105612-32419-1-git-send-email-avi@redhat.com>
Download mbox | patch
Permalink /patch/129372/
State New
Headers show

Comments

Avi Kivity - Dec. 5, 2011, 5:20 p.m.
ucontext-based coroutines use a free pool to reduce allocations and
deallocations of coroutine objects.  The pool is per-thread, presumably
to improve locality.  However, as coroutines are usually allocated in
a vcpu thread and freed in the I/O thread, the pool accounting gets
screwed up and we end allocating and freeing a coroutine for every I/O
request.  This is expensive since large objects are allocated via the
kernel, and are not cached by the C runtime.

Fix by switching to a global pool.  This is safe since we're protected
by the global mutex.

Signed-off-by: Avi Kivity <avi@redhat.com>
---
 coroutine-ucontext.c |   30 ++++++++++++++++--------------
 1 files changed, 16 insertions(+), 14 deletions(-)
Stefan Hajnoczi - Dec. 6, 2011, 1:25 p.m.
On Mon, Dec 5, 2011 at 5:20 PM, Avi Kivity <avi@redhat.com> wrote:
> ucontext-based coroutines use a free pool to reduce allocations and
> deallocations of coroutine objects.  The pool is per-thread, presumably
> to improve locality.  However, as coroutines are usually allocated in
> a vcpu thread and freed in the I/O thread, the pool accounting gets
> screwed up and we end allocating and freeing a coroutine for every I/O
> request.  This is expensive since large objects are allocated via the
> kernel, and are not cached by the C runtime.
>
> Fix by switching to a global pool.  This is safe since we're protected
> by the global mutex.

Looks good to me.  I did check how hw/9pfs/ uses coroutines because it
bounces them into worker threads that are not under the QEMU mutex but
they are not created/destroyed there so we should be okay.

Stefan

Patch

diff --git a/coroutine-ucontext.c b/coroutine-ucontext.c
index 2b8d3e9..3d01075 100644
--- a/coroutine-ucontext.c
+++ b/coroutine-ucontext.c
@@ -35,6 +35,10 @@  enum {
     POOL_MAX_SIZE = 64,
 };
 
+/** Free list to speed up creation */
+static QLIST_HEAD(, Coroutine) pool = QLIST_HEAD_INITIALIZER(pool);
+static unsigned int pool_size;
+
 typedef struct {
     Coroutine base;
     void *stack;
@@ -48,10 +52,6 @@  enum {
     /** Currently executing coroutine */
     Coroutine *current;
 
-    /** Free list to speed up creation */
-    QLIST_HEAD(, Coroutine) pool;
-    unsigned int pool_size;
-
     /** The default coroutine */
     CoroutineUContext leader;
 } CoroutineThreadState;
@@ -75,7 +75,6 @@  enum {
     if (!s) {
         s = g_malloc0(sizeof(*s));
         s->current = &s->leader.base;
-        QLIST_INIT(&s->pool);
         pthread_setspecific(thread_state_key, s);
     }
     return s;
@@ -84,14 +83,19 @@  enum {
 static void qemu_coroutine_thread_cleanup(void *opaque)
 {
     CoroutineThreadState *s = opaque;
+
+    g_free(s);
+}
+
+static void __attribute__((destructor)) coroutine_cleanup(void)
+{
     Coroutine *co;
     Coroutine *tmp;
 
-    QLIST_FOREACH_SAFE(co, &s->pool, pool_next, tmp) {
+    QLIST_FOREACH_SAFE(co, &pool, pool_next, tmp) {
         g_free(DO_UPCAST(CoroutineUContext, base, co)->stack);
         g_free(co);
     }
-    g_free(s);
 }
 
 static void __attribute__((constructor)) coroutine_init(void)
@@ -169,13 +173,12 @@  static void coroutine_trampoline(int i0, int i1)
 
 Coroutine *qemu_coroutine_new(void)
 {
-    CoroutineThreadState *s = coroutine_get_thread_state();
     Coroutine *co;
 
-    co = QLIST_FIRST(&s->pool);
+    co = QLIST_FIRST(&pool);
     if (co) {
         QLIST_REMOVE(co, pool_next);
-        s->pool_size--;
+        pool_size--;
     } else {
         co = coroutine_new();
     }
@@ -184,13 +187,12 @@  static void coroutine_trampoline(int i0, int i1)
 
 void qemu_coroutine_delete(Coroutine *co_)
 {
-    CoroutineThreadState *s = coroutine_get_thread_state();
     CoroutineUContext *co = DO_UPCAST(CoroutineUContext, base, co_);
 
-    if (s->pool_size < POOL_MAX_SIZE) {
-        QLIST_INSERT_HEAD(&s->pool, &co->base, pool_next);
+    if (pool_size < POOL_MAX_SIZE) {
+        QLIST_INSERT_HEAD(&pool, &co->base, pool_next);
         co->base.caller = NULL;
-        s->pool_size++;
+        pool_size++;
         return;
     }