@@ -35,6 +35,10 @@ enum {
POOL_MAX_SIZE = 64,
};
+/** Free list to speed up creation */
+static QLIST_HEAD(, Coroutine) pool = QLIST_HEAD_INITIALIZER(pool);
+static unsigned int pool_size;
+
typedef struct {
Coroutine base;
void *stack;
@@ -48,10 +52,6 @@ enum {
/** Currently executing coroutine */
Coroutine *current;
- /** Free list to speed up creation */
- QLIST_HEAD(, Coroutine) pool;
- unsigned int pool_size;
-
/** The default coroutine */
CoroutineUContext leader;
} CoroutineThreadState;
@@ -75,7 +75,6 @@ enum {
if (!s) {
s = g_malloc0(sizeof(*s));
s->current = &s->leader.base;
- QLIST_INIT(&s->pool);
pthread_setspecific(thread_state_key, s);
}
return s;
@@ -84,14 +83,19 @@ enum {
static void qemu_coroutine_thread_cleanup(void *opaque)
{
CoroutineThreadState *s = opaque;
+
+ g_free(s);
+}
+
+static void __attribute__((destructor)) coroutine_cleanup(void)
+{
Coroutine *co;
Coroutine *tmp;
- QLIST_FOREACH_SAFE(co, &s->pool, pool_next, tmp) {
+ QLIST_FOREACH_SAFE(co, &pool, pool_next, tmp) {
g_free(DO_UPCAST(CoroutineUContext, base, co)->stack);
g_free(co);
}
- g_free(s);
}
static void __attribute__((constructor)) coroutine_init(void)
@@ -169,13 +173,12 @@ static void coroutine_trampoline(int i0, int i1)
Coroutine *qemu_coroutine_new(void)
{
- CoroutineThreadState *s = coroutine_get_thread_state();
Coroutine *co;
- co = QLIST_FIRST(&s->pool);
+ co = QLIST_FIRST(&pool);
if (co) {
QLIST_REMOVE(co, pool_next);
- s->pool_size--;
+ pool_size--;
} else {
co = coroutine_new();
}
@@ -184,13 +187,12 @@ static void coroutine_trampoline(int i0, int i1)
void qemu_coroutine_delete(Coroutine *co_)
{
- CoroutineThreadState *s = coroutine_get_thread_state();
CoroutineUContext *co = DO_UPCAST(CoroutineUContext, base, co_);
- if (s->pool_size < POOL_MAX_SIZE) {
- QLIST_INSERT_HEAD(&s->pool, &co->base, pool_next);
+ if (pool_size < POOL_MAX_SIZE) {
+ QLIST_INSERT_HEAD(&pool, &co->base, pool_next);
co->base.caller = NULL;
- s->pool_size++;
+ pool_size++;
return;
}
ucontext-based coroutines use a free pool to reduce allocations and deallocations of coroutine objects. The pool is per-thread, presumably to improve locality. However, as coroutines are usually allocated in a vcpu thread and freed in the I/O thread, the pool accounting gets screwed up and we end allocating and freeing a coroutine for every I/O request. This is expensive since large objects are allocated via the kernel, and are not cached by the C runtime. Fix by switching to a global pool. This is safe since we're protected by the global mutex. Signed-off-by: Avi Kivity <avi@redhat.com> --- coroutine-ucontext.c | 30 ++++++++++++++++-------------- 1 files changed, 16 insertions(+), 14 deletions(-)