diff mbox series

Coroutine-aware monitor_cur() with coroutine-specific data

Message ID 87mu36zis7.fsf_-_@dusky.pond.sub.org
State New
Headers show
Series Coroutine-aware monitor_cur() with coroutine-specific data | expand

Commit Message

Markus Armbruster Aug. 7, 2020, 1:29 p.m. UTC
This is just a sketch.  It needs comments and a real commit message.

As is, it goes on top of Kevin's series.  It is meant to be squashed
into PATCH 06.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
---
 include/qemu/coroutine.h     |  4 ++++
 include/qemu/coroutine_int.h |  2 ++
 monitor/monitor.c            | 36 +++++++++++++++---------------------
 util/qemu-coroutine.c        | 20 ++++++++++++++++++++
 4 files changed, 41 insertions(+), 21 deletions(-)

Comments

Kevin Wolf Aug. 10, 2020, 12:58 p.m. UTC | #1
Am 07.08.2020 um 15:29 hat Markus Armbruster geschrieben:
> This is just a sketch.  It needs comments and a real commit message.
> 
> As is, it goes on top of Kevin's series.  It is meant to be squashed
> into PATCH 06.
> 
> Signed-off-by: Markus Armbruster <armbru@redhat.com>
> ---
>  include/qemu/coroutine.h     |  4 ++++
>  include/qemu/coroutine_int.h |  2 ++
>  monitor/monitor.c            | 36 +++++++++++++++---------------------
>  util/qemu-coroutine.c        | 20 ++++++++++++++++++++
>  4 files changed, 41 insertions(+), 21 deletions(-)
> 
> diff --git a/include/qemu/coroutine.h b/include/qemu/coroutine.h
> index dfd261c5b1..11da47092c 100644
> --- a/include/qemu/coroutine.h
> +++ b/include/qemu/coroutine.h
> @@ -65,6 +65,10 @@ typedef void coroutine_fn CoroutineEntry(void *opaque);
>   */
>  Coroutine *qemu_coroutine_create(CoroutineEntry *entry, void *opaque);
>  
> +Coroutine *qemu_coroutine_create_with_storage(CoroutineEntry *entry,
> +                                              void *opaque, size_t storage);
> +void *qemu_coroutine_local_storage(Coroutine *co);
> +
>  /**
>   * Transfer control to a coroutine
>   */
> diff --git a/include/qemu/coroutine_int.h b/include/qemu/coroutine_int.h
> index bd6b0468e1..7d7865a02f 100644
> --- a/include/qemu/coroutine_int.h
> +++ b/include/qemu/coroutine_int.h
> @@ -41,6 +41,8 @@ struct Coroutine {
>      void *entry_arg;
>      Coroutine *caller;
>  
> +    void *coroutine_local_storage;
> +
>      /* Only used when the coroutine has terminated.  */
>      QSLIST_ENTRY(Coroutine) pool_next;

This increases the size of Coroutine objects typically by 8 bytes and
shifts the following fields by the same amount. On my x86_64 build, we
have exactly those 8 bytes left in CoroutineUContext until a new
cacheline would start. With different CONFIG_* settings, it could be the
change that increases the size to a new cacheline. No idea what this
looks like on other architectures.

Does this or the shifting of fields matter for performance? I don't
know. It might even be unlikely. But cache effects are hard to predict
and not wanting to do the work of proving that it's indeed harmless is
one of the reasons why for the slow paths in question I preferred a
solution that doesn't touch the coroutine core at all.

> diff --git a/monitor/monitor.c b/monitor/monitor.c
> index 50fb5b20d3..047a8fb380 100644
> --- a/monitor/monitor.c
> +++ b/monitor/monitor.c
> @@ -82,38 +82,32 @@ bool qmp_dispatcher_co_shutdown;
>   */
>  bool qmp_dispatcher_co_busy;
>  
> -/*
> - * Protects mon_list, monitor_qapi_event_state, coroutine_mon,
> - * monitor_destroyed.
> - */
> +/* Protects mon_list, monitor_qapi_event_state, monitor_destroyed. */
>  QemuMutex monitor_lock;
>  static GHashTable *monitor_qapi_event_state;
> -static GHashTable *coroutine_mon; /* Maps Coroutine* to Monitor* */
>  
>  MonitorList mon_list;
>  int mon_refcount;
>  static bool monitor_destroyed;
>  
> +static Monitor **monitor_curp(Coroutine *co)
> +{
> +    static __thread Monitor *global_cur_mon;
> +
> +    if (co == qmp_dispatcher_co) {
> +        return qemu_coroutine_local_storage(co);
> +    }
> +    return &global_cur_mon;
> +}

Like the other patch, this needs to be extended for HMP. global_cur_mon
is never meant to be set.

The solution fails as soon as we have more than a single monitor
coroutine running at the same time because it relies on
qmp_dispatcher_co. In this respect, it makes the same assumptions as the
simple hack.

Only knowing that qmp_dispatcher_co is always created with storage
containing a Monitor** makes this safe.

>  Monitor *monitor_cur(void)
>  {
> -    Monitor *mon;
> -
> -    qemu_mutex_lock(&monitor_lock);
> -    mon = g_hash_table_lookup(coroutine_mon, qemu_coroutine_self());
> -    qemu_mutex_unlock(&monitor_lock);
> -
> -    return mon;
> +    return *monitor_curp(qemu_coroutine_self());
>  }
>  
>  void monitor_set_cur(Coroutine *co, Monitor *mon)
>  {
> -    qemu_mutex_lock(&monitor_lock);
> -    if (mon) {
> -        g_hash_table_replace(coroutine_mon, co, mon);
> -    } else {
> -        g_hash_table_remove(coroutine_mon, co);
> -    }
> -    qemu_mutex_unlock(&monitor_lock);
> +    *monitor_curp(co) = mon;
>  }
>  
>  /**
> @@ -666,14 +660,14 @@ void monitor_init_globals_core(void)
>  {
>      monitor_qapi_event_init();
>      qemu_mutex_init(&monitor_lock);
> -    coroutine_mon = g_hash_table_new(NULL, NULL);
>  
>      /*
>       * The dispatcher BH must run in the main loop thread, since we
>       * have commands assuming that context.  It would be nice to get
>       * rid of those assumptions.
>       */
> -    qmp_dispatcher_co = qemu_coroutine_create(monitor_qmp_dispatcher_co, NULL);
> +    qmp_dispatcher_co = qemu_coroutine_create_with_storage(
> +        monitor_qmp_dispatcher_co, NULL, sizeof(Monitor **));
>      atomic_mb_set(&qmp_dispatcher_co_busy, true);
>      aio_co_schedule(iohandler_get_aio_context(), qmp_dispatcher_co);
>  }
> diff --git a/util/qemu-coroutine.c b/util/qemu-coroutine.c
> index c3caa6c770..87bf7f0fc0 100644
> --- a/util/qemu-coroutine.c
> +++ b/util/qemu-coroutine.c
> @@ -81,8 +81,28 @@ Coroutine *qemu_coroutine_create(CoroutineEntry *entry, void *opaque)
>      return co;
>  }
>  
> +Coroutine *qemu_coroutine_create_with_storage(CoroutineEntry *entry,
> +                                              void *opaque, size_t storage)
> +{
> +    Coroutine *co = qemu_coroutine_create(entry, opaque);
> +
> +    if (!co) {
> +        return NULL;
> +    }
> +
> +    co->coroutine_local_storage = g_malloc0(storage);
> +    return co;
> +}

As the code above shows, this interface is only useful if you can
identify the coroutine. It cannot be used in code that didn't create the
current coroutine because then it can't know whether or not the
coroutine has coroutine local storage, and if it has, what its structure
is.

For a supposedly generic solution, I think this is a bit weak.
Effectively, this might be a one-off solution in disguise because
it's a big restriction on the possible use cases.

> +void *qemu_coroutine_local_storage(Coroutine *co)
> +{
> +    return co->coroutine_local_storage;
> +}
> +
>  static void coroutine_delete(Coroutine *co)
>  {
> +    g_free(co->coroutine_local_storage);
> +    co->coroutine_local_storage = NULL;
>      co->caller = NULL;
>  
>      if (CONFIG_COROUTINE_POOL) {

Your list of pros/cons didn't mention coroutine creation/deletion as a
hot path at all (which it is, we have one coroutine per request).

You leave qemu_coroutine_create() untouched (except indirectly by a
larger g_malloc0() in the non-pooled case, which is negligible) and I
assume that g_free(NULL) is cheap, so at least this is probably as good
as it gets for something integrated in the coroutine core. Maybe an
explicit if (co->coroutine_local_storage) would improve it slightly.

Kevin
Markus Armbruster Aug. 26, 2020, 12:40 p.m. UTC | #2
Kevin Wolf <kwolf@redhat.com> writes:

> Am 07.08.2020 um 15:29 hat Markus Armbruster geschrieben:
>> This is just a sketch.  It needs comments and a real commit message.
>> 
>> As is, it goes on top of Kevin's series.  It is meant to be squashed
>> into PATCH 06.
>> 
>> Signed-off-by: Markus Armbruster <armbru@redhat.com>
>> ---
>>  include/qemu/coroutine.h     |  4 ++++
>>  include/qemu/coroutine_int.h |  2 ++
>>  monitor/monitor.c            | 36 +++++++++++++++---------------------
>>  util/qemu-coroutine.c        | 20 ++++++++++++++++++++
>>  4 files changed, 41 insertions(+), 21 deletions(-)
>> 
>> diff --git a/include/qemu/coroutine.h b/include/qemu/coroutine.h
>> index dfd261c5b1..11da47092c 100644
>> --- a/include/qemu/coroutine.h
>> +++ b/include/qemu/coroutine.h
>> @@ -65,6 +65,10 @@ typedef void coroutine_fn CoroutineEntry(void *opaque);
>>   */
>>  Coroutine *qemu_coroutine_create(CoroutineEntry *entry, void *opaque);
>>  
>> +Coroutine *qemu_coroutine_create_with_storage(CoroutineEntry *entry,
>> +                                              void *opaque, size_t storage);
>> +void *qemu_coroutine_local_storage(Coroutine *co);
>> +
>>  /**
>>   * Transfer control to a coroutine
>>   */
>> diff --git a/include/qemu/coroutine_int.h b/include/qemu/coroutine_int.h
>> index bd6b0468e1..7d7865a02f 100644
>> --- a/include/qemu/coroutine_int.h
>> +++ b/include/qemu/coroutine_int.h
>> @@ -41,6 +41,8 @@ struct Coroutine {
>>      void *entry_arg;
>>      Coroutine *caller;
>>  
>> +    void *coroutine_local_storage;
>> +
>>      /* Only used when the coroutine has terminated.  */
>>      QSLIST_ENTRY(Coroutine) pool_next;
>
> This increases the size of Coroutine objects typically by 8 bytes and
> shifts the following fields by the same amount. On my x86_64 build, we
> have exactly those 8 bytes left in CoroutineUContext until a new
> cacheline would start. With different CONFIG_* settings, it could be the
> change that increases the size to a new cacheline. No idea what this
> looks like on other architectures.
>
> Does this or the shifting of fields matter for performance? I don't
> know. It might even be unlikely. But cache effects are hard to predict
> and not wanting to do the work of proving that it's indeed harmless is
> one of the reasons why for the slow paths in question I preferred a
> solution that doesn't touch the coroutine core at all.

Point taken.

Possible mitigation: add at the end rather than in the middle.

>> diff --git a/monitor/monitor.c b/monitor/monitor.c
>> index 50fb5b20d3..047a8fb380 100644
>> --- a/monitor/monitor.c
>> +++ b/monitor/monitor.c
>> @@ -82,38 +82,32 @@ bool qmp_dispatcher_co_shutdown;
>>   */
>>  bool qmp_dispatcher_co_busy;
>>  
>> -/*
>> - * Protects mon_list, monitor_qapi_event_state, coroutine_mon,
>> - * monitor_destroyed.
>> - */
>> +/* Protects mon_list, monitor_qapi_event_state, monitor_destroyed. */
>>  QemuMutex monitor_lock;
>>  static GHashTable *monitor_qapi_event_state;
>> -static GHashTable *coroutine_mon; /* Maps Coroutine* to Monitor* */
>>  
>>  MonitorList mon_list;
>>  int mon_refcount;
>>  static bool monitor_destroyed;
>>  
>> +static Monitor **monitor_curp(Coroutine *co)
>> +{
>> +    static __thread Monitor *global_cur_mon;
>> +
>> +    if (co == qmp_dispatcher_co) {
>> +        return qemu_coroutine_local_storage(co);
>> +    }
>> +    return &global_cur_mon;
>> +}
>
> Like the other patch, this needs to be extended for HMP. global_cur_mon
> is never meant to be set.

It is, for OOB commands.

> The solution fails as soon as we have more than a single monitor
> coroutine running at the same time because it relies on
> qmp_dispatcher_co.

Yes, but pretty much everything below handle_qmp_command() falls apart
then.  Remembering to update monitor_curp() would be the least of my
worries :)

>                    In this respect, it makes the same assumptions as the
> simple hack.
>
> Only knowing that qmp_dispatcher_co is always created with storage
> containing a Monitor** makes this safe.

Correct.

>>  Monitor *monitor_cur(void)
>>  {
>> -    Monitor *mon;
>> -
>> -    qemu_mutex_lock(&monitor_lock);
>> -    mon = g_hash_table_lookup(coroutine_mon, qemu_coroutine_self());
>> -    qemu_mutex_unlock(&monitor_lock);
>> -
>> -    return mon;
>> +    return *monitor_curp(qemu_coroutine_self());
>>  }
>>  
>>  void monitor_set_cur(Coroutine *co, Monitor *mon)
>>  {
>> -    qemu_mutex_lock(&monitor_lock);
>> -    if (mon) {
>> -        g_hash_table_replace(coroutine_mon, co, mon);
>> -    } else {
>> -        g_hash_table_remove(coroutine_mon, co);
>> -    }
>> -    qemu_mutex_unlock(&monitor_lock);
>> +    *monitor_curp(co) = mon;
>>  }
>>  
>>  /**
>> @@ -666,14 +660,14 @@ void monitor_init_globals_core(void)
>>  {
>>      monitor_qapi_event_init();
>>      qemu_mutex_init(&monitor_lock);
>> -    coroutine_mon = g_hash_table_new(NULL, NULL);
>>  
>>      /*
>>       * The dispatcher BH must run in the main loop thread, since we
>>       * have commands assuming that context.  It would be nice to get
>>       * rid of those assumptions.
>>       */
>> -    qmp_dispatcher_co = qemu_coroutine_create(monitor_qmp_dispatcher_co, NULL);
>> +    qmp_dispatcher_co = qemu_coroutine_create_with_storage(
>> +        monitor_qmp_dispatcher_co, NULL, sizeof(Monitor **));
>>      atomic_mb_set(&qmp_dispatcher_co_busy, true);
>>      aio_co_schedule(iohandler_get_aio_context(), qmp_dispatcher_co);
>>  }
>> diff --git a/util/qemu-coroutine.c b/util/qemu-coroutine.c
>> index c3caa6c770..87bf7f0fc0 100644
>> --- a/util/qemu-coroutine.c
>> +++ b/util/qemu-coroutine.c
>> @@ -81,8 +81,28 @@ Coroutine *qemu_coroutine_create(CoroutineEntry *entry, void *opaque)
>>      return co;
>>  }
>>  
>> +Coroutine *qemu_coroutine_create_with_storage(CoroutineEntry *entry,
>> +                                              void *opaque, size_t storage)
>> +{
>> +    Coroutine *co = qemu_coroutine_create(entry, opaque);
>> +
>> +    if (!co) {
>> +        return NULL;
>> +    }
>> +
>> +    co->coroutine_local_storage = g_malloc0(storage);
>> +    return co;
>> +}
>
> As the code above shows, this interface is only useful if you can
> identify the coroutine. It cannot be used in code that didn't create the
> current coroutine because then it can't know whether or not the
> coroutine has coroutine local storage, and if it has, what its structure
> is.
>
> For a supposedly generic solution, I think this is a bit weak.

Yes, that's fair.

The solution Daniel proposed is makes the weakness more explicit:
instead of relying on "coroutine was created with this coroutine-local
storage", we'd rely on "coroutine_getspecific(key) does not fail".  It
can fail only if coroutine_setspecific(key, ...) was not called.  Not
much better in practice.

> Effectively, this might be a one-off solution in disguise because
> it's a big restriction on the possible use cases.

Daniel's solution is basically pthread_getspecific() for coroutines,
with the keys dumbed down.

If pthread_getspecific() was good enough for pthreads...

Well, it wasn't, or rather it was only because something better could
not be had with just a library, without toolchain support.  And that's
where we are with coroutines.

>> +void *qemu_coroutine_local_storage(Coroutine *co)
>> +{
>> +    return co->coroutine_local_storage;
>> +}
>> +
>>  static void coroutine_delete(Coroutine *co)
>>  {
>> +    g_free(co->coroutine_local_storage);
>> +    co->coroutine_local_storage = NULL;
>>      co->caller = NULL;
>>  
>>      if (CONFIG_COROUTINE_POOL) {
>
> Your list of pros/cons didn't mention coroutine creation/deletion as a
> hot path at all (which it is, we have one coroutine per request).

I did not expect coroutine creation / deletion to be a hot path.

It is not a hot path for QMP, because QMP is not a hot path.

I'm ready to accept the proposition that it's a hot path elsewhere.

> You leave qemu_coroutine_create() untouched (except indirectly by a
> larger g_malloc0() in the non-pooled case, which is negligible) and I
> assume that g_free(NULL) is cheap, so at least this is probably as good
> as it gets for something integrated in the coroutine core. Maybe an
> explicit if (co->coroutine_local_storage) would improve it slightly.
>
> Kevin
Kevin Wolf Aug. 26, 2020, 1:49 p.m. UTC | #3
Am 26.08.2020 um 14:40 hat Markus Armbruster geschrieben:
> Kevin Wolf <kwolf@redhat.com> writes:
> 
> > Am 07.08.2020 um 15:29 hat Markus Armbruster geschrieben:
> >> This is just a sketch.  It needs comments and a real commit message.
> >> 
> >> As is, it goes on top of Kevin's series.  It is meant to be squashed
> >> into PATCH 06.
> >> 
> >> Signed-off-by: Markus Armbruster <armbru@redhat.com>
> >> ---
> >>  include/qemu/coroutine.h     |  4 ++++
> >>  include/qemu/coroutine_int.h |  2 ++
> >>  monitor/monitor.c            | 36 +++++++++++++++---------------------
> >>  util/qemu-coroutine.c        | 20 ++++++++++++++++++++
> >>  4 files changed, 41 insertions(+), 21 deletions(-)
> >> 
> >> diff --git a/include/qemu/coroutine.h b/include/qemu/coroutine.h
> >> index dfd261c5b1..11da47092c 100644
> >> --- a/include/qemu/coroutine.h
> >> +++ b/include/qemu/coroutine.h
> >> @@ -65,6 +65,10 @@ typedef void coroutine_fn CoroutineEntry(void *opaque);
> >>   */
> >>  Coroutine *qemu_coroutine_create(CoroutineEntry *entry, void *opaque);
> >>  
> >> +Coroutine *qemu_coroutine_create_with_storage(CoroutineEntry *entry,
> >> +                                              void *opaque, size_t storage);
> >> +void *qemu_coroutine_local_storage(Coroutine *co);
> >> +
> >>  /**
> >>   * Transfer control to a coroutine
> >>   */
> >> diff --git a/include/qemu/coroutine_int.h b/include/qemu/coroutine_int.h
> >> index bd6b0468e1..7d7865a02f 100644
> >> --- a/include/qemu/coroutine_int.h
> >> +++ b/include/qemu/coroutine_int.h
> >> @@ -41,6 +41,8 @@ struct Coroutine {
> >>      void *entry_arg;
> >>      Coroutine *caller;
> >>  
> >> +    void *coroutine_local_storage;
> >> +
> >>      /* Only used when the coroutine has terminated.  */
> >>      QSLIST_ENTRY(Coroutine) pool_next;
> >
> > This increases the size of Coroutine objects typically by 8 bytes and
> > shifts the following fields by the same amount. On my x86_64 build, we
> > have exactly those 8 bytes left in CoroutineUContext until a new
> > cacheline would start. With different CONFIG_* settings, it could be the
> > change that increases the size to a new cacheline. No idea what this
> > looks like on other architectures.
> >
> > Does this or the shifting of fields matter for performance? I don't
> > know. It might even be unlikely. But cache effects are hard to predict
> > and not wanting to do the work of proving that it's indeed harmless is
> > one of the reasons why for the slow paths in question I preferred a
> > solution that doesn't touch the coroutine core at all.
> 
> Point taken.
> 
> Possible mitigation: add at the end rather than in the middle.

Doesn't work: This is a struct that is embedded at the start of
CoroutineUContext, so while you can move it down a bit, you'll never get
to the end of the actual struct used at runtime.

> >> diff --git a/monitor/monitor.c b/monitor/monitor.c
> >> index 50fb5b20d3..047a8fb380 100644
> >> --- a/monitor/monitor.c
> >> +++ b/monitor/monitor.c
> >> @@ -82,38 +82,32 @@ bool qmp_dispatcher_co_shutdown;
> >>   */
> >>  bool qmp_dispatcher_co_busy;
> >>  
> >> -/*
> >> - * Protects mon_list, monitor_qapi_event_state, coroutine_mon,
> >> - * monitor_destroyed.
> >> - */
> >> +/* Protects mon_list, monitor_qapi_event_state, monitor_destroyed. */
> >>  QemuMutex monitor_lock;
> >>  static GHashTable *monitor_qapi_event_state;
> >> -static GHashTable *coroutine_mon; /* Maps Coroutine* to Monitor* */
> >>  
> >>  MonitorList mon_list;
> >>  int mon_refcount;
> >>  static bool monitor_destroyed;
> >>  
> >> +static Monitor **monitor_curp(Coroutine *co)
> >> +{
> >> +    static __thread Monitor *global_cur_mon;
> >> +
> >> +    if (co == qmp_dispatcher_co) {
> >> +        return qemu_coroutine_local_storage(co);
> >> +    }
> >> +    return &global_cur_mon;
> >> +}
> >
> > Like the other patch, this needs to be extended for HMP. global_cur_mon
> > is never meant to be set.
> 
> It is, for OOB commands.

Right, I missed this.

> > The solution fails as soon as we have more than a single monitor
> > coroutine running at the same time because it relies on
> > qmp_dispatcher_co.
> 
> Yes, but pretty much everything below handle_qmp_command() falls apart
> then.  Remembering to update monitor_curp() would be the least of my
> worries :)

Fair enough.

> >                    In this respect, it makes the same assumptions as the
> > simple hack.
> >
> > Only knowing that qmp_dispatcher_co is always created with storage
> > containing a Monitor** makes this safe.
> 
> Correct.
> 
> >>  Monitor *monitor_cur(void)
> >>  {
> >> -    Monitor *mon;
> >> -
> >> -    qemu_mutex_lock(&monitor_lock);
> >> -    mon = g_hash_table_lookup(coroutine_mon, qemu_coroutine_self());
> >> -    qemu_mutex_unlock(&monitor_lock);
> >> -
> >> -    return mon;
> >> +    return *monitor_curp(qemu_coroutine_self());
> >>  }
> >>  
> >>  void monitor_set_cur(Coroutine *co, Monitor *mon)
> >>  {
> >> -    qemu_mutex_lock(&monitor_lock);
> >> -    if (mon) {
> >> -        g_hash_table_replace(coroutine_mon, co, mon);
> >> -    } else {
> >> -        g_hash_table_remove(coroutine_mon, co);
> >> -    }
> >> -    qemu_mutex_unlock(&monitor_lock);
> >> +    *monitor_curp(co) = mon;
> >>  }
> >>  
> >>  /**
> >> @@ -666,14 +660,14 @@ void monitor_init_globals_core(void)
> >>  {
> >>      monitor_qapi_event_init();
> >>      qemu_mutex_init(&monitor_lock);
> >> -    coroutine_mon = g_hash_table_new(NULL, NULL);
> >>  
> >>      /*
> >>       * The dispatcher BH must run in the main loop thread, since we
> >>       * have commands assuming that context.  It would be nice to get
> >>       * rid of those assumptions.
> >>       */
> >> -    qmp_dispatcher_co = qemu_coroutine_create(monitor_qmp_dispatcher_co, NULL);
> >> +    qmp_dispatcher_co = qemu_coroutine_create_with_storage(
> >> +        monitor_qmp_dispatcher_co, NULL, sizeof(Monitor **));
> >>      atomic_mb_set(&qmp_dispatcher_co_busy, true);
> >>      aio_co_schedule(iohandler_get_aio_context(), qmp_dispatcher_co);
> >>  }
> >> diff --git a/util/qemu-coroutine.c b/util/qemu-coroutine.c
> >> index c3caa6c770..87bf7f0fc0 100644
> >> --- a/util/qemu-coroutine.c
> >> +++ b/util/qemu-coroutine.c
> >> @@ -81,8 +81,28 @@ Coroutine *qemu_coroutine_create(CoroutineEntry *entry, void *opaque)
> >>      return co;
> >>  }
> >>  
> >> +Coroutine *qemu_coroutine_create_with_storage(CoroutineEntry *entry,
> >> +                                              void *opaque, size_t storage)
> >> +{
> >> +    Coroutine *co = qemu_coroutine_create(entry, opaque);
> >> +
> >> +    if (!co) {
> >> +        return NULL;
> >> +    }
> >> +
> >> +    co->coroutine_local_storage = g_malloc0(storage);
> >> +    return co;
> >> +}
> >
> > As the code above shows, this interface is only useful if you can
> > identify the coroutine. It cannot be used in code that didn't create the
> > current coroutine because then it can't know whether or not the
> > coroutine has coroutine local storage, and if it has, what its structure
> > is.
> >
> > For a supposedly generic solution, I think this is a bit weak.
> 
> Yes, that's fair.
> 
> The solution Daniel proposed is makes the weakness more explicit:
> instead of relying on "coroutine was created with this coroutine-local
> storage", we'd rely on "coroutine_getspecific(key) does not fail".  It
> can fail only if coroutine_setspecific(key, ...) was not called.  Not
> much better in practice.

It would be a little more generic, I guess. So for a solution that wants
to look generic, it might be better.

But as long as we don't have a second user (not even in our
imagination), I'm not sure how important it is to have something that
looks generic. With the simple and stupid patch, it would be more
obvious that it doesn't hurt cases that are unrelated to the monitor.

> > Effectively, this might be a one-off solution in disguise because
> > it's a big restriction on the possible use cases.
> 
> Daniel's solution is basically pthread_getspecific() for coroutines,
> with the keys dumbed down.
> 
> If pthread_getspecific() was good enough for pthreads...
> 
> Well, it wasn't, or rather it was only because something better could
> not be had with just a library, without toolchain support.  And that's
> where we are with coroutines.
> 
> >> +void *qemu_coroutine_local_storage(Coroutine *co)
> >> +{
> >> +    return co->coroutine_local_storage;
> >> +}
> >> +
> >>  static void coroutine_delete(Coroutine *co)
> >>  {
> >> +    g_free(co->coroutine_local_storage);
> >> +    co->coroutine_local_storage = NULL;
> >>      co->caller = NULL;
> >>  
> >>      if (CONFIG_COROUTINE_POOL) {
> >
> > Your list of pros/cons didn't mention coroutine creation/deletion as a
> > hot path at all (which it is, we have one coroutine per request).
> 
> I did not expect coroutine creation / deletion to be a hot path.
> 
> It is not a hot path for QMP, because QMP is not a hot path.
> 
> I'm ready to accept the proposition that it's a hot path elsewhere.

It is a hot path for block device requests. To be more specific, the
part that is executed when taking a coroutine from the pool or putting
it back to the pool is. The code to create a coroutine from scratch or
to actually free it may be less relevant.

Kevin

> > You leave qemu_coroutine_create() untouched (except indirectly by a
> > larger g_malloc0() in the non-pooled case, which is negligible) and I
> > assume that g_free(NULL) is cheap, so at least this is probably as good
> > as it gets for something integrated in the coroutine core. Maybe an
> > explicit if (co->coroutine_local_storage) would improve it slightly.
> >
> > Kevin
diff mbox series

Patch

diff --git a/include/qemu/coroutine.h b/include/qemu/coroutine.h
index dfd261c5b1..11da47092c 100644
--- a/include/qemu/coroutine.h
+++ b/include/qemu/coroutine.h
@@ -65,6 +65,10 @@  typedef void coroutine_fn CoroutineEntry(void *opaque);
  */
 Coroutine *qemu_coroutine_create(CoroutineEntry *entry, void *opaque);
 
+Coroutine *qemu_coroutine_create_with_storage(CoroutineEntry *entry,
+                                              void *opaque, size_t storage);
+void *qemu_coroutine_local_storage(Coroutine *co);
+
 /**
  * Transfer control to a coroutine
  */
diff --git a/include/qemu/coroutine_int.h b/include/qemu/coroutine_int.h
index bd6b0468e1..7d7865a02f 100644
--- a/include/qemu/coroutine_int.h
+++ b/include/qemu/coroutine_int.h
@@ -41,6 +41,8 @@  struct Coroutine {
     void *entry_arg;
     Coroutine *caller;
 
+    void *coroutine_local_storage;
+
     /* Only used when the coroutine has terminated.  */
     QSLIST_ENTRY(Coroutine) pool_next;
 
diff --git a/monitor/monitor.c b/monitor/monitor.c
index 50fb5b20d3..047a8fb380 100644
--- a/monitor/monitor.c
+++ b/monitor/monitor.c
@@ -82,38 +82,32 @@  bool qmp_dispatcher_co_shutdown;
  */
 bool qmp_dispatcher_co_busy;
 
-/*
- * Protects mon_list, monitor_qapi_event_state, coroutine_mon,
- * monitor_destroyed.
- */
+/* Protects mon_list, monitor_qapi_event_state, monitor_destroyed. */
 QemuMutex monitor_lock;
 static GHashTable *monitor_qapi_event_state;
-static GHashTable *coroutine_mon; /* Maps Coroutine* to Monitor* */
 
 MonitorList mon_list;
 int mon_refcount;
 static bool monitor_destroyed;
 
+static Monitor **monitor_curp(Coroutine *co)
+{
+    static __thread Monitor *global_cur_mon;
+
+    if (co == qmp_dispatcher_co) {
+        return qemu_coroutine_local_storage(co);
+    }
+    return &global_cur_mon;
+}
+
 Monitor *monitor_cur(void)
 {
-    Monitor *mon;
-
-    qemu_mutex_lock(&monitor_lock);
-    mon = g_hash_table_lookup(coroutine_mon, qemu_coroutine_self());
-    qemu_mutex_unlock(&monitor_lock);
-
-    return mon;
+    return *monitor_curp(qemu_coroutine_self());
 }
 
 void monitor_set_cur(Coroutine *co, Monitor *mon)
 {
-    qemu_mutex_lock(&monitor_lock);
-    if (mon) {
-        g_hash_table_replace(coroutine_mon, co, mon);
-    } else {
-        g_hash_table_remove(coroutine_mon, co);
-    }
-    qemu_mutex_unlock(&monitor_lock);
+    *monitor_curp(co) = mon;
 }
 
 /**
@@ -666,14 +660,14 @@  void monitor_init_globals_core(void)
 {
     monitor_qapi_event_init();
     qemu_mutex_init(&monitor_lock);
-    coroutine_mon = g_hash_table_new(NULL, NULL);
 
     /*
      * The dispatcher BH must run in the main loop thread, since we
      * have commands assuming that context.  It would be nice to get
      * rid of those assumptions.
      */
-    qmp_dispatcher_co = qemu_coroutine_create(monitor_qmp_dispatcher_co, NULL);
+    qmp_dispatcher_co = qemu_coroutine_create_with_storage(
+        monitor_qmp_dispatcher_co, NULL, sizeof(Monitor **));
     atomic_mb_set(&qmp_dispatcher_co_busy, true);
     aio_co_schedule(iohandler_get_aio_context(), qmp_dispatcher_co);
 }
diff --git a/util/qemu-coroutine.c b/util/qemu-coroutine.c
index c3caa6c770..87bf7f0fc0 100644
--- a/util/qemu-coroutine.c
+++ b/util/qemu-coroutine.c
@@ -81,8 +81,28 @@  Coroutine *qemu_coroutine_create(CoroutineEntry *entry, void *opaque)
     return co;
 }
 
+Coroutine *qemu_coroutine_create_with_storage(CoroutineEntry *entry,
+                                              void *opaque, size_t storage)
+{
+    Coroutine *co = qemu_coroutine_create(entry, opaque);
+
+    if (!co) {
+        return NULL;
+    }
+
+    co->coroutine_local_storage = g_malloc0(storage);
+    return co;
+}
+
+void *qemu_coroutine_local_storage(Coroutine *co)
+{
+    return co->coroutine_local_storage;
+}
+
 static void coroutine_delete(Coroutine *co)
 {
+    g_free(co->coroutine_local_storage);
+    co->coroutine_local_storage = NULL;
     co->caller = NULL;
 
     if (CONFIG_COROUTINE_POOL) {