diff mbox series

[3/6] plugins/cache: Fixed a use-after-free bug with multithreaded usermode

Message ID 20210714172151.8494-4-ma.mandourr@gmail.com
State New
Headers show
Series plugins/cache: multicore cache emulation and minor | expand

Commit Message

Mahmoud Abumandour July 14, 2021, 5:21 p.m. UTC
Since callbacks may be interleaved because of multithreaded execution,
we should not make assumptions about `plugin_exit` either. The problem
with `plugin_exit` is that it frees shared data structures (caches and
`miss_ht` hash table). It should not be assumed that callbacks will not
be called after it and hence use already-freed data structures.

This is mitigated in this commit by synchronizing the call to
`plugin_exit` through locking to ensure execlusive access to data
structures, and NULL-ifying those data structures so that subsequent
callbacks can check whether the data strucutres are freed, and if so,
immediately exit.

It's okay to immediately exit and don't account for those callbacks
since they won't be accounted for anyway since `plugin_exit` is already
called once and reported the statistics.

Signed-off-by: Mahmoud Mandour <ma.mandourr@gmail.com>
---
 contrib/plugins/cache.c | 31 ++++++++++++++++++++++++++++++-
 1 file changed, 30 insertions(+), 1 deletion(-)

Comments

Alex Bennée July 19, 2021, 9:45 a.m. UTC | #1
Mahmoud Mandour <ma.mandourr@gmail.com> writes:

> Since callbacks may be interleaved because of multithreaded execution,
> we should not make assumptions about `plugin_exit` either. The problem
> with `plugin_exit` is that it frees shared data structures (caches and
> `miss_ht` hash table). It should not be assumed that callbacks will not
> be called after it and hence use already-freed data structures.

What was your test case for this because I wonder if it would be worth
coming up with one for check-tcg? From what I remember the race is
in between preexit_cleanup and the eventual _exit/exit_group which nixes
all other child threads. Maybe we should be triggering a more graceful
unload here?

> This is mitigated in this commit by synchronizing the call to
> `plugin_exit` through locking to ensure execlusive access to data
> structures, and NULL-ifying those data structures so that subsequent
> callbacks can check whether the data strucutres are freed, and if so,
> immediately exit.
>
> It's okay to immediately exit and don't account for those callbacks
> since they won't be accounted for anyway since `plugin_exit` is already
> called once and reported the statistics.
>
> Signed-off-by: Mahmoud Mandour <ma.mandourr@gmail.com>
> ---
>  contrib/plugins/cache.c | 31 ++++++++++++++++++++++++++++++-
>  1 file changed, 30 insertions(+), 1 deletion(-)
>
> diff --git a/contrib/plugins/cache.c b/contrib/plugins/cache.c
> index 695fb969dc..a452aba01c 100644
> --- a/contrib/plugins/cache.c
> +++ b/contrib/plugins/cache.c
> @@ -363,6 +363,11 @@ static void vcpu_mem_access(unsigned int vcpu_index, qemu_plugin_meminfo_t info,
>      effective_addr = hwaddr ? qemu_plugin_hwaddr_phys_addr(hwaddr) : vaddr;
>  
>      g_mutex_lock(&mtx);
> +    if (dcache == NULL) {
> +        g_mutex_unlock(&mtx);
> +        return;
> +    }
> +
>      if (!access_cache(dcache, effective_addr)) {
>          insn = (InsnData *) userdata;
>          insn->dmisses++;
> @@ -380,6 +385,11 @@ static void vcpu_insn_exec(unsigned int vcpu_index, void *userdata)
>      g_mutex_lock(&mtx);
>      insn_addr = ((InsnData *) userdata)->addr;
>  
> +    if (icache == NULL) {
> +        g_mutex_unlock(&mtx);
> +        return;
> +    }
> +
>      if (!access_cache(icache, insn_addr)) {
>          insn = (InsnData *) userdata;
>          insn->imisses++;
> @@ -406,12 +416,24 @@ static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb)
>              effective_addr = (uint64_t) qemu_plugin_insn_vaddr(insn);
>          }
>  
> +        g_mutex_lock(&mtx);
> +
> +        /*
> +         * is the plugin_exit callback called? If so, any further callback
> +         * registration is useless as it won't get accounted for after calling
> +         * plugin_exit once already, and also will use miss_ht after it's freed
> +         */
> +        if (miss_ht == NULL) {
> +            g_mutex_unlock(&mtx);
> +            return;
> +        }
> +
>          /*
>           * Instructions might get translated multiple times, we do not create
>           * new entries for those instructions. Instead, we fetch the same
>           * entry from the hash table and register it for the callback again.
>           */
> -        g_mutex_lock(&mtx);
> +
>          data = g_hash_table_lookup(miss_ht, GUINT_TO_POINTER(effective_addr));
>          if (data == NULL) {
>              data = g_new0(InsnData, 1);
> @@ -527,13 +549,20 @@ static void log_top_insns()
>  
>  static void plugin_exit(qemu_plugin_id_t id, void *p)
>  {
> +    g_mutex_lock(&mtx);
>      log_stats();
>      log_top_insns();
>  
>      cache_free(dcache);
> +    dcache = NULL;
> +
>      cache_free(icache);
> +    icache = NULL;
>  
>      g_hash_table_destroy(miss_ht);
> +    miss_ht = NULL;
> +
> +    g_mutex_unlock(&mtx);
>  }
>  
>  static void policy_init()
Mahmoud Abumandour July 19, 2021, 10:46 a.m. UTC | #2
On Mon, Jul 19, 2021 at 11:48 AM Alex Bennée <alex.bennee@linaro.org> wrote:

>
> Mahmoud Mandour <ma.mandourr@gmail.com> writes:
>
> > Since callbacks may be interleaved because of multithreaded execution,
> > we should not make assumptions about `plugin_exit` either. The problem
> > with `plugin_exit` is that it frees shared data structures (caches and
> > `miss_ht` hash table). It should not be assumed that callbacks will not
> > be called after it and hence use already-freed data structures.
>
> What was your test case for this because I wonder if it would be worth
> coming up with one for check-tcg?


I think just any ad-hoc multithreaded execution will evoke the race pretty
much
consistently.


> From what I remember the race is
> in between preexit_cleanup and the eventual _exit/exit_group which nixes
> all other child threads. Maybe we should be triggering a more graceful
> unload here?
>

I think so. This remedies the bug for this particular plugin and I think
there
would be a better solution of course. However, I just can't ever get
plugin_exit
callback to be called more than once so I think that's probably not the
problem?

The problem is that we *use* the data in translation/mem_access/exec
callbacks
after a plugin_exit call is already called (this can be easily verified by
having a
boolean set to true once plugin_exit is called and then g_assert this
boolean is
false in the callbacks)


> > This is mitigated in this commit by synchronizing the call to
> > `plugin_exit` through locking to ensure execlusive access to data
> > structures, and NULL-ifying those data structures so that subsequent
> > callbacks can check whether the data strucutres are freed, and if so,
> > immediately exit.
> >
> > It's okay to immediately exit and don't account for those callbacks
> > since they won't be accounted for anyway since `plugin_exit` is already
> > called once and reported the statistics.
> >
> > Signed-off-by: Mahmoud Mandour <ma.mandourr@gmail.com>
> > ---
> >  contrib/plugins/cache.c | 31 ++++++++++++++++++++++++++++++-
> >  1 file changed, 30 insertions(+), 1 deletion(-)
> >
> > diff --git a/contrib/plugins/cache.c b/contrib/plugins/cache.c
> > index 695fb969dc..a452aba01c 100644
> > --- a/contrib/plugins/cache.c
> > +++ b/contrib/plugins/cache.c
> > @@ -363,6 +363,11 @@ static void vcpu_mem_access(unsigned int
> vcpu_index, qemu_plugin_meminfo_t info,
> >      effective_addr = hwaddr ? qemu_plugin_hwaddr_phys_addr(hwaddr) :
> vaddr;
> >
> >      g_mutex_lock(&mtx);
> > +    if (dcache == NULL) {
> > +        g_mutex_unlock(&mtx);
> > +        return;
> > +    }
> > +
> >      if (!access_cache(dcache, effective_addr)) {
> >          insn = (InsnData *) userdata;
> >          insn->dmisses++;
> > @@ -380,6 +385,11 @@ static void vcpu_insn_exec(unsigned int vcpu_index,
> void *userdata)
> >      g_mutex_lock(&mtx);
> >      insn_addr = ((InsnData *) userdata)->addr;
> >
> > +    if (icache == NULL) {
> > +        g_mutex_unlock(&mtx);
> > +        return;
> > +    }
> > +
> >      if (!access_cache(icache, insn_addr)) {
> >          insn = (InsnData *) userdata;
> >          insn->imisses++;
> > @@ -406,12 +416,24 @@ static void vcpu_tb_trans(qemu_plugin_id_t id,
> struct qemu_plugin_tb *tb)
> >              effective_addr = (uint64_t) qemu_plugin_insn_vaddr(insn);
> >          }
> >
> > +        g_mutex_lock(&mtx);
> > +
> > +        /*
> > +         * is the plugin_exit callback called? If so, any further
> callback
> > +         * registration is useless as it won't get accounted for after
> calling
> > +         * plugin_exit once already, and also will use miss_ht after
> it's freed
> > +         */
> > +        if (miss_ht == NULL) {
> > +            g_mutex_unlock(&mtx);
> > +            return;
> > +        }
> > +
> >          /*
> >           * Instructions might get translated multiple times, we do not
> create
> >           * new entries for those instructions. Instead, we fetch the
> same
> >           * entry from the hash table and register it for the callback
> again.
> >           */
> > -        g_mutex_lock(&mtx);
> > +
> >          data = g_hash_table_lookup(miss_ht,
> GUINT_TO_POINTER(effective_addr));
> >          if (data == NULL) {
> >              data = g_new0(InsnData, 1);
> > @@ -527,13 +549,20 @@ static void log_top_insns()
> >
> >  static void plugin_exit(qemu_plugin_id_t id, void *p)
> >  {
> > +    g_mutex_lock(&mtx);
> >      log_stats();
> >      log_top_insns();
> >
> >      cache_free(dcache);
> > +    dcache = NULL;
> > +
> >      cache_free(icache);
> > +    icache = NULL;
> >
> >      g_hash_table_destroy(miss_ht);
> > +    miss_ht = NULL;
> > +
> > +    g_mutex_unlock(&mtx);
> >  }
> >
> >  static void policy_init()
>
>
> --
> Alex Bennée
>
Alex Bennée July 19, 2021, 11:06 a.m. UTC | #3
Mahmoud Mandour <ma.mandourr@gmail.com> writes:

> On Mon, Jul 19, 2021 at 11:48 AM Alex Bennée <alex.bennee@linaro.org> wrote:
>
>  Mahmoud Mandour <ma.mandourr@gmail.com> writes:
>
>  > Since callbacks may be interleaved because of multithreaded execution,
>  > we should not make assumptions about `plugin_exit` either. The problem
>  > with `plugin_exit` is that it frees shared data structures (caches and
>  > `miss_ht` hash table). It should not be assumed that callbacks will not
>  > be called after it and hence use already-freed data structures.
>
>  What was your test case for this because I wonder if it would be worth
>  coming up with one for check-tcg? 
>
> I think just any ad-hoc multithreaded execution will evoke the race pretty much 
> consistently.

I haven't managed to trigger it with testthread but of course my
libcache is trying to to defend against it.

>  
>  From what I remember the race is
>  in between preexit_cleanup and the eventual _exit/exit_group which nixes
>  all other child threads. Maybe we should be triggering a more graceful
>  unload here?
>
> I think so. This remedies the bug for this particular plugin and I think there
> would be a better solution of course. However, I just can't ever get plugin_exit
> callback to be called more than once so I think that's probably not the problem?
>
> The problem is that we *use* the data in translation/mem_access/exec callbacks
> after a plugin_exit call is already called (this can be easily verified by having a 
> boolean set to true once plugin_exit is called and then g_assert this boolean is 
> false in the callbacks)

We have mechanisms for safely unloading plugins during running so I
think we should be able to do something cleanly here. I'll cook up an
RFC.

>
>  > This is mitigated in this commit by synchronizing the call to
>  > `plugin_exit` through locking to ensure execlusive access to data
>  > structures, and NULL-ifying those data structures so that subsequent
>  > callbacks can check whether the data strucutres are freed, and if so,
>  > immediately exit.
>  >
>  > It's okay to immediately exit and don't account for those callbacks
>  > since they won't be accounted for anyway since `plugin_exit` is already
>  > called once and reported the statistics.
>  >
>  > Signed-off-by: Mahmoud Mandour <ma.mandourr@gmail.com>
>  > ---
>  >  contrib/plugins/cache.c | 31 ++++++++++++++++++++++++++++++-
>  >  1 file changed, 30 insertions(+), 1 deletion(-)
>  >
>  > diff --git a/contrib/plugins/cache.c b/contrib/plugins/cache.c
>  > index 695fb969dc..a452aba01c 100644
>  > --- a/contrib/plugins/cache.c
>  > +++ b/contrib/plugins/cache.c
>  > @@ -363,6 +363,11 @@ static void vcpu_mem_access(unsigned int vcpu_index, qemu_plugin_meminfo_t info,
>  >      effective_addr = hwaddr ? qemu_plugin_hwaddr_phys_addr(hwaddr) : vaddr;
>  >  
>  >      g_mutex_lock(&mtx);
>  > +    if (dcache == NULL) {
>  > +        g_mutex_unlock(&mtx);
>  > +        return;
>  > +    }
>  > +
>  >      if (!access_cache(dcache, effective_addr)) {
>  >          insn = (InsnData *) userdata;
>  >          insn->dmisses++;
>  > @@ -380,6 +385,11 @@ static void vcpu_insn_exec(unsigned int vcpu_index, void *userdata)
>  >      g_mutex_lock(&mtx);
>  >      insn_addr = ((InsnData *) userdata)->addr;
>  >  
>  > +    if (icache == NULL) {
>  > +        g_mutex_unlock(&mtx);
>  > +        return;
>  > +    }
>  > +
>  >      if (!access_cache(icache, insn_addr)) {
>  >          insn = (InsnData *) userdata;
>  >          insn->imisses++;
>  > @@ -406,12 +416,24 @@ static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb)
>  >              effective_addr = (uint64_t) qemu_plugin_insn_vaddr(insn);
>  >          }
>  >  
>  > +        g_mutex_lock(&mtx);
>  > +
>  > +        /*
>  > +         * is the plugin_exit callback called? If so, any further callback
>  > +         * registration is useless as it won't get accounted for after calling
>  > +         * plugin_exit once already, and also will use miss_ht after it's freed
>  > +         */
>  > +        if (miss_ht == NULL) {
>  > +            g_mutex_unlock(&mtx);
>  > +            return;
>  > +        }
>  > +
>  >          /*
>  >           * Instructions might get translated multiple times, we do not create
>  >           * new entries for those instructions. Instead, we fetch the same
>  >           * entry from the hash table and register it for the callback again.
>  >           */
>  > -        g_mutex_lock(&mtx);
>  > +
>  >          data = g_hash_table_lookup(miss_ht, GUINT_TO_POINTER(effective_addr));
>  >          if (data == NULL) {
>  >              data = g_new0(InsnData, 1);
>  > @@ -527,13 +549,20 @@ static void log_top_insns()
>  >  
>  >  static void plugin_exit(qemu_plugin_id_t id, void *p)
>  >  {
>  > +    g_mutex_lock(&mtx);
>  >      log_stats();
>  >      log_top_insns();
>  >  
>  >      cache_free(dcache);
>  > +    dcache = NULL;
>  > +
>  >      cache_free(icache);
>  > +    icache = NULL;
>  >  
>  >      g_hash_table_destroy(miss_ht);
>  > +    miss_ht = NULL;
>  > +
>  > +    g_mutex_unlock(&mtx);
>  >  }
>  >  
>  >  static void policy_init()
>
>  -- 
>  Alex Bennée
Mahmoud Abumandour July 19, 2021, 11:28 a.m. UTC | #4
On Mon, Jul 19, 2021 at 1:08 PM Alex Bennée <alex.bennee@linaro.org> wrote:

>
> Mahmoud Mandour <ma.mandourr@gmail.com> writes:
>
> > On Mon, Jul 19, 2021 at 11:48 AM Alex Bennée <alex.bennee@linaro.org>
> wrote:
> >
> >  Mahmoud Mandour <ma.mandourr@gmail.com> writes:
> >
> >  > Since callbacks may be interleaved because of multithreaded execution,
> >  > we should not make assumptions about `plugin_exit` either. The problem
> >  > with `plugin_exit` is that it frees shared data structures (caches and
> >  > `miss_ht` hash table). It should not be assumed that callbacks will
> not
> >  > be called after it and hence use already-freed data structures.
> >
> >  What was your test case for this because I wonder if it would be worth
> >  coming up with one for check-tcg?
> >
> > I think just any ad-hoc multithreaded execution will evoke the race
> pretty much
> > consistently.
>
> I haven't managed to trigger it with testthread but of course my
> libcache is trying to to defend against it.
>

https://pastebin.com/X4Xazemh
That's a minimal program that reproduces the bug consistently for me (to my
observation, a simple
program that creates a bunch of threads that immediately exit does not
produce the bug as frequently)

You can also make hotblocks produce a similar crash (but make sure to add a
g_hash_table_destroy(hotblocks)
at the end of plugin_exit.)


>
> >
> >  From what I remember the race is
> >  in between preexit_cleanup and the eventual _exit/exit_group which nixes
> >  all other child threads. Maybe we should be triggering a more graceful
> >  unload here?
> >
> > I think so. This remedies the bug for this particular plugin and I think
> there
> > would be a better solution of course. However, I just can't ever get
> plugin_exit
> > callback to be called more than once so I think that's probably not the
> problem?
> >
> > The problem is that we *use* the data in translation/mem_access/exec
> callbacks
> > after a plugin_exit call is already called (this can be easily verified
> by having a
> > boolean set to true once plugin_exit is called and then g_assert this
> boolean is
> > false in the callbacks)
>
> We have mechanisms for safely unloading plugins during running so I
> think we should be able to do something cleanly here. I'll cook up an
> RFC.
>

I'll be waiting for it. Note that as I think I mentioned in the cover
letter, it's that plugin_uninstall
is probably problematic since it unregisters callbacks but already-fired
callbacks will continue executing.
So calling plugin_uninstall at the end of plugin_exit does not relieve the
bug...


>
> >
> >  > This is mitigated in this commit by synchronizing the call to
> >  > `plugin_exit` through locking to ensure execlusive access to data
> >  > structures, and NULL-ifying those data structures so that subsequent
> >  > callbacks can check whether the data strucutres are freed, and if so,
> >  > immediately exit.
> >  >
> >  > It's okay to immediately exit and don't account for those callbacks
> >  > since they won't be accounted for anyway since `plugin_exit` is
> already
> >  > called once and reported the statistics.
> >  >
> >  > Signed-off-by: Mahmoud Mandour <ma.mandourr@gmail.com>
> >  > ---
> >  >  contrib/plugins/cache.c | 31 ++++++++++++++++++++++++++++++-
> >  >  1 file changed, 30 insertions(+), 1 deletion(-)
> >  >
> >  > diff --git a/contrib/plugins/cache.c b/contrib/plugins/cache.c
> >  > index 695fb969dc..a452aba01c 100644
> >  > --- a/contrib/plugins/cache.c
> >  > +++ b/contrib/plugins/cache.c
> >  > @@ -363,6 +363,11 @@ static void vcpu_mem_access(unsigned int
> vcpu_index, qemu_plugin_meminfo_t info,
> >  >      effective_addr = hwaddr ? qemu_plugin_hwaddr_phys_addr(hwaddr) :
> vaddr;
> >  >
> >  >      g_mutex_lock(&mtx);
> >  > +    if (dcache == NULL) {
> >  > +        g_mutex_unlock(&mtx);
> >  > +        return;
> >  > +    }
> >  > +
> >  >      if (!access_cache(dcache, effective_addr)) {
> >  >          insn = (InsnData *) userdata;
> >  >          insn->dmisses++;
> >  > @@ -380,6 +385,11 @@ static void vcpu_insn_exec(unsigned int
> vcpu_index, void *userdata)
> >  >      g_mutex_lock(&mtx);
> >  >      insn_addr = ((InsnData *) userdata)->addr;
> >  >
> >  > +    if (icache == NULL) {
> >  > +        g_mutex_unlock(&mtx);
> >  > +        return;
> >  > +    }
> >  > +
> >  >      if (!access_cache(icache, insn_addr)) {
> >  >          insn = (InsnData *) userdata;
> >  >          insn->imisses++;
> >  > @@ -406,12 +416,24 @@ static void vcpu_tb_trans(qemu_plugin_id_t id,
> struct qemu_plugin_tb *tb)
> >  >              effective_addr = (uint64_t) qemu_plugin_insn_vaddr(insn);
> >  >          }
> >  >
> >  > +        g_mutex_lock(&mtx);
> >  > +
> >  > +        /*
> >  > +         * is the plugin_exit callback called? If so, any further
> callback
> >  > +         * registration is useless as it won't get accounted for
> after calling
> >  > +         * plugin_exit once already, and also will use miss_ht after
> it's freed
> >  > +         */
> >  > +        if (miss_ht == NULL) {
> >  > +            g_mutex_unlock(&mtx);
> >  > +            return;
> >  > +        }
> >  > +
> >  >          /*
> >  >           * Instructions might get translated multiple times, we do
> not create
> >  >           * new entries for those instructions. Instead, we fetch the
> same
> >  >           * entry from the hash table and register it for the
> callback again.
> >  >           */
> >  > -        g_mutex_lock(&mtx);
> >  > +
> >  >          data = g_hash_table_lookup(miss_ht,
> GUINT_TO_POINTER(effective_addr));
> >  >          if (data == NULL) {
> >  >              data = g_new0(InsnData, 1);
> >  > @@ -527,13 +549,20 @@ static void log_top_insns()
> >  >
> >  >  static void plugin_exit(qemu_plugin_id_t id, void *p)
> >  >  {
> >  > +    g_mutex_lock(&mtx);
> >  >      log_stats();
> >  >      log_top_insns();
> >  >
> >  >      cache_free(dcache);
> >  > +    dcache = NULL;
> >  > +
> >  >      cache_free(icache);
> >  > +    icache = NULL;
> >  >
> >  >      g_hash_table_destroy(miss_ht);
> >  > +    miss_ht = NULL;
> >  > +
> >  > +    g_mutex_unlock(&mtx);
> >  >  }
> >  >
> >  >  static void policy_init()
> >
> >  --
> >  Alex Bennée
>
>
> --
> Alex Bennée
>
Alex Bennée July 19, 2021, 12:48 p.m. UTC | #5
Mahmoud Mandour <ma.mandourr@gmail.com> writes:

> On Mon, Jul 19, 2021 at 1:08 PM Alex Bennée <alex.bennee@linaro.org> wrote:
>
>  Mahmoud Mandour <ma.mandourr@gmail.com> writes:
>
>  > On Mon, Jul 19, 2021 at 11:48 AM Alex Bennée <alex.bennee@linaro.org> wrote:
>  >
>  >  Mahmoud Mandour <ma.mandourr@gmail.com> writes:
>  >
>  >  > Since callbacks may be interleaved because of multithreaded execution,
>  >  > we should not make assumptions about `plugin_exit` either. The problem
>  >  > with `plugin_exit` is that it frees shared data structures (caches and
>  >  > `miss_ht` hash table). It should not be assumed that callbacks will not
>  >  > be called after it and hence use already-freed data structures.
>  >
>  >  What was your test case for this because I wonder if it would be worth
>  >  coming up with one for check-tcg? 
>  >
>  > I think just any ad-hoc multithreaded execution will evoke the race pretty much 
>  > consistently.
>
>  I haven't managed to trigger it with testthread but of course my
>  libcache is trying to to defend against it.
>
> https://pastebin.com/X4Xazemh
> That's a minimal program that reproduces the bug consistently for me (to my observation, a simple 
> program that creates a bunch of threads that immediately exit does not produce the bug as frequently)
>
> You can also make hotblocks produce a similar crash (but make sure to add a g_hash_table_destroy(hotblocks)
> at the end of plugin_exit.)
>

Thanks, I'll give that a try.

>  
>  >  
>  >  From what I remember the race is
>  >  in between preexit_cleanup and the eventual _exit/exit_group which nixes
>  >  all other child threads. Maybe we should be triggering a more graceful
>  >  unload here?
>  >
>  > I think so. This remedies the bug for this particular plugin and I think there
>  > would be a better solution of course. However, I just can't ever get plugin_exit
>  > callback to be called more than once so I think that's probably not the problem?
>  >
>  > The problem is that we *use* the data in translation/mem_access/exec callbacks
>  > after a plugin_exit call is already called (this can be easily verified by having a 
>  > boolean set to true once plugin_exit is called and then g_assert this boolean is 
>  > false in the callbacks)
>
>  We have mechanisms for safely unloading plugins during running so I
>  think we should be able to do something cleanly here. I'll cook up an
>  RFC.
>
> I'll be waiting for it. Note that as I think I mentioned in the cover letter, it's that plugin_uninstall
> is probably problematic since it unregisters callbacks but already-fired callbacks will continue executing.
> So calling plugin_uninstall at the end of plugin_exit does not relieve
> the bug...

That's OK - the plugin_uninstall contract explicitly says that callbacks
may occur until the callback is called.

>  
>  
>  >
>  >  > This is mitigated in this commit by synchronizing the call to
>  >  > `plugin_exit` through locking to ensure execlusive access to data
>  >  > structures, and NULL-ifying those data structures so that subsequent
>  >  > callbacks can check whether the data strucutres are freed, and if so,
>  >  > immediately exit.
>  >  >
>  >  > It's okay to immediately exit and don't account for those callbacks
>  >  > since they won't be accounted for anyway since `plugin_exit` is already
>  >  > called once and reported the statistics.
>  >  >
>  >  > Signed-off-by: Mahmoud Mandour <ma.mandourr@gmail.com>
>  >  > ---
>  >  >  contrib/plugins/cache.c | 31 ++++++++++++++++++++++++++++++-
>  >  >  1 file changed, 30 insertions(+), 1 deletion(-)
>  >  >
>  >  > diff --git a/contrib/plugins/cache.c b/contrib/plugins/cache.c
>  >  > index 695fb969dc..a452aba01c 100644
>  >  > --- a/contrib/plugins/cache.c
>  >  > +++ b/contrib/plugins/cache.c
>  >  > @@ -363,6 +363,11 @@ static void vcpu_mem_access(unsigned int vcpu_index, qemu_plugin_meminfo_t info,
>  >  >      effective_addr = hwaddr ? qemu_plugin_hwaddr_phys_addr(hwaddr) : vaddr;
>  >  >  
>  >  >      g_mutex_lock(&mtx);
>  >  > +    if (dcache == NULL) {
>  >  > +        g_mutex_unlock(&mtx);
>  >  > +        return;
>  >  > +    }
>  >  > +
>  >  >      if (!access_cache(dcache, effective_addr)) {
>  >  >          insn = (InsnData *) userdata;
>  >  >          insn->dmisses++;
>  >  > @@ -380,6 +385,11 @@ static void vcpu_insn_exec(unsigned int vcpu_index, void *userdata)
>  >  >      g_mutex_lock(&mtx);
>  >  >      insn_addr = ((InsnData *) userdata)->addr;
>  >  >  
>  >  > +    if (icache == NULL) {
>  >  > +        g_mutex_unlock(&mtx);
>  >  > +        return;
>  >  > +    }
>  >  > +
>  >  >      if (!access_cache(icache, insn_addr)) {
>  >  >          insn = (InsnData *) userdata;
>  >  >          insn->imisses++;
>  >  > @@ -406,12 +416,24 @@ static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb)
>  >  >              effective_addr = (uint64_t) qemu_plugin_insn_vaddr(insn);
>  >  >          }
>  >  >  
>  >  > +        g_mutex_lock(&mtx);
>  >  > +
>  >  > +        /*
>  >  > +         * is the plugin_exit callback called? If so, any further callback
>  >  > +         * registration is useless as it won't get accounted for after calling
>  >  > +         * plugin_exit once already, and also will use miss_ht after it's freed
>  >  > +         */
>  >  > +        if (miss_ht == NULL) {
>  >  > +            g_mutex_unlock(&mtx);
>  >  > +            return;
>  >  > +        }
>  >  > +
>  >  >          /*
>  >  >           * Instructions might get translated multiple times, we do not create
>  >  >           * new entries for those instructions. Instead, we fetch the same
>  >  >           * entry from the hash table and register it for the callback again.
>  >  >           */
>  >  > -        g_mutex_lock(&mtx);
>  >  > +
>  >  >          data = g_hash_table_lookup(miss_ht, GUINT_TO_POINTER(effective_addr));
>  >  >          if (data == NULL) {
>  >  >              data = g_new0(InsnData, 1);
>  >  > @@ -527,13 +549,20 @@ static void log_top_insns()
>  >  >  
>  >  >  static void plugin_exit(qemu_plugin_id_t id, void *p)
>  >  >  {
>  >  > +    g_mutex_lock(&mtx);
>  >  >      log_stats();
>  >  >      log_top_insns();
>  >  >  
>  >  >      cache_free(dcache);
>  >  > +    dcache = NULL;
>  >  > +
>  >  >      cache_free(icache);
>  >  > +    icache = NULL;
>  >  >  
>  >  >      g_hash_table_destroy(miss_ht);
>  >  > +    miss_ht = NULL;
>  >  > +
>  >  > +    g_mutex_unlock(&mtx);
>  >  >  }
>  >  >  
>  >  >  static void policy_init()
>  >
>  >  -- 
>  >  Alex Bennée
>
>  -- 
>  Alex Bennée
diff mbox series

Patch

diff --git a/contrib/plugins/cache.c b/contrib/plugins/cache.c
index 695fb969dc..a452aba01c 100644
--- a/contrib/plugins/cache.c
+++ b/contrib/plugins/cache.c
@@ -363,6 +363,11 @@  static void vcpu_mem_access(unsigned int vcpu_index, qemu_plugin_meminfo_t info,
     effective_addr = hwaddr ? qemu_plugin_hwaddr_phys_addr(hwaddr) : vaddr;
 
     g_mutex_lock(&mtx);
+    if (dcache == NULL) {
+        g_mutex_unlock(&mtx);
+        return;
+    }
+
     if (!access_cache(dcache, effective_addr)) {
         insn = (InsnData *) userdata;
         insn->dmisses++;
@@ -380,6 +385,11 @@  static void vcpu_insn_exec(unsigned int vcpu_index, void *userdata)
     g_mutex_lock(&mtx);
     insn_addr = ((InsnData *) userdata)->addr;
 
+    if (icache == NULL) {
+        g_mutex_unlock(&mtx);
+        return;
+    }
+
     if (!access_cache(icache, insn_addr)) {
         insn = (InsnData *) userdata;
         insn->imisses++;
@@ -406,12 +416,24 @@  static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb)
             effective_addr = (uint64_t) qemu_plugin_insn_vaddr(insn);
         }
 
+        g_mutex_lock(&mtx);
+
+        /*
+         * is the plugin_exit callback called? If so, any further callback
+         * registration is useless as it won't get accounted for after calling
+         * plugin_exit once already, and also will use miss_ht after it's freed
+         */
+        if (miss_ht == NULL) {
+            g_mutex_unlock(&mtx);
+            return;
+        }
+
         /*
          * Instructions might get translated multiple times, we do not create
          * new entries for those instructions. Instead, we fetch the same
          * entry from the hash table and register it for the callback again.
          */
-        g_mutex_lock(&mtx);
+
         data = g_hash_table_lookup(miss_ht, GUINT_TO_POINTER(effective_addr));
         if (data == NULL) {
             data = g_new0(InsnData, 1);
@@ -527,13 +549,20 @@  static void log_top_insns()
 
 static void plugin_exit(qemu_plugin_id_t id, void *p)
 {
+    g_mutex_lock(&mtx);
     log_stats();
     log_top_insns();
 
     cache_free(dcache);
+    dcache = NULL;
+
     cache_free(icache);
+    icache = NULL;
 
     g_hash_table_destroy(miss_ht);
+    miss_ht = NULL;
+
+    g_mutex_unlock(&mtx);
 }
 
 static void policy_init()