diff mbox series

[14/19] util/async: Fixed tsan warnings

Message ID 20200522160755.886-15-robert.foley@linaro.org
State New
Headers show
Series Add Thread Sanitizer support to QEMU | expand

Commit Message

Robert Foley May 22, 2020, 4:07 p.m. UTC
For example:
  Atomic write of size 8 at 0x7b4800113c28 by main thread (mutexes: write M30):
    #0 __tsan_atomic64_exchange <null> (qemu-system-aarch64+0x386f85)
    #1 aio_bh_poll util/async.c:146:5 (qemu-system-aarch64+0xcd1f61)
    #2 aio_dispatch util/aio-posix.c:380:5 (qemu-system-aarch64+0xcd8abb)
    #3 aio_ctx_dispatch util/async.c:298:5 (qemu-system-aarch64+0xcd31b0)
    #4 g_main_context_dispatch <null> (libglib-2.0.so.0+0x4c416)
    #5 qemu_main_loop softmmu/vl.c:1664:9 (qemu-system-aarch64+0x5cc6d6)
    #6 main softmmu/main.c:49:5 (qemu-system-aarch64+0xc62857)

  Previous read of size 8 at 0x7b4800113c28 by thread T3 (mutexes: write M81):
    #0 aio_bh_enqueue util/async.c:81:9 (qemu-system-aarch64+0xcd2267)
    #1 qemu_bh_schedule util/async.c:181:5 (qemu-system-aarch64+0xcd2267)
    #2 worker_thread util/thread-pool.c:113:9 (qemu-system-aarch64+0xcd473c)
    #3 qemu_thread_start util/qemu-thread-posix.c:519:9 (qemu-system-aarch64+0xcde280)

Cc: Stefan Hajnoczi <stefanha@redhat.com>
Cc: Fam Zheng <fam@euphon.net>
Signed-off-by: Robert Foley <robert.foley@linaro.org>
---
 util/async.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

Comments

Emilio Cota May 23, 2020, 8:12 p.m. UTC | #1
On Fri, May 22, 2020 at 12:07:50 -0400, Robert Foley wrote:
> For example:
>   Atomic write of size 8 at 0x7b4800113c28 by main thread (mutexes: write M30):
>     #0 __tsan_atomic64_exchange <null> (qemu-system-aarch64+0x386f85)
>     #1 aio_bh_poll util/async.c:146:5 (qemu-system-aarch64+0xcd1f61)
>     #2 aio_dispatch util/aio-posix.c:380:5 (qemu-system-aarch64+0xcd8abb)
>     #3 aio_ctx_dispatch util/async.c:298:5 (qemu-system-aarch64+0xcd31b0)
>     #4 g_main_context_dispatch <null> (libglib-2.0.so.0+0x4c416)
>     #5 qemu_main_loop softmmu/vl.c:1664:9 (qemu-system-aarch64+0x5cc6d6)
>     #6 main softmmu/main.c:49:5 (qemu-system-aarch64+0xc62857)
> 
>   Previous read of size 8 at 0x7b4800113c28 by thread T3 (mutexes: write M81):
>     #0 aio_bh_enqueue util/async.c:81:9 (qemu-system-aarch64+0xcd2267)
>     #1 qemu_bh_schedule util/async.c:181:5 (qemu-system-aarch64+0xcd2267)
>     #2 worker_thread util/thread-pool.c:113:9 (qemu-system-aarch64+0xcd473c)
>     #3 qemu_thread_start util/qemu-thread-posix.c:519:9 (qemu-system-aarch64+0xcde280)
> 
> Cc: Stefan Hajnoczi <stefanha@redhat.com>
> Cc: Fam Zheng <fam@euphon.net>
> Signed-off-by: Robert Foley <robert.foley@linaro.org>
> ---
>  util/async.c | 11 +++++++++--
>  1 file changed, 9 insertions(+), 2 deletions(-)
> 
> diff --git a/util/async.c b/util/async.c
> index 1319eee3bc..51e306bf0c 100644
> --- a/util/async.c
> +++ b/util/async.c
> @@ -33,6 +33,7 @@
>  #include "block/raw-aio.h"
>  #include "qemu/coroutine_int.h"
>  #include "trace.h"
> +#include "qemu/tsan.h"
>  
>  /***********************************************************/
>  /* bottom halves (can be seen as timers which expire ASAP) */
> @@ -76,10 +77,12 @@ static void aio_bh_enqueue(QEMUBH *bh, unsigned new_flags)
>       * 2. ctx is loaded before the callback has a chance to execute and bh
>       *    could be freed.
>       */
> +    TSAN_ANNOTATE_IGNORE_WRITES_BEGIN();

Why do we need these annotations here? It looks like the fix for the
race in the commit message is below (i.e. atomic_read).

In general, I'd expect annotations to come with a comment, since
they should be used sparingly. That is, the assumption is that
tsan is almost always right.

>      old_flags = atomic_fetch_or(&bh->flags, BH_PENDING | new_flags);
>      if (!(old_flags & BH_PENDING)) {
>          QSLIST_INSERT_HEAD_ATOMIC(&ctx->bh_list, bh, next);
>      }
> +    TSAN_ANNOTATE_IGNORE_WRITES_END();
>  
>      aio_notify(ctx);
>  }
> @@ -143,7 +146,9 @@ int aio_bh_poll(AioContext *ctx)
>      BHListSlice *s;
>      int ret = 0;
>  
> +    TSAN_ANNOTATE_IGNORE_WRITES_BEGIN();
>      QSLIST_MOVE_ATOMIC(&slice.bh_list, &ctx->bh_list);
> +    TSAN_ANNOTATE_IGNORE_WRITES_END();

ditto

>      QSIMPLEQ_INSERT_TAIL(&ctx->bh_slice_list, &slice, next);
>  
>      while ((s = QSIMPLEQ_FIRST(&ctx->bh_slice_list))) {
> @@ -280,14 +285,16 @@ aio_ctx_check(GSource *source)
>      aio_notify_accept(ctx);
>  
>      QSLIST_FOREACH_RCU(bh, &ctx->bh_list, next) {
> -        if ((bh->flags & (BH_SCHEDULED | BH_DELETED)) == BH_SCHEDULED) {
> +        if ((atomic_read(&bh->flags) & (BH_SCHEDULED | BH_DELETED))
> +             == BH_SCHEDULED) {
>              return true;
>          }
>      }
>  
>      QSIMPLEQ_FOREACH(s, &ctx->bh_slice_list, next) {
>          QSLIST_FOREACH_RCU(bh, &s->bh_list, next) {
> -            if ((bh->flags & (BH_SCHEDULED | BH_DELETED)) == BH_SCHEDULED) {
> +            if ((atomic_read(&bh->flags) & (BH_SCHEDULED | BH_DELETED))
> +                 == BH_SCHEDULED) {

This hunk like the real fix. Also, I'd put "fix race" in the commit
title as opposed to "fix warning" since fixing races is the goal, not
fixing warnings.

Thanks,

		Emilio
Stefan Hajnoczi May 26, 2020, 10:32 a.m. UTC | #2
On Fri, May 22, 2020 at 12:07:50PM -0400, Robert Foley wrote:
> For example:
>   Atomic write of size 8 at 0x7b4800113c28 by main thread (mutexes: write M30):
>     #0 __tsan_atomic64_exchange <null> (qemu-system-aarch64+0x386f85)
>     #1 aio_bh_poll util/async.c:146:5 (qemu-system-aarch64+0xcd1f61)
>     #2 aio_dispatch util/aio-posix.c:380:5 (qemu-system-aarch64+0xcd8abb)
>     #3 aio_ctx_dispatch util/async.c:298:5 (qemu-system-aarch64+0xcd31b0)
>     #4 g_main_context_dispatch <null> (libglib-2.0.so.0+0x4c416)
>     #5 qemu_main_loop softmmu/vl.c:1664:9 (qemu-system-aarch64+0x5cc6d6)
>     #6 main softmmu/main.c:49:5 (qemu-system-aarch64+0xc62857)
> 
>   Previous read of size 8 at 0x7b4800113c28 by thread T3 (mutexes: write M81):
>     #0 aio_bh_enqueue util/async.c:81:9 (qemu-system-aarch64+0xcd2267)
>     #1 qemu_bh_schedule util/async.c:181:5 (qemu-system-aarch64+0xcd2267)
>     #2 worker_thread util/thread-pool.c:113:9 (qemu-system-aarch64+0xcd473c)
>     #3 qemu_thread_start util/qemu-thread-posix.c:519:9 (qemu-system-aarch64+0xcde280)
> 
> Cc: Stefan Hajnoczi <stefanha@redhat.com>
> Cc: Fam Zheng <fam@euphon.net>
> Signed-off-by: Robert Foley <robert.foley@linaro.org>
> ---
>  util/async.c | 11 +++++++++--
>  1 file changed, 9 insertions(+), 2 deletions(-)

Please explain why these warnings should be ignored in the commit
description. I have been CCed on this patch, am unfamiliar with TSAN
annotations, and now need to figure out:
1. Is ignoring the warning safe or should async.c be fixed somehow?
2. How do the annotation macros work and are they being used correectly?

It's likely that people coming across this commit in the future would
also benefit from information in the commit description that makes these
things clear.

Thanks,
Stefan
Robert Foley May 26, 2020, 3:19 p.m. UTC | #3
On Sat, 23 May 2020 at 16:12, Emilio G. Cota <cota@braap.org> wrote:
>
> On Fri, May 22, 2020 at 12:07:50 -0400, Robert Foley wrote:
<snip>
>
> >      QSIMPLEQ_INSERT_TAIL(&ctx->bh_slice_list, &slice, next);
> >
> >      while ((s = QSIMPLEQ_FIRST(&ctx->bh_slice_list))) {
> > @@ -280,14 +285,16 @@ aio_ctx_check(GSource *source)
> >      aio_notify_accept(ctx);
> >
> >      QSLIST_FOREACH_RCU(bh, &ctx->bh_list, next) {
> > -        if ((bh->flags & (BH_SCHEDULED | BH_DELETED)) == BH_SCHEDULED) {
> > +        if ((atomic_read(&bh->flags) & (BH_SCHEDULED | BH_DELETED))
> > +             == BH_SCHEDULED) {
> >              return true;
> >          }
> >      }
> >
> >      QSIMPLEQ_FOREACH(s, &ctx->bh_slice_list, next) {
> >          QSLIST_FOREACH_RCU(bh, &s->bh_list, next) {
> > -            if ((bh->flags & (BH_SCHEDULED | BH_DELETED)) == BH_SCHEDULED) {
> > +            if ((atomic_read(&bh->flags) & (BH_SCHEDULED | BH_DELETED))
> > +                 == BH_SCHEDULED) {
>
> This hunk like the real fix. Also, I'd put "fix race" in the commit
> title as opposed to "fix warning" since fixing races is the goal, not
> fixing warnings.

Good point, will update the commit.

Thanks & Regards,
-Rob
>
> Thanks,
>
>                 Emilio
Robert Foley May 26, 2020, 3:21 p.m. UTC | #4
Hi Stefan,

On Tue, 26 May 2020 at 06:32, Stefan Hajnoczi <stefanha@redhat.com> wrote:
>
> On Fri, May 22, 2020 at 12:07:50PM -0400, Robert Foley wrote:
> > For example:
> >   Atomic write of size 8 at 0x7b4800113c28 by main thread (mutexes: write M30):
> >     #0 __tsan_atomic64_exchange <null> (qemu-system-aarch64+0x386f85)
> >     #1 aio_bh_poll util/async.c:146:5 (qemu-system-aarch64+0xcd1f61)
> >     #2 aio_dispatch util/aio-posix.c:380:5 (qemu-system-aarch64+0xcd8abb)
> >     #3 aio_ctx_dispatch util/async.c:298:5 (qemu-system-aarch64+0xcd31b0)
> >     #4 g_main_context_dispatch <null> (libglib-2.0.so.0+0x4c416)
> >     #5 qemu_main_loop softmmu/vl.c:1664:9 (qemu-system-aarch64+0x5cc6d6)
> >     #6 main softmmu/main.c:49:5 (qemu-system-aarch64+0xc62857)
> >
> >   Previous read of size 8 at 0x7b4800113c28 by thread T3 (mutexes: write M81):
> >     #0 aio_bh_enqueue util/async.c:81:9 (qemu-system-aarch64+0xcd2267)
> >     #1 qemu_bh_schedule util/async.c:181:5 (qemu-system-aarch64+0xcd2267)
> >     #2 worker_thread util/thread-pool.c:113:9 (qemu-system-aarch64+0xcd473c)
> >     #3 qemu_thread_start util/qemu-thread-posix.c:519:9 (qemu-system-aarch64+0xcde280)
> >
> > Cc: Stefan Hajnoczi <stefanha@redhat.com>
> > Cc: Fam Zheng <fam@euphon.net>
> > Signed-off-by: Robert Foley <robert.foley@linaro.org>
> > ---
> >  util/async.c | 11 +++++++++--
> >  1 file changed, 9 insertions(+), 2 deletions(-)
>
> Please explain why these warnings should be ignored in the commit
> description. I have been CCed on this patch, am unfamiliar with TSAN
> annotations, and now need to figure out:
> 1. Is ignoring the warning safe or should async.c be fixed somehow?
> 2. How do the annotation macros work and are they being used correectly?
>
> It's likely that people coming across this commit in the future would
> also benefit from information in the commit description that makes these
> things clear.
>
All good points.

I think we need to add a whole lot more details on TSan and its use
along with the use of the annotation macros among other things.
Will CC you on the updates to testing.rst, which will contain this
information.
This will hopefully provide the context, information and resources needed to
make the kinds of determinations we're looking for in 1. and 2.

As for the annotations, I think we are going to re-focus the patch and
remove the annotations for now.  However, we will plan to update
this commit description with more details on the changes to use atomic reads.

Thanks & Regards,
-Rob
> Thanks,
> Stefan
diff mbox series

Patch

diff --git a/util/async.c b/util/async.c
index 1319eee3bc..51e306bf0c 100644
--- a/util/async.c
+++ b/util/async.c
@@ -33,6 +33,7 @@ 
 #include "block/raw-aio.h"
 #include "qemu/coroutine_int.h"
 #include "trace.h"
+#include "qemu/tsan.h"
 
 /***********************************************************/
 /* bottom halves (can be seen as timers which expire ASAP) */
@@ -76,10 +77,12 @@  static void aio_bh_enqueue(QEMUBH *bh, unsigned new_flags)
      * 2. ctx is loaded before the callback has a chance to execute and bh
      *    could be freed.
      */
+    TSAN_ANNOTATE_IGNORE_WRITES_BEGIN();
     old_flags = atomic_fetch_or(&bh->flags, BH_PENDING | new_flags);
     if (!(old_flags & BH_PENDING)) {
         QSLIST_INSERT_HEAD_ATOMIC(&ctx->bh_list, bh, next);
     }
+    TSAN_ANNOTATE_IGNORE_WRITES_END();
 
     aio_notify(ctx);
 }
@@ -143,7 +146,9 @@  int aio_bh_poll(AioContext *ctx)
     BHListSlice *s;
     int ret = 0;
 
+    TSAN_ANNOTATE_IGNORE_WRITES_BEGIN();
     QSLIST_MOVE_ATOMIC(&slice.bh_list, &ctx->bh_list);
+    TSAN_ANNOTATE_IGNORE_WRITES_END();
     QSIMPLEQ_INSERT_TAIL(&ctx->bh_slice_list, &slice, next);
 
     while ((s = QSIMPLEQ_FIRST(&ctx->bh_slice_list))) {
@@ -280,14 +285,16 @@  aio_ctx_check(GSource *source)
     aio_notify_accept(ctx);
 
     QSLIST_FOREACH_RCU(bh, &ctx->bh_list, next) {
-        if ((bh->flags & (BH_SCHEDULED | BH_DELETED)) == BH_SCHEDULED) {
+        if ((atomic_read(&bh->flags) & (BH_SCHEDULED | BH_DELETED))
+             == BH_SCHEDULED) {
             return true;
         }
     }
 
     QSIMPLEQ_FOREACH(s, &ctx->bh_slice_list, next) {
         QSLIST_FOREACH_RCU(bh, &s->bh_list, next) {
-            if ((bh->flags & (BH_SCHEDULED | BH_DELETED)) == BH_SCHEDULED) {
+            if ((atomic_read(&bh->flags) & (BH_SCHEDULED | BH_DELETED))
+                 == BH_SCHEDULED) {
                 return true;
             }
         }