diff mbox series

[RFC,v3,1/3] memory: add depth assert in address_space_to_flatview

Message ID 20221213133510.1279488-2-xuchuangxclwt@bytedance.com
State New
Headers show
Series migration: reduce time of loading non-iterable vmstate | expand

Commit Message

Chuang Xu Dec. 13, 2022, 1:35 p.m. UTC
Before using any flatview, sanity check we're not during a memory
region transaction or the map can be invalid.

Signed-off-by: Chuang Xu <xuchuangxclwt@bytedance.com>
---
 include/exec/memory.h | 9 +++++++++
 softmmu/memory.c      | 1 -
 2 files changed, 9 insertions(+), 1 deletion(-)

Comments

Chuang Xu Dec. 14, 2022, 4:03 p.m. UTC | #1
On 2022/12/13 下午9:35, Chuang Xu wrote:

Before using any flatview, sanity check we're not during a memory
region transaction or the map can be invalid.

Signed-off-by: Chuang Xu <xuchuangxclwt@bytedance.com>
<xuchuangxclwt@bytedance.com>
---
 include/exec/memory.h | 9 +++++++++
 softmmu/memory.c      | 1 -
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/include/exec/memory.h b/include/exec/memory.h
index 91f8a2395a..b43cd46084 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -1069,8 +1069,17 @@ struct FlatView {
     MemoryRegion *root;
 };

+static unsigned memory_region_transaction_depth;
+
 static inline FlatView *address_space_to_flatview(AddressSpace *as)
 {
+    /*
+     * Before using any flatview, sanity check we're not during a memory
+     * region transaction or the map can be invalid.  Note that this can
+     * also be called during commit phase of memory transaction, but that
+     * should also only happen when the depth decreases to 0 first.
+     */
+    assert(memory_region_transaction_depth == 0);
     return qatomic_rcu_read(&as->current_map);
 }

diff --git a/softmmu/memory.c b/softmmu/memory.c
index bc0be3f62c..f177c40cd8 100644
--- a/softmmu/memory.c
+++ b/softmmu/memory.c
@@ -37,7 +37,6 @@

 //#define DEBUG_UNASSIGNED

-static unsigned memory_region_transaction_depth;
 static bool memory_region_update_pending;
 static bool ioeventfd_update_pending;
 unsigned int global_dirty_tracking;

Here are some new situations to be synchronized.

I found that there is a probability to trigger assert in the QEMU startup phase.

Here is the coredump backtrace:

#0  __GI_raise (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:50
#1  0x00007f7825e33535 in __GI_abort () at abort.c:79
#2  0x00007f7825e3340f in __assert_fail_base (fmt=0x7f7825f94ef0
"%s%s%s:%u: %s%sAssertion `%s' failed.\n%n", assertion=0x5653c643add8
"memory_region_transaction_depth == 0",
    file=0x5653c63dad78
"/data00/migration/qemu-open/include/exec/memory.h", line=1082,
function=<optimized out>) at assert.c:92
#3  0x00007f7825e411a2 in __GI___assert_fail
(assertion=assertion@entry=0x5653c643add8
"memory_region_transaction_depth == 0",
    file=file@entry=0x5653c63dad78
"/data00/migration/qemu-open/include/exec/memory.h",
line=line@entry=1082,
    function=function@entry=0x5653c643bd00 <__PRETTY_FUNCTION__.18101>
"address_space_to_flatview") at assert.c:101
#4  0x00005653c60f0383 in address_space_to_flatview (as=0x5653c6af2340
<address_space_memory>) at
/data00/migration/qemu-open/include/exec/memory.h:1082
#5  address_space_to_flatview (as=0x5653c6af2340
<address_space_memory>) at
/data00/migration/qemu-open/include/exec/memory.h:1074
#6  address_space_get_flatview (as=0x5653c6af2340
<address_space_memory>) at ../softmmu/memory.c:809
#7  0x00005653c60fef04 in address_space_cache_init
(cache=cache@entry=0x7f781fff8420, as=<optimized out>,
addr=63310635776, len=48, is_write=is_write@entry=false)
    at ../softmmu/physmem.c:3352
#8  0x00005653c60c08c5 in virtqueue_split_pop (vq=0x7f781c576270,
sz=264) at ../hw/virtio/virtio.c:1959
#9  0x00005653c60c0b7d in virtqueue_pop (vq=vq@entry=0x7f781c576270,
sz=<optimized out>) at ../hw/virtio/virtio.c:2177
#10 0x00005653c609f14f in virtio_scsi_pop_req
(s=s@entry=0x5653c9034300, vq=vq@entry=0x7f781c576270) at
../hw/scsi/virtio-scsi.c:219
#11 0x00005653c60a04a3 in virtio_scsi_handle_cmd_vq
(vq=0x7f781c576270, s=0x5653c9034300) at ../hw/scsi/virtio-scsi.c:735
#12 virtio_scsi_handle_cmd (vdev=0x5653c9034300, vq=0x7f781c576270) at
../hw/scsi/virtio-scsi.c:776
#13 0x00005653c60ba72f in virtio_queue_notify_vq (vq=0x7f781c576270)
at ../hw/virtio/virtio.c:2847
#14 0x00005653c62d9706 in aio_dispatch_handler
(ctx=ctx@entry=0x5653c84909e0, node=0x7f68e4007840) at
../util/aio-posix.c:369
#15 0x00005653c62da254 in aio_dispatch_ready_handlers
(ready_list=0x7f781fffe6a8, ctx=0x5653c84909e0) at
../util/aio-posix.c:399
#16 aio_poll (ctx=0x5653c84909e0, blocking=blocking@entry=true) at
../util/aio-posix.c:713
#17 0x00005653c61b2296 in iothread_run
(opaque=opaque@entry=0x5653c822c390) at ../iothread.c:67
#18 0x00005653c62dcd8a in qemu_thread_start (args=<optimized out>) at
../util/qemu-thread-posix.c:505
#19 0x00007f7825fd8fa3 in start_thread (arg=<optimized out>) at
pthread_create.c:486
#20 0x00007f7825f0a06f in clone () at
../sysdeps/unix/sysv/linux/x86_64/clone.S:95

I find that some functions doesn't take bql before calling
address_space_to_flatview(), as shown in the backtrace. I
also find other similar situations in the code. I find that
prepare_mmio_access() will take bql to provide some protection,
but I don't think it's sufficient.I think that if we want to
ensure that the reading and writing of mr and the modification
of mr don't occur at the same time, maybe we need to take bql
in address_space_to_flatview() like this:


static FlatView *address_space_to_flatview(AddressSpace *as)
{
    bool release_lock = false;
    FlatView *ret;

    if (!qemu_mutex_iothread_locked()) {
        qemu_mutex_lock_iothread();
        release_lock = true;
    }

    /*
     * Before using any flatview, sanity check we're not during a memory
     * region transaction or the map can be invalid.  Note that this can
     * also be called during commit phase of memory transaction, but that
     * should also only happen when the depth decreases to 0 first.
     */
    assert(memory_region_transaction_depth == 0);
    ret = qatomic_rcu_read(&as->current_map);

    if (release_lock) {
        qemu_mutex_unlock_iothread();
    }

    return ret;
}
Peter Xu Dec. 14, 2022, 9:38 p.m. UTC | #2
On Wed, Dec 14, 2022 at 08:03:38AM -0800, Chuang Xu wrote:
> On 2022/12/13 下午9:35, Chuang Xu wrote:
> 
> Before using any flatview, sanity check we're not during a memory
> region transaction or the map can be invalid.
> 
> Signed-off-by: Chuang Xu <xuchuangxclwt@bytedance.com>
> <xuchuangxclwt@bytedance.com>
> ---
>  include/exec/memory.h | 9 +++++++++
>  softmmu/memory.c      | 1 -
>  2 files changed, 9 insertions(+), 1 deletion(-)
> 
> diff --git a/include/exec/memory.h b/include/exec/memory.h
> index 91f8a2395a..b43cd46084 100644
> --- a/include/exec/memory.h
> +++ b/include/exec/memory.h
> @@ -1069,8 +1069,17 @@ struct FlatView {
>      MemoryRegion *root;
>  };
> 
> +static unsigned memory_region_transaction_depth;
> +
>  static inline FlatView *address_space_to_flatview(AddressSpace *as)
>  {
> +    /*
> +     * Before using any flatview, sanity check we're not during a memory
> +     * region transaction or the map can be invalid.  Note that this can
> +     * also be called during commit phase of memory transaction, but that
> +     * should also only happen when the depth decreases to 0 first.
> +     */
> +    assert(memory_region_transaction_depth == 0);
>      return qatomic_rcu_read(&as->current_map);
>  }
> 
> diff --git a/softmmu/memory.c b/softmmu/memory.c
> index bc0be3f62c..f177c40cd8 100644
> --- a/softmmu/memory.c
> +++ b/softmmu/memory.c
> @@ -37,7 +37,6 @@
> 
>  //#define DEBUG_UNASSIGNED
> 
> -static unsigned memory_region_transaction_depth;
>  static bool memory_region_update_pending;
>  static bool ioeventfd_update_pending;
>  unsigned int global_dirty_tracking;
> 
> Here are some new situations to be synchronized.
> 
> I found that there is a probability to trigger assert in the QEMU startup phase.
> 
> Here is the coredump backtrace:
> 
> #0  __GI_raise (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:50
> #1  0x00007f7825e33535 in __GI_abort () at abort.c:79
> #2  0x00007f7825e3340f in __assert_fail_base (fmt=0x7f7825f94ef0
> "%s%s%s:%u: %s%sAssertion `%s' failed.\n%n", assertion=0x5653c643add8
> "memory_region_transaction_depth == 0",
>     file=0x5653c63dad78
> "/data00/migration/qemu-open/include/exec/memory.h", line=1082,
> function=<optimized out>) at assert.c:92
> #3  0x00007f7825e411a2 in __GI___assert_fail
> (assertion=assertion@entry=0x5653c643add8
> "memory_region_transaction_depth == 0",
>     file=file@entry=0x5653c63dad78
> "/data00/migration/qemu-open/include/exec/memory.h",
> line=line@entry=1082,
>     function=function@entry=0x5653c643bd00 <__PRETTY_FUNCTION__.18101>
> "address_space_to_flatview") at assert.c:101
> #4  0x00005653c60f0383 in address_space_to_flatview (as=0x5653c6af2340
> <address_space_memory>) at
> /data00/migration/qemu-open/include/exec/memory.h:1082
> #5  address_space_to_flatview (as=0x5653c6af2340
> <address_space_memory>) at
> /data00/migration/qemu-open/include/exec/memory.h:1074
> #6  address_space_get_flatview (as=0x5653c6af2340
> <address_space_memory>) at ../softmmu/memory.c:809
> #7  0x00005653c60fef04 in address_space_cache_init
> (cache=cache@entry=0x7f781fff8420, as=<optimized out>,
> addr=63310635776, len=48, is_write=is_write@entry=false)
>     at ../softmmu/physmem.c:3352
> #8  0x00005653c60c08c5 in virtqueue_split_pop (vq=0x7f781c576270,
> sz=264) at ../hw/virtio/virtio.c:1959
> #9  0x00005653c60c0b7d in virtqueue_pop (vq=vq@entry=0x7f781c576270,
> sz=<optimized out>) at ../hw/virtio/virtio.c:2177
> #10 0x00005653c609f14f in virtio_scsi_pop_req
> (s=s@entry=0x5653c9034300, vq=vq@entry=0x7f781c576270) at
> ../hw/scsi/virtio-scsi.c:219
> #11 0x00005653c60a04a3 in virtio_scsi_handle_cmd_vq
> (vq=0x7f781c576270, s=0x5653c9034300) at ../hw/scsi/virtio-scsi.c:735
> #12 virtio_scsi_handle_cmd (vdev=0x5653c9034300, vq=0x7f781c576270) at
> ../hw/scsi/virtio-scsi.c:776
> #13 0x00005653c60ba72f in virtio_queue_notify_vq (vq=0x7f781c576270)
> at ../hw/virtio/virtio.c:2847
> #14 0x00005653c62d9706 in aio_dispatch_handler
> (ctx=ctx@entry=0x5653c84909e0, node=0x7f68e4007840) at
> ../util/aio-posix.c:369
> #15 0x00005653c62da254 in aio_dispatch_ready_handlers
> (ready_list=0x7f781fffe6a8, ctx=0x5653c84909e0) at
> ../util/aio-posix.c:399
> #16 aio_poll (ctx=0x5653c84909e0, blocking=blocking@entry=true) at
> ../util/aio-posix.c:713
> #17 0x00005653c61b2296 in iothread_run
> (opaque=opaque@entry=0x5653c822c390) at ../iothread.c:67
> #18 0x00005653c62dcd8a in qemu_thread_start (args=<optimized out>) at
> ../util/qemu-thread-posix.c:505
> #19 0x00007f7825fd8fa3 in start_thread (arg=<optimized out>) at
> pthread_create.c:486
> #20 0x00007f7825f0a06f in clone () at
> ../sysdeps/unix/sysv/linux/x86_64/clone.S:95

This does look like a bug to me.

Paolo/Michael?

> 
> I find that some functions doesn't take bql before calling
> address_space_to_flatview(), as shown in the backtrace. I
> also find other similar situations in the code. I find that
> prepare_mmio_access() will take bql to provide some protection,
> but I don't think it's sufficient.I think that if we want to
> ensure that the reading and writing of mr and the modification
> of mr don't occur at the same time, maybe we need to take bql
> in address_space_to_flatview() like this:
> 
> 
> static FlatView *address_space_to_flatview(AddressSpace *as)
> {
>     bool release_lock = false;
>     FlatView *ret;
> 
>     if (!qemu_mutex_iothread_locked()) {
>         qemu_mutex_lock_iothread();
>         release_lock = true;
>     }
> 
>     /*
>      * Before using any flatview, sanity check we're not during a memory
>      * region transaction or the map can be invalid.  Note that this can
>      * also be called during commit phase of memory transaction, but that
>      * should also only happen when the depth decreases to 0 first.
>      */
>     assert(memory_region_transaction_depth == 0);
>     ret = qatomic_rcu_read(&as->current_map);
> 
>     if (release_lock) {
>         qemu_mutex_unlock_iothread();
>     }
> 
>     return ret;
> }
Peter Xu Dec. 15, 2022, 4:04 p.m. UTC | #3
On Wed, Dec 14, 2022 at 04:38:52PM -0500, Peter Xu wrote:
> On Wed, Dec 14, 2022 at 08:03:38AM -0800, Chuang Xu wrote:
> > On 2022/12/13 下午9:35, Chuang Xu wrote:
> > 
> > Before using any flatview, sanity check we're not during a memory
> > region transaction or the map can be invalid.
> > 
> > Signed-off-by: Chuang Xu <xuchuangxclwt@bytedance.com>
> > <xuchuangxclwt@bytedance.com>
> > ---
> >  include/exec/memory.h | 9 +++++++++
> >  softmmu/memory.c      | 1 -
> >  2 files changed, 9 insertions(+), 1 deletion(-)
> > 
> > diff --git a/include/exec/memory.h b/include/exec/memory.h
> > index 91f8a2395a..b43cd46084 100644
> > --- a/include/exec/memory.h
> > +++ b/include/exec/memory.h
> > @@ -1069,8 +1069,17 @@ struct FlatView {
> >      MemoryRegion *root;
> >  };
> > 
> > +static unsigned memory_region_transaction_depth;
> > +
> >  static inline FlatView *address_space_to_flatview(AddressSpace *as)
> >  {
> > +    /*
> > +     * Before using any flatview, sanity check we're not during a memory
> > +     * region transaction or the map can be invalid.  Note that this can
> > +     * also be called during commit phase of memory transaction, but that
> > +     * should also only happen when the depth decreases to 0 first.
> > +     */
> > +    assert(memory_region_transaction_depth == 0);
> >      return qatomic_rcu_read(&as->current_map);
> >  }
> > 
> > diff --git a/softmmu/memory.c b/softmmu/memory.c
> > index bc0be3f62c..f177c40cd8 100644
> > --- a/softmmu/memory.c
> > +++ b/softmmu/memory.c
> > @@ -37,7 +37,6 @@
> > 
> >  //#define DEBUG_UNASSIGNED
> > 
> > -static unsigned memory_region_transaction_depth;
> >  static bool memory_region_update_pending;
> >  static bool ioeventfd_update_pending;
> >  unsigned int global_dirty_tracking;
> > 
> > Here are some new situations to be synchronized.
> > 
> > I found that there is a probability to trigger assert in the QEMU startup phase.
> > 
> > Here is the coredump backtrace:
> > 
> > #0  __GI_raise (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:50
> > #1  0x00007f7825e33535 in __GI_abort () at abort.c:79
> > #2  0x00007f7825e3340f in __assert_fail_base (fmt=0x7f7825f94ef0
> > "%s%s%s:%u: %s%sAssertion `%s' failed.\n%n", assertion=0x5653c643add8
> > "memory_region_transaction_depth == 0",
> >     file=0x5653c63dad78
> > "/data00/migration/qemu-open/include/exec/memory.h", line=1082,
> > function=<optimized out>) at assert.c:92
> > #3  0x00007f7825e411a2 in __GI___assert_fail
> > (assertion=assertion@entry=0x5653c643add8
> > "memory_region_transaction_depth == 0",
> >     file=file@entry=0x5653c63dad78
> > "/data00/migration/qemu-open/include/exec/memory.h",
> > line=line@entry=1082,
> >     function=function@entry=0x5653c643bd00 <__PRETTY_FUNCTION__.18101>
> > "address_space_to_flatview") at assert.c:101
> > #4  0x00005653c60f0383 in address_space_to_flatview (as=0x5653c6af2340
> > <address_space_memory>) at
> > /data00/migration/qemu-open/include/exec/memory.h:1082
> > #5  address_space_to_flatview (as=0x5653c6af2340
> > <address_space_memory>) at
> > /data00/migration/qemu-open/include/exec/memory.h:1074
> > #6  address_space_get_flatview (as=0x5653c6af2340
> > <address_space_memory>) at ../softmmu/memory.c:809
> > #7  0x00005653c60fef04 in address_space_cache_init
> > (cache=cache@entry=0x7f781fff8420, as=<optimized out>,
> > addr=63310635776, len=48, is_write=is_write@entry=false)
> >     at ../softmmu/physmem.c:3352
> > #8  0x00005653c60c08c5 in virtqueue_split_pop (vq=0x7f781c576270,
> > sz=264) at ../hw/virtio/virtio.c:1959
> > #9  0x00005653c60c0b7d in virtqueue_pop (vq=vq@entry=0x7f781c576270,
> > sz=<optimized out>) at ../hw/virtio/virtio.c:2177
> > #10 0x00005653c609f14f in virtio_scsi_pop_req
> > (s=s@entry=0x5653c9034300, vq=vq@entry=0x7f781c576270) at
> > ../hw/scsi/virtio-scsi.c:219
> > #11 0x00005653c60a04a3 in virtio_scsi_handle_cmd_vq
> > (vq=0x7f781c576270, s=0x5653c9034300) at ../hw/scsi/virtio-scsi.c:735
> > #12 virtio_scsi_handle_cmd (vdev=0x5653c9034300, vq=0x7f781c576270) at
> > ../hw/scsi/virtio-scsi.c:776
> > #13 0x00005653c60ba72f in virtio_queue_notify_vq (vq=0x7f781c576270)
> > at ../hw/virtio/virtio.c:2847
> > #14 0x00005653c62d9706 in aio_dispatch_handler
> > (ctx=ctx@entry=0x5653c84909e0, node=0x7f68e4007840) at
> > ../util/aio-posix.c:369
> > #15 0x00005653c62da254 in aio_dispatch_ready_handlers
> > (ready_list=0x7f781fffe6a8, ctx=0x5653c84909e0) at
> > ../util/aio-posix.c:399
> > #16 aio_poll (ctx=0x5653c84909e0, blocking=blocking@entry=true) at
> > ../util/aio-posix.c:713
> > #17 0x00005653c61b2296 in iothread_run
> > (opaque=opaque@entry=0x5653c822c390) at ../iothread.c:67
> > #18 0x00005653c62dcd8a in qemu_thread_start (args=<optimized out>) at
> > ../util/qemu-thread-posix.c:505
> > #19 0x00007f7825fd8fa3 in start_thread (arg=<optimized out>) at
> > pthread_create.c:486
> > #20 0x00007f7825f0a06f in clone () at
> > ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
> 
> This does look like a bug to me.
> 
> Paolo/Michael?

Hmm, I found that virtqueue_split_pop() took the rcu lock.. then I think
it's fine.

Chuang, I think what you can try next is add a helper to detect holding of
rcu lock, then assert with "depth==0 || rcu_read_locked()".  I think that's:

static inline bool rcu_read_locked(void)
{
    struct rcu_reader_data *p_rcu_reader = get_ptr_rcu_reader();
    
    return p_rcu_reader->depth > 0;
}

Then IIUC you can even drop patch 2 because virtio_load() also takes the
rcu lock.
Peter Maydell Dec. 15, 2022, 4:51 p.m. UTC | #4
On Tue, 13 Dec 2022 at 13:36, Chuang Xu <xuchuangxclwt@bytedance.com> wrote:
>
> Before using any flatview, sanity check we're not during a memory
> region transaction or the map can be invalid.
>
> Signed-off-by: Chuang Xu <xuchuangxclwt@bytedance.com>
> ---
>  include/exec/memory.h | 9 +++++++++
>  softmmu/memory.c      | 1 -
>  2 files changed, 9 insertions(+), 1 deletion(-)
>
> diff --git a/include/exec/memory.h b/include/exec/memory.h
> index 91f8a2395a..b43cd46084 100644
> --- a/include/exec/memory.h
> +++ b/include/exec/memory.h
> @@ -1069,8 +1069,17 @@ struct FlatView {
>      MemoryRegion *root;
>  };
>
> +static unsigned memory_region_transaction_depth;

This looks odd. If you define a static variable in a
header file then each .c file which directly or indirectly
includes the header will get its own private copy of the
variable. This probably isn't what you want...

thanks
-- PMM
Chuang Xu Dec. 20, 2022, 2:27 p.m. UTC | #5
On 2022/12/16 上午12:04, Peter Xu wrote:

On Wed, Dec 14, 2022 at 04:38:52PM -0500, Peter Xu wrote:

On Wed, Dec 14, 2022 at 08:03:38AM -0800, Chuang Xu wrote:

On 2022/12/13 下午9:35, Chuang Xu wrote:

Before using any flatview, sanity check we're not during a memory
region transaction or the map can be invalid.

Signed-off-by: Chuang Xu <xuchuangxclwt@bytedance.com>
<xuchuangxclwt@bytedance.com><xuchuangxclwt@bytedance.com>
<xuchuangxclwt@bytedance.com>
---
 include/exec/memory.h | 9 +++++++++
 softmmu/memory.c      | 1 -
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/include/exec/memory.h b/include/exec/memory.h
index 91f8a2395a..b43cd46084 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -1069,8 +1069,17 @@ struct FlatView {
     MemoryRegion *root;
 };

+static unsigned memory_region_transaction_depth;
+
 static inline FlatView *address_space_to_flatview(AddressSpace *as)
 {
+    /*
+     * Before using any flatview, sanity check we're not during a memory
+     * region transaction or the map can be invalid.  Note that this can
+     * also be called during commit phase of memory transaction, but that
+     * should also only happen when the depth decreases to 0 first.
+     */
+    assert(memory_region_transaction_depth == 0);
     return qatomic_rcu_read(&as->current_map);
 }

diff --git a/softmmu/memory.c b/softmmu/memory.c
index bc0be3f62c..f177c40cd8 100644
--- a/softmmu/memory.c
+++ b/softmmu/memory.c
@@ -37,7 +37,6 @@

 //#define DEBUG_UNASSIGNED

-static unsigned memory_region_transaction_depth;
 static bool memory_region_update_pending;
 static bool ioeventfd_update_pending;
 unsigned int global_dirty_tracking;

Here are some new situations to be synchronized.

I found that there is a probability to trigger assert in the QEMU startup phase.

Here is the coredump backtrace:

#0  __GI_raise (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:50
#1  0x00007f7825e33535 in __GI_abort () at abort.c:79
#2  0x00007f7825e3340f in __assert_fail_base (fmt=0x7f7825f94ef0
"%s%s%s:%u: %s%sAssertion `%s' failed.\n%n", assertion=0x5653c643add8
"memory_region_transaction_depth == 0",
    file=0x5653c63dad78
"/data00/migration/qemu-open/include/exec/memory.h", line=1082,
function=<optimized out>) at assert.c:92
#3  0x00007f7825e411a2 in __GI___assert_fail
(assertion=assertion@entry=0x5653c643add8
"memory_region_transaction_depth == 0",
    file=file@entry=0x5653c63dad78
"/data00/migration/qemu-open/include/exec/memory.h",
line=line@entry=1082,
    function=function@entry=0x5653c643bd00 <__PRETTY_FUNCTION__.18101>
"address_space_to_flatview") at assert.c:101
#4  0x00005653c60f0383 in address_space_to_flatview (as=0x5653c6af2340
<address_space_memory>) at
/data00/migration/qemu-open/include/exec/memory.h:1082
#5  address_space_to_flatview (as=0x5653c6af2340
<address_space_memory>) at
/data00/migration/qemu-open/include/exec/memory.h:1074
#6  address_space_get_flatview (as=0x5653c6af2340
<address_space_memory>) at ../softmmu/memory.c:809
#7  0x00005653c60fef04 in address_space_cache_init
(cache=cache@entry=0x7f781fff8420, as=<optimized out>,
addr=63310635776, len=48, is_write=is_write@entry=false)
    at ../softmmu/physmem.c:3352
#8  0x00005653c60c08c5 in virtqueue_split_pop (vq=0x7f781c576270,
sz=264) at ../hw/virtio/virtio.c:1959
#9  0x00005653c60c0b7d in virtqueue_pop (vq=vq@entry=0x7f781c576270,
sz=<optimized out>) at ../hw/virtio/virtio.c:2177
#10 0x00005653c609f14f in virtio_scsi_pop_req
(s=s@entry=0x5653c9034300, vq=vq@entry=0x7f781c576270) at
../hw/scsi/virtio-scsi.c:219
#11 0x00005653c60a04a3 in virtio_scsi_handle_cmd_vq
(vq=0x7f781c576270, s=0x5653c9034300) at ../hw/scsi/virtio-scsi.c:735
#12 virtio_scsi_handle_cmd (vdev=0x5653c9034300, vq=0x7f781c576270) at
../hw/scsi/virtio-scsi.c:776
#13 0x00005653c60ba72f in virtio_queue_notify_vq (vq=0x7f781c576270)
at ../hw/virtio/virtio.c:2847
#14 0x00005653c62d9706 in aio_dispatch_handler
(ctx=ctx@entry=0x5653c84909e0, node=0x7f68e4007840) at
../util/aio-posix.c:369
#15 0x00005653c62da254 in aio_dispatch_ready_handlers
(ready_list=0x7f781fffe6a8, ctx=0x5653c84909e0) at
../util/aio-posix.c:399
#16 aio_poll (ctx=0x5653c84909e0, blocking=blocking@entry=true) at
../util/aio-posix.c:713
#17 0x00005653c61b2296 in iothread_run
(opaque=opaque@entry=0x5653c822c390) at ../iothread.c:67
#18 0x00005653c62dcd8a in qemu_thread_start (args=<optimized out>) at
../util/qemu-thread-posix.c:505
#19 0x00007f7825fd8fa3 in start_thread (arg=<optimized out>) at
pthread_create.c:486
#20 0x00007f7825f0a06f in clone () at
../sysdeps/unix/sysv/linux/x86_64/clone.S:95

This does look like a bug to me.

Paolo/Michael?

Hmm, I found that virtqueue_split_pop() took the rcu lock.. then I think
it's fine.

Chuang, I think what you can try next is add a helper to detect holding of
rcu lock, then assert with "depth==0 || rcu_read_locked()".  I think that's:

static inline bool rcu_read_locked(void)
{
    struct rcu_reader_data *p_rcu_reader = get_ptr_rcu_reader();

    return p_rcu_reader->depth > 0;
}

Then IIUC you can even drop patch 2 because virtio_load() also takes the
rcu lock.


Good idea! Later I'll try this way in my v4 patches.

Thanks very much!
Chuang Xu Dec. 20, 2022, 2:28 p.m. UTC | #6
On 2022/12/16 上午12:51, Peter Maydell wrote:

On Tue, 13 Dec 2022 at 13:36, Chuang Xu <xuchuangxclwt@bytedance.com>
<xuchuangxclwt@bytedance.com> wrote:

Before using any flatview, sanity check we're not during a memory
region transaction or the map can be invalid.

Signed-off-by: Chuang Xu <xuchuangxclwt@bytedance.com>
<xuchuangxclwt@bytedance.com>
---
 include/exec/memory.h | 9 +++++++++
 softmmu/memory.c      | 1 -
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/include/exec/memory.h b/include/exec/memory.h
index 91f8a2395a..b43cd46084 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -1069,8 +1069,17 @@ struct FlatView {
     MemoryRegion *root;
 };

+static unsigned memory_region_transaction_depth;

This looks odd. If you define a static variable in a
header file then each .c file which directly or indirectly
includes the header will get its own private copy of the
variable. This probably isn't what you want...

thanks
-- PMM

Yes, Maybe we should add a function to acquire the value..

I'll add this part to v4. Thanks!
Peter Xu Dec. 20, 2022, 3:02 p.m. UTC | #7
On Tue, Dec 20, 2022 at 06:28:40AM -0800, Chuang Xu wrote:
> Yes, Maybe we should add a function to acquire the value..

Or making it non-static would work too.
diff mbox series

Patch

diff --git a/include/exec/memory.h b/include/exec/memory.h
index 91f8a2395a..b43cd46084 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -1069,8 +1069,17 @@  struct FlatView {
     MemoryRegion *root;
 };
 
+static unsigned memory_region_transaction_depth;
+
 static inline FlatView *address_space_to_flatview(AddressSpace *as)
 {
+    /*
+     * Before using any flatview, sanity check we're not during a memory
+     * region transaction or the map can be invalid.  Note that this can
+     * also be called during commit phase of memory transaction, but that
+     * should also only happen when the depth decreases to 0 first.
+     */
+    assert(memory_region_transaction_depth == 0);
     return qatomic_rcu_read(&as->current_map);
 }
 
diff --git a/softmmu/memory.c b/softmmu/memory.c
index bc0be3f62c..f177c40cd8 100644
--- a/softmmu/memory.c
+++ b/softmmu/memory.c
@@ -37,7 +37,6 @@ 
 
 //#define DEBUG_UNASSIGNED
 
-static unsigned memory_region_transaction_depth;
 static bool memory_region_update_pending;
 static bool ioeventfd_update_pending;
 unsigned int global_dirty_tracking;