diff mbox

[v5,11/12] qed: Implement .bdrv_drain

Message ID 1445393209-26545-12-git-send-email-famz@redhat.com
State New
Headers show

Commit Message

Fam Zheng Oct. 21, 2015, 2:06 a.m. UTC
The "need_check_timer" is used to clear the "NEED_CHECK" flag in the
image header after a grace period once the metadata update has finished. In
compliance with the bdrv_drain semantics we should make sure it remains
deleted once .bdrv_drain is called.

Call qed_need_check_timer_cb() manually to update the header
immediately.

Signed-off-by: Fam Zheng <famz@redhat.com>
---
 block/qed.c | 7 +++++++
 1 file changed, 7 insertions(+)

Comments

Jeff Cody Oct. 22, 2015, 2:20 a.m. UTC | #1
On Wed, Oct 21, 2015 at 10:06:48AM +0800, Fam Zheng wrote:
> The "need_check_timer" is used to clear the "NEED_CHECK" flag in the
> image header after a grace period once metadata update has finished. In
> compliance to the bdrv_drain semantics we should make sure it remains
> deleted once .bdrv_drain is called.
> 
> Call the qed_need_check_timer_cb manually to update the header
> immediately.
> 
> Signed-off-by: Fam Zheng <famz@redhat.com>
> ---
>  block/qed.c | 7 +++++++
>  1 file changed, 7 insertions(+)
> 
> diff --git a/block/qed.c b/block/qed.c
> index 5ea05d4..e9dcb4d 100644
> --- a/block/qed.c
> +++ b/block/qed.c
> @@ -375,6 +375,12 @@ static void bdrv_qed_attach_aio_context(BlockDriverState *bs,
>      }
>  }
>  
> +static void bdrv_qed_drain(BlockDriverState *bs)
> +{
> +    qed_cancel_need_check_timer(bs->opaque);
> +    qed_need_check_timer_cb(bs->opaque);
> +}
> +

Uh oh.

This causes a segfault sometimes, and other times an abort:


   # ./qemu-img create -f qed test.qed 512M
   Formatting 'test.qed', fmt=qed size=536870912 cluster_size=65536

   # ./qemu-io -c "read 0 512M" test.qed
   read 536870912/536870912 bytes at offset 0
   512 MiB, 1 ops; 0.0556 sec (8.988 GiB/sec and 17.9759 ops/sec)
   Segmentation fault (core dumped)


If I run the above qemu-io command with gdb, it will abort in
qed_plug_allocating_write_reqs().

I'd hazard a guess (I have not verified) that it is due to the
qed_header_write() call triggered by the aio flush callback function
qed_clear_need_check().  The aio flush is done inside the
qed_need_check_timer_cb() call.



>  static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags,
>                           Error **errp)
>  {
> @@ -1676,6 +1682,7 @@ static BlockDriver bdrv_qed = {
>      .bdrv_check               = bdrv_qed_check,
>      .bdrv_detach_aio_context  = bdrv_qed_detach_aio_context,
>      .bdrv_attach_aio_context  = bdrv_qed_attach_aio_context,
> +    .bdrv_drain               = bdrv_qed_drain,
>  };
>  
>  static void bdrv_qed_init(void)
> -- 
> 2.4.3
> 
>
Fam Zheng Oct. 22, 2015, 2:59 a.m. UTC | #2
On Wed, 10/21 22:20, Jeff Cody wrote:
> On Wed, Oct 21, 2015 at 10:06:48AM +0800, Fam Zheng wrote:
> > The "need_check_timer" is used to clear the "NEED_CHECK" flag in the
> > image header after a grace period once metadata update has finished. In
> > compliance to the bdrv_drain semantics we should make sure it remains
> > deleted once .bdrv_drain is called.
> > 
> > Call the qed_need_check_timer_cb manually to update the header
> > immediately.
> > 
> > Signed-off-by: Fam Zheng <famz@redhat.com>
> > ---
> >  block/qed.c | 7 +++++++
> >  1 file changed, 7 insertions(+)
> > 
> > diff --git a/block/qed.c b/block/qed.c
> > index 5ea05d4..e9dcb4d 100644
> > --- a/block/qed.c
> > +++ b/block/qed.c
> > @@ -375,6 +375,12 @@ static void bdrv_qed_attach_aio_context(BlockDriverState *bs,
> >      }
> >  }
> >  
> > +static void bdrv_qed_drain(BlockDriverState *bs)
> > +{
> > +    qed_cancel_need_check_timer(bs->opaque);
> > +    qed_need_check_timer_cb(bs->opaque);
> > +}
> > +
> 
> Uh oh.
> 
> This causes a segfault sometimes, and other times an abort:
> 
> 
>    # ./qemu-img create -f qed test.qed 512M
>    Formatting 'test.qed', fmt=qed size=536870912 cluster_size=65536
> 
>    # ./qemu-io -c "read 0 512M" test.qed
>    read 536870912/536870912 bytes at offset 0
>    512 MiB, 1 ops; 0.0556 sec (8.988 GiB/sec and 17.9759 ops/sec)
>    Segmentation fault (core dumped)
> 
> 
> If I run the above qemu-io command with gdb, it will abort in
> qed_plug_allocating_write_reqs().
> 
> I'd hazard a guess (I have not verified) that it is due to the
> qed_header_write() call triggered by the aio flush callback function
> qed_clear_need_check().  The aio flush is done inside the
> qed_need_check_timer_cb() call.

Good catch, I think it's because of the second bdrv_drain in bdrv_close(),
when the first bdrv_aio_flush in qed_need_check_timer_cb hasn't finished.

We need a different bdrv_qed_drain implementation here.

Fam

> 
> 
> 
> >  static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags,
> >                           Error **errp)
> >  {
> > @@ -1676,6 +1682,7 @@ static BlockDriver bdrv_qed = {
> >      .bdrv_check               = bdrv_qed_check,
> >      .bdrv_detach_aio_context  = bdrv_qed_detach_aio_context,
> >      .bdrv_attach_aio_context  = bdrv_qed_attach_aio_context,
> > +    .bdrv_drain               = bdrv_qed_drain,
> >  };
> >  
> >  static void bdrv_qed_init(void)
> > -- 
> > 2.4.3
> > 
> >
diff mbox

Patch

diff --git a/block/qed.c b/block/qed.c
index 5ea05d4..e9dcb4d 100644
--- a/block/qed.c
+++ b/block/qed.c
@@ -375,6 +375,12 @@  static void bdrv_qed_attach_aio_context(BlockDriverState *bs,
     }
 }
 
+static void bdrv_qed_drain(BlockDriverState *bs)
+{
+    qed_cancel_need_check_timer(bs->opaque);
+    qed_need_check_timer_cb(bs->opaque);
+}
+
 static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags,
                          Error **errp)
 {
@@ -1676,6 +1682,7 @@  static BlockDriver bdrv_qed = {
     .bdrv_check               = bdrv_qed_check,
     .bdrv_detach_aio_context  = bdrv_qed_detach_aio_context,
     .bdrv_attach_aio_context  = bdrv_qed_attach_aio_context,
+    .bdrv_drain               = bdrv_qed_drain,
 };
 
 static void bdrv_qed_init(void)