diff mbox series

[v4,2/4] block/nbd.c: Add yank feature

Message ID 8e6a8e61b25813cdcdf385729709ef57f6255a3e.1590421341.git.lukasstraub2@web.de
State New
Headers show
Series Introduce 'yank' oob qmp command to recover from hanging qemu | expand

Commit Message

Lukas Straub May 25, 2020, 3:44 p.m. UTC
Register a yank function which shuts down the socket and sets
s->state = NBD_CLIENT_QUIT. This is the same behaviour as if an
error occured.

Signed-off-by: Lukas Straub <lukasstraub2@web.de>
---
 Makefile.objs |   1 +
 block/nbd.c   | 101 ++++++++++++++++++++++++++++++++------------------
 2 files changed, 65 insertions(+), 37 deletions(-)

--
2.20.1

Comments

Daniel P. Berrangé June 16, 2020, 2:40 p.m. UTC | #1
On Mon, May 25, 2020 at 05:44:26PM +0200, Lukas Straub wrote:
> Register a yank function which shuts down the socket and sets
> s->state = NBD_CLIENT_QUIT. This is the same behaviour as if an
> error occured.
> 
> Signed-off-by: Lukas Straub <lukasstraub2@web.de>
> ---
>  Makefile.objs |   1 +
>  block/nbd.c   | 101 ++++++++++++++++++++++++++++++++------------------
>  2 files changed, 65 insertions(+), 37 deletions(-)

Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>



Regards,
Daniel
Daniel P. Berrangé June 16, 2020, 2:44 p.m. UTC | #2
On Mon, May 25, 2020 at 05:44:26PM +0200, Lukas Straub wrote:
> Register a yank function which shuts down the socket and sets
> s->state = NBD_CLIENT_QUIT. This is the same behaviour as if an
> error occured.
> 
> Signed-off-by: Lukas Straub <lukasstraub2@web.de>
> ---
>  Makefile.objs |   1 +
>  block/nbd.c   | 101 ++++++++++++++++++++++++++++++++------------------
>  2 files changed, 65 insertions(+), 37 deletions(-)
> 
> diff --git a/Makefile.objs b/Makefile.objs
> index a7c967633a..8e403b81f3 100644
> --- a/Makefile.objs
> +++ b/Makefile.objs
> @@ -18,6 +18,7 @@ block-obj-y += block.o blockjob.o job.o
>  block-obj-y += block/ scsi/
>  block-obj-y += qemu-io-cmds.o
>  block-obj-$(CONFIG_REPLICATION) += replication.o
> +block-obj-y += yank.o

Oh, I see this is repeated for migration and chardev code too.

Instead of doing this and relying on linker to merge duplicates,
I think we should put yank.c into util/ and built it into util-obj-y,
so it gets added to everything.

Regards,
Daniel
Stefan Hajnoczi June 17, 2020, 3:09 p.m. UTC | #3
On Mon, May 25, 2020 at 05:44:26PM +0200, Lukas Straub wrote:
> @@ -1395,6 +1407,15 @@ static int nbd_client_reopen_prepare(BDRVReopenState *state,
>      return 0;
>  }
> 
> +static void nbd_yank(void *opaque)
> +{
> +    BlockDriverState *bs = opaque;
> +    BDRVNBDState *s = (BDRVNBDState *)bs->opaque;
> +
> +    qio_channel_shutdown(QIO_CHANNEL(s->sioc), QIO_CHANNEL_SHUTDOWN_BOTH, NULL);

qio_channel_shutdown() is not guaranteed to be thread-safe. Please
document new assumptions that are being introduced.

Today we can more or less get away with it (although TLS sockets are a
little iffy) because it boils down the a shutdown(2) system call. I
think it would be okay to update the qio_channel_shutdown() and
.io_shutdown() documentation to clarify that this is thread-safe.

> +    atomic_set(&s->state, NBD_CLIENT_QUIT);

docs/devel/atomics.rst says:

  No barriers are implied by ``atomic_read`` and ``atomic_set`` in either Linux
  or QEMU.

Other threads might not see the latest value of s->state because this is
a weakly ordered memory access.

I haven't audited the NBD code in detail, but if you want the other
threads to always see NBD_CLIENT_QUIT then s->state should be set before
calling qio_channel_shutdown() using a stronger atomics API like
atomic_load_acquire()/atomic_store_release().
Lukas Straub June 19, 2020, 4:23 p.m. UTC | #4
On Tue, 16 Jun 2020 15:44:06 +0100
Daniel P. Berrangé <berrange@redhat.com> wrote:

> On Mon, May 25, 2020 at 05:44:26PM +0200, Lukas Straub wrote:
> > Register a yank function which shuts down the socket and sets
> > s->state = NBD_CLIENT_QUIT. This is the same behaviour as if an
> > error occured.
> > 
> > Signed-off-by: Lukas Straub <lukasstraub2@web.de>
> > ---
> >  Makefile.objs |   1 +
> >  block/nbd.c   | 101 ++++++++++++++++++++++++++++++++------------------
> >  2 files changed, 65 insertions(+), 37 deletions(-)
> > 
> > diff --git a/Makefile.objs b/Makefile.objs
> > index a7c967633a..8e403b81f3 100644
> > --- a/Makefile.objs
> > +++ b/Makefile.objs
> > @@ -18,6 +18,7 @@ block-obj-y += block.o blockjob.o job.o
> >  block-obj-y += block/ scsi/
> >  block-obj-y += qemu-io-cmds.o
> >  block-obj-$(CONFIG_REPLICATION) += replication.o
> > +block-obj-y += yank.o  
> 
> Oh, I see this is repeated for migration and chardev code too.
> 
> Instead of doing this and relying on linker to merge duplicates,
> I think we should put yank.c into util/ and built it into util-obj-y,
> so it gets added to everything.

Ok, I will do this in the next version.

Thanks,
Lukas Straub

> Regards,
> Daniel
Lukas Straub June 19, 2020, 6:07 p.m. UTC | #5
On Wed, 17 Jun 2020 16:09:09 +0100
Stefan Hajnoczi <stefanha@gmail.com> wrote:

> On Mon, May 25, 2020 at 05:44:26PM +0200, Lukas Straub wrote:
> > @@ -1395,6 +1407,15 @@ static int nbd_client_reopen_prepare(BDRVReopenState *state,
> >      return 0;
> >  }
> > 
> > +static void nbd_yank(void *opaque)
> > +{
> > +    BlockDriverState *bs = opaque;
> > +    BDRVNBDState *s = (BDRVNBDState *)bs->opaque;
> > +
> > +    qio_channel_shutdown(QIO_CHANNEL(s->sioc), QIO_CHANNEL_SHUTDOWN_BOTH, NULL);  
> 
> qio_channel_shutdown() is not guaranteed to be thread-safe. Please
> document new assumptions that are being introduced.
> 
> Today we can more or less get away with it (although TLS sockets are a
> little iffy) because it boils down the a shutdown(2) system call. I
> think it would be okay to update the qio_channel_shutdown() and
> .io_shutdown() documentation to clarify that this is thread-safe.

Good idea, especially since the migration code already assumes this. I will do this in the next version.

> > +    atomic_set(&s->state, NBD_CLIENT_QUIT);  
> 
> docs/devel/atomics.rst says:
> 
>   No barriers are implied by ``atomic_read`` and ``atomic_set`` in either Linux
>   or QEMU.
> 
> Other threads might not see the latest value of s->state because this is
> a weakly ordered memory access.
> 
> I haven't audited the NBD code in detail, but if you want the other
> threads to always see NBD_CLIENT_QUIT then s->state should be set before
> calling qio_channel_shutdown() using a stronger atomics API like
> atomic_load_acquire()/atomic_store_release().

You are right, I will change that in the next version.

Thanks,
Lukas Straub
diff mbox series

Patch

diff --git a/Makefile.objs b/Makefile.objs
index a7c967633a..8e403b81f3 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -18,6 +18,7 @@  block-obj-y += block.o blockjob.o job.o
 block-obj-y += block/ scsi/
 block-obj-y += qemu-io-cmds.o
 block-obj-$(CONFIG_REPLICATION) += replication.o
+block-obj-y += yank.o

 block-obj-m = block/

diff --git a/block/nbd.c b/block/nbd.c
index 2160859f64..3a41749f1b 100644
--- a/block/nbd.c
+++ b/block/nbd.c
@@ -35,6 +35,7 @@ 
 #include "qemu/option.h"
 #include "qemu/cutils.h"
 #include "qemu/main-loop.h"
+#include "qemu/atomic.h"

 #include "qapi/qapi-visit-sockets.h"
 #include "qapi/qmp/qstring.h"
@@ -43,6 +44,8 @@ 
 #include "block/nbd.h"
 #include "block/block_int.h"

+#include "yank.h"
+
 #define EN_OPTSTR ":exportname="
 #define MAX_NBD_REQUESTS    16

@@ -84,6 +87,8 @@  typedef struct BDRVNBDState {
     NBDReply reply;
     BlockDriverState *bs;

+    char *yank_name;
+
     /* Connection parameters */
     uint32_t reconnect_delay;
     SocketAddress *saddr;
@@ -94,6 +99,7 @@  typedef struct BDRVNBDState {
 } BDRVNBDState;

 static int nbd_client_connect(BlockDriverState *bs, Error **errp);
+static void nbd_yank(void *opaque);

 static void nbd_clear_bdrvstate(BDRVNBDState *s)
 {
@@ -106,17 +112,19 @@  static void nbd_clear_bdrvstate(BDRVNBDState *s)
     s->tlscredsid = NULL;
     g_free(s->x_dirty_bitmap);
     s->x_dirty_bitmap = NULL;
+    g_free(s->yank_name);
+    s->yank_name = NULL;
 }

 static void nbd_channel_error(BDRVNBDState *s, int ret)
 {
     if (ret == -EIO) {
-        if (s->state == NBD_CLIENT_CONNECTED) {
+        if (atomic_read(&s->state) == NBD_CLIENT_CONNECTED) {
             s->state = s->reconnect_delay ? NBD_CLIENT_CONNECTING_WAIT :
                                             NBD_CLIENT_CONNECTING_NOWAIT;
         }
     } else {
-        if (s->state == NBD_CLIENT_CONNECTED) {
+        if (atomic_read(&s->state) == NBD_CLIENT_CONNECTED) {
             qio_channel_shutdown(s->ioc, QIO_CHANNEL_SHUTDOWN_BOTH, NULL);
         }
         s->state = NBD_CLIENT_QUIT;
@@ -167,7 +175,7 @@  static void nbd_client_attach_aio_context(BlockDriverState *bs,
      * s->connection_co is either yielded from nbd_receive_reply or from
      * nbd_co_reconnect_loop()
      */
-    if (s->state == NBD_CLIENT_CONNECTED) {
+    if (atomic_read(&s->state) == NBD_CLIENT_CONNECTED) {
         qio_channel_attach_aio_context(QIO_CHANNEL(s->ioc), new_context);
     }

@@ -206,7 +214,7 @@  static void nbd_teardown_connection(BlockDriverState *bs)
 {
     BDRVNBDState *s = (BDRVNBDState *)bs->opaque;

-    if (s->state == NBD_CLIENT_CONNECTED) {
+    if (atomic_read(&s->state) == NBD_CLIENT_CONNECTED) {
         /* finish any pending coroutines */
         assert(s->ioc);
         qio_channel_shutdown(s->ioc, QIO_CHANNEL_SHUTDOWN_BOTH, NULL);
@@ -230,13 +238,14 @@  static void nbd_teardown_connection(BlockDriverState *bs)

 static bool nbd_client_connecting(BDRVNBDState *s)
 {
-    return s->state == NBD_CLIENT_CONNECTING_WAIT ||
-        s->state == NBD_CLIENT_CONNECTING_NOWAIT;
+    NBDClientState state = atomic_read(&s->state);
+    return state == NBD_CLIENT_CONNECTING_WAIT ||
+        state == NBD_CLIENT_CONNECTING_NOWAIT;
 }

 static bool nbd_client_connecting_wait(BDRVNBDState *s)
 {
-    return s->state == NBD_CLIENT_CONNECTING_WAIT;
+    return atomic_read(&s->state) == NBD_CLIENT_CONNECTING_WAIT;
 }

 static coroutine_fn void nbd_reconnect_attempt(BDRVNBDState *s)
@@ -274,6 +283,7 @@  static coroutine_fn void nbd_reconnect_attempt(BDRVNBDState *s)
     /* Finalize previous connection if any */
     if (s->ioc) {
         nbd_client_detach_aio_context(s->bs);
+        yank_unregister_function(s->yank_name, nbd_yank, s->bs);
         object_unref(OBJECT(s->sioc));
         s->sioc = NULL;
         object_unref(OBJECT(s->ioc));
@@ -305,7 +315,7 @@  static coroutine_fn void nbd_co_reconnect_loop(BDRVNBDState *s)
     nbd_reconnect_attempt(s);

     while (nbd_client_connecting(s)) {
-        if (s->state == NBD_CLIENT_CONNECTING_WAIT &&
+        if (atomic_read(&s->state) == NBD_CLIENT_CONNECTING_WAIT &&
             qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - start_time_ns > delay_ns)
         {
             s->state = NBD_CLIENT_CONNECTING_NOWAIT;
@@ -341,7 +351,7 @@  static coroutine_fn void nbd_connection_entry(void *opaque)
     int ret = 0;
     Error *local_err = NULL;

-    while (s->state != NBD_CLIENT_QUIT) {
+    while (atomic_read(&s->state) != NBD_CLIENT_QUIT) {
         /*
          * The NBD client can only really be considered idle when it has
          * yielded from qio_channel_readv_all_eof(), waiting for data. This is
@@ -356,7 +366,7 @@  static coroutine_fn void nbd_connection_entry(void *opaque)
             nbd_co_reconnect_loop(s);
         }

-        if (s->state != NBD_CLIENT_CONNECTED) {
+        if (atomic_read(&s->state) != NBD_CLIENT_CONNECTED) {
             continue;
         }

@@ -411,6 +421,7 @@  static coroutine_fn void nbd_connection_entry(void *opaque)
     s->connection_co = NULL;
     if (s->ioc) {
         nbd_client_detach_aio_context(s->bs);
+        yank_unregister_function(s->yank_name, nbd_yank, s->bs);
         object_unref(OBJECT(s->sioc));
         s->sioc = NULL;
         object_unref(OBJECT(s->ioc));
@@ -435,7 +446,7 @@  static int nbd_co_send_request(BlockDriverState *bs,
         qemu_co_queue_wait(&s->free_sema, &s->send_mutex);
     }

-    if (s->state != NBD_CLIENT_CONNECTED) {
+    if (atomic_read(&s->state) != NBD_CLIENT_CONNECTED) {
         rc = -EIO;
         goto err;
     }
@@ -462,7 +473,7 @@  static int nbd_co_send_request(BlockDriverState *bs,
     if (qiov) {
         qio_channel_set_cork(s->ioc, true);
         rc = nbd_send_request(s->ioc, request);
-        if (rc >= 0 && s->state == NBD_CLIENT_CONNECTED) {
+        if (rc >= 0 && atomic_read(&s->state) == NBD_CLIENT_CONNECTED) {
             if (qio_channel_writev_all(s->ioc, qiov->iov, qiov->niov,
                                        NULL) < 0) {
                 rc = -EIO;
@@ -777,7 +788,7 @@  static coroutine_fn int nbd_co_do_receive_one_chunk(
     s->requests[i].receiving = true;
     qemu_coroutine_yield();
     s->requests[i].receiving = false;
-    if (s->state != NBD_CLIENT_CONNECTED) {
+    if (atomic_read(&s->state) != NBD_CLIENT_CONNECTED) {
         error_setg(errp, "Connection closed");
         return -EIO;
     }
@@ -936,7 +947,7 @@  static bool nbd_reply_chunk_iter_receive(BDRVNBDState *s,
     NBDReply local_reply;
     NBDStructuredReplyChunk *chunk;
     Error *local_err = NULL;
-    if (s->state != NBD_CLIENT_CONNECTED) {
+    if (atomic_read(&s->state) != NBD_CLIENT_CONNECTED) {
         error_setg(&local_err, "Connection closed");
         nbd_iter_channel_error(iter, -EIO, &local_err);
         goto break_loop;
@@ -961,7 +972,8 @@  static bool nbd_reply_chunk_iter_receive(BDRVNBDState *s,
     }

     /* Do not execute the body of NBD_FOREACH_REPLY_CHUNK for simple reply. */
-    if (nbd_reply_is_simple(reply) || s->state != NBD_CLIENT_CONNECTED) {
+    if (nbd_reply_is_simple(reply) ||
+        atomic_read(&s->state) != NBD_CLIENT_CONNECTED) {
         goto break_loop;
     }

@@ -1395,6 +1407,15 @@  static int nbd_client_reopen_prepare(BDRVReopenState *state,
     return 0;
 }

+static void nbd_yank(void *opaque)
+{
+    BlockDriverState *bs = opaque;
+    BDRVNBDState *s = (BDRVNBDState *)bs->opaque;
+
+    qio_channel_shutdown(QIO_CHANNEL(s->sioc), QIO_CHANNEL_SHUTDOWN_BOTH, NULL);
+    atomic_set(&s->state, NBD_CLIENT_QUIT);
+}
+
 static void nbd_client_close(BlockDriverState *bs)
 {
     BDRVNBDState *s = (BDRVNBDState *)bs->opaque;
@@ -1407,25 +1428,29 @@  static void nbd_client_close(BlockDriverState *bs)
     nbd_teardown_connection(bs);
 }

-static QIOChannelSocket *nbd_establish_connection(SocketAddress *saddr,
-                                                  Error **errp)
+static int nbd_establish_connection(BlockDriverState *bs,
+                                    SocketAddress *saddr,
+                                    Error **errp)
 {
-    QIOChannelSocket *sioc;
+    BDRVNBDState *s = (BDRVNBDState *)bs->opaque;
     Error *local_err = NULL;

-    sioc = qio_channel_socket_new();
-    qio_channel_set_name(QIO_CHANNEL(sioc), "nbd-client");
+    s->sioc = qio_channel_socket_new();
+    qio_channel_set_name(QIO_CHANNEL(s->sioc), "nbd-client");
+    yank_register_function(s->yank_name, nbd_yank, bs);

-    qio_channel_socket_connect_sync(sioc, saddr, &local_err);
+    qio_channel_socket_connect_sync(s->sioc, saddr, &local_err);
     if (local_err) {
-        object_unref(OBJECT(sioc));
+        yank_unregister_function(s->yank_name, nbd_yank, bs);
+        object_unref(OBJECT(s->sioc));
+        s->sioc = NULL;
         error_propagate(errp, local_err);
-        return NULL;
+        return -1;
     }

-    qio_channel_set_delay(QIO_CHANNEL(sioc), false);
+    qio_channel_set_delay(QIO_CHANNEL(s->sioc), false);

-    return sioc;
+    return 0;
 }

 static int nbd_client_connect(BlockDriverState *bs, Error **errp)
@@ -1438,28 +1463,27 @@  static int nbd_client_connect(BlockDriverState *bs, Error **errp)
      * establish TCP connection, return error if it fails
      * TODO: Configurable retry-until-timeout behaviour.
      */
-    QIOChannelSocket *sioc = nbd_establish_connection(s->saddr, errp);
-
-    if (!sioc) {
+    if (nbd_establish_connection(bs, s->saddr, errp) < 0) {
         return -ECONNREFUSED;
     }

     /* NBD handshake */
     trace_nbd_client_connect(s->export);
-    qio_channel_set_blocking(QIO_CHANNEL(sioc), false, NULL);
-    qio_channel_attach_aio_context(QIO_CHANNEL(sioc), aio_context);
+    qio_channel_set_blocking(QIO_CHANNEL(s->sioc), false, NULL);
+    qio_channel_attach_aio_context(QIO_CHANNEL(s->sioc), aio_context);

     s->info.request_sizes = true;
     s->info.structured_reply = true;
     s->info.base_allocation = true;
     s->info.x_dirty_bitmap = g_strdup(s->x_dirty_bitmap);
     s->info.name = g_strdup(s->export ?: "");
-    ret = nbd_receive_negotiate(aio_context, QIO_CHANNEL(sioc), s->tlscreds,
+    ret = nbd_receive_negotiate(aio_context, QIO_CHANNEL(s->sioc), s->tlscreds,
                                 s->hostname, &s->ioc, &s->info, errp);
     g_free(s->info.x_dirty_bitmap);
     g_free(s->info.name);
     if (ret < 0) {
-        object_unref(OBJECT(sioc));
+        yank_unregister_function(s->yank_name, nbd_yank, bs);
+        object_unref(OBJECT(s->sioc));
         return ret;
     }
     if (s->x_dirty_bitmap && !s->info.base_allocation) {
@@ -1485,10 +1509,8 @@  static int nbd_client_connect(BlockDriverState *bs, Error **errp)
         }
     }

-    s->sioc = sioc;
-
     if (!s->ioc) {
-        s->ioc = QIO_CHANNEL(sioc);
+        s->ioc = QIO_CHANNEL(s->sioc);
         object_ref(OBJECT(s->ioc));
     }

@@ -1504,9 +1526,10 @@  static int nbd_client_connect(BlockDriverState *bs, Error **errp)
     {
         NBDRequest request = { .type = NBD_CMD_DISC };

-        nbd_send_request(s->ioc ?: QIO_CHANNEL(sioc), &request);
+        nbd_send_request(s->ioc ?: QIO_CHANNEL(s->sioc), &request);

-        object_unref(OBJECT(sioc));
+        yank_unregister_function(s->yank_name, nbd_yank, bs);
+        object_unref(OBJECT(s->sioc));

         return ret;
     }
@@ -1913,6 +1936,9 @@  static int nbd_open(BlockDriverState *bs, QDict *options, int flags,
     qemu_co_mutex_init(&s->send_mutex);
     qemu_co_queue_init(&s->free_sema);

+    s->yank_name = g_strconcat("blockdev:", bs->node_name, NULL);
+    yank_register_instance(s->yank_name);
+
     ret = nbd_client_connect(bs, errp);
     if (ret < 0) {
         nbd_clear_bdrvstate(s);
@@ -1972,6 +1998,7 @@  static void nbd_close(BlockDriverState *bs)
     BDRVNBDState *s = bs->opaque;

     nbd_client_close(bs);
+    yank_unregister_instance(s->yank_name);
     nbd_clear_bdrvstate(s);
 }