diff mbox series

[v4] nbd/server: do not poll within a coroutine context

Message ID 20240405174523.844181-2-eblake@redhat.com
State New
Headers show
Series [v4] nbd/server: do not poll within a coroutine context | expand

Commit Message

Eric Blake April 5, 2024, 5:44 p.m. UTC
From: Zhu Yangyang <zhuyangyang14@huawei.com>

Coroutines are not supposed to block. Instead, they should yield.

The client performs TLS upgrade outside of an AIOContext, during
synchronous handshake; this still requires g_main_loop.  But the
server responds to TLS upgrade inside a coroutine, so a nested
g_main_loop is wrong.  Since the two callbacks no longer share more
than the setting of data.complete and data.error, it's just as easy to
use static helpers instead of trying to share a common code path.

Fixes: f95910f ("nbd: implement TLS support in the protocol negotiation")
Signed-off-by: Zhu Yangyang <zhuyangyang14@huawei.com>
[eblake: move callbacks to their use point]
Signed-off-by: Eric Blake <eblake@redhat.com>
---

v3: https://lists.gnu.org/archive/html/qemu-devel/2024-04/msg00375.html

in v4, factor even the struct to the .c files, avoiding a union [Vladimir]

 nbd/nbd-internal.h | 10 ----------
 nbd/client.c       | 27 +++++++++++++++++++++++----
 nbd/common.c       | 11 -----------
 nbd/server.c       | 29 +++++++++++++++++++++++------
 4 files changed, 46 insertions(+), 31 deletions(-)

Comments

Vladimir Sementsov-Ogievskiy April 8, 2024, 8:46 a.m. UTC | #1
On 05.04.24 20:44, Eric Blake wrote:
> From: Zhu Yangyang <zhuyangyang14@huawei.com>
> 
> Coroutines are not supposed to block. Instead, they should yield.
> 
> The client performs TLS upgrade outside of an AIOContext, during
> synchronous handshake; this still requires g_main_loop.  But the
> server responds to TLS upgrade inside a coroutine, so a nested
> g_main_loop is wrong.  Since the two callbacks no longer share more
> than the setting of data.complete and data.error, it's just as easy to
> use static helpers instead of trying to share a common code path.
> 
> Fixes: f95910f ("nbd: implement TLS support in the protocol negotiation")
> Signed-off-by: Zhu Yangyang <zhuyangyang14@huawei.com>
> [eblake: move callbacks to their use point]
> Signed-off-by: Eric Blake <eblake@redhat.com>

Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>

still, some notes below

> ---
> 
> v3: https://lists.gnu.org/archive/html/qemu-devel/2024-04/msg00375.html
> 
> in v4, factor even the struct to the .c files, avoiding a union [Vladimir]
> 
>   nbd/nbd-internal.h | 10 ----------
>   nbd/client.c       | 27 +++++++++++++++++++++++----
>   nbd/common.c       | 11 -----------
>   nbd/server.c       | 29 +++++++++++++++++++++++------
>   4 files changed, 46 insertions(+), 31 deletions(-)
> 
> diff --git a/nbd/nbd-internal.h b/nbd/nbd-internal.h
> index dfa02f77ee4..91895106a95 100644
> --- a/nbd/nbd-internal.h
> +++ b/nbd/nbd-internal.h
> @@ -72,16 +72,6 @@ static inline int nbd_write(QIOChannel *ioc, const void *buffer, size_t size,
>       return qio_channel_write_all(ioc, buffer, size, errp) < 0 ? -EIO : 0;
>   }
> 
> -struct NBDTLSHandshakeData {
> -    GMainLoop *loop;
> -    bool complete;
> -    Error *error;
> -};
> -
> -
> -void nbd_tls_handshake(QIOTask *task,
> -                       void *opaque);
> -
>   int nbd_drop(QIOChannel *ioc, size_t size, Error **errp);
> 
>   #endif
> diff --git a/nbd/client.c b/nbd/client.c
> index 29ffc609a4b..c7141d7a098 100644
> --- a/nbd/client.c
> +++ b/nbd/client.c
> @@ -596,13 +596,31 @@ static int nbd_request_simple_option(QIOChannel *ioc, int opt, bool strict,
>       return 1;
>   }
> 
> +/* Callback to learn when QIO TLS upgrade is complete */
> +struct NBDTLSClientHandshakeData {
> +    bool complete;
> +    Error *error;
> +    GMainLoop *loop;
> +};
> +
> +static void nbd_client_tls_handshake(QIOTask *task, void *opaque)
> +{
> +    struct NBDTLSClientHandshakeData *data = opaque;
> +
> +    qio_task_propagate_error(task, &data->error);
> +    data->complete = true;
> +    if (data->loop) {
> +        g_main_loop_quit(data->loop);
> +    }
> +}
> +
>   static QIOChannel *nbd_receive_starttls(QIOChannel *ioc,
>                                           QCryptoTLSCreds *tlscreds,
>                                           const char *hostname, Error **errp)
>   {
>       int ret;
>       QIOChannelTLS *tioc;
> -    struct NBDTLSHandshakeData data = { 0 };
> +    struct NBDTLSClientHandshakeData data = { 0 };
> 
>       ret = nbd_request_simple_option(ioc, NBD_OPT_STARTTLS, true, errp);
>       if (ret <= 0) {
> @@ -619,18 +637,19 @@ static QIOChannel *nbd_receive_starttls(QIOChannel *ioc,
>           return NULL;
>       }
>       qio_channel_set_name(QIO_CHANNEL(tioc), "nbd-client-tls");
> -    data.loop = g_main_loop_new(g_main_context_default(), FALSE);
>       trace_nbd_receive_starttls_tls_handshake();
>       qio_channel_tls_handshake(tioc,
> -                              nbd_tls_handshake,
> +                              nbd_client_tls_handshake,
>                                 &data,
>                                 NULL,
>                                 NULL);
> 
>       if (!data.complete) {
> +        data.loop = g_main_loop_new(g_main_context_default(), FALSE);
>           g_main_loop_run(data.loop);
> +        g_main_loop_unref(data.loop);

probably good to assert(data.complete);

>       }
> -    g_main_loop_unref(data.loop);
> +
>       if (data.error) {
>           error_propagate(errp, data.error);
>           object_unref(OBJECT(tioc));
> diff --git a/nbd/common.c b/nbd/common.c
> index 3247c1d618a..589a748cfe6 100644
> --- a/nbd/common.c
> +++ b/nbd/common.c
> @@ -47,17 +47,6 @@ int nbd_drop(QIOChannel *ioc, size_t size, Error **errp)
>   }
> 
> 
> -void nbd_tls_handshake(QIOTask *task,
> -                       void *opaque)
> -{
> -    struct NBDTLSHandshakeData *data = opaque;
> -
> -    qio_task_propagate_error(task, &data->error);
> -    data->complete = true;
> -    g_main_loop_quit(data->loop);
> -}
> -
> -
>   const char *nbd_opt_lookup(uint32_t opt)
>   {
>       switch (opt) {
> diff --git a/nbd/server.c b/nbd/server.c
> index c3484cc1ebc..ea13cf0e766 100644
> --- a/nbd/server.c
> +++ b/nbd/server.c
> @@ -748,6 +748,23 @@ static int nbd_negotiate_handle_info(NBDClient *client, Error **errp)
>       return rc;
>   }
> 
> +/* Callback to learn when QIO TLS upgrade is complete */
> +struct NBDTLSServerHandshakeData {
> +    bool complete;
> +    Error *error;
> +    Coroutine *co;
> +};
> +
> +static void nbd_server_tls_handshake(QIOTask *task, void *opaque)
> +{
> +    struct NBDTLSServerHandshakeData *data = opaque;
> +
> +    qio_task_propagate_error(task, &data->error);
> +    data->complete = true;
> +    if (!qemu_coroutine_entered(data->co)) {
> +        aio_co_wake(data->co);
> +    }
> +}
> 
>   /* Handle NBD_OPT_STARTTLS. Return NULL to drop connection, or else the
>    * new channel for all further (now-encrypted) communication. */
> @@ -756,7 +773,7 @@ static QIOChannel *nbd_negotiate_handle_starttls(NBDClient *client,
>   {
>       QIOChannel *ioc;
>       QIOChannelTLS *tioc;
> -    struct NBDTLSHandshakeData data = { 0 };
> +    struct NBDTLSServerHandshakeData data = { 0 };
> 
>       assert(client->opt == NBD_OPT_STARTTLS);
> 
> @@ -777,17 +794,17 @@ static QIOChannel *nbd_negotiate_handle_starttls(NBDClient *client,

preexisting: lack coroutine_fn, as well as caller nbd_negotiate_options()

> 
>       qio_channel_set_name(QIO_CHANNEL(tioc), "nbd-server-tls");
>       trace_nbd_negotiate_handle_starttls_handshake();
> -    data.loop = g_main_loop_new(g_main_context_default(), FALSE);
> +    data.co = qemu_coroutine_self();
>       qio_channel_tls_handshake(tioc,
> -                              nbd_tls_handshake,
> +                              nbd_server_tls_handshake,
>                                 &data,
>                                 NULL,
>                                 NULL);
> 
> -    if (!data.complete) {
> -        g_main_loop_run(data.loop);
> +    while (!data.complete) {

not "if", but "while".. Do we expect entering from somewhere except nbd_server_tls_handshake?

if not, probably safer construction would be

if (!data.complete) {
    qemu_coroutine_yield();
    assert(data.complete);
}

- to avoid hiding unexpected coroutine switching bugs

> +        qemu_coroutine_yield();
>       }
> -    g_main_loop_unref(data.loop);
> +
>       if (data.error) {
>           object_unref(OBJECT(tioc));
>           error_propagate(errp, data.error);
Eric Blake April 8, 2024, 2:12 p.m. UTC | #2
On Mon, Apr 08, 2024 at 11:46:39AM +0300, Vladimir Sementsov-Ogievskiy wrote:
> On 05.04.24 20:44, Eric Blake wrote:
> > From: Zhu Yangyang <zhuyangyang14@huawei.com>
> > 
> > Coroutines are not supposed to block. Instead, they should yield.
> > 
> > The client performs TLS upgrade outside of an AIOContext, during
> > synchronous handshake; this still requires g_main_loop.  But the
> > server responds to TLS upgrade inside a coroutine, so a nested
> > g_main_loop is wrong.  Since the two callbacks no longer share more
> > than the setting of data.complete and data.error, it's just as easy to
> > use static helpers instead of trying to share a common code path.
> > 
> > Fixes: f95910f ("nbd: implement TLS support in the protocol negotiation")
> > Signed-off-by: Zhu Yangyang <zhuyangyang14@huawei.com>
> > [eblake: move callbacks to their use point]
> > Signed-off-by: Eric Blake <eblake@redhat.com>
> 
> Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>

I'm debating whether it is worth trying to shove this into 9.0; -rc3
is very late, and the problem is pre-existing, so I'm leaning towards
no.  At which point, it's better to get this right.

> 
> still, some notes below
> 
> > ---
> > 
> > v3: https://lists.gnu.org/archive/html/qemu-devel/2024-04/msg00375.html
> > 
> > in v4, factor even the struct to the .c files, avoiding a union [Vladimir]
> > 
> >   nbd/nbd-internal.h | 10 ----------
> >   nbd/client.c       | 27 +++++++++++++++++++++++----
> >   nbd/common.c       | 11 -----------
> >   nbd/server.c       | 29 +++++++++++++++++++++++------
> >   4 files changed, 46 insertions(+), 31 deletions(-)
> > 

> > +++ b/nbd/client.c
> > @@ -596,13 +596,31 @@ static int nbd_request_simple_option(QIOChannel *ioc, int opt, bool strict,
> >       return 1;
> >   }
> > 
> > +/* Callback to learn when QIO TLS upgrade is complete */
> > +struct NBDTLSClientHandshakeData {
> > +    bool complete;
> > +    Error *error;
> > +    GMainLoop *loop;
> > +};
> > +
> > +static void nbd_client_tls_handshake(QIOTask *task, void *opaque)
> > +{
> > +    struct NBDTLSClientHandshakeData *data = opaque;
> > +
> > +    qio_task_propagate_error(task, &data->error);
> > +    data->complete = true;
> > +    if (data->loop) {
> > +        g_main_loop_quit(data->loop);
> > +    }
> > +}
> > +
> >   static QIOChannel *nbd_receive_starttls(QIOChannel *ioc,
> >                                           QCryptoTLSCreds *tlscreds,
> >                                           const char *hostname, Error **errp)
> >   {
> >       int ret;
> >       QIOChannelTLS *tioc;
> > -    struct NBDTLSHandshakeData data = { 0 };
> > +    struct NBDTLSClientHandshakeData data = { 0 };
> > 
> >       ret = nbd_request_simple_option(ioc, NBD_OPT_STARTTLS, true, errp);
> >       if (ret <= 0) {
> > @@ -619,18 +637,19 @@ static QIOChannel *nbd_receive_starttls(QIOChannel *ioc,
> >           return NULL;
> >       }
> >       qio_channel_set_name(QIO_CHANNEL(tioc), "nbd-client-tls");
> > -    data.loop = g_main_loop_new(g_main_context_default(), FALSE);
> >       trace_nbd_receive_starttls_tls_handshake();
> >       qio_channel_tls_handshake(tioc,
> > -                              nbd_tls_handshake,
> > +                              nbd_client_tls_handshake,
> >                                 &data,
> >                                 NULL,
> >                                 NULL);
> > 
> >       if (!data.complete) {
> > +        data.loop = g_main_loop_new(g_main_context_default(), FALSE);
> >           g_main_loop_run(data.loop);
> > +        g_main_loop_unref(data.loop);
> 
> probably good to assert(data.complete);

Seems reasonable.

> > +++ b/nbd/server.c
> > @@ -748,6 +748,23 @@ static int nbd_negotiate_handle_info(NBDClient *client, Error **errp)
> >       return rc;
> >   }
> > 
> > +/* Callback to learn when QIO TLS upgrade is complete */
> > +struct NBDTLSServerHandshakeData {
> > +    bool complete;
> > +    Error *error;
> > +    Coroutine *co;
> > +};
> > +
> > +static void nbd_server_tls_handshake(QIOTask *task, void *opaque)
> > +{
> > +    struct NBDTLSServerHandshakeData *data = opaque;
> > +
> > +    qio_task_propagate_error(task, &data->error);
> > +    data->complete = true;
> > +    if (!qemu_coroutine_entered(data->co)) {
> > +        aio_co_wake(data->co);
> > +    }
> > +}
> > 
> >   /* Handle NBD_OPT_STARTTLS. Return NULL to drop connection, or else the
> >    * new channel for all further (now-encrypted) communication. */
> > @@ -756,7 +773,7 @@ static QIOChannel *nbd_negotiate_handle_starttls(NBDClient *client,
> >   {
> >       QIOChannel *ioc;
> >       QIOChannelTLS *tioc;
> > -    struct NBDTLSHandshakeData data = { 0 };
> > +    struct NBDTLSServerHandshakeData data = { 0 };
> > 
> >       assert(client->opt == NBD_OPT_STARTTLS);
> > 
> > @@ -777,17 +794,17 @@ static QIOChannel *nbd_negotiate_handle_starttls(NBDClient *client,
> 
> preexisting: lack coroutine_fn, as well as caller nbd_negotiate_options()

Indeed, so now would not hurt to add them now that a callback is no
longer shared between callers in different context.  But probably as a
separate patch.

> 
> > 
> >       qio_channel_set_name(QIO_CHANNEL(tioc), "nbd-server-tls");
> >       trace_nbd_negotiate_handle_starttls_handshake();
> > -    data.loop = g_main_loop_new(g_main_context_default(), FALSE);
> > +    data.co = qemu_coroutine_self();
> >       qio_channel_tls_handshake(tioc,
> > -                              nbd_tls_handshake,
> > +                              nbd_server_tls_handshake,
> >                                 &data,
> >                                 NULL,
> >                                 NULL);
> > 
> > -    if (!data.complete) {
> > -        g_main_loop_run(data.loop);
> > +    while (!data.complete) {
> 
> not "if", but "while".. Do we expect entering from somewhere except nbd_server_tls_handshake?
> 
> if not, probably safer construction would be
> 
> if (!data.complete) {
>    qemu_coroutine_yield();
>    assert(data.complete);
> }
> 
> - to avoid hiding unexpected coroutine switching bugs

TLS handshake is early enough in the NBD connection that there should
not be any parallel I/O yet (we can't switch to parallel outstanding
transactions until after successful NBD_OPT_GO), so I like your idea
of asserting that the coroutine is entered at most once.

v5 coming up
diff mbox series

Patch

diff --git a/nbd/nbd-internal.h b/nbd/nbd-internal.h
index dfa02f77ee4..91895106a95 100644
--- a/nbd/nbd-internal.h
+++ b/nbd/nbd-internal.h
@@ -72,16 +72,6 @@  static inline int nbd_write(QIOChannel *ioc, const void *buffer, size_t size,
     return qio_channel_write_all(ioc, buffer, size, errp) < 0 ? -EIO : 0;
 }

-struct NBDTLSHandshakeData {
-    GMainLoop *loop;
-    bool complete;
-    Error *error;
-};
-
-
-void nbd_tls_handshake(QIOTask *task,
-                       void *opaque);
-
 int nbd_drop(QIOChannel *ioc, size_t size, Error **errp);

 #endif
diff --git a/nbd/client.c b/nbd/client.c
index 29ffc609a4b..c7141d7a098 100644
--- a/nbd/client.c
+++ b/nbd/client.c
@@ -596,13 +596,31 @@  static int nbd_request_simple_option(QIOChannel *ioc, int opt, bool strict,
     return 1;
 }

+/* Callback to learn when QIO TLS upgrade is complete */
+struct NBDTLSClientHandshakeData {
+    bool complete;
+    Error *error;
+    GMainLoop *loop;
+};
+
+static void nbd_client_tls_handshake(QIOTask *task, void *opaque)
+{
+    struct NBDTLSClientHandshakeData *data = opaque;
+
+    qio_task_propagate_error(task, &data->error);
+    data->complete = true;
+    if (data->loop) {
+        g_main_loop_quit(data->loop);
+    }
+}
+
 static QIOChannel *nbd_receive_starttls(QIOChannel *ioc,
                                         QCryptoTLSCreds *tlscreds,
                                         const char *hostname, Error **errp)
 {
     int ret;
     QIOChannelTLS *tioc;
-    struct NBDTLSHandshakeData data = { 0 };
+    struct NBDTLSClientHandshakeData data = { 0 };

     ret = nbd_request_simple_option(ioc, NBD_OPT_STARTTLS, true, errp);
     if (ret <= 0) {
@@ -619,18 +637,19 @@  static QIOChannel *nbd_receive_starttls(QIOChannel *ioc,
         return NULL;
     }
     qio_channel_set_name(QIO_CHANNEL(tioc), "nbd-client-tls");
-    data.loop = g_main_loop_new(g_main_context_default(), FALSE);
     trace_nbd_receive_starttls_tls_handshake();
     qio_channel_tls_handshake(tioc,
-                              nbd_tls_handshake,
+                              nbd_client_tls_handshake,
                               &data,
                               NULL,
                               NULL);

     if (!data.complete) {
+        data.loop = g_main_loop_new(g_main_context_default(), FALSE);
         g_main_loop_run(data.loop);
+        g_main_loop_unref(data.loop);
     }
-    g_main_loop_unref(data.loop);
+
     if (data.error) {
         error_propagate(errp, data.error);
         object_unref(OBJECT(tioc));
diff --git a/nbd/common.c b/nbd/common.c
index 3247c1d618a..589a748cfe6 100644
--- a/nbd/common.c
+++ b/nbd/common.c
@@ -47,17 +47,6 @@  int nbd_drop(QIOChannel *ioc, size_t size, Error **errp)
 }


-void nbd_tls_handshake(QIOTask *task,
-                       void *opaque)
-{
-    struct NBDTLSHandshakeData *data = opaque;
-
-    qio_task_propagate_error(task, &data->error);
-    data->complete = true;
-    g_main_loop_quit(data->loop);
-}
-
-
 const char *nbd_opt_lookup(uint32_t opt)
 {
     switch (opt) {
diff --git a/nbd/server.c b/nbd/server.c
index c3484cc1ebc..ea13cf0e766 100644
--- a/nbd/server.c
+++ b/nbd/server.c
@@ -748,6 +748,23 @@  static int nbd_negotiate_handle_info(NBDClient *client, Error **errp)
     return rc;
 }

+/* Callback to learn when QIO TLS upgrade is complete */
+struct NBDTLSServerHandshakeData {
+    bool complete;
+    Error *error;
+    Coroutine *co;
+};
+
+static void nbd_server_tls_handshake(QIOTask *task, void *opaque)
+{
+    struct NBDTLSServerHandshakeData *data = opaque;
+
+    qio_task_propagate_error(task, &data->error);
+    data->complete = true;
+    if (!qemu_coroutine_entered(data->co)) {
+        aio_co_wake(data->co);
+    }
+}

 /* Handle NBD_OPT_STARTTLS. Return NULL to drop connection, or else the
  * new channel for all further (now-encrypted) communication. */
@@ -756,7 +773,7 @@  static QIOChannel *nbd_negotiate_handle_starttls(NBDClient *client,
 {
     QIOChannel *ioc;
     QIOChannelTLS *tioc;
-    struct NBDTLSHandshakeData data = { 0 };
+    struct NBDTLSServerHandshakeData data = { 0 };

     assert(client->opt == NBD_OPT_STARTTLS);

@@ -777,17 +794,17 @@  static QIOChannel *nbd_negotiate_handle_starttls(NBDClient *client,

     qio_channel_set_name(QIO_CHANNEL(tioc), "nbd-server-tls");
     trace_nbd_negotiate_handle_starttls_handshake();
-    data.loop = g_main_loop_new(g_main_context_default(), FALSE);
+    data.co = qemu_coroutine_self();
     qio_channel_tls_handshake(tioc,
-                              nbd_tls_handshake,
+                              nbd_server_tls_handshake,
                               &data,
                               NULL,
                               NULL);

-    if (!data.complete) {
-        g_main_loop_run(data.loop);
+    while (!data.complete) {
+        qemu_coroutine_yield();
     }
-    g_main_loop_unref(data.loop);
+
     if (data.error) {
         object_unref(OBJECT(tioc));
         error_propagate(errp, data.error);