[PATCH v4] nbd/server: do not poll within a coroutine context

Eric Blake posted 1 patch 3 weeks, 5 days ago
nbd/nbd-internal.h | 10 ----------
nbd/client.c       | 27 +++++++++++++++++++++++----
nbd/common.c       | 11 -----------
nbd/server.c       | 29 +++++++++++++++++++++++------
4 files changed, 46 insertions(+), 31 deletions(-)
[PATCH v4] nbd/server: do not poll within a coroutine context
Posted by Eric Blake 3 weeks, 5 days ago
From: Zhu Yangyang <zhuyangyang14@huawei.com>

Coroutines are not supposed to block. Instead, they should yield.

The client performs TLS upgrade outside of an AIOContext, during
synchronous handshake; this still requires g_main_loop.  But the
server responds to TLS upgrade inside a coroutine, so a nested
g_main_loop is wrong.  Since the two callbacks no longer share more
than the setting of data.complete and data.error, it's just as easy to
use static helpers instead of trying to share a common code path.

Fixes: f95910f ("nbd: implement TLS support in the protocol negotiation")
Signed-off-by: Zhu Yangyang <zhuyangyang14@huawei.com>
[eblake: move callbacks to their use point]
Signed-off-by: Eric Blake <eblake@redhat.com>
---

v3: https://lists.gnu.org/archive/html/qemu-devel/2024-04/msg00375.html

in v4, factor even the struct to the .c files, avoiding a union [Vladimir]

 nbd/nbd-internal.h | 10 ----------
 nbd/client.c       | 27 +++++++++++++++++++++++----
 nbd/common.c       | 11 -----------
 nbd/server.c       | 29 +++++++++++++++++++++++------
 4 files changed, 46 insertions(+), 31 deletions(-)

diff --git a/nbd/nbd-internal.h b/nbd/nbd-internal.h
index dfa02f77ee4..91895106a95 100644
--- a/nbd/nbd-internal.h
+++ b/nbd/nbd-internal.h
@@ -72,16 +72,6 @@ static inline int nbd_write(QIOChannel *ioc, const void *buffer, size_t size,
     return qio_channel_write_all(ioc, buffer, size, errp) < 0 ? -EIO : 0;
 }

-struct NBDTLSHandshakeData {
-    GMainLoop *loop;
-    bool complete;
-    Error *error;
-};
-
-
-void nbd_tls_handshake(QIOTask *task,
-                       void *opaque);
-
 int nbd_drop(QIOChannel *ioc, size_t size, Error **errp);

 #endif
diff --git a/nbd/client.c b/nbd/client.c
index 29ffc609a4b..c7141d7a098 100644
--- a/nbd/client.c
+++ b/nbd/client.c
@@ -596,13 +596,31 @@ static int nbd_request_simple_option(QIOChannel *ioc, int opt, bool strict,
     return 1;
 }

+/* Callback to learn when QIO TLS upgrade is complete */
+struct NBDTLSClientHandshakeData {
+    bool complete;
+    Error *error;
+    GMainLoop *loop;
+};
+
+static void nbd_client_tls_handshake(QIOTask *task, void *opaque)
+{
+    struct NBDTLSClientHandshakeData *data = opaque;
+
+    qio_task_propagate_error(task, &data->error);
+    data->complete = true;
+    if (data->loop) {
+        g_main_loop_quit(data->loop);
+    }
+}
+
 static QIOChannel *nbd_receive_starttls(QIOChannel *ioc,
                                         QCryptoTLSCreds *tlscreds,
                                         const char *hostname, Error **errp)
 {
     int ret;
     QIOChannelTLS *tioc;
-    struct NBDTLSHandshakeData data = { 0 };
+    struct NBDTLSClientHandshakeData data = { 0 };

     ret = nbd_request_simple_option(ioc, NBD_OPT_STARTTLS, true, errp);
     if (ret <= 0) {
@@ -619,18 +637,19 @@ static QIOChannel *nbd_receive_starttls(QIOChannel *ioc,
         return NULL;
     }
     qio_channel_set_name(QIO_CHANNEL(tioc), "nbd-client-tls");
-    data.loop = g_main_loop_new(g_main_context_default(), FALSE);
     trace_nbd_receive_starttls_tls_handshake();
     qio_channel_tls_handshake(tioc,
-                              nbd_tls_handshake,
+                              nbd_client_tls_handshake,
                               &data,
                               NULL,
                               NULL);

     if (!data.complete) {
+        data.loop = g_main_loop_new(g_main_context_default(), FALSE);
         g_main_loop_run(data.loop);
+        g_main_loop_unref(data.loop);
     }
-    g_main_loop_unref(data.loop);
+
     if (data.error) {
         error_propagate(errp, data.error);
         object_unref(OBJECT(tioc));
diff --git a/nbd/common.c b/nbd/common.c
index 3247c1d618a..589a748cfe6 100644
--- a/nbd/common.c
+++ b/nbd/common.c
@@ -47,17 +47,6 @@ int nbd_drop(QIOChannel *ioc, size_t size, Error **errp)
 }


-void nbd_tls_handshake(QIOTask *task,
-                       void *opaque)
-{
-    struct NBDTLSHandshakeData *data = opaque;
-
-    qio_task_propagate_error(task, &data->error);
-    data->complete = true;
-    g_main_loop_quit(data->loop);
-}
-
-
 const char *nbd_opt_lookup(uint32_t opt)
 {
     switch (opt) {
diff --git a/nbd/server.c b/nbd/server.c
index c3484cc1ebc..ea13cf0e766 100644
--- a/nbd/server.c
+++ b/nbd/server.c
@@ -748,6 +748,23 @@ static int nbd_negotiate_handle_info(NBDClient *client, Error **errp)
     return rc;
 }

+/* Callback to learn when QIO TLS upgrade is complete */
+struct NBDTLSServerHandshakeData {
+    bool complete;
+    Error *error;
+    Coroutine *co;
+};
+
+static void nbd_server_tls_handshake(QIOTask *task, void *opaque)
+{
+    struct NBDTLSServerHandshakeData *data = opaque;
+
+    qio_task_propagate_error(task, &data->error);
+    data->complete = true;
+    if (!qemu_coroutine_entered(data->co)) {
+        aio_co_wake(data->co);
+    }
+}

 /* Handle NBD_OPT_STARTTLS. Return NULL to drop connection, or else the
  * new channel for all further (now-encrypted) communication. */
@@ -756,7 +773,7 @@ static QIOChannel *nbd_negotiate_handle_starttls(NBDClient *client,
 {
     QIOChannel *ioc;
     QIOChannelTLS *tioc;
-    struct NBDTLSHandshakeData data = { 0 };
+    struct NBDTLSServerHandshakeData data = { 0 };

     assert(client->opt == NBD_OPT_STARTTLS);

@@ -777,17 +794,17 @@ static QIOChannel *nbd_negotiate_handle_starttls(NBDClient *client,

     qio_channel_set_name(QIO_CHANNEL(tioc), "nbd-server-tls");
     trace_nbd_negotiate_handle_starttls_handshake();
-    data.loop = g_main_loop_new(g_main_context_default(), FALSE);
+    data.co = qemu_coroutine_self();
     qio_channel_tls_handshake(tioc,
-                              nbd_tls_handshake,
+                              nbd_server_tls_handshake,
                               &data,
                               NULL,
                               NULL);

-    if (!data.complete) {
-        g_main_loop_run(data.loop);
+    while (!data.complete) {
+        qemu_coroutine_yield();
     }
-    g_main_loop_unref(data.loop);
+
     if (data.error) {
         object_unref(OBJECT(tioc));
         error_propagate(errp, data.error);
-- 
2.44.0
Re: [PATCH v4] nbd/server: do not poll within a coroutine context
Posted by Vladimir Sementsov-Ogievskiy 3 weeks, 2 days ago
On 05.04.24 20:44, Eric Blake wrote:
> From: Zhu Yangyang <zhuyangyang14@huawei.com>
> 
> Coroutines are not supposed to block. Instead, they should yield.
> 
> The client performs TLS upgrade outside of an AIOContext, during
> synchronous handshake; this still requires g_main_loop.  But the
> server responds to TLS upgrade inside a coroutine, so a nested
> g_main_loop is wrong.  Since the two callbacks no longer share more
> than the setting of data.complete and data.error, it's just as easy to
> use static helpers instead of trying to share a common code path.
> 
> Fixes: f95910f ("nbd: implement TLS support in the protocol negotiation")
> Signed-off-by: Zhu Yangyang <zhuyangyang14@huawei.com>
> [eblake: move callbacks to their use point]
> Signed-off-by: Eric Blake <eblake@redhat.com>

Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>

still, some notes below

> ---
> 
> v3: https://lists.gnu.org/archive/html/qemu-devel/2024-04/msg00375.html
> 
> in v4, factor even the struct to the .c files, avoiding a union [Vladimir]
> 
>   nbd/nbd-internal.h | 10 ----------
>   nbd/client.c       | 27 +++++++++++++++++++++++----
>   nbd/common.c       | 11 -----------
>   nbd/server.c       | 29 +++++++++++++++++++++++------
>   4 files changed, 46 insertions(+), 31 deletions(-)
> 
> diff --git a/nbd/nbd-internal.h b/nbd/nbd-internal.h
> index dfa02f77ee4..91895106a95 100644
> --- a/nbd/nbd-internal.h
> +++ b/nbd/nbd-internal.h
> @@ -72,16 +72,6 @@ static inline int nbd_write(QIOChannel *ioc, const void *buffer, size_t size,
>       return qio_channel_write_all(ioc, buffer, size, errp) < 0 ? -EIO : 0;
>   }
> 
> -struct NBDTLSHandshakeData {
> -    GMainLoop *loop;
> -    bool complete;
> -    Error *error;
> -};
> -
> -
> -void nbd_tls_handshake(QIOTask *task,
> -                       void *opaque);
> -
>   int nbd_drop(QIOChannel *ioc, size_t size, Error **errp);
> 
>   #endif
> diff --git a/nbd/client.c b/nbd/client.c
> index 29ffc609a4b..c7141d7a098 100644
> --- a/nbd/client.c
> +++ b/nbd/client.c
> @@ -596,13 +596,31 @@ static int nbd_request_simple_option(QIOChannel *ioc, int opt, bool strict,
>       return 1;
>   }
> 
> +/* Callback to learn when QIO TLS upgrade is complete */
> +struct NBDTLSClientHandshakeData {
> +    bool complete;
> +    Error *error;
> +    GMainLoop *loop;
> +};
> +
> +static void nbd_client_tls_handshake(QIOTask *task, void *opaque)
> +{
> +    struct NBDTLSClientHandshakeData *data = opaque;
> +
> +    qio_task_propagate_error(task, &data->error);
> +    data->complete = true;
> +    if (data->loop) {
> +        g_main_loop_quit(data->loop);
> +    }
> +}
> +
>   static QIOChannel *nbd_receive_starttls(QIOChannel *ioc,
>                                           QCryptoTLSCreds *tlscreds,
>                                           const char *hostname, Error **errp)
>   {
>       int ret;
>       QIOChannelTLS *tioc;
> -    struct NBDTLSHandshakeData data = { 0 };
> +    struct NBDTLSClientHandshakeData data = { 0 };
> 
>       ret = nbd_request_simple_option(ioc, NBD_OPT_STARTTLS, true, errp);
>       if (ret <= 0) {
> @@ -619,18 +637,19 @@ static QIOChannel *nbd_receive_starttls(QIOChannel *ioc,
>           return NULL;
>       }
>       qio_channel_set_name(QIO_CHANNEL(tioc), "nbd-client-tls");
> -    data.loop = g_main_loop_new(g_main_context_default(), FALSE);
>       trace_nbd_receive_starttls_tls_handshake();
>       qio_channel_tls_handshake(tioc,
> -                              nbd_tls_handshake,
> +                              nbd_client_tls_handshake,
>                                 &data,
>                                 NULL,
>                                 NULL);
> 
>       if (!data.complete) {
> +        data.loop = g_main_loop_new(g_main_context_default(), FALSE);
>           g_main_loop_run(data.loop);
> +        g_main_loop_unref(data.loop);

probably good to assert(data.complete);

>       }
> -    g_main_loop_unref(data.loop);
> +
>       if (data.error) {
>           error_propagate(errp, data.error);
>           object_unref(OBJECT(tioc));
> diff --git a/nbd/common.c b/nbd/common.c
> index 3247c1d618a..589a748cfe6 100644
> --- a/nbd/common.c
> +++ b/nbd/common.c
> @@ -47,17 +47,6 @@ int nbd_drop(QIOChannel *ioc, size_t size, Error **errp)
>   }
> 
> 
> -void nbd_tls_handshake(QIOTask *task,
> -                       void *opaque)
> -{
> -    struct NBDTLSHandshakeData *data = opaque;
> -
> -    qio_task_propagate_error(task, &data->error);
> -    data->complete = true;
> -    g_main_loop_quit(data->loop);
> -}
> -
> -
>   const char *nbd_opt_lookup(uint32_t opt)
>   {
>       switch (opt) {
> diff --git a/nbd/server.c b/nbd/server.c
> index c3484cc1ebc..ea13cf0e766 100644
> --- a/nbd/server.c
> +++ b/nbd/server.c
> @@ -748,6 +748,23 @@ static int nbd_negotiate_handle_info(NBDClient *client, Error **errp)
>       return rc;
>   }
> 
> +/* Callback to learn when QIO TLS upgrade is complete */
> +struct NBDTLSServerHandshakeData {
> +    bool complete;
> +    Error *error;
> +    Coroutine *co;
> +};
> +
> +static void nbd_server_tls_handshake(QIOTask *task, void *opaque)
> +{
> +    struct NBDTLSServerHandshakeData *data = opaque;
> +
> +    qio_task_propagate_error(task, &data->error);
> +    data->complete = true;
> +    if (!qemu_coroutine_entered(data->co)) {
> +        aio_co_wake(data->co);
> +    }
> +}
> 
>   /* Handle NBD_OPT_STARTTLS. Return NULL to drop connection, or else the
>    * new channel for all further (now-encrypted) communication. */
> @@ -756,7 +773,7 @@ static QIOChannel *nbd_negotiate_handle_starttls(NBDClient *client,
>   {
>       QIOChannel *ioc;
>       QIOChannelTLS *tioc;
> -    struct NBDTLSHandshakeData data = { 0 };
> +    struct NBDTLSServerHandshakeData data = { 0 };
> 
>       assert(client->opt == NBD_OPT_STARTTLS);
> 
> @@ -777,17 +794,17 @@ static QIOChannel *nbd_negotiate_handle_starttls(NBDClient *client,

preexisting: lack coroutine_fn, as well as caller nbd_negotiate_options()

> 
>       qio_channel_set_name(QIO_CHANNEL(tioc), "nbd-server-tls");
>       trace_nbd_negotiate_handle_starttls_handshake();
> -    data.loop = g_main_loop_new(g_main_context_default(), FALSE);
> +    data.co = qemu_coroutine_self();
>       qio_channel_tls_handshake(tioc,
> -                              nbd_tls_handshake,
> +                              nbd_server_tls_handshake,
>                                 &data,
>                                 NULL,
>                                 NULL);
> 
> -    if (!data.complete) {
> -        g_main_loop_run(data.loop);
> +    while (!data.complete) {

not "if", but "while".. Do we expect entering from somewhere except nbd_server_tls_handshake?

if not, probably safer construction would be

if (!data.complete) {
    qemu_coroutine_yield();
    assert(data.complete);
}

- to avoid hiding unexpected coroutine switching bugs

> +        qemu_coroutine_yield();
>       }
> -    g_main_loop_unref(data.loop);
> +
>       if (data.error) {
>           object_unref(OBJECT(tioc));
>           error_propagate(errp, data.error);

-- 
Best regards,
Vladimir
Re: [PATCH v4] nbd/server: do not poll within a coroutine context
Posted by Eric Blake 3 weeks, 2 days ago
On Mon, Apr 08, 2024 at 11:46:39AM +0300, Vladimir Sementsov-Ogievskiy wrote:
> On 05.04.24 20:44, Eric Blake wrote:
> > From: Zhu Yangyang <zhuyangyang14@huawei.com>
> > 
> > Coroutines are not supposed to block. Instead, they should yield.
> > 
> > The client performs TLS upgrade outside of an AIOContext, during
> > synchronous handshake; this still requires g_main_loop.  But the
> > server responds to TLS upgrade inside a coroutine, so a nested
> > g_main_loop is wrong.  Since the two callbacks no longer share more
> > than the setting of data.complete and data.error, it's just as easy to
> > use static helpers instead of trying to share a common code path.
> > 
> > Fixes: f95910f ("nbd: implement TLS support in the protocol negotiation")
> > Signed-off-by: Zhu Yangyang <zhuyangyang14@huawei.com>
> > [eblake: move callbacks to their use point]
> > Signed-off-by: Eric Blake <eblake@redhat.com>
> 
> Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>

I'm debating whether it is worth trying to shove this into 9.0; -rc3
is very late, and the problem is pre-existing, so I'm leaning towards
no.  At which point, it's better to get this right.

> 
> still, some notes below
> 
> > ---
> > 
> > v3: https://lists.gnu.org/archive/html/qemu-devel/2024-04/msg00375.html
> > 
> > in v4, factor even the struct to the .c files, avoiding a union [Vladimir]
> > 
> >   nbd/nbd-internal.h | 10 ----------
> >   nbd/client.c       | 27 +++++++++++++++++++++++----
> >   nbd/common.c       | 11 -----------
> >   nbd/server.c       | 29 +++++++++++++++++++++++------
> >   4 files changed, 46 insertions(+), 31 deletions(-)
> > 

> > +++ b/nbd/client.c
> > @@ -596,13 +596,31 @@ static int nbd_request_simple_option(QIOChannel *ioc, int opt, bool strict,
> >       return 1;
> >   }
> > 
> > +/* Callback to learn when QIO TLS upgrade is complete */
> > +struct NBDTLSClientHandshakeData {
> > +    bool complete;
> > +    Error *error;
> > +    GMainLoop *loop;
> > +};
> > +
> > +static void nbd_client_tls_handshake(QIOTask *task, void *opaque)
> > +{
> > +    struct NBDTLSClientHandshakeData *data = opaque;
> > +
> > +    qio_task_propagate_error(task, &data->error);
> > +    data->complete = true;
> > +    if (data->loop) {
> > +        g_main_loop_quit(data->loop);
> > +    }
> > +}
> > +
> >   static QIOChannel *nbd_receive_starttls(QIOChannel *ioc,
> >                                           QCryptoTLSCreds *tlscreds,
> >                                           const char *hostname, Error **errp)
> >   {
> >       int ret;
> >       QIOChannelTLS *tioc;
> > -    struct NBDTLSHandshakeData data = { 0 };
> > +    struct NBDTLSClientHandshakeData data = { 0 };
> > 
> >       ret = nbd_request_simple_option(ioc, NBD_OPT_STARTTLS, true, errp);
> >       if (ret <= 0) {
> > @@ -619,18 +637,19 @@ static QIOChannel *nbd_receive_starttls(QIOChannel *ioc,
> >           return NULL;
> >       }
> >       qio_channel_set_name(QIO_CHANNEL(tioc), "nbd-client-tls");
> > -    data.loop = g_main_loop_new(g_main_context_default(), FALSE);
> >       trace_nbd_receive_starttls_tls_handshake();
> >       qio_channel_tls_handshake(tioc,
> > -                              nbd_tls_handshake,
> > +                              nbd_client_tls_handshake,
> >                                 &data,
> >                                 NULL,
> >                                 NULL);
> > 
> >       if (!data.complete) {
> > +        data.loop = g_main_loop_new(g_main_context_default(), FALSE);
> >           g_main_loop_run(data.loop);
> > +        g_main_loop_unref(data.loop);
> 
> probably good to assert(data.complete);

Seems reasonable.

> > +++ b/nbd/server.c
> > @@ -748,6 +748,23 @@ static int nbd_negotiate_handle_info(NBDClient *client, Error **errp)
> >       return rc;
> >   }
> > 
> > +/* Callback to learn when QIO TLS upgrade is complete */
> > +struct NBDTLSServerHandshakeData {
> > +    bool complete;
> > +    Error *error;
> > +    Coroutine *co;
> > +};
> > +
> > +static void nbd_server_tls_handshake(QIOTask *task, void *opaque)
> > +{
> > +    struct NBDTLSServerHandshakeData *data = opaque;
> > +
> > +    qio_task_propagate_error(task, &data->error);
> > +    data->complete = true;
> > +    if (!qemu_coroutine_entered(data->co)) {
> > +        aio_co_wake(data->co);
> > +    }
> > +}
> > 
> >   /* Handle NBD_OPT_STARTTLS. Return NULL to drop connection, or else the
> >    * new channel for all further (now-encrypted) communication. */
> > @@ -756,7 +773,7 @@ static QIOChannel *nbd_negotiate_handle_starttls(NBDClient *client,
> >   {
> >       QIOChannel *ioc;
> >       QIOChannelTLS *tioc;
> > -    struct NBDTLSHandshakeData data = { 0 };
> > +    struct NBDTLSServerHandshakeData data = { 0 };
> > 
> >       assert(client->opt == NBD_OPT_STARTTLS);
> > 
> > @@ -777,17 +794,17 @@ static QIOChannel *nbd_negotiate_handle_starttls(NBDClient *client,
> 
> preexisting: lack coroutine_fn, as well as caller nbd_negotiate_options()

Indeed, so now would not hurt to add them now that a callback is no
longer shared between callers in different context.  But probably as a
separate patch.

> 
> > 
> >       qio_channel_set_name(QIO_CHANNEL(tioc), "nbd-server-tls");
> >       trace_nbd_negotiate_handle_starttls_handshake();
> > -    data.loop = g_main_loop_new(g_main_context_default(), FALSE);
> > +    data.co = qemu_coroutine_self();
> >       qio_channel_tls_handshake(tioc,
> > -                              nbd_tls_handshake,
> > +                              nbd_server_tls_handshake,
> >                                 &data,
> >                                 NULL,
> >                                 NULL);
> > 
> > -    if (!data.complete) {
> > -        g_main_loop_run(data.loop);
> > +    while (!data.complete) {
> 
> not "if", but "while".. Do we expect entering from somewhere except nbd_server_tls_handshake?
> 
> if not, probably safer construction would be
> 
> if (!data.complete) {
>    qemu_coroutine_yield();
>    assert(data.complete);
> }
> 
> - to avoid hiding unexpected coroutine switching bugs

TLS handshake is early enough in the NBD connection that there should
not be any parallel I/O yet (we can't switch to parallel outstanding
transactions until after successful NBD_OPT_GO), so I like your idea
of asserting that the coroutine is entered at most once.

v5 coming up

-- 
Eric Blake, Principal Software Engineer
Red Hat, Inc.
Virtualization:  qemu.org | libguestfs.org