diff mbox series

[v3,09/14] nbd/server: Initial support for extended headers

Message ID 20230515195343.1915857-10-eblake@redhat.com
State New
Headers show
Series qemu patches for 64-bit NBD extensions | expand

Commit Message

Eric Blake May 15, 2023, 7:53 p.m. UTC
Time to support clients that request extended headers.  Now we can
finally reach the code added across several previous patches.

Even though the NBD spec has been altered to allow us to accept
NBD_CMD_READ larger than the max payload size (provided our response
is a hole or broken up over more than one data chunk), we are not
planning to take advantage of that, and continue to cap NBD_CMD_READ
to 32M regardless of header size.

For NBD_CMD_WRITE_ZEROES and NBD_CMD_TRIM, the block layer already
supports 64-bit operations without any effort on our part.  For
NBD_CMD_BLOCK_STATUS, the client's length is a hint, and the previous
patch took care of implementing the required
NBD_REPLY_TYPE_BLOCK_STATUS_EXT.

Signed-off-by: Eric Blake <eblake@redhat.com>
---
 nbd/nbd-internal.h |   5 +-
 nbd/server.c       | 130 +++++++++++++++++++++++++++++++++++----------
 2 files changed, 106 insertions(+), 29 deletions(-)

Comments

Vladimir Sementsov-Ogievskiy May 31, 2023, 2:46 p.m. UTC | #1
On 15.05.23 22:53, Eric Blake wrote:
> Time to support clients that request extended headers.  Now we can
> finally reach the code added across several previous patches.
> 
> Even though the NBD spec has been altered to allow us to accept
> NBD_CMD_READ larger than the max payload size (provided our response
> is a hole or broken up over more than one data chunk), we are not
> planning to take advantage of that, and continue to cap NBD_CMD_READ
> to 32M regardless of header size.
> 
> For NBD_CMD_WRITE_ZEROES and NBD_CMD_TRIM, the block layer already
> supports 64-bit operations without any effort on our part.  For
> NBD_CMD_BLOCK_STATUS, the client's length is a hint, and the previous
> patch took care of implementing the required
> NBD_REPLY_TYPE_BLOCK_STATUS_EXT.
> 
> Signed-off-by: Eric Blake <eblake@redhat.com>
> ---
>   nbd/nbd-internal.h |   5 +-

[..]

> 
>   static inline void set_be_simple_reply(NBDClient *client, struct iovec *iov,
> -                                       uint64_t error, NBDRequest *request)
> +                                       uint32_t error, NBDStructuredError *err,
> +                                       NBDRequest *request)
>   {
> -    NBDSimpleReply *reply = iov->iov_base;
> +    if (client->header_style >= NBD_HEADER_EXTENDED) {
> +        NBDExtendedReplyChunk *chunk = iov->iov_base;
> 
> -    iov->iov_len = sizeof(*reply);
> -    stl_be_p(&reply->magic, NBD_SIMPLE_REPLY_MAGIC);
> -    stl_be_p(&reply->error, error);
> -    stq_be_p(&reply->handle, request->handle);
> +        iov->iov_len = sizeof(*chunk);
> +        stl_be_p(&chunk->magic, NBD_EXTENDED_REPLY_MAGIC);
> +        stw_be_p(&chunk->flags, NBD_REPLY_FLAG_DONE);
> +        stq_be_p(&chunk->handle, request->handle);
> +        stq_be_p(&chunk->offset, request->from);
> +        if (error) {
> +            assert(!iov[1].iov_base);
> +            iov[1].iov_base = err;
> +            iov[1].iov_len = sizeof(*err);
> +            stw_be_p(&chunk->type, NBD_REPLY_TYPE_ERROR);
> +            stq_be_p(&chunk->length, sizeof(*err));
> +            stl_be_p(&err->error, error);
> +            stw_be_p(&err->message_length, 0);
> +        } else {
> +            stw_be_p(&chunk->type, NBD_REPLY_TYPE_NONE);
> +            stq_be_p(&chunk->length, 0);
> +        }
> +    } else {
> +        NBDSimpleReply *reply = iov->iov_base;
> +
> +        iov->iov_len = sizeof(*reply);
> +        stl_be_p(&reply->magic, NBD_SIMPLE_REPLY_MAGIC);
> +        stl_be_p(&reply->error, error);
> +        stq_be_p(&reply->handle, request->handle);
> +    }
>   }
> 
>   static int coroutine_fn nbd_co_send_simple_reply(NBDClient *client,
> @@ -1906,30 +1966,44 @@ static int coroutine_fn nbd_co_send_simple_reply(NBDClient *client,

So, that's not _simple_ anymore. The function should be renamed, as should set_be_simple_reply(). _simple_or_extended_? That's a bit too long. But continuing to use "simple" conflicts with how the word "simple" is used in the specification.

Probably better to update the callers? The only caller is nbd_send_generic_reply(). So, could we just add nbd_co_send_single_extended_reply() and call it from nbd_send_generic_reply() in the EXTENDED case?

Also, the transformation of set_be_simple_reply() does look like it should be two separate functions.

>   {
>       NBDReply hdr;
>       int nbd_err = system_errno_to_nbd_errno(error);
> +    NBDStructuredError err;
>       struct iovec iov[] = {
>           {.iov_base = &hdr},
>           {.iov_base = data, .iov_len = len}
>       };
> 
> +    assert(!len || !nbd_err);
>       trace_nbd_co_send_simple_reply(request->handle, nbd_err,
>                                      nbd_err_lookup(nbd_err), len);
> -    set_be_simple_reply(client, &iov[0], nbd_err, request);
> +    set_be_simple_reply(client, &iov[0], nbd_err, &err, request);
> 
> -    return nbd_co_send_iov(client, iov, len ? 2 : 1, errp);
> +    return nbd_co_send_iov(client, iov, iov[1].iov_len ? 2 : 1, errp);
>   }
> 
>   static inline void set_be_chunk(NBDClient *client, struct iovec *iov,
>                                   uint16_t flags, uint16_t type,
>                                   NBDRequest *request, uint32_t length)
>   {
> -    NBDStructuredReplyChunk *chunk = iov->iov_base;
> +    if (client->header_style >= NBD_HEADER_EXTENDED) {
> +        NBDExtendedReplyChunk *chunk = iov->iov_base;
> 
> -    iov->iov_len = sizeof(*chunk);
> -    stl_be_p(&chunk->magic, NBD_STRUCTURED_REPLY_MAGIC);
> -    stw_be_p(&chunk->flags, flags);
> -    stw_be_p(&chunk->type, type);
> -    stq_be_p(&chunk->handle, request->handle);
> -    stl_be_p(&chunk->length, length);
> +        iov->iov_len = sizeof(*chunk);
> +        stl_be_p(&chunk->magic, NBD_EXTENDED_REPLY_MAGIC);
> +        stw_be_p(&chunk->flags, flags);
> +        stw_be_p(&chunk->type, type);
> +        stq_be_p(&chunk->handle, request->handle);
> +        stq_be_p(&chunk->offset, request->from);
> +        stq_be_p(&chunk->length, length);
> +    } else {
> +        NBDStructuredReplyChunk *chunk = iov->iov_base;
> +
> +        iov->iov_len = sizeof(*chunk);
> +        stl_be_p(&chunk->magic, NBD_STRUCTURED_REPLY_MAGIC);
> +        stw_be_p(&chunk->flags, flags);
> +        stw_be_p(&chunk->type, type);
> +        stq_be_p(&chunk->handle, request->handle);
> +        stl_be_p(&chunk->length, length);
> +    }
>   }
> 
>   static int coroutine_fn nbd_co_send_structured_done(NBDClient *client,
Eric Blake June 7, 2023, 11:39 a.m. UTC | #2
On Wed, May 31, 2023 at 05:46:55PM +0300, Vladimir Sementsov-Ogievskiy wrote:
> On 15.05.23 22:53, Eric Blake wrote:
> > Time to support clients that request extended headers.  Now we can
> > finally reach the code added across several previous patches.
> > 
> > Even though the NBD spec has been altered to allow us to accept
> > NBD_CMD_READ larger than the max payload size (provided our response
> > is a hole or broken up over more than one data chunk), we are not
> > planning to take advantage of that, and continue to cap NBD_CMD_READ
> > to 32M regardless of header size.
> > 
> > For NBD_CMD_WRITE_ZEROES and NBD_CMD_TRIM, the block layer already
> > supports 64-bit operations without any effort on our part.  For
> > NBD_CMD_BLOCK_STATUS, the client's length is a hint, and the previous
> > patch took care of implementing the required
> > NBD_REPLY_TYPE_BLOCK_STATUS_EXT.
> > 
> > Signed-off-by: Eric Blake <eblake@redhat.com>
> > ---
> >   nbd/nbd-internal.h |   5 +-
> 
> [..]
> 
> > 
> >   static inline void set_be_simple_reply(NBDClient *client, struct iovec *iov,
> > -                                       uint64_t error, NBDRequest *request)
> > +                                       uint32_t error, NBDStructuredError *err,
> > +                                       NBDRequest *request)
> >   {
> > -    NBDSimpleReply *reply = iov->iov_base;
> > +    if (client->header_style >= NBD_HEADER_EXTENDED) {
> > +        NBDExtendedReplyChunk *chunk = iov->iov_base;
> > 
> > -    iov->iov_len = sizeof(*reply);
> > -    stl_be_p(&reply->magic, NBD_SIMPLE_REPLY_MAGIC);
> > -    stl_be_p(&reply->error, error);
> > -    stq_be_p(&reply->handle, request->handle);
> > +        iov->iov_len = sizeof(*chunk);
> > +        stl_be_p(&chunk->magic, NBD_EXTENDED_REPLY_MAGIC);
> > +        stw_be_p(&chunk->flags, NBD_REPLY_FLAG_DONE);
> > +        stq_be_p(&chunk->handle, request->handle);
> > +        stq_be_p(&chunk->offset, request->from);
> > +        if (error) {
> > +            assert(!iov[1].iov_base);
> > +            iov[1].iov_base = err;
> > +            iov[1].iov_len = sizeof(*err);
> > +            stw_be_p(&chunk->type, NBD_REPLY_TYPE_ERROR);
> > +            stq_be_p(&chunk->length, sizeof(*err));
> > +            stl_be_p(&err->error, error);
> > +            stw_be_p(&err->message_length, 0);
> > +        } else {
> > +            stw_be_p(&chunk->type, NBD_REPLY_TYPE_NONE);
> > +            stq_be_p(&chunk->length, 0);
> > +        }
> > +    } else {
> > +        NBDSimpleReply *reply = iov->iov_base;
> > +
> > +        iov->iov_len = sizeof(*reply);
> > +        stl_be_p(&reply->magic, NBD_SIMPLE_REPLY_MAGIC);
> > +        stl_be_p(&reply->error, error);
> > +        stq_be_p(&reply->handle, request->handle);
> > +    }
> >   }
> > 
> >   static int coroutine_fn nbd_co_send_simple_reply(NBDClient *client,
> > @@ -1906,30 +1966,44 @@ static int coroutine_fn nbd_co_send_simple_reply(NBDClient *client,
> 
> So, that's not _simple_ anymore. The function should be renamed, as should set_be_simple_reply(). _simple_or_extended_? That's a bit too long. But continuing to use "simple" conflicts with how the word "simple" is used in the specification.

In fact, I added an assertion that set_be_simple_reply() can only be
reached when extended replies are not in use, so none of this
complexity here was needed after all.

> 
> Probably better to update the callers? The only caller is nbd_send_generic_reply(). So, could we just add nbd_co_send_single_extended_reply() and call it from nbd_send_generic_reply() in the EXTENDED case?
> 
> Also, the transformation of set_be_simple_reply() does look like it should be two separate functions.
> 
> >   {
> >       NBDReply hdr;
> >       int nbd_err = system_errno_to_nbd_errno(error);
> > +    NBDStructuredError err;
> >       struct iovec iov[] = {
> >           {.iov_base = &hdr},
> >           {.iov_base = data, .iov_len = len}
> >       };
> > 
> > +    assert(!len || !nbd_err);
> >       trace_nbd_co_send_simple_reply(request->handle, nbd_err,
> >                                      nbd_err_lookup(nbd_err), len);
> > -    set_be_simple_reply(client, &iov[0], nbd_err, request);
> > +    set_be_simple_reply(client, &iov[0], nbd_err, &err, request);
> > 
> > -    return nbd_co_send_iov(client, iov, len ? 2 : 1, errp);
> > +    return nbd_co_send_iov(client, iov, iov[1].iov_len ? 2 : 1, errp);

Not introduced in this patch, but it turns out that when
iov[1].iov_len == 0, blindly passing niov==2 to nbd_co_send_iov()
still does the right thing, so I can lose the conditional here.
diff mbox series

Patch

diff --git a/nbd/nbd-internal.h b/nbd/nbd-internal.h
index 133b1d94b50..dfa02f77ee4 100644
--- a/nbd/nbd-internal.h
+++ b/nbd/nbd-internal.h
@@ -34,8 +34,11 @@ 
  * https://github.com/yoe/nbd/blob/master/doc/proto.md
  */

-/* Size of all NBD_OPT_*, without payload */
+/* Size of all compact NBD_CMD_*, without payload */
 #define NBD_REQUEST_SIZE            (4 + 2 + 2 + 8 + 8 + 4)
+/* Size of all extended NBD_CMD_*, without payload */
+#define NBD_EXTENDED_REQUEST_SIZE   (4 + 2 + 2 + 8 + 8 + 8)
+
 /* Size of all NBD_REP_* sent in answer to most NBD_OPT_*, without payload */
 #define NBD_REPLY_SIZE              (4 + 4 + 8)
 /* Size of reply to NBD_OPT_EXPORT_NAME */
diff --git a/nbd/server.c b/nbd/server.c
index b4c15ae1a14..6475a76c1f0 100644
--- a/nbd/server.c
+++ b/nbd/server.c
@@ -482,6 +482,10 @@  static int nbd_negotiate_handle_export_name(NBDClient *client, bool no_zeroes,
         [10 .. 133]   reserved     (0) [unless no_zeroes]
      */
     trace_nbd_negotiate_handle_export_name();
+    if (client->header_style >= NBD_HEADER_EXTENDED) {
+        error_setg(errp, "Extended headers already negotiated");
+        return -EINVAL;
+    }
     if (client->optlen > NBD_MAX_STRING_SIZE) {
         error_setg(errp, "Bad length received");
         return -EINVAL;
@@ -1262,7 +1266,11 @@  static int nbd_negotiate_options(NBDClient *client, Error **errp)
             case NBD_OPT_STRUCTURED_REPLY:
                 if (length) {
                     ret = nbd_reject_length(client, false, errp);
-                } else if (client->header_style >= NBD_HEADER_STRUCTURED) {
+                } else if (client->header_style >= NBD_HEADER_EXTENDED) {
+                    ret = nbd_negotiate_send_rep_err(
+                        client, NBD_REP_ERR_EXT_HEADER_REQD, errp,
+                        "extended headers already negotiated");
+                } else if (client->header_style == NBD_HEADER_STRUCTURED) {
                     ret = nbd_negotiate_send_rep_err(
                         client, NBD_REP_ERR_INVALID, errp,
                         "structured reply already negotiated");
@@ -1278,6 +1286,19 @@  static int nbd_negotiate_options(NBDClient *client, Error **errp)
                                                  errp);
                 break;

+            case NBD_OPT_EXTENDED_HEADERS:
+                if (length) {
+                    ret = nbd_reject_length(client, false, errp);
+                } else if (client->header_style >= NBD_HEADER_EXTENDED) {
+                    ret = nbd_negotiate_send_rep_err(
+                        client, NBD_REP_ERR_INVALID, errp,
+                        "extended headers already negotiated");
+                } else {
+                    ret = nbd_negotiate_send_rep(client, NBD_REP_ACK, errp);
+                    client->header_style = NBD_HEADER_EXTENDED;
+                }
+                break;
+
             default:
                 ret = nbd_opt_drop(client, NBD_REP_ERR_UNSUP, errp,
                                    "Unsupported option %" PRIu32 " (%s)",
@@ -1413,11 +1434,13 @@  nbd_read_eof(NBDClient *client, void *buffer, size_t size, Error **errp)
 static int coroutine_fn nbd_receive_request(NBDClient *client, NBDRequest *request,
                                             Error **errp)
 {
-    uint8_t buf[NBD_REQUEST_SIZE];
-    uint32_t magic;
+    uint8_t buf[NBD_EXTENDED_REQUEST_SIZE];
+    uint32_t magic, expect;
     int ret;
+    size_t size = client->header_style == NBD_HEADER_EXTENDED ?
+        NBD_EXTENDED_REQUEST_SIZE : NBD_REQUEST_SIZE;

-    ret = nbd_read_eof(client, buf, sizeof(buf), errp);
+    ret = nbd_read_eof(client, buf, size, errp);
     if (ret < 0) {
         return ret;
     }
@@ -1425,13 +1448,21 @@  static int coroutine_fn nbd_receive_request(NBDClient *client, NBDRequest *reque
         return -EIO;
     }

-    /* Request
-       [ 0 ..  3]   magic   (NBD_REQUEST_MAGIC)
-       [ 4 ..  5]   flags   (NBD_CMD_FLAG_FUA, ...)
-       [ 6 ..  7]   type    (NBD_CMD_READ, ...)
-       [ 8 .. 15]   handle
-       [16 .. 23]   from
-       [24 .. 27]   len
+    /*
+     * Compact request
+     *  [ 0 ..  3]   magic   (NBD_REQUEST_MAGIC)
+     *  [ 4 ..  5]   flags   (NBD_CMD_FLAG_FUA, ...)
+     *  [ 6 ..  7]   type    (NBD_CMD_READ, ...)
+     *  [ 8 .. 15]   handle
+     *  [16 .. 23]   from
+     *  [24 .. 27]   len
+     * Extended request
+     *  [ 0 ..  3]   magic   (NBD_EXTENDED_REQUEST_MAGIC)
+     *  [ 4 ..  5]   flags   (NBD_CMD_FLAG_FUA, NBD_CMD_FLAG_PAYLOAD_LEN, ...)
+     *  [ 6 ..  7]   type    (NBD_CMD_READ, ...)
+     *  [ 8 .. 15]   handle
+     *  [16 .. 23]   from
+     *  [24 .. 31]   len
      */

     magic = ldl_be_p(buf);
@@ -1439,12 +1470,18 @@  static int coroutine_fn nbd_receive_request(NBDClient *client, NBDRequest *reque
     request->type   = lduw_be_p(buf + 6);
     request->handle = ldq_be_p(buf + 8);
     request->from   = ldq_be_p(buf + 16);
-    request->len    = ldl_be_p(buf + 24); /* widen 32 to 64 bits */
+    if (client->header_style >= NBD_HEADER_EXTENDED) {
+        request->len = ldq_be_p(buf + 24);
+        expect = NBD_EXTENDED_REQUEST_MAGIC;
+    } else {
+        request->len = ldl_be_p(buf + 24); /* widen 32 to 64 bits */
+        expect = NBD_REQUEST_MAGIC;
+    }

     trace_nbd_receive_request(magic, request->flags, request->type,
                               request->from, request->len);

-    if (magic != NBD_REQUEST_MAGIC) {
+    if (magic != expect) {
         error_setg(errp, "invalid magic (got 0x%" PRIx32 ")", magic);
         return -EINVAL;
     }
@@ -1887,14 +1924,37 @@  static int coroutine_fn nbd_co_send_iov(NBDClient *client, struct iovec *iov,
 }

 static inline void set_be_simple_reply(NBDClient *client, struct iovec *iov,
-                                       uint64_t error, NBDRequest *request)
+                                       uint32_t error, NBDStructuredError *err,
+                                       NBDRequest *request)
 {
-    NBDSimpleReply *reply = iov->iov_base;
+    if (client->header_style >= NBD_HEADER_EXTENDED) {
+        NBDExtendedReplyChunk *chunk = iov->iov_base;

-    iov->iov_len = sizeof(*reply);
-    stl_be_p(&reply->magic, NBD_SIMPLE_REPLY_MAGIC);
-    stl_be_p(&reply->error, error);
-    stq_be_p(&reply->handle, request->handle);
+        iov->iov_len = sizeof(*chunk);
+        stl_be_p(&chunk->magic, NBD_EXTENDED_REPLY_MAGIC);
+        stw_be_p(&chunk->flags, NBD_REPLY_FLAG_DONE);
+        stq_be_p(&chunk->handle, request->handle);
+        stq_be_p(&chunk->offset, request->from);
+        if (error) {
+            assert(!iov[1].iov_base);
+            iov[1].iov_base = err;
+            iov[1].iov_len = sizeof(*err);
+            stw_be_p(&chunk->type, NBD_REPLY_TYPE_ERROR);
+            stq_be_p(&chunk->length, sizeof(*err));
+            stl_be_p(&err->error, error);
+            stw_be_p(&err->message_length, 0);
+        } else {
+            stw_be_p(&chunk->type, NBD_REPLY_TYPE_NONE);
+            stq_be_p(&chunk->length, 0);
+        }
+    } else {
+        NBDSimpleReply *reply = iov->iov_base;
+
+        iov->iov_len = sizeof(*reply);
+        stl_be_p(&reply->magic, NBD_SIMPLE_REPLY_MAGIC);
+        stl_be_p(&reply->error, error);
+        stq_be_p(&reply->handle, request->handle);
+    }
 }

 static int coroutine_fn nbd_co_send_simple_reply(NBDClient *client,
@@ -1906,30 +1966,44 @@  static int coroutine_fn nbd_co_send_simple_reply(NBDClient *client,
 {
     NBDReply hdr;
     int nbd_err = system_errno_to_nbd_errno(error);
+    NBDStructuredError err;
     struct iovec iov[] = {
         {.iov_base = &hdr},
         {.iov_base = data, .iov_len = len}
     };

+    assert(!len || !nbd_err);
     trace_nbd_co_send_simple_reply(request->handle, nbd_err,
                                    nbd_err_lookup(nbd_err), len);
-    set_be_simple_reply(client, &iov[0], nbd_err, request);
+    set_be_simple_reply(client, &iov[0], nbd_err, &err, request);

-    return nbd_co_send_iov(client, iov, len ? 2 : 1, errp);
+    return nbd_co_send_iov(client, iov, iov[1].iov_len ? 2 : 1, errp);
 }

 static inline void set_be_chunk(NBDClient *client, struct iovec *iov,
                                 uint16_t flags, uint16_t type,
                                 NBDRequest *request, uint32_t length)
 {
-    NBDStructuredReplyChunk *chunk = iov->iov_base;
+    if (client->header_style >= NBD_HEADER_EXTENDED) {
+        NBDExtendedReplyChunk *chunk = iov->iov_base;

-    iov->iov_len = sizeof(*chunk);
-    stl_be_p(&chunk->magic, NBD_STRUCTURED_REPLY_MAGIC);
-    stw_be_p(&chunk->flags, flags);
-    stw_be_p(&chunk->type, type);
-    stq_be_p(&chunk->handle, request->handle);
-    stl_be_p(&chunk->length, length);
+        iov->iov_len = sizeof(*chunk);
+        stl_be_p(&chunk->magic, NBD_EXTENDED_REPLY_MAGIC);
+        stw_be_p(&chunk->flags, flags);
+        stw_be_p(&chunk->type, type);
+        stq_be_p(&chunk->handle, request->handle);
+        stq_be_p(&chunk->offset, request->from);
+        stq_be_p(&chunk->length, length);
+    } else {
+        NBDStructuredReplyChunk *chunk = iov->iov_base;
+
+        iov->iov_len = sizeof(*chunk);
+        stl_be_p(&chunk->magic, NBD_STRUCTURED_REPLY_MAGIC);
+        stw_be_p(&chunk->flags, flags);
+        stw_be_p(&chunk->type, type);
+        stq_be_p(&chunk->handle, request->handle);
+        stl_be_p(&chunk->length, length);
+    }
 }

 static int coroutine_fn nbd_co_send_structured_done(NBDClient *client,