diff mbox

[v4,14/14] nbd: Implement NBD_CMD_WRITE_ZEROES on client

Message ID 1466892954-8684-15-git-send-email-eblake@redhat.com
State New
Headers show

Commit Message

Eric Blake June 25, 2016, 10:15 p.m. UTC
Upstream NBD protocol recently added the ability to efficiently
write zeroes without having to send the zeroes over the wire,
along with a flag to control whether the client wants a hole.

The generic block code takes care of falling back to the obvious
write of lots of zeroes if we return -ENOTSUP because the server
does not have WRITE_ZEROES.

Signed-off-by: Eric Blake <eblake@redhat.com>

---
v4: rebase to byte-based limits
v3: rebase, tell block layer about our support
---
 block/nbd-client.h |  2 ++
 block/nbd-client.c | 35 +++++++++++++++++++++++++++++++++++
 block/nbd.c        |  4 ++++
 3 files changed, 41 insertions(+)

Comments

Paolo Bonzini June 27, 2016, 12:13 p.m. UTC | #1
On 26/06/2016 00:15, Eric Blake wrote:
> diff --git a/block/nbd.c b/block/nbd.c
> index 8d57220..049d1bd 100644
> --- a/block/nbd.c
> +++ b/block/nbd.c
> @@ -357,6 +357,7 @@ static int nbd_co_flush(BlockDriverState *bs)
>  static void nbd_refresh_limits(BlockDriverState *bs, Error **errp)
>  {
>      bs->bl.max_pdiscard = NBD_MAX_BUFFER_SIZE;
> +    bs->bl.max_pwrite_zeroes = NBD_MAX_BUFFER_SIZE;

I have probably asked before---is there any reason for these to be
limited, since the commands have no payload?

Thanks,

Paolo

>      bs->bl.max_transfer = NBD_MAX_BUFFER_SIZE;
>  }
Eric Blake June 27, 2016, 1 p.m. UTC | #2
On 06/27/2016 06:13 AM, Paolo Bonzini wrote:
> 
> 
> On 26/06/2016 00:15, Eric Blake wrote:
>> diff --git a/block/nbd.c b/block/nbd.c
>> index 8d57220..049d1bd 100644
>> --- a/block/nbd.c
>> +++ b/block/nbd.c
>> @@ -357,6 +357,7 @@ static int nbd_co_flush(BlockDriverState *bs)
>>  static void nbd_refresh_limits(BlockDriverState *bs, Error **errp)
>>  {
>>      bs->bl.max_pdiscard = NBD_MAX_BUFFER_SIZE;
>> +    bs->bl.max_pwrite_zeroes = NBD_MAX_BUFFER_SIZE;
> 
> I have probably asked before---is there any reason for these to be
> limited, since the commands have no payload?

Here's the last time it was brought up on the nbd-general list [1].  We
have the potential BLOCK_SIZE handshake negotiation extension, where I
was proposing that the server can advertise its actual limits (rather
than the client having to guess them or rely on out-of-band information)
- and I was proposing that NBD_CMD_TRIM and NBD_CMD_WRITE_ZEROES should
be permitted to advertise additional limits that are larger than the
NBD_CMD_WRITE limit, precisely because they don't carry a payload and
can therefore be more efficient if done in bulk.

[1] https://sourceforge.net/p/nbd/mailman/message/35081223/

But at the time of that thread, there was concern expressed whether
adding an additional NBD_INFO for each NBD_CMD limit would scale well,
or whether we need a different approach, and I haven't revisited the
thread since that comment.  At any rate, I have BLOCK_SIZE patches ready
for qemu, once the WRITE_ZEROES patches land, and where it should be easy
to make this limit runtime-settable to a larger value from actual server
limits, if we can decide how BLOCK_SIZE should advertise such a limit.
diff mbox

Patch

diff --git a/block/nbd-client.h b/block/nbd-client.h
index 044aca4..2cfe377 100644
--- a/block/nbd-client.h
+++ b/block/nbd-client.h
@@ -48,6 +48,8 @@  int nbd_client_co_pdiscard(BlockDriverState *bs, int64_t offset, int count);
 int nbd_client_co_flush(BlockDriverState *bs);
 int nbd_client_co_pwritev(BlockDriverState *bs, uint64_t offset,
                           uint64_t bytes, QEMUIOVector *qiov, int flags);
+int nbd_client_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset,
+                                int count, BdrvRequestFlags flags);
 int nbd_client_co_preadv(BlockDriverState *bs, uint64_t offset,
                          uint64_t bytes, QEMUIOVector *qiov, int flags);

diff --git a/block/nbd-client.c b/block/nbd-client.c
index 88e52c1..5fdac74 100644
--- a/block/nbd-client.c
+++ b/block/nbd-client.c
@@ -275,6 +275,41 @@  int nbd_client_co_pwritev(BlockDriverState *bs, uint64_t offset,
     return -reply.error;
 }

+int nbd_client_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset,
+                                int count, BdrvRequestFlags flags)
+{   /* Issue NBD_CMD_WRITE_ZEROES for count bytes starting at offset. */
+    ssize_t ret;
+    NbdClientSession *client = nbd_get_client_session(bs);
+    struct nbd_request request = {
+        .type = NBD_CMD_WRITE_ZEROES,
+        .from = offset,
+        .len = count,
+    };  /* members not named here (e.g. .flags) start out as zero */
+    struct nbd_reply reply;
+
+    if (!(client->nbdflags & NBD_FLAG_SEND_WRITE_ZEROES)) {
+        return -ENOTSUP; /* block layer falls back to plain writes */
+    }
+
+    if (flags & BDRV_REQ_FUA) {
+        assert(client->nbdflags & NBD_FLAG_SEND_FUA);
+        request.flags |= NBD_CMD_FLAG_FUA;
+    }
+    if (!(flags & BDRV_REQ_MAY_UNMAP)) {
+        request.flags |= NBD_CMD_FLAG_NO_HOLE; /* caller did not allow unmap */
+    }
+
+    nbd_coroutine_start(client, &request);
+    ret = nbd_co_send_request(bs, &request, NULL);
+    if (ret < 0) {
+        reply.error = -ret; /* send failed: carry it out via reply.error */
+    } else {
+        nbd_co_receive_reply(client, &request, &reply, NULL);
+    }
+    nbd_coroutine_end(client, &request);
+    return -reply.error;
+}
+
 int nbd_client_co_flush(BlockDriverState *bs)
 {
     NbdClientSession *client = nbd_get_client_session(bs);
diff --git a/block/nbd.c b/block/nbd.c
index 8d57220..049d1bd 100644
--- a/block/nbd.c
+++ b/block/nbd.c
@@ -357,6 +357,7 @@  static int nbd_co_flush(BlockDriverState *bs)
 static void nbd_refresh_limits(BlockDriverState *bs, Error **errp)
 {
     bs->bl.max_pdiscard = NBD_MAX_BUFFER_SIZE;
+    bs->bl.max_pwrite_zeroes = NBD_MAX_BUFFER_SIZE; /* same cap as discard */
     bs->bl.max_transfer = NBD_MAX_BUFFER_SIZE;
 }

@@ -440,6 +441,7 @@  static BlockDriver bdrv_nbd = {
     .bdrv_file_open             = nbd_open,
     .bdrv_co_preadv             = nbd_client_co_preadv,
     .bdrv_co_pwritev            = nbd_client_co_pwritev,
+    .bdrv_co_pwrite_zeroes      = nbd_client_co_pwrite_zeroes,
     .bdrv_close                 = nbd_close,
     .bdrv_co_flush_to_os        = nbd_co_flush,
     .bdrv_co_pdiscard           = nbd_client_co_pdiscard,
@@ -458,6 +460,7 @@  static BlockDriver bdrv_nbd_tcp = {
     .bdrv_file_open             = nbd_open,
     .bdrv_co_preadv             = nbd_client_co_preadv,
     .bdrv_co_pwritev            = nbd_client_co_pwritev,
+    .bdrv_co_pwrite_zeroes      = nbd_client_co_pwrite_zeroes,
     .bdrv_close                 = nbd_close,
     .bdrv_co_flush_to_os        = nbd_co_flush,
     .bdrv_co_pdiscard           = nbd_client_co_pdiscard,
@@ -476,6 +479,7 @@  static BlockDriver bdrv_nbd_unix = {
     .bdrv_file_open             = nbd_open,
     .bdrv_co_preadv             = nbd_client_co_preadv,
     .bdrv_co_pwritev            = nbd_client_co_pwritev,
+    .bdrv_co_pwrite_zeroes      = nbd_client_co_pwrite_zeroes,
     .bdrv_close                 = nbd_close,
     .bdrv_co_flush_to_os        = nbd_co_flush,
     .bdrv_co_pdiscard           = nbd_client_co_pdiscard,