diff mbox series

[2/5] nbd: Prepare for NBD_CMD_FLAG_FAST_ZERO

Message ID 20190823143726.27062-3-eblake@redhat.com
State New
Headers show
Series Add NBD fast zero support to qemu client and server | expand

Commit Message

Eric Blake Aug. 23, 2019, 2:37 p.m. UTC
Commit fe0480d6 and friends added BDRV_REQ_NO_FALLBACK as a way to
avoid wasting time on a preliminary write-zero request that will later
be rewritten by actual data, if it is known that the write-zero
request will use a slow fallback; but in doing so, could not optimize
for NBD.  The NBD specification is now considering an extension that
will allow passing on those semantics; this patch updates the new
protocol bits and 'qemu-nbd --list' output to recognize the bit, as
well as the new errno value possible when using the new flag; while
upcoming patches will improve the client to use the feature when
present, and the server to advertise support for it.

Signed-off-by: Eric Blake <eblake@redhat.com>
---
 docs/interop/nbd.txt | 3 ++-
 include/block/nbd.h  | 4 ++++
 nbd/common.c         | 5 +++++
 nbd/server.c         | 2 ++
 qemu-nbd.c           | 1 +
 5 files changed, 14 insertions(+), 1 deletion(-)

Comments

Vladimir Sementsov-Ogievskiy Aug. 30, 2019, 6:07 p.m. UTC | #1
23.08.2019 17:37, Eric Blake wrote:
> Commit fe0480d6 and friends added BDRV_REQ_NO_FALLBACK as a way to
> avoid wasting time on a preliminary write-zero request that will later
> be rewritten by actual data, if it is known that the write-zero
> request will use a slow fallback; but in doing so, could not optimize
> for NBD.  The NBD specification is now considering an extension that
> will allow passing on those semantics; this patch updates the new
> protocol bits and 'qemu-nbd --list' output to recognize the bit, as
> well as the new errno value possible when using the new flag; while
> upcoming patches will improve the client to use the feature when
> present, and the server to advertise support for it.
> 
> Signed-off-by: Eric Blake <eblake@redhat.com>
> ---
>   docs/interop/nbd.txt | 3 ++-
>   include/block/nbd.h  | 4 ++++
>   nbd/common.c         | 5 +++++
>   nbd/server.c         | 2 ++
>   qemu-nbd.c           | 1 +
>   5 files changed, 14 insertions(+), 1 deletion(-)
> 
> diff --git a/docs/interop/nbd.txt b/docs/interop/nbd.txt
> index 6dfec7f47647..45118809618e 100644
> --- a/docs/interop/nbd.txt
> +++ b/docs/interop/nbd.txt
> @@ -53,4 +53,5 @@ the operation of that feature.
>   * 2.12: NBD_CMD_BLOCK_STATUS for "base:allocation"
>   * 3.0: NBD_OPT_STARTTLS with TLS Pre-Shared Keys (PSK),
>   NBD_CMD_BLOCK_STATUS for "qemu:dirty-bitmap:", NBD_CMD_CACHE
> -* 4.2: NBD_FLAG_CAN_MULTI_CONN for sharable read-only exports
> +* 4.2: NBD_FLAG_CAN_MULTI_CONN for sharable read-only exports,
> +NBD_CMD_FLAG_FAST_ZERO
> diff --git a/include/block/nbd.h b/include/block/nbd.h
> index 2c87b42dfd48..21550747cf35 100644
> --- a/include/block/nbd.h
> +++ b/include/block/nbd.h
> @@ -140,6 +140,7 @@ enum {
>       NBD_FLAG_CAN_MULTI_CONN_BIT     =  8, /* Multi-client cache consistent */
>       NBD_FLAG_SEND_RESIZE_BIT        =  9, /* Send resize */
>       NBD_FLAG_SEND_CACHE_BIT         = 10, /* Send CACHE (prefetch) */
> +    NBD_FLAG_SEND_FAST_ZERO_BIT     = 11, /* FAST_ZERO flag for WRITE_ZEROES */
>   };
> 
>   #define NBD_FLAG_HAS_FLAGS         (1 << NBD_FLAG_HAS_FLAGS_BIT)
> @@ -153,6 +154,7 @@ enum {
>   #define NBD_FLAG_CAN_MULTI_CONN    (1 << NBD_FLAG_CAN_MULTI_CONN_BIT)
>   #define NBD_FLAG_SEND_RESIZE       (1 << NBD_FLAG_SEND_RESIZE_BIT)
>   #define NBD_FLAG_SEND_CACHE        (1 << NBD_FLAG_SEND_CACHE_BIT)
> +#define NBD_FLAG_SEND_FAST_ZERO    (1 << NBD_FLAG_SEND_FAST_ZERO_BIT)
> 
>   /* New-style handshake (global) flags, sent from server to client, and
>      control what will happen during handshake phase. */
> @@ -205,6 +207,7 @@ enum {
>   #define NBD_CMD_FLAG_DF         (1 << 2) /* don't fragment structured read */
>   #define NBD_CMD_FLAG_REQ_ONE    (1 << 3) /* only one extent in BLOCK_STATUS
>                                             * reply chunk */
> +#define NBD_CMD_FLAG_FAST_ZERO  (1 << 4) /* fail if WRITE_ZEROES is not fast */
> 
>   /* Supported request types */
>   enum {
> @@ -270,6 +273,7 @@ static inline bool nbd_reply_type_is_error(int type)
>   #define NBD_EINVAL     22
>   #define NBD_ENOSPC     28
>   #define NBD_EOVERFLOW  75
> +#define NBD_ENOTSUP    95
>   #define NBD_ESHUTDOWN  108
> 
>   /* Details collected by NBD_OPT_EXPORT_NAME and NBD_OPT_GO */
> diff --git a/nbd/common.c b/nbd/common.c
> index cc8b278e541d..ddfe7d118371 100644
> --- a/nbd/common.c
> +++ b/nbd/common.c
> @@ -201,6 +201,8 @@ const char *nbd_err_lookup(int err)
>           return "ENOSPC";
>       case NBD_EOVERFLOW:
>           return "EOVERFLOW";
> +    case NBD_ENOTSUP:
> +        return "ENOTSUP";
>       case NBD_ESHUTDOWN:
>           return "ESHUTDOWN";
>       default:
> @@ -231,6 +233,9 @@ int nbd_errno_to_system_errno(int err)
>       case NBD_EOVERFLOW:
>           ret = EOVERFLOW;
>           break;
> +    case NBD_ENOTSUP:
> +        ret = ENOTSUP;
> +        break;
>       case NBD_ESHUTDOWN:
>           ret = ESHUTDOWN;
>           break;
> diff --git a/nbd/server.c b/nbd/server.c
> index b5577828aa44..981bc3cb1151 100644
> --- a/nbd/server.c
> +++ b/nbd/server.c
> @@ -55,6 +55,8 @@ static int system_errno_to_nbd_errno(int err)
>           return NBD_ENOSPC;
>       case EOVERFLOW:
>           return NBD_EOVERFLOW;
> +    case ENOTSUP:
> +        return NBD_ENOTSUP;

This may provoke returning NBD_ENOTSUP in other cases, not only new one we are going to add.

>       case ESHUTDOWN:
>           return NBD_ESHUTDOWN;
>       case EINVAL:
> diff --git a/qemu-nbd.c b/qemu-nbd.c
> index 079702bb837f..dce52f564b5a 100644
> --- a/qemu-nbd.c
> +++ b/qemu-nbd.c
> @@ -294,6 +294,7 @@ static int qemu_nbd_client_list(SocketAddress *saddr, QCryptoTLSCreds *tls,
>                   [NBD_FLAG_CAN_MULTI_CONN_BIT]       = "multi",
>                   [NBD_FLAG_SEND_RESIZE_BIT]          = "resize",
>                   [NBD_FLAG_SEND_CACHE_BIT]           = "cache",
> +                [NBD_FLAG_SEND_FAST_ZERO_BIT]       = "fast-zero",
>               };
> 
>               printf("  size:  %" PRIu64 "\n", list[i].size);
>
Eric Blake Aug. 30, 2019, 11:37 p.m. UTC | #2
On 8/30/19 1:07 PM, Vladimir Sementsov-Ogievskiy wrote:
> 23.08.2019 17:37, Eric Blake wrote:
>> Commit fe0480d6 and friends added BDRV_REQ_NO_FALLBACK as a way to
>> avoid wasting time on a preliminary write-zero request that will later
>> be rewritten by actual data, if it is known that the write-zero
>> request will use a slow fallback; but in doing so, could not optimize
>> for NBD.  The NBD specification is now considering an extension that
>> will allow passing on those semantics; this patch updates the new
>> protocol bits and 'qemu-nbd --list' output to recognize the bit, as
>> well as the new errno value possible when using the new flag; while
>> upcoming patches will improve the client to use the feature when
>> present, and the server to advertise support for it.
>>
>> Signed-off-by: Eric Blake <eblake@redhat.com>

>> +++ b/nbd/server.c
>> @@ -55,6 +55,8 @@ static int system_errno_to_nbd_errno(int err)
>>           return NBD_ENOSPC;
>>       case EOVERFLOW:
>>           return NBD_EOVERFLOW;
>> +    case ENOTSUP:
>> +        return NBD_ENOTSUP;
> 
> This may provoke returning NBD_ENOTSUP in other cases, not only new one we are going to add.

Correct. But the spec only said SHOULD avoid ENOTSUP in those other
cases, not MUST avoid ENOTSUP; and in practice, either the client that
is not suspecting it will treat it the same as NBD_EINVAL, or we've
managed to get a slightly better error message across than normal.  I
don't see that as a real show-stopper.

But if it bothers you, note that in nbdkit, I actually coded things up
to refuse to send NBD_EOVERFLOW unless NBD_CMD_FLAG_DF was set, and to
refuse to send NBD_ENOTSUP unless NBD_CMD_FLAG_FAST_ZERO was set. I
could copy that behavior here, if we want qemu to be that much stricter
as a server.

(Note to self: also check if, when using structured replies to send
error messages, in addition to the code, if the string contains
strerror(errno) from BEFORE the mapping, rather than after we've lost
information to the more-limited NBD_E* values)
Vladimir Sementsov-Ogievskiy Aug. 31, 2019, 8:11 a.m. UTC | #3
31.08.2019 2:37, Eric Blake wrote:
> On 8/30/19 1:07 PM, Vladimir Sementsov-Ogievskiy wrote:
>> 23.08.2019 17:37, Eric Blake wrote:
>>> Commit fe0480d6 and friends added BDRV_REQ_NO_FALLBACK as a way to
>>> avoid wasting time on a preliminary write-zero request that will later
>>> be rewritten by actual data, if it is known that the write-zero
>>> request will use a slow fallback; but in doing so, could not optimize
>>> for NBD.  The NBD specification is now considering an extension that
>>> will allow passing on those semantics; this patch updates the new
>>> protocol bits and 'qemu-nbd --list' output to recognize the bit, as
>>> well as the new errno value possible when using the new flag; while
>>> upcoming patches will improve the client to use the feature when
>>> present, and the server to advertise support for it.
>>>
>>> Signed-off-by: Eric Blake <eblake@redhat.com>
> 
>>> +++ b/nbd/server.c
>>> @@ -55,6 +55,8 @@ static int system_errno_to_nbd_errno(int err)
>>>            return NBD_ENOSPC;
>>>        case EOVERFLOW:
>>>            return NBD_EOVERFLOW;
>>> +    case ENOTSUP:
>>> +        return NBD_ENOTSUP;
>>
>> This may provoke returning NBD_ENOTSUP in other cases, not only new one we are going to add.
> 
> Correct. But the spec only said SHOULD avoid ENOTSUP in those other
> cases, not MUST avoid ENOTSUP; and in practice, either the client that
> is not suspecting it will treat it the same as NBD_EINVAL, or we've
> managed to get a slightly better error message across than normal.  I
> don't see that as a real show-stopper.
> 
> But if it bothers you,

No, I think it doesn't. OK, ENOTSUP is ENOTSUP anyway, it shouldn't be treated incorrectly.

> note that in nbdkit, I actually coded things up
> to refuse to send NBD_EOVERFLOW unless NBD_CMD_FLAG_DF was set, and to
> refuse to send NBD_ENOTSUP unless NBD_CMD_FLAG_FAST_ZERO was set. I
> could copy that behavior here, if we want qemu to be that much stricter
> as a server.
> 
> (Note to self: also check if, when using structured replies to send
> error messages, in addition to the code, if the string contains
> strerror(errno) from BEFORE the mapping, rather than after we've lost
> information to the more-limited NBD_E* values)
>
Vladimir Sementsov-Ogievskiy Aug. 31, 2019, 8:20 a.m. UTC | #4
23.08.2019 17:37, Eric Blake wrote:
> Commit fe0480d6 and friends added BDRV_REQ_NO_FALLBACK as a way to
> avoid wasting time on a preliminary write-zero request that will later
> be rewritten by actual data, if it is known that the write-zero
> request will use a slow fallback; but in doing so, could not optimize
> for NBD.  The NBD specification is now considering an extension that
> will allow passing on those semantics; this patch updates the new
> protocol bits and 'qemu-nbd --list' output to recognize the bit, as
> well as the new errno value possible when using the new flag; while
> upcoming patches will improve the client to use the feature when
> present, and the server to advertise support for it.
> 
> Signed-off-by: Eric Blake<eblake@redhat.com>

Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Eric Blake Sept. 3, 2019, 6:49 p.m. UTC | #5
On 8/30/19 1:07 PM, Vladimir Sementsov-Ogievskiy wrote:
> 23.08.2019 17:37, Eric Blake wrote:
>> Commit fe0480d6 and friends added BDRV_REQ_NO_FALLBACK as a way to
>> avoid wasting time on a preliminary write-zero request that will later
>> be rewritten by actual data, if it is known that the write-zero
>> request will use a slow fallback; but in doing so, could not optimize
>> for NBD.  The NBD specification is now considering an extension that
>> will allow passing on those semantics; this patch updates the new
>> protocol bits and 'qemu-nbd --list' output to recognize the bit, as
>> well as the new errno value possible when using the new flag; while
>> upcoming patches will improve the client to use the feature when
>> present, and the server to advertise support for it.
>>

>> +++ b/nbd/server.c
>> @@ -55,6 +55,8 @@ static int system_errno_to_nbd_errno(int err)
>>           return NBD_ENOSPC;
>>       case EOVERFLOW:
>>           return NBD_EOVERFLOW;
>> +    case ENOTSUP:
>> +        return NBD_ENOTSUP;
> 

I'm squashing this in:

diff --git i/nbd/server.c w/nbd/server.c
index b3bd08ef2953..4992148de1c4 100644
--- i/nbd/server.c
+++ w/nbd/server.c
@@ -56,6 +56,9 @@ static int system_errno_to_nbd_errno(int err)
     case EOVERFLOW:
         return NBD_EOVERFLOW;
     case ENOTSUP:
+#if ENOTSUP != EOPNOTSUPP
+    case EOPNOTSUPP:
+#endif
         return NBD_ENOTSUP;
     case ESHUTDOWN:
         return NBD_ESHUTDOWN;

It makes no difference on Linux, but may matter on other platforms
(since POSIX says the two may be equivalent, but may also be distinct).

>
diff mbox series

Patch

diff --git a/docs/interop/nbd.txt b/docs/interop/nbd.txt
index 6dfec7f47647..45118809618e 100644
--- a/docs/interop/nbd.txt
+++ b/docs/interop/nbd.txt
@@ -53,4 +53,5 @@  the operation of that feature.
 * 2.12: NBD_CMD_BLOCK_STATUS for "base:allocation"
 * 3.0: NBD_OPT_STARTTLS with TLS Pre-Shared Keys (PSK),
 NBD_CMD_BLOCK_STATUS for "qemu:dirty-bitmap:", NBD_CMD_CACHE
-* 4.2: NBD_FLAG_CAN_MULTI_CONN for sharable read-only exports
+* 4.2: NBD_FLAG_CAN_MULTI_CONN for sharable read-only exports,
+NBD_CMD_FLAG_FAST_ZERO
diff --git a/include/block/nbd.h b/include/block/nbd.h
index 2c87b42dfd48..21550747cf35 100644
--- a/include/block/nbd.h
+++ b/include/block/nbd.h
@@ -140,6 +140,7 @@  enum {
     NBD_FLAG_CAN_MULTI_CONN_BIT     =  8, /* Multi-client cache consistent */
     NBD_FLAG_SEND_RESIZE_BIT        =  9, /* Send resize */
     NBD_FLAG_SEND_CACHE_BIT         = 10, /* Send CACHE (prefetch) */
+    NBD_FLAG_SEND_FAST_ZERO_BIT     = 11, /* FAST_ZERO flag for WRITE_ZEROES */
 };

 #define NBD_FLAG_HAS_FLAGS         (1 << NBD_FLAG_HAS_FLAGS_BIT)
@@ -153,6 +154,7 @@  enum {
 #define NBD_FLAG_CAN_MULTI_CONN    (1 << NBD_FLAG_CAN_MULTI_CONN_BIT)
 #define NBD_FLAG_SEND_RESIZE       (1 << NBD_FLAG_SEND_RESIZE_BIT)
 #define NBD_FLAG_SEND_CACHE        (1 << NBD_FLAG_SEND_CACHE_BIT)
+#define NBD_FLAG_SEND_FAST_ZERO    (1 << NBD_FLAG_SEND_FAST_ZERO_BIT)

 /* New-style handshake (global) flags, sent from server to client, and
    control what will happen during handshake phase. */
@@ -205,6 +207,7 @@  enum {
 #define NBD_CMD_FLAG_DF         (1 << 2) /* don't fragment structured read */
 #define NBD_CMD_FLAG_REQ_ONE    (1 << 3) /* only one extent in BLOCK_STATUS
                                           * reply chunk */
+#define NBD_CMD_FLAG_FAST_ZERO  (1 << 4) /* fail if WRITE_ZEROES is not fast */

 /* Supported request types */
 enum {
@@ -270,6 +273,7 @@  static inline bool nbd_reply_type_is_error(int type)
 #define NBD_EINVAL     22
 #define NBD_ENOSPC     28
 #define NBD_EOVERFLOW  75
+#define NBD_ENOTSUP    95
 #define NBD_ESHUTDOWN  108

 /* Details collected by NBD_OPT_EXPORT_NAME and NBD_OPT_GO */
diff --git a/nbd/common.c b/nbd/common.c
index cc8b278e541d..ddfe7d118371 100644
--- a/nbd/common.c
+++ b/nbd/common.c
@@ -201,6 +201,8 @@  const char *nbd_err_lookup(int err)
         return "ENOSPC";
     case NBD_EOVERFLOW:
         return "EOVERFLOW";
+    case NBD_ENOTSUP:
+        return "ENOTSUP";
     case NBD_ESHUTDOWN:
         return "ESHUTDOWN";
     default:
@@ -231,6 +233,9 @@  int nbd_errno_to_system_errno(int err)
     case NBD_EOVERFLOW:
         ret = EOVERFLOW;
         break;
+    case NBD_ENOTSUP:
+        ret = ENOTSUP;
+        break;
     case NBD_ESHUTDOWN:
         ret = ESHUTDOWN;
         break;
diff --git a/nbd/server.c b/nbd/server.c
index b5577828aa44..981bc3cb1151 100644
--- a/nbd/server.c
+++ b/nbd/server.c
@@ -55,6 +55,8 @@  static int system_errno_to_nbd_errno(int err)
         return NBD_ENOSPC;
     case EOVERFLOW:
         return NBD_EOVERFLOW;
+    case ENOTSUP:
+        return NBD_ENOTSUP;
     case ESHUTDOWN:
         return NBD_ESHUTDOWN;
     case EINVAL:
diff --git a/qemu-nbd.c b/qemu-nbd.c
index 079702bb837f..dce52f564b5a 100644
--- a/qemu-nbd.c
+++ b/qemu-nbd.c
@@ -294,6 +294,7 @@  static int qemu_nbd_client_list(SocketAddress *saddr, QCryptoTLSCreds *tls,
                 [NBD_FLAG_CAN_MULTI_CONN_BIT]       = "multi",
                 [NBD_FLAG_SEND_RESIZE_BIT]          = "resize",
                 [NBD_FLAG_SEND_CACHE_BIT]           = "cache",
+                [NBD_FLAG_SEND_FAST_ZERO_BIT]       = "fast-zero",
             };

             printf("  size:  %" PRIu64 "\n", list[i].size);