diff mbox series

[3/4] nbd/server: implement dirty bitmap export

Message ID 20180321121940.39426-4-vsementsov@virtuozzo.com
State New
Headers show
Series NBD export bitmaps | expand

Commit Message

Vladimir Sementsov-Ogievskiy March 21, 2018, 12:19 p.m. UTC
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
---
 include/block/nbd.h |   2 +
 nbd/server.c        | 207 ++++++++++++++++++++++++++++++++++++++++++++++++++--
 2 files changed, 203 insertions(+), 6 deletions(-)

Comments

Eric Blake March 21, 2018, 4:57 p.m. UTC | #1
On 03/21/2018 07:19 AM, Vladimir Sementsov-Ogievskiy wrote:
> Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>

Rather sparse on the details in the commit message; I had to read the 
patch to even learn what the new namespace is.

> ---
>   include/block/nbd.h |   2 +
>   nbd/server.c        | 207 ++++++++++++++++++++++++++++++++++++++++++++++++++--
>   2 files changed, 203 insertions(+), 6 deletions(-)
> 

> +++ b/nbd/server.c
> @@ -23,6 +23,12 @@
>   #include "nbd-internal.h"
>   
>   #define NBD_META_ID_BASE_ALLOCATION 0
> +#define NBD_META_ID_DIRTY_BITMAP 1
> +
> +#define NBD_MAX_BITMAP_EXTENTS (0x100000 / 8) /* 1 mb of extents data */
> +#define MAX_EXTENT_LENGTH QEMU_ALIGN_DOWN(INT32_MAX, 512)

Worth comments on these two limits?

> +
> +#define NBD_STATE_DIRTY 1

Does this belong better in nbd.h, alongside NBD_STATE_HOLE?  (And 
defining it as (1 << 0) might be nicer, too).

>   
>   static int system_errno_to_nbd_errno(int err)
>   {
> @@ -80,6 +86,9 @@ struct NBDExport {
>   
>       BlockBackend *eject_notifier_blk;
>       Notifier eject_notifier;
> +
> +    BdrvDirtyBitmap *export_bitmap;
> +    char *export_bitmap_name;
>   };
>   
>   static QTAILQ_HEAD(, NBDExport) exports = QTAILQ_HEAD_INITIALIZER(exports);
> @@ -92,6 +101,7 @@ typedef struct NBDExportMetaContexts {
>       bool valid; /* means that negotiation of the option finished without
>                      errors */
>       bool base_allocation; /* export base:allocation context (block status) */
> +    bool dirty_bitmap; /* export qemu-dirty-bitmap:<export_bitmap_name> */

That's a LONG namespace name, and it does not lend itself well to other 
qemu extensions; so future qemu BLOCK_STATUS additions would require 
consuming yet another namespace and additional upstream NBD 
registration.  Wouldn't it be better to have the namespace be 'qemu:' 
(which we register with upstream NBD just once), where we are then free 
to document leafnames to look like 'dirty-bitmap:name' or 'foo:bar'?

>   } NBDExportMetaContexts;
>   
>   struct NBDClient {
> @@ -786,12 +796,32 @@ static int nbd_meta_base_query(NBDClient *client, NBDExportMetaContexts *meta,
>                                    &meta->base_allocation, errp);
>   }
>   
> +/* nbd_meta_bitmap_query
> + *
> + * Handle query to 'qemu-dirty-bitmap' namespace.
> + * 'len' is the amount of text remaining to be read from the current name, after
> + * the 'qemu-dirty-bitmap:' portion has been stripped.
> + *
> + * Return -errno on I/O error, 0 if option was completely handled by
> + * sending a reply about inconsistent lengths, or 1 on success. */
> +static int nbd_meta_bitmap_query(NBDClient *client, NBDExportMetaContexts *meta,
> +                                 uint32_t len, Error **errp)
> +{
> +    if (!client->exp->export_bitmap) {
> +        return nbd_opt_skip(client, len, errp);
> +    }
> +
> +    return nbd_meta_single_query(client, client->exp->export_bitmap_name, len,
> +                                 &meta->dirty_bitmap, errp);

So if I'm reading this right, a client can do _LIST_ 
'qemu-dirty-bitmap:' (or 'qemu:dirty-bitmap:' if we choose a shorter 
initial namespace) and get back the exported bitmap name; or the user 
already knows the bitmap name and both _LIST_ and _SET_ 
'qemu-dirty-bitmap:name' return the one supported bitmap for that NBD 
server.


>   /* nbd_co_send_extents
> + *
> + * NBD_REPLY_FLAG_DONE is not set, don't forget to send it.
> + *
>    * @extents should be in big-endian */
>   static int nbd_co_send_extents(NBDClient *client, uint64_t handle,
>                                  NBDExtent *extents, unsigned nb_extents,

Worth a bool flag that the caller can inform this function whether to 
include FLAG_DONE?
> +
> +static int nbd_co_send_bitmap(NBDClient *client, uint64_t handle,
> +                              BdrvDirtyBitmap *bitmap, uint64_t offset,
> +                              uint64_t length, uint32_t context_id,
> +                              Error **errp)
> +{
> +    int ret;
> +    unsigned nb_extents;
> +    NBDExtent *extents = g_new(NBDExtent, NBD_MAX_BITMAP_EXTENTS);

Is this correct even when the client used NBD_CMD_FLAG_REQ_ONE?
Vladimir Sementsov-Ogievskiy March 22, 2018, 3:26 p.m. UTC | #2
21.03.2018 19:57, Eric Blake wrote:
> On 03/21/2018 07:19 AM, Vladimir Sementsov-Ogievskiy wrote:
>> Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
>
> Rather sparse on the details in the commit message; I had to read the 
> patch to even learn what the new namespace is.

oh, yes, sorry :(

>> @@ -92,6 +101,7 @@ typedef struct NBDExportMetaContexts {
>>       bool valid; /* means that negotiation of the option finished 
>> without
>>                      errors */
>>       bool base_allocation; /* export base:allocation context (block 
>> status) */
>> +    bool dirty_bitmap; /* export 
>> qemu-dirty-bitmap:<export_bitmap_name> */
>
> That's a LONG namespace name, and it does not lend itself well to 
> other qemu extensions; so future qemu BLOCK_STATUS additions would 
> require consuming yet another namespace and additional upstream NBD 
> registration.  Wouldn't it be better to have the namespace be 'qemu:' 
> (which we register with upstream NBD just once), where we are then 
> free to document leafnames to look like 'dirty-bitmap:name' or 'foo:bar'?

No doubts. (and this shows, that we may want namespaces in namespaces, 
so we'll parse ':' anyway).

Only one note here (I'm ashamed): 'qemu-dirty-bitmap' namespace is used 
in Virtuozzo for the feature, yes, without 'x-' prefix. It's my fault, 
and it should not influence final solution. The probability of problems 
is near to zero (especially if we decide now to use 'qemu:' namespace 
for all qemu-specific things. But I'm not sure about, should we mention 
this fact in the spec.
[cc NBD list]
Vladimir Sementsov-Ogievskiy March 22, 2018, 3:32 p.m. UTC | #3
21.03.2018 19:57, Eric Blake wrote:
> On 03/21/2018 07:19 AM, Vladimir Sementsov-Ogievskiy wrote:
>> Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
>

[...]

>
>> +
>> +#define NBD_STATE_DIRTY 1
>
> Does this belong better in nbd.h, alongside NBD_STATE_HOLE?  (And 
> defining it as (1 << 0) might be nicer, too).

It's not used anywhere else, but it may be worth moving, for consistency 
and for future users. Ok, I'll move.


[...]

>>   +/* nbd_meta_bitmap_query
>> + *
>> + * Handle query to 'qemu-dirty-bitmap' namespace.
>> + * 'len' is the amount of text remaining to be read from the current 
>> name, after
>> + * the 'qemu-dirty-bitmap:' portion has been stripped.
>> + *
>> + * Return -errno on I/O error, 0 if option was completely handled by
>> + * sending a reply about inconsistent lengths, or 1 on success. */
>> +static int nbd_meta_bitmap_query(NBDClient *client, 
>> NBDExportMetaContexts *meta,
>> +                                 uint32_t len, Error **errp)
>> +{
>> +    if (!client->exp->export_bitmap) {
>> +        return nbd_opt_skip(client, len, errp);
>> +    }
>> +
>> +    return nbd_meta_single_query(client, 
>> client->exp->export_bitmap_name, len,
>> +                                 &meta->dirty_bitmap, errp);
>
> So if I'm reading this right, a client can do _LIST_ 
> 'qemu-dirty-bitmap:' (or 'qemu:dirty-bitmap:' if we choose a shorter 
> initial namespace) and get back the exported bitmap name; or the user 
> already knows the bitmap name and both _LIST_ and _SET_ 
> 'qemu-dirty-bitmap:name' return the one supported bitmap for that NBD 
> server.

yes

>
>
>>   /* nbd_co_send_extents
>> + *
>> + * NBD_REPLY_FLAG_DONE is not set, don't forget to send it.
>> + *
>>    * @extents should be in big-endian */
>>   static int nbd_co_send_extents(NBDClient *client, uint64_t handle,
>>                                  NBDExtent *extents, unsigned 
>> nb_extents,
>
> Worth a bool flag that the caller can inform this function whether to 
> include FLAG_DONE?

It was simpler to just send it separately, after all BLOCK_STATUS 
replies. So, I didn't need it.

>> +
>> +static int nbd_co_send_bitmap(NBDClient *client, uint64_t handle,
>> +                              BdrvDirtyBitmap *bitmap, uint64_t offset,
>> +                              uint64_t length, uint32_t context_id,
>> +                              Error **errp)
>> +{
>> +    int ret;
>> +    unsigned nb_extents;
>> +    NBDExtent *extents = g_new(NBDExtent, NBD_MAX_BITMAP_EXTENTS);
>
> Is this correct even when the client used NBD_CMD_FLAG_REQ_ONE?
>

Oops, looks like a bug.
Vladimir Sementsov-Ogievskiy March 28, 2018, 10:08 a.m. UTC | #4
22.03.2018 18:26, Vladimir Sementsov-Ogievskiy wrote:
> 21.03.2018 19:57, Eric Blake wrote:
>> On 03/21/2018 07:19 AM, Vladimir Sementsov-Ogievskiy wrote:
>>> Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
>>
>> Rather sparse on the details in the commit message; I had to read the 
>> patch to even learn what the new namespace is.
>
> oh, yes, sorry :(
>
>>> @@ -92,6 +101,7 @@ typedef struct NBDExportMetaContexts {
>>>       bool valid; /* means that negotiation of the option finished 
>>> without
>>>                      errors */
>>>       bool base_allocation; /* export base:allocation context (block 
>>> status) */
>>> +    bool dirty_bitmap; /* export 
>>> qemu-dirty-bitmap:<export_bitmap_name> */
>>
>> That's a LONG namespace name, and it does not lend itself well to 
>> other qemu extensions; so future qemu BLOCK_STATUS additions would 
>> require consuming yet another namespace and additional upstream NBD 
>> registration.  Wouldn't it be better to have the namespace be 'qemu:' 
>> (which we register with upstream NBD just once), where we are then 
>> free to document leafnames to look like 'dirty-bitmap:name' or 
>> 'foo:bar'?
>
> No doubts. (and this shows, that we may want namespaces in namespaces, 
> so we'll parse ':' anyway).
>
> Only one note here (I'm ashamed): 'qemu-dirty-bitmap' namespace is 
> used in Virtuozzo for the feature, yes, without 'x-' prefix. It's my 
> fault, and it should not influence final solution. The probability of 
> problems is near to zero (especially if we decide now to use 'qemu:' 
> namespace for all qemu-specific things. But I'm not sure about, should 
> we mention this fact in the spec.
> [cc NBD list]
>

Looks like at this point, we can safely move to qemu:dirty-bitmap:name, 
or any other thing, and we don't need to support my first variant.
diff mbox series

Patch

diff --git a/include/block/nbd.h b/include/block/nbd.h
index fcdcd54502..f0b459283f 100644
--- a/include/block/nbd.h
+++ b/include/block/nbd.h
@@ -315,6 +315,8 @@  void nbd_client_put(NBDClient *client);
 void nbd_server_start(SocketAddress *addr, const char *tls_creds,
                       Error **errp);
 
+void nbd_export_bitmap(NBDExport *exp, const char *bitmap,
+                       const char *bitmap_export_name, Error **errp);
 
 /* nbd_read
  * Reads @size bytes from @ioc. Returns 0 on success.
diff --git a/nbd/server.c b/nbd/server.c
index 8fe53ffd4b..6554919ef2 100644
--- a/nbd/server.c
+++ b/nbd/server.c
@@ -23,6 +23,12 @@ 
 #include "nbd-internal.h"
 
 #define NBD_META_ID_BASE_ALLOCATION 0
+#define NBD_META_ID_DIRTY_BITMAP 1
+
+#define NBD_MAX_BITMAP_EXTENTS (0x100000 / 8) /* 1 mb of extents data */
+#define MAX_EXTENT_LENGTH QEMU_ALIGN_DOWN(INT32_MAX, 512)
+
+#define NBD_STATE_DIRTY 1
 
 static int system_errno_to_nbd_errno(int err)
 {
@@ -80,6 +86,9 @@  struct NBDExport {
 
     BlockBackend *eject_notifier_blk;
     Notifier eject_notifier;
+
+    BdrvDirtyBitmap *export_bitmap;
+    char *export_bitmap_name;
 };
 
 static QTAILQ_HEAD(, NBDExport) exports = QTAILQ_HEAD_INITIALIZER(exports);
@@ -92,6 +101,7 @@  typedef struct NBDExportMetaContexts {
     bool valid; /* means that negotiation of the option finished without
                    errors */
     bool base_allocation; /* export base:allocation context (block status) */
+    bool dirty_bitmap; /* export qemu-dirty-bitmap:<export_bitmap_name> */
 } NBDExportMetaContexts;
 
 struct NBDClient {
@@ -786,12 +796,32 @@  static int nbd_meta_base_query(NBDClient *client, NBDExportMetaContexts *meta,
                                  &meta->base_allocation, errp);
 }
 
+/* nbd_meta_bitmap_query
+ *
+ * Handle query to 'qemu-dirty-bitmap' namespace.
+ * 'len' is the amount of text remaining to be read from the current name, after
+ * the 'qemu-dirty-bitmap:' portion has been stripped.
+ *
+ * Return -errno on I/O error, 0 if option was completely handled by
+ * sending a reply about inconsistent lengths, or 1 on success. */
+static int nbd_meta_bitmap_query(NBDClient *client, NBDExportMetaContexts *meta,
+                                 uint32_t len, Error **errp)
+{
+    if (!client->exp->export_bitmap) {
+        return nbd_opt_skip(client, len, errp);
+    }
+
+    return nbd_meta_single_query(client, client->exp->export_bitmap_name, len,
+                                 &meta->dirty_bitmap, errp);
+}
+
 struct {
     const char *ns;
     int (*func)(NBDClient *, NBDExportMetaContexts *, uint32_t, Error **);
 } meta_namespace_handlers[] = {
     /* namespaces should go in non-decreasing order by name length */
     {.ns = "base:", .func = nbd_meta_base_query},
+    {.ns = "qemu-dirty-bitmap:", .func = nbd_meta_bitmap_query},
 };
 
 /* nbd_negotiate_meta_query
@@ -921,6 +951,17 @@  static int nbd_negotiate_meta_queries(NBDClient *client,
         }
     }
 
+    if (meta->dirty_bitmap) {
+        char *context = g_strdup_printf("qemu-dirty-bitmap:%s",
+                                        exp->export_bitmap_name);
+        ret = nbd_negotiate_send_meta_context(client, context,
+                                              NBD_META_ID_DIRTY_BITMAP,
+                                              errp);
+        if (ret < 0) {
+            return ret;
+        }
+    }
+
     ret = nbd_negotiate_send_rep(client, NBD_REP_ACK, errp);
     if (ret == 0) {
         meta->valid = true;
@@ -1549,6 +1590,11 @@  void nbd_export_put(NBDExport *exp)
             exp->blk = NULL;
         }
 
+        if (exp->export_bitmap) {
+            bdrv_dirty_bitmap_set_qmp_locked(exp->export_bitmap, false);
+            g_free(exp->export_bitmap_name);
+        }
+
         g_free(exp);
     }
 }
@@ -1790,6 +1836,9 @@  static int blockstatus_to_extent_be(BlockDriverState *bs, uint64_t offset,
 }
 
 /* nbd_co_send_extents
+ *
+ * NBD_REPLY_FLAG_DONE is not set, don't forget to send it.
+ *
  * @extents should be in big-endian */
 static int nbd_co_send_extents(NBDClient *client, uint64_t handle,
                                NBDExtent *extents, unsigned nb_extents,
@@ -1802,7 +1851,7 @@  static int nbd_co_send_extents(NBDClient *client, uint64_t handle,
         {.iov_base = extents, .iov_len = nb_extents * sizeof(extents[0])}
     };
 
-    set_be_chunk(&chunk.h, NBD_REPLY_FLAG_DONE, NBD_REPLY_TYPE_BLOCK_STATUS,
+    set_be_chunk(&chunk.h, 0, NBD_REPLY_TYPE_BLOCK_STATUS,
                  handle, sizeof(chunk) - sizeof(chunk.h) + iov[1].iov_len);
     stl_be_p(&chunk.context_id, context_id);
 
@@ -1827,6 +1876,91 @@  static int nbd_co_send_block_status(NBDClient *client, uint64_t handle,
     return nbd_co_send_extents(client, handle, &extent, 1, context_id, errp);
 }
 
+/* Set several extents, describing region of given @length with given @flags.
+ * Do not set more than @nb_extents, return number of set extents.
+ */
+static unsigned add_extents(NBDExtent *extents, unsigned nb_extents,
+                            uint64_t length, uint32_t flags)
+{
+    unsigned i = 0;
+    uint32_t max_extent_be = cpu_to_be32(MAX_EXTENT_LENGTH);
+    uint32_t flags_be = cpu_to_be32(flags);
+
+    for (i = 0; i < nb_extents && length > MAX_EXTENT_LENGTH;
+         i++, length -= MAX_EXTENT_LENGTH)
+    {
+        extents[i].length = max_extent_be;
+        extents[i].flags = flags_be;
+    }
+
+    if (length > 0 && i < nb_extents) {
+        extents[i].length = cpu_to_be32(length);
+        extents[i].flags = flags_be;
+        i++;
+    }
+
+    return i;
+}
+
+static unsigned bitmap_to_extents(BdrvDirtyBitmap *bitmap, uint64_t offset,
+                                  uint64_t length, NBDExtent *extents,
+                                  unsigned nb_extents)
+{
+    uint64_t begin = offset, end;
+    uint64_t overall_end = offset + length;
+    unsigned i = 0;
+    BdrvDirtyBitmapIter *it;
+    bool dirty;
+
+    bdrv_dirty_bitmap_lock(bitmap);
+
+    it = bdrv_dirty_iter_new(bitmap);
+    dirty = bdrv_get_dirty_locked(NULL, bitmap, offset);
+
+    while (begin < overall_end && i < nb_extents) {
+        if (dirty) {
+            end = bdrv_dirty_bitmap_next_zero(bitmap, begin);
+        } else {
+            bdrv_set_dirty_iter(it, begin);
+            end = bdrv_dirty_iter_next(it);
+        }
+        if (end == -1) {
+            end = overall_end;
+        }
+
+        i += add_extents(extents + i, nb_extents - i, end - begin,
+                         dirty ? NBD_STATE_DIRTY : 0);
+        begin = end;
+        dirty = !dirty;
+    }
+
+    bdrv_dirty_iter_free(it);
+
+    bdrv_dirty_bitmap_unlock(bitmap);
+
+    return i;
+}
+
+static int nbd_co_send_bitmap(NBDClient *client, uint64_t handle,
+                              BdrvDirtyBitmap *bitmap, uint64_t offset,
+                              uint64_t length, uint32_t context_id,
+                              Error **errp)
+{
+    int ret;
+    unsigned nb_extents;
+    NBDExtent *extents = g_new(NBDExtent, NBD_MAX_BITMAP_EXTENTS);
+
+    nb_extents = bitmap_to_extents(bitmap, offset, length, extents,
+                                   NBD_MAX_BITMAP_EXTENTS);
+
+    ret = nbd_co_send_extents(client, handle, extents, nb_extents, context_id,
+                              errp);
+
+    g_free(extents);
+
+    return ret;
+}
+
 /* nbd_co_receive_request
  * Collect a client request. Return 0 if request looks valid, -EIO to drop
  * connection right away, and any other negative value to report an error to
@@ -2040,11 +2174,32 @@  static coroutine_fn int nbd_handle_request(NBDClient *client,
                                       "discard failed", errp);
 
     case NBD_CMD_BLOCK_STATUS:
-        if (client->export_meta.valid && client->export_meta.base_allocation) {
-            return nbd_co_send_block_status(client, request->handle,
-                                            blk_bs(exp->blk), request->from,
-                                            request->len,
-                                            NBD_META_ID_BASE_ALLOCATION, errp);
+        if (client->export_meta.valid &&
+            (client->export_meta.base_allocation ||
+             client->export_meta.dirty_bitmap))
+        {
+            if (client->export_meta.base_allocation) {
+                ret = nbd_co_send_block_status(client, request->handle,
+                                               blk_bs(exp->blk), request->from,
+                                               request->len,
+                                               NBD_META_ID_BASE_ALLOCATION,
+                                               errp);
+                if (ret < 0) {
+                    return ret;
+                }
+            }
+
+            if (client->export_meta.dirty_bitmap) {
+                ret = nbd_co_send_bitmap(client, request->handle,
+                                         client->exp->export_bitmap,
+                                         request->from, request->len,
+                                         NBD_META_ID_DIRTY_BITMAP, errp);
+                if (ret < 0) {
+                    return ret;
+                }
+            }
+
+            return nbd_co_send_structured_done(client, request->handle, errp);
         } else {
             return nbd_send_generic_reply(client, request->handle, -EINVAL,
                                           "CMD_BLOCK_STATUS not negotiated",
@@ -2196,3 +2351,43 @@  void nbd_client_new(NBDExport *exp,
     co = qemu_coroutine_create(nbd_co_client_start, client);
     qemu_coroutine_enter(co);
 }
+
+void nbd_export_bitmap(NBDExport *exp, const char *bitmap,
+                       const char *bitmap_export_name, Error **errp)
+{
+    BdrvDirtyBitmap *bm = NULL;
+    BlockDriverState *bs = blk_bs(exp->blk);
+
+    if (exp->export_bitmap) {
+        error_setg(errp, "Export bitmap is already set");
+        return;
+    }
+
+    while (true) {
+        bm = bdrv_find_dirty_bitmap(bs, bitmap);
+        if (bm != NULL || bs->backing == NULL) {
+            break;
+        }
+
+        bs = bs->backing->bs;
+    }
+
+    if (bm == NULL) {
+        error_setg(errp, "Bitmap '%s' is not found", bitmap);
+        return;
+    }
+
+    if (bdrv_dirty_bitmap_enabled(bm)) {
+        error_setg(errp, "Bitmap '%s' is enabled", bitmap);
+        return;
+    }
+
+    if (bdrv_dirty_bitmap_qmp_locked(bm)) {
+        error_setg(errp, "Bitmap '%s' is locked", bitmap);
+        return;
+    }
+
+    bdrv_dirty_bitmap_set_qmp_locked(bm, true);
+    exp->export_bitmap = bm;
+    exp->export_bitmap_name = g_strdup(bitmap_export_name);
+}