Patchwork [v3,1/8] block: add bdrv_add_before_write_cb()

login
register
mail settings
Submitter Stefan Hajnoczi
Date May 15, 2013, 2:34 p.m.
Message ID <1368628476-19622-2-git-send-email-stefanha@redhat.com>
Download mbox | patch
Permalink /patch/244097/
State New
Headers show

Comments

Stefan Hajnoczi - May 15, 2013, 2:34 p.m.
The bdrv_add_before_write_cb() function installs a callback that is
invoked before a write request is processed.  This will be used to
implement copy-on-write point-in-time snapshots where we need to copy
out old data before overwriting it.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 block.c                   | 37 +++++++++++++++++++++++++++++++++++++
 include/block/block_int.h | 32 ++++++++++++++++++++++++++++++++
 2 files changed, 69 insertions(+)
Paolo Bonzini - May 15, 2013, 2:42 p.m.
Il 15/05/2013 16:34, Stefan Hajnoczi ha scritto:
> The bdrv_add_before_write_cb() function installs a callback that is
> invoked before a write request is processed.  This will be used to
> implement copy-on-write point-in-time snapshots where we need to copy
> out old data before overwriting it.

Perhaps a notifier list that receives the BdrvTrackedRequest?  (BTW we
should probably remove all the notifier_remove wrappers, they're useless).

The BdrvTrackedRequest pointer would also act as a unique id of the request.

> Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
> ---
>  block.c                   | 37 +++++++++++++++++++++++++++++++++++++
>  include/block/block_int.h | 32 ++++++++++++++++++++++++++++++++
>  2 files changed, 69 insertions(+)
> 
> diff --git a/block.c b/block.c
> index 3f87489..0fd7167 100644
> --- a/block.c
> +++ b/block.c
> @@ -308,6 +308,7 @@ BlockDriverState *bdrv_new(const char *device_name)
>      }
>      bdrv_iostatus_disable(bs);
>      notifier_list_init(&bs->close_notifiers);
> +    QTAILQ_INIT(&bs->before_write_cbs);
>  
>      return bs;
>  }
> @@ -1383,6 +1384,8 @@ void bdrv_close(BlockDriverState *bs)
>          bs->growable = 0;
>          QDECREF(bs->options);
>          bs->options = NULL;
> +        assert(QTAILQ_EMPTY(&bs->before_write_cbs));
> +        QTAILQ_INIT(&bs->before_write_cbs);

INIT not needed if you assert before.

Paolo

>  
>          if (bs->file != NULL) {
>              bdrv_delete(bs->file);
> @@ -2587,6 +2590,22 @@ static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
>      return ret;
>  }
>  
> +struct BDRVBeforeWrite {
> +    BDRVBeforeWriteFunc *cb;
> +    QTAILQ_ENTRY(BDRVBeforeWrite) list;
> +};
> +
> +static void invoke_before_write_cb(BlockDriverState *bs, int64_t sector_num,
> +                                   int nb_sectors, QEMUIOVector *qiov)
> +{
> +    BDRVBeforeWrite *before_write;
> +    BDRVBeforeWrite *tmp;
> +    QTAILQ_FOREACH_SAFE(before_write, &bs->before_write_cbs, list, tmp) {
> +        before_write->cb(bs, sector_num, nb_sectors, qiov);
> +    }
> +}
> +
> +
>  /*
>   * Handle a write request in coroutine context
>   */
> @@ -2619,6 +2638,8 @@ static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
>  
>      tracked_request_begin(&req, bs, sector_num, nb_sectors, true);
>  
> +    invoke_before_write_cb(bs, sector_num, nb_sectors, qiov);
> +
>      if (flags & BDRV_REQ_ZERO_WRITE) {
>          ret = bdrv_co_do_write_zeroes(bs, sector_num, nb_sectors);
>      } else {
> @@ -4883,3 +4904,19 @@ AioContext *bdrv_get_aio_context(BlockDriverState *bs)
>      /* Currently BlockDriverState always uses the main loop AioContext */
>      return qemu_get_aio_context();
>  }
> +
> +BDRVBeforeWrite *bdrv_add_before_write_cb(BlockDriverState *bs,
> +                                          BDRVBeforeWriteFunc *cb)
> +{
> +    BDRVBeforeWrite *elem = g_slice_new(BDRVBeforeWrite);
> +    elem->cb = cb;
> +    QTAILQ_INSERT_TAIL(&bs->before_write_cbs, elem, list);
> +    return elem;
> +}
> +
> +void bdrv_remove_before_write_cb(BlockDriverState *bs,
> +                                 BDRVBeforeWrite *before_write)
> +{
> +    QTAILQ_REMOVE(&bs->before_write_cbs, before_write, list);
> +    g_slice_free(BDRVBeforeWrite, before_write);
> +}
> diff --git a/include/block/block_int.h b/include/block/block_int.h
> index 6078dd3..e2299df 100644
> --- a/include/block/block_int.h
> +++ b/include/block/block_int.h
> @@ -211,6 +211,16 @@ struct BlockDriver {
>      QLIST_ENTRY(BlockDriver) list;
>  };
>  
> +/**
> + * BDRVBeforeWriteFunc:
> + *
> + * See #bdrv_add_before_write_cb().
> + */
> +typedef void coroutine_fn BDRVBeforeWriteFunc(BlockDriverState *bs,
> +        int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
> +
> +typedef struct BDRVBeforeWrite BDRVBeforeWrite;
> +
>  /*
>   * Note: the function bdrv_append() copies and swaps contents of
>   * BlockDriverStates, so if you add new fields to this struct, please
> @@ -289,6 +299,9 @@ struct BlockDriverState {
>      /* long-running background operation */
>      BlockJob *job;
>  
> +    /* Callback before write request is processed */
> +    QTAILQ_HEAD(, BDRVBeforeWrite) before_write_cbs;
> +
>      QDict *options;
>  };
>  
> @@ -298,6 +311,25 @@ void bdrv_set_io_limits(BlockDriverState *bs,
>                          BlockIOLimit *io_limits);
>  
>  /**
> + * bdrv_add_before_write_cb:
> + *
> + * Register a callback that is invoked before write requests are processed but
> + * after any throttling or waiting for overlapping requests.
> + *
> + * Returns: a #BDRVBeforeWrite to use with bdrv_remove_before_write_cb()
> + */
> +BDRVBeforeWrite *bdrv_add_before_write_cb(BlockDriverState *bs,
> +                                          BDRVBeforeWriteFunc *cb);
> +
> +/**
> + * bdrv_remove_before_write_cb:
> + *
> + * Unregister a before write callback.
> + */
> +void bdrv_remove_before_write_cb(BlockDriverState *bs,
> +                                 BDRVBeforeWrite *before_write);
> +
> +/**
>   * bdrv_get_aio_context:
>   *
>   * Returns: the currently bound #AioContext
>
Wayne Xia - May 16, 2013, 2:42 a.m.
I reviewed the code; apart from the points Paolo raised, the function looks fine.

> Il 15/05/2013 16:34, Stefan Hajnoczi ha scritto:
>> The bdrv_add_before_write_cb() function installs a callback that is
>> invoked before a write request is processed.  This will be used to
>> implement copy-on-write point-in-time snapshots where we need to copy
>> out old data before overwriting it.
>
> Perhaps a notifier list that receives the BdrvTrackedRequest?  (BTW we
> should probably remove all the notifier_remove wrappers, they're useless).
>
> The BdrvTrackedRequest pointer would also act as a unique id of the request.
>
>> Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
>> ---
>>   block.c                   | 37 +++++++++++++++++++++++++++++++++++++
>>   include/block/block_int.h | 32 ++++++++++++++++++++++++++++++++
>>   2 files changed, 69 insertions(+)
>>
>> diff --git a/block.c b/block.c
>> index 3f87489..0fd7167 100644
>> --- a/block.c
>> +++ b/block.c
>> @@ -308,6 +308,7 @@ BlockDriverState *bdrv_new(const char *device_name)
>>       }
>>       bdrv_iostatus_disable(bs);
>>       notifier_list_init(&bs->close_notifiers);
>> +    QTAILQ_INIT(&bs->before_write_cbs);
>>
>>       return bs;
>>   }
>> @@ -1383,6 +1384,8 @@ void bdrv_close(BlockDriverState *bs)
>>           bs->growable = 0;
>>           QDECREF(bs->options);
>>           bs->options = NULL;
>> +        assert(QTAILQ_EMPTY(&bs->before_write_cbs));
>> +        QTAILQ_INIT(&bs->before_write_cbs);
>
> INIT not needed if you assert before.
>
> Paolo
>
Stefan Hajnoczi - May 16, 2013, 8:11 a.m.
On Wed, May 15, 2013 at 04:42:57PM +0200, Paolo Bonzini wrote:
> Il 15/05/2013 16:34, Stefan Hajnoczi ha scritto:
> > The bdrv_add_before_write_cb() function installs a callback that is
> > invoked before a write request is processed.  This will be used to
> > implement copy-on-write point-in-time snapshots where we need to copy
> > out old data before overwriting it.
> 
> Perhaps a notifier list that receives the BdrvTrackedRequest?  (BTW we
> should probably remove all the notifier_remove wrappers, they're useless).

Nice idea, done in v4.  I originally rejected NotifierList because it
only has a void * argument, but BdrvTrackedRequest has the information
we need.

Patch

diff --git a/block.c b/block.c
index 3f87489..0fd7167 100644
--- a/block.c
+++ b/block.c
@@ -308,6 +308,7 @@  BlockDriverState *bdrv_new(const char *device_name)
     }
     bdrv_iostatus_disable(bs);
     notifier_list_init(&bs->close_notifiers);
+    QTAILQ_INIT(&bs->before_write_cbs);
 
     return bs;
 }
@@ -1383,6 +1384,8 @@  void bdrv_close(BlockDriverState *bs)
         bs->growable = 0;
         QDECREF(bs->options);
         bs->options = NULL;
+        assert(QTAILQ_EMPTY(&bs->before_write_cbs));
+        QTAILQ_INIT(&bs->before_write_cbs);
 
         if (bs->file != NULL) {
             bdrv_delete(bs->file);
@@ -2587,6 +2590,22 @@  static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
     return ret;
 }
 
+struct BDRVBeforeWrite {
+    BDRVBeforeWriteFunc *cb;
+    QTAILQ_ENTRY(BDRVBeforeWrite) list;
+};
+
+static void invoke_before_write_cb(BlockDriverState *bs, int64_t sector_num,
+                                   int nb_sectors, QEMUIOVector *qiov)
+{
+    BDRVBeforeWrite *before_write;
+    BDRVBeforeWrite *tmp;
+    QTAILQ_FOREACH_SAFE(before_write, &bs->before_write_cbs, list, tmp) {
+        before_write->cb(bs, sector_num, nb_sectors, qiov);
+    }
+}
+
+
 /*
  * Handle a write request in coroutine context
  */
@@ -2619,6 +2638,8 @@  static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
 
     tracked_request_begin(&req, bs, sector_num, nb_sectors, true);
 
+    invoke_before_write_cb(bs, sector_num, nb_sectors, qiov);
+
     if (flags & BDRV_REQ_ZERO_WRITE) {
         ret = bdrv_co_do_write_zeroes(bs, sector_num, nb_sectors);
     } else {
@@ -4883,3 +4904,19 @@  AioContext *bdrv_get_aio_context(BlockDriverState *bs)
     /* Currently BlockDriverState always uses the main loop AioContext */
     return qemu_get_aio_context();
 }
+
+BDRVBeforeWrite *bdrv_add_before_write_cb(BlockDriverState *bs,
+                                          BDRVBeforeWriteFunc *cb)
+{
+    BDRVBeforeWrite *elem = g_slice_new(BDRVBeforeWrite);
+    elem->cb = cb;
+    QTAILQ_INSERT_TAIL(&bs->before_write_cbs, elem, list);
+    return elem;
+}
+
+void bdrv_remove_before_write_cb(BlockDriverState *bs,
+                                 BDRVBeforeWrite *before_write)
+{
+    QTAILQ_REMOVE(&bs->before_write_cbs, before_write, list);
+    g_slice_free(BDRVBeforeWrite, before_write);
+}
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 6078dd3..e2299df 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -211,6 +211,16 @@  struct BlockDriver {
     QLIST_ENTRY(BlockDriver) list;
 };
 
+/**
+ * BDRVBeforeWriteFunc:
+ *
+ * See #bdrv_add_before_write_cb().
+ */
+typedef void coroutine_fn BDRVBeforeWriteFunc(BlockDriverState *bs,
+        int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
+
+typedef struct BDRVBeforeWrite BDRVBeforeWrite;
+
 /*
  * Note: the function bdrv_append() copies and swaps contents of
  * BlockDriverStates, so if you add new fields to this struct, please
@@ -289,6 +299,9 @@  struct BlockDriverState {
     /* long-running background operation */
     BlockJob *job;
 
+    /* Callback before write request is processed */
+    QTAILQ_HEAD(, BDRVBeforeWrite) before_write_cbs;
+
     QDict *options;
 };
 
@@ -298,6 +311,25 @@  void bdrv_set_io_limits(BlockDriverState *bs,
                         BlockIOLimit *io_limits);
 
 /**
+ * bdrv_add_before_write_cb:
+ *
+ * Register a callback that is invoked before write requests are processed but
+ * after any throttling or waiting for overlapping requests.
+ *
+ * Returns: a #BDRVBeforeWrite to use with bdrv_remove_before_write_cb()
+ */
+BDRVBeforeWrite *bdrv_add_before_write_cb(BlockDriverState *bs,
+                                          BDRVBeforeWriteFunc *cb);
+
+/**
+ * bdrv_remove_before_write_cb:
+ *
+ * Unregister a before write callback.
+ */
+void bdrv_remove_before_write_cb(BlockDriverState *bs,
+                                 BDRVBeforeWrite *before_write);
+
+/**
  * bdrv_get_aio_context:
  *
  * Returns: the currently bound #AioContext