diff mbox series

[RFC,v2,1/7] block: Introduce API for copy offloading

Message ID 20180418030424.28980-2-famz@redhat.com
State New
Headers show
Series qemu-img convert with copy offloading | expand

Commit Message

Fam Zheng April 18, 2018, 3:04 a.m. UTC
Signed-off-by: Fam Zheng <famz@redhat.com>
---
 block/io.c                | 91 +++++++++++++++++++++++++++++++++++++++++++++++
 include/block/block.h     |  4 +++
 include/block/block_int.h | 30 ++++++++++++++++
 3 files changed, 125 insertions(+)

Comments

Stefan Hajnoczi April 26, 2018, 2:57 p.m. UTC | #1
On Wed, Apr 18, 2018 at 11:04:18AM +0800, Fam Zheng wrote:
> diff --git a/block/io.c b/block/io.c
> index bd9a19a9c4..d274e9525f 100644
> --- a/block/io.c
> +++ b/block/io.c
> @@ -2826,3 +2826,94 @@ void bdrv_unregister_buf(BlockDriverState *bs, void *host)
>          bdrv_unregister_buf(child->bs, host);
>      }
>  }
> +
> +static int bdrv_co_copy_range_internal(BdrvChild *src,

Please remember to use coroutine_fn for coroutines!  This applies to the
other functions in this patch too.

> +int bdrv_co_copy_range(BdrvChild *src, uint64_t src_offset,
> +                       BdrvChild *dst, uint64_t dst_offset,
> +                       uint64_t bytes, BdrvRequestFlags flags)
> +{
> +    BdrvTrackedRequest src_req, dst_req;
> +    BlockDriverState *src_bs = src->bs;
> +    BlockDriverState *dst_bs = dst->bs;
> +    int ret;
> +
> +    bdrv_inc_in_flight(src_bs);
> +    bdrv_inc_in_flight(dst_bs);
> +    tracked_request_begin(&src_req, src_bs, src_offset,
> +                          bytes, BDRV_TRACKED_READ);
> +    tracked_request_begin(&dst_req, dst_bs, dst_offset,
> +                          bytes, BDRV_TRACKED_WRITE);

Tracked requests and in-flight counters are only updated on root nodes.
This is not how read/write works.  Does drain work on an internal or
leaf node with multiple parents?

> diff --git a/include/block/block.h b/include/block/block.h
> index cdec3639a3..72ac011b2b 100644
> --- a/include/block/block.h
> +++ b/include/block/block.h
> @@ -604,4 +604,8 @@ bool bdrv_can_store_new_dirty_bitmap(BlockDriverState *bs, const char *name,
>   */
>  void bdrv_register_buf(BlockDriverState *bs, void *host, size_t size);
>  void bdrv_unregister_buf(BlockDriverState *bs, void *host);
> +
> +int bdrv_co_copy_range(BdrvChild *bs, uint64_t offset,
> +                       BdrvChild *src, uint64_t src_offset,
> +                       uint64_t bytes, BdrvRequestFlags flags);

Please document this new block.h API.

These arguments are in the wrong order!  The first BdrvChild is the
source and the second is the destination.
Fam Zheng April 27, 2018, 5:52 a.m. UTC | #2
On Thu, 04/26 15:57, Stefan Hajnoczi wrote:
> On Wed, Apr 18, 2018 at 11:04:18AM +0800, Fam Zheng wrote:
> > diff --git a/block/io.c b/block/io.c
> > index bd9a19a9c4..d274e9525f 100644
> > --- a/block/io.c
> > +++ b/block/io.c
> > @@ -2826,3 +2826,94 @@ void bdrv_unregister_buf(BlockDriverState *bs, void *host)
> >          bdrv_unregister_buf(child->bs, host);
> >      }
> >  }
> > +
> > +static int bdrv_co_copy_range_internal(BdrvChild *src,
> 
> Please remember to use coroutine_fn for coroutines!  This applies to the
> other functions in this patch too.

OK!

> 
> > +int bdrv_co_copy_range(BdrvChild *src, uint64_t src_offset,
> > +                       BdrvChild *dst, uint64_t dst_offset,
> > +                       uint64_t bytes, BdrvRequestFlags flags)
> > +{
> > +    BdrvTrackedRequest src_req, dst_req;
> > +    BlockDriverState *src_bs = src->bs;
> > +    BlockDriverState *dst_bs = dst->bs;
> > +    int ret;
> > +
> > +    bdrv_inc_in_flight(src_bs);
> > +    bdrv_inc_in_flight(dst_bs);
> > +    tracked_request_begin(&src_req, src_bs, src_offset,
> > +                          bytes, BDRV_TRACKED_READ);
> > +    tracked_request_begin(&dst_req, dst_bs, dst_offset,
> > +                          bytes, BDRV_TRACKED_WRITE);
> 
> Tracked requests and in-flight counters are only updated on root nodes.
> This is not how read/write works.  Does drain work on an internal or
> leaf node with multiple parents?

Telling from how bdrv_do_drained_begin is implemented now (recursive both to
parents and children), I think it should work. But you are right this is
inconsistent with read/write code, it seems I could move this it to
bdrv_co_copy_range_internal.

> 
> > diff --git a/include/block/block.h b/include/block/block.h
> > index cdec3639a3..72ac011b2b 100644
> > --- a/include/block/block.h
> > +++ b/include/block/block.h
> > @@ -604,4 +604,8 @@ bool bdrv_can_store_new_dirty_bitmap(BlockDriverState *bs, const char *name,
> >   */
> >  void bdrv_register_buf(BlockDriverState *bs, void *host, size_t size);
> >  void bdrv_unregister_buf(BlockDriverState *bs, void *host);
> > +
> > +int bdrv_co_copy_range(BdrvChild *bs, uint64_t offset,
> > +                       BdrvChild *src, uint64_t src_offset,
> > +                       uint64_t bytes, BdrvRequestFlags flags);
> 
> Please document this new block.h API.

OK.

> 
> These arguments are in the wrong order!  The first BdrvChild is the
> source and the second is the destination.

Right, will fix.

Fam
diff mbox series

Patch

diff --git a/block/io.c b/block/io.c
index bd9a19a9c4..d274e9525f 100644
--- a/block/io.c
+++ b/block/io.c
@@ -2826,3 +2826,94 @@  void bdrv_unregister_buf(BlockDriverState *bs, void *host)
         bdrv_unregister_buf(child->bs, host);
     }
 }
+
+static int bdrv_co_copy_range_internal(BdrvChild *src,
+                                       uint64_t src_offset,
+                                       BdrvChild *dst,
+                                       uint64_t dst_offset,
+                                       uint64_t bytes, BdrvRequestFlags flags,
+                                       bool recurse_src)
+{
+    int ret;
+
+    if (!src || !dst || !src->bs || !dst->bs) {
+        return -ENOMEDIUM;
+    }
+    ret = bdrv_check_byte_request(src->bs, src_offset, bytes);
+    if (ret) {
+        return ret;
+    }
+
+    ret = bdrv_check_byte_request(dst->bs, dst_offset, bytes);
+    if (ret) {
+        return ret;
+    }
+    if (flags & BDRV_REQ_ZERO_WRITE) {
+        return bdrv_co_pwrite_zeroes(dst, dst_offset, bytes, flags);
+    }
+
+    if (!src->bs->drv->bdrv_co_copy_range_from
+        || !dst->bs->drv->bdrv_co_copy_range_to
+        || src->bs->encrypted || dst->bs->encrypted) {
+        return -ENOTSUP;
+    }
+    if (recurse_src) {
+        return src->bs->drv->bdrv_co_copy_range_from(src->bs,
+                                                     src, src_offset,
+                                                     dst, dst_offset,
+                                                     bytes, flags);
+    } else {
+        return dst->bs->drv->bdrv_co_copy_range_to(dst->bs,
+                                                   src, src_offset,
+                                                   dst, dst_offset,
+                                                   bytes, flags);
+    }
+}
+
+/* Copy range from @bs to @dst. */
+int bdrv_co_copy_range_from(BdrvChild *src, uint64_t src_offset,
+                            BdrvChild *dst, uint64_t dst_offset,
+                            uint64_t bytes, BdrvRequestFlags flags)
+{
+    return bdrv_co_copy_range_internal(src, src_offset, dst, dst_offset,
+                                       bytes, flags, true);
+}
+
+/* Copy range from @src to @bs. Should only be called by block drivers when @bs
+ * is the leaf. */
+int bdrv_co_copy_range_to(BdrvChild *src, uint64_t src_offset,
+                          BdrvChild *dst, uint64_t dst_offset,
+                          uint64_t bytes, BdrvRequestFlags flags)
+{
+    return bdrv_co_copy_range_internal(src, src_offset, dst, dst_offset,
+                                       bytes, flags, false);
+}
+
+int bdrv_co_copy_range(BdrvChild *src, uint64_t src_offset,
+                       BdrvChild *dst, uint64_t dst_offset,
+                       uint64_t bytes, BdrvRequestFlags flags)
+{
+    BdrvTrackedRequest src_req, dst_req;
+    BlockDriverState *src_bs = src->bs;
+    BlockDriverState *dst_bs = dst->bs;
+    int ret;
+
+    bdrv_inc_in_flight(src_bs);
+    bdrv_inc_in_flight(dst_bs);
+    tracked_request_begin(&src_req, src_bs, src_offset,
+                          bytes, BDRV_TRACKED_READ);
+    tracked_request_begin(&dst_req, dst_bs, dst_offset,
+                          bytes, BDRV_TRACKED_WRITE);
+
+    wait_serialising_requests(&src_req);
+    wait_serialising_requests(&dst_req);
+    ret = bdrv_co_copy_range_from(src, src_offset,
+                                  dst, dst_offset,
+                                  bytes, flags);
+
+    tracked_request_end(&src_req);
+    tracked_request_end(&dst_req);
+    bdrv_dec_in_flight(src_bs);
+    bdrv_dec_in_flight(dst_bs);
+    return ret;
+}
diff --git a/include/block/block.h b/include/block/block.h
index cdec3639a3..72ac011b2b 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -604,4 +604,8 @@  bool bdrv_can_store_new_dirty_bitmap(BlockDriverState *bs, const char *name,
  */
 void bdrv_register_buf(BlockDriverState *bs, void *host, size_t size);
 void bdrv_unregister_buf(BlockDriverState *bs, void *host);
+
+int bdrv_co_copy_range(BdrvChild *bs, uint64_t offset,
+                       BdrvChild *src, uint64_t src_offset,
+                       uint64_t bytes, BdrvRequestFlags flags);
 #endif
diff --git a/include/block/block_int.h b/include/block/block_int.h
index c4dd1d4bb8..305c29b75e 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -206,6 +206,29 @@  struct BlockDriver {
     int coroutine_fn (*bdrv_co_pdiscard)(BlockDriverState *bs,
         int64_t offset, int bytes);
 
+    /* Map [offset, offset + nbytes) range onto a child of @bs to copy from,
+     * and invoke bdrv_co_copy_range_from(child, ...), or invoke
+     * bdrv_co_copy_range_to() if @bs is the leaf child to copy data from.
+     */
+    int coroutine_fn (*bdrv_co_copy_range_from)(BlockDriverState *bs,
+                                                BdrvChild *src,
+                                                uint64_t offset,
+                                                BdrvChild *dst,
+                                                uint64_t dst_offset,
+                                                uint64_t bytes, BdrvRequestFlags flags);
+
+    /* Map [offset, offset + nbytes) range onto a child of bs to copy data to,
+     * and invoke bdrv_co_copy_range_to(child, src, ...), or perform the copy
+     * operation if @bs is the leaf and @src has the same BlockDriver.  Return
+     * -ENOTSUP if @bs is the leaf but @src has a different BlockDriver.
+     */
+    int coroutine_fn (*bdrv_co_copy_range_to)(BlockDriverState *bs,
+                                              BdrvChild *src,
+                                              uint64_t src_offset,
+                                              BdrvChild *dst,
+                                              uint64_t dst_offset,
+                                              uint64_t bytes, BdrvRequestFlags flags);
+
     /*
      * Building block for bdrv_block_status[_above] and
      * bdrv_is_allocated[_above].  The driver should answer only
@@ -1090,4 +1113,11 @@  void bdrv_dec_in_flight(BlockDriverState *bs);
 
 void blockdev_close_all_bdrv_states(void);
 
+int bdrv_co_copy_range_from(BdrvChild *src, uint64_t src_offset,
+                            BdrvChild *dst, uint64_t dst_offset,
+                            uint64_t bytes, BdrvRequestFlags flags);
+int bdrv_co_copy_range_to(BdrvChild *src, uint64_t src_offset,
+                            BdrvChild *dst, uint64_t dst_offset,
+                            uint64_t bytes, BdrvRequestFlags flags);
+
 #endif /* BLOCK_INT_H */