Patchwork [3/6] block: switch bdrv_read()/bdrv_write() to coroutines

login
register
mail settings
Submitter Stefan Hajnoczi
Date Oct. 5, 2011, 4:17 p.m.
Message ID <1317831427-477-4-git-send-email-stefanha@linux.vnet.ibm.com>
Download mbox | patch
Permalink /patch/117891/
State New
Headers show

Comments

Stefan Hajnoczi - Oct. 5, 2011, 4:17 p.m.
The bdrv_read()/bdrv_write() functions call .bdrv_read()/.bdrv_write().
They should go through bdrv_co_do_readv() and bdrv_co_do_writev()
instead in order to unify request processing code across sync, aio, and
coroutine interfaces.  This is also an important step towards removing
BlockDriverState .bdrv_read()/.bdrv_write() in the future.

Signed-off-by: Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
---
 block.c |  112 +++++++++++++++++++++++++++++++++++----------------------------
 1 files changed, 62 insertions(+), 50 deletions(-)
Zhiyong Wu - Oct. 11, 2011, 6:44 a.m.
On Thu, Oct 6, 2011 at 12:17 AM, Stefan Hajnoczi
<stefanha@linux.vnet.ibm.com> wrote:
> The bdrv_read()/bdrv_write() functions call .bdrv_read()/.bdrv_write().
> They should go through bdrv_co_do_readv() and bdrv_co_do_writev()
> instead in order to unify request processing code across sync, aio, and
> coroutine interfaces.  This is also an important step towards removing
> BlockDriverState .bdrv_read()/.bdrv_write() in the future.
>
> Signed-off-by: Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
> ---
>  block.c |  112 +++++++++++++++++++++++++++++++++++----------------------------
>  1 files changed, 62 insertions(+), 50 deletions(-)
>
> diff --git a/block.c b/block.c
> index d15784e..90c29db 100644
> --- a/block.c
> +++ b/block.c
> @@ -44,6 +44,8 @@
>  #include <windows.h>
>  #endif
>
> +#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
> +
>  static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load);
>  static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
>         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
> @@ -74,6 +76,8 @@ static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
>  static int coroutine_fn bdrv_co_flush_em(BlockDriverState *bs);
>  static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
>     int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
> +static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
> +    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
>
>  static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
>     QTAILQ_HEAD_INITIALIZER(bdrv_states);
> @@ -1038,30 +1042,69 @@ static inline bool bdrv_has_async_flush(BlockDriver *drv)
>     return drv->bdrv_aio_flush != bdrv_aio_flush_em;
>  }
>
> -/* return < 0 if error. See bdrv_write() for the return codes */
> -int bdrv_read(BlockDriverState *bs, int64_t sector_num,
> -              uint8_t *buf, int nb_sectors)
> +typedef struct RwCo {
> +    BlockDriverState *bs;
> +    int64_t sector_num;
> +    int nb_sectors;
> +    QEMUIOVector *qiov;
> +    bool is_write;
> +    int ret;
> +} RwCo;
> +
> +static void coroutine_fn bdrv_rw_co_entry(void *opaque)
>  {
> -    BlockDriver *drv = bs->drv;
> +    RwCo *rwco = opaque;
>
> -    if (!drv)
> -        return -ENOMEDIUM;
> +    if (!rwco->is_write) {
> +        rwco->ret = bdrv_co_do_readv(rwco->bs, rwco->sector_num,
> +                                     rwco->nb_sectors, rwco->qiov);
> +    } else {
> +        rwco->ret = bdrv_co_do_writev(rwco->bs, rwco->sector_num,
> +                                      rwco->nb_sectors, rwco->qiov);
> +    }
> +}
>
> -    if (bdrv_has_async_rw(drv) && qemu_in_coroutine()) {
> -        QEMUIOVector qiov;
> -        struct iovec iov = {
> -            .iov_base = (void *)buf,
> -            .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
> -        };
> +/*
> + * Process a synchronous request using coroutines
> + */
> +static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf,
> +                      int nb_sectors, bool is_write)
> +{
> +    QEMUIOVector qiov;
> +    struct iovec iov = {
> +        .iov_base = (void *)buf,
> +        .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
> +    };
> +    Coroutine *co;
> +    RwCo rwco = {
> +        .bs = bs,
> +        .sector_num = sector_num,
> +        .nb_sectors = nb_sectors,
> +        .qiov = &qiov,
> +        .is_write = is_write,
> +        .ret = NOT_DONE,
> +    };
>
> -        qemu_iovec_init_external(&qiov, &iov, 1);
> -        return bdrv_co_readv(bs, sector_num, nb_sectors, &qiov);
> -    }
> +    qemu_iovec_init_external(&qiov, &iov, 1);
>
> -    if (bdrv_check_request(bs, sector_num, nb_sectors))
> -        return -EIO;
> +    if (qemu_in_coroutine()) {
> +        /* Fast-path if already in coroutine context */
> +        bdrv_rw_co_entry(&rwco);
> +    } else {
> +        co = qemu_coroutine_create(bdrv_rw_co_entry);
> +        qemu_coroutine_enter(co, &rwco);
> +        while (rwco.ret == NOT_DONE) {
> +            qemu_aio_wait();
> +        }
> +    }
> +    return rwco.ret;
> +}
>
> -    return drv->bdrv_read(bs, sector_num, buf, nb_sectors);
> +/* return < 0 if error. See bdrv_write() for the return codes */
> +int bdrv_read(BlockDriverState *bs, int64_t sector_num,
> +              uint8_t *buf, int nb_sectors)
> +{
> +    return bdrv_rw_co(bs, sector_num, buf, nb_sectors, false);
>  }
>
>  static void set_dirty_bitmap(BlockDriverState *bs, int64_t sector_num,
> @@ -1101,36 +1144,7 @@ static void set_dirty_bitmap(BlockDriverState *bs, int64_t sector_num,
>  int bdrv_write(BlockDriverState *bs, int64_t sector_num,
>                const uint8_t *buf, int nb_sectors)
>  {
> -    BlockDriver *drv = bs->drv;
> -
> -    if (!bs->drv)
> -        return -ENOMEDIUM;
> -
> -    if (bdrv_has_async_rw(drv) && qemu_in_coroutine()) {
> -        QEMUIOVector qiov;
> -        struct iovec iov = {
> -            .iov_base = (void *)buf,
> -            .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
> -        };
> -
> -        qemu_iovec_init_external(&qiov, &iov, 1);
> -        return bdrv_co_writev(bs, sector_num, nb_sectors, &qiov);
> -    }
> -
> -    if (bs->read_only)
> -        return -EACCES;
> -    if (bdrv_check_request(bs, sector_num, nb_sectors))
> -        return -EIO;
> -
> -    if (bs->dirty_bitmap) {
> -        set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
> -    }
> -
> -    if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
> -        bs->wr_highest_sector = sector_num + nb_sectors - 1;
> -    }
The above codes are removed, will it be safe?
> -
> -    return drv->bdrv_write(bs, sector_num, buf, nb_sectors);
> +    return bdrv_rw_co(bs, sector_num, (uint8_t *)buf, nb_sectors, true);
>  }
>
>  int bdrv_pread(BlockDriverState *bs, int64_t offset,
> @@ -2891,8 +2905,6 @@ static void bdrv_rw_em_cb(void *opaque, int ret)
>     *(int *)opaque = ret;
>  }
>
> -#define NOT_DONE 0x7fffffff
> -
>  static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num,
>                         uint8_t *buf, int nb_sectors)
>  {
> --
> 1.7.6.3
>
>
>
Stefan Hajnoczi - Oct. 12, 2011, 9:03 a.m.
On Tue, Oct 11, 2011 at 7:44 AM, Zhi Yong Wu <zwu.kernel@gmail.com> wrote:
> On Thu, Oct 6, 2011 at 12:17 AM, Stefan Hajnoczi
> <stefanha@linux.vnet.ibm.com> wrote:
>> @@ -1101,36 +1144,7 @@ static void set_dirty_bitmap(BlockDriverState *bs, int64_t sector_num,
>>  int bdrv_write(BlockDriverState *bs, int64_t sector_num,
>>                const uint8_t *buf, int nb_sectors)
>>  {
>> -    BlockDriver *drv = bs->drv;
>> -
>> -    if (!bs->drv)
>> -        return -ENOMEDIUM;
>> -
>> -    if (bdrv_has_async_rw(drv) && qemu_in_coroutine()) {
>> -        QEMUIOVector qiov;
>> -        struct iovec iov = {
>> -            .iov_base = (void *)buf,
>> -            .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
>> -        };
>> -
>> -        qemu_iovec_init_external(&qiov, &iov, 1);
>> -        return bdrv_co_writev(bs, sector_num, nb_sectors, &qiov);
>> -    }
>> -
>> -    if (bs->read_only)
>> -        return -EACCES;
>> -    if (bdrv_check_request(bs, sector_num, nb_sectors))
>> -        return -EIO;
>> -
>> -    if (bs->dirty_bitmap) {
>> -        set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
>> -    }
>> -
>> -    if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
>> -        bs->wr_highest_sector = sector_num + nb_sectors - 1;
>> -    }
> The above codes are removed, will it be safe?

If you are checking that removing bs->wr_highest_sector code is okay,
then yes, it is safe because bdrv_co_do_writev() does the dirty bitmap
and wr_highest_sector updates.  We haven't lost any code by unifying
request processing - bdrv_co_do_writev() must do everything that
bdrv_aio_writev() and bdrv_write() did.

Stefan
Zhiyong Wu - Oct. 12, 2011, 9:11 a.m.
On Wed, Oct 12, 2011 at 5:03 PM, Stefan Hajnoczi <stefanha@gmail.com> wrote:
> On Tue, Oct 11, 2011 at 7:44 AM, Zhi Yong Wu <zwu.kernel@gmail.com> wrote:
>> On Thu, Oct 6, 2011 at 12:17 AM, Stefan Hajnoczi
>> <stefanha@linux.vnet.ibm.com> wrote:
>>> @@ -1101,36 +1144,7 @@ static void set_dirty_bitmap(BlockDriverState *bs, int64_t sector_num,
>>>  int bdrv_write(BlockDriverState *bs, int64_t sector_num,
>>>                const uint8_t *buf, int nb_sectors)
>>>  {
>>> -    BlockDriver *drv = bs->drv;
>>> -
>>> -    if (!bs->drv)
>>> -        return -ENOMEDIUM;
>>> -
>>> -    if (bdrv_has_async_rw(drv) && qemu_in_coroutine()) {
>>> -        QEMUIOVector qiov;
>>> -        struct iovec iov = {
>>> -            .iov_base = (void *)buf,
>>> -            .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
>>> -        };
>>> -
>>> -        qemu_iovec_init_external(&qiov, &iov, 1);
>>> -        return bdrv_co_writev(bs, sector_num, nb_sectors, &qiov);
>>> -    }
>>> -
>>> -    if (bs->read_only)
>>> -        return -EACCES;
>>> -    if (bdrv_check_request(bs, sector_num, nb_sectors))
>>> -        return -EIO;
How about the above four lines of codes?
>>> -
>>> -    if (bs->dirty_bitmap) {
>>> -        set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
>>> -    }
>>> -
>>> -    if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
>>> -        bs->wr_highest_sector = sector_num + nb_sectors - 1;
>>> -    }
>> The above codes are removed, will it be safe?
>
> If you are checking that removing bs->wr_highest_sector code is okay,
> then yes, it is safe because bdrv_co_do_writev() does the dirty bitmap
> and wr_highest_sector updates.  We haven't lost any code by unifying
OK. got it. thanks.
> request processing - bdrv_co_do_writev() must do everything that
> bdrv_aio_writev() and bdrv_write() did.
>
> Stefan
>
Kevin Wolf - Oct. 12, 2011, 12:53 p.m.
Am 05.10.2011 18:17, schrieb Stefan Hajnoczi:
> The bdrv_read()/bdrv_write() functions call .bdrv_read()/.bdrv_write().
> They should go through bdrv_co_do_readv() and bdrv_co_do_writev()
> instead in order to unify request processing code across sync, aio, and
> coroutine interfaces.  This is also an important step towards removing
> BlockDriverState .bdrv_read()/.bdrv_write() in the future.
> 
> Signed-off-by: Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>

This breaks drivers that only provide synchronous .bdrv_read/write.
Attempts to read or write anything results in endless recursion.

Kevin

Patch

diff --git a/block.c b/block.c
index d15784e..90c29db 100644
--- a/block.c
+++ b/block.c
@@ -44,6 +44,8 @@ 
 #include <windows.h>
 #endif
 
+#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
+
 static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load);
 static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
@@ -74,6 +76,8 @@  static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
 static int coroutine_fn bdrv_co_flush_em(BlockDriverState *bs);
 static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
     int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
+static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
+    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
 
 static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
     QTAILQ_HEAD_INITIALIZER(bdrv_states);
@@ -1038,30 +1042,69 @@  static inline bool bdrv_has_async_flush(BlockDriver *drv)
     return drv->bdrv_aio_flush != bdrv_aio_flush_em;
 }
 
-/* return < 0 if error. See bdrv_write() for the return codes */
-int bdrv_read(BlockDriverState *bs, int64_t sector_num,
-              uint8_t *buf, int nb_sectors)
+typedef struct RwCo {
+    BlockDriverState *bs;
+    int64_t sector_num;
+    int nb_sectors;
+    QEMUIOVector *qiov;
+    bool is_write;
+    int ret;
+} RwCo;
+
+static void coroutine_fn bdrv_rw_co_entry(void *opaque)
 {
-    BlockDriver *drv = bs->drv;
+    RwCo *rwco = opaque;
 
-    if (!drv)
-        return -ENOMEDIUM;
+    if (!rwco->is_write) {
+        rwco->ret = bdrv_co_do_readv(rwco->bs, rwco->sector_num,
+                                     rwco->nb_sectors, rwco->qiov);
+    } else {
+        rwco->ret = bdrv_co_do_writev(rwco->bs, rwco->sector_num,
+                                      rwco->nb_sectors, rwco->qiov);
+    }
+}
 
-    if (bdrv_has_async_rw(drv) && qemu_in_coroutine()) {
-        QEMUIOVector qiov;
-        struct iovec iov = {
-            .iov_base = (void *)buf,
-            .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
-        };
+/*
+ * Process a synchronous request using coroutines
+ */
+static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf,
+                      int nb_sectors, bool is_write)
+{
+    QEMUIOVector qiov;
+    struct iovec iov = {
+        .iov_base = (void *)buf,
+        .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
+    };
+    Coroutine *co;
+    RwCo rwco = {
+        .bs = bs,
+        .sector_num = sector_num,
+        .nb_sectors = nb_sectors,
+        .qiov = &qiov,
+        .is_write = is_write,
+        .ret = NOT_DONE,
+    };
 
-        qemu_iovec_init_external(&qiov, &iov, 1);
-        return bdrv_co_readv(bs, sector_num, nb_sectors, &qiov);
-    }
+    qemu_iovec_init_external(&qiov, &iov, 1);
 
-    if (bdrv_check_request(bs, sector_num, nb_sectors))
-        return -EIO;
+    if (qemu_in_coroutine()) {
+        /* Fast-path if already in coroutine context */
+        bdrv_rw_co_entry(&rwco);
+    } else {
+        co = qemu_coroutine_create(bdrv_rw_co_entry);
+        qemu_coroutine_enter(co, &rwco);
+        while (rwco.ret == NOT_DONE) {
+            qemu_aio_wait();
+        }
+    }
+    return rwco.ret;
+}
 
-    return drv->bdrv_read(bs, sector_num, buf, nb_sectors);
+/* return < 0 if error. See bdrv_write() for the return codes */
+int bdrv_read(BlockDriverState *bs, int64_t sector_num,
+              uint8_t *buf, int nb_sectors)
+{
+    return bdrv_rw_co(bs, sector_num, buf, nb_sectors, false);
 }
 
 static void set_dirty_bitmap(BlockDriverState *bs, int64_t sector_num,
@@ -1101,36 +1144,7 @@  static void set_dirty_bitmap(BlockDriverState *bs, int64_t sector_num,
 int bdrv_write(BlockDriverState *bs, int64_t sector_num,
                const uint8_t *buf, int nb_sectors)
 {
-    BlockDriver *drv = bs->drv;
-
-    if (!bs->drv)
-        return -ENOMEDIUM;
-
-    if (bdrv_has_async_rw(drv) && qemu_in_coroutine()) {
-        QEMUIOVector qiov;
-        struct iovec iov = {
-            .iov_base = (void *)buf,
-            .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
-        };
-
-        qemu_iovec_init_external(&qiov, &iov, 1);
-        return bdrv_co_writev(bs, sector_num, nb_sectors, &qiov);
-    }
-
-    if (bs->read_only)
-        return -EACCES;
-    if (bdrv_check_request(bs, sector_num, nb_sectors))
-        return -EIO;
-
-    if (bs->dirty_bitmap) {
-        set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
-    }
-
-    if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
-        bs->wr_highest_sector = sector_num + nb_sectors - 1;
-    }
-
-    return drv->bdrv_write(bs, sector_num, buf, nb_sectors);
+    return bdrv_rw_co(bs, sector_num, (uint8_t *)buf, nb_sectors, true);
 }
 
 int bdrv_pread(BlockDriverState *bs, int64_t offset,
@@ -2891,8 +2905,6 @@  static void bdrv_rw_em_cb(void *opaque, int ret)
     *(int *)opaque = ret;
 }
 
-#define NOT_DONE 0x7fffffff
-
 static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num,
                         uint8_t *buf, int nb_sectors)
 {