Patchwork [v2] Improve accuracy of block migration bandwidth calculation

login
register
mail settings
Submitter Avishay Traeger1
Date April 3, 2011, 8:31 a.m.
Message ID <OFB6BB9DCC.7838F587-ONC2257867.002AF477-C2257867.002ED7B5@il.ibm.com>
Download mbox | patch
Permalink /patch/89470/
State New
Headers show

Comments

Avishay Traeger1 - April 3, 2011, 8:31 a.m.
Revised patch for improving the accuracy of the block migration bandwidth
calculation.  Thanks a lot to Michael Roth for the input.

For those that missed the original patch, here is the description:
block_mig_state.total_time is currently the sum of the read request
latencies.  This is not very accurate because block migration uses aio and
so several requests can be submitted at once.  Bandwidth should be computed
with wall-clock time, not by adding the latencies.  In this case,
"total_time" has a higher value than it should, and so the computed
bandwidth is lower than it is in reality.  This means that migration can
take longer than it needs to.
However, we don't want to use pure wall-clock time here.  We are computing
bandwidth in the asynchronous phase, where the migration repeatedly wakes
up and sends some aio requests.  The computed bandwidth will be used for
synchronous transfer.


Avishay


Signed-off-by: Avishay Traeger <avishay@il.ibm.com>
---
 block-migration.c |   21 +++++++++------------
 1 files changed, 9 insertions(+), 12 deletions(-)

     bmds_set_aio_inflight(blk->bmds, blk->sector, blk->nr_sectors, 0);
@@ -250,7 +245,8 @@ static int mig_save_device_bulk(Monitor *mon, QEMUFile
*f,
     blk->iov.iov_len = nr_sectors * BDRV_SECTOR_SIZE;
     qemu_iovec_init_external(&blk->qiov, &blk->iov, 1);

-    blk->time = qemu_get_clock_ns(rt_clock);
+    if (block_mig_state.submitted == 0)
+        block_mig_state.prev_time_offset = qemu_get_clock_ns(rt_clock);

     blk->aiocb = bdrv_aio_readv(bs, cur_sector, &blk->qiov,
                                 nr_sectors, blk_mig_read_cb, blk);
@@ -409,7 +405,8 @@ static int mig_save_device_dirty(Monitor *mon, QEMUFile
*f,
                 blk->iov.iov_len = nr_sectors * BDRV_SECTOR_SIZE;
                 qemu_iovec_init_external(&blk->qiov, &blk->iov, 1);

-                blk->time = qemu_get_clock_ns(rt_clock);
+                if (block_mig_state.submitted == 0)
+                    block_mig_state.prev_time_offset = qemu_get_clock_ns
(rt_clock);

                 blk->aiocb = bdrv_aio_readv(bmds->bs, sector, &blk->qiov,
                                             nr_sectors, blk_mig_read_cb,
blk);
--
1.7.0.4
Michael Roth - April 5, 2011, 6:20 p.m.
On 04/03/2011 03:31 AM, Avishay Traeger wrote:
>
> Revised patch for improving the accuracy of the block migration bandwidth
> calculation.  Thanks a lot to Michael Roth for the input.
>
> For those that missed the original patch, here is the description:
> block_mig_state.total_time is currently the sum of the read request
> latencies.  This is not very accurate because block migration uses aio and
> so several requests can be submitted at once.  Bandwidth should be computed
> with wall-clock time, not by adding the latencies.  In this case,
> "total_time" has a higher value than it should, and so the computed
> bandwidth is lower than it is in reality.  This means that migration can
> take longer than it needs to.
> However, we don't want to use pure wall-clock time here.  We are computing
> bandwidth in the asynchronous phase, where the migration repeatedly wakes
> up and sends some aio requests.  The computed bandwidth will be used for
> synchronous transfer.
>
>
> Avishay
>
>
> Signed-off-by: Avishay Traeger<avishay@il.ibm.com>
> ---
>   block-migration.c |   21 +++++++++------------
>   1 files changed, 9 insertions(+), 12 deletions(-)
>
> diff --git a/block-migration.c b/block-migration.c
> index 8218bac..47296ca 100644
> --- a/block-migration.c
> +++ b/block-migration.c
> @@ -62,7 +62,6 @@ typedef struct BlkMigBlock {
>       QEMUIOVector qiov;
>       BlockDriverAIOCB *aiocb;
>       int ret;
> -    int64_t time;
>       QSIMPLEQ_ENTRY(BlkMigBlock) entry;
>   } BlkMigBlock;
>
> @@ -78,6 +77,7 @@ typedef struct BlkMigState {
>       int prev_progress;
>       int bulk_completed;
>       long double total_time;
> +    long double prev_time_offset;
>       int reads;
>   } BlkMigState;
>
> @@ -131,12 +131,6 @@ uint64_t blk_mig_bytes_total(void)
>       return sum<<  BDRV_SECTOR_BITS;
>   }
>
> -static inline void add_avg_read_time(int64_t time)
> -{
> -    block_mig_state.reads++;
> -    block_mig_state.total_time += time;
> -}
> -
>   static inline long double compute_read_bwidth(void)
>   {
>       assert(block_mig_state.total_time != 0);
> @@ -191,13 +185,14 @@ static void alloc_aio_bitmap(BlkMigDevState *bmds)
>
>   static void blk_mig_read_cb(void *opaque, int ret)
>   {
> +    long double curr_time = qemu_get_clock_ns(rt_clock);
>       BlkMigBlock *blk = opaque;
>
>       blk->ret = ret;
>
> -    blk->time = qemu_get_clock_ns(rt_clock) - blk->time;
> -
> -    add_avg_read_time(blk->time);
> +    block_mig_state.reads++;
> +    block_mig_state.total_time += (curr_time -
> block_mig_state.prev_time_offset);
> +    block_mig_state.prev_time_offset = curr_time;
>
>       QSIMPLEQ_INSERT_TAIL(&block_mig_state.blk_list, blk, entry);
>       bmds_set_aio_inflight(blk->bmds, blk->sector, blk->nr_sectors, 0);
> @@ -250,7 +245,8 @@ static int mig_save_device_bulk(Monitor *mon, QEMUFile
> *f,
>       blk->iov.iov_len = nr_sectors * BDRV_SECTOR_SIZE;
>       qemu_iovec_init_external(&blk->qiov,&blk->iov, 1);
>
> -    blk->time = qemu_get_clock_ns(rt_clock);
> +    if (block_mig_state.submitted == 0)
> +        block_mig_state.prev_time_offset = qemu_get_clock_ns(rt_clock);
>
>       blk->aiocb = bdrv_aio_readv(bs, cur_sector,&blk->qiov,
>                                   nr_sectors, blk_mig_read_cb, blk);
> @@ -409,7 +405,8 @@ static int mig_save_device_dirty(Monitor *mon, QEMUFile
> *f,
>                   blk->iov.iov_len = nr_sectors * BDRV_SECTOR_SIZE;
>                   qemu_iovec_init_external(&blk->qiov,&blk->iov, 1);
>
> -                blk->time = qemu_get_clock_ns(rt_clock);
> +                if (block_mig_state.submitted == 0)
> +                    block_mig_state.prev_time_offset = qemu_get_clock_ns
> (rt_clock);
>
>                   blk->aiocb = bdrv_aio_readv(bmds->bs, sector,&blk->qiov,
>                                               nr_sectors, blk_mig_read_cb,
> blk);
> --
> 1.7.0.4
>

Looks good. This addresses the concerns I had.

Reviewed-by: Michael Roth <mdroth@linux.vnet.ibm.com>
Kevin Wolf - April 20, 2011, 12:34 p.m.
Am 03.04.2011 10:31, schrieb Avishay Traeger:
> 
> Revised patch for improving the accuracy of the block migration bandwidth
> calculation.  Thanks a lot to Michael Roth for the input.
> 
> For those that missed the original patch, here is the description:
> block_mig_state.total_time is currently the sum of the read request
> latencies.  This is not very accurate because block migration uses aio and
> so several requests can be submitted at once.  Bandwidth should be computed
> with wall-clock time, not by adding the latencies.  In this case,
> "total_time" has a higher value than it should, and so the computed
> bandwidth is lower than it is in reality.  This means that migration can
> take longer than it needs to.
> However, we don't want to use pure wall-clock time here.  We are computing
> bandwidth in the asynchronous phase, where the migration repeatedly wakes
> up and sends some aio requests.  The computed bandwidth will be used for
> synchronous transfer.
> 
> 
> Avishay
> 
> 
> Signed-off-by: Avishay Traeger <avishay@il.ibm.com>

Thanks. Fixed line wraps and coding style and applied to the block
branch. Please take care to avoid patch corruption next time - usually
such patches are not fixed manually, but rejected.

Kevin

Patch

diff --git a/block-migration.c b/block-migration.c
index 8218bac..47296ca 100644
--- a/block-migration.c
+++ b/block-migration.c
@@ -62,7 +62,6 @@  typedef struct BlkMigBlock {
     QEMUIOVector qiov;
     BlockDriverAIOCB *aiocb;
     int ret;
-    int64_t time;
     QSIMPLEQ_ENTRY(BlkMigBlock) entry;
 } BlkMigBlock;

@@ -78,6 +77,7 @@  typedef struct BlkMigState {
     int prev_progress;
     int bulk_completed;
     long double total_time;
+    long double prev_time_offset;
     int reads;
 } BlkMigState;

@@ -131,12 +131,6 @@  uint64_t blk_mig_bytes_total(void)
     return sum << BDRV_SECTOR_BITS;
 }

-static inline void add_avg_read_time(int64_t time)
-{
-    block_mig_state.reads++;
-    block_mig_state.total_time += time;
-}
-
 static inline long double compute_read_bwidth(void)
 {
     assert(block_mig_state.total_time != 0);
@@ -191,13 +185,14 @@  static void alloc_aio_bitmap(BlkMigDevState *bmds)

 static void blk_mig_read_cb(void *opaque, int ret)
 {
+    long double curr_time = qemu_get_clock_ns(rt_clock);
     BlkMigBlock *blk = opaque;

     blk->ret = ret;

-    blk->time = qemu_get_clock_ns(rt_clock) - blk->time;
-
-    add_avg_read_time(blk->time);
+    block_mig_state.reads++;
+    block_mig_state.total_time += (curr_time -
block_mig_state.prev_time_offset);
+    block_mig_state.prev_time_offset = curr_time;

     QSIMPLEQ_INSERT_TAIL(&block_mig_state.blk_list, blk, entry);