Patchwork [8/8] block-migration: efficiently encode zero blocks

login
register
mail settings
Submitter Peter Lieven
Date June 22, 2013, 8:58 p.m.
Message ID <1371934712-11714-9-git-send-email-pl@kamp.de>
Download mbox | patch
Permalink /patch/253435/
State New
Headers show

Comments

Peter Lieven - June 22, 2013, 8:58 p.m.
This patch adds an efficient encoding for zero blocks by
adding a new flag indicating that a block is completely zero.

Additionally, bdrv_write_zeroes() is used at the destination
to efficiently write these zeroes. If the driver supports
it, this avoids blindly allocating all sectors consumed by
zero blocks, effectively re-thinning the device.

Signed-off-by: Peter Lieven <pl@kamp.de>
---
 block-migration.c             |   29 +++++++++++++++++++++++------
 include/migration/qemu-file.h |    1 +
 savevm.c                      |    2 +-
 3 files changed, 25 insertions(+), 7 deletions(-)
Paolo Bonzini - June 24, 2013, 2:32 p.m.
Il 22/06/2013 22:58, Peter Lieven ha scritto:
> this patch adds a efficient encoding for zero blocks by
> adding a new flag indiciating a block is completly zero.
> 
> additionally bdrv_write_zeros() is used at the destination
> to efficiently write these zeroes. if the driver supports
> it this avoids blindly allocating all sectors consumed by
> zero blocks effectively re-thinning the device.
> 
> Signed-off-by: Peter Lieven <pl@kamp.de>

This is a bit ugly because it doesn't work with drive-mirror.  Perhaps
we can add a write-zeroes opcode to NBD, too.

Paolo

> ---
>  block-migration.c             |   29 +++++++++++++++++++++++------
>  include/migration/qemu-file.h |    1 +
>  savevm.c                      |    2 +-
>  3 files changed, 25 insertions(+), 7 deletions(-)
> 
> diff --git a/block-migration.c b/block-migration.c
> index 2fd7699..99b3757 100644
> --- a/block-migration.c
> +++ b/block-migration.c
> @@ -29,6 +29,7 @@
>  #define BLK_MIG_FLAG_DEVICE_BLOCK       0x01
>  #define BLK_MIG_FLAG_EOS                0x02
>  #define BLK_MIG_FLAG_PROGRESS           0x04
> +#define BLK_MIG_FLAG_ZERO_BLOCK         0x08
>  
>  #define MAX_IS_ALLOCATED_SEARCH 65536
>  
> @@ -114,16 +115,29 @@ static void blk_mig_unlock(void)
>  static void blk_send(QEMUFile *f, BlkMigBlock * blk)
>  {
>      int len;
> +    int flags = BLK_MIG_FLAG_DEVICE_BLOCK;
> +    
> +    if (buffer_is_zero(blk->buf, BLOCK_SIZE)) {
> +        flags |= BLK_MIG_FLAG_ZERO_BLOCK;
> +    }
>  
>      /* sector number and flags */
>      qemu_put_be64(f, (blk->sector << BDRV_SECTOR_BITS)
> -                     | BLK_MIG_FLAG_DEVICE_BLOCK);
> +                     | flags);
>  
>      /* device name */
>      len = strlen(blk->bmds->bs->device_name);
>      qemu_put_byte(f, len);
>      qemu_put_buffer(f, (uint8_t *)blk->bmds->bs->device_name, len);
>  
> +    /* if a block is zero we need to flush here since the network
> +     * bandwidth is now a lot higher than the storage device bandwidth.
> +     * thus if we queue zero blocks we slow down the migration */
> +    if (flags & BLK_MIG_FLAG_ZERO_BLOCK) {
> +        qemu_fflush(f);
> +        return;
> +    }
> +
>      qemu_put_buffer(f, blk->buf, BLOCK_SIZE);
>  }
>  
> @@ -762,12 +776,15 @@ static int block_load(QEMUFile *f, void *opaque, int version_id)
>                  nr_sectors = BDRV_SECTORS_PER_DIRTY_CHUNK;
>              }
>  
> -            buf = g_malloc(BLOCK_SIZE);
> -
> -            qemu_get_buffer(f, buf, BLOCK_SIZE);
> -            ret = bdrv_write(bs, addr, buf, nr_sectors);
> +            if (flags & BLK_MIG_FLAG_ZERO_BLOCK) {
> +                ret = bdrv_write_zeroes(bs, addr, nr_sectors);
> +            } else {
> +                buf = g_malloc(BLOCK_SIZE);
> +                qemu_get_buffer(f, buf, BLOCK_SIZE);
> +                ret = bdrv_write(bs, addr, buf, nr_sectors);
> +                g_free(buf);
> +            }
>  
> -            g_free(buf);
>              if (ret < 0) {
>                  return ret;
>              }
> diff --git a/include/migration/qemu-file.h b/include/migration/qemu-file.h
> index 7519464..b73298d 100644
> --- a/include/migration/qemu-file.h
> +++ b/include/migration/qemu-file.h
> @@ -71,6 +71,7 @@ QEMUFile *qemu_fdopen(int fd, const char *mode);
>  QEMUFile *qemu_fopen_socket(int fd, const char *mode);
>  QEMUFile *qemu_popen_cmd(const char *command, const char *mode);
>  int qemu_get_fd(QEMUFile *f);
> +void qemu_fflush(QEMUFile *f);
>  int qemu_fclose(QEMUFile *f);
>  int64_t qemu_ftell(QEMUFile *f);
>  void qemu_put_buffer(QEMUFile *f, const uint8_t *buf, int size);
> diff --git a/savevm.c b/savevm.c
> index ff5ece6..4d12d92 100644
> --- a/savevm.c
> +++ b/savevm.c
> @@ -610,7 +610,7 @@ static inline bool qemu_file_is_writable(QEMUFile *f)
>   * If there is writev_buffer QEMUFileOps it uses it otherwise uses
>   * put_buffer ops.
>   */
> -static void qemu_fflush(QEMUFile *f)
> +void qemu_fflush(QEMUFile *f)
>  {
>      ssize_t ret = 0;
>  
>
Peter Lieven - June 24, 2013, 4:14 p.m.
Am 24.06.2013 16:32, schrieb Paolo Bonzini:
> Il 22/06/2013 22:58, Peter Lieven ha scritto:
>> this patch adds a efficient encoding for zero blocks by
>> adding a new flag indiciating a block is completly zero.
>>
>> additionally bdrv_write_zeros() is used at the destination
>> to efficiently write these zeroes. if the driver supports
>> it this avoids blindly allocating all sectors consumed by
>> zero blocks effectively re-thinning the device.
>>
>> Signed-off-by: Peter Lieven <pl@kamp.de>
> This is a bit ugly because it doesn't work with drive-mirror.  Perhaps
> we can add a write-zeroes opcode to NBD, too.
That is the exact problem with NBD. If write zeroes is not supported,
the routine falls back to writing zeros with writev.

Peter
>
> Paolo
>
>> ---
>>  block-migration.c             |   29 +++++++++++++++++++++++------
>>  include/migration/qemu-file.h |    1 +
>>  savevm.c                      |    2 +-
>>  3 files changed, 25 insertions(+), 7 deletions(-)
>>
>> diff --git a/block-migration.c b/block-migration.c
>> index 2fd7699..99b3757 100644
>> --- a/block-migration.c
>> +++ b/block-migration.c
>> @@ -29,6 +29,7 @@
>>  #define BLK_MIG_FLAG_DEVICE_BLOCK       0x01
>>  #define BLK_MIG_FLAG_EOS                0x02
>>  #define BLK_MIG_FLAG_PROGRESS           0x04
>> +#define BLK_MIG_FLAG_ZERO_BLOCK         0x08
>>  
>>  #define MAX_IS_ALLOCATED_SEARCH 65536
>>  
>> @@ -114,16 +115,29 @@ static void blk_mig_unlock(void)
>>  static void blk_send(QEMUFile *f, BlkMigBlock * blk)
>>  {
>>      int len;
>> +    int flags = BLK_MIG_FLAG_DEVICE_BLOCK;
>> +    
>> +    if (buffer_is_zero(blk->buf, BLOCK_SIZE)) {
>> +        flags |= BLK_MIG_FLAG_ZERO_BLOCK;
>> +    }
>>  
>>      /* sector number and flags */
>>      qemu_put_be64(f, (blk->sector << BDRV_SECTOR_BITS)
>> -                     | BLK_MIG_FLAG_DEVICE_BLOCK);
>> +                     | flags);
>>  
>>      /* device name */
>>      len = strlen(blk->bmds->bs->device_name);
>>      qemu_put_byte(f, len);
>>      qemu_put_buffer(f, (uint8_t *)blk->bmds->bs->device_name, len);
>>  
>> +    /* if a block is zero we need to flush here since the network
>> +     * bandwidth is now a lot higher than the storage device bandwidth.
>> +     * thus if we queue zero blocks we slow down the migration */
>> +    if (flags & BLK_MIG_FLAG_ZERO_BLOCK) {
>> +        qemu_fflush(f);
>> +        return;
>> +    }
>> +
>>      qemu_put_buffer(f, blk->buf, BLOCK_SIZE);
>>  }
>>  
>> @@ -762,12 +776,15 @@ static int block_load(QEMUFile *f, void *opaque, int version_id)
>>                  nr_sectors = BDRV_SECTORS_PER_DIRTY_CHUNK;
>>              }
>>  
>> -            buf = g_malloc(BLOCK_SIZE);
>> -
>> -            qemu_get_buffer(f, buf, BLOCK_SIZE);
>> -            ret = bdrv_write(bs, addr, buf, nr_sectors);
>> +            if (flags & BLK_MIG_FLAG_ZERO_BLOCK) {
>> +                ret = bdrv_write_zeroes(bs, addr, nr_sectors);
>> +            } else {
>> +                buf = g_malloc(BLOCK_SIZE);
>> +                qemu_get_buffer(f, buf, BLOCK_SIZE);
>> +                ret = bdrv_write(bs, addr, buf, nr_sectors);
>> +                g_free(buf);
>> +            }
>>  
>> -            g_free(buf);
>>              if (ret < 0) {
>>                  return ret;
>>              }
>> diff --git a/include/migration/qemu-file.h b/include/migration/qemu-file.h
>> index 7519464..b73298d 100644
>> --- a/include/migration/qemu-file.h
>> +++ b/include/migration/qemu-file.h
>> @@ -71,6 +71,7 @@ QEMUFile *qemu_fdopen(int fd, const char *mode);
>>  QEMUFile *qemu_fopen_socket(int fd, const char *mode);
>>  QEMUFile *qemu_popen_cmd(const char *command, const char *mode);
>>  int qemu_get_fd(QEMUFile *f);
>> +void qemu_fflush(QEMUFile *f);
>>  int qemu_fclose(QEMUFile *f);
>>  int64_t qemu_ftell(QEMUFile *f);
>>  void qemu_put_buffer(QEMUFile *f, const uint8_t *buf, int size);
>> diff --git a/savevm.c b/savevm.c
>> index ff5ece6..4d12d92 100644
>> --- a/savevm.c
>> +++ b/savevm.c
>> @@ -610,7 +610,7 @@ static inline bool qemu_file_is_writable(QEMUFile *f)
>>   * If there is writev_buffer QEMUFileOps it uses it otherwise uses
>>   * put_buffer ops.
>>   */
>> -static void qemu_fflush(QEMUFile *f)
>> +void qemu_fflush(QEMUFile *f)
>>  {
>>      ssize_t ret = 0;
>>  
>>

Patch

diff --git a/block-migration.c b/block-migration.c
index 2fd7699..99b3757 100644
--- a/block-migration.c
+++ b/block-migration.c
@@ -29,6 +29,7 @@ 
 #define BLK_MIG_FLAG_DEVICE_BLOCK       0x01
 #define BLK_MIG_FLAG_EOS                0x02
 #define BLK_MIG_FLAG_PROGRESS           0x04
+#define BLK_MIG_FLAG_ZERO_BLOCK         0x08
 
 #define MAX_IS_ALLOCATED_SEARCH 65536
 
@@ -114,16 +115,29 @@  static void blk_mig_unlock(void)
 static void blk_send(QEMUFile *f, BlkMigBlock * blk)
 {
     int len;
+    int flags = BLK_MIG_FLAG_DEVICE_BLOCK;
+    
+    if (buffer_is_zero(blk->buf, BLOCK_SIZE)) {
+        flags |= BLK_MIG_FLAG_ZERO_BLOCK;
+    }
 
     /* sector number and flags */
     qemu_put_be64(f, (blk->sector << BDRV_SECTOR_BITS)
-                     | BLK_MIG_FLAG_DEVICE_BLOCK);
+                     | flags);
 
     /* device name */
     len = strlen(blk->bmds->bs->device_name);
     qemu_put_byte(f, len);
     qemu_put_buffer(f, (uint8_t *)blk->bmds->bs->device_name, len);
 
+    /* if a block is zero we need to flush here since the network
+     * bandwidth is now a lot higher than the storage device bandwidth.
+     * thus if we queue zero blocks we slow down the migration */
+    if (flags & BLK_MIG_FLAG_ZERO_BLOCK) {
+        qemu_fflush(f);
+        return;
+    }
+
     qemu_put_buffer(f, blk->buf, BLOCK_SIZE);
 }
 
@@ -762,12 +776,15 @@  static int block_load(QEMUFile *f, void *opaque, int version_id)
                 nr_sectors = BDRV_SECTORS_PER_DIRTY_CHUNK;
             }
 
-            buf = g_malloc(BLOCK_SIZE);
-
-            qemu_get_buffer(f, buf, BLOCK_SIZE);
-            ret = bdrv_write(bs, addr, buf, nr_sectors);
+            if (flags & BLK_MIG_FLAG_ZERO_BLOCK) {
+                ret = bdrv_write_zeroes(bs, addr, nr_sectors);
+            } else {
+                buf = g_malloc(BLOCK_SIZE);
+                qemu_get_buffer(f, buf, BLOCK_SIZE);
+                ret = bdrv_write(bs, addr, buf, nr_sectors);
+                g_free(buf);
+            }
 
-            g_free(buf);
             if (ret < 0) {
                 return ret;
             }
diff --git a/include/migration/qemu-file.h b/include/migration/qemu-file.h
index 7519464..b73298d 100644
--- a/include/migration/qemu-file.h
+++ b/include/migration/qemu-file.h
@@ -71,6 +71,7 @@  QEMUFile *qemu_fdopen(int fd, const char *mode);
 QEMUFile *qemu_fopen_socket(int fd, const char *mode);
 QEMUFile *qemu_popen_cmd(const char *command, const char *mode);
 int qemu_get_fd(QEMUFile *f);
+void qemu_fflush(QEMUFile *f);
 int qemu_fclose(QEMUFile *f);
 int64_t qemu_ftell(QEMUFile *f);
 void qemu_put_buffer(QEMUFile *f, const uint8_t *buf, int size);
diff --git a/savevm.c b/savevm.c
index ff5ece6..4d12d92 100644
--- a/savevm.c
+++ b/savevm.c
@@ -610,7 +610,7 @@  static inline bool qemu_file_is_writable(QEMUFile *f)
  * If there is writev_buffer QEMUFileOps it uses it otherwise uses
  * put_buffer ops.
  */
-static void qemu_fflush(QEMUFile *f)
+void qemu_fflush(QEMUFile *f)
 {
     ssize_t ret = 0;