diff mbox

[PATCHv2,1.8,1/9] qemu-img: add support for skipping zeroes in input during convert

Message ID 1385456183-24840-2-git-send-email-pl@kamp.de
State New
Headers show

Commit Message

Peter Lieven Nov. 26, 2013, 8:56 a.m. UTC
we currently do not check if a sector is allocated during convert.
This means if a sector is unallocated that we allocate a bounce
buffer of zeroes, find out its zero later and do not write it
in the best case. In the worst case this can lead to reading
blocks from a raw device (like iSCSI) altough we could easily
know via get_block_status that they are zero and simply skip them.

This patch also fixes the progress output not being at 100% after
a successful conversion.

Signed-off-by: Peter Lieven <pl@kamp.de>
---
 qemu-img.c |   85 ++++++++++++++++++++++++++++++++++--------------------------
 1 file changed, 48 insertions(+), 37 deletions(-)

Comments

Paolo Bonzini Nov. 26, 2013, 10:02 a.m. UTC | #1
Il 26/11/2013 09:56, Peter Lieven ha scritto:
> we currently do not check if a sector is allocated during convert.
> This means if a sector is unallocated that we allocate a bounce
> buffer of zeroes, find out its zero later and do not write it
> in the best case. In the worst case this can lead to reading
> blocks from a raw device (like iSCSI) altough we could easily
> know via get_block_status that they are zero and simply skip them.
> 
> This patch also fixes the progress output not being at 100% after
> a successful conversion.
> 
> Signed-off-by: Peter Lieven <pl@kamp.de>
> ---
>  qemu-img.c |   85 ++++++++++++++++++++++++++++++++++--------------------------
>  1 file changed, 48 insertions(+), 37 deletions(-)
> 
> diff --git a/qemu-img.c b/qemu-img.c
> index dc0c2f0..efb744c 100644
> --- a/qemu-img.c
> +++ b/qemu-img.c
> @@ -1125,13 +1125,15 @@ out3:
>  
>  static int img_convert(int argc, char **argv)
>  {
> -    int c, ret = 0, n, n1, bs_n, bs_i, compress, cluster_size,
> +    int c, n, n1, bs_n, bs_i, compress, cluster_size,
>          cluster_sectors, skip_create;
> +    int64_t ret = 0;
>      int progress = 0, flags;
>      const char *fmt, *out_fmt, *cache, *out_baseimg, *out_filename;
>      BlockDriver *drv, *proto_drv;
>      BlockDriverState **bs = NULL, *out_bs = NULL;
> -    int64_t total_sectors, nb_sectors, sector_num, bs_offset;
> +    int64_t total_sectors, nb_sectors, sector_num, bs_offset,
> +            sector_num_next_status = 0;
>      uint64_t bs_sectors;
>      uint8_t * buf = NULL;
>      const uint8_t *buf1;
> @@ -1140,7 +1142,6 @@ static int img_convert(int argc, char **argv)
>      QEMUOptionParameter *out_baseimg_param;
>      char *options = NULL;
>      const char *snapshot_name = NULL;
> -    float local_progress = 0;
>      int min_sparse = 8; /* Need at least 4k of zeros for sparse detection */
>      bool quiet = false;
>      Error *local_err = NULL;
> @@ -1403,10 +1404,6 @@ static int img_convert(int argc, char **argv)
>          sector_num = 0;
>  
>          nb_sectors = total_sectors;
> -        if (nb_sectors != 0) {
> -            local_progress = (float)100 /
> -                (nb_sectors / MIN(nb_sectors, cluster_sectors));
> -        }
>  
>          for(;;) {
>              int64_t bs_num;
> @@ -1464,7 +1461,7 @@ static int img_convert(int argc, char **argv)
>                  }
>              }
>              sector_num += n;
> -            qemu_progress_print(local_progress, 100);
> +            qemu_progress_print(100.0 * sector_num / total_sectors, 0);
>          }
>          /* signal EOF to align */
>          bdrv_write_compressed(out_bs, 0, NULL, 0);
> @@ -1481,21 +1478,13 @@ static int img_convert(int argc, char **argv)
>  
>          sector_num = 0; // total number of sectors converted so far
>          nb_sectors = total_sectors - sector_num;
> -        if (nb_sectors != 0) {
> -            local_progress = (float)100 /
> -                (nb_sectors / MIN(nb_sectors, IO_BUF_SIZE / 512));
> -        }
>  
>          for(;;) {
>              nb_sectors = total_sectors - sector_num;
>              if (nb_sectors <= 0) {
> +                ret = 0;
>                  break;
>              }
> -            if (nb_sectors >= (IO_BUF_SIZE / 512)) {
> -                n = (IO_BUF_SIZE / 512);
> -            } else {
> -                n = nb_sectors;
> -            }
>  
>              while (sector_num - bs_offset >= bs_sectors) {
>                  bs_i ++;
> @@ -1507,34 +1496,53 @@ static int img_convert(int argc, char **argv)
>                     sector_num, bs_i, bs_offset, bs_sectors); */
>              }
>  
> -            if (n > bs_offset + bs_sectors - sector_num) {
> -                n = bs_offset + bs_sectors - sector_num;
> -            }
> -
> -            /* If the output image is being created as a copy on write image,
> -               assume that sectors which are unallocated in the input image
> -               are present in both the output's and input's base images (no
> -               need to copy them). */
> -            if (out_baseimg) {
> -                ret = bdrv_is_allocated(bs[bs_i], sector_num - bs_offset,
> -                                        n, &n1);
> +            if ((out_baseimg || has_zero_init) &&
> +                sector_num >= sector_num_next_status) {
> +                n = nb_sectors > INT_MAX ? INT_MAX : nb_sectors;
> +                ret = bdrv_get_block_status(bs[bs_i], sector_num - bs_offset,
> +                                            n, &n1);
>                  if (ret < 0) {
> -                    error_report("error while reading metadata for sector "
> -                                 "%" PRId64 ": %s",
> -                                 sector_num - bs_offset, strerror(-ret));
> +                    error_report("error while reading block status of sector %"
> +                                 PRId64 ": %s", sector_num - bs_offset,
> +                                 strerror(-ret));
>                      goto out;
>                  }
> -                if (!ret) {
> +                /* If the output image is zero initialized, we are not working
> +                 * on a shared base and the input is zero we can skip the next
> +                 * n1 sectors */
> +                if (has_zero_init && !out_baseimg && ret & BDRV_BLOCK_ZERO) {

has_zero_init will imply !out_baseimg if skip_create == false.  Should
we care and check out_baseimg separately if skip_create == true?  If
not, you can remove "&& !out_baseimg".

Also, please add parentheses around "ret & BDRV_BLOCK_ZERO".

>                      sector_num += n1;
>                      continue;
>                  }
> -                /* The next 'n1' sectors are allocated in the input image. Copy
> -                   only those as they may be followed by unallocated sectors. */
> -                n = n1;
> +                /* If the output image is being created as a copy on write
> +                 * image, assume that sectors which are unallocated in the
> +                 * input image are present in both the output's and input's
> +                 * base images (no need to copy them). */
> +                if (out_baseimg) {
> +                    if (!(ret & BDRV_BLOCK_DATA)) {
> +                        sector_num += n1;
> +                        continue;
> +                    }
> +                    /* The next 'n1' sectors are allocated in the input image.
> +                     * Copy only those as they may be followed by unallocated
> +                     * sectors. */
> +                    nb_sectors = n1;
> +                }
> +                /* avoid redundant callouts to get_block_status */
> +                sector_num_next_status = sector_num + n1;
> +            }
> +
> +            if (nb_sectors >= (IO_BUF_SIZE / 512)) {
> +                n = (IO_BUF_SIZE / 512);
>              } else {
> -                n1 = n;
> +                n = nb_sectors;
>              }
>  
> +            if (n > bs_offset + bs_sectors - sector_num) {
> +                n = bs_offset + bs_sectors - sector_num;
> +            }
> +            n1 = n;

Please change this to:

n = MIN(nb_sectors, IO_BUF_SIZE / 512);
n = MIN(n, bs_sectors - (sector_num - bs_offset));
n1 = n;

Otherwise looks good.

Thanks,

Paolo

>              ret = bdrv_read(bs[bs_i], sector_num - bs_offset, buf, n);
>              if (ret < 0) {
>                  error_report("error while reading sector %" PRId64 ": %s",
> @@ -1559,10 +1567,13 @@ static int img_convert(int argc, char **argv)
>                  n -= n1;
>                  buf1 += n1 * 512;
>              }
> -            qemu_progress_print(local_progress, 100);
> +            qemu_progress_print(100.0 * sector_num / total_sectors, 0);
>          }
>      }
>  out:
> +    if (!ret) {
> +        qemu_progress_print(100, 0);
> +    }
>      qemu_progress_end();
>      free_option_parameters(create_options);
>      free_option_parameters(param);
>
Peter Lieven Nov. 26, 2013, 1:40 p.m. UTC | #2
On 26.11.2013 11:02, Paolo Bonzini wrote:
> Il 26/11/2013 09:56, Peter Lieven ha scritto:
>> we currently do not check if a sector is allocated during convert.
>> This means if a sector is unallocated that we allocate a bounce
>> buffer of zeroes, find out its zero later and do not write it
>> in the best case. In the worst case this can lead to reading
>> blocks from a raw device (like iSCSI) altough we could easily
>> know via get_block_status that they are zero and simply skip them.
>>
>> This patch also fixes the progress output not being at 100% after
>> a successful conversion.
>>
>> Signed-off-by: Peter Lieven <pl@kamp.de>
>> ---
>>   qemu-img.c |   85 ++++++++++++++++++++++++++++++++++--------------------------
>>   1 file changed, 48 insertions(+), 37 deletions(-)
>>
>> diff --git a/qemu-img.c b/qemu-img.c
>> index dc0c2f0..efb744c 100644
>> --- a/qemu-img.c
>> +++ b/qemu-img.c
>> @@ -1125,13 +1125,15 @@ out3:
>>   
>>   static int img_convert(int argc, char **argv)
>>   {
>> -    int c, ret = 0, n, n1, bs_n, bs_i, compress, cluster_size,
>> +    int c, n, n1, bs_n, bs_i, compress, cluster_size,
>>           cluster_sectors, skip_create;
>> +    int64_t ret = 0;
>>       int progress = 0, flags;
>>       const char *fmt, *out_fmt, *cache, *out_baseimg, *out_filename;
>>       BlockDriver *drv, *proto_drv;
>>       BlockDriverState **bs = NULL, *out_bs = NULL;
>> -    int64_t total_sectors, nb_sectors, sector_num, bs_offset;
>> +    int64_t total_sectors, nb_sectors, sector_num, bs_offset,
>> +            sector_num_next_status = 0;
>>       uint64_t bs_sectors;
>>       uint8_t * buf = NULL;
>>       const uint8_t *buf1;
>> @@ -1140,7 +1142,6 @@ static int img_convert(int argc, char **argv)
>>       QEMUOptionParameter *out_baseimg_param;
>>       char *options = NULL;
>>       const char *snapshot_name = NULL;
>> -    float local_progress = 0;
>>       int min_sparse = 8; /* Need at least 4k of zeros for sparse detection */
>>       bool quiet = false;
>>       Error *local_err = NULL;
>> @@ -1403,10 +1404,6 @@ static int img_convert(int argc, char **argv)
>>           sector_num = 0;
>>   
>>           nb_sectors = total_sectors;
>> -        if (nb_sectors != 0) {
>> -            local_progress = (float)100 /
>> -                (nb_sectors / MIN(nb_sectors, cluster_sectors));
>> -        }
>>   
>>           for(;;) {
>>               int64_t bs_num;
>> @@ -1464,7 +1461,7 @@ static int img_convert(int argc, char **argv)
>>                   }
>>               }
>>               sector_num += n;
>> -            qemu_progress_print(local_progress, 100);
>> +            qemu_progress_print(100.0 * sector_num / total_sectors, 0);
>>           }
>>           /* signal EOF to align */
>>           bdrv_write_compressed(out_bs, 0, NULL, 0);
>> @@ -1481,21 +1478,13 @@ static int img_convert(int argc, char **argv)
>>   
>>           sector_num = 0; // total number of sectors converted so far
>>           nb_sectors = total_sectors - sector_num;
>> -        if (nb_sectors != 0) {
>> -            local_progress = (float)100 /
>> -                (nb_sectors / MIN(nb_sectors, IO_BUF_SIZE / 512));
>> -        }
>>   
>>           for(;;) {
>>               nb_sectors = total_sectors - sector_num;
>>               if (nb_sectors <= 0) {
>> +                ret = 0;
>>                   break;
>>               }
>> -            if (nb_sectors >= (IO_BUF_SIZE / 512)) {
>> -                n = (IO_BUF_SIZE / 512);
>> -            } else {
>> -                n = nb_sectors;
>> -            }
>>   
>>               while (sector_num - bs_offset >= bs_sectors) {
>>                   bs_i ++;
>> @@ -1507,34 +1496,53 @@ static int img_convert(int argc, char **argv)
>>                      sector_num, bs_i, bs_offset, bs_sectors); */
>>               }
>>   
>> -            if (n > bs_offset + bs_sectors - sector_num) {
>> -                n = bs_offset + bs_sectors - sector_num;
>> -            }
>> -
>> -            /* If the output image is being created as a copy on write image,
>> -               assume that sectors which are unallocated in the input image
>> -               are present in both the output's and input's base images (no
>> -               need to copy them). */
>> -            if (out_baseimg) {
>> -                ret = bdrv_is_allocated(bs[bs_i], sector_num - bs_offset,
>> -                                        n, &n1);
>> +            if ((out_baseimg || has_zero_init) &&
>> +                sector_num >= sector_num_next_status) {
>> +                n = nb_sectors > INT_MAX ? INT_MAX : nb_sectors;
>> +                ret = bdrv_get_block_status(bs[bs_i], sector_num - bs_offset,
>> +                                            n, &n1);
>>                   if (ret < 0) {
>> -                    error_report("error while reading metadata for sector "
>> -                                 "%" PRId64 ": %s",
>> -                                 sector_num - bs_offset, strerror(-ret));
>> +                    error_report("error while reading block status of sector %"
>> +                                 PRId64 ": %s", sector_num - bs_offset,
>> +                                 strerror(-ret));
>>                       goto out;
>>                   }
>> -                if (!ret) {
>> +                /* If the output image is zero initialized, we are not working
>> +                 * on a shared base and the input is zero we can skip the next
>> +                 * n1 sectors */
>> +                if (has_zero_init && !out_baseimg && ret & BDRV_BLOCK_ZERO) {
> has_zero_init will imply !out_baseimg if skip_create == false.  Should
> we care and check out_baseimg separately if skip_create == true?  If
> not, you can remove "&& !out_baseimg".
I would leave it in it doesn't hurt.
>
> Also, please add parentheses around "ret & BDRV_BLOCK_ZERO".
Ok.
>
>>                       sector_num += n1;
>>                       continue;
>>                   }
>> -                /* The next 'n1' sectors are allocated in the input image. Copy
>> -                   only those as they may be followed by unallocated sectors. */
>> -                n = n1;
>> +                /* If the output image is being created as a copy on write
>> +                 * image, assume that sectors which are unallocated in the
>> +                 * input image are present in both the output's and input's
>> +                 * base images (no need to copy them). */
>> +                if (out_baseimg) {
>> +                    if (!(ret & BDRV_BLOCK_DATA)) {
>> +                        sector_num += n1;
>> +                        continue;
>> +                    }
>> +                    /* The next 'n1' sectors are allocated in the input image.
>> +                     * Copy only those as they may be followed by unallocated
>> +                     * sectors. */
>> +                    nb_sectors = n1;
>> +                }
>> +                /* avoid redundant callouts to get_block_status */
>> +                sector_num_next_status = sector_num + n1;
>> +            }
>> +
>> +            if (nb_sectors >= (IO_BUF_SIZE / 512)) {
>> +                n = (IO_BUF_SIZE / 512);
>>               } else {
>> -                n1 = n;
>> +                n = nb_sectors;
>>               }
>>   
>> +            if (n > bs_offset + bs_sectors - sector_num) {
>> +                n = bs_offset + bs_sectors - sector_num;
>> +            }
>> +            n1 = n;
> Please change this to:
>
> n = MIN(nb_sectors, IO_BUF_SIZE / 512);
> n = MIN(n, bs_sectors - (sector_num - bs_offset));
> n1 = n;

Thats just copy and paste, I will change it.

I was thinking if it would be a good improvement (separate patch) to
scan the whole source image for holes and account only allocated sectors
in the progress display. It would be much more accurate then.

Peter
>
> Otherwise looks good.
>
> Thanks,
>
> Paolo
>
>>               ret = bdrv_read(bs[bs_i], sector_num - bs_offset, buf, n);
>>               if (ret < 0) {
>>                   error_report("error while reading sector %" PRId64 ": %s",
>> @@ -1559,10 +1567,13 @@ static int img_convert(int argc, char **argv)
>>                   n -= n1;
>>                   buf1 += n1 * 512;
>>               }
>> -            qemu_progress_print(local_progress, 100);
>> +            qemu_progress_print(100.0 * sector_num / total_sectors, 0);
>>           }
>>       }
>>   out:
>> +    if (!ret) {
>> +        qemu_progress_print(100, 0);
>> +    }
>>       qemu_progress_end();
>>       free_option_parameters(create_options);
>>       free_option_parameters(param);
>>
diff mbox

Patch

diff --git a/qemu-img.c b/qemu-img.c
index dc0c2f0..efb744c 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -1125,13 +1125,15 @@  out3:
 
 static int img_convert(int argc, char **argv)
 {
-    int c, ret = 0, n, n1, bs_n, bs_i, compress, cluster_size,
+    int c, n, n1, bs_n, bs_i, compress, cluster_size,
         cluster_sectors, skip_create;
+    int64_t ret = 0;
     int progress = 0, flags;
     const char *fmt, *out_fmt, *cache, *out_baseimg, *out_filename;
     BlockDriver *drv, *proto_drv;
     BlockDriverState **bs = NULL, *out_bs = NULL;
-    int64_t total_sectors, nb_sectors, sector_num, bs_offset;
+    int64_t total_sectors, nb_sectors, sector_num, bs_offset,
+            sector_num_next_status = 0;
     uint64_t bs_sectors;
     uint8_t * buf = NULL;
     const uint8_t *buf1;
@@ -1140,7 +1142,6 @@  static int img_convert(int argc, char **argv)
     QEMUOptionParameter *out_baseimg_param;
     char *options = NULL;
     const char *snapshot_name = NULL;
-    float local_progress = 0;
     int min_sparse = 8; /* Need at least 4k of zeros for sparse detection */
     bool quiet = false;
     Error *local_err = NULL;
@@ -1403,10 +1404,6 @@  static int img_convert(int argc, char **argv)
         sector_num = 0;
 
         nb_sectors = total_sectors;
-        if (nb_sectors != 0) {
-            local_progress = (float)100 /
-                (nb_sectors / MIN(nb_sectors, cluster_sectors));
-        }
 
         for(;;) {
             int64_t bs_num;
@@ -1464,7 +1461,7 @@  static int img_convert(int argc, char **argv)
                 }
             }
             sector_num += n;
-            qemu_progress_print(local_progress, 100);
+            qemu_progress_print(100.0 * sector_num / total_sectors, 0);
         }
         /* signal EOF to align */
         bdrv_write_compressed(out_bs, 0, NULL, 0);
@@ -1481,21 +1478,13 @@  static int img_convert(int argc, char **argv)
 
         sector_num = 0; // total number of sectors converted so far
         nb_sectors = total_sectors - sector_num;
-        if (nb_sectors != 0) {
-            local_progress = (float)100 /
-                (nb_sectors / MIN(nb_sectors, IO_BUF_SIZE / 512));
-        }
 
         for(;;) {
             nb_sectors = total_sectors - sector_num;
             if (nb_sectors <= 0) {
+                ret = 0;
                 break;
             }
-            if (nb_sectors >= (IO_BUF_SIZE / 512)) {
-                n = (IO_BUF_SIZE / 512);
-            } else {
-                n = nb_sectors;
-            }
 
             while (sector_num - bs_offset >= bs_sectors) {
                 bs_i ++;
@@ -1507,34 +1496,53 @@  static int img_convert(int argc, char **argv)
                    sector_num, bs_i, bs_offset, bs_sectors); */
             }
 
-            if (n > bs_offset + bs_sectors - sector_num) {
-                n = bs_offset + bs_sectors - sector_num;
-            }
-
-            /* If the output image is being created as a copy on write image,
-               assume that sectors which are unallocated in the input image
-               are present in both the output's and input's base images (no
-               need to copy them). */
-            if (out_baseimg) {
-                ret = bdrv_is_allocated(bs[bs_i], sector_num - bs_offset,
-                                        n, &n1);
+            if ((out_baseimg || has_zero_init) &&
+                sector_num >= sector_num_next_status) {
+                n = nb_sectors > INT_MAX ? INT_MAX : nb_sectors;
+                ret = bdrv_get_block_status(bs[bs_i], sector_num - bs_offset,
+                                            n, &n1);
                 if (ret < 0) {
-                    error_report("error while reading metadata for sector "
-                                 "%" PRId64 ": %s",
-                                 sector_num - bs_offset, strerror(-ret));
+                    error_report("error while reading block status of sector %"
+                                 PRId64 ": %s", sector_num - bs_offset,
+                                 strerror(-ret));
                     goto out;
                 }
-                if (!ret) {
+                /* If the output image is zero initialized, we are not working
+                 * on a shared base and the input is zero we can skip the next
+                 * n1 sectors */
+                if (has_zero_init && !out_baseimg && ret & BDRV_BLOCK_ZERO) {
                     sector_num += n1;
                     continue;
                 }
-                /* The next 'n1' sectors are allocated in the input image. Copy
-                   only those as they may be followed by unallocated sectors. */
-                n = n1;
+                /* If the output image is being created as a copy on write
+                 * image, assume that sectors which are unallocated in the
+                 * input image are present in both the output's and input's
+                 * base images (no need to copy them). */
+                if (out_baseimg) {
+                    if (!(ret & BDRV_BLOCK_DATA)) {
+                        sector_num += n1;
+                        continue;
+                    }
+                    /* The next 'n1' sectors are allocated in the input image.
+                     * Copy only those as they may be followed by unallocated
+                     * sectors. */
+                    nb_sectors = n1;
+                }
+                /* avoid redundant callouts to get_block_status */
+                sector_num_next_status = sector_num + n1;
+            }
+
+            if (nb_sectors >= (IO_BUF_SIZE / 512)) {
+                n = (IO_BUF_SIZE / 512);
             } else {
-                n1 = n;
+                n = nb_sectors;
             }
 
+            if (n > bs_offset + bs_sectors - sector_num) {
+                n = bs_offset + bs_sectors - sector_num;
+            }
+
+            n1 = n;
             ret = bdrv_read(bs[bs_i], sector_num - bs_offset, buf, n);
             if (ret < 0) {
                 error_report("error while reading sector %" PRId64 ": %s",
@@ -1559,10 +1567,13 @@  static int img_convert(int argc, char **argv)
                 n -= n1;
                 buf1 += n1 * 512;
             }
-            qemu_progress_print(local_progress, 100);
+            qemu_progress_print(100.0 * sector_num / total_sectors, 0);
         }
     }
 out:
+    if (!ret) {
+        qemu_progress_print(100, 0);
+    }
     qemu_progress_end();
     free_option_parameters(create_options);
     free_option_parameters(param);