diff mbox

[v5,06/26] qcow2: Use 64 bits for refcount values

Message ID 1418647857-3589-7-git-send-email-mreitz@redhat.com
State New
Headers show

Commit Message

Max Reitz Dec. 15, 2014, 12:50 p.m. UTC
Refcounts may have a width of up to 64 bits, so qemu should use the same
width to represent refcount values internally.

Signed-off-by: Max Reitz <mreitz@redhat.com>
---
 block/qcow2-cluster.c  |  2 +-
 block/qcow2-refcount.c | 46 ++++++++++++++++++++++------------------------
 block/qcow2.h          |  4 ++--
 3 files changed, 25 insertions(+), 27 deletions(-)

Comments

Eric Blake Jan. 22, 2015, 3:35 p.m. UTC | #1
On 12/15/2014 05:50 AM, Max Reitz wrote:
> Refcounts may have a width of up to 64 bits, so qemu should use the same
> width to represent refcount values internally.
> 
> Signed-off-by: Max Reitz <mreitz@redhat.com>
> ---
>  block/qcow2-cluster.c  |  2 +-
>  block/qcow2-refcount.c | 46 ++++++++++++++++++++++------------------------
>  block/qcow2.h          |  4 ++--
>  3 files changed, 25 insertions(+), 27 deletions(-)
> 

Reviewed-by: Eric Blake <eblake@redhat.com>
Kevin Wolf Feb. 3, 2015, 7:26 p.m. UTC | #2
Am 15.12.2014 um 13:50 hat Max Reitz geschrieben:
> Refcounts may have a width of up to 64 bits, so qemu should use the same
> width to represent refcount values internally.
> 
> Signed-off-by: Max Reitz <mreitz@redhat.com>
> ---
>  block/qcow2-cluster.c  |  2 +-
>  block/qcow2-refcount.c | 46 ++++++++++++++++++++++------------------------
>  block/qcow2.h          |  4 ++--
>  3 files changed, 25 insertions(+), 27 deletions(-)

> @@ -897,11 +895,10 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
>      int64_t l1_table_offset, int l1_size, int addend)
>  {

Your leaving addend an int here...

>      BDRVQcowState *s = bs->opaque;
> -    uint64_t *l1_table, *l2_table, l2_offset, offset, l1_size2;
> +    uint64_t *l1_table, *l2_table, l2_offset, offset, l1_size2, refcount;
>      bool l1_allocated = false;
>      int64_t old_offset, old_l2_offset;
>      int i, j, l1_modified = 0, nb_csectors;
> -    uint16_t refcount;
>      int ret;
>  
>      l2_table = NULL;
> @@ -968,7 +965,7 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
>                          if (addend != 0) {
>                              ret = update_refcount(bs,
>                                  (offset & s->cluster_offset_mask) & ~511,
> -                                nb_csectors * 512, abs(addend), addend < 0,
> +                                nb_csectors * 512, imaxabs(addend), addend < 0,
>                                  QCOW2_DISCARD_SNAPSHOT);
>                              if (ret < 0) {
>                                  goto fail;
> @@ -999,7 +996,7 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
>                          }
>                          if (addend != 0) {
>                              ret = qcow2_update_cluster_refcount(bs,
> -                                    cluster_index, abs(addend), addend < 0,
> +                                    cluster_index, imaxabs(addend), addend < 0,
>                                      QCOW2_DISCARD_SNAPSHOT);
>                              if (ret < 0) {
>                                  goto fail;
> @@ -1042,7 +1039,7 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
>              if (addend != 0) {
>                  ret = qcow2_update_cluster_refcount(bs, l2_offset >>
>                                                          s->cluster_bits,
> -                                                    abs(addend), addend < 0,
> +                                                    imaxabs(addend), addend < 0,
>                                                      QCOW2_DISCARD_SNAPSHOT);
>                  if (ret < 0) {
>                      goto fail;

...but still replace abs() by imaxabs(). Did you intend to convert
addend or why this change?

> @@ -1658,7 +1655,7 @@ static void compare_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
>  {
>      BDRVQcowState *s = bs->opaque;
>      int64_t i;
> -    uint16_t refcount1, refcount2;
> +    uint64_t refcount1, refcount2;
>      int ret;
>  
>      for (i = 0, *highest_cluster = 0; i < nb_clusters; i++) {
> @@ -1687,7 +1684,8 @@ static void compare_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
>                  num_fixed = &res->corruptions_fixed;
>              }
>  
> -            fprintf(stderr, "%s cluster %" PRId64 " refcount=%d reference=%d\n",
> +            fprintf(stderr, "%s cluster %" PRId64 " refcount=%" PRIu64
> +                    " reference=%" PRIu64 "\n",
>                     num_fixed != NULL     ? "Repairing" :
>                     refcount1 < refcount2 ? "ERROR" :
>                                             "Leaked",
> @@ -1695,7 +1693,7 @@ static void compare_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
>  
>              if (num_fixed) {
>                  ret = update_refcount(bs, i << s->cluster_bits, 1,
> -                                      abs(refcount2 - refcount1),
> +                                      imaxabs(refcount2 - refcount1),
>                                        refcount1 > refcount2,

Hope I got that right. Here's my analysis:

Before: refcount{1,2} were both uint16_t. Promoted to int for the
subtraction. Therefore a negative result could occur. abs() takes the
absolute value and the sign is passed separately.

After: refcount{1,2} are both uint64_t. No integer promotion happens, we
perform an unsigned subtraction. The separate passed sign is okay. For
the absolute value, there are two cases:

1. refcount2 >= refcount1: No overflow occurs, everything fine.

2. refcount2 < refcount1: (refcount2 - refcount1) wraps around, but is
   still an uint64_t. imaxabs() takes an intmax_t, which is signed. The
   conversion is implementation defined, but let's assume the obvious
   one. imaxabs() has two cases again:

   diff := refcount2 - refcount1 + UINT64_MAX

   a. diff > INTMAX_MAX:
      We get diff converted back to signed, which undoes the wraparound.
      The absolute value of the signed difference is:

        -(refcount2 - refcount1) = refcount1 - refcount2

      This is what we wanted. Good.

   b. diff <= INTMAX_MAX:
      diff is again converted back to signed, however its value is
      unchanged because diff can be represented by intmax_t. This is a
      positive value, so taking the absolute value changes nothing.

      This is _not_ refcount1 - refcount2!

I suggest using a function that calculates the absolute value of the
difference of two unsigned values the naive way with an if statement.
Gets us rid of the implementation defined conversion, too.

>                                        QCOW2_DISCARD_ALWAYS);
>                  if (ret >= 0) {

Kevin
Max Reitz Feb. 3, 2015, 7:48 p.m. UTC | #3
On 2015-02-03 at 14:26, Kevin Wolf wrote:
> Am 15.12.2014 um 13:50 hat Max Reitz geschrieben:
>> Refcounts may have a width of up to 64 bits, so qemu should use the same
>> width to represent refcount values internally.
>>
>> Signed-off-by: Max Reitz <mreitz@redhat.com>
>> ---
>>   block/qcow2-cluster.c  |  2 +-
>>   block/qcow2-refcount.c | 46 ++++++++++++++++++++++------------------------
>>   block/qcow2.h          |  4 ++--
>>   3 files changed, 25 insertions(+), 27 deletions(-)
>> @@ -897,11 +895,10 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
>>       int64_t l1_table_offset, int l1_size, int addend)
>>   {
> Your leaving addend an int here...
>
>>       BDRVQcowState *s = bs->opaque;
>> -    uint64_t *l1_table, *l2_table, l2_offset, offset, l1_size2;
>> +    uint64_t *l1_table, *l2_table, l2_offset, offset, l1_size2, refcount;
>>       bool l1_allocated = false;
>>       int64_t old_offset, old_l2_offset;
>>       int i, j, l1_modified = 0, nb_csectors;
>> -    uint16_t refcount;
>>       int ret;
>>   
>>       l2_table = NULL;
>> @@ -968,7 +965,7 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
>>                           if (addend != 0) {
>>                               ret = update_refcount(bs,
>>                                   (offset & s->cluster_offset_mask) & ~511,
>> -                                nb_csectors * 512, abs(addend), addend < 0,
>> +                                nb_csectors * 512, imaxabs(addend), addend < 0,
>>                                   QCOW2_DISCARD_SNAPSHOT);
>>                               if (ret < 0) {
>>                                   goto fail;
>> @@ -999,7 +996,7 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
>>                           }
>>                           if (addend != 0) {
>>                               ret = qcow2_update_cluster_refcount(bs,
>> -                                    cluster_index, abs(addend), addend < 0,
>> +                                    cluster_index, imaxabs(addend), addend < 0,
>>                                       QCOW2_DISCARD_SNAPSHOT);
>>                               if (ret < 0) {
>>                                   goto fail;
>> @@ -1042,7 +1039,7 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
>>               if (addend != 0) {
>>                   ret = qcow2_update_cluster_refcount(bs, l2_offset >>
>>                                                           s->cluster_bits,
>> -                                                    abs(addend), addend < 0,
>> +                                                    imaxabs(addend), addend < 0,
>>                                                       QCOW2_DISCARD_SNAPSHOT);
>>                   if (ret < 0) {
>>                       goto fail;
> ...but still replace abs() by imaxabs(). Did you intend to convert
> addend or why this change?

Mechanical replacement of every abs(addend) most likely.

Considering that qcow2_update_snapshot_refcount() is only called with 
@addend \in { -1, 0, 1 }, it doesn't seem to make any technical sense to 
convert @addend to something else than an int; and thus it doesn't make 
any sense to use imaxabs() instead of abs() (although it doesn't hurt, 
it just looks bad). Also, if I were to convert 
qcow2_update_snapshot_refcount() to the "full 64 bit difference 
interface", I'd need an additional argument for the sign of the addend.

Therefore, I'll drop the imaxabs() hunks for 
qcow2_update_snapshot_refcount(), if you're fine with that.

>> @@ -1658,7 +1655,7 @@ static void compare_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
>>   {
>>       BDRVQcowState *s = bs->opaque;
>>       int64_t i;
>> -    uint16_t refcount1, refcount2;
>> +    uint64_t refcount1, refcount2;
>>       int ret;
>>   
>>       for (i = 0, *highest_cluster = 0; i < nb_clusters; i++) {
>> @@ -1687,7 +1684,8 @@ static void compare_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
>>                   num_fixed = &res->corruptions_fixed;
>>               }
>>   
>> -            fprintf(stderr, "%s cluster %" PRId64 " refcount=%d reference=%d\n",
>> +            fprintf(stderr, "%s cluster %" PRId64 " refcount=%" PRIu64
>> +                    " reference=%" PRIu64 "\n",
>>                      num_fixed != NULL     ? "Repairing" :
>>                      refcount1 < refcount2 ? "ERROR" :
>>                                              "Leaked",
>> @@ -1695,7 +1693,7 @@ static void compare_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
>>   
>>               if (num_fixed) {
>>                   ret = update_refcount(bs, i << s->cluster_bits, 1,
>> -                                      abs(refcount2 - refcount1),
>> +                                      imaxabs(refcount2 - refcount1),
>>                                         refcount1 > refcount2,
> Hope I got that right. Here's my analysis:
>
> Before: refcount{1,2} were both uint16_t. Promoted to int for the
> subtraction. Therefore a negative result could occur. abs() takes the
> absolute value and the sign is passed separately.
>
> After: refcount{1,2} are both uint64_t. No integer promotion happens, we
> perform an unsigned subtraction. The separate passed sign is okay. For
> the absolute value, there are two cases:
>
> 1. refcount2 >= refcount1: No overflow occurs, everything fine.
>
> 2. refcount2 < refcount1: (refcount2 - refcount1) wraps around, but is
>     still an uint64_t. imaxabs() takes an intmax_t, which is signed. The
>     conversion is implementation defined, but let's assume the obvious
>     one. imaxabs() has two cases again:
>
>     diff := refcount2 - refcount1 + UINT64_MAX

Actually it's + UINT64_MAX + 1, but it doesn't matter for the point 
you're making.

>     a. diff > INTMAX_MAX:
>        We get diff converted back to signed, which undoes the wraparound.
>        The absolute value of the signed difference is:
>
>          -(refcount2 - refcount1) = refcount1 - refcount2
>
>        This is what we wanted. Good.
>
>     b. diff <= INTMAX_MAX:
>        diff is again converted back to signed, however its value is
>        unchanged because diff can be represented by intmax_t. This is a
>        positive value, so taking the absolute value changes nothing.
>
>        This is _not_ refcount1 - refcount2!

You're completely right. Actually, I won absolutely nothing by 
separating the sign if using imaxabs() because the latter will only 
return values in 0 .. 2^63 - 1, which makes it (using imaxabs()) a very 
bad idea in the first place.

> I suggest using a function that calculates the absolute value of the
> difference of two unsigned values the naive way with an if statement.
> Gets us rid of the implementation defined conversion, too.

Indeed, will do. Thanks!

Max

>>                                         QCOW2_DISCARD_ALWAYS);
>>                   if (ret >= 0) {
> Kevin
diff mbox

Patch

diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index b664d2f..a86bb5e 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -1640,7 +1640,7 @@  static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table,
     for (i = 0; i < l1_size; i++) {
         uint64_t l2_offset = l1_table[i] & L1E_OFFSET_MASK;
         bool l2_dirty = false;
-        uint16_t l2_refcount;
+        uint64_t l2_refcount;
 
         if (!l2_offset) {
             /* unallocated */
diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
index 99a3cce..0308a7e 100644
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -29,7 +29,7 @@ 
 
 static int64_t alloc_clusters_noref(BlockDriverState *bs, uint64_t size);
 static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs,
-                            int64_t offset, int64_t length, uint16_t addend,
+                            int64_t offset, int64_t length, uint64_t addend,
                             bool decrease, enum qcow2_discard_type type);
 
 
@@ -91,7 +91,7 @@  static int load_refcount_block(BlockDriverState *bs,
  * *refcount. Returns 0 on success and -errno on failure.
  */
 int qcow2_get_refcount(BlockDriverState *bs, int64_t cluster_index,
-                       uint16_t *refcount)
+                       uint64_t *refcount)
 {
     BDRVQcowState *s = bs->opaque;
     uint64_t refcount_table_index, block_index;
@@ -535,7 +535,7 @@  found:
 static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs,
                                                    int64_t offset,
                                                    int64_t length,
-                                                   uint16_t addend,
+                                                   uint64_t addend,
                                                    bool decrease,
                                                    enum qcow2_discard_type type)
 {
@@ -547,7 +547,7 @@  static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs,
 
 #ifdef DEBUG_ALLOC2
     fprintf(stderr, "update_refcount: offset=%" PRId64 " size=%" PRId64
-            " addend=%s%" PRIu16 "\n", offset, length, decrease ? "-" : "",
+            " addend=%s%" PRIu64 "\n", offset, length, decrease ? "-" : "",
             addend);
 #endif
     if (length < 0) {
@@ -567,7 +567,7 @@  static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs,
         cluster_offset += s->cluster_size)
     {
         int block_index;
-        uint16_t refcount;
+        uint64_t refcount;
         int64_t cluster_index = cluster_offset >> s->cluster_bits;
         int64_t table_index = cluster_index >> s->refcount_block_bits;
 
@@ -594,9 +594,9 @@  static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs,
         block_index = cluster_index & (s->refcount_block_size - 1);
 
         refcount = be16_to_cpu(refcount_block[block_index]);
-        if (decrease ? ((uint16_t)(refcount - addend) > refcount)
-                     : ((uint16_t)(refcount + addend) < refcount ||
-                        (uint16_t)(refcount + addend) > s->refcount_max))
+        if (decrease ? (refcount - addend > refcount)
+                     : (refcount + addend < refcount ||
+                        refcount + addend > s->refcount_max))
         {
             ret = -EINVAL;
             goto fail;
@@ -656,7 +656,7 @@  fail:
  */
 int qcow2_update_cluster_refcount(BlockDriverState *bs,
                                   int64_t cluster_index,
-                                  uint16_t addend, bool decrease,
+                                  uint64_t addend, bool decrease,
                                   enum qcow2_discard_type type)
 {
     BDRVQcowState *s = bs->opaque;
@@ -682,8 +682,7 @@  int qcow2_update_cluster_refcount(BlockDriverState *bs,
 static int64_t alloc_clusters_noref(BlockDriverState *bs, uint64_t size)
 {
     BDRVQcowState *s = bs->opaque;
-    uint64_t i, nb_clusters;
-    uint16_t refcount;
+    uint64_t i, nb_clusters, refcount;
     int ret;
 
     nb_clusters = size_to_clusters(s, size);
@@ -741,9 +740,8 @@  int qcow2_alloc_clusters_at(BlockDriverState *bs, uint64_t offset,
     int nb_clusters)
 {
     BDRVQcowState *s = bs->opaque;
-    uint64_t cluster_index;
+    uint64_t cluster_index, refcount;
     uint64_t i;
-    uint16_t refcount;
     int ret;
 
     assert(nb_clusters >= 0);
@@ -897,11 +895,10 @@  int qcow2_update_snapshot_refcount(BlockDriverState *bs,
     int64_t l1_table_offset, int l1_size, int addend)
 {
     BDRVQcowState *s = bs->opaque;
-    uint64_t *l1_table, *l2_table, l2_offset, offset, l1_size2;
+    uint64_t *l1_table, *l2_table, l2_offset, offset, l1_size2, refcount;
     bool l1_allocated = false;
     int64_t old_offset, old_l2_offset;
     int i, j, l1_modified = 0, nb_csectors;
-    uint16_t refcount;
     int ret;
 
     l2_table = NULL;
@@ -968,7 +965,7 @@  int qcow2_update_snapshot_refcount(BlockDriverState *bs,
                         if (addend != 0) {
                             ret = update_refcount(bs,
                                 (offset & s->cluster_offset_mask) & ~511,
-                                nb_csectors * 512, abs(addend), addend < 0,
+                                nb_csectors * 512, imaxabs(addend), addend < 0,
                                 QCOW2_DISCARD_SNAPSHOT);
                             if (ret < 0) {
                                 goto fail;
@@ -999,7 +996,7 @@  int qcow2_update_snapshot_refcount(BlockDriverState *bs,
                         }
                         if (addend != 0) {
                             ret = qcow2_update_cluster_refcount(bs,
-                                    cluster_index, abs(addend), addend < 0,
+                                    cluster_index, imaxabs(addend), addend < 0,
                                     QCOW2_DISCARD_SNAPSHOT);
                             if (ret < 0) {
                                 goto fail;
@@ -1042,7 +1039,7 @@  int qcow2_update_snapshot_refcount(BlockDriverState *bs,
             if (addend != 0) {
                 ret = qcow2_update_cluster_refcount(bs, l2_offset >>
                                                         s->cluster_bits,
-                                                    abs(addend), addend < 0,
+                                                    imaxabs(addend), addend < 0,
                                                     QCOW2_DISCARD_SNAPSHOT);
                 if (ret < 0) {
                     goto fail;
@@ -1368,7 +1365,7 @@  static int check_oflag_copied(BlockDriverState *bs, BdrvCheckResult *res,
     BDRVQcowState *s = bs->opaque;
     uint64_t *l2_table = qemu_blockalign(bs, s->cluster_size);
     int ret;
-    uint16_t refcount;
+    uint64_t refcount;
     int i, j;
 
     for (i = 0; i < s->l1_size; i++) {
@@ -1388,7 +1385,7 @@  static int check_oflag_copied(BlockDriverState *bs, BdrvCheckResult *res,
         }
         if ((refcount == 1) != ((l1_entry & QCOW_OFLAG_COPIED) != 0)) {
             fprintf(stderr, "%s OFLAG_COPIED L2 cluster: l1_index=%d "
-                    "l1_entry=%" PRIx64 " refcount=%d\n",
+                    "l1_entry=%" PRIx64 " refcount=%" PRIu64 "\n",
                     fix & BDRV_FIX_ERRORS ? "Repairing" :
                                             "ERROR",
                     i, l1_entry, refcount);
@@ -1432,7 +1429,7 @@  static int check_oflag_copied(BlockDriverState *bs, BdrvCheckResult *res,
                 }
                 if ((refcount == 1) != ((l2_entry & QCOW_OFLAG_COPIED) != 0)) {
                     fprintf(stderr, "%s OFLAG_COPIED data cluster: "
-                            "l2_entry=%" PRIx64 " refcount=%d\n",
+                            "l2_entry=%" PRIx64 " refcount=%" PRIu64 "\n",
                             fix & BDRV_FIX_ERRORS ? "Repairing" :
                                                     "ERROR",
                             l2_entry, refcount);
@@ -1658,7 +1655,7 @@  static void compare_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
 {
     BDRVQcowState *s = bs->opaque;
     int64_t i;
-    uint16_t refcount1, refcount2;
+    uint64_t refcount1, refcount2;
     int ret;
 
     for (i = 0, *highest_cluster = 0; i < nb_clusters; i++) {
@@ -1687,7 +1684,8 @@  static void compare_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
                 num_fixed = &res->corruptions_fixed;
             }
 
-            fprintf(stderr, "%s cluster %" PRId64 " refcount=%d reference=%d\n",
+            fprintf(stderr, "%s cluster %" PRId64 " refcount=%" PRIu64
+                    " reference=%" PRIu64 "\n",
                    num_fixed != NULL     ? "Repairing" :
                    refcount1 < refcount2 ? "ERROR" :
                                            "Leaked",
@@ -1695,7 +1693,7 @@  static void compare_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
 
             if (num_fixed) {
                 ret = update_refcount(bs, i << s->cluster_bits, 1,
-                                      abs(refcount2 - refcount1),
+                                      imaxabs(refcount2 - refcount1),
                                       refcount1 > refcount2,
                                       QCOW2_DISCARD_ALWAYS);
                 if (ret >= 0) {
diff --git a/block/qcow2.h b/block/qcow2.h
index 0240ee8..adf515c 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -490,10 +490,10 @@  int qcow2_refcount_init(BlockDriverState *bs);
 void qcow2_refcount_close(BlockDriverState *bs);
 
 int qcow2_get_refcount(BlockDriverState *bs, int64_t cluster_index,
-                       uint16_t *refcount);
+                       uint64_t *refcount);
 
 int qcow2_update_cluster_refcount(BlockDriverState *bs, int64_t cluster_index,
-                                  uint16_t addend, bool decrease,
+                                  uint64_t addend, bool decrease,
                                   enum qcow2_discard_type type);
 
 int64_t qcow2_alloc_clusters(BlockDriverState *bs, uint64_t size);