diff mbox series

[v6,4/5] parallels: Add checking and repairing duplicate offsets in BAT

Message ID 20230621082010.139195-5-alexander.ivanov@virtuozzo.com
State New
Headers show
Series parallels: Add duplication check, repair at open, fix bugs | expand

Commit Message

Alexander Ivanov June 21, 2023, 8:20 a.m. UTC
Cluster offsets must be unique among all the BAT entries. Find duplicate
offsets in the BAT and fix it by copying the content of the relevant
cluster to a newly allocated cluster and set the new cluster offset to the
duplicated entry.

Add host_cluster_index() helper to deduplicate the code.

When new clusters are allocated, the file size increases by 128 Mb. Call
parallels_check_leak() to fix this leak.

Signed-off-by: Alexander Ivanov <alexander.ivanov@virtuozzo.com>
---
 block/parallels.c | 142 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 142 insertions(+)

Comments

Denis V. Lunev June 21, 2023, 2:42 p.m. UTC | #1
On 6/21/23 10:20, Alexander Ivanov wrote:
> Cluster offsets must be unique among all the BAT entries. Find duplicate
> offsets in the BAT and fix it by copying the content of the relevant
> cluster to a newly allocated cluster and set the new cluster offset to the
> duplicated entry.
>
> Add host_cluster_index() helper to deduplicate the code.
>
> When new clusters are allocated, the file size increases by 128 Mb. Call
> parallels_check_leak() to fix this leak.
>
> Signed-off-by: Alexander Ivanov <alexander.ivanov@virtuozzo.com>
> ---
>   block/parallels.c | 142 ++++++++++++++++++++++++++++++++++++++++++++++
>   1 file changed, 142 insertions(+)
>
> diff --git a/block/parallels.c b/block/parallels.c
> index 78a34bd667..d507fe7d90 100644
> --- a/block/parallels.c
> +++ b/block/parallels.c
> @@ -136,6 +136,12 @@ static int cluster_remainder(BDRVParallelsState *s, int64_t sector_num,
>       return MIN(nb_sectors, ret);
>   }
>   
> +static uint32_t host_cluster_index(BDRVParallelsState *s, int64_t off)
> +{
> +    off -= le32_to_cpu(s->header->data_off) << BDRV_SECTOR_BITS;
According to


> +    return off / s->cluster_size;
> +}
> +
>   static int64_t block_status(BDRVParallelsState *s, int64_t sector_num,
>                               int nb_sectors, int *pnum)
>   {
> @@ -528,6 +534,137 @@ parallels_check_leak(BlockDriverState *bs, BdrvCheckResult *res,
>       return 0;
>   }
>   
> +static int parallels_check_duplicate(BlockDriverState *bs,
> +                                     BdrvCheckResult *res,
> +                                     BdrvCheckMode fix)
> +{
> +    BDRVParallelsState *s = bs->opaque;
> +    int64_t off, sector;
> +    unsigned long *bitmap;
> +    uint32_t i, bitmap_size, cluster_index, old_offset;
> +    int n, ret = 0;
> +    uint64_t *buf = NULL;
> +    bool fixed = false;
> +
> +    /*
> +     * Create a bitmap of used clusters.
> +     * If a bit is set, there is a BAT entry pointing to this cluster.
> +     * Loop through the BAT entries, check bits relevant to an entry offset.
> +     * If bit is set, this entry is duplicated. Otherwise set the bit.
> +     *
> +     * We shouldn't worry about newly allocated clusters outside the image
> +     * because they are created higher then any existing cluster pointed by
> +     * a BAT entry.
> +     */
> +    bitmap_size = host_cluster_index(s, res->image_end_offset);
> +    if (bitmap_size == 0) {
> +        return 0;
> +    }
> +
> +    bitmap = bitmap_new(bitmap_size);
> +
> +    buf = qemu_blockalign(bs, s->cluster_size);
> +
> +    for (i = 0; i < s->bat_size; i++) {
> +        off = bat2sect(s, i) << BDRV_SECTOR_BITS;
> +        if (off == 0) {
> +            continue;
> +        }
> +
> +        cluster_index = host_cluster_index(s, off);
> +        assert(cluster_index < bitmap_size);
> +        if (test_bit(cluster_index, bitmap)) {
> +            /* this cluster duplicates another one */
> +            fprintf(stderr,
> +                    "%s duplicate offset in BAT entry %u\n",
> +                    fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR", i);
> +
> +            res->corruptions++;
> +
> +            if (fix & BDRV_FIX_ERRORS) {
> +                /*
> +                 * Reset the entry and allocate a new cluster
> +                 * for the relevant guest offset. In this way we let
> +                 * the lower layer to place the new cluster properly.
> +                 * Copy the original cluster to the allocated one.
> +                 * But before save the old offset value for repairing
> +                 * if we have an error.
> +                 */
> +                old_offset = le32_to_cpu(s->bat_bitmap[i]);
> +                parallels_set_bat_entry(s, i, 0);
> +
> +                ret = bdrv_co_pread(bs->file, off, s->cluster_size, buf, 0);
> +                if (ret < 0) {
> +                    res->check_errors++;
> +                    goto out_repare_bat;
> +                }
> +
> +                sector = i * (int64_t)s->tracks;
> +                sector = allocate_clusters(bs, sector, s->tracks, &n);
> +                if (sector < 0) {
> +                    res->check_errors++;
> +                    ret = sector;
> +                    goto out_repare_bat;
> +                }
> +                off = sector << BDRV_SECTOR_BITS;
> +
> +                ret = bdrv_co_pwrite(bs->file, off, s->cluster_size, buf, 0);
> +                if (ret < 0) {
> +                    res->check_errors++;
> +                    goto out_repare_bat;
> +                }
> +
> +                if (off + s->cluster_size > res->image_end_offset) {
> +                    res->image_end_offset = off + s->cluster_size;
> +                }
> +
> +                /*
> +                 * In the future allocate_cluster() will reuse holed offsets
> +                 * inside the image. Keep the used clusters bitmap content
> +                 * consistent for the new allocated clusters too.
> +                 *
> +                 * Note, clusters allocated outside the current image are not
> +                 * considered, and the bitmap size doesn't change.
> +                 */
> +                cluster_index = host_cluster_index(s, off);
> +                if (cluster_index < bitmap_size) {
> +                    bitmap_set(bitmap, cluster_index, 1);
> +                }
> +
> +                fixed = true;
> +                res->corruptions_fixed++;
> +            }
> +        } else {
> +            bitmap_set(bitmap, cluster_index, 1);
> +        }
> +    }
> +
> +    if (fixed) {
> +        /*
> +         * When new clusters are allocated, the file size increases by
> +         * 128 Mb. We need to truncate the file to the right size. Let
> +         * the leak fix code make its job without res changing.
> +         */
> +        ret = parallels_check_leak(bs, res, fix, false);
> +        if (ret < 0) {
> +            goto out;
> +        }
> +    }
> +    goto out;
> +
> +/*
> + * We can get here only from places where index and old_offset have
> + * meaningful values.
> + */
> +out_repare_bat:
> +    s->bat_bitmap[i] = cpu_to_le32(old_offset);
> +
> +out:
> +    g_free(buf);
> +    g_free(bitmap);
> +    return ret;
> +}
> +
>   static void parallels_collect_statistics(BlockDriverState *bs,
>                                            BdrvCheckResult *res,
>                                            BdrvCheckMode fix)
> @@ -579,6 +716,11 @@ parallels_co_check(BlockDriverState *bs, BdrvCheckResult *res,
>               return ret;
>           }
>   
> +        ret = parallels_check_duplicate(bs, res, fix);
> +        if (ret < 0) {
> +            return ret;
> +        }
> +
>           parallels_collect_statistics(bs, res, fix);
>       }
>
Denis V. Lunev June 21, 2023, 3:30 p.m. UTC | #2
On 6/21/23 10:20, Alexander Ivanov wrote:
> Cluster offsets must be unique among all the BAT entries. Find duplicate
> offsets in the BAT and fix it by copying the content of the relevant
> cluster to a newly allocated cluster and set the new cluster offset to the
> duplicated entry.
>
> Add host_cluster_index() helper to deduplicate the code.
>
> When new clusters are allocated, the file size increases by 128 Mb. Call
> parallels_check_leak() to fix this leak.
>
> Signed-off-by: Alexander Ivanov <alexander.ivanov@virtuozzo.com>
> ---
>   block/parallels.c | 142 ++++++++++++++++++++++++++++++++++++++++++++++
>   1 file changed, 142 insertions(+)
>
> diff --git a/block/parallels.c b/block/parallels.c
> index 78a34bd667..d507fe7d90 100644
> --- a/block/parallels.c
> +++ b/block/parallels.c
> @@ -136,6 +136,12 @@ static int cluster_remainder(BDRVParallelsState *s, int64_t sector_num,
>       return MIN(nb_sectors, ret);
>   }
>   
> +static uint32_t host_cluster_index(BDRVParallelsState *s, int64_t off)
> +{
> +    off -= le32_to_cpu(s->header->data_off) << BDRV_SECTOR_BITS;
According to parallels_open()
     s->data_end = le32_to_cpu(ph.data_off);
     if (s->data_end == 0) {
         s->data_end = ROUND_UP(bat_entry_off(s->bat_size), 
BDRV_SECTOR_SIZE);
     }
     if (s->data_end < s->header_size) {
         /* there is not enough unused space to fit to block align 
between BAT
            and actual data. We can't avoid read-modify-write... */
         s->header_size = size;
     }
data_off can be 0. In that case the logic of conversion of
host offset is incorrect.

This does not break THIS code, but by the name of this
helper further code reuse could lead to mistakes in some
cases.

Side note. This function could result in a bitmap which would
be shorter by 1 if file length is not cluster aligned. There are
no such checks.

Side note2. It would be nice to validate data_off in advance
as follows from the specification. In all cases data_off should
be greater than bat_entry_off(s->bat_size) if non-zero.

   48 - 51:    data_off
               An offset, in sectors, from the start of the file to the 
start of
               the data area.

               For "WithoutFreeSpace" images:
               - If data_off is zero, the offset is calculated as the 
end of BAT
                 table plus some padding to ensure sector size alignment.
               - If data_off is non-zero, the offset should be aligned 
to sector
                 size. However it is recommended to align it to cluster 
size for
                 newly created images.

               For "WithouFreSpacExt" images:
               data_off must be non-zero and aligned to cluster size.

> +    return off / s->cluster_size;
> +}
> +
>   static int64_t block_status(BDRVParallelsState *s, int64_t sector_num,
>                               int nb_sectors, int *pnum)
>   {
> @@ -528,6 +534,137 @@ parallels_check_leak(BlockDriverState *bs, BdrvCheckResult *res,
>       return 0;
>   }
>   
> +static int parallels_check_duplicate(BlockDriverState *bs,
> +                                     BdrvCheckResult *res,
> +                                     BdrvCheckMode fix)
> +{
> +    BDRVParallelsState *s = bs->opaque;
> +    int64_t off, sector;
> +    unsigned long *bitmap;
> +    uint32_t i, bitmap_size, cluster_index, old_offset;
> +    int n, ret = 0;
> +    uint64_t *buf = NULL;
> +    bool fixed = false;
> +
> +    /*
> +     * Create a bitmap of used clusters.
> +     * If a bit is set, there is a BAT entry pointing to this cluster.
> +     * Loop through the BAT entries, check bits relevant to an entry offset.
> +     * If bit is set, this entry is duplicated. Otherwise set the bit.
> +     *
> +     * We shouldn't worry about newly allocated clusters outside the image
> +     * because they are created higher then any existing cluster pointed by
> +     * a BAT entry.
> +     */
> +    bitmap_size = host_cluster_index(s, res->image_end_offset);
> +    if (bitmap_size == 0) {
> +        return 0;
> +    }
> +
> +    bitmap = bitmap_new(bitmap_size);
> +
> +    buf = qemu_blockalign(bs, s->cluster_size);
> +
> +    for (i = 0; i < s->bat_size; i++) {
> +        off = bat2sect(s, i) << BDRV_SECTOR_BITS;
> +        if (off == 0) {
> +            continue;
> +        }
> +
> +        cluster_index = host_cluster_index(s, off);
> +        assert(cluster_index < bitmap_size);
> +        if (test_bit(cluster_index, bitmap)) {
I would prefer to revert this if and next one and have a code like

if (test_bit(cluster_index, bitmap)) {
    
    bitmap_set(bitmap, cluster_index, 1);
    continue;
}
....
if (!(fix & BDRV_FIX_ERRORS)) {
    continue;
}

> +            /* this cluster duplicates another one */
> +            fprintf(stderr,
> +                    "%s duplicate offset in BAT entry %u\n",
> +                    fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR", i);
> +
> +            res->corruptions++;
> +
> +            if (fix & BDRV_FIX_ERRORS) {
> +                /*
> +                 * Reset the entry and allocate a new cluster
> +                 * for the relevant guest offset. In this way we let
> +                 * the lower layer to place the new cluster properly.
> +                 * Copy the original cluster to the allocated one.
> +                 * But before save the old offset value for repairing
> +                 * if we have an error.
> +                 */
> +                old_offset = le32_to_cpu(s->bat_bitmap[i]);
> +                parallels_set_bat_entry(s, i, 0);
> +
> +                ret = bdrv_co_pread(bs->file, off, s->cluster_size, buf, 0);
> +                if (ret < 0) {
> +                    res->check_errors++;
> +                    goto out_repare_bat;
> +                }
> +
> +                sector = i * (int64_t)s->tracks;
This is somehow counter intuitive. I vole for
     sector  = (i * s->cluster_size) >> BDRV_SECTOR_BITS;
This would be transparent for the rest of the code.

Anyway, we use tracks in other places. The question is
just about readability. I would not insist too much :)

> +                sector = allocate_clusters(bs, sector, s->tracks, &n);
It is better to avoid such code. There are "guest_sector" and "host_sector".
I would say that they should not be mixed. It is too dangerous.

Actually the same applies to the 'off' variable.

> +                if (sector < 0) {
> +                    res->check_errors++;
> +                    ret = sector;
> +                    goto out_repare_bat;
repair

> +                }
> +                off = sector << BDRV_SECTOR_BITS;
> +
> +                ret = bdrv_co_pwrite(bs->file, off, s->cluster_size, buf, 0);
> +                if (ret < 0) {
> +                    res->check_errors++;
> +                    goto out_repare_bat;
> +                }
> +
> +                if (off + s->cluster_size > res->image_end_offset) {
> +                    res->image_end_offset = off + s->cluster_size;
> +                }
> +
> +                /*
> +                 * In the future allocate_cluster() will reuse holed offsets
> +                 * inside the image. Keep the used clusters bitmap content
> +                 * consistent for the new allocated clusters too.
> +                 *
> +                 * Note, clusters allocated outside the current image are not
> +                 * considered, and the bitmap size doesn't change.
> +                 */
> +                cluster_index = host_cluster_index(s, off);
> +                if (cluster_index < bitmap_size) {
> +                    bitmap_set(bitmap, cluster_index, 1);
> +                }
> +
> +                fixed = true;
> +                res->corruptions_fixed++;
> +            }
> +        } else {
> +            bitmap_set(bitmap, cluster_index, 1);
> +        }
> +    }
> +
> +    if (fixed) {
> +        /*
> +         * When new clusters are allocated, the file size increases by
> +         * 128 Mb. We need to truncate the file to the right size. Let
> +         * the leak fix code make its job without res changing.
> +         */
> +        ret = parallels_check_leak(bs, res, fix, false);
> +        if (ret < 0) {
> +            goto out;
> +        }
> +    }
> +    goto out;
> +
> +/*
> + * We can get here only from places where index and old_offset have
> + * meaningful values.
> + */
> +out_repare_bat:
> +    s->bat_bitmap[i] = cpu_to_le32(old_offset);
it would be better to move out_repare_bat: below return to get jumps on 
error path only.
> +
> +out:
> +    g_free(buf);
> +    g_free(bitmap);
> +    return ret;
> +}
> +
>   static void parallels_collect_statistics(BlockDriverState *bs,
>                                            BdrvCheckResult *res,
>                                            BdrvCheckMode fix)
> @@ -579,6 +716,11 @@ parallels_co_check(BlockDriverState *bs, BdrvCheckResult *res,
>               return ret;
>           }
>   
> +        ret = parallels_check_duplicate(bs, res, fix);
> +        if (ret < 0) {
> +            return ret;
> +        }
> +
>           parallels_collect_statistics(bs, res, fix);
>       }
>
diff mbox series

Patch

diff --git a/block/parallels.c b/block/parallels.c
index 78a34bd667..d507fe7d90 100644
--- a/block/parallels.c
+++ b/block/parallels.c
@@ -136,6 +136,12 @@  static int cluster_remainder(BDRVParallelsState *s, int64_t sector_num,
     return MIN(nb_sectors, ret);
 }
 
+static uint32_t host_cluster_index(BDRVParallelsState *s, int64_t off)
+{
+    off -= le32_to_cpu(s->header->data_off) << BDRV_SECTOR_BITS;
+    return off / s->cluster_size;
+}
+
 static int64_t block_status(BDRVParallelsState *s, int64_t sector_num,
                             int nb_sectors, int *pnum)
 {
@@ -528,6 +534,137 @@  parallels_check_leak(BlockDriverState *bs, BdrvCheckResult *res,
     return 0;
 }
 
+static int parallels_check_duplicate(BlockDriverState *bs,
+                                     BdrvCheckResult *res,
+                                     BdrvCheckMode fix)
+{
+    BDRVParallelsState *s = bs->opaque;
+    int64_t off, sector;
+    unsigned long *bitmap;
+    uint32_t i, bitmap_size, cluster_index, old_offset;
+    int n, ret = 0;
+    uint64_t *buf = NULL;
+    bool fixed = false;
+
+    /*
+     * Create a bitmap of used clusters.
+     * If a bit is set, there is a BAT entry pointing to this cluster.
+     * Loop through the BAT entries, check bits relevant to an entry offset.
+     * If bit is set, this entry is duplicated. Otherwise set the bit.
+     *
+     * We shouldn't worry about newly allocated clusters outside the image
+     * because they are created higher then any existing cluster pointed by
+     * a BAT entry.
+     */
+    bitmap_size = host_cluster_index(s, res->image_end_offset);
+    if (bitmap_size == 0) {
+        return 0;
+    }
+
+    bitmap = bitmap_new(bitmap_size);
+
+    buf = qemu_blockalign(bs, s->cluster_size);
+
+    for (i = 0; i < s->bat_size; i++) {
+        off = bat2sect(s, i) << BDRV_SECTOR_BITS;
+        if (off == 0) {
+            continue;
+        }
+
+        cluster_index = host_cluster_index(s, off);
+        assert(cluster_index < bitmap_size);
+        if (test_bit(cluster_index, bitmap)) {
+            /* this cluster duplicates another one */
+            fprintf(stderr,
+                    "%s duplicate offset in BAT entry %u\n",
+                    fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR", i);
+
+            res->corruptions++;
+
+            if (fix & BDRV_FIX_ERRORS) {
+                /*
+                 * Reset the entry and allocate a new cluster
+                 * for the relevant guest offset. In this way we let
+                 * the lower layer to place the new cluster properly.
+                 * Copy the original cluster to the allocated one.
+                 * But before save the old offset value for repairing
+                 * if we have an error.
+                 */
+                old_offset = le32_to_cpu(s->bat_bitmap[i]);
+                parallels_set_bat_entry(s, i, 0);
+
+                ret = bdrv_co_pread(bs->file, off, s->cluster_size, buf, 0);
+                if (ret < 0) {
+                    res->check_errors++;
+                    goto out_repare_bat;
+                }
+
+                sector = i * (int64_t)s->tracks;
+                sector = allocate_clusters(bs, sector, s->tracks, &n);
+                if (sector < 0) {
+                    res->check_errors++;
+                    ret = sector;
+                    goto out_repare_bat;
+                }
+                off = sector << BDRV_SECTOR_BITS;
+
+                ret = bdrv_co_pwrite(bs->file, off, s->cluster_size, buf, 0);
+                if (ret < 0) {
+                    res->check_errors++;
+                    goto out_repare_bat;
+                }
+
+                if (off + s->cluster_size > res->image_end_offset) {
+                    res->image_end_offset = off + s->cluster_size;
+                }
+
+                /*
+                 * In the future allocate_cluster() will reuse holed offsets
+                 * inside the image. Keep the used clusters bitmap content
+                 * consistent for the new allocated clusters too.
+                 *
+                 * Note, clusters allocated outside the current image are not
+                 * considered, and the bitmap size doesn't change.
+                 */
+                cluster_index = host_cluster_index(s, off);
+                if (cluster_index < bitmap_size) {
+                    bitmap_set(bitmap, cluster_index, 1);
+                }
+
+                fixed = true;
+                res->corruptions_fixed++;
+            }
+        } else {
+            bitmap_set(bitmap, cluster_index, 1);
+        }
+    }
+
+    if (fixed) {
+        /*
+         * When new clusters are allocated, the file size increases by
+         * 128 Mb. We need to truncate the file to the right size. Let
+         * the leak fix code make its job without res changing.
+         */
+        ret = parallels_check_leak(bs, res, fix, false);
+        if (ret < 0) {
+            goto out;
+        }
+    }
+    goto out;
+
+/*
+ * We can get here only from places where index and old_offset have
+ * meaningful values.
+ */
+out_repare_bat:
+    s->bat_bitmap[i] = cpu_to_le32(old_offset);
+
+out:
+    g_free(buf);
+    g_free(bitmap);
+    return ret;
+}
+
 static void parallels_collect_statistics(BlockDriverState *bs,
                                          BdrvCheckResult *res,
                                          BdrvCheckMode fix)
@@ -579,6 +716,11 @@  parallels_co_check(BlockDriverState *bs, BdrvCheckResult *res,
             return ret;
         }
 
+        ret = parallels_check_duplicate(bs, res, fix);
+        if (ret < 0) {
+            return ret;
+        }
+
         parallels_collect_statistics(bs, res, fix);
     }