Patchwork [RFC,V6,04/33] qcow2: Add qcow2_dedup_read_missing_and_concatenate

login
register
mail settings
Submitter Benoît Canet
Date Feb. 6, 2013, 12:31 p.m.
Message ID <1360153926-9492-5-git-send-email-benoit@irqsave.net>
Download mbox | patch
Permalink /patch/218637/
State New
Headers show

Comments

Benoît Canet - Feb. 6, 2013, 12:31 p.m.
This function is used to read missing data when unaligned writes are
done. This function also concatenates missing data with the given
qiov data in order to prepare a buffer used to look for duplicated
clusters.

Signed-off-by: Benoit Canet <benoit@irqsave.net>
---
 block/Makefile.objs |    1 +
 block/qcow2-dedup.c |  119 +++++++++++++++++++++++++++++++++++++++++++++++++++
 block/qcow2.c       |   35 +++++++++++++++
 block/qcow2.h       |   12 ++++++
 4 files changed, 167 insertions(+)
 create mode 100644 block/qcow2-dedup.c
Stefan Hajnoczi - Feb. 6, 2013, 4:45 p.m.
On Wed, Feb 06, 2013 at 01:31:37PM +0100, Benoît Canet wrote:
> +/*
> + * Prepare a buffer containing all the required data required to compute cluster

Easier to read this way:
s/all the required data required/everything required/

> + * sized deduplication hashes.
> + * If sector_num or nb_sectors are not cluster-aligned, missing data
> + * before/after the qiov will be read.
> + *
> + * @qiov:               the qiov for which missing data must be read
> + * @sector_num:         the first sectors that must be read into the qiov
> + * @nb_sectors:         the number of sectors to read into the qiov
> + * @data:               the place where the data will be concatenated and stored
> + * @nb_data_sectors:    the resulting size of the contatenated data (in sectors)
> + * @ret:                negative on error
> + */
> +int qcow2_dedup_read_missing_and_concatenate(BlockDriverState *bs,
> +                                             QEMUIOVector *qiov,
> +                                             uint64_t sector_num,
> +                                             int nb_sectors,
> +                                             uint8_t **data,
> +                                             int *nb_data_sectors)
> +{
> +    BDRVQcowState *s = bs->opaque;
> +    int ret = 0;
> +    uint64_t cluster_beginning_sector;
> +    uint64_t first_sector_after_qiov;
> +    int cluster_beginning_nr;
> +    int cluster_ending_nr;
> +    int unaligned_ending_nr;
> +    uint64_t max_cluster_ending_nr;
> +
> +    /* compute how much and where to read at the beginning */
> +    cluster_beginning_nr = sector_num & (s->cluster_sectors - 1);
> +    cluster_beginning_sector = sector_num - cluster_beginning_nr;
> +
> +    /* for the ending */
> +    first_sector_after_qiov = sector_num + nb_sectors;
> +    unaligned_ending_nr = first_sector_after_qiov & (s->cluster_sectors - 1);
> +    cluster_ending_nr = unaligned_ending_nr ?
> +                        s->cluster_sectors - unaligned_ending_nr : 0;
> +
> +    /* compute total size in sectors and allocate memory */
> +    *nb_data_sectors = cluster_beginning_nr + nb_sectors + cluster_ending_nr;
> +    *data = qemu_blockalign(bs, *nb_data_sectors * BDRV_SECTOR_SIZE);
> +
> +    /* read beginning */
> +    if (cluster_beginning_nr) {
> +        ret = qcow2_read_cluster_data(bs,
> +                                      *data,
> +                                      cluster_beginning_sector,
> +                                      cluster_beginning_nr);
> +    }
> +
> +    if (ret < 0) {
> +        goto fail;
> +    }
> +
> +    /* append qiov content */
> +    qemu_iovec_to_buf(qiov, 0, *data + cluster_beginning_nr * BDRV_SECTOR_SIZE,
> +                      qiov->size);
> +
> +    /* Fix cluster_ending_nr if we are at risk of reading outside the image
> +     * (Cluster unaligned image size)
> +     */
> +    max_cluster_ending_nr = bs->total_sectors - first_sector_after_qiov;
> +    cluster_ending_nr = max_cluster_ending_nr < (uint64_t) cluster_ending_nr ?
> +                        (int) max_cluster_ending_nr : cluster_ending_nr;

Is there a test case for the cluster unaligned image size scenario?

> +
> +    /* read and add ending */
> +    if (cluster_ending_nr) {
> +        ret = qcow2_read_cluster_data(bs,
> +                                      *data +
> +                                      (cluster_beginning_nr +
> +                                      nb_sectors) *
> +                                      BDRV_SECTOR_SIZE,
> +                                      first_sector_after_qiov,
> +                                      cluster_ending_nr);
> +    }
> +
> +    if (ret < 0) {
> +        goto fail;
> +    }
> +
> +    return 0;
> +
> +fail:
> +    qemu_vfree(*data);
> +    *data = NULL;
> +    return ret;
> +}
> diff --git a/block/qcow2.c b/block/qcow2.c
> index 7610e56..ecbe352 100644
> --- a/block/qcow2.c
> +++ b/block/qcow2.c
> @@ -1110,6 +1110,41 @@ fail:
>      return ret;
>  }
>  
> +/**
> + * Read some data from the QCOW2 file
> + *
> + * Important: s->lock is dropped. Things can change before the function returns
> + *            to the caller.
> + *
> + * @data:       the buffer where the data must be stored
> + * @sector_num: the sector number to read in the QCOW2 file
> + * @nb_sectors: the number of sectors to read
> + * @ret:        negative on error
> + */
> +int qcow2_read_cluster_data(BlockDriverState *bs,
> +                            uint8_t *data,
> +                            uint64_t sector_num,
> +                            int nb_sectors)
> +{
> +    BDRVQcowState *s = bs->opaque;
> +    QEMUIOVector qiov;
> +    struct iovec iov;
> +    int ret;
> +
> +    iov.iov_len = nb_sectors * BDRV_SECTOR_SIZE;
> +    iov.iov_base = data;
> +    qemu_iovec_init_external(&qiov, &iov, 1);
> +    qemu_co_mutex_unlock(&s->lock);
> +    ret = bdrv_co_readv(bs, sector_num, nb_sectors, &qiov);

This function should be marked coroutine_fn - it may only be called from
inside a coroutine.  It's good to mark all coroutine functions so the
reader knows immediately this will run in coroutine context.

bdrv_co_readv() does I/O throttling.  This is wrong here since we
don't want to charge for internal I/O.
Eric Blake - Feb. 6, 2013, 5:48 p.m.
On 02/06/2013 05:31 AM, Benoît Canet wrote:
> This function is used to read missing data when unaligned writes are
> done. This function also concatenate missing data with the given
> qiov data in order to prepare a buffer used to look for duplicated
> clusters.
> 

> +/*
> + * Prepare a buffer containing all the required data required to compute cluster
> + * sized deduplication hashes.
> + * If sector_num or nb_sectors are not cluster-aligned, missing data
> + * before/after the qiov will be read.
> + *
> + * @qiov:               the qiov for which missing data must be read
> + * @sector_num:         the first sectors that must be read into the qiov
> + * @nb_sectors:         the number of sectors to read into the qiov
> + * @data:               the place where the data will be concatenated and stored

Since data was allocated with qemu_blockalign() instead of the more
typical malloc() or glib functions, you should document that the caller
is responsible for using qemu_vfree() to clean up data on success.
Benoît Canet - Feb. 8, 2013, 2:12 p.m.
> > +    /* Fix cluster_ending_nr if we are at risk of reading outside the image
> > +     * (Cluster unaligned image size)
> > +     */
> > +    max_cluster_ending_nr = bs->total_sectors - first_sector_after_qiov;
> > +    cluster_ending_nr = max_cluster_ending_nr < (uint64_t) cluster_ending_nr ?
> > +                        (int) max_cluster_ending_nr : cluster_ending_nr;
> 
> Is there a test case for the cluster unaligned image size scenario?

I discovered this case using qemu-iotests; one of the test cases triggers it.

> > +    iov.iov_len = nb_sectors * BDRV_SECTOR_SIZE;
> > +    iov.iov_base = data;
> > +    qemu_iovec_init_external(&qiov, &iov, 1);
> > +    qemu_co_mutex_unlock(&s->lock);
> > +    ret = bdrv_co_readv(bs, sector_num, nb_sectors, &qiov);
> 
> This function should be marked coroutine_fn - it may only be called from
> inside a coroutine.  It's good to mark all coroutine functions so the
> reader knows immediately this will run in coroutine context.
> 
> bdrv_co_readv() is does I/O throttling.  This is wrong here since we
> don't want to charge for internal I/O.
Should I use qcow2_co_readv to bypass the I/O throttling ?

Benoît
Stefan Hajnoczi - Feb. 8, 2013, 2:51 p.m.
On Fri, Feb 08, 2013 at 03:12:57PM +0100, Benoît Canet wrote:
> > > +    iov.iov_len = nb_sectors * BDRV_SECTOR_SIZE;
> > > +    iov.iov_base = data;
> > > +    qemu_iovec_init_external(&qiov, &iov, 1);
> > > +    qemu_co_mutex_unlock(&s->lock);
> > > +    ret = bdrv_co_readv(bs, sector_num, nb_sectors, &qiov);
> > 
> > This function should be marked coroutine_fn - it may only be called from
> > inside a coroutine.  It's good to mark all coroutine functions so the
> > reader knows immediately this will run in coroutine context.
> > 
> > bdrv_co_readv() is does I/O throttling.  This is wrong here since we
> > don't want to charge for internal I/O.
> Should I use qcow2_co_readv to bypass the I/O throttling ?

I *think* it's alright because the other common feature - copy-on-read -
will already be handled correctly by the guest's I/O request.

Stefan

Patch

diff --git a/block/Makefile.objs b/block/Makefile.objs
index c067f38..21afc85 100644
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -1,5 +1,6 @@ 
 block-obj-y += raw.o cow.o qcow.o vdi.o vmdk.o cloop.o dmg.o bochs.o vpc.o vvfat.o
 block-obj-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o qcow2-cache.o
+block-obj-y += qcow2-dedup.o
 block-obj-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o
 block-obj-y += qed-check.o
 block-obj-y += parallels.o blkdebug.o blkverify.o
diff --git a/block/qcow2-dedup.c b/block/qcow2-dedup.c
new file mode 100644
index 0000000..4e99eb1
--- /dev/null
+++ b/block/qcow2-dedup.c
@@ -0,0 +1,119 @@ 
+/*
+ * Deduplication for the QCOW2 format
+ *
+ * Copyright (C) Nodalink, SARL. 2012-2013
+ *
+ * Author:
+ *   Benoît Canet <benoit.canet@irqsave.net>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "block/block_int.h"
+#include "qemu-common.h"
+#include "qcow2.h"
+
+/*
+ * Prepare a buffer containing everything required to compute cluster
+ * sized deduplication hashes.
+ * If sector_num or nb_sectors are not cluster-aligned, missing data
+ * before/after the qiov will be read.
+ *
+ * @qiov:               the qiov for which missing data must be read
+ * @sector_num:         the first sectors that must be read into the qiov
+ * @nb_sectors:         the number of sectors to read into the qiov
+ * @data:               the place where the data will be concatenated and stored
+ * @nb_data_sectors:    the resulting size of the concatenated data (in sectors)
+ * @ret:                negative on error
+ */
+int qcow2_dedup_read_missing_and_concatenate(BlockDriverState *bs,
+                                             QEMUIOVector *qiov,
+                                             uint64_t sector_num,
+                                             int nb_sectors,
+                                             uint8_t **data,
+                                             int *nb_data_sectors)
+{
+    BDRVQcowState *s = bs->opaque;
+    int ret = 0;
+    uint64_t cluster_beginning_sector;
+    uint64_t first_sector_after_qiov;
+    int cluster_beginning_nr;
+    int cluster_ending_nr;
+    int unaligned_ending_nr;
+    uint64_t max_cluster_ending_nr;
+
+    /* compute how much and where to read at the beginning */
+    cluster_beginning_nr = sector_num & (s->cluster_sectors - 1);
+    cluster_beginning_sector = sector_num - cluster_beginning_nr;
+
+    /* for the ending */
+    first_sector_after_qiov = sector_num + nb_sectors;
+    unaligned_ending_nr = first_sector_after_qiov & (s->cluster_sectors - 1);
+    cluster_ending_nr = unaligned_ending_nr ?
+                        s->cluster_sectors - unaligned_ending_nr : 0;
+
+    /* compute total size in sectors and allocate memory */
+    *nb_data_sectors = cluster_beginning_nr + nb_sectors + cluster_ending_nr;
+    *data = qemu_blockalign(bs, *nb_data_sectors * BDRV_SECTOR_SIZE);
+
+    /* read beginning */
+    if (cluster_beginning_nr) {
+        ret = qcow2_read_cluster_data(bs,
+                                      *data,
+                                      cluster_beginning_sector,
+                                      cluster_beginning_nr);
+    }
+
+    if (ret < 0) {
+        goto fail;
+    }
+
+    /* append qiov content */
+    qemu_iovec_to_buf(qiov, 0, *data + cluster_beginning_nr * BDRV_SECTOR_SIZE,
+                      qiov->size);
+
+    /* Fix cluster_ending_nr if we are at risk of reading outside the image
+     * (Cluster unaligned image size)
+     */
+    max_cluster_ending_nr = bs->total_sectors - first_sector_after_qiov;
+    cluster_ending_nr = max_cluster_ending_nr < (uint64_t) cluster_ending_nr ?
+                        (int) max_cluster_ending_nr : cluster_ending_nr;
+
+    /* read and add ending */
+    if (cluster_ending_nr) {
+        ret = qcow2_read_cluster_data(bs,
+                                      *data +
+                                      (cluster_beginning_nr +
+                                      nb_sectors) *
+                                      BDRV_SECTOR_SIZE,
+                                      first_sector_after_qiov,
+                                      cluster_ending_nr);
+    }
+
+    if (ret < 0) {
+        goto fail;
+    }
+
+    return 0;
+
+fail:
+    qemu_vfree(*data);
+    *data = NULL;
+    return ret;
+}
diff --git a/block/qcow2.c b/block/qcow2.c
index 7610e56..ecbe352 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -1110,6 +1110,41 @@  fail:
     return ret;
 }
 
+/**
+ * Read some data from the QCOW2 file
+ *
+ * Important: s->lock is dropped. Things can change before the function returns
+ *            to the caller.
+ *
+ * @data:       the buffer where the data must be stored
+ * @sector_num: the sector number to read in the QCOW2 file
+ * @nb_sectors: the number of sectors to read
+ * @ret:        negative on error
+ */
+int qcow2_read_cluster_data(BlockDriverState *bs,
+                            uint8_t *data,
+                            uint64_t sector_num,
+                            int nb_sectors)
+{
+    BDRVQcowState *s = bs->opaque;
+    QEMUIOVector qiov;
+    struct iovec iov;
+    int ret;
+
+    iov.iov_len = nb_sectors * BDRV_SECTOR_SIZE;
+    iov.iov_base = data;
+    qemu_iovec_init_external(&qiov, &iov, 1);
+    qemu_co_mutex_unlock(&s->lock);
+    ret = bdrv_co_readv(bs, sector_num, nb_sectors, &qiov);
+    qemu_co_mutex_lock(&s->lock);
+    if (ret < 0) {
+        error_report("failed to read %d sectors at offset %" PRIu64 "\n",
+                     nb_sectors, sector_num);
+    }
+
+    return ret;
+}
+
 static int qcow2_change_backing_file(BlockDriverState *bs,
     const char *backing_file, const char *backing_fmt)
 {
diff --git a/block/qcow2.h b/block/qcow2.h
index c7b6860..0197a2b 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -373,6 +373,10 @@  int qcow2_backing_read1(BlockDriverState *bs, QEMUIOVector *qiov,
 
 int qcow2_mark_dirty(BlockDriverState *bs);
 int qcow2_update_header(BlockDriverState *bs);
+int qcow2_read_cluster_data(BlockDriverState *bs,
+                            uint8_t *data,
+                            uint64_t sector_num,
+                            int nb_sectors);
 
 /* qcow2-refcount.c functions */
 int qcow2_refcount_init(BlockDriverState *bs);
@@ -441,4 +445,12 @@  int qcow2_cache_get_empty(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset,
     void **table);
 int qcow2_cache_put(BlockDriverState *bs, Qcow2Cache *c, void **table);
 
+/* qcow2-dedup.c functions */
+int qcow2_dedup_read_missing_and_concatenate(BlockDriverState *bs,
+                                             QEMUIOVector *qiov,
+                                             uint64_t sector,
+                                             int sectors_nr,
+                                             uint8_t **dedup_cluster_data,
+                                             int *dedup_cluster_data_nr);
+
 #endif