| Submitter | Benoît Canet |
|---|---|
| Date | Feb. 6, 2013, 12:31 p.m. |
| Message ID | <1360153926-9492-14-git-send-email-benoit@irqsave.net> |
| Download | mbox | patch |
| Permalink | /patch/218627/ |
| State | New |
| Headers | show |
Comments
On Wed, Feb 06, 2013 at 01:31:46PM +0100, Benoît Canet wrote: > diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c > index ef91216..5b1d20d 100644 > --- a/block/qcow2-cluster.c > +++ b/block/qcow2-cluster.c > @@ -710,6 +710,7 @@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m) > > for (i = 0; i < m->nb_clusters; i++) { > uint64_t flags = 0; > + uint64_t offset = cluster_offset + (i << s->cluster_bits); > /* if two concurrent writes happen to the same unallocated cluster > * each write allocates separate cluster and writes data concurrently. > * The first one to complete updates l2 table with pointer to its > @@ -722,8 +723,14 @@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m) > flags = m->oflag_copied ? QCOW_OFLAG_COPIED : 0; > flags |= m->to_deduplicate ? QCOW_OFLAG_TO_DEDUP : 0; > > - l2_table[l2_index + i] = cpu_to_be64((cluster_offset + > - (i << s->cluster_bits)) | flags); > + l2_table[l2_index + i] = cpu_to_be64(offset | flags); > + > + /* make the deduplication forget the cluster to avoid making > + * the dedup pointing to a cluster that has changed on it's back. > + */ > + if (m->to_deduplicate) { > + qcow2_dedup_forget_cluster_by_sector(bs, offset >> 9); > + } This does not play well with internal snapshots. Imagine that an internal snapshot was taken, so refcount == 2. Now the cluster is overwritten by the guest but we still need to hang on to the original data since the snapshot refers to it. If dedup forgets about the cluster then dedup is only effective for the current disk image, but it ignores snapshots. Ideally dedup would take snapshots into account since they share the same data clusters as the current image. Stefan
Le Thursday 07 Feb 2013 à 10:48:14 (+0100), Stefan Hajnoczi a écrit : > On Wed, Feb 06, 2013 at 01:31:46PM +0100, Benoît Canet wrote: > > diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c > > index ef91216..5b1d20d 100644 > > --- a/block/qcow2-cluster.c > > +++ b/block/qcow2-cluster.c > > @@ -710,6 +710,7 @@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m) > > > > for (i = 0; i < m->nb_clusters; i++) { > > uint64_t flags = 0; > > + uint64_t offset = cluster_offset + (i << s->cluster_bits); > > /* if two concurrent writes happen to the same unallocated cluster > > * each write allocates separate cluster and writes data concurrently. > > * The first one to complete updates l2 table with pointer to its > > @@ -722,8 +723,14 @@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m) > > flags = m->oflag_copied ? QCOW_OFLAG_COPIED : 0; > > flags |= m->to_deduplicate ? QCOW_OFLAG_TO_DEDUP : 0; > > > > - l2_table[l2_index + i] = cpu_to_be64((cluster_offset + > > - (i << s->cluster_bits)) | flags); > > + l2_table[l2_index + i] = cpu_to_be64(offset | flags); > > + > > + /* make the deduplication forget the cluster to avoid making > > + * the dedup pointing to a cluster that has changed on it's back. > > + */ > > + if (m->to_deduplicate) { > > + qcow2_dedup_forget_cluster_by_sector(bs, offset >> 9); > > + } > > This does not play well with internal snapshots. Imagine that an > internal snapshot was taken, so refcount == 2. > > Now the cluster is overwritten by the guest but we still need to hang on > to the original data since the snapshot refers to it. > > If dedup forgets about the cluster then dedup is only effective for the > current disk image, but it ignores snapshots. Ideally dedup would take > snapshots into account since they share the same data clusters as the > current image. > > Stefan Is checking the refcount with a qcow2_get_refcounts(index) a solution ? Benoît
Patch
diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c index ef91216..5b1d20d 100644 --- a/block/qcow2-cluster.c +++ b/block/qcow2-cluster.c @@ -710,6 +710,7 @@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m) for (i = 0; i < m->nb_clusters; i++) { uint64_t flags = 0; + uint64_t offset = cluster_offset + (i << s->cluster_bits); /* if two concurrent writes happen to the same unallocated cluster * each write allocates separate cluster and writes data concurrently. * The first one to complete updates l2 table with pointer to its @@ -722,8 +723,14 @@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m) flags = m->oflag_copied ? QCOW_OFLAG_COPIED : 0; flags |= m->to_deduplicate ? QCOW_OFLAG_TO_DEDUP : 0; - l2_table[l2_index + i] = cpu_to_be64((cluster_offset + - (i << s->cluster_bits)) | flags); + l2_table[l2_index + i] = cpu_to_be64(offset | flags); + + /* make the deduplication forget the cluster to avoid making + * the dedup pointing to a cluster that has changed on it's back. + */ + if (m->to_deduplicate) { + qcow2_dedup_forget_cluster_by_sector(bs, offset >> 9); + } } diff --git a/block/qcow2-dedup.c b/block/qcow2-dedup.c index 9a6dcc7..9fb0ced 100644 --- a/block/qcow2-dedup.c +++ b/block/qcow2-dedup.c @@ -824,7 +824,7 @@ static void qcow2_remove_hash_node(BlockDriverState *bs, * @physical_sect: The physical sector of the cluster corresponding to the hash */ static void qcow2_remove_hash_node_by_sector(BlockDriverState *bs, - uint64_t physical_sect) + uint64_t physical_sect) { BDRVQcowState *s = bs->opaque; QCowHashNode *hash_node; @@ -838,6 +838,12 @@ static void qcow2_remove_hash_node_by_sector(BlockDriverState *bs, qcow2_remove_hash_node(bs, hash_node); } +void qcow2_dedup_forget_cluster_by_sector(BlockDriverState *bs, + uint64_t physical_sect) +{ + qcow2_remove_hash_node_by_sector(bs, physical_sect); +} + /* This function store a hash information to disk and RAM * * @hash: the QCowHash to process diff --git a/block/qcow2.h b/block/qcow2.h index dcb925d..1ee4c13 100644 --- a/block/qcow2.h +++ b/block/qcow2.h @@ -466,6 +466,8 @@ int qcow2_cache_put(BlockDriverState *bs, Qcow2Cache *c, void **table); /* qcow2-dedup.c functions */ bool qcow2_must_deduplicate(BlockDriverState *bs); +void qcow2_dedup_forget_cluster_by_sector(BlockDriverState *bs, + uint64_t physical_sect); int qcow2_dedup_read_missing_and_concatenate(BlockDriverState *bs, QEMUIOVector *qiov, uint64_t sector,
Signed-off-by: Benoit Canet <benoit@irqsave.net> --- block/qcow2-cluster.c | 11 +++++++++-- block/qcow2-dedup.c | 8 +++++++- block/qcow2.h | 2 ++ 3 files changed, 18 insertions(+), 3 deletions(-)