diff mbox

[RFC,v2,16/23] qcow2: Reading from areas not in L2 tables yet

Message ID 1360761733-25347-17-git-send-email-kwolf@redhat.com
State New
Headers show

Commit Message

Kevin Wolf Feb. 13, 2013, 1:22 p.m. UTC
In preparation for delayed COW (i.e. completing the guest write request
before the associated COWs have completed) we must make sure that after
the guest data has been written, the new data is read back, even if the
COW hasn't completed and the new cluster isn't linked in the L2 table yet.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block/qcow2-cluster.c |   39 +++++++++++++++++++++++++++++++++++++++
 block/qcow2.c         |    2 ++
 block/qcow2.h         |   19 +++++++++++++++++++
 3 files changed, 60 insertions(+), 0 deletions(-)
diff mbox

Patch

diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index 645ea25..be75c17 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -372,6 +372,40 @@  out:
     return ret;
 }
 
+/* Checks the read at @start (*num sectors) against in-flight allocations whose
+ * guest data is written. Returns true and sets *cluster_offset if @start lies
+ * in one; *num may be shortened so the request stops at an overlap edge. */
+static bool overlaps_allocation(BlockDriverState *bs, uint64_t start,
+    int *num, uint64_t *cluster_offset)
+{
+    BDRVQcowState *s = bs->opaque;
+    QCowL2Meta *m;
+    uint64_t end = start + ((uint64_t) *num << BDRV_SECTOR_BITS);
+
+    QLIST_FOREACH(m, &s->cluster_allocs, next_in_flight) {
+        uint64_t old_start = l2meta_req_start(m);
+        uint64_t old_end = l2meta_req_end(m);
+
+        /* Guest data not written yet, so the allocating request can't have
+         * completed either: we're free to read the old data. */
+        if (!m->is_written) {
+            continue;
+        }
+        if (start >= old_start && start < old_end) {
+            /* Start overlaps: read from the new cluster even though it isn't
+             * linked yet. Use m->offset as the base: it is assumed to be
+             * cluster-aligned and backed by m->alloc_offset (TODO confirm). */
+            *num = MIN(*num, (old_end - start) >> BDRV_SECTOR_BITS);
+            *cluster_offset = m->alloc_offset
+                + ((start - m->offset) & ~(s->cluster_size - 1));
+            return true;
+        } else if (start < old_start && end > old_start) {
+            /* Overlap after the start: shorten so that no overlap occurs. */
+            *num = MIN(*num, (old_start - start) >> BDRV_SECTOR_BITS);
+        }
+    }
+    return false;
+}
 
 /*
  * get_cluster_offset
@@ -398,6 +432,11 @@  int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
     uint64_t nb_available, nb_needed;
     int ret;
 
+    /* Check overlap with not yet completed allocations */
+    if (overlaps_allocation(bs, offset, num, cluster_offset)) {
+        return QCOW2_CLUSTER_NORMAL;
+    }
+
     index_in_cluster = (offset >> 9) & (s->cluster_sectors - 1);
     nb_needed = *num + index_in_cluster;
 
diff --git a/block/qcow2.c b/block/qcow2.c
index 345487e..07f7493 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -827,6 +827,8 @@  static coroutine_fn int qcow2_co_writev(BlockDriverState *bs,
         while (l2meta != NULL) {
             QCowL2Meta *next;
 
+            l2meta->is_written = true;
+
             ret = qcow2_alloc_cluster_link_l2(bs, l2meta);
             if (ret < 0) {
                 goto fail;
diff --git a/block/qcow2.h b/block/qcow2.h
index 9b16538..4c139d0 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -229,6 +229,14 @@  typedef struct QCowL2Meta
     int nb_clusters;
 
     /**
+     * true if the guest data (but not necessarily the related COW) has been
+     * written to disk, so that read requests can (and after having completed
+     * this request actually _must_) read the new data instead of reading the
+     * old data that the L2 table still refers to.
+     */
+    bool is_written;
+
+    /**
      * Requests that overlap with this allocation and wait to be restarted
      * when the allocating request has completed.
      */
@@ -316,6 +324,17 @@  static inline bool qcow2_need_accurate_refcounts(BDRVQcowState *s)
     return !(s->incompatible_features & QCOW2_INCOMPAT_DIRTY);
 }
 
+static inline uint64_t l2meta_req_start(QCowL2Meta *m)
+{
+    return m->offset + m->cow_start.offset  /* = first byte past cow_start */
+        + ((uint64_t) m->cow_start.nb_sectors << BDRV_SECTOR_BITS);
+}
+
+static inline uint64_t l2meta_req_end(QCowL2Meta *m) /* exclusive end */
+{
+    return m->offset + ((uint64_t) m->nb_available << BDRV_SECTOR_BITS);
+}
+
 static inline uint64_t l2meta_cow_start(QCowL2Meta *m)
 {
     return m->offset + m->cow_start.offset;