Patchwork [12/26] FVD: add impl of interface bdrv_aio_readv()

login
register
mail settings
Submitter Chunqiang Tang
Date Feb. 25, 2011, 10:37 p.m.
Message ID <1298673486-3573-12-git-send-email-ctang@us.ibm.com>
Download mbox | patch
Permalink /patch/84590/
State New
Headers show

Comments

Chunqiang Tang - Feb. 25, 2011, 10:37 p.m.
This patch is part of the Fast Virtual Disk (FVD) proposal.
See http://wiki.qemu.org/Features/FVD.

This patch adds FVD's implementation of the bdrv_aio_readv() interface. It
supports read and copy-on-read in FVD.

Signed-off-by: Chunqiang Tang <ctang@us.ibm.com>
---
 block/fvd-bitmap.c |   88 ++++++++++
 block/fvd-load.c   |   20 +++
 block/fvd-read.c   |  484 +++++++++++++++++++++++++++++++++++++++++++++++++++-
 block/fvd-utils.c  |   44 +++++
 block/fvd.c        |    2 +
 5 files changed, 637 insertions(+), 1 deletions(-)
 create mode 100644 block/fvd-load.c
 create mode 100644 block/fvd-utils.c

Patch

diff --git a/block/fvd-bitmap.c b/block/fvd-bitmap.c
index 7e96201..30e4a4b 100644
--- a/block/fvd-bitmap.c
+++ b/block/fvd-bitmap.c
@@ -148,3 +148,91 @@  static bool update_fresh_bitmap_and_check_stale_bitmap(FvdAIOCB * acb)
 
     return need_update;
 }
+
+/* Return true if a valid region is found. */
+static bool find_region_in_base_img(BDRVFvdState * s, int64_t * from,
+                                    int64_t * to)
+{
+    int64_t sec = *from;
+    int64_t region_end = *to;
+
+    if (region_end > s->base_img_sectors) {
+        region_end = s->base_img_sectors;
+    }
+
+check_next_region:
+    if (sec >= region_end) {
+        return false;
+    }
+
+    if (!fresh_bitmap_show_sector_in_base_img(sec, s)) {
+        /* Find the first sector in the base image. */
+
+        sec = ROUND_UP(sec + 1, s->block_size); /* Begin of next block. */
+        while (1) {
+            if (sec >= region_end) {
+                return false;
+            }
+            if (fresh_bitmap_show_sector_in_base_img(sec, s)) {
+                break;
+            }
+            sec += s->block_size;       /* Begin of the next block. */
+        }
+    }
+
+    /* Find the end of the region in the base image. */
+    int64_t first_sec = sec;
+    sec = ROUND_UP(sec + 1, s->block_size);     /* Begin of next block. */
+    while (1) {
+        if (sec >= region_end) {
+            sec = region_end;
+            break;
+        }
+        if (!fresh_bitmap_show_sector_in_base_img(sec, s)) {
+            break;
+        }
+        sec += s->block_size;   /* Begin of the next block. */
+    }
+    int64_t last_sec = sec;
+
+    /* Check conflicting writes. */
+    FvdAIOCB *old;
+    QLIST_FOREACH(old, &s->write_locks, write.next_write_lock) {
+        int64_t old_begin = ROUND_DOWN(old->sector_num, s->block_size);
+        int64_t old_end = old->sector_num + old->nb_sectors;
+        old_end = ROUND_UP(old_end, s->block_size);
+        if (old_begin <= first_sec && first_sec < old_end) {
+            first_sec = old_end;
+        }
+        if (old_begin < last_sec && last_sec <= old_end) {
+            last_sec = old_begin;
+        }
+    }
+
+    if (first_sec >= last_sec) {
+        /* The region in [first_sec, sec) is fully covered by write locks. */
+        goto check_next_region;
+    }
+
+    /* This loop cannot be merged with the loop above. Otherwise, the logic
+     * would be incorrect.  This loop covers the case that an old request
+     * spans over a subset of the region being checked. */
+    QLIST_FOREACH(old, &s->write_locks, write.next_write_lock) {
+        int64_t old_begin = ROUND_DOWN(old->sector_num, s->block_size);
+        if (first_sec <= old_begin && old_begin < last_sec) {
+            last_sec = old_begin;
+        }
+    }
+
+    if (first_sec >= last_sec) {
+        /* The region in [first_sec, sec) is fully covered by write locks. */
+        goto check_next_region;
+    }
+
+    ASSERT(first_sec % s->block_size == 0 && (last_sec % s->block_size == 0 ||
+           last_sec == s->base_img_sectors));
+
+    *from = first_sec;
+    *to = last_sec;
+    return true;
+}
diff --git a/block/fvd-load.c b/block/fvd-load.c
new file mode 100644
index 0000000..80ab32c
--- /dev/null
+++ b/block/fvd-load.c
@@ -0,0 +1,20 @@ 
+/*
+ * QEMU Fast Virtual Disk Format Load Data from Compact Image
+ *
+ * Copyright IBM, Corp. 2010
+ *
+ * Authors:
+ *    Chunqiang Tang <ctang@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+static inline BlockDriverAIOCB *load_data(FvdAIOCB * parent_acb,
+                    BlockDriverState * bs, int64_t sector_num,
+                    QEMUIOVector * orig_qiov, int nb_sectors,
+                    BlockDriverCompletionFunc * cb, void *opaque)
+{
+    return NULL;
+}
diff --git a/block/fvd-read.c b/block/fvd-read.c
index b9f3ac9..cd041e5 100644
--- a/block/fvd-read.c
+++ b/block/fvd-read.c
@@ -11,11 +11,493 @@ 
  *
  */
 
+static void read_backing_for_copy_on_read_cb(void *opaque, int ret);
+static void read_fvd_cb(void *opaque, int ret);
+static inline void calc_read_region(BDRVFvdState * s, int64_t sector_num,
+                    int nb_sectors, int64_t * p_first_sec_in_fvd,
+                    int64_t * p_last_sec_in_fvd,
+                    int64_t * p_first_sec_in_backing,
+                    int64_t * p_last_sec_in_backing);
+static inline BlockDriverAIOCB *load_data(FvdAIOCB * parent_acb,
+                    BlockDriverState * bs, int64_t sector_num,
+                    QEMUIOVector * orig_qiov, int nb_sectors,
+                    BlockDriverCompletionFunc * cb, void *opaque);
+
 static BlockDriverAIOCB *fvd_aio_readv(BlockDriverState * bs,
                                        int64_t sector_num, QEMUIOVector * qiov,
                                        int nb_sectors,
                                        BlockDriverCompletionFunc * cb,
                                        void *opaque)
 {
-    return NULL;
+    BDRVFvdState *s = bs->opaque;
+    TRACE_REQUEST(false, sector_num, nb_sectors);
+
+    if (!s->data_region_prepared) {
+        init_data_region(s);
+    }
+
+    if (s->prefetch_state == PREFETCH_STATE_FINISHED
+        || sector_num >= s->base_img_sectors) {
+        /* This is an efficient case. See Section 3.3.5 of the FVD-cow paper.
+         * This also covers the case of no base image. */
+        return load_data(NULL, bs, sector_num, qiov, nb_sectors, cb, opaque);
+    }
+
+    /* Figure out data regions in the base image and in the FVD data file. */
+    int64_t last_sec_in_backing, first_sec_in_backing;
+    int64_t last_sec_in_fvd, first_sec_in_fvd;
+    calc_read_region(s, sector_num, nb_sectors, &first_sec_in_fvd,
+                     &last_sec_in_fvd, &first_sec_in_backing,
+                     &last_sec_in_backing);
+
+    if (first_sec_in_backing < 0) {
+        /* A simple case: all requested data are in the FVD data file. */
+        return load_data(NULL, bs, sector_num, qiov, nb_sectors, cb, opaque);
+    }
+
+    /* Do copy-on-read only if the context id is 0, i.e., it is not emulating
+     * synchronous I/O.  Doing copy-on-read in emulated synchronous I/O may
+     * leave the copy-on-read callbacks never being processed due to a
+     * mismatching context id. */
+    const bool copy_on_read = s->copy_on_read && (get_async_context_id() == 0);
+
+    if (first_sec_in_fvd < 0 && !copy_on_read) {
+        /* A simple case: all requested data are in the base image and no need
+         * to do copy_on_read. */
+        return bdrv_aio_readv(bs->backing_hd, sector_num, qiov, nb_sectors, cb,
+                              opaque);
+    }
+
+    /* The remaining cases are more complicated, which can be: 1. Data are
+     * only in the base image and copy-on-read is needed.  2. Data are in both
+     * the base image and the FVD data file. Copy-on-read may be either true
+     * or false. */
+    FvdAIOCB *acb = my_qemu_aio_get(&fvd_aio_pool, bs, cb, opaque);
+    if (!acb) {
+        return NULL;
+    }
+
+    QDEBUG("READ: acb%llu-%p  start  sector_num=%" PRId64 " nb_sectors=%d\n",
+           acb->uuid, acb, sector_num, nb_sectors);
+
+    acb->type = OP_READ;
+    acb->cancel_in_progress = false;
+    acb->sector_num = sector_num;
+    acb->nb_sectors = nb_sectors;
+    acb->read.qiov = qiov;
+    acb->read.ret = 0;
+    acb->read.read_backing.hd_acb = NULL;
+    acb->read.read_backing.done = false;
+    acb->read.read_backing.iov.iov_base = NULL;
+    acb->read.read_fvd.hd_acb = NULL;
+    acb->read.read_fvd.iov.iov_base = NULL;
+    acb->read.read_fvd.done = (first_sec_in_fvd < 0);
+
+    /* Read from the base image. */
+    if (copy_on_read) {
+        /* Round the request to the block boundary. */
+        acb->read.read_backing.sector_num =
+            ROUND_DOWN(first_sec_in_backing, s->block_size);
+        int64_t end = ROUND_UP(last_sec_in_backing + 1, s->block_size);
+        if (end > s->base_img_sectors) {
+            end = s->base_img_sectors;
+        }
+        acb->read.read_backing.nb_sectors =
+            end - acb->read.read_backing.sector_num;
+    } else {
+        acb->read.read_backing.sector_num = first_sec_in_backing;
+        acb->read.read_backing.nb_sectors =
+            last_sec_in_backing - first_sec_in_backing + 1;
+    }
+
+    acb->read.read_backing.iov.iov_len = acb->read.read_backing.nb_sectors*512;
+    acb->read.read_backing.iov.iov_base =
+        my_qemu_blockalign(bs->backing_hd, acb->read.read_backing.iov.iov_len);
+    qemu_iovec_init_external(&acb->read.read_backing.qiov,
+                             &acb->read.read_backing.iov, 1);
+    acb->read.read_backing.hd_acb =
+        bdrv_aio_readv(bs->backing_hd, acb->read.read_backing.sector_num,
+                       &acb->read.read_backing.qiov,
+                       acb->read.read_backing.nb_sectors,
+                       read_backing_for_copy_on_read_cb, acb);
+    QDEBUG("READ: acb%llu-%p  read_backing  backing_sector_num=%" PRId64
+           " backing_nb_sectors=%d\n", acb->uuid, acb,
+           acb->read.read_backing.sector_num,
+           acb->read.read_backing.nb_sectors);
+
+    if (!acb->read.read_backing.hd_acb) {
+        my_qemu_vfree(acb->read.read_backing.iov.iov_base);
+        my_qemu_aio_release(acb);
+        return NULL;
+    }
+
+    if (first_sec_in_fvd >= 0) {
+        /* Read the FVD data file. */
+        acb->read.read_fvd.sector_num = first_sec_in_fvd;
+        acb->read.read_fvd.nb_sectors = last_sec_in_fvd - first_sec_in_fvd + 1;
+        acb->read.read_fvd.iov.iov_len = acb->read.read_fvd.nb_sectors * 512;
+
+        /* Make a copy of the current bitmap because it may change when the
+         * read requests finish. */
+        int64_t b = MIN(acb->read.read_backing.sector_num,
+                        acb->read.read_fvd.sector_num);
+        b = b / s->block_size / 8;      /* First byte of the bitmap we need. */
+        int64_t e1 = acb->read.read_backing.sector_num +
+            acb->read.read_backing.nb_sectors;
+        int64_t e2 = acb->read.read_fvd.sector_num +
+            acb->read.read_fvd.nb_sectors;
+        int64_t e = MAX(e1, e2);
+        if (e > s->base_img_sectors) {
+            e = s->base_img_sectors;
+        }
+        e = (e - 1) / s->block_size / 8; /* Last byte of the bitmap we need. */
+        int bitmap_bytes = e - b + 1;
+        int buf_size = acb->read.read_fvd.iov.iov_len +
+            ROUND_UP(bitmap_bytes, 512);
+        acb->read.read_fvd.iov.iov_base =
+            my_qemu_blockalign(s->fvd_data, buf_size);
+        uint8_t *saved_bitmap = ((uint8_t *) acb->read.read_fvd.iov.iov_base) +
+            acb->read.read_fvd.iov.iov_len;
+        memcpy(saved_bitmap, s->fresh_bitmap + b, bitmap_bytes);
+
+        qemu_iovec_init_external(&acb->read.read_fvd.qiov,
+                                 &acb->read.read_fvd.iov, 1);
+        QDEBUG("READ: acb%llu-%p  read_fvd  fvd_sector_num=%" PRId64
+               " fvd_nb_sectors=%d\n", acb->uuid, acb,
+               acb->read.read_fvd.sector_num, acb->read.read_fvd.nb_sectors);
+        acb->read.read_fvd.hd_acb = load_data(acb, bs, first_sec_in_fvd,
+                                              &acb->read.read_fvd.qiov,
+                                              acb->read.read_fvd.nb_sectors,
+                                              read_fvd_cb, acb);
+        if (!acb->read.read_fvd.hd_acb) {
+            if (acb->read.read_backing.hd_acb) {
+                bdrv_aio_cancel(acb->read.read_backing.hd_acb);
+                my_qemu_vfree(acb->read.read_backing.iov.iov_base);
+            }
+            my_qemu_vfree(acb->read.read_fvd.iov.iov_base);
+            my_qemu_aio_release(acb);
+            return NULL;
+        }
+    }
+
+    return &acb->common;
+}
+
+static void copy_on_read_cb(void *opaque, int ret)
+{
+    FvdAIOCB *acb = opaque;
+    BlockDriverState *bs = acb->common.bs;
+    BDRVFvdState *s = bs->opaque;
+
+    if (acb->cancel_in_progress) {
+        return;
+    }
+
+    if (ret == 0) {
+        /* Update fresh_bitmap but do not update stale_bitmap or the on-disk
+         * bitmap. See Section 3.3.4 of the FVD-cow paper. */
+        update_fresh_bitmap(acb->sector_num, acb->nb_sectors, s);
+    }
+
+    s->outstanding_copy_on_read_data -= acb->nb_sectors * 512;
+
+#ifdef FVD_DEBUG
+    s->total_copy_on_read_data += acb->nb_sectors * 512;
+#endif
+    QDEBUG("READ: acb%llu-%p  copy_on_read_cb  buffer_sector_num=%" PRId64
+           " buffer_nb_sectors=%d write_sector_num=%" PRId64
+           " write_nb_sectors=%d outstanding_copy_on_read=%" PRId64 "\n",
+           acb->uuid, acb, acb->copy.buffered_sector_begin,
+           (int)(acb->copy.buffered_sector_end -
+                 acb->copy.buffered_sector_begin), acb->sector_num,
+           acb->nb_sectors, s->outstanding_copy_on_read_data);
+
+    QLIST_REMOVE(acb, copy_lock.next);
+    restart_dependent_writes(acb);
+
+    int64_t begin = acb->sector_num + acb->nb_sectors;
+    int64_t end = acb->copy.buffered_sector_end;
+
+    if (find_region_in_base_img(s, &begin, &end)) {
+        acb->sector_num = begin;
+        acb->nb_sectors = end - begin;
+        acb->copy.iov.iov_base = acb->copy.buf +
+            (begin - acb->copy.buffered_sector_begin) * 512;
+        acb->copy.iov.iov_len = acb->nb_sectors * 512;
+        qemu_iovec_init_external(&acb->copy.qiov, &acb->copy.iov, 1);
+        QDEBUG("READ: acb%llu-%p  copy_on_read  buffer_sector_num=%" PRId64
+               " buffer_nb_sectors=%d write_sector_num=%" PRId64
+               " write_nb_sectors=%d outstanding_copy_on_read=%" PRId64 "\n",
+               acb->uuid, acb, acb->copy.buffered_sector_begin,
+               (int)(acb->copy.buffered_sector_end -
+                     acb->copy.buffered_sector_begin), acb->sector_num,
+               acb->nb_sectors, s->outstanding_copy_on_read_data);
+        acb->copy.hd_acb = store_data(true, acb, bs, acb->sector_num,
+                                      &acb->copy.qiov, acb->nb_sectors,
+                                      copy_on_read_cb, acb);
+        if (acb->copy.hd_acb) {
+            QLIST_INIT(&acb->copy_lock.dependent_writes);
+            acb->copy_lock.begin = begin;
+            acb->copy_lock.end = end;
+            QLIST_INSERT_HEAD(&s->copy_locks, acb, copy_lock.next);
+            s->outstanding_copy_on_read_data += acb->copy.iov.iov_len;
+            return;
+        }
+    }
+
+    QDEBUG("READ: acb%llu-%p  no_more_copy_on_read\n", acb->uuid, acb);
+    my_qemu_vfree(acb->copy.buf);
+    my_qemu_aio_release(acb);
+}
+
+static void finish_read(FvdAIOCB * acb)
+{
+    BlockDriverState *bs = acb->common.bs;
+    BDRVFvdState *s = bs->opaque;
+
+    if (acb->read.ret != 0) {
+        QDEBUG("READ: acb%llu-%p  finish_read error ret=%d sector_num=%" PRId64
+               " nb_sectors=%d\n", acb->uuid, acb, acb->read.ret,
+               acb->sector_num, acb->nb_sectors);
+        acb->common.cb(acb->common.opaque, acb->read.ret);
+        if (acb->read.read_backing.iov.iov_base) {
+            my_qemu_vfree(acb->read.read_backing.iov.iov_base);
+        }
+        if (acb->read.read_fvd.iov.iov_base) {
+            my_qemu_vfree(acb->read.read_fvd.iov.iov_base);
+        }
+        my_qemu_aio_release(acb);
+
+        return;
+    }
+
+    if (!acb->read.read_fvd.iov.iov_base) {
+        /* Only read data from the base image. */
+        uint8_t *data = ((uint8_t *) acb->read.read_backing.iov.iov_base) +
+            (acb->sector_num - acb->read.read_backing.sector_num) * 512;
+        qemu_iovec_from_buffer(acb->read.qiov, data, acb->nb_sectors * 512);
+    } else {
+        /* Under the guidance of the saved bitmap, merge data from the FVD
+         * data file and the base image. */
+        uint8_t *saved_bitmap = ((uint8_t *) acb->read.read_fvd.iov.iov_base) +
+            acb->read.read_fvd.iov.iov_len;
+        int64_t bitmap_offset = MIN(acb->read.read_backing.sector_num,
+                                    acb->read.read_fvd.sector_num);
+        bitmap_offset = bitmap_offset / s->block_size / 8;
+        int iov_index = 0;
+        uint8_t *iov_buf = acb->read.qiov->iov[0].iov_base;
+        int iov_left = acb->read.qiov->iov[0].iov_len;
+        int64_t sec = acb->sector_num;
+        const int64_t end = acb->sector_num + acb->nb_sectors;
+        int64_t first_sec;
+        uint8_t *source;
+
+        if (bitmap_show_sector_in_base_img(sec,s,bitmap_offset,saved_bitmap)) {
+            goto in_backing;
+        }
+
+        while (1) {
+            /* For a section of data in the FVD data file. */
+            if (sec >= end) {
+                break;
+            }
+
+            first_sec = sec;
+            do {
+                sec++;
+            } while (sec < end && !bitmap_show_sector_in_base_img(sec, s,
+                                     bitmap_offset, saved_bitmap));
+
+            source = ((uint8_t *) acb->read.read_fvd.iov.iov_base) +
+                (first_sec - acb->read.read_fvd.sector_num) * 512;
+            copy_iov(acb->read.qiov->iov, &iov_index, &iov_buf, &iov_left,
+                        source, (sec - first_sec) * 512);
+
+in_backing:
+            /* For a section of data in the base image. */
+            if (sec >= end) {
+                break;
+            }
+
+            first_sec = sec;
+            do {
+                sec++;
+            } while (sec < end && bitmap_show_sector_in_base_img(sec, s,
+                                    bitmap_offset, saved_bitmap));
+
+            source = ((uint8_t *) acb->read.read_backing.iov.iov_base) +
+                (first_sec - acb->read.read_backing.sector_num) * 512;
+            copy_iov(acb->read.qiov->iov, &iov_index, &iov_buf, &iov_left,
+                        source, (sec - first_sec) * 512);
+        }
+
+        ASSERT(iov_index == acb->read.qiov->niov - 1 && iov_left == 0);
+        my_qemu_vfree(acb->read.read_fvd.iov.iov_base);
+    }
+
+    QDEBUG("READ: acb%llu-%p  finish_read  ret=%d\n", acb->uuid, acb,
+           acb->read.ret);
+    acb->common.cb(acb->common.opaque, acb->read.ret);
+
+    if (!s->copy_on_read || get_async_context_id() != 0) {
+        /* Do copy-on-read only if the context id is 0, i.e., it is not
+         * emulating synchronous I/O.  Doing copy-on-read in emulated
+         * synchronous I/O may leave the copy-on-read callbacks never being
+         * processed due to mismatching context id. */
+        my_qemu_vfree(acb->read.read_backing.iov.iov_base);
+        my_qemu_aio_release(acb);
+        return;
+    }
+
+    /* Convert AIOReadCB into an AIOCopyCB for copy-on-read. */
+    uint8_t *buf = acb->read.read_backing.iov.iov_base;
+    int64_t begin = acb->read.read_backing.sector_num;
+    int64_t end = begin + acb->read.read_backing.nb_sectors;
+
+    acb->type = OP_COPY;
+    acb->copy.buf = buf;
+    acb->copy.buffered_sector_begin = begin;
+    acb->copy.buffered_sector_end = end;
+
+    if (s->outstanding_copy_on_read_data < s->max_outstanding_copy_on_read_data
+        && find_region_in_base_img(s, &begin, &end)) {
+        /* Write to the FVD data file. */
+        acb->sector_num = begin;
+        acb->nb_sectors = end - begin;
+        acb->copy.iov.iov_base =
+            buf + (begin - acb->copy.buffered_sector_begin) * 512;
+        acb->copy.iov.iov_len = acb->nb_sectors * 512;
+        qemu_iovec_init_external(&acb->copy.qiov, &acb->copy.iov, 1);
+        QDEBUG("READ: acb%llu-%p  copy_on_read  buffer_sector_num=%" PRId64
+               " buffer_nb_sectors=%d write_sector_num=%" PRId64
+               " write_nb_sectors=%d outstanding_copy_on_read=%" PRId64 "\n",
+               acb->uuid, acb, acb->copy.buffered_sector_begin,
+               (int)(acb->copy.buffered_sector_end -
+                     acb->copy.buffered_sector_begin), acb->sector_num,
+               acb->nb_sectors, s->outstanding_copy_on_read_data);
+        acb->copy.hd_acb = store_data(true, acb, bs, acb->sector_num,
+                                      &acb->copy.qiov, acb->nb_sectors,
+                                      copy_on_read_cb, acb);
+        if (acb->copy.hd_acb) {
+            QLIST_INIT(&acb->copy_lock.dependent_writes);
+            acb->copy_lock.begin = begin;
+            acb->copy_lock.end = end;
+            QLIST_INSERT_HEAD(&s->copy_locks, acb, copy_lock.next);
+            s->outstanding_copy_on_read_data += acb->copy.iov.iov_len;
+            return;
+        }
+    }
+
+    /* No more copy-on-read to do. */
+    my_qemu_vfree(acb->copy.buf);
+    my_qemu_aio_release(acb);
+}
+
+static void read_fvd_cb(void *opaque, int ret)
+{
+    FvdAIOCB *acb = opaque;
+
+    if (acb->cancel_in_progress) {
+        return;
+    }
+
+    QDEBUG("READ: acb%llu-%p  read_fvd_cb ret=%d\n", acb->uuid, acb, ret);
+    acb->read.read_fvd.hd_acb = NULL;
+    acb->read.read_fvd.done = true;
+    if (acb->read.ret == 0) {
+        acb->read.ret = ret;
+    }
+
+    if (acb->read.read_backing.done) {
+        finish_read(acb);       /* The other request also finished. */
+    }
+}
+
+static void read_backing_for_copy_on_read_cb(void *opaque, int ret)
+{
+    FvdAIOCB *acb = opaque;
+
+    if (acb->cancel_in_progress) {
+        return;
+    }
+
+    QDEBUG("READ: acb%llu-%p  read_backing_cb ret=%d\n", acb->uuid, acb, ret);
+    acb->read.read_backing.hd_acb = NULL;
+    acb->read.read_backing.done = true;
+    if (acb->read.ret == 0) {
+        acb->read.ret = ret;
+    }
+
+    if (acb->read.read_fvd.done) {
+        finish_read(acb);       /* The other request also finished. */
+    }
+}
+
+static inline void calc_read_region(BDRVFvdState * s, int64_t sector_num,
+                                    int nb_sectors,
+                                    int64_t * p_first_sec_in_fvd,
+                                    int64_t * p_last_sec_in_fvd,
+                                    int64_t * p_first_sec_in_backing,
+                                    int64_t * p_last_sec_in_backing)
+{
+    int64_t last_sec_in_backing = -1, first_sec_in_backing = -1;
+    int64_t last_sec_in_fvd = -1, first_sec_in_fvd = -1;
+    int prev_block_in_backing;
+
+    if (fresh_bitmap_show_sector_in_base_img(sector_num, s)) {
+        first_sec_in_backing = last_sec_in_backing = sector_num;
+        prev_block_in_backing = true;
+    } else {
+        first_sec_in_fvd = last_sec_in_fvd = sector_num;
+        prev_block_in_backing = false;
+    }
+
+    /* Begin of next block. */
+    int64_t sec = ROUND_UP(sector_num + 1, s->block_size);
+
+    const int64_t sec_end = sector_num + nb_sectors;
+    int64_t last_sec = MIN(sec_end, s->base_img_sectors) - 1;
+
+    while (1) {
+        if (sec > last_sec) {
+            sec = last_sec;
+        }
+
+        if (fresh_bitmap_show_sector_in_base_img(sec, s)) {
+            if (first_sec_in_backing < 0) {
+                first_sec_in_backing = sec;
+            }
+            if (!prev_block_in_backing) {
+                last_sec_in_fvd = sec - 1;
+                prev_block_in_backing = true;
+            }
+            last_sec_in_backing = sec;
+        } else {
+            if (first_sec_in_fvd < 0) {
+                first_sec_in_fvd = sec;
+            }
+            if (prev_block_in_backing) {
+                last_sec_in_backing = sec - 1;
+                prev_block_in_backing = false;
+            }
+            last_sec_in_fvd = sec;
+        }
+
+        if (sec == last_sec) {
+            break;
+        }
+        sec += s->block_size;
+    }
+
+    if (sec_end > s->base_img_sectors) {
+        if (first_sec_in_fvd < 0) {
+            first_sec_in_fvd = s->base_img_sectors;
+        }
+        last_sec_in_fvd = sec_end - 1;
+    }
+
+    *p_first_sec_in_fvd = first_sec_in_fvd;
+    *p_last_sec_in_fvd = last_sec_in_fvd;
+    *p_first_sec_in_backing = first_sec_in_backing;
+    *p_last_sec_in_backing = last_sec_in_backing;
 }
diff --git a/block/fvd-utils.c b/block/fvd-utils.c
new file mode 100644
index 0000000..ff2bb8f
--- /dev/null
+++ b/block/fvd-utils.c
@@ -0,0 +1,44 @@ 
+/*
+ * QEMU Fast Virtual Disk Format Utility Functions
+ *
+ * Copyright IBM, Corp. 2010
+ *
+ * Authors:
+ *    Chunqiang Tang <ctang@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+static inline void copy_iov(struct iovec *iov, int *p_index,
+                               uint8_t ** p_buf, int *p_left,
+                               uint8_t * source, int total)
+{
+    int index = *p_index;
+    uint8_t *buf = *p_buf;
+    int left = *p_left;
+
+    if (left <= 0) {
+        index++;
+        buf = iov[index].iov_base;
+        left = iov[index].iov_len;
+    }
+
+    while (1) {
+        if (left >= total) {
+            memcpy(buf, source, total);
+            *p_buf = buf + total;
+            *p_left = left - total;
+            *p_index = index;
+            return;
+        }
+
+        memcpy(buf, source, left);
+        total -= left;
+        source += left;
+        index++;
+        buf = iov[index].iov_base;
+        left = iov[index].iov_len;
+    }
+}
diff --git a/block/fvd.c b/block/fvd.c
index 5b3dcac..74845e7 100644
--- a/block/fvd.c
+++ b/block/fvd.c
@@ -27,6 +27,7 @@ 
  * function optimization. */
 #include "block/fvd-debug.c"
 #include "block/fvd-flush.c"
+#include "block/fvd-utils.c"
 #include "block/fvd-bitmap.c"
 #include "block/fvd-misc.c"
 #include "block/fvd-create.c"
@@ -34,6 +35,7 @@ 
 #include "block/fvd-write.c"
 #include "block/fvd-read.c"
 #include "block/fvd-store.c"
+#include "block/fvd-load.c"
 #include "block/fvd-journal.c"
 #include "block/fvd-prefetch.c"
 #include "block/fvd-update.c"