Patchwork [02/26] FVD: extend qemu-io to do fully automated testing

login
register
mail settings
Submitter Chunqiang Tang
Date Feb. 25, 2011, 10:37 p.m.
Message ID <1298673486-3573-2-git-send-email-ctang@us.ibm.com>
Download mbox | patch
Permalink /patch/84613/
State New
Headers show

Comments

Chunqiang Tang - Feb. 25, 2011, 10:37 p.m.
This patch is part of the Fast Virtual Disk (FVD) proposal.
See http://wiki.qemu.org/Features/FVD.

This patch extends qemu-io in two ways. First, it adds the 'sim' command to
work with the simulated block device driver 'blksim', which allows a developer
to fully control the order of disk I/Os, the order of callbacks, and the
return code of every I/O operation. Second, it adds a fully automated testing
mode, 'qemu-io --auto'. This mode can, e.g., simulate 1,000 threads
concurrently submitting overlapping disk I/O requests to QEMU block drivers,
use blksim to inject I/O errors and race conditions, and automatically verify
the correctness of I/O results. This tool can run unattended to exercise an
unlimited number of randomized test cases. Once it finds a bug, the bug is
precisely repeatable with the help of blksim, even if it is a rare race
condition bug. This makes debugging much easier.

Signed-off-by: Chunqiang Tang <ctang@us.ibm.com>
---
 qemu-io-auto.c |  947 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 qemu-io-sim.c  |  127 ++++++++
 qemu-io.c      |   50 +++-
 qemu-tool.c    |  107 ++++++-
 4 files changed, 1209 insertions(+), 22 deletions(-)
 create mode 100644 qemu-io-auto.c
 create mode 100644 qemu-io-sim.c

Patch

diff --git a/qemu-io-auto.c b/qemu-io-auto.c
new file mode 100644
index 0000000..73d79c7
--- /dev/null
+++ b/qemu-io-auto.c
@@ -0,0 +1,947 @@ 
+/*
+ * Extension of qemu-io to perform automated random tests
+ *
+ * Copyright IBM, Corp. 2010
+ *
+ * Authors:
+ *    Chunqiang Tang <ctang@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+/*=============================================================================
+ *  This module implements a fully automated testing tool for block device
+ *  drivers. It works with block/blksim.c to test race conditions by
+ *  randomizing event timing. It is recommended to perform automated testing
+ *  on a ramdisk or tmpfs, which stores files in memory and avoids wearing out
+ *  the disk. Below is an example of using qemu-io to perform fully
+ *  automated testing.
+
+# mount -t tmpfs none /var/tmpfs -o size=4G
+# dd if=/dev/zero of=/var/tmpfs/truth.raw count=0 bs=1 seek=1G
+# dd if=/dev/zero of=/var/tmpfs/zero-500M.raw count=0 bs=1 seek=500M
+# qemu-img create -f qcow2 -obacking_fmt=blksim -b /var/tmpfs/zero-500M.raw \
+            /var/tmpfs/test.qcow2 1G
+# qemu-io --auto --seed=1 --truth=/var/tmpfs/truth.raw --format=qcow2 \
+    --test=blksim:/var/tmpfs/test.qcow2 --verify_write=true \
+    --compare_before=false --compare_after=true --round=100000 \
+    --parallel=1000 --io_size=10485760 --fail_prob=0 --cancel_prob=0 \
+    --instant_qemubh=true
+ *=============================================================================
+ */
+
+#include "qemu-timer.h"
+#include "qemu-common.h"
+#include "block_int.h"
+#include "block/blksim.h"
+
+/* Debug tracing. With "#if 1" QDEBUG expands to an empty statement and its
+ * arguments are discarded; change the condition to 0 to make QDEBUG an alias
+ * of printf. */
+#if 1
+# define QDEBUG(format,...) do {} while (0)
+#else
+# define QDEBUG printf
+#endif
+
+/* Print "<file>:<line> --- " followed by the formatted message to stderr,
+ * then abort() the process. Never returns. */
+#define die(format,...) \
+    do { \
+        fprintf (stderr, "%s:%d --- ", __FILE__, __LINE__); \
+        fprintf (stderr, format, ##__VA_ARGS__); \
+        abort(); \
+    } while(0)
+
+/* Kinds of operation a tester can issue. OP_FLUSH is performed synchronously
+ * with bdrv_flush(); OP_AIO_FLUSH is submitted through bdrv_aio_flush(). */
+typedef enum { OP_NULL = 0, OP_READ, OP_WRITE, OP_FLUSH,
+    OP_AIO_FLUSH } op_type_t;
+/* Printable operation names, indexed by op_type_t. */
+const char *op_type_str[] = { "NULL", "READ", "WRITE", "FLUSH", "AIO_FLUSH"};
+
+/* State of one full-image comparison, passed as the opaque argument to
+ * compare_full_images_cb(). */
+typedef struct CompareFullCB
+{
+    QEMUIOVector qiov;      /* Wraps 'iov' for bdrv_aio_readv(). */
+    struct iovec iov;       /* Single buffer receiving test-image data. */
+    int64_t sector_num;     /* First sector of the chunk being compared. */
+    int nb_sectors;         /* Number of sectors in the current chunk. */
+    int max_nb_sectors;     /* Upper bound on the chunk size, in sectors. */
+    uint8_t *truth_buf;     /* Buffer receiving truth-image data. */
+} CompareFullCB;
+
+/* Per-tester state. Each element of 'testers' has at most one request
+ * described by these fields at a time. */
+typedef struct RandomIO
+{
+    QEMUIOVector qiov;      /* I/O vector laid over test_buf. */
+    int64_t sector_num;     /* First sector of the current request. */
+    int nb_sectors;         /* Length of the current request, in sectors. */
+    uint8_t *truth_buf;     /* Data read back from the truth image. */
+    uint8_t *test_buf;      /* Data written to / read from the test image. */
+    op_type_t type;         /* Operation currently being performed. */
+    int tester;             /* Index of this entry in 'testers'. */
+    int64_t uuid;           /* Taken from test_uuid; stamped into written
+                             * sectors and printed in debug traces. */
+    int allow_cancel;       /* When false, submit_rand_io() neither injects
+                             * a failure nor cancels the request. */
+    BlockDriverAIOCB *acb;
+} RandomIO;
+
+/* File descriptor of the truth image (also reused by create_test_file() for
+ * the file being created). */
+static int fd;
+/* Size of the test image in 512-byte sectors; set in perform_test(). */
+static int64_t total_sectors;
+/* Maximum request size. Given in bytes on the command line; perform_test()
+ * converts it to sectors before any request is generated. */
+static int64_t io_size = 262144;
+/* If true, every successful write is followed by a read of the same range. */
+static bool verify_write = false;
+/* Number of testers, i.e., entries in 'testers'. */
+static int parallel = 1;
+/* Maximum number of iovec entries per request. */
+static int max_iov = 10;
+/* Total number of rounds to run, shared by all testers. */
+static int64_t round = 10;
+/* Number of rounds started so far. */
+static int64_t finished_round = 0;
+static RandomIO *testers = NULL;
+/* Probabilities used when choosing what to do with a request. */
+static double fail_prob = 0;
+static double cancel_prob = 0;
+static double aio_flush_prob = 0;
+static double flush_prob = 0;
+/* Passed to init_blksim(); presumably bounds blksim's random event timing
+ * -- confirm against block/blksim.c. */
+static int64_t rand_time = 1000;
+/* Source of RandomIO.uuid values; incremented for every generated request. */
+static int64_t test_uuid = 0;
+/* Number of testers that found no round left to run. */
+static int finished_testers = 0;
+
+static void rand_io_cb(void *opaque, int ret);
+static void perform_next_io(RandomIO * r);
+
+/* Print the command-line synopsis of 'qemu-io --auto' and exit with status 1.
+ * The list mirrors the long options accepted by auto_test(). */
+static void auto_test_usage(void)
+{
+    printf("%s --auto [--help]\n"
+           "\t[--truth=<truth_img>]\n"
+           "\t[--test=<img_to_test>]\n"
+           "\t[--seed=<#d>]\n"
+           "\t[--format=<test_img_fmt>]\n"
+           "\t[--round=<#d>]\n"
+           "\t[--rand_time=<#d>]\n"
+           "\t[--instant_qemubh=<true|false>]\n"
+           "\t[--fail_prob=<#f>]\n"
+           "\t[--cancel_prob=<#f>]\n"
+           "\t[--aio_flush_prob=<#f>]\n"
+           "\t[--flush_prob=<#f>]\n"
+           "\t[--io_size=<#d>]\n"
+           "\t[--verify_write=<true|false>]\n"
+           "\t[--parallel=<#d>]\n"
+           "\t[--max_iov=<#d>]\n"
+           "\t[--compare_before=<true|false>]\n"
+           "\t[--compare_after=<true|false>]\n"
+           "\t[--cache=<writethrough|writeback|none>]\n"
+           "\t[--create=<file>]\n"
+           "\t[--file_size=<#d>]\n"
+           "\t[--block_size=<#d>]\n"
+           "\t[--empty_block_prob=<#f>]\n"
+           "\t[--empty_block_chain=<#d>]\n"
+           "\n", progname);
+    exit(1);
+}
+
+/*
+ * Transfer nb_sectors 512-byte sectors, starting at sector_num, between 'buf'
+ * and the truth image (global 'fd'). Reads from the file if do_read is
+ * non-zero, otherwise writes to it. Short transfers and EINTR are retried.
+ * Any other failure, including a transfer that makes no progress, aborts the
+ * process through die(). Returns 0 on success.
+ */
+static int truth_io(void *buf, int64_t sector_num, int nb_sectors, int do_read)
+{
+    off_t offset = sector_num * 512;
+    size_t size = (size_t)nb_sectors * 512;
+
+    if (lseek(fd, offset, SEEK_SET) < 0) {
+        die("lseek\n");
+    }
+
+    while (size > 0) {
+        ssize_t r;
+        if (do_read) {
+            r = read(fd, buf, size);
+        } else {
+            r = write(fd, buf, size);
+        }
+        if (r > 0) {
+            size -= r;
+            buf = (void *)(((char *)buf) + r);
+        } else if (r == 0) {
+            /* End of file (or a write that transferred nothing): no progress
+             * is possible, so fail instead of looping forever. */
+            die("Unexpected end of the truth file.\n");
+            return -1;
+        } else if (errno != EINTR) {
+            die("I/O error on the truth file.\n");
+            return -1;
+        }
+    }
+
+    return 0;
+}
+
+/*
+ * Compare nb_sectors sectors of test_buf against truth_buf. sector_num is the
+ * absolute number of the first sector and is only used for reporting. The
+ * process is aborted on the first mismatch; otherwise 0 is returned.
+ */
+static int verify(uint8_t * truth_buf, uint8_t * test_buf,
+                  int64_t sector_num, int nb_sectors)
+{
+    int s;
+
+    for (s = 0; s < nb_sectors; s++) {
+        int64_t base = s * 512;
+        uint8_t *expect = &truth_buf[base];
+        uint8_t *actual = &test_buf[base];
+        int b;
+
+        /* A truth sector whose first byte is zero was never written, so the
+         * test image may legitimately hold random garbage there. Such a
+         * sector is skipped, as is any sector that matches. */
+        if (expect[0] == (uint8_t)0 || memcmp(expect, actual, 512) == 0) {
+            continue;
+        }
+
+        fprintf(stderr, "Sector %"PRId64" differs, discovered by "
+                "process %d\n", sector_num + s, getpid());
+        QDEBUG("Sector %"PRId64" differs.\noffset\texpect\tactual\n",
+               sector_num + s);
+        /* Byte-by-byte dump, differing bytes flagged with "***". */
+        for (b = 0; b < 512; b++) {
+            if (expect[b] != actual[b]) {
+                QDEBUG("%02d:\t%02X\t%02X   ***\n", b,
+                       expect[b], actual[b]);
+            } else {
+                QDEBUG("%02d:\t%02X\t%02X\n", b, expect[b],
+                       actual[b]);
+            }
+        }
+        abort();
+        return -1;
+    }
+
+    return 0;
+}
+
+/*
+ * Completion callback of the reads issued for a full-image comparison.
+ * 'opaque' is the CompareFullCB; 'ret' is the result of the read. A failed
+ * read is resubmitted unchanged. A successful one is compared against the
+ * truth image, after which either the next chunk is read or, once the end of
+ * the image is reached, the comparison state is released.
+ */
+static void compare_full_images_cb(void *opaque, int ret)
+{
+    CompareFullCB *cf = opaque;
+
+    if (ret) {
+        /* Failed. Retry the operation. */
+        if (!bdrv_aio_readv(bs, cf->sector_num, &cf->qiov, cf->nb_sectors,
+                            compare_full_images_cb, cf)) {
+            die("bdrv_aio_readv\n");
+        }
+        return;
+    }
+
+    truth_io(cf->truth_buf, cf->sector_num, cf->nb_sectors, true);
+    verify(cf->truth_buf, cf->iov.iov_base, cf->sector_num, cf->nb_sectors);
+
+    cf->sector_num += cf->nb_sectors;
+    if (cf->sector_num >= total_sectors) {
+        /* Finished. Both buffers come from the aligned allocators, so both
+         * are released with qemu_vfree(). */
+        qemu_vfree(cf->truth_buf);
+        qemu_vfree(cf->iov.iov_base);
+        qemu_free(cf);
+        return;
+    }
+
+    /* Read more data to compare. The last chunk may be shorter. */
+    if (cf->sector_num + cf->max_nb_sectors > total_sectors) {
+        cf->nb_sectors = total_sectors - cf->sector_num;
+    } else {
+        cf->nb_sectors = cf->max_nb_sectors;
+    }
+    cf->iov.iov_len = cf->nb_sectors * 512;
+    qemu_iovec_init_external(&cf->qiov, &cf->iov, 1);
+    QDEBUG("FULL IMAGE COMPARISON: read sector_num=%" PRId64 " nb_sectors=%d\n",
+           cf->sector_num, cf->nb_sectors);
+
+    if (!bdrv_aio_readv(bs, cf->sector_num, &cf->qiov,
+                        cf->nb_sectors, compare_full_images_cb, cf)) {
+        die("bdrv_aio_readv\n");
+    }
+}
+
+/*
+ * Compare the whole test image against the truth image, one chunk of at most
+ * 1 MB at a time. The first read is submitted here; the remaining chunks are
+ * chained from compare_full_images_cb(). A mismatch aborts the process inside
+ * verify(), so this function always returns 0.
+ */
+static int compare_full_images(void)
+{
+    CompareFullCB *cmp;
+
+    printf("Performing a full comparison of the truth image and "
+           "the test image...\n");
+    fflush(stdout);
+
+    cmp = qemu_malloc(sizeof(CompareFullCB));
+    cmp->sector_num = 0;
+    cmp->max_nb_sectors = 1048576L / 512;
+    cmp->nb_sectors = MIN(cmp->max_nb_sectors, total_sectors);
+
+    /* Both buffers are sized for a full chunk even if the image is smaller. */
+    cmp->truth_buf = qemu_memalign(512, cmp->max_nb_sectors * 512);
+    cmp->iov.iov_base = qemu_blockalign(bs, cmp->max_nb_sectors * 512);
+    cmp->iov.iov_len = cmp->nb_sectors * 512;
+    qemu_iovec_init_external(&cmp->qiov, &cmp->iov, 1);
+
+    QDEBUG("FULL IMAGE COMPARISON: read sector_num=%" PRId64 " nb_sectors=%d\n",
+           cmp->sector_num, cmp->nb_sectors);
+    if (bdrv_aio_readv(bs, cmp->sector_num, &cmp->qiov, cmp->nb_sectors,
+                       compare_full_images_cb, cmp) == NULL) {
+        die("bdrv_aio_readv\n");
+    }
+
+    blksim_run_all_tasks();
+    QDEBUG("Finished full image comparison.\n");
+    return 0;
+}
+
+/* Build a non-negative 64-bit pseudo-random value out of two rand() calls:
+ * the first supplies the upper 32 bits, the second the lower ones. */
+static inline int64_t rand64(void)
+{
+    int64_t hi = rand();
+    int64_t lo = rand();
+    int64_t value = (hi << 32) | lo;
+
+    if (value < 0) {
+        return -value;
+    }
+    return value;
+}
+
+/*
+ * Return true if the sector range of request 'r' overlaps the range recorded
+ * in any other tester, unless that tester's operation is a flush or both
+ * operations are reads.
+ */
+static bool check_conflict(RandomIO * r)
+{
+    const int64_t r_begin = r->sector_num;
+    const int64_t r_end = r_begin + r->nb_sectors;
+    int idx;
+
+    for (idx = 0; idx < parallel; idx++) {
+        RandomIO *other = &testers[idx];
+        int64_t o_begin, o_end;
+
+        if (other == r) {
+            continue;
+        }
+        if (other->type == OP_AIO_FLUSH || other->type == OP_FLUSH) {
+            continue;
+        }
+        if (r->type == OP_READ && other->type == OP_READ) {
+            continue;
+        }
+
+        o_begin = other->sector_num;
+        o_end = o_begin + other->nb_sectors;
+        if ((r_begin <= o_begin && o_begin < r_end) ||
+            (o_begin <= r_begin && r_begin < o_end)) {
+            return true;   /* Conflict. */
+        }
+    }
+
+    return false;   /* No conflict. */
+}
+
+/*
+ * Submit the request described by 'r' to the test image.
+ *
+ * Unless r->allow_cancel is false, the request is a flush, or fail_prob is
+ * not positive, the request may be made to fail: either blksim is told to
+ * return a NULL acb or to fail the I/O with -EIO. After a successful
+ * submission the request may additionally be cancelled, depending on
+ * cancel_prob.
+ *
+ * Return false if the submitted request is cancelled or already finished
+ * (synchronous flush, NULL acb). Return true if the request is in flight and
+ * rand_io_cb() is expected to be called for it.
+ */
+static bool submit_rand_io(RandomIO * r)
+{
+    BlockDriverAIOCB *acb = NULL;
+    int ret;
+    const char *fail;
+
+    /* Decide which return code blksim should use for this request. */
+    if (!r->allow_cancel || r->type == OP_FLUSH || r->type == OP_AIO_FLUSH
+        || fail_prob <= 0) {
+        ret = 0;
+    } else if (rand() / (double)RAND_MAX > fail_prob) {
+        ret = 0;
+    } else if (rand() % 10 == 0) {
+        /* Tell blksim to return NULL acb. */
+        ret = RETURN_CODE_FOR_NULL_ACB;
+    } else {
+        /* Tell blksim to fail I/O operations with error code -EIO. */
+        ret = -EIO;
+    }
+
+    if (ret == 0) {
+        fail = "";
+    } else {
+        fail = "fail ";
+    }
+    QDEBUG("TESTER %03d:  %s%s  test%" PRIX64 " sector_num=%" PRId64
+           " nb_sectors=%d niov=%d\n", r->tester, fail, op_type_str[r->type],
+           r->uuid, r->sector_num, r->nb_sectors, r->qiov.niov);
+    printf("TESTER %03d:  %s%s  sector_num=%" PRId64 " nb_sectors=%d "
+           "niov=%d\n", r->tester, fail, op_type_str[r->type],
+           r->sector_num, r->nb_sectors, r->qiov.niov);
+
+    if (r->type == OP_FLUSH) {
+        /* This is special because it is a synchronous operation. */
+        if (bdrv_flush(bs) != 0) {
+            die("bdrv_flush failed");
+        }
+        return false;
+    }
+
+    /* Ensure all subrequests triggered by one outermost request either
+     * succeed together or fail together. Otherwise, the truth image and the
+     * test image will diverge. */
+    blksim_set_disk_io_return_code(ret);
+
+    switch (r->type) {
+    case OP_READ:
+        acb = bdrv_aio_readv(bs, r->sector_num, &r->qiov, r->nb_sectors,
+                             rand_io_cb, r);
+        break;
+    case OP_WRITE:
+        acb = bdrv_aio_writev(bs, r->sector_num, &r->qiov, r->nb_sectors,
+                              rand_io_cb, r);
+        break;
+    case OP_AIO_FLUSH:
+        acb = bdrv_aio_flush(bs, rand_io_cb, r);
+        break;
+    default:
+        die("Unknown OP");
+        break;
+    }
+
+    /* Restore the default so that later requests are not affected. */
+    blksim_set_disk_io_return_code(0);
+
+    if (!acb) {
+        /* A NULL acb is only acceptable if it was requested above. */
+        if (ret != RETURN_CODE_FOR_NULL_ACB) {
+            die("Unexpected NULL ACB");
+        }
+        return false;
+    }
+
+    if (r->allow_cancel && cancel_prob > 0 &&
+        rand() / (double)RAND_MAX <= cancel_prob) {
+        QDEBUG("TESTER %03d:  cancel %s test%" PRIX64 " sector_num=%" PRId64
+               " nb_sectors=%d niov=%d\n", r->tester, op_type_str[r->type],
+               r->uuid, r->sector_num, r->nb_sectors, r->qiov.niov);
+        printf("TESTER %03d:  cancel %s sector_num=%" PRId64
+               " nb_sectors=%d niov=%d\n", r->tester, op_type_str[r->type],
+               r->sector_num, r->nb_sectors, r->qiov.niov);
+        bdrv_aio_cancel(acb);
+        return false;
+    } else {
+        return true;
+    }
+}
+
+/*
+ * Turn 'r' into a new random READ or WRITE request: pick the operation and a
+ * sector range that does not conflict with the other testers, fill test_buf
+ * with fresh data for a write, and split the buffer into a random number of
+ * iovec entries (at most max_iov).
+ */
+static void prepare_read_write(RandomIO * r)
+{
+    /* Find the next region to perform io. */
+    do {
+        /* Do a READ or WRITE? */
+        if (rand() % 2) {
+            r->type = OP_READ;
+        } else {
+            r->type = OP_WRITE;
+        }
+
+        if (parallel <= 1 || (rand() % 2 == 0)) {
+            /* Perform a random I/O. */
+            r->sector_num = rand64() % total_sectors;
+        } else {
+            /* Perform an I/O next to a currently ongoing I/O. */
+            int id;
+            do {
+                id = rand() % parallel;
+            } while (id == r->tester);
+
+            RandomIO *p = &testers[id];
+            /* Start within 2 * io_size sectors of the other tester's range,
+             * clamped to the image. */
+            r->sector_num =
+                p->sector_num + 2 * io_size - rand64() % (4 * io_size);
+            if (r->sector_num < 0) {
+                r->sector_num = 0;
+            } else if (r->sector_num >= total_sectors) {
+                r->sector_num = total_sectors - 1;
+            }
+        }
+
+        /* Between 1 and io_size sectors, truncated at the end of the image. */
+        r->nb_sectors = 1 + rand64() % io_size;
+        if (r->sector_num + r->nb_sectors > total_sectors) {
+            r->nb_sectors = total_sectors - r->sector_num;
+        }
+    } while (check_conflict(r));
+
+    if (r->type == OP_WRITE) {
+        /* Fill test_buf with random data. */
+        int i, j;
+        for (i = 0; i < r->nb_sectors; i++) {
+            const uint64_t TEST_MAGIC = 0x0123456789ABCDEFULL;
+            /* The first byte is always 0xBB to indicate that this is not an
+             * empty sector (see verify()). The next 7 bytes of the sector
+             * come from the uuid of this request. The next 8 bytes store
+             * a magic number.  This info helps debugging. */
+            uint64_t *p = (uint64_t *)&r->test_buf[i * 512];
+            *p = r->uuid;
+            cpu_to_be64s(p);
+            r->test_buf[i * 512] = 0xBB; /* First byte marks sector non-empty */
+
+            p++;
+            *p = TEST_MAGIC;
+            cpu_to_be64s(p);
+
+            /* The rest of the sector is filled with random data. */
+            uint32_t *q = (uint32_t *) (p + 1);
+            int n = (512 - 2 * sizeof(uint64_t)) / sizeof(uint32_t);
+            for (j = 0; j < n; j++) {
+                *q++ = rand();
+            }
+        }
+    }
+
+    /* Determine the number of iov. Each entry covers a random, non-zero
+     * number of the remaining sectors; the last permitted entry takes
+     * whatever is left. */
+    int niov = 0;
+    uint8_t *p = r->test_buf;
+    int left = r->nb_sectors;
+    do {
+        if (niov == max_iov - 1) {
+            r->qiov.iov[niov].iov_len = left * 512;
+            r->qiov.iov[niov].iov_base = p;
+            niov++;
+            break;
+        }
+
+        int nb = 1 + rand() % left;
+        r->qiov.iov[niov].iov_len = nb * 512;
+        r->qiov.iov[niov].iov_base = p;
+        p += r->qiov.iov[niov].iov_len;
+        left -= nb;
+        niov++;
+    } while (left > 0);
+
+    qemu_iovec_init_external(&r->qiov, r->qiov.iov, niov);
+}
+
+/*
+ * Start the next round for tester 'r'. If all 'round' rounds have already
+ * been started, the tester is counted as finished instead. Otherwise new
+ * operations are generated and submitted until one of them is actually in
+ * flight, i.e., until submit_rand_io() returns true.
+ */
+static void perform_next_io(RandomIO * r)
+{
+    if (finished_round >= round) {
+        finished_testers++;
+        return;
+    }
+
+    finished_round++;
+    r->allow_cancel = true;
+
+    do {
+        r->uuid = test_uuid++;
+
+        /* Choose between an asynchronous flush, a synchronous flush, and a
+         * read/write request. */
+        if (aio_flush_prob > 0 && rand() / (double)RAND_MAX < aio_flush_prob) {
+            r->type = OP_AIO_FLUSH;
+        } else if (flush_prob > 0 && rand() / (double)RAND_MAX < flush_prob) {
+            r->type = OP_FLUSH;
+        }
+        else {
+            prepare_read_write(r);
+        }
+    } while (!submit_rand_io(r));
+}
+
+/*
+ * Completion callback of every asynchronous request submitted by
+ * submit_rand_io(). 'opaque' is the RandomIO of the request and 'ret' its
+ * result.
+ *
+ * A failed request is resubmitted (a failure while fail_prob is 0 is fatal).
+ * A successful read is verified against the truth image. A successful write
+ * is replayed on the truth image and, if verify_write is set, followed by a
+ * read of the same range that can neither be failed nor cancelled.
+ */
+static void rand_io_cb(void *opaque, int ret)
+{
+    RandomIO *r = opaque;
+
+    if (ret) {
+        if (fail_prob <= 0) {
+            die("Request %s sector_num=%"PRId64" nb_sectors=%d "
+                "failed while fail_prob=0.\n",
+                op_type_str[r->type], r->sector_num, r->nb_sectors);
+        } else {
+            /* Failed. Retry the operation. */
+            QDEBUG("TESTER %03d:  retry %s  test%" PRIX64 " sector_num=%"
+                   PRId64 " nb_sectors=%d niov=%d\n",
+                   r->tester, op_type_str[r->type], r->uuid,
+                   r->sector_num, r->nb_sectors, r->qiov.niov);
+            /* If the retry is not in flight (NULL acb or cancelled), move on
+             * to the next round. */
+            if (!submit_rand_io(r)) {
+                perform_next_io(r);
+            }
+            return;
+        }
+    } else {
+        QDEBUG("TESTER %03d:  finished %s  test%" PRIX64 " sector_num=%" PRId64
+               " nb_sectors=%d niov=%d\n", r->tester, op_type_str[r->type],
+               r->uuid, r->sector_num, r->nb_sectors, r->qiov.niov);
+    }
+
+    switch (r->type) {
+    case OP_AIO_FLUSH:
+        perform_next_io(r);
+        return;
+
+    case OP_READ:
+        /* Fetch the expected data and compare it with what was read. */
+        truth_io(r->truth_buf, r->sector_num, r->nb_sectors, true);
+        verify(r->truth_buf, r->test_buf, r->sector_num, r->nb_sectors);
+        perform_next_io(r);
+        return;
+
+    case OP_WRITE:
+        /* Write data to the truth image. */
+        truth_io(r->test_buf, r->sector_num, r->nb_sectors, false);
+        if (verify_write) {
+            r->type = OP_READ; /* Perform a read for the same data. */
+            r->allow_cancel = false; /* Ensure verification happens. */
+            /* Read back into test_buf as one contiguous iovec entry. */
+            r->qiov.niov = 1;
+            r->qiov.iov[0].iov_len = r->qiov.size;
+            memset(r->test_buf, 0xA5, r->qiov.size);    /* Fill in garbage. */
+            submit_rand_io(r);
+        } else {
+            perform_next_io(r);
+        }
+        return;
+
+    default:
+        die("Unknown OP");
+        return;
+    }
+}
+
+/*
+ * Parse 'arg' as a boolean. Returns true for "true" and false for "false".
+ * Any other value is reported and ends the program through
+ * auto_test_usage().
+ */
+static int read_bool(const char *arg)
+{
+    int val = true;
+    /* Examine the argument that was passed in rather than the getopt global
+     * 'optarg', so that the result does not depend on what the caller
+     * happened to parse last. */
+    if (strcmp(arg, "true") == 0) {
+        val = true;
+    } else if (strcmp(arg, "false") == 0) {
+        val = false;
+    } else {
+        printf("%s is neither 'true' nor 'false'\n", arg);
+        auto_test_usage();
+    }
+
+    return val;
+}
+
+/*
+ * Create the global block device 'bs' and open test_file on it with the given
+ * flags. If 'format' is not NULL, the driver of that name is used; otherwise
+ * NULL is passed to bdrv_open() as the driver. Any failure is fatal.
+ */
+static void open_test_file(const char *format, const char *test_file, int flags)
+{
+    BlockDriver *driver = NULL;
+
+    if (!(flags & BDRV_O_RDWR)) {
+        QDEBUG("Open image for comparison.\n");
+    } else {
+        QDEBUG("Open image for test.\n");
+    }
+
+    bs = bdrv_new("hda");
+    if (bs == NULL) {
+        die("bdrv_new failed\n");
+    }
+
+    if (format != NULL) {
+        driver = bdrv_find_format(format);
+        if (driver == NULL) {
+            die("Found no driver for format '%s'.\n", format);
+        }
+    }
+
+    if (bdrv_open(bs, test_file, flags, driver) < 0) {
+        die("Failed to open '%s'\n", test_file);
+    }
+}
+
+/*
+ * Run one complete test: open the test image and the truth image, optionally
+ * compare them, let 'parallel' testers perform 'round' rounds of random I/O,
+ * and optionally compare the two images again afterwards.
+ *
+ * truth_file      path of the truth image; must be at least as long as the
+ *                 test image
+ * test_file       name of the image under test, passed to bdrv_open()
+ * format          driver name for the test image, or NULL
+ * compare_before  compare the two images before any I/O is performed
+ * compare_after   compare the two images after all I/O has finished
+ * cache_flag      cache-related BDRV_O_* flags used for every open
+ *
+ * Every failure is fatal (die() or abort()).
+ */
+static void perform_test(const char *truth_file, const char *test_file,
+                    const char *format, int compare_before, int compare_after,
+                    int cache_flag)
+{
+    int i;
+
+    if (compare_before) {
+        /* Open as read-only to compare. */
+        open_test_file(format, test_file, cache_flag);
+    } else {
+        open_test_file(format, test_file, BDRV_O_RDWR | cache_flag);
+    }
+
+    fd = open(truth_file, O_RDWR | O_LARGEFILE, 0);
+    if (fd < 0) {
+        die("Failed to open '%s'\n", truth_file);
+    }
+
+    int64_t l0 = lseek(fd, 0, SEEK_END);
+    int64_t l1 = bdrv_getlength(bs);
+    if (l0 < 0 || l1 < 0 || l0 < l1) {
+        die("Mismatch: truth image %s length %"PRId64", test image %s "
+            "length %"PRId64"\n", truth_file, l0, test_file, l1);
+    }
+
+    total_sectors = l1 / 512;
+    if (total_sectors <= 1) {
+        die("Total sectors: %" PRId64 "\n", total_sectors);
+    }
+
+    /* Convert io_size from bytes to sectors and keep it between one sector
+     * and half of the image. */
+    io_size /= 512;
+    if (io_size <= 0) {
+        io_size = 1;
+    } else if (io_size > total_sectors / 2) {
+        io_size = total_sectors / 2;
+    }
+
+    if (compare_before) {
+        if (compare_full_images()) {
+            die("The original two files do not match.\n");
+        }
+
+        /* After comparison, reopen as writeable. */
+        bdrv_delete(bs);
+        open_test_file(format, test_file, BDRV_O_RDWR | cache_flag);
+    }
+
+    if (round > 0) {
+        /* Create testers. */
+        testers = qemu_malloc(sizeof(RandomIO) * parallel);
+        for (i = 0; i < parallel; i++) {
+            RandomIO *r = &testers[i];
+            r->test_buf = qemu_blockalign(bs, io_size * 512);
+            r->truth_buf = qemu_memalign(512, io_size * 512);
+            r->qiov.iov = qemu_malloc(sizeof(struct iovec) * max_iov);
+            r->sector_num = 0;
+            r->nb_sectors = 0;
+            r->type = OP_READ;
+            r->tester = i;
+        }
+        for (i = 0; i < parallel; i++) {
+            perform_next_io(&testers[i]);
+        }
+    }
+
+    /* Run the tests. It is possible that all testers have finished but there
+     * are still tasks in blksim due to copy_on_read or prefetching. Those
+     * tasks are ignored and a properly implemented driver should cancel
+     * those I/Os in bdrv_close() anyway. */
+    while (blksim_qemu_aio_wait() && finished_testers < parallel);
+
+    if (rand() % 10 == 0) {
+        /* With a random probability, finish the remaining tasks (especially
+         * prefetching) so that it can test more code paths. */
+        while (blksim_qemu_aio_wait());
+    }
+
+    if (round > 0) {
+        /* Verify the result, then destroy the testers. */
+        if (compare_after) {
+            /* Reopen as read-only to compare. */
+            bdrv_delete(bs);
+            if (blksim_has_task()) {
+                die("blksim still has tasks after the device is closed.\n"
+                    "This indicates that the device driver's bdrv_close() did "
+                    "not fully clean up timers, QEMUBH, copy_on_read, "
+                    "or prefetch.\n");
+            }
+            open_test_file(format, test_file, cache_flag);
+            if (compare_full_images()) {
+                die("The two files do not match after I/O operations.\n");
+            }
+        }
+
+        for (i = 0; i < parallel; i++) {
+            RandomIO *r = &testers[i];
+            /* Both buffers come from the aligned allocators and must be
+             * released with qemu_vfree(). */
+            qemu_vfree(r->test_buf);
+            qemu_vfree(r->truth_buf);
+            qemu_free(r->qiov.iov);
+        }
+        qemu_free(testers);
+    }
+
+    printf("Test process %d finished successfully\n", getpid());
+    bdrv_delete(bs);
+    close(fd);
+}
+
+/*
+ * Create 'file' with file_size bytes of pseudo-random content derived from
+ * 'seed'. The content is a sequence of blocks: with probability
+ * empty_block_prob a run of 1 to empty_block_chain all-zero blocks is
+ * written, otherwise one block of random data in which every 512-byte sector
+ * starts with a non-zero marker. Non-positive block_size and
+ * empty_block_chain fall back to 65536 and 1.
+ *
+ * Returns 0 on success, -EINVAL if file_size is not positive, or a negative
+ * errno value if the file cannot be created. A write error is fatal.
+ */
+static int create_test_file(int seed, const char *file, int64_t file_size,
+                            int block_size, double empty_block_prob,
+                            int empty_block_chain)
+{
+    if (file_size <= 0) {
+        fprintf (stderr, "file_size is not positive: %"PRId64"\n", file_size);
+        return -EINVAL;
+    }
+
+    fd = qemu_open(file, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0644);
+    if (fd < 0) {
+        /* Save errno first: fprintf() is allowed to modify it. */
+        int err = errno;
+        fprintf(stderr, "Failed to create %s.\n", file);
+        return -err;
+    }
+
+    if (empty_block_chain <= 0) {
+        empty_block_chain = 1;
+    }
+    if (block_size <= 0) {
+        block_size = 65536;
+    }
+
+    srand (seed);
+    uint8_t *buf = qemu_malloc (empty_block_chain * block_size);
+    ssize_t len;
+
+    printf ("Creating test file...\n");
+    int64_t offset = 0;
+    while (file_size > 0) {
+        if (empty_block_prob == 0 ||
+            rand() / (double)RAND_MAX >= empty_block_prob) {
+            /* Generate a random, non-empty block. */
+            size_t i;
+            size_t nwords;
+            uint32_t *q = (uint32_t *)buf;
+            len = MIN(file_size, block_size);
+            nwords = len / sizeof(uint32_t);
+            for (i = 0; i < nwords; i++) {
+                if (offset % 512 == 0) {
+                    *q++ = UINT32_C(0xAAAAAAAA); /* Make sector non-empty. */
+                } else {
+                    *q++ = rand();
+                }
+                offset += sizeof(uint32_t);
+            }
+            /* If len is not a multiple of 4, also generate the trailing
+             * bytes so that no stale buffer content reaches the file and
+             * 'offset' keeps matching the file position. */
+            for (i = nwords * sizeof(uint32_t); i < (size_t)len; i++) {
+                buf[i] = (offset % 512 == 0) ? 0xAA : (uint8_t)rand();
+                offset++;
+            }
+        } else {
+            /* Generate a chain of empty blocks. */
+            int n = 1 + rand () % empty_block_chain;
+            len = n * block_size;
+            if (len > file_size) {
+                len = file_size;
+            }
+            memset (buf, 0, len);
+            offset += len;
+        }
+
+        if (qemu_write_full(fd, buf, len) != len) {
+            die("Error in writing %s: %s\n", file, strerror(errno));
+        }
+
+        file_size -= len;
+    }
+
+    qemu_free (buf);
+    close (fd);
+    return 0;
+}
+
+/*
+ * Entry point of 'qemu-io --auto'. Parses the command line and then either
+ * creates a test file (--create) or runs the automated test on the images
+ * given by --truth and --test.
+ *
+ * Returns the result of create_test_file() in the --create case and 0 after
+ * a completed test. Invalid usage ends the program through
+ * auto_test_usage().
+ */
+static int auto_test(int argc, char **argv)
+{
+    int c;
+    const char *truth_file = NULL;
+    const char *test_file = NULL;
+    const char *format = NULL;
+    int compare_before = false;
+    int compare_after = true;
+    int seed = 0;
+    const char *create_file = NULL;
+    int block_size = 65536;
+    double empty_block_prob = 0.2;
+    int empty_block_chain = 10;
+    int64_t file_size = 0;
+    int cache_flag = BDRV_O_CACHE_WB;
+
+    const struct option lopt[] = {
+        {"auto", 0, 0, 'a'},
+        {"help", 0, 0, 'h'},
+        {"seed", 1, 0, 'd'},
+        {"truth", 1, 0, 'b'},
+        {"test", 1, 0, 't'},
+        {"format", 1, 0, 'f'},
+        {"rand_time", 1, 0, 'n'},
+        {"fail_prob", 1, 0, 'u'},
+        {"cancel_prob", 1, 0, 'c'},
+        {"aio_flush_prob", 1, 0, 'w'},
+        {"flush_prob", 1, 0, 'y'},
+        {"round", 1, 0, 'r'},
+        {"parallel", 1, 0, 'p'},
+        {"compare_before", 1, 0, 'm'},
+        {"verify_write", 1, 0, 'v'},
+        {"compare_after", 1, 0, 'e'},
+        {"max_iov", 1, 0, 'i'},
+        {"io_size", 1, 0, 's'},
+        {"instant_qemubh", 1, 0, 'q'},
+        {"create", 1, 0, 'g'},
+        {"file_size", 1, 0, 'j'},
+        {"block_size", 1, 0, 'k'},
+        {"empty_block_prob", 1, 0, 'l'},
+        {"empty_block_chain", 1, 0, 'x'},
+        {"cache", 1, 0, 'z'},
+        {NULL, 0, NULL, 0}
+    };
+
+    /* Restart option parsing from the first argument. */
+    optind = 1;
+    while ((c = getopt_long(argc, argv,
+            "ahc:u:p:q:i:f:d:b:t:r:m:v:e:s:g:j:k:l:x:z:", lopt, NULL)) != -1) {
+        switch (c) {
+        case 'a':
+            break;
+
+        case 'h':
+            auto_test_usage();
+            return 0;
+
+        case 'q':
+            blksim_set_instant_qemubh(read_bool(optarg));
+            break;
+
+        case 'w':
+            aio_flush_prob = atof(optarg);
+            break;
+
+        case 'y':
+            flush_prob = atof(optarg);
+            break;
+
+        case 'c':
+            cancel_prob = atof(optarg);
+            break;
+
+        case 'u':
+            fail_prob = atof(optarg);
+            break;
+
+        case 'n':
+            rand_time = atoll(optarg);
+            break;
+
+        case 'i':
+            max_iov = atoi(optarg);
+            break;
+
+        case 'p':
+            parallel = atoi(optarg);
+            break;
+
+        case 'v':
+            verify_write = read_bool(optarg);
+            break;
+
+        case 'm':
+            compare_before = read_bool(optarg);
+            break;
+
+        case 'e':
+            compare_after = read_bool(optarg);
+            break;
+
+        case 'd':
+            seed = atoll(optarg);
+            break;
+
+        case 'f':
+            format = optarg;
+            break;
+
+        case 'b':
+            truth_file = optarg;
+            break;
+
+        case 't':
+            test_file = optarg;
+            break;
+
+        case 's':
+            io_size = atoll(optarg);
+            break;
+
+        case 'r':
+            round = atoll(optarg);
+            break;
+
+        case 'g':
+            create_file = optarg;
+            break;
+
+        case 'k':
+            block_size = atoi(optarg);
+            break;
+
+        case 'j':
+            file_size = atoll(optarg);
+            break;
+
+        case 'l':
+            empty_block_prob = atof(optarg);
+            break;
+
+        case 'x':
+            empty_block_chain = atoi(optarg);
+            break;
+
+        case 'z':
+            if (!strcasecmp(optarg, "writethrough")) {
+                cache_flag = 0;
+            } else if (!strcasecmp(optarg, "writeback")) {
+                cache_flag = BDRV_O_CACHE_WB;
+            } else if (!strcasecmp(optarg, "none")) {
+                cache_flag = BDRV_O_NOCACHE;
+            } else {
+                die ("Unknown cache option: %s\n", optarg);
+            }
+            break;
+
+        default:
+            auto_test_usage();
+            return 1;
+        }
+    }
+
+    if (create_file) {
+        return create_test_file(seed, create_file, file_size, block_size,
+                                empty_block_prob, empty_block_chain);
+    }
+
+    if (!truth_file || !test_file) {
+        auto_test_usage();
+        return 1;
+    }
+
+    /* A hack to convince FVD that it is not running in a qemu-tool so that
+     * prefetching and copy_on_read can be enabled for testing. Note that
+     * prefetching and copy_on_read are disabled for qemu-nbd. */
+    rt_clock = (QEMUClock *) - 1;
+
+    if (parallel <= 0) {
+        parallel = 1;
+    }
+    /* Each tester's iovec array has max_iov entries and at least one of them
+     * is always filled in, so a non-positive value would overrun the array. */
+    if (max_iov <= 0) {
+        max_iov = 1;
+    }
+    init_blksim(false /*no print */ , rand_time);
+    bdrv_init();
+    srand(seed);
+    perform_test(truth_file, test_file, format, compare_before,
+                 compare_after, cache_flag);
+    return 0;
+}
diff --git a/qemu-io-sim.c b/qemu-io-sim.c
new file mode 100644
index 0000000..923c1b8
--- /dev/null
+++ b/qemu-io-sim.c
@@ -0,0 +1,127 @@ 
+/*
+ * Extension of qemu-io to work with the simulated block device driver blksim
+ *
+ * Copyright IBM, Corp. 2010
+ *
+ * Authors:
+ *    Chunqiang Tang <ctang@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+/*=============================================================================
+ * qemu-io-sim works with qemu-io to perform simulated testing. The 'sim'
+ * command allows the user to control the order of disk I/O and callback
+ * activities in order to test rare race conditions. See block/blksim.c.
+ * Note that in the manual mode, qemu-io's 'sim' command can only work with
+ * qemu-io's 'aio_read', 'aio_write', and 'flush' commands. The automated
+ * testing mode, 'qemu-io --auto', performs a much more comprehensive fully
+ * automated test (see qemu-io-auto.c). Below is one example of using qemu-io
+ * to perform manual testing in the simulation mode.
+
+$ qemu-img create -f qcow2 -obacking_fmt=blksim -b base.raw img.qcow2
+Formatting 'img.qcow2', fmt=qcow2 size=1073741824 backing_file='base.raw' backing_fmt='blksim' encryption=off cluster_size=0
+
+$ qemu-io -f qcow2 blksim:img.qcow2
+Execute READ blksim:img.qcow2 sector_num=0 nb_sectors=1
+Execute READ blksim:img.qcow2 sector_num=384 nb_sectors=1
+Execute READ blksim:img.qcow2 sector_num=128 nb_sectors=128
+Execute READ blksim:img.qcow2 sector_num=0 nb_sectors=1
+Execute READ blksim:img.qcow2 sector_num=0 nb_sectors=1
+Execute READ blksim:img.qcow2 sector_num=0 nb_sectors=1
+qemu-io> aio_write 0 512
+Execute READ blksim:img.qcow2 sector_num=256 nb_sectors=128
+Execute WRITE blksim:img.qcow2 sector_num=256 nb_sectors=128
+Execute FLUSH blksim:img.qcow2
+Execute WRITE blksim:img.qcow2 sector_num=512 nb_sectors=128
+Execute FLUSH blksim:img.qcow2
+Execute WRITE blksim:img.qcow2 sector_num=384 nb_sectors=1
+Execute WRITE blksim:img.qcow2 sector_num=256 nb_sectors=128
+Queue WRITE uuid=0 filename=blksim:img.qcow2 sector_num=640 nb_sectors=1
+qemu-io> sim list
+uuid=0  WRITE          file=blksim:img.qcow2  sector_num=640  nb_sectors=1
+qemu-io> sim 0
+Execute WRITE blksim:img.qcow2 sector_num=640 nb_sectors=1
+Queue WRITE_CALLBACK uuid=1 filename=blksim:img.qcow2 sector_num=640 nb_sectors=1
+qemu-io> sim 1
+Execute READ base.raw sector_num=1 nb_sectors=127
+Execute WRITE blksim:img.qcow2 sector_num=641 nb_sectors=127
+Execute FLUSH blksim:img.qcow2
+Execute WRITE blksim:img.qcow2 sector_num=512 nb_sectors=128
+wrote 512/512 bytes at offset 0
+512.000000 bytes, 1 ops; 0:00:09.00 (53.333589 bytes/sec and 0.1042 ops/sec)
+qemu-io> aio_write 65536 1024
+Execute WRITE blksim:img.qcow2 sector_num=256 nb_sectors=128
+Queue WRITE uuid=2 filename=blksim:img.qcow2 sector_num=768 nb_sectors=2
+qemu-io> aio_read 1048576 1024
+Queue READ uuid=3 filename=base.raw sector_num=2048 nb_sectors=2
+qemu-io> sim list
+uuid=2  WRITE          file=blksim:img.qcow2  sector_num=768  nb_sectors=2
+uuid=3  READ           file=base.raw  sector_num=2048  nb_sectors=2
+qemu-io> sim 2
+Execute WRITE blksim:img.qcow2 sector_num=768 nb_sectors=2
+Queue WRITE_CALLBACK uuid=4 filename=blksim:img.qcow2 sector_num=768 nb_sectors=2
+qemu-io> sim list
+uuid=3  READ           file=base.raw  sector_num=2048  nb_sectors=2
+uuid=4  CALLBACK WRITE file=blksim:img.qcow2  sector_num=768  nb_sectors=2
+qemu-io> sim 4
+Execute READ base.raw sector_num=130 nb_sectors=126
+Execute WRITE blksim:img.qcow2 sector_num=770 nb_sectors=126
+Execute FLUSH blksim:img.qcow2
+Execute WRITE blksim:img.qcow2 sector_num=512 nb_sectors=128
+wrote 1024/1024 bytes at offset 65536
+1 KiB, 1 ops; 0:00:20.00 (50.304774 bytes/sec and 0.0491 ops/sec)
+
+*=============================================================================*/
+
+#include "block/blksim.h"
+
+/* Print the usage text of the 'sim' command to stdout. */
+static void sim_help(void)
+{
+    fputs("\nsim list\t\tlist all simulation tasks\n\nsim <#task> [#ret]\t"
+          "run a simulation task, optionally using #ret as the return value "
+          "of a read/write operation\n\nsim all [#ret]\t\trun all tasks, "
+          "optionally using #ret as the return value of read/write tasks\n"
+          "\nsim prefetch\t\tstart prefetching\n", stdout);
+}
+
+/* Handle 'sim list', 'sim all [#ret]' and 'sim <#task> [#ret]'. A task id
+ * that is not a number prints the help instead of running task 0. */
+static int sim_f(int argc, char **argv)
+{
+    int ret = (argc == 3) ? atoi(argv[2]) : 0;
+    char *end;
+    if (argc != 2 && argc != 3) {
+        sim_help();
+    } else if (strcmp(argv[1], "list") == 0) {
+        blksim_list_tasks();
+    } else if (strcmp(argv[1], "all") == 0) {
+        blksim_set_disk_io_return_code(ret);
+        printf("Executed %d tasks.\n", blksim_run_all_tasks());
+        blksim_set_disk_io_return_code(0);
+    } else {
+        long long uuid = strtoll(argv[1], &end, 10);
+        if (end == argv[1] || *end != '\0') {
+            sim_help();
+        } else {
+            blksim_set_disk_io_return_code(ret);
+            blksim_run_task_by_uuid(uuid);
+            blksim_set_disk_io_return_code(0);
+        }
+    }
+    return 0;
+}
+
+/* 'sim' command descriptor: one sub-command or task id, plus optional #ret. */
+static const cmdinfo_t sim_cmd = {
+    .name = "sim",
+    .altname = "s",
+    .cfunc = sim_f,
+    .argmin = 1, .argmax = 2,
+    .args = "list | all [#ret] | <#task> [#ret]",
+    .oneline = "use simulation to control the order of disk I/Os and callbacks",
+    .help = sim_help,
+};
diff --git a/qemu-io.c b/qemu-io.c
index 4470e49..27591f0 100644
--- a/qemu-io.c
+++ b/qemu-io.c
@@ -1584,7 +1584,7 @@  static const cmdinfo_t close_cmd = {
 	.oneline	= "close the current open file",
 };
 
-static int openfile(char *name, int flags, int growable)
+static int openfile(char *name, const char *fmt, int flags, int growable)
 {
 	if (bs) {
 		fprintf(stderr, "file open already, try 'help close'\n");
@@ -1597,9 +1597,17 @@  static int openfile(char *name, int flags, int growable)
 			return 1;
 		}
 	} else {
+                BlockDriver *drv = NULL;
+                if (fmt && !(drv = bdrv_find_format (fmt))) {
+                        fprintf(stderr, "%s: can't find driver for format "
+                                "%s \n", progname, fmt);
+                        bs = NULL;
+                        return 1;
+                }
+
 		bs = bdrv_new("hda");
 
-		if (bdrv_open(bs, name, flags, NULL) < 0) {
+		if (bdrv_open(bs, name, flags, drv) < 0) {
 			fprintf(stderr, "%s: can't open device %s\n", progname, name);
 			bs = NULL;
 			return 1;
@@ -1636,7 +1644,7 @@  static const cmdinfo_t open_cmd = {
 	.argmin		= 1,
 	.argmax		= -1,
 	.flags		= CMD_NOFILE_OK,
-	.args		= "[-Crsn] [path]",
+	.args		= "[-Crsn] [-f <format>] [path]",
 	.oneline	= "open the file specified by path",
 	.help		= open_help,
 };
@@ -1648,8 +1656,9 @@  open_f(int argc, char **argv)
 	int readonly = 0;
 	int growable = 0;
 	int c;
+        const char *fmt = NULL;
 
-	while ((c = getopt(argc, argv, "snrg")) != EOF) {
+	while ((c = getopt(argc, argv, "snrgf:")) != EOF) {
 		switch (c) {
 		case 's':
 			flags |= BDRV_O_SNAPSHOT;
@@ -1663,6 +1672,9 @@  open_f(int argc, char **argv)
 		case 'g':
 			growable = 1;
 			break;
+		case 'f':
+                        fmt = optarg;
+			break;
 		default:
 			return command_usage(&open_cmd);
 		}
@@ -1675,7 +1687,7 @@  open_f(int argc, char **argv)
 	if (optind != argc - 1)
 		return command_usage(&open_cmd);
 
-	return openfile(argv[optind], flags, growable);
+	return openfile(argv[optind], fmt, flags, growable);
 }
 
 static int
@@ -1701,10 +1713,13 @@  init_check_command(
 	return 1;
 }
 
+#include "qemu-io-sim.c"
+#include "qemu-io-auto.c"
+
 static void usage(const char *name)
 {
 	printf(
-"Usage: %s [-h] [-V] [-rsnm] [-c cmd] ... [file]\n"
+"Usage: %s [-h] [-a] [-V] [-rsnm] [-c cmd] ... [file]\n"
 "QEMU Disk exerciser\n"
 "\n"
 "  -c, --cmd            command to execute\n"
@@ -1714,18 +1729,19 @@  static void usage(const char *name)
 "  -g, --growable       allow file to grow (only applies to protocols)\n"
 "  -m, --misalign       misalign allocations for O_DIRECT\n"
 "  -k, --native-aio     use kernel AIO implementation (on Linux only)\n"
+"  -f, --format         image format of the file\n"
+"  -a, --auto           fully automated test\n"
 "  -h, --help           display this help and exit\n"
 "  -V, --version        output version information and exit\n"
 "\n",
 	name);
 }
 
-
 int main(int argc, char **argv)
 {
 	int readonly = 0;
 	int growable = 0;
-	const char *sopt = "hVc:rsnmgk";
+	const char *sopt = "hVc:rsnmgkaf:d";
         const struct option lopt[] = {
 		{ "help", 0, NULL, 'h' },
 		{ "version", 0, NULL, 'V' },
@@ -1737,11 +1753,15 @@  int main(int argc, char **argv)
 		{ "misalign", 0, NULL, 'm' },
 		{ "growable", 0, NULL, 'g' },
 		{ "native-aio", 0, NULL, 'k' },
+		{ "format", 1, NULL, 'f' },
+		{ "auto", 0, NULL, 'a' },
+		{ "sim", 0, NULL, 'd' },
 		{ NULL, 0, NULL, 0 }
 	};
 	int c;
 	int opt_index = 0;
 	int flags = 0;
+        const char *fmt = NULL;
 
 	progname = basename(argv[0]);
 
@@ -1756,6 +1776,12 @@  int main(int argc, char **argv)
 		case 'c':
 			add_user_command(optarg);
 			break;
+		case 'd':
+                        /* A hack to convince FVD that it is not running in
+                         * a qemu-tool so that prefetching and copy_on_read
+                         * can be enabled for testing with blksim. */
+                        rt_clock = (QEMUClock *) - 1;
+			break;
 		case 'r':
 			readonly = 1;
 			break;
@@ -1768,6 +1794,11 @@  int main(int argc, char **argv)
 		case 'k':
 			flags |= BDRV_O_NATIVE_AIO;
 			break;
+		case 'f':
+                        fmt = optarg;
+                        break;
+		case 'a':
+                        return auto_test(argc, argv);
 		case 'V':
 			printf("%s version %s\n", progname, VERSION);
 			exit(0);
@@ -1807,6 +1838,7 @@  int main(int argc, char **argv)
 	add_command(&discard_cmd);
 	add_command(&alloc_cmd);
 	add_command(&map_cmd);
+        add_command(&sim_cmd);
 
 	add_args_command(init_args_command);
 	add_check_command(init_check_command);
@@ -1817,7 +1849,7 @@  int main(int argc, char **argv)
         }
 
 	if ((argc - optind) == 1)
-		openfile(argv[optind], flags, growable);
+		openfile(argv[optind], fmt, flags, growable);
 	command_loop();
 
 	/*
diff --git a/qemu-tool.c b/qemu-tool.c
index 392e1c9..ebf7355 100644
--- a/qemu-tool.c
+++ b/qemu-tool.c
@@ -16,11 +16,11 @@ 
 #include "qemu-timer.h"
 #include "qemu-log.h"
 #include "sysemu.h"
+#include "block/blksim.h"
 
 #include <sys/time.h>
 
 QEMUClock *rt_clock;
-
 FILE *logfile;
 
 struct QEMUBH
@@ -73,34 +73,115 @@  void monitor_protocol_event(MonitorEvent event, QObject *data)
 {
 }
 
-QEMUBH *qemu_bh_new(QEMUBHFunc *cb, void *opaque)
+/*
+ * In the simulation mode, the QEMUBH and time related functions are handled
+ * differently through simulation.
+ */
+/* Return blksim's simulated time, or wall-clock milliseconds otherwise. */
+int64_t qemu_get_clock(QEMUClock *clock)
+{
+    qemu_timeval now;
+
+    if (using_blksim()) {
+        return blksim_get_time();
+    }
+    qemu_gettimeofday(&now);
+    return (now.tv_sec * 1000000000LL + (now.tv_usec * 1000)) / 1000000;
+}
+
+/* Forward to blksim_mod_timer(); outside simulation mode, report and exit. */
+void qemu_mod_timer(QEMUTimer *ts, int64_t expire_time)
+{
+    if (!using_blksim()) {
+        fprintf(stderr, "A QEMU tool should not invoke qemu_mod_timer() "
+                "unless it is in the simulation mode.\n");
+        exit(1);
+    }
+
+    blksim_mod_timer(ts, expire_time);
+}
+
+/* Create a timer through blksim_new_timer(); 'clock' is not forwarded.
+ * Outside simulation mode the call is reported on stderr and exits with 1. */
+QEMUTimer *qemu_new_timer(QEMUClock *clock, QEMUTimerCB *cb, void *opaque)
+{
+    if (!using_blksim()) {
+        fprintf(stderr, "A QEMU tool should not invoke qemu_new_timer() "
+                "unless it is in the simulation mode.\n");
+        exit(1);
+    }
+
+    return blksim_new_timer(cb, opaque);
+}
+
+void qemu_free_timer (QEMUTimer * ts)
 {
-    QEMUBH *bh;
+    if (using_blksim()) {
+        blksim_free_timer (ts);
+    }
+    else {
+        fprintf (stderr, "A QEMU tool should not invoke qemu_free_timer() "
+                 "unless it is in the simulation mode.\n");
+        exit (1);
+    }
+}
 
-    bh = qemu_malloc(sizeof(*bh));
-    bh->cb = cb;
-    bh->opaque = opaque;
+/* Forward to blksim_del_timer(); outside simulation mode, report and exit. */
+void qemu_del_timer(QEMUTimer *ts)
+{
+    if (!using_blksim()) {
+        fprintf(stderr, "A QEMU tool should not invoke qemu_del_timer() "
+                "unless it is in the simulation mode.\n");
+        exit(1);
+    }
+
+    blksim_del_timer(ts);
+}
 
-    return bh;
+QEMUBH *qemu_bh_new (QEMUBHFunc * cb, void *opaque)
+{
+    if (using_blksim()) {
+        return blksim_new_timer (cb, opaque);
+    }
+    else {
+        QEMUBH *bh;
+        bh = qemu_malloc (sizeof (*bh));
+        bh->cb = cb;
+        bh->opaque = opaque;
+        return bh;
+    }
 }
 
-int qemu_bh_poll(void)
+int qemu_bh_poll (void)
 {
     return 0;
 }
 
-void qemu_bh_schedule(QEMUBH *bh)
+void qemu_bh_schedule (QEMUBH * bh)
 {
-    bh->cb(bh->opaque);
+    if (using_blksim()) {
+        blksim_bh_schedule (bh);
+    }
+    else {
+        bh->cb (bh->opaque);
+    }
 }
 
-void qemu_bh_cancel(QEMUBH *bh)
+void qemu_bh_cancel (QEMUBH * bh)
 {
+    if (using_blksim()) {
+        blksim_del_timer (bh);
+    }
 }
 
-void qemu_bh_delete(QEMUBH *bh)
+void qemu_bh_delete (QEMUBH * bh)
 {
-    qemu_free(bh);
+    if (using_blksim()) {
+        blksim_free_timer (bh);
+    }
+    else {
+        qemu_free (bh);
+    }
 }
 
 int qemu_set_fd_handler2(int fd,