diff mbox

[RFC,v2,34/49] replay: replay aio requests

Message ID 20140717110509.8352.58763.stgit@PASHA-ISP
State New
Headers show

Commit Message

Pavel Dovgalyuk July 17, 2014, 11:05 a.m. UTC
This patch adds identifier to aio requests. ID is used for creating bottom
halves and identifying them while replaying.
The patch also introduces several functions that make possible replaying
of the aio requests.

Signed-off-by: Pavel Dovgalyuk <pavel.dovgaluk@ispras.ru>
---
 block.c               |   79 +++++++++++++++++++++++++++++++++++++++++++------
 block/qcow2.c         |    4 ++
 dma-helpers.c         |    6 ++--
 hw/block/virtio-blk.c |   10 +++---
 hw/ide/atapi.c        |    9 ++++--
 hw/ide/core.c         |   14 ++++++---
 include/block/block.h |   15 +++++++++
 include/qemu-common.h |    2 +
 qemu-io-cmds.c        |    2 +
 stubs/replay.c        |    5 +++
 trace-events          |    2 +
 util/iov.c            |    4 ++
 12 files changed, 126 insertions(+), 26 deletions(-)
diff mbox

Patch

diff --git a/block.c b/block.c
index fb6c849..849b75a 100644
--- a/block.c
+++ b/block.c
@@ -84,7 +84,8 @@  static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
                                                BdrvRequestFlags flags,
                                                BlockDriverCompletionFunc *cb,
                                                void *opaque,
-                                               bool is_write);
+                                               bool is_write,
+                                               bool aio_replay);
 static void coroutine_fn bdrv_co_do_rw(void *opaque);
 static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
     int64_t sector_num, int nb_sectors, BdrvRequestFlags flags);
@@ -4370,7 +4371,19 @@  BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
     trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque);
 
     return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, 0,
-                                 cb, opaque, false);
+                                 cb, opaque, false, false);
+}
+
+BlockDriverAIOCB *bdrv_aio_readv_replay(BlockDriverState *bs,
+                                        int64_t sector_num,
+                                        QEMUIOVector *qiov, int nb_sectors,
+                                        BlockDriverCompletionFunc *cb,
+                                        void *opaque)
+{
+    trace_bdrv_aio_readv_replay(bs, sector_num, nb_sectors, opaque);
+
+    return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, 0,
+                                 cb, opaque, false, true);
 }
 
 BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
@@ -4380,7 +4393,19 @@  BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
     trace_bdrv_aio_writev(bs, sector_num, nb_sectors, opaque);
 
     return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, 0,
-                                 cb, opaque, true);
+                                 cb, opaque, true, false);
+}
+
+BlockDriverAIOCB *bdrv_aio_writev_replay(BlockDriverState *bs,
+                                         int64_t sector_num,
+                                         QEMUIOVector *qiov, int nb_sectors,
+                                         BlockDriverCompletionFunc *cb,
+                                         void *opaque)
+{
+    trace_bdrv_aio_writev_replay(bs, sector_num, nb_sectors, opaque);
+
+    return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, 0,
+                                 cb, opaque, true, false);
 }
 
 BlockDriverAIOCB *bdrv_aio_write_zeroes(BlockDriverState *bs,
@@ -4391,7 +4416,7 @@  BlockDriverAIOCB *bdrv_aio_write_zeroes(BlockDriverState *bs,
 
     return bdrv_co_aio_rw_vector(bs, sector_num, NULL, nb_sectors,
                                  BDRV_REQ_ZERO_WRITE | flags,
-                                 cb, opaque, true);
+                                 cb, opaque, true, true);
 }
 
 
@@ -4527,7 +4552,8 @@  static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
  * requests. However, the fields opaque and error are left unmodified as they
  * are used to signal failure for a single request to the caller.
  */
-int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
+int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs,
+                        bool replay)
 {
     MultiwriteCB *mcb;
     int i;
@@ -4565,7 +4591,7 @@  int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
         bdrv_co_aio_rw_vector(bs, reqs[i].sector, reqs[i].qiov,
                               reqs[i].nb_sectors, reqs[i].flags,
                               multiwrite_cb, mcb,
-                              true);
+                              true, replay);
     }
 
     return 0;
@@ -4714,7 +4740,11 @@  static void coroutine_fn bdrv_co_do_rw(void *opaque)
             acb->req.nb_sectors, acb->req.qiov, acb->req.flags);
     }
 
-    acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_co_em_bh, acb);
+    if (acb->common.replay) {
+        acb->bh = aio_bh_new_replay(bdrv_get_aio_context(bs), bdrv_co_em_bh, acb, acb->common.replay_step);
+    } else {
+        acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_co_em_bh, acb);
+    }
     qemu_bh_schedule(acb->bh);
 }
 
@@ -4725,7 +4755,8 @@  static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
                                                BdrvRequestFlags flags,
                                                BlockDriverCompletionFunc *cb,
                                                void *opaque,
-                                               bool is_write)
+                                               bool is_write,
+                                               bool aio_replay)
 {
     Coroutine *co;
     BlockDriverAIOCBCoroutine *acb;
@@ -4737,6 +4768,10 @@  static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
     acb->req.flags = flags;
     acb->is_write = is_write;
     acb->done = NULL;
+    acb->common.replay = aio_replay;
+    if (aio_replay) {
+        acb->common.replay_step = replay_get_current_step();
+    }
 
     co = qemu_coroutine_create(bdrv_co_do_rw);
     qemu_coroutine_enter(co, acb);
@@ -4750,7 +4785,12 @@  static void coroutine_fn bdrv_aio_flush_co_entry(void *opaque)
     BlockDriverState *bs = acb->common.bs;
 
     acb->req.error = bdrv_co_flush(bs);
-    acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_co_em_bh, acb);
+    if (acb->common.replay) {
+        acb->bh = aio_bh_new_replay(bdrv_get_aio_context(bs), bdrv_co_em_bh,
+                                    acb, acb->common.replay_step);
+    } else {
+        acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_co_em_bh, acb);
+    }
     qemu_bh_schedule(acb->bh);
 }
 
@@ -4771,6 +4811,25 @@  BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs,
     return &acb->common;
 }
 
+BlockDriverAIOCB *bdrv_aio_flush_replay(BlockDriverState *bs,
+        BlockDriverCompletionFunc *cb, void *opaque)
+{
+    trace_bdrv_aio_flush(bs, opaque);
+
+    Coroutine *co;
+    BlockDriverAIOCBCoroutine *acb;
+
+    acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
+    acb->done = NULL;
+    acb->common.replay = true;
+    acb->common.replay_step = replay_get_current_step();
+
+    co = qemu_coroutine_create(bdrv_aio_flush_co_entry);
+    qemu_coroutine_enter(co, acb);
+
+    return &acb->common;
+}
+
 static void coroutine_fn bdrv_aio_discard_co_entry(void *opaque)
 {
     BlockDriverAIOCBCoroutine *acb = opaque;
@@ -4821,6 +4880,8 @@  void *qemu_aio_get(const AIOCBInfo *aiocb_info, BlockDriverState *bs,
     acb->bs = bs;
     acb->cb = cb;
     acb->opaque = opaque;
+    acb->replay_step = 0;
+    acb->replay = false;
     return acb;
 }
 
diff --git a/block/qcow2.c b/block/qcow2.c
index d9da238..358a144 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -988,6 +988,8 @@  static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num,
     uint8_t *cluster_data = NULL;
 
     qemu_iovec_init(&hd_qiov, qiov->niov);
+    hd_qiov.replay = qiov->replay;
+    hd_qiov.replay_step = qiov->replay_step;
 
     qemu_co_mutex_lock(&s->lock);
 
@@ -1131,6 +1133,8 @@  static coroutine_fn int qcow2_co_writev(BlockDriverState *bs,
                                  remaining_sectors);
 
     qemu_iovec_init(&hd_qiov, qiov->niov);
+    hd_qiov.replay = qiov->replay;
+    hd_qiov.replay_step = qiov->replay_step;
 
     s->cluster_cache_offset = -1; /* disable compressed cache */
 
diff --git a/dma-helpers.c b/dma-helpers.c
index 05aadea..1606a19 100644
--- a/dma-helpers.c
+++ b/dma-helpers.c
@@ -219,6 +219,8 @@  BlockDriverAIOCB *dma_bdrv_io(
     dbs->io_func = io_func;
     dbs->bh = NULL;
     qemu_iovec_init(&dbs->iov, sg->nsg);
+    dbs->iov.replay = true;
+    dbs->iov.replay_step = replay_get_current_step();
     dma_bdrv_cb(dbs, 0);
     return &dbs->common;
 }
@@ -228,7 +230,7 @@  BlockDriverAIOCB *dma_bdrv_read(BlockDriverState *bs,
                                 QEMUSGList *sg, uint64_t sector,
                                 void (*cb)(void *opaque, int ret), void *opaque)
 {
-    return dma_bdrv_io(bs, sg, sector, bdrv_aio_readv, cb, opaque,
+    return dma_bdrv_io(bs, sg, sector, bdrv_aio_readv_replay, cb, opaque,
                        DMA_DIRECTION_FROM_DEVICE);
 }
 
@@ -236,7 +238,7 @@  BlockDriverAIOCB *dma_bdrv_write(BlockDriverState *bs,
                                  QEMUSGList *sg, uint64_t sector,
                                  void (*cb)(void *opaque, int ret), void *opaque)
 {
-    return dma_bdrv_io(bs, sg, sector, bdrv_aio_writev, cb, opaque,
+    return dma_bdrv_io(bs, sg, sector, bdrv_aio_writev_replay, cb, opaque,
                        DMA_DIRECTION_TO_DEVICE);
 }
 
diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
index aec3146..adf8dd2 100644
--- a/hw/block/virtio-blk.c
+++ b/hw/block/virtio-blk.c
@@ -265,7 +265,7 @@  void virtio_submit_multiwrite(BlockDriverState *bs, MultiReqBuffer *mrb)
         return;
     }
 
-    ret = bdrv_aio_multiwrite(bs, mrb->blkreq, mrb->num_writes);
+    ret = bdrv_aio_multiwrite(bs, mrb->blkreq, mrb->num_writes, true);
     if (ret != 0) {
         for (i = 0; i < mrb->num_writes; i++) {
             if (mrb->blkreq[i].error) {
@@ -285,7 +285,7 @@  static void virtio_blk_handle_flush(VirtIOBlockReq *req, MultiReqBuffer *mrb)
      * Make sure all outstanding writes are posted to the backing device.
      */
     virtio_submit_multiwrite(req->dev->bs, mrb);
-    bdrv_aio_flush(req->dev->bs, virtio_blk_flush_complete, req);
+    bdrv_aio_flush_replay(req->dev->bs, virtio_blk_flush_complete, req);
 }
 
 static void virtio_blk_handle_write(VirtIOBlockReq *req, MultiReqBuffer *mrb)
@@ -341,9 +341,9 @@  static void virtio_blk_handle_read(VirtIOBlockReq *req)
         virtio_blk_rw_complete(req, -EIO);
         return;
     }
-    bdrv_aio_readv(req->dev->bs, sector, &req->qiov,
-                   req->qiov.size / BDRV_SECTOR_SIZE,
-                   virtio_blk_rw_complete, req);
+    bdrv_aio_readv_replay(req->dev->bs, sector, &req->qiov,
+                          req->qiov.size / BDRV_SECTOR_SIZE,
+                          virtio_blk_rw_complete, req);
 }
 
 void virtio_blk_handle_request(VirtIOBlockReq *req, MultiReqBuffer *mrb)
diff --git a/hw/ide/atapi.c b/hw/ide/atapi.c
index f7d2009..e94cd93 100644
--- a/hw/ide/atapi.c
+++ b/hw/ide/atapi.c
@@ -25,6 +25,7 @@ 
 
 #include "hw/ide/internal.h"
 #include "hw/scsi/scsi.h"
+#include "replay/replay.h"
 
 static void ide_atapi_cmd_read_dma_cb(void *opaque, int ret);
 
@@ -348,10 +349,12 @@  static void ide_atapi_cmd_read_dma_cb(void *opaque, int ret)
     s->bus->dma->iov.iov_base = (void *)(s->io_buffer + data_offset);
     s->bus->dma->iov.iov_len = n * 4 * 512;
     qemu_iovec_init_external(&s->bus->dma->qiov, &s->bus->dma->iov, 1);
+    s->bus->dma->qiov.replay = true;
+    s->bus->dma->qiov.replay_step = replay_get_current_step();
 
-    s->bus->dma->aiocb = bdrv_aio_readv(s->bs, (int64_t)s->lba << 2,
-                                       &s->bus->dma->qiov, n * 4,
-                                       ide_atapi_cmd_read_dma_cb, s);
+    s->bus->dma->aiocb = bdrv_aio_readv_replay(s->bs, (int64_t)s->lba << 2,
+                                               &s->bus->dma->qiov, n * 4,
+                                               ide_atapi_cmd_read_dma_cb, s);
     return;
 
 eot:
diff --git a/hw/ide/core.c b/hw/ide/core.c
index 3df26f0..8c3cfe2 100644
--- a/hw/ide/core.c
+++ b/hw/ide/core.c
@@ -559,10 +559,12 @@  void ide_sector_read(IDEState *s)
     s->iov.iov_base = s->io_buffer;
     s->iov.iov_len  = n * BDRV_SECTOR_SIZE;
     qemu_iovec_init_external(&s->qiov, &s->iov, 1);
+    s->qiov.replay = true;
+    s->qiov.replay_step = replay_get_current_step();
 
     bdrv_acct_start(s->bs, &s->acct, n * BDRV_SECTOR_SIZE, BDRV_ACCT_READ);
-    s->pio_aiocb = bdrv_aio_readv(s->bs, sector_num, &s->qiov, n,
-                                  ide_sector_read_cb, s);
+    s->pio_aiocb = bdrv_aio_readv_replay(s->bs, sector_num, &s->qiov, n,
+                                         ide_sector_read_cb, s);
 }
 
 static void dma_buf_commit(IDEState *s)
@@ -795,10 +797,12 @@  void ide_sector_write(IDEState *s)
     s->iov.iov_base = s->io_buffer;
     s->iov.iov_len  = n * BDRV_SECTOR_SIZE;
     qemu_iovec_init_external(&s->qiov, &s->iov, 1);
+    s->qiov.replay = true;
+    s->qiov.replay_step = replay_get_current_step();
 
     bdrv_acct_start(s->bs, &s->acct, n * BDRV_SECTOR_SIZE, BDRV_ACCT_READ);
-    s->pio_aiocb = bdrv_aio_writev(s->bs, sector_num, &s->qiov, n,
-                                   ide_sector_write_cb, s);
+    s->pio_aiocb = bdrv_aio_writev_replay(s->bs, sector_num, &s->qiov, n,
+                                          ide_sector_write_cb, s);
 }
 
 static void ide_flush_cb(void *opaque, int ret)
@@ -827,7 +831,7 @@  void ide_flush_cache(IDEState *s)
 
     s->status |= BUSY_STAT;
     bdrv_acct_start(s->bs, &s->acct, 0, BDRV_ACCT_FLUSH);
-    bdrv_aio_flush(s->bs, ide_flush_cb, s);
+    bdrv_aio_flush_replay(s->bs, ide_flush_cb, s);
 }
 
 static void ide_cfata_metadata_inquiry(IDEState *s)
diff --git a/include/block/block.h b/include/block/block.h
index dd05458..636589a 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -325,11 +325,24 @@  typedef void BlockDriverDirtyHandler(BlockDriverState *bs, int64_t sector,
 BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
                                  QEMUIOVector *iov, int nb_sectors,
                                  BlockDriverCompletionFunc *cb, void *opaque);
+BlockDriverAIOCB *bdrv_aio_readv_replay(BlockDriverState *bs,
+                                        int64_t sector_num,
+                                        QEMUIOVector *iov, int nb_sectors,
+                                        BlockDriverCompletionFunc *cb,
+                                        void *opaque);
 BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
                                   QEMUIOVector *iov, int nb_sectors,
                                   BlockDriverCompletionFunc *cb, void *opaque);
+BlockDriverAIOCB *bdrv_aio_writev_replay(BlockDriverState *bs,
+                                         int64_t sector_num,
+                                         QEMUIOVector *iov, int nb_sectors,
+                                         BlockDriverCompletionFunc *cb,
+                                         void *opaque);
 BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs,
                                  BlockDriverCompletionFunc *cb, void *opaque);
+BlockDriverAIOCB *bdrv_aio_flush_replay(BlockDriverState *bs,
+                                        BlockDriverCompletionFunc *cb,
+                                        void *opaque);
 BlockDriverAIOCB *bdrv_aio_discard(BlockDriverState *bs,
                                    int64_t sector_num, int nb_sectors,
                                    BlockDriverCompletionFunc *cb, void *opaque);
@@ -349,7 +362,7 @@  typedef struct BlockRequest {
 } BlockRequest;
 
 int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs,
-    int num_reqs);
+                        int num_reqs, bool replay);
 
 /* sg packet commands */
 int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf);
diff --git a/include/qemu-common.h b/include/qemu-common.h
index 48013ce..50ab79c 100644
--- a/include/qemu-common.h
+++ b/include/qemu-common.h
@@ -309,6 +309,8 @@  typedef struct QEMUIOVector {
     int niov;
     int nalloc;
     size_t size;
+    bool replay;
+    uint64_t replay_step;
 } QEMUIOVector;
 
 void qemu_iovec_init(QEMUIOVector *qiov, int alloc_hint);
diff --git a/qemu-io-cmds.c b/qemu-io-cmds.c
index 60c1ceb..f8a809a 100644
--- a/qemu-io-cmds.c
+++ b/qemu-io-cmds.c
@@ -592,7 +592,7 @@  static int do_aio_multiwrite(BlockDriverState *bs, BlockRequest* reqs,
         *total += reqs[i].qiov->size;
     }
 
-    ret = bdrv_aio_multiwrite(bs, reqs, num_reqs);
+    ret = bdrv_aio_multiwrite(bs, reqs, num_reqs, false);
     if (ret < 0) {
         return ret;
     }
diff --git a/stubs/replay.c b/stubs/replay.c
index 8a4d69e..1dea1f6 100755
--- a/stubs/replay.c
+++ b/stubs/replay.c
@@ -31,3 +31,8 @@  int runstate_is_running(void)
 void replay_add_bh_event(void *bh, uint64_t id)
 {
 }
+
+uint64_t replay_get_current_step(void)
+{
+    return 0;
+}
diff --git a/trace-events b/trace-events
index 11a17a8..8382348 100644
--- a/trace-events
+++ b/trace-events
@@ -59,7 +59,9 @@  bdrv_aio_multiwrite(void *mcb, int num_callbacks, int num_reqs) "mcb %p num_call
 bdrv_aio_discard(void *bs, int64_t sector_num, int nb_sectors, void *opaque) "bs %p sector_num %"PRId64" nb_sectors %d opaque %p"
 bdrv_aio_flush(void *bs, void *opaque) "bs %p opaque %p"
 bdrv_aio_readv(void *bs, int64_t sector_num, int nb_sectors, void *opaque) "bs %p sector_num %"PRId64" nb_sectors %d opaque %p"
+bdrv_aio_readv_replay(void *bs, int64_t sector_num, int nb_sectors, void *opaque) "bs %p sector_num %"PRId64" nb_sectors %d opaque %p"
 bdrv_aio_writev(void *bs, int64_t sector_num, int nb_sectors, void *opaque) "bs %p sector_num %"PRId64" nb_sectors %d opaque %p"
+bdrv_aio_writev_replay(void *bs, int64_t sector_num, int nb_sectors, void *opaque) "bs %p sector_num %"PRId64" nb_sectors %d opaque %p"
 bdrv_aio_write_zeroes(void *bs, int64_t sector_num, int nb_sectors, int flags, void *opaque) "bs %p sector_num %"PRId64" nb_sectors %d flags %#x opaque %p"
 bdrv_lock_medium(void *bs, bool locked) "bs %p locked %d"
 bdrv_co_readv(void *bs, int64_t sector_num, int nb_sector) "bs %p sector_num %"PRId64" nb_sectors %d"
diff --git a/util/iov.c b/util/iov.c
index 2b4f46d..3bf7092 100644
--- a/util/iov.c
+++ b/util/iov.c
@@ -257,6 +257,8 @@  void qemu_iovec_init(QEMUIOVector *qiov, int alloc_hint)
     qiov->niov = 0;
     qiov->nalloc = alloc_hint;
     qiov->size = 0;
+    qiov->replay = false;
+    qiov->replay_step = 0;
 }
 
 void qemu_iovec_init_external(QEMUIOVector *qiov, struct iovec *iov, int niov)
@@ -267,6 +269,8 @@  void qemu_iovec_init_external(QEMUIOVector *qiov, struct iovec *iov, int niov)
     qiov->niov = niov;
     qiov->nalloc = -1;
     qiov->size = 0;
+    qiov->replay = false;
+    qiov->replay_step = 0;
     for (i = 0; i < niov; i++)
         qiov->size += iov[i].iov_len;
 }