diff mbox

[RFC,v3,34/49] replay: replay aio requests

Message ID 20140731125639.1600.91367.stgit@PASHA-ISP.novsu.ac.ru
State New
Headers show

Commit Message

Pavel Dovgalyuk July 31, 2014, 12:56 p.m. UTC
This patch adds an identifier to aio requests. The ID is used when creating
bottom halves and for identifying them while replaying.
The patch also introduces several functions that make it possible to replay
aio requests.

Signed-off-by: Pavel Dovgalyuk <pavel.dovgaluk@ispras.ru>
---
 block.c               |   80 +++++++++++++++++++++++++++++++++++++++++++------
 block/qcow2.c         |    4 ++
 dma-helpers.c         |    6 ++--
 hw/block/virtio-blk.c |   10 +++---
 hw/ide/atapi.c        |    9 ++++--
 hw/ide/core.c         |   14 ++++++---
 include/block/block.h |   15 +++++++++
 include/qemu-common.h |    2 +
 qemu-io-cmds.c        |    2 +
 stubs/replay.c        |    5 +++
 trace-events          |    2 +
 util/iov.c            |    4 ++
 12 files changed, 127 insertions(+), 26 deletions(-)
diff mbox

Patch

diff --git a/block.c b/block.c
index 3548be7..1335259 100644
--- a/block.c
+++ b/block.c
@@ -84,7 +84,8 @@  static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
                                                BdrvRequestFlags flags,
                                                BlockDriverCompletionFunc *cb,
                                                void *opaque,
-                                               bool is_write);
+                                               bool is_write,
+                                               bool aio_replay);
 static void coroutine_fn bdrv_co_do_rw(void *opaque);
 static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
     int64_t sector_num, int nb_sectors, BdrvRequestFlags flags);
@@ -4402,7 +4403,19 @@  BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
     trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque);
 
     return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, 0,
-                                 cb, opaque, false);
+                                 cb, opaque, false, false);
+}
+
+BlockDriverAIOCB *bdrv_aio_readv_replay(BlockDriverState *bs,
+                                        int64_t sector_num,
+                                        QEMUIOVector *qiov, int nb_sectors,
+                                        BlockDriverCompletionFunc *cb,
+                                        void *opaque)
+{
+    trace_bdrv_aio_readv_replay(bs, sector_num, nb_sectors, opaque);
+    /* Same call as bdrv_aio_readv(), but with aio_replay set to true. */
+    return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, 0,
+                                 cb, opaque, false, true);
 }
 
 BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
@@ -4412,7 +4425,19 @@  BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
     trace_bdrv_aio_writev(bs, sector_num, nb_sectors, opaque);
 
     return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, 0,
-                                 cb, opaque, true);
+                                 cb, opaque, true, false);
+}
+
+BlockDriverAIOCB *bdrv_aio_writev_replay(BlockDriverState *bs,
+                                         int64_t sector_num,
+                                         QEMUIOVector *qiov, int nb_sectors,
+                                         BlockDriverCompletionFunc *cb,
+                                         void *opaque)
+{
+    trace_bdrv_aio_writev_replay(bs, sector_num, nb_sectors, opaque);
+    /* Same call as bdrv_aio_writev(), but with aio_replay set to true. */
+    return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, 0,
+                                 cb, opaque, true, true);
 }
 
 BlockDriverAIOCB *bdrv_aio_write_zeroes(BlockDriverState *bs,
@@ -4423,7 +4448,7 @@  BlockDriverAIOCB *bdrv_aio_write_zeroes(BlockDriverState *bs,
 
     return bdrv_co_aio_rw_vector(bs, sector_num, NULL, nb_sectors,
                                  BDRV_REQ_ZERO_WRITE | flags,
-                                 cb, opaque, true);
+                                 cb, opaque, true, true);
 }
 
 
@@ -4559,7 +4584,8 @@  static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
  * requests. However, the fields opaque and error are left unmodified as they
  * are used to signal failure for a single request to the caller.
  */
-int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
+int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs,
+                        bool replay)
 {
     MultiwriteCB *mcb;
     int i;
@@ -4597,7 +4623,7 @@  int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
         bdrv_co_aio_rw_vector(bs, reqs[i].sector, reqs[i].qiov,
                               reqs[i].nb_sectors, reqs[i].flags,
                               multiwrite_cb, mcb,
-                              true);
+                              true, replay);
     }
 
     return 0;
@@ -4746,7 +4772,12 @@  static void coroutine_fn bdrv_co_do_rw(void *opaque)
             acb->req.nb_sectors, acb->req.qiov, acb->req.flags);
     }
 
-    acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_co_em_bh, acb);
+    if (acb->common.replay) {
+        acb->bh = aio_bh_new_replay(bdrv_get_aio_context(bs), bdrv_co_em_bh,
+                                    acb, acb->common.replay_step);
+    } else {
+        acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_co_em_bh, acb);
+    }
     qemu_bh_schedule(acb->bh);
 }
 
@@ -4757,7 +4788,8 @@  static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
                                                BdrvRequestFlags flags,
                                                BlockDriverCompletionFunc *cb,
                                                void *opaque,
-                                               bool is_write)
+                                               bool is_write,
+                                               bool aio_replay)
 {
     Coroutine *co;
     BlockDriverAIOCBCoroutine *acb;
@@ -4769,6 +4801,10 @@  static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
     acb->req.flags = flags;
     acb->is_write = is_write;
     acb->done = NULL;
+    acb->common.replay = aio_replay;
+    if (aio_replay) {
+        acb->common.replay_step = replay_get_current_step();
+    }
 
     co = qemu_coroutine_create(bdrv_co_do_rw);
     qemu_coroutine_enter(co, acb);
@@ -4782,7 +4818,12 @@  static void coroutine_fn bdrv_aio_flush_co_entry(void *opaque)
     BlockDriverState *bs = acb->common.bs;
 
     acb->req.error = bdrv_co_flush(bs);
-    acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_co_em_bh, acb);
+    if (acb->common.replay) {
+        acb->bh = aio_bh_new_replay(bdrv_get_aio_context(bs), bdrv_co_em_bh,
+                                    acb, acb->common.replay_step);
+    } else {
+        acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_co_em_bh, acb);
+    }
     qemu_bh_schedule(acb->bh);
 }
 
@@ -4803,6 +4844,25 @@  BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs,
     return &acb->common;
 }
 
+BlockDriverAIOCB *bdrv_aio_flush_replay(BlockDriverState *bs,
+        BlockDriverCompletionFunc *cb, void *opaque)
+{
+    BlockDriverAIOCBCoroutine *acb;
+    Coroutine *co;
+
+    trace_bdrv_aio_flush(bs, opaque);
+
+    acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
+    /* Mark the request as a replay one and record the current replay step. */
+    acb->common.replay_step = replay_get_current_step();
+    acb->common.replay = true;
+    acb->done = NULL;
+    co = qemu_coroutine_create(bdrv_aio_flush_co_entry);
+    qemu_coroutine_enter(co, acb);
+
+    return &acb->common;
+}
+
 static void coroutine_fn bdrv_aio_discard_co_entry(void *opaque)
 {
     BlockDriverAIOCBCoroutine *acb = opaque;
@@ -4853,6 +4913,8 @@  void *qemu_aio_get(const AIOCBInfo *aiocb_info, BlockDriverState *bs,
     acb->bs = bs;
     acb->cb = cb;
     acb->opaque = opaque;
+    acb->replay_step = 0;
+    acb->replay = false;
     return acb;
 }
 
diff --git a/block/qcow2.c b/block/qcow2.c
index 43ceb34..c8169bb 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -997,6 +997,8 @@  static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num,
     uint8_t *cluster_data = NULL;
 
     qemu_iovec_init(&hd_qiov, qiov->niov);
+    hd_qiov.replay = qiov->replay;
+    hd_qiov.replay_step = qiov->replay_step;
 
     qemu_co_mutex_lock(&s->lock);
 
@@ -1149,6 +1151,8 @@  static coroutine_fn int qcow2_co_writev(BlockDriverState *bs,
                                  remaining_sectors);
 
     qemu_iovec_init(&hd_qiov, qiov->niov);
+    hd_qiov.replay = qiov->replay;
+    hd_qiov.replay_step = qiov->replay_step;
 
     s->cluster_cache_offset = -1; /* disable compressed cache */
 
diff --git a/dma-helpers.c b/dma-helpers.c
index 8faf53d..a020dc5 100644
--- a/dma-helpers.c
+++ b/dma-helpers.c
@@ -223,6 +223,8 @@  BlockDriverAIOCB *dma_bdrv_io(
     dbs->io_func = io_func;
     dbs->bh = NULL;
     qemu_iovec_init(&dbs->iov, sg->nsg);
+    dbs->iov.replay = true;
+    dbs->iov.replay_step = replay_get_current_step();
     dma_bdrv_cb(dbs, 0);
     return &dbs->common;
 }
@@ -232,7 +234,7 @@  BlockDriverAIOCB *dma_bdrv_read(BlockDriverState *bs,
                                 QEMUSGList *sg, uint64_t sector,
                                 void (*cb)(void *opaque, int ret), void *opaque)
 {
-    return dma_bdrv_io(bs, sg, sector, bdrv_aio_readv, cb, opaque,
+    return dma_bdrv_io(bs, sg, sector, bdrv_aio_readv_replay, cb, opaque,
                        DMA_DIRECTION_FROM_DEVICE);
 }
 
@@ -240,7 +242,7 @@  BlockDriverAIOCB *dma_bdrv_write(BlockDriverState *bs,
                                  QEMUSGList *sg, uint64_t sector,
                                  void (*cb)(void *opaque, int ret), void *opaque)
 {
-    return dma_bdrv_io(bs, sg, sector, bdrv_aio_writev, cb, opaque,
+    return dma_bdrv_io(bs, sg, sector, bdrv_aio_writev_replay, cb, opaque,
                        DMA_DIRECTION_TO_DEVICE);
 }
 
diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
index c241c50..5d238af 100644
--- a/hw/block/virtio-blk.c
+++ b/hw/block/virtio-blk.c
@@ -265,7 +265,7 @@  void virtio_submit_multiwrite(BlockDriverState *bs, MultiReqBuffer *mrb)
         return;
     }
 
-    ret = bdrv_aio_multiwrite(bs, mrb->blkreq, mrb->num_writes);
+    ret = bdrv_aio_multiwrite(bs, mrb->blkreq, mrb->num_writes, true);
     if (ret != 0) {
         for (i = 0; i < mrb->num_writes; i++) {
             if (mrb->blkreq[i].error) {
@@ -285,7 +285,7 @@  static void virtio_blk_handle_flush(VirtIOBlockReq *req, MultiReqBuffer *mrb)
      * Make sure all outstanding writes are posted to the backing device.
      */
     virtio_submit_multiwrite(req->dev->bs, mrb);
-    bdrv_aio_flush(req->dev->bs, virtio_blk_flush_complete, req);
+    bdrv_aio_flush_replay(req->dev->bs, virtio_blk_flush_complete, req);
 }
 
 static bool virtio_blk_sect_range_ok(VirtIOBlock *dev,
@@ -354,9 +354,9 @@  static void virtio_blk_handle_read(VirtIOBlockReq *req)
     }
 
     bdrv_acct_start(req->dev->bs, &req->acct, req->qiov.size, BDRV_ACCT_READ);
-    bdrv_aio_readv(req->dev->bs, sector, &req->qiov,
-                   req->qiov.size / BDRV_SECTOR_SIZE,
-                   virtio_blk_rw_complete, req);
+    bdrv_aio_readv_replay(req->dev->bs, sector, &req->qiov,
+                          req->qiov.size / BDRV_SECTOR_SIZE,
+                          virtio_blk_rw_complete, req);
 }
 
 void virtio_blk_handle_request(VirtIOBlockReq *req, MultiReqBuffer *mrb)
diff --git a/hw/ide/atapi.c b/hw/ide/atapi.c
index f7d2009..e94cd93 100644
--- a/hw/ide/atapi.c
+++ b/hw/ide/atapi.c
@@ -25,6 +25,7 @@ 
 
 #include "hw/ide/internal.h"
 #include "hw/scsi/scsi.h"
+#include "replay/replay.h"
 
 static void ide_atapi_cmd_read_dma_cb(void *opaque, int ret);
 
@@ -348,10 +349,12 @@  static void ide_atapi_cmd_read_dma_cb(void *opaque, int ret)
     s->bus->dma->iov.iov_base = (void *)(s->io_buffer + data_offset);
     s->bus->dma->iov.iov_len = n * 4 * 512;
     qemu_iovec_init_external(&s->bus->dma->qiov, &s->bus->dma->iov, 1);
+    s->bus->dma->qiov.replay = true;
+    s->bus->dma->qiov.replay_step = replay_get_current_step();
 
-    s->bus->dma->aiocb = bdrv_aio_readv(s->bs, (int64_t)s->lba << 2,
-                                       &s->bus->dma->qiov, n * 4,
-                                       ide_atapi_cmd_read_dma_cb, s);
+    s->bus->dma->aiocb = bdrv_aio_readv_replay(s->bs, (int64_t)s->lba << 2,
+                                               &s->bus->dma->qiov, n * 4,
+                                               ide_atapi_cmd_read_dma_cb, s);
     return;
 
 eot:
diff --git a/hw/ide/core.c b/hw/ide/core.c
index ed2c769..8d77969 100644
--- a/hw/ide/core.c
+++ b/hw/ide/core.c
@@ -576,10 +576,12 @@  void ide_sector_read(IDEState *s)
     s->iov.iov_base = s->io_buffer;
     s->iov.iov_len  = n * BDRV_SECTOR_SIZE;
     qemu_iovec_init_external(&s->qiov, &s->iov, 1);
+    s->qiov.replay = true;
+    s->qiov.replay_step = replay_get_current_step();
 
     bdrv_acct_start(s->bs, &s->acct, n * BDRV_SECTOR_SIZE, BDRV_ACCT_READ);
-    s->pio_aiocb = bdrv_aio_readv(s->bs, sector_num, &s->qiov, n,
-                                  ide_sector_read_cb, s);
+    s->pio_aiocb = bdrv_aio_readv_replay(s->bs, sector_num, &s->qiov, n,
+                                         ide_sector_read_cb, s);
 }
 
 static void dma_buf_commit(IDEState *s)
@@ -823,10 +825,12 @@  void ide_sector_write(IDEState *s)
     s->iov.iov_base = s->io_buffer;
     s->iov.iov_len  = n * BDRV_SECTOR_SIZE;
     qemu_iovec_init_external(&s->qiov, &s->iov, 1);
+    s->qiov.replay = true;
+    s->qiov.replay_step = replay_get_current_step();
 
     bdrv_acct_start(s->bs, &s->acct, n * BDRV_SECTOR_SIZE, BDRV_ACCT_READ);
-    s->pio_aiocb = bdrv_aio_writev(s->bs, sector_num, &s->qiov, n,
-                                   ide_sector_write_cb, s);
+    s->pio_aiocb = bdrv_aio_writev_replay(s->bs, sector_num, &s->qiov, n,
+                                          ide_sector_write_cb, s);
 }
 
 static void ide_flush_cb(void *opaque, int ret)
@@ -855,7 +859,7 @@  void ide_flush_cache(IDEState *s)
 
     s->status |= BUSY_STAT;
     bdrv_acct_start(s->bs, &s->acct, 0, BDRV_ACCT_FLUSH);
-    bdrv_aio_flush(s->bs, ide_flush_cb, s);
+    bdrv_aio_flush_replay(s->bs, ide_flush_cb, s);
 }
 
 static void ide_cfata_metadata_inquiry(IDEState *s)
diff --git a/include/block/block.h b/include/block/block.h
index 95e6d1c..93b8e22 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -326,11 +326,24 @@  typedef void BlockDriverDirtyHandler(BlockDriverState *bs, int64_t sector,
 BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
                                  QEMUIOVector *iov, int nb_sectors,
                                  BlockDriverCompletionFunc *cb, void *opaque);
+BlockDriverAIOCB *bdrv_aio_readv_replay(BlockDriverState *bs,
+                                        int64_t sector_num,
+                                        QEMUIOVector *iov, int nb_sectors,
+                                        BlockDriverCompletionFunc *cb,
+                                        void *opaque);
 BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
                                   QEMUIOVector *iov, int nb_sectors,
                                   BlockDriverCompletionFunc *cb, void *opaque);
+BlockDriverAIOCB *bdrv_aio_writev_replay(BlockDriverState *bs,
+                                         int64_t sector_num,
+                                         QEMUIOVector *iov, int nb_sectors,
+                                         BlockDriverCompletionFunc *cb,
+                                         void *opaque);
 BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs,
                                  BlockDriverCompletionFunc *cb, void *opaque);
+BlockDriverAIOCB *bdrv_aio_flush_replay(BlockDriverState *bs,
+                                        BlockDriverCompletionFunc *cb,
+                                        void *opaque);
 BlockDriverAIOCB *bdrv_aio_discard(BlockDriverState *bs,
                                    int64_t sector_num, int nb_sectors,
                                    BlockDriverCompletionFunc *cb, void *opaque);
@@ -350,7 +363,7 @@  typedef struct BlockRequest {
 } BlockRequest;
 
 int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs,
-    int num_reqs);
+                        int num_reqs, bool replay);
 
 /* sg packet commands */
 int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf);
diff --git a/include/qemu-common.h b/include/qemu-common.h
index 5b9d358..126e138 100644
--- a/include/qemu-common.h
+++ b/include/qemu-common.h
@@ -309,6 +309,8 @@  typedef struct QEMUIOVector {
     int niov;
     int nalloc;
     size_t size;
+    bool replay;
+    uint64_t replay_step;
 } QEMUIOVector;
 
 void qemu_iovec_init(QEMUIOVector *qiov, int alloc_hint);
diff --git a/qemu-io-cmds.c b/qemu-io-cmds.c
index c503fc6..fd54eed 100644
--- a/qemu-io-cmds.c
+++ b/qemu-io-cmds.c
@@ -592,7 +592,7 @@  static int do_aio_multiwrite(BlockDriverState *bs, BlockRequest* reqs,
         *total += reqs[i].qiov->size;
     }
 
-    ret = bdrv_aio_multiwrite(bs, reqs, num_reqs);
+    ret = bdrv_aio_multiwrite(bs, reqs, num_reqs, false);
     if (ret < 0) {
         return ret;
     }
diff --git a/stubs/replay.c b/stubs/replay.c
index dfea8d8..e6fb20a 100755
--- a/stubs/replay.c
+++ b/stubs/replay.c
@@ -36,3 +36,8 @@  int64_t replay_get_icount(void)
 void replay_add_bh_event(void *bh, uint64_t id)
 {
 }
+
+uint64_t replay_get_current_step(void)
+{
+    return 0; /* stub: always reports step 0 */
+}
diff --git a/trace-events b/trace-events
index 11a17a8..8382348 100644
--- a/trace-events
+++ b/trace-events
@@ -59,7 +59,9 @@  bdrv_aio_multiwrite(void *mcb, int num_callbacks, int num_reqs) "mcb %p num_call
 bdrv_aio_discard(void *bs, int64_t sector_num, int nb_sectors, void *opaque) "bs %p sector_num %"PRId64" nb_sectors %d opaque %p"
 bdrv_aio_flush(void *bs, void *opaque) "bs %p opaque %p"
 bdrv_aio_readv(void *bs, int64_t sector_num, int nb_sectors, void *opaque) "bs %p sector_num %"PRId64" nb_sectors %d opaque %p"
+bdrv_aio_readv_replay(void *bs, int64_t sector_num, int nb_sectors, void *opaque) "bs %p sector_num %"PRId64" nb_sectors %d opaque %p"
 bdrv_aio_writev(void *bs, int64_t sector_num, int nb_sectors, void *opaque) "bs %p sector_num %"PRId64" nb_sectors %d opaque %p"
+bdrv_aio_writev_replay(void *bs, int64_t sector_num, int nb_sectors, void *opaque) "bs %p sector_num %"PRId64" nb_sectors %d opaque %p"
 bdrv_aio_write_zeroes(void *bs, int64_t sector_num, int nb_sectors, int flags, void *opaque) "bs %p sector_num %"PRId64" nb_sectors %d flags %#x opaque %p"
 bdrv_lock_medium(void *bs, bool locked) "bs %p locked %d"
 bdrv_co_readv(void *bs, int64_t sector_num, int nb_sector) "bs %p sector_num %"PRId64" nb_sectors %d"
diff --git a/util/iov.c b/util/iov.c
index 24566c8..b03f83f 100644
--- a/util/iov.c
+++ b/util/iov.c
@@ -257,6 +257,8 @@  void qemu_iovec_init(QEMUIOVector *qiov, int alloc_hint)
     qiov->niov = 0;
     qiov->nalloc = alloc_hint;
     qiov->size = 0;
+    qiov->replay = false;
+    qiov->replay_step = 0;
 }
 
 void qemu_iovec_init_external(QEMUIOVector *qiov, struct iovec *iov, int niov)
@@ -267,6 +269,8 @@  void qemu_iovec_init_external(QEMUIOVector *qiov, struct iovec *iov, int niov)
     qiov->niov = niov;
     qiov->nalloc = -1;
     qiov->size = 0;
+    qiov->replay = false;
+    qiov->replay_step = 0;
     for (i = 0; i < niov; i++)
         qiov->size += iov[i].iov_len;
 }