diff mbox

Add delay option to blkdebug

Message ID 1372528923-12354-2-git-send-email-alex@alex.org.uk
State New
Headers show

Commit Message

Alex Bligh June 29, 2013, 6:02 p.m. UTC
Add a delay option to blkdebug, allowing operations to be delayed by
a specifiable number of microseconds. Example configuration:

[inject-error]
event = "read_aio"
delay = "200000"

Signed-off-by: Alex Bligh <alex@alex.org.uk>
---
 block/blkdebug.c |   83 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 83 insertions(+)

Comments

Kevin Wolf July 1, 2013, 10:23 a.m. UTC | #1
Am 29.06.2013 um 20:02 hat Alex Bligh geschrieben:
> Add a delay option to blkdebug, allowing operations to be delayed by
> a specifiable number of microseconds. Example configuration:
> 
> [inject-error]
> event = "read_aio"
> delay = "200000"
> 
> Signed-off-by: Alex Bligh <alex@alex.org.uk>

"inject-error" doesn't really describe this well. Shouldn't we rather
introduce a new section "[delay]" or something like that?

I remember that I once tried something similar, but never submitted it.
I think the reason was that using timers inside requests doesn't really
work. It works as long as everything is indeed asynchronous, but
bdrv_drain_all() or a loop waiting for a single request that tries to
make progress with qemu_aio_wait() will simply hang because timers
aren't processed in these nested event loops.

Kevin
Alex Bligh July 1, 2013, 11:15 a.m. UTC | #2
Kevin,

--On 1 July 2013 12:23:53 +0200 Kevin Wolf <kwolf@redhat.com> wrote:

> "inject-error" doesn't really describe this well. Shouldn't we rather
> introduce a new section "[delay]" or something like that?

That's how I started off. Then I realised you might want to make
the delay dependent on the sector or state, or the operation
concerned. Changing the name of the section to '[inject]' would
probably be best.

> I remember that I once tried something similar, but never submitted it.
> I think the reason was that using timers inside requests doesn't really
> work. It works as long as everything is indeed asynchronous, but
> bdrv_drain_all() or a loop waiting for a single request that tries to
> make progress with qemu_aio_wait() will simply hang because timers
> aren't processed in these nested event loops.

Yes I discovered that the hard way. In particular, timers are not
run at all in (e.g.) qemu-img. I think they are not run at all outside
qemu itself.

This is why I don't use timers.

What it currently does is schedule a bh which looks at the time
(as time still moves forwards) - using idle scheduling, and if it's
not time to run the bh, the bh reschedules itself (using idle scheduling
again). Because of the use of idle scheduling it isn't busy-waiting,
but it is hardly ideal.

I /think/ this is safe because if bh's aren't running, lots of
drivers (e.g. blkverify) would not work. And as far as I can tell,
idle bh's are scheduled whenever non-idle ones are. However, I am
newbie as far as the async code and the block code are concerned.
diff mbox

Patch

diff --git a/block/blkdebug.c b/block/blkdebug.c
index ccb627a..dafb805 100644
--- a/block/blkdebug.c
+++ b/block/blkdebug.c
@@ -40,6 +40,9 @@  typedef struct BlkdebugAIOCB {
     BlockDriverAIOCB common;
     QEMUBH *bh;
     int ret;
+    bool *finished;
+    int64_t delay;
+    int64_t starttime;
 } BlkdebugAIOCB;
 
 typedef struct BlkdebugSuspendedReq {
@@ -71,6 +74,7 @@  typedef struct BlkdebugRule {
             int immediately;
             int once;
             int64_t sector;
+            int64_t delay;
         } inject;
         struct {
             int new_state;
@@ -111,6 +115,10 @@  static QemuOptsList inject_error_opts = {
             .name = "immediately",
             .type = QEMU_OPT_BOOL,
         },
+        {
+            .name = "delay", /* delay in microseconds */
+            .type = QEMU_OPT_NUMBER,
+        },
         { /* end of list */ }
     },
 };
@@ -236,6 +244,10 @@  static int add_rule(QemuOpts *opts, void *opaque)
         rule->options.inject.immediately =
             qemu_opt_get_bool(opts, "immediately", 0);
         rule->options.inject.sector = qemu_opt_get_number(opts, "sector", -1);
+        rule->options.inject.delay = qemu_opt_get_number(opts, "delay", 0);
+        if (rule->options.inject.delay) {
+            rule->options.inject.error = 0;
+        }
         break;
 
     case ACTION_SET_STATE:
@@ -399,6 +411,9 @@  fail:
 static void error_callback_bh(void *opaque)
 {
     struct BlkdebugAIOCB *acb = opaque;
+    if (acb->finished) {
+        *acb->finished = true;
+    }
     qemu_bh_delete(acb->bh);
     acb->common.cb(acb->common.opaque, acb->ret);
     qemu_aio_release(acb);
@@ -407,6 +422,13 @@  static void error_callback_bh(void *opaque)
 static void blkdebug_aio_cancel(BlockDriverAIOCB *blockacb)
 {
     BlkdebugAIOCB *acb = container_of(blockacb, BlkdebugAIOCB, common);
+    bool finished = false;
+
+    /* Wait until request completes, invokes its callback, and frees itself */
+    acb->finished = &finished;
+    while (!finished) {
+        qemu_aio_wait();
+    }
     qemu_aio_release(acb);
 }
 
@@ -427,6 +449,7 @@  static BlockDriverAIOCB *inject_error(BlockDriverState *bs,
     }
 
     acb = qemu_aio_get(&blkdebug_aiocb_info, bs, cb, opaque);
+    acb->finished = NULL;
     acb->ret = -error;
 
     bh = qemu_bh_new(error_callback_bh, acb);
@@ -436,6 +459,50 @@  static BlockDriverAIOCB *inject_error(BlockDriverState *bs,
     return &acb->common;
 }
 
+static BlkdebugAIOCB *blkdebug_aio_get(BlockDriverState *bs, int64_t delay,
+                                       BlockDriverCompletionFunc *cb,
+                                       void *opaque)
+{
+    BlkdebugAIOCB *acb = qemu_aio_get(&blkdebug_aiocb_info, bs, cb, opaque);
+    
+    acb->bh = NULL;
+    acb->delay = delay;
+    acb->finished = NULL;
+    return acb;
+}
+
+static void blkdebug_aio_delay_bh(void *opaque)
+{
+    int64_t currenttime;
+    BlkdebugAIOCB *acb = opaque;
+
+    /* Unfortunately we cannot use a timer as under qemu-img for instance
+     * the timer loop is not run.
+     */
+    currenttime = qemu_get_clock_ns(rt_clock);
+    if ((currenttime - acb->starttime) < (acb->delay * 1000)) {
+        qemu_bh_schedule_idle(acb->bh);
+        return;
+    }
+
+    qemu_bh_delete(acb->bh);
+
+    acb->common.cb(acb->common.opaque, acb->ret);
+    if (acb->finished) {
+        *acb->finished = true;
+    }
+    qemu_aio_release(acb);
+}
+
+static void blkdebug_aio_delay_cb(void *opaque, int ret)
+{
+    BlkdebugAIOCB *acb = opaque;
+
+    acb->starttime = qemu_get_clock_ns(rt_clock);
+    acb->bh = qemu_bh_new(blkdebug_aio_delay_bh, acb);
+    qemu_bh_schedule_idle(acb->bh);
+}
+
 static BlockDriverAIOCB *blkdebug_aio_readv(BlockDriverState *bs,
     int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
     BlockDriverCompletionFunc *cb, void *opaque)
@@ -455,6 +522,14 @@  static BlockDriverAIOCB *blkdebug_aio_readv(BlockDriverState *bs,
         return inject_error(bs, cb, opaque, rule);
     }
 
+    if (rule && rule->options.inject.delay) {
+        BlkdebugAIOCB *acb = blkdebug_aio_get(bs, rule->options.inject.delay, cb, opaque);
+        
+        bdrv_aio_readv(bs->file, sector_num, qiov, nb_sectors,
+                       blkdebug_aio_delay_cb, acb);
+        return &acb->common;
+    }
+
     return bdrv_aio_readv(bs->file, sector_num, qiov, nb_sectors, cb, opaque);
 }
 
@@ -477,6 +552,14 @@  static BlockDriverAIOCB *blkdebug_aio_writev(BlockDriverState *bs,
         return inject_error(bs, cb, opaque, rule);
     }
 
+    if (rule && rule->options.inject.delay) {
+        BlkdebugAIOCB *acb = blkdebug_aio_get(bs, rule->options.inject.delay, cb, opaque);
+        
+        bdrv_aio_writev(bs->file, sector_num, qiov, nb_sectors,
+                        blkdebug_aio_delay_cb, acb);
+        return &acb->common;
+    }
+
     return bdrv_aio_writev(bs->file, sector_num, qiov, nb_sectors, cb, opaque);
 }