Message ID | 1455645388-32401-7-git-send-email-pbonzini@redhat.com |
---|---|
State | New |
Headers | show |
On Tue, 02/16 18:56, Paolo Bonzini wrote: > Unlike tracked_requests, this field also counts throttled requests, > and remains non-zero if an AIO operation needs a BH to be "really" > completed. > > With this change, it is no longer necessary to have a dummy > BdrvTrackedRequest for requests that are never serialising, and > it is no longer necessary to poll the AioContext once after > bdrv_requests_pending(bs) returns false. > > Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> > --- > block/io.c | 71 +++++++++++++++++++++++++++++++---------------- > block/mirror.c | 2 ++ > include/block/block_int.h | 8 ++++-- > 3 files changed, 54 insertions(+), 27 deletions(-) > > diff --git a/block/io.c b/block/io.c > index d8d50b7..a9a23a6 100644 > --- a/block/io.c > +++ b/block/io.c > @@ -224,13 +224,7 @@ bool bdrv_requests_pending(BlockDriverState *bs) > { > BdrvChild *child; > > - if (!QLIST_EMPTY(&bs->tracked_requests)) { > - return true; > - } > - if (!qemu_co_queue_empty(&bs->throttled_reqs[0])) { > - return true; > - } > - if (!qemu_co_queue_empty(&bs->throttled_reqs[1])) { > + if (atomic_read(&bs->in_flight)) { > return true; > } > > @@ -268,15 +262,12 @@ static void bdrv_drain_recurse(BlockDriverState *bs) > */ > void bdrv_drain(BlockDriverState *bs) > { > - bool busy = true; > - > bdrv_no_throttling_begin(bs); > bdrv_io_unplugged_begin(bs); > bdrv_drain_recurse(bs); > - while (busy) { > + while (bdrv_requests_pending(bs)) { > /* Keep iterating */ > - busy = bdrv_requests_pending(bs); > - busy |= aio_poll(bdrv_get_aio_context(bs), busy); > + aio_poll(bdrv_get_aio_context(bs), true); > } > bdrv_io_unplugged_end(bs); > bdrv_no_throttling_end(bs); > @@ -291,7 +282,7 @@ void bdrv_drain(BlockDriverState *bs) > void bdrv_drain_all(void) > { > /* Always run first iteration so any pending completion BHs run */ > - bool busy = true; > + bool waited = true; > BlockDriverState *bs = NULL; > GSList *aio_ctxs = NULL, *ctx; > > @@ -318,8 +309,8 @@ void bdrv_drain_all(void) > * request completion. Therefore we must keep looping until there was no > * more activity rather than simply draining each device independently. > */ > - while (busy) { > - busy = false; > + while (waited) { > + waited = false; > > for (ctx = aio_ctxs; ctx != NULL; ctx = ctx->next) { > AioContext *aio_context = ctx->data; > @@ -329,12 +320,11 @@ void bdrv_drain_all(void) > while ((bs = bdrv_next(bs))) { > if (aio_context == bdrv_get_aio_context(bs)) { > if (bdrv_requests_pending(bs)) { > - busy = true; > - aio_poll(aio_context, busy); > + aio_poll(aio_context, true); > + waited = true; > } > } > } > - busy |= aio_poll(aio_context, false); > aio_context_release(aio_context); > } > } > @@ -457,6 +447,16 @@ static bool tracked_request_overlaps(BdrvTrackedRequest *req, > return true; > } > > +void bdrv_inc_in_flight(BlockDriverState *bs) > +{ > + atomic_inc(&bs->in_flight); > +} > + > +void bdrv_dec_in_flight(BlockDriverState *bs) > +{ > + atomic_dec(&bs->in_flight); > +} > + > static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self) > { > BlockDriverState *bs = self->bs; > @@ -963,6 +963,8 @@ static int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs, > return ret; > } > > + bdrv_inc_in_flight(bs); > + > /* Don't do copy-on-read if we read data before write operation */ > if (bs->copy_on_read && !(flags & BDRV_REQ_NO_SERIALISING)) { > flags |= BDRV_REQ_COPY_ON_READ; > @@ -1003,6 +1005,7 @@ static int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs, > use_local_qiov ? &local_qiov : qiov, > flags); > tracked_request_end(&req); > + bdrv_dec_in_flight(bs); > > if (use_local_qiov) { > qemu_iovec_destroy(&local_qiov); > @@ -1310,6 +1313,8 @@ static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs, > return ret; > } > > + bdrv_inc_in_flight(bs); > + > /* throttling disk I/O */ > if (bs->throttle_state) { > throttle_group_co_io_limits_intercept(bs, bytes, true); > @@ -1408,6 +1413,7 @@ fail: > qemu_vfree(tail_buf); > out: > tracked_request_end(&req); > + bdrv_dec_in_flight(bs); > return ret; > } > > @@ -2065,6 +2071,7 @@ static const AIOCBInfo bdrv_em_aiocb_info = { > static void bdrv_aio_bh_cb(void *opaque) > { > BlockAIOCBSync *acb = opaque; > + BlockDriverState *bs = acb->common.bs; > > if (!acb->is_write && acb->ret >= 0) { > qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size); > @@ -2072,6 +2079,7 @@ static void bdrv_aio_bh_cb(void *opaque) > qemu_vfree(acb->bounce); > acb->common.cb(acb->common.opaque, acb->ret); > qemu_bh_delete(acb->bh); > + bdrv_dec_in_flight(bs); > acb->bh = NULL; > qemu_aio_unref(acb); > } > @@ -2087,6 +2095,7 @@ static BlockAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs, > { > BlockAIOCBSync *acb; > > + bdrv_inc_in_flight(bs); > acb = qemu_aio_get(&bdrv_em_aiocb_info, bs, cb, opaque); > acb->is_write = is_write; > acb->qiov = qiov; > @@ -2139,6 +2148,7 @@ static void bdrv_co_complete(BlockAIOCBCoroutine *acb) > { > if (!acb->need_bh) { > acb->common.cb(acb->common.opaque, acb->req.error); > + bdrv_dec_in_flight(acb->common.bs); > qemu_aio_unref(acb); > } > } > @@ -2192,6 +2202,9 @@ static BlockAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs, > Coroutine *co; > BlockAIOCBCoroutine *acb; > > + /* Matched by bdrv_co_complete's bdrv_dec_in_flight. */ > + bdrv_inc_in_flight(bs); > + > acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque); > acb->need_bh = true; > acb->req.error = -EINPROGRESS; > @@ -2225,6 +2238,9 @@ BlockAIOCB *bdrv_aio_flush(BlockDriverState *bs, > Coroutine *co; > BlockAIOCBCoroutine *acb; > > + /* Matched by bdrv_co_complete's bdrv_dec_in_flight. */ > + bdrv_inc_in_flight(bs); > + > acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque); > acb->need_bh = true; > acb->req.error = -EINPROGRESS; > @@ -2254,6 +2270,9 @@ BlockAIOCB *bdrv_aio_discard(BlockDriverState *bs, > > trace_bdrv_aio_discard(bs, sector_num, nb_sectors, opaque); > > + /* Matched by bdrv_co_complete's bdrv_dec_in_flight. */ > + bdrv_inc_in_flight(bs); > + > acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque); > acb->need_bh = true; > acb->req.error = -EINPROGRESS; > @@ -2361,14 +2380,14 @@ static void coroutine_fn bdrv_flush_co_entry(void *opaque) > int coroutine_fn bdrv_co_flush(BlockDriverState *bs) > { > int ret; > - BdrvTrackedRequest req; > > if (!bs || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs) || > bdrv_is_sg(bs)) { > return 0; > } > > - tracked_request_begin(&req, bs, 0, 0, BDRV_TRACKED_FLUSH); > + bdrv_inc_in_flight(bs); > + > /* Write back cached data to the OS even with cache=unsafe */ > BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_OS); > if (bs->drv->bdrv_co_flush_to_os) { > @@ -2423,7 +2442,7 @@ int coroutine_fn bdrv_co_flush(BlockDriverState *bs) > flush_parent: > ret = bs->file ? bdrv_co_flush(bs->file->bs) : 0; > out: > - tracked_request_end(&req); > + bdrv_dec_in_flight(bs); > return ret; > } > > @@ -2491,6 +2510,7 @@ int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num, > return 0; > } > > + bdrv_inc_in_flight(bs); > tracked_request_begin(&req, bs, sector_num, nb_sectors, > BDRV_TRACKED_DISCARD); > bdrv_set_dirty(bs, sector_num, nb_sectors); > @@ -2543,6 +2563,7 @@ int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num, > ret = 0; > out: > tracked_request_end(&req); > + bdrv_dec_in_flight(bs); > return ret; > } > > @@ -2588,13 +2609,12 @@ static void bdrv_ioctl_bh_cb(void *opaque) > static int bdrv_co_do_ioctl(BlockDriverState *bs, int req, void *buf) > { > BlockDriver *drv = bs->drv; > - BdrvTrackedRequest tracked_req; > CoroutineIOCompletion co = { > .coroutine = qemu_coroutine_self(), > }; > BlockAIOCB *acb; > > - tracked_request_begin(&tracked_req, bs, 0, 0, BDRV_TRACKED_IOCTL); > + bdrv_inc_in_flight(bs); > if (!drv || !drv->bdrv_aio_ioctl) { > co.ret = -ENOTSUP; > goto out; > @@ -2610,7 +2630,7 @@ static int bdrv_co_do_ioctl(BlockDriverState *bs, int req, void *buf) > } > qemu_coroutine_yield(); > out: > - tracked_request_end(&tracked_req); > + bdrv_dec_in_flight(bs); > return co.ret; > } > > @@ -2667,6 +2687,9 @@ BlockAIOCB *bdrv_aio_ioctl(BlockDriverState *bs, > bs, cb, opaque); > Coroutine *co; > > + /* Matched by bdrv_co_complete's bdrv_dec_in_flight. */ > + bdrv_inc_in_flight(bs); > + > acb->need_bh = true; > acb->req.error = -EINPROGRESS; > acb->req.req = req; > diff --git a/block/mirror.c b/block/mirror.c > index 793c20c..3f163b8 100644 > --- a/block/mirror.c > +++ b/block/mirror.c > @@ -94,6 +94,7 @@ static void mirror_bh_cb(void *opaque) > > qemu_bh_delete(op->co_enter_bh); > g_free(op); > + bdrv_dec_in_flight(s->common.bs); > if (s->waiting_for_io) { > qemu_coroutine_enter(s->common.co, NULL); > } > @@ -134,6 +135,7 @@ static void mirror_iteration_done(MirrorOp *op, int ret) > * If we call qemu_coroutine_enter here, there is the possibility > * of a deadlock when the coroutine calls bdrv_drained_begin. > */ > + bdrv_inc_in_flight(s->common.bs); > op->co_enter_bh = qemu_bh_new(mirror_bh_cb, op); > qemu_bh_schedule(op->co_enter_bh); > } > diff --git a/include/block/block_int.h b/include/block/block_int.h > index 2838814..89c38c0 100644 > --- a/include/block/block_int.h > +++ b/include/block/block_int.h > @@ -63,8 +63,6 @@ > enum BdrvTrackedRequestType { > BDRV_TRACKED_READ, > BDRV_TRACKED_WRITE, > - BDRV_TRACKED_FLUSH, > - BDRV_TRACKED_IOCTL, > BDRV_TRACKED_DISCARD, Okay, so flush and ioctl are not needed, but why is discard different? Fam > }; > > @@ -406,7 +404,8 @@ struct BlockDriverState { > /* Callback before write request is processed */ > NotifierWithReturnList before_write_notifiers; > > - /* number of in-flight serialising requests */ > + /* number of in-flight requests; overall and serialising */ > + unsigned int in_flight; > unsigned int serialising_in_flight; > > /* I/O throttling. > @@ -713,6 +712,9 @@ bool bdrv_requests_pending(BlockDriverState *bs); > void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap **out); > void bdrv_undo_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap *in); > > +void bdrv_inc_in_flight(BlockDriverState *bs); > +void bdrv_dec_in_flight(BlockDriverState *bs); > + > void blockdev_close_all_bdrv_states(void); > > #endif /* BLOCK_INT_H */ > -- > 2.5.0 > > >
On 09/03/2016 04:35, Fam Zheng wrote: >> > enum BdrvTrackedRequestType { >> > BDRV_TRACKED_READ, >> > BDRV_TRACKED_WRITE, >> > - BDRV_TRACKED_FLUSH, >> > - BDRV_TRACKED_IOCTL, >> > BDRV_TRACKED_DISCARD, > Okay, so flush and ioctl are not needed, but why is discard different? Discard can modify the contents of the device, so I think it's safer to serialize it against RMW and copy-on-read operations. Paolo
On Wed, 03/09 08:43, Paolo Bonzini wrote: > > > On 09/03/2016 04:35, Fam Zheng wrote: > >> > enum BdrvTrackedRequestType { > >> > BDRV_TRACKED_READ, > >> > BDRV_TRACKED_WRITE, > >> > - BDRV_TRACKED_FLUSH, > >> > - BDRV_TRACKED_IOCTL, > >> > BDRV_TRACKED_DISCARD, > > Okay, so flush and ioctl are not needed, but why is discard different? > > Discard can modify the contents of the device, so I think it's safer to > serialize it against RMW and copy-on-read operations. Okay, that makes sense, but ioctl like SG_IO can also modify content, no? Fam
On 09/03/2016 09:00, Fam Zheng wrote: >> > On 09/03/2016 04:35, Fam Zheng wrote: >>>>> > >> > enum BdrvTrackedRequestType { >>>>> > >> > BDRV_TRACKED_READ, >>>>> > >> > BDRV_TRACKED_WRITE, >>>>> > >> > - BDRV_TRACKED_FLUSH, >>>>> > >> > - BDRV_TRACKED_IOCTL, >>>>> > >> > BDRV_TRACKED_DISCARD, >>> > > Okay, so flush and ioctl are not needed, but why is discard different? >> > >> > Discard can modify the contents of the device, so I think it's safer to >> > serialize it against RMW and copy-on-read operations. > Okay, that makes sense, but ioctl like SG_IO can also modify content, no? If you use SG_IO you shouldn't use RMW (because scsi_read_complete traps READ CAPACITY and sets the host block size as the guest block size) or copy-on-read (because raw has no backing file). Besides, BDRV_TRACKED_IOCTL didn't include sector_num/nr_sectors operations so it didn't provide serialization. Paolo
On Wed, 03/09 09:22, Paolo Bonzini wrote: > > > On 09/03/2016 09:00, Fam Zheng wrote: > >> > On 09/03/2016 04:35, Fam Zheng wrote: > >>>>> > >> > enum BdrvTrackedRequestType { > >>>>> > >> > BDRV_TRACKED_READ, > >>>>> > >> > BDRV_TRACKED_WRITE, > >>>>> > >> > - BDRV_TRACKED_FLUSH, > >>>>> > >> > - BDRV_TRACKED_IOCTL, > >>>>> > >> > BDRV_TRACKED_DISCARD, > >>> > > Okay, so flush and ioctl are not needed, but why is discard different? > >> > > >> > Discard can modify the contents of the device, so I think it's safer to > >> > serialize it against RMW and copy-on-read operations. > > Okay, that makes sense, but ioctl like SG_IO can also modify content, no? > > If you use SG_IO you shouldn't use RMW (because scsi_read_complete traps > READ CAPACITY and sets the host block size as the guest block size) or > copy-on-read (because raw has no backing file). > > Besides, BDRV_TRACKED_IOCTL didn't include sector_num/nr_sectors > operations so it didn't provide serialization. > Yes, that's right. Thanks. Fam
diff --git a/block/io.c b/block/io.c index d8d50b7..a9a23a6 100644 --- a/block/io.c +++ b/block/io.c @@ -224,13 +224,7 @@ bool bdrv_requests_pending(BlockDriverState *bs) { BdrvChild *child; - if (!QLIST_EMPTY(&bs->tracked_requests)) { - return true; - } - if (!qemu_co_queue_empty(&bs->throttled_reqs[0])) { - return true; - } - if (!qemu_co_queue_empty(&bs->throttled_reqs[1])) { + if (atomic_read(&bs->in_flight)) { return true; } @@ -268,15 +262,12 @@ static void bdrv_drain_recurse(BlockDriverState *bs) */ void bdrv_drain(BlockDriverState *bs) { - bool busy = true; - bdrv_no_throttling_begin(bs); bdrv_io_unplugged_begin(bs); bdrv_drain_recurse(bs); - while (busy) { + while (bdrv_requests_pending(bs)) { /* Keep iterating */ - busy = bdrv_requests_pending(bs); - busy |= aio_poll(bdrv_get_aio_context(bs), busy); + aio_poll(bdrv_get_aio_context(bs), true); } bdrv_io_unplugged_end(bs); bdrv_no_throttling_end(bs); @@ -291,7 +282,7 @@ void bdrv_drain(BlockDriverState *bs) void bdrv_drain_all(void) { /* Always run first iteration so any pending completion BHs run */ - bool busy = true; + bool waited = true; BlockDriverState *bs = NULL; GSList *aio_ctxs = NULL, *ctx; @@ -318,8 +309,8 @@ void bdrv_drain_all(void) * request completion. Therefore we must keep looping until there was no * more activity rather than simply draining each device independently. */ - while (busy) { - busy = false; + while (waited) { + waited = false; for (ctx = aio_ctxs; ctx != NULL; ctx = ctx->next) { AioContext *aio_context = ctx->data; @@ -329,12 +320,11 @@ void bdrv_drain_all(void) while ((bs = bdrv_next(bs))) { if (aio_context == bdrv_get_aio_context(bs)) { if (bdrv_requests_pending(bs)) { - busy = true; - aio_poll(aio_context, busy); + aio_poll(aio_context, true); + waited = true; } } } - busy |= aio_poll(aio_context, false); aio_context_release(aio_context); } } @@ -457,6 +447,16 @@ static bool tracked_request_overlaps(BdrvTrackedRequest *req, return true; } +void bdrv_inc_in_flight(BlockDriverState *bs) +{ + atomic_inc(&bs->in_flight); +} + +void bdrv_dec_in_flight(BlockDriverState *bs) +{ + atomic_dec(&bs->in_flight); +} + static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self) { BlockDriverState *bs = self->bs; @@ -963,6 +963,8 @@ static int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs, return ret; } + bdrv_inc_in_flight(bs); + /* Don't do copy-on-read if we read data before write operation */ if (bs->copy_on_read && !(flags & BDRV_REQ_NO_SERIALISING)) { flags |= BDRV_REQ_COPY_ON_READ; @@ -1003,6 +1005,7 @@ static int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs, use_local_qiov ? &local_qiov : qiov, flags); tracked_request_end(&req); + bdrv_dec_in_flight(bs); if (use_local_qiov) { qemu_iovec_destroy(&local_qiov); @@ -1310,6 +1313,8 @@ static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs, return ret; } + bdrv_inc_in_flight(bs); + /* throttling disk I/O */ if (bs->throttle_state) { throttle_group_co_io_limits_intercept(bs, bytes, true); @@ -1408,6 +1413,7 @@ fail: qemu_vfree(tail_buf); out: tracked_request_end(&req); + bdrv_dec_in_flight(bs); return ret; } @@ -2065,6 +2071,7 @@ static const AIOCBInfo bdrv_em_aiocb_info = { static void bdrv_aio_bh_cb(void *opaque) { BlockAIOCBSync *acb = opaque; + BlockDriverState *bs = acb->common.bs; if (!acb->is_write && acb->ret >= 0) { qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size); @@ -2072,6 +2079,7 @@ static void bdrv_aio_bh_cb(void *opaque) qemu_vfree(acb->bounce); acb->common.cb(acb->common.opaque, acb->ret); qemu_bh_delete(acb->bh); + bdrv_dec_in_flight(bs); acb->bh = NULL; qemu_aio_unref(acb); } @@ -2087,6 +2095,7 @@ static BlockAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs, { BlockAIOCBSync *acb; + bdrv_inc_in_flight(bs); acb = qemu_aio_get(&bdrv_em_aiocb_info, bs, cb, opaque); acb->is_write = is_write; acb->qiov = qiov; @@ -2139,6 +2148,7 @@ static void bdrv_co_complete(BlockAIOCBCoroutine *acb) { if (!acb->need_bh) { acb->common.cb(acb->common.opaque, acb->req.error); + bdrv_dec_in_flight(acb->common.bs); qemu_aio_unref(acb); } } @@ -2192,6 +2202,9 @@ static BlockAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs, Coroutine *co; BlockAIOCBCoroutine *acb; + /* Matched by bdrv_co_complete's bdrv_dec_in_flight. */ + bdrv_inc_in_flight(bs); + acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque); acb->need_bh = true; acb->req.error = -EINPROGRESS; @@ -2225,6 +2238,9 @@ BlockAIOCB *bdrv_aio_flush(BlockDriverState *bs, Coroutine *co; BlockAIOCBCoroutine *acb; + /* Matched by bdrv_co_complete's bdrv_dec_in_flight. */ + bdrv_inc_in_flight(bs); + acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque); acb->need_bh = true; acb->req.error = -EINPROGRESS; @@ -2254,6 +2270,9 @@ BlockAIOCB *bdrv_aio_discard(BlockDriverState *bs, trace_bdrv_aio_discard(bs, sector_num, nb_sectors, opaque); + /* Matched by bdrv_co_complete's bdrv_dec_in_flight. */ + bdrv_inc_in_flight(bs); + acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque); acb->need_bh = true; acb->req.error = -EINPROGRESS; @@ -2361,14 +2380,14 @@ static void coroutine_fn bdrv_flush_co_entry(void *opaque) int coroutine_fn bdrv_co_flush(BlockDriverState *bs) { int ret; - BdrvTrackedRequest req; if (!bs || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs) || bdrv_is_sg(bs)) { return 0; } - tracked_request_begin(&req, bs, 0, 0, BDRV_TRACKED_FLUSH); + bdrv_inc_in_flight(bs); + /* Write back cached data to the OS even with cache=unsafe */ BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_OS); if (bs->drv->bdrv_co_flush_to_os) { @@ -2423,7 +2442,7 @@ int coroutine_fn bdrv_co_flush(BlockDriverState *bs) flush_parent: ret = bs->file ? bdrv_co_flush(bs->file->bs) : 0; out: - tracked_request_end(&req); + bdrv_dec_in_flight(bs); return ret; } @@ -2491,6 +2510,7 @@ int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num, return 0; } + bdrv_inc_in_flight(bs); tracked_request_begin(&req, bs, sector_num, nb_sectors, BDRV_TRACKED_DISCARD); bdrv_set_dirty(bs, sector_num, nb_sectors); @@ -2543,6 +2563,7 @@ int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num, ret = 0; out: tracked_request_end(&req); + bdrv_dec_in_flight(bs); return ret; } @@ -2588,13 +2609,12 @@ static void bdrv_ioctl_bh_cb(void *opaque) static int bdrv_co_do_ioctl(BlockDriverState *bs, int req, void *buf) { BlockDriver *drv = bs->drv; - BdrvTrackedRequest tracked_req; CoroutineIOCompletion co = { .coroutine = qemu_coroutine_self(), }; BlockAIOCB *acb; - tracked_request_begin(&tracked_req, bs, 0, 0, BDRV_TRACKED_IOCTL); + bdrv_inc_in_flight(bs); if (!drv || !drv->bdrv_aio_ioctl) { co.ret = -ENOTSUP; goto out; @@ -2610,7 +2630,7 @@ static int bdrv_co_do_ioctl(BlockDriverState *bs, int req, void *buf) } qemu_coroutine_yield(); out: - tracked_request_end(&tracked_req); + bdrv_dec_in_flight(bs); return co.ret; } @@ -2667,6 +2687,9 @@ BlockAIOCB *bdrv_aio_ioctl(BlockDriverState *bs, bs, cb, opaque); Coroutine *co; + /* Matched by bdrv_co_complete's bdrv_dec_in_flight. */ + bdrv_inc_in_flight(bs); + acb->need_bh = true; acb->req.error = -EINPROGRESS; acb->req.req = req; diff --git a/block/mirror.c b/block/mirror.c index 793c20c..3f163b8 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -94,6 +94,7 @@ static void mirror_bh_cb(void *opaque) qemu_bh_delete(op->co_enter_bh); g_free(op); + bdrv_dec_in_flight(s->common.bs); if (s->waiting_for_io) { qemu_coroutine_enter(s->common.co, NULL); } @@ -134,6 +135,7 @@ static void mirror_iteration_done(MirrorOp *op, int ret) * If we call qemu_coroutine_enter here, there is the possibility * of a deadlock when the coroutine calls bdrv_drained_begin. */ + bdrv_inc_in_flight(s->common.bs); op->co_enter_bh = qemu_bh_new(mirror_bh_cb, op); qemu_bh_schedule(op->co_enter_bh); } diff --git a/include/block/block_int.h b/include/block/block_int.h index 2838814..89c38c0 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -63,8 +63,6 @@ enum BdrvTrackedRequestType { BDRV_TRACKED_READ, BDRV_TRACKED_WRITE, - BDRV_TRACKED_FLUSH, - BDRV_TRACKED_IOCTL, BDRV_TRACKED_DISCARD, }; @@ -406,7 +404,8 @@ struct BlockDriverState { /* Callback before write request is processed */ NotifierWithReturnList before_write_notifiers; - /* number of in-flight serialising requests */ + /* number of in-flight requests; overall and serialising */ + unsigned int in_flight; unsigned int serialising_in_flight; /* I/O throttling. @@ -713,6 +712,9 @@ bool bdrv_requests_pending(BlockDriverState *bs); void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap **out); void bdrv_undo_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap *in); +void bdrv_inc_in_flight(BlockDriverState *bs); +void bdrv_dec_in_flight(BlockDriverState *bs); + void blockdev_close_all_bdrv_states(void); #endif /* BLOCK_INT_H */
Unlike tracked_requests, this field also counts throttled requests, and remains non-zero if an AIO operation needs a BH to be "really" completed. With this change, it is no longer necessary to have a dummy BdrvTrackedRequest for requests that are never serialising, and it is no longer necessary to poll the AioContext once after bdrv_requests_pending(bs) returns false. Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> --- block/io.c | 71 +++++++++++++++++++++++++++++++---------------- block/mirror.c | 2 ++ include/block/block_int.h | 8 ++++-- 3 files changed, 54 insertions(+), 27 deletions(-)