diff mbox series

[v5,1/4] linux-aio: use LinuxAioState from the running thread

Message ID 20230203131731.851116-2-eesposit@redhat.com
State New
Headers show
Series AioContext removal: LinuxAioState and ThreadPool | expand

Commit Message

Emanuele Giuseppe Esposito Feb. 3, 2023, 1:17 p.m. UTC
Remove usage of aio_context_acquire by always submitting asynchronous
AIO to the current thread's LinuxAioState.

In order to prevent mistakes from the caller side, avoid passing LinuxAioState
in laio_io_{plug/unplug} and laio_co_submit, and document the functions
to make clear that they work in the current thread's AioContext.

Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
---
 include/block/aio.h               |  4 ----
 include/block/raw-aio.h           | 18 ++++++++++++------
 include/sysemu/block-backend-io.h |  6 ++++++
 block/file-posix.c                | 10 +++-------
 block/linux-aio.c                 | 29 +++++++++++++++++------------
 5 files changed, 38 insertions(+), 29 deletions(-)

Comments

Stefan Hajnoczi March 1, 2023, 4:16 p.m. UTC | #1
On Fri, Feb 03, 2023 at 08:17:28AM -0500, Emanuele Giuseppe Esposito wrote:
> Remove usage of aio_context_acquire by always submitting asynchronous
> AIO to the current thread's LinuxAioState.
> 
> In order to prevent mistakes from the caller side, avoid passing LinuxAioState
> in laio_io_{plug/unplug} and laio_co_submit, and document the functions
> to make clear that they work in the current thread's AioContext.
> 
> Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
> ---
>  include/block/aio.h               |  4 ----
>  include/block/raw-aio.h           | 18 ++++++++++++------
>  include/sysemu/block-backend-io.h |  6 ++++++
>  block/file-posix.c                | 10 +++-------
>  block/linux-aio.c                 | 29 +++++++++++++++++------------
>  5 files changed, 38 insertions(+), 29 deletions(-)
> 
> diff --git a/include/block/aio.h b/include/block/aio.h
> index 8fba6a3584..b6b396cfcb 100644
> --- a/include/block/aio.h
> +++ b/include/block/aio.h
> @@ -208,10 +208,6 @@ struct AioContext {
>      struct ThreadPool *thread_pool;
>  
>  #ifdef CONFIG_LINUX_AIO
> -    /*
> -     * State for native Linux AIO.  Uses aio_context_acquire/release for
> -     * locking.
> -     */
>      struct LinuxAioState *linux_aio;
>  #endif
>  #ifdef CONFIG_LINUX_IO_URING
> diff --git a/include/block/raw-aio.h b/include/block/raw-aio.h
> index f8cda9df91..db614472e6 100644
> --- a/include/block/raw-aio.h
> +++ b/include/block/raw-aio.h
> @@ -49,14 +49,20 @@
>  typedef struct LinuxAioState LinuxAioState;
>  LinuxAioState *laio_init(Error **errp);
>  void laio_cleanup(LinuxAioState *s);
> -int coroutine_fn laio_co_submit(BlockDriverState *bs, LinuxAioState *s, int fd,
> -                                uint64_t offset, QEMUIOVector *qiov, int type,
> -                                uint64_t dev_max_batch);
> +
> +/* laio_co_submit: submit I/O requests in the thread's current AioContext. */
> +int coroutine_fn laio_co_submit(int fd, uint64_t offset, QEMUIOVector *qiov,
> +                                int type, uint64_t dev_max_batch);
> +
>  void laio_detach_aio_context(LinuxAioState *s, AioContext *old_context);
>  void laio_attach_aio_context(LinuxAioState *s, AioContext *new_context);
> -void laio_io_plug(BlockDriverState *bs, LinuxAioState *s);
> -void laio_io_unplug(BlockDriverState *bs, LinuxAioState *s,
> -                    uint64_t dev_max_batch);
> +
> +/*
> + * laio_io_plug/unplug work in the thread's current AioContext, therefore the
> + * caller must ensure that they are paired in the same IOThread.
> + */
> +void laio_io_plug(void);
> +void laio_io_unplug(uint64_t dev_max_batch);
>  #endif
>  /* io_uring.c - Linux io_uring implementation */
>  #ifdef CONFIG_LINUX_IO_URING
> diff --git a/include/sysemu/block-backend-io.h b/include/sysemu/block-backend-io.h
> index 031a27ba10..d41698ccc5 100644
> --- a/include/sysemu/block-backend-io.h
> +++ b/include/sysemu/block-backend-io.h
> @@ -74,8 +74,14 @@ void blk_iostatus_set_err(BlockBackend *blk, int error);
>  int blk_get_max_iov(BlockBackend *blk);
>  int blk_get_max_hw_iov(BlockBackend *blk);
>  
> +/*
> + * blk_io_plug/unplug are thread-local operations. This means that multiple
> + * IOThreads can simultaneously call plug/unplug, but the caller must ensure
> + * that each unplug() is called in the same IOThread of the matching plug().
> + */
>  void blk_io_plug(BlockBackend *blk);
>  void blk_io_unplug(BlockBackend *blk);
> +
>  AioContext *blk_get_aio_context(BlockBackend *blk);
>  BlockAcctStats *blk_get_stats(BlockBackend *blk);
>  void *blk_aio_get(const AIOCBInfo *aiocb_info, BlockBackend *blk,
> diff --git a/block/file-posix.c b/block/file-posix.c
> index fa227d9d14..fa99d1c25a 100644
> --- a/block/file-posix.c
> +++ b/block/file-posix.c
> @@ -2095,10 +2095,8 @@ static int coroutine_fn raw_co_prw(BlockDriverState *bs, uint64_t offset,
>  #endif
>  #ifdef CONFIG_LINUX_AIO
>      } else if (s->use_linux_aio) {
> -        LinuxAioState *aio = aio_get_linux_aio(bdrv_get_aio_context(bs));
>          assert(qiov->size == bytes);
> -        return laio_co_submit(bs, aio, s->fd, offset, qiov, type,
> -                              s->aio_max_batch);
> +        return laio_co_submit(s->fd, offset, qiov, type, s->aio_max_batch);

I'm having second thoughts here. This is correct in an IOThread today,
but the main loop thread case concerns me:

This patch changes behavior when the main loop or vCPU thread submits
I/O. Before, the IOThread's LinuxAioState would be used. Now the main
loop's LinuxAioState will be used instead and aio callbacks will be
invoked in the main loop thread instead of the IOThread.

This change will be fine when QEMU block layer support is complete, but
does it already work today?

When blk_preadv() is called from a non-coroutine in the main loop thread
then the coroutine is spawned in the IOThread today. So we avoid the
issue.

But when blk_preadv() is called from a coroutine in the main loop thread
we'll have multi-queue activity (I/O being processed in both the main
loop thread and IOThread).

I like this patch series and think it's the right thing to do, but I'm
not sure if it's safe to do this yet. We first need to be sure all aio
callbacks are thread-safe (many are already, but there are probably still
some that are not).

Stefan
Kevin Wolf March 7, 2023, 8:48 a.m. UTC | #2
Am 01.03.2023 um 17:16 hat Stefan Hajnoczi geschrieben:
> On Fri, Feb 03, 2023 at 08:17:28AM -0500, Emanuele Giuseppe Esposito wrote:
> > Remove usage of aio_context_acquire by always submitting asynchronous
> > AIO to the current thread's LinuxAioState.
> > 
> > In order to prevent mistakes from the caller side, avoid passing LinuxAioState
> > in laio_io_{plug/unplug} and laio_co_submit, and document the functions
> > to make clear that they work in the current thread's AioContext.
> > 
> > Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
> > ---
> >  include/block/aio.h               |  4 ----
> >  include/block/raw-aio.h           | 18 ++++++++++++------
> >  include/sysemu/block-backend-io.h |  6 ++++++
> >  block/file-posix.c                | 10 +++-------
> >  block/linux-aio.c                 | 29 +++++++++++++++++------------
> >  5 files changed, 38 insertions(+), 29 deletions(-)
> > 
> > diff --git a/include/block/aio.h b/include/block/aio.h
> > index 8fba6a3584..b6b396cfcb 100644
> > --- a/include/block/aio.h
> > +++ b/include/block/aio.h
> > @@ -208,10 +208,6 @@ struct AioContext {
> >      struct ThreadPool *thread_pool;
> >  
> >  #ifdef CONFIG_LINUX_AIO
> > -    /*
> > -     * State for native Linux AIO.  Uses aio_context_acquire/release for
> > -     * locking.
> > -     */
> >      struct LinuxAioState *linux_aio;
> >  #endif
> >  #ifdef CONFIG_LINUX_IO_URING
> > diff --git a/include/block/raw-aio.h b/include/block/raw-aio.h
> > index f8cda9df91..db614472e6 100644
> > --- a/include/block/raw-aio.h
> > +++ b/include/block/raw-aio.h
> > @@ -49,14 +49,20 @@
> >  typedef struct LinuxAioState LinuxAioState;
> >  LinuxAioState *laio_init(Error **errp);
> >  void laio_cleanup(LinuxAioState *s);
> > -int coroutine_fn laio_co_submit(BlockDriverState *bs, LinuxAioState *s, int fd,
> > -                                uint64_t offset, QEMUIOVector *qiov, int type,
> > -                                uint64_t dev_max_batch);
> > +
> > +/* laio_co_submit: submit I/O requests in the thread's current AioContext. */
> > +int coroutine_fn laio_co_submit(int fd, uint64_t offset, QEMUIOVector *qiov,
> > +                                int type, uint64_t dev_max_batch);
> > +
> >  void laio_detach_aio_context(LinuxAioState *s, AioContext *old_context);
> >  void laio_attach_aio_context(LinuxAioState *s, AioContext *new_context);
> > -void laio_io_plug(BlockDriverState *bs, LinuxAioState *s);
> > -void laio_io_unplug(BlockDriverState *bs, LinuxAioState *s,
> > -                    uint64_t dev_max_batch);
> > +
> > +/*
> > + * laio_io_plug/unplug work in the thread's current AioContext, therefore the
> > + * caller must ensure that they are paired in the same IOThread.
> > + */
> > +void laio_io_plug(void);
> > +void laio_io_unplug(uint64_t dev_max_batch);
> >  #endif
> >  /* io_uring.c - Linux io_uring implementation */
> >  #ifdef CONFIG_LINUX_IO_URING
> > diff --git a/include/sysemu/block-backend-io.h b/include/sysemu/block-backend-io.h
> > index 031a27ba10..d41698ccc5 100644
> > --- a/include/sysemu/block-backend-io.h
> > +++ b/include/sysemu/block-backend-io.h
> > @@ -74,8 +74,14 @@ void blk_iostatus_set_err(BlockBackend *blk, int error);
> >  int blk_get_max_iov(BlockBackend *blk);
> >  int blk_get_max_hw_iov(BlockBackend *blk);
> >  
> > +/*
> > + * blk_io_plug/unplug are thread-local operations. This means that multiple
> > + * IOThreads can simultaneously call plug/unplug, but the caller must ensure
> > + * that each unplug() is called in the same IOThread of the matching plug().
> > + */
> >  void blk_io_plug(BlockBackend *blk);
> >  void blk_io_unplug(BlockBackend *blk);
> > +
> >  AioContext *blk_get_aio_context(BlockBackend *blk);
> >  BlockAcctStats *blk_get_stats(BlockBackend *blk);
> >  void *blk_aio_get(const AIOCBInfo *aiocb_info, BlockBackend *blk,
> > diff --git a/block/file-posix.c b/block/file-posix.c
> > index fa227d9d14..fa99d1c25a 100644
> > --- a/block/file-posix.c
> > +++ b/block/file-posix.c
> > @@ -2095,10 +2095,8 @@ static int coroutine_fn raw_co_prw(BlockDriverState *bs, uint64_t offset,
> >  #endif
> >  #ifdef CONFIG_LINUX_AIO
> >      } else if (s->use_linux_aio) {
> > -        LinuxAioState *aio = aio_get_linux_aio(bdrv_get_aio_context(bs));
> >          assert(qiov->size == bytes);
> > -        return laio_co_submit(bs, aio, s->fd, offset, qiov, type,
> > -                              s->aio_max_batch);
> > +        return laio_co_submit(s->fd, offset, qiov, type, s->aio_max_batch);
> 
> I'm having second thoughts here. This is correct in an IOThread today,
> but the main loop thread case concerns me:
> 
> This patch changes behavior when the main loop or vCPU thread submits
> I/O. Before, the IOThread's LinuxAioState would be used. Now the main
> loop's LinuxAioState will be used instead and aio callbacks will be
> invoked in the main loop thread instead of the IOThread.

You mean we have a device that has a separate iothread, but a request is
submitted from the main thread? This isn't even allowed today; if a node
is in an iothread, all I/O must be submitted from that iothread. Do you
know any code that does submit I/O from the main thread instead?

> This change will be fine when QEMU block layer support is complete, but
> will does it already work today?
> 
> When blk_preadv() is called from a non-coroutine in the main loop thread
> then the coroutine is spawned in the IOThread today. So we avoid the
> issue.
> 
> But when blk_preadv() is called from a coroutine in the main loop thread
> we'll have multi-queue activity (I/O being processed in both the main
> loop thread and IOThread).

That's a bug then. But calling blk_*() from coroutine context should be
quite rare anyway in the current code. I can think of .run in the block
jobs and possible some exports.

Actually, we may have a bug in the export code. blk_exp_add() enables
support for changing iothreads only depending on whether the user
requested it, but doesn't check if the export driver actually supports
it. Most do, but FUSE just ignores AioContext changes (it does use the
initial iothread of the node, though, not always the main thread).

> I like this patch series and think it's the right thing to do, but I'm
> not sure if it's safe to do this yet. We first need to be sure all aio
> callbacks are thread-safe (may are already, but there are probably
> still some that are not).

I would argue that if we do have buggy code like this, the new code is
probably better than the old one because getting callbacks scheduled in
a different thread is the more surprising behaviour. It's probably done
by code that doesn't expect to ever run in iothreads, so staying in the
main loop certainly feels safer.

Kevin
Paolo Bonzini March 7, 2023, 10:58 a.m. UTC | #3
On 3/7/23 09:48, Kevin Wolf wrote:
> You mean we have a device that has a separate iothread, but a request is
> submitted from the main thread? This isn't even allowed today; if a node
> is in an iothread, all I/O must be submitted from that iothread. Do you
> know any code that does submit I/O from the main thread instead?

I think it is allowed, you just have to take the AioContext lock around 
the bdrv_*?  For example it could happen when you do block device migration.

Paolo
Kevin Wolf March 7, 2023, 12:17 p.m. UTC | #4
Am 07.03.2023 um 11:58 hat Paolo Bonzini geschrieben:
> On 3/7/23 09:48, Kevin Wolf wrote:
> > You mean we have a device that has a separate iothread, but a request is
> > submitted from the main thread? This isn't even allowed today; if a node
> > is in an iothread, all I/O must be submitted from that iothread. Do you
> > know any code that does submit I/O from the main thread instead?
> 
> I think it is allowed, you just have to take the AioContext lock around the
> bdrv_*?

Didn't we say at some point that we don't want to do this either? Though
maybe it's not strictly forbidden then.

> For example it could happen when you do block device migration.

As in migration/block.c? As far as I can tell, all of the requests made
there are actually processed in the iothread. (blk_aio_*() calls the
callback in the iothread even when it was called from the main thread
itself, which feels like a trap, but it shouldn't be affected by this
change lower in the stack.)

The potentially critical code would be coroutine_fns that call
blk_co_*() or bdrv_co_*() directly while running in a different thread.
Everything else schedules a new coroutine in the AioContext of the block
node.

Kevin
Stefan Hajnoczi March 7, 2023, 2:18 p.m. UTC | #5
On Tue, Mar 07, 2023 at 09:48:51AM +0100, Kevin Wolf wrote:
> Am 01.03.2023 um 17:16 hat Stefan Hajnoczi geschrieben:
> > On Fri, Feb 03, 2023 at 08:17:28AM -0500, Emanuele Giuseppe Esposito wrote:
> > > Remove usage of aio_context_acquire by always submitting asynchronous
> > > AIO to the current thread's LinuxAioState.
> > > 
> > > In order to prevent mistakes from the caller side, avoid passing LinuxAioState
> > > in laio_io_{plug/unplug} and laio_co_submit, and document the functions
> > > to make clear that they work in the current thread's AioContext.
> > > 
> > > Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
> > > ---
> > >  include/block/aio.h               |  4 ----
> > >  include/block/raw-aio.h           | 18 ++++++++++++------
> > >  include/sysemu/block-backend-io.h |  6 ++++++
> > >  block/file-posix.c                | 10 +++-------
> > >  block/linux-aio.c                 | 29 +++++++++++++++++------------
> > >  5 files changed, 38 insertions(+), 29 deletions(-)
> > > 
> > > diff --git a/include/block/aio.h b/include/block/aio.h
> > > index 8fba6a3584..b6b396cfcb 100644
> > > --- a/include/block/aio.h
> > > +++ b/include/block/aio.h
> > > @@ -208,10 +208,6 @@ struct AioContext {
> > >      struct ThreadPool *thread_pool;
> > >  
> > >  #ifdef CONFIG_LINUX_AIO
> > > -    /*
> > > -     * State for native Linux AIO.  Uses aio_context_acquire/release for
> > > -     * locking.
> > > -     */
> > >      struct LinuxAioState *linux_aio;
> > >  #endif
> > >  #ifdef CONFIG_LINUX_IO_URING
> > > diff --git a/include/block/raw-aio.h b/include/block/raw-aio.h
> > > index f8cda9df91..db614472e6 100644
> > > --- a/include/block/raw-aio.h
> > > +++ b/include/block/raw-aio.h
> > > @@ -49,14 +49,20 @@
> > >  typedef struct LinuxAioState LinuxAioState;
> > >  LinuxAioState *laio_init(Error **errp);
> > >  void laio_cleanup(LinuxAioState *s);
> > > -int coroutine_fn laio_co_submit(BlockDriverState *bs, LinuxAioState *s, int fd,
> > > -                                uint64_t offset, QEMUIOVector *qiov, int type,
> > > -                                uint64_t dev_max_batch);
> > > +
> > > +/* laio_co_submit: submit I/O requests in the thread's current AioContext. */
> > > +int coroutine_fn laio_co_submit(int fd, uint64_t offset, QEMUIOVector *qiov,
> > > +                                int type, uint64_t dev_max_batch);
> > > +
> > >  void laio_detach_aio_context(LinuxAioState *s, AioContext *old_context);
> > >  void laio_attach_aio_context(LinuxAioState *s, AioContext *new_context);
> > > -void laio_io_plug(BlockDriverState *bs, LinuxAioState *s);
> > > -void laio_io_unplug(BlockDriverState *bs, LinuxAioState *s,
> > > -                    uint64_t dev_max_batch);
> > > +
> > > +/*
> > > + * laio_io_plug/unplug work in the thread's current AioContext, therefore the
> > > + * caller must ensure that they are paired in the same IOThread.
> > > + */
> > > +void laio_io_plug(void);
> > > +void laio_io_unplug(uint64_t dev_max_batch);
> > >  #endif
> > >  /* io_uring.c - Linux io_uring implementation */
> > >  #ifdef CONFIG_LINUX_IO_URING
> > > diff --git a/include/sysemu/block-backend-io.h b/include/sysemu/block-backend-io.h
> > > index 031a27ba10..d41698ccc5 100644
> > > --- a/include/sysemu/block-backend-io.h
> > > +++ b/include/sysemu/block-backend-io.h
> > > @@ -74,8 +74,14 @@ void blk_iostatus_set_err(BlockBackend *blk, int error);
> > >  int blk_get_max_iov(BlockBackend *blk);
> > >  int blk_get_max_hw_iov(BlockBackend *blk);
> > >  
> > > +/*
> > > + * blk_io_plug/unplug are thread-local operations. This means that multiple
> > > + * IOThreads can simultaneously call plug/unplug, but the caller must ensure
> > > + * that each unplug() is called in the same IOThread of the matching plug().
> > > + */
> > >  void blk_io_plug(BlockBackend *blk);
> > >  void blk_io_unplug(BlockBackend *blk);
> > > +
> > >  AioContext *blk_get_aio_context(BlockBackend *blk);
> > >  BlockAcctStats *blk_get_stats(BlockBackend *blk);
> > >  void *blk_aio_get(const AIOCBInfo *aiocb_info, BlockBackend *blk,
> > > diff --git a/block/file-posix.c b/block/file-posix.c
> > > index fa227d9d14..fa99d1c25a 100644
> > > --- a/block/file-posix.c
> > > +++ b/block/file-posix.c
> > > @@ -2095,10 +2095,8 @@ static int coroutine_fn raw_co_prw(BlockDriverState *bs, uint64_t offset,
> > >  #endif
> > >  #ifdef CONFIG_LINUX_AIO
> > >      } else if (s->use_linux_aio) {
> > > -        LinuxAioState *aio = aio_get_linux_aio(bdrv_get_aio_context(bs));
> > >          assert(qiov->size == bytes);
> > > -        return laio_co_submit(bs, aio, s->fd, offset, qiov, type,
> > > -                              s->aio_max_batch);
> > > +        return laio_co_submit(s->fd, offset, qiov, type, s->aio_max_batch);
> > 
> > I'm having second thoughts here. This is correct in an IOThread today,
> > but the main loop thread case concerns me:
> > 
> > This patch changes behavior when the main loop or vCPU thread submits
> > I/O. Before, the IOThread's LinuxAioState would be used. Now the main
> > loop's LinuxAioState will be used instead and aio callbacks will be
> > invoked in the main loop thread instead of the IOThread.
> 
> You mean we have a device that has a separate iothread, but a request is
> submitted from the main thread? This isn't even allowed today; if a node
> is in an iothread, all I/O must be submitted from that iothread. Do you
> know any code that does submit I/O from the main thread instead?

I think you're right. My mental model was outdated. Both the coroutine
and non-coroutine code paths schedule coroutines in the AioContext.

However, I think this patch series is still risky because it could
reveal latent bugs. Let's merge it in the next development cycle (soft
freeze is today!) to avoid destabilizing 8.0.

Stefan
Kevin Wolf March 8, 2023, 11:42 a.m. UTC | #6
Am 07.03.2023 um 15:18 hat Stefan Hajnoczi geschrieben:
> On Tue, Mar 07, 2023 at 09:48:51AM +0100, Kevin Wolf wrote:
> > Am 01.03.2023 um 17:16 hat Stefan Hajnoczi geschrieben:
> > > On Fri, Feb 03, 2023 at 08:17:28AM -0500, Emanuele Giuseppe Esposito wrote:
> > > > Remove usage of aio_context_acquire by always submitting asynchronous
> > > > AIO to the current thread's LinuxAioState.
> > > > 
> > > > In order to prevent mistakes from the caller side, avoid passing LinuxAioState
> > > > in laio_io_{plug/unplug} and laio_co_submit, and document the functions
> > > > to make clear that they work in the current thread's AioContext.
> > > > 
> > > > Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
> > > > ---
> > > >  include/block/aio.h               |  4 ----
> > > >  include/block/raw-aio.h           | 18 ++++++++++++------
> > > >  include/sysemu/block-backend-io.h |  6 ++++++
> > > >  block/file-posix.c                | 10 +++-------
> > > >  block/linux-aio.c                 | 29 +++++++++++++++++------------
> > > >  5 files changed, 38 insertions(+), 29 deletions(-)
> > > > 
> > > > diff --git a/include/block/aio.h b/include/block/aio.h
> > > > index 8fba6a3584..b6b396cfcb 100644
> > > > --- a/include/block/aio.h
> > > > +++ b/include/block/aio.h
> > > > @@ -208,10 +208,6 @@ struct AioContext {
> > > >      struct ThreadPool *thread_pool;
> > > >  
> > > >  #ifdef CONFIG_LINUX_AIO
> > > > -    /*
> > > > -     * State for native Linux AIO.  Uses aio_context_acquire/release for
> > > > -     * locking.
> > > > -     */
> > > >      struct LinuxAioState *linux_aio;
> > > >  #endif
> > > >  #ifdef CONFIG_LINUX_IO_URING
> > > > diff --git a/include/block/raw-aio.h b/include/block/raw-aio.h
> > > > index f8cda9df91..db614472e6 100644
> > > > --- a/include/block/raw-aio.h
> > > > +++ b/include/block/raw-aio.h
> > > > @@ -49,14 +49,20 @@
> > > >  typedef struct LinuxAioState LinuxAioState;
> > > >  LinuxAioState *laio_init(Error **errp);
> > > >  void laio_cleanup(LinuxAioState *s);
> > > > -int coroutine_fn laio_co_submit(BlockDriverState *bs, LinuxAioState *s, int fd,
> > > > -                                uint64_t offset, QEMUIOVector *qiov, int type,
> > > > -                                uint64_t dev_max_batch);
> > > > +
> > > > +/* laio_co_submit: submit I/O requests in the thread's current AioContext. */
> > > > +int coroutine_fn laio_co_submit(int fd, uint64_t offset, QEMUIOVector *qiov,
> > > > +                                int type, uint64_t dev_max_batch);
> > > > +
> > > >  void laio_detach_aio_context(LinuxAioState *s, AioContext *old_context);
> > > >  void laio_attach_aio_context(LinuxAioState *s, AioContext *new_context);
> > > > -void laio_io_plug(BlockDriverState *bs, LinuxAioState *s);
> > > > -void laio_io_unplug(BlockDriverState *bs, LinuxAioState *s,
> > > > -                    uint64_t dev_max_batch);
> > > > +
> > > > +/*
> > > > + * laio_io_plug/unplug work in the thread's current AioContext, therefore the
> > > > + * caller must ensure that they are paired in the same IOThread.
> > > > + */
> > > > +void laio_io_plug(void);
> > > > +void laio_io_unplug(uint64_t dev_max_batch);
> > > >  #endif
> > > >  /* io_uring.c - Linux io_uring implementation */
> > > >  #ifdef CONFIG_LINUX_IO_URING
> > > > diff --git a/include/sysemu/block-backend-io.h b/include/sysemu/block-backend-io.h
> > > > index 031a27ba10..d41698ccc5 100644
> > > > --- a/include/sysemu/block-backend-io.h
> > > > +++ b/include/sysemu/block-backend-io.h
> > > > @@ -74,8 +74,14 @@ void blk_iostatus_set_err(BlockBackend *blk, int error);
> > > >  int blk_get_max_iov(BlockBackend *blk);
> > > >  int blk_get_max_hw_iov(BlockBackend *blk);
> > > >  
> > > > +/*
> > > > + * blk_io_plug/unplug are thread-local operations. This means that multiple
> > > > + * IOThreads can simultaneously call plug/unplug, but the caller must ensure
> > > > + * that each unplug() is called in the same IOThread of the matching plug().
> > > > + */
> > > >  void blk_io_plug(BlockBackend *blk);
> > > >  void blk_io_unplug(BlockBackend *blk);
> > > > +
> > > >  AioContext *blk_get_aio_context(BlockBackend *blk);
> > > >  BlockAcctStats *blk_get_stats(BlockBackend *blk);
> > > >  void *blk_aio_get(const AIOCBInfo *aiocb_info, BlockBackend *blk,
> > > > diff --git a/block/file-posix.c b/block/file-posix.c
> > > > index fa227d9d14..fa99d1c25a 100644
> > > > --- a/block/file-posix.c
> > > > +++ b/block/file-posix.c
> > > > @@ -2095,10 +2095,8 @@ static int coroutine_fn raw_co_prw(BlockDriverState *bs, uint64_t offset,
> > > >  #endif
> > > >  #ifdef CONFIG_LINUX_AIO
> > > >      } else if (s->use_linux_aio) {
> > > > -        LinuxAioState *aio = aio_get_linux_aio(bdrv_get_aio_context(bs));
> > > >          assert(qiov->size == bytes);
> > > > -        return laio_co_submit(bs, aio, s->fd, offset, qiov, type,
> > > > -                              s->aio_max_batch);
> > > > +        return laio_co_submit(s->fd, offset, qiov, type, s->aio_max_batch);
> > > 
> > > I'm having second thoughts here. This is correct in an IOThread today,
> > > but the main loop thread case concerns me:
> > > 
> > > This patch changes behavior when the main loop or vCPU thread submits
> > > I/O. Before, the IOThread's LinuxAioState would be used. Now the main
> > > loop's LinuxAioState will be used instead and aio callbacks will be
> > > invoked in the main loop thread instead of the IOThread.
> > 
> > You mean we have a device that has a separate iothread, but a request is
> > submitted from the main thread? This isn't even allowed today; if a node
> > is in an iothread, all I/O must be submitted from that iothread. Do you
> > know any code that does submit I/O from the main thread instead?
> 
> I think you're right. My mental model was outdated. Both the coroutine
> and non-coroutine code paths schedule coroutines in the AioContext.
> 
> However, I think this patch series is still risky because it could
> reveal latent bugs. Let's merge it in the next development cycle (soft
> freeze is today!) to avoid destabilizing 8.0.

Makes sense, I've already started a block-next anyway.

So is this an R-b or A-b or nothing for now?

Kevin
Stefan Hajnoczi March 8, 2023, 5:24 p.m. UTC | #7
On Wed, Mar 08, 2023 at 12:42:11PM +0100, Kevin Wolf wrote:
> Am 07.03.2023 um 15:18 hat Stefan Hajnoczi geschrieben:
> > On Tue, Mar 07, 2023 at 09:48:51AM +0100, Kevin Wolf wrote:
> > > Am 01.03.2023 um 17:16 hat Stefan Hajnoczi geschrieben:
> > > > On Fri, Feb 03, 2023 at 08:17:28AM -0500, Emanuele Giuseppe Esposito wrote:
> > > > > Remove usage of aio_context_acquire by always submitting asynchronous
> > > > > AIO to the current thread's LinuxAioState.
> > > > > 
> > > > > In order to prevent mistakes from the caller side, avoid passing LinuxAioState
> > > > > in laio_io_{plug/unplug} and laio_co_submit, and document the functions
> > > > > to make clear that they work in the current thread's AioContext.
> > > > > 
> > > > > Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
> > > > > ---
> > > > >  include/block/aio.h               |  4 ----
> > > > >  include/block/raw-aio.h           | 18 ++++++++++++------
> > > > >  include/sysemu/block-backend-io.h |  6 ++++++
> > > > >  block/file-posix.c                | 10 +++-------
> > > > >  block/linux-aio.c                 | 29 +++++++++++++++++------------
> > > > >  5 files changed, 38 insertions(+), 29 deletions(-)
> > > > > 
> > > > > diff --git a/include/block/aio.h b/include/block/aio.h
> > > > > index 8fba6a3584..b6b396cfcb 100644
> > > > > --- a/include/block/aio.h
> > > > > +++ b/include/block/aio.h
> > > > > @@ -208,10 +208,6 @@ struct AioContext {
> > > > >      struct ThreadPool *thread_pool;
> > > > >  
> > > > >  #ifdef CONFIG_LINUX_AIO
> > > > > -    /*
> > > > > -     * State for native Linux AIO.  Uses aio_context_acquire/release for
> > > > > -     * locking.
> > > > > -     */
> > > > >      struct LinuxAioState *linux_aio;
> > > > >  #endif
> > > > >  #ifdef CONFIG_LINUX_IO_URING
> > > > > diff --git a/include/block/raw-aio.h b/include/block/raw-aio.h
> > > > > index f8cda9df91..db614472e6 100644
> > > > > --- a/include/block/raw-aio.h
> > > > > +++ b/include/block/raw-aio.h
> > > > > @@ -49,14 +49,20 @@
> > > > >  typedef struct LinuxAioState LinuxAioState;
> > > > >  LinuxAioState *laio_init(Error **errp);
> > > > >  void laio_cleanup(LinuxAioState *s);
> > > > > -int coroutine_fn laio_co_submit(BlockDriverState *bs, LinuxAioState *s, int fd,
> > > > > -                                uint64_t offset, QEMUIOVector *qiov, int type,
> > > > > -                                uint64_t dev_max_batch);
> > > > > +
> > > > > +/* laio_co_submit: submit I/O requests in the thread's current AioContext. */
> > > > > +int coroutine_fn laio_co_submit(int fd, uint64_t offset, QEMUIOVector *qiov,
> > > > > +                                int type, uint64_t dev_max_batch);
> > > > > +
> > > > >  void laio_detach_aio_context(LinuxAioState *s, AioContext *old_context);
> > > > >  void laio_attach_aio_context(LinuxAioState *s, AioContext *new_context);
> > > > > -void laio_io_plug(BlockDriverState *bs, LinuxAioState *s);
> > > > > -void laio_io_unplug(BlockDriverState *bs, LinuxAioState *s,
> > > > > -                    uint64_t dev_max_batch);
> > > > > +
> > > > > +/*
> > > > > + * laio_io_plug/unplug work in the thread's current AioContext, therefore the
> > > > > + * caller must ensure that they are paired in the same IOThread.
> > > > > + */
> > > > > +void laio_io_plug(void);
> > > > > +void laio_io_unplug(uint64_t dev_max_batch);
> > > > >  #endif
> > > > >  /* io_uring.c - Linux io_uring implementation */
> > > > >  #ifdef CONFIG_LINUX_IO_URING
> > > > > diff --git a/include/sysemu/block-backend-io.h b/include/sysemu/block-backend-io.h
> > > > > index 031a27ba10..d41698ccc5 100644
> > > > > --- a/include/sysemu/block-backend-io.h
> > > > > +++ b/include/sysemu/block-backend-io.h
> > > > > @@ -74,8 +74,14 @@ void blk_iostatus_set_err(BlockBackend *blk, int error);
> > > > >  int blk_get_max_iov(BlockBackend *blk);
> > > > >  int blk_get_max_hw_iov(BlockBackend *blk);
> > > > >  
> > > > > +/*
> > > > > + * blk_io_plug/unplug are thread-local operations. This means that multiple
> > > > > + * IOThreads can simultaneously call plug/unplug, but the caller must ensure
> > > > > + * that each unplug() is called in the same IOThread of the matching plug().
> > > > > + */
> > > > >  void blk_io_plug(BlockBackend *blk);
> > > > >  void blk_io_unplug(BlockBackend *blk);
> > > > > +
> > > > >  AioContext *blk_get_aio_context(BlockBackend *blk);
> > > > >  BlockAcctStats *blk_get_stats(BlockBackend *blk);
> > > > >  void *blk_aio_get(const AIOCBInfo *aiocb_info, BlockBackend *blk,
> > > > > diff --git a/block/file-posix.c b/block/file-posix.c
> > > > > index fa227d9d14..fa99d1c25a 100644
> > > > > --- a/block/file-posix.c
> > > > > +++ b/block/file-posix.c
> > > > > @@ -2095,10 +2095,8 @@ static int coroutine_fn raw_co_prw(BlockDriverState *bs, uint64_t offset,
> > > > >  #endif
> > > > >  #ifdef CONFIG_LINUX_AIO
> > > > >      } else if (s->use_linux_aio) {
> > > > > -        LinuxAioState *aio = aio_get_linux_aio(bdrv_get_aio_context(bs));
> > > > >          assert(qiov->size == bytes);
> > > > > -        return laio_co_submit(bs, aio, s->fd, offset, qiov, type,
> > > > > -                              s->aio_max_batch);
> > > > > +        return laio_co_submit(s->fd, offset, qiov, type, s->aio_max_batch);
> > > > 
> > > > I'm having second thoughts here. This is correct in an IOThread today,
> > > > but the main loop thread case concerns me:
> > > > 
> > > > This patch changes behavior when the main loop or vCPU thread submits
> > > > I/O. Before, the IOThread's LinuxAioState would be used. Now the main
> > > > loop's LinuxAioState will be used instead and aio callbacks will be
> > > > invoked in the main loop thread instead of the IOThread.
> > > 
> > > You mean we have a device that has a separate iothread, but a request is
> > > submitted from the main thread? This isn't even allowed today; if a node
> > > is in an iothread, all I/O must be submitted from that iothread. Do you
> > > know any code that does submit I/O from the main thread instead?
> > 
> > I think you're right. My mental model was outdated. Both the coroutine
> > and non-coroutine code paths schedule coroutines in the AioContext.
> > 
> > However, I think this patch series is still risky because it could
> > reveal latent bugs. Let's merge it in the next development cycle (soft
> > freeze is today!) to avoid destabilizing 8.0.
> 
> Makes sense, I've already started a block-next anyway.
> 
> So is this an R-b or A-b or nothing for now?

I'm happy with it and I've read the code:

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
diff mbox series

Patch

diff --git a/include/block/aio.h b/include/block/aio.h
index 8fba6a3584..b6b396cfcb 100644
--- a/include/block/aio.h
+++ b/include/block/aio.h
@@ -208,10 +208,6 @@  struct AioContext {
     struct ThreadPool *thread_pool;
 
 #ifdef CONFIG_LINUX_AIO
-    /*
-     * State for native Linux AIO.  Uses aio_context_acquire/release for
-     * locking.
-     */
     struct LinuxAioState *linux_aio;
 #endif
 #ifdef CONFIG_LINUX_IO_URING
diff --git a/include/block/raw-aio.h b/include/block/raw-aio.h
index f8cda9df91..db614472e6 100644
--- a/include/block/raw-aio.h
+++ b/include/block/raw-aio.h
@@ -49,14 +49,20 @@ 
 typedef struct LinuxAioState LinuxAioState;
 LinuxAioState *laio_init(Error **errp);
 void laio_cleanup(LinuxAioState *s);
-int coroutine_fn laio_co_submit(BlockDriverState *bs, LinuxAioState *s, int fd,
-                                uint64_t offset, QEMUIOVector *qiov, int type,
-                                uint64_t dev_max_batch);
+
+/* laio_co_submit: submit I/O requests in the thread's current AioContext. */
+int coroutine_fn laio_co_submit(int fd, uint64_t offset, QEMUIOVector *qiov,
+                                int type, uint64_t dev_max_batch);
+
 void laio_detach_aio_context(LinuxAioState *s, AioContext *old_context);
 void laio_attach_aio_context(LinuxAioState *s, AioContext *new_context);
-void laio_io_plug(BlockDriverState *bs, LinuxAioState *s);
-void laio_io_unplug(BlockDriverState *bs, LinuxAioState *s,
-                    uint64_t dev_max_batch);
+
+/*
+ * laio_io_plug/unplug work in the thread's current AioContext, therefore the
+ * caller must ensure that they are paired in the same IOThread.
+ */
+void laio_io_plug(void);
+void laio_io_unplug(uint64_t dev_max_batch);
 #endif
 /* io_uring.c - Linux io_uring implementation */
 #ifdef CONFIG_LINUX_IO_URING
diff --git a/include/sysemu/block-backend-io.h b/include/sysemu/block-backend-io.h
index 031a27ba10..d41698ccc5 100644
--- a/include/sysemu/block-backend-io.h
+++ b/include/sysemu/block-backend-io.h
@@ -74,8 +74,14 @@  void blk_iostatus_set_err(BlockBackend *blk, int error);
 int blk_get_max_iov(BlockBackend *blk);
 int blk_get_max_hw_iov(BlockBackend *blk);
 
+/*
+ * blk_io_plug/unplug are thread-local operations. This means that multiple
+ * IOThreads can simultaneously call plug/unplug, but the caller must ensure
+ * that each unplug() is called in the same IOThread of the matching plug().
+ */
 void blk_io_plug(BlockBackend *blk);
 void blk_io_unplug(BlockBackend *blk);
+
 AioContext *blk_get_aio_context(BlockBackend *blk);
 BlockAcctStats *blk_get_stats(BlockBackend *blk);
 void *blk_aio_get(const AIOCBInfo *aiocb_info, BlockBackend *blk,
diff --git a/block/file-posix.c b/block/file-posix.c
index fa227d9d14..fa99d1c25a 100644
--- a/block/file-posix.c
+++ b/block/file-posix.c
@@ -2095,10 +2095,8 @@  static int coroutine_fn raw_co_prw(BlockDriverState *bs, uint64_t offset,
 #endif
 #ifdef CONFIG_LINUX_AIO
     } else if (s->use_linux_aio) {
-        LinuxAioState *aio = aio_get_linux_aio(bdrv_get_aio_context(bs));
         assert(qiov->size == bytes);
-        return laio_co_submit(bs, aio, s->fd, offset, qiov, type,
-                              s->aio_max_batch);
+        return laio_co_submit(s->fd, offset, qiov, type, s->aio_max_batch);
 #endif
     }
 
@@ -2137,8 +2135,7 @@  static void raw_aio_plug(BlockDriverState *bs)
     BDRVRawState __attribute__((unused)) *s = bs->opaque;
 #ifdef CONFIG_LINUX_AIO
     if (s->use_linux_aio) {
-        LinuxAioState *aio = aio_get_linux_aio(bdrv_get_aio_context(bs));
-        laio_io_plug(bs, aio);
+        laio_io_plug();
     }
 #endif
 #ifdef CONFIG_LINUX_IO_URING
@@ -2154,8 +2151,7 @@  static void raw_aio_unplug(BlockDriverState *bs)
     BDRVRawState __attribute__((unused)) *s = bs->opaque;
 #ifdef CONFIG_LINUX_AIO
     if (s->use_linux_aio) {
-        LinuxAioState *aio = aio_get_linux_aio(bdrv_get_aio_context(bs));
-        laio_io_unplug(bs, aio, s->aio_max_batch);
+        laio_io_unplug(s->aio_max_batch);
     }
 #endif
 #ifdef CONFIG_LINUX_IO_URING
diff --git a/block/linux-aio.c b/block/linux-aio.c
index d2cfb7f523..fc50cdd1bf 100644
--- a/block/linux-aio.c
+++ b/block/linux-aio.c
@@ -16,6 +16,9 @@ 
 #include "qemu/coroutine.h"
 #include "qapi/error.h"
 
+/* Only used for assertions.  */
+#include "qemu/coroutine_int.h"
+
 #include <libaio.h>
 
 /*
@@ -56,10 +59,8 @@  struct LinuxAioState {
     io_context_t ctx;
     EventNotifier e;
 
-    /* io queue for submit at batch.  Protected by AioContext lock. */
+    /* No locking required, only accessed from AioContext home thread */
     LaioQueue io_q;
-
-    /* I/O completion processing.  Only runs in I/O thread.  */
     QEMUBH *completion_bh;
     int event_idx;
     int event_max;
@@ -102,6 +103,7 @@  static void qemu_laio_process_completion(struct qemu_laiocb *laiocb)
      * later.  Coroutines cannot be entered recursively so avoid doing
      * that!
      */
+    assert(laiocb->co->ctx == laiocb->ctx->aio_context);
     if (!qemu_coroutine_entered(laiocb->co)) {
         aio_co_wake(laiocb->co);
     }
@@ -232,13 +234,11 @@  static void qemu_laio_process_completions(LinuxAioState *s)
 
 static void qemu_laio_process_completions_and_submit(LinuxAioState *s)
 {
-    aio_context_acquire(s->aio_context);
     qemu_laio_process_completions(s);
 
     if (!s->io_q.plugged && !QSIMPLEQ_EMPTY(&s->io_q.pending)) {
         ioq_submit(s);
     }
-    aio_context_release(s->aio_context);
 }
 
 static void qemu_laio_completion_bh(void *opaque)
@@ -354,14 +354,19 @@  static uint64_t laio_max_batch(LinuxAioState *s, uint64_t dev_max_batch)
     return max_batch;
 }
 
-void laio_io_plug(BlockDriverState *bs, LinuxAioState *s)
+void laio_io_plug(void)
 {
+    AioContext *ctx = qemu_get_current_aio_context();
+    LinuxAioState *s = aio_get_linux_aio(ctx);
+
     s->io_q.plugged++;
 }
 
-void laio_io_unplug(BlockDriverState *bs, LinuxAioState *s,
-                    uint64_t dev_max_batch)
+void laio_io_unplug(uint64_t dev_max_batch)
 {
+    AioContext *ctx = qemu_get_current_aio_context();
+    LinuxAioState *s = aio_get_linux_aio(ctx);
+
     assert(s->io_q.plugged);
     s->io_q.plugged--;
 
@@ -411,15 +416,15 @@  static int laio_do_submit(int fd, struct qemu_laiocb *laiocb, off_t offset,
     return 0;
 }
 
-int coroutine_fn laio_co_submit(BlockDriverState *bs, LinuxAioState *s, int fd,
-                                uint64_t offset, QEMUIOVector *qiov, int type,
-                                uint64_t dev_max_batch)
+int coroutine_fn laio_co_submit(int fd, uint64_t offset, QEMUIOVector *qiov,
+                                int type, uint64_t dev_max_batch)
 {
     int ret;
+    AioContext *ctx = qemu_get_current_aio_context();
     struct qemu_laiocb laiocb = {
         .co         = qemu_coroutine_self(),
         .nbytes     = qiov->size,
-        .ctx        = s,
+        .ctx        = aio_get_linux_aio(ctx),
         .ret        = -EINPROGRESS,
         .is_read    = (type == QEMU_AIO_READ),
         .qiov       = qiov,