[SRU,Jammy,1/1] io_uring: ensure IOPOLL locks around deferred work

Message ID 20230803213711.689308-2-cascardo@canonical.com
State New
Series [SRU,Jammy,1/1] io_uring: ensure IOPOLL locks around deferred work

Commit Message

Thadeu Lima de Souza Cascardo Aug. 3, 2023, 9:37 p.m. UTC
From: Jens Axboe <axboe@kernel.dk>

No direct upstream commit exists for this issue. It was fixed in
5.18 as part of a larger rework of the completion side.

io_commit_cqring() writes the CQ ring tail to make it visible, but it
also kicks off any deferred work we have. A ring setup with IOPOLL
does not need any locking around the CQ ring updates, as we're always
under the ctx uring_lock. But if we have deferred work that needs
processing, then io_queue_deferred() assumes that the completion_lock
is held, as it is for !IOPOLL.

Add a lockdep assertion to check and document this fact, and have
io_iopoll_complete() check if we have deferred work and run that
separately with the appropriate lock grabbed.

Cc: stable@vger.kernel.org # 5.10, 5.15
Reported-by: dghost david <daviduniverse18@gmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
(cherry picked from commit fb348857e7b67eefe365052f1423427b66dedbf3)
CVE-2023-21400
Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
---
 io_uring/io_uring.c | 25 +++++++++++++++++++++----
 1 file changed, 21 insertions(+), 4 deletions(-)
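
As background for the fix above, the condition it addresses is reached from userspace by combining an IOPOLL ring with a drained request: IORING_SETUP_IOPOLL makes completions run under ctx->uring_lock only, while IOSQE_IO_DRAIN gives the ring deferred work to flush. The snippet below is a minimal illustrative sketch of those two ingredients using liburing (the file path, the O_DIRECT read and the buffer size are assumptions made for the example); it is not a reproducer for the race itself.

/* Illustrative sketch only: an IOPOLL ring plus a drained request,
 * the combination described in the commit message. Not a CVE reproducer. */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdlib.h>
#include <liburing.h>

int main(void)
{
	struct io_uring ring;
	struct io_uring_sqe *sqe;
	struct io_uring_cqe *cqe;
	void *buf;
	int fd;

	/* IOPOLL requires O_DIRECT I/O on a device/filesystem that supports it. */
	fd = open("/tmp/testfile", O_RDONLY | O_DIRECT);
	if (fd < 0 || posix_memalign(&buf, 4096, 4096))
		return 1;
	if (io_uring_queue_init(8, &ring, IORING_SETUP_IOPOLL))
		return 1;

	/* IOSQE_IO_DRAIN marks the ring as having drain work to consider
	 * (ctx->drain_active), the state io_commit_needs_flush() checks. */
	sqe = io_uring_get_sqe(&ring);
	io_uring_prep_read(sqe, fd, buf, 4096, 0);
	sqe->flags |= IOSQE_IO_DRAIN;

	io_uring_submit(&ring);
	io_uring_wait_cqe(&ring, &cqe);
	io_uring_cqe_seen(&ring, cqe);
	io_uring_queue_exit(&ring);
	return 0;
}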

Comments

Stefan Bader Aug. 4, 2023, 8:21 a.m. UTC | #1
On 03.08.23 23:37, Thadeu Lima de Souza Cascardo wrote:
> (cherry picked from commit fb348857e7b67eefe365052f1423427b66dedbf3)

I suspect we should add a linux-5.15.y to the line above...

> CVE-2023-21400
> Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
> ---
>   io_uring/io_uring.c | 25 +++++++++++++++++++++----
>   1 file changed, 21 insertions(+), 4 deletions(-)
> 
> diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
> index cefa14592e9f..4cde8aee1c75 100644
> --- a/io_uring/io_uring.c
> +++ b/io_uring/io_uring.c
> @@ -1523,6 +1523,8 @@ static void io_kill_timeout(struct io_kiocb *req, int status)
>   
>   static void io_queue_deferred(struct io_ring_ctx *ctx)
>   {
> +	lockdep_assert_held(&ctx->completion_lock);
> +
>   	while (!list_empty(&ctx->defer_list)) {
>   		struct io_defer_entry *de = list_first_entry(&ctx->defer_list,
>   						struct io_defer_entry, list);
> @@ -1574,14 +1576,24 @@ static void __io_commit_cqring_flush(struct io_ring_ctx *ctx)
>   		io_queue_deferred(ctx);
>   }
>   
> -static inline void io_commit_cqring(struct io_ring_ctx *ctx)
> +static inline bool io_commit_needs_flush(struct io_ring_ctx *ctx)
> +{
> +	return ctx->off_timeout_used || ctx->drain_active;
> +}
> +
> +static inline void __io_commit_cqring(struct io_ring_ctx *ctx)
>   {
> -	if (unlikely(ctx->off_timeout_used || ctx->drain_active))
> -		__io_commit_cqring_flush(ctx);
>   	/* order cqe stores with ring update */
>   	smp_store_release(&ctx->rings->cq.tail, ctx->cached_cq_tail);
>   }
>   
> +static inline void io_commit_cqring(struct io_ring_ctx *ctx)
> +{
> +	if (unlikely(io_commit_needs_flush(ctx)))
> +		__io_commit_cqring_flush(ctx);
> +	__io_commit_cqring(ctx);
> +}
> +
>   static inline bool io_sqring_full(struct io_ring_ctx *ctx)
>   {
>   	struct io_rings *r = ctx->rings;
> @@ -2520,7 +2532,12 @@ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events,
>   			io_req_free_batch(&rb, req, &ctx->submit_state);
>   	}
>   
> -	io_commit_cqring(ctx);
> +	if (io_commit_needs_flush(ctx)) {
> +		spin_lock(&ctx->completion_lock);
> +		__io_commit_cqring_flush(ctx);
> +		spin_unlock(&ctx->completion_lock);
> +	}
> +	__io_commit_cqring(ctx);
>   	io_cqring_ev_posted_iopoll(ctx);
>   	io_req_free_batch_finish(ctx, &rb);
>   }
Thadeu Lima de Souza Cascardo Aug. 4, 2023, 10:29 a.m. UTC | #2
On Fri, Aug 04, 2023 at 10:21:37AM +0200, Stefan Bader wrote:
> On 03.08.23 23:37, Thadeu Lima de Souza Cascardo wrote:
> > (cherry picked from commit fb348857e7b67eefe365052f1423427b66dedbf3)
> 
> I suspect we should add a linux-5.15.y to the line above...
> 

That's correct. This has come from linux-5.15.y.

Cascardo.

Patch

diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index cefa14592e9f..4cde8aee1c75 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -1523,6 +1523,8 @@  static void io_kill_timeout(struct io_kiocb *req, int status)
 
 static void io_queue_deferred(struct io_ring_ctx *ctx)
 {
+	lockdep_assert_held(&ctx->completion_lock);
+
 	while (!list_empty(&ctx->defer_list)) {
 		struct io_defer_entry *de = list_first_entry(&ctx->defer_list,
 						struct io_defer_entry, list);
@@ -1574,14 +1576,24 @@  static void __io_commit_cqring_flush(struct io_ring_ctx *ctx)
 		io_queue_deferred(ctx);
 }
 
-static inline void io_commit_cqring(struct io_ring_ctx *ctx)
+static inline bool io_commit_needs_flush(struct io_ring_ctx *ctx)
+{
+	return ctx->off_timeout_used || ctx->drain_active;
+}
+
+static inline void __io_commit_cqring(struct io_ring_ctx *ctx)
 {
-	if (unlikely(ctx->off_timeout_used || ctx->drain_active))
-		__io_commit_cqring_flush(ctx);
 	/* order cqe stores with ring update */
 	smp_store_release(&ctx->rings->cq.tail, ctx->cached_cq_tail);
 }
 
+static inline void io_commit_cqring(struct io_ring_ctx *ctx)
+{
+	if (unlikely(io_commit_needs_flush(ctx)))
+		__io_commit_cqring_flush(ctx);
+	__io_commit_cqring(ctx);
+}
+
 static inline bool io_sqring_full(struct io_ring_ctx *ctx)
 {
 	struct io_rings *r = ctx->rings;
@@ -2520,7 +2532,12 @@  static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events,
 			io_req_free_batch(&rb, req, &ctx->submit_state);
 	}
 
-	io_commit_cqring(ctx);
+	if (io_commit_needs_flush(ctx)) {
+		spin_lock(&ctx->completion_lock);
+		__io_commit_cqring_flush(ctx);
+		spin_unlock(&ctx->completion_lock);
+	}
+	__io_commit_cqring(ctx);
 	io_cqring_ev_posted_iopoll(ctx);
 	io_req_free_batch_finish(ctx, &rb);
 }
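
Read without the diff markup, the change boils down to the following condensed view (taken from the hunks above; the unchanged body of io_iopoll_complete() is elided): the deferred-work flush is the only piece that takes completion_lock, while publishing the CQ tail stays lock-free under ctx->uring_lock.

static inline bool io_commit_needs_flush(struct io_ring_ctx *ctx)
{
	return ctx->off_timeout_used || ctx->drain_active;
}

static inline void __io_commit_cqring(struct io_ring_ctx *ctx)
{
	/* order cqe stores with ring update */
	smp_store_release(&ctx->rings->cq.tail, ctx->cached_cq_tail);
}

/* !IOPOLL callers keep the old behaviour: flush, then publish the tail;
 * they already hold completion_lock when the flush runs. */
static inline void io_commit_cqring(struct io_ring_ctx *ctx)
{
	if (unlikely(io_commit_needs_flush(ctx)))
		__io_commit_cqring_flush(ctx);
	__io_commit_cqring(ctx);
}

/* Tail of io_iopoll_complete(): grab completion_lock only around the
 * deferred-work flush, then publish the tail without it. */
	if (io_commit_needs_flush(ctx)) {
		spin_lock(&ctx->completion_lock);
		__io_commit_cqring_flush(ctx);
		spin_unlock(&ctx->completion_lock);
	}
	__io_commit_cqring(ctx);
	io_cqring_ev_posted_iopoll(ctx);
	/* ... batch freeing unchanged ... */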