| Message ID | 20180305212743.16664-8-hch@lst.de |
| --- | --- |
| State | Not Applicable, archived |
| Delegated to | David Miller |
| Series | [01/36] aio: don't print the page size at boot time |
On Mon, Mar 05, 2018 at 01:27:14PM -0800, Christoph Hellwig wrote:
> The upcoming aio poll support would like to be able to complete the
> iocb inline from the cancellation context, but that would cause
> a lock order reversal. Add support for optionally moving the cancelation
> outside the context lock to avoid this reversal.

I started to wonder which lock order reversal the commit message refers
to?

I think the reason for adding delayed cancellations is that we want to
be able to call io_cancel -> kiocb_cancel -> aio_poll_cancel ->
aio_complete without double locking ctx_lock?

--D

> Signed-off-by: Christoph Hellwig <hch@lst.de>
> Acked-by: Jeff Moyer <jmoyer@redhat.com>
> ---
>  fs/aio.c | 49 ++++++++++++++++++++++++++++++++++++++-----------
>  1 file changed, 38 insertions(+), 11 deletions(-)
>
> [diff snipped; the full patch appears at the end of this page]
On Mon, Mar 19, 2018 at 08:19:57PM -0700, Darrick J. Wong wrote:
> On Mon, Mar 05, 2018 at 01:27:14PM -0800, Christoph Hellwig wrote:
> > The upcoming aio poll support would like to be able to complete the
> > iocb inline from the cancellation context, but that would cause
> > a lock order reversal. Add support for optionally moving the cancelation
> > outside the context lock to avoid this reversal.
>
> I started to wonder which lock order reversal the commit message refers
> to?
>
> I think the reason for adding delayed cancellations is that we want to
> be able to call io_cancel -> kiocb_cancel -> aio_poll_cancel ->
> aio_complete without double locking ctx_lock?

It is.  I've updated the commit message.
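To make the double locking concrete: aio_complete() takes ctx->ctx_lock to remove the iocb from the active_reqs list, so reaching it from a cancel callback invoked with ctx_lock already held relocks the same lock. The following is a minimal user-space sketch of that chain, not kernel code; all names are hypothetical stand-ins, and an error-checking pthread mutex plays the role of the spinlock so the relock is reported instead of hanging:

	/* Hypothetical user-space model of the ctx_lock recursion. */
	#include <errno.h>
	#include <pthread.h>
	#include <stdio.h>

	static pthread_mutex_t ctx_lock;

	static void aio_complete_sketch(void)
	{
		/* the completion path needs ctx_lock to unlink the iocb */
		int err = pthread_mutex_lock(&ctx_lock);

		if (err == EDEADLK) {
			printf("deadlock: ctx_lock already held by io_cancel\n");
			return;
		}
		pthread_mutex_unlock(&ctx_lock);
	}

	static void aio_poll_cancel_sketch(void)
	{
		/* cancelling a poll request completes it inline */
		aio_complete_sketch();
	}

	int main(void)
	{
		pthread_mutexattr_t attr;

		pthread_mutexattr_init(&attr);
		pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ERRORCHECK);
		pthread_mutex_init(&ctx_lock, &attr);

		/* io_cancel(): walk active_reqs under ctx_lock ... */
		pthread_mutex_lock(&ctx_lock);
		aio_poll_cancel_sketch();	/* ... and cancel while holding it */
		pthread_mutex_unlock(&ctx_lock);
		return 0;
	}

Built with cc sketch.c -lpthread, this prints the deadlock message: the stand-in aio_complete cannot take ctx_lock because its caller, the stand-in io_cancel, still holds it.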
diff --git a/fs/aio.c b/fs/aio.c
index 0b6394b4e528..9d7d6e4cde87 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -170,6 +170,10 @@ struct aio_kiocb {
 	struct list_head	ki_list;	/* the aio core uses this
 						 * for cancellation */
 
+	unsigned int		flags;		/* protected by ctx->ctx_lock */
+#define AIO_IOCB_DELAYED_CANCEL	(1 << 0)
+#define AIO_IOCB_CANCELLED	(1 << 1)
+
 	/*
 	 * If the aio_resfd field of the userspace iocb is not zero,
 	 * this is the underlying eventfd context to deliver events to.
@@ -536,9 +540,9 @@ static int aio_setup_ring(struct kioctx *ctx, unsigned int nr_events)
 #define AIO_EVENTS_FIRST_PAGE	((PAGE_SIZE - sizeof(struct aio_ring)) / sizeof(struct io_event))
 #define AIO_EVENTS_OFFSET	(AIO_EVENTS_PER_PAGE - AIO_EVENTS_FIRST_PAGE)
 
-void kiocb_set_cancel_fn(struct kiocb *iocb, kiocb_cancel_fn *cancel)
+static void __kiocb_set_cancel_fn(struct aio_kiocb *req,
+		kiocb_cancel_fn *cancel, unsigned int iocb_flags)
 {
-	struct aio_kiocb *req = container_of(iocb, struct aio_kiocb, rw);
 	struct kioctx *ctx = req->ki_ctx;
 	unsigned long flags;
 
@@ -548,8 +552,15 @@ void kiocb_set_cancel_fn(struct kiocb *iocb, kiocb_cancel_fn *cancel)
 	spin_lock_irqsave(&ctx->ctx_lock, flags);
 	list_add_tail(&req->ki_list, &ctx->active_reqs);
 	req->ki_cancel = cancel;
+	req->flags |= iocb_flags;
 	spin_unlock_irqrestore(&ctx->ctx_lock, flags);
 }
+
+void kiocb_set_cancel_fn(struct kiocb *iocb, kiocb_cancel_fn *cancel)
+{
+	return __kiocb_set_cancel_fn(container_of(iocb, struct aio_kiocb, rw),
+			cancel, 0);
+}
 EXPORT_SYMBOL(kiocb_set_cancel_fn);
 
 /*
@@ -603,17 +614,27 @@ static void free_ioctx_users(struct percpu_ref *ref)
 {
 	struct kioctx *ctx = container_of(ref, struct kioctx, users);
 	struct aio_kiocb *req;
+	LIST_HEAD(list);
 
 	spin_lock_irq(&ctx->ctx_lock);
-
 	while (!list_empty(&ctx->active_reqs)) {
 		req = list_first_entry(&ctx->active_reqs,
 				       struct aio_kiocb, ki_list);
-		kiocb_cancel(req);
-	}
 
+		if (req->flags & AIO_IOCB_DELAYED_CANCEL) {
+			req->flags |= AIO_IOCB_CANCELLED;
+			list_move_tail(&req->ki_list, &list);
+		} else {
+			kiocb_cancel(req);
+		}
+	}
 	spin_unlock_irq(&ctx->ctx_lock);
 
+	while (!list_empty(&list)) {
+		req = list_first_entry(&list, struct aio_kiocb, ki_list);
+		kiocb_cancel(req);
+	}
+
 	percpu_ref_kill(&ctx->reqs);
 	percpu_ref_put(&ctx->reqs);
 }
@@ -1785,15 +1806,22 @@ SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb,
 	if (unlikely(!ctx))
 		return -EINVAL;
 
-	spin_lock_irq(&ctx->ctx_lock);
+	ret = -EINVAL;
 
+	spin_lock_irq(&ctx->ctx_lock);
 	kiocb = lookup_kiocb(ctx, iocb, key);
+	if (kiocb) {
+		if (kiocb->flags & AIO_IOCB_DELAYED_CANCEL) {
+			kiocb->flags |= AIO_IOCB_CANCELLED;
+		} else {
+			ret = kiocb_cancel(kiocb);
+			kiocb = NULL;
+		}
+	}
+	spin_unlock_irq(&ctx->ctx_lock);
+
 	if (kiocb)
 		ret = kiocb_cancel(kiocb);
-	else
-		ret = -EINVAL;
-
-	spin_unlock_irq(&ctx->ctx_lock);
 
 	if (!ret) {
 		/*
@@ -1805,7 +1833,6 @@ SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb,
 	}
 
 	percpu_ref_put(&ctx->users);
-
 	return ret;
 }
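The fix visible in free_ioctx_users() above is a deferred-work pattern: requests flagged AIO_IOCB_DELAYED_CANCEL are claimed onto a private list while ctx_lock is held, the lock is dropped, and only then are they cancelled, leaving each cancel routine free to take ctx_lock itself to complete the iocb. A minimal user-space sketch of the same pattern, again with hypothetical names and a pthread mutex standing in for the spinlock:

	/* Hypothetical user-space model of the deferred-cancel pattern. */
	#include <pthread.h>
	#include <stdio.h>

	struct req {
		struct req *next;
		int delayed_cancel;		/* AIO_IOCB_DELAYED_CANCEL analogue */
		void (*cancel)(struct req *);
	};

	static pthread_mutex_t ctx_lock = PTHREAD_MUTEX_INITIALIZER;
	static struct req *active_reqs;

	/* A poll-style cancel: completing the request retakes ctx_lock. */
	static void poll_cancel(struct req *req)
	{
		pthread_mutex_lock(&ctx_lock);	/* safe: caller dropped the lock */
		printf("req %p completed under ctx_lock\n", (void *)req);
		pthread_mutex_unlock(&ctx_lock);
	}

	static void cancel_all(void)
	{
		struct req *deferred = NULL, *req;

		pthread_mutex_lock(&ctx_lock);
		while ((req = active_reqs) != NULL) {
			active_reqs = req->next;
			if (req->delayed_cancel) {
				/* claim onto a private list, cancel later */
				req->next = deferred;
				deferred = req;
			} else {
				/* a plain cancel is safe under the lock */
				printf("req %p cancelled inline\n", (void *)req);
			}
		}
		pthread_mutex_unlock(&ctx_lock);

		/* lock dropped: cancel routines may now take it themselves */
		while ((req = deferred) != NULL) {
			deferred = req->next;
			req->cancel(req);
		}
	}

	int main(void)
	{
		struct req plain = { NULL, 0, NULL };
		struct req poll  = { &plain, 1, poll_cancel };

		active_reqs = &poll;
		cancel_all();
		return 0;
	}

Because every request is unlinked from active_reqs before the lock is dropped, each one is claimed exactly once, and the out-of-lock cancel loop cannot race with another walker of the list; that is the same guarantee list_move_tail() onto the stack-local list provides in the patch.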