From patchwork Sun Mar 10 07:08:15 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Al Viro X-Patchwork-Id: 1053911 X-Patchwork-Delegate: davem@davemloft.net Return-Path: X-Original-To: patchwork-incoming-netdev@ozlabs.org Delivered-To: patchwork-incoming-netdev@ozlabs.org Authentication-Results: ozlabs.org; spf=none (mailfrom) smtp.mailfrom=vger.kernel.org (client-ip=209.132.180.67; helo=vger.kernel.org; envelope-from=netdev-owner@vger.kernel.org; receiver=) Authentication-Results: ozlabs.org; dmarc=none (p=none dis=none) header.from=ZenIV.linux.org.uk Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by ozlabs.org (Postfix) with ESMTP id 44HC5w0ptJz9sBp for ; Sun, 10 Mar 2019 18:09:24 +1100 (AEDT) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1726387AbfCJHJS (ORCPT ); Sun, 10 Mar 2019 03:09:18 -0400 Received: from zeniv.linux.org.uk ([195.92.253.2]:35358 "EHLO ZenIV.linux.org.uk" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1725777AbfCJHJR (ORCPT ); Sun, 10 Mar 2019 03:09:17 -0400 Received: from viro by ZenIV.linux.org.uk with local (Exim 4.92 #3 (Red Hat Linux)) id 1h2sZK-000318-AA; Sun, 10 Mar 2019 07:08:29 +0000 From: Al Viro To: Linus Torvalds Cc: Eric Dumazet , David Miller , Jason Baron , kgraul@linux.ibm.com, ktkhai@virtuozzo.com, kyeongdon.kim@lge.com, Linux List Kernel Mailing , Netdev , pabeni@redhat.com, syzkaller-bugs@googlegroups.com, xiyou.wangcong@gmail.com, Christoph Hellwig , zhengbin , bcrl@kvack.org, linux-fsdevel@vger.kernel.org, linux-aio@kvack.org, houtao1@huawei.com, yi.zhang@huawei.com Subject: [PATCH 1/8] pin iocb through aio. Date: Sun, 10 Mar 2019 07:08:15 +0000 Message-Id: <20190310070822.11564-1-viro@ZenIV.linux.org.uk> X-Mailer: git-send-email 2.20.1 In-Reply-To: <20190310070606.GA10138@ZenIV.linux.org.uk> References: <20190310070606.GA10138@ZenIV.linux.org.uk> MIME-Version: 1.0 Sender: netdev-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: netdev@vger.kernel.org From: Linus Torvalds aio_poll() is not the only case that needs file pinned; worse, while aio_read()/aio_write() can live without pinning iocb itself, the proof is rather brittle and can easily break on later changes. Signed-off-by: Linus Torvalds Signed-off-by: Al Viro Reviewed-by: Christoph Hellwig --- fs/aio.c | 37 +++++++++++++++++++++---------------- 1 file changed, 21 insertions(+), 16 deletions(-) diff --git a/fs/aio.c b/fs/aio.c index 3d9669d011b9..363d7d7c8bff 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -1022,6 +1022,9 @@ static bool get_reqs_available(struct kioctx *ctx) /* aio_get_req * Allocate a slot for an aio request. * Returns NULL if no requests are free. + * + * The refcount is initialized to 2 - one for the async op completion, + * one for the synchronous code that does this. */ static inline struct aio_kiocb *aio_get_req(struct kioctx *ctx) { @@ -1034,7 +1037,7 @@ static inline struct aio_kiocb *aio_get_req(struct kioctx *ctx) percpu_ref_get(&ctx->reqs); req->ki_ctx = ctx; INIT_LIST_HEAD(&req->ki_list); - refcount_set(&req->ki_refcnt, 0); + refcount_set(&req->ki_refcnt, 2); req->ki_eventfd = NULL; return req; } @@ -1067,15 +1070,18 @@ static struct kioctx *lookup_ioctx(unsigned long ctx_id) return ret; } +static inline void iocb_destroy(struct aio_kiocb *iocb) +{ + if (iocb->ki_filp) + fput(iocb->ki_filp); + percpu_ref_put(&iocb->ki_ctx->reqs); + kmem_cache_free(kiocb_cachep, iocb); +} + static inline void iocb_put(struct aio_kiocb *iocb) { - if (refcount_read(&iocb->ki_refcnt) == 0 || - refcount_dec_and_test(&iocb->ki_refcnt)) { - if (iocb->ki_filp) - fput(iocb->ki_filp); - percpu_ref_put(&iocb->ki_ctx->reqs); - kmem_cache_free(kiocb_cachep, iocb); - } + if (refcount_dec_and_test(&iocb->ki_refcnt)) + iocb_destroy(iocb); } static void aio_fill_event(struct io_event *ev, struct aio_kiocb *iocb, @@ -1749,9 +1755,6 @@ static ssize_t aio_poll(struct aio_kiocb *aiocb, const struct iocb *iocb) INIT_LIST_HEAD(&req->wait.entry); init_waitqueue_func_entry(&req->wait, aio_poll_wake); - /* one for removal from waitqueue, one for this function */ - refcount_set(&aiocb->ki_refcnt, 2); - mask = vfs_poll(req->file, &apt.pt) & req->events; if (unlikely(!req->head)) { /* we did not manage to set up a waitqueue, done */ @@ -1782,7 +1785,6 @@ static ssize_t aio_poll(struct aio_kiocb *aiocb, const struct iocb *iocb) if (mask) aio_poll_complete(aiocb, mask); - iocb_put(aiocb); return 0; } @@ -1873,18 +1875,21 @@ static int __io_submit_one(struct kioctx *ctx, const struct iocb *iocb, break; } + /* Done with the synchronous reference */ + iocb_put(req); + /* * If ret is 0, we'd either done aio_complete() ourselves or have * arranged for that to be done asynchronously. Anything non-zero * means that we need to destroy req ourselves. */ - if (ret) - goto out_put_req; - return 0; + if (!ret) + return 0; + out_put_req: if (req->ki_eventfd) eventfd_ctx_put(req->ki_eventfd); - iocb_put(req); + iocb_destroy(req); out_put_reqs_available: put_reqs_available(ctx, 1); return ret; From patchwork Sun Mar 10 07:08:16 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Al Viro X-Patchwork-Id: 1053912 X-Patchwork-Delegate: davem@davemloft.net Return-Path: X-Original-To: patchwork-incoming-netdev@ozlabs.org Delivered-To: patchwork-incoming-netdev@ozlabs.org Authentication-Results: ozlabs.org; spf=none (mailfrom) smtp.mailfrom=vger.kernel.org (client-ip=209.132.180.67; helo=vger.kernel.org; envelope-from=netdev-owner@vger.kernel.org; receiver=) Authentication-Results: ozlabs.org; dmarc=none (p=none dis=none) header.from=ZenIV.linux.org.uk Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by ozlabs.org (Postfix) with ESMTP id 44HC6W3zFtz9sBp for ; Sun, 10 Mar 2019 18:09:55 +1100 (AEDT) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1726438AbfCJHJu (ORCPT ); Sun, 10 Mar 2019 03:09:50 -0400 Received: from zeniv.linux.org.uk ([195.92.253.2]:35382 "EHLO ZenIV.linux.org.uk" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1725798AbfCJHJu (ORCPT ); Sun, 10 Mar 2019 03:09:50 -0400 Received: from viro by ZenIV.linux.org.uk with local (Exim 4.92 #3 (Red Hat Linux)) id 1h2sZj-00033E-Tc; Sun, 10 Mar 2019 07:08:59 +0000 From: Al Viro To: Linus Torvalds Cc: Eric Dumazet , David Miller , Jason Baron , kgraul@linux.ibm.com, ktkhai@virtuozzo.com, kyeongdon.kim@lge.com, Linux List Kernel Mailing , Netdev , pabeni@redhat.com, syzkaller-bugs@googlegroups.com, xiyou.wangcong@gmail.com, Christoph Hellwig , zhengbin , bcrl@kvack.org, linux-fsdevel@vger.kernel.org, linux-aio@kvack.org, houtao1@huawei.com, yi.zhang@huawei.com Subject: [PATCH 2/8] keep io_event in aio_kiocb Date: Sun, 10 Mar 2019 07:08:16 +0000 Message-Id: <20190310070822.11564-2-viro@ZenIV.linux.org.uk> X-Mailer: git-send-email 2.20.1 In-Reply-To: <20190310070822.11564-1-viro@ZenIV.linux.org.uk> References: <20190310070606.GA10138@ZenIV.linux.org.uk> <20190310070822.11564-1-viro@ZenIV.linux.org.uk> MIME-Version: 1.0 Sender: netdev-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: netdev@vger.kernel.org From: Al Viro Signed-off-by: Al Viro --- fs/aio.c | 56 +++++++++++++++++++++----------------------------------- 1 file changed, 21 insertions(+), 35 deletions(-) diff --git a/fs/aio.c b/fs/aio.c index 363d7d7c8bff..2249a7a1d6b3 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -204,8 +204,7 @@ struct aio_kiocb { struct kioctx *ki_ctx; kiocb_cancel_fn *ki_cancel; - struct iocb __user *ki_user_iocb; /* user's aiocb */ - __u64 ki_user_data; /* user's data for completion */ + struct io_event ki_res; struct list_head ki_list; /* the aio core uses this * for cancellation */ @@ -1087,10 +1086,9 @@ static inline void iocb_put(struct aio_kiocb *iocb) static void aio_fill_event(struct io_event *ev, struct aio_kiocb *iocb, long res, long res2) { - ev->obj = (u64)(unsigned long)iocb->ki_user_iocb; - ev->data = iocb->ki_user_data; - ev->res = res; - ev->res2 = res2; + iocb->ki_res.res = res; + iocb->ki_res.res2 = res2; + *ev = iocb->ki_res; } /* aio_complete @@ -1126,7 +1124,7 @@ static void aio_complete(struct aio_kiocb *iocb, long res, long res2) flush_dcache_page(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]); pr_debug("%p[%u]: %p: %p %Lx %lx %lx\n", - ctx, tail, iocb, iocb->ki_user_iocb, iocb->ki_user_data, + ctx, tail, iocb, (void __user *)iocb->ki_res.obj, iocb->ki_res.data, res, res2); /* after flagging the request as done, we @@ -1674,13 +1672,13 @@ static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync, __poll_t mask = key_to_poll(key); unsigned long flags; + /* for instances that support it check for an event match first: */ + if (mask && !(mask & req->events)) + return 0; + req->woken = true; - /* for instances that support it check for an event match first: */ if (mask) { - if (!(mask & req->events)) - return 0; - /* * Try to complete the iocb inline if we can. Use * irqsave/irqrestore because not all filesystems (e.g. fuse) @@ -1844,8 +1842,10 @@ static int __io_submit_one(struct kioctx *ctx, const struct iocb *iocb, goto out_put_req; } - req->ki_user_iocb = user_iocb; - req->ki_user_data = iocb->aio_data; + req->ki_res.obj = (u64)(unsigned long)user_iocb; + req->ki_res.data = iocb->aio_data; + req->ki_res.res = 0; + req->ki_res.res2 = 0; switch (iocb->aio_lio_opcode) { case IOCB_CMD_PREAD: @@ -2002,24 +2002,6 @@ COMPAT_SYSCALL_DEFINE3(io_submit, compat_aio_context_t, ctx_id, } #endif -/* lookup_kiocb - * Finds a given iocb for cancellation. - */ -static struct aio_kiocb * -lookup_kiocb(struct kioctx *ctx, struct iocb __user *iocb) -{ - struct aio_kiocb *kiocb; - - assert_spin_locked(&ctx->ctx_lock); - - /* TODO: use a hash or array, this sucks. */ - list_for_each_entry(kiocb, &ctx->active_reqs, ki_list) { - if (kiocb->ki_user_iocb == iocb) - return kiocb; - } - return NULL; -} - /* sys_io_cancel: * Attempts to cancel an iocb previously passed to io_submit. If * the operation is successfully cancelled, the resulting event is @@ -2037,6 +2019,7 @@ SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb, struct aio_kiocb *kiocb; int ret = -EINVAL; u32 key; + u64 obj = (u64)(unsigned long)iocb; if (unlikely(get_user(key, &iocb->aio_key))) return -EFAULT; @@ -2048,10 +2031,13 @@ SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb, return -EINVAL; spin_lock_irq(&ctx->ctx_lock); - kiocb = lookup_kiocb(ctx, iocb); - if (kiocb) { - ret = kiocb->ki_cancel(&kiocb->rw); - list_del_init(&kiocb->ki_list); + /* TODO: use a hash or array, this sucks. */ + list_for_each_entry(kiocb, &ctx->active_reqs, ki_list) { + if (kiocb->ki_res.obj == obj) { + ret = kiocb->ki_cancel(&kiocb->rw); + list_del_init(&kiocb->ki_list); + break; + } } spin_unlock_irq(&ctx->ctx_lock); From patchwork Sun Mar 10 07:08:17 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Al Viro X-Patchwork-Id: 1053913 X-Patchwork-Delegate: davem@davemloft.net Return-Path: X-Original-To: patchwork-incoming-netdev@ozlabs.org Delivered-To: patchwork-incoming-netdev@ozlabs.org Authentication-Results: ozlabs.org; spf=none (mailfrom) smtp.mailfrom=vger.kernel.org (client-ip=209.132.180.67; helo=vger.kernel.org; envelope-from=netdev-owner@vger.kernel.org; receiver=) Authentication-Results: ozlabs.org; dmarc=none (p=none dis=none) header.from=ZenIV.linux.org.uk Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by ozlabs.org (Postfix) with ESMTP id 44HC736SHcz9s9h for ; Sun, 10 Mar 2019 18:10:23 +1100 (AEDT) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1726496AbfCJHKS (ORCPT ); Sun, 10 Mar 2019 03:10:18 -0400 Received: from zeniv.linux.org.uk ([195.92.253.2]:35416 "EHLO ZenIV.linux.org.uk" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1725852AbfCJHKS (ORCPT ); Sun, 10 Mar 2019 03:10:18 -0400 Received: from viro by ZenIV.linux.org.uk with local (Exim 4.92 #3 (Red Hat Linux)) id 1h2sa6-000354-Hz; Sun, 10 Mar 2019 07:09:21 +0000 From: Al Viro To: Linus Torvalds Cc: Eric Dumazet , David Miller , Jason Baron , kgraul@linux.ibm.com, ktkhai@virtuozzo.com, kyeongdon.kim@lge.com, Linux List Kernel Mailing , Netdev , pabeni@redhat.com, syzkaller-bugs@googlegroups.com, xiyou.wangcong@gmail.com, Christoph Hellwig , zhengbin , bcrl@kvack.org, linux-fsdevel@vger.kernel.org, linux-aio@kvack.org, houtao1@huawei.com, yi.zhang@huawei.com Subject: [PATCH 3/8] aio: store event at final iocb_put() Date: Sun, 10 Mar 2019 07:08:17 +0000 Message-Id: <20190310070822.11564-3-viro@ZenIV.linux.org.uk> X-Mailer: git-send-email 2.20.1 In-Reply-To: <20190310070822.11564-1-viro@ZenIV.linux.org.uk> References: <20190310070606.GA10138@ZenIV.linux.org.uk> <20190310070822.11564-1-viro@ZenIV.linux.org.uk> MIME-Version: 1.0 Sender: netdev-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: netdev@vger.kernel.org From: Al Viro Instead of having aio_complete() set ->ki_res.{res,res2}, do that explicitly in its callers, drop the reference (as aio_complete() used to do) and delay the rest until the final iocb_put(). Signed-off-by: Al Viro --- fs/aio.c | 45 ++++++++++++++++++++------------------------- 1 file changed, 20 insertions(+), 25 deletions(-) diff --git a/fs/aio.c b/fs/aio.c index 2249a7a1d6b3..b9c4c1894020 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -1077,24 +1077,10 @@ static inline void iocb_destroy(struct aio_kiocb *iocb) kmem_cache_free(kiocb_cachep, iocb); } -static inline void iocb_put(struct aio_kiocb *iocb) -{ - if (refcount_dec_and_test(&iocb->ki_refcnt)) - iocb_destroy(iocb); -} - -static void aio_fill_event(struct io_event *ev, struct aio_kiocb *iocb, - long res, long res2) -{ - iocb->ki_res.res = res; - iocb->ki_res.res2 = res2; - *ev = iocb->ki_res; -} - /* aio_complete * Called when the io request on the given iocb is complete. */ -static void aio_complete(struct aio_kiocb *iocb, long res, long res2) +static void aio_complete(struct aio_kiocb *iocb) { struct kioctx *ctx = iocb->ki_ctx; struct aio_ring *ring; @@ -1118,14 +1104,14 @@ static void aio_complete(struct aio_kiocb *iocb, long res, long res2) ev_page = kmap_atomic(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]); event = ev_page + pos % AIO_EVENTS_PER_PAGE; - aio_fill_event(event, iocb, res, res2); + *event = iocb->ki_res; kunmap_atomic(ev_page); flush_dcache_page(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]); - pr_debug("%p[%u]: %p: %p %Lx %lx %lx\n", + pr_debug("%p[%u]: %p: %p %Lx %Lx %Lx\n", ctx, tail, iocb, (void __user *)iocb->ki_res.obj, iocb->ki_res.data, - res, res2); + iocb->ki_res.res, iocb->ki_res.res2); /* after flagging the request as done, we * must never even look at it again @@ -1167,7 +1153,14 @@ static void aio_complete(struct aio_kiocb *iocb, long res, long res2) if (waitqueue_active(&ctx->wait)) wake_up(&ctx->wait); - iocb_put(iocb); +} + +static inline void iocb_put(struct aio_kiocb *iocb) +{ + if (refcount_dec_and_test(&iocb->ki_refcnt)) { + aio_complete(iocb); + iocb_destroy(iocb); + } } /* aio_read_events_ring @@ -1441,7 +1434,9 @@ static void aio_complete_rw(struct kiocb *kiocb, long res, long res2) file_end_write(kiocb->ki_filp); } - aio_complete(iocb, res, res2); + iocb->ki_res.res = res; + iocb->ki_res.res2 = res2; + iocb_put(iocb); } static int aio_prep_rw(struct kiocb *req, const struct iocb *iocb) @@ -1589,11 +1584,10 @@ static ssize_t aio_write(struct kiocb *req, const struct iocb *iocb, static void aio_fsync_work(struct work_struct *work) { - struct fsync_iocb *req = container_of(work, struct fsync_iocb, work); - int ret; + struct aio_kiocb *iocb = container_of(work, struct aio_kiocb, fsync.work); - ret = vfs_fsync(req->file, req->datasync); - aio_complete(container_of(req, struct aio_kiocb, fsync), ret, 0); + iocb->ki_res.res = vfs_fsync(iocb->fsync.file, iocb->fsync.datasync); + iocb_put(iocb); } static int aio_fsync(struct fsync_iocb *req, const struct iocb *iocb, @@ -1614,7 +1608,8 @@ static int aio_fsync(struct fsync_iocb *req, const struct iocb *iocb, static inline void aio_poll_complete(struct aio_kiocb *iocb, __poll_t mask) { - aio_complete(iocb, mangle_poll(mask), 0); + iocb->ki_res.res = mangle_poll(mask); + iocb_put(iocb); } static void aio_poll_complete_work(struct work_struct *work) From patchwork Sun Mar 10 07:08:18 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Al Viro X-Patchwork-Id: 1053914 X-Patchwork-Delegate: davem@davemloft.net Return-Path: X-Original-To: patchwork-incoming-netdev@ozlabs.org Delivered-To: patchwork-incoming-netdev@ozlabs.org Authentication-Results: ozlabs.org; spf=none (mailfrom) smtp.mailfrom=vger.kernel.org (client-ip=209.132.180.67; helo=vger.kernel.org; envelope-from=netdev-owner@vger.kernel.org; receiver=) Authentication-Results: ozlabs.org; dmarc=none (p=none dis=none) header.from=ZenIV.linux.org.uk Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by ozlabs.org (Postfix) with ESMTP id 44HC7Y2g8Sz9sBp for ; Sun, 10 Mar 2019 18:10:49 +1100 (AEDT) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1726527AbfCJHKn (ORCPT ); Sun, 10 Mar 2019 03:10:43 -0400 Received: from zeniv.linux.org.uk ([195.92.253.2]:35442 "EHLO ZenIV.linux.org.uk" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1725852AbfCJHKn (ORCPT ); Sun, 10 Mar 2019 03:10:43 -0400 Received: from viro by ZenIV.linux.org.uk with local (Exim 4.92 #3 (Red Hat Linux)) id 1h2sai-00037R-4R; Sun, 10 Mar 2019 07:09:59 +0000 From: Al Viro To: Linus Torvalds Cc: Eric Dumazet , David Miller , Jason Baron , kgraul@linux.ibm.com, ktkhai@virtuozzo.com, kyeongdon.kim@lge.com, Linux List Kernel Mailing , Netdev , pabeni@redhat.com, syzkaller-bugs@googlegroups.com, xiyou.wangcong@gmail.com, Christoph Hellwig , zhengbin , bcrl@kvack.org, linux-fsdevel@vger.kernel.org, linux-aio@kvack.org, houtao1@huawei.com, yi.zhang@huawei.com Subject: [PATCH 4/8] Fix aio_poll() races Date: Sun, 10 Mar 2019 07:08:18 +0000 Message-Id: <20190310070822.11564-4-viro@ZenIV.linux.org.uk> X-Mailer: git-send-email 2.20.1 In-Reply-To: <20190310070822.11564-1-viro@ZenIV.linux.org.uk> References: <20190310070606.GA10138@ZenIV.linux.org.uk> <20190310070822.11564-1-viro@ZenIV.linux.org.uk> MIME-Version: 1.0 Sender: netdev-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: netdev@vger.kernel.org From: Al Viro aio_poll() has to cope with several unpleasant problems: * requests that might stay around indefinitely need to be made visible for io_cancel(2); that must not be done to a request already completed, though. * in cases when ->poll() has placed us on a waitqueue, wakeup might have happened (and request completed) before ->poll() returns. * worse, in some early wakeup cases request might end up re-added into the queue later - we can't treat "woken up and currently not in the queue" as "it's not going to stick around indefinitely" * ... moreover, ->poll() might have decided not to put it on any queues to start with, and that needs to be distinguished from the previous case * ->poll() might have tried to put us on more than one queue. Only the first will succeed for aio poll, so we might end up missing wakeups. OTOH, we might very well notice that only after the wakeup hits and request gets completed (all before ->poll() gets around to the second poll_wait()). In that case it's too late to decide that we have an error. req->woken was an attempt to deal with that. Unfortunately, it was broken. What we need to keep track of is not that wakeup has happened - the thing might come back after that. It's that async reference is already gone and won't come back, so we can't (and needn't) put the request on the list of cancellables. The easiest case is "request hadn't been put on any waitqueues"; we can tell by seeing NULL apt.head, and in that case there won't be anything async. We should either complete the request ourselves (if vfs_poll() reports anything of interest) or return an error. In all other cases we get exclusion with wakeups by grabbing the queue lock. If request is currently on queue and we have something interesting from vfs_poll(), we can steal it and complete the request ourselves. If it's on queue and vfs_poll() has not reported anything interesting, we either put it on the cancellable list, or, if we know that it hadn't been put on all queues ->poll() wanted it on, we steal it and return an error. If it's _not_ on queue, it's either been already dealt with (in which case we do nothing), or there's aio_poll_complete_work() about to be executed. In that case we either put it on the cancellable list, or, if we know it hadn't been put on all queues ->poll() wanted it on, simulate what cancel would've done. It's a lot more convoluted than I'd like it to be. Single-consumer APIs suck, and unfortunately aio is not an exception... Signed-off-by: Al Viro --- fs/aio.c | 71 +++++++++++++++++++++++++++++----------------------------------- 1 file changed, 32 insertions(+), 39 deletions(-) diff --git a/fs/aio.c b/fs/aio.c index b9c4c1894020..f47a29f7f201 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -181,7 +181,7 @@ struct poll_iocb { struct file *file; struct wait_queue_head *head; __poll_t events; - bool woken; + bool done; bool cancelled; struct wait_queue_entry wait; struct work_struct work; @@ -1606,12 +1606,6 @@ static int aio_fsync(struct fsync_iocb *req, const struct iocb *iocb, return 0; } -static inline void aio_poll_complete(struct aio_kiocb *iocb, __poll_t mask) -{ - iocb->ki_res.res = mangle_poll(mask); - iocb_put(iocb); -} - static void aio_poll_complete_work(struct work_struct *work) { struct poll_iocb *req = container_of(work, struct poll_iocb, work); @@ -1637,9 +1631,11 @@ static void aio_poll_complete_work(struct work_struct *work) return; } list_del_init(&iocb->ki_list); + iocb->ki_res.res = mangle_poll(mask); + req->done = true; spin_unlock_irq(&ctx->ctx_lock); - aio_poll_complete(iocb, mask); + iocb_put(iocb); } /* assumes we are called with irqs disabled */ @@ -1671,7 +1667,7 @@ static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync, if (mask && !(mask & req->events)) return 0; - req->woken = true; + list_del_init(&req->wait.entry); if (mask) { /* @@ -1682,15 +1678,14 @@ static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync, */ if (spin_trylock_irqsave(&iocb->ki_ctx->ctx_lock, flags)) { list_del(&iocb->ki_list); + iocb->ki_res.res = mangle_poll(mask); + req->done = true; spin_unlock_irqrestore(&iocb->ki_ctx->ctx_lock, flags); - - list_del_init(&req->wait.entry); - aio_poll_complete(iocb, mask); + iocb_put(iocb); return 1; } } - list_del_init(&req->wait.entry); schedule_work(&req->work); return 1; } @@ -1723,6 +1718,7 @@ static ssize_t aio_poll(struct aio_kiocb *aiocb, const struct iocb *iocb) struct kioctx *ctx = aiocb->ki_ctx; struct poll_iocb *req = &aiocb->poll; struct aio_poll_table apt; + bool cancel = false; __poll_t mask; /* reject any unknown events outside the normal event mask. */ @@ -1736,7 +1732,7 @@ static ssize_t aio_poll(struct aio_kiocb *aiocb, const struct iocb *iocb) req->events = demangle_poll(iocb->aio_buf) | EPOLLERR | EPOLLHUP; req->head = NULL; - req->woken = false; + req->done = false; req->cancelled = false; apt.pt._qproc = aio_poll_queue_proc; @@ -1749,36 +1745,33 @@ static ssize_t aio_poll(struct aio_kiocb *aiocb, const struct iocb *iocb) init_waitqueue_func_entry(&req->wait, aio_poll_wake); mask = vfs_poll(req->file, &apt.pt) & req->events; - if (unlikely(!req->head)) { - /* we did not manage to set up a waitqueue, done */ - goto out; - } - spin_lock_irq(&ctx->ctx_lock); - spin_lock(&req->head->lock); - if (req->woken) { - /* wake_up context handles the rest */ - mask = 0; + if (likely(req->head)) { + spin_lock(&req->head->lock); + if (unlikely(list_empty(&req->wait.entry))) { + if (apt.error) + cancel = true; + apt.error = 0; + mask = 0; + } + if (mask || apt.error) { + list_del_init(&req->wait.entry); + } else if (cancel) { + WRITE_ONCE(req->cancelled, true); + } else if (!req->done) { /* actually waiting for an event */ + list_add_tail(&aiocb->ki_list, &ctx->active_reqs); + aiocb->ki_cancel = aio_poll_cancel; + } + spin_unlock(&req->head->lock); + } + if (mask) { /* no async, we'd stolen it */ + aiocb->ki_res.res = mangle_poll(mask); apt.error = 0; - } else if (mask || apt.error) { - /* if we get an error or a mask we are done */ - WARN_ON_ONCE(list_empty(&req->wait.entry)); - list_del_init(&req->wait.entry); - } else { - /* actually waiting for an event */ - list_add_tail(&aiocb->ki_list, &ctx->active_reqs); - aiocb->ki_cancel = aio_poll_cancel; } - spin_unlock(&req->head->lock); spin_unlock_irq(&ctx->ctx_lock); - -out: - if (unlikely(apt.error)) - return apt.error; - if (mask) - aio_poll_complete(aiocb, mask); - return 0; + iocb_put(aiocb); + return apt.error; } static int __io_submit_one(struct kioctx *ctx, const struct iocb *iocb, From patchwork Sun Mar 10 07:08:19 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Al Viro X-Patchwork-Id: 1053915 X-Patchwork-Delegate: davem@davemloft.net Return-Path: X-Original-To: patchwork-incoming-netdev@ozlabs.org Delivered-To: patchwork-incoming-netdev@ozlabs.org Authentication-Results: ozlabs.org; spf=none (mailfrom) smtp.mailfrom=vger.kernel.org (client-ip=209.132.180.67; helo=vger.kernel.org; envelope-from=netdev-owner@vger.kernel.org; receiver=) Authentication-Results: ozlabs.org; dmarc=none (p=none dis=none) header.from=ZenIV.linux.org.uk Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by ozlabs.org (Postfix) with ESMTP id 44HC850BRfz9sBp for ; Sun, 10 Mar 2019 18:11:17 +1100 (AEDT) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1726544AbfCJHLL (ORCPT ); Sun, 10 Mar 2019 03:11:11 -0400 Received: from zeniv.linux.org.uk ([195.92.253.2]:35478 "EHLO ZenIV.linux.org.uk" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1725796AbfCJHLL (ORCPT ); Sun, 10 Mar 2019 03:11:11 -0400 Received: from viro by ZenIV.linux.org.uk with local (Exim 4.92 #3 (Red Hat Linux)) id 1h2sb7-00039G-1i; Sun, 10 Mar 2019 07:10:19 +0000 From: Al Viro To: Linus Torvalds Cc: Eric Dumazet , David Miller , Jason Baron , kgraul@linux.ibm.com, ktkhai@virtuozzo.com, kyeongdon.kim@lge.com, Linux List Kernel Mailing , Netdev , pabeni@redhat.com, syzkaller-bugs@googlegroups.com, xiyou.wangcong@gmail.com, Christoph Hellwig , zhengbin , bcrl@kvack.org, linux-fsdevel@vger.kernel.org, linux-aio@kvack.org, houtao1@huawei.com, yi.zhang@huawei.com Subject: [PATCH 5/8] make aio_read()/aio_write() return int Date: Sun, 10 Mar 2019 07:08:19 +0000 Message-Id: <20190310070822.11564-5-viro@ZenIV.linux.org.uk> X-Mailer: git-send-email 2.20.1 In-Reply-To: <20190310070822.11564-1-viro@ZenIV.linux.org.uk> References: <20190310070606.GA10138@ZenIV.linux.org.uk> <20190310070822.11564-1-viro@ZenIV.linux.org.uk> MIME-Version: 1.0 Sender: netdev-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: netdev@vger.kernel.org From: Al Viro that ssize_t is a rudiment of earlier calling conventions; it's been used only to pass 0 and -E... since last autumn. Signed-off-by: Al Viro Reviewed-by: Christoph Hellwig --- fs/aio.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/aio.c b/fs/aio.c index f47a29f7f201..f9e8f1edfe36 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -1513,13 +1513,13 @@ static inline void aio_rw_done(struct kiocb *req, ssize_t ret) } } -static ssize_t aio_read(struct kiocb *req, const struct iocb *iocb, +static int aio_read(struct kiocb *req, const struct iocb *iocb, bool vectored, bool compat) { struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs; struct iov_iter iter; struct file *file; - ssize_t ret; + int ret; ret = aio_prep_rw(req, iocb); if (ret) @@ -1541,13 +1541,13 @@ static ssize_t aio_read(struct kiocb *req, const struct iocb *iocb, return ret; } -static ssize_t aio_write(struct kiocb *req, const struct iocb *iocb, +static int aio_write(struct kiocb *req, const struct iocb *iocb, bool vectored, bool compat) { struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs; struct iov_iter iter; struct file *file; - ssize_t ret; + int ret; ret = aio_prep_rw(req, iocb); if (ret) @@ -1713,7 +1713,7 @@ aio_poll_queue_proc(struct file *file, struct wait_queue_head *head, add_wait_queue(head, &pt->iocb->poll.wait); } -static ssize_t aio_poll(struct aio_kiocb *aiocb, const struct iocb *iocb) +static int aio_poll(struct aio_kiocb *aiocb, const struct iocb *iocb) { struct kioctx *ctx = aiocb->ki_ctx; struct poll_iocb *req = &aiocb->poll; @@ -1778,7 +1778,7 @@ static int __io_submit_one(struct kioctx *ctx, const struct iocb *iocb, struct iocb __user *user_iocb, bool compat) { struct aio_kiocb *req; - ssize_t ret; + int ret; /* enforce forwards compatibility on users */ if (unlikely(iocb->aio_reserved2)) { From patchwork Sun Mar 10 07:08:20 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Al Viro X-Patchwork-Id: 1053916 X-Patchwork-Delegate: davem@davemloft.net Return-Path: X-Original-To: patchwork-incoming-netdev@ozlabs.org Delivered-To: patchwork-incoming-netdev@ozlabs.org Authentication-Results: ozlabs.org; spf=none (mailfrom) smtp.mailfrom=vger.kernel.org (client-ip=209.132.180.67; helo=vger.kernel.org; envelope-from=netdev-owner@vger.kernel.org; receiver=) Authentication-Results: ozlabs.org; dmarc=none (p=none dis=none) header.from=ZenIV.linux.org.uk Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by ozlabs.org (Postfix) with ESMTP id 44HC8k4WhLz9s9h for ; Sun, 10 Mar 2019 18:11:50 +1100 (AEDT) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1726446AbfCJHLp (ORCPT ); Sun, 10 Mar 2019 03:11:45 -0400 Received: from zeniv.linux.org.uk ([195.92.253.2]:35512 "EHLO ZenIV.linux.org.uk" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1725796AbfCJHLp (ORCPT ); Sun, 10 Mar 2019 03:11:45 -0400 Received: from viro by ZenIV.linux.org.uk with local (Exim 4.92 #3 (Red Hat Linux)) id 1h2sbY-0003BI-O1; Sun, 10 Mar 2019 07:10:49 +0000 From: Al Viro To: Linus Torvalds Cc: Eric Dumazet , David Miller , Jason Baron , kgraul@linux.ibm.com, ktkhai@virtuozzo.com, kyeongdon.kim@lge.com, Linux List Kernel Mailing , Netdev , pabeni@redhat.com, syzkaller-bugs@googlegroups.com, xiyou.wangcong@gmail.com, Christoph Hellwig , zhengbin , bcrl@kvack.org, linux-fsdevel@vger.kernel.org, linux-aio@kvack.org, houtao1@huawei.com, yi.zhang@huawei.com Subject: [PATCH 6/8] move dropping ->ki_eventfd into iocb_destroy() Date: Sun, 10 Mar 2019 07:08:20 +0000 Message-Id: <20190310070822.11564-6-viro@ZenIV.linux.org.uk> X-Mailer: git-send-email 2.20.1 In-Reply-To: <20190310070822.11564-1-viro@ZenIV.linux.org.uk> References: <20190310070606.GA10138@ZenIV.linux.org.uk> <20190310070822.11564-1-viro@ZenIV.linux.org.uk> MIME-Version: 1.0 Sender: netdev-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: netdev@vger.kernel.org From: Al Viro Signed-off-by: Al Viro --- fs/aio.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/fs/aio.c b/fs/aio.c index f9e8f1edfe36..595c19965a8b 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -1071,6 +1071,8 @@ static struct kioctx *lookup_ioctx(unsigned long ctx_id) static inline void iocb_destroy(struct aio_kiocb *iocb) { + if (iocb->ki_eventfd) + eventfd_ctx_put(iocb->ki_eventfd); if (iocb->ki_filp) fput(iocb->ki_filp); percpu_ref_put(&iocb->ki_ctx->reqs); @@ -1138,10 +1140,8 @@ static void aio_complete(struct aio_kiocb *iocb) * eventfd. The eventfd_signal() function is safe to be called * from IRQ context. */ - if (iocb->ki_eventfd) { + if (iocb->ki_eventfd) eventfd_signal(iocb->ki_eventfd, 1); - eventfd_ctx_put(iocb->ki_eventfd); - } /* * We have to order our ring_info tail store above and test @@ -1810,18 +1810,19 @@ static int __io_submit_one(struct kioctx *ctx, const struct iocb *iocb, goto out_put_req; if (iocb->aio_flags & IOCB_FLAG_RESFD) { + struct eventfd_ctx *eventfd; /* * If the IOCB_FLAG_RESFD flag of aio_flags is set, get an * instance of the file* now. The file descriptor must be * an eventfd() fd, and will be signaled for each completed * event using the eventfd_signal() function. */ - req->ki_eventfd = eventfd_ctx_fdget((int) iocb->aio_resfd); - if (IS_ERR(req->ki_eventfd)) { - ret = PTR_ERR(req->ki_eventfd); - req->ki_eventfd = NULL; + eventfd = eventfd_ctx_fdget((int) iocb->aio_resfd); + if (IS_ERR(eventfd)) { + ret = PTR_ERR(eventfd); goto out_put_req; } + req->ki_eventfd = eventfd; } ret = put_user(KIOCB_KEY, &user_iocb->aio_key); @@ -1875,8 +1876,6 @@ static int __io_submit_one(struct kioctx *ctx, const struct iocb *iocb, return 0; out_put_req: - if (req->ki_eventfd) - eventfd_ctx_put(req->ki_eventfd); iocb_destroy(req); out_put_reqs_available: put_reqs_available(ctx, 1); From patchwork Sun Mar 10 07:08:21 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Al Viro X-Patchwork-Id: 1053917 X-Patchwork-Delegate: davem@davemloft.net Return-Path: X-Original-To: patchwork-incoming-netdev@ozlabs.org Delivered-To: patchwork-incoming-netdev@ozlabs.org Authentication-Results: ozlabs.org; spf=none (mailfrom) smtp.mailfrom=vger.kernel.org (client-ip=209.132.180.67; helo=vger.kernel.org; envelope-from=netdev-owner@vger.kernel.org; receiver=) Authentication-Results: ozlabs.org; dmarc=none (p=none dis=none) header.from=ZenIV.linux.org.uk Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by ozlabs.org (Postfix) with ESMTP id 44HC964297z9s9h for ; Sun, 10 Mar 2019 18:12:10 +1100 (AEDT) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1726578AbfCJHMF (ORCPT ); Sun, 10 Mar 2019 03:12:05 -0400 Received: from zeniv.linux.org.uk ([195.92.253.2]:35534 "EHLO ZenIV.linux.org.uk" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1726364AbfCJHMF (ORCPT ); Sun, 10 Mar 2019 03:12:05 -0400 Received: from viro by ZenIV.linux.org.uk with local (Exim 4.92 #3 (Red Hat Linux)) id 1h2sc1-0003D7-Dq; Sun, 10 Mar 2019 07:11:17 +0000 From: Al Viro To: Linus Torvalds Cc: Eric Dumazet , David Miller , Jason Baron , kgraul@linux.ibm.com, ktkhai@virtuozzo.com, kyeongdon.kim@lge.com, Linux List Kernel Mailing , Netdev , pabeni@redhat.com, syzkaller-bugs@googlegroups.com, xiyou.wangcong@gmail.com, Christoph Hellwig , zhengbin , bcrl@kvack.org, linux-fsdevel@vger.kernel.org, linux-aio@kvack.org, houtao1@huawei.com, yi.zhang@huawei.com Subject: [PATCH 7/8] deal with get_reqs_available() in aio_get_req() itself Date: Sun, 10 Mar 2019 07:08:21 +0000 Message-Id: <20190310070822.11564-7-viro@ZenIV.linux.org.uk> X-Mailer: git-send-email 2.20.1 In-Reply-To: <20190310070822.11564-1-viro@ZenIV.linux.org.uk> References: <20190310070606.GA10138@ZenIV.linux.org.uk> <20190310070822.11564-1-viro@ZenIV.linux.org.uk> MIME-Version: 1.0 Sender: netdev-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: netdev@vger.kernel.org From: Al Viro simplifies the caller Signed-off-by: Al Viro Reviewed-by: Christoph Hellwig --- fs/aio.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/aio.c b/fs/aio.c index 595c19965a8b..66ca52c57cca 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -1033,6 +1033,11 @@ static inline struct aio_kiocb *aio_get_req(struct kioctx *ctx) if (unlikely(!req)) return NULL; + if (unlikely(!get_reqs_available(ctx))) { + kfree(req); + return NULL; + } + percpu_ref_get(&ctx->reqs); req->ki_ctx = ctx; INIT_LIST_HEAD(&req->ki_list); @@ -1796,13 +1801,9 @@ static int __io_submit_one(struct kioctx *ctx, const struct iocb *iocb, return -EINVAL; } - if (!get_reqs_available(ctx)) - return -EAGAIN; - - ret = -EAGAIN; req = aio_get_req(ctx); if (unlikely(!req)) - goto out_put_reqs_available; + return -EAGAIN; req->ki_filp = fget(iocb->aio_fildes); ret = -EBADF; @@ -1877,7 +1878,6 @@ static int __io_submit_one(struct kioctx *ctx, const struct iocb *iocb, out_put_req: iocb_destroy(req); -out_put_reqs_available: put_reqs_available(ctx, 1); return ret; } From patchwork Sun Mar 10 07:08:22 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Al Viro X-Patchwork-Id: 1053918 X-Patchwork-Delegate: davem@davemloft.net Return-Path: X-Original-To: patchwork-incoming-netdev@ozlabs.org Delivered-To: patchwork-incoming-netdev@ozlabs.org Authentication-Results: ozlabs.org; spf=none (mailfrom) smtp.mailfrom=vger.kernel.org (client-ip=209.132.180.67; helo=vger.kernel.org; envelope-from=netdev-owner@vger.kernel.org; receiver=) Authentication-Results: ozlabs.org; dmarc=none (p=none dis=none) header.from=ZenIV.linux.org.uk Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by ozlabs.org (Postfix) with ESMTP id 44HC9g0pD1z9s9h for ; Sun, 10 Mar 2019 18:12:39 +1100 (AEDT) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1726596AbfCJHMe (ORCPT ); Sun, 10 Mar 2019 03:12:34 -0400 Received: from zeniv.linux.org.uk ([195.92.253.2]:35566 "EHLO ZenIV.linux.org.uk" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1725865AbfCJHMd (ORCPT ); Sun, 10 Mar 2019 03:12:33 -0400 Received: from viro by ZenIV.linux.org.uk with local (Exim 4.92 #3 (Red Hat Linux)) id 1h2scS-0003Ex-U4; Sun, 10 Mar 2019 07:11:45 +0000 From: Al Viro To: Linus Torvalds Cc: Eric Dumazet , David Miller , Jason Baron , kgraul@linux.ibm.com, ktkhai@virtuozzo.com, kyeongdon.kim@lge.com, Linux List Kernel Mailing , Netdev , pabeni@redhat.com, syzkaller-bugs@googlegroups.com, xiyou.wangcong@gmail.com, Christoph Hellwig , zhengbin , bcrl@kvack.org, linux-fsdevel@vger.kernel.org, linux-aio@kvack.org, houtao1@huawei.com, yi.zhang@huawei.com Subject: [PATCH 8/8] aio: move sanity checks and request allocation to io_submit_one() Date: Sun, 10 Mar 2019 07:08:22 +0000 Message-Id: <20190310070822.11564-8-viro@ZenIV.linux.org.uk> X-Mailer: git-send-email 2.20.1 In-Reply-To: <20190310070822.11564-1-viro@ZenIV.linux.org.uk> References: <20190310070606.GA10138@ZenIV.linux.org.uk> <20190310070822.11564-1-viro@ZenIV.linux.org.uk> MIME-Version: 1.0 Sender: netdev-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: netdev@vger.kernel.org From: Al Viro makes for somewhat cleaner control flow in __io_submit_one() Signed-off-by: Al Viro --- fs/aio.c | 119 ++++++++++++++++++++++++++++----------------------------------- 1 file changed, 53 insertions(+), 66 deletions(-) diff --git a/fs/aio.c b/fs/aio.c index 66ca52c57cca..2d1db52b1268 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -1780,35 +1780,12 @@ static int aio_poll(struct aio_kiocb *aiocb, const struct iocb *iocb) } static int __io_submit_one(struct kioctx *ctx, const struct iocb *iocb, - struct iocb __user *user_iocb, bool compat) + struct iocb __user *user_iocb, struct aio_kiocb *req, + bool compat) { - struct aio_kiocb *req; - int ret; - - /* enforce forwards compatibility on users */ - if (unlikely(iocb->aio_reserved2)) { - pr_debug("EINVAL: reserve field set\n"); - return -EINVAL; - } - - /* prevent overflows */ - if (unlikely( - (iocb->aio_buf != (unsigned long)iocb->aio_buf) || - (iocb->aio_nbytes != (size_t)iocb->aio_nbytes) || - ((ssize_t)iocb->aio_nbytes < 0) - )) { - pr_debug("EINVAL: overflow check\n"); - return -EINVAL; - } - - req = aio_get_req(ctx); - if (unlikely(!req)) - return -EAGAIN; - req->ki_filp = fget(iocb->aio_fildes); - ret = -EBADF; if (unlikely(!req->ki_filp)) - goto out_put_req; + return -EBADF; if (iocb->aio_flags & IOCB_FLAG_RESFD) { struct eventfd_ctx *eventfd; @@ -1819,17 +1796,15 @@ static int __io_submit_one(struct kioctx *ctx, const struct iocb *iocb, * event using the eventfd_signal() function. */ eventfd = eventfd_ctx_fdget((int) iocb->aio_resfd); - if (IS_ERR(eventfd)) { - ret = PTR_ERR(eventfd); - goto out_put_req; - } + if (IS_ERR(eventfd)) + return PTR_ERR(req->ki_eventfd); + req->ki_eventfd = eventfd; } - ret = put_user(KIOCB_KEY, &user_iocb->aio_key); - if (unlikely(ret)) { + if (unlikely(put_user(KIOCB_KEY, &user_iocb->aio_key))) { pr_debug("EFAULT: aio_key\n"); - goto out_put_req; + return -EFAULT; } req->ki_res.obj = (u64)(unsigned long)user_iocb; @@ -1839,58 +1814,70 @@ static int __io_submit_one(struct kioctx *ctx, const struct iocb *iocb, switch (iocb->aio_lio_opcode) { case IOCB_CMD_PREAD: - ret = aio_read(&req->rw, iocb, false, compat); - break; + return aio_read(&req->rw, iocb, false, compat); case IOCB_CMD_PWRITE: - ret = aio_write(&req->rw, iocb, false, compat); - break; + return aio_write(&req->rw, iocb, false, compat); case IOCB_CMD_PREADV: - ret = aio_read(&req->rw, iocb, true, compat); - break; + return aio_read(&req->rw, iocb, true, compat); case IOCB_CMD_PWRITEV: - ret = aio_write(&req->rw, iocb, true, compat); - break; + return aio_write(&req->rw, iocb, true, compat); case IOCB_CMD_FSYNC: - ret = aio_fsync(&req->fsync, iocb, false); - break; + return aio_fsync(&req->fsync, iocb, false); case IOCB_CMD_FDSYNC: - ret = aio_fsync(&req->fsync, iocb, true); - break; + return aio_fsync(&req->fsync, iocb, true); case IOCB_CMD_POLL: - ret = aio_poll(req, iocb); - break; + return aio_poll(req, iocb); default: pr_debug("invalid aio operation %d\n", iocb->aio_lio_opcode); - ret = -EINVAL; - break; + return -EINVAL; } - - /* Done with the synchronous reference */ - iocb_put(req); - - /* - * If ret is 0, we'd either done aio_complete() ourselves or have - * arranged for that to be done asynchronously. Anything non-zero - * means that we need to destroy req ourselves. - */ - if (!ret) - return 0; - -out_put_req: - iocb_destroy(req); - put_reqs_available(ctx, 1); - return ret; } static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, bool compat) { + struct aio_kiocb *req; struct iocb iocb; + int err; if (unlikely(copy_from_user(&iocb, user_iocb, sizeof(iocb)))) return -EFAULT; - return __io_submit_one(ctx, &iocb, user_iocb, compat); + /* enforce forwards compatibility on users */ + if (unlikely(iocb.aio_reserved2)) { + pr_debug("EINVAL: reserve field set\n"); + return -EINVAL; + } + + /* prevent overflows */ + if (unlikely( + (iocb.aio_buf != (unsigned long)iocb.aio_buf) || + (iocb.aio_nbytes != (size_t)iocb.aio_nbytes) || + ((ssize_t)iocb.aio_nbytes < 0) + )) { + pr_debug("EINVAL: overflow check\n"); + return -EINVAL; + } + + req = aio_get_req(ctx); + if (unlikely(!req)) + return -EAGAIN; + + err = __io_submit_one(ctx, &iocb, user_iocb, req, compat); + + /* Done with the synchronous reference */ + iocb_put(req); + + /* + * If err is 0, we'd either done aio_complete() ourselves or have + * arranged for that to be done asynchronously. Anything non-zero + * means that we need to destroy req ourselves. + */ + if (unlikely(err)) { + iocb_destroy(req); + put_reqs_available(ctx, 1); + } + return err; } /* sys_io_submit: