From patchwork Thu Oct 8 03:19:37 2015
X-Patchwork-Submitter: Jason Baron
X-Patchwork-Id: 527567
X-Patchwork-Delegate: davem@davemloft.net
From: Jason Baron
To: davem@davemloft.net
Cc: netdev@vger.kernel.org, linux-kernel@vger.kernel.org,
    minipli@googlemail.com, normalperson@yhbt.net, eric.dumazet@gmail.com,
    rweikusat@mobileactivedefense.com, viro@zeniv.linux.org.uk,
    davidel@xmailserver.org, dave@stgolabs.net, olivier@mauras.ch,
    pageexec@freemail.hu, torvalds@linux-foundation.org,
    peterz@infradead.org, joe@perches.com
Subject: [PATCH v3 3/3] net: unix: optimize wakeups in unix_dgram_recvmsg()
Date: Wed, 7 Oct 2015 23:19:37 -0400
Message-Id: <8c0da634833af50fb9cd1e0fc9cddcdd49345523.1444272769.git.jbaron@akamai.com>
X-Mailer: git-send-email 1.9.1
X-Mailing-List: netdev@vger.kernel.org

Now that connect() permanently registers a callback routine, we can
induce extra overhead in unix_dgram_recvmsg(), which unconditionally
wakes up its peer_wait queue on every receive. This patch makes the
wakeup there conditional on there being waiters.
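
In plain words, the pattern is: a task that needs queue space first sets a
"no space" flag and only then re-checks the peer's receive queue, while the
receive side drains the queue and issues the (relatively expensive) wakeup
only if that flag was set, with a memory barrier on each side so that neither
the flag nor the queue state can be observed stale. A rough userspace
analogue in C11 atomics is sketched below; it is not part of the patch, and
queue_full(), wait_for_wakeup() and wake_waiters() are illustrative
placeholders rather than kernel interfaces:

#include <stdatomic.h>
#include <stdbool.h>

/* Illustrative placeholders, not kernel interfaces. */
extern bool queue_full(void);
extern void wait_for_wakeup(void);
extern void wake_waiters(void);

static atomic_bool nospace;	/* plays the role of the UNIX_NOSPACE bit */

/*
 * Writer side (cf. unix_dgram_sendmsg()/unix_dgram_poll()): declare interest
 * first, then re-check the queue, so a drain that races in between cannot be
 * missed.  In the kernel the check sits between prepare_to_wait_exclusive()
 * and schedule(), which closes the remaining check-then-sleep window.
 */
static void writer_wait_for_space(void)
{
	atomic_store_explicit(&nospace, true, memory_order_relaxed);
	atomic_thread_fence(memory_order_seq_cst);	/* like smp_mb__after_atomic() */
	if (queue_full())
		wait_for_wakeup();
}

/*
 * Reader side (cf. unix_dgram_recvmsg()): after dequeueing a message, wake
 * the peer only if it declared interest, and clear the flag so subsequent
 * receives pay no wakeup cost.
 */
static void reader_after_dequeue(void)
{
	atomic_thread_fence(memory_order_seq_cst);	/* like smp_mb() */
	if (atomic_exchange(&nospace, false))
		wake_waiters();
}
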
Signed-off-by: Jason Baron
---
 include/net/af_unix.h |  1 +
 net/unix/af_unix.c    | 85 ++++++++++++++++++++++++++++++++++++---------------
 2 files changed, 62 insertions(+), 24 deletions(-)

diff --git a/include/net/af_unix.h b/include/net/af_unix.h
index 6a4a345..cf21ffd 100644
--- a/include/net/af_unix.h
+++ b/include/net/af_unix.h
@@ -61,6 +61,7 @@ struct unix_sock {
 	unsigned long		flags;
 #define UNIX_GC_CANDIDATE	0
 #define UNIX_GC_MAYBE_CYCLE	1
+#define UNIX_NOSPACE		2
 	struct socket_wq	peer_wq;
 	wait_queue_t		wait;
 };
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index f789423..66979d4 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -326,7 +326,7 @@ found:
 	return s;
 }
 
-static inline int unix_writable(struct sock *sk)
+static inline bool unix_writable(struct sock *sk)
 {
 	return (atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
 }
@@ -1079,6 +1079,12 @@ static long unix_wait_for_peer(struct sock *other, long timeo)
 
 	prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);
 
+	set_bit(UNIX_NOSPACE, &u->flags);
+	/* Ensure that we either see space in the peer sk_receive_queue via the
+	 * unix_recvq_full() check below, or we receive a wakeup when it
+	 * empties. Pairs with the mb in unix_dgram_recvmsg().
+	 */
+	smp_mb__after_atomic();
 	sched = !sock_flag(other, SOCK_DEAD) &&
 		!(other->sk_shutdown & RCV_SHUTDOWN) &&
 		unix_recvq_full(other);
@@ -1623,17 +1629,27 @@ restart:
 
 	if (unix_peer(other) != sk && unix_recvq_full(other)) {
 		if (!timeo) {
-			err = -EAGAIN;
-			goto out_unlock;
-		}
+			set_bit(UNIX_NOSPACE, &unix_sk(other)->flags);
+			/* Ensure that we either see space in the peer
+			 * sk_receive_queue via the unix_recvq_full() check
+			 * below, or we receive a wakeup when it empties. This
+			 * makes sure that epoll ET triggers correctly. Pairs
+			 * with the mb in unix_dgram_recvmsg().
+			 */
+			smp_mb__after_atomic();
+			if (unix_recvq_full(other)) {
+				err = -EAGAIN;
+				goto out_unlock;
+			}
+		} else {
+			timeo = unix_wait_for_peer(other, timeo);
 
-		timeo = unix_wait_for_peer(other, timeo);
+			err = sock_intr_errno(timeo);
+			if (signal_pending(current))
+				goto out_free;
 
-		err = sock_intr_errno(timeo);
-		if (signal_pending(current))
-			goto out_free;
-
-		goto restart;
+			goto restart;
+		}
 	}
 
 	if (sock_flag(other, SOCK_RCVTSTAMP))
@@ -1939,8 +1955,19 @@ static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
 		goto out_unlock;
 	}
 
-	wake_up_interruptible_sync_poll(&u->peer_wait,
-					POLLOUT | POLLWRNORM | POLLWRBAND);
+	/* Ensure that waiters on our sk->sk_receive_queue draining that check
+	 * via unix_recvq_full() either see space in the queue or get a wakeup
+	 * below. sk->sk_receive_queue is reduced by the __skb_recv_datagram()
+	 * call above. Pairs with the mb in unix_dgram_sendmsg(),
+	 * unix_dgram_poll(), and unix_wait_for_peer().
+	 */
+	smp_mb();
+	if (test_bit(UNIX_NOSPACE, &u->flags)) {
+		clear_bit(UNIX_NOSPACE, &u->flags);
+		wake_up_interruptible_sync_poll(&u->peer_wait,
+						POLLOUT | POLLWRNORM |
+						POLLWRBAND);
+	}
 
 	if (msg->msg_name)
 		unix_copy_addr(msg, skb->sk);
@@ -2432,11 +2459,19 @@ static unsigned int unix_poll(struct file *file, struct socket *sock, poll_table
 	return mask;
 }
 
+static bool unix_dgram_writable(struct sock *sk, struct sock *other)
+{
+	if (other && unix_peer(other) != sk && unix_recvq_full(other))
+		return false;
+
+	return unix_writable(sk);
+}
+
 static unsigned int unix_dgram_poll(struct file *file, struct socket *sock,
 				    poll_table *wait)
 {
 	struct sock *sk = sock->sk, *other;
-	unsigned int mask, writable;
+	unsigned int mask;
 
 	sock_poll_wait(file, sk_sleep(sk), wait);
 	mask = 0;
@@ -2468,20 +2503,22 @@ static unsigned int unix_dgram_poll(struct file *file, struct socket *sock,
 	if (!(poll_requested_events(wait) & (POLLWRBAND|POLLWRNORM|POLLOUT)))
 		return mask;
 
-	writable = unix_writable(sk);
 	other = unix_peer_get(sk);
-	if (other) {
-		if (unix_peer(other) != sk) {
-			if (unix_recvq_full(other))
-				writable = 0;
-		}
-		sock_put(other);
-	}
-
-	if (writable)
+	if (unix_dgram_writable(sk, other)) {
 		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
-	else
+	} else {
 		set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
+		set_bit(UNIX_NOSPACE, &unix_sk(other)->flags);
+		/* Ensure that we either see space in the peer sk_receive_queue
+		 * via the unix_recvq_full() check below, or we receive a wakeup
+		 * when it empties. Pairs with the mb in unix_dgram_recvmsg().
+		 */
+		smp_mb__after_atomic();
+		if (unix_dgram_writable(sk, other))
+			mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
+	}
+	if (other)
+		sock_put(other);
 
 	return mask;
 }
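
For completeness, the path this changes can be poked from userspace with a
one-way connected datagram socket: fill the receiver's queue until the
nonblocking sender backs off, confirm the sender is not writable, then drain
one datagram and confirm it becomes writable again. The sketch below is not
part of the patch; the socket path is arbitrary, and net.unix.max_dgram_qlen
may need to be lowered (e.g. to something small like 16) so that the peer's
receive queue, rather than the sender's own sndbuf, is what fills up first.

#define _GNU_SOURCE
#include <poll.h>
#include <stdio.h>
#include <sys/socket.h>
#include <sys/un.h>
#include <unistd.h>

int main(void)
{
	struct sockaddr_un addr = { .sun_family = AF_UNIX,
				    .sun_path = "/tmp/dgram-wake" };	/* arbitrary path */
	int srv = socket(AF_UNIX, SOCK_DGRAM, 0);
	int cli = socket(AF_UNIX, SOCK_DGRAM | SOCK_NONBLOCK, 0);
	struct pollfd pfd = { .fd = cli, .events = POLLOUT };
	char buf[16] = "x";

	unlink(addr.sun_path);
	if (srv < 0 || cli < 0 ||
	    bind(srv, (struct sockaddr *)&addr, sizeof(addr)) < 0 ||
	    connect(cli, (struct sockaddr *)&addr, sizeof(addr)) < 0) {
		perror("setup");
		return 1;
	}

	/* Fill the receiver's queue until the nonblocking sender backs off. */
	while (send(cli, buf, sizeof(buf), 0) > 0)
		;

	printf("POLLOUT before drain: %d\n", poll(&pfd, 1, 0));	/* expect 0 */

	/* Draining one datagram lets unix_dgram_recvmsg() wake the sender. */
	recv(srv, buf, sizeof(buf), 0);

	printf("POLLOUT after drain:  %d\n", poll(&pfd, 1, 0));	/* expect 1 */

	unlink(addr.sun_path);
	return 0;
}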