From patchwork Thu Feb 21 08:15:23 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Al Viro X-Patchwork-Id: 1045864 X-Patchwork-Delegate: davem@davemloft.net Return-Path: X-Original-To: patchwork-incoming-netdev@ozlabs.org Delivered-To: patchwork-incoming-netdev@ozlabs.org Authentication-Results: ozlabs.org; spf=none (mailfrom) smtp.mailfrom=vger.kernel.org (client-ip=209.132.180.67; helo=vger.kernel.org; envelope-from=netdev-owner@vger.kernel.org; receiver=) Authentication-Results: ozlabs.org; dmarc=none (p=none dis=none) header.from=zeniv.linux.org.uk Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by ozlabs.org (Postfix) with ESMTP id 444nN00psVz9s7h for ; Thu, 21 Feb 2019 19:15:27 +1100 (AEDT) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1726656AbfBUIPZ (ORCPT ); Thu, 21 Feb 2019 03:15:25 -0500 Received: from zeniv.linux.org.uk ([195.92.253.2]:37842 "EHLO ZenIV.linux.org.uk" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1726437AbfBUIPZ (ORCPT ); Thu, 21 Feb 2019 03:15:25 -0500 Received: from viro by ZenIV.linux.org.uk with local (Exim 4.91 #2 (Red Hat Linux)) id 1gwjVr-0006gW-9C for netdev@vger.kernel.org; Thu, 21 Feb 2019 08:15:23 +0000 Date: Thu, 21 Feb 2019 08:15:23 +0000 From: Al Viro To: netdev@vger.kernel.org Subject: [RFC] coallocating struct socket and struct socket_wq Message-ID: <20190221081523.GZ2217@ZenIV.linux.org.uk> MIME-Version: 1.0 Content-Disposition: inline User-Agent: Mutt/1.10.1 (2018-07-13) Sender: netdev-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: netdev@vger.kernel.org All instances of struct socket are embedded into some bigger structure - most into struct socket_alloc (with struct inode following struct socket), some into struct tun_file or struct tap_queue. In the last two cases the corresponding struct socket_wq (the one whose address goes into socket->wq) is in the same containing structure, right after struct socket. In case of struct socket_alloc, we allocate struct socket_wq separately and set socket->wq before anyone sees either (in sock_alloc_inode()). In sock_destroy_inode() they are both freed (struct sock_alloc immediately, struct socket_wq - RCU-delayed). AFAICS, nothing ever reassigns socket->wq. Could we simply embed struct socket_wq into struct socket? RCU delay is not an issue - net/socket.c is non-modular, so call_rcu()-based variant freeing both together is not horrible. sock_alloc_inode() would be simplified, tun/tap uses would simply lose the separate socket_wq members. The only problem I see here is ____cacheline_aligned_in_smp we have on struct socket_wq. Could we simply make it the first field in struct socket? Without lockdep they are reasonably small - on amd64 socket_wq is 64 bytes, while the rest of struct socket is 48 (and pointer to wq would obviously disappear). Or is there something subtle I'm missing here? What I have in mind is something along the lines of diff --git a/drivers/net/tap.c b/drivers/net/tap.c index c0b52e48f0e6..7aedc748fbd0 100644 --- a/drivers/net/tap.c +++ b/drivers/net/tap.c @@ -519,8 +519,7 @@ static int tap_open(struct inode *inode, struct file *file) goto err; } - RCU_INIT_POINTER(q->sock.wq, &q->wq); - init_waitqueue_head(&q->wq.wait); + init_waitqueue_head(&q->sock.wq.wait); q->sock.type = SOCK_RAW; q->sock.state = SS_CONNECTED; q->sock.file = file; @@ -578,7 +577,7 @@ static __poll_t tap_poll(struct file *file, poll_table *wait) goto out; mask = 0; - poll_wait(file, &q->wq.wait, wait); + poll_wait(file, &q->sock.wq.wait, wait); if (!ptr_ring_empty(&q->ring)) mask |= EPOLLIN | EPOLLRDNORM; diff --git a/drivers/net/tun.c b/drivers/net/tun.c index fed298c0cb39..51f023391998 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -169,7 +169,6 @@ struct tun_pcpu_stats { struct tun_file { struct sock sk; struct socket socket; - struct socket_wq wq; struct tun_struct __rcu *tun; struct fasync_struct *fasync; /* only used for fasnyc */ @@ -2166,7 +2165,7 @@ static void *tun_ring_recv(struct tun_file *tfile, int noblock, int *err) goto out; } - add_wait_queue(&tfile->wq.wait, &wait); + add_wait_queue(&tfile->socket.wq.wait, &wait); current->state = TASK_INTERRUPTIBLE; while (1) { @@ -2186,7 +2185,7 @@ static void *tun_ring_recv(struct tun_file *tfile, int noblock, int *err) } current->state = TASK_RUNNING; - remove_wait_queue(&tfile->wq.wait, &wait); + remove_wait_queue(&tfile->socket.wq.wait, &wait); out: *err = error; @@ -3409,8 +3408,7 @@ static int tun_chr_open(struct inode *inode, struct file * file) tfile->flags = 0; tfile->ifindex = 0; - init_waitqueue_head(&tfile->wq.wait); - RCU_INIT_POINTER(tfile->socket.wq, &tfile->wq); + init_waitqueue_head(&tfile->socket.wq.wait); tfile->socket.file = file; tfile->socket.ops = &tun_socket_ops; diff --git a/include/linux/if_tap.h b/include/linux/if_tap.h index 8e66866c11be..915a187cfabd 100644 --- a/include/linux/if_tap.h +++ b/include/linux/if_tap.h @@ -62,7 +62,6 @@ struct tap_dev { struct tap_queue { struct sock sk; struct socket sock; - struct socket_wq wq; int vnet_hdr_sz; struct tap_dev __rcu *tap; struct file *file; diff --git a/include/linux/net.h b/include/linux/net.h index e0930678c8bf..e6e0d7858c78 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -108,13 +108,13 @@ struct socket_wq { * @wq: wait queue for several uses */ struct socket { + struct socket_wq wq; socket_state state; short type; unsigned long flags; - struct socket_wq *wq; struct file *file; struct sock *sk; diff --git a/include/net/sock.h b/include/net/sock.h index 6679f3c120b0..c05c08487900 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1805,7 +1805,7 @@ static inline void sock_graft(struct sock *sk, struct socket *parent) { WARN_ON(parent->sk); write_lock_bh(&sk->sk_callback_lock); - rcu_assign_pointer(sk->sk_wq, parent->wq); + rcu_assign_pointer(sk->sk_wq, &parent->wq); parent->sk = sk; sk_set_socket(sk, parent); sk->sk_uid = SOCK_INODE(parent)->i_uid; @@ -2089,7 +2089,7 @@ static inline void sock_poll_wait(struct file *filp, struct socket *sock, poll_table *p) { if (!poll_does_not_wait(p)) { - poll_wait(filp, &sock->wq->wait, p); + poll_wait(filp, &sock->wq.wait, p); /* We need to be sure we are in sync with the * socket flags modification. * diff --git a/net/core/sock.c b/net/core/sock.c index 71ded4d8025c..d097f981e7d8 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2808,7 +2808,7 @@ void sock_init_data(struct socket *sock, struct sock *sk) if (sock) { sk->sk_type = sock->type; - sk->sk_wq = sock->wq; + sk->sk_wq = &sock->wq; sock->sk = sk; sk->sk_uid = SOCK_INODE(sock)->i_uid; } else { diff --git a/net/socket.c b/net/socket.c index 643a1648fcc2..03cf4128b3ba 100644 --- a/net/socket.c +++ b/net/socket.c @@ -239,20 +239,13 @@ static struct kmem_cache *sock_inode_cachep __ro_after_init; static struct inode *sock_alloc_inode(struct super_block *sb) { struct socket_alloc *ei; - struct socket_wq *wq; ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL); if (!ei) return NULL; - wq = kmalloc(sizeof(*wq), GFP_KERNEL); - if (!wq) { - kmem_cache_free(sock_inode_cachep, ei); - return NULL; - } - init_waitqueue_head(&wq->wait); - wq->fasync_list = NULL; - wq->flags = 0; - ei->socket.wq = wq; + init_waitqueue_head(&ei->socket.wq.wait); + ei->socket.wq.fasync_list = NULL; + ei->socket.wq.flags = 0; ei->socket.state = SS_UNCONNECTED; ei->socket.flags = 0; @@ -263,15 +256,18 @@ static struct inode *sock_alloc_inode(struct super_block *sb) return &ei->vfs_inode; } -static void sock_destroy_inode(struct inode *inode) +static void sock_destroy_inode_callback(struct rcu_head *head) { - struct socket_alloc *ei; - - ei = container_of(inode, struct socket_alloc, vfs_inode); - kfree_rcu(ei->socket.wq, rcu); + struct socket_alloc *ei = container_of(head, struct socket_alloc, + vfs_inode.i_rcu); kmem_cache_free(sock_inode_cachep, ei); } +static void sock_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, sock_destroy_inode_callback); +} + static void init_once(void *foo) { struct socket_alloc *ei = (struct socket_alloc *)foo; @@ -583,7 +579,7 @@ static void __sock_release(struct socket *sock, struct inode *inode) module_put(owner); } - if (sock->wq->fasync_list) + if (sock->wq.fasync_list) pr_err("%s: fasync list not empty!\n", __func__); if (!sock->file) { @@ -1183,7 +1179,7 @@ static int sock_fasync(int fd, struct file *filp, int on) return -EINVAL; lock_sock(sk); - wq = sock->wq; + wq = &sock->wq; fasync_helper(fd, filp, on, &wq->fasync_list); if (!wq->fasync_list)