diff mbox series

[v6,2/3] sock: Move the socket inuse to namespace.

Message ID 1512918726-2731-2-git-send-email-xiangxia.m.yue@gmail.com
State Changes Requested, archived
Delegated to: David Miller
Headers show
Series [v6,1/3] sock: Change the netns_core member name. | expand

Commit Message

Tonghao Zhang Dec. 10, 2017, 3:12 p.m. UTC
In some cases, we want to know how many sockets are in use in
different _net_ namespaces. It's a key resource metric.

This patch adds a member to struct netns_core: a counter of the
sockets in use in the _net_ namespace. The counter is incremented
in sk_alloc and sk_clone_lock, and decremented in __sk_free.

The main reasons for doing this are:

1. When Linux calls 'do_exit' for an exiting process, the functions
'exit_task_namespaces' and 'exit_task_work' are called sequentially.
'exit_task_namespaces' may have destroyed the _net_ namespace, but
'sock_release', called from 'exit_task_work', would still use the _net_
namespace if we counted the sockets in use in sock_release.

2. A socket and its sock come in pairs. More importantly, the sock holds
the _net_ namespace. We count the sockets in use in the sock, to avoid
holding the _net_ namespace again in the socket. This keeps the code easy
to maintain.

3. We allocate sock_inuse in net_alloc() and free it in net_free()
because we must make sure that sock_inuse is not used any more after
we release it. Note that some sockets (e.g. netlink sockets created in
the kernel) are released after all of the network namespace exit methods
have run. For more details, see cleanup_net. Therefore, we should not use
the per network namespace operations to allocate sock_inuse.

Signed-off-by: Martin Zhang <zhangjunweimartin@didichuxing.com>
Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
---
 include/net/netns/core.h |  3 +++
 include/net/sock.h       |  1 +
 net/core/net_namespace.c | 10 ++++++++++
 net/core/sock.c          | 26 ++++++++++++++++++++++++++
 net/socket.c             | 21 ++-------------------
 5 files changed, 42 insertions(+), 19 deletions(-)

Comments

Cong Wang Dec. 12, 2017, 6:04 p.m. UTC | #1
On Sun, Dec 10, 2017 at 7:12 AM, Tonghao Zhang <xiangxia.m.yue@gmail.com> wrote:
> diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
> index b797832..6c191fb 100644
> --- a/net/core/net_namespace.c
> +++ b/net/core/net_namespace.c
> @@ -363,6 +363,13 @@ static struct net *net_alloc(void)
>         if (!net)
>                 goto out_free;
>
> +#ifdef CONFIG_PROC_FS
> +       net->core.sock_inuse = alloc_percpu(int);
> +       if (!net->core.sock_inuse) {
> +               kmem_cache_free(net_cachep, net);
> +               goto out_free;
> +       }
> +#endif
>         rcu_assign_pointer(net->gen, ng);
>  out:
>         return net;
> @@ -374,6 +381,9 @@ static struct net *net_alloc(void)
>
>  static void net_free(struct net *net)
>  {
> +#ifdef CONFIG_PROC_FS
> +       free_percpu(net->core.sock_inuse);
> +#endif
>         kfree(rcu_access_pointer(net->gen));
>         kmem_cache_free(net_cachep, net);
>  }

Putting socket code in net_namespace.c doesn't look good.
Tonghao Zhang Dec. 14, 2017, 1:22 a.m. UTC | #2
On Wed, Dec 13, 2017 at 2:04 AM, Cong Wang <xiyou.wangcong@gmail.com> wrote:
> On Sun, Dec 10, 2017 at 7:12 AM, Tonghao Zhang <xiangxia.m.yue@gmail.com> wrote:
>> diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
>> index b797832..6c191fb 100644
>> --- a/net/core/net_namespace.c
>> +++ b/net/core/net_namespace.c
>> @@ -363,6 +363,13 @@ static struct net *net_alloc(void)
>>         if (!net)
>>                 goto out_free;
>>
>> +#ifdef CONFIG_PROC_FS
>> +       net->core.sock_inuse = alloc_percpu(int);
>> +       if (!net->core.sock_inuse) {
>> +               kmem_cache_free(net_cachep, net);
>> +               goto out_free;
>> +       }
>> +#endif
>>         rcu_assign_pointer(net->gen, ng);
>>  out:
>>         return net;
>> @@ -374,6 +381,9 @@ static struct net *net_alloc(void)
>>
>>  static void net_free(struct net *net)
>>  {
>> +#ifdef CONFIG_PROC_FS
>> +       free_percpu(net->core.sock_inuse);
>> +#endif
>>         kfree(rcu_access_pointer(net->gen));
>>         kmem_cache_free(net_cachep, net);
>>  }
>
> Putting socket code in net_namespace.c doesn't look good.
hi cong,
Thanks for your work. If we don't allocate the counter in net_alloc, it's
better to count only the socks created for userspace, while the socks
created in the kernel will be omitted.
diff mbox series

Patch

diff --git a/include/net/netns/core.h b/include/net/netns/core.h
index 45cfb5d..a5e8a66 100644
--- a/include/net/netns/core.h
+++ b/include/net/netns/core.h
@@ -11,6 +11,9 @@  struct netns_core {
 
 	int	sysctl_somaxconn;
 
+#ifdef CONFIG_PROC_FS
+	int __percpu *sock_inuse;
+#endif
 	struct prot_inuse __percpu *prot_inuse;
 };
 
diff --git a/include/net/sock.h b/include/net/sock.h
index 9155da4..44f4890 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1262,6 +1262,7 @@  static inline void sk_sockets_allocated_inc(struct sock *sk)
 /* Called with local bh disabled */
 void sock_prot_inuse_add(struct net *net, struct proto *prot, int inc);
 int sock_prot_inuse_get(struct net *net, struct proto *proto);
+int sock_inuse_get(struct net *net);
 #else
 static inline void sock_prot_inuse_add(struct net *net, struct proto *prot,
 		int inc)
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index b797832..6c191fb 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -363,6 +363,13 @@  static struct net *net_alloc(void)
 	if (!net)
 		goto out_free;
 
+#ifdef CONFIG_PROC_FS
+	net->core.sock_inuse = alloc_percpu(int);
+	if (!net->core.sock_inuse) {
+		kmem_cache_free(net_cachep, net);
+		goto out_free;
+	}
+#endif
 	rcu_assign_pointer(net->gen, ng);
 out:
 	return net;
@@ -374,6 +381,9 @@  static struct net *net_alloc(void)
 
 static void net_free(struct net *net)
 {
+#ifdef CONFIG_PROC_FS
+	free_percpu(net->core.sock_inuse);
+#endif
 	kfree(rcu_access_pointer(net->gen));
 	kmem_cache_free(net_cachep, net);
 }
diff --git a/net/core/sock.c b/net/core/sock.c
index c2dd2d3..f6974eb 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -145,6 +145,8 @@ 
 static DEFINE_MUTEX(proto_list_mutex);
 static LIST_HEAD(proto_list);
 
+static void sock_inuse_add(struct net *net, int val);
+
 /**
  * sk_ns_capable - General socket capability test
  * @sk: Socket to use a capability on or through
@@ -1534,6 +1536,7 @@  struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
 		if (likely(sk->sk_net_refcnt))
 			get_net(net);
 		sock_net_set(sk, net);
+		sock_inuse_add(net, 1);
 		refcount_set(&sk->sk_wmem_alloc, 1);
 
 		mem_cgroup_sk_alloc(sk);
@@ -1595,6 +1598,8 @@  void sk_destruct(struct sock *sk)
 
 static void __sk_free(struct sock *sk)
 {
+	sock_inuse_add(sock_net(sk), -1);
+
 	if (unlikely(sock_diag_has_destroy_listeners(sk) && sk->sk_net_refcnt))
 		sock_diag_broadcast_destroy(sk);
 	else
@@ -1716,6 +1721,7 @@  struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
 		newsk->sk_priority = 0;
 		newsk->sk_incoming_cpu = raw_smp_processor_id();
 		atomic64_set(&newsk->sk_cookie, 0);
+		sock_inuse_add(sock_net(newsk), 1);
 
 		/*
 		 * Before updating sk_refcnt, we must commit prior changes to memory
@@ -3061,6 +3067,22 @@  int sock_prot_inuse_get(struct net *net, struct proto *prot)
 }
 EXPORT_SYMBOL_GPL(sock_prot_inuse_get);
 
+static void sock_inuse_add(struct net *net, int val)
+{
+	this_cpu_add(*net->core.sock_inuse, val);
+}
+
+int sock_inuse_get(struct net *net)
+{
+	int cpu, res = 0;
+
+	for_each_possible_cpu(cpu)
+		res += *per_cpu_ptr(net->core.sock_inuse, cpu);
+
+	return res >= 0 ? res : 0;
+}
+EXPORT_SYMBOL_GPL(sock_inuse_get);
+
 static int __net_init sock_inuse_init_net(struct net *net)
 {
 	net->core.prot_inuse = alloc_percpu(struct prot_inuse);
@@ -3112,6 +3134,10 @@  static inline void assign_proto_idx(struct proto *prot)
 static inline void release_proto_idx(struct proto *prot)
 {
 }
+
+static void sock_inuse_add(struct net *net, int val)
+{
+}
 #endif
 
 static void req_prot_cleanup(struct request_sock_ops *rsk_prot)
diff --git a/net/socket.c b/net/socket.c
index 05f361f..bbd2e9c 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -163,12 +163,6 @@  static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
 static const struct net_proto_family __rcu *net_families[NPROTO] __read_mostly;
 
 /*
- *	Statistics counters of the socket lists
- */
-
-static DEFINE_PER_CPU(int, sockets_in_use);
-
-/*
  * Support routines.
  * Move socket addresses back and forth across the kernel/user
  * divide and look after the messy bits.
@@ -578,7 +572,6 @@  struct socket *sock_alloc(void)
 	inode->i_gid = current_fsgid();
 	inode->i_op = &sockfs_inode_ops;
 
-	this_cpu_add(sockets_in_use, 1);
 	return sock;
 }
 EXPORT_SYMBOL(sock_alloc);
@@ -605,7 +598,6 @@  void sock_release(struct socket *sock)
 	if (rcu_dereference_protected(sock->wq, 1)->fasync_list)
 		pr_err("%s: fasync list not empty!\n", __func__);
 
-	this_cpu_sub(sockets_in_use, 1);
 	if (!sock->file) {
 		iput(SOCK_INODE(sock));
 		return;
@@ -2622,17 +2614,8 @@  static int __init sock_init(void)
 #ifdef CONFIG_PROC_FS
 void socket_seq_show(struct seq_file *seq)
 {
-	int cpu;
-	int counter = 0;
-
-	for_each_possible_cpu(cpu)
-	    counter += per_cpu(sockets_in_use, cpu);
-
-	/* It can be negative, by the way. 8) */
-	if (counter < 0)
-		counter = 0;
-
-	seq_printf(seq, "sockets: used %d\n", counter);
+	seq_printf(seq, "sockets: used %d\n",
+		   sock_inuse_get(seq->private));
 }
 #endif				/* CONFIG_PROC_FS */