diff mbox

[v2,3/9] socket: initial cgroup code.

Message ID 1315369399-3073-4-git-send-email-glommer@parallels.com
State RFC, archived
Delegated to: David Miller
Headers show

Commit Message

Glauber Costa Sept. 7, 2011, 4:23 a.m. UTC
We aim to control the amount of kernel memory pinned at any
time by tcp sockets. To lay the foundations for this work,
this patch adds a pointer to the kmem_cgroup to the socket
structure.

Signed-off-by: Glauber Costa <glommer@parallels.com>
CC: David S. Miller <davem@davemloft.net>
CC: Hiroyouki Kamezawa <kamezawa.hiroyu@jp.fujitsu.com>
CC: Eric W. Biederman <ebiederm@xmission.com>
---
 include/linux/kmem_cgroup.h |   29 +++++++++++++++++++++++++++++
 include/net/sock.h          |    2 ++
 net/core/sock.c             |    5 ++---
 3 files changed, 33 insertions(+), 3 deletions(-)

Comments

Paul Menage Sept. 7, 2011, 5:26 a.m. UTC | #1
On Tue, Sep 6, 2011 at 9:23 PM, Glauber Costa <glommer@parallels.com> wrote:
> We aim to control the amount of kernel memory pinned at any
> time by tcp sockets. To lay the foundations for this work,
> this patch adds a pointer to the kmem_cgroup to the socket
> structure.
>
> Signed-off-by: Glauber Costa <glommer@parallels.com>
> CC: David S. Miller <davem@davemloft.net>
> CC: Hiroyouki Kamezawa <kamezawa.hiroyu@jp.fujitsu.com>
> CC: Eric W. Biederman <ebiederm@xmission.com>
> ---
>  include/linux/kmem_cgroup.h |   29 +++++++++++++++++++++++++++++
>  include/net/sock.h          |    2 ++
>  net/core/sock.c             |    5 ++---
>  3 files changed, 33 insertions(+), 3 deletions(-)
>
> diff --git a/include/linux/kmem_cgroup.h b/include/linux/kmem_cgroup.h
> index 0e4a74b..77076d8 100644
> --- a/include/linux/kmem_cgroup.h
> +++ b/include/linux/kmem_cgroup.h
> @@ -49,5 +49,34 @@ static inline struct kmem_cgroup *kcg_from_task(struct task_struct *tsk)
>        return NULL;
>  }
>  #endif /* CONFIG_CGROUP_KMEM */
> +
> +#ifdef CONFIG_INET
> +#include <net/sock.h>
> +static inline void sock_update_kmem_cgrp(struct sock *sk)
> +{
> +#ifdef CONFIG_CGROUP_KMEM
> +       sk->sk_cgrp = kcg_from_task(current);

BUG_ON(sk->sk_cgrp) ? Or else release the old cgroup if necessary.

> @@ -339,6 +340,7 @@ struct sock {
>  #endif
>        __u32                   sk_mark;
>        u32                     sk_classid;
> +       struct kmem_cgroup      *sk_cgrp;

Should this be protected by a #ifdef?

Paul
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Glauber Costa Sept. 7, 2011, 5:59 a.m. UTC | #2
On 09/07/2011 02:26 AM, Paul Menage wrote:
> On Tue, Sep 6, 2011 at 9:23 PM, Glauber Costa<glommer@parallels.com>  wrote:
>> We aim to control the amount of kernel memory pinned at any
>> time by tcp sockets. To lay the foundations for this work,
>> this patch adds a pointer to the kmem_cgroup to the socket
>> structure.
>>
>> Signed-off-by: Glauber Costa<glommer@parallels.com>
>> CC: David S. Miller<davem@davemloft.net>
>> CC: Hiroyouki Kamezawa<kamezawa.hiroyu@jp.fujitsu.com>
>> CC: Eric W. Biederman<ebiederm@xmission.com>
>> ---
>>   include/linux/kmem_cgroup.h |   29 +++++++++++++++++++++++++++++
>>   include/net/sock.h          |    2 ++
>>   net/core/sock.c             |    5 ++---
>>   3 files changed, 33 insertions(+), 3 deletions(-)
>>
>> diff --git a/include/linux/kmem_cgroup.h b/include/linux/kmem_cgroup.h
>> index 0e4a74b..77076d8 100644
>> --- a/include/linux/kmem_cgroup.h
>> +++ b/include/linux/kmem_cgroup.h
>> @@ -49,5 +49,34 @@ static inline struct kmem_cgroup *kcg_from_task(struct task_struct *tsk)
>>         return NULL;
>>   }
>>   #endif /* CONFIG_CGROUP_KMEM */
>> +
>> +#ifdef CONFIG_INET
>> +#include<net/sock.h>
>> +static inline void sock_update_kmem_cgrp(struct sock *sk)
>> +{
>> +#ifdef CONFIG_CGROUP_KMEM
>> +       sk->sk_cgrp = kcg_from_task(current);
>
> BUG_ON(sk->sk_cgrp) ? Or else release the old cgroup if necessary.

Since at least in this current incarnation, I am not doing migrations,
I definitely don't expect to have a pointer already present here.
BUG_ON() it is.

>> @@ -339,6 +340,7 @@ struct sock {
>>   #endif
>>         __u32                   sk_mark;
>>         u32                     sk_classid;
>> +       struct kmem_cgroup      *sk_cgrp;
>
> Should this be protected by a #ifdef?
I don't particularly like it. I think that ifdef'ing fields
in structures, while allowing for size optimization, takes away
size and alignment predictability. But... can do.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Kirill A. Shutemov Sept. 7, 2011, 10:17 p.m. UTC | #3
On Wed, Sep 07, 2011 at 01:23:13AM -0300, Glauber Costa wrote:
> We aim to control the amount of kernel memory pinned at any
> time by tcp sockets. To lay the foundations for this work,
> this patch adds a pointer to the kmem_cgroup to the socket
> structure.
> 
> Signed-off-by: Glauber Costa <glommer@parallels.com>
> CC: David S. Miller <davem@davemloft.net>
> CC: Hiroyouki Kamezawa <kamezawa.hiroyu@jp.fujitsu.com>
> CC: Eric W. Biederman <ebiederm@xmission.com>
> ---
>  include/linux/kmem_cgroup.h |   29 +++++++++++++++++++++++++++++
>  include/net/sock.h          |    2 ++
>  net/core/sock.c             |    5 ++---
>  3 files changed, 33 insertions(+), 3 deletions(-)
> 
> diff --git a/include/linux/kmem_cgroup.h b/include/linux/kmem_cgroup.h
> index 0e4a74b..77076d8 100644
> --- a/include/linux/kmem_cgroup.h
> +++ b/include/linux/kmem_cgroup.h
> @@ -49,5 +49,34 @@ static inline struct kmem_cgroup *kcg_from_task(struct task_struct *tsk)
>  	return NULL;
>  }
>  #endif /* CONFIG_CGROUP_KMEM */
> +
> +#ifdef CONFIG_INET

Will it break something if you define the helpers even if CONFIG_INET
is not defined?
It will be much cleaner. You can reuse ifdef CONFIG_CGROUP_KMEM in this
case.

> +#include <net/sock.h>
> +static inline void sock_update_kmem_cgrp(struct sock *sk)
> +{
> +#ifdef CONFIG_CGROUP_KMEM
> +	sk->sk_cgrp = kcg_from_task(current);
> +
> +	/*
> +	 * We don't need to protect against anything task-related, because
> +	 * we are basically stuck with the sock pointer that won't change,
> +	 * even if the task that originated the socket changes cgroups.
> +	 *
> +	 * What we do have to guarantee, is that the chain leading us to
> +	 * the top level won't change under our noses. Incrementing the
> +	 * reference count via cgroup_exclude_rmdir guarantees that.
> +	 */
> +	cgroup_exclude_rmdir(&sk->sk_cgrp->css);
> +#endif
> +}
> +
> +static inline void sock_release_kmem_cgrp(struct sock *sk)
> +{
> +#ifdef CONFIG_CGROUP_KMEM
> +	cgroup_release_and_wakeup_rmdir(&sk->sk_cgrp->css);
> +#endif
> +}
> +
> +#endif /* CONFIG_INET */
>  #endif /* _LINUX_KMEM_CGROUP_H */

> @@ -2252,9 +2254,6 @@ void sk_common_release(struct sock *sk)
>  }
>  EXPORT_SYMBOL(sk_common_release);
>  
> -static DEFINE_RWLOCK(proto_list_lock);
> -static LIST_HEAD(proto_list);
> -

Wrong patch?
Glauber Costa Sept. 8, 2011, 4:54 a.m. UTC | #4
On 09/07/2011 07:17 PM, Kirill A. Shutemov wrote:
> On Wed, Sep 07, 2011 at 01:23:13AM -0300, Glauber Costa wrote:
>> We aim to control the amount of kernel memory pinned at any
>> time by tcp sockets. To lay the foundations for this work,
>> this patch adds a pointer to the kmem_cgroup to the socket
>> structure.
>>
>> Signed-off-by: Glauber Costa<glommer@parallels.com>
>> CC: David S. Miller<davem@davemloft.net>
>> CC: Hiroyouki Kamezawa<kamezawa.hiroyu@jp.fujitsu.com>
>> CC: Eric W. Biederman<ebiederm@xmission.com>
>> ---
>>   include/linux/kmem_cgroup.h |   29 +++++++++++++++++++++++++++++
>>   include/net/sock.h          |    2 ++
>>   net/core/sock.c             |    5 ++---
>>   3 files changed, 33 insertions(+), 3 deletions(-)
>>
>> diff --git a/include/linux/kmem_cgroup.h b/include/linux/kmem_cgroup.h
>> index 0e4a74b..77076d8 100644
>> --- a/include/linux/kmem_cgroup.h
>> +++ b/include/linux/kmem_cgroup.h
>> @@ -49,5 +49,34 @@ static inline struct kmem_cgroup *kcg_from_task(struct task_struct *tsk)
>>   	return NULL;
>>   }
>>   #endif /* CONFIG_CGROUP_KMEM */
>> +
>> +#ifdef CONFIG_INET
>
> Will it break something if you define the helpers even if CONFIG_INET
> is not defined?
> It will be much cleaner. You can reuse ifdef CONFIG_CGROUP_KMEM in this
> case.

The helpers inside CONFIG_INET are needed for the network code, 
regardless of kmem cgroup is defined or not, not the other way around.

So I could remove CONFIG_INET, but I can't possibly move it inside
CONFIG_CGROUP_KMEM. So this buy us nothing.

>> +#include<net/sock.h>
>> +static inline void sock_update_kmem_cgrp(struct sock *sk)
>> +{
>> +#ifdef CONFIG_CGROUP_KMEM
>> +	sk->sk_cgrp = kcg_from_task(current);
>> +
>> +	/*
>> +	 * We don't need to protect against anything task-related, because
>> +	 * we are basically stuck with the sock pointer that won't change,
>> +	 * even if the task that originated the socket changes cgroups.
>> +	 *
>> +	 * What we do have to guarantee, is that the chain leading us to
>> +	 * the top level won't change under our noses. Incrementing the
>> +	 * reference count via cgroup_exclude_rmdir guarantees that.
>> +	 */
>> +	cgroup_exclude_rmdir(&sk->sk_cgrp->css);
>> +#endif
>> +}
>> +
>> +static inline void sock_release_kmem_cgrp(struct sock *sk)
>> +{
>> +#ifdef CONFIG_CGROUP_KMEM
>> +	cgroup_release_and_wakeup_rmdir(&sk->sk_cgrp->css);
>> +#endif
>> +}
>> +
>> +#endif /* CONFIG_INET */
>>   #endif /* _LINUX_KMEM_CGROUP_H */
>
>> @@ -2252,9 +2254,6 @@ void sk_common_release(struct sock *sk)
>>   }
>>   EXPORT_SYMBOL(sk_common_release);
>>
>> -static DEFINE_RWLOCK(proto_list_lock);
>> -static LIST_HEAD(proto_list);
>> -
>
> Wrong patch?
Yes, it is. Thanks for noticing.


--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Kirill A. Shutemov Sept. 8, 2011, 5:35 a.m. UTC | #5
On Thu, Sep 08, 2011 at 01:54:03AM -0300, Glauber Costa wrote:
> On 09/07/2011 07:17 PM, Kirill A. Shutemov wrote:
> > On Wed, Sep 07, 2011 at 01:23:13AM -0300, Glauber Costa wrote:
> >> We aim to control the amount of kernel memory pinned at any
> >> time by tcp sockets. To lay the foundations for this work,
> >> this patch adds a pointer to the kmem_cgroup to the socket
> >> structure.
> >>
> >> Signed-off-by: Glauber Costa<glommer@parallels.com>
> >> CC: David S. Miller<davem@davemloft.net>
> >> CC: Hiroyouki Kamezawa<kamezawa.hiroyu@jp.fujitsu.com>
> >> CC: Eric W. Biederman<ebiederm@xmission.com>
> >> ---
> >>   include/linux/kmem_cgroup.h |   29 +++++++++++++++++++++++++++++
> >>   include/net/sock.h          |    2 ++
> >>   net/core/sock.c             |    5 ++---
> >>   3 files changed, 33 insertions(+), 3 deletions(-)
> >>
> >> diff --git a/include/linux/kmem_cgroup.h b/include/linux/kmem_cgroup.h
> >> index 0e4a74b..77076d8 100644
> >> --- a/include/linux/kmem_cgroup.h
> >> +++ b/include/linux/kmem_cgroup.h
> >> @@ -49,5 +49,34 @@ static inline struct kmem_cgroup *kcg_from_task(struct task_struct *tsk)
> >>   	return NULL;
> >>   }
> >>   #endif /* CONFIG_CGROUP_KMEM */
> >> +
> >> +#ifdef CONFIG_INET
> >
> > Will it break something if you define the helpers even if CONFIG_INET
> > is not defined?
> > It will be much cleaner. You can reuse ifdef CONFIG_CGROUP_KMEM in this
> > case.
> 
> The helpers inside CONFIG_INET are needed for the network code, 
> regardless of kmem cgroup is defined or not, not the other way around.
> 
> So I could remove CONFIG_INET, but I can't possibly move it inside
> CONFIG_CGROUP_KMEM. So this buy us nothing.

You can define empty under CONFIG_CGROUP_KMEM's #else, can't you?
Like with kcg_from_cgroup()/kcg_from_task().
Glauber Costa Sept. 8, 2011, 12:41 p.m. UTC | #6
On 09/08/2011 02:35 AM, Kirill A. Shutemov wrote:
> On Thu, Sep 08, 2011 at 01:54:03AM -0300, Glauber Costa wrote:
>> On 09/07/2011 07:17 PM, Kirill A. Shutemov wrote:
>>> On Wed, Sep 07, 2011 at 01:23:13AM -0300, Glauber Costa wrote:
>>>> We aim to control the amount of kernel memory pinned at any
>>>> time by tcp sockets. To lay the foundations for this work,
>>>> this patch adds a pointer to the kmem_cgroup to the socket
>>>> structure.
>>>>
>>>> Signed-off-by: Glauber Costa<glommer@parallels.com>
>>>> CC: David S. Miller<davem@davemloft.net>
>>>> CC: Hiroyouki Kamezawa<kamezawa.hiroyu@jp.fujitsu.com>
>>>> CC: Eric W. Biederman<ebiederm@xmission.com>
>>>> ---
>>>>    include/linux/kmem_cgroup.h |   29 +++++++++++++++++++++++++++++
>>>>    include/net/sock.h          |    2 ++
>>>>    net/core/sock.c             |    5 ++---
>>>>    3 files changed, 33 insertions(+), 3 deletions(-)
>>>>
>>>> diff --git a/include/linux/kmem_cgroup.h b/include/linux/kmem_cgroup.h
>>>> index 0e4a74b..77076d8 100644
>>>> --- a/include/linux/kmem_cgroup.h
>>>> +++ b/include/linux/kmem_cgroup.h
>>>> @@ -49,5 +49,34 @@ static inline struct kmem_cgroup *kcg_from_task(struct task_struct *tsk)
>>>>    	return NULL;
>>>>    }
>>>>    #endif /* CONFIG_CGROUP_KMEM */
>>>> +
>>>> +#ifdef CONFIG_INET
>>>
>>> Will it break something if you define the helpers even if CONFIG_INET
>>> is not defined?
>>> It will be much cleaner. You can reuse ifdef CONFIG_CGROUP_KMEM in this
>>> case.
>>
>> The helpers inside CONFIG_INET are needed for the network code,
>> regardless of kmem cgroup is defined or not, not the other way around.
>>
>> So I could remove CONFIG_INET, but I can't possibly move it inside
>> CONFIG_CGROUP_KMEM. So this buy us nothing.
>
> You can define empty under CONFIG_CGROUP_KMEM's #else, can't you?
> Like with kcg_from_cgroup()/kcg_from_task().
>
Do you really think it is cleaner?

Why would I define empty something that is not empty at all?
Look again. Most of those helpers would be the exact same with or 
without CONFIG_CGROUP_KMEM . The others, very few differences. If 
CONFIG_INET bothers you, I can remove it altogether, making it 
unconditional. But moving it inside CONFIG_CGROUP_KMEM makes no sense.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/include/linux/kmem_cgroup.h b/include/linux/kmem_cgroup.h
index 0e4a74b..77076d8 100644
--- a/include/linux/kmem_cgroup.h
+++ b/include/linux/kmem_cgroup.h
@@ -49,5 +49,34 @@  static inline struct kmem_cgroup *kcg_from_task(struct task_struct *tsk)
 	return NULL;
 }
 #endif /* CONFIG_CGROUP_KMEM */
+
+#ifdef CONFIG_INET
+#include <net/sock.h>
+static inline void sock_update_kmem_cgrp(struct sock *sk)
+{
+#ifdef CONFIG_CGROUP_KMEM
+	sk->sk_cgrp = kcg_from_task(current);
+
+	/*
+	 * We don't need to protect against anything task-related, because
+	 * we are basically stuck with the sock pointer that won't change,
+	 * even if the task that originated the socket changes cgroups.
+	 *
+	 * What we do have to guarantee, is that the chain leading us to
+	 * the top level won't change under our noses. Incrementing the
+	 * reference count via cgroup_exclude_rmdir guarantees that.
+	 */
+	cgroup_exclude_rmdir(&sk->sk_cgrp->css);
+#endif
+}
+
+static inline void sock_release_kmem_cgrp(struct sock *sk)
+{
+#ifdef CONFIG_CGROUP_KMEM
+	cgroup_release_and_wakeup_rmdir(&sk->sk_cgrp->css);
+#endif
+}
+
+#endif /* CONFIG_INET */
 #endif /* _LINUX_KMEM_CGROUP_H */
 
diff --git a/include/net/sock.h b/include/net/sock.h
index 8e4062f..709382f 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -228,6 +228,7 @@  struct sock_common {
   *	@sk_security: used by security modules
   *	@sk_mark: generic packet mark
   *	@sk_classid: this socket's cgroup classid
+  *	@sk_cgrp: this socket's kernel memory (kmem) cgroup 
   *	@sk_write_pending: a write to stream socket waits to start
   *	@sk_state_change: callback to indicate change in the state of the sock
   *	@sk_data_ready: callback to indicate there is data to be processed
@@ -339,6 +340,7 @@  struct sock {
 #endif
 	__u32			sk_mark;
 	u32			sk_classid;
+	struct kmem_cgroup	*sk_cgrp;
 	void			(*sk_state_change)(struct sock *sk);
 	void			(*sk_data_ready)(struct sock *sk, int bytes);
 	void			(*sk_write_space)(struct sock *sk);
diff --git a/net/core/sock.c b/net/core/sock.c
index 3449df8..7109864 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1139,6 +1139,7 @@  struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
 		atomic_set(&sk->sk_wmem_alloc, 1);
 
 		sock_update_classid(sk);
+		sock_update_kmem_cgrp(sk);
 	}
 
 	return sk;
@@ -1170,6 +1171,7 @@  static void __sk_free(struct sock *sk)
 		put_cred(sk->sk_peer_cred);
 	put_pid(sk->sk_peer_pid);
 	put_net(sock_net(sk));
+	sock_release_kmem_cgrp(sk);
 	sk_prot_free(sk->sk_prot_creator, sk);
 }
 
@@ -2252,9 +2254,6 @@  void sk_common_release(struct sock *sk)
 }
 EXPORT_SYMBOL(sk_common_release);
 
-static DEFINE_RWLOCK(proto_list_lock);
-static LIST_HEAD(proto_list);
-
 #ifdef CONFIG_PROC_FS
 #define PROTO_INUSE_NR	64	/* should be enough for the first time */
 struct prot_inuse {