Message ID | 1315369399-3073-4-git-send-email-glommer@parallels.com |
---|---|
State | RFC, archived |
Delegated to: | David Miller |
Headers | show |
On Tue, Sep 6, 2011 at 9:23 PM, Glauber Costa <glommer@parallels.com> wrote: > We aim to control the amount of kernel memory pinned at any > time by tcp sockets. To lay the foundations for this work, > this patch adds a pointer to the kmem_cgroup to the socket > structure. > > Signed-off-by: Glauber Costa <glommer@parallels.com> > CC: David S. Miller <davem@davemloft.net> > CC: Hiroyouki Kamezawa <kamezawa.hiroyu@jp.fujitsu.com> > CC: Eric W. Biederman <ebiederm@xmission.com> > --- > include/linux/kmem_cgroup.h | 29 +++++++++++++++++++++++++++++ > include/net/sock.h | 2 ++ > net/core/sock.c | 5 ++--- > 3 files changed, 33 insertions(+), 3 deletions(-) > > diff --git a/include/linux/kmem_cgroup.h b/include/linux/kmem_cgroup.h > index 0e4a74b..77076d8 100644 > --- a/include/linux/kmem_cgroup.h > +++ b/include/linux/kmem_cgroup.h > @@ -49,5 +49,34 @@ static inline struct kmem_cgroup *kcg_from_task(struct task_struct *tsk) > return NULL; > } > #endif /* CONFIG_CGROUP_KMEM */ > + > +#ifdef CONFIG_INET > +#include <net/sock.h> > +static inline void sock_update_kmem_cgrp(struct sock *sk) > +{ > +#ifdef CONFIG_CGROUP_KMEM > + sk->sk_cgrp = kcg_from_task(current); BUG_ON(sk->sk_cgrp) ? Or else release the old cgroup if necessary. > @@ -339,6 +340,7 @@ struct sock { > #endif > __u32 sk_mark; > u32 sk_classid; > + struct kmem_cgroup *sk_cgrp; Should this be protected by a #ifdef? Paul -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 09/07/2011 02:26 AM, Paul Menage wrote: > On Tue, Sep 6, 2011 at 9:23 PM, Glauber Costa<glommer@parallels.com> wrote: >> We aim to control the amount of kernel memory pinned at any >> time by tcp sockets. To lay the foundations for this work, >> this patch adds a pointer to the kmem_cgroup to the socket >> structure. >> >> Signed-off-by: Glauber Costa<glommer@parallels.com> >> CC: David S. Miller<davem@davemloft.net> >> CC: Hiroyouki Kamezawa<kamezawa.hiroyu@jp.fujitsu.com> >> CC: Eric W. Biederman<ebiederm@xmission.com> >> --- >> include/linux/kmem_cgroup.h | 29 +++++++++++++++++++++++++++++ >> include/net/sock.h | 2 ++ >> net/core/sock.c | 5 ++--- >> 3 files changed, 33 insertions(+), 3 deletions(-) >> >> diff --git a/include/linux/kmem_cgroup.h b/include/linux/kmem_cgroup.h >> index 0e4a74b..77076d8 100644 >> --- a/include/linux/kmem_cgroup.h >> +++ b/include/linux/kmem_cgroup.h >> @@ -49,5 +49,34 @@ static inline struct kmem_cgroup *kcg_from_task(struct task_struct *tsk) >> return NULL; >> } >> #endif /* CONFIG_CGROUP_KMEM */ >> + >> +#ifdef CONFIG_INET >> +#include<net/sock.h> >> +static inline void sock_update_kmem_cgrp(struct sock *sk) >> +{ >> +#ifdef CONFIG_CGROUP_KMEM >> + sk->sk_cgrp = kcg_from_task(current); > > BUG_ON(sk->sk_cgrp) ? Or else release the old cgroup if necessary. Since at least in this current incarnation, I am not doing migrations, I definitely don't expect to have a pointer already present here. BUG_ON() it is. >> @@ -339,6 +340,7 @@ struct sock { >> #endif >> __u32 sk_mark; >> u32 sk_classid; >> + struct kmem_cgroup *sk_cgrp; > > Should this be protected by a #ifdef? I don't particularly like it. I think that ifdef'ing fields in structures, while allowing for size optimization, takes away size and alignment predictability. But... can do. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Wed, Sep 07, 2011 at 01:23:13AM -0300, Glauber Costa wrote: > We aim to control the amount of kernel memory pinned at any > time by tcp sockets. To lay the foundations for this work, > this patch adds a pointer to the kmem_cgroup to the socket > structure. > > Signed-off-by: Glauber Costa <glommer@parallels.com> > CC: David S. Miller <davem@davemloft.net> > CC: Hiroyouki Kamezawa <kamezawa.hiroyu@jp.fujitsu.com> > CC: Eric W. Biederman <ebiederm@xmission.com> > --- > include/linux/kmem_cgroup.h | 29 +++++++++++++++++++++++++++++ > include/net/sock.h | 2 ++ > net/core/sock.c | 5 ++--- > 3 files changed, 33 insertions(+), 3 deletions(-) > > diff --git a/include/linux/kmem_cgroup.h b/include/linux/kmem_cgroup.h > index 0e4a74b..77076d8 100644 > --- a/include/linux/kmem_cgroup.h > +++ b/include/linux/kmem_cgroup.h > @@ -49,5 +49,34 @@ static inline struct kmem_cgroup *kcg_from_task(struct task_struct *tsk) > return NULL; > } > #endif /* CONFIG_CGROUP_KMEM */ > + > +#ifdef CONFIG_INET Will it break something if you define the helpers even if CONFIG_INET is not defined? It will be much cleaner. You can reuse ifdef CONFIG_CGROUP_KMEM in this case. > +#include <net/sock.h> > +static inline void sock_update_kmem_cgrp(struct sock *sk) > +{ > +#ifdef CONFIG_CGROUP_KMEM > + sk->sk_cgrp = kcg_from_task(current); > + > + /* > + * We don't need to protect against anything task-related, because > + * we are basically stuck with the sock pointer that won't change, > + * even if the task that originated the socket changes cgroups. > + * > + * What we do have to guarantee, is that the chain leading us to > + * the top level won't change under our noses. Incrementing the > + * reference count via cgroup_exclude_rmdir guarantees that. 
> + */ > + cgroup_exclude_rmdir(&sk->sk_cgrp->css); > +#endif > +} > + > +static inline void sock_release_kmem_cgrp(struct sock *sk) > +{ > +#ifdef CONFIG_CGROUP_KMEM > + cgroup_release_and_wakeup_rmdir(&sk->sk_cgrp->css); > +#endif > +} > + > +#endif /* CONFIG_INET */ > #endif /* _LINUX_KMEM_CGROUP_H */ > @@ -2252,9 +2254,6 @@ void sk_common_release(struct sock *sk) > } > EXPORT_SYMBOL(sk_common_release); > > -static DEFINE_RWLOCK(proto_list_lock); > -static LIST_HEAD(proto_list); > - Wrong patch?
On 09/07/2011 07:17 PM, Kirill A. Shutemov wrote: > On Wed, Sep 07, 2011 at 01:23:13AM -0300, Glauber Costa wrote: >> We aim to control the amount of kernel memory pinned at any >> time by tcp sockets. To lay the foundations for this work, >> this patch adds a pointer to the kmem_cgroup to the socket >> structure. >> >> Signed-off-by: Glauber Costa<glommer@parallels.com> >> CC: David S. Miller<davem@davemloft.net> >> CC: Hiroyouki Kamezawa<kamezawa.hiroyu@jp.fujitsu.com> >> CC: Eric W. Biederman<ebiederm@xmission.com> >> --- >> include/linux/kmem_cgroup.h | 29 +++++++++++++++++++++++++++++ >> include/net/sock.h | 2 ++ >> net/core/sock.c | 5 ++--- >> 3 files changed, 33 insertions(+), 3 deletions(-) >> >> diff --git a/include/linux/kmem_cgroup.h b/include/linux/kmem_cgroup.h >> index 0e4a74b..77076d8 100644 >> --- a/include/linux/kmem_cgroup.h >> +++ b/include/linux/kmem_cgroup.h >> @@ -49,5 +49,34 @@ static inline struct kmem_cgroup *kcg_from_task(struct task_struct *tsk) >> return NULL; >> } >> #endif /* CONFIG_CGROUP_KMEM */ >> + >> +#ifdef CONFIG_INET > > Will it break something if you define the helpers even if CONFIG_INET > is not defined? > It will be much cleaner. You can reuse ifdef CONFIG_CGROUP_KMEM in this > case. The helpers inside CONFIG_INET are needed for the network code, regardless of kmem cgroup is defined or not, not the other way around. So I could remove CONFIG_INET, but I can't possibly move it inside CONFIG_CGROUP_KMEM. So this buy us nothing. >> +#include<net/sock.h> >> +static inline void sock_update_kmem_cgrp(struct sock *sk) >> +{ >> +#ifdef CONFIG_CGROUP_KMEM >> + sk->sk_cgrp = kcg_from_task(current); >> + >> + /* >> + * We don't need to protect against anything task-related, because >> + * we are basically stuck with the sock pointer that won't change, >> + * even if the task that originated the socket changes cgroups. 
>> + * >> + * What we do have to guarantee, is that the chain leading us to >> + * the top level won't change under our noses. Incrementing the >> + * reference count via cgroup_exclude_rmdir guarantees that. >> + */ >> + cgroup_exclude_rmdir(&sk->sk_cgrp->css); >> +#endif >> +} >> + >> +static inline void sock_release_kmem_cgrp(struct sock *sk) >> +{ >> +#ifdef CONFIG_CGROUP_KMEM >> + cgroup_release_and_wakeup_rmdir(&sk->sk_cgrp->css); >> +#endif >> +} >> + >> +#endif /* CONFIG_INET */ >> #endif /* _LINUX_KMEM_CGROUP_H */ > >> @@ -2252,9 +2254,6 @@ void sk_common_release(struct sock *sk) >> } >> EXPORT_SYMBOL(sk_common_release); >> >> -static DEFINE_RWLOCK(proto_list_lock); >> -static LIST_HEAD(proto_list); >> - > > Wrong patch? Yes, it is. Thanks for noticing. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Thu, Sep 08, 2011 at 01:54:03AM -0300, Glauber Costa wrote: > On 09/07/2011 07:17 PM, Kirill A. Shutemov wrote: > > On Wed, Sep 07, 2011 at 01:23:13AM -0300, Glauber Costa wrote: > >> We aim to control the amount of kernel memory pinned at any > >> time by tcp sockets. To lay the foundations for this work, > >> this patch adds a pointer to the kmem_cgroup to the socket > >> structure. > >> > >> Signed-off-by: Glauber Costa<glommer@parallels.com> > >> CC: David S. Miller<davem@davemloft.net> > >> CC: Hiroyouki Kamezawa<kamezawa.hiroyu@jp.fujitsu.com> > >> CC: Eric W. Biederman<ebiederm@xmission.com> > >> --- > >> include/linux/kmem_cgroup.h | 29 +++++++++++++++++++++++++++++ > >> include/net/sock.h | 2 ++ > >> net/core/sock.c | 5 ++--- > >> 3 files changed, 33 insertions(+), 3 deletions(-) > >> > >> diff --git a/include/linux/kmem_cgroup.h b/include/linux/kmem_cgroup.h > >> index 0e4a74b..77076d8 100644 > >> --- a/include/linux/kmem_cgroup.h > >> +++ b/include/linux/kmem_cgroup.h > >> @@ -49,5 +49,34 @@ static inline struct kmem_cgroup *kcg_from_task(struct task_struct *tsk) > >> return NULL; > >> } > >> #endif /* CONFIG_CGROUP_KMEM */ > >> + > >> +#ifdef CONFIG_INET > > > > Will it break something if you define the helpers even if CONFIG_INET > > is not defined? > > It will be much cleaner. You can reuse ifdef CONFIG_CGROUP_KMEM in this > > case. > > The helpers inside CONFIG_INET are needed for the network code, > regardless of kmem cgroup is defined or not, not the other way around. > > So I could remove CONFIG_INET, but I can't possibly move it inside > CONFIG_CGROUP_KMEM. So this buy us nothing. You can define empty under CONFIG_CGROUP_KMEM's #else, can't you? Like with kcg_from_cgroup()/kcg_from_task().
On 09/08/2011 02:35 AM, Kirill A. Shutemov wrote: > On Thu, Sep 08, 2011 at 01:54:03AM -0300, Glauber Costa wrote: >> On 09/07/2011 07:17 PM, Kirill A. Shutemov wrote: >>> On Wed, Sep 07, 2011 at 01:23:13AM -0300, Glauber Costa wrote: >>>> We aim to control the amount of kernel memory pinned at any >>>> time by tcp sockets. To lay the foundations for this work, >>>> this patch adds a pointer to the kmem_cgroup to the socket >>>> structure. >>>> >>>> Signed-off-by: Glauber Costa<glommer@parallels.com> >>>> CC: David S. Miller<davem@davemloft.net> >>>> CC: Hiroyouki Kamezawa<kamezawa.hiroyu@jp.fujitsu.com> >>>> CC: Eric W. Biederman<ebiederm@xmission.com> >>>> --- >>>> include/linux/kmem_cgroup.h | 29 +++++++++++++++++++++++++++++ >>>> include/net/sock.h | 2 ++ >>>> net/core/sock.c | 5 ++--- >>>> 3 files changed, 33 insertions(+), 3 deletions(-) >>>> >>>> diff --git a/include/linux/kmem_cgroup.h b/include/linux/kmem_cgroup.h >>>> index 0e4a74b..77076d8 100644 >>>> --- a/include/linux/kmem_cgroup.h >>>> +++ b/include/linux/kmem_cgroup.h >>>> @@ -49,5 +49,34 @@ static inline struct kmem_cgroup *kcg_from_task(struct task_struct *tsk) >>>> return NULL; >>>> } >>>> #endif /* CONFIG_CGROUP_KMEM */ >>>> + >>>> +#ifdef CONFIG_INET >>> >>> Will it break something if you define the helpers even if CONFIG_INET >>> is not defined? >>> It will be much cleaner. You can reuse ifdef CONFIG_CGROUP_KMEM in this >>> case. >> >> The helpers inside CONFIG_INET are needed for the network code, >> regardless of kmem cgroup is defined or not, not the other way around. >> >> So I could remove CONFIG_INET, but I can't possibly move it inside >> CONFIG_CGROUP_KMEM. So this buy us nothing. > > You can define empty under CONFIG_CGROUP_KMEM's #else, can't you? > Like with kcg_from_cgroup()/kcg_from_task(). > Do you really think it is cleaner? Why would I define empty something that is not empty at all? Look again. Most of those helpers would be the exact same with or without CONFIG_CGROUP_KMEM . 
The others have very few differences. If CONFIG_INET bothers you, I can remove it altogether, making it unconditional. But moving it inside CONFIG_CGROUP_KMEM makes no sense. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
diff --git a/include/linux/kmem_cgroup.h b/include/linux/kmem_cgroup.h index 0e4a74b..77076d8 100644 --- a/include/linux/kmem_cgroup.h +++ b/include/linux/kmem_cgroup.h @@ -49,5 +49,34 @@ static inline struct kmem_cgroup *kcg_from_task(struct task_struct *tsk) return NULL; } #endif /* CONFIG_CGROUP_KMEM */ + +#ifdef CONFIG_INET +#include <net/sock.h> +static inline void sock_update_kmem_cgrp(struct sock *sk) +{ +#ifdef CONFIG_CGROUP_KMEM + sk->sk_cgrp = kcg_from_task(current); + + /* + * We don't need to protect against anything task-related, because + * we are basically stuck with the sock pointer that won't change, + * even if the task that originated the socket changes cgroups. + * + * What we do have to guarantee, is that the chain leading us to + * the top level won't change under our noses. Incrementing the + * reference count via cgroup_exclude_rmdir guarantees that. + */ + cgroup_exclude_rmdir(&sk->sk_cgrp->css); +#endif +} + +static inline void sock_release_kmem_cgrp(struct sock *sk) +{ +#ifdef CONFIG_CGROUP_KMEM + cgroup_release_and_wakeup_rmdir(&sk->sk_cgrp->css); +#endif +} + +#endif /* CONFIG_INET */ #endif /* _LINUX_KMEM_CGROUP_H */ diff --git a/include/net/sock.h b/include/net/sock.h index 8e4062f..709382f 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -228,6 +228,7 @@ struct sock_common { * @sk_security: used by security modules * @sk_mark: generic packet mark * @sk_classid: this socket's cgroup classid + * @sk_cgrp: this socket's kernel memory (kmem) cgroup * @sk_write_pending: a write to stream socket waits to start * @sk_state_change: callback to indicate change in the state of the sock * @sk_data_ready: callback to indicate there is data to be processed @@ -339,6 +340,7 @@ struct sock { #endif __u32 sk_mark; u32 sk_classid; + struct kmem_cgroup *sk_cgrp; void (*sk_state_change)(struct sock *sk); void (*sk_data_ready)(struct sock *sk, int bytes); void (*sk_write_space)(struct sock *sk); diff --git a/net/core/sock.c b/net/core/sock.c 
index 3449df8..7109864 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1139,6 +1139,7 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority, atomic_set(&sk->sk_wmem_alloc, 1); sock_update_classid(sk); + sock_update_kmem_cgrp(sk); } return sk; @@ -1170,6 +1171,7 @@ static void __sk_free(struct sock *sk) put_cred(sk->sk_peer_cred); put_pid(sk->sk_peer_pid); put_net(sock_net(sk)); + sock_release_kmem_cgrp(sk); sk_prot_free(sk->sk_prot_creator, sk); } @@ -2252,9 +2254,6 @@ void sk_common_release(struct sock *sk) } EXPORT_SYMBOL(sk_common_release); -static DEFINE_RWLOCK(proto_list_lock); -static LIST_HEAD(proto_list); - #ifdef CONFIG_PROC_FS #define PROTO_INUSE_NR 64 /* should be enough for the first time */ struct prot_inuse {
We aim to control the amount of kernel memory pinned at any time by tcp sockets. To lay the foundations for this work, this patch adds a pointer to the kmem_cgroup to the socket structure. Signed-off-by: Glauber Costa <glommer@parallels.com> CC: David S. Miller <davem@davemloft.net> CC: Hiroyuki Kamezawa <kamezawa.hiroyu@jp.fujitsu.com> CC: Eric W. Biederman <ebiederm@xmission.com> --- include/linux/kmem_cgroup.h | 29 +++++++++++++++++++++++++++++ include/net/sock.h | 2 ++ net/core/sock.c | 5 ++--- 3 files changed, 33 insertions(+), 3 deletions(-)