Message ID | 1345215494-9181-4-git-send-email-wagi@monom.org |
---|---|
State | Changes Requested, archived |
Delegated to: | David Miller |
Headers | show |
On Fri, Aug 17, 2012 at 04:58:12PM +0200, Daniel Wagner wrote: > From: Daniel Wagner <daniel.wagner@bmw-carit.de> > > The module version of task_cls_classid() checks if net_cls_sbusys_id > is valid to indentify when it is okay to access the controller. > > Instead relying on the subusys_id to be set, make it explicit > with a jump label. > > Signed-off-by: Daniel Wagner <daniel.wagner@bmw-carit.de> > Cc: "David S. Miller" <davem@davemloft.net> > Cc: Gao feng <gaofeng@cn.fujitsu.com> > Cc: Jamal Hadi Salim <jhs@mojatatu.com> > Cc: John Fastabend <john.r.fastabend@intel.com> > Cc: Li Zefan <lizefan@huawei.com> > Cc: Neil Horman <nhorman@tuxdriver.com> > Cc: Tejun Heo <tj@kernel.org> > Cc: netdev@vger.kernel.org > Cc: cgroups@vger.kernel.org > --- > include/net/cls_cgroup.h | 5 ++++- > net/core/sock.c | 5 +++++ > net/sched/cls_cgroup.c | 9 +++++++++ > 3 files changed, 18 insertions(+), 1 deletion(-) > > diff --git a/include/net/cls_cgroup.h b/include/net/cls_cgroup.h > index 401672c..bbbd957 100644 > --- a/include/net/cls_cgroup.h > +++ b/include/net/cls_cgroup.h > @@ -16,6 +16,7 @@ > #include <linux/cgroup.h> > #include <linux/hardirq.h> > #include <linux/rcupdate.h> > +#include <linux/jump_label.h> > > #ifdef CONFIG_CGROUPS > struct cgroup_cls_state > @@ -44,6 +45,8 @@ static inline u32 task_cls_classid(struct task_struct *p) > } > > #elif IS_MODULE(CONFIG_NET_CLS_CGROUP) > +extern struct static_key cgroup_cls_enabled; > +#define clscg_enabled static_key_false(&cgroup_cls_enabled) > > extern int net_cls_subsys_id; > > @@ -52,7 +55,7 @@ static inline u32 task_cls_classid(struct task_struct *p) > int id; > u32 classid = 0; > > - if (in_interrupt()) > + if (!clscg_enabled || in_interrupt()) > return 0; > > rcu_read_lock(); > diff --git a/net/core/sock.c b/net/core/sock.c > index 8f67ced..8106e77 100644 > --- a/net/core/sock.c > +++ b/net/core/sock.c > @@ -327,6 +327,11 @@ int __sk_backlog_rcv(struct sock *sk, struct sk_buff *skb) > EXPORT_SYMBOL(__sk_backlog_rcv); > > 
#if defined(CONFIG_CGROUPS) > +#if IS_MODULE(CONFIG_NET_CLS_CGROUP) > +struct static_key cgroup_cls_enabled = STATIC_KEY_INIT_FALSE; > +EXPORT_SYMBOL_GPL(cgroup_cls_enabled); > +#endif > + > #if !defined(CONFIG_NET_CLS_CGROUP) > int net_cls_subsys_id = -1; > EXPORT_SYMBOL_GPL(net_cls_subsys_id); > diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c > index 7743ea8..0635894 100644 > --- a/net/sched/cls_cgroup.c > +++ b/net/sched/cls_cgroup.c > @@ -44,12 +44,21 @@ static struct cgroup_subsys_state *cgrp_create(struct cgroup *cgrp) > > if (cgrp->parent) > cs->classid = cgrp_cls_state(cgrp->parent)->classid; > +#if IS_MODULE(CONFIG_NET_CLS_CGROUP) > + else if (!clscg_enabled) > + static_key_slow_inc(&cgroup_cls_enabled); This is racy I think. The read of the static key is atomic with other reads, but the entire conditional is not atomic. If two cpus were creating cgroups in parallel, it would be possible for both to read the static key as being zero (the second cpu would read the key before the first cpu could increment it). > +#endif > > return &cs->css; > } > > static void cgrp_destroy(struct cgroup *cgrp) > { > +#if IS_MODULE(CONFIG_NET_CLS_CGROUP) > + if (!cgrp->parent && clscg_enabled) > + static_key_slow_dec(&cgroup_cls_enabled); Ditto here with the race above. I think what you want is one of: 1) Use static_key_slow_[inc|dec] unconditionally 2) Keep a separate internal counter to track the number of cgroup instances so that you only inc the static key on the first create and dec it on the last delete. I would think (1) would be sufficent. It looks like static_key_slow_inc uses atomic_inc_not_zero to just do an inc anyway in the event that multiple inc events are made. Neil > +#endif > + > kfree(cgrp_cls_state(cgrp)); > } > > -- > 1.7.12.rc1.16.g05a20c8 > > -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
于 2012/8/18 2:28, Neil Horman 写道: > On Fri, Aug 17, 2012 at 04:58:12PM +0200, Daniel Wagner wrote: >> From: Daniel Wagner <daniel.wagner@bmw-carit.de> >> >> The module version of task_cls_classid() checks if net_cls_sbusys_id >> is valid to indentify when it is okay to access the controller. >> >> Instead relying on the subusys_id to be set, make it explicit >> with a jump label. >> >> Signed-off-by: Daniel Wagner <daniel.wagner@bmw-carit.de> >> Cc: "David S. Miller" <davem@davemloft.net> >> Cc: Gao feng <gaofeng@cn.fujitsu.com> >> Cc: Jamal Hadi Salim <jhs@mojatatu.com> >> Cc: John Fastabend <john.r.fastabend@intel.com> >> Cc: Li Zefan <lizefan@huawei.com> >> Cc: Neil Horman <nhorman@tuxdriver.com> >> Cc: Tejun Heo <tj@kernel.org> >> Cc: netdev@vger.kernel.org >> Cc: cgroups@vger.kernel.org >> --- >> include/net/cls_cgroup.h | 5 ++++- >> net/core/sock.c | 5 +++++ >> net/sched/cls_cgroup.c | 9 +++++++++ >> 3 files changed, 18 insertions(+), 1 deletion(-) >> >> diff --git a/include/net/cls_cgroup.h b/include/net/cls_cgroup.h >> index 401672c..bbbd957 100644 >> --- a/include/net/cls_cgroup.h >> +++ b/include/net/cls_cgroup.h >> @@ -16,6 +16,7 @@ >> #include <linux/cgroup.h> >> #include <linux/hardirq.h> >> #include <linux/rcupdate.h> >> +#include <linux/jump_label.h> >> >> #ifdef CONFIG_CGROUPS >> struct cgroup_cls_state >> @@ -44,6 +45,8 @@ static inline u32 task_cls_classid(struct task_struct *p) >> } >> >> #elif IS_MODULE(CONFIG_NET_CLS_CGROUP) >> +extern struct static_key cgroup_cls_enabled; >> +#define clscg_enabled static_key_false(&cgroup_cls_enabled) >> >> extern int net_cls_subsys_id; >> >> @@ -52,7 +55,7 @@ static inline u32 task_cls_classid(struct task_struct *p) >> int id; >> u32 classid = 0; >> >> - if (in_interrupt()) >> + if (!clscg_enabled || in_interrupt()) >> return 0; >> >> rcu_read_lock(); >> diff --git a/net/core/sock.c b/net/core/sock.c >> index 8f67ced..8106e77 100644 >> --- a/net/core/sock.c >> +++ b/net/core/sock.c >> @@ -327,6 +327,11 @@ int 
__sk_backlog_rcv(struct sock *sk, struct sk_buff *skb) >> EXPORT_SYMBOL(__sk_backlog_rcv); >> >> #if defined(CONFIG_CGROUPS) >> +#if IS_MODULE(CONFIG_NET_CLS_CGROUP) >> +struct static_key cgroup_cls_enabled = STATIC_KEY_INIT_FALSE; >> +EXPORT_SYMBOL_GPL(cgroup_cls_enabled); >> +#endif >> + >> #if !defined(CONFIG_NET_CLS_CGROUP) >> int net_cls_subsys_id = -1; >> EXPORT_SYMBOL_GPL(net_cls_subsys_id); >> diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c >> index 7743ea8..0635894 100644 >> --- a/net/sched/cls_cgroup.c >> +++ b/net/sched/cls_cgroup.c >> @@ -44,12 +44,21 @@ static struct cgroup_subsys_state *cgrp_create(struct cgroup *cgrp) >> >> if (cgrp->parent) >> cs->classid = cgrp_cls_state(cgrp->parent)->classid; >> +#if IS_MODULE(CONFIG_NET_CLS_CGROUP) >> + else if (!clscg_enabled) >> + static_key_slow_inc(&cgroup_cls_enabled); > This is racy I think. The read of the static key is atomic with other reads, > but the entire conditional is not atomic. If two cpus were creating cgroups in > parallel, it would be possible for both to read the static key as being zero > (the second cpu would read the key before the first cpu could increment it). static_key_slow_inc() is called only when we're creating the root cgroup, and that's module loading. so it's safe. >> +#endif >> >> return &cs->css; >> } >> >> static void cgrp_destroy(struct cgroup *cgrp) >> { >> +#if IS_MODULE(CONFIG_NET_CLS_CGROUP) >> + if (!cgrp->parent && clscg_enabled) >> + static_key_slow_dec(&cgroup_cls_enabled); > Ditto here with the race above. I think what you want is one of: and this is called only when we're destroying the root cgroup, and that's module unload. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Mon, Aug 20, 2012 at 08:59:37AM +0800, Li Zefan wrote: > 于 2012/8/18 2:28, Neil Horman 写道: > > On Fri, Aug 17, 2012 at 04:58:12PM +0200, Daniel Wagner wrote: > >> From: Daniel Wagner <daniel.wagner@bmw-carit.de> > >> > >> The module version of task_cls_classid() checks if net_cls_sbusys_id > >> is valid to indentify when it is okay to access the controller. > >> > >> Instead relying on the subusys_id to be set, make it explicit > >> with a jump label. > >> > >> Signed-off-by: Daniel Wagner <daniel.wagner@bmw-carit.de> > >> Cc: "David S. Miller" <davem@davemloft.net> > >> Cc: Gao feng <gaofeng@cn.fujitsu.com> > >> Cc: Jamal Hadi Salim <jhs@mojatatu.com> > >> Cc: John Fastabend <john.r.fastabend@intel.com> > >> Cc: Li Zefan <lizefan@huawei.com> > >> Cc: Neil Horman <nhorman@tuxdriver.com> > >> Cc: Tejun Heo <tj@kernel.org> > >> Cc: netdev@vger.kernel.org > >> Cc: cgroups@vger.kernel.org > >> --- > >> include/net/cls_cgroup.h | 5 ++++- > >> net/core/sock.c | 5 +++++ > >> net/sched/cls_cgroup.c | 9 +++++++++ > >> 3 files changed, 18 insertions(+), 1 deletion(-) > >> > >> diff --git a/include/net/cls_cgroup.h b/include/net/cls_cgroup.h > >> index 401672c..bbbd957 100644 > >> --- a/include/net/cls_cgroup.h > >> +++ b/include/net/cls_cgroup.h > >> @@ -16,6 +16,7 @@ > >> #include <linux/cgroup.h> > >> #include <linux/hardirq.h> > >> #include <linux/rcupdate.h> > >> +#include <linux/jump_label.h> > >> > >> #ifdef CONFIG_CGROUPS > >> struct cgroup_cls_state > >> @@ -44,6 +45,8 @@ static inline u32 task_cls_classid(struct task_struct *p) > >> } > >> > >> #elif IS_MODULE(CONFIG_NET_CLS_CGROUP) > >> +extern struct static_key cgroup_cls_enabled; > >> +#define clscg_enabled static_key_false(&cgroup_cls_enabled) > >> > >> extern int net_cls_subsys_id; > >> > >> @@ -52,7 +55,7 @@ static inline u32 task_cls_classid(struct task_struct *p) > >> int id; > >> u32 classid = 0; > >> > >> - if (in_interrupt()) > >> + if (!clscg_enabled || in_interrupt()) > >> return 0; > >> > >> 
rcu_read_lock(); > >> diff --git a/net/core/sock.c b/net/core/sock.c > >> index 8f67ced..8106e77 100644 > >> --- a/net/core/sock.c > >> +++ b/net/core/sock.c > >> @@ -327,6 +327,11 @@ int __sk_backlog_rcv(struct sock *sk, struct sk_buff *skb) > >> EXPORT_SYMBOL(__sk_backlog_rcv); > >> > >> #if defined(CONFIG_CGROUPS) > >> +#if IS_MODULE(CONFIG_NET_CLS_CGROUP) > >> +struct static_key cgroup_cls_enabled = STATIC_KEY_INIT_FALSE; > >> +EXPORT_SYMBOL_GPL(cgroup_cls_enabled); > >> +#endif > >> + > >> #if !defined(CONFIG_NET_CLS_CGROUP) > >> int net_cls_subsys_id = -1; > >> EXPORT_SYMBOL_GPL(net_cls_subsys_id); > >> diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c > >> index 7743ea8..0635894 100644 > >> --- a/net/sched/cls_cgroup.c > >> +++ b/net/sched/cls_cgroup.c > >> @@ -44,12 +44,21 @@ static struct cgroup_subsys_state *cgrp_create(struct cgroup *cgrp) > >> > >> if (cgrp->parent) > >> cs->classid = cgrp_cls_state(cgrp->parent)->classid; > >> +#if IS_MODULE(CONFIG_NET_CLS_CGROUP) > >> + else if (!clscg_enabled) > >> + static_key_slow_inc(&cgroup_cls_enabled); > > This is racy I think. The read of the static key is atomic with other reads, > > but the entire conditional is not atomic. If two cpus were creating cgroups in > > parallel, it would be possible for both to read the static key as being zero > > (the second cpu would read the key before the first cpu could increment it). > > static_key_slow_inc() is called only when we're creating the root cgroup, and that's > module loading. > > so it's safe. > What makes you say that? It appears to me that we call ss->create (and potential ss->destroy, depending on failure conditions) from cgroup_create, which in turn is called from cgroup_mkdir, which is called for every cgroup instance created, not just the root cgroup. Neil -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Fri, Aug 17, 2012 at 02:28:55PM -0400, Neil Horman wrote: > On Fri, Aug 17, 2012 at 04:58:12PM +0200, Daniel Wagner wrote: > > From: Daniel Wagner <daniel.wagner@bmw-carit.de> > > > > The module version of task_cls_classid() checks if net_cls_sbusys_id > > is valid to indentify when it is okay to access the controller. > > > > Instead relying on the subusys_id to be set, make it explicit > > with a jump label. > > > > Signed-off-by: Daniel Wagner <daniel.wagner@bmw-carit.de> > > Cc: "David S. Miller" <davem@davemloft.net> > > Cc: Gao feng <gaofeng@cn.fujitsu.com> > > Cc: Jamal Hadi Salim <jhs@mojatatu.com> > > Cc: John Fastabend <john.r.fastabend@intel.com> > > Cc: Li Zefan <lizefan@huawei.com> > > Cc: Neil Horman <nhorman@tuxdriver.com> > > Cc: Tejun Heo <tj@kernel.org> > > Cc: netdev@vger.kernel.org > > Cc: cgroups@vger.kernel.org > > --- > > include/net/cls_cgroup.h | 5 ++++- > > net/core/sock.c | 5 +++++ > > net/sched/cls_cgroup.c | 9 +++++++++ > > 3 files changed, 18 insertions(+), 1 deletion(-) > > > > diff --git a/include/net/cls_cgroup.h b/include/net/cls_cgroup.h > > index 401672c..bbbd957 100644 > > --- a/include/net/cls_cgroup.h > > +++ b/include/net/cls_cgroup.h > > @@ -16,6 +16,7 @@ > > #include <linux/cgroup.h> > > #include <linux/hardirq.h> > > #include <linux/rcupdate.h> > > +#include <linux/jump_label.h> > > > > #ifdef CONFIG_CGROUPS > > struct cgroup_cls_state > > @@ -44,6 +45,8 @@ static inline u32 task_cls_classid(struct task_struct *p) > > } > > > > #elif IS_MODULE(CONFIG_NET_CLS_CGROUP) > > +extern struct static_key cgroup_cls_enabled; > > +#define clscg_enabled static_key_false(&cgroup_cls_enabled) > > > > extern int net_cls_subsys_id; > > > > @@ -52,7 +55,7 @@ static inline u32 task_cls_classid(struct task_struct *p) > > int id; > > u32 classid = 0; > > > > - if (in_interrupt()) > > + if (!clscg_enabled || in_interrupt()) > > return 0; > > > > rcu_read_lock(); > > diff --git a/net/core/sock.c b/net/core/sock.c > > index 8f67ced..8106e77 
100644 > > --- a/net/core/sock.c > > +++ b/net/core/sock.c > > @@ -327,6 +327,11 @@ int __sk_backlog_rcv(struct sock *sk, struct sk_buff *skb) > > EXPORT_SYMBOL(__sk_backlog_rcv); > > > > #if defined(CONFIG_CGROUPS) > > +#if IS_MODULE(CONFIG_NET_CLS_CGROUP) > > +struct static_key cgroup_cls_enabled = STATIC_KEY_INIT_FALSE; > > +EXPORT_SYMBOL_GPL(cgroup_cls_enabled); > > +#endif > > + > > #if !defined(CONFIG_NET_CLS_CGROUP) > > int net_cls_subsys_id = -1; > > EXPORT_SYMBOL_GPL(net_cls_subsys_id); > > diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c > > index 7743ea8..0635894 100644 > > --- a/net/sched/cls_cgroup.c > > +++ b/net/sched/cls_cgroup.c > > @@ -44,12 +44,21 @@ static struct cgroup_subsys_state *cgrp_create(struct cgroup *cgrp) > > > > if (cgrp->parent) > > cs->classid = cgrp_cls_state(cgrp->parent)->classid; > > +#if IS_MODULE(CONFIG_NET_CLS_CGROUP) > > + else if (!clscg_enabled) > > + static_key_slow_inc(&cgroup_cls_enabled); > This is racy I think. The read of the static key is atomic with other reads, > but the entire conditional is not atomic. If two cpus were creating cgroups in > parallel, it would be possible for both to read the static key as being zero > (the second cpu would read the key before the first cpu could increment it). D'oh, That is racy. > > +#endif > > > > return &cs->css; > > } > > > > static void cgrp_destroy(struct cgroup *cgrp) > > { > > +#if IS_MODULE(CONFIG_NET_CLS_CGROUP) > > + if (!cgrp->parent && clscg_enabled) > > + static_key_slow_dec(&cgroup_cls_enabled); > Ditto here with the race above. I think what you want is one of: > > 1) Use static_key_slow_[inc|dec] unconditionally While the static_key_slow_inc() case will work, I am not so sure about the static_key_slow_dec(), e.g. we could still access inside task_cls_classid() a destroyed container. > 2) Keep a separate internal counter to track the number of cgroup instances > so that you only inc the static key on the first create and dec it on the last > delete. 
If I got you right, then this would not be different than directly using static_key_slow_[inc|dec]. > I would think (1) would be sufficent. It looks like static_key_slow_inc uses > atomic_inc_not_zero to just do an inc anyway in the event that multiple inc > events are made. Would something like this work? static inline u32 task_cls_classid(struct task_struct *p) { u32 classid; struct cgroup_cls_state *css; if (!clscg_enabled || in_interrupt()) return 0; rcu_read_lock(); css = container_of(task_subsys_state(p, net_cls_subsys_id), struct cgroup_cls_state, css); if (!css) classid = css->classid; else classid = 0; rcu_read_unlock(); return classid; } Daniel -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 08/20/2012 03:29 PM, Daniel Wagner wrote: >> This is racy I think. The read of the static key is atomic with other reads, >> > but the entire conditional is not atomic. If two cpus were creating cgroups in >> > parallel, it would be possible for both to read the static key as being zero >> > (the second cpu would read the key before the first cpu could increment it). > D'oh, That is racy. > Take a look at how we solve this particular problem in memcg. By using a pair of bits meaning "ever active" and "currently active", we can avoid problems with the static key increments. Nothing bad can happen by increasing the static key more than once; the problem is that you need to somehow take note of that to make sure you decrement it as many times as you incremented when you release it. Two other important things to keep in mind while dealing with static branches: * You can't increase/decrease while holding the cgroup_lock. lockdep may trigger, because the cgroup_lock holds the cpu_hotplug lock, which the static branch update path will also take. (due to cpusets). Doing a logical hotplug will stress this path, and make the problem visible. Take a look at disarm_sock_keys() (mm/memcontrol.c) to see how we solve this particular problem. * If you have code in more than one call site, the update among them is not atomic. Not sure if this one applies to your case, but it can lead you to some very unpleasant-to-debug problems. We use one of those two bits I mentioned ("currently active") to make sure objects are not marked before all sites are already patched. Hope our previous experience with this can help you. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Mon, Aug 20, 2012 at 01:29:38PM +0200, Daniel Wagner wrote: > On Fri, Aug 17, 2012 at 02:28:55PM -0400, Neil Horman wrote: > > On Fri, Aug 17, 2012 at 04:58:12PM +0200, Daniel Wagner wrote: > > > From: Daniel Wagner <daniel.wagner@bmw-carit.de> > > > > > > The module version of task_cls_classid() checks if net_cls_sbusys_id > > > is valid to indentify when it is okay to access the controller. > > > > > > Instead relying on the subusys_id to be set, make it explicit > > > with a jump label. > > > > > > Signed-off-by: Daniel Wagner <daniel.wagner@bmw-carit.de> > > > Cc: "David S. Miller" <davem@davemloft.net> > > > Cc: Gao feng <gaofeng@cn.fujitsu.com> > > > Cc: Jamal Hadi Salim <jhs@mojatatu.com> > > > Cc: John Fastabend <john.r.fastabend@intel.com> > > > Cc: Li Zefan <lizefan@huawei.com> > > > Cc: Neil Horman <nhorman@tuxdriver.com> > > > Cc: Tejun Heo <tj@kernel.org> > > > Cc: netdev@vger.kernel.org > > > Cc: cgroups@vger.kernel.org > > > --- > > > include/net/cls_cgroup.h | 5 ++++- > > > net/core/sock.c | 5 +++++ > > > net/sched/cls_cgroup.c | 9 +++++++++ > > > 3 files changed, 18 insertions(+), 1 deletion(-) > > > > > > diff --git a/include/net/cls_cgroup.h b/include/net/cls_cgroup.h > > > index 401672c..bbbd957 100644 > > > --- a/include/net/cls_cgroup.h > > > +++ b/include/net/cls_cgroup.h > > > @@ -16,6 +16,7 @@ > > > #include <linux/cgroup.h> > > > #include <linux/hardirq.h> > > > #include <linux/rcupdate.h> > > > +#include <linux/jump_label.h> > > > > > > #ifdef CONFIG_CGROUPS > > > struct cgroup_cls_state > > > @@ -44,6 +45,8 @@ static inline u32 task_cls_classid(struct task_struct *p) > > > } > > > > > > #elif IS_MODULE(CONFIG_NET_CLS_CGROUP) > > > +extern struct static_key cgroup_cls_enabled; > > > +#define clscg_enabled static_key_false(&cgroup_cls_enabled) > > > > > > extern int net_cls_subsys_id; > > > > > > @@ -52,7 +55,7 @@ static inline u32 task_cls_classid(struct task_struct *p) > > > int id; > > > u32 classid = 0; > > > > > > - if 
(in_interrupt()) > > > + if (!clscg_enabled || in_interrupt()) > > > return 0; > > > > > > rcu_read_lock(); > > > diff --git a/net/core/sock.c b/net/core/sock.c > > > index 8f67ced..8106e77 100644 > > > --- a/net/core/sock.c > > > +++ b/net/core/sock.c > > > @@ -327,6 +327,11 @@ int __sk_backlog_rcv(struct sock *sk, struct sk_buff *skb) > > > EXPORT_SYMBOL(__sk_backlog_rcv); > > > > > > #if defined(CONFIG_CGROUPS) > > > +#if IS_MODULE(CONFIG_NET_CLS_CGROUP) > > > +struct static_key cgroup_cls_enabled = STATIC_KEY_INIT_FALSE; > > > +EXPORT_SYMBOL_GPL(cgroup_cls_enabled); > > > +#endif > > > + > > > #if !defined(CONFIG_NET_CLS_CGROUP) > > > int net_cls_subsys_id = -1; > > > EXPORT_SYMBOL_GPL(net_cls_subsys_id); > > > diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c > > > index 7743ea8..0635894 100644 > > > --- a/net/sched/cls_cgroup.c > > > +++ b/net/sched/cls_cgroup.c > > > @@ -44,12 +44,21 @@ static struct cgroup_subsys_state *cgrp_create(struct cgroup *cgrp) > > > > > > if (cgrp->parent) > > > cs->classid = cgrp_cls_state(cgrp->parent)->classid; > > > +#if IS_MODULE(CONFIG_NET_CLS_CGROUP) > > > + else if (!clscg_enabled) > > > + static_key_slow_inc(&cgroup_cls_enabled); > > This is racy I think. The read of the static key is atomic with other reads, > > but the entire conditional is not atomic. If two cpus were creating cgroups in > > parallel, it would be possible for both to read the static key as being zero > > (the second cpu would read the key before the first cpu could increment it). > > D'oh, That is racy. > > > > +#endif > > > > > > return &cs->css; > > > } > > > > > > static void cgrp_destroy(struct cgroup *cgrp) > > > { > > > +#if IS_MODULE(CONFIG_NET_CLS_CGROUP) > > > + if (!cgrp->parent && clscg_enabled) > > > + static_key_slow_dec(&cgroup_cls_enabled); > > Ditto here with the race above. 
I think what you want is one of: > > > > 1) Use static_key_slow_[inc|dec] unconditionally > > While the static_key_slow_inc() case will work, I am not so sure about > the static_key_slow_dec(), e.g. we could still access inside > task_cls_classid() a destroyed container. > Possibly, yes, I think. > > 2) Keep a separate internal counter to track the number of cgroup instances > > so that you only inc the static key on the first create and dec it on the last > > delete. > > If I got you right, than this would not be different then direclty using > static_key_slow_[inc|dec]. > As long as a cgroup subsystems ->destroy method is only called when the subsystem is being removed, then I think thats correct. I'm not 100% sure thats the case though. > > I would think (1) would be sufficent. It looks like static_key_slow_inc uses > > atomic_inc_not_zero to just do an inc anyway in the event that multiple inc > > events are made. > > Would something like this work? > I think so yes, assuming that you also make the slow_inc|dec changes > static inline u32 task_cls_classid(struct task_struct *p) > { > u32 classid; > struct cgroup_cls_state *css; > > if (!clscg_enabled || in_interrupt()) > return 0; > > rcu_read_lock(); > css = container_of(task_subsys_state(p, net_cls_subsys_id), > struct cgroup_cls_state, css); > if (!css) > classid = css->classid; > else > classid = 0; > rcu_read_unlock(); > > return classid; > } > > Daniel > -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 08/20/2012 09:03 PM, Neil Horman wrote: >>> 2) Keep a separate internal counter to track the number of cgroup instances >>> > > so that you only inc the static key on the first create and dec it on the last >>> > > delete. >> > >> > If I got you right, than this would not be different then direclty using >> > static_key_slow_[inc|dec]. >> > > As long as a cgroup subsystems ->destroy method is only called when the > subsystem is being removed, then I think thats correct. I'm not 100% sure thats > the case though. > THAT is correct, but not the call itself. ->destroy() is called with the cgroup_lock held, and there is a lockdep dependency created by cpuset that prevents the cpu_hotplug lock, taken by static branch updates, from being taken inside cgroup_lock. So unless cpuset is fixed - which is major work, we can't do static_branch updates while holding the cgroup_lock. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
> > So there is no need to move the enabling/disabling of the static branch > at this point to create/destroy. > That will surely save you some trouble. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 21.08.2012 11:12, Glauber Costa wrote: > On 08/20/2012 09:03 PM, Neil Horman wrote: >>>> 2) Keep a separate internal counter to track the number of cgroup instances >>>>>> so that you only inc the static key on the first create and dec it on the last >>>>>> delete. >>>> >>>> If I got you right, than this would not be different then direclty using >>>> static_key_slow_[inc|dec]. >>>> >> As long as a cgroup subsystems ->destroy method is only called when the >> subsystem is being removed, then I think thats correct. I'm not 100% sure thats >> the case though. >> > THAT is correct, but not the call itself. ->destroy() is called with the > cgroup_lock held, and there is a lockdep dependency created by cpuset > that prevents the cpu_hotplug lock, taken by static branch updates, to > be taken inside cgroup_lock. > > So unless cpuset is fixed - which is a major work, we can't do > static_branch updates while holding the cgroup_lock. Thanks a lot for the info on this problem. I have spent the last days trying to figure out how you have solved this in memcg. It looks complex and fragile. So I would rather avoid enabling/disabling the static branch in cgrp_create()/cgroup_destroy() and do it on module init/exit as it is done currently with the id assignment. Initially I wanted to optimize the task_cls_classid() path, so that we only spend time in there when a cgroup really exists. Though as soon as the controller is registered to the cgroup core, the code path is enabled. That means when cls is builtin it is used all the time (even though no one is using it). In the module case, it is the same. The task_cls_classid() path is used right after module init. So there is no need to move the enabling/disabling of the static branch at this point to create/destroy. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
diff --git a/include/net/cls_cgroup.h b/include/net/cls_cgroup.h index 401672c..bbbd957 100644 --- a/include/net/cls_cgroup.h +++ b/include/net/cls_cgroup.h @@ -16,6 +16,7 @@ #include <linux/cgroup.h> #include <linux/hardirq.h> #include <linux/rcupdate.h> +#include <linux/jump_label.h> #ifdef CONFIG_CGROUPS struct cgroup_cls_state @@ -44,6 +45,8 @@ static inline u32 task_cls_classid(struct task_struct *p) } #elif IS_MODULE(CONFIG_NET_CLS_CGROUP) +extern struct static_key cgroup_cls_enabled; +#define clscg_enabled static_key_false(&cgroup_cls_enabled) extern int net_cls_subsys_id; @@ -52,7 +55,7 @@ static inline u32 task_cls_classid(struct task_struct *p) int id; u32 classid = 0; - if (in_interrupt()) + if (!clscg_enabled || in_interrupt()) return 0; rcu_read_lock(); diff --git a/net/core/sock.c b/net/core/sock.c index 8f67ced..8106e77 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -327,6 +327,11 @@ int __sk_backlog_rcv(struct sock *sk, struct sk_buff *skb) EXPORT_SYMBOL(__sk_backlog_rcv); #if defined(CONFIG_CGROUPS) +#if IS_MODULE(CONFIG_NET_CLS_CGROUP) +struct static_key cgroup_cls_enabled = STATIC_KEY_INIT_FALSE; +EXPORT_SYMBOL_GPL(cgroup_cls_enabled); +#endif + #if !defined(CONFIG_NET_CLS_CGROUP) int net_cls_subsys_id = -1; EXPORT_SYMBOL_GPL(net_cls_subsys_id); diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c index 7743ea8..0635894 100644 --- a/net/sched/cls_cgroup.c +++ b/net/sched/cls_cgroup.c @@ -44,12 +44,21 @@ static struct cgroup_subsys_state *cgrp_create(struct cgroup *cgrp) if (cgrp->parent) cs->classid = cgrp_cls_state(cgrp->parent)->classid; +#if IS_MODULE(CONFIG_NET_CLS_CGROUP) + else if (!clscg_enabled) + static_key_slow_inc(&cgroup_cls_enabled); +#endif return &cs->css; } static void cgrp_destroy(struct cgroup *cgrp) { +#if IS_MODULE(CONFIG_NET_CLS_CGROUP) + if (!cgrp->parent && clscg_enabled) + static_key_slow_dec(&cgroup_cls_enabled); +#endif + kfree(cgrp_cls_state(cgrp)); }