Message ID | 1276004075.2987.208.camel@twins |
---|---|
State | Not Applicable, archived |
Delegated to: | David Miller |
Headers | show |
On Tue, Jun 8, 2010 at 9:34 AM, Peter Zijlstra <peterz@infradead.org> wrote: > On Tue, 2010-06-08 at 09:14 -0400, Miles Lane wrote: > >> CC kernel/sched.o >> kernel/sched.c: In function ‘task_group’: >> kernel/sched.c:321: error: implicit declaration of function ‘task_rq’ >> kernel/sched.c:321: error: invalid type argument of ‘->’ (have ‘int’) >> make[1]: *** [kernel/sched.o] Error 1 >> >> I had to apply with fuzz. Did it mess up? > > > No, I probably did.. task_rq() is defined on line 636 or thereabouts, > and this function landed around line 320. > > Ahh, and it compiled here because I have CGROUP_SCHED=y, but > PROVE_RCU=n, so that whole check expression disappears and is never > evaluated... > > /me fixes > > --- > Subject: sched: PROVE_RCU vs cpu_cgroup > From: Peter Zijlstra <a.p.zijlstra@chello.nl> > Date: Tue Jun 08 11:40:42 CEST 2010 > > PROVE_RCU has a few issues with the cpu_cgroup because the scheduler > typically holds rq->lock around the css rcu derefs but the generic > cgroup code doesn't (and can't) know about that lock. > > Provide means to add extra checks to the css dereference and use that > in the scheduler to annotate its users. > > The addition of rq->lock to these checks is correct because the > cgroup_subsys::attach() method takes the rq->lock for each task it > moves, therefore by holding that lock, we ensure the task is pinned to > the current cgroup and the RCU dereference is valid. > > That leaves one genuine race in __sched_setscheduler() where we used > task_group() without holding any of the required locks and thus raced > with the cgroup code. Solve this by moving the check under the rq->lock. 
> > Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl> > --- > include/linux/cgroup.h | 20 +++++--- > kernel/sched.c | 115 +++++++++++++++++++++++++------------------------ > 2 files changed, 73 insertions(+), 62 deletions(-) > > Index: linux-2.6/include/linux/cgroup.h > =================================================================== > --- linux-2.6.orig/include/linux/cgroup.h > +++ linux-2.6/include/linux/cgroup.h > @@ -525,13 +525,21 @@ static inline struct cgroup_subsys_state > return cgrp->subsys[subsys_id]; > } > > -static inline struct cgroup_subsys_state *task_subsys_state( > - struct task_struct *task, int subsys_id) > +/* > + * function to get the cgroup_subsys_state which allows for extra > + * rcu_dereference_check() conditions, such as locks used during the > + * cgroup_subsys::attach() methods. > + */ > +#define task_subsys_state_check(task, subsys_id, __c) \ > + rcu_dereference_check(task->cgroups->subsys[subsys_id], \ > + rcu_read_lock_held() || \ > + lockdep_is_held(&task->alloc_lock) || \ > + cgroup_lock_is_held() || (__c)) > + > +static inline struct cgroup_subsys_state * > +task_subsys_state(struct task_struct *task, int subsys_id) > { > - return rcu_dereference_check(task->cgroups->subsys[subsys_id], > - rcu_read_lock_held() || > - lockdep_is_held(&task->alloc_lock) || > - cgroup_lock_is_held()); > + return task_subsys_state_check(task, subsys_id, false); > } > > static inline struct cgroup* task_cgroup(struct task_struct *task, > Index: linux-2.6/kernel/sched.c > =================================================================== > --- linux-2.6.orig/kernel/sched.c > +++ linux-2.6/kernel/sched.c > @@ -306,52 +306,6 @@ static int init_task_group_load = INIT_T > */ > struct task_group init_task_group; > > -/* return group to which a task belongs */ > -static inline struct task_group *task_group(struct task_struct *p) > -{ > - struct task_group *tg; > - > -#ifdef CONFIG_CGROUP_SCHED > - tg = container_of(task_subsys_state(p, 
cpu_cgroup_subsys_id), > - struct task_group, css); > -#else > - tg = &init_task_group; > -#endif > - return tg; > -} > - > -/* Change a task's cfs_rq and parent entity if it moves across CPUs/groups */ > -static inline void set_task_rq(struct task_struct *p, unsigned int cpu) > -{ > - /* > - * Strictly speaking this rcu_read_lock() is not needed since the > - * task_group is tied to the cgroup, which in turn can never go away > - * as long as there are tasks attached to it. > - * > - * However since task_group() uses task_subsys_state() which is an > - * rcu_dereference() user, this quiets CONFIG_PROVE_RCU. > - */ > - rcu_read_lock(); > -#ifdef CONFIG_FAIR_GROUP_SCHED > - p->se.cfs_rq = task_group(p)->cfs_rq[cpu]; > - p->se.parent = task_group(p)->se[cpu]; > -#endif > - > -#ifdef CONFIG_RT_GROUP_SCHED > - p->rt.rt_rq = task_group(p)->rt_rq[cpu]; > - p->rt.parent = task_group(p)->rt_se[cpu]; > -#endif > - rcu_read_unlock(); > -} > - > -#else > - > -static inline void set_task_rq(struct task_struct *p, unsigned int cpu) { } > -static inline struct task_group *task_group(struct task_struct *p) > -{ > - return NULL; > -} > - > #endif /* CONFIG_CGROUP_SCHED */ > > /* CFS-related fields in a runqueue */ > @@ -644,6 +598,49 @@ static inline int cpu_of(struct rq *rq) > #define cpu_curr(cpu) (cpu_rq(cpu)->curr) > #define raw_rq() (&__raw_get_cpu_var(runqueues)) > > +#ifdef CONFIG_CGROUP_SCHED > + > +/* > + * Return the group to which this tasks belongs. > + * > + * We use task_subsys_state_check() and extend the RCU verification > + * with lockdep_is_held(&task_rq(p)->lock) because cpu_cgroup_attach() > + * holds that lock for each task it moves into the cgroup. Therefore > + * by holding that lock, we pin the task to the current cgroup. 
> + */ > +static inline struct task_group *task_group(struct task_struct *p) > +{ > + struct cgroup_subsys_state *css; > + > + css = task_subsys_state_check(p, cpu_cgroup_subsys_id, > + lockdep_is_held(&task_rq(p)->lock)); > + return container_of(css, struct task_group, css); > +} > + > +/* Change a task's cfs_rq and parent entity if it moves across CPUs/groups */ > +static inline void set_task_rq(struct task_struct *p, unsigned int cpu) > +{ > +#ifdef CONFIG_FAIR_GROUP_SCHED > + p->se.cfs_rq = task_group(p)->cfs_rq[cpu]; > + p->se.parent = task_group(p)->se[cpu]; > +#endif > + > +#ifdef CONFIG_RT_GROUP_SCHED > + p->rt.rt_rq = task_group(p)->rt_rq[cpu]; > + p->rt.parent = task_group(p)->rt_se[cpu]; > +#endif > +} > + > +#else /* CONFIG_CGROUP_SCHED */ > + > +static inline void set_task_rq(struct task_struct *p, unsigned int cpu) { } > +static inline struct task_group *task_group(struct task_struct *p) > +{ > + return NULL; > +} > + > +#endif /* CONFIG_CGROUP_SCHED */ > + > inline void update_rq_clock(struct rq *rq) > { > if (!rq->skip_clock_update) > @@ -4465,16 +4462,6 @@ recheck: > } > > if (user) { > -#ifdef CONFIG_RT_GROUP_SCHED > - /* > - * Do not allow realtime tasks into groups that have no runtime > - * assigned. > - */ > - if (rt_bandwidth_enabled() && rt_policy(policy) && > - task_group(p)->rt_bandwidth.rt_runtime == 0) > - return -EPERM; > -#endif > - > retval = security_task_setscheduler(p, policy, param); > if (retval) > return retval; > @@ -4490,6 +4477,22 @@ recheck: > * runqueue lock must be held. > */ > rq = __task_rq_lock(p); > + > +#ifdef CONFIG_RT_GROUP_SCHED > + if (user) { > + /* > + * Do not allow realtime tasks into groups that have no runtime > + * assigned. 
> + */ > + if (rt_bandwidth_enabled() && rt_policy(policy) && > + task_group(p)->rt_bandwidth.rt_runtime == 0) { > + __task_rq_unlock(rq); > + raw_spin_unlock_irqrestore(&p->pi_lock, flags); > + return -EPERM; > + } > + } > +#endif > + > /* recheck policy now with rq lock held */ > if (unlikely(oldpolicy != -1 && oldpolicy != p->policy)) { > policy = oldpolicy = -1; > > Sorry. I misunderstood this message when I first read it. I didn't realize this message included a new version of the patch. Anyhow, I just tried to apply the patch to 2.6.35-rc2-git3 and got this: # patch -p1 -l -F 20 --dry-run < ../5.patch patching file include/linux/cgroup.h patching file kernel/sched.c Hunk #1 succeeded at 306 with fuzz 1. Hunk #3 FAILED at 4462. Hunk #4 succeeded at 4487 with fuzz 3. 1 out of 4 hunks FAILED -- saving rejects to file kernel/sched.c.rej -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Wed, 2010-06-09 at 11:11 -0400, Miles Lane wrote: > > Sorry. I misunderstood this message when I first read it. I didn't > realize this message include a new version of the patch. > Anyhow, I just tried to apply the patch to 2.6.35-rc2-git3 and got this: > > # patch -p1 -l -F 20 --dry-run < ../5.patch > patching file include/linux/cgroup.h > patching file kernel/sched.c > Hunk #1 succeeded at 306 with fuzz 1. > Hunk #3 FAILED at 4462. > Hunk #4 succeeded at 4487 with fuzz 3. > 1 out of 4 hunks FAILED -- saving rejects to file kernel/sched.c.rej Weird.. it seems to apply without trouble to Linus' git tree. root@twins:/usr/src/linux-2.6# git checkout -f origin/master HEAD is now at 84f7586... Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ieee1394/linux1394-2.6 root@twins:/usr/src/linux-2.6# quilt push Applying patch patches/sched-rcu-validation.patch patching file include/linux/cgroup.h patching file kernel/sched.c Now at patch patches/sched-rcu-validation.patch root@twins:/usr/src/linux-2.6# git describe v2.6.35-rc2-54-g84f7586 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Wed, Jun 9, 2010 at 11:24 AM, Peter Zijlstra <peterz@infradead.org> wrote: > On Wed, 2010-06-09 at 11:11 -0400, Miles Lane wrote: >> >> Sorry. I misunderstood this message when I first read it. I didn't >> realize this message include a new version of the patch. >> Anyhow, I just tried to apply the patch to 2.6.35-rc2-git3 and got this: >> >> # patch -p1 -l -F 20 --dry-run < ../5.patch >> patching file include/linux/cgroup.h >> patching file kernel/sched.c >> Hunk #1 succeeded at 306 with fuzz 1. >> Hunk #3 FAILED at 4462. >> Hunk #4 succeeded at 4487 with fuzz 3. >> 1 out of 4 hunks FAILED -- saving rejects to file kernel/sched.c.rej > > Weird.. it seems to apply without trouble to Linus' git tree. > > root@twins:/usr/src/linux-2.6# git checkout -f origin/master > HEAD is now at 84f7586... Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ieee1394/linux1394-2.6 > root@twins:/usr/src/linux-2.6# quilt push > Applying patch patches/sched-rcu-validation.patch > patching file include/linux/cgroup.h > patching file kernel/sched.c > > Now at patch patches/sched-rcu-validation.patch > root@twins:/usr/src/linux-2.6# git describe > v2.6.35-rc2-54-g84f7586 Oh. Duh. I know what is going on. I had received another patch to sched.c. They must conflict. I will test with just your patch now. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Wed, Jun 9, 2010 at 11:29 AM, Miles Lane <miles.lane@gmail.com> wrote: > On Wed, Jun 9, 2010 at 11:24 AM, Peter Zijlstra <peterz@infradead.org> wrote: >> On Wed, 2010-06-09 at 11:11 -0400, Miles Lane wrote: >>> >>> Sorry. I misunderstood this message when I first read it. I didn't >>> realize this message include a new version of the patch. >>> Anyhow, I just tried to apply the patch to 2.6.35-rc2-git3 and got this: >>> >>> # patch -p1 -l -F 20 --dry-run < ../5.patch >>> patching file include/linux/cgroup.h >>> patching file kernel/sched.c >>> Hunk #1 succeeded at 306 with fuzz 1. >>> Hunk #3 FAILED at 4462. >>> Hunk #4 succeeded at 4487 with fuzz 3. >>> 1 out of 4 hunks FAILED -- saving rejects to file kernel/sched.c.rej >> >> Weird.. it seems to apply without trouble to Linus' git tree. >> >> root@twins:/usr/src/linux-2.6# git checkout -f origin/master >> HEAD is now at 84f7586... Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ieee1394/linux1394-2.6 >> root@twins:/usr/src/linux-2.6# quilt push >> Applying patch patches/sched-rcu-validation.patch >> patching file include/linux/cgroup.h >> patching file kernel/sched.c >> >> Now at patch patches/sched-rcu-validation.patch >> root@twins:/usr/src/linux-2.6# git describe >> v2.6.35-rc2-54-g84f7586 > > Oh. Duh. I know what is going on. I had received another patch to > sched.c. They must conflict. I will test with just your patch now. > ACPI: Core revision 20100428 [ 0.061088] [ 0.061090] =================================================== [ 0.062009] [ INFO: suspicious rcu_dereference_check() usage. ] [ 0.062138] --------------------------------------------------- [ 0.062268] kernel/sched.c:616 invoked rcu_dereference_check() without protection! [ 0.062470] [ 0.062471] other info that might help us debug this: [ 0.062472] [ 0.062835] [ 0.062836] rcu_scheduler_active = 1, debug_locks = 1 [ 0.063009] no locks held by swapper/0. 
[ 0.063134] [ 0.063135] stack backtrace: [ 0.063378] Pid: 0, comm: swapper Not tainted 2.6.35-rc2-git3 #3 [ 0.063507] Call Trace: [ 0.063638] [<ffffffff81072205>] lockdep_rcu_dereference+0x9d/0xa5 [ 0.063773] [<ffffffff810379f9>] task_group+0x7b/0x8a [ 0.064012] [<ffffffff81037a1d>] set_task_rq+0x15/0x6e [ 0.064143] [<ffffffff8103e50f>] set_task_cpu+0xa9/0xba [ 0.064274] [<ffffffff81042dbb>] sched_fork+0x10a/0x1b3 [ 0.064405] [<ffffffff810446f9>] copy_process+0x617/0x10e6 [ 0.064537] [<ffffffff8104533d>] do_fork+0x175/0x39b [ 0.064670] [<ffffffff8106589b>] ? up+0xf/0x39 [ 0.064800] [<ffffffff8106589b>] ? up+0xf/0x39 [ 0.065013] [<ffffffff811dbf73>] ? do_raw_spin_lock+0x79/0x13e [ 0.065148] [<ffffffff81011526>] kernel_thread+0x70/0x72 [ 0.065279] [<ffffffff816cc5e4>] ? kernel_init+0x0/0x1ce [ 0.065411] [<ffffffff8100aba0>] ? kernel_thread_helper+0x0/0x10 [ 0.065545] [<ffffffff81096bea>] ? rcu_scheduler_starting+0x2a/0x4c [ 0.065679] [<ffffffff813a8a4d>] rest_init+0x21/0xde [ 0.065810] [<ffffffff816cce28>] start_kernel+0x448/0x453 [ 0.066013] [<ffffffff816cc2c8>] x86_64_start_reservations+0xb3/0xb7 [ 0.066148] [<ffffffff816cc418>] x86_64_start_kernel+0x14c/0x15b [ 0.066499] Setting APIC routing to flat -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Index: linux-2.6/include/linux/cgroup.h =================================================================== --- linux-2.6.orig/include/linux/cgroup.h +++ linux-2.6/include/linux/cgroup.h @@ -525,13 +525,21 @@ static inline struct cgroup_subsys_state return cgrp->subsys[subsys_id]; } -static inline struct cgroup_subsys_state *task_subsys_state( - struct task_struct *task, int subsys_id) +/* + * function to get the cgroup_subsys_state which allows for extra + * rcu_dereference_check() conditions, such as locks used during the + * cgroup_subsys::attach() methods. + */ +#define task_subsys_state_check(task, subsys_id, __c) \ + rcu_dereference_check(task->cgroups->subsys[subsys_id], \ + rcu_read_lock_held() || \ + lockdep_is_held(&task->alloc_lock) || \ + cgroup_lock_is_held() || (__c)) + +static inline struct cgroup_subsys_state * +task_subsys_state(struct task_struct *task, int subsys_id) { - return rcu_dereference_check(task->cgroups->subsys[subsys_id], - rcu_read_lock_held() || - lockdep_is_held(&task->alloc_lock) || - cgroup_lock_is_held()); + return task_subsys_state_check(task, subsys_id, false); } static inline struct cgroup* task_cgroup(struct task_struct *task, Index: linux-2.6/kernel/sched.c =================================================================== --- linux-2.6.orig/kernel/sched.c +++ linux-2.6/kernel/sched.c @@ -306,52 +306,6 @@ static int init_task_group_load = INIT_T */ struct task_group init_task_group; -/* return group to which a task belongs */ -static inline struct task_group *task_group(struct task_struct *p) -{ - struct task_group *tg; - -#ifdef CONFIG_CGROUP_SCHED - tg = container_of(task_subsys_state(p, cpu_cgroup_subsys_id), - struct task_group, css); -#else - tg = &init_task_group; -#endif - return tg; -} - -/* Change a task's cfs_rq and parent entity if it moves across CPUs/groups */ -static inline void set_task_rq(struct task_struct *p, unsigned int cpu) -{ - /* - * Strictly speaking this rcu_read_lock() is not needed since the - * 
task_group is tied to the cgroup, which in turn can never go away - * as long as there are tasks attached to it. - * - * However since task_group() uses task_subsys_state() which is an - * rcu_dereference() user, this quiets CONFIG_PROVE_RCU. - */ - rcu_read_lock(); -#ifdef CONFIG_FAIR_GROUP_SCHED - p->se.cfs_rq = task_group(p)->cfs_rq[cpu]; - p->se.parent = task_group(p)->se[cpu]; -#endif - -#ifdef CONFIG_RT_GROUP_SCHED - p->rt.rt_rq = task_group(p)->rt_rq[cpu]; - p->rt.parent = task_group(p)->rt_se[cpu]; -#endif - rcu_read_unlock(); -} - -#else - -static inline void set_task_rq(struct task_struct *p, unsigned int cpu) { } -static inline struct task_group *task_group(struct task_struct *p) -{ - return NULL; -} - #endif /* CONFIG_CGROUP_SCHED */ /* CFS-related fields in a runqueue */ @@ -644,6 +598,49 @@ static inline int cpu_of(struct rq *rq) #define cpu_curr(cpu) (cpu_rq(cpu)->curr) #define raw_rq() (&__raw_get_cpu_var(runqueues)) +#ifdef CONFIG_CGROUP_SCHED + +/* + * Return the group to which this tasks belongs. + * + * We use task_subsys_state_check() and extend the RCU verification + * with lockdep_is_held(&task_rq(p)->lock) because cpu_cgroup_attach() + * holds that lock for each task it moves into the cgroup. Therefore + * by holding that lock, we pin the task to the current cgroup. 
+ */ +static inline struct task_group *task_group(struct task_struct *p) +{ + struct cgroup_subsys_state *css; + + css = task_subsys_state_check(p, cpu_cgroup_subsys_id, + lockdep_is_held(&task_rq(p)->lock)); + return container_of(css, struct task_group, css); +} + +/* Change a task's cfs_rq and parent entity if it moves across CPUs/groups */ +static inline void set_task_rq(struct task_struct *p, unsigned int cpu) +{ +#ifdef CONFIG_FAIR_GROUP_SCHED + p->se.cfs_rq = task_group(p)->cfs_rq[cpu]; + p->se.parent = task_group(p)->se[cpu]; +#endif + +#ifdef CONFIG_RT_GROUP_SCHED + p->rt.rt_rq = task_group(p)->rt_rq[cpu]; + p->rt.parent = task_group(p)->rt_se[cpu]; +#endif +} + +#else /* CONFIG_CGROUP_SCHED */ + +static inline void set_task_rq(struct task_struct *p, unsigned int cpu) { } +static inline struct task_group *task_group(struct task_struct *p) +{ + return NULL; +} + +#endif /* CONFIG_CGROUP_SCHED */ + inline void update_rq_clock(struct rq *rq) { if (!rq->skip_clock_update) @@ -4465,16 +4462,6 @@ recheck: } if (user) { -#ifdef CONFIG_RT_GROUP_SCHED - /* - * Do not allow realtime tasks into groups that have no runtime - * assigned. - */ - if (rt_bandwidth_enabled() && rt_policy(policy) && - task_group(p)->rt_bandwidth.rt_runtime == 0) - return -EPERM; -#endif - retval = security_task_setscheduler(p, policy, param); if (retval) return retval; @@ -4490,6 +4477,22 @@ recheck: * runqueue lock must be held. */ rq = __task_rq_lock(p); + +#ifdef CONFIG_RT_GROUP_SCHED + if (user) { + /* + * Do not allow realtime tasks into groups that have no runtime + * assigned. + */ + if (rt_bandwidth_enabled() && rt_policy(policy) && + task_group(p)->rt_bandwidth.rt_runtime == 0) { + __task_rq_unlock(rq); + raw_spin_unlock_irqrestore(&p->pi_lock, flags); + return -EPERM; + } + } +#endif + /* recheck policy now with rq lock held */ if (unlikely(oldpolicy != -1 && oldpolicy != p->policy)) { policy = oldpolicy = -1;