Message ID | alpine.LFD.2.00.1305021848450.31548@ja.ssi.bg |
---|---|
State | Not Applicable |
Headers | show |
On Thu, May 02, 2013 at 06:54:05PM +0300, Julian Anastasov wrote: > > Hello, > > On Thu, 2 May 2013, Peter Zijlstra wrote: > > > On Wed, May 01, 2013 at 09:22:08PM +0300, Julian Anastasov wrote: > > > > +extern int __cond_resched_rcu(void); > > > > + > > > > +#define cond_resched_rcu() ({ \ > > > > + __might_sleep(__FILE__, __LINE__, 0); \ > > > > > > I see your goal. But digging into __might_sleep() > > > I see that rcu_sleep_check() will scream for the non-preempt > > > case because we are under rcu_read_lock. > > > > > > #ifdef CONFIG_PREEMPT_RCU > > #define PREEMPT_RCU_OFFSET 0 > > #else > > #define PREEMPT_RCU_OFFSET 1 > > #endif > > > > #define cond_resched_rcu() ({ \ > > __might_sleep(__FILE__, __LINE__, PREEMPT_RCU_OFFSET); \ > > __cond_resched_rcu(); \ > > }) > > > > Should work I think.. > > I implemented your idea. > > I tested the following patch in 2 variants, > TINY_RCU and CONFIG_TREE_PREEMPT_RCU. I see the Could you please also try CONFIG_TREE_RCU? > error if extra rcu_read_lock is added for testing. > > I'm using the PREEMPT_ACTIVE flag to indicate > that we are already under lock. It should work because > __might_sleep is not called with such bit. I also tried to > add new flag in include/linux/hardirq.h but PREEMPT_ACTIVE > depends on the arch, so this alternative looked difficult to > implement. 
> > include/linux/rcupdate.h | 7 ++++--- > include/linux/sched.h | 14 ++++++++++++++ > kernel/sched/core.c | 20 ++++++++++++++++++-- > 3 files changed, 36 insertions(+), 5 deletions(-) > > diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h > index b758ce1..b594759 100644 > --- a/include/linux/rcupdate.h > +++ b/include/linux/rcupdate.h > @@ -480,9 +480,10 @@ static inline void rcu_preempt_sleep_check(void) > } > #endif /* #else #ifdef CONFIG_PROVE_RCU */ > > -#define rcu_sleep_check() \ > +#define rcu_sleep_check(locked) \ > do { \ > - rcu_preempt_sleep_check(); \ > + if (!(locked)) \ > + rcu_preempt_sleep_check(); \ > rcu_lockdep_assert(!lock_is_held(&rcu_bh_lock_map), \ > "Illegal context switch in RCU-bh" \ > " read-side critical section"); \ > @@ -494,7 +495,7 @@ static inline void rcu_preempt_sleep_check(void) > #else /* #ifdef CONFIG_PROVE_RCU */ > > #define rcu_lockdep_assert(c, s) do { } while (0) > -#define rcu_sleep_check() do { } while (0) > +#define rcu_sleep_check(locked) do { } while (0) > > #endif /* #else #ifdef CONFIG_PROVE_RCU */ > > diff --git a/include/linux/sched.h b/include/linux/sched.h > index e692a02..027deea 100644 > --- a/include/linux/sched.h > +++ b/include/linux/sched.h > @@ -2608,6 +2608,20 @@ extern int __cond_resched_softirq(void); > __cond_resched_softirq(); \ > }) > > +#ifdef CONFIG_PREEMPT_RCU > +#define PREEMPT_RCU_OFFSET 1 > +#else > +#define PREEMPT_RCU_OFFSET PREEMPT_CHECK_OFFSET > +#endif > + > +extern int __cond_resched_rcu(void); > + > +#define cond_resched_rcu() ({ \ > + __might_sleep(__FILE__, __LINE__, PREEMPT_ACTIVE | \ > + PREEMPT_RCU_OFFSET); \ > + __cond_resched_rcu(); \ > +}) > + > /* > * Does a critical section need to be broken due to another > * task waiting?: (technically does not depend on CONFIG_PREEMPT, > diff --git a/kernel/sched/core.c b/kernel/sched/core.c > index 67d0465..2724be7 100644 > --- a/kernel/sched/core.c > +++ b/kernel/sched/core.c > @@ -2793,7 +2793,7 @@ static inline void 
schedule_debug(struct task_struct *prev) > */ > if (unlikely(in_atomic_preempt_off() && !prev->exit_state)) > __schedule_bug(prev); > - rcu_sleep_check(); > + rcu_sleep_check(0); > > profile_hit(SCHED_PROFILING, __builtin_return_address(0)); > > @@ -4364,6 +4364,20 @@ int __sched __cond_resched_softirq(void) > } > EXPORT_SYMBOL(__cond_resched_softirq); > > +int __sched __cond_resched_rcu(void) > +{ > +#ifndef CONFIG_PREEMPT_RCU > + if (should_resched()) { > + rcu_read_unlock(); > + __cond_resched(); > + rcu_read_lock(); > + return 1; > + } > +#endif > + return 0; > +} > +EXPORT_SYMBOL(__cond_resched_rcu); > + > /** > * yield - yield the current processor to other threads. > * > @@ -7062,7 +7076,9 @@ void __might_sleep(const char *file, int line, int preempt_offset) > { > static unsigned long prev_jiffy; /* ratelimiting */ > > - rcu_sleep_check(); /* WARN_ON_ONCE() by default, no rate limit reqd. */ > + /* WARN_ON_ONCE() by default, no rate limit reqd. */ > + rcu_sleep_check(preempt_offset & PREEMPT_ACTIVE); Color me confused. From what I can see, the two values passed in through preempt_offset are PREEMPT_LOCK_OFFSET and SOFTIRQ_DISABLE_OFFSET. PREEMPT_ACTIVE is normally a high-order bit, above PREEMPT_MASK, SOFTIRQ_MASK, and HARDIRQ_MASK. PREEMPT_LOCK_OFFSET and SOFTIRQ_DISABLE_OFFSET have only low-order bits, so I don't see how rcu_sleep_check() is passed anything other than zero. Am I going blind, or what? 
Thanx, Paul > + preempt_offset &= ~PREEMPT_ACTIVE; > if ((preempt_count_equals(preempt_offset) && !irqs_disabled()) || > system_state != SYSTEM_RUNNING || oops_in_progress) > return; > -- > 1.7.3.4 > > > Regards > > -- > Julian Anastasov <ja@ssi.bg> > -- > To unsubscribe from this list: send the line "unsubscribe netdev" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html > -- To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Hello, On Thu, 2 May 2013, Paul E. McKenney wrote: > On Thu, May 02, 2013 at 06:54:05PM +0300, Julian Anastasov wrote: > > > > I tested the following patch in 2 variants, > > TINY_RCU and CONFIG_TREE_PREEMPT_RCU. I see the > > Could you please also try CONFIG_TREE_RCU? Note that I'm testing on some 9-year old UP system, i.e. 1 CPU. Now I enabled SMP to test CONFIG_TREE_RCU and the results are same. I think, it should be just like the TINY_RCU in terms of these debuggings (non-preempt). Extra rcu_read_lock gives me "Illegal context switch in RCU read-side critical section" in addition to the "BUG: sleeping function called from invalid context" message. > > error if extra rcu_read_lock is added for testing. > > > > I'm using the PREEMPT_ACTIVE flag to indicate > > that we are already under lock. It should work because > > __might_sleep is not called with such bit. I also tried to > > add new flag in include/linux/hardirq.h but PREEMPT_ACTIVE > > depends on the arch, so this alternative looked difficult to > > implement. > > +extern int __cond_resched_rcu(void); > > + > > +#define cond_resched_rcu() ({ \ > > + __might_sleep(__FILE__, __LINE__, PREEMPT_ACTIVE | \ > > + PREEMPT_RCU_OFFSET); \ > > + __cond_resched_rcu(); \ > > +}) > > + > > @@ -7062,7 +7076,9 @@ void __might_sleep(const char *file, int line, int preempt_offset) > > { > > static unsigned long prev_jiffy; /* ratelimiting */ > > > > - rcu_sleep_check(); /* WARN_ON_ONCE() by default, no rate limit reqd. */ > > + /* WARN_ON_ONCE() by default, no rate limit reqd. */ > > + rcu_sleep_check(preempt_offset & PREEMPT_ACTIVE); > > Color me confused. > > >From what I can see, the two values passed in through preempt_offset > are PREEMPT_LOCK_OFFSET and SOFTIRQ_DISABLE_OFFSET. PREEMPT_ACTIVE > is normally a high-order bit, above PREEMPT_MASK, SOFTIRQ_MASK, and > HARDIRQ_MASK. 
> > PREEMPT_LOCK_OFFSET and SOFTIRQ_DISABLE_OFFSET have only low-order bits, > so I don't see how rcu_sleep_check() is passed anything other than zero. > Am I going blind, or what? Only the new cond_resched_rcu() macro provides the PREEMPT_ACTIVE flag to skip the rcu_preempt_sleep_check() call. The old macros provide locked=0 as you noticed. Does it answer your question, or am I missing something? Regards -- Julian Anastasov <ja@ssi.bg> -- To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Hello, On Thu, 2 May 2013, Julian Anastasov wrote: > Note that I'm testing on some 9-year old > UP system, i.e. 1 CPU. Now I enabled SMP to test CONFIG_TREE_RCU > and the results are same. I think, it should be just like > the TINY_RCU in terms of these debuggings (non-preempt). Extra > rcu_read_lock gives me "Illegal context switch in RCU read-side > critical section" in addition to the "BUG: sleeping function > called from invalid context" message. Just to clarify about the test with extra rcu_read_lock because above paragraph is very confusing: - The __might_sleep call with PREEMPT_ACTIVE | PREEMPT_RCU_OFFSET just warns with "BUG: sleeping function called from invalid context" because its rcu_sleep_check is silenced. We match the nesting depth only. - but __cond_resched -> __schedule -> schedule_debug warns about the extra rcu_read_lock() with "BUG: scheduling while atomic" and then with "Illegal context switch in RCU read-side critical section" from rcu_sleep_check(0). Regards -- Julian Anastasov <ja@ssi.bg> -- To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Thu, May 02, 2013 at 09:55:54PM +0300, Julian Anastasov wrote: > > Hello, > > On Thu, 2 May 2013, Paul E. McKenney wrote: > > > On Thu, May 02, 2013 at 06:54:05PM +0300, Julian Anastasov wrote: > > > > > > I tested the following patch in 2 variants, > > > TINY_RCU and CONFIG_TREE_PREEMPT_RCU. I see the > > > > Could you please also try CONFIG_TREE_RCU? > > Note that I'm testing on some 9-year old > UP system, i.e. 1 CPU. Now I enabled SMP to test CONFIG_TREE_RCU > and the results are same. I think, it should be just like > the TINY_RCU in terms of these debuggings (non-preempt). Extra > rcu_read_lock gives me "Illegal context switch in RCU read-side > critical section" in addition to the "BUG: sleeping function > called from invalid context" message. OK... > > > error if extra rcu_read_lock is added for testing. > > > > > > I'm using the PREEMPT_ACTIVE flag to indicate > > > that we are already under lock. It should work because > > > __might_sleep is not called with such bit. I also tried to > > > add new flag in include/linux/hardirq.h but PREEMPT_ACTIVE > > > depends on the arch, so this alternative looked difficult to > > > implement. > > > > +extern int __cond_resched_rcu(void); > > > + > > > +#define cond_resched_rcu() ({ \ > > > + __might_sleep(__FILE__, __LINE__, PREEMPT_ACTIVE | \ > > > + PREEMPT_RCU_OFFSET); \ > > > + __cond_resched_rcu(); \ > > > +}) > > > + > > > > @@ -7062,7 +7076,9 @@ void __might_sleep(const char *file, int line, int preempt_offset) > > > { > > > static unsigned long prev_jiffy; /* ratelimiting */ > > > > > > - rcu_sleep_check(); /* WARN_ON_ONCE() by default, no rate limit reqd. */ > > > + /* WARN_ON_ONCE() by default, no rate limit reqd. */ > > > + rcu_sleep_check(preempt_offset & PREEMPT_ACTIVE); > > > > Color me confused. > > > > >From what I can see, the two values passed in through preempt_offset > > are PREEMPT_LOCK_OFFSET and SOFTIRQ_DISABLE_OFFSET. 
PREEMPT_ACTIVE > > is normally a high-order bit, above PREEMPT_MASK, SOFTIRQ_MASK, and > > HARDIRQ_MASK. > > > > PREEMPT_LOCK_OFFSET and SOFTIRQ_DISABLE_OFFSET have only low-order bits, > > so I don't see how rcu_sleep_check() is passed anything other than zero. > > Am I going blind, or what? > > Only the new cond_resched_rcu() macro provides > PREEMPT_ACTIVE flag to skip the rcu_preempt_sleep_check() > call. The old macros provide locked=0 as you noticed. Does it > answer your question or I'm missing something? PREEMPT_ACTIVE's value is usually 0x10000000. Did it change since 3.9? If not, rcu_sleep_check(preempt_offset & PREEMPT_ACTIVE) is the same as rcu_sleep_check(0). Thanx, Paul -- To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Hello, On Thu, 2 May 2013, Paul E. McKenney wrote: > > Only the new cond_resched_rcu() macro provides > > PREEMPT_ACTIVE flag to skip the rcu_preempt_sleep_check() > > call. The old macros provide locked=0 as you noticed. Does it > > answer your question or I'm missing something? > > PREEMPT_ACTIVE's value is usually 0x10000000. Did it change > since 3.9? If not, rcu_sleep_check(preempt_offset & PREEMPT_ACTIVE) > is the same as rcu_sleep_check(0). Yes, the different platforms use a different bit; that is why I mentioned my failed attempt at changing hardirq.h. PREEMPT_ACTIVE is always != 0. But I don't understand what you mean by 'preempt_offset & PREEMPT_ACTIVE' being always 0. It is always 0 for cond_resched(), cond_resched_lock() and cond_resched_softirq(), not for cond_resched_rcu(): (PREEMPT_ACTIVE | PREEMPT_RCU_OFFSET) & PREEMPT_ACTIVE should give PREEMPT_ACTIVE, not 0. We have 2 cases in rcu_sleep_check() for the if: 1. !(PREEMPT_ACTIVE) => FALSE for cond_resched_rcu 2. !(0) => TRUE for other cond_resched_* macros On x86 the code is: __might_sleep: pushl %ebp # testl $268435456, %ecx #, preempt_offset ... jne .L240 #, // rcu_lock_map checked when PREEMPT_ACTIVE is missing .L240: // rcu_bh_lock_map checked Regards -- Julian Anastasov <ja@ssi.bg> -- To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Thu, May 02, 2013 at 11:19:12PM +0300, Julian Anastasov wrote: > > Hello, > > On Thu, 2 May 2013, Paul E. McKenney wrote: > > > > Only the new cond_resched_rcu() macro provides > > > PREEMPT_ACTIVE flag to skip the rcu_preempt_sleep_check() > > > call. The old macros provide locked=0 as you noticed. Does it > > > answer your question or I'm missing something? > > > > PREEMPT_ACTIVE's value is usually 0x10000000. Did it change > > since 3.9? If not, rcu_sleep_check(preempt_offset & PREEMPT_ACTIVE) > > is the same as rcu_sleep_check(0). > > Yes, the different platforms use different bit, > that is why I mentioned about my failed attempt at > changing hardirq.h. PREEMPT_ACTIVE is always != 0. > > But I don't understand what do you mean by > 'preempt_offset & PREEMPT_ACTIVE' being always 0. > It is always 0 for cond_resched(), cond_resched_lock() > and cond_resched_softirq(), not for cond_resched_rcu(): > > (PREEMPT_ACTIVE | PREEMPT_RCU_OFFSET) & PREEMPT_ACTIVE > should give PREEMPT_ACTIVE, not 0. We have 2 cases in > rcu_sleep_check() for the if: > > 1. !(PREEMPT_ACTIVE) => FALSE for cond_resched_rcu > 2. !(0) => TRUE for other cond_resched_* macros > > On x86 the code is: > > __might_sleep: > pushl %ebp # > testl $268435456, %ecx #, preempt_offset > ... > jne .L240 #, > // rcu_lock_map checked when PREEMPT_ACTIVE is missing > .L240: > // rcu_bh_lock_map checked OK, apologies -- I was looking at the calls to __might_sleep() in mainline, and missed the one that you added. Revisiting that, a question: > +#ifdef CONFIG_PREEMPT_RCU > +#define PREEMPT_RCU_OFFSET 1 Does this really want to be "1" instead of PREEMPT_OFFSET? 
> +#else > +#define PREEMPT_RCU_OFFSET PREEMPT_CHECK_OFFSET > +#endif > + > +extern int __cond_resched_rcu(void); > + > +#define cond_resched_rcu() ({ \ > + __might_sleep(__FILE__, __LINE__, PREEMPT_ACTIVE | \ > + PREEMPT_RCU_OFFSET); \ > + __cond_resched_rcu(); \ > +}) > + For the rest, I clearly need to revisit when more alert, because right now I am not seeing the connection to preemptible RCU's rcu_read_lock() implementation. Thanx, Paul -- To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Hello, On Thu, 2 May 2013, Paul E. McKenney wrote: > mainline, and missed the one that you added. Revisiting that, a > question: > > > +#ifdef CONFIG_PREEMPT_RCU > > > +#define PREEMPT_RCU_OFFSET 1 > > Does this really want to be "1" instead of PREEMPT_OFFSET? In this case when CONFIG_PREEMPT_RCU is enabled we (RCU) do not touch the preempt counters. Instead, the units are accounted in current->rcu_read_lock_nesting: #define rcu_preempt_depth() (current->rcu_read_lock_nesting) __rcu_read_lock: current->rcu_read_lock_nesting++; and the path is __might_sleep -> preempt_count_equals -> rcu_preempt_depth For now both places do not use PREEMPT_OFFSET: - #define inc_preempt_count() add_preempt_count(1) - __rcu_read_lock: current->rcu_read_lock_nesting++; so, ... it does not matter much for me. In short, the trick is in preempt_count_equals() where preempt_offset is a combination of preempt count and RCU preempt depth: #ifdef CONFIG_PREEMPT_RCU #define PREEMPT_RCU_OFFSET (0 /* preempt */ + 1 /* RCU */) #else #define PREEMPT_RCU_OFFSET (PREEMPT_CHECK_OFFSET + 0 /* RCU */) #endif Let me know your preference about this definition... Regards -- Julian Anastasov <ja@ssi.bg> -- To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Fri, May 03, 2013 at 10:52:36AM +0300, Julian Anastasov wrote: > > Hello, > > On Thu, 2 May 2013, Paul E. McKenney wrote: > > > mainline, and missed the one that you added. Revisiting that, a > > question: > > > > > +#ifdef CONFIG_PREEMPT_RCU > > > +#define PREEMPT_RCU_OFFSET 1 > > > > Does this really want to be "1" instead of PREEMPT_OFFSET? > > In this case when CONFIG_PREEMPT_RCU is enabled > we (RCU) do not touch the preempt counters. Instead, the units > are accounted in current->rcu_read_lock_nesting: > > #define rcu_preempt_depth() (current->rcu_read_lock_nesting) > > __rcu_read_lock: > current->rcu_read_lock_nesting++; > > and the path is __might_sleep -> preempt_count_equals -> > rcu_preempt_depth > > For now both places do not use PREEMPT_OFFSET: > > - #define inc_preempt_count() add_preempt_count(1) > - __rcu_read_lock: current->rcu_read_lock_nesting++; > > so, ... it does not matter much for me. In short, > the trick is in preempt_count_equals() where preempt_offset > is a combination of preempt count and RCU preempt depth: > > #ifdef CONFIG_PREEMPT_RCU > #define PREEMPT_RCU_OFFSET (0 /* preempt */ + 1 /* RCU */) > #else > #define PREEMPT_RCU_OFFSET (PREEMPT_CHECK_OFFSET + 0 /* RCU */) > #endif > > Let me know for your preference about this definition... OK, after getting some sleep, I might have located the root cause of my confusion yesterday. The key point is that I don't understand why we cannot get the effect we are looking for with the following in sched.h (or wherever): static inline int cond_resched_rcu(void) { #if defined(CONFIG_DEBUG_ATOMIC_SLEEP) || !defined(CONFIG_PREEMPT_RCU) rcu_read_unlock(); cond_resched(); rcu_read_lock(); #endif } This adds absolutely no overhead in non-debug builds of CONFIG_PREEMPT_RCU, does the checking in debug builds, and allows voluntary preemption in !CONFIG_PREEMPT_RCU builds. 
CONFIG_PROVE_RCU builds will check for an (illegal) outer rcu_read_lock() in CONFIG_PREEMPT_RCU builds, and you will get "scheduling while atomic" in response to an outer rcu_read_lock() in !CONFIG_PREEMPT_RCU builds. It also seems to me a lot simpler. Does this work, or am I still missing something? Thanx, Paul -- To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
> The key point is that I don't understand why we cannot get the effect > we are looking for with the following in sched.h (or wherever): > > static inline int cond_resched_rcu(void) > { > #if defined(CONFIG_DEBUG_ATOMIC_SLEEP) || !defined(CONFIG_PREEMPT_RCU) > rcu_read_unlock(); > cond_resched(); > rcu_read_lock(); > #endif > } > > This adds absolutely no overhead in non-debug builds of CONFIG_PREEMPT_RCU, > does the checking in debug builds, and allows voluntary preemption in > !CONFIG_PREEMPT_RCU builds. CONFIG_PROVE_RCU builds will check for an > (illegal) outer rcu_read_lock() in CONFIG_PREEMPT_RCU builds, and you > will get "scheduling while atomic" in response to an outer rcu_read_lock() > in !CONFIG_PREEMPT_RCU builds. > > It also seems to me a lot simpler. > > Does this work, or am I still missing something? It can do quite a number of superfluous rcu_read_unlock()/lock() pairs for voluntary preemption kernels? -- To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Fri, May 03, 2013 at 07:04:47PM +0200, Peter Zijlstra wrote: > > The key point is that I don't understand why we cannot get the effect > > we are looking for with the following in sched.h (or wherever): > > > > static inline int cond_resched_rcu(void) > > { > > #if defined(CONFIG_DEBUG_ATOMIC_SLEEP) || !defined(CONFIG_PREEMPT_RCU) > > rcu_read_unlock(); > > cond_resched(); > > rcu_read_lock(); > > #endif > > } > > > > This adds absolutely no overhead in non-debug builds of CONFIG_PREEMPT_RCU, > > does the checking in debug builds, and allows voluntary preemption in > > !CONFIG_PREEMPT_RCU builds. CONFIG_PROVE_RCU builds will check for an > > (illegal) outer rcu_read_lock() in CONFIG_PREEMPT_RCU builds, and you > > will get "scheduling while atomic" in response to an outer rcu_read_lock() > > in !CONFIG_PREEMPT_RCU builds. > > > > It also seems to me a lot simpler. > > > > Does this work, or am I still missing something? > > It can do quite a number of superfluous rcu_read_unlock()/lock() pairs for > voluntary preemption kernels? This happens in only two cases: 1. CONFIG_PREEMPT_RCU=n kernels. But in this case, rcu_read_unlock() and rcu_read_lock() are free, at least for CONFIG_PROVE_LOCKING=n kernels. And if you have CONFIG_PROVE_LOCKING=y, any contribution from rcu_read_unlock() and rcu_read_lock() will be in the noise. 2. CONFIG_DEBUG_ATOMIC_SLEEP=y kernels -- but in this case, you -want- the debugging. So either the overhead is non-existent, or you explicitly asked for the resulting debugging. In particular, CONFIG_PREEMPT_RCU=y kernels have an empty static inline function, which is free -- unless CONFIG_DEBUG_ATOMIC_SLEEP=y, in which case you again explicitly asked for the debugging. So I do not believe that the extra rcu_read_unlock()/lock() pairs are a problem in any of the possible combinations of configurations. 
Thanx, Paul -- To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Hello, On Fri, 3 May 2013, Paul E. McKenney wrote: > OK, after getting some sleep, I might have located the root cause of > my confusion yesterday. > > The key point is that I don't understand why we cannot get the effect > we are looking for with the following in sched.h (or wherever): > > static inline int cond_resched_rcu(void) > { > #if defined(CONFIG_DEBUG_ATOMIC_SLEEP) || !defined(CONFIG_PREEMPT_RCU) > rcu_read_unlock(); > cond_resched(); > rcu_read_lock(); > #endif > } > > This adds absolutely no overhead in non-debug builds of CONFIG_PREEMPT_RCU, > does the checking in debug builds, and allows voluntary preemption in > !CONFIG_PREEMPT_RCU builds. CONFIG_PROVE_RCU builds will check for an > (illegal) outer rcu_read_lock() in CONFIG_PREEMPT_RCU builds, and you > will get "scheduling while atomic" in response to an outer rcu_read_lock() > in !CONFIG_PREEMPT_RCU builds. > > It also seems to me a lot simpler. > > Does this work, or am I still missing something? It should work. It is a better version of the 2nd variant I mentioned here: http://marc.info/?l=linux-netdev&m=136741839021257&w=2 I'll stick to this version, hope Peter Zijlstra agrees. Playing with PREEMPT_ACTIVE or another bit makes the things more complex. To summarize: - CONFIG_PREEMPT_RCU: - no empty functions called - CONFIG_DEBUG_ATOMIC_SLEEP can catch errors even for this case - non-CONFIG_PREEMPT_RCU: - rcu_read_lock and rcu_read_unlock are barrier(), so it expands just to cond_resched() I'll repeat the tests tomorrow and if there are no problems will post official version after the merge window. Regards -- Julian Anastasov <ja@ssi.bg> -- To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
> This happens in only two cases: > > 1. CONFIG_PREEMPT_RCU=n kernels. But in this case, rcu_read_unlock() > and rcu_read_lock() are free, at least for CONFIG_PROVE_LOCKING=n > kernels. And if you have CONFIG_PROVE_LOCKING=y, any contribution > from rcu_read_unlock() and rcu_read_lock() will be in the noise. Oh argh.. I completely overlooked that rcu_read_{,un}lock() are NOPs for PREEMPT=n kernels. /me crawls back under his stone.. -- To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Hello, On Fri, 3 May 2013, Paul E. McKenney wrote: > static inline int cond_resched_rcu(void) > { > #if defined(CONFIG_DEBUG_ATOMIC_SLEEP) || !defined(CONFIG_PREEMPT_RCU) > rcu_read_unlock(); > cond_resched(); > rcu_read_lock(); > #endif > } > > This adds absolutely no overhead in non-debug builds of CONFIG_PREEMPT_RCU, > does the checking in debug builds, and allows voluntary preemption in > !CONFIG_PREEMPT_RCU builds. CONFIG_PROVE_RCU builds will check for an > (illegal) outer rcu_read_lock() in CONFIG_PREEMPT_RCU builds, and you > will get "scheduling while atomic" in response to an outer rcu_read_lock() > in !CONFIG_PREEMPT_RCU builds. > > It also seems to me a lot simpler. > > Does this work, or am I still missing something? Works perfectly. Thanks! Tested CONFIG_PREEMPT_RCU=y/n, the error messages with an extra RCU lock. Regards -- Julian Anastasov <ja@ssi.bg> -- To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Sat, May 04, 2013 at 10:23:31AM +0300, Julian Anastasov wrote: > > Hello, > > On Fri, 3 May 2013, Paul E. McKenney wrote: > > > static inline int cond_resched_rcu(void) > > { > > #if defined(CONFIG_DEBUG_ATOMIC_SLEEP) || !defined(CONFIG_PREEMPT_RCU) > > rcu_read_unlock(); > > cond_resched(); > > rcu_read_lock(); > > #endif > > } > > > > This adds absolutely no overhead in non-debug builds of CONFIG_PREEMPT_RCU, > > does the checking in debug builds, and allows voluntary preemption in > > !CONFIG_PREEMPT_RCU builds. CONFIG_PROVE_RCU builds will check for an > > (illegal) outer rcu_read_lock() in CONFIG_PREEMPT_RCU builds, and you > > will get "scheduling while atomic" in response to an outer rcu_read_lock() > > in !CONFIG_PREEMPT_RCU builds. > > > > It also seems to me a lot simpler. > > > > Does this work, or am I still missing something? > > Works perfectly. Thanks! Tested CONFIG_PREEMPT_RCU=y/n, > the errors messages with extra RCU lock. Very good! Please send the patch along, and I will ack it. Thanx, Paul -- To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index b758ce1..b594759 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -480,9 +480,10 @@ static inline void rcu_preempt_sleep_check(void) } #endif /* #else #ifdef CONFIG_PROVE_RCU */ -#define rcu_sleep_check() \ +#define rcu_sleep_check(locked) \ do { \ - rcu_preempt_sleep_check(); \ + if (!(locked)) \ + rcu_preempt_sleep_check(); \ rcu_lockdep_assert(!lock_is_held(&rcu_bh_lock_map), \ "Illegal context switch in RCU-bh" \ " read-side critical section"); \ @@ -494,7 +495,7 @@ static inline void rcu_preempt_sleep_check(void) #else /* #ifdef CONFIG_PROVE_RCU */ #define rcu_lockdep_assert(c, s) do { } while (0) -#define rcu_sleep_check() do { } while (0) +#define rcu_sleep_check(locked) do { } while (0) #endif /* #else #ifdef CONFIG_PROVE_RCU */ diff --git a/include/linux/sched.h b/include/linux/sched.h index e692a02..027deea 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2608,6 +2608,20 @@ extern int __cond_resched_softirq(void); __cond_resched_softirq(); \ }) +#ifdef CONFIG_PREEMPT_RCU +#define PREEMPT_RCU_OFFSET 1 +#else +#define PREEMPT_RCU_OFFSET PREEMPT_CHECK_OFFSET +#endif + +extern int __cond_resched_rcu(void); + +#define cond_resched_rcu() ({ \ + __might_sleep(__FILE__, __LINE__, PREEMPT_ACTIVE | \ + PREEMPT_RCU_OFFSET); \ + __cond_resched_rcu(); \ +}) + /* * Does a critical section need to be broken due to another * task waiting?: (technically does not depend on CONFIG_PREEMPT, diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 67d0465..2724be7 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2793,7 +2793,7 @@ static inline void schedule_debug(struct task_struct *prev) */ if (unlikely(in_atomic_preempt_off() && !prev->exit_state)) __schedule_bug(prev); - rcu_sleep_check(); + rcu_sleep_check(0); profile_hit(SCHED_PROFILING, __builtin_return_address(0)); @@ -4364,6 +4364,20 @@ int __sched __cond_resched_softirq(void) } 
EXPORT_SYMBOL(__cond_resched_softirq); +int __sched __cond_resched_rcu(void) +{ +#ifndef CONFIG_PREEMPT_RCU + if (should_resched()) { + rcu_read_unlock(); + __cond_resched(); + rcu_read_lock(); + return 1; + } +#endif + return 0; +} +EXPORT_SYMBOL(__cond_resched_rcu); + /** * yield - yield the current processor to other threads. * @@ -7062,7 +7076,9 @@ void __might_sleep(const char *file, int line, int preempt_offset) { static unsigned long prev_jiffy; /* ratelimiting */ - rcu_sleep_check(); /* WARN_ON_ONCE() by default, no rate limit reqd. */ + /* WARN_ON_ONCE() by default, no rate limit reqd. */ + rcu_sleep_check(preempt_offset & PREEMPT_ACTIVE); + preempt_offset &= ~PREEMPT_ACTIVE; if ((preempt_count_equals(preempt_offset) && !irqs_disabled()) || system_state != SYSTEM_RUNNING || oops_in_progress) return;