diff mbox

[v2,1/2] sched: Add cond_resched_rcu_lock() helper

Message ID alpine.LFD.2.00.1305021848450.31548@ja.ssi.bg
State Not Applicable
Headers show

Commit Message

Julian Anastasov May 2, 2013, 3:54 p.m. UTC
Hello,

On Thu, 2 May 2013, Peter Zijlstra wrote:

> On Wed, May 01, 2013 at 09:22:08PM +0300, Julian Anastasov wrote:
> > > +extern int __cond_resched_rcu(void);
> > > +
> > > +#define cond_resched_rcu() ({			\
> > > +	__might_sleep(__FILE__, __LINE__, 0);	\
> > 
> > 	I see your goal. But digging into __might_sleep()
> > I see that rcu_sleep_check() will scream for the non-preempt
> > case because we are under rcu_read_lock.
> 
> 
> #ifdef CONFIG_PREEMPT_RCU
> #define PREEMPT_RCU_OFFSET 0
> #else
> #define PREEMPT_RCU_OFFSET 1
> #endif
> 
> #define cond_resched_rcu() ({	\
> 	__might_sleep(__FILE__, __LINE__, PREEMPT_RCU_OFFSET);	\
> 	__cond_resched_rcu();	\
> })
> 
> Should work I think..

	I implemented your idea.

	I tested the following patch in 2 variants,
TINY_RCU and CONFIG_TREE_PREEMPT_RCU. I see the
error if extra rcu_read_lock is added for testing.

	I'm using the PREEMPT_ACTIVE flag to indicate
that we are already under lock. It should work because
__might_sleep is not called with such bit. I also tried to
add new flag in include/linux/hardirq.h but PREEMPT_ACTIVE
depends on the arch, so this alternative looked difficult to
implement.

 include/linux/rcupdate.h |    7 ++++---
 include/linux/sched.h    |   14 ++++++++++++++
 kernel/sched/core.c      |   20 ++++++++++++++++++--
 3 files changed, 36 insertions(+), 5 deletions(-)

Comments

Paul E. McKenney May 2, 2013, 5:32 p.m. UTC | #1
On Thu, May 02, 2013 at 06:54:05PM +0300, Julian Anastasov wrote:
> 
> 	Hello,
> 
> On Thu, 2 May 2013, Peter Zijlstra wrote:
> 
> > On Wed, May 01, 2013 at 09:22:08PM +0300, Julian Anastasov wrote:
> > > > +extern int __cond_resched_rcu(void);
> > > > +
> > > > +#define cond_resched_rcu() ({			\
> > > > +	__might_sleep(__FILE__, __LINE__, 0);	\
> > > 
> > > 	I see your goal. But digging into __might_sleep()
> > > I see that rcu_sleep_check() will scream for the non-preempt
> > > case because we are under rcu_read_lock.
> > 
> > 
> > #ifdef CONFIG_PREEMPT_RCU
> > #define PREEMPT_RCU_OFFSET 0
> > #else
> > #define PREEMPT_RCU_OFFSET 1
> > #endif
> > 
> > #define cond_resched_rcu() ({	\
> > 	__might_sleep(__FILE__, __LINE__, PREEMPT_RCU_OFFSET);	\
> > 	__cond_resched_rcu();	\
> > })
> > 
> > Should work I think..
> 
> 	I implemented your idea.
> 
> 	I tested the following patch in 2 variants,
> TINY_RCU and CONFIG_TREE_PREEMPT_RCU. I see the

Could you please also try CONFIG_TREE_RCU?

> error if extra rcu_read_lock is added for testing.
> 
> 	I'm using the PREEMPT_ACTIVE flag to indicate
> that we are already under lock. It should work because
> __might_sleep is not called with such bit. I also tried to
> add new flag in include/linux/hardirq.h but PREEMPT_ACTIVE
> depends on the arch, so this alternative looked difficult to
> implement.
> 
>  include/linux/rcupdate.h |    7 ++++---
>  include/linux/sched.h    |   14 ++++++++++++++
>  kernel/sched/core.c      |   20 ++++++++++++++++++--
>  3 files changed, 36 insertions(+), 5 deletions(-)
> 
> diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
> index b758ce1..b594759 100644
> --- a/include/linux/rcupdate.h
> +++ b/include/linux/rcupdate.h
> @@ -480,9 +480,10 @@ static inline void rcu_preempt_sleep_check(void)
>  }
>  #endif /* #else #ifdef CONFIG_PROVE_RCU */
> 
> -#define rcu_sleep_check()						\
> +#define rcu_sleep_check(locked)						\
>  	do {								\
> -		rcu_preempt_sleep_check();				\
> +		if (!(locked))						\
> +			rcu_preempt_sleep_check();			\
>  		rcu_lockdep_assert(!lock_is_held(&rcu_bh_lock_map),	\
>  				   "Illegal context switch in RCU-bh"	\
>  				   " read-side critical section");	\
> @@ -494,7 +495,7 @@ static inline void rcu_preempt_sleep_check(void)
>  #else /* #ifdef CONFIG_PROVE_RCU */
> 
>  #define rcu_lockdep_assert(c, s) do { } while (0)
> -#define rcu_sleep_check() do { } while (0)
> +#define rcu_sleep_check(locked) do { } while (0)
> 
>  #endif /* #else #ifdef CONFIG_PROVE_RCU */
> 
> diff --git a/include/linux/sched.h b/include/linux/sched.h
> index e692a02..027deea 100644
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -2608,6 +2608,20 @@ extern int __cond_resched_softirq(void);
>  	__cond_resched_softirq();					\
>  })
> 
> +#ifdef CONFIG_PREEMPT_RCU
> +#define PREEMPT_RCU_OFFSET	1
> +#else
> +#define PREEMPT_RCU_OFFSET	PREEMPT_CHECK_OFFSET
> +#endif
> +
> +extern int __cond_resched_rcu(void);
> +
> +#define cond_resched_rcu() ({					\
> +	__might_sleep(__FILE__, __LINE__, PREEMPT_ACTIVE |	\
> +					  PREEMPT_RCU_OFFSET);	\
> +	__cond_resched_rcu();					\
> +})
> +
>  /*
>   * Does a critical section need to be broken due to another
>   * task waiting?: (technically does not depend on CONFIG_PREEMPT,
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index 67d0465..2724be7 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -2793,7 +2793,7 @@ static inline void schedule_debug(struct task_struct *prev)
>  	 */
>  	if (unlikely(in_atomic_preempt_off() && !prev->exit_state))
>  		__schedule_bug(prev);
> -	rcu_sleep_check();
> +	rcu_sleep_check(0);
> 
>  	profile_hit(SCHED_PROFILING, __builtin_return_address(0));
> 
> @@ -4364,6 +4364,20 @@ int __sched __cond_resched_softirq(void)
>  }
>  EXPORT_SYMBOL(__cond_resched_softirq);
> 
> +int __sched __cond_resched_rcu(void)
> +{
> +#ifndef CONFIG_PREEMPT_RCU
> +	if (should_resched()) {
> +		rcu_read_unlock();
> +		__cond_resched();
> +		rcu_read_lock();
> +		return 1;
> +	}
> +#endif
> +	return 0;
> +}
> +EXPORT_SYMBOL(__cond_resched_rcu);
> +
>  /**
>   * yield - yield the current processor to other threads.
>   *
> @@ -7062,7 +7076,9 @@ void __might_sleep(const char *file, int line, int preempt_offset)
>  {
>  	static unsigned long prev_jiffy;	/* ratelimiting */
> 
> -	rcu_sleep_check(); /* WARN_ON_ONCE() by default, no rate limit reqd. */
> +	/* WARN_ON_ONCE() by default, no rate limit reqd. */
> +	rcu_sleep_check(preempt_offset & PREEMPT_ACTIVE);

Color me confused.

From what I can see, the two values passed in through preempt_offset
are PREEMPT_LOCK_OFFSET and SOFTIRQ_DISABLE_OFFSET.  PREEMPT_ACTIVE
is normally a high-order bit, above PREEMPT_MASK, SOFTIRQ_MASK, and
HARDIRQ_MASK.

PREEMPT_LOCK_OFFSET and SOFTIRQ_DISABLE_OFFSET have only low-order bits,
so I don't see how rcu_sleep_check() is passed anything other than zero.
Am I going blind, or what?

							Thanx, Paul

> +	preempt_offset &= ~PREEMPT_ACTIVE;
>  	if ((preempt_count_equals(preempt_offset) && !irqs_disabled()) ||
>  	    system_state != SYSTEM_RUNNING || oops_in_progress)
>  		return;
> -- 
> 1.7.3.4
> 
> 
> Regards
> 
> --
> Julian Anastasov <ja@ssi.bg>
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 

--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Julian Anastasov May 2, 2013, 6:55 p.m. UTC | #2
Hello,

On Thu, 2 May 2013, Paul E. McKenney wrote:

> On Thu, May 02, 2013 at 06:54:05PM +0300, Julian Anastasov wrote:
> > 
> > 	I tested the following patch in 2 variants,
> > TINY_RCU and CONFIG_TREE_PREEMPT_RCU. I see the
> 
> Could you please also try CONFIG_TREE_RCU?

	Note that I'm testing on some 9-year old
UP system, i.e. 1 CPU. Now I enabled SMP to test CONFIG_TREE_RCU
and the results are same. I think, it should be just like
the TINY_RCU in terms of these debuggings (non-preempt). Extra 
rcu_read_lock gives me "Illegal context switch in RCU read-side
critical section" in addition to the "BUG: sleeping function
called from invalid context" message.

> > error if extra rcu_read_lock is added for testing.
> > 
> > 	I'm using the PREEMPT_ACTIVE flag to indicate
> > that we are already under lock. It should work because
> > __might_sleep is not called with such bit. I also tried to
> > add new flag in include/linux/hardirq.h but PREEMPT_ACTIVE
> > depends on the arch, so this alternative looked difficult to
> > implement.

> > +extern int __cond_resched_rcu(void);
> > +
> > +#define cond_resched_rcu() ({					\
> > +	__might_sleep(__FILE__, __LINE__, PREEMPT_ACTIVE |	\
> > +					  PREEMPT_RCU_OFFSET);	\
> > +	__cond_resched_rcu();					\
> > +})
> > +

> > @@ -7062,7 +7076,9 @@ void __might_sleep(const char *file, int line, int preempt_offset)
> >  {
> >  	static unsigned long prev_jiffy;	/* ratelimiting */
> > 
> > -	rcu_sleep_check(); /* WARN_ON_ONCE() by default, no rate limit reqd. */
> > +	/* WARN_ON_ONCE() by default, no rate limit reqd. */
> > +	rcu_sleep_check(preempt_offset & PREEMPT_ACTIVE);
> 
> Color me confused.
> 
> >From what I can see, the two values passed in through preempt_offset
> are PREEMPT_LOCK_OFFSET and SOFTIRQ_DISABLE_OFFSET.  PREEMPT_ACTIVE
> is normally a high-order bit, above PREEMPT_MASK, SOFTIRQ_MASK, and
> HARDIRQ_MASK.
> 
> PREEMPT_LOCK_OFFSET and SOFTIRQ_DISABLE_OFFSET have only low-order bits,
> so I don't see how rcu_sleep_check() is passed anything other than zero.
> Am I going blind, or what?

	Only the new cond_resched_rcu() macro provides
PREEMPT_ACTIVE flag to skip the rcu_preempt_sleep_check()
call. The old macros provide locked=0 as you noticed. Does it
answer your question or I'm missing something?

Regards

--
Julian Anastasov <ja@ssi.bg>
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Julian Anastasov May 2, 2013, 7:24 p.m. UTC | #3
Hello,

On Thu, 2 May 2013, Julian Anastasov wrote:

> 	Note that I'm testing on some 9-year old
> UP system, i.e. 1 CPU. Now I enabled SMP to test CONFIG_TREE_RCU
> and the results are same. I think, it should be just like
> the TINY_RCU in terms of these debuggings (non-preempt). Extra 
> rcu_read_lock gives me "Illegal context switch in RCU read-side
> critical section" in addition to the "BUG: sleeping function
> called from invalid context" message.

	Just to clarify about the test with extra
rcu_read_lock because above paragraph is very confusing:

- The __might_sleep call with PREEMPT_ACTIVE | PREEMPT_RCU_OFFSET
just warns with "BUG: sleeping function called from invalid context"
because its rcu_sleep_check is silenced. We match the
nesting depth only.

- but __cond_resched -> __schedule -> schedule_debug warns about
the extra rcu_read_lock() with "BUG: scheduling while atomic" and
then with "Illegal context switch in RCU read-side critical section"
from rcu_sleep_check(0).

Regards

--
Julian Anastasov <ja@ssi.bg>
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Paul E. McKenney May 2, 2013, 7:34 p.m. UTC | #4
On Thu, May 02, 2013 at 09:55:54PM +0300, Julian Anastasov wrote:
> 
> 	Hello,
> 
> On Thu, 2 May 2013, Paul E. McKenney wrote:
> 
> > On Thu, May 02, 2013 at 06:54:05PM +0300, Julian Anastasov wrote:
> > > 
> > > 	I tested the following patch in 2 variants,
> > > TINY_RCU and CONFIG_TREE_PREEMPT_RCU. I see the
> > 
> > Could you please also try CONFIG_TREE_RCU?
> 
> 	Note that I'm testing on some 9-year old
> UP system, i.e. 1 CPU. Now I enabled SMP to test CONFIG_TREE_RCU
> and the results are same. I think, it should be just like
> the TINY_RCU in terms of these debuggings (non-preempt). Extra 
> rcu_read_lock gives me "Illegal context switch in RCU read-side
> critical section" in addition to the "BUG: sleeping function
> called from invalid context" message.

OK...

> > > error if extra rcu_read_lock is added for testing.
> > > 
> > > 	I'm using the PREEMPT_ACTIVE flag to indicate
> > > that we are already under lock. It should work because
> > > __might_sleep is not called with such bit. I also tried to
> > > add new flag in include/linux/hardirq.h but PREEMPT_ACTIVE
> > > depends on the arch, so this alternative looked difficult to
> > > implement.
> 
> > > +extern int __cond_resched_rcu(void);
> > > +
> > > +#define cond_resched_rcu() ({					\
> > > +	__might_sleep(__FILE__, __LINE__, PREEMPT_ACTIVE |	\
> > > +					  PREEMPT_RCU_OFFSET);	\
> > > +	__cond_resched_rcu();					\
> > > +})
> > > +
> 
> > > @@ -7062,7 +7076,9 @@ void __might_sleep(const char *file, int line, int preempt_offset)
> > >  {
> > >  	static unsigned long prev_jiffy;	/* ratelimiting */
> > > 
> > > -	rcu_sleep_check(); /* WARN_ON_ONCE() by default, no rate limit reqd. */
> > > +	/* WARN_ON_ONCE() by default, no rate limit reqd. */
> > > +	rcu_sleep_check(preempt_offset & PREEMPT_ACTIVE);
> > 
> > Color me confused.
> > 
> > >From what I can see, the two values passed in through preempt_offset
> > are PREEMPT_LOCK_OFFSET and SOFTIRQ_DISABLE_OFFSET.  PREEMPT_ACTIVE
> > is normally a high-order bit, above PREEMPT_MASK, SOFTIRQ_MASK, and
> > HARDIRQ_MASK.
> > 
> > PREEMPT_LOCK_OFFSET and SOFTIRQ_DISABLE_OFFSET have only low-order bits,
> > so I don't see how rcu_sleep_check() is passed anything other than zero.
> > Am I going blind, or what?
> 
> 	Only the new cond_resched_rcu() macro provides
> PREEMPT_ACTIVE flag to skip the rcu_preempt_sleep_check()
> call. The old macros provide locked=0 as you noticed. Does it
> answer your question or I'm missing something?

PREEMPT_ACTIVE's value is usually 0x10000000.  Did it change
since 3.9?  If not, rcu_sleep_check(preempt_offset & PREEMPT_ACTIVE)
is the same as rcu_sleep_check(0).

							Thanx, Paul

--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Julian Anastasov May 2, 2013, 8:19 p.m. UTC | #5
Hello,

On Thu, 2 May 2013, Paul E. McKenney wrote:

> > 	Only the new cond_resched_rcu() macro provides
> > PREEMPT_ACTIVE flag to skip the rcu_preempt_sleep_check()
> > call. The old macros provide locked=0 as you noticed. Does it
> > answer your question or I'm missing something?
> 
> PREEMPT_ACTIVE's value is usually 0x10000000.  Did it change
> since 3.9?  If not, rcu_sleep_check(preempt_offset & PREEMPT_ACTIVE)
> is the same as rcu_sleep_check(0).

	Yes, the different platforms use different bit,
that is why I mentioned about my failed attempt at
changing hardirq.h. PREEMPT_ACTIVE is always != 0.

	But I don't understand what do you mean by
'preempt_offset & PREEMPT_ACTIVE' being always 0.
It is always 0 for cond_resched(), cond_resched_lock()
and cond_resched_softirq(), not for cond_resched_rcu():

(PREEMPT_ACTIVE | PREEMPT_RCU_OFFSET) & PREEMPT_ACTIVE
should give PREEMPT_ACTIVE, not 0. We have 2 cases in
rcu_sleep_check() for the if:

1. !(PREEMPT_ACTIVE) => FALSE for cond_resched_rcu
2. !(0) => TRUE for other cond_resched_* macros

	On x86 the code is:

__might_sleep:
        pushl   %ebp    #
        testl   $268435456, %ecx        #, preempt_offset
...
        jne     .L240   #,
	// rcu_lock_map checked when PREEMPT_ACTIVE is missing
.L240:
	// rcu_bh_lock_map checked

Regards

--
Julian Anastasov <ja@ssi.bg>
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Paul E. McKenney May 2, 2013, 10:31 p.m. UTC | #6
On Thu, May 02, 2013 at 11:19:12PM +0300, Julian Anastasov wrote:
> 
> 	Hello,
> 
> On Thu, 2 May 2013, Paul E. McKenney wrote:
> 
> > > 	Only the new cond_resched_rcu() macro provides
> > > PREEMPT_ACTIVE flag to skip the rcu_preempt_sleep_check()
> > > call. The old macros provide locked=0 as you noticed. Does it
> > > answer your question or I'm missing something?
> > 
> > PREEMPT_ACTIVE's value is usually 0x10000000.  Did it change
> > since 3.9?  If not, rcu_sleep_check(preempt_offset & PREEMPT_ACTIVE)
> > is the same as rcu_sleep_check(0).
> 
> 	Yes, the different platforms use different bit,
> that is why I mentioned about my failed attempt at
> changing hardirq.h. PREEMPT_ACTIVE is always != 0.
> 
> 	But I don't understand what do you mean by
> 'preempt_offset & PREEMPT_ACTIVE' being always 0.
> It is always 0 for cond_resched(), cond_resched_lock()
> and cond_resched_softirq(), not for cond_resched_rcu():
> 
> (PREEMPT_ACTIVE | PREEMPT_RCU_OFFSET) & PREEMPT_ACTIVE
> should give PREEMPT_ACTIVE, not 0. We have 2 cases in
> rcu_sleep_check() for the if:
> 
> 1. !(PREEMPT_ACTIVE) => FALSE for cond_resched_rcu
> 2. !(0) => TRUE for other cond_resched_* macros
> 
> 	On x86 the code is:
> 
> __might_sleep:
>         pushl   %ebp    #
>         testl   $268435456, %ecx        #, preempt_offset
> ...
>         jne     .L240   #,
> 	// rcu_lock_map checked when PREEMPT_ACTIVE is missing
> .L240:
> 	// rcu_bh_lock_map checked

OK, apologies -- I was looking at the calls to __might_sleep() in
mainline, and missed the one that you added.  Revisiting that, a
question:

> +#ifdef CONFIG_PREEMPT_RCU
> +#define PREEMPT_RCU_OFFSET     1

Does this really want to be "1" instead of PREEMPT_OFFSET?

> +#else
> +#define PREEMPT_RCU_OFFSET     PREEMPT_CHECK_OFFSET
> +#endif
> +
> +extern int __cond_resched_rcu(void);
> +
> +#define cond_resched_rcu() ({                                  \
> +       __might_sleep(__FILE__, __LINE__, PREEMPT_ACTIVE |      \
> +                                         PREEMPT_RCU_OFFSET);  \
> +       __cond_resched_rcu();                                   \
> +})
> +

For the rest, I clearly need to revisit when more alert, because right
now I am not seeing the connection to preemptible RCU's rcu_read_lock()
implementation.

							Thanx, Paul

--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Julian Anastasov May 3, 2013, 7:52 a.m. UTC | #7
Hello,

On Thu, 2 May 2013, Paul E. McKenney wrote:

> mainline, and missed the one that you added.  Revisiting that, a
> question:
> 
> > +#ifdef CONFIG_PREEMPT_RCU
> > +#define PREEMPT_RCU_OFFSET     1
> 
> Does this really want to be "1" instead of PREEMPT_OFFSET?

	In this case when CONFIG_PREEMPT_RCU is enabled
we (RCU) do not touch the preempt counters. Instead, the units
are accounted in current->rcu_read_lock_nesting:

#define rcu_preempt_depth() (current->rcu_read_lock_nesting)

__rcu_read_lock:
	current->rcu_read_lock_nesting++;

	and the path is __might_sleep -> preempt_count_equals ->
rcu_preempt_depth

	For now both places do not use PREEMPT_OFFSET:

- #define inc_preempt_count() add_preempt_count(1)
- __rcu_read_lock: current->rcu_read_lock_nesting++;

	so, ... it does not matter much for me. In short,
the trick is in preempt_count_equals() where preempt_offset
is a combination of preempt count and RCU preempt depth:

#ifdef CONFIG_PREEMPT_RCU
#define PREEMPT_RCU_OFFSET      (0 /* preempt */ + 1 /* RCU */)
#else
#define PREEMPT_RCU_OFFSET      (PREEMPT_CHECK_OFFSET + 0 /* RCU */)
#endif

	Let me know for your preference about this definition...

Regards

--
Julian Anastasov <ja@ssi.bg>
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Paul E. McKenney May 3, 2013, 4:30 p.m. UTC | #8
On Fri, May 03, 2013 at 10:52:36AM +0300, Julian Anastasov wrote:
> 
> 	Hello,
> 
> On Thu, 2 May 2013, Paul E. McKenney wrote:
> 
> > mainline, and missed the one that you added.  Revisiting that, a
> > question:
> > 
> > > +#ifdef CONFIG_PREEMPT_RCU
> > > +#define PREEMPT_RCU_OFFSET     1
> > 
> > Does this really want to be "1" instead of PREEMPT_OFFSET?
> 
> 	In this case when CONFIG_PREEMPT_RCU is enabled
> we (RCU) do not touch the preempt counters. Instead, the units
> are accounted in current->rcu_read_lock_nesting:
> 
> #define rcu_preempt_depth() (current->rcu_read_lock_nesting)
> 
> __rcu_read_lock:
> 	current->rcu_read_lock_nesting++;
> 
> 	and the path is __might_sleep -> preempt_count_equals ->
> rcu_preempt_depth
> 
> 	For now both places do not use PREEMPT_OFFSET:
> 
> - #define inc_preempt_count() add_preempt_count(1)
> - __rcu_read_lock: current->rcu_read_lock_nesting++;
> 
> 	so, ... it does not matter much for me. In short,
> the trick is in preempt_count_equals() where preempt_offset
> is a combination of preempt count and RCU preempt depth:
> 
> #ifdef CONFIG_PREEMPT_RCU
> #define PREEMPT_RCU_OFFSET      (0 /* preempt */ + 1 /* RCU */)
> #else
> #define PREEMPT_RCU_OFFSET      (PREEMPT_CHECK_OFFSET + 0 /* RCU */)
> #endif
> 
> 	Let me know for your preference about this definition...

OK, after getting some sleep, I might have located the root cause of
my confusion yesterday.

The key point is that I don't understand why we cannot get the effect
we are looking for with the following in sched.h (or wherever):

static inline int cond_resched_rcu(void)
{
#if defined(CONFIG_DEBUG_ATOMIC_SLEEP) || !defined(CONFIG_PREEMPT_RCU)
	rcu_read_unlock();
	cond_resched();
	rcu_read_lock();
#endif
}

This adds absolutely no overhead in non-debug builds of CONFIG_PREEMPT_RCU,
does the checking in debug builds, and allows voluntary preemption in
!CONFIG_PREEMPT_RCU builds.  CONFIG_PROVE_RCU builds will check for an
(illegal) outer rcu_read_lock() in CONFIG_PREEMPT_RCU builds, and you
will get "scheduling while atomic" in response to an outer rcu_read_lock()
in !CONFIG_PREEMPT_RCU builds.

It also seems to me a lot simpler.

Does this work, or am I still missing something?

							Thanx, Paul

--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Peter Zijlstra May 3, 2013, 5:04 p.m. UTC | #9
> The key point is that I don't understand why we cannot get the effect
> we are looking for with the following in sched.h (or wherever):
> 
> static inline int cond_resched_rcu(void)
> {
> #if defined(CONFIG_DEBUG_ATOMIC_SLEEP) || !defined(CONFIG_PREEMPT_RCU)
> 	rcu_read_unlock();
> 	cond_resched();
> 	rcu_read_lock();
> #endif
> }
> 
> This adds absolutely no overhead in non-debug builds of CONFIG_PREEMPT_RCU,
> does the checking in debug builds, and allows voluntary preemption in
> !CONFIG_PREEMPT_RCU builds.  CONFIG_PROVE_RCU builds will check for an
> (illegal) outer rcu_read_lock() in CONFIG_PREEMPT_RCU builds, and you
> will get "scheduling while atomic" in response to an outer rcu_read_lock()
> in !CONFIG_PREEMPT_RCU builds.
> 
> It also seems to me a lot simpler.
> 
> Does this work, or am I still missing something?

It can do quite a number of superfluous rcu_read_unlock()/lock() pairs for
voluntary preemption kernels?
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Paul E. McKenney May 3, 2013, 5:34 p.m. UTC | #10
On Fri, May 03, 2013 at 07:04:47PM +0200, Peter Zijlstra wrote:
> > The key point is that I don't understand why we cannot get the effect
> > we are looking for with the following in sched.h (or wherever):
> > 
> > static inline int cond_resched_rcu(void)
> > {
> > #if defined(CONFIG_DEBUG_ATOMIC_SLEEP) || !defined(CONFIG_PREEMPT_RCU)
> > 	rcu_read_unlock();
> > 	cond_resched();
> > 	rcu_read_lock();
> > #endif
> > }
> > 
> > This adds absolutely no overhead in non-debug builds of CONFIG_PREEMPT_RCU,
> > does the checking in debug builds, and allows voluntary preemption in
> > !CONFIG_PREEMPT_RCU builds.  CONFIG_PROVE_RCU builds will check for an
> > (illegal) outer rcu_read_lock() in CONFIG_PREEMPT_RCU builds, and you
> > will get "scheduling while atomic" in response to an outer rcu_read_lock()
> > in !CONFIG_PREEMPT_RCU builds.
> > 
> > It also seems to me a lot simpler.
> > 
> > Does this work, or am I still missing something?
> 
> It can do quite a number of superfluous rcu_read_unlock()/lock() pairs for
> voluntary preemption kernels?

This happens in only two cases:

1.	CONFIG_PREEMPT_RCU=n kernels.  But in this case, rcu_read_unlock()
	and rcu_read_lock() are free, at least for CONFIG_PROVE_LOCKING=n
	kernels.  And if you have CONFIG_PROVE_LOCKING=y, any contribution
	from rcu_read_unlock() and rcu_read_lock() will be in the noise.

2.	CONFIG_DEBUG_ATOMIC_SLEEP=y kernels -- but in this case, you
	-want- the debugging.

So either the overhead is non-existent, or you explicitly asked for the
resulting debugging.

In particular, CONFIG_PREEMPT_RCU=y kernels have an empty static inline
function, which is free -- unless CONFIG_DEBUG_ATOMIC_SLEEP=y, in which
case you again explicitly asked for the debugging.

So I do not believe that the extra rcu_read_unlock()/lock() pairs are a
problem in any of the possible combinations of configurations.

							Thanx, Paul

--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Julian Anastasov May 3, 2013, 5:47 p.m. UTC | #11
Hello,

On Fri, 3 May 2013, Paul E. McKenney wrote:

> OK, after getting some sleep, I might have located the root cause of
> my confusion yesterday.
> 
> The key point is that I don't understand why we cannot get the effect
> we are looking for with the following in sched.h (or wherever):
> 
> static inline int cond_resched_rcu(void)
> {
> #if defined(CONFIG_DEBUG_ATOMIC_SLEEP) || !defined(CONFIG_PREEMPT_RCU)
> 	rcu_read_unlock();
> 	cond_resched();
> 	rcu_read_lock();
> #endif
> }
> 
> This adds absolutely no overhead in non-debug builds of CONFIG_PREEMPT_RCU,
> does the checking in debug builds, and allows voluntary preemption in
> !CONFIG_PREEMPT_RCU builds.  CONFIG_PROVE_RCU builds will check for an
> (illegal) outer rcu_read_lock() in CONFIG_PREEMPT_RCU builds, and you
> will get "scheduling while atomic" in response to an outer rcu_read_lock()
> in !CONFIG_PREEMPT_RCU builds.
> 
> It also seems to me a lot simpler.
> 
> Does this work, or am I still missing something?

	It should work. It is a better version of
the 2nd variant I mentioned here:

http://marc.info/?l=linux-netdev&m=136741839021257&w=2

	I'll stick to this version, hope Peter Zijlstra
agrees. Playing with PREEMPT_ACTIVE or another bit makes
the things more complex.

	To summarize:

- CONFIG_PREEMPT_RCU:
	- no empty functions called
	- CONFIG_DEBUG_ATOMIC_SLEEP can catch errors even
	for this case

- non-CONFIG_PREEMPT_RCU:
	- rcu_read_lock and rcu_read_unlock are barrier(),
	so it expands just to cond_resched()

	I'll repeat the tests tomorrow and if there are
no problems will post official version after the merge window.

Regards

--
Julian Anastasov <ja@ssi.bg>
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Peter Zijlstra May 3, 2013, 6:09 p.m. UTC | #12
> This happens in only two cases:
> 
> 1.	CONFIG_PREEMPT_RCU=n kernels.  But in this case, rcu_read_unlock()
> 	and rcu_read_lock() are free, at least for CONFIG_PROVE_LOCKING=n
> 	kernels.  And if you have CONFIG_PROVE_LOCKING=y, any contribution
> 	from rcu_read_unlock() and rcu_read_lock() will be in the noise.

Oh argh.. I completely overlooked that rcu_read_{,un}lock() are NOPs for
PREEMPT=n kernels.

/me crawls back under his stone..
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Julian Anastasov May 4, 2013, 7:23 a.m. UTC | #13
Hello,

On Fri, 3 May 2013, Paul E. McKenney wrote:

> static inline int cond_resched_rcu(void)
> {
> #if defined(CONFIG_DEBUG_ATOMIC_SLEEP) || !defined(CONFIG_PREEMPT_RCU)
> 	rcu_read_unlock();
> 	cond_resched();
> 	rcu_read_lock();
> #endif
> }
> 
> This adds absolutely no overhead in non-debug builds of CONFIG_PREEMPT_RCU,
> does the checking in debug builds, and allows voluntary preemption in
> !CONFIG_PREEMPT_RCU builds.  CONFIG_PROVE_RCU builds will check for an
> (illegal) outer rcu_read_lock() in CONFIG_PREEMPT_RCU builds, and you
> will get "scheduling while atomic" in response to an outer rcu_read_lock()
> in !CONFIG_PREEMPT_RCU builds.
> 
> It also seems to me a lot simpler.
> 
> Does this work, or am I still missing something?

	Works perfectly. Thanks! Tested CONFIG_PREEMPT_RCU=y/n,
the errors messages with extra RCU lock.

Regards

--
Julian Anastasov <ja@ssi.bg>
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Paul E. McKenney May 4, 2013, 6:03 p.m. UTC | #14
On Sat, May 04, 2013 at 10:23:31AM +0300, Julian Anastasov wrote:
> 
> 	Hello,
> 
> On Fri, 3 May 2013, Paul E. McKenney wrote:
> 
> > static inline int cond_resched_rcu(void)
> > {
> > #if defined(CONFIG_DEBUG_ATOMIC_SLEEP) || !defined(CONFIG_PREEMPT_RCU)
> > 	rcu_read_unlock();
> > 	cond_resched();
> > 	rcu_read_lock();
> > #endif
> > }
> > 
> > This adds absolutely no overhead in non-debug builds of CONFIG_PREEMPT_RCU,
> > does the checking in debug builds, and allows voluntary preemption in
> > !CONFIG_PREEMPT_RCU builds.  CONFIG_PROVE_RCU builds will check for an
> > (illegal) outer rcu_read_lock() in CONFIG_PREEMPT_RCU builds, and you
> > will get "scheduling while atomic" in response to an outer rcu_read_lock()
> > in !CONFIG_PREEMPT_RCU builds.
> > 
> > It also seems to me a lot simpler.
> > 
> > Does this work, or am I still missing something?
> 
> 	Works perfectly. Thanks! Tested CONFIG_PREEMPT_RCU=y/n,
> the errors messages with extra RCU lock.

Very good!  Please send the patch along, and I will ack it.

						Thanx, Paul

--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index b758ce1..b594759 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -480,9 +480,10 @@  static inline void rcu_preempt_sleep_check(void)
 }
 #endif /* #else #ifdef CONFIG_PROVE_RCU */
 
-#define rcu_sleep_check()						\
+#define rcu_sleep_check(locked)						\
 	do {								\
-		rcu_preempt_sleep_check();				\
+		if (!(locked))						\
+			rcu_preempt_sleep_check();			\
 		rcu_lockdep_assert(!lock_is_held(&rcu_bh_lock_map),	\
 				   "Illegal context switch in RCU-bh"	\
 				   " read-side critical section");	\
@@ -494,7 +495,7 @@  static inline void rcu_preempt_sleep_check(void)
 #else /* #ifdef CONFIG_PROVE_RCU */
 
 #define rcu_lockdep_assert(c, s) do { } while (0)
-#define rcu_sleep_check() do { } while (0)
+#define rcu_sleep_check(locked) do { } while (0)
 
 #endif /* #else #ifdef CONFIG_PROVE_RCU */
 
diff --git a/include/linux/sched.h b/include/linux/sched.h
index e692a02..027deea 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2608,6 +2608,20 @@  extern int __cond_resched_softirq(void);
 	__cond_resched_softirq();					\
 })
 
+#ifdef CONFIG_PREEMPT_RCU
+#define PREEMPT_RCU_OFFSET	1
+#else
+#define PREEMPT_RCU_OFFSET	PREEMPT_CHECK_OFFSET
+#endif
+
+extern int __cond_resched_rcu(void);
+
+#define cond_resched_rcu() ({					\
+	__might_sleep(__FILE__, __LINE__, PREEMPT_ACTIVE |	\
+					  PREEMPT_RCU_OFFSET);	\
+	__cond_resched_rcu();					\
+})
+
 /*
  * Does a critical section need to be broken due to another
  * task waiting?: (technically does not depend on CONFIG_PREEMPT,
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 67d0465..2724be7 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2793,7 +2793,7 @@  static inline void schedule_debug(struct task_struct *prev)
 	 */
 	if (unlikely(in_atomic_preempt_off() && !prev->exit_state))
 		__schedule_bug(prev);
-	rcu_sleep_check();
+	rcu_sleep_check(0);
 
 	profile_hit(SCHED_PROFILING, __builtin_return_address(0));
 
@@ -4364,6 +4364,20 @@  int __sched __cond_resched_softirq(void)
 }
 EXPORT_SYMBOL(__cond_resched_softirq);
 
+int __sched __cond_resched_rcu(void)
+{
+#ifndef CONFIG_PREEMPT_RCU
+	if (should_resched()) {
+		rcu_read_unlock();
+		__cond_resched();
+		rcu_read_lock();
+		return 1;
+	}
+#endif
+	return 0;
+}
+EXPORT_SYMBOL(__cond_resched_rcu);
+
 /**
  * yield - yield the current processor to other threads.
  *
@@ -7062,7 +7076,9 @@  void __might_sleep(const char *file, int line, int preempt_offset)
 {
 	static unsigned long prev_jiffy;	/* ratelimiting */
 
-	rcu_sleep_check(); /* WARN_ON_ONCE() by default, no rate limit reqd. */
+	/* WARN_ON_ONCE() by default, no rate limit reqd. */
+	rcu_sleep_check(preempt_offset & PREEMPT_ACTIVE);
+	preempt_offset &= ~PREEMPT_ACTIVE;
 	if ((preempt_count_equals(preempt_offset) && !irqs_disabled()) ||
 	    system_state != SYSTEM_RUNNING || oops_in_progress)
 		return;