Patchwork [v2,10/10] kernel: might_fault does not imply might_sleep

login
register
mail settings
Submitter Peter Zijlstra
Date May 21, 2013, 11:57 a.m.
Message ID <20130521115734.GA9554@twins.programming.kicks-ass.net>
Download mbox | patch
Permalink /patch/245303/
State Not Applicable
Headers show

Comments

Peter Zijlstra - May 21, 2013, 11:57 a.m.
On Sun, May 19, 2013 at 12:35:26PM +0300, Michael S. Tsirkin wrote:
> > > --- a/include/linux/kernel.h
> > > +++ b/include/linux/kernel.h
> > > @@ -198,7 +198,6 @@ void might_fault(void);
> > >  #else
> > >  static inline void might_fault(void)
> > >  {
> > > -	might_sleep();
> > 
> > This removes potential resched points for PREEMPT_VOLUNTARY -- was that
> > intentional?
> 
> No it's a bug. Thanks for pointing this out.
> OK so I guess it should be might_sleep_if(!in_atomic())
> and this means might_fault would have to move from linux/kernel.h to
> linux/uaccess.h, since in_atomic() is in linux/hardirq.h
> 
> Makes sense?

So the only difference between PROVE_LOCKING and not should be the
might_lock_read() thing; so how about something like this?

---
 include/linux/kernel.h  |  7 ++-----
 include/linux/uaccess.h | 26 ++++++++++++++++++++++++++
 mm/memory.c             | 14 ++------------
 3 files changed, 30 insertions(+), 17 deletions(-)
Michael S. Tsirkin - May 22, 2013, 9:47 a.m.
On Tue, May 21, 2013 at 01:57:34PM +0200, Peter Zijlstra wrote:
> On Sun, May 19, 2013 at 12:35:26PM +0300, Michael S. Tsirkin wrote:
> > > > --- a/include/linux/kernel.h
> > > > +++ b/include/linux/kernel.h
> > > > @@ -198,7 +198,6 @@ void might_fault(void);
> > > >  #else
> > > >  static inline void might_fault(void)
> > > >  {
> > > > -	might_sleep();
> > > 
> > > This removes potential resched points for PREEMPT_VOLUNTARY -- was that
> > > intentional?
> > 
> > No it's a bug. Thanks for pointing this out.
> > OK so I guess it should be might_sleep_if(!in_atomic())
> > and this means might_fault would have to move from linux/kernel.h to
> > linux/uaccess.h, since in_atomic() is in linux/hardirq.h
> > 
> > Makes sense?
> 
> So the only difference between PROVE_LOCKING and not should be the
> might_lock_read() thing; so how about something like this?
> 
> ---
>  include/linux/kernel.h  |  7 ++-----
>  include/linux/uaccess.h | 26 ++++++++++++++++++++++++++
>  mm/memory.c             | 14 ++------------
>  3 files changed, 30 insertions(+), 17 deletions(-)
> 
> diff --git a/include/linux/kernel.h b/include/linux/kernel.h
> index e96329c..70812f4 100644
> --- a/include/linux/kernel.h
> +++ b/include/linux/kernel.h
> @@ -194,12 +194,9 @@ extern int _cond_resched(void);
>  	})
>  
>  #ifdef CONFIG_PROVE_LOCKING
> -void might_fault(void);
> +void might_fault_lockdep(void);
>  #else
> -static inline void might_fault(void)
> -{
> -	might_sleep();
> -}
> +static inline void might_fault_lockdep(void) { }
>  #endif
>  
>  extern struct atomic_notifier_head panic_notifier_list;
> diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h
> index 5ca0951..50a2cc9 100644
> --- a/include/linux/uaccess.h
> +++ b/include/linux/uaccess.h
> @@ -38,6 +38,32 @@ static inline void pagefault_enable(void)
>  	preempt_check_resched();
>  }
>  
> +static inline bool __can_fault(void)
> +{
> +	/*
> +	 * Some code (nfs/sunrpc) uses socket ops on kernel memory while
> +	 * holding the mmap_sem, this is safe because kernel memory doesn't
> +	 * get paged out, therefore we'll never actually fault, and the
> +	 * below annotations will generate false positives.
> +	 */
> +	if (segment_eq(get_fs(), KERNEL_DS))
> +		return false;
> +
> +	if (in_atomic() /* || pagefault_disabled() */)

One question here: I'm guessing you put this comment here
for illustrative purposes, implying code that will
be enabled in -rt?
We don't want it upstream I think, right?


> +		return false;
> +
> +	return true;
> +}
> +
> +static inline void might_fault(void)
> +{
> +	if (!__can_fault())
> +		return;
> +
> +	might_sleep();
> +	might_fault_lockdep();
> +}
> +
>  #ifndef ARCH_HAS_NOCACHE_UACCESS
>  
>  static inline unsigned long __copy_from_user_inatomic_nocache(void *to,
> diff --git a/mm/memory.c b/mm/memory.c
> index 6dc1882..266610c 100644
> --- a/mm/memory.c
> +++ b/mm/memory.c
> @@ -4211,19 +4211,9 @@ void print_vma_addr(char *prefix, unsigned long ip)
>  }
>  
>  #ifdef CONFIG_PROVE_LOCKING
> -void might_fault(void)
> +void might_fault_lockdep(void)
>  {
>  	/*
> -	 * Some code (nfs/sunrpc) uses socket ops on kernel memory while
> -	 * holding the mmap_sem, this is safe because kernel memory doesn't
> -	 * get paged out, therefore we'll never actually fault, and the
> -	 * below annotations will generate false positives.
> -	 */
> -	if (segment_eq(get_fs(), KERNEL_DS))
> -		return;
> -
> -	might_sleep();
> -	/*
>  	 * it would be nicer only to annotate paths which are not under
>  	 * pagefault_disable, however that requires a larger audit and
>  	 * providing helpers like get_user_atomic.
> @@ -4231,7 +4221,7 @@ void might_fault(void)
>  	if (!in_atomic() && current->mm)
>  		might_lock_read(&current->mm->mmap_sem);
>  }
> -EXPORT_SYMBOL(might_fault);
> +EXPORT_SYMBOL(might_fault_lockdep);
>  #endif
>  
>  #if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLBFS)
Peter Zijlstra - May 22, 2013, 10:16 a.m.
On Wed, May 22, 2013 at 12:47:09PM +0300, Michael S. Tsirkin wrote:
> >  
> > +static inline bool __can_fault(void)
> > +{
> > +	/*
> > +	 * Some code (nfs/sunrpc) uses socket ops on kernel memory while
> > +	 * holding the mmap_sem, this is safe because kernel memory doesn't
> > +	 * get paged out, therefore we'll never actually fault, and the
> > +	 * below annotations will generate false positives.
> > +	 */
> > +	if (segment_eq(get_fs(), KERNEL_DS))
> > +		return false;
> > +
> > +	if (in_atomic() /* || pagefault_disabled() */)
> 
> One question here: I'm guessing you put this comment here
> for illustrative purposes, implying code that will
> be enabled in -rt?
> We don't want it upstream I think, right?

Right, and as a reminder that when we do this we need to add a patch to
-rt. But yeah, we should have a look and see if its worth pulling those
patches from -rt into mainline in some way shape or form. They're big
but trivial IIRC.

I'm fine with you leaving that comment out though..
Michael S. Tsirkin - May 22, 2013, 8:38 p.m.
On Tue, May 21, 2013 at 01:57:34PM +0200, Peter Zijlstra wrote:
> On Sun, May 19, 2013 at 12:35:26PM +0300, Michael S. Tsirkin wrote:
> > > > --- a/include/linux/kernel.h
> > > > +++ b/include/linux/kernel.h
> > > > @@ -198,7 +198,6 @@ void might_fault(void);
> > > >  #else
> > > >  static inline void might_fault(void)
> > > >  {
> > > > -	might_sleep();
> > > 
> > > This removes potential resched points for PREEMPT_VOLUNTARY -- was that
> > > intentional?
> > 
> > No it's a bug. Thanks for pointing this out.
> > OK so I guess it should be might_sleep_if(!in_atomic())
> > and this means might_fault would have to move from linux/kernel.h to
> > linux/uaccess.h, since in_atomic() is in linux/hardirq.h
> > 
> > Makes sense?
> 
> So the only difference between PROVE_LOCKING and not should be the
> might_lock_read() thing; so how about something like this?

So the problem with the below is that might_fault is needed
in asm/uaccess.h.
I'm still trying various approaches but the dependencies there
are very complex.

> ---
>  include/linux/kernel.h  |  7 ++-----
>  include/linux/uaccess.h | 26 ++++++++++++++++++++++++++
>  mm/memory.c             | 14 ++------------
>  3 files changed, 30 insertions(+), 17 deletions(-)
> 
> diff --git a/include/linux/kernel.h b/include/linux/kernel.h
> index e96329c..70812f4 100644
> --- a/include/linux/kernel.h
> +++ b/include/linux/kernel.h
> @@ -194,12 +194,9 @@ extern int _cond_resched(void);
>  	})
>  
>  #ifdef CONFIG_PROVE_LOCKING
> -void might_fault(void);
> +void might_fault_lockdep(void);
>  #else
> -static inline void might_fault(void)
> -{
> -	might_sleep();
> -}
> +static inline void might_fault_lockdep(void) { }
>  #endif
>  
>  extern struct atomic_notifier_head panic_notifier_list;
> diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h
> index 5ca0951..50a2cc9 100644
> --- a/include/linux/uaccess.h
> +++ b/include/linux/uaccess.h
> @@ -38,6 +38,32 @@ static inline void pagefault_enable(void)
>  	preempt_check_resched();
>  }
>  
> +static inline bool __can_fault(void)
> +{
> +	/*
> +	 * Some code (nfs/sunrpc) uses socket ops on kernel memory while
> +	 * holding the mmap_sem, this is safe because kernel memory doesn't
> +	 * get paged out, therefore we'll never actually fault, and the
> +	 * below annotations will generate false positives.
> +	 */
> +	if (segment_eq(get_fs(), KERNEL_DS))
> +		return false;
> +
> +	if (in_atomic() /* || pagefault_disabled() */)
> +		return false;
> +
> +	return true;
> +}
> +
> +static inline void might_fault(void)
> +{
> +	if (!__can_fault())
> +		return;
> +
> +	might_sleep();
> +	might_fault_lockdep();
> +}
> +
>  #ifndef ARCH_HAS_NOCACHE_UACCESS
>  
>  static inline unsigned long __copy_from_user_inatomic_nocache(void *to,
> diff --git a/mm/memory.c b/mm/memory.c
> index 6dc1882..266610c 100644
> --- a/mm/memory.c
> +++ b/mm/memory.c
> @@ -4211,19 +4211,9 @@ void print_vma_addr(char *prefix, unsigned long ip)
>  }
>  
>  #ifdef CONFIG_PROVE_LOCKING
> -void might_fault(void)
> +void might_fault_lockdep(void)
>  {
>  	/*
> -	 * Some code (nfs/sunrpc) uses socket ops on kernel memory while
> -	 * holding the mmap_sem, this is safe because kernel memory doesn't
> -	 * get paged out, therefore we'll never actually fault, and the
> -	 * below annotations will generate false positives.
> -	 */
> -	if (segment_eq(get_fs(), KERNEL_DS))
> -		return;
> -
> -	might_sleep();
> -	/*
>  	 * it would be nicer only to annotate paths which are not under
>  	 * pagefault_disable, however that requires a larger audit and
>  	 * providing helpers like get_user_atomic.
> @@ -4231,7 +4221,7 @@ void might_fault(void)
>  	if (!in_atomic() && current->mm)
>  		might_lock_read(&current->mm->mmap_sem);
>  }
> -EXPORT_SYMBOL(might_fault);
> +EXPORT_SYMBOL(might_fault_lockdep);
>  #endif
>  
>  #if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLBFS)

Patch

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index e96329c..70812f4 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -194,12 +194,9 @@  extern int _cond_resched(void);
 	})
 
 #ifdef CONFIG_PROVE_LOCKING
-void might_fault(void);
+void might_fault_lockdep(void);
 #else
-static inline void might_fault(void)
-{
-	might_sleep();
-}
+static inline void might_fault_lockdep(void) { }
 #endif
 
 extern struct atomic_notifier_head panic_notifier_list;
diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h
index 5ca0951..50a2cc9 100644
--- a/include/linux/uaccess.h
+++ b/include/linux/uaccess.h
@@ -38,6 +38,32 @@  static inline void pagefault_enable(void)
 	preempt_check_resched();
 }
 
+static inline bool __can_fault(void)
+{
+	/*
+	 * Some code (nfs/sunrpc) uses socket ops on kernel memory while
+	 * holding the mmap_sem, this is safe because kernel memory doesn't
+	 * get paged out, therefore we'll never actually fault, and the
+	 * below annotations will generate false positives.
+	 */
+	if (segment_eq(get_fs(), KERNEL_DS))
+		return false;
+
+	if (in_atomic() /* || pagefault_disabled() */)
+		return false;
+
+	return true;
+}
+
+static inline void might_fault(void)
+{
+	if (!__can_fault())
+		return;
+
+	might_sleep();
+	might_fault_lockdep();
+}
+
 #ifndef ARCH_HAS_NOCACHE_UACCESS
 
 static inline unsigned long __copy_from_user_inatomic_nocache(void *to,
diff --git a/mm/memory.c b/mm/memory.c
index 6dc1882..266610c 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -4211,19 +4211,9 @@  void print_vma_addr(char *prefix, unsigned long ip)
 }
 
 #ifdef CONFIG_PROVE_LOCKING
-void might_fault(void)
+void might_fault_lockdep(void)
 {
 	/*
-	 * Some code (nfs/sunrpc) uses socket ops on kernel memory while
-	 * holding the mmap_sem, this is safe because kernel memory doesn't
-	 * get paged out, therefore we'll never actually fault, and the
-	 * below annotations will generate false positives.
-	 */
-	if (segment_eq(get_fs(), KERNEL_DS))
-		return;
-
-	might_sleep();
-	/*
 	 * it would be nicer only to annotate paths which are not under
 	 * pagefault_disable, however that requires a larger audit and
 	 * providing helpers like get_user_atomic.
@@ -4231,7 +4221,7 @@  void might_fault(void)
 	if (!in_atomic() && current->mm)
 		might_lock_read(&current->mm->mmap_sem);
 }
-EXPORT_SYMBOL(might_fault);
+EXPORT_SYMBOL(might_fault_lockdep);
 #endif
 
 #if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLBFS)