diff mbox series

[v5,10/10] powerpc/signal64: Use __get_user() to copy sigset_t

Message ID 20210203184323.20792-11-cmr@codefail.de (mailing list archive)
State Superseded
Headers show
Series Improve signal performance on PPC64 with KUAP | expand

Checks

Context Check Description
snowpatch_ozlabs/apply_patch success Successfully applied on branch powerpc/merge (44158b256b30415079588d0fcb1bccbdc2ccd009)
snowpatch_ozlabs/build-ppc64le warning Build succeeded but added 6 new sparse warnings
snowpatch_ozlabs/build-ppc64be warning Build succeeded but added 6 new sparse warnings
snowpatch_ozlabs/build-ppc64e warning Build succeeded but added 6 new sparse warnings
snowpatch_ozlabs/build-pmac32 success Build succeeded
snowpatch_ozlabs/checkpatch success total: 0 errors, 0 warnings, 0 checks, 34 lines checked
snowpatch_ozlabs/needsstable success Patch has no Fixes tags

Commit Message

Christopher M. Riedl Feb. 3, 2021, 6:43 p.m. UTC
Usually sigset_t is exactly 8B which is a "trivial" size and does not
warrant using __copy_from_user(). Use __get_user() directly in
anticipation of future work to remove the trivial size optimizations
from __copy_from_user(). Calling __get_user() also results in a small
boost to signal handling throughput here.

Signed-off-by: Christopher M. Riedl <cmr@codefail.de>
---
 arch/powerpc/kernel/signal_64.c | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

Comments

Christopher M. Riedl Feb. 5, 2021, 4:40 a.m. UTC | #1
On Wed Feb 3, 2021 at 12:43 PM CST, Christopher M. Riedl wrote:
> Usually sigset_t is exactly 8B which is a "trivial" size and does not
> warrant using __copy_from_user(). Use __get_user() directly in
> anticipation of future work to remove the trivial size optimizations
> from __copy_from_user(). Calling __get_user() also results in a small
> boost to signal handling throughput here.
>
> Signed-off-by: Christopher M. Riedl <cmr@codefail.de>

This patch triggered sparse warnings about 'different address spaces'.
This minor fixup cleans that up:

diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
index 42fdc4a7ff72..1dfda6403e14 100644
--- a/arch/powerpc/kernel/signal_64.c
+++ b/arch/powerpc/kernel/signal_64.c
@@ -97,7 +97,7 @@ static void prepare_setup_sigcontext(struct task_struct *tsk, int ctx_has_vsx_re
 #endif /* CONFIG_VSX */
 }

-static inline int get_user_sigset(sigset_t *dst, const sigset_t *src)
+static inline int get_user_sigset(sigset_t *dst, const sigset_t __user *src)
 {
	if (sizeof(sigset_t) <= 8)
		return __get_user(dst->sig[0], &src->sig[0]);
Christophe Leroy Feb. 9, 2021, 9:45 p.m. UTC | #2
"Christopher M. Riedl" <cmr@codefail.de> a écrit :

> Usually sigset_t is exactly 8B which is a "trivial" size and does not
> warrant using __copy_from_user(). Use __get_user() directly in
> anticipation of future work to remove the trivial size optimizations
> from __copy_from_user(). Calling __get_user() also results in a small
> boost to signal handling throughput here.
>
> Signed-off-by: Christopher M. Riedl <cmr@codefail.de>
> ---
>  arch/powerpc/kernel/signal_64.c | 14 ++++++++++++--
>  1 file changed, 12 insertions(+), 2 deletions(-)
>
> diff --git a/arch/powerpc/kernel/signal_64.c  
> b/arch/powerpc/kernel/signal_64.c
> index 817b64e1e409..42fdc4a7ff72 100644
> --- a/arch/powerpc/kernel/signal_64.c
> +++ b/arch/powerpc/kernel/signal_64.c
> @@ -97,6 +97,14 @@ static void prepare_setup_sigcontext(struct  
> task_struct *tsk, int ctx_has_vsx_re
>  #endif /* CONFIG_VSX */
>  }
>
> +static inline int get_user_sigset(sigset_t *dst, const sigset_t *src)

Should be called __get_user_sigset() as it is a helper for __get_user()

> +{
> +	if (sizeof(sigset_t) <= 8)

We should always use __get_user(), see below.

> +		return __get_user(dst->sig[0], &src->sig[0]);

I think the above will not work on ppc32, it will only copy 4 bytes.
You must cast the source to u64*

> +	else
> +		return __copy_from_user(dst, src, sizeof(sigset_t));

I see no point in keeping this alternative. Today sigset_ t is fixed.
If you fear one day someone might change it to something different  
than a u64, just add a BUILD_BUG_ON(sizeof(sigset_t) != sizeof(u64));

> +}
> +
>  /*
>   * Set up the sigcontext for the signal frame.
>   */
> @@ -701,8 +709,9 @@ SYSCALL_DEFINE3(swapcontext, struct ucontext  
> __user *, old_ctx,
>  	 * We kill the task with a SIGSEGV in this situation.
>  	 */
>
> -	if (__copy_from_user(&set, &new_ctx->uc_sigmask, sizeof(set)))
> +	if (get_user_sigset(&set, &new_ctx->uc_sigmask))
>  		do_exit(SIGSEGV);
> +

This white space is not part of the change, keep patches to the  
minimum, avoid cosmetic

>  	set_current_blocked(&set);
>
>  	if (!user_read_access_begin(new_ctx, ctx_size))
> @@ -740,8 +749,9 @@ SYSCALL_DEFINE0(rt_sigreturn)
>  	if (!access_ok(uc, sizeof(*uc)))
>  		goto badframe;
>
> -	if (__copy_from_user(&set, &uc->uc_sigmask, sizeof(set)))
> +	if (get_user_sigset(&set, &uc->uc_sigmask))
>  		goto badframe;
> +

Same

>  	set_current_blocked(&set);
>
>  #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
> --
> 2.26.1
Christopher M. Riedl Feb. 10, 2021, 4:16 a.m. UTC | #3
On Tue Feb 9, 2021 at 3:45 PM CST, Christophe Leroy wrote:
> "Christopher M. Riedl" <cmr@codefail.de> a écrit :
>
> > Usually sigset_t is exactly 8B which is a "trivial" size and does not
> > warrant using __copy_from_user(). Use __get_user() directly in
> > anticipation of future work to remove the trivial size optimizations
> > from __copy_from_user(). Calling __get_user() also results in a small
> > boost to signal handling throughput here.
> >
> > Signed-off-by: Christopher M. Riedl <cmr@codefail.de>
> > ---
> >  arch/powerpc/kernel/signal_64.c | 14 ++++++++++++--
> >  1 file changed, 12 insertions(+), 2 deletions(-)
> >
> > diff --git a/arch/powerpc/kernel/signal_64.c  
> > b/arch/powerpc/kernel/signal_64.c
> > index 817b64e1e409..42fdc4a7ff72 100644
> > --- a/arch/powerpc/kernel/signal_64.c
> > +++ b/arch/powerpc/kernel/signal_64.c
> > @@ -97,6 +97,14 @@ static void prepare_setup_sigcontext(struct  
> > task_struct *tsk, int ctx_has_vsx_re
> >  #endif /* CONFIG_VSX */
> >  }
> >
> > +static inline int get_user_sigset(sigset_t *dst, const sigset_t *src)
>
> Should be called __get_user_sigset() as it is a helper for __get_user()

Ok makes sense.

>
> > +{
> > +	if (sizeof(sigset_t) <= 8)
>
> We should always use __get_user(), see below.
>
> > +		return __get_user(dst->sig[0], &src->sig[0]);
>
> I think the above will not work on ppc32, it will only copy 4 bytes.
> You must cast the source to u64*

Well this is signal_64.c :) Looks like ppc32 needs the same thing so
I'll just move this into signal.h and use it for both. 

The only exception would be the COMPAT case in signal_32.c which ends up
calling the common get_compat_sigset(). Updating that is probably
outside the scope of this series.

>
> > +	else
> > +		return __copy_from_user(dst, src, sizeof(sigset_t));
>
> I see no point in keeping this alternative. Today sigset_ t is fixed.
> If you fear one day someone might change it to something different
> than a u64, just add a BUILD_BUG_ON(sizeof(sigset_t) != sizeof(u64));

Ah yes that is much better - thanks for the suggestion.

>
> > +}
> > +
> >  /*
> >   * Set up the sigcontext for the signal frame.
> >   */
> > @@ -701,8 +709,9 @@ SYSCALL_DEFINE3(swapcontext, struct ucontext  
> > __user *, old_ctx,
> >  	 * We kill the task with a SIGSEGV in this situation.
> >  	 */
> >
> > -	if (__copy_from_user(&set, &new_ctx->uc_sigmask, sizeof(set)))
> > +	if (get_user_sigset(&set, &new_ctx->uc_sigmask))
> >  		do_exit(SIGSEGV);
> > +
>
> This white space is not part of the change, keep patches to the
> minimum, avoid cosmetic

Just a (bad?) habit on my part that I missed - I'll remove this one and
the one further below.

>
> >  	set_current_blocked(&set);
> >
> >  	if (!user_read_access_begin(new_ctx, ctx_size))
> > @@ -740,8 +749,9 @@ SYSCALL_DEFINE0(rt_sigreturn)
> >  	if (!access_ok(uc, sizeof(*uc)))
> >  		goto badframe;
> >
> > -	if (__copy_from_user(&set, &uc->uc_sigmask, sizeof(set)))
> > +	if (get_user_sigset(&set, &uc->uc_sigmask))
> >  		goto badframe;
> > +
>
> Same
>
> >  	set_current_blocked(&set);
> >
> >  #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
> > --
> > 2.26.1
Daniel Axtens Feb. 12, 2021, 9:21 p.m. UTC | #4
"Christopher M. Riedl" <cmr@codefail.de> writes:

> Usually sigset_t is exactly 8B which is a "trivial" size and does not
> warrant using __copy_from_user(). Use __get_user() directly in
> anticipation of future work to remove the trivial size optimizations
> from __copy_from_user(). Calling __get_user() also results in a small
> boost to signal handling throughput here.

Modulo the comments from Christophe, this looks good to me. It seems to
do what it says on the tin.

Reviewed-by: Daniel Axtens <dja@axtens.net>

Do you know if this patch is responsible for the slightly increase in
radix performance you observed in your cover letter? The rest of the
series shouldn't really make things faster than the no-KUAP case...

Kind regards,
Daniel

>
> Signed-off-by: Christopher M. Riedl <cmr@codefail.de>
> ---
>  arch/powerpc/kernel/signal_64.c | 14 ++++++++++++--
>  1 file changed, 12 insertions(+), 2 deletions(-)
>
> diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
> index 817b64e1e409..42fdc4a7ff72 100644
> --- a/arch/powerpc/kernel/signal_64.c
> +++ b/arch/powerpc/kernel/signal_64.c
> @@ -97,6 +97,14 @@ static void prepare_setup_sigcontext(struct task_struct *tsk, int ctx_has_vsx_re
>  #endif /* CONFIG_VSX */
>  }
>  
> +static inline int get_user_sigset(sigset_t *dst, const sigset_t *src)
> +{
> +	if (sizeof(sigset_t) <= 8)
> +		return __get_user(dst->sig[0], &src->sig[0]);
> +	else
> +		return __copy_from_user(dst, src, sizeof(sigset_t));
> +}
> +
>  /*
>   * Set up the sigcontext for the signal frame.
>   */
> @@ -701,8 +709,9 @@ SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx,
>  	 * We kill the task with a SIGSEGV in this situation.
>  	 */
>  
> -	if (__copy_from_user(&set, &new_ctx->uc_sigmask, sizeof(set)))
> +	if (get_user_sigset(&set, &new_ctx->uc_sigmask))
>  		do_exit(SIGSEGV);
> +
>  	set_current_blocked(&set);
>  
>  	if (!user_read_access_begin(new_ctx, ctx_size))
> @@ -740,8 +749,9 @@ SYSCALL_DEFINE0(rt_sigreturn)
>  	if (!access_ok(uc, sizeof(*uc)))
>  		goto badframe;
>  
> -	if (__copy_from_user(&set, &uc->uc_sigmask, sizeof(set)))
> +	if (get_user_sigset(&set, &uc->uc_sigmask))
>  		goto badframe;
> +
>  	set_current_blocked(&set);
>  
>  #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
> -- 
> 2.26.1
Christopher M. Riedl Feb. 17, 2021, 7:53 p.m. UTC | #5
On Fri Feb 12, 2021 at 3:21 PM CST, Daniel Axtens wrote:
> "Christopher M. Riedl" <cmr@codefail.de> writes:
>
> > Usually sigset_t is exactly 8B which is a "trivial" size and does not
> > warrant using __copy_from_user(). Use __get_user() directly in
> > anticipation of future work to remove the trivial size optimizations
> > from __copy_from_user(). Calling __get_user() also results in a small
> > boost to signal handling throughput here.
>
> Modulo the comments from Christophe, this looks good to me. It seems to
> do what it says on the tin.
>
> Reviewed-by: Daniel Axtens <dja@axtens.net>
>
> Do you know if this patch is responsible for the slightly increase in
> radix performance you observed in your cover letter? The rest of the
> series shouldn't really make things faster than the no-KUAP case...

No, this patch just results in a really small improvement overall.

I looked over this again and I think the reason for the speedup is that
my implementation of unsafe_copy_from_user() in this series calls
__copy_tofrom_user() directly avoiding a barrier_nospec(). This speeds
up all the unsafe_get_from_user() calls in this version.

Skipping the barrier_nospec() like that is incorrect, but Christophe
recently sent a patch which moves barrier_nospec() into the uaccess
allowance helpers. It looks like mpe has already accepted that patch:

https://lists.ozlabs.org/pipermail/linuxppc-dev/2021-February/223959.html

That means that my implementation of unsafe_copy_from_user() is _now_
correct _and_ faster. We do not need to call barrier_nospec() since the
precondition for the 'unsafe' routine is that we called one of the
helpers to open up a uaccess window earlier.

>
> Kind regards,
> Daniel
>
> >
> > Signed-off-by: Christopher M. Riedl <cmr@codefail.de>
> > ---
> >  arch/powerpc/kernel/signal_64.c | 14 ++++++++++++--
> >  1 file changed, 12 insertions(+), 2 deletions(-)
> >
> > diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
> > index 817b64e1e409..42fdc4a7ff72 100644
> > --- a/arch/powerpc/kernel/signal_64.c
> > +++ b/arch/powerpc/kernel/signal_64.c
> > @@ -97,6 +97,14 @@ static void prepare_setup_sigcontext(struct task_struct *tsk, int ctx_has_vsx_re
> >  #endif /* CONFIG_VSX */
> >  }
> >  
> > +static inline int get_user_sigset(sigset_t *dst, const sigset_t *src)
> > +{
> > +	if (sizeof(sigset_t) <= 8)
> > +		return __get_user(dst->sig[0], &src->sig[0]);
> > +	else
> > +		return __copy_from_user(dst, src, sizeof(sigset_t));
> > +}
> > +
> >  /*
> >   * Set up the sigcontext for the signal frame.
> >   */
> > @@ -701,8 +709,9 @@ SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx,
> >  	 * We kill the task with a SIGSEGV in this situation.
> >  	 */
> >  
> > -	if (__copy_from_user(&set, &new_ctx->uc_sigmask, sizeof(set)))
> > +	if (get_user_sigset(&set, &new_ctx->uc_sigmask))
> >  		do_exit(SIGSEGV);
> > +
> >  	set_current_blocked(&set);
> >  
> >  	if (!user_read_access_begin(new_ctx, ctx_size))
> > @@ -740,8 +749,9 @@ SYSCALL_DEFINE0(rt_sigreturn)
> >  	if (!access_ok(uc, sizeof(*uc)))
> >  		goto badframe;
> >  
> > -	if (__copy_from_user(&set, &uc->uc_sigmask, sizeof(set)))
> > +	if (get_user_sigset(&set, &uc->uc_sigmask))
> >  		goto badframe;
> > +
> >  	set_current_blocked(&set);
> >  
> >  #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
> > -- 
> > 2.26.1
diff mbox series

Patch

diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
index 817b64e1e409..42fdc4a7ff72 100644
--- a/arch/powerpc/kernel/signal_64.c
+++ b/arch/powerpc/kernel/signal_64.c
@@ -97,6 +97,14 @@  static void prepare_setup_sigcontext(struct task_struct *tsk, int ctx_has_vsx_re
 #endif /* CONFIG_VSX */
 }
 
+static inline int get_user_sigset(sigset_t *dst, const sigset_t *src)
+{
+	if (sizeof(sigset_t) <= 8)
+		return __get_user(dst->sig[0], &src->sig[0]);
+	else
+		return __copy_from_user(dst, src, sizeof(sigset_t));
+}
+
 /*
  * Set up the sigcontext for the signal frame.
  */
@@ -701,8 +709,9 @@  SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx,
 	 * We kill the task with a SIGSEGV in this situation.
 	 */
 
-	if (__copy_from_user(&set, &new_ctx->uc_sigmask, sizeof(set)))
+	if (get_user_sigset(&set, &new_ctx->uc_sigmask))
 		do_exit(SIGSEGV);
+
 	set_current_blocked(&set);
 
 	if (!user_read_access_begin(new_ctx, ctx_size))
@@ -740,8 +749,9 @@  SYSCALL_DEFINE0(rt_sigreturn)
 	if (!access_ok(uc, sizeof(*uc)))
 		goto badframe;
 
-	if (__copy_from_user(&set, &uc->uc_sigmask, sizeof(set)))
+	if (get_user_sigset(&set, &uc->uc_sigmask))
 		goto badframe;
+
 	set_current_blocked(&set);
 
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM