[RFC,V1,6/6] powerpc: Enable Generic Entry/Exit for syscalls.

Message ID 20250428152225.66044-9-mchauras@linux.ibm.com (mailing list archive)
State New
Series Generic Entry/Exit support for ppc64

Commit Message

Mukesh Kumar Chaurasiya April 28, 2025, 3:22 p.m. UTC
Enable the syscall entry and exit paths via the generic entry framework.

Signed-off-by: Mukesh Kumar Chaurasiya <mchauras@linux.ibm.com>
---
 arch/powerpc/Kconfig                |   1 +
 arch/powerpc/kernel/interrupt.c     |  46 +++++++----
 arch/powerpc/kernel/ptrace/ptrace.c | 103 ------------------------
 arch/powerpc/kernel/signal.c        |   8 +-
 arch/powerpc/kernel/syscall.c       | 117 +---------------------------
 5 files changed, 38 insertions(+), 237 deletions(-)

Comments

Shrikanth Hegde April 29, 2025, 6:11 a.m. UTC | #1
On 4/28/25 20:52, Mukesh Kumar Chaurasiya wrote:
> Enable the syscall entry and exit paths via the generic entry framework.
> 
> Signed-off-by: Mukesh Kumar Chaurasiya <mchauras@linux.ibm.com>

Hi Mukesh. Thanks for working on this. Trying to go through it.

> ---
>   arch/powerpc/Kconfig                |   1 +
>   arch/powerpc/kernel/interrupt.c     |  46 +++++++----
>   arch/powerpc/kernel/ptrace/ptrace.c | 103 ------------------------
>   arch/powerpc/kernel/signal.c        |   8 +-
>   arch/powerpc/kernel/syscall.c       | 117 +---------------------------
>   5 files changed, 38 insertions(+), 237 deletions(-)
> 
> diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
> index 6722625a406a0..45b70ccf7c89e 100644
> --- a/arch/powerpc/Kconfig
> +++ b/arch/powerpc/Kconfig
> @@ -202,6 +202,7 @@ config PPC
>   	select GENERIC_CPU_AUTOPROBE
>   	select GENERIC_CPU_VULNERABILITIES	if PPC_BARRIER_NOSPEC
>   	select GENERIC_EARLY_IOREMAP
> +	select GENERIC_ENTRY

IMHO this should be enabled after irqentry_* is supported too.

>   	select GENERIC_GETTIMEOFDAY
>   	select GENERIC_IDLE_POLL_SETUP
>   	select GENERIC_IOREMAP
> diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c
> index 7f31f3fb9c1d8..8731064631de0 100644
> --- a/arch/powerpc/kernel/interrupt.c
> +++ b/arch/powerpc/kernel/interrupt.c
> @@ -1,6 +1,7 @@
>   // SPDX-License-Identifier: GPL-2.0-or-later
>   
>   #include <linux/context_tracking.h>
> +#include <linux/entry-common.h>
>   #include <linux/err.h>
>   #include <linux/compat.h>
>   #include <linux/rseq.h>
> @@ -163,15 +164,10 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3,
>   	unsigned long ret = 0;
>   	bool is_not_scv = !IS_ENABLED(CONFIG_PPC_BOOK3S_64) || !scv;
>   
> -	CT_WARN_ON(ct_state() == CT_STATE_USER);
> -
>   	kuap_assert_locked();
>   
>   	regs->result = r3;
>   
> -	/* Check whether the syscall is issued inside a restartable sequence */
> -	rseq_syscall(regs);
> -
>   	ti_flags = read_thread_flags();
>   
>   	if (unlikely(r3 >= (unsigned long)-MAX_ERRNO) && is_not_scv) {
> @@ -192,13 +188,27 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3,
>   	}
>   
>   	if (unlikely(ti_flags & _TIF_SYSCALL_DOTRACE)) {
> -		do_syscall_trace_leave(regs);
>   		ret |= _TIF_RESTOREALL;
>   	}
>   
> -	local_irq_disable();
> -	ret = interrupt_exit_user_prepare_main(ret, regs);

I don't understand why this loop is needed.

> +again:
> +	syscall_exit_to_user_mode(regs);

exit_to_user_mode() also calls user_enter_irqoff(),
so the user_enter_irqoff() below may be a no-op or wrong.

also, syscall_exit_to_user_mode disables local interrupts first.

is local_irq_enable fixing something here?
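
For reference, syscall_exit_to_user_mode() in the generic framework does roughly the following (simplified outline of kernel/entry/common.c; instrumentation and internal wrappers omitted):

	void syscall_exit_to_user_mode(struct pt_regs *regs)
	{
		syscall_exit_to_user_mode_prepare(regs); /* audit, tracepoints, ptrace exit */
		local_irq_disable_exit_to_user();        /* interrupts off for the exit work */
		exit_to_user_mode_prepare(regs);         /* TIF work loop, rseq, lockdep prep */
		exit_to_user_mode();                     /* context tracking: user_enter_irqoff() */
	}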

> +
> +	user_enter_irqoff();
> +	if (!prep_irq_for_enabled_exit(true)) {
> +		user_exit_irqoff();
> +		local_irq_enable();
> +		local_irq_disable();
> +		goto again;
> +	}
> +
> +	/* Restore user access locks last */
> +	kuap_user_restore(regs);
>   
> +	if (unlikely((local_paca->generic_fw_flags & GFW_RESTORE_ALL) == GFW_RESTORE_ALL)) {
> +		ret |= _TIF_RESTOREALL;
> +		local_paca->generic_fw_flags &= ~GFW_RESTORE_ALL;
> +	}
>   #ifdef CONFIG_PPC64
>   	regs->exit_result = ret;
>   #endif
> @@ -209,6 +219,7 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3,
>   #ifdef CONFIG_PPC64
>   notrace unsigned long syscall_exit_restart(unsigned long r3, struct pt_regs *regs)
>   {
> +	unsigned long ret = 0;
>   	/*
>   	 * This is called when detecting a soft-pending interrupt as well as
>   	 * an alternate-return interrupt. So we can't just have the alternate
> @@ -222,14 +233,23 @@ notrace unsigned long syscall_exit_restart(unsigned long r3, struct pt_regs *reg
>   #ifdef CONFIG_PPC_BOOK3S_64
>   	set_kuap(AMR_KUAP_BLOCKED);
>   #endif
> +again:
> +	syscall_exit_to_user_mode(regs);
>   
> -	trace_hardirqs_off();
> -	user_exit_irqoff();
> -	account_cpu_user_entry();
> +	user_enter_irqoff();
> +	if (!prep_irq_for_enabled_exit(true)) {
> +		user_exit_irqoff();
> +		local_irq_enable();
> +		local_irq_disable();
> +		goto again;
> +	}
>   
> -	BUG_ON(!user_mode(regs));
> +	if (unlikely((local_paca->generic_fw_flags & GFW_RESTORE_ALL) == GFW_RESTORE_ALL)) {
> +		ret = _TIF_RESTOREALL;
> +		local_paca->generic_fw_flags &= ~GFW_RESTORE_ALL;
> +	}
>   
> -	regs->exit_result = interrupt_exit_user_prepare_main(regs->exit_result, regs);
> +	regs->exit_result |= ret;
>   
>   	return regs->exit_result;
>   }
> diff --git a/arch/powerpc/kernel/ptrace/ptrace.c b/arch/powerpc/kernel/ptrace/ptrace.c
> index c6997df632873..2a2b0b94a3eaa 100644
> --- a/arch/powerpc/kernel/ptrace/ptrace.c
> +++ b/arch/powerpc/kernel/ptrace/ptrace.c
> @@ -21,9 +21,6 @@
>   #include <asm/switch_to.h>
>   #include <asm/debug.h>
>   
> -#define CREATE_TRACE_POINTS
> -#include <trace/events/syscalls.h>
> -
>   #include "ptrace-decl.h"
>   
>   /*
> @@ -233,106 +230,6 @@ static int do_seccomp(struct pt_regs *regs)
>   static inline int do_seccomp(struct pt_regs *regs) { return 0; }
>   #endif /* CONFIG_SECCOMP */
>   
> -/**
> - * do_syscall_trace_enter() - Do syscall tracing on kernel entry.
> - * @regs: the pt_regs of the task to trace (current)
> - *
> - * Performs various types of tracing on syscall entry. This includes seccomp,
> - * ptrace, syscall tracepoints and audit.
> - *
> - * The pt_regs are potentially visible to userspace via ptrace, so their
> - * contents is ABI.
> - *
> - * One or more of the tracers may modify the contents of pt_regs, in particular
> - * to modify arguments or even the syscall number itself.
> - *
> - * It's also possible that a tracer can choose to reject the system call. In
> - * that case this function will return an illegal syscall number, and will put
> - * an appropriate return value in regs->r3.
> - *
> - * Return: the (possibly changed) syscall number.
> - */
> -long do_syscall_trace_enter(struct pt_regs *regs)
> -{
> -	u32 flags;
> -
> -	flags = read_thread_flags() & (_TIF_SYSCALL_EMU | _TIF_SYSCALL_TRACE);
> -
> -	if (flags) {
> -		int rc = ptrace_report_syscall_entry(regs);
> -
> -		if (unlikely(flags & _TIF_SYSCALL_EMU)) {
> -			/*
> -			 * A nonzero return code from
> -			 * ptrace_report_syscall_entry() tells us to prevent
> -			 * the syscall execution, but we are not going to
> -			 * execute it anyway.
> -			 *
> -			 * Returning -1 will skip the syscall execution. We want
> -			 * to avoid clobbering any registers, so we don't goto
> -			 * the skip label below.
> -			 */
> -			return -1;
> -		}
> -
> -		if (rc) {
> -			/*
> -			 * The tracer decided to abort the syscall. Note that
> -			 * the tracer may also just change regs->gpr[0] to an
> -			 * invalid syscall number, that is handled below on the
> -			 * exit path.
> -			 */
> -			goto skip;
> -		}
> -	}
> -
> -	/* Run seccomp after ptrace; allow it to set gpr[3]. */
> -	if (do_seccomp(regs))
> -		return -1;
> -
> -	/* Avoid trace and audit when syscall is invalid. */
> -	if (regs->gpr[0] >= NR_syscalls)
> -		goto skip;
> -
> -	if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
> -		trace_sys_enter(regs, regs->gpr[0]);
> -
> -	if (!is_32bit_task())
> -		audit_syscall_entry(regs->gpr[0], regs->gpr[3], regs->gpr[4],
> -				    regs->gpr[5], regs->gpr[6]);
> -	else
> -		audit_syscall_entry(regs->gpr[0],
> -				    regs->gpr[3] & 0xffffffff,
> -				    regs->gpr[4] & 0xffffffff,
> -				    regs->gpr[5] & 0xffffffff,
> -				    regs->gpr[6] & 0xffffffff);
> -
> -	/* Return the possibly modified but valid syscall number */
> -	return regs->gpr[0];
> -
> -skip:
> -	/*
> -	 * If we are aborting explicitly, or if the syscall number is
> -	 * now invalid, set the return value to -ENOSYS.
> -	 */
> -	regs->gpr[3] = -ENOSYS;
> -	return -1;
> -}
> -
> -void do_syscall_trace_leave(struct pt_regs *regs)
> -{
> -	int step;
> -
> -	audit_syscall_exit(regs);
> -
> -	if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
> -		trace_sys_exit(regs, regs->result);
> -
> -	step = test_thread_flag(TIF_SINGLESTEP);
> -	if (step || test_thread_flag(TIF_SYSCALL_TRACE))
> -		ptrace_report_syscall_exit(regs, step);
> -}
> -
>   void __init pt_regs_check(void);
>   
>   /*
> diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c
> index 719930cf4ae1f..8e1a1b26b5eae 100644
> --- a/arch/powerpc/kernel/signal.c
> +++ b/arch/powerpc/kernel/signal.c
> @@ -6,6 +6,7 @@
>    *    Extracted from signal_32.c and signal_64.c
>    */
>   
> +#include <linux/entry-common.h>
>   #include <linux/resume_user_mode.h>
>   #include <linux/signal.h>
>   #include <linux/uprobes.h>
> @@ -22,11 +23,6 @@
>   
>   #include "signal.h"
>   
> -/* This will be removed */
> -#ifdef CONFIG_GENERIC_ENTRY
> -#include <linux/entry-common.h>
> -#endif /* CONFIG_GENERIC_ENTRY */
> -
>   #ifdef CONFIG_VSX
>   unsigned long copy_fpr_to_user(void __user *to,
>   			       struct task_struct *task)
> @@ -374,11 +370,9 @@ void signal_fault(struct task_struct *tsk, struct pt_regs *regs,
>   				   task_pid_nr(tsk), where, ptr, regs->nip, regs->link);
>   }
>   
> -#ifdef CONFIG_GENERIC_ENTRY
>   void arch_do_signal_or_restart(struct pt_regs *regs)
>   {
>   	BUG_ON(regs != current->thread.regs);
>   	local_paca->generic_fw_flags |= GFW_RESTORE_ALL;
>   	do_signal(current);
>   }
> -#endif /* CONFIG_GENERIC_ENTRY */
> diff --git a/arch/powerpc/kernel/syscall.c b/arch/powerpc/kernel/syscall.c
> index 9f03a6263fb41..66fd6ca4462b0 100644
> --- a/arch/powerpc/kernel/syscall.c
> +++ b/arch/powerpc/kernel/syscall.c
> @@ -3,6 +3,7 @@
>   #include <linux/compat.h>
>   #include <linux/context_tracking.h>
>   #include <linux/randomize_kstack.h>
> +#include <linux/entry-common.h>
>   
>   #include <asm/interrupt.h>
>   #include <asm/kup.h>
> @@ -21,121 +22,9 @@ notrace long system_call_exception(struct pt_regs *regs, unsigned long r0)
>   	kuap_lock();
>   
>   	add_random_kstack_offset();
> +	r0 = syscall_enter_from_user_mode(regs, r0);
>   
> -	if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG))
> -		BUG_ON(irq_soft_mask_return() != IRQS_ALL_DISABLED);
> -
> -	trace_hardirqs_off(); /* finish reconciling */
> -
> -	CT_WARN_ON(ct_state() == CT_STATE_KERNEL);
> -	user_exit_irqoff();
> -
> -	BUG_ON(regs_is_unrecoverable(regs));
> -	BUG_ON(!user_mode(regs));
> -	BUG_ON(regs_irqs_disabled(regs));
> -
> -#ifdef CONFIG_PPC_PKEY
> -	if (mmu_has_feature(MMU_FTR_PKEY)) {
> -		unsigned long amr, iamr;
> -		bool flush_needed = false;
> -		/*
> -		 * When entering from userspace we mostly have the AMR/IAMR
> -		 * different from kernel default values. Hence don't compare.
> -		 */
> -		amr = mfspr(SPRN_AMR);
> -		iamr = mfspr(SPRN_IAMR);
> -		regs->amr  = amr;
> -		regs->iamr = iamr;
> -		if (mmu_has_feature(MMU_FTR_KUAP)) {
> -			mtspr(SPRN_AMR, AMR_KUAP_BLOCKED);
> -			flush_needed = true;
> -		}
> -		if (mmu_has_feature(MMU_FTR_BOOK3S_KUEP)) {
> -			mtspr(SPRN_IAMR, AMR_KUEP_BLOCKED);
> -			flush_needed = true;
> -		}
> -		if (flush_needed)
> -			isync();
> -	} else
> -#endif
> -		kuap_assert_locked();
> -
> -	booke_restore_dbcr0();
> -
> -	account_cpu_user_entry();
> -
> -	account_stolen_time();
> -
> -	/*
> -	 * This is not required for the syscall exit path, but makes the
> -	 * stack frame look nicer. If this was initialised in the first stack
> -	 * frame, or if the unwinder was taught the first stack frame always
> -	 * returns to user with IRQS_ENABLED, this store could be avoided!
> -	 */
> -	irq_soft_mask_regs_set_state(regs, IRQS_ENABLED);
> -
> -	/*
> -	 * If system call is called with TM active, set _TIF_RESTOREALL to
> -	 * prevent RFSCV being used to return to userspace, because POWER9
> -	 * TM implementation has problems with this instruction returning to
> -	 * transactional state. Final register values are not relevant because
> -	 * the transaction will be aborted upon return anyway. Or in the case
> -	 * of unsupported_scv SIGILL fault, the return state does not much
> -	 * matter because it's an edge case.
> -	 */
> -	if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) &&
> -			unlikely(MSR_TM_TRANSACTIONAL(regs->msr)))
> -		set_bits(_TIF_RESTOREALL, &current_thread_info()->flags);
> -
> -	/*
> -	 * If the system call was made with a transaction active, doom it and
> -	 * return without performing the system call. Unless it was an
> -	 * unsupported scv vector, in which case it's treated like an illegal
> -	 * instruction.
> -	 */
> -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
> -	if (unlikely(MSR_TM_TRANSACTIONAL(regs->msr)) &&
> -	    !trap_is_unsupported_scv(regs)) {
> -		/* Enable TM in the kernel, and disable EE (for scv) */
> -		hard_irq_disable();
> -		mtmsr(mfmsr() | MSR_TM);
> -
> -		/* tabort, this dooms the transaction, nothing else */
> -		asm volatile(".long 0x7c00071d | ((%0) << 16)"
> -				:: "r"(TM_CAUSE_SYSCALL|TM_CAUSE_PERSISTENT));
> -
> -		/*
> -		 * Userspace will never see the return value. Execution will
> -		 * resume after the tbegin. of the aborted transaction with the
> -		 * checkpointed register state. A context switch could occur
> -		 * or signal delivered to the process before resuming the
> -		 * doomed transaction context, but that should all be handled
> -		 * as expected.
> -		 */
> -		return -ENOSYS;
> -	}
> -#endif // CONFIG_PPC_TRANSACTIONAL_MEM
> -
> -	local_irq_enable();
> -
> -	if (unlikely(read_thread_flags() & _TIF_SYSCALL_DOTRACE)) {
> -		if (unlikely(trap_is_unsupported_scv(regs))) {
> -			/* Unsupported scv vector */
> -			_exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
> -			return regs->gpr[3];
> -		}
> -		/*
> -		 * We use the return value of do_syscall_trace_enter() as the
> -		 * syscall number. If the syscall was rejected for any reason
> -		 * do_syscall_trace_enter() returns an invalid syscall number
> -		 * and the test against NR_syscalls will fail and the return
> -		 * value to be used is in regs->gpr[3].
> -		 */
> -		r0 = do_syscall_trace_enter(regs);
> -		if (unlikely(r0 >= NR_syscalls))
> -			return regs->gpr[3];
> -
> -	} else if (unlikely(r0 >= NR_syscalls)) {
> +	if (unlikely(r0 >= NR_syscalls)) {
>   		if (unlikely(trap_is_unsupported_scv(regs))) {
>   			/* Unsupported scv vector */
>   			_exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
Mukesh Kumar Chaurasiya May 2, 2025, 8:01 a.m. UTC | #2
On Tue, Apr 29, 2025 at 11:41:09AM +0530, Shrikanth Hegde wrote:
> 
> 
> On 4/28/25 20:52, Mukesh Kumar Chaurasiya wrote:
> > Enable the syscall entry and exit paths via the generic entry framework.
> > 
> > Signed-off-by: Mukesh Kumar Chaurasiya <mchauras@linux.ibm.com>
> 
> Hi Mukesh. Thanks for working on this. Trying to go through it.
> 
> > ---
> >   arch/powerpc/Kconfig                |   1 +
> >   arch/powerpc/kernel/interrupt.c     |  46 +++++++----
> >   arch/powerpc/kernel/ptrace/ptrace.c | 103 ------------------------
> >   arch/powerpc/kernel/signal.c        |   8 +-
> >   arch/powerpc/kernel/syscall.c       | 117 +---------------------------
> >   5 files changed, 38 insertions(+), 237 deletions(-)
> > 
> > diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
> > index 6722625a406a0..45b70ccf7c89e 100644
> > --- a/arch/powerpc/Kconfig
> > +++ b/arch/powerpc/Kconfig
> > @@ -202,6 +202,7 @@ config PPC
> >   	select GENERIC_CPU_AUTOPROBE
> >   	select GENERIC_CPU_VULNERABILITIES	if PPC_BARRIER_NOSPEC
> >   	select GENERIC_EARLY_IOREMAP
> > +	select GENERIC_ENTRY
> 
> IMHO this should be enabled after irqentry_* is supported too.
>
Yeah, it will be done once irqentry_* is supported too. I did this just for the syscall path for now.
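
For context, the irqentry conversion would mean wrapping the interrupt/exception handlers with the generic helpers from <linux/entry-common.h>, roughly along these lines (illustrative sketch only; the handler name is made up and this is not part of this series):

	static void example_irq_handler(struct pt_regs *regs)
	{
		irqentry_state_t state = irqentry_enter(regs);	/* context tracking, RCU, lockdep */

		/* ... existing powerpc interrupt handling ... */

		irqentry_exit(regs, state);			/* exit work / return-to-user handling */
	}
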
> >   	select GENERIC_GETTIMEOFDAY
> >   	select GENERIC_IDLE_POLL_SETUP
> >   	select GENERIC_IOREMAP
> > diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c
> > index 7f31f3fb9c1d8..8731064631de0 100644
> > --- a/arch/powerpc/kernel/interrupt.c
> > +++ b/arch/powerpc/kernel/interrupt.c
> > @@ -1,6 +1,7 @@
> >   // SPDX-License-Identifier: GPL-2.0-or-later
> >   #include <linux/context_tracking.h>
> > +#include <linux/entry-common.h>
> >   #include <linux/err.h>
> >   #include <linux/compat.h>
> >   #include <linux/rseq.h>
> > @@ -163,15 +164,10 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3,
> >   	unsigned long ret = 0;
> >   	bool is_not_scv = !IS_ENABLED(CONFIG_PPC_BOOK3S_64) || !scv;
> > -	CT_WARN_ON(ct_state() == CT_STATE_USER);
> > -
> >   	kuap_assert_locked();
> >   	regs->result = r3;
> > -	/* Check whether the syscall is issued inside a restartable sequence */
> > -	rseq_syscall(regs);
> > -
> >   	ti_flags = read_thread_flags();
> >   	if (unlikely(r3 >= (unsigned long)-MAX_ERRNO) && is_not_scv) {
> > @@ -192,13 +188,27 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3,
> >   	}
> >   	if (unlikely(ti_flags & _TIF_SYSCALL_DOTRACE)) {
> > -		do_syscall_trace_leave(regs);
> >   		ret |= _TIF_RESTOREALL;
> >   	}
> > -	local_irq_disable();
> > -	ret = interrupt_exit_user_prepare_main(ret, regs);
> 
> I don't understand why this loop is needed.
>
This is for the case where there are pending interrupts that need to be processed before we can return to user mode.
> > +again:
> > +	syscall_exit_to_user_mode(regs);
> 
> exit_to_user_mode() also calls user_enter_irqoff(),
> so the user_enter_irqoff() below may be a no-op or wrong.
Oh yeah, I'll fix this in the next version.
> 
> also, syscall_exit_to_user_mode disables local interrupts first.
> 
> is local_irq_enable fixing something here?
>
It just enables interrupts for a short window so any pending interrupts can be taken before we retry the exit.
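
To spell out the intent of the retry path (the hunk being discussed, with explanatory comments added):

	again:
		syscall_exit_to_user_mode(regs);	/* generic exit work, returns with irqs off */

		user_enter_irqoff();
		if (!prep_irq_for_enabled_exit(true)) {
			/*
			 * A soft-masked interrupt became pending: back out of the
			 * user-mode transition, open a brief irq window so it can
			 * be serviced, then redo the exit work and try again.
			 */
			user_exit_irqoff();
			local_irq_enable();
			local_irq_disable();
			goto again;
		}
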
> > +
> > +	user_enter_irqoff();
> > +	if (!prep_irq_for_enabled_exit(true)) {
> > +		user_exit_irqoff();
> > +		local_irq_enable();
> > +		local_irq_disable();
> > +		goto again;
> > +	}
> > +
> > +	/* Restore user access locks last */
> > +	kuap_user_restore(regs);
> > +	if (unlikely((local_paca->generic_fw_flags & GFW_RESTORE_ALL) == GFW_RESTORE_ALL)) {
> > +		ret |= _TIF_RESTOREALL;
> > +		local_paca->generic_fw_flags &= ~GFW_RESTORE_ALL;
> > +	}
> >   #ifdef CONFIG_PPC64
> >   	regs->exit_result = ret;
> >   #endif
> > @@ -209,6 +219,7 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3,
> >   #ifdef CONFIG_PPC64
> >   notrace unsigned long syscall_exit_restart(unsigned long r3, struct pt_regs *regs)
> >   {
> > +	unsigned long ret = 0;
> >   	/*
> >   	 * This is called when detecting a soft-pending interrupt as well as
> >   	 * an alternate-return interrupt. So we can't just have the alternate
> > @@ -222,14 +233,23 @@ notrace unsigned long syscall_exit_restart(unsigned long r3, struct pt_regs *reg
> >   #ifdef CONFIG_PPC_BOOK3S_64
> >   	set_kuap(AMR_KUAP_BLOCKED);
> >   #endif
> > +again:
> > +	syscall_exit_to_user_mode(regs);
> > -	trace_hardirqs_off();
> > -	user_exit_irqoff();
> > -	account_cpu_user_entry();
> > +	user_enter_irqoff();
> > +	if (!prep_irq_for_enabled_exit(true)) {
> > +		user_exit_irqoff();
> > +		local_irq_enable();
> > +		local_irq_disable();
> > +		goto again;
> > +	}
> > -	BUG_ON(!user_mode(regs));
> > +	if (unlikely((local_paca->generic_fw_flags & GFW_RESTORE_ALL) == GFW_RESTORE_ALL)) {
> > +		ret = _TIF_RESTOREALL;
> > +		local_paca->generic_fw_flags &= ~GFW_RESTORE_ALL;
> > +	}
> > -	regs->exit_result = interrupt_exit_user_prepare_main(regs->exit_result, regs);
> > +	regs->exit_result |= ret;
> >   	return regs->exit_result;
> >   }
> > diff --git a/arch/powerpc/kernel/ptrace/ptrace.c b/arch/powerpc/kernel/ptrace/ptrace.c
> > index c6997df632873..2a2b0b94a3eaa 100644
> > --- a/arch/powerpc/kernel/ptrace/ptrace.c
> > +++ b/arch/powerpc/kernel/ptrace/ptrace.c
> > @@ -21,9 +21,6 @@
> >   #include <asm/switch_to.h>
> >   #include <asm/debug.h>
> > -#define CREATE_TRACE_POINTS
> > -#include <trace/events/syscalls.h>
> > -
> >   #include "ptrace-decl.h"
> >   /*
> > @@ -233,106 +230,6 @@ static int do_seccomp(struct pt_regs *regs)
> >   static inline int do_seccomp(struct pt_regs *regs) { return 0; }
> >   #endif /* CONFIG_SECCOMP */
> > -/**
> > - * do_syscall_trace_enter() - Do syscall tracing on kernel entry.
> > - * @regs: the pt_regs of the task to trace (current)
> > - *
> > - * Performs various types of tracing on syscall entry. This includes seccomp,
> > - * ptrace, syscall tracepoints and audit.
> > - *
> > - * The pt_regs are potentially visible to userspace via ptrace, so their
> > - * contents is ABI.
> > - *
> > - * One or more of the tracers may modify the contents of pt_regs, in particular
> > - * to modify arguments or even the syscall number itself.
> > - *
> > - * It's also possible that a tracer can choose to reject the system call. In
> > - * that case this function will return an illegal syscall number, and will put
> > - * an appropriate return value in regs->r3.
> > - *
> > - * Return: the (possibly changed) syscall number.
> > - */
> > -long do_syscall_trace_enter(struct pt_regs *regs)
> > -{
> > -	u32 flags;
> > -
> > -	flags = read_thread_flags() & (_TIF_SYSCALL_EMU | _TIF_SYSCALL_TRACE);
> > -
> > -	if (flags) {
> > -		int rc = ptrace_report_syscall_entry(regs);
> > -
> > -		if (unlikely(flags & _TIF_SYSCALL_EMU)) {
> > -			/*
> > -			 * A nonzero return code from
> > -			 * ptrace_report_syscall_entry() tells us to prevent
> > -			 * the syscall execution, but we are not going to
> > -			 * execute it anyway.
> > -			 *
> > -			 * Returning -1 will skip the syscall execution. We want
> > -			 * to avoid clobbering any registers, so we don't goto
> > -			 * the skip label below.
> > -			 */
> > -			return -1;
> > -		}
> > -
> > -		if (rc) {
> > -			/*
> > -			 * The tracer decided to abort the syscall. Note that
> > -			 * the tracer may also just change regs->gpr[0] to an
> > -			 * invalid syscall number, that is handled below on the
> > -			 * exit path.
> > -			 */
> > -			goto skip;
> > -		}
> > -	}
> > -
> > -	/* Run seccomp after ptrace; allow it to set gpr[3]. */
> > -	if (do_seccomp(regs))
> > -		return -1;
> > -
> > -	/* Avoid trace and audit when syscall is invalid. */
> > -	if (regs->gpr[0] >= NR_syscalls)
> > -		goto skip;
> > -
> > -	if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
> > -		trace_sys_enter(regs, regs->gpr[0]);
> > -
> > -	if (!is_32bit_task())
> > -		audit_syscall_entry(regs->gpr[0], regs->gpr[3], regs->gpr[4],
> > -				    regs->gpr[5], regs->gpr[6]);
> > -	else
> > -		audit_syscall_entry(regs->gpr[0],
> > -				    regs->gpr[3] & 0xffffffff,
> > -				    regs->gpr[4] & 0xffffffff,
> > -				    regs->gpr[5] & 0xffffffff,
> > -				    regs->gpr[6] & 0xffffffff);
> > -
> > -	/* Return the possibly modified but valid syscall number */
> > -	return regs->gpr[0];
> > -
> > -skip:
> > -	/*
> > -	 * If we are aborting explicitly, or if the syscall number is
> > -	 * now invalid, set the return value to -ENOSYS.
> > -	 */
> > -	regs->gpr[3] = -ENOSYS;
> > -	return -1;
> > -}
> > -
> > -void do_syscall_trace_leave(struct pt_regs *regs)
> > -{
> > -	int step;
> > -
> > -	audit_syscall_exit(regs);
> > -
> > -	if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
> > -		trace_sys_exit(regs, regs->result);
> > -
> > -	step = test_thread_flag(TIF_SINGLESTEP);
> > -	if (step || test_thread_flag(TIF_SYSCALL_TRACE))
> > -		ptrace_report_syscall_exit(regs, step);
> > -}
> > -
> >   void __init pt_regs_check(void);
> >   /*
> > diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c
> > index 719930cf4ae1f..8e1a1b26b5eae 100644
> > --- a/arch/powerpc/kernel/signal.c
> > +++ b/arch/powerpc/kernel/signal.c
> > @@ -6,6 +6,7 @@
> >    *    Extracted from signal_32.c and signal_64.c
> >    */
> > +#include <linux/entry-common.h>
> >   #include <linux/resume_user_mode.h>
> >   #include <linux/signal.h>
> >   #include <linux/uprobes.h>
> > @@ -22,11 +23,6 @@
> >   #include "signal.h"
> > -/* This will be removed */
> > -#ifdef CONFIG_GENERIC_ENTRY
> > -#include <linux/entry-common.h>
> > -#endif /* CONFIG_GENERIC_ENTRY */
> > -
> >   #ifdef CONFIG_VSX
> >   unsigned long copy_fpr_to_user(void __user *to,
> >   			       struct task_struct *task)
> > @@ -374,11 +370,9 @@ void signal_fault(struct task_struct *tsk, struct pt_regs *regs,
> >   				   task_pid_nr(tsk), where, ptr, regs->nip, regs->link);
> >   }
> > -#ifdef CONFIG_GENERIC_ENTRY
> >   void arch_do_signal_or_restart(struct pt_regs *regs)
> >   {
> >   	BUG_ON(regs != current->thread.regs);
> >   	local_paca->generic_fw_flags |= GFW_RESTORE_ALL;
> >   	do_signal(current);
> >   }
> > -#endif /* CONFIG_GENERIC_ENTRY */
> > diff --git a/arch/powerpc/kernel/syscall.c b/arch/powerpc/kernel/syscall.c
> > index 9f03a6263fb41..66fd6ca4462b0 100644
> > --- a/arch/powerpc/kernel/syscall.c
> > +++ b/arch/powerpc/kernel/syscall.c
> > @@ -3,6 +3,7 @@
> >   #include <linux/compat.h>
> >   #include <linux/context_tracking.h>
> >   #include <linux/randomize_kstack.h>
> > +#include <linux/entry-common.h>
> >   #include <asm/interrupt.h>
> >   #include <asm/kup.h>
> > @@ -21,121 +22,9 @@ notrace long system_call_exception(struct pt_regs *regs, unsigned long r0)
> >   	kuap_lock();
> >   	add_random_kstack_offset();
> > +	r0 = syscall_enter_from_user_mode(regs, r0);
> > -	if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG))
> > -		BUG_ON(irq_soft_mask_return() != IRQS_ALL_DISABLED);
> > -
> > -	trace_hardirqs_off(); /* finish reconciling */
> > -
> > -	CT_WARN_ON(ct_state() == CT_STATE_KERNEL);
> > -	user_exit_irqoff();
> > -
> > -	BUG_ON(regs_is_unrecoverable(regs));
> > -	BUG_ON(!user_mode(regs));
> > -	BUG_ON(regs_irqs_disabled(regs));
> > -
> > -#ifdef CONFIG_PPC_PKEY
> > -	if (mmu_has_feature(MMU_FTR_PKEY)) {
> > -		unsigned long amr, iamr;
> > -		bool flush_needed = false;
> > -		/*
> > -		 * When entering from userspace we mostly have the AMR/IAMR
> > -		 * different from kernel default values. Hence don't compare.
> > -		 */
> > -		amr = mfspr(SPRN_AMR);
> > -		iamr = mfspr(SPRN_IAMR);
> > -		regs->amr  = amr;
> > -		regs->iamr = iamr;
> > -		if (mmu_has_feature(MMU_FTR_KUAP)) {
> > -			mtspr(SPRN_AMR, AMR_KUAP_BLOCKED);
> > -			flush_needed = true;
> > -		}
> > -		if (mmu_has_feature(MMU_FTR_BOOK3S_KUEP)) {
> > -			mtspr(SPRN_IAMR, AMR_KUEP_BLOCKED);
> > -			flush_needed = true;
> > -		}
> > -		if (flush_needed)
> > -			isync();
> > -	} else
> > -#endif
> > -		kuap_assert_locked();
> > -
> > -	booke_restore_dbcr0();
> > -
> > -	account_cpu_user_entry();
> > -
> > -	account_stolen_time();
> > -
> > -	/*
> > -	 * This is not required for the syscall exit path, but makes the
> > -	 * stack frame look nicer. If this was initialised in the first stack
> > -	 * frame, or if the unwinder was taught the first stack frame always
> > -	 * returns to user with IRQS_ENABLED, this store could be avoided!
> > -	 */
> > -	irq_soft_mask_regs_set_state(regs, IRQS_ENABLED);
> > -
> > -	/*
> > -	 * If system call is called with TM active, set _TIF_RESTOREALL to
> > -	 * prevent RFSCV being used to return to userspace, because POWER9
> > -	 * TM implementation has problems with this instruction returning to
> > -	 * transactional state. Final register values are not relevant because
> > -	 * the transaction will be aborted upon return anyway. Or in the case
> > -	 * of unsupported_scv SIGILL fault, the return state does not much
> > -	 * matter because it's an edge case.
> > -	 */
> > -	if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) &&
> > -			unlikely(MSR_TM_TRANSACTIONAL(regs->msr)))
> > -		set_bits(_TIF_RESTOREALL, &current_thread_info()->flags);
> > -
> > -	/*
> > -	 * If the system call was made with a transaction active, doom it and
> > -	 * return without performing the system call. Unless it was an
> > -	 * unsupported scv vector, in which case it's treated like an illegal
> > -	 * instruction.
> > -	 */
> > -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
> > -	if (unlikely(MSR_TM_TRANSACTIONAL(regs->msr)) &&
> > -	    !trap_is_unsupported_scv(regs)) {
> > -		/* Enable TM in the kernel, and disable EE (for scv) */
> > -		hard_irq_disable();
> > -		mtmsr(mfmsr() | MSR_TM);
> > -
> > -		/* tabort, this dooms the transaction, nothing else */
> > -		asm volatile(".long 0x7c00071d | ((%0) << 16)"
> > -				:: "r"(TM_CAUSE_SYSCALL|TM_CAUSE_PERSISTENT));
> > -
> > -		/*
> > -		 * Userspace will never see the return value. Execution will
> > -		 * resume after the tbegin. of the aborted transaction with the
> > -		 * checkpointed register state. A context switch could occur
> > -		 * or signal delivered to the process before resuming the
> > -		 * doomed transaction context, but that should all be handled
> > -		 * as expected.
> > -		 */
> > -		return -ENOSYS;
> > -	}
> > -#endif // CONFIG_PPC_TRANSACTIONAL_MEM
> > -
> > -	local_irq_enable();
> > -
> > -	if (unlikely(read_thread_flags() & _TIF_SYSCALL_DOTRACE)) {
> > -		if (unlikely(trap_is_unsupported_scv(regs))) {
> > -			/* Unsupported scv vector */
> > -			_exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
> > -			return regs->gpr[3];
> > -		}
> > -		/*
> > -		 * We use the return value of do_syscall_trace_enter() as the
> > -		 * syscall number. If the syscall was rejected for any reason
> > -		 * do_syscall_trace_enter() returns an invalid syscall number
> > -		 * and the test against NR_syscalls will fail and the return
> > -		 * value to be used is in regs->gpr[3].
> > -		 */
> > -		r0 = do_syscall_trace_enter(regs);
> > -		if (unlikely(r0 >= NR_syscalls))
> > -			return regs->gpr[3];
> > -
> > -	} else if (unlikely(r0 >= NR_syscalls)) {
> > +	if (unlikely(r0 >= NR_syscalls)) {
> >   		if (unlikely(trap_is_unsupported_scv(regs))) {
> >   			/* Unsupported scv vector */
> >   			_exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
>

Patch

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 6722625a406a0..45b70ccf7c89e 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -202,6 +202,7 @@  config PPC
 	select GENERIC_CPU_AUTOPROBE
 	select GENERIC_CPU_VULNERABILITIES	if PPC_BARRIER_NOSPEC
 	select GENERIC_EARLY_IOREMAP
+	select GENERIC_ENTRY
 	select GENERIC_GETTIMEOFDAY
 	select GENERIC_IDLE_POLL_SETUP
 	select GENERIC_IOREMAP
diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c
index 7f31f3fb9c1d8..8731064631de0 100644
--- a/arch/powerpc/kernel/interrupt.c
+++ b/arch/powerpc/kernel/interrupt.c
@@ -1,6 +1,7 @@ 
 // SPDX-License-Identifier: GPL-2.0-or-later
 
 #include <linux/context_tracking.h>
+#include <linux/entry-common.h>
 #include <linux/err.h>
 #include <linux/compat.h>
 #include <linux/rseq.h>
@@ -163,15 +164,10 @@  notrace unsigned long syscall_exit_prepare(unsigned long r3,
 	unsigned long ret = 0;
 	bool is_not_scv = !IS_ENABLED(CONFIG_PPC_BOOK3S_64) || !scv;
 
-	CT_WARN_ON(ct_state() == CT_STATE_USER);
-
 	kuap_assert_locked();
 
 	regs->result = r3;
 
-	/* Check whether the syscall is issued inside a restartable sequence */
-	rseq_syscall(regs);
-
 	ti_flags = read_thread_flags();
 
 	if (unlikely(r3 >= (unsigned long)-MAX_ERRNO) && is_not_scv) {
@@ -192,13 +188,27 @@  notrace unsigned long syscall_exit_prepare(unsigned long r3,
 	}
 
 	if (unlikely(ti_flags & _TIF_SYSCALL_DOTRACE)) {
-		do_syscall_trace_leave(regs);
 		ret |= _TIF_RESTOREALL;
 	}
 
-	local_irq_disable();
-	ret = interrupt_exit_user_prepare_main(ret, regs);
+again:
+	syscall_exit_to_user_mode(regs);
+
+	user_enter_irqoff();
+	if (!prep_irq_for_enabled_exit(true)) {
+		user_exit_irqoff();
+		local_irq_enable();
+		local_irq_disable();
+		goto again;
+	}
+
+	/* Restore user access locks last */
+	kuap_user_restore(regs);
 
+	if (unlikely((local_paca->generic_fw_flags & GFW_RESTORE_ALL) == GFW_RESTORE_ALL)) {
+		ret |= _TIF_RESTOREALL;
+		local_paca->generic_fw_flags &= ~GFW_RESTORE_ALL;
+	}
 #ifdef CONFIG_PPC64
 	regs->exit_result = ret;
 #endif
@@ -209,6 +219,7 @@  notrace unsigned long syscall_exit_prepare(unsigned long r3,
 #ifdef CONFIG_PPC64
 notrace unsigned long syscall_exit_restart(unsigned long r3, struct pt_regs *regs)
 {
+	unsigned long ret = 0;
 	/*
 	 * This is called when detecting a soft-pending interrupt as well as
 	 * an alternate-return interrupt. So we can't just have the alternate
@@ -222,14 +233,23 @@  notrace unsigned long syscall_exit_restart(unsigned long r3, struct pt_regs *reg
 #ifdef CONFIG_PPC_BOOK3S_64
 	set_kuap(AMR_KUAP_BLOCKED);
 #endif
+again:
+	syscall_exit_to_user_mode(regs);
 
-	trace_hardirqs_off();
-	user_exit_irqoff();
-	account_cpu_user_entry();
+	user_enter_irqoff();
+	if (!prep_irq_for_enabled_exit(true)) {
+		user_exit_irqoff();
+		local_irq_enable();
+		local_irq_disable();
+		goto again;
+	}
 
-	BUG_ON(!user_mode(regs));
+	if (unlikely((local_paca->generic_fw_flags & GFW_RESTORE_ALL) == GFW_RESTORE_ALL)) {
+		ret = _TIF_RESTOREALL;
+		local_paca->generic_fw_flags &= ~GFW_RESTORE_ALL;
+	}
 
-	regs->exit_result = interrupt_exit_user_prepare_main(regs->exit_result, regs);
+	regs->exit_result |= ret;
 
 	return regs->exit_result;
 }
diff --git a/arch/powerpc/kernel/ptrace/ptrace.c b/arch/powerpc/kernel/ptrace/ptrace.c
index c6997df632873..2a2b0b94a3eaa 100644
--- a/arch/powerpc/kernel/ptrace/ptrace.c
+++ b/arch/powerpc/kernel/ptrace/ptrace.c
@@ -21,9 +21,6 @@ 
 #include <asm/switch_to.h>
 #include <asm/debug.h>
 
-#define CREATE_TRACE_POINTS
-#include <trace/events/syscalls.h>
-
 #include "ptrace-decl.h"
 
 /*
@@ -233,106 +230,6 @@  static int do_seccomp(struct pt_regs *regs)
 static inline int do_seccomp(struct pt_regs *regs) { return 0; }
 #endif /* CONFIG_SECCOMP */
 
-/**
- * do_syscall_trace_enter() - Do syscall tracing on kernel entry.
- * @regs: the pt_regs of the task to trace (current)
- *
- * Performs various types of tracing on syscall entry. This includes seccomp,
- * ptrace, syscall tracepoints and audit.
- *
- * The pt_regs are potentially visible to userspace via ptrace, so their
- * contents is ABI.
- *
- * One or more of the tracers may modify the contents of pt_regs, in particular
- * to modify arguments or even the syscall number itself.
- *
- * It's also possible that a tracer can choose to reject the system call. In
- * that case this function will return an illegal syscall number, and will put
- * an appropriate return value in regs->r3.
- *
- * Return: the (possibly changed) syscall number.
- */
-long do_syscall_trace_enter(struct pt_regs *regs)
-{
-	u32 flags;
-
-	flags = read_thread_flags() & (_TIF_SYSCALL_EMU | _TIF_SYSCALL_TRACE);
-
-	if (flags) {
-		int rc = ptrace_report_syscall_entry(regs);
-
-		if (unlikely(flags & _TIF_SYSCALL_EMU)) {
-			/*
-			 * A nonzero return code from
-			 * ptrace_report_syscall_entry() tells us to prevent
-			 * the syscall execution, but we are not going to
-			 * execute it anyway.
-			 *
-			 * Returning -1 will skip the syscall execution. We want
-			 * to avoid clobbering any registers, so we don't goto
-			 * the skip label below.
-			 */
-			return -1;
-		}
-
-		if (rc) {
-			/*
-			 * The tracer decided to abort the syscall. Note that
-			 * the tracer may also just change regs->gpr[0] to an
-			 * invalid syscall number, that is handled below on the
-			 * exit path.
-			 */
-			goto skip;
-		}
-	}
-
-	/* Run seccomp after ptrace; allow it to set gpr[3]. */
-	if (do_seccomp(regs))
-		return -1;
-
-	/* Avoid trace and audit when syscall is invalid. */
-	if (regs->gpr[0] >= NR_syscalls)
-		goto skip;
-
-	if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
-		trace_sys_enter(regs, regs->gpr[0]);
-
-	if (!is_32bit_task())
-		audit_syscall_entry(regs->gpr[0], regs->gpr[3], regs->gpr[4],
-				    regs->gpr[5], regs->gpr[6]);
-	else
-		audit_syscall_entry(regs->gpr[0],
-				    regs->gpr[3] & 0xffffffff,
-				    regs->gpr[4] & 0xffffffff,
-				    regs->gpr[5] & 0xffffffff,
-				    regs->gpr[6] & 0xffffffff);
-
-	/* Return the possibly modified but valid syscall number */
-	return regs->gpr[0];
-
-skip:
-	/*
-	 * If we are aborting explicitly, or if the syscall number is
-	 * now invalid, set the return value to -ENOSYS.
-	 */
-	regs->gpr[3] = -ENOSYS;
-	return -1;
-}
-
-void do_syscall_trace_leave(struct pt_regs *regs)
-{
-	int step;
-
-	audit_syscall_exit(regs);
-
-	if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
-		trace_sys_exit(regs, regs->result);
-
-	step = test_thread_flag(TIF_SINGLESTEP);
-	if (step || test_thread_flag(TIF_SYSCALL_TRACE))
-		ptrace_report_syscall_exit(regs, step);
-}
-
 void __init pt_regs_check(void);
 
 /*
diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c
index 719930cf4ae1f..8e1a1b26b5eae 100644
--- a/arch/powerpc/kernel/signal.c
+++ b/arch/powerpc/kernel/signal.c
@@ -6,6 +6,7 @@ 
  *    Extracted from signal_32.c and signal_64.c
  */
 
+#include <linux/entry-common.h>
 #include <linux/resume_user_mode.h>
 #include <linux/signal.h>
 #include <linux/uprobes.h>
@@ -22,11 +23,6 @@ 
 
 #include "signal.h"
 
-/* This will be removed */
-#ifdef CONFIG_GENERIC_ENTRY
-#include <linux/entry-common.h>
-#endif /* CONFIG_GENERIC_ENTRY */
-
 #ifdef CONFIG_VSX
 unsigned long copy_fpr_to_user(void __user *to,
 			       struct task_struct *task)
@@ -374,11 +370,9 @@  void signal_fault(struct task_struct *tsk, struct pt_regs *regs,
 				   task_pid_nr(tsk), where, ptr, regs->nip, regs->link);
 }
 
-#ifdef CONFIG_GENERIC_ENTRY
 void arch_do_signal_or_restart(struct pt_regs *regs)
 {
 	BUG_ON(regs != current->thread.regs);
 	local_paca->generic_fw_flags |= GFW_RESTORE_ALL;
 	do_signal(current);
 }
-#endif /* CONFIG_GENERIC_ENTRY */
diff --git a/arch/powerpc/kernel/syscall.c b/arch/powerpc/kernel/syscall.c
index 9f03a6263fb41..66fd6ca4462b0 100644
--- a/arch/powerpc/kernel/syscall.c
+++ b/arch/powerpc/kernel/syscall.c
@@ -3,6 +3,7 @@ 
 #include <linux/compat.h>
 #include <linux/context_tracking.h>
 #include <linux/randomize_kstack.h>
+#include <linux/entry-common.h>
 
 #include <asm/interrupt.h>
 #include <asm/kup.h>
@@ -21,121 +22,9 @@  notrace long system_call_exception(struct pt_regs *regs, unsigned long r0)
 	kuap_lock();
 
 	add_random_kstack_offset();
+	r0 = syscall_enter_from_user_mode(regs, r0);
 
-	if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG))
-		BUG_ON(irq_soft_mask_return() != IRQS_ALL_DISABLED);
-
-	trace_hardirqs_off(); /* finish reconciling */
-
-	CT_WARN_ON(ct_state() == CT_STATE_KERNEL);
-	user_exit_irqoff();
-
-	BUG_ON(regs_is_unrecoverable(regs));
-	BUG_ON(!user_mode(regs));
-	BUG_ON(regs_irqs_disabled(regs));
-
-#ifdef CONFIG_PPC_PKEY
-	if (mmu_has_feature(MMU_FTR_PKEY)) {
-		unsigned long amr, iamr;
-		bool flush_needed = false;
-		/*
-		 * When entering from userspace we mostly have the AMR/IAMR
-		 * different from kernel default values. Hence don't compare.
-		 */
-		amr = mfspr(SPRN_AMR);
-		iamr = mfspr(SPRN_IAMR);
-		regs->amr  = amr;
-		regs->iamr = iamr;
-		if (mmu_has_feature(MMU_FTR_KUAP)) {
-			mtspr(SPRN_AMR, AMR_KUAP_BLOCKED);
-			flush_needed = true;
-		}
-		if (mmu_has_feature(MMU_FTR_BOOK3S_KUEP)) {
-			mtspr(SPRN_IAMR, AMR_KUEP_BLOCKED);
-			flush_needed = true;
-		}
-		if (flush_needed)
-			isync();
-	} else
-#endif
-		kuap_assert_locked();
-
-	booke_restore_dbcr0();
-
-	account_cpu_user_entry();
-
-	account_stolen_time();
-
-	/*
-	 * This is not required for the syscall exit path, but makes the
-	 * stack frame look nicer. If this was initialised in the first stack
-	 * frame, or if the unwinder was taught the first stack frame always
-	 * returns to user with IRQS_ENABLED, this store could be avoided!
-	 */
-	irq_soft_mask_regs_set_state(regs, IRQS_ENABLED);
-
-	/*
-	 * If system call is called with TM active, set _TIF_RESTOREALL to
-	 * prevent RFSCV being used to return to userspace, because POWER9
-	 * TM implementation has problems with this instruction returning to
-	 * transactional state. Final register values are not relevant because
-	 * the transaction will be aborted upon return anyway. Or in the case
-	 * of unsupported_scv SIGILL fault, the return state does not much
-	 * matter because it's an edge case.
-	 */
-	if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) &&
-			unlikely(MSR_TM_TRANSACTIONAL(regs->msr)))
-		set_bits(_TIF_RESTOREALL, &current_thread_info()->flags);
-
-	/*
-	 * If the system call was made with a transaction active, doom it and
-	 * return without performing the system call. Unless it was an
-	 * unsupported scv vector, in which case it's treated like an illegal
-	 * instruction.
-	 */
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-	if (unlikely(MSR_TM_TRANSACTIONAL(regs->msr)) &&
-	    !trap_is_unsupported_scv(regs)) {
-		/* Enable TM in the kernel, and disable EE (for scv) */
-		hard_irq_disable();
-		mtmsr(mfmsr() | MSR_TM);
-
-		/* tabort, this dooms the transaction, nothing else */
-		asm volatile(".long 0x7c00071d | ((%0) << 16)"
-				:: "r"(TM_CAUSE_SYSCALL|TM_CAUSE_PERSISTENT));
-
-		/*
-		 * Userspace will never see the return value. Execution will
-		 * resume after the tbegin. of the aborted transaction with the
-		 * checkpointed register state. A context switch could occur
-		 * or signal delivered to the process before resuming the
-		 * doomed transaction context, but that should all be handled
-		 * as expected.
-		 */
-		return -ENOSYS;
-	}
-#endif // CONFIG_PPC_TRANSACTIONAL_MEM
-
-	local_irq_enable();
-
-	if (unlikely(read_thread_flags() & _TIF_SYSCALL_DOTRACE)) {
-		if (unlikely(trap_is_unsupported_scv(regs))) {
-			/* Unsupported scv vector */
-			_exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
-			return regs->gpr[3];
-		}
-		/*
-		 * We use the return value of do_syscall_trace_enter() as the
-		 * syscall number. If the syscall was rejected for any reason
-		 * do_syscall_trace_enter() returns an invalid syscall number
-		 * and the test against NR_syscalls will fail and the return
-		 * value to be used is in regs->gpr[3].
-		 */
-		r0 = do_syscall_trace_enter(regs);
-		if (unlikely(r0 >= NR_syscalls))
-			return regs->gpr[3];
-
-	} else if (unlikely(r0 >= NR_syscalls)) {
+	if (unlikely(r0 >= NR_syscalls)) {
 		if (unlikely(trap_is_unsupported_scv(regs))) {
 			/* Unsupported scv vector */
 			_exception(SIGILL, regs, ILL_ILLOPC, regs->nip);