diff mbox series

[v3,18/21] linux-user/aarch64: Signal SEGV_MTEAERR for async tag check error

Message ID 20210115224645.1196742-19-richard.henderson@linaro.org
State New
Headers show
Series target-arm: Implement ARMv8.5-MemTag, user mode | expand

Commit Message

Richard Henderson Jan. 15, 2021, 10:46 p.m. UTC
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 linux-user/aarch64/target_signal.h |  1 +
 linux-user/aarch64/cpu_loop.c      | 34 +++++++++++++++++++++---------
 target/arm/mte_helper.c            | 10 +++++++++
 3 files changed, 35 insertions(+), 10 deletions(-)

Comments

Peter Maydell Jan. 22, 2021, 1:59 p.m. UTC | #1
On Fri, 15 Jan 2021 at 22:47, Richard Henderson
<richard.henderson@linaro.org> wrote:
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

So when does the real kernel report async MTE exceptions to userspace?
The commit message would be a good place to briefly describe the
kernel's strategy and where QEMU differs from it (if anywhere)...

> ---
>  linux-user/aarch64/target_signal.h |  1 +
>  linux-user/aarch64/cpu_loop.c      | 34 +++++++++++++++++++++---------
>  target/arm/mte_helper.c            | 10 +++++++++
>  3 files changed, 35 insertions(+), 10 deletions(-)
>
> diff --git a/linux-user/aarch64/target_signal.h b/linux-user/aarch64/target_signal.h
> index 777fb667fe..18013e1b23 100644
> --- a/linux-user/aarch64/target_signal.h
> +++ b/linux-user/aarch64/target_signal.h
> @@ -21,6 +21,7 @@ typedef struct target_sigaltstack {
>
>  #include "../generic/signal.h"
>
> +#define TARGET_SEGV_MTEAERR  8  /* Asynchronous ARM MTE error */
>  #define TARGET_SEGV_MTESERR  9  /* Synchronous ARM MTE exception */
>
>  #define TARGET_ARCH_HAS_SETUP_FRAME
> diff --git a/linux-user/aarch64/cpu_loop.c b/linux-user/aarch64/cpu_loop.c
> index 6867f0db2b..6160a401bd 100644
> --- a/linux-user/aarch64/cpu_loop.c
> +++ b/linux-user/aarch64/cpu_loop.c
> @@ -72,6 +72,21 @@
>          put_user_u16(__x, (gaddr));                     \
>      })
>
> +static bool check_mte_async_fault(CPUARMState *env, target_siginfo_t *info)
> +{
> +    if (likely(env->cp15.tfsr_el[0] == 0)) {
> +        return false;
> +    }
> +
> +    env->cp15.tfsr_el[0] = 0;
> +    info->si_signo = TARGET_SIGSEGV;
> +    info->si_errno = 0;
> +    info->_sifields._sigfault._addr = 0;
> +    info->si_code = TARGET_SEGV_MTEAERR;
> +    queue_signal(env, info->si_signo, QEMU_SI_FAULT, info);
> +    return true;
> +}
> +
>  /* AArch64 main loop */
>  void cpu_loop(CPUARMState *env)
>  {
> @@ -88,15 +103,13 @@ void cpu_loop(CPUARMState *env)
>
>          switch (trapnr) {
>          case EXCP_SWI:
> -            ret = do_syscall(env,
> -                             env->xregs[8],
> -                             env->xregs[0],
> -                             env->xregs[1],
> -                             env->xregs[2],
> -                             env->xregs[3],
> -                             env->xregs[4],
> -                             env->xregs[5],
> -                             0, 0);
> +            if (check_mte_async_fault(env, &info)) {
> +                ret = -TARGET_ERESTARTSYS;
> +            } else {
> +                ret = do_syscall(env, env->xregs[8], env->xregs[0],
> +                                 env->xregs[1], env->xregs[2], env->xregs[3],
> +                                 env->xregs[4], env->xregs[5], 0, 0);
> +            }
>              if (ret == -TARGET_ERESTARTSYS) {
>                  env->pc -= 4;
>              } else if (ret != -TARGET_QEMU_ESIGRETURN) {
> @@ -104,7 +117,8 @@ void cpu_loop(CPUARMState *env)
>              }
>              break;
>          case EXCP_INTERRUPT:
> -            /* just indicate that signals should be handled asap */
> +            /* Just indicate that signals should be handled asap. */
> +            check_mte_async_fault(env, &info);
>              break;
>          case EXCP_UDEF:
>              info.si_signo = TARGET_SIGILL;

So this doesn't guarantee that we check the async-fault status on
every exit from cpu_exec(), which means we might miss things.
For instance I think this slightly contrived example would not
ever take the SEGV:
    STR x0, [x1]   # with a bad tag
    YIELD
l:  B l

because the STR and YIELD go into the same TB, the YIELD causes us
to leave the TB with EXCP_YIELD, we don't check for an async fault
in that code path, and then we'll go into the infinite loop and
have nothing to prompt us to come out and look at the async fault flags.

Does it work if we just always queue the SEGV on exit from cpu_exec()
and let the signal handling machinery prioritize if we also pend
some other signal because this was an EXCP_UDEF or whatever?
It would be neater if we could keep the fault-check outside the
switch (trapnr) somehow.

> diff --git a/target/arm/mte_helper.c b/target/arm/mte_helper.c
> index 153bd1e9df..d55f8d1e1e 100644
> --- a/target/arm/mte_helper.c
> +++ b/target/arm/mte_helper.c
> @@ -565,6 +565,16 @@ static void mte_check_fail(CPUARMState *env, uint32_t desc,
>              select = 0;
>          }
>          env->cp15.tfsr_el[el] |= 1 << select;
> +#ifdef CONFIG_USER_ONLY
> +        /*
> +         * Stand in for a timer irq, setting _TIF_MTE_ASYNC_FAULT,
> +         * which then sends a SIGSEGV when the thread is next scheduled.
> +         * This cpu will return to the main loop at the end of the TB,
> +         * which is rather sooner than "normal".  But the alternative
> +         * is waiting until the next syscall.
> +         */
> +        qemu_cpu_kick(env_cpu(env));
> +#endif
>          break;

This does the right thing, but qemu_cpu_kick() is one of those
functions that fall into the category of "not used much at all in the
codebase", which always makes me wonder if there's a reason.
(In particular there's exactly one use in the whole of target/
right now.) I suppose the case of "helper function wants to cause
us to leave the TB loop but not to abort the current insn" is
an unusual one...

thanks
-- PMM
Richard Henderson Jan. 28, 2021, 8:49 a.m. UTC | #2
On 1/22/21 3:59 AM, Peter Maydell wrote:
> On Fri, 15 Jan 2021 at 22:47, Richard Henderson
> <richard.henderson@linaro.org> wrote:
>>
>> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> 
> So when does the real kernel report async MTE exceptions to userspace?
> The commit message would be a good place to briefly describe the
> kernel's strategy and where QEMU differs from it (if anywhere)...

I can add that, sure.

>>          case EXCP_INTERRUPT:
>> -            /* just indicate that signals should be handled asap */
>> +            /* Just indicate that signals should be handled asap. */
>> +            check_mte_async_fault(env, &info);
>>              break;
>>          case EXCP_UDEF:
>>              info.si_signo = TARGET_SIGILL;
> 
> So this doesn't guarantee to check the async-fault status on
> every exit from cpu_exec(), which means we might miss things.
> For instance I think this slightly contrived example would not
> ever take the SEGV:
>     STR x0, [x1]   # with a bad tag
>     YIELD
> l:  B l
> 
> because the STR and YIELD go into the same TB, the YIELD causes us
> to leave the TB with EXCP_YIELD, we don't check for an async fault
> in that code path, and then we'll go into the infinite loop and
> have nothing to prompt us to come out and look at the async fault flags.

I'll add that test case to the smoke test.

> Does it work if we just always queue the SEGV on exit from cpu_exec()
> and let the signal handling machinery prioritize if we also pend
> some other signal because this was an EXCP_UDEF or whatever?
> It would be neater if we could keep the fault-check outside the
> switch (trapnr) somehow.

I would think so.  I'll try that.

>> +#ifdef CONFIG_USER_ONLY
>> +        /*
>> +         * Stand in for a timer irq, setting _TIF_MTE_ASYNC_FAULT,
>> +         * which then sends a SIGSEGV when the thread is next scheduled.
>> +         * This cpu will return to the main loop at the end of the TB,
>> +         * which is rather sooner than "normal".  But the alternative
>> +         * is waiting until the next syscall.
>> +         */
>> +        qemu_cpu_kick(env_cpu(env));
>> +#endif
>>          break;
> 
> This does the right thing, but qemu_cpu_kick() is one of those
> functions that's in a category of "not used much at all in the
> codebase" and which always make me wonder if there's a reason.
> (In particular there's exactly one use in the whole of target/
> right now.) I suppose the case of "helper function wants to cause
> us to leave the TB loop but not to abort the current insn" is
> an unusual one...

Exactly.

Usually something in target/ calls (via mmio or whatnot) something in hw/ which
raises an interrupt, which does the kick.


r~
Peter Maydell Jan. 28, 2021, 10:44 a.m. UTC | #3
On Thu, 28 Jan 2021 at 08:49, Richard Henderson
<richard.henderson@linaro.org> wrote:
>
> On 1/22/21 3:59 AM, Peter Maydell wrote:
> > Does it work if we just always queue the SEGV on exit from cpu_exec()
> > and let the signal handling machinery prioritize if we also pend
> > some other signal because this was an EXCP_UDEF or whatever?
> > It would be neater if we could keep the fault-check outside the
> > switch (trapnr) somehow.
>
> I would think so.  I'll try that.

In particular it seems worth testing whether 'singlestep in gdbstub
over an insn that takes an async MTE fault' has reasonable user-facing
behaviour.

thanks
-- PMM
diff mbox series

Patch

diff --git a/linux-user/aarch64/target_signal.h b/linux-user/aarch64/target_signal.h
index 777fb667fe..18013e1b23 100644
--- a/linux-user/aarch64/target_signal.h
+++ b/linux-user/aarch64/target_signal.h
@@ -21,6 +21,7 @@  typedef struct target_sigaltstack {
 
 #include "../generic/signal.h"
 
+#define TARGET_SEGV_MTEAERR  8  /* Asynchronous ARM MTE error */
 #define TARGET_SEGV_MTESERR  9  /* Synchronous ARM MTE exception */
 
 #define TARGET_ARCH_HAS_SETUP_FRAME
diff --git a/linux-user/aarch64/cpu_loop.c b/linux-user/aarch64/cpu_loop.c
index 6867f0db2b..6160a401bd 100644
--- a/linux-user/aarch64/cpu_loop.c
+++ b/linux-user/aarch64/cpu_loop.c
@@ -72,6 +72,21 @@ 
         put_user_u16(__x, (gaddr));                     \
     })
 
+static bool check_mte_async_fault(CPUARMState *env, target_siginfo_t *info)
+{
+    if (likely(env->cp15.tfsr_el[0] == 0)) {
+        return false;
+    }
+
+    env->cp15.tfsr_el[0] = 0;
+    info->si_signo = TARGET_SIGSEGV;
+    info->si_errno = 0;
+    info->_sifields._sigfault._addr = 0;
+    info->si_code = TARGET_SEGV_MTEAERR;
+    queue_signal(env, info->si_signo, QEMU_SI_FAULT, info);
+    return true;
+}
+
 /* AArch64 main loop */
 void cpu_loop(CPUARMState *env)
 {
@@ -88,15 +103,13 @@  void cpu_loop(CPUARMState *env)
 
         switch (trapnr) {
         case EXCP_SWI:
-            ret = do_syscall(env,
-                             env->xregs[8],
-                             env->xregs[0],
-                             env->xregs[1],
-                             env->xregs[2],
-                             env->xregs[3],
-                             env->xregs[4],
-                             env->xregs[5],
-                             0, 0);
+            if (check_mte_async_fault(env, &info)) {
+                ret = -TARGET_ERESTARTSYS;
+            } else {
+                ret = do_syscall(env, env->xregs[8], env->xregs[0],
+                                 env->xregs[1], env->xregs[2], env->xregs[3],
+                                 env->xregs[4], env->xregs[5], 0, 0);
+            }
             if (ret == -TARGET_ERESTARTSYS) {
                 env->pc -= 4;
             } else if (ret != -TARGET_QEMU_ESIGRETURN) {
@@ -104,7 +117,8 @@  void cpu_loop(CPUARMState *env)
             }
             break;
         case EXCP_INTERRUPT:
-            /* just indicate that signals should be handled asap */
+            /* Just indicate that signals should be handled asap. */
+            check_mte_async_fault(env, &info);
             break;
         case EXCP_UDEF:
             info.si_signo = TARGET_SIGILL;
diff --git a/target/arm/mte_helper.c b/target/arm/mte_helper.c
index 153bd1e9df..d55f8d1e1e 100644
--- a/target/arm/mte_helper.c
+++ b/target/arm/mte_helper.c
@@ -565,6 +565,16 @@  static void mte_check_fail(CPUARMState *env, uint32_t desc,
             select = 0;
         }
         env->cp15.tfsr_el[el] |= 1 << select;
+#ifdef CONFIG_USER_ONLY
+        /*
+         * Stand in for a timer irq, setting _TIF_MTE_ASYNC_FAULT,
+         * which then sends a SIGSEGV when the thread is next scheduled.
+         * This cpu will return to the main loop at the end of the TB,
+         * which is rather sooner than "normal".  But the alternative
+         * is waiting until the next syscall.
+         */
+        qemu_cpu_kick(env_cpu(env));
+#endif
         break;
 
     default: