diff mbox series

[v4,2/2] aarch64: Set the syscall register right before doing the syscall.

Message ID 20230417212034.3890596-3-josimmon@redhat.com
State New
Headers show
Series x86_64: aarch64: Set call number just before syscall | expand

Commit Message

Joe Simmons-Talbott April 17, 2023, 9:20 p.m. UTC
To make identifying syscalls easier during call tree analysis load the
syscall number just before performing the syscall.

Compiler optimizations can place quite a few instructions between the
setting of the syscall number and the syscall instruction.  During call
tree analysis the number of instructions between the two can lead to
more difficulty for both tools and humans in properly identifying the
syscall number.  Having the syscall number set in the prior instruction
to the syscall instruction makes this task easier and less error prone.
Being able to reliably identify syscalls made by a given API will make
it easier to understand and verify the safety and security of glibc.
---
 sysdeps/unix/sysv/linux/aarch64/sysdep.h | 20 +++++++++++++++++---
 1 file changed, 17 insertions(+), 3 deletions(-)

Comments

Szabolcs Nagy April 18, 2023, 12:57 p.m. UTC | #1
The 04/17/2023 17:20, Joe Simmons-Talbott via Libc-alpha wrote:
> To make identifying syscalls easier during call tree analysis load the
> syscall number just before performing the syscall.
> 
> Compiler optimizations can place quite a few instructions between the
> setting of the syscall number and the syscall instruction.  During call
> tree analysis the number of instructions between the two can lead to
> more difficulty for both tools and humans in properly identifying the
> syscall number.  Having the syscall number set in the prior instruction
> to the syscall instruction makes this task easier and less error prone.
> Being able to reliably identify syscalls made by a given API will make
> it easier to understand and verify the safety and security of glibc.

since the code has !__builtin_constant_p(name) case
how would that be handled by these tools?

> ---
>  sysdeps/unix/sysv/linux/aarch64/sysdep.h | 20 +++++++++++++++++---
>  1 file changed, 17 insertions(+), 3 deletions(-)
> 
> diff --git a/sysdeps/unix/sysv/linux/aarch64/sysdep.h b/sysdeps/unix/sysv/linux/aarch64/sysdep.h
> index e94d1703ad..b91656fdff 100644
> --- a/sysdeps/unix/sysv/linux/aarch64/sysdep.h
> +++ b/sysdeps/unix/sysv/linux/aarch64/sysdep.h
> @@ -167,14 +167,28 @@
>  
>  # define HAVE_CLONE3_WRAPPER		1
>  
> +# define MSTR_HELPER(x) # x
> +# define MSTR(x) MSTR_HELPER(x)
> +

i dont see this used.

>  # undef INTERNAL_SYSCALL_RAW
>  # define INTERNAL_SYSCALL_RAW(name, nr, args...)		\
>    ({ long _sys_result;						\
>       {								\
>         LOAD_ARGS_##nr (args)					\
> -       register long _x8 asm ("x8") = (name);			\
> -       asm volatile ("svc	0	// syscall " # name     \
> -		     : "=r" (_x0) : "r"(_x8) ASM_ARGS_##nr : "memory");	\
> +       if (__builtin_constant_p(name))				\
> +         asm volatile ("mov	x8, %1\n"			\
> +		       "svc	0	// syscall " # name	\
> +                       : "=r" (_x0)				\
> +		       : "i" (name) ASM_ARGS_##nr		\
> +		       : "x8", "memory");			\
> +       else							\
> +         {							\
> +           register long _x8 asm ("x8") = (name);		\
> +           asm volatile ("svc	0	// syscall " # name     \
> +		         : "=r" (_x0)				\
> +		         : "r"(_x8) ASM_ARGS_##nr		\
> +		         : "memory");				\
> +         }							\
>         _sys_result = _x0;					\
>       }								\
>       _sys_result; })

i guess this is ok.

i would probably move the generated comment to the
mov x8,%1 line and remove it in the non-const case.
but i rarely look at compiler output..

it seems the only cases when the name is non-const are

nptl/nptl_setxid.c:  result = INTERNAL_SYSCALL_NCS (xidcmd->syscall_no, 3, xidcmd->id[0],
nptl/nptl_setxid.c:  result = INTERNAL_SYSCALL_NCS (cmdp->syscall_no, 3,

which in turn only happens for syscalls

sysdeps/unix/sysv/linux/setuid.c:  return INLINE_SETXID_SYSCALL (setuid32, 1, uid);
sysdeps/unix/sysv/linux/setuid.c:  return INLINE_SETXID_SYSCALL (setuid, 1, uid);
sysdeps/unix/sysv/linux/setreuid.c:  return INLINE_SETXID_SYSCALL (setreuid32, 2, ruid, euid);
sysdeps/unix/sysv/linux/setreuid.c:  return INLINE_SETXID_SYSCALL (setreuid, 2, ruid, euid);
sysdeps/unix/sysv/linux/setegid.c:  result = INLINE_SETXID_SYSCALL (setresgid32, 3, -1, gid, -1);
sysdeps/unix/sysv/linux/setegid.c:  result = INLINE_SETXID_SYSCALL (setresgid, 3, -1, gid, -1);
sysdeps/unix/sysv/linux/setregid.c:  return INLINE_SETXID_SYSCALL (setregid32, 2, rgid, egid);
sysdeps/unix/sysv/linux/setregid.c:  return INLINE_SETXID_SYSCALL (setregid, 2, rgid, egid);
sysdeps/unix/sysv/linux/setgid.c:  return INLINE_SETXID_SYSCALL (setgid32, 1, gid);
sysdeps/unix/sysv/linux/setgid.c:  return INLINE_SETXID_SYSCALL (setgid, 1, gid);
sysdeps/unix/sysv/linux/setgroups.c:  return INLINE_SETXID_SYSCALL (setgroups32, 2, n, groups);
sysdeps/unix/sysv/linux/setgroups.c:  return INLINE_SETXID_SYSCALL (setgroups, 2, n, groups);
sysdeps/unix/sysv/linux/setresgid.c:  return INLINE_SETXID_SYSCALL (setresgid32, 3, rgid, egid, sgid);
sysdeps/unix/sysv/linux/setresgid.c:  return INLINE_SETXID_SYSCALL (setresgid, 3, rgid, egid, sgid);
sysdeps/unix/sysv/linux/setresuid.c:  return INLINE_SETXID_SYSCALL (setresuid32, 3, ruid, euid, suid);
sysdeps/unix/sysv/linux/setresuid.c:  return INLINE_SETXID_SYSCALL (setresuid, 3, ruid, euid, suid);
sysdeps/unix/sysv/linux/seteuid.c:  result = INLINE_SETXID_SYSCALL (setresuid32, 3, -1, uid, -1);
sysdeps/unix/sysv/linux/seteuid.c:  result = INLINE_SETXID_SYSCALL (setresuid, 3, -1, uid, -1);

so if we wanted we could have a switch statement in setxid
and make all syscalls compile time const (other than
explicit external calls to syscall())
Joe Simmons-Talbott April 18, 2023, 7:33 p.m. UTC | #2
On Tue, Apr 18, 2023 at 01:57:28PM +0100, Szabolcs Nagy wrote:
> The 04/17/2023 17:20, Joe Simmons-Talbott via Libc-alpha wrote:
> > To make identifying syscalls easier during call tree analysis load the
> > syscall number just before performing the syscall.
> > 
> > Compiler optimizations can place quite a few instructions between the
> > setting of the syscall number and the syscall instruction.  During call
> > tree analysis the number of instructions between the two can lead to
> > more difficulty for both tools and humans in properly identifying the
> > syscall number.  Having the syscall number set in the prior instruction
> > to the syscall instruction makes this task easier and less error prone.
> > Being able to reliably identify syscalls made by a given API will make
> > it easier to understand and verify the safety and security of glibc.
> 
> since the code has !__builtin_constant_p(name) case
> how would that be handled by these tools?
> 
> > ---
> >  sysdeps/unix/sysv/linux/aarch64/sysdep.h | 20 +++++++++++++++++---
> >  1 file changed, 17 insertions(+), 3 deletions(-)
> > 
> > diff --git a/sysdeps/unix/sysv/linux/aarch64/sysdep.h b/sysdeps/unix/sysv/linux/aarch64/sysdep.h
> > index e94d1703ad..b91656fdff 100644
> > --- a/sysdeps/unix/sysv/linux/aarch64/sysdep.h
> > +++ b/sysdeps/unix/sysv/linux/aarch64/sysdep.h
> > @@ -167,14 +167,28 @@
> >  
> >  # define HAVE_CLONE3_WRAPPER		1
> >  
> > +# define MSTR_HELPER(x) # x
> > +# define MSTR(x) MSTR_HELPER(x)
> > +
> 
> i dont see this used.
> 
> >  # undef INTERNAL_SYSCALL_RAW
> >  # define INTERNAL_SYSCALL_RAW(name, nr, args...)		\
> >    ({ long _sys_result;						\
> >       {								\
> >         LOAD_ARGS_##nr (args)					\
> > -       register long _x8 asm ("x8") = (name);			\
> > -       asm volatile ("svc	0	// syscall " # name     \
> > -		     : "=r" (_x0) : "r"(_x8) ASM_ARGS_##nr : "memory");	\
> > +       if (__builtin_constant_p(name))				\
> > +         asm volatile ("mov	x8, %1\n"			\
> > +		       "svc	0	// syscall " # name	\
> > +                       : "=r" (_x0)				\
> > +		       : "i" (name) ASM_ARGS_##nr		\
> > +		       : "x8", "memory");			\
> > +       else							\
> > +         {							\
> > +           register long _x8 asm ("x8") = (name);		\
> > +           asm volatile ("svc	0	// syscall " # name     \
> > +		         : "=r" (_x0)				\
> > +		         : "r"(_x8) ASM_ARGS_##nr		\
> > +		         : "memory");				\
> > +         }							\
> >         _sys_result = _x0;					\
> >       }								\
> >       _sys_result; })
> 
> i guess this is ok.
> 
> i would probably move the generated comment to the
> mov x8,%1 line and remove it in the non-const case.
> but i rarely look at compiler output..
> 
> it seems the only cases when the name is non-const are
> 
> nptl/nptl_setxid.c:  result = INTERNAL_SYSCALL_NCS (xidcmd->syscall_no, 3, xidcmd->id[0],
> nptl/nptl_setxid.c:  result = INTERNAL_SYSCALL_NCS (cmdp->syscall_no, 3,
> 
> which in turn only happens for syscalls
> 
> sysdeps/unix/sysv/linux/setuid.c:  return INLINE_SETXID_SYSCALL (setuid32, 1, uid);
> sysdeps/unix/sysv/linux/setuid.c:  return INLINE_SETXID_SYSCALL (setuid, 1, uid);
> sysdeps/unix/sysv/linux/setreuid.c:  return INLINE_SETXID_SYSCALL (setreuid32, 2, ruid, euid);
> sysdeps/unix/sysv/linux/setreuid.c:  return INLINE_SETXID_SYSCALL (setreuid, 2, ruid, euid);
> sysdeps/unix/sysv/linux/setegid.c:  result = INLINE_SETXID_SYSCALL (setresgid32, 3, -1, gid, -1);
> sysdeps/unix/sysv/linux/setegid.c:  result = INLINE_SETXID_SYSCALL (setresgid, 3, -1, gid, -1);
> sysdeps/unix/sysv/linux/setregid.c:  return INLINE_SETXID_SYSCALL (setregid32, 2, rgid, egid);
> sysdeps/unix/sysv/linux/setregid.c:  return INLINE_SETXID_SYSCALL (setregid, 2, rgid, egid);
> sysdeps/unix/sysv/linux/setgid.c:  return INLINE_SETXID_SYSCALL (setgid32, 1, gid);
> sysdeps/unix/sysv/linux/setgid.c:  return INLINE_SETXID_SYSCALL (setgid, 1, gid);
> sysdeps/unix/sysv/linux/setgroups.c:  return INLINE_SETXID_SYSCALL (setgroups32, 2, n, groups);
> sysdeps/unix/sysv/linux/setgroups.c:  return INLINE_SETXID_SYSCALL (setgroups, 2, n, groups);
> sysdeps/unix/sysv/linux/setresgid.c:  return INLINE_SETXID_SYSCALL (setresgid32, 3, rgid, egid, sgid);
> sysdeps/unix/sysv/linux/setresgid.c:  return INLINE_SETXID_SYSCALL (setresgid, 3, rgid, egid, sgid);
> sysdeps/unix/sysv/linux/setresuid.c:  return INLINE_SETXID_SYSCALL (setresuid32, 3, ruid, euid, suid);
> sysdeps/unix/sysv/linux/setresuid.c:  return INLINE_SETXID_SYSCALL (setresuid, 3, ruid, euid, suid);
> sysdeps/unix/sysv/linux/seteuid.c:  result = INLINE_SETXID_SYSCALL (setresuid32, 3, -1, uid, -1);
> sysdeps/unix/sysv/linux/seteuid.c:  result = INLINE_SETXID_SYSCALL (setresuid, 3, -1, uid, -1);
> 
> so if we wanted we could have a switch statement in setxid
> and make all syscalls compile time const (other than
> explicit external calls to syscall())
> 

If we go this route should I include #ifdef guards around each case or
just the 32 bit ones?

Thanks,
Joe
diff mbox series

Patch

diff --git a/sysdeps/unix/sysv/linux/aarch64/sysdep.h b/sysdeps/unix/sysv/linux/aarch64/sysdep.h
index e94d1703ad..b91656fdff 100644
--- a/sysdeps/unix/sysv/linux/aarch64/sysdep.h
+++ b/sysdeps/unix/sysv/linux/aarch64/sysdep.h
@@ -167,14 +167,28 @@ 
 
 # define HAVE_CLONE3_WRAPPER		1
 
+# define MSTR_HELPER(x) # x
+# define MSTR(x) MSTR_HELPER(x)
+
 # undef INTERNAL_SYSCALL_RAW
 # define INTERNAL_SYSCALL_RAW(name, nr, args...)		\
   ({ long _sys_result;						\
      {								\
        LOAD_ARGS_##nr (args)					\
-       register long _x8 asm ("x8") = (name);			\
-       asm volatile ("svc	0	// syscall " # name     \
-		     : "=r" (_x0) : "r"(_x8) ASM_ARGS_##nr : "memory");	\
+       if (__builtin_constant_p(name))				\
+         asm volatile ("mov	x8, %1\n"			\
+		       "svc	0	// syscall " # name	\
+                       : "=r" (_x0)				\
+		       : "i" (name) ASM_ARGS_##nr		\
+		       : "x8", "memory");			\
+       else							\
+         {							\
+           register long _x8 asm ("x8") = (name);		\
+           asm volatile ("svc	0	// syscall " # name     \
+		         : "=r" (_x0)				\
+		         : "r"(_x8) ASM_ARGS_##nr		\
+		         : "memory");				\
+         }							\
        _sys_result = _x0;					\
      }								\
      _sys_result; })