diff mbox series

[v1,2/2] x86: Implement clock_nanosleep{_time64} syscall for x86 only.

Message ID 20230608090050.2056824-2-goldstein.w.n@gmail.com
State New
Headers show
Series [v1,1/2] x86: Implement sched_yield syscall for x86 only. | expand

Commit Message

Noah Goldstein June 8, 2023, 9 a.m. UTC
We slightly optimize it by using `vzeroall` before the actual syscall.
This returns the SSE, AVX, and ZMM_HI256 xsave/xrstor states to the
init-state which allows the imminent context switch to skip
saving/restoring those states.
---
 sysdeps/unix/sysv/linux/clock_nanosleep.c     | 33 ++++++--
 .../unix/sysv/linux/kernel-posix-cpu-timers.h |  4 +
 .../unix/sysv/linux/x86_64/clock_nanosleep.c  | 82 +++++++++++++++++++
 3 files changed, 113 insertions(+), 6 deletions(-)
 create mode 100644 sysdeps/unix/sysv/linux/x86_64/clock_nanosleep.c
diff mbox series

Patch

diff --git a/sysdeps/unix/sysv/linux/clock_nanosleep.c b/sysdeps/unix/sysv/linux/clock_nanosleep.c
index ac2d810632..31a2aa24af 100644
--- a/sysdeps/unix/sysv/linux/clock_nanosleep.c
+++ b/sysdeps/unix/sysv/linux/clock_nanosleep.c
@@ -24,10 +24,19 @@ 
 
 #include <shlib-compat.h>
 
+#ifndef CLOCK_NANOSLEEP_TIME64
+# define CLOCK_NANOSLEEP_TIME64 __clock_nanosleep_time64
+# define CLOCK_NANOSLEEP __clock_nanosleep
+# define STATIC
+# define TARGET
+# define MAKE_DEFS
+# define PREPARE_CONTEXT_SWITCH()
+#endif
+
 /* We can simply use the syscall.  The CPU clocks are not supported
    with this function.  */
-int
-__clock_nanosleep_time64 (clockid_t clock_id, int flags,
+STATIC int TARGET
+CLOCK_NANOSLEEP_TIME64 (clockid_t clock_id, int flags,
 			  const struct __timespec64 *req,
 			  struct __timespec64 *rem)
 {
@@ -44,6 +53,7 @@  __clock_nanosleep_time64 (clockid_t clock_id, int flags,
 #endif
 
   int r;
+  PREPARE_CONTEXT_SWITCH ();
 #ifdef __ASSUME_TIME64_SYSCALLS
   r = INTERNAL_SYSCALL_CANCEL (clock_nanosleep_time64, clock_id, flags, req,
 			       rem);
@@ -72,17 +82,19 @@  __clock_nanosleep_time64 (clockid_t clock_id, int flags,
 }
 
 #if __TIMESIZE != 64
+# ifdef MAKE_DEFS
 libc_hidden_def (__clock_nanosleep_time64)
+# endif
 
-int
-__clock_nanosleep (clockid_t clock_id, int flags, const struct timespec *req,
+STATIC int
+CLOCK_NANOSLEEP (clockid_t clock_id, int flags, const struct timespec *req,
                    struct timespec *rem)
 {
   int r;
   struct __timespec64 treq64, trem64;
 
   treq64 = valid_timespec_to_timespec64 (*req);
-  r = __clock_nanosleep_time64 (clock_id, flags, &treq64,
+  r = CLOCK_NANOSLEEP_TIME64 (clock_id, flags, &treq64,
                                 rem != NULL ? &trem64 : NULL);
 
   if (r == EINTR && rem != NULL && (flags & TIMER_ABSTIME) == 0)
@@ -91,11 +103,20 @@  __clock_nanosleep (clockid_t clock_id, int flags, const struct timespec *req,
   return r;
 }
 #endif
+#ifdef MAKE_DEFS
 libc_hidden_def (__clock_nanosleep)
 versioned_symbol (libc, __clock_nanosleep, clock_nanosleep, GLIBC_2_17);
 /* clock_nanosleep moved to libc in version 2.17;
    old binaries may expect the symbol version it had in librt.  */
-#if SHLIB_COMPAT (libc, GLIBC_2_2, GLIBC_2_17)
+# if SHLIB_COMPAT (libc, GLIBC_2_2, GLIBC_2_17)
 strong_alias (__clock_nanosleep, __clock_nanosleep_2);
 compat_symbol (libc, __clock_nanosleep_2, clock_nanosleep, GLIBC_2_2);
+# endif
 #endif
+
+#undef CLOCK_NANOSLEEP_TIME64
+#undef CLOCK_NANOSLEEP
+#undef STATIC
+#undef TARGET
+#undef MAKE_DEFS
+#undef PREPARE_CONTEXT_SWITCH
diff --git a/sysdeps/unix/sysv/linux/kernel-posix-cpu-timers.h b/sysdeps/unix/sysv/linux/kernel-posix-cpu-timers.h
index bea1e0e62d..76a3be9e0d 100644
--- a/sysdeps/unix/sysv/linux/kernel-posix-cpu-timers.h
+++ b/sysdeps/unix/sysv/linux/kernel-posix-cpu-timers.h
@@ -1,3 +1,6 @@ 
+#ifndef _KERNEL_POSIX_CPU_TIMERS_H
+#define _KERNEL_POSIX_CPU_TIMERS_H
+
 /*
   Parameters for the Linux kernel ABI for CPU clocks, the bit fields within
   a clockid:
@@ -34,3 +37,4 @@  make_thread_cpuclock (unsigned int tid, clockid_t clock)
 
 #define PROCESS_CLOCK  make_process_cpuclock (0, CPUCLOCK_SCHED)
 #define THREAD_CLOCK   make_thread_cpuclock (0, CPUCLOCK_SCHED)
+#endif
diff --git a/sysdeps/unix/sysv/linux/x86_64/clock_nanosleep.c b/sysdeps/unix/sysv/linux/x86_64/clock_nanosleep.c
new file mode 100644
index 0000000000..ae9a7d1ead
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/x86_64/clock_nanosleep.c
@@ -0,0 +1,82 @@ 
+/* clock_nanosleep for x86_64.
+   Copyright (C) 2023 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+/* Only difference is if we have AVX, use vzeroall to clear inuse for SSE, AVX,
+   and ZMM_HI256 xsave/xrstor state.  This enables the init-state optimization
+   saving overhead on context switches.  */
+
+#include <time.h>
+#include <isa-level.h>
+#if ISA_SHOULD_BUILD(4)
+# include <immintrin.h>
+# define TARGET __attribute__ ((target ("avx")))
+# define PREPARE_CONTEXT_SWITCH() _mm256_zeroall ()
+# define CLOCK_NANOSLEEP_TIME64 __clock_nanosleep_time64_avx
+# define CLOCK_NANOSLEEP __clock_nanosleep_avx
+# define STATIC static
+# include <sysdeps/unix/sysv/linux/clock_nanosleep.c>
+#endif
+#if ISA_SHOULD_BUILD(2)
+# define TARGET
+# define PREPARE_CONTEXT_SWITCH()
+# define CLOCK_NANOSLEEP_TIME64 __clock_nanosleep_time64_generic
+# define CLOCK_NANOSLEEP __clock_nanosleep_generic
+# define STATIC static
+# include <sysdeps/unix/sysv/linux/clock_nanosleep.c>
+#endif
+
+#include <init-arch.h>
+#include <ifunc-init.h>
+
+static inline void *
+__clock_nanosleep_time64_ifunc_selector (void)
+{
+#if MINIMUM_X86_ISA_LEVEL >= 3
+  return __clock_nanosleep_time64_avx;
+#else
+  const struct cpu_features *cpu_features = __get_cpu_features ();
+  if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX))
+    return __clock_nanosleep_time64_avx;
+  return __clock_nanosleep_time64_generic;
+#endif
+}
+
+libc_ifunc (__clock_nanosleep_time64,
+	    __clock_nanosleep_time64_ifunc_selector ());
+#if __TIMESIZE != 64
+libc_hidden_def (__clock_nanosleep_time64);
+static inline void *
+__clock_nanosleep_ifunc_selector (void)
+{
+# if MINIMUM_X86_ISA_LEVEL >= 3
+  return __clock_nanosleep_avx;
+# else
+  const struct cpu_features *cpu_features = __get_cpu_features ();
+  if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX))
+    return __clock_nanosleep_avx;
+  return __clock_nanosleep_generic;
+# endif
+}
+libc_ifunc (__clock_nanosleep, __clock_nanosleep_ifunc_selector ());
+#endif
+libc_hidden_def (__clock_nanosleep);
+versioned_symbol (libc, __clock_nanosleep, clock_nanosleep, GLIBC_2_17);
+#if SHLIB_COMPAT(libc, GLIBC_2_2, GLIBC_2_17)
+strong_alias (__clock_nanosleep, __clock_nanosleep_2);
+compat_symbol (libc, __clock_nanosleep_2, clock_nanosleep, GLIBC_2_2);
+#endif