diff mbox series

[v8,8/8] Linux: Use rseq to accelerate getcpu

Message ID 20240206162801.882585-9-mjeanson@efficios.com
State New
Headers show
Series Extend rseq support | expand

Commit Message

Michael Jeanson Feb. 6, 2024, 4:28 p.m. UTC
On architectures that implement rseq_load32_load32_relaxed() (and thus
define RSEQ_HAS_LOAD32_LOAD32_RELAXED), when the node_id feature is
available, use rseq to fetch the cpu_id and node_id atomically with
respect to preemption and signal delivery to speed up getcpu() compared
to a vsyscall or system call implementation.

Loading both cpu_id and node_id atomically with respect to preemption
is required to keep the topology mapping between cpu_id and node_id
consistent; otherwise the thread could be migrated to another CPU
between the two loads.

On an aarch64 system (Snapdragon 8cx Gen 3), which lacks a vDSO for
getcpu(), we measured an improvement from 130 ns to 1 ns, while on
x86_64 (i7-8550U), which has a vDSO, we measured a more modest
improvement from 10 ns to 2 ns.

Co-authored-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Signed-off-by: Michael Jeanson <mjeanson@efficios.com>
---
 sysdeps/unix/sysv/linux/getcpu.c | 32 ++++++++++++++++++++++++++++++--
 1 file changed, 30 insertions(+), 2 deletions(-)

Comments

DJ Delorie Feb. 17, 2024, 3:57 a.m. UTC | #1
Michael Jeanson <mjeanson@efficios.com> writes:
> diff --git a/sysdeps/unix/sysv/linux/getcpu.c b/sysdeps/unix/sysv/linux/getcpu.c
> index 0e7c3238c9..7e34d6d1eb 100644
> --- a/sysdeps/unix/sysv/linux/getcpu.c
> +++ b/sysdeps/unix/sysv/linux/getcpu.c
> @@ -19,9 +19,10 @@
>  #include <sched.h>
>  #include <sysdep.h>
>  #include <sysdep-vdso.h>
> +#include <rseq-internal.h>
>  
> -int
> -__getcpu (unsigned int *cpu, unsigned int *node)
> +static int
> +vsyscall_getcpu (unsigned int *cpu, unsigned int *node)

Ok; this is the original function.  Any reason (other than "cleaner
diffs") to have this separate, and not just make it the #else clause
below?

Ah, it's called twice below.  Got it.  Ok.  Do we want to tag this as
always inline for the better CPU performance?

> @@ -29,5 +30,32 @@ __getcpu (unsigned int *cpu, unsigned int *node)

> +#if defined (RSEQ_SIG) && defined (RSEQ_HAS_LOAD32_LOAD32_RELAXED)
> +int
> +__getcpu (unsigned int *cpu, unsigned int *node)
> +{
> +  /* Check if rseq is registered and the node_id feature is available.  */
> +  if (__glibc_likely (rseq_node_id_available()))
> +  {
> +    struct rseq_area *rseq_area = rseq_get_area();
> +
> +    if (rseq_load32_load32_relaxed(cpu, &rseq_area->cpu_id,
> +                                   node, &rseq_area->node_id) == 0)
> +    {
> +      /* The critical section was not aborted, return 0.  */
> +      return 0;
> +    }
> +  }
> +
> +  return vsyscall_getcpu (cpu, node);
> +}
> +#else
> +int
> +__getcpu (unsigned int *cpu, unsigned int *node)
> +{
> +  return vsyscall_getcpu (cpu, node);
> +}
> +#endif
>  weak_alias (__getcpu, getcpu)
>  libc_hidden_def (__getcpu)

Ok.
Michael Jeanson Feb. 19, 2024, 10:14 p.m. UTC | #2
On 2024-02-16 22:57, DJ Delorie wrote:
> Michael Jeanson <mjeanson@efficios.com> writes:
>> diff --git a/sysdeps/unix/sysv/linux/getcpu.c b/sysdeps/unix/sysv/linux/getcpu.c
>> index 0e7c3238c9..7e34d6d1eb 100644
>> --- a/sysdeps/unix/sysv/linux/getcpu.c
>> +++ b/sysdeps/unix/sysv/linux/getcpu.c
>> @@ -19,9 +19,10 @@
>>   #include <sched.h>
>>   #include <sysdep.h>
>>   #include <sysdep-vdso.h>
>> +#include <rseq-internal.h>
>>   
>> -int
>> -__getcpu (unsigned int *cpu, unsigned int *node)
>> +static int
>> +vsyscall_getcpu (unsigned int *cpu, unsigned int *node)
> 
> Ok; this is the original function.  Any reason (other than "cleaner
> diffs") to have this separate, and not just make it the #else clause
> below?
> 
> Ah, it's called twice below.  Got it.  Ok.  Do we want to tag this as
> always inline for the better CPU performance?

Yes, will do.
diff mbox series

Patch

diff --git a/sysdeps/unix/sysv/linux/getcpu.c b/sysdeps/unix/sysv/linux/getcpu.c
index 0e7c3238c9..7e34d6d1eb 100644
--- a/sysdeps/unix/sysv/linux/getcpu.c
+++ b/sysdeps/unix/sysv/linux/getcpu.c
@@ -19,9 +19,10 @@ 
 #include <sched.h>
 #include <sysdep.h>
 #include <sysdep-vdso.h>
+#include <rseq-internal.h>
 
-int
-__getcpu (unsigned int *cpu, unsigned int *node)
+static int
+vsyscall_getcpu (unsigned int *cpu, unsigned int *node)
 {
 #ifdef HAVE_GETCPU_VSYSCALL
   return INLINE_VSYSCALL (getcpu, 3, cpu, node, NULL);
@@ -29,5 +30,32 @@  __getcpu (unsigned int *cpu, unsigned int *node)
   return INLINE_SYSCALL_CALL (getcpu, cpu, node, NULL);
 #endif
 }
+
+#if defined (RSEQ_SIG) && defined (RSEQ_HAS_LOAD32_LOAD32_RELAXED)
+int
+__getcpu (unsigned int *cpu, unsigned int *node)
+{
+  /* Check if rseq is registered and the node_id feature is available.  */
+  if (__glibc_likely (rseq_node_id_available()))
+  {
+    struct rseq_area *rseq_area = rseq_get_area();
+
+    if (rseq_load32_load32_relaxed(cpu, &rseq_area->cpu_id,
+                                   node, &rseq_area->node_id) == 0)
+    {
+      /* The critical section was not aborted, return 0.  */
+      return 0;
+    }
+  }
+
+  return vsyscall_getcpu (cpu, node);
+}
+#else
+int
+__getcpu (unsigned int *cpu, unsigned int *node)
+{
+  return vsyscall_getcpu (cpu, node);
+}
+#endif
 weak_alias (__getcpu, getcpu)
 libc_hidden_def (__getcpu)