diff mbox series

LoongArch: Add fstat64 and fstatat64.

Message ID 20240911094020.275599-1-caiyinyu@loongson.cn
State New
Headers show
Series LoongArch: Add fstat64 and fstatat64. | expand

Commit Message

caiyinyu Sept. 11, 2024, 9:40 a.m. UTC
In Linux 6.11, the fstat (80) and newfstatat (79) syscalls have been
reintroduced. The definitions of these two syscalls have already been
backported to version 6.10.6 in the stable tree.

In this patch, we are adding dynamically probed implementations of
fstat64 and fstatat64 specifically for syscalls 79 and 80. This ensures
compatibility while maintaining relatively good performance on kernels
that both support and do not support syscalls 79 and 80.

By running an experiment where we invoke fstat64 and fstatat64 100
million times, we gathered the following efficiency statistics:

1. On kernels that support syscalls 79 and 80 (tested on version
   6.10.6), fstat64 and fstatat64 can directly invoke these syscalls
   [1]. The time overhead of our dynamic probing implementation
   increased by 0.5%-2.5% compared to directly calling the syscalls.
2. On kernels that support syscalls 79 and 80 (tested on version
   6.10.6), our dynamically probed implementation reduces the time
   overhead by more than 60% compared to directly invoking the statx
   (291) syscall.
3. On kernels that do not support syscalls 79 and 80 (tested on version
   6.8.0), fstat64 and fstatat64 fall back to using the statx (291)
   syscall (as before). In this case, the overhead of our dynamic
   probing implementation increased by 0.1%-1.3% compared to directly
   invoking statx.

[1][PATCH v5] Loongarch: adapt for the re-introduction of fstat and newfstatat in 6.10.6
https://sourceware.org/pipermail/libc-alpha/2024-August/159469.html
---
 .../unix/sysv/linux/loongarch/arch-syscall.h  |  2 +
 sysdeps/unix/sysv/linux/loongarch/fstat64.c   | 64 +++++++++++++++++++
 sysdeps/unix/sysv/linux/loongarch/fstatat64.c | 59 +++++++++++++++++
 .../linux/loongarch/fstatat64_time64_statx.h  | 54 ++++++++++++++++
 4 files changed, 179 insertions(+)
 create mode 100644 sysdeps/unix/sysv/linux/loongarch/fstat64.c
 create mode 100644 sysdeps/unix/sysv/linux/loongarch/fstatat64.c
 create mode 100644 sysdeps/unix/sysv/linux/loongarch/fstatat64_time64_statx.h

Comments

Xi Ruoyao Sept. 11, 2024, 11 a.m. UTC | #1
On Wed, 2024-09-11 at 17:40 +0800, caiyinyu wrote:

/* snip */

> diff --git a/sysdeps/unix/sysv/linux/loongarch/arch-syscall.h b/sysdeps/unix/sysv/linux/loongarch/arch-syscall.h
> index 8bb82448a7..7e732256fd 100644
> --- a/sysdeps/unix/sysv/linux/loongarch/arch-syscall.h
> +++ b/sysdeps/unix/sysv/linux/loongarch/arch-syscall.h

No, we are not allowed to edit this file manually.  We must wait for the
global maintainer to regenerate this file (and all arch-syscall.h files)
after Linux 6.11 release.

And we should avoid the runtime probe if --enable-kernel-6.10.6 or
later.

And we shouldn't use fstat64 or fstatat64 on LA32 (because they'll
suffer y2038 issue on 32-bit architecture).  IMO it's better to add the
check now so we don't need to remember to change this again when we add
LA32 support.

(See below for my suggestion to handle --enable-kernel and 32-bit but
there may be better ways.)

/* snip */


> diff --git a/sysdeps/unix/sysv/linux/loongarch/fstat64.c b/sysdeps/unix/sysv/linux/loongarch/fstat64.c
> new file mode 100644
> index 0000000000..b1a9e9c6a4
> --- /dev/null
> +++ b/sysdeps/unix/sysv/linux/loongarch/fstat64.c
> @@ -0,0 +1,64 @@
> +/* Get file status.  LoongArch version.

/* snip */

> +   <https://www.gnu.org/licenses/>.  */
> +

Here add

#if __LINUX_KERNEL_VERSION >= 0x060a06 || !defined(__loongarch_lp64)
#include "../fstat64.c"
#else

and add #endif at the end of the file.  This should be enough to avoid
the runtime probe of --enable-kernel=6.10.6 or later, and avoid using
fstat64/fstatat64 for 32-bit.

> +#define __fstat __redirect___fstat
> +#define fstat   __redirect_fstat
> +#include <sys/stat.h>
> +#undef __fstat
> +#undef fstat
> +
> +#include "fstatat64_time64_statx.h"
> +
> +int __fstat_syscall_supported attribute_hidden = 1;

static int __fstat_syscall_supported = 1;

because it's only used in this translation unit.  Or you may want to
remove __newfstatat_syscall_supported and reuse this in fstatat64.c.

> +int
> +__fstat64 (int fd, struct __stat64_t64 *buf)
> +{
> +  int r;
> +  extern int __fstat_syscall_supported attribute_hidden;

Remove this.  The definition is in the same TU, so no need of this.

> +  int supported = atomic_load_relaxed (&__fstat_syscall_supported);
> +  if (__glibc_likely (supported))

I'd remove __glibc_likely here.  We don't have a priori knowledge of
what kernel version we are running on (unless --enable-kernel=6.10.6 or
later but then we should be just using ../fstat64.c).

> +  {
> +    r = INTERNAL_SYSCALL_CALL (fstat, fd, buf);
> +    if (r == 0)
> +      return r;
> +    else if (r != -ENOSYS)
> +      return INLINE_SYSCALL_ERROR_RETURN_VALUE (-r);
> +
> +    atomic_store_relaxed (&__fstat_syscall_supported, 0);
> +  }
> +
> +  if (fd < 0)
> +  {
> +    __set_errno (EBADF);
> +    return -1;
> +  }
> +
> +  r = fstatat64_time64_statx (fd, "", buf, AT_EMPTY_PATH);
> +  return INTERNAL_SYSCALL_ERROR_P (r)
> +    ? INLINE_SYSCALL_ERROR_RETURN_VALUE (-r)
> +    : 0;
> +}
> +
> +hidden_def (__fstat64)
> +weak_alias (__fstat64, fstat64)
> +
> +#if XSTAT_IS_XSTAT64

On LoongArch XSTAT_IS_XSTAT64 is always 1, so we can remove this #if.

> +strong_alias (__fstat64, __fstat)
> +weak_alias (__fstat64, fstat)
> +#endif
> diff --git a/sysdeps/unix/sysv/linux/loongarch/fstatat64.c b/sysdeps/unix/sysv/linux/loongarch/fstatat64.c
> new file mode 100644
> index 0000000000..64f5b3b8f6
> --- /dev/null
> +++ b/sysdeps/unix/sysv/linux/loongarch/fstatat64.c
> @@ -0,0 +1,59 @@
> +/* Get file status.  LoongArch version.

/* snip */

> +   <https://www.gnu.org/licenses/>.  */

Likewise, add

#if __LINUX_KERNEL_VERSION >= 0x060a06 || !defined(__loongarch_lp64)
#include "../fstatat64.c"
#else

here.

> +#define __fstatat __redirect___fstatat
> +#define fstatat   __redirect_fstatat
> +#include <sys/stat.h>
> +#undef __fstatat
> +#undef fstatat
> +
> +#include "fstatat64_time64_statx.h"
> +
> +int __newfstatat_syscall_supported attribute_hidden = 1;

Likewise, use static (or remove it if you want to reuse
__fstat_syscall_supported instead).

> +
> +int
> +__fstatat64 (int fd, const char *file, struct __stat64_t64 *buf,
> +		    int flag)
> +{
> +  int r;
> +  extern int __newfstatat_syscall_supported attribute_hidden;

Likewise, remove this.  Or change to __fstat_syscall_supported if you
want to reuse it.

/* snip */

> +#if XSTAT_IS_XSTAT64

Likewise, remove this #if.

> +strong_alias (__fstatat64, __fstatat)
> +weak_alias (__fstatat64, fstatat)
> +strong_alias (__fstatat64, __GI___fstatat);
> +#endif


> diff --git a/sysdeps/unix/sysv/linux/loongarch/fstatat64_time64_statx.h b/sysdeps/unix/sysv/linux/loongarch/fstatat64_time64_statx.h
> new file mode 100644
> index 0000000000..97920a820f
> --- /dev/null
> +++ b/sysdeps/unix/sysv/linux/loongarch/fstatat64_time64_statx.h

Hmm this is duplicating a large hunk of code from
sysdeps/unix/sysv/linux/fstatat64.c.  Maybe rename to
sysdeps/unix/sysv/linux/fstatat64_time64_statx.h and include it in both
linux/fstatat64.c and linux/loongarch/fstatat64.c?

> @@ -0,0 +1,54 @@
> +/* Get file status.  LoongArch version.
> +   Copyright (C) 2024 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <https://www.gnu.org/licenses/>.  */
> +
> +#include <fcntl.h>
> +#include <sys/sysmacros.h>
> +#include <internal-stat.h>
> +
> +static inline int
> +fstatat64_time64_statx (int fd, const char *file, struct __stat64_t64 *buf,
> +			int flag)
> +{
> +  struct statx tmp;
> +  int r = INTERNAL_SYSCALL_CALL (statx, fd, file, AT_NO_AUTOMOUNT | flag,
> +				 STATX_BASIC_STATS, &tmp);
> +  if (r != 0)
> +    return r;
> +
> +  *buf = (struct __stat64_t64)
> +  {
> +    .st_dev = __gnu_dev_makedev (tmp.stx_dev_major, tmp.stx_dev_minor),
> +    .st_rdev = __gnu_dev_makedev (tmp.stx_rdev_major, tmp.stx_rdev_minor),
> +    .st_ino = tmp.stx_ino,
> +    .st_mode = tmp.stx_mode,
> +    .st_nlink = tmp.stx_nlink,
> +    .st_uid = tmp.stx_uid,
> +    .st_gid = tmp.stx_gid,
> +    .st_atime = tmp.stx_atime.tv_sec,
> +    .st_atim.tv_nsec = tmp.stx_atime.tv_nsec,
> +    .st_mtime = tmp.stx_mtime.tv_sec,
> +    .st_mtim.tv_nsec = tmp.stx_mtime.tv_nsec,
> +    .st_ctime = tmp.stx_ctime.tv_sec,
> +    .st_ctim.tv_nsec = tmp.stx_ctime.tv_nsec,
> +    .st_size = tmp.stx_size,
> +    .st_blocks = tmp.stx_blocks,
> +    .st_blksize = tmp.stx_blksize,
> +  };
> +
> +  return r;
> +}
Miao Wang Sept. 11, 2024, 4:50 p.m. UTC | #2
> 2024年9月11日 17:40,caiyinyu <caiyinyu@loongson.cn> 写道:
> 
> In Linux 6.11, the fstat (80) and newfstatat (79) syscalls have been
> reintroduced. The definitions of these two syscalls have already been
> backported to version 6.10.6 in the stable tree.
> 
> In this patch, we are adding dynamically probed implementations of
> fstat64 and fstatat64 specifically for syscalls 79 and 80. This ensures
> compatibility while maintaining relatively good performance on kernels
> that both support and do not support syscalls 79 and 80.
> 
> By running an experiment where we invoke fstat64 and fstatat64 100
> million times, we gathered the following efficiency statistics:
> 
> 1. On kernels that support syscalls 79 and 80 (tested on version
>   6.10.6), fstat64 and fstatat64 can directly invoke these syscalls
>   [1]. The time overhead of our dynamic probing implementation
>   increased by 0.5%-2.5% compared to directly calling the syscalls.
> 2. On kernels that support syscalls 79 and 80 (tested on version
>   6.10.6), our dynamically probed implementation reduces the time
>   overhead by more than 60% compared to directly invoking the statx
>   (291) syscall.
> 3. On kernels that do not support syscalls 79 and 80 (tested on version
>   6.8.0), fstat64 and fstatat64 fall back to using the statx (291)
>   syscall (as before). In this case, the overhead of our dynamic
>   probing implementation increased by 0.1%-1.3% compared to directly
>   invoking statx.

Hi, I tried to reproduce your test result, by invoking fstat(2) for 1M times,
repeated by 100 times. The test was carried out on 3A6K, using 6.11-rc7 kernel.
The result is:

fstat using statx(fd, "", AT_EMPTY_PATH) (current glibc implementation)
mean: 210420528.100000(ns), sigma: 996145.440248(ns)

statx(fd, NULL, AT_EMPTY_PATH) (for comparison)
mean: 199410620.600000(ns), sigma: 111561.012101(ns)

fstat using statx(fd, NULL, AT_EMPTY_PATH) (The implementation I proposed)
mean: 208258640.700000(ns), sigma: 468451.704836(ns)

fstat using fstat(fd) (Your patch)
mean: 192936673.800000(ns), sigma: 251927.136307(ns)

As we can see in the result, the implementation using fstat is 8.31% faster
than the current implementation, instead of "reducing the time overhead by
more than 60%".

I prefer introducing dynamic probing of statx(fd, NULL, AT_EMPTY_PATH), which
can benefit all 32-bit platforms relying on statx for 64-bit timestamps[2], as
well as 64-bit loongarch, not only for performance, but also for seccomp
sandboxing. Furthermore, by doing so, we can eliminate the need of maintaining
our own copy of fstat in loongarch.

[2]: [PATCH v5] linux: Add linux statx(fd, NULL, AT_EMPTY_PATH) support
https://sourceware.org/pipermail/libc-alpha/2024-August/159499.html


Cheers,

Miao Wang
Miao Wang Sept. 11, 2024, 6 p.m. UTC | #3
> 2024年9月12日 00:50,Miao Wang <shankerwangmiao@gmail.com> 写道:
> 
>> 
>> 2024年9月11日 17:40,caiyinyu <caiyinyu@loongson.cn> 写道:
>> 
>> In Linux 6.11, the fstat (80) and newfstatat (79) syscalls have been
>> reintroduced. The definitions of these two syscalls have already been
>> backported to version 6.10.6 in the stable tree.
>> 
>> In this patch, we are adding dynamically probed implementations of
>> fstat64 and fstatat64 specifically for syscalls 79 and 80. This ensures
>> compatibility while maintaining relatively good performance on kernels
>> that both support and do not support syscalls 79 and 80.
>> 
>> By running an experiment where we invoke fstat64 and fstatat64 100
>> million times, we gathered the following efficiency statistics:
>> 
>> 1. On kernels that support syscalls 79 and 80 (tested on version
>>  6.10.6), fstat64 and fstatat64 can directly invoke these syscalls
>>  [1]. The time overhead of our dynamic probing implementation
>>  increased by 0.5%-2.5% compared to directly calling the syscalls.
>> 2. On kernels that support syscalls 79 and 80 (tested on version
>>  6.10.6), our dynamically probed implementation reduces the time
>>  overhead by more than 60% compared to directly invoking the statx
>>  (291) syscall.
>> 3. On kernels that do not support syscalls 79 and 80 (tested on version
>>  6.8.0), fstat64 and fstatat64 fall back to using the statx (291)
>>  syscall (as before). In this case, the overhead of our dynamic
>>  probing implementation increased by 0.1%-1.3% compared to directly
>>  invoking statx.
> 
> Hi, I tried to reproduce your test result, by invoking fstat(2) for 1M times,
> repeated by 100 times. The test was carried out on 3A6K, using 6.11-rc7 kernel.
> The result is:
> 
> fstat using statx(fd, "", AT_EMPTY_PATH) (current glibc implementation)
> mean: 210420528.100000(ns), sigma: 996145.440248(ns)
> 
> statx(fd, NULL, AT_EMPTY_PATH) (for comparison)
> mean: 199410620.600000(ns), sigma: 111561.012101(ns)
> 
> fstat using statx(fd, NULL, AT_EMPTY_PATH) (The implementation I proposed)
> mean: 208258640.700000(ns), sigma: 468451.704836(ns)
> 
> fstat using fstat(fd) (Your patch)
> mean: 192936673.800000(ns), sigma: 251927.136307(ns)
> 
> As we can see in the result, the implementation using fstat is 8.31% faster
> than the current implementation, instead of "reducing the time overhead by
> more than 60%".

I did another test on 6.10.7, using the current glibc implementation, i.e.
statx(fd, "", AT_EMPTY_PATH), and got the following result:

mean: 603344203.300000(ns), sigma: 715246.975336(ns)

If we use this as the baseline, we can get the following summary:

fstat(fd): 68.02% less time
statx(fd, NULL, AT_EMPTY_PATH): 65.48% less time
statx(fd, "", AT_EMPTY_PATH) (nothing is changed in glibc, only upgrade the
  kernel to 6.11): 65.12% less time

As a result, the performance gain is similar comparing using fstat(fd) and
statx(fd, NULL, AT_EMPTY_PATH).

> 
> I prefer introducing dynamic probing of statx(fd, NULL, AT_EMPTY_PATH), which
> can benefit all 32-bit platforms relying on statx for 64-bit timestamps[2], as
> well as 64-bit loongarch, not only for performance, but also for seccomp
> sandboxing. Furthermore, by doing so, we can eliminate the need of maintaining
> our own copy of fstat in loongarch.

To conclude, the question would be whether it is worthy to have a separately
maintained fstat in loongarch for the 2.54% performance difference.

> 
> [2]: [PATCH v5] linux: Add linux statx(fd, NULL, AT_EMPTY_PATH) support
> https://sourceware.org/pipermail/libc-alpha/2024-August/159499.html
> 
> 
> Cheers,
> 
> Miao Wang
caiyinyu Sept. 12, 2024, 1:41 a.m. UTC | #4
在 2024/9/11 下午7:00, Xi Ruoyao 写道:
> On Wed, 2024-09-11 at 17:40 +0800, caiyinyu wrote:
>
> /* snip */
>
>> diff --git a/sysdeps/unix/sysv/linux/loongarch/arch-syscall.h b/sysdeps/unix/sysv/linux/loongarch/arch-syscall.h
>> index 8bb82448a7..7e732256fd 100644
>> --- a/sysdeps/unix/sysv/linux/loongarch/arch-syscall.h
>> +++ b/sysdeps/unix/sysv/linux/loongarch/arch-syscall.h
> No, we are not allowed to edit this file manually.  We must wait for the
> global maintainer to regenerate this file (and all arch-syscall.h files)
> after Linux 6.11 release.
>
> And we should avoid the runtime probe if --enable-kernel-6.10.6 or
> later.
Here's the situation: we need to maintain compatibility, and we can't 
ignore
complaints from downstream vendors. So, there are only two options: 
either we
blindly use statx, regardless of whether the kernel provides fstat and
newfstatat, keeping it consistent with our previous approach; or we use 
dynamic
probing.

>
> And we shouldn't use fstat64 or fstatat64 on LA32 (because they'll
> suffer y2038 issue on 32-bit architecture).  IMO it's better to add the
> check now so we don't need to remember to change this again when we add
> LA32 support.
>
> (See below for my suggestion to handle --enable-kernel and 32-bit but
> there may be better ways.)
>
> /* snip */
>
>
>> diff --git a/sysdeps/unix/sysv/linux/loongarch/fstat64.c b/sysdeps/unix/sysv/linux/loongarch/fstat64.c
>> new file mode 100644
>> index 0000000000..b1a9e9c6a4
>> --- /dev/null
>> +++ b/sysdeps/unix/sysv/linux/loongarch/fstat64.c
>> @@ -0,0 +1,64 @@
>> +/* Get file status.  LoongArch version.
> /* snip */
>
>> +   <https://www.gnu.org/licenses/>.  */
>> +
> Here add
>
> #if __LINUX_KERNEL_VERSION >= 0x060a06 || !defined(__loongarch_lp64)
> #include "../fstat64.c"
> #else
>
> and add #endif at the end of the file.  This should be enough to avoid
> the runtime probe of --enable-kernel=6.10.6 or later, and avoid using
> fstat64/fstatat64 for 32-bit.
>
>> +#define __fstat __redirect___fstat
>> +#define fstat   __redirect_fstat
>> +#include <sys/stat.h>
>> +#undef __fstat
>> +#undef fstat
>> +
>> +#include "fstatat64_time64_statx.h"
>> +
>> +int __fstat_syscall_supported attribute_hidden = 1;
> static int __fstat_syscall_supported = 1;
>
> because it's only used in this translation unit.  Or you may want to
> remove __newfstatat_syscall_supported and reuse this in fstatat64.c.
>
>> +int
>> +__fstat64 (int fd, struct __stat64_t64 *buf)
>> +{
>> +  int r;
>> +  extern int __fstat_syscall_supported attribute_hidden;
> Remove this.  The definition is in the same TU, so no need of this.
>
>> +  int supported = atomic_load_relaxed (&__fstat_syscall_supported);
>> +  if (__glibc_likely (supported))
> I'd remove __glibc_likely here.  We don't have a priori knowledge of
> what kernel version we are running on (unless --enable-kernel=6.10.6 or
> later but then we should be just using ../fstat64.c).
>
>> +  {
>> +    r = INTERNAL_SYSCALL_CALL (fstat, fd, buf);
>> +    if (r == 0)
>> +      return r;
>> +    else if (r != -ENOSYS)
>> +      return INLINE_SYSCALL_ERROR_RETURN_VALUE (-r);
>> +
>> +    atomic_store_relaxed (&__fstat_syscall_supported, 0);
>> +  }
>> +
>> +  if (fd < 0)
>> +  {
>> +    __set_errno (EBADF);
>> +    return -1;
>> +  }
>> +
>> +  r = fstatat64_time64_statx (fd, "", buf, AT_EMPTY_PATH);
>> +  return INTERNAL_SYSCALL_ERROR_P (r)
>> +    ? INLINE_SYSCALL_ERROR_RETURN_VALUE (-r)
>> +    : 0;
>> +}
>> +
>> +hidden_def (__fstat64)
>> +weak_alias (__fstat64, fstat64)
>> +
>> +#if XSTAT_IS_XSTAT64
> On LoongArch XSTAT_IS_XSTAT64 is always 1, so we can remove this #if.
>
>> +strong_alias (__fstat64, __fstat)
>> +weak_alias (__fstat64, fstat)
>> +#endif
>> diff --git a/sysdeps/unix/sysv/linux/loongarch/fstatat64.c b/sysdeps/unix/sysv/linux/loongarch/fstatat64.c
>> new file mode 100644
>> index 0000000000..64f5b3b8f6
>> --- /dev/null
>> +++ b/sysdeps/unix/sysv/linux/loongarch/fstatat64.c
>> @@ -0,0 +1,59 @@
>> +/* Get file status.  LoongArch version.
> /* snip */
>
>> +   <https://www.gnu.org/licenses/>.  */
> Likewise, add
>
> #if __LINUX_KERNEL_VERSION >= 0x060a06 || !defined(__loongarch_lp64)
> #include "../fstatat64.c"
> #else
>
> here.
>
>> +#define __fstatat __redirect___fstatat
>> +#define fstatat   __redirect_fstatat
>> +#include <sys/stat.h>
>> +#undef __fstatat
>> +#undef fstatat
>> +
>> +#include "fstatat64_time64_statx.h"
>> +
>> +int __newfstatat_syscall_supported attribute_hidden = 1;
> Likewise, use static (or remove it if you want to reuse
> __fstat_syscall_supported instead).
>
>> +
>> +int
>> +__fstatat64 (int fd, const char *file, struct __stat64_t64 *buf,
>> +		    int flag)
>> +{
>> +  int r;
>> +  extern int __newfstatat_syscall_supported attribute_hidden;
> Likewise, remove this.  Or change to __fstat_syscall_supported if you
> want to reuse it.
>
> /* snip */
>
>> +#if XSTAT_IS_XSTAT64
> Likewise, remove this #if.
>
>> +strong_alias (__fstatat64, __fstatat)
>> +weak_alias (__fstatat64, fstatat)
>> +strong_alias (__fstatat64, __GI___fstatat);
>> +#endif
>
>> diff --git a/sysdeps/unix/sysv/linux/loongarch/fstatat64_time64_statx.h b/sysdeps/unix/sysv/linux/loongarch/fstatat64_time64_statx.h
>> new file mode 100644
>> index 0000000000..97920a820f
>> --- /dev/null
>> +++ b/sysdeps/unix/sysv/linux/loongarch/fstatat64_time64_statx.h
> Hmm this is duplicating a large hunk of code from
> sysdeps/unix/sysv/linux/fstatat64.c.  Maybe rename to
> sysdeps/unix/sysv/linux/fstatat64_time64_statx.h and include it in both
> linux/fstatat64.c and linux/loongarch/fstatat64.c?
>
>> @@ -0,0 +1,54 @@
>> +/* Get file status.  LoongArch version.
>> +   Copyright (C) 2024 Free Software Foundation, Inc.
>> +   This file is part of the GNU C Library.
>> +
>> +   The GNU C Library is free software; you can redistribute it and/or
>> +   modify it under the terms of the GNU Lesser General Public
>> +   License as published by the Free Software Foundation; either
>> +   version 2.1 of the License, or (at your option) any later version.
>> +
>> +   The GNU C Library is distributed in the hope that it will be useful,
>> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
>> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>> +   Lesser General Public License for more details.
>> +
>> +   You should have received a copy of the GNU Lesser General Public
>> +   License along with the GNU C Library; if not, see
>> +   <https://www.gnu.org/licenses/>.  */
>> +
>> +#include <fcntl.h>
>> +#include <sys/sysmacros.h>
>> +#include <internal-stat.h>
>> +
>> +static inline int
>> +fstatat64_time64_statx (int fd, const char *file, struct __stat64_t64 *buf,
>> +			int flag)
>> +{
>> +  struct statx tmp;
>> +  int r = INTERNAL_SYSCALL_CALL (statx, fd, file, AT_NO_AUTOMOUNT | flag,
>> +				 STATX_BASIC_STATS, &tmp);
>> +  if (r != 0)
>> +    return r;
>> +
>> +  *buf = (struct __stat64_t64)
>> +  {
>> +    .st_dev = __gnu_dev_makedev (tmp.stx_dev_major, tmp.stx_dev_minor),
>> +    .st_rdev = __gnu_dev_makedev (tmp.stx_rdev_major, tmp.stx_rdev_minor),
>> +    .st_ino = tmp.stx_ino,
>> +    .st_mode = tmp.stx_mode,
>> +    .st_nlink = tmp.stx_nlink,
>> +    .st_uid = tmp.stx_uid,
>> +    .st_gid = tmp.stx_gid,
>> +    .st_atime = tmp.stx_atime.tv_sec,
>> +    .st_atim.tv_nsec = tmp.stx_atime.tv_nsec,
>> +    .st_mtime = tmp.stx_mtime.tv_sec,
>> +    .st_mtim.tv_nsec = tmp.stx_mtime.tv_nsec,
>> +    .st_ctime = tmp.stx_ctime.tv_sec,
>> +    .st_ctim.tv_nsec = tmp.stx_ctime.tv_nsec,
>> +    .st_size = tmp.stx_size,
>> +    .st_blocks = tmp.stx_blocks,
>> +    .st_blksize = tmp.stx_blksize,
>> +  };
>> +
>> +  return r;
>> +}
Miao Wang Sept. 12, 2024, 1:54 a.m. UTC | #5
> 2024年9月12日 09:41,caiyinyu <caiyinyu@loongson.cn> 写道:
> 
> 
> 在 2024/9/11 下午7:00, Xi Ruoyao 写道:
>> On Wed, 2024-09-11 at 17:40 +0800, caiyinyu wrote:
>> 
>> /* snip */
>> 
>>> diff --git a/sysdeps/unix/sysv/linux/loongarch/arch-syscall.h b/sysdeps/unix/sysv/linux/loongarch/arch-syscall.h
>>> index 8bb82448a7..7e732256fd 100644
>>> --- a/sysdeps/unix/sysv/linux/loongarch/arch-syscall.h
>>> +++ b/sysdeps/unix/sysv/linux/loongarch/arch-syscall.h
>> No, we are not allowed to edit this file manually.  We must wait for the
>> global maintainer to regenerate this file (and all arch-syscall.h files)
>> after Linux 6.11 release.
>> 
>> And we should avoid the runtime probe if --enable-kernel-6.10.6 or
>> later.
> Here's the situation: we need to maintain compatibility, and we can't ignore
> complaints from downstream vendors. So, there are only two options: either we
> blindly use statx, regardless of whether the kernel provides fstat and
> newfstatat, keeping it consistent with our previous approach; or we use dynamic
> probing.

You are not getting Ruoyao's idea. The configure option of glibc --enable-kernel
controls the kernel compatibility of compiled glibc. This option is for
downstream vendors to adjust the compatibility baseline of kernel version. If
the baseline is high enough, i.e. not before 6.10.6 in our case, dynamic probing
can be eliminated, assuming the existence of fstat and newfstatat. The macro
__LINUX_KERNEL_VERSION contains the baseline version specified. There are many
examples of this in kernel-features.h

Cheers,

Miao Wang
Xi Ruoyao Sept. 12, 2024, 1:58 a.m. UTC | #6
On Thu, 2024-09-12 at 09:54 +0800, Miao Wang wrote:
> 
> 
> > 2024年9月12日 09:41,caiyinyu <caiyinyu@loongson.cn> 写道:
> > 
> > 
> > 在 2024/9/11 下午7:00, Xi Ruoyao 写道:
> > > On Wed, 2024-09-11 at 17:40 +0800, caiyinyu wrote:
> > > 
> > > /* snip */
> > > 
> > > > diff --git a/sysdeps/unix/sysv/linux/loongarch/arch-syscall.h b/sysdeps/unix/sysv/linux/loongarch/arch-syscall.h
> > > > index 8bb82448a7..7e732256fd 100644
> > > > --- a/sysdeps/unix/sysv/linux/loongarch/arch-syscall.h
> > > > +++ b/sysdeps/unix/sysv/linux/loongarch/arch-syscall.h
> > > No, we are not allowed to edit this file manually.  We must wait for the
> > > global maintainer to regenerate this file (and all arch-syscall.h files)
> > > after Linux 6.11 release.
> > > 
> > > And we should avoid the runtime probe if --enable-kernel-6.10.6 or
> > > later.
> > Here's the situation: we need to maintain compatibility, and we can't ignore
> > complaints from downstream vendors. So, there are only two options: either we
> > blindly use statx, regardless of whether the kernel provides fstat and
> > newfstatat, keeping it consistent with our previous approach; or we use dynamic
> > probing.
> 
> You are not getting Ruoyao's idea. The configure option of glibc --enable-kernel
> controls the kernel compatibility of compiled glibc. This option is for
> downstream vendors to adjust the compatibility baseline of kernel version. If
> the baseline is high enough, i.e. not before 6.10.6 in our case, dynamic probing
> can be eliminated, assuming the existence of fstat and newfstatat. The macro
> __LINUX_KERNEL_VERSION contains the baseline version specified. There are many
> examples of this in kernel-features.h

Quote the doc:

‘--enable-kernel=VERSION’
     This option is currently only useful on GNU/Linux systems.  The
     VERSION parameter should have the form X.Y.Z and describes the
     smallest version of the Linux kernel the generated library is
     expected to support.  The higher the VERSION number is, the less
     compatibility code is added, and the faster the code gets.

So if they configure their glibc with --enable-kernel-6.10.6, they
actually *request* to ignore the compatibility with older kernels like
6.10.5 or 6.4.x.  If they want compatibility they shouldn't use this
option (or they should use a lower value like --enable-kernel=6.10.0).

The default is 5.19.0 (as we are specifying in configure.ac).
caiyinyu Sept. 12, 2024, 8:08 a.m. UTC | #7
在 2024/9/12 上午2:00, Miao Wang 写道:
>
>> 2024年9月12日 00:50,Miao Wang <shankerwangmiao@gmail.com> 写道:
>>
>>> 2024年9月11日 17:40,caiyinyu <caiyinyu@loongson.cn> 写道:
>>>
>>> In Linux 6.11, the fstat (80) and newfstatat (79) syscalls have been
>>> reintroduced. The definitions of these two syscalls have already been
>>> backported to version 6.10.6 in the stable tree.
>>>
>>> In this patch, we are adding dynamically probed implementations of
>>> fstat64 and fstatat64 specifically for syscalls 79 and 80. This ensures
>>> compatibility while maintaining relatively good performance on kernels
>>> that both support and do not support syscalls 79 and 80.
>>>
>>> By running an experiment where we invoke fstat64 and fstatat64 100
>>> million times, we gathered the following efficiency statistics:
>>>
>>> 1. On kernels that support syscalls 79 and 80 (tested on version
>>>   6.10.6), fstat64 and fstatat64 can directly invoke these syscalls
>>>   [1]. The time overhead of our dynamic probing implementation
>>>   increased by 0.5%-2.5% compared to directly calling the syscalls.
>>> 2. On kernels that support syscalls 79 and 80 (tested on version
>>>   6.10.6), our dynamically probed implementation reduces the time
>>>   overhead by more than 60% compared to directly invoking the statx
>>>   (291) syscall.
>>> 3. On kernels that do not support syscalls 79 and 80 (tested on version
>>>   6.8.0), fstat64 and fstatat64 fall back to using the statx (291)
>>>   syscall (as before). In this case, the overhead of our dynamic
>>>   probing implementation increased by 0.1%-1.3% compared to directly
>>>   invoking statx.
>> Hi, I tried to reproduce your test result, by invoking fstat(2) for 1M times,
>> repeated by 100 times. The test was carried out on 3A6K, using 6.11-rc7 kernel.
>> The result is:
>>
>> fstat using statx(fd, "", AT_EMPTY_PATH) (current glibc implementation)
>> mean: 210420528.100000(ns), sigma: 996145.440248(ns)
>>
>> statx(fd, NULL, AT_EMPTY_PATH) (for comparison)
>> mean: 199410620.600000(ns), sigma: 111561.012101(ns)
>>
>> fstat using statx(fd, NULL, AT_EMPTY_PATH) (The implementation I proposed)
>> mean: 208258640.700000(ns), sigma: 468451.704836(ns)
>>
>> fstat using fstat(fd) (Your patch)
>> mean: 192936673.800000(ns), sigma: 251927.136307(ns)
>>
>> As we can see in the result, the implementation using fstat is 8.31% faster
>> than the current implementation, instead of "reducing the time overhead by
>> more than 60%".
> I did another test on 6.10.7, using the current glibc implementation, i.e.
> statx(fd, "", AT_EMPTY_PATH), and got the following result:
>
> mean: 603344203.300000(ns), sigma: 715246.975336(ns)
>
> If we use this as the baseline, we can get the following summary:
>
> fstat(fd): 68.02% less time
> statx(fd, NULL, AT_EMPTY_PATH): 65.48% less time
> statx(fd, "", AT_EMPTY_PATH) (nothing is changed in glibc, only upgrade the
>    kernel to 6.11): 65.12% less time
>
> As a result, the performance gain is similar comparing using fstat(fd) and
> statx(fd, NULL, AT_EMPTY_PATH).
>
>> I prefer introducing dynamic probing of statx(fd, NULL, AT_EMPTY_PATH), which
>> can benefit all 32-bit platforms relying on statx for 64-bit timestamps[2], as
>> well as 64-bit loongarch, not only for performance, but also for seccomp
>> sandboxing. Furthermore, by doing so, we can eliminate the need of maintaining
>> our own copy of fstat in loongarch.
> To conclude, the question would be whether it is worthy to have a separately
> maintained fstat in loongarch for the 2.54% performance difference.

The key point here is that the dynamic probing solution can run directly on
kernels without support for the 79 and 80 system calls (which is 
essential for
us). However, after updating arch-syscall.h, the glibc compiled using Miao
plan[1] cannot run on kernels that do not support the 79 and 80 system 
calls.


>> [2]: [PATCH v5] linux: Add linux statx(fd, NULL, AT_EMPTY_PATH) support
>> https://sourceware.org/pipermail/libc-alpha/2024-August/159499.html
>>
>>
>> Cheers,
>>
>> Miao Wang
Miao Wang Sept. 12, 2024, 8:17 a.m. UTC | #8
> 2024年9月12日 16:08,caiyinyu <caiyinyu@loongson.cn> 写道:
> 
> 
> 在 2024/9/12 上午2:00, Miao Wang 写道:
>> 
>>> 2024年9月12日 00:50,Miao Wang <shankerwangmiao@gmail.com> 写道:
>>> 
>>>> 2024年9月11日 17:40,caiyinyu <caiyinyu@loongson.cn> 写道:
>>>> 
>>>> In Linux 6.11, the fstat (80) and newfstatat (79) syscalls have been
>>>> reintroduced. The definitions of these two syscalls have already been
>>>> backported to version 6.10.6 in the stable tree.
>>>> 
>>>> In this patch, we are adding dynamically probed implementations of
>>>> fstat64 and fstatat64 specifically for syscalls 79 and 80. This ensures
>>>> compatibility while maintaining relatively good performance on kernels
>>>> that both support and do not support syscalls 79 and 80.
>>>> 
>>>> By running an experiment where we invoke fstat64 and fstatat64 100
>>>> million times, we gathered the following efficiency statistics:
>>>> 
>>>> 1. On kernels that support syscalls 79 and 80 (tested on version
>>>>  6.10.6), fstat64 and fstatat64 can directly invoke these syscalls
>>>>  [1]. The time overhead of our dynamic probing implementation
>>>>  increased by 0.5%-2.5% compared to directly calling the syscalls.
>>>> 2. On kernels that support syscalls 79 and 80 (tested on version
>>>>  6.10.6), our dynamically probed implementation reduces the time
>>>>  overhead by more than 60% compared to directly invoking the statx
>>>>  (291) syscall.
>>>> 3. On kernels that do not support syscalls 79 and 80 (tested on version
>>>>  6.8.0), fstat64 and fstatat64 fall back to using the statx (291)
>>>>  syscall (as before). In this case, the overhead of our dynamic
>>>>  probing implementation increased by 0.1%-1.3% compared to directly
>>>>  invoking statx.
>>> Hi, I tried to reproduce your test result, by invoking fstat(2) for 1M times,
>>> repeated by 100 times. The test was carried out on 3A6K, using 6.11-rc7 kernel.
>>> The result is:
>>> 
>>> fstat using statx(fd, "", AT_EMPTY_PATH) (current glibc implementation)
>>> mean: 210420528.100000(ns), sigma: 996145.440248(ns)
>>> 
>>> statx(fd, NULL, AT_EMPTY_PATH) (for comparison)
>>> mean: 199410620.600000(ns), sigma: 111561.012101(ns)
>>> 
>>> fstat using statx(fd, NULL, AT_EMPTY_PATH) (The implementation I proposed)
>>> mean: 208258640.700000(ns), sigma: 468451.704836(ns)
>>> 
>>> fstat using fstat(fd) (Your patch)
>>> mean: 192936673.800000(ns), sigma: 251927.136307(ns)
>>> 
>>> As we can see in the result, the implementation using fstat is 8.31% faster
>>> than the current implementation, instead of "reducing the time overhead by
>>> more than 60%".
>> I did another test on 6.10.7, using the current glibc implementation, i.e.
>> statx(fd, "", AT_EMPTY_PATH), and got the following result:
>> 
>> mean: 603344203.300000(ns), sigma: 715246.975336(ns)
>> 
>> If we use this as the baseline, we can get the following summary:
>> 
>> fstat(fd): 68.02% less time
>> statx(fd, NULL, AT_EMPTY_PATH): 65.48% less time
>> statx(fd, "", AT_EMPTY_PATH) (nothing is changed in glibc, only upgrade the
>>   kernel to 6.11): 65.12% less time
>> 
>> As a result, the performance gain is similar comparing using fstat(fd) and
>> statx(fd, NULL, AT_EMPTY_PATH).
>> 
>>> I prefer introducing dynamic probing of statx(fd, NULL, AT_EMPTY_PATH), which
>>> can benefit all 32-bit platforms relying on statx for 64-bit timestamps[2], as
>>> well as 64-bit loongarch, not only for performance, but also for seccomp
>>> sandboxing. Furthermore, by doing so, we can eliminate the need of maintaining
>>> our own copy of fstat in loongarch.
>> To conclude, the question would be whether it is worthy to have a separately
>> maintained fstat in loongarch for the 2.54% performance difference.
> 
> The key point here is that the dynamic probing solution can run directly on
> kernels without support for the 79 and 80 system calls (which is essential for
> us). However, after updating arch-syscall.h, the glibc compiled using Miao
> plan[1] cannot run on kernels that do not support the 79 and 80 system calls.

Why? In my patch, when the requested kernel compatibility version is below
6.10.6, __ASSUME_LOONGARCH_NEWSTAT will not be defined, and the definition
of the syscalls __NR_fstat and __NR_newfstatat will be undef-ed. So the
implementation will choose to use statx() in the compile time, and thus able to
run on previous kernels.

Cheers,

Miao Wang

> 
> 
>>> [2]: [PATCH v5] linux: Add linux statx(fd, NULL, AT_EMPTY_PATH) support
>>> https://sourceware.org/pipermail/libc-alpha/2024-August/159499.html
>>> 
>>> 
>>> Cheers,
>>> 
>>> Miao Wang
caiyinyu Sept. 12, 2024, 8:32 a.m. UTC | #9
在 2024/9/12 下午4:17, Miao Wang 写道:
>
>> 2024年9月12日 16:08,caiyinyu <caiyinyu@loongson.cn> 写道:
>>
>>
>> 在 2024/9/12 上午2:00, Miao Wang 写道:
>>>> 2024年9月12日 00:50,Miao Wang <shankerwangmiao@gmail.com> 写道:
>>>>
>>>>> 2024年9月11日 17:40,caiyinyu <caiyinyu@loongson.cn> 写道:
>>>>>
>>>>> In Linux 6.11, the fstat (80) and newfstatat (79) syscalls have been
>>>>> reintroduced. The definitions of these two syscalls have already been
>>>>> backported to version 6.10.6 in the stable tree.
>>>>>
>>>>> In this patch, we are adding dynamically probed implementations of
>>>>> fstat64 and fstatat64 specifically for syscalls 79 and 80. This ensures
>>>>> compatibility while maintaining relatively good performance on kernels
>>>>> that both support and do not support syscalls 79 and 80.
>>>>>
>>>>> By running an experiment where we invoke fstat64 and fstatat64 100
>>>>> million times, we gathered the following efficiency statistics:
>>>>>
>>>>> 1. On kernels that support syscalls 79 and 80 (tested on version
>>>>>   6.10.6), fstat64 and fstatat64 can directly invoke these syscalls
>>>>>   [1]. The time overhead of our dynamic probing implementation
>>>>>   increased by 0.5%-2.5% compared to directly calling the syscalls.
>>>>> 2. On kernels that support syscalls 79 and 80 (tested on version
>>>>>   6.10.6), our dynamically probed implementation reduces the time
>>>>>   overhead by more than 60% compared to directly invoking the statx
>>>>>   (291) syscall.
>>>>> 3. On kernels that do not support syscalls 79 and 80 (tested on version
>>>>>   6.8.0), fstat64 and fstatat64 fall back to using the statx (291)
>>>>>   syscall (as before). In this case, the overhead of our dynamic
>>>>>   probing implementation increased by 0.1%-1.3% compared to directly
>>>>>   invoking statx.
>>>> Hi, I tried to reproduce your test result, by invoking fstat(2) for 1M times,
>>>> repeated by 100 times. The test was carried out on 3A6K, using 6.11-rc7 kernel.
>>>> The result is:
>>>>
>>>> fstat using statx(fd, "", AT_EMPTY_PATH) (current glibc implementation)
>>>> mean: 210420528.100000(ns), sigma: 996145.440248(ns)
>>>>
>>>> statx(fd, NULL, AT_EMPTY_PATH) (for comparison)
>>>> mean: 199410620.600000(ns), sigma: 111561.012101(ns)
>>>>
>>>> fstat using statx(fd, NULL, AT_EMPTY_PATH) (The implementation I proposed)
>>>> mean: 208258640.700000(ns), sigma: 468451.704836(ns)
>>>>
>>>> fstat using fstat(fd) (Your patch)
>>>> mean: 192936673.800000(ns), sigma: 251927.136307(ns)
>>>>
>>>> As we can see in the result, the implementation using fstat is 8.31% faster
>>>> than the current implementation, instead of "reducing the time overhead by
>>>> more than 60%".
>>> I did another test on 6.10.7, using the current glibc implementation, i.e.
>>> statx(fd, "", AT_EMPTY_PATH), and got the following result:
>>>
>>> mean: 603344203.300000(ns), sigma: 715246.975336(ns)
>>>
>>> If we use this as the baseline, we can get the following summary:
>>>
>>> fstat(fd): 68.02% less time
>>> statx(fd, NULL, AT_EMPTY_PATH): 65.48% less time
>>> statx(fd, "", AT_EMPTY_PATH) (nothing is changed in glibc, only upgrade the
>>>    kernel to 6.11): 65.12% less time
>>>
>>> As a result, the performance gain is similar comparing using fstat(fd) and
>>> statx(fd, NULL, AT_EMPTY_PATH).
>>>
>>>> I prefer introducing dynamic probing of statx(fd, NULL, AT_EMPTY_PATH), which
>>>> can benefit all 32-bit platforms relying on statx for 64-bit timestamps[2], as
>>>> well as 64-bit loongarch, not only for performance, but also for seccomp
>>>> sandboxing. Furthermore, by doing so, we can eliminate the need of maintaining
>>>> our own copy of fstat in loongarch.
>>> To conclude, the question would be whether it is worthy to have a separately
>>> maintained fstat in loongarch for the 2.54% performance difference.
>> The key point here is that the dynamic probing solution can run directly on
>> kernels without support for the 79 and 80 system calls (which is essential for
>> us). However, after updating arch-syscall.h, the glibc compiled using Miao
>> plan[1] cannot run on kernels that do not support the 79 and 80 system calls.
> Why? In my patch, when the requested kernel compatibility version is below
> 6.10.6, __ASSUME_LOONGARCH_NEWSTAT will not be defined, and the definition
> of the syscalls __NR_fstat and __NR_newfstatat will be undef-ed. So the
> implementation will choose to use statx() in the compile time, and thus able to
> run on previous kernels.


When glibc is compiled on kernel versions >= 6.10.6 (with 
--enable-kernel=6.10.6), it uses the 79 and 80 system calls. These 
precompiled libraries cannot work on kernels that do not support the 79 
and 80 system calls, unless they are recompiled. Unfortunately, we have 
to handle this situation.

>
> Cheers,
>
> Miao Wang
>
>>
>>>> [2]: [PATCH v5] linux: Add linux statx(fd, NULL, AT_EMPTY_PATH) support
>>>> https://sourceware.org/pipermail/libc-alpha/2024-August/159499.html
>>>>
>>>>
>>>> Cheers,
>>>>
>>>> Miao Wang
Xi Ruoyao Sept. 12, 2024, 8:37 a.m. UTC | #10
On Thu, 2024-09-12 at 16:17 +0800, Miao Wang wrote:
> 
> 
> > 2024年9月12日 16:08,caiyinyu <caiyinyu@loongson.cn> 写道:
> > 
> > 
> > 在 2024/9/12 上午2:00, Miao Wang 写道:
> > > 
> > > > 2024年9月12日 00:50,Miao Wang <shankerwangmiao@gmail.com> 写道:
> > > > 
> > > > > 2024年9月11日 17:40,caiyinyu <caiyinyu@loongson.cn> 写道:
> > > > > 
> > > > > In Linux 6.11, the fstat (80) and newfstatat (79) syscalls have been
> > > > > reintroduced. The definitions of these two syscalls have already been
> > > > > backported to version 6.10.6 in the stable tree.
> > > > > 
> > > > > In this patch, we are adding dynamically probed implementations of
> > > > > fstat64 and fstatat64 specifically for syscalls 79 and 80. This ensures
> > > > > compatibility while maintaining relatively good performance on kernels
> > > > > that both support and do not support syscalls 79 and 80.
> > > > > 
> > > > > By running an experiment where we invoke fstat64 and fstatat64 100
> > > > > million times, we gathered the following efficiency statistics:
> > > > > 
> > > > > 1. On kernels that support syscalls 79 and 80 (tested on version
> > > > >  6.10.6), fstat64 and fstatat64 can directly invoke these syscalls
> > > > >  [1]. The time overhead of our dynamic probing implementation
> > > > >  increased by 0.5%-2.5% compared to directly calling the syscalls.
> > > > > 2. On kernels that support syscalls 79 and 80 (tested on version
> > > > >  6.10.6), our dynamically probed implementation reduces the time
> > > > >  overhead by more than 60% compared to directly invoking the statx
> > > > >  (291) syscall.
> > > > > 3. On kernels that do not support syscalls 79 and 80 (tested on version
> > > > >  6.8.0), fstat64 and fstatat64 fall back to using the statx (291)
> > > > >  syscall (as before). In this case, the overhead of our dynamic
> > > > >  probing implementation increased by 0.1%-1.3% compared to directly
> > > > >  invoking statx.
> > > > Hi, I tried to reproduce your test result, by invoking fstat(2) for 1M times,
> > > > repeated by 100 times. The test was carried out on 3A6K, using 6.11-rc7 kernel.
> > > > The result is:
> > > > 
> > > > fstat using statx(fd, "", AT_EMPTY_PATH) (current glibc implementation)
> > > > mean: 210420528.100000(ns), sigma: 996145.440248(ns)
> > > > 
> > > > statx(fd, NULL, AT_EMPTY_PATH) (for comparison)
> > > > mean: 199410620.600000(ns), sigma: 111561.012101(ns)
> > > > 
> > > > fstat using statx(fd, NULL, AT_EMPTY_PATH) (The implementation I proposed)
> > > > mean: 208258640.700000(ns), sigma: 468451.704836(ns)
> > > > 
> > > > fstat using fstat(fd) (Your patch)
> > > > mean: 192936673.800000(ns), sigma: 251927.136307(ns)
> > > > 
> > > > As we can see in the result, the implementation using fstat is 8.31% faster
> > > > than the current implementation, instead of "reducing the time overhead by
> > > > more than 60%".
> > > I did another test on 6.10.7, using the current glibc implementation, i.e.
> > > statx(fd, "", AT_EMPTY_PATH), and got the following result:
> > > 
> > > mean: 603344203.300000(ns), sigma: 715246.975336(ns)
> > > 
> > > If we use this as the baseline, we can get the following summary:
> > > 
> > > fstat(fd): 68.02% less time
> > > statx(fd, NULL, AT_EMPTY_PATH): 65.48% less time
> > > statx(fd, "", AT_EMPTY_PATH) (nothing is changed in glibc, only upgrade the
> > >   kernel to 6.11): 65.12% less time
> > > 
> > > As a result, the performance gain is similar comparing using fstat(fd) and
> > > statx(fd, NULL, AT_EMPTY_PATH).
> > > 
> > > > I prefer introducing dynamic probing of statx(fd, NULL, AT_EMPTY_PATH), which
> > > > can benefit all 32-bit platforms relying on statx for 64-bit timestamps[2], as
> > > > well as 64-bit loongarch, not only for performance, but also for seccomp
> > > > sandboxing. Furthermore, by doing so, we can eliminate the need of maintaining
> > > > our own copy of fstat in loongarch.
> > > To conclude, the question would be whether it is worthy to have a separately
> > > maintained fstat in loongarch for the 2.54% performance difference.
> > 
> > The key point here is that the dynamic probing solution can run directly on
> > kernels without support for the 79 and 80 system calls (which is essential for
> > us). However, after updating arch-syscall.h, the glibc compiled using Miao
> > plan[1] cannot run on kernels that do not support the 79 and 80 system calls.
> 
> Why? In my patch, when the requested kernel compatibility version is below
> 6.10.6, __ASSUME_LOONGARCH_NEWSTAT will not be defined, and the definition
> of the syscalls __NR_fstat and __NR_newfstatat will be undef-ed. So the
> implementation will choose to use statx() in the compile time, and thus able to
> run on previous kernels.

Yes this should be correct.

Let me summarize the situation here:

1. Using statx for fstat and fstatat was stupidly slow.
2. After kernel commit 0ef625bba6fb2bc0c8ed2aab9524fdf423f67dd5, statx
is already about 40-50% faster.  No user space change is needed to get
the gain.  Unfortunately this commit isn't backported.
3. After kernel commit 0ef625bba6fb2bc0c8ed2aab9524fdf423f67dd5, we can
use NULL instead of "" with AT_EMPTY_PATH.  Miao has already submitted a
patch for it.  And it will be about 10-20% faster than 2.
4. After kernel commit 7697a0fe0154468f5df35c23ebd7aa48994c2cdc, we can
use native fstat and newfstatat calls on LoongArch 64-bit.  It's only
2.54% faster than 3 (per Miao, I've not measured but it matches my gut
feeling).

So the pros of 4:
a. it works on 6.1.x, 6.6.y, and 6.10.z if [xyz] is large enough to get
the backport.
b. 2.54% faster.

The con of 4: maintenance burden.

To solve a. we can backport 0ef625bba6fb2bc0c8ed2aab9524fdf423f67dd5 to
6.1.x, 6.6.y, and 6.10.z.  Then to me b. is really not significant
enough to outweigh the maintenance burden.

So can we attempt to convince Greg KH to backport
0ef625bba6fb2bc0c8ed2aab9524fdf423f67dd5 and use Miao's (simple) patch
in Glibc instead (and live with the 2.54% difference)?

If both of you can acknowledge the plan I'll write to Greg.
Xi Ruoyao Sept. 12, 2024, 8:38 a.m. UTC | #11
On Thu, 2024-09-12 at 16:32 +0800, caiyinyu wrote:
> 
> 
> When glibc is compiled on kernel versions >= 6.10.6 (with --enable-
> kernel=6.10.6), it uses the 79 and 80 system calls. These precompiled
> libraries cannot work on kernels that do not support the 79 and 80
> system calls, unless they are recompiled. Unfortunately, we have to
> handle this situation.

Then they shouldn't use --enable-kernel=6.10.6.

--enable-kernel has nothing to do with where Glibc is compiled.  The
default is just 5.19.0, no matter which kernel version is running when
compiling Glibc.

Unless you use --enable-kernel=current, but again they shouldn't use
this if they need compatibility.
Xi Ruoyao Sept. 12, 2024, 8:42 a.m. UTC | #12
On Thu, 2024-09-12 at 16:38 +0800, Xi Ruoyao wrote:
> On Thu, 2024-09-12 at 16:32 +0800, caiyinyu wrote:
> > 
> > 
> > When glibc is compiled on kernel versions >= 6.10.6 (with --enable-
> > kernel=6.10.6), it uses the 79 and 80 system calls. These
> > precompiled
> > libraries cannot work on kernels that do not support the 79 and 80
> > system calls, unless they are recompiled. Unfortunately, we have to
> > handle this situation.
> 
> Then they shouldn't use --enable-kernel=6.10.6.

i.e. If they are using some stupid thing like --enable-kernel="$(uname -
r)", tell them to fix the stupidity instead of altering the meaning of -
-enable-kernel.  In Glibc --enable-kernel=x.y.z just **means** you do
**NOT** care the compatibility with kernels < x.y.z.
 
> --enable-kernel has nothing to do with where Glibc is compiled.  The
> default is just 5.19.0, no matter which kernel version is running when
> compiling Glibc.
> 
> Unless you use --enable-kernel=current, but again they shouldn't use
> this if they need compatibility.
>
Miao Wang Sept. 12, 2024, 9:49 a.m. UTC | #13
> 2024年9月12日 16:37,Xi Ruoyao <xry111@xry111.site> 写道:
> 
> On Thu, 2024-09-12 at 16:17 +0800, Miao Wang wrote:
>> 
>> 
>>> 2024年9月12日 16:08,caiyinyu <caiyinyu@loongson.cn> 写道:
>>> 
>>> 
>>> 在 2024/9/12 上午2:00, Miao Wang 写道:
>>>> 
>>>>> 2024年9月12日 00:50,Miao Wang <shankerwangmiao@gmail.com> 写道:
>>>>> 
>>>>>> 2024年9月11日 17:40,caiyinyu <caiyinyu@loongson.cn> 写道:
>>>>>> 
>>>>>> In Linux 6.11, the fstat (80) and newfstatat (79) syscalls have been
>>>>>> reintroduced. The definitions of these two syscalls have already been
>>>>>> backported to version 6.10.6 in the stable tree.
>>>>>> 
>>>>>> In this patch, we are adding dynamically probed implementations of
>>>>>> fstat64 and fstatat64 specifically for syscalls 79 and 80. This ensures
>>>>>> compatibility while maintaining relatively good performance on kernels
>>>>>> that both support and do not support syscalls 79 and 80.
>>>>>> 
>>>>>> By running an experiment where we invoke fstat64 and fstatat64 100
>>>>>> million times, we gathered the following efficiency statistics:
>>>>>> 
>>>>>> 1. On kernels that support syscalls 79 and 80 (tested on version
>>>>>>  6.10.6), fstat64 and fstatat64 can directly invoke these syscalls
>>>>>>  [1]. The time overhead of our dynamic probing implementation
>>>>>>  increased by 0.5%-2.5% compared to directly calling the syscalls.
>>>>>> 2. On kernels that support syscalls 79 and 80 (tested on version
>>>>>>  6.10.6), our dynamically probed implementation reduces the time
>>>>>>  overhead by more than 60% compared to directly invoking the statx
>>>>>>  (291) syscall.
>>>>>> 3. On kernels that do not support syscalls 79 and 80 (tested on version
>>>>>>  6.8.0), fstat64 and fstatat64 fall back to using the statx (291)
>>>>>>  syscall (as before). In this case, the overhead of our dynamic
>>>>>>  probing implementation increased by 0.1%-1.3% compared to directly
>>>>>>  invoking statx.
>>>>> Hi, I tried to reproduce your test result, by invoking fstat(2) for 1M times,
>>>>> repeated by 100 times. The test was carried out on 3A6K, using 6.11-rc7 kernel.
>>>>> The result is:
>>>>> 
>>>>> fstat using statx(fd, "", AT_EMPTY_PATH) (current glibc implementation)
>>>>> mean: 210420528.100000(ns), sigma: 996145.440248(ns)
>>>>> 
>>>>> statx(fd, NULL, AT_EMPTY_PATH) (for comparison)
>>>>> mean: 199410620.600000(ns), sigma: 111561.012101(ns)
>>>>> 
>>>>> fstat using statx(fd, NULL, AT_EMPTY_PATH) (The implementation I proposed)
>>>>> mean: 208258640.700000(ns), sigma: 468451.704836(ns)
>>>>> 
>>>>> fstat using fstat(fd) (Your patch)
>>>>> mean: 192936673.800000(ns), sigma: 251927.136307(ns)
>>>>> 
>>>>> As we can see in the result, the implementation using fstat is 8.31% faster
>>>>> than the current implementation, instead of "reducing the time overhead by
>>>>> more than 60%".
>>>> I did another test on 6.10.7, using the current glibc implementation, i.e.
>>>> statx(fd, "", AT_EMPTY_PATH), and got the following result:
>>>> 
>>>> mean: 603344203.300000(ns), sigma: 715246.975336(ns)
>>>> 
>>>> If we use this as the baseline, we can get the following summary:
>>>> 
>>>> fstat(fd): 68.02% less time
>>>> statx(fd, NULL, AT_EMPTY_PATH): 65.48% less time
>>>> statx(fd, "", AT_EMPTY_PATH) (nothing is changed in glibc, only upgrade the
>>>>   kernel to 6.11): 65.12% less time
>>>> 
>>>> As a result, the performance gain is similar comparing using fstat(fd) and
>>>> statx(fd, NULL, AT_EMPTY_PATH).
>>>> 
>>>>> I prefer introducing dynamic probing of statx(fd, NULL, AT_EMPTY_PATH), which
>>>>> can benefit all 32-bit platforms relying on statx for 64-bit timestamps[2], as
>>>>> well as 64-bit loongarch, not only for performance, but also for seccomp
>>>>> sandboxing. Furthermore, by doing so, we can eliminate the need of maintaining
>>>>> our own copy of fstat in loongarch.
>>>> To conclude, the question would be whether it is worthy to have a separately
>>>> maintained fstat in loongarch for the 2.54% performance difference.
>>> 
>>> The key point here is that the dynamic probing solution can run directly on
>>> kernels without support for the 79 and 80 system calls (which is essential for
>>> us). However, after updating arch-syscall.h, the glibc compiled using Miao
>>> plan[1] cannot run on kernels that do not support the 79 and 80 system calls.
>> 
>> Why? In my patch, when the requested kernel compatibility version is below
>> 6.10.6, __ASSUME_LOONGARCH_NEWSTAT will not be defined, and the definition
>> of the syscalls __NR_fstat and __NR_newfstatat will be undef-ed. So the
>> implementation will choose to use statx() in the compile time, and thus able to
>> run on previous kernels.
> 
> Yes this should be correct.
> 
> Let me summarize the situation here:
> 
> 1. Using statx for fstat and fstatat was stupidly slow.
> 2. After kernel commit 0ef625bba6fb2bc0c8ed2aab9524fdf423f67dd5, statx
> is already about 40-50% faster.  No user space change is needed to get
> the gain.  Unfortunately this commit isn't backported.
> 3. After kernel commit 0ef625bba6fb2bc0c8ed2aab9524fdf423f67dd5, we can
> use NULL instead of "" with AT_EMPTY_PATH.  Miao has already submitted a
> patch for it.  And it will be about 10-20% faster than 2.
> 4. After kernel commit 7697a0fe0154468f5df35c23ebd7aa48994c2cdc, we can
> use native fstat and newfstatat calls on LoongArch 64-bit.  It's only
> 2.54% faster than 3 (per Miao, I've not measured but it matches my gut
> feeling).
> 
> So the pros of 4:
> a. it works on 6.1.x, 6.6.y, and 6.10.z if [xyz] is large enough to get
> the backport.
> b. 2.54% faster.
> 
> The con of 4: maintenance burden.
> 
> To solve a. we can backport 0ef625bba6fb2bc0c8ed2aab9524fdf423f67dd5 to
> 6.1.x, 6.6.y, and 6.10.z.  Then to me b. is really not significant
> enough to outweigh the maintenance burden.
> 
> So can we attempt to convince Greg KH to backport
> 0ef625bba6fb2bc0c8ed2aab9524fdf423f67dd5 and use Miao's (simple) patch
> in Glibc instead (and live with the 2.54% difference)?

The actual patches needed to be backported are:

1bc6d4452d5c("fs: new helper vfs_empty_path()")
27a2d0cb2f38("stat: use vfs_empty_path() helper") (Not necessary, but for
  cleaner code)
0ef625bba6fb("vfs: support statx(..., NULL, AT_EMPTY_PATH, ...)")

Also, I suppose that this will also be beneficial for other 32-bit arches,
and as a result, I'm in favor of this backport.

Cheers,

Miao Wang

> 
> If both of you can acknowledge the plan I'll write to Greg.
> 
> -- 
> Xi Ruoyao <xry111@xry111.site>
> School of Aerospace Science and Technology, Xidian University
caiyinyu Sept. 12, 2024, 9:55 a.m. UTC | #14
I understand your point, but we cannot guarantee that downstream vendors 
will
clearly and correctly use --enable-kernel=6.10.6 to build packages, 
especially
when there are both kernel packages that support and do not support the 
79 and
80 system calls, as well as glibc packages that use and do not use the 
79 and
80 system calls both in the repo. We need the dynamic probing 
implementation as a transition.

在 2024/9/12 下午4:37, Xi Ruoyao 写道:
> On Thu, 2024-09-12 at 16:17 +0800, Miao Wang wrote:
>>
>>> 2024年9月12日 16:08,caiyinyu <caiyinyu@loongson.cn> 写道:
>>>
>>>
>>> 在 2024/9/12 上午2:00, Miao Wang 写道:
>>>>> 2024年9月12日 00:50,Miao Wang <shankerwangmiao@gmail.com> 写道:
>>>>>
>>>>>> 2024年9月11日 17:40,caiyinyu <caiyinyu@loongson.cn> 写道:
>>>>>>
>>>>>> In Linux 6.11, the fstat (80) and newfstatat (79) syscalls have been
>>>>>> reintroduced. The definitions of these two syscalls have already been
>>>>>> backported to version 6.10.6 in the stable tree.
>>>>>>
>>>>>> In this patch, we are adding dynamically probed implementations of
>>>>>> fstat64 and fstatat64 specifically for syscalls 79 and 80. This ensures
>>>>>> compatibility while maintaining relatively good performance on kernels
>>>>>> that both support and do not support syscalls 79 and 80.
>>>>>>
>>>>>> By running an experiment where we invoke fstat64 and fstatat64 100
>>>>>> million times, we gathered the following efficiency statistics:
>>>>>>
>>>>>> 1. On kernels that support syscalls 79 and 80 (tested on version
>>>>>>   6.10.6), fstat64 and fstatat64 can directly invoke these syscalls
>>>>>>   [1]. The time overhead of our dynamic probing implementation
>>>>>>   increased by 0.5%-2.5% compared to directly calling the syscalls.
>>>>>> 2. On kernels that support syscalls 79 and 80 (tested on version
>>>>>>   6.10.6), our dynamically probed implementation reduces the time
>>>>>>   overhead by more than 60% compared to directly invoking the statx
>>>>>>   (291) syscall.
>>>>>> 3. On kernels that do not support syscalls 79 and 80 (tested on version
>>>>>>   6.8.0), fstat64 and fstatat64 fall back to using the statx (291)
>>>>>>   syscall (as before). In this case, the overhead of our dynamic
>>>>>>   probing implementation increased by 0.1%-1.3% compared to directly
>>>>>>   invoking statx.
>>>>> Hi, I tried to reproduce your test result, by invoking fstat(2) for 1M times,
>>>>> repeated by 100 times. The test was carried out on 3A6K, using 6.11-rc7 kernel.
>>>>> The result is:
>>>>>
>>>>> fstat using statx(fd, "", AT_EMPTY_PATH) (current glibc implementation)
>>>>> mean: 210420528.100000(ns), sigma: 996145.440248(ns)
>>>>>
>>>>> statx(fd, NULL, AT_EMPTY_PATH) (for comparison)
>>>>> mean: 199410620.600000(ns), sigma: 111561.012101(ns)
>>>>>
>>>>> fstat using statx(fd, NULL, AT_EMPTY_PATH) (The implementation I proposed)
>>>>> mean: 208258640.700000(ns), sigma: 468451.704836(ns)
>>>>>
>>>>> fstat using fstat(fd) (Your patch)
>>>>> mean: 192936673.800000(ns), sigma: 251927.136307(ns)
>>>>>
>>>>> As we can see in the result, the implementation using fstat is 8.31% faster
>>>>> than the current implementation, instead of "reducing the time overhead by
>>>>> more than 60%".
>>>> I did another test on 6.10.7, using the current glibc implementation, i.e.
>>>> statx(fd, "", AT_EMPTY_PATH), and got the following result:
>>>>
>>>> mean: 603344203.300000(ns), sigma: 715246.975336(ns)
>>>>
>>>> If we use this as the baseline, we can get the following summary:
>>>>
>>>> fstat(fd): 68.02% less time
>>>> statx(fd, NULL, AT_EMPTY_PATH): 65.48% less time
>>>> statx(fd, "", AT_EMPTY_PATH) (nothing is changed in glibc, only upgrade the
>>>>    kernel to 6.11): 65.12% less time
>>>>
>>>> As a result, the performance gain is similar comparing using fstat(fd) and
>>>> statx(fd, NULL, AT_EMPTY_PATH).
>>>>
>>>>> I prefer introducing dynamic probing of statx(fd, NULL, AT_EMPTY_PATH), which
>>>>> can benefit all 32-bit platforms relying on statx for 64-bit timestamps[2], as
>>>>> well as 64-bit loongarch, not only for performance, but also for seccomp
>>>>> sandboxing. Furthermore, by doing so, we can eliminate the need of maintaining
>>>>> our own copy of fstat in loongarch.
>>>> To conclude, the question would be whether it is worthy to have a separately
>>>> maintained fstat in loongarch for the 2.54% performance difference.
>>> The key point here is that the dynamic probing solution can run directly on
>>> kernels without support for the 79 and 80 system calls (which is essential for
>>> us). However, after updating arch-syscall.h, the glibc compiled using Miao
>>> plan[1] cannot run on kernels that do not support the 79 and 80 system calls.
>> Why? In my patch, when the requested kernel compatibility version is below
>> 6.10.6, __ASSUME_LOONGARCH_NEWSTAT will not be defined, and the definition
>> of the syscalls __NR_fstat and __NR_newfstatat will be undef-ed. So the
>> implementation will choose to use statx() in the compile time, and thus able to
>> run on previous kernels.
> Yes this should be correct.
>
> Let me summarize the situation here:
>
> 1. Using statx for fstat and fstatat was stupidly slow.
> 2. After kernel commit 0ef625bba6fb2bc0c8ed2aab9524fdf423f67dd5, statx
> is already about 40-50% faster.  No user space change is needed to get
> the gain.  Unfortunately this commit isn't backported.
> 3. After kernel commit 0ef625bba6fb2bc0c8ed2aab9524fdf423f67dd5, we can
> use NULL instead of "" with AT_EMPTY_PATH.  Miao has already submitted a
> patch for it.  And it will be about 10-20% faster than 2.
> 4. After kernel commit 7697a0fe0154468f5df35c23ebd7aa48994c2cdc, we can
> use native fstat and newfstatat calls on LoongArch 64-bit.  It's only
> 2.54% faster than 3 (per Miao, I've not measured but it matches my gut
> feeling).
>
> So the pros of 4:
> a. it works on 6.1.x, 6.6.y, and 6.10.z if [xyz] is large enough to get
> the backport.
> b. 2.54% faster.
>
> The con of 4: maintenance burden.
>
> To solve a. we can backport 0ef625bba6fb2bc0c8ed2aab9524fdf423f67dd5 to
> 6.1.x, 6.6.y, and 6.10.z.  Then to me b. is really not significant
> enough to outweigh the maintenance burden.
>
> So can we attempt to convince Greg KH to backport
> 0ef625bba6fb2bc0c8ed2aab9524fdf423f67dd5 and use Miao's (simple) patch
> in Glibc instead (and live with the 2.54% difference)?
>
> If both of you can acknowledge the plan I'll write to Greg.
>
Xi Ruoyao Sept. 12, 2024, 10:52 a.m. UTC | #15
On Thu, 2024-09-12 at 17:55 +0800, caiyinyu wrote:
> I understand your point, but we cannot guarantee that downstream vendors 
> will
> clearly and correctly use --enable-kernel=6.10.6 to build packages, 
> especially
> when there are both kernel packages that support and do not support the 
> 79 and
> 80 system calls, as well as glibc packages that use and do not use the
> 79 and
> 80 system calls both in the repo. We need the dynamic probing 
> implementation as a transition.

Then they should build Glibc with --enable-
kernel=$(THE_MINIMAL_KERNEL_VERSION_THEY_PROVIDE_A_PACKAGE).  That's why
distros like Ubuntu is still using --enable-kernel=3.2 today for
x86_64...


And I guess I failed to explain the situation.  They don't need --
enable-kernel=6.11 to get the gain of the kernel commit
0ef625bba6fb2bc0c8ed2aab9524fdf423f67dd5.

To get a ~50% gain, they just need to upgrade the kernel to a version
with the commit 0ef625bba6fb2bc0c8ed2aab9524fdf423f67dd5.  They don't
even need to rebuild Glibc: the optimize is solely in the kernel.

And if they want other ~10%, they can apply
https://sourceware.org/pipermail/libc-alpha/2024-August/159499.html to
Glibc.  This patch has runtime detection, so they still don't need to
use --enable-kernel.

So if we can backport 0ef625bba6fb2bc0c8ed2aab9524fdf423f67dd5 to 6.1,
6.6, and 6.10, the performance difference between the patched statx and
fstat/newfstatat won't be 60%, but only 3%.  So do we really want
hundreds lines of code for a 3% improvement?

I.e. if we cannot backport 0ef625bba6fb2bc0c8ed2aab9524fdf423f67dd5 I'm
OK with your proposal for a 60% improvement, but if we can backport
0ef625bba6fb2bc0c8ed2aab9524fdf423f67dd5 the proposal will only create a
3% improvement and IMHO it won't be worthy.

(Also if the distros don't want to upgrade the kernel to 6.11, they
likely don't want to upgrade their Glibc to 2.41 anyway.  So IMO it's a
good idea to backport 0ef625bba6fb2bc0c8ed2aab9524fdf423f67dd5 then they
can simply get a ~50% improvement by upgrading the kernel to the latest
6.1 or 6.6 LTS point release, w/o upgrading Glibc).

> 在 2024/9/12 下午4:37, Xi Ruoyao 写道:
> > On Thu, 2024-09-12 at 16:17 +0800, Miao Wang wrote:
> > > 
> > > > 2024年9月12日 16:08,caiyinyu <caiyinyu@loongson.cn> 写道:
> > > > 
> > > > 
> > > > 在 2024/9/12 上午2:00, Miao Wang 写道:
> > > > > > 2024年9月12日 00:50,Miao Wang <shankerwangmiao@gmail.com> 写道:
> > > > > > 
> > > > > > > 2024年9月11日 17:40,caiyinyu <caiyinyu@loongson.cn> 写道:
> > > > > > > 
> > > > > > > In Linux 6.11, the fstat (80) and newfstatat (79) syscalls have been
> > > > > > > reintroduced. The definitions of these two syscalls have already been
> > > > > > > backported to version 6.10.6 in the stable tree.
> > > > > > > 
> > > > > > > In this patch, we are adding dynamically probed implementations of
> > > > > > > fstat64 and fstatat64 specifically for syscalls 79 and 80. This ensures
> > > > > > > compatibility while maintaining relatively good performance on kernels
> > > > > > > that both support and do not support syscalls 79 and 80.
> > > > > > > 
> > > > > > > By running an experiment where we invoke fstat64 and fstatat64 100
> > > > > > > million times, we gathered the following efficiency statistics:
> > > > > > > 
> > > > > > > 1. On kernels that support syscalls 79 and 80 (tested on version
> > > > > > >   6.10.6), fstat64 and fstatat64 can directly invoke these syscalls
> > > > > > >   [1]. The time overhead of our dynamic probing implementation
> > > > > > >   increased by 0.5%-2.5% compared to directly calling the syscalls.
> > > > > > > 2. On kernels that support syscalls 79 and 80 (tested on version
> > > > > > >   6.10.6), our dynamically probed implementation reduces the time
> > > > > > >   overhead by more than 60% compared to directly invoking the statx
> > > > > > >   (291) syscall.
> > > > > > > 3. On kernels that do not support syscalls 79 and 80 (tested on version
> > > > > > >   6.8.0), fstat64 and fstatat64 fall back to using the statx (291)
> > > > > > >   syscall (as before). In this case, the overhead of our dynamic
> > > > > > >   probing implementation increased by 0.1%-1.3% compared to directly
> > > > > > >   invoking statx.
> > > > > > Hi, I tried to reproduce your test result, by invoking fstat(2) for 1M times,
> > > > > > repeated by 100 times. The test was carried out on 3A6K, using 6.11-rc7 kernel.
> > > > > > The result is:
> > > > > > 
> > > > > > fstat using statx(fd, "", AT_EMPTY_PATH) (current glibc implementation)
> > > > > > mean: 210420528.100000(ns), sigma: 996145.440248(ns)
> > > > > > 
> > > > > > statx(fd, NULL, AT_EMPTY_PATH) (for comparison)
> > > > > > mean: 199410620.600000(ns), sigma: 111561.012101(ns)
> > > > > > 
> > > > > > fstat using statx(fd, NULL, AT_EMPTY_PATH) (The implementation I proposed)
> > > > > > mean: 208258640.700000(ns), sigma: 468451.704836(ns)
> > > > > > 
> > > > > > fstat using fstat(fd) (Your patch)
> > > > > > mean: 192936673.800000(ns), sigma: 251927.136307(ns)
> > > > > > 
> > > > > > As we can see in the result, the implementation using fstat is 8.31% faster
> > > > > > than the current implementation, instead of "reducing the time overhead by
> > > > > > more than 60%".
> > > > > I did another test on 6.10.7, using the current glibc implementation, i.e.
> > > > > statx(fd, "", AT_EMPTY_PATH), and got the following result:
> > > > > 
> > > > > mean: 603344203.300000(ns), sigma: 715246.975336(ns)
> > > > > 
> > > > > If we use this as the baseline, we can get the following summary:
> > > > > 
> > > > > fstat(fd): 68.02% less time
> > > > > statx(fd, NULL, AT_EMPTY_PATH): 65.48% less time
> > > > > statx(fd, "", AT_EMPTY_PATH) (nothing is changed in glibc, only upgrade the
> > > > >    kernel to 6.11): 65.12% less time
> > > > > 
> > > > > As a result, the performance gain is similar comparing using fstat(fd) and
> > > > > statx(fd, NULL, AT_EMPTY_PATH).
> > > > > 
> > > > > > I prefer introducing dynamic probing of statx(fd, NULL, AT_EMPTY_PATH), which
> > > > > > can benefit all 32-bit platforms relying on statx for 64-bit timestamps[2], as
> > > > > > well as 64-bit loongarch, not only for performance, but also for seccomp
> > > > > > sandboxing. Furthermore, by doing so, we can eliminate the need of maintaining
> > > > > > our own copy of fstat in loongarch.
> > > > > To conclude, the question would be whether it is worthy to have a separately
> > > > > maintained fstat in loongarch for the 2.54% performance difference.
> > > > The key point here is that the dynamic probing solution can run directly on
> > > > kernels without support for the 79 and 80 system calls (which is essential for
> > > > us). However, after updating arch-syscall.h, the glibc compiled using Miao
> > > > plan[1] cannot run on kernels that do not support the 79 and 80 system calls.
> > > Why? In my patch, when the requested kernel compatibility version is below
> > > 6.10.6, __ASSUME_LOONGARCH_NEWSTAT will not be defined, and the definition
> > > of the syscalls __NR_fstat and __NR_newfstatat will be undef-ed. So the
> > > implementation will choose to use statx() in the compile time, and thus able to
> > > run on previous kernels.
> > Yes this should be correct.
> > 
> > Let me summarize the situation here:
> > 
> > 1. Using statx for fstat and fstatat was stupidly slow.
> > 2. After kernel commit 0ef625bba6fb2bc0c8ed2aab9524fdf423f67dd5, statx
> > is already about 40-50% faster.  No user space change is needed to get
> > the gain.  Unfortunately this commit isn't backported.
> > 3. After kernel commit 0ef625bba6fb2bc0c8ed2aab9524fdf423f67dd5, we can
> > use NULL instead of "" with AT_EMPTY_PATH.  Miao has already submitted a
> > patch for it.  And it will be about 10-20% faster than 2.
> > 4. After kernel commit 7697a0fe0154468f5df35c23ebd7aa48994c2cdc, we can
> > use native fstat and newfstatat calls on LoongArch 64-bit.  It's only
> > 2.54% faster than 3 (per Miao, I've not measured but it matches my gut
> > feeling).
> > 
> > So the pros of 4:
> > a. it works on 6.1.x, 6.6.y, and 6.10.z if [xyz] is large enough to get
> > the backport.
> > b. 2.54% faster.
> > 
> > The con of 4: maintenance burden.
> > 
> > To solve a. we can backport 0ef625bba6fb2bc0c8ed2aab9524fdf423f67dd5 to
> > 6.1.x, 6.6.y, and 6.10.z.  Then to me b. is really not significant
> > enough to outweigh the maintenance burden.
> > 
> > So can we attempt to convince Greg KH to backport
> > 0ef625bba6fb2bc0c8ed2aab9524fdf423f67dd5 and use Miao's (simple) patch
> > in Glibc instead (and live with the 2.54% difference)?
> > 
> > If both of you can acknowledge the plan I'll write to Greg.
> > 
>
caiyinyu Sept. 12, 2024, 12:45 p.m. UTC | #16
在 2024/9/12 下午6:52, Xi Ruoyao 写道:
> On Thu, 2024-09-12 at 17:55 +0800, caiyinyu wrote:
>> I understand your point, but we cannot guarantee that downstream vendors
>> will
>> clearly and correctly use --enable-kernel=6.10.6 to build packages,
>> especially
>> when there are both kernel packages that support and do not support the
>> 79 and
>> 80 system calls, as well as glibc packages that use and do not use the
>> 79 and
>> 80 system calls both in the repo. We need the dynamic probing
>> implementation as a transition.
> Then they should build Glibc with --enable-
> kernel=$(THE_MINIMAL_KERNEL_VERSION_THEY_PROVIDE_A_PACKAGE).  That's why
> distros like Ubuntu is still using --enable-kernel=3.2 today for
> x86_64...


This really puts us in a difficult position.
I don't oppose the idea of "Then they should build Glibc with
--enable-kernel=$(THE_MINIMAL_KERNEL_VERSION_THEY_PROVIDE_A_PACKAGE)." 
However,
as I mentioned before, we can't guarantee that downstream vendors will 
always
compile it correctly in this way, so I still stand by my approach.

I've submitted a new patch (v3) that fixes several issues:

1. Removed !defined(__loongarch_lp64) since we currently only have a 64-bit
implementation.
2. __LINUX_KERNEL_VERSION is now used to determine whether to
apply the dynamic probing solution or the statx-only solution. This way,
regardless of how the user configures it, the compiled glibc will be 
able to
run on all kernels.

v1 links: 
https://sourceware.org/pipermail/libc-alpha/2024-September/159862.html
v2 links: 
https://sourceware.org/pipermail/libc-alpha/2024-September/159889.html

>
> And I guess I failed to explain the situation.  They don't need --
> enable-kernel=6.11 to get the gain of the kernel commit
> 0ef625bba6fb2bc0c8ed2aab9524fdf423f67dd5.
>
> To get a ~50% gain, they just need to upgrade the kernel to a version
> with the commit 0ef625bba6fb2bc0c8ed2aab9524fdf423f67dd5.  They don't
> even need to rebuild Glibc: the optimize is solely in the kernel.
>
> And if they want other ~10%, they can apply
> https://sourceware.org/pipermail/libc-alpha/2024-August/159499.html to
> Glibc.  This patch has runtime detection, so they still don't need to
> use --enable-kernel.
>
> So if we can backport 0ef625bba6fb2bc0c8ed2aab9524fdf423f67dd5 to 6.1,
> 6.6, and 6.10, the performance difference between the patched statx and
> fstat/newfstatat won't be 60%, but only 3%.  So do we really want
> hundreds lines of code for a 3% improvement?
>
> I.e. if we cannot backport 0ef625bba6fb2bc0c8ed2aab9524fdf423f67dd5 I'm
> OK with your proposal for a 60% improvement, but if we can backport
> 0ef625bba6fb2bc0c8ed2aab9524fdf423f67dd5 the proposal will only create a
> 3% improvement and IMHO it won't be worthy.
>
> (Also if the distros don't want to upgrade the kernel to 6.11, they
> likely don't want to upgrade their Glibc to 2.41 anyway.  So IMO it's a
> good idea to backport 0ef625bba6fb2bc0c8ed2aab9524fdf423f67dd5 then they
> can simply get a ~50% improvement by upgrading the kernel to the latest
> 6.1 or 6.6 LTS point release, w/o upgrading Glibc).
>
>> 在 2024/9/12 下午4:37, Xi Ruoyao 写道:
>>> On Thu, 2024-09-12 at 16:17 +0800, Miao Wang wrote:
>>>>> 2024年9月12日 16:08,caiyinyu <caiyinyu@loongson.cn> 写道:
>>>>>
>>>>>
>>>>> 在 2024/9/12 上午2:00, Miao Wang 写道:
>>>>>>> 2024年9月12日 00:50,Miao Wang <shankerwangmiao@gmail.com> 写道:
>>>>>>>
>>>>>>>> 2024年9月11日 17:40,caiyinyu <caiyinyu@loongson.cn> 写道:
>>>>>>>>
>>>>>>>> In Linux 6.11, the fstat (80) and newfstatat (79) syscalls have been
>>>>>>>> reintroduced. The definitions of these two syscalls have already been
>>>>>>>> backported to version 6.10.6 in the stable tree.
>>>>>>>>
>>>>>>>> In this patch, we are adding dynamically probed implementations of
>>>>>>>> fstat64 and fstatat64 specifically for syscalls 79 and 80. This ensures
>>>>>>>> compatibility while maintaining relatively good performance on kernels
>>>>>>>> that both support and do not support syscalls 79 and 80.
>>>>>>>>
>>>>>>>> By running an experiment where we invoke fstat64 and fstatat64 100
>>>>>>>> million times, we gathered the following efficiency statistics:
>>>>>>>>
>>>>>>>> 1. On kernels that support syscalls 79 and 80 (tested on version
>>>>>>>>    6.10.6), fstat64 and fstatat64 can directly invoke these syscalls
>>>>>>>>    [1]. The time overhead of our dynamic probing implementation
>>>>>>>>    increased by 0.5%-2.5% compared to directly calling the syscalls.
>>>>>>>> 2. On kernels that support syscalls 79 and 80 (tested on version
>>>>>>>>    6.10.6), our dynamically probed implementation reduces the time
>>>>>>>>    overhead by more than 60% compared to directly invoking the statx
>>>>>>>>    (291) syscall.
>>>>>>>> 3. On kernels that do not support syscalls 79 and 80 (tested on version
>>>>>>>>    6.8.0), fstat64 and fstatat64 fall back to using the statx (291)
>>>>>>>>    syscall (as before). In this case, the overhead of our dynamic
>>>>>>>>    probing implementation increased by 0.1%-1.3% compared to directly
>>>>>>>>    invoking statx.
>>>>>>> Hi, I tried to reproduce your test result, by invoking fstat(2) for 1M times,
>>>>>>> repeated by 100 times. The test was carried out on 3A6K, using 6.11-rc7 kernel.
>>>>>>> The result is:
>>>>>>>
>>>>>>> fstat using statx(fd, "", AT_EMPTY_PATH) (current glibc implementation)
>>>>>>> mean: 210420528.100000(ns), sigma: 996145.440248(ns)
>>>>>>>
>>>>>>> statx(fd, NULL, AT_EMPTY_PATH) (for comparison)
>>>>>>> mean: 199410620.600000(ns), sigma: 111561.012101(ns)
>>>>>>>
>>>>>>> fstat using statx(fd, NULL, AT_EMPTY_PATH) (The implementation I proposed)
>>>>>>> mean: 208258640.700000(ns), sigma: 468451.704836(ns)
>>>>>>>
>>>>>>> fstat using fstat(fd) (Your patch)
>>>>>>> mean: 192936673.800000(ns), sigma: 251927.136307(ns)
>>>>>>>
>>>>>>> As we can see in the result, the implementation using fstat is 8.31% faster
>>>>>>> than the current implementation, instead of "reducing the time overhead by
>>>>>>> more than 60%".
>>>>>> I did another test on 6.10.7, using the current glibc implementation, i.e.
>>>>>> statx(fd, "", AT_EMPTY_PATH), and got the following result:
>>>>>>
>>>>>> mean: 603344203.300000(ns), sigma: 715246.975336(ns)
>>>>>>
>>>>>> If we use this as the baseline, we can get the following summary:
>>>>>>
>>>>>> fstat(fd): 68.02% less time
>>>>>> statx(fd, NULL, AT_EMPTY_PATH): 65.48% less time
>>>>>> statx(fd, "", AT_EMPTY_PATH) (nothing is changed in glibc, only upgrade the
>>>>>>     kernel to 6.11): 65.12% less time
>>>>>>
>>>>>> As a result, the performance gain is similar comparing using fstat(fd) and
>>>>>> statx(fd, NULL, AT_EMPTY_PATH).
>>>>>>
>>>>>>> I prefer introducing dynamic probing of statx(fd, NULL, AT_EMPTY_PATH), which
>>>>>>> can benefit all 32-bit platforms relying on statx for 64-bit timestamps[2], as
>>>>>>> well as 64-bit loongarch, not only for performance, but also for seccomp
>>>>>>> sandboxing. Furthermore, by doing so, we can eliminate the need of maintaining
>>>>>>> our own copy of fstat in loongarch.
>>>>>> To conclude, the question would be whether it is worthy to have a separately
>>>>>> maintained fstat in loongarch for the 2.54% performance difference.
>>>>> The key point here is that the dynamic probing solution can run directly on
>>>>> kernels without support for the 79 and 80 system calls (which is essential for
>>>>> us). However, after updating arch-syscall.h, the glibc compiled using Miao
>>>>> plan[1] cannot run on kernels that do not support the 79 and 80 system calls.
>>>> Why? In my patch, when the requested kernel compatibility version is below
>>>> 6.10.6, __ASSUME_LOONGARCH_NEWSTAT will not be defined, and the definition
>>>> of the syscalls __NR_fstat and __NR_newfstatat will be undef-ed. So the
>>>> implementation will choose to use statx() in the compile time, and thus able to
>>>> run on previous kernels.
>>> Yes this should be correct.
>>>
>>> Let me summarize the situation here:
>>>
>>> 1. Using statx for fstat and fstatat was stupidly slow.
>>> 2. After kernel commit 0ef625bba6fb2bc0c8ed2aab9524fdf423f67dd5, statx
>>> is already about 40-50% faster.  No user space change is needed to get
>>> the gain.  Unfortunately this commit isn't backported.
>>> 3. After kernel commit 0ef625bba6fb2bc0c8ed2aab9524fdf423f67dd5, we can
>>> use NULL instead of "" with AT_EMPTY_PATH.  Miao has already submitted a
>>> patch for it.  And it will be about 10-20% faster than 2.
>>> 4. After kernel commit 7697a0fe0154468f5df35c23ebd7aa48994c2cdc, we can
>>> use native fstat and newfstatat calls on LoongArch 64-bit.  It's only
>>> 2.54% faster than 3 (per Miao, I've not measured but it matches my gut
>>> feeling).
>>>
>>> So the pros of 4:
>>> a. it works on 6.1.x, 6.6.y, and 6.10.z if [xyz] is large enough to get
>>> the backport.
>>> b. 2.54% faster.
>>>
>>> The con of 4: maintenance burden.
>>>
>>> To solve a. we can backport 0ef625bba6fb2bc0c8ed2aab9524fdf423f67dd5 to
>>> 6.1.x, 6.6.y, and 6.10.z.  Then to me b. is really not significant
>>> enough to outweigh the maintenance burden.
>>>
>>> So can we attempt to convince Greg KH to backport
>>> 0ef625bba6fb2bc0c8ed2aab9524fdf423f67dd5 and use Miao's (simple) patch
>>> in Glibc instead (and live with the 2.54% difference)?
>>>
>>> If both of you can acknowledge the plan I'll write to Greg.
>>>
caiyinyu Sept. 12, 2024, 12:46 p.m. UTC | #17
v3 links 
https://sourceware.org/pipermail/libc-alpha/2024-September/159902.html

在 2024/9/12 下午8:45, caiyinyu 写道:
>
> 在 2024/9/12 下午6:52, Xi Ruoyao 写道:
>> On Thu, 2024-09-12 at 17:55 +0800, caiyinyu wrote:
>>> I understand your point, but we cannot guarantee that downstream 
>>> vendors
>>> will
>>> clearly and correctly use --enable-kernel=6.10.6 to build packages,
>>> especially
>>> when there are both kernel packages that support and do not support the
>>> 79 and
>>> 80 system calls, as well as glibc packages that use and do not use the
>>> 79 and
>>> 80 system calls both in the repo. We need the dynamic probing
>>> implementation as a transition.
>> Then they should build Glibc with --enable-
>> kernel=$(THE_MINIMAL_KERNEL_VERSION_THEY_PROVIDE_A_PACKAGE). That's why
>> distros like Ubuntu is still using --enable-kernel=3.2 today for
>> x86_64...
>
>
> This really puts us in a difficult position.
> I don't oppose the idea of "Then they should build Glibc with
> --enable-kernel=$(THE_MINIMAL_KERNEL_VERSION_THEY_PROVIDE_A_PACKAGE)." 
> However,
> as I mentioned before, we can't guarantee that downstream vendors will 
> always
> compile it correctly in this way, so I still stand by my approach.
>
> I've submitted a new patch (v3) that fixes several issues:
>
> 1. Removed !defined(__loongarch_lp64) since we currently only have a 
> 64-bit
> implementation.
> 2. __LINUX_KERNEL_VERSION is now used to determine whether to
> apply the dynamic probing solution or the statx-only solution. This way,
> regardless of how the user configures it, the compiled glibc will be 
> able to
> run on all kernels.
>
> v1 links: 
> https://sourceware.org/pipermail/libc-alpha/2024-September/159862.html
> v2 links: 
> https://sourceware.org/pipermail/libc-alpha/2024-September/159889.html
>
>>
>> And I guess I failed to explain the situation.  They don't need --
>> enable-kernel=6.11 to get the gain of the kernel commit
>> 0ef625bba6fb2bc0c8ed2aab9524fdf423f67dd5.
>>
>> To get a ~50% gain, they just need to upgrade the kernel to a version
>> with the commit 0ef625bba6fb2bc0c8ed2aab9524fdf423f67dd5.  They don't
>> even need to rebuild Glibc: the optimize is solely in the kernel.
>>
>> And if they want other ~10%, they can apply
>> https://sourceware.org/pipermail/libc-alpha/2024-August/159499.html to
>> Glibc.  This patch has runtime detection, so they still don't need to
>> use --enable-kernel.
>>
>> So if we can backport 0ef625bba6fb2bc0c8ed2aab9524fdf423f67dd5 to 6.1,
>> 6.6, and 6.10, the performance difference between the patched statx and
>> fstat/newfstatat won't be 60%, but only 3%.  So do we really want
>> hundreds lines of code for a 3% improvement?
>>
>> I.e. if we cannot backport 0ef625bba6fb2bc0c8ed2aab9524fdf423f67dd5 I'm
>> OK with your proposal for a 60% improvement, but if we can backport
>> 0ef625bba6fb2bc0c8ed2aab9524fdf423f67dd5 the proposal will only create a
>> 3% improvement and IMHO it won't be worthy.
>>
>> (Also if the distros don't want to upgrade the kernel to 6.11, they
>> likely don't want to upgrade their Glibc to 2.41 anyway.  So IMO it's a
>> good idea to backport 0ef625bba6fb2bc0c8ed2aab9524fdf423f67dd5 then they
>> can simply get a ~50% improvement by upgrading the kernel to the latest
>> 6.1 or 6.6 LTS point release, w/o upgrading Glibc).
>>
>>> 在 2024/9/12 下午4:37, Xi Ruoyao 写道:
>>>> On Thu, 2024-09-12 at 16:17 +0800, Miao Wang wrote:
>>>>>> 2024年9月12日 16:08,caiyinyu <caiyinyu@loongson.cn> 写道:
>>>>>>
>>>>>>
>>>>>> 在 2024/9/12 上午2:00, Miao Wang 写道:
>>>>>>>> 2024年9月12日 00:50,Miao Wang <shankerwangmiao@gmail.com> 写道:
>>>>>>>>
>>>>>>>>> 2024年9月11日 17:40,caiyinyu <caiyinyu@loongson.cn> 写道:
>>>>>>>>>
>>>>>>>>> In Linux 6.11, the fstat (80) and newfstatat (79) syscalls 
>>>>>>>>> have been
>>>>>>>>> reintroduced. The definitions of these two syscalls have 
>>>>>>>>> already been
>>>>>>>>> backported to version 6.10.6 in the stable tree.
>>>>>>>>>
>>>>>>>>> In this patch, we are adding dynamically probed 
>>>>>>>>> implementations of
>>>>>>>>> fstat64 and fstatat64 specifically for syscalls 79 and 80. 
>>>>>>>>> This ensures
>>>>>>>>> compatibility while maintaining relatively good performance on 
>>>>>>>>> kernels
>>>>>>>>> that both support and do not support syscalls 79 and 80.
>>>>>>>>>
>>>>>>>>> By running an experiment where we invoke fstat64 and fstatat64 
>>>>>>>>> 100
>>>>>>>>> million times, we gathered the following efficiency statistics:
>>>>>>>>>
>>>>>>>>> 1. On kernels that support syscalls 79 and 80 (tested on version
>>>>>>>>>    6.10.6), fstat64 and fstatat64 can directly invoke these 
>>>>>>>>> syscalls
>>>>>>>>>    [1]. The time overhead of our dynamic probing implementation
>>>>>>>>>    increased by 0.5%-2.5% compared to directly calling the 
>>>>>>>>> syscalls.
>>>>>>>>> 2. On kernels that support syscalls 79 and 80 (tested on version
>>>>>>>>>    6.10.6), our dynamically probed implementation reduces the 
>>>>>>>>> time
>>>>>>>>>    overhead by more than 60% compared to directly invoking the 
>>>>>>>>> statx
>>>>>>>>>    (291) syscall.
>>>>>>>>> 3. On kernels that do not support syscalls 79 and 80 (tested 
>>>>>>>>> on version
>>>>>>>>>    6.8.0), fstat64 and fstatat64 fall back to using the statx 
>>>>>>>>> (291)
>>>>>>>>>    syscall (as before). In this case, the overhead of our dynamic
>>>>>>>>>    probing implementation increased by 0.1%-1.3% compared to 
>>>>>>>>> directly
>>>>>>>>>    invoking statx.
>>>>>>>> Hi, I tried to reproduce your test result, by invoking fstat(2) 
>>>>>>>> for 1M times,
>>>>>>>> repeated by 100 times. The test was carried out on 3A6K, using 
>>>>>>>> 6.11-rc7 kernel.
>>>>>>>> The result is:
>>>>>>>>
>>>>>>>> fstat using statx(fd, "", AT_EMPTY_PATH) (current glibc 
>>>>>>>> implementation)
>>>>>>>> mean: 210420528.100000(ns), sigma: 996145.440248(ns)
>>>>>>>>
>>>>>>>> statx(fd, NULL, AT_EMPTY_PATH) (for comparison)
>>>>>>>> mean: 199410620.600000(ns), sigma: 111561.012101(ns)
>>>>>>>>
>>>>>>>> fstat using statx(fd, NULL, AT_EMPTY_PATH) (The implementation 
>>>>>>>> I proposed)
>>>>>>>> mean: 208258640.700000(ns), sigma: 468451.704836(ns)
>>>>>>>>
>>>>>>>> fstat using fstat(fd) (Your patch)
>>>>>>>> mean: 192936673.800000(ns), sigma: 251927.136307(ns)
>>>>>>>>
>>>>>>>> As we can see in the result, the implementation using fstat is 
>>>>>>>> 8.31% faster
>>>>>>>> than the current implementation, instead of "reducing the time 
>>>>>>>> overhead by
>>>>>>>> more than 60%".
>>>>>>> I did another test on 6.10.7, using the current glibc 
>>>>>>> implementation, i.e.
>>>>>>> statx(fd, "", AT_EMPTY_PATH), and got the following result:
>>>>>>>
>>>>>>> mean: 603344203.300000(ns), sigma: 715246.975336(ns)
>>>>>>>
>>>>>>> If we use this as the baseline, we can get the following summary:
>>>>>>>
>>>>>>> fstat(fd): 68.02% less time
>>>>>>> statx(fd, NULL, AT_EMPTY_PATH): 65.48% less time
>>>>>>> statx(fd, "", AT_EMPTY_PATH) (nothing is changed in glibc, only 
>>>>>>> upgrade the
>>>>>>>     kernel to 6.11): 65.12% less time
>>>>>>>
>>>>>>> As a result, the performance gain is similar comparing using 
>>>>>>> fstat(fd) and
>>>>>>> statx(fd, NULL, AT_EMPTY_PATH).
>>>>>>>
>>>>>>>> I prefer introducing dynamic probing of statx(fd, NULL, 
>>>>>>>> AT_EMPTY_PATH), which
>>>>>>>> can benefit all 32-bit platforms relying on statx for 64-bit 
>>>>>>>> timestamps[2], as
>>>>>>>> well as 64-bit loongarch, not only for performance, but also 
>>>>>>>> for seccomp
>>>>>>>> sandboxing. Furthermore, by doing so, we can eliminate the need 
>>>>>>>> of maintaining
>>>>>>>> our own copy of fstat in loongarch.
>>>>>>> To conclude, the question would be whether it is worthy to have 
>>>>>>> a separately
>>>>>>> maintained fstat in loongarch for the 2.54% performance difference.
>>>>>> The key point here is that the dynamic probing solution can run 
>>>>>> directly on
>>>>>> kernels without support for the 79 and 80 system calls (which is 
>>>>>> essential for
>>>>>> us). However, after updating arch-syscall.h, the glibc compiled 
>>>>>> using Miao
>>>>>> plan[1] cannot run on kernels that do not support the 79 and 80 
>>>>>> system calls.
>>>>> Why? In my patch, when the requested kernel compatibility version 
>>>>> is below
>>>>> 6.10.6, __ASSUME_LOONGARCH_NEWSTAT will not be defined, and the 
>>>>> definition
>>>>> of the syscalls __NR_fstat and __NR_newfstatat will be undef-ed. 
>>>>> So the
>>>>> implementation will choose to use statx() in the compile time, and 
>>>>> thus able to
>>>>> run on previous kernels.
>>>> Yes this should be correct.
>>>>
>>>> Let me summarize the situation here:
>>>>
>>>> 1. Using statx for fstat and fstatat was stupidly slow.
>>>> 2. After kernel commit 0ef625bba6fb2bc0c8ed2aab9524fdf423f67dd5, statx
>>>> is already about 40-50% faster.  No user space change is needed to get
>>>> the gain.  Unfortunately this commit isn't backported.
>>>> 3. After kernel commit 0ef625bba6fb2bc0c8ed2aab9524fdf423f67dd5, we 
>>>> can
>>>> use NULL instead of "" with AT_EMPTY_PATH.  Miao has already 
>>>> submitted a
>>>> patch for it.  And it will be about 10-20% faster than 2.
>>>> 4. After kernel commit 7697a0fe0154468f5df35c23ebd7aa48994c2cdc, we 
>>>> can
>>>> use native fstat and newfstatat calls on LoongArch 64-bit. It's only
>>>> 2.54% faster than 3 (per Miao, I've not measured but it matches my gut
>>>> feeling).
>>>>
>>>> So the pros of 4:
>>>> a. it works on 6.1.x, 6.6.y, and 6.10.z if [xyz] is large enough to 
>>>> get
>>>> the backport.
>>>> b. 2.54% faster.
>>>>
>>>> The con of 4: maintenance burden.
>>>>
>>>> To solve a. we can backport 
>>>> 0ef625bba6fb2bc0c8ed2aab9524fdf423f67dd5 to
>>>> 6.1.x, 6.6.y, and 6.10.z.  Then to me b. is really not significant
>>>> enough to outweigh the maintenance burden.
>>>>
>>>> So can we attempt to convince Greg KH to backport
>>>> 0ef625bba6fb2bc0c8ed2aab9524fdf423f67dd5 and use Miao's (simple) patch
>>>> in Glibc instead (and live with the 2.54% difference)?
>>>>
>>>> If both of you can acknowledge the plan I'll write to Greg.
>>>>
Xi Ruoyao Sept. 12, 2024, 12:55 p.m. UTC | #18
On Thu, 2024-09-12 at 20:45 +0800, caiyinyu wrote:
> This really puts us in a difficult position.
> I don't oppose the idea of "Then they should build Glibc with
> --enable-kernel=$(THE_MINIMAL_KERNEL_VERSION_THEY_PROVIDE_A_PACKAGE)."
> However,
> as I mentioned before, we can't guarantee that downstream vendors will
> always
> compile it correctly in this way, so I still stand by my approach.

Then they are doing wrong things.  Why they have to specify a version
higher than their kernel?  This does not make any sense to me.

> I've submitted a new patch (v3) that fixes several issues:
> 
> 1. Removed !defined(__loongarch_lp64) since we currently only have a 64-bit
> implementation.
> 2. __LINUX_KERNEL_VERSION is now used to determine whether to
> apply the dynamic probing solution or the statx-only solution. This way,
> regardless of how the user configures it, the compiled glibc will be 
> able to
> run on all kernels.

No it's not true.  If you configure it with --enable-kernel=6.10.6, the
*generic* code will already omit the runtime checks for older kernels
and it will fail to work anyway.

This is how --enable-kernel is defined in Glibc.  You are altering its
meaning thus this is not a port-specific change anyway.  I'm requesting
the general maintainers to either NAK this patch or globally change the
meaning of --enable-kernel (which isn't likely to happen).
Xi Ruoyao Sept. 12, 2024, 1:09 p.m. UTC | #19
On Thu, 2024-09-12 at 20:55 +0800, Xi Ruoyao wrote:
> On Thu, 2024-09-12 at 20:45 +0800, caiyinyu wrote:
> > This really puts us in a difficult position.
> > I don't oppose the idea of "Then they should build Glibc with
> > --enable-kernel=$(THE_MINIMAL_KERNEL_VERSION_THEY_PROVIDE_A_PACKAGE)."
> > However,
> > as I mentioned before, we can't guarantee that downstream vendors will
> > always
> > compile it correctly in this way, so I still stand by my approach.
> 
> Then they are doing wrong things.  Why they have to specify a version
> higher than their kernel?  This does not make any sense to me.
> 
> > I've submitted a new patch (v3) that fixes several issues:
> > 
> > 1. Removed !defined(__loongarch_lp64) since we currently only have a 64-bit
> > implementation.
> > 2. __LINUX_KERNEL_VERSION is now used to determine whether to
> > apply the dynamic probing solution or the statx-only solution. This way,
> > regardless of how the user configures it, the compiled glibc will be
> > able to
> > run on all kernels.
> 
> No it's not true.  If you configure it with --enable-kernel=6.10.6, the
> *generic* code will already omit the runtime checks for older kernels
> and it will fail to work anyway.
> 
> This is how --enable-kernel is defined in Glibc.  You are altering its
> meaning thus this is not a port-specific change anyway.  I'm requesting
> the general maintainers to either NAK this patch or globally change the
> meaning of --enable-kernel (which isn't likely to happen).
> 

BTW it's always "correct things shouldn't make compromise for broken
things" in development.  Otherwise we cannot make any progress.  For
example if we say "not all people knows undefined behaviors in C and
avoids them correctly, so let's not break their code," everyone would be
still using GCC 4.2 today.

They really need to either improve, or resign from their job if they
cannot maintain a distro correctly.  Instead of changing the meaning of
a well-established Glibc configure option.

If you just push this with your port-maintainer privilege I'll send a
patch to avoid checking with --enable-kernel=6.10.6 or later and request
a direct review from global maintainers.  And I have 99.99% confidence
they'll agree with me.

I don't want to be so aggressive but I've had enough of all this stat
mess.
caiyinyu Sept. 13, 2024, 2:11 a.m. UTC | #20
First of all, we will not force a patch into the mainline while ignoring the
community's opinions (on the contrary, we highly value community feedback).
This goes against our original intention of contributing to the community.

Secondly, let's forget about dynamic probing. How about just using statx,
regardless of whether the kernel supports syscalls 79 and 80, just like 
we did
before?
patch like:
diff --git a/sysdeps/unix/sysv/linux/loongarch/sysdep.h 
b/sysdeps/unix/sysv/linux/loongarch/sysdep.h
index eb0ba790da..7c95a039c8 100644
--- a/sysdeps/unix/sysv/linux/loongarch/sysdep.h
+++ b/sysdeps/unix/sysv/linux/loongarch/sysdep.h
@@ -109,6 +109,9 @@
  #undef SYS_ify
  #define SYS_ify(syscall_name) __NR_##syscall_name

+#undef __NR_fstat
+#undef __NR_newfstatat
+
  #ifndef __ASSEMBLER__

  #define VDSO_NAME "LINUX_5.10"

Lastly, regarding "No it's not true.  If you configure it with
--enable-kernel=6.10.6, the *generic* code will already omit the runtime 
checks
for older kernels and it will fail to work anyway.

This is how --enable-kernel is defined in Glibc.  You are altering its 
meaning
thus this is not a port-specific change anyway.  I'm requesting the general
maintainers to either NAK this patch or globally change the meaning of
--enable-kernel (which isn't likely to happen)."

*just for correction*, is the following the correct way?

diff --git a/sysdeps/unix/sysv/linux/loongarch/kernel-features.h 
b/sysdeps/unix/sysv/linux/loongarch/kernel-features.h
new file mode 100644
index 0000000000..8e2927c501
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/loongarch/kernel-features.h
@@ -0,0 +1,27 @@
...
+
+#include_next <kernel-features.h>
+
+#define __ASSUME_LOONGARCH_NEWSTAT 1
+
+/* No support for fstat or newfstatat before 6.10.6.  */
+#if __LINUX_KERNEL_VERSION < 0x060a06
+# undef __ASSUME_LOONGARCH_NEWSTAT
+#endif
...

diff --git a/sysdeps/unix/sysv/linux/loongarch/sysdep.h 
b/sysdeps/unix/sysv/linux/loongarch/sysdep.h
index eb0ba790da..1fdf18197f 100644
--- a/sysdeps/unix/sysv/linux/loongarch/sysdep.h
+++ b/sysdeps/unix/sysv/linux/loongarch/sysdep.h
@@ -109,6 +109,11 @@
  #undef SYS_ify
  #define SYS_ify(syscall_name) __NR_##syscall_name

+#ifndef __ASSUME_LOONGARCH_NEWSTAT
+#undef __NR_fstat
+#undef __NR_newfstatat
+#endif
+
  #ifndef __ASSEMBLER__

  diff in fstat64.c
  ...
+
+#ifdef __NR_fstat
+static int __fstat_syscall_supported = 1;
+#endif
+
...

diff in fstatat64.c
...
+#ifdef __NR_newfstatat
+static int __newfstatat_syscall_supported = 1;
+#endif
+
...


在 2024/9/12 下午9:09, Xi Ruoyao 写道:
> On Thu, 2024-09-12 at 20:55 +0800, Xi Ruoyao wrote:
>> On Thu, 2024-09-12 at 20:45 +0800, caiyinyu wrote:
>>> This really puts us in a difficult position.
>>> I don't oppose the idea of "Then they should build Glibc with
>>> --enable-kernel=$(THE_MINIMAL_KERNEL_VERSION_THEY_PROVIDE_A_PACKAGE)."
>>> However,
>>> as I mentioned before, we can't guarantee that downstream vendors will
>>> always
>>> compile it correctly in this way, so I still stand by my approach.
>> Then they are doing wrong things.  Why they have to specify a version
>> higher than their kernel?  This does not make any sense to me.
>>
>>> I've submitted a new patch (v3) that fixes several issues:
>>>
>>> 1. Removed !defined(__loongarch_lp64) since we currently only have a 64-bit
>>> implementation.
>>> 2. __LINUX_KERNEL_VERSION is now used to determine whether to
>>> apply the dynamic probing solution or the statx-only solution. This way,
>>> regardless of how the user configures it, the compiled glibc will be
>>> able to
>>> run on all kernels.
>> No it's not true.  If you configure it with --enable-kernel=6.10.6, the
>> *generic* code will already omit the runtime checks for older kernels
>> and it will fail to work anyway.
>>
>> This is how --enable-kernel is defined in Glibc.  You are altering its
>> meaning thus this is not a port-specific change anyway.  I'm requesting
>> the general maintainers to either NAK this patch or globally change the
>> meaning of --enable-kernel (which isn't likely to happen).
>>
> BTW it's always "correct things shouldn't make compromise for broken
> things" in development.  Otherwise we cannot make any progress.  For
> example if we say "not all people knows undefined behaviors in C and
> avoids them correctly, so let's not break their code," everyone would be
> still using GCC 4.2 today.
>
> They really need to either improve, or resign from their job if they
> cannot maintain a distro correctly.  Instead of changing the meaning of
> a well-established Glibc configure option.
>
> If you just push this with your port-maintainer privilege I'll send a
> patch to avoid checking with --enable-kernel=6.10.6 or later and request
> a direct review from global maintainers.  And I have 99.99% confidence
> they'll agree with me.
>
> I don't want to be so aggressive but I've had enough of all this stat
> mess.
>
Xi Ruoyao Sept. 13, 2024, 6:44 a.m. UTC | #21
On Fri, 2024-09-13 at 10:11 +0800, caiyinyu wrote:
> First of all, we will not force a patch into the mainline while ignoring the
> community's opinions (on the contrary, we highly value community feedback).
> This goes against our original intention of contributing to the community.
> 
> Secondly, let's forget about dynamic probing. How about just using statx,
> regardless of whether the kernel supports syscalls 79 and 80, just like 
> we did
> before?

I'm OK with it.  Huacai also told me he likes this approach.

> patch like:
> diff --git a/sysdeps/unix/sysv/linux/loongarch/sysdep.h 
> b/sysdeps/unix/sysv/linux/loongarch/sysdep.h
> index eb0ba790da..7c95a039c8 100644
> --- a/sysdeps/unix/sysv/linux/loongarch/sysdep.h
> +++ b/sysdeps/unix/sysv/linux/loongarch/sysdep.h
> @@ -109,6 +109,9 @@
>   #undef SYS_ify
>   #define SYS_ify(syscall_name) __NR_##syscall_name
> 
> +#undef __NR_fstat
> +#undef __NR_newfstatat
> +
>   #ifndef __ASSEMBLER__
> 
>   #define VDSO_NAME "LINUX_5.10"
> 
> Lastly, regarding "No it's not true.  If you configure it with
> --enable-kernel=6.10.6, the *generic* code will already omit the runtime 
> checks
> for older kernels and it will fail to work anyway.
> 
> This is how --enable-kernel is defined in Glibc.  You are altering its
> meaning
> thus this is not a port-specific change anyway.  I'm requesting the general
> maintainers to either NAK this patch or globally change the meaning of
> --enable-kernel (which isn't likely to happen)."
> 
> *just for correction*, is the following the correct way?
> 
> diff --git a/sysdeps/unix/sysv/linux/loongarch/kernel-features.h 
> b/sysdeps/unix/sysv/linux/loongarch/kernel-features.h
> new file mode 100644
> index 0000000000..8e2927c501
> --- /dev/null
> +++ b/sysdeps/unix/sysv/linux/loongarch/kernel-features.h
> @@ -0,0 +1,27 @@
> ...
> +
> +#include_next <kernel-features.h>
> +
> +#define __ASSUME_LOONGARCH_NEWSTAT 1
> +
> +/* No support for fstat or newfstatat before 6.10.6.  */
> +#if __LINUX_KERNEL_VERSION < 0x060a06
> +# undef __ASSUME_LOONGARCH_NEWSTAT
> +#endif
> ...
> 
> diff --git a/sysdeps/unix/sysv/linux/loongarch/sysdep.h 
> b/sysdeps/unix/sysv/linux/loongarch/sysdep.h
> index eb0ba790da..1fdf18197f 100644
> --- a/sysdeps/unix/sysv/linux/loongarch/sysdep.h
> +++ b/sysdeps/unix/sysv/linux/loongarch/sysdep.h
> @@ -109,6 +109,11 @@
>   #undef SYS_ify
>   #define SYS_ify(syscall_name) __NR_##syscall_name
> 
> +#ifndef __ASSUME_LOONGARCH_NEWSTAT
> +#undef __NR_fstat
> +#undef __NR_newfstatat
> +#endif
> +
>   #ifndef __ASSEMBLER__
> 
>   diff in fstat64.c
>   ...
> +
> +#ifdef __NR_fstat
> +static int __fstat_syscall_supported = 1;

If we don't need dynamic probe we don't need the variable?  Or am I
missing something here?

> +#endif
> +
> ...
> 
> diff in fstatat64.c
> ...
> +#ifdef __NR_newfstatat
> +static int __newfstatat_syscall_supported = 1;
> +#endif
> +
> ...
Miao Wang Sept. 13, 2024, 8:25 a.m. UTC | #22
> 2024年9月13日 14:44,Xi Ruoyao <xry111@xry111.site> 写道:
> 
> On Fri, 2024-09-13 at 10:11 +0800, caiyinyu wrote:
>> First of all, we will not force a patch into the mainline while ignoring the
>> community's opinions (on the contrary, we highly value community feedback).
>> This goes against our original intention of contributing to the community.
>> 
>> Secondly, let's forget about dynamic probing. How about just using statx,
>> regardless of whether the kernel supports syscalls 79 and 80, just like 
>> we did
>> before?
> 
> I'm OK with it.  Huacai also told me he likes this approach.

I'm selling my previous proposal again. My consideration is that introducing
dynamic probing for fstat is theoretically correct for better performance
utilizing available kernel features. However, introducing this will hugely
add maintenance burden. As a result, I recommend against dynamic probing for
fstat.

However, I don't think it is good to totally ignore an available kernel feature,
since fstat is slightly faster than statx(..., NULL, AT_EMPTY_PATH). To make a
compromise, we may choose to use fstat and newfstatat statically when the
declared supported kernel version is above 6.10.6, which was implemented in my
previous patch in [1], with minor code changes and leaving most of current
common implementation of fstat unchanged.

Considering the current performance issue, on the one hand, the kernel improves
its performance hugely when calling statx(..., "", AT_EMPTY_PATH), no changes
applied to glibc; on the other hand, glibc can also try to use statx(..., NULL,
AT_EMPTY_PATH), for a even better performance and easier sandboxing.

Cheers,

Miao Wang

> 
>> patch like:
>> diff --git a/sysdeps/unix/sysv/linux/loongarch/sysdep.h 
>> b/sysdeps/unix/sysv/linux/loongarch/sysdep.h
>> index eb0ba790da..7c95a039c8 100644
>> --- a/sysdeps/unix/sysv/linux/loongarch/sysdep.h
>> +++ b/sysdeps/unix/sysv/linux/loongarch/sysdep.h
>> @@ -109,6 +109,9 @@
>>   #undef SYS_ify
>>   #define SYS_ify(syscall_name) __NR_##syscall_name
>> 
>> +#undef __NR_fstat
>> +#undef __NR_newfstatat
>> +
>>   #ifndef __ASSEMBLER__
>> 
>>   #define VDSO_NAME "LINUX_5.10"
>> 
>> Lastly, regarding "No it's not true.  If you configure it with
>> --enable-kernel=6.10.6, the *generic* code will already omit the runtime 
>> checks
>> for older kernels and it will fail to work anyway.
>> 
>> This is how --enable-kernel is defined in Glibc.  You are altering its
>> meaning
>> thus this is not a port-specific change anyway.  I'm requesting the general
>> maintainers to either NAK this patch or globally change the meaning of
>> --enable-kernel (which isn't likely to happen)."
>> 
>> *just for correction*, is the following the correct way?
>> 
>> diff --git a/sysdeps/unix/sysv/linux/loongarch/kernel-features.h 
>> b/sysdeps/unix/sysv/linux/loongarch/kernel-features.h
>> new file mode 100644
>> index 0000000000..8e2927c501
>> --- /dev/null
>> +++ b/sysdeps/unix/sysv/linux/loongarch/kernel-features.h
>> @@ -0,0 +1,27 @@
>> ...
>> +
>> +#include_next <kernel-features.h>
>> +
>> +#define __ASSUME_LOONGARCH_NEWSTAT 1
>> +
>> +/* No support for fstat or newfstatat before 6.10.6.  */
>> +#if __LINUX_KERNEL_VERSION < 0x060a06
>> +# undef __ASSUME_LOONGARCH_NEWSTAT
>> +#endif
>> ...
>> 
>> diff --git a/sysdeps/unix/sysv/linux/loongarch/sysdep.h 
>> b/sysdeps/unix/sysv/linux/loongarch/sysdep.h
>> index eb0ba790da..1fdf18197f 100644
>> --- a/sysdeps/unix/sysv/linux/loongarch/sysdep.h
>> +++ b/sysdeps/unix/sysv/linux/loongarch/sysdep.h
>> @@ -109,6 +109,11 @@
>>   #undef SYS_ify
>>   #define SYS_ify(syscall_name) __NR_##syscall_name
>> 
>> +#ifndef __ASSUME_LOONGARCH_NEWSTAT
>> +#undef __NR_fstat
>> +#undef __NR_newfstatat
>> +#endif
>> +
>>   #ifndef __ASSEMBLER__
>> 
>>   diff in fstat64.c
>>   ...
>> +
>> +#ifdef __NR_fstat
>> +static int __fstat_syscall_supported = 1;
> 
> If we don't need dynamic probe we don't need the variable?  Or am I
> missing something here?
> 
>> +#endif
>> +
>> ...
>> 
>> diff in fstatat64.c
>> ...
>> +#ifdef __NR_newfstatat
>> +static int __newfstatat_syscall_supported = 1;
>> +#endif
>> +
>> ...
> 
> -- 
> Xi Ruoyao <xry111@xry111.site>
> School of Aerospace Science and Technology, Xidian University
Xi Ruoyao Sept. 13, 2024, 9:04 a.m. UTC | #23
On Fri, 2024-09-13 at 16:25 +0800, Miao Wang wrote:
> 
> 
> > 2024年9月13日 14:44,Xi Ruoyao <xry111@xry111.site> 写道:
> > 
> > On Fri, 2024-09-13 at 10:11 +0800, caiyinyu wrote:
> > > First of all, we will not force a patch into the mainline while ignoring the
> > > community's opinions (on the contrary, we highly value community feedback).
> > > This goes against our original intention of contributing to the community.
> > > 
> > > Secondly, let's forget about dynamic probing. How about just using statx,
> > > regardless of whether the kernel supports syscalls 79 and 80, just like 
> > > we did
> > > before?
> > 
> > I'm OK with it.  Huacai also told me he likes this approach.
> 
> I'm selling my previous proposal again. My consideration is that introducing
> dynamic probing for fstat is theoretically correct for better performance
> utilizing available kernel features. However, introducing this will hugely
> add maintenance burden. As a result, I recommend against dynamic probing for
> fstat.
> 
> However, I don't think it is good to totally ignore an available kernel feature,
> since fstat is slightly faster than statx(..., NULL, AT_EMPTY_PATH). To make a
> compromise, we may choose to use fstat and newfstatat statically when the
> declared supported kernel version is above 6.10.6, which was implemented in my
> previous patch in [1], with minor code changes and leaving most of current
> common implementation of fstat unchanged.

I don't have a strong preference to these 3 proposals:

1. Just undef the two syscalls.
2. Undef the two syscalls iff. --enable-kernel < 6.10.6.
3. Do a dynamic probe iff. --enable-kernel < 6.10.6.

I just cannot accept misinterpreting --enable-kernel.

> Considering the current performance issue, on the one hand, the kernel improves
> its performance hugely when calling statx(..., "", AT_EMPTY_PATH), no changes
> applied to glibc; on the other hand, glibc can also try to use statx(..., NULL,
> AT_EMPTY_PATH), for a even better performance and easier sandboxing.

The statx change would be a global change and it's orthogonal with any
of [1-3].  Even if not considering LoongArch, the use of NULL in statx
still helps all 32-bit platforms.

Thus we can apply the statx change and one of [1-3] w/o conflict.
caiyinyu Sept. 14, 2024, 7:36 a.m. UTC | #24
在 2024/9/13 下午5:04, Xi Ruoyao 写道:
> On Fri, 2024-09-13 at 16:25 +0800, Miao Wang wrote:
>>
>>> 2024年9月13日 14:44,Xi Ruoyao <xry111@xry111.site> 写道:
>>>
>>> On Fri, 2024-09-13 at 10:11 +0800, caiyinyu wrote:
>>>> First of all, we will not force a patch into the mainline while ignoring the
>>>> community's opinions (on the contrary, we highly value community feedback).
>>>> This goes against our original intention of contributing to the community.
>>>>
>>>> Secondly, let's forget about dynamic probing. How about just using statx,
>>>> regardless of whether the kernel supports syscalls 79 and 80, just like
>>>> we did
>>>> before?
>>> I'm OK with it.  Huacai also told me he likes this approach.

OK, thanks for the community's prompt feedback. We will go with statx only.
Below is the new submitted patch, and I will merge it once the global 
maintainer updates arch-syscall.h.

links: 
https://sourceware.org/pipermail/libc-alpha/2024-September/159932.html

commit aaa18c8109e127c4e28b231e5e99380216a38de9 (HEAD -> master)
Author: caiyinyu <caiyinyu@loongson.cn>
Date:   Sat Sep 14 14:10:10 2024 +0800

     LoongArch: Undef __NR_fstat and __NR_newfstatat.

     In Linux 6.11, fstat and newfstatat are added back. To avoid the messy
     usage of the fstat, newfstatat, and statx system calls, we will 
continue
     using statx only in glibc, maintaining consistency with previous 
versions of
     the LoongArch-specific glibc implementation.

     Signed-off-by: caiyinyu <caiyinyu@loongson.cn>
     Reviewed-by: Xi Ruoyao <xry111@xry111.site>
     Reviewed-by: Miao Wang <shankerwangmiao@gmail.com>

diff --git a/sysdeps/unix/sysv/linux/loongarch/sysdep.h 
b/sysdeps/unix/sysv/linux/loongarch/sysdep.h
index eb0ba790da..8a6c33de38 100644
--- a/sysdeps/unix/sysv/linux/loongarch/sysdep.h
+++ b/sysdeps/unix/sysv/linux/loongarch/sysdep.h
@@ -109,6 +109,11 @@
  #undef SYS_ify
  #define SYS_ify(syscall_name) __NR_##syscall_name

+/* To avoid the messy usage of the fstat, newfstatat, and statx system 
calls, we
+only use statx.  */
+#undef __NR_fstat
+#undef __NR_newfstatat
+
  #ifndef __ASSEMBLER__

  #define VDSO_NAME "LINUX_5.10"


>> I'm selling my previous proposal again. My consideration is that introducing
>> dynamic probing for fstat is theoretically correct for better performance
>> utilizing available kernel features. However, introducing this will hugely
>> add maintenance burden. As a result, I recommend against dynamic probing for
>> fstat.
>>
>> However, I don't think it is good to totally ignore an available kernel feature,
>> since fstat is slightly faster than statx(..., NULL, AT_EMPTY_PATH). To make a
>> compromise, we may choose to use fstat and newfstatat statically when the
>> declared supported kernel version is above 6.10.6, which was implemented in my
>> previous patch in [1], with minor code changes and leaving most of current
>> common implementation of fstat unchanged.
> I don't have a strong preference to these 3 proposals:
>
> 1. Just undef the two syscalls.
> 2. Undef the two syscalls iff. --enable-kernel < 6.10.6.
> 3. Do a dynamic probe iff. --enable-kernel < 6.10.6.
>
> I just cannot accept misinterpreting --enable-kernel.
>
>> Considering the current performance issue, on the one hand, the kernel improves
>> its performance hugely when calling statx(..., "", AT_EMPTY_PATH), no changes
>> applied to glibc; on the other hand, glibc can also try to use statx(..., NULL,
>> AT_EMPTY_PATH), for a even better performance and easier sandboxing.
> The statx change would be a global change and it's orthogonal with any
> of [1-3].  Even if not considering LoongArch, the use of NULL in statx
> still helps all 32-bit platforms.
>
> Thus we can apply the statx change and one of [1-3] w/o conflict.
>
Xi Ruoyao Sept. 14, 2024, 1:33 p.m. UTC | #25
On Sat, 2024-09-14 at 15:36 +0800, caiyinyu wrote:
> > > > > Secondly, let's forget about dynamic probing. How about just using statx,
> > > > > regardless of whether the kernel supports syscalls 79 and 80, just like
> > > > > we did
> > > > > before?
> > > > I'm OK with it.  Huacai also told me he likes this approach.
> 
> OK, thanks for the community's prompt feedback. We will go with statx only.
> Below is the new submitted patch, and I will merge it once the global 
> maintainer updates arch-syscall.h.
> 
> links: 
> https://sourceware.org/pipermail/libc-alpha/2024-September/159932.html

Ok.

But still I'd suggest you to issue a bulletin or advisory to your
downstream about how to use --enable-kernel correctly, if you know some
of them are using it incorrectly.

Or there build will finally be broken when a generic change depending on
--enable-kernel happens, sooner or later.
diff mbox series

Patch

diff --git a/sysdeps/unix/sysv/linux/loongarch/arch-syscall.h b/sysdeps/unix/sysv/linux/loongarch/arch-syscall.h
index 8bb82448a7..7e732256fd 100644
--- a/sysdeps/unix/sysv/linux/loongarch/arch-syscall.h
+++ b/sysdeps/unix/sysv/linux/loongarch/arch-syscall.h
@@ -59,6 +59,7 @@ 
 #define __NR_fsmount 432
 #define __NR_fsopen 430
 #define __NR_fspick 433
+#define __NR_fstat 80
 #define __NR_fstatfs 44
 #define __NR_fsync 82
 #define __NR_ftruncate 46
@@ -166,6 +167,7 @@ 
 #define __NR_munmap 215
 #define __NR_name_to_handle_at 264
 #define __NR_nanosleep 101
+#define __NR_newfstatat 79
 #define __NR_nfsservctl 42
 #define __NR_open_by_handle_at 265
 #define __NR_open_tree 428
diff --git a/sysdeps/unix/sysv/linux/loongarch/fstat64.c b/sysdeps/unix/sysv/linux/loongarch/fstat64.c
new file mode 100644
index 0000000000..b1a9e9c6a4
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/loongarch/fstat64.c
@@ -0,0 +1,64 @@ 
+/* Get file status.  LoongArch version.
+   Copyright (C) 2024 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#define __fstat __redirect___fstat
+#define fstat   __redirect_fstat
+#include <sys/stat.h>
+#undef __fstat
+#undef fstat
+
+#include "fstatat64_time64_statx.h"
+
+int __fstat_syscall_supported attribute_hidden = 1;
+
+int
+__fstat64 (int fd, struct __stat64_t64 *buf)
+{
+  int r;
+  extern int __fstat_syscall_supported attribute_hidden;
+  int supported = atomic_load_relaxed (&__fstat_syscall_supported);
+  if (__glibc_likely (supported))
+  {
+    r = INTERNAL_SYSCALL_CALL (fstat, fd, buf);
+    if (r == 0)
+      return r;
+    else if (r != -ENOSYS)
+      return INLINE_SYSCALL_ERROR_RETURN_VALUE (-r);
+
+    atomic_store_relaxed (&__fstat_syscall_supported, 0);
+  }
+
+  if (fd < 0)
+  {
+    __set_errno (EBADF);
+    return -1;
+  }
+
+  r = fstatat64_time64_statx (fd, "", buf, AT_EMPTY_PATH);
+  return INTERNAL_SYSCALL_ERROR_P (r)
+    ? INLINE_SYSCALL_ERROR_RETURN_VALUE (-r)
+    : 0;
+}
+
+hidden_def (__fstat64)
+weak_alias (__fstat64, fstat64)
+
+#if XSTAT_IS_XSTAT64
+strong_alias (__fstat64, __fstat)
+weak_alias (__fstat64, fstat)
+#endif
diff --git a/sysdeps/unix/sysv/linux/loongarch/fstatat64.c b/sysdeps/unix/sysv/linux/loongarch/fstatat64.c
new file mode 100644
index 0000000000..64f5b3b8f6
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/loongarch/fstatat64.c
@@ -0,0 +1,59 @@ 
+/* Get file status.  LoongArch version.
+   Copyright (C) 2024 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#define __fstatat __redirect___fstatat
+#define fstatat   __redirect_fstatat
+#include <sys/stat.h>
+#undef __fstatat
+#undef fstatat
+
+#include "fstatat64_time64_statx.h"
+
+int __newfstatat_syscall_supported attribute_hidden = 1;
+
+int
+__fstatat64 (int fd, const char *file, struct __stat64_t64 *buf,
+		    int flag)
+{
+  int r;
+  extern int __newfstatat_syscall_supported attribute_hidden;
+  int supported = atomic_load_relaxed (&__newfstatat_syscall_supported);
+  if (__glibc_likely (supported))
+  {
+    r = INTERNAL_SYSCALL_CALL (newfstatat, fd, file, buf, flag);
+    if (r == 0)
+      return r;
+    else if (r != -ENOSYS)
+      return INLINE_SYSCALL_ERROR_RETURN_VALUE (-r);
+    atomic_store_relaxed (&__newfstatat_syscall_supported, 0);
+  }
+
+  r = fstatat64_time64_statx (fd, file, buf, flag);
+  return INTERNAL_SYSCALL_ERROR_P (r)
+	 ? INLINE_SYSCALL_ERROR_RETURN_VALUE (-r)
+	 : 0;
+}
+
+hidden_def (__fstatat64)
+weak_alias (__fstatat64, fstatat64)
+
+#if XSTAT_IS_XSTAT64
+strong_alias (__fstatat64, __fstatat)
+weak_alias (__fstatat64, fstatat)
+strong_alias (__fstatat64, __GI___fstatat);
+#endif
diff --git a/sysdeps/unix/sysv/linux/loongarch/fstatat64_time64_statx.h b/sysdeps/unix/sysv/linux/loongarch/fstatat64_time64_statx.h
new file mode 100644
index 0000000000..97920a820f
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/loongarch/fstatat64_time64_statx.h
@@ -0,0 +1,54 @@ 
+/* Get file status.  LoongArch version.
+   Copyright (C) 2024 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <fcntl.h>
+#include <sys/sysmacros.h>
+#include <internal-stat.h>
+
+static inline int
+fstatat64_time64_statx (int fd, const char *file, struct __stat64_t64 *buf,
+			int flag)
+{
+  struct statx tmp;
+  int r = INTERNAL_SYSCALL_CALL (statx, fd, file, AT_NO_AUTOMOUNT | flag,
+				 STATX_BASIC_STATS, &tmp);
+  if (r != 0)
+    return r;
+
+  *buf = (struct __stat64_t64)
+  {
+    .st_dev = __gnu_dev_makedev (tmp.stx_dev_major, tmp.stx_dev_minor),
+    .st_rdev = __gnu_dev_makedev (tmp.stx_rdev_major, tmp.stx_rdev_minor),
+    .st_ino = tmp.stx_ino,
+    .st_mode = tmp.stx_mode,
+    .st_nlink = tmp.stx_nlink,
+    .st_uid = tmp.stx_uid,
+    .st_gid = tmp.stx_gid,
+    .st_atime = tmp.stx_atime.tv_sec,
+    .st_atim.tv_nsec = tmp.stx_atime.tv_nsec,
+    .st_mtime = tmp.stx_mtime.tv_sec,
+    .st_mtim.tv_nsec = tmp.stx_mtime.tv_nsec,
+    .st_ctime = tmp.stx_ctime.tv_sec,
+    .st_ctim.tv_nsec = tmp.stx_ctime.tv_nsec,
+    .st_size = tmp.stx_size,
+    .st_blocks = tmp.stx_blocks,
+    .st_blksize = tmp.stx_blksize,
+  };
+
+  return r;
+}