Message ID | 20201221215027.2176966-1-adhemerval.zanella@linaro.org |
---|---|
State | New |
Headers | show |
Series | [1/3] Linux: Add close_range | expand |
* Adhemerval Zanella via Libc-alpha: > diff --git a/include/bits/unistd_ext.h b/include/bits/unistd_ext.h > new file mode 100644 > index 0000000000..277be05746 > --- /dev/null > +++ b/include/bits/unistd_ext.h > @@ -0,0 +1,6 @@ > +#include_next <bits/unistd_ext.h> > + > +#ifndef _ISOMAC > +extern int __close_range (unsigned int lowfd, unsigned int highfd, int flags); > +libc_hidden_proto (__close_range); > +#endif > diff --git a/sysdeps/unix/sysv/linux/Makefile b/sysdeps/unix/sysv/linux/Makefile Missing __THROW? (But see below.) > diff --git a/sysdeps/unix/sysv/linux/bits/unistd_ext.h b/sysdeps/unix/sysv/linux/bits/unistd_ext.h > index c315cc5cb8..799c59512c 100644 > --- a/sysdeps/unix/sysv/linux/bits/unistd_ext.h > +++ b/sysdeps/unix/sysv/linux/bits/unistd_ext.h > @@ -33,4 +33,15 @@ > not detached and has not been joined. */ > extern __pid_t gettid (void) __THROW; > > +/* Unshare the file descriptor table before closing file descriptors. */ > +#define CLOSE_RANGE_UNSHARE (1U << 1) > + > +/* Close all file descriptors in the range FD up to MAX_FD. The flag FLAGS > + are define by the CLOSE_RANGE prefix. This function behaves like close > + on the range, but in a fail-safe where it will either fail and not close > + any file descriptor or close all of them. Returns 0 on successor or -1 > + for failure (and sets errno accordingly). */ > +extern int close_range (unsigned int __fd, unsigned int __max_fd, > + int __flags) __THROW; > + > #endif I think we generally use int for file descriptors, following POSIX. CLOSE_RANGE_CLOEXEC is coming (but it's not in the released 5.10). I think it makes sense to adjust the comment to reflect that. __THROW is incompatible with close_range as a cancellation point. I'm not sure if it is useful for this function to be a cancellation point, given that it's mostly used for cleanup. It's true that close can block for an indefinite time for certain descriptors, but I don't know if close_range inherits that behavior. It certainly does not apply to CLOSE_RANGE_CLOEXEC. > diff --git a/sysdeps/unix/sysv/linux/close_range.c b/sysdeps/unix/sysv/linux/close_range.c > new file mode 100644 > index 0000000000..a664e158ee > --- /dev/null > +++ b/sysdeps/unix/sysv/linux/close_range.c > +int > +__close_range (unsigned int lowfd, unsigned int highfd, int flags) > +{ > + return SYSCALL_CANCEL (close_range, lowfd, highfd, flags); > +} > +libc_hidden_def (__close_range) > +weak_alias (__close_range, close_range) See above, this should probably be INLINE_SYSCALL_CALL. And you could use the syscall lists file to define the system call either way. > diff --git a/sysdeps/unix/sysv/linux/tst-clone.h b/sysdeps/unix/sysv/linux/tst-clone.h > new file mode 100644 > index 0000000000..2344ac5101 > --- /dev/null > +++ b/sysdeps/unix/sysv/linux/tst-clone.h > @@ -0,0 +1,48 @@ > +/* Auxiliary wrapper to issue the clone symbol. It's not clear to me what this means. > + Copyright (C) 2020 Free Software Foundation, Inc. > + This file is part of the GNU C Library. > + > + The GNU C Library is free software; you can redistribute it and/or > + modify it under the terms of the GNU Lesser General Public > + License as published by the Free Software Foundation; either > + version 2.1 of the License, or (at your option) any later version. > + > + The GNU C Library is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + Lesser General Public License for more details. > + > + You should have received a copy of the GNU Lesser General Public > + License along with the GNU C Library; if not, see > + <https://www.gnu.org/licenses/>. */ > + > +#ifndef _TST_CLONE_H > +#define _TST_CLONE_H > + > +#include <sched.h> > +#include <stackinfo.h> /* For _STACK_GROWS_{UP,DOWN}. */ > + > +#define DEFINE_STACK(name, size) \ > + char name[size] __attribute__ ((aligned)) > + > +static inline pid_t > +clone_call (int (*fn) (void *arg), void *arg, void *stack, size_t stack_size, > + int flags) > +{ > +#ifdef __ia64__ > + extern int __clone2 (int (*fn) (void *arg), void *stack, size_t stack_size, > + int flags, void *arg, ...); > + return __clone2 (f, stack, stack_size, flags, arg, /* ptid */ NULL, > + /* tls */ NULL, /* ctid */ ctid); > +#else > +# if _STACK_GROWS_DOWN > + return clone (fn, stack + stack_size, flags, arg, /* ptid */ NULL, > + /* tls */ NULL, /* ctid */ NULL); > +# elif _STACK_GROWS_UP > + return clone (fn, stack, flags, arg, /* ptid */ NULL, /* tls */ NULL, > + &ctid); > +# endif > +#endif > +} > + > +#endif Should this go into support/? > diff --git a/sysdeps/unix/sysv/linux/tst-close_range.c b/sysdeps/unix/sysv/linux/tst-close_range.c > new file mode 100644 > index 0000000000..a685e7d359 > --- /dev/null > +++ b/sysdeps/unix/sysv/linux/tst-close_range.c > +static unsigned int > +first_invalid_flag (void) > +{ > + static const unsigned int flags[] = { > + CLOSE_RANGE_UNSHARE, > + }; > + > + unsigned int r = 1; > + for (int i = 0; i < array_length (flags); i++) > + if ((1U << r) & flags[i]) > + r++; > + return 1U << r; > +} > + > +enum > +{ > + maximum_fd = 100, > + half_fd = maximum_fd / 2, > + gap_1 = maximum_fd - 8, > + gap_0 = (maximum_fd * 3) / 4 > +}; > + > +static void > +close_range_test_common (int *fds, unsigned int flags) > +{ > + /* Basic test for invalid flags, bail out if unsupported. */ > + TEST_COMPARE (close_range (fds[0], fds[half_fd], first_invalid_flag()), -1); This test is not future-proof at all. I don't think it's valuable. > + if (errno == ENOSYS) > + FAIL_UNSUPPORTED ("close_range not supported"); > + TEST_COMPARE (errno, EINVAL); > + > + /* Close half of the descriptors and check result. */ > + TEST_COMPARE (close_range (fds[0], fds[half_fd], flags), 0); > + for (int i = 0; i <= half_fd; i++) > + { > + TEST_COMPARE (fcntl (fds[i], F_GETFL), -1); > + TEST_COMPARE (errno, EBADF); > + } I have some difficulty figuring out whether you expect the descriptors to be in a continuous range or not. I think it would make sense to create one specific layout of file descriptors and test with that. > +static int > +close_range_unshare_test_fn (void *arg) > +{ > + close_range_test_common (arg, CLOSE_RANGE_UNSHARE); > + exit (EXIT_SUCCESS); > +} > + > +/* Check if a close_range with CLOSE_RANGE_UNSHARE issued from a subprocess > + created with CLONE_FILES does not close the parent file descriptor list. */ > +static void > +close_range_unshare_test (void) > +{ I think there could also be a test without CLOSE_RANGE_UNSHARE that verifies that the subprocess operators on the shared file descriptor table. Thanks, Florian
On 22/12/2020 06:05, Florian Weimer wrote: > * Adhemerval Zanella via Libc-alpha: > >> diff --git a/include/bits/unistd_ext.h b/include/bits/unistd_ext.h >> new file mode 100644 >> index 0000000000..277be05746 >> --- /dev/null >> +++ b/include/bits/unistd_ext.h >> @@ -0,0 +1,6 @@ >> +#include_next <bits/unistd_ext.h> >> + >> +#ifndef _ISOMAC >> +extern int __close_range (unsigned int lowfd, unsigned int highfd, int flags); >> +libc_hidden_proto (__close_range); >> +#endif >> diff --git a/sysdeps/unix/sysv/linux/Makefile b/sysdeps/unix/sysv/linux/Makefile > > Missing __THROW? (But see below.) > >> diff --git a/sysdeps/unix/sysv/linux/bits/unistd_ext.h b/sysdeps/unix/sysv/linux/bits/unistd_ext.h >> index c315cc5cb8..799c59512c 100644 >> --- a/sysdeps/unix/sysv/linux/bits/unistd_ext.h >> +++ b/sysdeps/unix/sysv/linux/bits/unistd_ext.h >> @@ -33,4 +33,15 @@ >> not detached and has not been joined. */ >> extern __pid_t gettid (void) __THROW; >> >> +/* Unshare the file descriptor table before closing file descriptors. */ >> +#define CLOSE_RANGE_UNSHARE (1U << 1) >> + >> +/* Close all file descriptors in the range FD up to MAX_FD. The flag FLAGS >> + are define by the CLOSE_RANGE prefix. This function behaves like close >> + on the range, but in a fail-safe where it will either fail and not close >> + any file descriptor or close all of them. Returns 0 on successor or -1 >> + for failure (and sets errno accordingly). */ >> +extern int close_range (unsigned int __fd, unsigned int __max_fd, >> + int __flags) __THROW; >> + >> #endif > > I think we generally use int for file descriptors, following POSIX. The Linux interface uses unsigned integer, meaning that negative values won't really trigger an invalid usage (kernel will return EINVAL if second argument is larger than first one though). I would prefer for syscall wrapper to be close as possible of the kernel interface, otherwise it would require to add additional semantic to handle potential pitfall cases. On this case, if we go for 'int' as argument should we return EBADF for invalid handles? > > CLOSE_RANGE_CLOEXEC is coming (but it's not in the released 5.10). I > think it makes sense to adjust the comment to reflect that. I have a patch to add CLOSE_RANGE_CLOEXEC as soon it is on a released kernel, it includes changes in the enum and some tests. > > __THROW is incompatible with close_range as a cancellation point. I'm > not sure if it is useful for this function to be a cancellation point, > given that it's mostly used for cleanup. It's true that close can block > for an indefinite time for certain descriptors, but I don't know if > close_range inherits that behavior. It certainly does not apply to > CLOSE_RANGE_CLOEXEC. I wasn't sure about making a cancellation entrypoint, so I followed current close semantic. The cancellation entrypoing would act also in an early mode, meaning that asynchronous cancellation is enabled and thread signaled the cancellation will act prior the syscall execution. In the other hand, reading the close_range code it does seen it returns with side-effects due signal interrupt. So maybe not making a cancellation entrypoint is indeed a better option. The FreeBSD implementation does not seem to make close_range a cancellation entrypoint. > >> diff --git a/sysdeps/unix/sysv/linux/close_range.c b/sysdeps/unix/sysv/linux/close_range.c >> new file mode 100644 >> index 0000000000..a664e158ee >> --- /dev/null >> +++ b/sysdeps/unix/sysv/linux/close_range.c > >> +int >> +__close_range (unsigned int lowfd, unsigned int highfd, int flags) >> +{ >> + return SYSCALL_CANCEL (close_range, lowfd, highfd, flags); >> +} >> +libc_hidden_def (__close_range) >> +weak_alias (__close_range, close_range) > > See above, this should probably be INLINE_SYSCALL_CALL. And you could > use the syscall lists file to define the system call either way. > >> diff --git a/sysdeps/unix/sysv/linux/tst-clone.h b/sysdeps/unix/sysv/linux/tst-clone.h >> new file mode 100644 >> index 0000000000..2344ac5101 >> --- /dev/null >> +++ b/sysdeps/unix/sysv/linux/tst-clone.h >> @@ -0,0 +1,48 @@ >> +/* Auxiliary wrapper to issue the clone symbol. > > It's not clear to me what this means. Maybe: Auxiliary functions to issue the clone syscall. ? > >> + Copyright (C) 2020 Free Software Foundation, Inc. >> + This file is part of the GNU C Library. >> + >> + The GNU C Library is free software; you can redistribute it and/or >> + modify it under the terms of the GNU Lesser General Public >> + License as published by the Free Software Foundation; either >> + version 2.1 of the License, or (at your option) any later version. >> + >> + The GNU C Library is distributed in the hope that it will be useful, >> + but WITHOUT ANY WARRANTY; without even the implied warranty of >> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU >> + Lesser General Public License for more details. >> + >> + You should have received a copy of the GNU Lesser General Public >> + License along with the GNU C Library; if not, see >> + <https://www.gnu.org/licenses/>. */ >> + >> +#ifndef _TST_CLONE_H >> +#define _TST_CLONE_H >> + >> +#include <sched.h> >> +#include <stackinfo.h> /* For _STACK_GROWS_{UP,DOWN}. */ >> + >> +#define DEFINE_STACK(name, size) \ >> + char name[size] __attribute__ ((aligned)) >> + >> +static inline pid_t >> +clone_call (int (*fn) (void *arg), void *arg, void *stack, size_t stack_size, >> + int flags) >> +{ >> +#ifdef __ia64__ >> + extern int __clone2 (int (*fn) (void *arg), void *stack, size_t stack_size, >> + int flags, void *arg, ...); >> + return __clone2 (f, stack, stack_size, flags, arg, /* ptid */ NULL, >> + /* tls */ NULL, /* ctid */ ctid); >> +#else >> +# if _STACK_GROWS_DOWN >> + return clone (fn, stack + stack_size, flags, arg, /* ptid */ NULL, >> + /* tls */ NULL, /* ctid */ NULL); >> +# elif _STACK_GROWS_UP >> + return clone (fn, stack, flags, arg, /* ptid */ NULL, /* tls */ NULL, >> + &ctid); >> +# endif >> +#endif >> +} >> + >> +#endif > > Should this go into support/? Ok, although it would be a Linux only interface. > >> diff --git a/sysdeps/unix/sysv/linux/tst-close_range.c b/sysdeps/unix/sysv/linux/tst-close_range.c >> new file mode 100644 >> index 0000000000..a685e7d359 >> --- /dev/null >> +++ b/sysdeps/unix/sysv/linux/tst-close_range.c > >> +static unsigned int >> +first_invalid_flag (void) >> +{ >> + static const unsigned int flags[] = { >> + CLOSE_RANGE_UNSHARE, >> + }; >> + >> + unsigned int r = 1; >> + for (int i = 0; i < array_length (flags); i++) >> + if ((1U << r) & flags[i]) >> + r++; >> + return 1U << r; >> +} >> + >> +enum >> +{ >> + maximum_fd = 100, >> + half_fd = maximum_fd / 2, >> + gap_1 = maximum_fd - 8, >> + gap_0 = (maximum_fd * 3) / 4 >> +}; >> + >> +static void >> +close_range_test_common (int *fds, unsigned int flags) >> +{ >> + /* Basic test for invalid flags, bail out if unsupported. */ >> + TEST_COMPARE (close_range (fds[0], fds[half_fd], first_invalid_flag()), -1); > > This test is not future-proof at all. I don't think it's valuable. But the idea is exactly to raise a warning if the underlying kernel supports more flags than the one support by glibc. > >> + if (errno == ENOSYS) >> + FAIL_UNSUPPORTED ("close_range not supported"); >> + TEST_COMPARE (errno, EINVAL); >> + >> + /* Close half of the descriptors and check result. */ >> + TEST_COMPARE (close_range (fds[0], fds[half_fd], flags), 0); >> + for (int i = 0; i <= half_fd; i++) >> + { >> + TEST_COMPARE (fcntl (fds[i], F_GETFL), -1); >> + TEST_COMPARE (errno, EBADF); >> + } > > I have some difficulty figuring out whether you expect the descriptors > to be in a continuous range or not. > > I think it would make sense to create one specific layout of file > descriptors and test with that. I think it is a fair assumption that descriptors are a continuous range. I modeled this tests using the kernel one as base, which make the same assumptions [1] [1] tools/testing/selftests/core/close_range_test.c > >> +static int >> +close_range_unshare_test_fn (void *arg) >> +{ >> + close_range_test_common (arg, CLOSE_RANGE_UNSHARE); >> + exit (EXIT_SUCCESS); >> +} >> + >> +/* Check if a close_range with CLOSE_RANGE_UNSHARE issued from a subprocess >> + created with CLONE_FILES does not close the parent file descriptor list. */ >> +static void >> +close_range_unshare_test (void) >> +{ > > I think there could also be a test without CLOSE_RANGE_UNSHARE that > verifies that the subprocess operators on the shared file descriptor > table. Do you mean issue a ? I may add it, although it would tests more CLONE_FILES support than close_range.
* Adhemerval Zanella: >> I think we generally use int for file descriptors, following POSIX. > > The Linux interface uses unsigned integer, meaning that negative values > won't really trigger an invalid usage (kernel will return EINVAL if > second argument is larger than first one though). > > I would prefer for syscall wrapper to be close as possible of the kernel > interface, otherwise it would require to add additional semantic to > handle potential pitfall cases. > > On this case, if we go for 'int' as argument should we return EBADF > for invalid handles? Hmm. I think unsigned int is needed for ~0U to work, which is what you used in the tests. If that's what applications use today when issuing the syscall directly, I think we need to stick to unsigned int. >> __THROW is incompatible with close_range as a cancellation point. I'm >> not sure if it is useful for this function to be a cancellation point, >> given that it's mostly used for cleanup. It's true that close can block >> for an indefinite time for certain descriptors, but I don't know if >> close_range inherits that behavior. It certainly does not apply to >> CLOSE_RANGE_CLOEXEC. > > I wasn't sure about making a cancellation entrypoint, so I followed > current close semantic. The cancellation entrypoing would act also > in an early mode, meaning that asynchronous cancellation is enabled > and thread signaled the cancellation will act prior the syscall > execution. > > In the other hand, reading the close_range code it does seen it > returns with side-effects due signal interrupt. So maybe not making > a cancellation entrypoint is indeed a better option. > > The FreeBSD implementation does not seem to make close_range a > cancellation entrypoint. Yes, let's do not act upon cancellation. >>> diff --git a/sysdeps/unix/sysv/linux/tst-clone.h b/sysdeps/unix/sysv/linux/tst-clone.h >>> new file mode 100644 >>> index 0000000000..2344ac5101 >>> --- /dev/null >>> +++ b/sysdeps/unix/sysv/linux/tst-clone.h >>> @@ -0,0 +1,48 @@ >>> +/* Auxiliary wrapper to issue the clone symbol. >> >> It's not clear to me what this means. > > Maybe: > > Auxiliary functions to issue the clone syscall. ? That's better, thanks. >>> diff --git a/sysdeps/unix/sysv/linux/tst-close_range.c b/sysdeps/unix/sysv/linux/tst-close_range.c >>> new file mode 100644 >>> index 0000000000..a685e7d359 >>> --- /dev/null >>> +++ b/sysdeps/unix/sysv/linux/tst-close_range.c >> >>> +static unsigned int >>> +first_invalid_flag (void) >>> +{ >>> + static const unsigned int flags[] = { >>> + CLOSE_RANGE_UNSHARE, >>> + }; >>> + >>> + unsigned int r = 1; >>> + for (int i = 0; i < array_length (flags); i++) >>> + if ((1U << r) & flags[i]) >>> + r++; >>> + return 1U << r; >>> +} >>> + >>> +enum >>> +{ >>> + maximum_fd = 100, >>> + half_fd = maximum_fd / 2, >>> + gap_1 = maximum_fd - 8, >>> + gap_0 = (maximum_fd * 3) / 4 >>> +}; >>> + >>> +static void >>> +close_range_test_common (int *fds, unsigned int flags) >>> +{ >>> + /* Basic test for invalid flags, bail out if unsupported. */ >>> + TEST_COMPARE (close_range (fds[0], fds[half_fd], first_invalid_flag()), -1); >> >> This test is not future-proof at all. I don't think it's valuable. > > But the idea is exactly to raise a warning if the underlying kernel > supports more flags than the one support by glibc. But it causes a test failure, not a warning. I don't think it's valid to poke for unknown system call flags like this, so even a proper warning would be hard. >>> + if (errno == ENOSYS) >>> + FAIL_UNSUPPORTED ("close_range not supported"); >>> + TEST_COMPARE (errno, EINVAL); >>> + >>> + /* Close half of the descriptors and check result. */ >>> + TEST_COMPARE (close_range (fds[0], fds[half_fd], flags), 0); >>> + for (int i = 0; i <= half_fd; i++) >>> + { >>> + TEST_COMPARE (fcntl (fds[i], F_GETFL), -1); >>> + TEST_COMPARE (errno, EBADF); >>> + } >> >> I have some difficulty figuring out whether you expect the descriptors >> to be in a continuous range or not. >> >> I think it would make sense to create one specific layout of file >> descriptors and test with that. > > I think it is a fair assumption that descriptors are a continuous range. > I modeled this tests using the kernel one as base, which make the same > assumptions [1] > > [1] tools/testing/selftests/core/close_range_test.c The you don't need the fds array. It's what makes this confusing. >>> +static int >>> +close_range_unshare_test_fn (void *arg) >>> +{ >>> + close_range_test_common (arg, CLOSE_RANGE_UNSHARE); >>> + exit (EXIT_SUCCESS); >>> +} >>> + >>> +/* Check if a close_range with CLOSE_RANGE_UNSHARE issued from a subprocess >>> + created with CLONE_FILES does not close the parent file descriptor list. */ >>> +static void >>> +close_range_unshare_test (void) >>> +{ >> >> I think there could also be a test without CLOSE_RANGE_UNSHARE that >> verifies that the subprocess operators on the shared file descriptor >> table. > > Do you mean issue a ? I may add it, although it would tests > more CLONE_FILES support than close_range. Sorry? I meant a test that shows that flags == 0 and flags == CLOSE_RANGE_UNSHARE behave differently. Thanks, Florian
On 22/12/2020 08:41, Florian Weimer wrote: > * Adhemerval Zanella: > >>> I think we generally use int for file descriptors, following POSIX. >> >> The Linux interface uses unsigned integer, meaning that negative values >> won't really trigger an invalid usage (kernel will return EINVAL if >> second argument is larger than first one though). >> >> I would prefer for syscall wrapper to be close as possible of the kernel >> interface, otherwise it would require to add additional semantic to >> handle potential pitfall cases. >> >> On this case, if we go for 'int' as argument should we return EBADF >> for invalid handles? > > Hmm. I think unsigned int is needed for ~0U to work, which is what you > used in the tests. If that's what applications use today when issuing > the syscall directly, I think we need to stick to unsigned int. Ack. > >>> __THROW is incompatible with close_range as a cancellation point. I'm >>> not sure if it is useful for this function to be a cancellation point, >>> given that it's mostly used for cleanup. It's true that close can block >>> for an indefinite time for certain descriptors, but I don't know if >>> close_range inherits that behavior. It certainly does not apply to >>> CLOSE_RANGE_CLOEXEC. >> >> I wasn't sure about making a cancellation entrypoint, so I followed >> current close semantic. The cancellation entrypoing would act also >> in an early mode, meaning that asynchronous cancellation is enabled >> and thread signaled the cancellation will act prior the syscall >> execution. >> >> In the other hand, reading the close_range code it does seen it >> returns with side-effects due signal interrupt. So maybe not making >> a cancellation entrypoint is indeed a better option. >> >> The FreeBSD implementation does not seem to make close_range a >> cancellation entrypoint. > > Yes, let's do not act upon cancellation. Ack. > >>>> diff --git a/sysdeps/unix/sysv/linux/tst-clone.h b/sysdeps/unix/sysv/linux/tst-clone.h >>>> new file mode 100644 >>>> index 0000000000..2344ac5101 >>>> --- /dev/null >>>> +++ b/sysdeps/unix/sysv/linux/tst-clone.h >>>> @@ -0,0 +1,48 @@ >>>> +/* Auxiliary wrapper to issue the clone symbol. >>> >>> It's not clear to me what this means. >> >> Maybe: >> >> Auxiliary functions to issue the clone syscall. ? > > That's better, thanks. Ack. > >>>> diff --git a/sysdeps/unix/sysv/linux/tst-close_range.c b/sysdeps/unix/sysv/linux/tst-close_range.c >>>> new file mode 100644 >>>> index 0000000000..a685e7d359 >>>> --- /dev/null >>>> +++ b/sysdeps/unix/sysv/linux/tst-close_range.c >>> >>>> +static unsigned int >>>> +first_invalid_flag (void) >>>> +{ >>>> + static const unsigned int flags[] = { >>>> + CLOSE_RANGE_UNSHARE, >>>> + }; >>>> + >>>> + unsigned int r = 1; >>>> + for (int i = 0; i < array_length (flags); i++) >>>> + if ((1U << r) & flags[i]) >>>> + r++; >>>> + return 1U << r; >>>> +} >>>> + >>>> +enum >>>> +{ >>>> + maximum_fd = 100, >>>> + half_fd = maximum_fd / 2, >>>> + gap_1 = maximum_fd - 8, >>>> + gap_0 = (maximum_fd * 3) / 4 >>>> +}; >>>> + >>>> +static void >>>> +close_range_test_common (int *fds, unsigned int flags) >>>> +{ >>>> + /* Basic test for invalid flags, bail out if unsupported. */ >>>> + TEST_COMPARE (close_range (fds[0], fds[half_fd], first_invalid_flag()), -1); >>> >>> This test is not future-proof at all. I don't think it's valuable. >> >> But the idea is exactly to raise a warning if the underlying kernel >> supports more flags than the one support by glibc. > > But it causes a test failure, not a warning. I don't think it's valid > to poke for unknown system call flags like this, so even a proper > warning would be hard. Alright, I will remove the invalid flag warning. > >>>> + if (errno == ENOSYS) >>>> + FAIL_UNSUPPORTED ("close_range not supported"); >>>> + TEST_COMPARE (errno, EINVAL); >>>> + >>>> + /* Close half of the descriptors and check result. */ >>>> + TEST_COMPARE (close_range (fds[0], fds[half_fd], flags), 0); >>>> + for (int i = 0; i <= half_fd; i++) >>>> + { >>>> + TEST_COMPARE (fcntl (fds[i], F_GETFL), -1); >>>> + TEST_COMPARE (errno, EBADF); >>>> + } >>> >>> I have some difficulty figuring out whether you expect the descriptors >>> to be in a continuous range or not. >>> >>> I think it would make sense to create one specific layout of file >>> descriptors and test with that. >> >> I think it is a fair assumption that descriptors are a continuous range. >> I modeled this tests using the kernel one as base, which make the same >> assumptions [1] >> >> [1] tools/testing/selftests/core/close_range_test.c > > The you don't need the fds array. It's what makes this confusing. Ack, I will remove it. > >>>> +static int >>>> +close_range_unshare_test_fn (void *arg) >>>> +{ >>>> + close_range_test_common (arg, CLOSE_RANGE_UNSHARE); >>>> + exit (EXIT_SUCCESS); >>>> +} >>>> + >>>> +/* Check if a close_range with CLOSE_RANGE_UNSHARE issued from a subprocess >>>> + created with CLONE_FILES does not close the parent file descriptor list. */ >>>> +static void >>>> +close_range_unshare_test (void) >>>> +{ >>> >>> I think there could also be a test without CLOSE_RANGE_UNSHARE that >>> verifies that the subprocess operators on the shared file descriptor >>> table. >> >> Do you mean issue a ? I may add it, although it would tests >> more CLONE_FILES support than close_range. > > Sorry? I meant a test that shows that flags == 0 and flags == > CLOSE_RANGE_UNSHARE behave differently. My questioning was mangled (the '?' should have been CLOSE_RANGE_UNSHARE). Alright I will add a clone tests with both flags.
On Tue, Dec 22, 2020 at 12:41:50PM +0100, Florian Weimer via Libc-alpha wrote: > * Adhemerval Zanella: > > >> I think we generally use int for file descriptors, following POSIX. > > > > The Linux interface uses unsigned integer, meaning that negative values > > won't really trigger an invalid usage (kernel will return EINVAL if > > second argument is larger than first one though). > > > > I would prefer for syscall wrapper to be close as possible of the kernel > > interface, otherwise it would require to add additional semantic to > > handle potential pitfall cases. > > > > On this case, if we go for 'int' as argument should we return EBADF > > for invalid handles? > > Hmm. I think unsigned int is needed for ~0U to work, which is what you > used in the tests. If that's what applications use today when issuing > the syscall directly, I think we need to stick to unsigned int. For the record, the initial reason we chose unsigned int for close_range() was that the close() syscall uses unsigned int too: SYSCALL_DEFINE1(close, unsigned int, fd) SYSCALL_DEFINE3(close_range, unsigned int, fd, unsigned int, max_fd, unsigned int, flags) Furthermore, the kernel doesn't care whether the fd is negative. It simply checks whether the passed in fd number falls within the possible range of the fdtable, i.e. fd < fdt->max_fds. If the passed-in fd falls within the possible range that the fdtable can handle it will check whether there's a file open at that position in the fdtable, i.e. from the kernel's perspective "negative" fd values aren't special in any way. But from what I can tell a lot of (non-libc) userspace isn't aware of the fact that the kernel uses unsigned int which can lead to some confusion. A little while ago I had to talk to Lennart about this when they added support for close_range() which they had been waiting for. Their syscall wrapper is now documented with: /* Kernel-side the syscall expects fds as unsigned integers (just like close() actually), while * userspace exclusively uses signed integers for fds. We don't know just yet how glibc is going to * wrap this syscall, but let's assume it's going to be similar to what they do for close(), * i.e. make the same unsigned → signed type change from the raw kernel syscall compared to the * userspace wrapper. There's only one caveat for this: unlike for close() there's the special * UINT_MAX fd value for the 'end_fd' argument. Let's safely map that to -1 here. And let's refuse * any other negative values. */ https://github.com/systemd/systemd/commit/441e0fdb900b49888fb6d7901a2b5aa92c0a2017 Christian
diff --git a/NEWS b/NEWS index 0820984547..9e829841f6 100644 --- a/NEWS +++ b/NEWS @@ -28,6 +28,8 @@ Major new features: The 32-bit RISC-V port requires at least Linux 5.4, GCC 7.1 and binutils 2.28. +* On Linux, the close_range function has been added. + Deprecated and removed features, and other changes affecting compatibility: * The mallinfo function is marked deprecated. Callers should call diff --git a/include/bits/unistd_ext.h b/include/bits/unistd_ext.h new file mode 100644 index 0000000000..277be05746 --- /dev/null +++ b/include/bits/unistd_ext.h @@ -0,0 +1,6 @@ +#include_next <bits/unistd_ext.h> + +#ifndef _ISOMAC +extern int __close_range (unsigned int lowfd, unsigned int highfd, int flags); +libc_hidden_proto (__close_range); +#endif diff --git a/sysdeps/unix/sysv/linux/Makefile b/sysdeps/unix/sysv/linux/Makefile index 09604e128b..e06c2a1f0d 100644 --- a/sysdeps/unix/sysv/linux/Makefile +++ b/sysdeps/unix/sysv/linux/Makefile @@ -61,7 +61,7 @@ sysdep_routines += adjtimex clone umount umount2 readahead sysctl \ open_by_handle_at mlock2 pkey_mprotect pkey_set pkey_get \ timerfd_gettime timerfd_settime prctl \ process_vm_readv process_vm_writev clock_adjtime \ - time64-support pselect32 + time64-support pselect32 close_range CFLAGS-gethostid.c = -fexceptions CFLAGS-tee.c = -fexceptions -fasynchronous-unwind-tables @@ -103,7 +103,8 @@ tests += tst-clone tst-clone2 tst-clone3 tst-fanotify tst-personality \ tst-quota tst-sync_file_range tst-sysconf-iov_max tst-ttyname \ test-errno-linux tst-memfd_create tst-mlock2 tst-pkey \ tst-rlimit-infinity tst-ofdlocks tst-gettid tst-gettid-kill \ - tst-tgkill tst-sysvsem-linux tst-sysvmsg-linux tst-sysvshm-linux + tst-tgkill tst-sysvsem-linux tst-sysvmsg-linux tst-sysvshm-linux \ + tst-close_range tests-internal += tst-ofdlocks-compat tst-sigcontext-get_pc CFLAGS-tst-sigcontext-get_pc.c = -fasynchronous-unwind-tables @@ -177,6 +178,14 @@ $(objpfx)tst-mman-consts.out: ../sysdeps/unix/sysv/linux/tst-mman-consts.py < /dev/null > $@ 2>&1; $(evaluate-test) $(objpfx)tst-mman-consts.out: $(sysdeps-linux-python-deps) +tests-special += $(objpfx)tst-close_range-consts.out +$(objpfx)tst-close_range-consts.out: ../sysdeps/unix/sysv/linux/tst-close_range-consts.py + $(sysdeps-linux-python) \ + ../sysdeps/unix/sysv/linux/tst-close_range-consts.py \ + $(sysdeps-linux-python-cc) \ + < /dev/null > $@ 2>&1; $(evaluate-test) +$(objpfx)tst-close_range-consts.out: $(sysdeps-linux-python-deps) + $(objpfx)tst-gettid: $(shared-thread-library) $(objpfx)tst-gettid-kill: $(shared-thread-library) $(objpfx)tst-tgkill: $(shared-thread-library) diff --git a/sysdeps/unix/sysv/linux/Versions b/sysdeps/unix/sysv/linux/Versions index c35f783e2a..e2a6fa763f 100644 --- a/sysdeps/unix/sysv/linux/Versions +++ b/sysdeps/unix/sysv/linux/Versions @@ -169,6 +169,9 @@ libc { } GLIBC_2.32 { } + GLIBC_2.33 { + close_range; + } GLIBC_PRIVATE { # functions used in other libraries __syscall_rt_sigqueueinfo; diff --git a/sysdeps/unix/sysv/linux/aarch64/libc.abilist b/sysdeps/unix/sysv/linux/aarch64/libc.abilist index 4cc1c6a591..a52decb822 100644 --- a/sysdeps/unix/sysv/linux/aarch64/libc.abilist +++ b/sysdeps/unix/sysv/linux/aarch64/libc.abilist @@ -2160,6 +2160,7 @@ GLIBC_2.32 sigabbrev_np F GLIBC_2.32 sigdescr_np F GLIBC_2.32 strerrordesc_np F GLIBC_2.32 strerrorname_np F +GLIBC_2.33 close_range F GLIBC_2.33 fstat F GLIBC_2.33 fstat64 F GLIBC_2.33 fstatat F diff --git a/sysdeps/unix/sysv/linux/alpha/libc.abilist b/sysdeps/unix/sysv/linux/alpha/libc.abilist index 26ad9845e4..ed085c9b3c 100644 --- a/sysdeps/unix/sysv/linux/alpha/libc.abilist +++ b/sysdeps/unix/sysv/linux/alpha/libc.abilist @@ -2242,6 +2242,7 @@ GLIBC_2.32 sigabbrev_np F GLIBC_2.32 sigdescr_np F GLIBC_2.32 strerrordesc_np F GLIBC_2.32 strerrorname_np F +GLIBC_2.33 close_range F GLIBC_2.33 fstat F GLIBC_2.33 fstat64 F GLIBC_2.33 fstatat F diff --git a/sysdeps/unix/sysv/linux/arc/libc.abilist b/sysdeps/unix/sysv/linux/arc/libc.abilist index bb9dfd4daf..88a17c2fdf 100644 --- a/sysdeps/unix/sysv/linux/arc/libc.abilist +++ b/sysdeps/unix/sysv/linux/arc/libc.abilist @@ -1920,6 +1920,7 @@ GLIBC_2.32 wprintf F GLIBC_2.32 write F GLIBC_2.32 writev F GLIBC_2.32 wscanf F +GLIBC_2.33 close_range F GLIBC_2.33 fstat F GLIBC_2.33 fstat64 F GLIBC_2.33 fstatat F diff --git a/sysdeps/unix/sysv/linux/arm/le/libc.abilist b/sysdeps/unix/sysv/linux/arm/le/libc.abilist index 9ab3924888..a690962068 100644 --- a/sysdeps/unix/sysv/linux/arm/le/libc.abilist +++ b/sysdeps/unix/sysv/linux/arm/le/libc.abilist @@ -141,6 +141,7 @@ GLIBC_2.32 sigabbrev_np F GLIBC_2.32 sigdescr_np F GLIBC_2.32 strerrordesc_np F GLIBC_2.32 strerrorname_np F +GLIBC_2.33 close_range F GLIBC_2.33 fstat F GLIBC_2.33 fstat64 F GLIBC_2.33 fstatat F diff --git a/sysdeps/unix/sysv/linux/bits/unistd_ext.h b/sysdeps/unix/sysv/linux/bits/unistd_ext.h index c315cc5cb8..799c59512c 100644 --- a/sysdeps/unix/sysv/linux/bits/unistd_ext.h +++ b/sysdeps/unix/sysv/linux/bits/unistd_ext.h @@ -33,4 +33,15 @@ not detached and has not been joined. */ extern __pid_t gettid (void) __THROW; +/* Unshare the file descriptor table before closing file descriptors. */ +#define CLOSE_RANGE_UNSHARE (1U << 1) + +/* Close all file descriptors in the range FD up to MAX_FD. The flag FLAGS + are define by the CLOSE_RANGE prefix. This function behaves like close + on the range, but in a fail-safe where it will either fail and not close + any file descriptor or close all of them. Returns 0 on successor or -1 + for failure (and sets errno accordingly). */ +extern int close_range (unsigned int __fd, unsigned int __max_fd, + int __flags) __THROW; + #endif diff --git a/sysdeps/unix/sysv/linux/close_range.c b/sysdeps/unix/sysv/linux/close_range.c new file mode 100644 index 0000000000..a664e158ee --- /dev/null +++ b/sysdeps/unix/sysv/linux/close_range.c @@ -0,0 +1,28 @@ +/* Close a range of file descriptors. + Copyright (C) 2020 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#include <unistd.h> +#include <sysdep.h> + +int +__close_range (unsigned int lowfd, unsigned int highfd, int flags) +{ + return SYSCALL_CANCEL (close_range, lowfd, highfd, flags); +} +libc_hidden_def (__close_range) +weak_alias (__close_range, close_range) diff --git a/sysdeps/unix/sysv/linux/csky/libc.abilist b/sysdeps/unix/sysv/linux/csky/libc.abilist index 14a84dac8f..d37882413c 100644 --- a/sysdeps/unix/sysv/linux/csky/libc.abilist +++ b/sysdeps/unix/sysv/linux/csky/libc.abilist @@ -2104,6 +2104,7 @@ GLIBC_2.32 sigabbrev_np F GLIBC_2.32 sigdescr_np F GLIBC_2.32 strerrordesc_np F GLIBC_2.32 strerrorname_np F +GLIBC_2.33 close_range F GLIBC_2.33 fstat F GLIBC_2.33 fstat64 F GLIBC_2.33 fstatat F diff --git a/sysdeps/unix/sysv/linux/hppa/libc.abilist b/sysdeps/unix/sysv/linux/hppa/libc.abilist index 5c8502f3d3..61450e47d5 100644 --- a/sysdeps/unix/sysv/linux/hppa/libc.abilist +++ b/sysdeps/unix/sysv/linux/hppa/libc.abilist @@ -2063,6 +2063,7 @@ GLIBC_2.32 sigabbrev_np F GLIBC_2.32 sigdescr_np F GLIBC_2.32 strerrordesc_np F GLIBC_2.32 strerrorname_np F +GLIBC_2.33 close_range F GLIBC_2.33 fstat F GLIBC_2.33 fstat64 F GLIBC_2.33 fstatat F diff --git a/sysdeps/unix/sysv/linux/i386/libc.abilist b/sysdeps/unix/sysv/linux/i386/libc.abilist index 4f0d3c1eb5..672ff8a002 100644 --- a/sysdeps/unix/sysv/linux/i386/libc.abilist +++ b/sysdeps/unix/sysv/linux/i386/libc.abilist @@ -2229,6 +2229,7 @@ GLIBC_2.32 sigabbrev_np F GLIBC_2.32 sigdescr_np F GLIBC_2.32 strerrordesc_np F GLIBC_2.32 strerrorname_np F +GLIBC_2.33 close_range F GLIBC_2.33 fstat F GLIBC_2.33 fstat64 F GLIBC_2.33 fstatat F diff --git a/sysdeps/unix/sysv/linux/ia64/libc.abilist b/sysdeps/unix/sysv/linux/ia64/libc.abilist index e3b345b803..53e53005e9 100644 --- a/sysdeps/unix/sysv/linux/ia64/libc.abilist +++ b/sysdeps/unix/sysv/linux/ia64/libc.abilist @@ -2095,6 +2095,7 @@ GLIBC_2.32 sigabbrev_np F GLIBC_2.32 sigdescr_np F GLIBC_2.32 strerrordesc_np F GLIBC_2.32 strerrorname_np F +GLIBC_2.33 close_range F GLIBC_2.33 fstat F GLIBC_2.33 fstat64 F GLIBC_2.33 fstatat F diff --git a/sysdeps/unix/sysv/linux/m68k/m680x0/libc.abilist b/sysdeps/unix/sysv/linux/m68k/m680x0/libc.abilist index c4891479d3..cc0d93b381 100644 --- a/sysdeps/unix/sysv/linux/m68k/m680x0/libc.abilist +++ b/sysdeps/unix/sysv/linux/m68k/m680x0/libc.abilist @@ -2175,6 +2175,7 @@ GLIBC_2.32 sigabbrev_np F GLIBC_2.32 sigdescr_np F GLIBC_2.32 strerrordesc_np F GLIBC_2.32 strerrorname_np F +GLIBC_2.33 close_range F GLIBC_2.33 fstat F GLIBC_2.33 fstat64 F GLIBC_2.33 fstatat F diff --git a/sysdeps/unix/sysv/linux/microblaze/be/libc.abilist b/sysdeps/unix/sysv/linux/microblaze/be/libc.abilist index 143b0163b4..b4acf52c41 100644 --- a/sysdeps/unix/sysv/linux/microblaze/be/libc.abilist +++ b/sysdeps/unix/sysv/linux/microblaze/be/libc.abilist @@ -2155,6 +2155,7 @@ GLIBC_2.32 sigabbrev_np F GLIBC_2.32 sigdescr_np F GLIBC_2.32 strerrordesc_np F GLIBC_2.32 strerrorname_np F +GLIBC_2.33 close_range F GLIBC_2.33 fstat F GLIBC_2.33 fstat64 F GLIBC_2.33 fstatat F diff --git a/sysdeps/unix/sysv/linux/mips/mips32/fpu/libc.abilist b/sysdeps/unix/sysv/linux/mips/mips32/fpu/libc.abilist index b2295f1937..fadbf99d4b 100644 --- a/sysdeps/unix/sysv/linux/mips/mips32/fpu/libc.abilist +++ b/sysdeps/unix/sysv/linux/mips/mips32/fpu/libc.abilist @@ -2146,6 +2146,7 @@ GLIBC_2.32 sigabbrev_np F GLIBC_2.32 sigdescr_np F GLIBC_2.32 strerrordesc_np F GLIBC_2.32 strerrorname_np F +GLIBC_2.33 close_range F GLIBC_2.33 fstat F GLIBC_2.33 fstat64 F GLIBC_2.33 fstatat F diff --git a/sysdeps/unix/sysv/linux/mips/mips64/n32/libc.abilist b/sysdeps/unix/sysv/linux/mips/mips64/n32/libc.abilist index aa9c6a4dca..c456ebcea3 100644 --- a/sysdeps/unix/sysv/linux/mips/mips64/n32/libc.abilist +++ b/sysdeps/unix/sysv/linux/mips/mips64/n32/libc.abilist @@ -2152,6 +2152,7 @@ GLIBC_2.32 sigabbrev_np F GLIBC_2.32 sigdescr_np F GLIBC_2.32 strerrordesc_np F GLIBC_2.32 strerrorname_np F +GLIBC_2.33 close_range F GLIBC_2.33 fstat F GLIBC_2.33 fstat64 F GLIBC_2.33 fstatat F diff --git a/sysdeps/unix/sysv/linux/mips/mips64/n64/libc.abilist b/sysdeps/unix/sysv/linux/mips/mips64/n64/libc.abilist index 5939588ad5..5ed54e7c94 100644 --- a/sysdeps/unix/sysv/linux/mips/mips64/n64/libc.abilist +++ b/sysdeps/unix/sysv/linux/mips/mips64/n64/libc.abilist @@ -2146,6 +2146,7 @@ GLIBC_2.32 sigabbrev_np F GLIBC_2.32 sigdescr_np F GLIBC_2.32 strerrordesc_np F GLIBC_2.32 strerrorname_np F +GLIBC_2.33 close_range F GLIBC_2.33 fstat F GLIBC_2.33 fstat64 F GLIBC_2.33 fstatat F diff --git a/sysdeps/unix/sysv/linux/nios2/libc.abilist b/sysdeps/unix/sysv/linux/nios2/libc.abilist index 92556c4237..67b611ef6d 100644 --- a/sysdeps/unix/sysv/linux/nios2/libc.abilist +++ b/sysdeps/unix/sysv/linux/nios2/libc.abilist @@ -2193,6 +2193,7 @@ GLIBC_2.32 sigabbrev_np F GLIBC_2.32 sigdescr_np F GLIBC_2.32 strerrordesc_np F GLIBC_2.32 strerrorname_np F +GLIBC_2.33 close_range F GLIBC_2.33 fstat F GLIBC_2.33 fstat64 F GLIBC_2.33 fstatat F diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc32/fpu/libc.abilist b/sysdeps/unix/sysv/linux/powerpc/powerpc32/fpu/libc.abilist index 26c93dff05..ed0f239cd5 100644 --- a/sysdeps/unix/sysv/linux/powerpc/powerpc32/fpu/libc.abilist +++ b/sysdeps/unix/sysv/linux/powerpc/powerpc32/fpu/libc.abilist @@ -2202,6 +2202,7 @@ GLIBC_2.32 sigabbrev_np F GLIBC_2.32 sigdescr_np F GLIBC_2.32 strerrordesc_np F GLIBC_2.32 strerrorname_np F +GLIBC_2.33 close_range F GLIBC_2.33 fstat F GLIBC_2.33 fstat64 F GLIBC_2.33 fstatat F diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc64/be/libc.abilist b/sysdeps/unix/sysv/linux/powerpc/powerpc64/be/libc.abilist index c2ca00709e..a4b9900036 100644 --- a/sysdeps/unix/sysv/linux/powerpc/powerpc64/be/libc.abilist +++ b/sysdeps/unix/sysv/linux/powerpc/powerpc64/be/libc.abilist @@ -2065,6 +2065,7 @@ GLIBC_2.32 sigabbrev_np F GLIBC_2.32 sigdescr_np F GLIBC_2.32 strerrordesc_np F GLIBC_2.32 strerrorname_np F +GLIBC_2.33 close_range F GLIBC_2.33 fstat F GLIBC_2.33 fstat64 F GLIBC_2.33 fstatat F diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc64/le/libc.abilist b/sysdeps/unix/sysv/linux/powerpc/powerpc64/le/libc.abilist index 0ea50dc851..e5d666dab3 100644 --- a/sysdeps/unix/sysv/linux/powerpc/powerpc64/le/libc.abilist +++ b/sysdeps/unix/sysv/linux/powerpc/powerpc64/le/libc.abilist @@ -2355,6 +2355,7 @@ GLIBC_2.32 sigabbrev_np F GLIBC_2.32 sigdescr_np F GLIBC_2.32 strerrordesc_np F GLIBC_2.32 strerrorname_np F +GLIBC_2.33 close_range F GLIBC_2.33 fstat F GLIBC_2.33 fstat64 F GLIBC_2.33 fstatat F diff --git a/sysdeps/unix/sysv/linux/riscv/rv32/libc.abilist b/sysdeps/unix/sysv/linux/riscv/rv32/libc.abilist index 0263e284d4..f8c34aca7c 100644 --- a/sysdeps/unix/sysv/linux/riscv/rv32/libc.abilist +++ b/sysdeps/unix/sysv/linux/riscv/rv32/libc.abilist @@ -595,6 +595,7 @@ GLIBC_2.33 clock_nanosleep F GLIBC_2.33 clock_settime F GLIBC_2.33 clone F GLIBC_2.33 close F +GLIBC_2.33 close_range F GLIBC_2.33 closedir F GLIBC_2.33 closelog F GLIBC_2.33 confstr F diff --git a/sysdeps/unix/sysv/linux/riscv/rv64/libc.abilist b/sysdeps/unix/sysv/linux/riscv/rv64/libc.abilist index 1626c5351f..6042d42aba 100644 --- a/sysdeps/unix/sysv/linux/riscv/rv64/libc.abilist +++ b/sysdeps/unix/sysv/linux/riscv/rv64/libc.abilist @@ -2122,6 +2122,7 @@ GLIBC_2.32 sigabbrev_np F GLIBC_2.32 sigdescr_np F GLIBC_2.32 strerrordesc_np F GLIBC_2.32 strerrorname_np F +GLIBC_2.33 close_range F GLIBC_2.33 fstat F GLIBC_2.33 fstat64 F GLIBC_2.33 fstatat F diff --git a/sysdeps/unix/sysv/linux/s390/s390-32/libc.abilist b/sysdeps/unix/sysv/linux/s390/s390-32/libc.abilist index a66426eb4d..46d2de3030 100644 --- a/sysdeps/unix/sysv/linux/s390/s390-32/libc.abilist +++ b/sysdeps/unix/sysv/linux/s390/s390-32/libc.abilist @@ -2200,6 +2200,7 @@ GLIBC_2.32 sigabbrev_np F GLIBC_2.32 sigdescr_np F GLIBC_2.32 strerrordesc_np F GLIBC_2.32 strerrorname_np F +GLIBC_2.33 close_range F GLIBC_2.33 fstat F GLIBC_2.33 fstat64 F GLIBC_2.33 fstatat F diff --git a/sysdeps/unix/sysv/linux/s390/s390-64/libc.abilist b/sysdeps/unix/sysv/linux/s390/s390-64/libc.abilist index ab351873ae..9fda4babba 100644 --- a/sysdeps/unix/sysv/linux/s390/s390-64/libc.abilist +++ b/sysdeps/unix/sysv/linux/s390/s390-64/libc.abilist @@ -2101,6 +2101,7 @@ GLIBC_2.32 sigabbrev_np F GLIBC_2.32 sigdescr_np F GLIBC_2.32 strerrordesc_np F GLIBC_2.32 strerrorname_np F +GLIBC_2.33 close_range F GLIBC_2.33 fstat F GLIBC_2.33 fstat64 F GLIBC_2.33 fstatat F diff --git a/sysdeps/unix/sysv/linux/sh/le/libc.abilist b/sysdeps/unix/sysv/linux/sh/le/libc.abilist index d36f228192..110f7ebf22 100644 --- a/sysdeps/unix/sysv/linux/sh/le/libc.abilist +++ b/sysdeps/unix/sysv/linux/sh/le/libc.abilist @@ -2067,6 +2067,7 @@ GLIBC_2.32 sigabbrev_np F GLIBC_2.32 sigdescr_np F GLIBC_2.32 strerrordesc_np F GLIBC_2.32 strerrorname_np F +GLIBC_2.33 close_range F GLIBC_2.33 fstat F GLIBC_2.33 fstat64 F GLIBC_2.33 fstatat F diff --git a/sysdeps/unix/sysv/linux/sparc/sparc32/libc.abilist b/sysdeps/unix/sysv/linux/sparc/sparc32/libc.abilist index 59b4313280..1b31685a24 100644 --- a/sysdeps/unix/sysv/linux/sparc/sparc32/libc.abilist +++ b/sysdeps/unix/sysv/linux/sparc/sparc32/libc.abilist @@ -2191,6 +2191,7 @@ GLIBC_2.32 sigabbrev_np F GLIBC_2.32 sigdescr_np F GLIBC_2.32 strerrordesc_np F GLIBC_2.32 strerrorname_np F +GLIBC_2.33 close_range F GLIBC_2.33 fstat F GLIBC_2.33 fstat64 F GLIBC_2.33 fstatat F diff --git a/sysdeps/unix/sysv/linux/sparc/sparc64/libc.abilist b/sysdeps/unix/sysv/linux/sparc/sparc64/libc.abilist index 266dcdfa08..9ad88bf7b3 100644 --- a/sysdeps/unix/sysv/linux/sparc/sparc64/libc.abilist +++ b/sysdeps/unix/sysv/linux/sparc/sparc64/libc.abilist @@ -2118,6 +2118,7 @@ GLIBC_2.32 sigabbrev_np F GLIBC_2.32 sigdescr_np F GLIBC_2.32 strerrordesc_np F GLIBC_2.32 strerrorname_np F +GLIBC_2.33 close_range F GLIBC_2.33 fstat F GLIBC_2.33 fstat64 F GLIBC_2.33 fstatat F diff --git a/sysdeps/unix/sysv/linux/tst-clone.h b/sysdeps/unix/sysv/linux/tst-clone.h new file mode 100644 index 0000000000..2344ac5101 --- /dev/null +++ b/sysdeps/unix/sysv/linux/tst-clone.h @@ -0,0 +1,48 @@ +/* Auxiliary wrapper to issue the clone symbol. + Copyright (C) 2020 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#ifndef _TST_CLONE_H +#define _TST_CLONE_H + +#include <sched.h> +#include <stackinfo.h> /* For _STACK_GROWS_{UP,DOWN}. */ + +#define DEFINE_STACK(name, size) \ + char name[size] __attribute__ ((aligned)) + +static inline pid_t +clone_call (int (*fn) (void *arg), void *arg, void *stack, size_t stack_size, + int flags) +{ +#ifdef __ia64__ + extern int __clone2 (int (*fn) (void *arg), void *stack, size_t stack_size, + int flags, void *arg, ...); + return __clone2 (f, stack, stack_size, flags, arg, /* ptid */ NULL, + /* tls */ NULL, /* ctid */ ctid); +#else +# if _STACK_GROWS_DOWN + return clone (fn, stack + stack_size, flags, arg, /* ptid */ NULL, + /* tls */ NULL, /* ctid */ NULL); +# elif _STACK_GROWS_UP + return clone (fn, stack, flags, arg, /* ptid */ NULL, /* tls */ NULL, + &ctid); +# endif +#endif +} + +#endif diff --git a/sysdeps/unix/sysv/linux/tst-close_range-consts.py b/sysdeps/unix/sysv/linux/tst-close_range-consts.py new file mode 100644 index 0000000000..3a0d4423e8 --- /dev/null +++ b/sysdeps/unix/sysv/linux/tst-close_range-consts.py @@ -0,0 +1,49 @@ +#!/usr/bin/python3 +# Test that glibc's unistd_ext.h constants match the kernel's. +# Copyright (C) 2020 Free Software Foundation, Inc. +# This file is part of the GNU C Library. +# +# The GNU C Library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# The GNU C Library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with the GNU C Library; if not, see +# <https://www.gnu.org/licenses/>. + +import argparse +import sys + +import glibcextract +import glibcsyscalls + + +def main(): + """The main entry point.""" + parser = argparse.ArgumentParser( + description="Test that glibc's unistd_ext.h constants match " + "the kernel's.") + parser.add_argument('--cc', metavar='CC', + help='C compiler (including options) to use') + args = parser.parse_args() + linux_version_headers = glibcsyscalls.linux_kernel_version(args.cc) + linux_version_glibc = (5, 9) + sys.exit(glibcextract.compare_macro_consts( + '#define _GNU_SOURCE 1\n' + '#include <unistd.h>\n', + '#define _GNU_SOURCE 1\n' + '#include <linux/close_range.h>\n', + args.cc, + 'CLOSE_RANGE_.*', + None, + linux_version_glibc > linux_version_headers, + linux_version_headers > linux_version_glibc)) + +if __name__ == '__main__': + main() diff --git a/sysdeps/unix/sysv/linux/tst-close_range.c b/sysdeps/unix/sysv/linux/tst-close_range.c new file mode 100644 index 0000000000..a685e7d359 --- /dev/null +++ b/sysdeps/unix/sysv/linux/tst-close_range.c @@ -0,0 +1,175 @@ +/* Test for the close_range system call. + Copyright (C) 2020 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#include <dirent.h> +#include <errno.h> +#include <fcntl.h> +#include <limits.h> +#include <getopt.h> +#include <signal.h> +#include <stdbool.h> +#include <stdlib.h> + +#include <support/capture_subprocess.h> +#include <support/check.h> +#include <support/descriptors.h> +#include <support/support.h> +#include <support/xunistd.h> + +#include <array_length.h> +#include <tst-clone.h> + +static unsigned int +first_invalid_flag (void) +{ + static const unsigned int flags[] = { + CLOSE_RANGE_UNSHARE, + }; + + unsigned int r = 1; + for (int i = 0; i < array_length (flags); i++) + if ((1U << r) & flags[i]) + r++; + return 1U << r; +} + +enum +{ + maximum_fd = 100, + half_fd = maximum_fd / 2, + gap_1 = maximum_fd - 8, + gap_0 = (maximum_fd * 3) / 4 +}; + +static void +close_range_test_common (int *fds, unsigned int flags) +{ + /* Basic test for invalid flags, bail out if unsupported. */ + TEST_COMPARE (close_range (fds[0], fds[half_fd], first_invalid_flag()), -1); + if (errno == ENOSYS) + FAIL_UNSUPPORTED ("close_range not supported"); + TEST_COMPARE (errno, EINVAL); + + /* Close half of the descriptors and check result. */ + TEST_COMPARE (close_range (fds[0], fds[half_fd], flags), 0); + for (int i = 0; i <= half_fd; i++) + { + TEST_COMPARE (fcntl (fds[i], F_GETFL), -1); + TEST_COMPARE (errno, EBADF); + } + for (int i = half_fd + 1; i < maximum_fd; i++) + TEST_VERIFY (fcntl (fds[i], F_GETFL) > -1); + + /* Create some gaps, close up to a threshold, and check result. */ + xclose (57); + xclose (78); + xclose (81); + xclose (82); + xclose (84); + xclose (90); + + TEST_COMPARE (close_range (fds[half_fd + 1], fds[gap_1], flags), 0); + for (int i = half_fd + 1; i < gap_1; i++) + { + TEST_COMPARE (fcntl (fds[i], F_GETFL), -1); + TEST_COMPARE (errno, EBADF); + } + for (int i = gap_1 + 1; i < maximum_fd; i++) + TEST_VERIFY (fcntl (fds[i], F_GETFL) > -1); + + /* Close the remmaining but the last one. */ + TEST_COMPARE (close_range (fds[gap_1 + 1], fds[maximum_fd - 1], flags), 0); + for (int i = gap_1 + 1; i < maximum_fd - 1; i++) + { + TEST_COMPARE (fcntl (fds[i], F_GETFL), -1); + TEST_COMPARE (errno, EBADF); + } + TEST_VERIFY (fcntl (fds[maximum_fd], F_GETFL) > -1); + + /* Close the last one. */ + TEST_COMPARE (close_range (fds[maximum_fd], fds[maximum_fd], flags), 0); + TEST_COMPARE (fcntl (fds[maximum_fd], F_GETFL), -1); + TEST_COMPARE (errno, EBADF); +} + +/* Basic tests: check if the syscall close ranges with and without gaps. */ +static void +close_range_test (void) +{ + int fds[maximum_fd + 1]; + + struct support_descriptors *descrs = support_descriptors_list (); + + for (int i = 0; i < array_length (fds); i++) + fds[i] = xopen ("/dev/null", O_RDONLY | O_CLOEXEC, 0600); + + close_range_test_common (fds, 0); + + /* Double check by check the /proc. */ + support_descriptors_check (descrs); + support_descriptors_free (descrs); +} + +static int +close_range_unshare_test_fn (void *arg) +{ + close_range_test_common (arg, CLOSE_RANGE_UNSHARE); + exit (EXIT_SUCCESS); +} + +/* Check if a close_range with CLOSE_RANGE_UNSHARE issued from a subprocess + created with CLONE_FILES does not close the parent file descriptor list. */ +static void +close_range_unshare_test (void) +{ + int fds[maximum_fd + 1]; + + for (int i = 0; i < array_length (fds); i++) + fds[i] = xopen ("/dev/null", O_RDONLY | O_CLOEXEC, 0600); + + struct support_descriptors *descrs = support_descriptors_list (); + + enum { stack_size = 4096 }; + DEFINE_STACK (stack, stack_size); + pid_t pid = clone_call (close_range_unshare_test_fn, fds, stack, stack_size, + CLONE_FILES | SIGCHLD); + TEST_VERIFY_EXIT (pid > 0); + int status; + xwaitpid (pid, &status, 0); + TEST_VERIFY (WIFEXITED (status)); + TEST_COMPARE (WEXITSTATUS(status), 0); + + for (int i = 0; i < maximum_fd; i++) + TEST_VERIFY (fcntl (fds[i], F_GETFL) > -1); + + support_descriptors_check (descrs); + support_descriptors_free (descrs); + + TEST_COMPARE (close_range (fds[0], fds[maximum_fd], 0), 0); +} + +static int +do_test (void) +{ + close_range_test (); + close_range_unshare_test (); + + return 0; +} + +#include <support/test-driver.c> diff --git a/sysdeps/unix/sysv/linux/x86_64/64/libc.abilist b/sysdeps/unix/sysv/linux/x86_64/64/libc.abilist index 4fff61818b..5089a21981 100644 --- a/sysdeps/unix/sysv/linux/x86_64/64/libc.abilist +++ b/sysdeps/unix/sysv/linux/x86_64/64/libc.abilist @@ -2076,6 +2076,7 @@ GLIBC_2.32 sigabbrev_np F GLIBC_2.32 sigdescr_np F GLIBC_2.32 strerrordesc_np F GLIBC_2.32 strerrorname_np F +GLIBC_2.33 close_range F GLIBC_2.33 fstat F GLIBC_2.33 fstat64 F GLIBC_2.33 fstatat F diff --git a/sysdeps/unix/sysv/linux/x86_64/x32/libc.abilist b/sysdeps/unix/sysv/linux/x86_64/x32/libc.abilist index 102ed47a9c..c3db0fc7ca 100644 --- a/sysdeps/unix/sysv/linux/x86_64/x32/libc.abilist +++ b/sysdeps/unix/sysv/linux/x86_64/x32/libc.abilist @@ -2173,6 +2173,7 @@ GLIBC_2.32 sigabbrev_np F GLIBC_2.32 sigdescr_np F GLIBC_2.32 strerrordesc_np F GLIBC_2.32 strerrorname_np F +GLIBC_2.33 close_range F GLIBC_2.33 fstat F GLIBC_2.33 fstat64 F GLIBC_2.33 fstatat F