Message ID | alpine.DEB.2.20.1705091552540.16186@digraph.polyomino.org.uk |
---|---|
State | New |
Headers | show |
On 09/05/2017 12:54, Joseph Myers wrote: > Now we can assume a kernel with sendmmsg support, this patch > simplifies the implementation to be similar to that for accept4: > either using socketcall or the syscall according to whether the > syscall is known to be available, without further fallback > implementations. The __ASSUME_SENDMMSG macro is kept (now defined > unconditionally), since it's used in resolv/res_send.c. I think we can add a sysdeps/posix sendmmsg based on sendmsg so we can safely remove this macro usage on resolv code. I will work on this after your patch inclusion. > > Tested for x86_64 and x86. > > 2017-05-09 Joseph Myers <joseph@codesourcery.com> > > * sysdeps/unix/sysv/linux/kernel-features.h > (__ASSUME_SENDMMSG_SYSCALL): Define unconditionally. > (__ASSUME_SENDMMSG): Likewise. > (__ASSUME_SENDMMSG_SOCKETCALL): Remove macro. > * sysdeps/unix/sysv/linux/sendmmsg.c (__sendmmsg): Define using > sendmmsg syscall if that can be assumed to be present, socketcall > otherwise, with no fallback for runtime failure. LGTM, thanks. > > diff --git a/sysdeps/unix/sysv/linux/kernel-features.h b/sysdeps/unix/sysv/linux/kernel-features.h > index 74ac627..c9212b4 100644 > --- a/sysdeps/unix/sysv/linux/kernel-features.h > +++ b/sysdeps/unix/sysv/linux/kernel-features.h > @@ -105,13 +105,8 @@ > > /* Support for sendmmsg functionality was added in 3.0. The macros > defined correspond to those for accept4 and recvmmsg. */ > -#if __LINUX_KERNEL_VERSION >= 0x030000 > -# ifdef __ASSUME_SOCKETCALL > -# define __ASSUME_SENDMMSG_SOCKETCALL 1 > -# endif > -# define __ASSUME_SENDMMSG_SYSCALL 1 > -# define __ASSUME_SENDMMSG 1 > -#endif > +#define __ASSUME_SENDMMSG_SYSCALL 1 > +#define __ASSUME_SENDMMSG 1 > > /* On most architectures, most socket syscalls are supported for all > supported kernel versions, but on some socketcall architectures > diff --git a/sysdeps/unix/sysv/linux/sendmmsg.c b/sysdeps/unix/sysv/linux/sendmmsg.c > index 2dd0eba..c559623 100644 > --- a/sysdeps/unix/sysv/linux/sendmmsg.c > +++ b/sysdeps/unix/sysv/linux/sendmmsg.c > @@ -21,73 +21,22 @@ > > #include <sysdep-cancel.h> > #include <sys/syscall.h> > +#include <socketcall.h> > #include <kernel-features.h> > > -/* Do not use the sendmmsg syscall on socketcall architectures unless > - it was added at the same time as the socketcall support or can be > - assumed to be present. */ > -#if defined __ASSUME_SOCKETCALL \ > - && !defined __ASSUME_SENDMMSG_SYSCALL_WITH_SOCKETCALL \ > - && !defined __ASSUME_SENDMMSG_SYSCALL > -# undef __NR_sendmmsg > -#endif > - > -#ifdef __NR_sendmmsg > -int > -__sendmmsg (int fd, struct mmsghdr *vmessages, unsigned int vlen, int flags) > -{ > - return SYSCALL_CANCEL (sendmmsg, fd, vmessages, vlen, flags); > -} > -libc_hidden_def (__sendmmsg) > -weak_alias (__sendmmsg, sendmmsg) > -#elif defined __NR_socketcall > -# include <socketcall.h> > -# ifdef __ASSUME_SENDMMSG_SOCKETCALL > int > __sendmmsg (int fd, struct mmsghdr *vmessages, unsigned int vlen, int flags) > { > + /* Do not use the sendmmsg syscall on socketcall architectures unless > + it was added at the same time as the socketcall support or can be > + assumed to be present. */ > +#if defined __ASSUME_SOCKETCALL \ > + && !defined __ASSUME_SENDMMSG_SYSCALL_WITH_SOCKETCALL \ > + && !defined __ASSUME_SENDMMSG_SYSCALL > return SOCKETCALL_CANCEL (sendmmsg, fd, vmessages, vlen, flags); > +#else > + return SYSCALL_CANCEL (sendmmsg, fd, vmessages, vlen, flags); > +#endif > } > -# else > -static int have_sendmmsg; > - > -int > -__sendmmsg (int fd, struct mmsghdr *vmessages, unsigned int vlen, int flags) > -{ > - if (__glibc_likely (have_sendmmsg >= 0)) > - { > - int ret = SOCKETCALL_CANCEL (sendmmsg, fd, vmessages, vlen, flags); > - /* The kernel returns -EINVAL for unknown socket operations. > - We need to convert that error to an ENOSYS error. */ > - if (__builtin_expect (ret < 0, 0) > - && have_sendmmsg == 0 > - && errno == EINVAL) > - { > - /* Try another call, this time with an invalid file > - descriptor and all other parameters cleared. This call > - will not cause any harm and it will return > - immediately. */ > - ret = SOCKETCALL_CANCEL (invalid, -1); > - if (errno == EINVAL) > - { > - have_sendmmsg = -1; > - __set_errno (ENOSYS); > - } > - else > - { > - have_sendmmsg = 1; > - __set_errno (EINVAL); > - } > - return -1; > - } > - return ret; > - } > - __set_errno (ENOSYS); > - return -1; > -} > -# endif /* __ASSUME_SENDMMSG_SOCKETCALL */ > libc_hidden_def (__sendmmsg) > weak_alias (__sendmmsg, sendmmsg) > -#else > -# include <socket/sendmmsg.c> > -#endif >
On 05/09/2017 08:41 PM, Adhemerval Zanella wrote: > I think we can add a sysdeps/posix sendmmsg based on sendmsg so we can > safely remove this macro usage on resolv code. I will work on this > after your patch inclusion. The use of sendmmsg in the resolv code is problematic. We need to use two separate sockets with different source ports to improve compatibility anyway. This might also help firewalls with tearing down state more quickly (single request/response very likely does not bump the UDP timeout, but two requests and two responses might switch into stream mode with a prolonged timeout; maybe that's why we originally hit the Netfilter table overflow in some of the glibc resolver tests). I'm not sure how quickly I will get to fixing this (I hope for 2.26), so please do whatever you feel is necessary in the meantime. Thanks, Florian
On 09/05/2017 17:28, Florian Weimer wrote: > On 05/09/2017 08:41 PM, Adhemerval Zanella wrote: >> I think we can add a sysdeps/posix sendmmsg based on sendmsg so we can >> safely remove this macro usage on resolv code. I will work on this >> after your patch inclusion. > > The use of sendmmsg in the resolv code is problematic. We need to use two separate sockets with different source ports to improve compatibility anyway. This might also help firewalls with tearing down state more quickly (single request/response very likely does not bump the UDP timeout, but two requests and two responses might switch into stream mode with a prolonged timeout; maybe that's why we originally hit the Netfilter table overflow in some of the glibc resolver tests). > > I'm not sure how quickly I will get to fixing this (I hope for 2.26), so please do whatever you feel is necessary in the meantime. > > Thanks, > Florian So if I understood correctly, the idea is to *remove* sendmmsg on resolv code then? Because now with __ASSUME_SENDMMSG being used as default for Linux this code will be used on all architectures. In any case, my understanding for sendmmsg is it is a GNU extension (similar to pwritev), so the idea is to have a fallback usable code on sysdeps/{posix,unix} to we do not need to actually tests its usability in the rest of glibc code. That's why I think that if the idea is actually get rid of sendmmsg usage in resolv, we can add a generic implementation for sendmmsg and then remove its usage on two cleanup patches.
On 05/09/2017 10:58 PM, Adhemerval Zanella wrote: > So if I understood correctly, the idea is to *remove* sendmmsg on resolv code then? > Because now with __ASSUME_SENDMMSG being used as default for Linux this code will > be used on all architectures. Yes, once we have two sockets, we need to call sendmsg for each one separately. But that's an *algorithmic* change, it's not simply about replacing one call to sendmmsg with two to sendmsg. But that would be fine as an intermediate step as well if you want to simplify things in this way. > In any case, my understanding for sendmmsg is it is a GNU extension (similar to > pwritev), so the idea is to have a fallback usable code on sysdeps/{posix,unix} > to we do not need to actually tests its usability in the rest of glibc code. As far as I understand things, both sendmmsg and pwritev cannot be emulated in userspace because the fallback implementation would not have the atomicity guarantees provided by the kernel implementation. IMHO, the generic versions should just return ENOSYS (or not linked in at all, so that configure scripts can note their absence). Thanks, Florian
On 10/05/2017 04:41, Florian Weimer wrote: > On 05/09/2017 10:58 PM, Adhemerval Zanella wrote: > >> So if I understood correctly, the idea is to *remove* sendmmsg on resolv code then? >> Because now with __ASSUME_SENDMMSG being used as default for Linux this code will >> be used on all architectures. > > Yes, once we have two sockets, we need to call sendmsg for each one separately. But that's an *algorithmic* change, it's not simply about replacing one call to sendmmsg with two to sendmsg. But that would be fine as an intermediate step as well if you want to simplify things in this way. > Right, so I think we an proceed with the removal of sendmmsg and later change the two socket utilization. >> In any case, my understanding for sendmmsg is it is a GNU extension (similar to >> pwritev), so the idea is to have a fallback usable code on sysdeps/{posix,unix} >> to we do not need to actually tests its usability in the rest of glibc code. > > As far as I understand things, both sendmmsg and pwritev cannot be emulated in userspace because the fallback implementation would not have the atomicity guarantees provided by the kernel implementation. IMHO, the generic versions should just return ENOSYS (or not linked in at all, so that configure scripts can note their absence). Indeed the emulation brings a lot of issue, but it allows us to use the system specific syscalls on generic code without relying on additional checks for functionality existence. However I think we can to Linux optimizations in generic code less intrusive way if necessary with a different approach, maybe by implementing the functionally in a more constrained way and reimplement in the target system. What I would like to avoid is multiple build option for the same implementation which are not actually tested and makes us with non exercised code paths (in a lot of time dead code).
On 05/10/2017 02:14 PM, Adhemerval Zanella wrote: > Indeed the emulation brings a lot of issue, but it allows us to use the system > specific syscalls on generic code without relying on additional checks for > functionality existence. However I think we can to Linux optimizations in > generic code less intrusive way if necessary with a different approach, maybe > by implementing the functionally in a more constrained way and reimplement > in the target system. Yes, optimizations are fine. But I think it is wrong to expose the fallback implementation to applications as an implementation of the system call, when in fact the semantics are different. Every time we do this, it comes back to haunt as eventually (see posix_fallocate or euidaccess for examples). Thanks, Florian
On 12/05/2017 06:17, Florian Weimer wrote: > On 05/10/2017 02:14 PM, Adhemerval Zanella wrote: >> Indeed the emulation brings a lot of issue, but it allows us to use the system >> specific syscalls on generic code without relying on additional checks for >> functionality existence. However I think we can to Linux optimizations in >> generic code less intrusive way if necessary with a different approach, maybe >> by implementing the functionally in a more constrained way and reimplement >> in the target system. > > Yes, optimizations are fine. But I think it is wrong to expose the fallback implementation to applications as an implementation of the system call, when in fact the semantics are different. Every time we do this, it comes back to haunt as eventually (see posix_fallocate or euidaccess for examples). I tend to agree with you, but I think we should at least lay ground on how we will handle possible future GNU extensions and/or new syscall wrapper. My understanding is we should aim to provide fallback implementation iff the semantics of the implementation matches the one we aim to emulate, otherwise we should return ENOTSUP/EINVAL. That is what I am aiming for p{read,write}v2 and the p{read,write}v. For former my current proposal returns ENOTSUPP for all flags different than zero and for former we still use p{read,pwrite} with an aligned buffer (p{read,write} may still be emulated, although not current in Linux).
On Fri, 12 May 2017, Florian Weimer wrote: > On 05/10/2017 02:14 PM, Adhemerval Zanella wrote: > > Indeed the emulation brings a lot of issue, but it allows us to use the > > system > > specific syscalls on generic code without relying on additional checks for > > functionality existence. However I think we can to Linux optimizations in > > generic code less intrusive way if necessary with a different approach, > > maybe > > by implementing the functionally in a more constrained way and reimplement > > in the target system. > > Yes, optimizations are fine. But I think it is wrong to expose the fallback > implementation to applications as an implementation of the system call, when > in fact the semantics are different. Every time we do this, it comes back to > haunt as eventually (see posix_fallocate or euidaccess for examples). Or the pselect emulation (for Linux, now only used on MicroBlaze); see bug 9813.
diff --git a/sysdeps/unix/sysv/linux/kernel-features.h b/sysdeps/unix/sysv/linux/kernel-features.h index 74ac627..c9212b4 100644 --- a/sysdeps/unix/sysv/linux/kernel-features.h +++ b/sysdeps/unix/sysv/linux/kernel-features.h @@ -105,13 +105,8 @@ /* Support for sendmmsg functionality was added in 3.0. The macros defined correspond to those for accept4 and recvmmsg. */ -#if __LINUX_KERNEL_VERSION >= 0x030000 -# ifdef __ASSUME_SOCKETCALL -# define __ASSUME_SENDMMSG_SOCKETCALL 1 -# endif -# define __ASSUME_SENDMMSG_SYSCALL 1 -# define __ASSUME_SENDMMSG 1 -#endif +#define __ASSUME_SENDMMSG_SYSCALL 1 +#define __ASSUME_SENDMMSG 1 /* On most architectures, most socket syscalls are supported for all supported kernel versions, but on some socketcall architectures diff --git a/sysdeps/unix/sysv/linux/sendmmsg.c b/sysdeps/unix/sysv/linux/sendmmsg.c index 2dd0eba..c559623 100644 --- a/sysdeps/unix/sysv/linux/sendmmsg.c +++ b/sysdeps/unix/sysv/linux/sendmmsg.c @@ -21,73 +21,22 @@ #include <sysdep-cancel.h> #include <sys/syscall.h> +#include <socketcall.h> #include <kernel-features.h> -/* Do not use the sendmmsg syscall on socketcall architectures unless - it was added at the same time as the socketcall support or can be - assumed to be present. */ -#if defined __ASSUME_SOCKETCALL \ - && !defined __ASSUME_SENDMMSG_SYSCALL_WITH_SOCKETCALL \ - && !defined __ASSUME_SENDMMSG_SYSCALL -# undef __NR_sendmmsg -#endif - -#ifdef __NR_sendmmsg -int -__sendmmsg (int fd, struct mmsghdr *vmessages, unsigned int vlen, int flags) -{ - return SYSCALL_CANCEL (sendmmsg, fd, vmessages, vlen, flags); -} -libc_hidden_def (__sendmmsg) -weak_alias (__sendmmsg, sendmmsg) -#elif defined __NR_socketcall -# include <socketcall.h> -# ifdef __ASSUME_SENDMMSG_SOCKETCALL int __sendmmsg (int fd, struct mmsghdr *vmessages, unsigned int vlen, int flags) { + /* Do not use the sendmmsg syscall on socketcall architectures unless + it was added at the same time as the socketcall support or can be + assumed to be present. */ +#if defined __ASSUME_SOCKETCALL \ + && !defined __ASSUME_SENDMMSG_SYSCALL_WITH_SOCKETCALL \ + && !defined __ASSUME_SENDMMSG_SYSCALL return SOCKETCALL_CANCEL (sendmmsg, fd, vmessages, vlen, flags); +#else + return SYSCALL_CANCEL (sendmmsg, fd, vmessages, vlen, flags); +#endif } -# else -static int have_sendmmsg; - -int -__sendmmsg (int fd, struct mmsghdr *vmessages, unsigned int vlen, int flags) -{ - if (__glibc_likely (have_sendmmsg >= 0)) - { - int ret = SOCKETCALL_CANCEL (sendmmsg, fd, vmessages, vlen, flags); - /* The kernel returns -EINVAL for unknown socket operations. - We need to convert that error to an ENOSYS error. */ - if (__builtin_expect (ret < 0, 0) - && have_sendmmsg == 0 - && errno == EINVAL) - { - /* Try another call, this time with an invalid file - descriptor and all other parameters cleared. This call - will not cause any harm and it will return - immediately. */ - ret = SOCKETCALL_CANCEL (invalid, -1); - if (errno == EINVAL) - { - have_sendmmsg = -1; - __set_errno (ENOSYS); - } - else - { - have_sendmmsg = 1; - __set_errno (EINVAL); - } - return -1; - } - return ret; - } - __set_errno (ENOSYS); - return -1; -} -# endif /* __ASSUME_SENDMMSG_SOCKETCALL */ libc_hidden_def (__sendmmsg) weak_alias (__sendmmsg, sendmmsg) -#else -# include <socket/sendmmsg.c> -#endif