Message ID | f006b92f2656f50808bcb4d7ede5580b1a7ca01a.1269616764.git.riku.voipio@nokia.com |
---|---|
State | New |
Headers | show |
On Fri, Mar 26, 2010 at 03:25:09PM +0000, Riku Voipio wrote: > From: Michael Casadevall <mcasadevall@ubuntu.com> > > This patch adds support for the pselect syscall in linux-user emulation > and also adds several support functions required to translate the > timespec structs between the target and the host. > > Signed-off-by: Riku Voipio <riku.voipio@nokia.com> > Signed-off-by: Michael Casadevall <mcasadevall@ubuntu.com> > --- > linux-user/syscall.c | 119 ++++++++++++++++++++++++++++++++++++++++++++++++++ > 1 files changed, 119 insertions(+), 0 deletions(-) This patch basically needs ok, but need to conform to CODING_STYLE (see below). > diff --git a/linux-user/syscall.c b/linux-user/syscall.c > index 80d8633..845bb60 100644 > --- a/linux-user/syscall.c > +++ b/linux-user/syscall.c > @@ -850,6 +850,38 @@ static inline abi_long copy_to_user_timeval(abi_ulong target_tv_addr, > return 0; > } > > +static inline abi_long copy_from_user_timespec(struct timespec *ts, > + abi_ulong target_ts_addr) > +{ > + struct target_timespec *target_ts; > + > + if (!lock_user_struct(VERIFY_READ, target_ts, target_ts_addr, 1)) > + return -TARGET_EFAULT; You should use curly braces around the return; > + > + __get_user(ts->tv_sec, &target_ts->tv_sec); > + __get_user(ts->tv_nsec, &target_ts->tv_nsec); > + > + unlock_user_struct(target_ts, target_ts_addr, 0); > + > + return 0; > +} > + > + > +static inline abi_long copy_to_user_timespec(abi_ulong target_ts_addr, > + const struct timespec *ts) > +{ > + struct target_timespec *target_ts; > + > + if (!lock_user_struct(VERIFY_WRITE, target_ts, target_ts_addr, 0)) > + return -TARGET_EFAULT; > + > + __put_user(ts->tv_sec, &target_ts->tv_sec); > + __put_user(ts->tv_nsec, &target_ts->tv_nsec); > + > + unlock_user_struct(target_ts, target_ts_addr, 1); > + > + return 0; > +} > #if defined(TARGET_NR_mq_open) && defined(__NR_mq_open) > #include <mqueue.h> > > @@ -949,6 +981,75 @@ static abi_long do_select(int n, > return ret; > } > > +#ifdef TARGET_NR_pselect6 > +/* do_pselect() must return target values and target errnos. */ > +static abi_long do_pselect(int n, > + abi_ulong rfd_addr, abi_ulong wfd_addr, > + abi_ulong efd_addr, abi_ulong target_tv_addr, > + abi_ulong set_addr) > +{ > + fd_set rfds, wfds, efds; > + fd_set *rfds_ptr, *wfds_ptr, *efds_ptr; > + struct timespec tv, *tv_ptr; > + sigset_t set, *set_ptr; > + abi_long ret; > + > + if (rfd_addr) { > + if (copy_from_user_fdset(&rfds, rfd_addr, n)) > + return -TARGET_EFAULT; Same here, and later on. > + rfds_ptr = &rfds; > + } else { > + rfds_ptr = NULL; > + } > + if (wfd_addr) { > + if (copy_from_user_fdset(&wfds, wfd_addr, n)) > + return -TARGET_EFAULT; > + wfds_ptr = &wfds; > + } else { > + wfds_ptr = NULL; > + } > + if (efd_addr) { > + if (copy_from_user_fdset(&efds, efd_addr, n)) > + return -TARGET_EFAULT; > + efds_ptr = &efds; > + } else { > + efds_ptr = NULL; > + } > + > + if (target_tv_addr) { > + if (copy_from_user_timespec(&tv, target_tv_addr)) > + return -TARGET_EFAULT; > + tv_ptr = &tv; > + } else { > + tv_ptr = NULL; > + } > + > + /* We don't need to return sigmask to target */ > + if (set_addr) { > + target_to_host_old_sigset(&set, &set_addr); > + set_ptr = &set; > + } else { > + set_ptr = NULL; > + } > + > + ret = get_errno(pselect(n, rfds_ptr, wfds_ptr, efds_ptr, tv_ptr, set_ptr)); > + > + if (!is_error(ret)) { > + if (rfd_addr && copy_to_user_fdset(rfd_addr, &rfds, n)) > + return -TARGET_EFAULT; > + if (wfd_addr && copy_to_user_fdset(wfd_addr, &wfds, n)) > + return -TARGET_EFAULT; > + if (efd_addr && copy_to_user_fdset(efd_addr, &efds, n)) > + return -TARGET_EFAULT; > + > + if (target_tv_addr && copy_to_user_timespec(target_tv_addr, &tv)) > + return -TARGET_EFAULT; > + } > + > + return ret; > +} > +#endif > + > static abi_long do_pipe2(int host_pipe[], int flags) > { > #ifdef CONFIG_PIPE2 > @@ -5186,6 +5287,24 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, > } > break; > #endif > + > +#ifdef TARGET_NR_pselect6 > + case TARGET_NR_pselect6: > + { > + abi_ulong inp, outp, exp, tvp, set; > + long nsel; > + > + nsel = tswapl(arg1); > + inp = tswapl(arg2); > + outp = tswapl(arg3); > + exp = tswapl(arg4); > + tvp = tswapl(arg5); > + set = tswapl(arg6); > + > + ret = do_pselect(nsel, inp, outp, exp, tvp, set); > + } > + break; > +#endif > case TARGET_NR_symlink: > { > void *p2; > -- > 1.6.5 > > > >
>This patch adds support for the pselect syscall in linux-user emulation >and also adds several support functions required to translate the >timespec structs between the target and the host. IIUC the whole point of the pselect is that it should be atomic. By emulating this in a non-atomic fasion I think you're re-introducing the race condition that it is designed to avoid. Wouldn't it be better to just return ENOSYS and let the guest deal with the problem? Paul
Paul Brook wrote: > >This patch adds support for the pselect syscall in linux-user emulation > >and also adds several support functions required to translate the > >timespec structs between the target and the host. > > IIUC the whole point of the pselect is that it should be atomic. By > emulating this in a non-atomic fasion I think you're re-introducing > the race condition that it is designed to avoid. > > Wouldn't it be better to just return ENOSYS and let the guest deal with the > problem? Imho, it's very important to return ENOSYS if the atomic behaviour is not provided. The patch actually calls the host's pselect() - it looks almost ok... But actually, host's pselect() may be Glibc or some other equally buggy libc, which "emulates" pselect wrongly. FreeBSD's pselect has the same problem going back years - with a very recent patch to support it in the kernel. It's not just Glibc. Careful apps detect and avoid libc's bug by calling the pselect syscall directly. If it returns ENOSYS, they just use a different (but slightly slower) race-free technique, or abort - at least they don't have a quiet corruption bug. But here we're providing the kernel syscall to guest apps. There's no way for them to detect if it's really atomic or not. And the ones written carefully to avoid buggy libc will wrongly use it, because they think it's a kernel call. I think it's really important we don't pass on libc's buggy behaviour to the emulated kernel interface, to even break careful apps. Solution, Riku: Instead of calling pselect(...), qemu should call syscall(SYS_pselect, ...), with a comment like: /* * Glibc and some other libcs unhelpfully "emulate" pselect() * using select() when the real system call isn't available, and * break the atomic guarantee which is the entire point of * pselect(). To avoid these libc bugs, go straight to the host * kernel using syscall(). If the host kernel doesn't support * pselect() it will return ENOSYS, which is what we want to * return to the guest. */ There might need to be some header file included to get syscall() and SYS_pselect, which sometimes has other names like NR_pselect. And unfortunately the call arguments aren't quite the same, at least on Linux. Same goes for ppoll() if that is ever added. -- Jamie
diff --git a/linux-user/syscall.c b/linux-user/syscall.c index 80d8633..845bb60 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -850,6 +850,38 @@ static inline abi_long copy_to_user_timeval(abi_ulong target_tv_addr, return 0; } +static inline abi_long copy_from_user_timespec(struct timespec *ts, + abi_ulong target_ts_addr) +{ + struct target_timespec *target_ts; + + if (!lock_user_struct(VERIFY_READ, target_ts, target_ts_addr, 1)) + return -TARGET_EFAULT; + + __get_user(ts->tv_sec, &target_ts->tv_sec); + __get_user(ts->tv_nsec, &target_ts->tv_nsec); + + unlock_user_struct(target_ts, target_ts_addr, 0); + + return 0; +} + + +static inline abi_long copy_to_user_timespec(abi_ulong target_ts_addr, + const struct timespec *ts) +{ + struct target_timespec *target_ts; + + if (!lock_user_struct(VERIFY_WRITE, target_ts, target_ts_addr, 0)) + return -TARGET_EFAULT; + + __put_user(ts->tv_sec, &target_ts->tv_sec); + __put_user(ts->tv_nsec, &target_ts->tv_nsec); + + unlock_user_struct(target_ts, target_ts_addr, 1); + + return 0; +} #if defined(TARGET_NR_mq_open) && defined(__NR_mq_open) #include <mqueue.h> @@ -949,6 +981,75 @@ static abi_long do_select(int n, return ret; } +#ifdef TARGET_NR_pselect6 +/* do_pselect() must return target values and target errnos. */ +static abi_long do_pselect(int n, + abi_ulong rfd_addr, abi_ulong wfd_addr, + abi_ulong efd_addr, abi_ulong target_tv_addr, + abi_ulong set_addr) +{ + fd_set rfds, wfds, efds; + fd_set *rfds_ptr, *wfds_ptr, *efds_ptr; + struct timespec tv, *tv_ptr; + sigset_t set, *set_ptr; + abi_long ret; + + if (rfd_addr) { + if (copy_from_user_fdset(&rfds, rfd_addr, n)) + return -TARGET_EFAULT; + rfds_ptr = &rfds; + } else { + rfds_ptr = NULL; + } + if (wfd_addr) { + if (copy_from_user_fdset(&wfds, wfd_addr, n)) + return -TARGET_EFAULT; + wfds_ptr = &wfds; + } else { + wfds_ptr = NULL; + } + if (efd_addr) { + if (copy_from_user_fdset(&efds, efd_addr, n)) + return -TARGET_EFAULT; + efds_ptr = &efds; + } else { + efds_ptr = NULL; + } + + if (target_tv_addr) { + if (copy_from_user_timespec(&tv, target_tv_addr)) + return -TARGET_EFAULT; + tv_ptr = &tv; + } else { + tv_ptr = NULL; + } + + /* We don't need to return sigmask to target */ + if (set_addr) { + target_to_host_old_sigset(&set, &set_addr); + set_ptr = &set; + } else { + set_ptr = NULL; + } + + ret = get_errno(pselect(n, rfds_ptr, wfds_ptr, efds_ptr, tv_ptr, set_ptr)); + + if (!is_error(ret)) { + if (rfd_addr && copy_to_user_fdset(rfd_addr, &rfds, n)) + return -TARGET_EFAULT; + if (wfd_addr && copy_to_user_fdset(wfd_addr, &wfds, n)) + return -TARGET_EFAULT; + if (efd_addr && copy_to_user_fdset(efd_addr, &efds, n)) + return -TARGET_EFAULT; + + if (target_tv_addr && copy_to_user_timespec(target_tv_addr, &tv)) + return -TARGET_EFAULT; + } + + return ret; +} +#endif + static abi_long do_pipe2(int host_pipe[], int flags) { #ifdef CONFIG_PIPE2 @@ -5186,6 +5287,24 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, } break; #endif + +#ifdef TARGET_NR_pselect6 + case TARGET_NR_pselect6: + { + abi_ulong inp, outp, exp, tvp, set; + long nsel; + + nsel = tswapl(arg1); + inp = tswapl(arg2); + outp = tswapl(arg3); + exp = tswapl(arg4); + tvp = tswapl(arg5); + set = tswapl(arg6); + + ret = do_pselect(nsel, inp, outp, exp, tvp, set); + } + break; +#endif case TARGET_NR_symlink: { void *p2;