diff mbox

[1/4] linux-user: add pselect syscall

Message ID f006b92f2656f50808bcb4d7ede5580b1a7ca01a.1269616764.git.riku.voipio@nokia.com
State New
Headers show

Commit Message

Riku Voipio March 26, 2010, 3:25 p.m. UTC
From: Michael Casadevall <mcasadevall@ubuntu.com>

This patch adds support for the pselect syscall in linux-user emulation
and also adds several support functions required to translate the
timespec structs between the target and the host.

Signed-off-by: Riku Voipio <riku.voipio@nokia.com>
Signed-off-by: Michael Casadevall <mcasadevall@ubuntu.com>
---
 linux-user/syscall.c |  119 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 119 insertions(+), 0 deletions(-)

Comments

Aurelien Jarno March 26, 2010, 10:15 p.m. UTC | #1
On Fri, Mar 26, 2010 at 03:25:09PM +0000, Riku Voipio wrote:
> From: Michael Casadevall <mcasadevall@ubuntu.com>
> 
> This patch adds support for the pselect syscall in linux-user emulation
> and also adds several support functions required to translate the
> timespec structs between the target and the host.
> 
> Signed-off-by: Riku Voipio <riku.voipio@nokia.com>
> Signed-off-by: Michael Casadevall <mcasadevall@ubuntu.com>
> ---
>  linux-user/syscall.c |  119 ++++++++++++++++++++++++++++++++++++++++++++++++++
>  1 files changed, 119 insertions(+), 0 deletions(-)

This patch basically needs ok, but need to conform to CODING_STYLE (see
below).

> diff --git a/linux-user/syscall.c b/linux-user/syscall.c
> index 80d8633..845bb60 100644
> --- a/linux-user/syscall.c
> +++ b/linux-user/syscall.c
> @@ -850,6 +850,38 @@ static inline abi_long copy_to_user_timeval(abi_ulong target_tv_addr,
>      return 0;
>  }
>  
> +static inline abi_long copy_from_user_timespec(struct timespec *ts,
> +                                              abi_ulong target_ts_addr)
> +{
> +    struct target_timespec *target_ts;
> +
> +    if (!lock_user_struct(VERIFY_READ, target_ts, target_ts_addr, 1))
> +        return -TARGET_EFAULT;

You should use curly braces around the return;

> +
> +    __get_user(ts->tv_sec, &target_ts->tv_sec);
> +    __get_user(ts->tv_nsec, &target_ts->tv_nsec);
> +
> +    unlock_user_struct(target_ts, target_ts_addr, 0);
> +
> +    return 0;
> +}
> +
> +
> +static inline abi_long copy_to_user_timespec(abi_ulong target_ts_addr,
> +                                            const struct timespec *ts)
> +{
> +    struct target_timespec *target_ts;
> +
> +    if (!lock_user_struct(VERIFY_WRITE, target_ts, target_ts_addr, 0))
> +        return -TARGET_EFAULT;
> +
> +    __put_user(ts->tv_sec, &target_ts->tv_sec);
> +    __put_user(ts->tv_nsec, &target_ts->tv_nsec);
> +
> +    unlock_user_struct(target_ts, target_ts_addr, 1);
> +
> +    return 0;
> +}
>  #if defined(TARGET_NR_mq_open) && defined(__NR_mq_open)
>  #include <mqueue.h>
>  
> @@ -949,6 +981,75 @@ static abi_long do_select(int n,
>      return ret;
>  }
>  
> +#ifdef TARGET_NR_pselect6
> +/* do_pselect() must return target values and target errnos. */
> +static abi_long do_pselect(int n,
> +                          abi_ulong rfd_addr, abi_ulong wfd_addr,
> +                          abi_ulong efd_addr, abi_ulong target_tv_addr,
> +                          abi_ulong set_addr)
> +{
> +    fd_set rfds, wfds, efds;
> +    fd_set *rfds_ptr, *wfds_ptr, *efds_ptr;
> +    struct timespec tv, *tv_ptr;
> +    sigset_t set, *set_ptr;
> +    abi_long ret;
> +
> +    if (rfd_addr) {
> +        if (copy_from_user_fdset(&rfds, rfd_addr, n))
> +            return -TARGET_EFAULT;

Same here, and later on.

> +        rfds_ptr = &rfds;
> +    } else {
> +        rfds_ptr = NULL;
> +    }
> +    if (wfd_addr) {
> +        if (copy_from_user_fdset(&wfds, wfd_addr, n))
> +            return -TARGET_EFAULT;
> +        wfds_ptr = &wfds;
> +    } else {
> +        wfds_ptr = NULL;
> +    }
> +    if (efd_addr) {
> +        if (copy_from_user_fdset(&efds, efd_addr, n))
> +            return -TARGET_EFAULT;
> +        efds_ptr = &efds;
> +    } else {
> +        efds_ptr = NULL;
> +    }
> +
> +    if (target_tv_addr) {
> +        if (copy_from_user_timespec(&tv, target_tv_addr))
> +            return -TARGET_EFAULT;
> +        tv_ptr = &tv;
> +    } else {
> +        tv_ptr = NULL;
> +    }
> +
> +    /* We don't need to return sigmask to target */
> +    if (set_addr) {
> +        target_to_host_old_sigset(&set, &set_addr);
> +        set_ptr = &set;
> +    } else {
> +        set_ptr = NULL;
> +    }
> +
> +    ret = get_errno(pselect(n, rfds_ptr, wfds_ptr, efds_ptr, tv_ptr, set_ptr));
> +
> +    if (!is_error(ret)) {
> +        if (rfd_addr && copy_to_user_fdset(rfd_addr, &rfds, n))
> +            return -TARGET_EFAULT;
> +        if (wfd_addr && copy_to_user_fdset(wfd_addr, &wfds, n))
> +            return -TARGET_EFAULT;
> +        if (efd_addr && copy_to_user_fdset(efd_addr, &efds, n))
> +            return -TARGET_EFAULT;
> +
> +        if (target_tv_addr && copy_to_user_timespec(target_tv_addr, &tv))
> +            return -TARGET_EFAULT;
> +    }
> +
> +    return ret;
> +}
> +#endif
> +
>  static abi_long do_pipe2(int host_pipe[], int flags)
>  {
>  #ifdef CONFIG_PIPE2
> @@ -5186,6 +5287,24 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1,
>          }
>          break;
>  #endif
> +
> +#ifdef TARGET_NR_pselect6
> +    case TARGET_NR_pselect6:
> +        {
> +            abi_ulong inp, outp, exp, tvp, set;
> +            long nsel;
> +
> +            nsel = tswapl(arg1);
> +            inp = tswapl(arg2);
> +            outp = tswapl(arg3);
> +            exp = tswapl(arg4);
> +            tvp = tswapl(arg5);
> +            set = tswapl(arg6);
> +
> +            ret = do_pselect(nsel, inp, outp, exp, tvp, set);
> +        }
> +        break;
> +#endif
>      case TARGET_NR_symlink:
>          {
>              void *p2;
> -- 
> 1.6.5
> 
> 
> 
>
Paul Brook March 26, 2010, 11:05 p.m. UTC | #2
>This patch adds support for the pselect syscall in linux-user emulation
>and also adds several support functions required to translate the
>timespec structs between the target and the host.

IIUC the whole point of the pselect is that it should be atomic. By emulating 
this in a non-atomic fasion I think you're re-introducing the race condition 
that it is designed to avoid.

Wouldn't it be better to just return ENOSYS and let the guest deal with the 
problem?

Paul
Jamie Lokier March 28, 2010, 6:49 p.m. UTC | #3
Paul Brook wrote:
> >This patch adds support for the pselect syscall in linux-user emulation
> >and also adds several support functions required to translate the
> >timespec structs between the target and the host.
> 
> IIUC the whole point of the pselect is that it should be atomic. By
> emulating this in a non-atomic fasion I think you're re-introducing
> the race condition that it is designed to avoid.
>
> Wouldn't it be better to just return ENOSYS and let the guest deal with the 
> problem?

Imho, it's very important to return ENOSYS if the atomic behaviour is
not provided.

The patch actually calls the host's pselect() - it looks almost ok...

But actually, host's pselect() may be Glibc or some other equally
buggy libc, which "emulates" pselect wrongly.  FreeBSD's pselect has
the same problem going back years - with a very recent patch to
support it in the kernel.  It's not just Glibc.

Careful apps detect and avoid libc's bug by calling the pselect syscall
directly.  If it returns ENOSYS, they just use a different (but
slightly slower) race-free technique, or abort - at least they don't
have a quiet corruption bug.

But here we're providing the kernel syscall to guest apps.  There's no
way for them to detect if it's really atomic or not.  And the ones
written carefully to avoid buggy libc will wrongly use it, because
they think it's a kernel call.

I think it's really important we don't pass on libc's buggy behaviour
to the emulated kernel interface, to even break careful apps.

Solution, Riku: Instead of calling pselect(...), qemu should call
syscall(SYS_pselect, ...), with a comment like:

    /*
     * Glibc and some other libcs unhelpfully "emulate" pselect()
     * using select() when the real system call isn't available, and
     * break the atomic guarantee which is the entire point of
     * pselect().  To avoid these libc bugs, go straight to the host
     * kernel using syscall().  If the host kernel doesn't support
     * pselect() it will return ENOSYS, which is what we want to
     * return to the guest.
     */

There might need to be some header file included to get syscall() and
SYS_pselect, which sometimes has other names like NR_pselect.  And
unfortunately the call arguments aren't quite the same, at least on
Linux.

Same goes for ppoll() if that is ever added.

-- Jamie
diff mbox

Patch

diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index 80d8633..845bb60 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -850,6 +850,38 @@  static inline abi_long copy_to_user_timeval(abi_ulong target_tv_addr,
     return 0;
 }
 
+static inline abi_long copy_from_user_timespec(struct timespec *ts,
+                                              abi_ulong target_ts_addr)
+{
+    struct target_timespec *target_ts;
+
+    if (!lock_user_struct(VERIFY_READ, target_ts, target_ts_addr, 1))
+        return -TARGET_EFAULT;
+
+    __get_user(ts->tv_sec, &target_ts->tv_sec);
+    __get_user(ts->tv_nsec, &target_ts->tv_nsec);
+
+    unlock_user_struct(target_ts, target_ts_addr, 0);
+
+    return 0;
+}
+
+
+static inline abi_long copy_to_user_timespec(abi_ulong target_ts_addr,
+                                            const struct timespec *ts)
+{
+    struct target_timespec *target_ts;
+
+    if (!lock_user_struct(VERIFY_WRITE, target_ts, target_ts_addr, 0))
+        return -TARGET_EFAULT;
+
+    __put_user(ts->tv_sec, &target_ts->tv_sec);
+    __put_user(ts->tv_nsec, &target_ts->tv_nsec);
+
+    unlock_user_struct(target_ts, target_ts_addr, 1);
+
+    return 0;
+}
 #if defined(TARGET_NR_mq_open) && defined(__NR_mq_open)
 #include <mqueue.h>
 
@@ -949,6 +981,75 @@  static abi_long do_select(int n,
     return ret;
 }
 
+#ifdef TARGET_NR_pselect6
+/* do_pselect() must return target values and target errnos. */
+static abi_long do_pselect(int n,
+                          abi_ulong rfd_addr, abi_ulong wfd_addr,
+                          abi_ulong efd_addr, abi_ulong target_tv_addr,
+                          abi_ulong set_addr)
+{
+    fd_set rfds, wfds, efds;
+    fd_set *rfds_ptr, *wfds_ptr, *efds_ptr;
+    struct timespec tv, *tv_ptr;
+    sigset_t set, *set_ptr;
+    abi_long ret;
+
+    if (rfd_addr) {
+        if (copy_from_user_fdset(&rfds, rfd_addr, n))
+            return -TARGET_EFAULT;
+        rfds_ptr = &rfds;
+    } else {
+        rfds_ptr = NULL;
+    }
+    if (wfd_addr) {
+        if (copy_from_user_fdset(&wfds, wfd_addr, n))
+            return -TARGET_EFAULT;
+        wfds_ptr = &wfds;
+    } else {
+        wfds_ptr = NULL;
+    }
+    if (efd_addr) {
+        if (copy_from_user_fdset(&efds, efd_addr, n))
+            return -TARGET_EFAULT;
+        efds_ptr = &efds;
+    } else {
+        efds_ptr = NULL;
+    }
+
+    if (target_tv_addr) {
+        if (copy_from_user_timespec(&tv, target_tv_addr))
+            return -TARGET_EFAULT;
+        tv_ptr = &tv;
+    } else {
+        tv_ptr = NULL;
+    }
+
+    /* We don't need to return sigmask to target */
+    if (set_addr) {
+        target_to_host_old_sigset(&set, &set_addr);
+        set_ptr = &set;
+    } else {
+        set_ptr = NULL;
+    }
+
+    ret = get_errno(pselect(n, rfds_ptr, wfds_ptr, efds_ptr, tv_ptr, set_ptr));
+
+    if (!is_error(ret)) {
+        if (rfd_addr && copy_to_user_fdset(rfd_addr, &rfds, n))
+            return -TARGET_EFAULT;
+        if (wfd_addr && copy_to_user_fdset(wfd_addr, &wfds, n))
+            return -TARGET_EFAULT;
+        if (efd_addr && copy_to_user_fdset(efd_addr, &efds, n))
+            return -TARGET_EFAULT;
+
+        if (target_tv_addr && copy_to_user_timespec(target_tv_addr, &tv))
+            return -TARGET_EFAULT;
+    }
+
+    return ret;
+}
+#endif
+
 static abi_long do_pipe2(int host_pipe[], int flags)
 {
 #ifdef CONFIG_PIPE2
@@ -5186,6 +5287,24 @@  abi_long do_syscall(void *cpu_env, int num, abi_long arg1,
         }
         break;
 #endif
+
+#ifdef TARGET_NR_pselect6
+    case TARGET_NR_pselect6:
+        {
+            abi_ulong inp, outp, exp, tvp, set;
+            long nsel;
+
+            nsel = tswapl(arg1);
+            inp = tswapl(arg2);
+            outp = tswapl(arg3);
+            exp = tswapl(arg4);
+            tvp = tswapl(arg5);
+            set = tswapl(arg6);
+
+            ret = do_pselect(nsel, inp, outp, exp, tvp, set);
+        }
+        break;
+#endif
     case TARGET_NR_symlink:
         {
             void *p2;