diff mbox series

[v2,1/1] linux-user: add openat2 support in linux-user

Message ID 20240829144413.6942-4-mvogt@redhat.com
State New
Headers show
Series linux-user: add openat2 support in linux-user | expand

Commit Message

Michael Vogt Aug. 29, 2024, 2:44 p.m. UTC
This commit adds support for the `openat2()` syscall in the
`linux-user` userspace emulator.

It is implemented by extracting a new helper `maybe_do_fake_open()`
out of the existing `do_guest_openat()` and sharing it with the
new `do_guest_openat2()`. Unfortunately we cannot just make
do_guest_openat2() a superset of do_guest_openat() because the
openat2() syscall is stricter with the argument checking and
will return an error for invalid flags or mode combinations (which
open()/openat() will ignore).

Note that in this commit using openat2() for a "faked" file in
/proc will ignore the "resolve" flags. This is not great but it
seems similar to the existing behavior when openat() is called
with a dirfd to "/proc". Here too the fake file lookup may
not catch the special file because "realpath()" is used to
determine if the path is in /proc. As an alternative to ignoring
the flags, we could simply fail with `-TARGET_ENOSYS` (or similar) if
`resolve` flags are passed and we found something that looks
like a file in /proc that needs faking.

Signed-off-by: Michael Vogt <mvogt@redhat.com>
Buglink: https://github.com/osbuild/bootc-image-builder/issues/619
---
 linux-user/syscall.c      | 98 +++++++++++++++++++++++++++++++++++++--
 linux-user/syscall_defs.h |  7 +++
 meson.build               |  1 +
 3 files changed, 102 insertions(+), 4 deletions(-)

Comments

Richard Henderson Aug. 30, 2024, 1:50 a.m. UTC | #1
On 8/30/24 00:44, Michael Vogt wrote:
> +static int maybe_do_fake_open(CPUArchState *cpu_env, int dirfd,
> +                              const char *fname, int flags, mode_t mode,
> +                              bool safe, bool *use_returned_fd)
>   {
>       g_autofree char *proc_name = NULL;
>       const char *pathname;
> @@ -8362,6 +8371,7 @@ int do_guest_openat(CPUArchState *cpu_env, int dirfd, const char *fname,
>   #endif
>           { NULL, NULL, NULL }
>       };
> +    *use_returned_fd = true;
>   
>       /* if this is a file from /proc/ filesystem, expand full name */
>       proc_name = realpath(fname, NULL);
> @@ -8418,12 +8428,87 @@ int do_guest_openat(CPUArchState *cpu_env, int dirfd, const char *fname,
>           return fd;
>       }
>   
> +    *use_returned_fd = false;
> +    return -1;
> +}

Why is -1 insufficient for signalling "do not use"?


> +
> +int do_guest_openat(CPUArchState *cpu_env, int dirfd, const char *fname,
> +                    int flags, mode_t mode, bool safe)
> +{
> +    bool use_returned_fd;
> +    int fd = maybe_do_fake_open(cpu_env, dirfd, fname, flags, mode, safe,
> +                                &use_returned_fd);
> +    if (use_returned_fd)
> +        return fd;

Braces are required.  Use scripts/checkpatch.pl.

> +#ifdef HAVE_OPENAT2_H
> +static int do_guest_openat2(CPUArchState *cpu_env, int dirfd, const char *fname,
> +                            struct open_how *how)
> +{
> +    /*
> +     * Ideally we would pass "how->resolve" flags into this helper too but
> +     * the lookup for files that need faking is based on "realpath()" so
> +     * neither a dirfd for "proc" nor restrictions via "resolve" flags can
> +     * be honored right now.
> +     */
> +    bool use_returned_fd;
> +    int fd = maybe_do_fake_open(cpu_env, dirfd, fname, how->flags, how->mode,
> +                                true, &use_returned_fd);
> +    if (use_returned_fd)
> +        return fd;
> +
> +    return safe_openat2(dirfd, fname, how, sizeof(struct open_how));
> +}

I don't think this needs to be a separate function.
We did that for do_guest_openat for gdbstub.

> +
> +static int do_openat2(CPUArchState *cpu_env, abi_long arg1, abi_long arg2,
> +                      abi_long arg3, abi_long arg4)

You might as well name the arguments properly, and use abi_ptr/abi_ulong where it makes sense.

> +{
> +    struct open_how how = {0};
> +    struct target_open_how *target_how = NULL;
> +    int ret;
> +
> +    char *p = lock_user_string(arg2);
> +    if (!p) {
> +        ret = -TARGET_EFAULT;
> +        goto out;
> +    }
> +    if (!(lock_user_struct(VERIFY_READ, target_how, arg3, 1))) {
> +        ret = -TARGET_EFAULT;
> +        goto out;
> +    }
> +    size_t target_open_how_struct_size = arg4;
> +    if (target_open_how_struct_size < sizeof(struct target_open_how)) {
> +        ret = -TARGET_EINVAL;
> +        goto out;
> +    }

These checks should be in the same order as the kernel:

SYSCALL_DEFINE(openat2)
     usize < HOW_SIZE_VER0 -> EINVAL
     copy_struct_from_user(how) -> E2BIG

all come before examining the path argument.

> +    if (target_open_how_struct_size > sizeof(struct target_open_how)) {
> +        qemu_log_mask(LOG_UNIMP, "Unimplemented openat2 open_how size: %lu\n",
> +                      target_open_how_struct_size);
> +        ret = -TARGET_E2BIG;
> +        goto out;
>       }

From copy_struct_from_user you're missing

         } else if (usize > ksize) {
                 int ret = check_zeroed_user(src + size, rest);
                 if (ret <= 0)
                         return ret ?: -E2BIG;

It's not just testing the size, it's reading the following bytes and checking for zeros.

It would be worth adding a helper function for this, matching the kernel.  I'm sure there 
are other places in linux-user that should be doing the same thing.


> +    how.flags = target_to_host_bitmask(target_how->flags, fcntl_flags_tbl);
> +    how.mode = tswap64(target_how->mode);
> +    how.resolve = tswap64(target_how->resolve);

With a linux-user copy_struct_from_user, I expect you'd swap in place:

     how.mode = tswap64(how.mode);

etc.


r~
diff mbox series

Patch

diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index 9d5415674d..c241c6d3a0 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -85,6 +85,10 @@ 
 #include <sys/kcov.h>
 #endif
 
+#ifdef HAVE_OPENAT2_H
+#include <linux/openat2.h>
+#endif
+
 #define termios host_termios
 #define winsize host_winsize
 #define termio host_termio
@@ -653,6 +657,10 @@  safe_syscall3(ssize_t, read, int, fd, void *, buff, size_t, count)
 safe_syscall3(ssize_t, write, int, fd, const void *, buff, size_t, count)
 safe_syscall4(int, openat, int, dirfd, const char *, pathname, \
               int, flags, mode_t, mode)
+#ifdef HAVE_OPENAT2_H
+safe_syscall4(int, openat2, int, dirfd, const char *, pathname, \
+              const struct open_how *, how, size_t, size)
+#endif
 #if defined(TARGET_NR_wait4) || defined(TARGET_NR_waitpid)
 safe_syscall4(pid_t, wait4, pid_t, pid, int *, status, int, options, \
               struct rusage *, rusage)
@@ -8334,8 +8342,9 @@  static int open_net_route(CPUArchState *cpu_env, int fd)
 }
 #endif
 
-int do_guest_openat(CPUArchState *cpu_env, int dirfd, const char *fname,
-                    int flags, mode_t mode, bool safe)
+static int maybe_do_fake_open(CPUArchState *cpu_env, int dirfd,
+                              const char *fname, int flags, mode_t mode,
+                              bool safe, bool *use_returned_fd)
 {
     g_autofree char *proc_name = NULL;
     const char *pathname;
@@ -8362,6 +8371,7 @@  int do_guest_openat(CPUArchState *cpu_env, int dirfd, const char *fname,
 #endif
         { NULL, NULL, NULL }
     };
+    *use_returned_fd = true;
 
     /* if this is a file from /proc/ filesystem, expand full name */
     proc_name = realpath(fname, NULL);
@@ -8418,12 +8428,87 @@  int do_guest_openat(CPUArchState *cpu_env, int dirfd, const char *fname,
         return fd;
     }
 
+    *use_returned_fd = false;
+    return -1;
+}
+
+int do_guest_openat(CPUArchState *cpu_env, int dirfd, const char *fname,
+                    int flags, mode_t mode, bool safe)
+{
+    bool use_returned_fd;
+    int fd = maybe_do_fake_open(cpu_env, dirfd, fname, flags, mode, safe,
+                                &use_returned_fd);
+    if (use_returned_fd)
+        return fd;
+
     if (safe) {
-        return safe_openat(dirfd, path(pathname), flags, mode);
+        return safe_openat(dirfd, path(fname), flags, mode);
     } else {
-        return openat(dirfd, path(pathname), flags, mode);
+        return openat(dirfd, path(fname), flags, mode);
+    }
+}
+
+#ifdef HAVE_OPENAT2_H
+static int do_guest_openat2(CPUArchState *cpu_env, int dirfd, const char *fname,
+                            struct open_how *how)
+{
+    /*
+     * Ideally we would pass "how->resolve" flags into this helper too but
+     * the lookup for files that need faking is based on "realpath()" so
+     * neither a dirfd for "proc" nor restrictions via "resolve" flags can
+     * be honored right now.
+     */
+    bool use_returned_fd;
+    int fd = maybe_do_fake_open(cpu_env, dirfd, fname, how->flags, how->mode,
+                                true, &use_returned_fd);
+    if (use_returned_fd)
+        return fd;
+
+    return safe_openat2(dirfd, fname, how, sizeof(struct open_how));
+}
+
+static int do_openat2(CPUArchState *cpu_env, abi_long arg1, abi_long arg2,
+                      abi_long arg3, abi_long arg4)
+{
+    struct open_how how = {0};
+    struct target_open_how *target_how = NULL;
+    int ret;
+
+    char *p = lock_user_string(arg2);
+    if (!p) {
+        ret = -TARGET_EFAULT;
+        goto out;
+    }
+    if (!(lock_user_struct(VERIFY_READ, target_how, arg3, 1))) {
+        ret = -TARGET_EFAULT;
+        goto out;
+    }
+    size_t target_open_how_struct_size = arg4;
+    if (target_open_how_struct_size < sizeof(struct target_open_how)) {
+        ret = -TARGET_EINVAL;
+        goto out;
+    }
+    if (target_open_how_struct_size > sizeof(struct target_open_how)) {
+        qemu_log_mask(LOG_UNIMP, "Unimplemented openat2 open_how size: %lu\n",
+                      target_open_how_struct_size);
+        ret = -TARGET_E2BIG;
+        goto out;
     }
+
+    how.flags = target_to_host_bitmask(target_how->flags, fcntl_flags_tbl);
+    how.mode = tswap64(target_how->mode);
+    how.resolve = tswap64(target_how->resolve);
+    ret = get_errno(do_guest_openat2(cpu_env, arg1, p, &how));
+
+    fd_trans_unregister(ret);
+ out:
+    if (target_how)
+        unlock_user_struct(target_how, arg3, 0);
+    if (p)
+        unlock_user(p, arg2, 0);
+    return ret;
 }
+#endif
 
 ssize_t do_guest_readlink(const char *pathname, char *buf, size_t bufsiz)
 {
@@ -9197,6 +9282,11 @@  static abi_long do_syscall1(CPUArchState *cpu_env, int num, abi_long arg1,
         fd_trans_unregister(ret);
         unlock_user(p, arg2, 0);
         return ret;
+#if defined(TARGET_NR_openat2) && defined(HAVE_OPENAT2_H)
+    case TARGET_NR_openat2:
+        ret = do_openat2(cpu_env, arg1, arg2, arg3, arg4);
+        return ret;
+#endif
 #if defined(TARGET_NR_name_to_handle_at) && defined(CONFIG_OPEN_BY_HANDLE)
     case TARGET_NR_name_to_handle_at:
         ret = do_name_to_handle_at(arg1, arg2, arg3, arg4, arg5);
diff --git a/linux-user/syscall_defs.h b/linux-user/syscall_defs.h
index a00b617cae..2a79ae13c9 100644
--- a/linux-user/syscall_defs.h
+++ b/linux-user/syscall_defs.h
@@ -2754,4 +2754,11 @@  struct target_sched_param {
     abi_int sched_priority;
 };
 
+/* from kernel's include/uapi/linux/openat2.h */
+struct target_open_how {
+    abi_ullong flags;
+    abi_ullong mode;
+    abi_ullong resolve;
+};
+
 #endif
diff --git a/meson.build b/meson.build
index fbda17c987..220ccbcbe6 100644
--- a/meson.build
+++ b/meson.build
@@ -2465,6 +2465,7 @@  config_host_data.set('CONFIG_LINUX_MAGIC_H', cc.has_header('linux/magic.h'))
 config_host_data.set('CONFIG_VALGRIND_H', cc.has_header('valgrind/valgrind.h'))
 config_host_data.set('HAVE_BTRFS_H', cc.has_header('linux/btrfs.h'))
 config_host_data.set('HAVE_DRM_H', cc.has_header('libdrm/drm.h'))
+config_host_data.set('HAVE_OPENAT2_H', cc.has_header('linux/openat2.h'))
 config_host_data.set('HAVE_PTY_H', cc.has_header('pty.h'))
 config_host_data.set('HAVE_SYS_DISK_H', cc.has_header('sys/disk.h'))
 config_host_data.set('HAVE_SYS_IOCCOM_H', cc.has_header('sys/ioccom.h'))