diff mbox

[RFC] virtio-9p: Use clone approach to fix TOCTOU vulnerability

Message ID 20110614081244.GB3428@in.ibm.com
State New
Headers show

Commit Message

Mohan Kumar M June 14, 2011, 8:12 a.m. UTC
[RFC PATCH] virtio-9p: Use clone approach to fix TOCTOU vulnerability

In passthrough security model, following a symbolic link in the server
side could result in TOCTTOU vulnerability.

Use clone system call to create a thread which runs in chrooted
environment. All passthrough model file operations are done from this
thread to avoid TOCTTOU vulnerability.

Signed-off-by: Venkateswararao Jujjuri <jvrao@linux.vnet.ibm.com>
Signed-off-by: M. Mohan Kumar <mohan@in.ibm.com>
---
 fsdev/file-op-9p.h         |    1 +
 hw/9pfs/virtio-9p-coth.c   |  105 +++++++++++++++++++++++++++++++++++++++++--
 hw/9pfs/virtio-9p-coth.h   |   13 +++++-
 hw/9pfs/virtio-9p-device.c |    7 +++-
 hw/9pfs/virtio-9p.h        |    6 ++-
 5 files changed, 124 insertions(+), 8 deletions(-)

Comments

Stefan Hajnoczi June 15, 2011, 3:24 p.m. UTC | #1
On Tue, Jun 14, 2011 at 9:12 AM, M. Mohan Kumar <mohan@in.ibm.com> wrote:
> [RFC PATCH] virtio-9p: Use clone approach to fix TOCTOU vulnerability
>
> In passthrough security model, following a symbolic link in the server
> side could result in TOCTTOU vulnerability.
>
> Use clone system call to create a thread which runs in chrooted
> environment. All passthrough model file operations are done from this
> thread to avoid TOCTTOU vulnerability.
>
> Signed-off-by: Venkateswararao Jujjuri <jvrao@linux.vnet.ibm.com>
> Signed-off-by: M. Mohan Kumar <mohan@in.ibm.com>
> ---
>  fsdev/file-op-9p.h         |    1 +
>  hw/9pfs/virtio-9p-coth.c   |  105 +++++++++++++++++++++++++++++++++++++++++--
>  hw/9pfs/virtio-9p-coth.h   |   13 +++++-
>  hw/9pfs/virtio-9p-device.c |    7 +++-
>  hw/9pfs/virtio-9p.h        |    6 ++-
>  5 files changed, 124 insertions(+), 8 deletions(-)

This patch isn't against upstream virtio-9p.  Please post a link to a
repo or more information.

>
> diff --git a/fsdev/file-op-9p.h b/fsdev/file-op-9p.h
> index 5d088d4..c54f6bf 100644
> --- a/fsdev/file-op-9p.h
> +++ b/fsdev/file-op-9p.h
> @@ -60,6 +60,7 @@ typedef struct FsContext
>     SecModel fs_sm;
>     uid_t uid;
>     int open_flags;
> +    int enable_chroot;

Please use bool.  Using int is like using void*, it throws away information.

>     struct xattr_operations **xops;
>     /* fs driver specific data */
>     void *private;
> diff --git a/hw/9pfs/virtio-9p-coth.c b/hw/9pfs/virtio-9p-coth.c
> index e61b656..aa71a83 100644
> --- a/hw/9pfs/virtio-9p-coth.c
> +++ b/hw/9pfs/virtio-9p-coth.c
> @@ -17,6 +17,8 @@
>  #include "qemu-thread.h"
>  #include "qemu-coroutine.h"
>  #include "virtio-9p-coth.h"
> +#include <sys/syscall.h>

Do you need this header?  Please include system headers first, then
QEMU headers.

> +#include "qemu-error.h"
>
>  /* v9fs glib thread pool */
>  static V9fsThPool v9fs_pool;
> @@ -56,7 +58,96 @@ static void v9fs_thread_routine(gpointer data, gpointer user_data)
>     } while (len == -1 && errno == EINTR);
>  }
>
> -int v9fs_init_worker_threads(void)
> +static int v9fs_chroot_function(void *arg)
> +{
> +    GError *err;
> +    V9fsChrootState *csp = arg;
> +    FsContext *fs_ctx = csp->fs_ctx;
> +    V9fsThPool *p = &v9fs_pool;
> +    int notifier_fds[2];
> +

Must acquire cs->mutex before calling any QEMU functions here -
otherwise this thread runs concurrently with QEMU threads and could
lead to race conditions.

> +    if (chroot(fs_ctx->fs_root) < 0) {
> +        error_report("chroot");
> +        goto error;
> +    }
> +
> +    if (qemu_pipe(notifier_fds) == -1) {
> +        return -1;
> +    }
> +    p->pool = g_thread_pool_new(v9fs_thread_routine, p, 10, TRUE, &err);
> +    if (!p->pool) {
> +        error_report("g_thread_pool_new");
> +        goto error;
> +    }
> +
> +    p->completed = g_async_queue_new();
> +    if (!p->completed) {
> +        /*
> +         * We are going to terminate.
> +         * So don't worry about cleanup
> +         */
> +        goto error;
> +    }
> +    p->rfd = notifier_fds[0];
> +    p->wfd = notifier_fds[1];
> +
> +    fcntl(p->rfd, F_SETFL, O_NONBLOCK);
> +    fcntl(p->wfd, F_SETFL, O_NONBLOCK);
> +
> +    qemu_set_fd_handler(p->rfd, v9fs_qemu_process_req_done, NULL, NULL);

These should be in a common function that v9fs_init_worker_threads()
can call instead of copy-paste.

> +
> +    /* Signal parent thread that thread pools are created */
> +    g_cond_signal(csp->cond);

Now cs->mutex can be released and the QEMU thread can continue.

> +    g_mutex_lock(csp->mutex_term);
> +    /* TODO: Wake up cs->terminate during 9p hotplug support */
> +    g_cond_wait(csp->terminate, csp->mutex);
> +    g_mutex_unlock(csp->mutex_term);
> +    g_mutex_free(csp->mutex);
> +    g_cond_free(csp->cond);
> +    g_mutex_free(csp->mutex_term);
> +    g_cond_free(csp->terminate);
> +    qemu_free(csp->stack);

There goes my stack!  It's only safe to free this in the QEMU thread
that signalled, but you need a way to join this thread.

Did you try syscall(SYS_exit, 0) instead?  I think this would make
cleanup much easier and you wouldn't have to juggle around many heap
allocated resources.

> +    qemu_free(csp);
> +    return 0;
> +error:
> +    p->pool = NULL;
> +    g_cond_signal(csp->cond);
> +    return 0;
> +}
> +
> +static int v9fs_clone_chroot_th(FsContext *fs_ctx)
> +{
> +    pid_t pid;
> +    V9fsChrootState *cs;
> +
> +    cs = qemu_malloc(sizeof(*cs));
> +    cs->stack = qemu_malloc(THREAD_STACK);
> +    cs->mutex = g_mutex_new();
> +    cs->cond = g_cond_new();
> +    cs->mutex_term = g_mutex_new();
> +    cs->terminate = g_cond_new();
> +    cs->fs_ctx = fs_ctx;
> +
> +    g_mutex_lock(cs->mutex);
> +    pid = clone(&v9fs_chroot_function, cs->stack + THREAD_STACK, CLONE_FILES |
> +                                CLONE_SIGHAND | CLONE_VM | CLONE_THREAD, cs);

I'm a little concerned that thread-local storage is going to be broken
and glib will act weird, but I don't know the implementation details.

> +    if (pid == -1) {
> +        error_report("clone");
> +        g_mutex_unlock(cs->mutex);
> +        g_mutex_free(cs->mutex);
> +        g_cond_free(cs->cond);
> +        g_mutex_free(cs->mutex_term);
> +        g_cond_free(cs->terminate);
> +        qemu_free(cs->stack);
> +        qemu_free(cs);
> +        return pid;

You can avoid all the freeing by only creating the stack and mutex
before cloning.  If clone(2) fails you only need to free the stack,
mutex, and cs.  Allocate the rest after clone has succeeded but before
unlocking cs->mutex.

Stefan
Stefan Hajnoczi June 15, 2011, 5:35 p.m. UTC | #2
On Tue, Jun 14, 2011 at 9:12 AM, M. Mohan Kumar <mohan@in.ibm.com> wrote:
> [RFC PATCH] virtio-9p: Use clone approach to fix TOCTOU vulnerability
>
> In passthrough security model, following a symbolic link in the server
> side could result in TOCTTOU vulnerability.
>
> Use clone system call to create a thread which runs in chrooted
> environment. All passthrough model file operations are done from this
> thread to avoid TOCTTOU vulnerability.

How will chroot(2) work when QEMU runs as non-root (i.e. secure
production environments)?

Stefan
jvrao June 15, 2011, 6:16 p.m. UTC | #3
On 06/15/2011 10:35 AM, Stefan Hajnoczi wrote:
> On Tue, Jun 14, 2011 at 9:12 AM, M. Mohan Kumar<mohan@in.ibm.com>  wrote:
>> [RFC PATCH] virtio-9p: Use clone approach to fix TOCTOU vulnerability
>>
>> In passthrough security model, following a symbolic link in the server
>> side could result in TOCTTOU vulnerability.
>>
>> Use clone system call to create a thread which runs in chrooted
>> environment. All passthrough model file operations are done from this
>> thread to avoid TOCTTOU vulnerability.
> How will chroot(2) work when QEMU runs as non-root (i.e. secure
> production environments)?
>
This is used only in passthrough mode; passthrough mode needs root 
access by design.
There is no TOCTTOU vulnerability in mapped mode as symlinks are not 
actual symlinks on host FS.

JV

> Stefan
Andreas Färber June 15, 2011, 8:10 p.m. UTC | #4
Am 14.06.2011 um 10:12 schrieb M. Mohan Kumar:

> [RFC PATCH] virtio-9p: Use clone approach to fix TOCTOU vulnerability

Subject doesn't need to be duplicated.

> In passthrough security model, following a symbolic link in the server
> side could result in TOCTTOU vulnerability.

TOCTOU or TOCTTOU? Don't know what either is, so probably others too -  
that acronym could use an explanation or link to CVE/etc.

Andreas

> Use clone system call to create a thread which runs in chrooted
> environment. All passthrough model file operations are done from this
> thread to avoid TOCTTOU vulnerability.
>
> Signed-off-by: Venkateswararao Jujjuri <jvrao@linux.vnet.ibm.com>
> Signed-off-by: M. Mohan Kumar <mohan@in.ibm.com>
> ---
[...]
Stefan Hajnoczi June 16, 2011, 5:16 a.m. UTC | #5
On Wed, Jun 15, 2011 at 7:16 PM, Venkateswararao Jujjuri
<jvrao@linux.vnet.ibm.com> wrote:
> On 06/15/2011 10:35 AM, Stefan Hajnoczi wrote:
>>
>> On Tue, Jun 14, 2011 at 9:12 AM, M. Mohan Kumar<mohan@in.ibm.com>  wrote:
>>>
>>> [RFC PATCH] virtio-9p: Use clone approach to fix TOCTOU vulnerability
>>>
>>> In passthrough security model, following a symbolic link in the server
>>> side could result in TOCTTOU vulnerability.
>>>
>>> Use clone system call to create a thread which runs in chrooted
>>> environment. All passthrough model file operations are done from this
>>> thread to avoid TOCTTOU vulnerability.
>>
>> How will chroot(2) work when QEMU runs as non-root (i.e. secure
>> production environments)?
>>
> This is used only in passthrough mode; passthrough mode needs root access by
> design.
> There is no TOCTTOU vulnerability in mapped mode as symlinks are not actual
> symlinks on host FS.

So is passthrough mode something you only expect developers and
one-off command-line users to try?  I expect users would not want to
run QEMU as root in production.

Regarding mapped mode, I think jailing problems still exist there
since the guest could send a path that contains "../../../../.." and
escape the fs_root?

Stefan
Mohan Kumar M June 16, 2011, 11:20 a.m. UTC | #6
On Wed, Jun 15, 2011 at 10:10:00PM +0200, Andreas Färber wrote:
> Am 14.06.2011 um 10:12 schrieb M. Mohan Kumar:
>
>> [RFC PATCH] virtio-9p: Use clone approach to fix TOCTOU vulnerability
>
> Subject doesn't need to be duplicated.

Ok
>
>> In passthrough security model, following a symbolic link in the server
>> side could result in TOCTTOU vulnerability.
>
> TOCTOU or TOCTTOU? Don't know what either is, so probably others too -  
> that acronym could use an explanation or link to CVE/etc.

Its TOCTTOU (Time of check to time of usage). Sure next time I will include some more information about this.
Mohan Kumar M June 16, 2011, 11:28 a.m. UTC | #7
On Wed, Jun 15, 2011 at 04:24:12PM +0100, Stefan Hajnoczi wrote:
> On Tue, Jun 14, 2011 at 9:12 AM, M. Mohan Kumar <mohan@in.ibm.com> wrote:
> > [RFC PATCH] virtio-9p: Use clone approach to fix TOCTOU vulnerability
> >
> > In passthrough security model, following a symbolic link in the server
> > side could result in TOCTTOU vulnerability.
> >
> > Use clone system call to create a thread which runs in chrooted
> > environment. All passthrough model file operations are done from this
> > thread to avoid TOCTTOU vulnerability.
> >
> > Signed-off-by: Venkateswararao Jujjuri <jvrao@linux.vnet.ibm.com>
> > Signed-off-by: M. Mohan Kumar <mohan@in.ibm.com>
> > ---
> >  fsdev/file-op-9p.h         |    1 +
> >  hw/9pfs/virtio-9p-coth.c   |  105 +++++++++++++++++++++++++++++++++++++++++--
> >  hw/9pfs/virtio-9p-coth.h   |   13 +++++-
> >  hw/9pfs/virtio-9p-device.c |    7 +++-
> >  hw/9pfs/virtio-9p.h        |    6 ++-
> >  5 files changed, 124 insertions(+), 8 deletions(-)
> 
> This patch isn't against upstream virtio-9p.  Please post a link to a
> repo or more information.

Hi Stefan,
Thanks for the detailed review. I will address review comments in next
version.
diff mbox

Patch

diff --git a/fsdev/file-op-9p.h b/fsdev/file-op-9p.h
index 5d088d4..c54f6bf 100644
--- a/fsdev/file-op-9p.h
+++ b/fsdev/file-op-9p.h
@@ -60,6 +60,7 @@  typedef struct FsContext
     SecModel fs_sm;
     uid_t uid;
     int open_flags;
+    int enable_chroot;
     struct xattr_operations **xops;
     /* fs driver specific data */
     void *private;
diff --git a/hw/9pfs/virtio-9p-coth.c b/hw/9pfs/virtio-9p-coth.c
index e61b656..aa71a83 100644
--- a/hw/9pfs/virtio-9p-coth.c
+++ b/hw/9pfs/virtio-9p-coth.c
@@ -17,6 +17,8 @@ 
 #include "qemu-thread.h"
 #include "qemu-coroutine.h"
 #include "virtio-9p-coth.h"
+#include <sys/syscall.h>
+#include "qemu-error.h"
 
 /* v9fs glib thread pool */
 static V9fsThPool v9fs_pool;
@@ -56,7 +58,96 @@  static void v9fs_thread_routine(gpointer data, gpointer user_data)
     } while (len == -1 && errno == EINTR);
 }
 
-int v9fs_init_worker_threads(void)
+static int v9fs_chroot_function(void *arg)
+{
+    GError *err;
+    V9fsChrootState *csp = arg;
+    FsContext *fs_ctx = csp->fs_ctx;
+    V9fsThPool *p = &v9fs_pool;
+    int notifier_fds[2];
+
+    if (chroot(fs_ctx->fs_root) < 0) {
+        error_report("chroot");
+        goto error;
+    }
+
+    if (qemu_pipe(notifier_fds) == -1) {
+        return -1;
+    }
+    p->pool = g_thread_pool_new(v9fs_thread_routine, p, 10, TRUE, &err);
+    if (!p->pool) {
+        error_report("g_thread_pool_new");
+        goto error;
+    }
+
+    p->completed = g_async_queue_new();
+    if (!p->completed) {
+        /*
+         * We are going to terminate.
+         * So don't worry about cleanup
+         */
+        goto error;
+    }
+    p->rfd = notifier_fds[0];
+    p->wfd = notifier_fds[1];
+
+    fcntl(p->rfd, F_SETFL, O_NONBLOCK);
+    fcntl(p->wfd, F_SETFL, O_NONBLOCK);
+
+    qemu_set_fd_handler(p->rfd, v9fs_qemu_process_req_done, NULL, NULL);
+
+    /* Signal parent thread that thread pools are created */
+    g_cond_signal(csp->cond);
+    g_mutex_lock(csp->mutex_term);
+    /* TODO: Wake up cs->terminate during 9p hotplug support */
+    g_cond_wait(csp->terminate, csp->mutex);
+    g_mutex_unlock(csp->mutex_term);
+    g_mutex_free(csp->mutex);
+    g_cond_free(csp->cond);
+    g_mutex_free(csp->mutex_term);
+    g_cond_free(csp->terminate);
+    qemu_free(csp->stack);
+    qemu_free(csp);
+    return 0;
+error:
+    p->pool = NULL;
+    g_cond_signal(csp->cond);
+    return 0;
+}
+
+static int v9fs_clone_chroot_th(FsContext *fs_ctx)
+{
+    pid_t pid;
+    V9fsChrootState *cs;
+
+    cs = qemu_malloc(sizeof(*cs));
+    cs->stack = qemu_malloc(THREAD_STACK);
+    cs->mutex = g_mutex_new();
+    cs->cond = g_cond_new();
+    cs->mutex_term = g_mutex_new();
+    cs->terminate = g_cond_new();
+    cs->fs_ctx = fs_ctx;
+
+    g_mutex_lock(cs->mutex);
+    pid = clone(&v9fs_chroot_function, cs->stack + THREAD_STACK, CLONE_FILES |
+                                CLONE_SIGHAND | CLONE_VM | CLONE_THREAD, cs);
+    if (pid == -1) {
+        error_report("clone");
+        g_mutex_unlock(cs->mutex);
+        g_mutex_free(cs->mutex);
+        g_cond_free(cs->cond);
+        g_mutex_free(cs->mutex_term);
+        g_cond_free(cs->terminate);
+        qemu_free(cs->stack);
+        qemu_free(cs);
+        return pid;
+    }
+    g_cond_wait(cs->cond, cs->mutex);
+    g_mutex_unlock(cs->mutex);
+    return pid;
+}
+
+int v9fs_init_worker_threads(FsContext *fs_ctx)
 {
     int notifier_fds[2];
     V9fsThPool *p = &v9fs_pool;
@@ -66,14 +157,18 @@  int v9fs_init_worker_threads(void)
         g_thread_init(NULL);
     }
 
-    if (qemu_pipe(notifier_fds) == -1) {
-        return -1;
+    if (fs_ctx->enable_chroot) {
+        return v9fs_clone_chroot_th(fs_ctx);
+    } else {
+        p->pool = g_thread_pool_new(v9fs_thread_routine, p, -1, FALSE, NULL);
     }
-
-    p->pool = g_thread_pool_new(v9fs_thread_routine, p, -1, FALSE, NULL);
     if (!p->pool) {
         return -1;
     }
+    if (qemu_pipe(notifier_fds) == -1) {
+        return -1;
+    }
+
     p->completed = g_async_queue_new();
     if (!p->completed) {
         /*
diff --git a/hw/9pfs/virtio-9p-coth.h b/hw/9pfs/virtio-9p-coth.h
index 4630080..422354a 100644
--- a/hw/9pfs/virtio-9p-coth.h
+++ b/hw/9pfs/virtio-9p-coth.h
@@ -20,6 +20,8 @@ 
 #include "virtio-9p.h"
 #include <glib.h>
 
+#define THREAD_STACK (16 * 1024)
+
 typedef struct V9fsThPool {
     int rfd;
     int wfd;
@@ -27,6 +29,15 @@  typedef struct V9fsThPool {
     GAsyncQueue *completed;
 } V9fsThPool;
 
+typedef struct V9fsChrootState {
+    FsContext *fs_ctx;
+    GMutex *mutex;
+    GCond *cond;
+    GMutex *mutex_term;
+    GCond *terminate;
+    void *stack;
+} V9fsChrootState;
+
 /*
  * we want to use bottom half because we want to make sure the below
  * sequence of events.
@@ -55,7 +66,7 @@  typedef struct V9fsThPool {
     } while (0)
 
 extern void co_run_in_worker_bh(void *);
-extern int v9fs_init_worker_threads(void);
+extern int v9fs_init_worker_threads(FsContext *fs_ctx);
 extern int v9fs_co_readlink(V9fsPDU *, V9fsPath *, V9fsString *);
 extern int v9fs_co_readdir_r(V9fsPDU *, V9fsFidState *,
                            struct dirent *, struct dirent **result);
diff --git a/hw/9pfs/virtio-9p-device.c b/hw/9pfs/virtio-9p-device.c
index c7a7f44..5e2cc5a 100644
--- a/hw/9pfs/virtio-9p-device.c
+++ b/hw/9pfs/virtio-9p-device.c
@@ -82,10 +82,15 @@  VirtIODevice *virtio_9p_init(DeviceState *dev, V9fsConf *conf)
         exit(1);
     }
 
+    s->ctx.enable_chroot = 0;
+
     if (!strcmp(fse->security_model, "passthrough")) {
         /* Files on the Fileserver set to client user credentials */
         s->ctx.fs_sm = SM_PASSTHROUGH;
         s->ctx.xops = passthrough_xattr_ops;
+
+        /* TODO: Add a configuration option to enable this */
+        s->ctx.enable_chroot = 1;
     } else if (!strcmp(fse->security_model, "mapped")) {
         /* Files on the fileserver are set to QEMU credentials.
          * Client user credentials are saved in extended attributes.
@@ -146,7 +151,7 @@  VirtIODevice *virtio_9p_init(DeviceState *dev, V9fsConf *conf)
                 " and export path:%s\n", conf->fsdev_id, s->ctx.fs_root);
         exit(1);
     }
-    if (v9fs_init_worker_threads() < 0) {
+    if (v9fs_init_worker_threads(&s->ctx) < 0) {
         fprintf(stderr, "worker thread initialization failed\n");
         exit(1);
     }
diff --git a/hw/9pfs/virtio-9p.h b/hw/9pfs/virtio-9p.h
index 9e3d4d3..ffe4f60 100644
--- a/hw/9pfs/virtio-9p.h
+++ b/hw/9pfs/virtio-9p.h
@@ -113,7 +113,11 @@  enum p9_proto_version {
 #define FID_NON_RECLAIMABLE     0x2
 static inline const char *rpath(FsContext *ctx, const char *path, char *buffer)
 {
-    snprintf(buffer, PATH_MAX, "%s/%s", ctx->fs_root, path);
+    if (ctx->enable_chroot) {
+        snprintf(buffer, PATH_MAX, "%s", path);
+    } else {
+        snprintf(buffer, PATH_MAX, "%s/%s", ctx->fs_root, path);
+    }
     return buffer;
 }