diff mbox series

[RFC,v4,07/49] multi-process: define mpqemu-link object

Message ID b0bc2a517b0a41eb138ed4127aebe8a3952daec6.1571905346.git.jag.raman@oracle.com
State New
Headers show
Series Initial support of multi-process qemu | expand

Commit Message

Jag Raman Oct. 24, 2019, 9:08 a.m. UTC
Defines mpqemu-link object which forms the communication link between
QEMU & emulation program.
Adds functions to configure members of mpqemu-link object instance.
Adds functions to send and receive messages over the communication
channel.
Adds GMainLoop to handle events received on the communication channel.

Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
Signed-off-by: John G Johnson <john.g.johnson@oracle.com>
Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
---
 v1 -> v2:
   - Use default context for main loop instead of a new context

 v2 -> v3:
   - Enabled multi-channel support in the communication link

 v3 -> v4:
  - Change the name of proxy-link to mpqemu-link
  - Use separate locks for sending and receiving messages

 include/io/mpqemu-link.h | 150 +++++++++++++++++++++++
 io/Makefile.objs         |   2 +
 io/mpqemu-link.c         | 309 +++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 461 insertions(+)
 create mode 100644 include/io/mpqemu-link.h
 create mode 100644 io/mpqemu-link.c

Comments

Stefan Hajnoczi Nov. 11, 2019, 4:41 p.m. UTC | #1
On Thu, Oct 24, 2019 at 05:08:48AM -0400, Jagannathan Raman wrote:
> +int mpqemu_msg_recv(MPQemuLinkState *s, MPQemuMsg *msg, MPQemuChannel *chan)
> +{
> +    int rc;
> +    uint8_t *data;
> +    union {
> +        char control[CMSG_SPACE(REMOTE_MAX_FDS * sizeof(int))];
> +        struct cmsghdr align;
> +    } u;
> +    struct msghdr hdr;
> +    struct cmsghdr *chdr;
> +    size_t fdsize;
> +    int sock = chan->sock;
> +    QemuMutex *lock = &chan->recv_lock;
> +
> +    struct iovec iov = {
> +        .iov_base = (char *) msg,
> +        .iov_len = MPQEMU_MSG_HDR_SIZE,
> +    };
> +
> +    memset(&hdr, 0, sizeof(hdr));
> +    memset(&u, 0, sizeof(u));
> +
> +    hdr.msg_iov = &iov;
> +    hdr.msg_iovlen = 1;
> +    hdr.msg_control = &u;
> +    hdr.msg_controllen = sizeof(u);
> +
> +    qemu_mutex_lock(lock);
> +
> +    do {
> +        rc = recvmsg(sock, &hdr, 0);
> +    } while (rc < 0 && (errno == EINTR || errno == EAGAIN));
> +
> +    if (rc < 0) {
> +        qemu_log_mask(LOG_REMOTE_DEBUG, "%s - recvmsg rc is %d, errno is %d,"
> +                      " sock %d\n", __func__, rc, errno, sock);
> +        qemu_mutex_unlock(lock);
> +        return rc;
> +    }
> +
> +    msg->num_fds = 0;
> +    for (chdr = CMSG_FIRSTHDR(&hdr); chdr != NULL;
> +         chdr = CMSG_NXTHDR(&hdr, chdr)) {
> +        if ((chdr->cmsg_level == SOL_SOCKET) &&
> +            (chdr->cmsg_type == SCM_RIGHTS)) {
> +            fdsize = chdr->cmsg_len - CMSG_LEN(0);
> +            msg->num_fds = fdsize / sizeof(int);
> +            if (msg->num_fds > REMOTE_MAX_FDS) {
> +                /*
> +                 * TODO: Security issue detected. Sender never sends more
> +                 * than REMOTE_MAX_FDS. This condition should be signaled to
> +                 * the admin
> +                 */
> +                qemu_log_mask(LOG_REMOTE_DEBUG, "%s: Max FDs exceeded\n", __func__);
> +                return -ERANGE;
> +            }
> +
> +            memcpy(msg->fds, CMSG_DATA(chdr), fdsize);
> +            break;
> +        }
> +    }
> +
> +    if (msg->size && msg->bytestream) {
> +        msg->data2 = calloc(1, msg->size);
> +        data = msg->data2;
> +    } else {
> +        data = (uint8_t *)&msg->data1;
> +    }
> +
> +    if (msg->size) {
> +        do {
> +            rc = read(sock, data, msg->size);
> +        } while (rc < 0 && (errno == EINTR || errno == EAGAIN));
> +    }
> +
> +    qemu_mutex_unlock(lock);
> +
> +    return rc;
> +}

This code is still insecure.  Until the communication between processes
is made secure this series does not meet its goal of providing process
isolation.

1. An attacker can overflow msg->data1 easily by setting msg->size but
   not msg->bytestream.
2. An attacker can allocate data2, all mpqemu_msg_recv() callers
   need to free it to prevent memory leaks.
3. mpqemu_msg_recv() callers generally do not validate untrusted msg
   fields.  All the code needs to be audited.

Stefan
Jag Raman Nov. 13, 2019, 3:47 p.m. UTC | #2
On 11/11/2019 11:41 AM, Stefan Hajnoczi wrote:
> On Thu, Oct 24, 2019 at 05:08:48AM -0400, Jagannathan Raman wrote:
>> +int mpqemu_msg_recv(MPQemuLinkState *s, MPQemuMsg *msg, MPQemuChannel *chan)
>> +{
>> +    int rc;
>> +    uint8_t *data;
>> +    union {
>> +        char control[CMSG_SPACE(REMOTE_MAX_FDS * sizeof(int))];
>> +        struct cmsghdr align;
>> +    } u;
>> +    struct msghdr hdr;
>> +    struct cmsghdr *chdr;
>> +    size_t fdsize;
>> +    int sock = chan->sock;
>> +    QemuMutex *lock = &chan->recv_lock;
>> +
>> +    struct iovec iov = {
>> +        .iov_base = (char *) msg,
>> +        .iov_len = MPQEMU_MSG_HDR_SIZE,
>> +    };
>> +
>> +    memset(&hdr, 0, sizeof(hdr));
>> +    memset(&u, 0, sizeof(u));
>> +
>> +    hdr.msg_iov = &iov;
>> +    hdr.msg_iovlen = 1;
>> +    hdr.msg_control = &u;
>> +    hdr.msg_controllen = sizeof(u);
>> +
>> +    qemu_mutex_lock(lock);
>> +
>> +    do {
>> +        rc = recvmsg(sock, &hdr, 0);
>> +    } while (rc < 0 && (errno == EINTR || errno == EAGAIN));
>> +
>> +    if (rc < 0) {
>> +        qemu_log_mask(LOG_REMOTE_DEBUG, "%s - recvmsg rc is %d, errno is %d,"
>> +                      " sock %d\n", __func__, rc, errno, sock);
>> +        qemu_mutex_unlock(lock);
>> +        return rc;
>> +    }
>> +
>> +    msg->num_fds = 0;
>> +    for (chdr = CMSG_FIRSTHDR(&hdr); chdr != NULL;
>> +         chdr = CMSG_NXTHDR(&hdr, chdr)) {
>> +        if ((chdr->cmsg_level == SOL_SOCKET) &&
>> +            (chdr->cmsg_type == SCM_RIGHTS)) {
>> +            fdsize = chdr->cmsg_len - CMSG_LEN(0);
>> +            msg->num_fds = fdsize / sizeof(int);
>> +            if (msg->num_fds > REMOTE_MAX_FDS) {
>> +                /*
>> +                 * TODO: Security issue detected. Sender never sends more
>> +                 * than REMOTE_MAX_FDS. This condition should be signaled to
>> +                 * the admin
>> +                 */
>> +                qemu_log_mask(LOG_REMOTE_DEBUG, "%s: Max FDs exceeded\n", __func__);
>> +                return -ERANGE;
>> +            }
>> +
>> +            memcpy(msg->fds, CMSG_DATA(chdr), fdsize);
>> +            break;
>> +        }
>> +    }
>> +
>> +    if (msg->size && msg->bytestream) {
>> +        msg->data2 = calloc(1, msg->size);
>> +        data = msg->data2;
>> +    } else {
>> +        data = (uint8_t *)&msg->data1;
>> +    }
>> +
>> +    if (msg->size) {
>> +        do {
>> +            rc = read(sock, data, msg->size);
>> +        } while (rc < 0 && (errno == EINTR || errno == EAGAIN));
>> +    }
>> +
>> +    qemu_mutex_unlock(lock);
>> +
>> +    return rc;
>> +}
> 
> This code is still insecure.  Until the communication between processes
> is made secure this series does not meet its goal of providing process
> isolation.
> 
> 1. An attacker can overflow msg->data1 easily by setting msg->size but
>     not msg->bytestream.

We will add a check to ensure that msg->size does not exceed
sizeof(msg->data1) if msg->bytestream is not set.

> 2. An attacker can allocate data2, all mpqemu_msg_recv() callers
>     need to free it to prevent memory leaks.

We will address this memory leak.

> 3. mpqemu_msg_recv() callers generally do not validate untrusted msg
>     fields.  All the code needs to be audited.

mpqemu_msg_recv() callers validate the num_fds field. But we will add
more fields for validation by the callers.

Thanks!
--
Jag

> 
> Stefan
>
Stefan Hajnoczi Nov. 13, 2019, 3:53 p.m. UTC | #3
On Thu, Oct 24, 2019 at 05:08:48AM -0400, Jagannathan Raman wrote:
> +#ifndef MPQEMU_LINK_H
> +#define MPQEMU_LINK_H
> +
> +#include "qemu/osdep.h"
> +#include "qemu-common.h"
> +
> +#include <stddef.h>
> +#include <stdint.h>

These are already included by "qemu/osdep.h".

> +#include <pthread.h>

Is <pthread.h> needed?

> +
> +#include "qom/object.h"
> +#include "qemu/thread.h"
> +
> +#define TYPE_MPQEMU_LINK "mpqemu-link"
> +#define MPQEMU_LINK(obj) \
> +    OBJECT_CHECK(MPQemuLinkState, (obj), TYPE_MPQEMU_LINK)
> +
> +#define REMOTE_MAX_FDS 8
> +
> +#define MPQEMU_MSG_HDR_SIZE offsetof(MPQemuMsg, data1.u64)
> +
> +/**
> + * mpqemu_cmd_t:
> + * CONF_READ        PCI config. space read
> + * CONF_WRITE       PCI config. space write
> + *
> + * proc_cmd_t enum type to specify the command to be executed on the remote
> + * device.
> + */
> +typedef enum {
> +    INIT = 0,
> +    CONF_READ,
> +    CONF_WRITE,
> +    MAX,
> +} mpqemu_cmd_t;

Please allow for future non-PCI devices by clearly naming PCI-specific
commands and including a bus type in the initialization messages.

> diff --git a/io/mpqemu-link.c b/io/mpqemu-link.c
> new file mode 100644
> index 0000000..b39f4d0
> --- /dev/null
> +++ b/io/mpqemu-link.c
> @@ -0,0 +1,309 @@
> +/*
> + * Communication channel between QEMU and remote device process
> + *
> + * Copyright 2019, Oracle and/or its affiliates.
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a copy
> + * of this software and associated documentation files (the "Software"), to deal
> + * in the Software without restriction, including without limitation the rights
> + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
> + * copies of the Software, and to permit persons to whom the Software is
> + * furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
> + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
> + * THE SOFTWARE.
> + */
> +
> +#include "qemu/osdep.h"
> +#include "qemu-common.h"
> +
> +#include <assert.h>
> +#include <errno.h>
> +#include <pthread.h>
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <sys/types.h>
> +#include <sys/socket.h>
> +#include <sys/un.h>
> +#include <unistd.h>
> +#include <limits.h>
> +#include <poll.h>

Many of these are already included by "qemu/osdep.h".  Some of them
shouldn't be used directly because QEMU or glib have abstractions that
hide the platform-specific differences (e.g. pthread, poll).

> +MPQemuLinkState *mpqemu_link_create(void)
> +{
> +    return MPQEMU_LINK(object_new(TYPE_MPQEMU_LINK));
> +}

I'm not sure what the purpose of this object is.  mpqemu_link_create()
suggests the objects will be created internally instead of via -object
mpqemu-link,..., which is unusual.

mpqemu_msg_send() and mpqemu_msg_recv() seem to be the main functions
but they do not even use their MPQemuLinkState *s argument.

> +void mpqemu_start_coms(MPQemuLinkState *s)
> +{
> +
> +    g_assert(g_source_attach(&s->com->gsrc, s->ctx));
> +
> +    g_main_loop_run(s->loop);
> +}

There is already IOThread if you need an event loop thread.  But does
this need to be its own thread?  The communication should be
asynchronous and therefore it can run in the main event loop or any
existing IOThread.
Jag Raman Nov. 18, 2019, 3:26 p.m. UTC | #4
On 11/13/2019 10:53 AM, Stefan Hajnoczi wrote:
> On Thu, Oct 24, 2019 at 05:08:48AM -0400, Jagannathan Raman wrote:
>> +#ifndef MPQEMU_LINK_H
>> +#define MPQEMU_LINK_H
>> +
>> +#include "qemu/osdep.h"
>> +#include "qemu-common.h"
>> +
>> +#include <stddef.h>
>> +#include <stdint.h>
> 
> These are already included by "qemu/osdep.h".
> 
>> +#include <pthread.h>
> 
> Is <pthread.h> needed?

It's not needed. We'll remove it.

> 
>> +
>> +#include "qom/object.h"
>> +#include "qemu/thread.h"
>> +
>> +#define TYPE_MPQEMU_LINK "mpqemu-link"
>> +#define MPQEMU_LINK(obj) \
>> +    OBJECT_CHECK(MPQemuLinkState, (obj), TYPE_MPQEMU_LINK)
>> +
>> +#define REMOTE_MAX_FDS 8
>> +
>> +#define MPQEMU_MSG_HDR_SIZE offsetof(MPQemuMsg, data1.u64)
>> +
>> +/**
>> + * mpqemu_cmd_t:
>> + * CONF_READ        PCI config. space read
>> + * CONF_WRITE       PCI config. space write
>> + *
>> + * proc_cmd_t enum type to specify the command to be executed on the remote
>> + * device.
>> + */
>> +typedef enum {
>> +    INIT = 0,
>> +    CONF_READ,
>> +    CONF_WRITE,
>> +    MAX,
>> +} mpqemu_cmd_t;
> 
> Please allow for future non-PCI devices by clearly naming PCI-specific
> commands and including a bus type in the initialization messages.

OK, will do.

> 
>> diff --git a/io/mpqemu-link.c b/io/mpqemu-link.c
>> new file mode 100644
>> index 0000000..b39f4d0
>> --- /dev/null
>> +++ b/io/mpqemu-link.c
>> @@ -0,0 +1,309 @@
>> +/*
>> + * Communication channel between QEMU and remote device process
>> + *
>> + * Copyright 2019, Oracle and/or its affiliates.
>> + *
>> + * Permission is hereby granted, free of charge, to any person obtaining a copy
>> + * of this software and associated documentation files (the "Software"), to deal
>> + * in the Software without restriction, including without limitation the rights
>> + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
>> + * copies of the Software, and to permit persons to whom the Software is
>> + * furnished to do so, subject to the following conditions:
>> + *
>> + * The above copyright notice and this permission notice shall be included in
>> + * all copies or substantial portions of the Software.
>> + *
>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
>> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
>> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
>> + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
>> + * THE SOFTWARE.
>> + */
>> +
>> +#include "qemu/osdep.h"
>> +#include "qemu-common.h"
>> +
>> +#include <assert.h>
>> +#include <errno.h>
>> +#include <pthread.h>
>> +#include <stdio.h>
>> +#include <stdlib.h>
>> +#include <sys/types.h>
>> +#include <sys/socket.h>
>> +#include <sys/un.h>
>> +#include <unistd.h>
>> +#include <limits.h>
>> +#include <poll.h>
> 
> Many of these are already included by "qemu/osdep.h".  Some of them
> shouldn't be used directly because QEMU or glib have abstractions that
> hide the platform-specific differences (e.g. pthread, poll).
> 
>> +MPQemuLinkState *mpqemu_link_create(void)
>> +{
>> +    return MPQEMU_LINK(object_new(TYPE_MPQEMU_LINK));
>> +}
> 
> I'm not sure what the purpose of this object is.  mpqemu_link_create()
> suggests the objects will be created internally instead of via -object
> mpqemu-link,..., which is unusual.
> 
> mpqemu_msg_send() and mpqemu_msg_recv() seem to be the main functions
> but they do not even use their MPQemuLinkState *s argument.

The LINK object is made up of multiple CHANNELS objects. For example, a
link between QEMU & the remote process could be comprised of multiple
channels.

You're correct, mpqemu_msg_send() & mpqemu_msg_recv() don't use the
argument "s". This was a consequence of adding the multi-channel
support, before which this argument was used. We will fix this in the
next review.

Thank you!
--
Jag

> 
>> +void mpqemu_start_coms(MPQemuLinkState *s)
>> +{
>> +
>> +    g_assert(g_source_attach(&s->com->gsrc, s->ctx));
>> +
>> +    g_main_loop_run(s->loop);
>> +}
> 
> There is already IOThread if you need an event loop thread.  But does
> this need to be its own thread?  The communication should be
> asynchronous and therefore it can run in the main event loop or any
> existing IOThread.
>
diff mbox series

Patch

diff --git a/include/io/mpqemu-link.h b/include/io/mpqemu-link.h
new file mode 100644
index 0000000..345c67e
--- /dev/null
+++ b/include/io/mpqemu-link.h
@@ -0,0 +1,150 @@ 
+/*
+ * Communication channel between QEMU and remote device process
+ *
+ * Copyright 2019, Oracle and/or its affiliates.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef MPQEMU_LINK_H
+#define MPQEMU_LINK_H
+
+#include "qemu/osdep.h"
+#include "qemu-common.h"
+
+#include <stddef.h>
+#include <stdint.h>
+#include <pthread.h>
+
+#include "qom/object.h"
+#include "qemu/thread.h"
+
+#define TYPE_MPQEMU_LINK "mpqemu-link"
+#define MPQEMU_LINK(obj) \
+    OBJECT_CHECK(MPQemuLinkState, (obj), TYPE_MPQEMU_LINK)
+
+#define REMOTE_MAX_FDS 8
+
+#define MPQEMU_MSG_HDR_SIZE offsetof(MPQemuMsg, data1.u64)
+
+/**
+ * mpqemu_cmd_t:
+ * INIT             First command value (0).  NOTE(review): its handling is
+ *                  not part of this patch; presumably an initial handshake
+ *                  message -- confirm.
+ * CONF_READ        PCI config. space read
+ * CONF_WRITE       PCI config. space write
+ * MAX              Last enumerator.  NOTE(review): presumably a sentinel
+ *                  marking the number of valid commands rather than a
+ *                  command itself -- confirm.
+ *
+ * mpqemu_cmd_t enum type to specify the command to be executed on the remote
+ * device.
+ */
+typedef enum {
+    INIT = 0,
+    CONF_READ,
+    CONF_WRITE,
+    MAX,
+} mpqemu_cmd_t;
+
+/**
+ * MPQemuMsg:
+ * @cmd: The remote command
+ * @bytestream: Indicates if the data to be shared is structured (data1)
+ *              or unstructured (data2)
+ * @size: Size in bytes of the payload that is written/read after the header
+ * @data1: Structured data, carried inline in the message
+ * @fds: File descriptors to be shared with remote device
+ * @num_fds: Number of valid entries in @fds (at most REMOTE_MAX_FDS)
+ * @data2: Unstructured data; buffer of @size bytes used when @bytestream
+ *         is set
+ *
+ * MPQemuMsg Format of the message sent to the remote device from QEMU.
+ *
+ * Only the leading MPQEMU_MSG_HDR_SIZE bytes (@cmd, @bytestream and @size,
+ * i.e. everything before @data1) are transferred as the message header.
+ * @fds are passed as SCM_RIGHTS ancillary data alongside the header, and the
+ * payload (@data1 or the buffer behind @data2) follows in a separate
+ * write/read.  @num_fds and the @data2 pointer itself are never put on the
+ * wire.
+ *
+ */
+typedef struct {
+    mpqemu_cmd_t cmd;
+    int bytestream;
+    size_t size;
+
+    union {
+        uint64_t u64;
+    } data1;
+
+    int fds[REMOTE_MAX_FDS];
+    int num_fds;
+
+    uint8_t *data2;
+} MPQemuMsg;
+
+/*
+ * conf_data_msg: three-field record (addr, val, l).
+ *
+ * NOTE(review): nothing else in this patch references this struct.
+ * Presumably it is the payload of CONF_READ/CONF_WRITE, with addr being the
+ * config-space offset, val the data and l the access length -- confirm
+ * against the users added later in the series.
+ */
+struct conf_data_msg {
+    uint32_t addr;
+    uint32_t val;
+    int l;
+};
+
+/**
+ * MPQemuChannel:
+ * @gsrc: GSource object to be used by loop.  Must remain the first member:
+ *        the channel is allocated with g_source_new() and the GSource
+ *        handlers cast the GSource pointer back to MPQemuChannel.
+ * @gpfd: GPollFD object containing the socket & events to monitor
+ * @sock: Socket to send/receive communication, same as the one in gpfd
+ * @send_lock: Mutex to synchronize access to the send stream
+ * @recv_lock: Mutex to synchronize access to the recv stream
+ *
+ * Defines a channel; one or more channels make up the communication link
+ * between QEMU and the remote process.
+ */
+
+typedef struct MPQemuChannel {
+    GSource gsrc;
+    GPollFD gpfd;
+    int sock;
+    QemuMutex send_lock;
+    QemuMutex recv_lock;
+} MPQemuChannel;
+
+typedef void (*mpqemu_link_callback)(GIOCondition cond, MPQemuChannel *chan);
+
+/*
+ * MPQemuLinkState Instance info. of the communication
+ * link between QEMU and remote process. The Link could
+ * be made up of multiple channels.
+ *
+ * obj        Parent QOM object
+ * ctx        GMainContext to be used for communication
+ * loop       Main loop that would be used to poll for incoming data
+ * com        Communication channel to transport control messages
+ * callback   mpqemu_link_callback invoked from the channel's GSource
+ *            dispatch handler; it receives the channel's pending poll
+ *            events (revents) and the channel itself.  Set it with
+ *            mpqemu_link_set_callback().
+ *
+ */
+
+typedef struct MPQemuLinkState {
+    Object obj;
+
+    GMainContext *ctx;
+    GMainLoop *loop;
+
+    MPQemuChannel *com;
+
+    mpqemu_link_callback callback;
+} MPQemuLinkState;
+
+MPQemuLinkState *mpqemu_link_create(void);
+void mpqemu_link_finalize(MPQemuLinkState *s);
+
+void mpqemu_msg_send(MPQemuLinkState *s, MPQemuMsg *msg, MPQemuChannel *chan);
+int mpqemu_msg_recv(MPQemuLinkState *s, MPQemuMsg *msg, MPQemuChannel *chan);
+
+void mpqemu_init_channel(MPQemuLinkState *s, MPQemuChannel **chan, int fd);
+void mpqemu_destroy_channel(MPQemuChannel *chan);
+void mpqemu_link_set_callback(MPQemuLinkState *s, mpqemu_link_callback callback);
+void mpqemu_start_coms(MPQemuLinkState *s);
+
+#endif
diff --git a/io/Makefile.objs b/io/Makefile.objs
index 9a20fce..5875ab0 100644
--- a/io/Makefile.objs
+++ b/io/Makefile.objs
@@ -10,3 +10,5 @@  io-obj-y += channel-util.o
 io-obj-y += dns-resolver.o
 io-obj-y += net-listener.o
 io-obj-y += task.o
+
+io-obj-$(CONFIG_MPQEMU) += mpqemu-link.o
diff --git a/io/mpqemu-link.c b/io/mpqemu-link.c
new file mode 100644
index 0000000..b39f4d0
--- /dev/null
+++ b/io/mpqemu-link.c
@@ -0,0 +1,309 @@ 
+/*
+ * Communication channel between QEMU and remote device process
+ *
+ * Copyright 2019, Oracle and/or its affiliates.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu-common.h"
+
+#include <assert.h>
+#include <errno.h>
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <unistd.h>
+#include <limits.h>
+#include <poll.h>
+
+#include "qemu/module.h"
+#include "io/mpqemu-link.h"
+#include "qemu/log.h"
+
+/*
+ * GSource callback table handed to g_source_new() for every channel; it is
+ * filled in by mpqemu_init_channel().  It is not declared in
+ * io/mpqemu-link.h, so keep it file-local instead of exporting such a
+ * generic symbol name.
+ */
+static GSourceFuncs gsrc_funcs;
+
+/*
+ * QOM instance initialiser: points the link at GLib's default main context
+ * and creates a main loop (initially not running) on top of it.
+ */
+static void mpqemu_link_inst_init(Object *obj)
+{
+    MPQemuLinkState *link = MPQEMU_LINK(obj);
+    GMainContext *default_ctx = g_main_context_default();
+
+    link->ctx = default_ctx;
+    link->loop = g_main_loop_new(default_ctx, FALSE);
+}
+
+/* QOM description of the mpqemu-link type: a plain Object subclass. */
+static const TypeInfo mpqemu_link_info = {
+    .parent = TYPE_OBJECT,
+    .name = TYPE_MPQEMU_LINK,
+    .instance_init = mpqemu_link_inst_init,
+    .instance_size = sizeof(MPQemuLinkState),
+};
+
+/* Registers the mpqemu-link type; passed to type_init() below. */
+static void mpqemu_link_register_types(void)
+{
+    type_register_static(&mpqemu_link_info);
+}
+
+type_init(mpqemu_link_register_types)
+
+/*
+ * mpqemu_link_create:
+ *
+ * Instantiates a new TYPE_MPQEMU_LINK object.
+ *
+ * Returns: the new object, cast to MPQemuLinkState.
+ */
+MPQemuLinkState *mpqemu_link_create(void)
+{
+    Object *obj = object_new(TYPE_MPQEMU_LINK);
+
+    return MPQEMU_LINK(obj);
+}
+
+/*
+ * mpqemu_link_finalize:
+ * @s: link to tear down
+ *
+ * Stops the link's main loop, drops the loop reference obtained from
+ * g_main_loop_new(), destroys the communication channel (if any) and
+ * releases one reference on the link object.
+ */
+void mpqemu_link_finalize(MPQemuLinkState *s)
+{
+    /*
+     * Ask the loop to stop while we still hold our reference; unref'ing
+     * first could free the loop and make the quit call a use-after-free.
+     */
+    g_main_loop_quit(s->loop);
+    g_main_loop_unref(s->loop);
+    s->loop = NULL;
+
+    /*
+     * s->ctx was obtained from g_main_context_default(), which does not
+     * hand out a reference, so it must not be unref'd here.
+     */
+    s->ctx = NULL;
+
+    if (s->com) {
+        mpqemu_destroy_channel(s->com);
+        s->com = NULL;
+    }
+
+    object_unref(OBJECT(s));
+}
+
+/*
+ * mpqemu_msg_send:
+ * @s: link the channel belongs to (currently unused)
+ * @msg: message to transmit
+ * @chan: channel to send on
+ *
+ * Sends the message header (the first MPQEMU_MSG_HDR_SIZE bytes of @msg)
+ * with sendmsg(), attaching msg->fds as SCM_RIGHTS ancillary data when
+ * msg->num_fds is positive, and then writes msg->size payload bytes taken
+ * from msg->data2 (bytestream) or from msg->data1 (otherwise).
+ *
+ * Nothing is sent if the message is malformed: too many fds, an inline
+ * payload larger than data1, or a bytestream payload without a buffer.
+ * Failures are only logged; the function has no return value.
+ *
+ * NOTE(review): a short write() of the payload is not retried.
+ */
+void mpqemu_msg_send(MPQemuLinkState *s, MPQemuMsg *msg, MPQemuChannel *chan)
+{
+    int rc;
+    uint8_t *data;
+    union {
+        char control[CMSG_SPACE(REMOTE_MAX_FDS * sizeof(int))];
+        struct cmsghdr align;
+    } u;
+    struct msghdr hdr;
+    struct cmsghdr *chdr;
+    int sock = chan->sock;
+    QemuMutex *lock = &chan->send_lock;
+
+    struct iovec iov = {
+        .iov_base = (char *) msg,
+        .iov_len = MPQEMU_MSG_HDR_SIZE,
+    };
+
+    memset(&hdr, 0, sizeof(hdr));
+    memset(&u, 0, sizeof(u));
+
+    hdr.msg_iov = &iov;
+    hdr.msg_iovlen = 1;
+
+    if (msg->num_fds > REMOTE_MAX_FDS) {
+        qemu_log_mask(LOG_REMOTE_DEBUG, "%s: Max FDs exceeded\n", __func__);
+        return;
+    }
+
+    /*
+     * Validate the payload before anything hits the wire so the peer never
+     * sees a header that is not followed by its payload.
+     */
+    if (msg->bytestream) {
+        data = msg->data2;
+        if (msg->size && !data) {
+            qemu_log_mask(LOG_REMOTE_DEBUG, "%s: bytestream without data\n",
+                          __func__);
+            return;
+        }
+    } else {
+        data = (uint8_t *)msg + MPQEMU_MSG_HDR_SIZE;
+        /* Anything beyond data1 would leak the rest of the struct. */
+        if (msg->size > sizeof(msg->data1)) {
+            qemu_log_mask(LOG_REMOTE_DEBUG, "%s: inline payload too large\n",
+                          __func__);
+            return;
+        }
+    }
+
+    if (msg->num_fds > 0) {
+        size_t fdsize = msg->num_fds * sizeof(int);
+
+        hdr.msg_control = &u;
+        hdr.msg_controllen = sizeof(u);
+
+        chdr = CMSG_FIRSTHDR(&hdr);
+        chdr->cmsg_len = CMSG_LEN(fdsize);
+        chdr->cmsg_level = SOL_SOCKET;
+        chdr->cmsg_type = SCM_RIGHTS;
+        memcpy(CMSG_DATA(chdr), msg->fds, fdsize);
+        hdr.msg_controllen = CMSG_SPACE(fdsize);
+    }
+
+    qemu_mutex_lock(lock);
+
+    do {
+        rc = sendmsg(sock, &hdr, 0);
+    } while (rc < 0 && (errno == EINTR || errno == EAGAIN));
+
+    if (rc < 0) {
+        qemu_log_mask(LOG_REMOTE_DEBUG, "%s - sendmsg rc is %d, errno is %d,"
+                      " sock %d\n", __func__, rc, errno, sock);
+        qemu_mutex_unlock(lock);
+        return;
+    }
+
+    /* The receiver only reads a payload when size is non-zero. */
+    if (msg->size) {
+        do {
+            rc = write(sock, data, msg->size);
+        } while (rc < 0 && (errno == EINTR || errno == EAGAIN));
+
+        if (rc < 0) {
+            qemu_log_mask(LOG_REMOTE_DEBUG, "%s - write rc is %d, errno is"
+                          " %d, sock %d\n", __func__, rc, errno, sock);
+        }
+    }
+
+    qemu_mutex_unlock(lock);
+}
+
+
+/*
+ * mpqemu_msg_recv:
+ * @s: link the channel belongs to (currently unused)
+ * @msg: message to fill in
+ * @chan: channel to receive from
+ *
+ * Receives the fixed-size header together with any SCM_RIGHTS file
+ * descriptors, then reads msg->size bytes of payload.  All header fields
+ * come from the peer and are treated as untrusted.
+ *
+ * When msg->bytestream is set the payload is stored in a buffer obtained
+ * from calloc() and returned in msg->data2; the caller must free() it.
+ * msg->data2 is NULL in every other case, including all error returns.
+ *
+ * Returns: the byte count of the last socket read (payload if msg->size is
+ * non-zero, header otherwise); -1 with errno set if recvmsg() or read()
+ * failed; -EIO if the header was truncated; -ERANGE if more than
+ * REMOTE_MAX_FDS descriptors were attached; -EINVAL if an inline payload
+ * would not fit in data1; -ENOMEM if the payload buffer cannot be
+ * allocated.
+ *
+ * NOTE(review): after -EINVAL/-ENOMEM the payload is left unread, so the
+ * channel is out of sync and should be torn down.  Descriptors already
+ * copied to msg->fds are left open on error paths; a short payload read()
+ * is not retried; and the bytestream size has no upper bound yet.
+ */
+int mpqemu_msg_recv(MPQemuLinkState *s, MPQemuMsg *msg, MPQemuChannel *chan)
+{
+    int rc;
+    uint8_t *data;
+    union {
+        char control[CMSG_SPACE(REMOTE_MAX_FDS * sizeof(int))];
+        struct cmsghdr align;
+    } u;
+    struct msghdr hdr;
+    struct cmsghdr *chdr;
+    size_t fdsize;
+    size_t nfds;
+    int sock = chan->sock;
+    QemuMutex *lock = &chan->recv_lock;
+
+    struct iovec iov = {
+        .iov_base = (char *) msg,
+        .iov_len = MPQEMU_MSG_HDR_SIZE,
+    };
+
+    memset(&hdr, 0, sizeof(hdr));
+    memset(&u, 0, sizeof(u));
+
+    hdr.msg_iov = &iov;
+    hdr.msg_iovlen = 1;
+    hdr.msg_control = &u;
+    hdr.msg_controllen = sizeof(u);
+
+    /* Neither field is part of the wire header; start from a known state. */
+    msg->num_fds = 0;
+    msg->data2 = NULL;
+
+    qemu_mutex_lock(lock);
+
+    do {
+        rc = recvmsg(sock, &hdr, 0);
+    } while (rc < 0 && (errno == EINTR || errno == EAGAIN));
+
+    if (rc < 0) {
+        qemu_log_mask(LOG_REMOTE_DEBUG, "%s - recvmsg rc is %d, errno is %d,"
+                      " sock %d\n", __func__, rc, errno, sock);
+        goto out;
+    }
+
+    /* Without a complete header, size and bytestream cannot be trusted. */
+    if ((size_t)rc < MPQEMU_MSG_HDR_SIZE) {
+        qemu_log_mask(LOG_REMOTE_DEBUG, "%s: short header, got %d bytes\n",
+                      __func__, rc);
+        rc = -EIO;
+        goto out;
+    }
+
+    for (chdr = CMSG_FIRSTHDR(&hdr); chdr != NULL;
+         chdr = CMSG_NXTHDR(&hdr, chdr)) {
+        if ((chdr->cmsg_level == SOL_SOCKET) &&
+            (chdr->cmsg_type == SCM_RIGHTS)) {
+            fdsize = chdr->cmsg_len - CMSG_LEN(0);
+            nfds = fdsize / sizeof(int);
+            if (nfds > REMOTE_MAX_FDS) {
+                /*
+                 * TODO: Security issue detected. Sender never sends more
+                 * than REMOTE_MAX_FDS. This condition should be signaled to
+                 * the admin
+                 */
+                qemu_log_mask(LOG_REMOTE_DEBUG, "%s: Max FDs exceeded\n", __func__);
+                rc = -ERANGE;
+                goto out;
+            }
+
+            /* Copy whole descriptors only, never past msg->fds. */
+            memcpy(msg->fds, CMSG_DATA(chdr), nfds * sizeof(int));
+            msg->num_fds = nfds;
+            break;
+        }
+    }
+
+    if (!msg->size) {
+        /* No payload; rc still holds the header byte count. */
+        goto out;
+    }
+
+    if (msg->bytestream) {
+        msg->data2 = calloc(1, msg->size);
+        if (!msg->data2) {
+            qemu_log_mask(LOG_REMOTE_DEBUG, "%s: cannot allocate payload\n",
+                          __func__);
+            rc = -ENOMEM;
+            goto out;
+        }
+        data = msg->data2;
+    } else {
+        /* The peer controls size; never let it run past data1. */
+        if (msg->size > sizeof(msg->data1)) {
+            qemu_log_mask(LOG_REMOTE_DEBUG, "%s: inline payload too large\n",
+                          __func__);
+            rc = -EINVAL;
+            goto out;
+        }
+        data = (uint8_t *)&msg->data1;
+    }
+
+    do {
+        rc = read(sock, data, msg->size);
+    } while (rc < 0 && (errno == EINTR || errno == EAGAIN));
+
+    if (rc < 0) {
+        int saved_errno = errno;
+
+        /* Do not hand a half-initialised buffer back to the caller. */
+        free(msg->data2);
+        msg->data2 = NULL;
+        errno = saved_errno;
+    }
+
+out:
+    qemu_mutex_unlock(lock);
+
+    return rc;
+}
+
+/*
+ * GSource "prepare" hook: reports a poll timeout of -1 and returns FALSE,
+ * i.e. the source is never ready before polling.
+ */
+static gboolean mpqemu_link_handler_prepare(GSource *gsrc, gint *timeout)
+{
+    const gint no_timeout = -1;
+
+    g_assert(timeout);
+    *timeout = no_timeout;
+
+    return FALSE;
+}
+
+/*
+ * GSource "check" hook: non-zero when any of the events the channel polls
+ * for has been reported in revents.
+ */
+static gboolean mpqemu_link_handler_check(GSource *gsrc)
+{
+    const GPollFD *pfd = &((MPQemuChannel *)gsrc)->gpfd;
+
+    return pfd->events & pfd->revents;
+}
+
+/*
+ * GSource "dispatch" hook: forwards the pending events to the link's
+ * callback, then removes the source if a hang-up or error was reported and
+ * keeps it otherwise.  @data is the MPQemuLinkState registered through
+ * g_source_set_callback(); @func is unused.
+ */
+static gboolean mpqemu_link_handler_dispatch(GSource *gsrc, GSourceFunc func,
+                                             gpointer data)
+{
+    MPQemuChannel *chan = (MPQemuChannel *)gsrc;
+    MPQemuLinkState *link = (MPQemuLinkState *)data;
+
+    link->callback(chan->gpfd.revents, chan);
+
+    /* revents is evaluated only after the callback has run. */
+    if (chan->gpfd.revents & G_IO_HUP) {
+        return G_SOURCE_REMOVE;
+    }
+
+    return (chan->gpfd.revents & G_IO_ERR) ? G_SOURCE_REMOVE
+                                           : G_SOURCE_CONTINUE;
+}
+
+/*
+ * mpqemu_link_set_callback:
+ * @s: link to configure
+ * @callback: function the GSource dispatch handler calls with the channel's
+ *            pending poll events and the channel
+ *
+ * Stores @callback in the link; any previously stored callback is replaced.
+ */
+void mpqemu_link_set_callback(MPQemuLinkState *s, mpqemu_link_callback callback)
+{
+    s->callback = callback;
+}
+
+/*
+ * mpqemu_init_channel:
+ * @s: link that owns the channel; passed to the dispatch handler as its
+ *     user data
+ * @chan: out parameter receiving the new channel
+ * @fd: socket the channel communicates over
+ *
+ * Allocates a channel as a custom GSource, initialises its locks and sets
+ * it up to poll @fd for input, hang-up and error conditions.  The source is
+ * not attached to a main context here.
+ */
+void mpqemu_init_channel(MPQemuLinkState *s, MPQemuChannel **chan, int fd)
+{
+    GSource *source;
+    MPQemuChannel *new_chan;
+
+    gsrc_funcs = (GSourceFuncs){
+        .prepare = mpqemu_link_handler_prepare,
+        .check = mpqemu_link_handler_check,
+        .dispatch = mpqemu_link_handler_dispatch,
+        .finalize = NULL,
+    };
+
+    /* The GSource is the first member, so the allocation is the channel. */
+    source = g_source_new(&gsrc_funcs, sizeof(MPQemuChannel));
+    new_chan = (MPQemuChannel *)source;
+
+    new_chan->sock = fd;
+    new_chan->gpfd.fd = fd;
+    new_chan->gpfd.events = G_IO_IN | G_IO_HUP | G_IO_ERR;
+
+    qemu_mutex_init(&new_chan->send_lock);
+    qemu_mutex_init(&new_chan->recv_lock);
+
+    g_source_set_callback(source, NULL, (gpointer)s, NULL);
+    g_source_add_poll(source, &new_chan->gpfd);
+
+    *chan = new_chan;
+}
+
+/*
+ * mpqemu_destroy_channel:
+ * @chan: channel created by mpqemu_init_channel()
+ *
+ * Closes the channel's socket, destroys its locks and drops one reference
+ * on the underlying GSource.
+ */
+void mpqemu_destroy_channel(MPQemuChannel *chan)
+{
+    close(chan->sock);
+    qemu_mutex_destroy(&chan->send_lock);
+    qemu_mutex_destroy(&chan->recv_lock);
+
+    /*
+     * Must come last: the channel lives inside the GSource allocation made
+     * by g_source_new(), so dropping the final reference frees @chan.
+     */
+    g_source_unref(&chan->gsrc);
+}
+
+/*
+ * mpqemu_start_coms:
+ * @s: link whose communication channel should start being serviced
+ *
+ * Attaches the channel's GSource to the link's main context and runs the
+ * link's main loop; does not return until that loop is quit.
+ */
+void mpqemu_start_coms(MPQemuLinkState *s)
+{
+    guint source_id;
+
+    /*
+     * Keep the call outside g_assert(): with G_DISABLE_ASSERT the macro
+     * expands to nothing and the source would never be attached.
+     */
+    source_id = g_source_attach(&s->com->gsrc, s->ctx);
+    g_assert(source_id);
+
+    g_main_loop_run(s->loop);
+}