
[RFC,v4,32/49] multi-process: Use separate MMIO communication channel

Message ID b2594fdefb278f890762d12639524c4db7667393.1571905346.git.jag.raman@oracle.com
State New
Series Initial support of multi-process qemu

Commit Message

Jag Raman Oct. 24, 2019, 9:09 a.m. UTC
Using a separate communication channel for MMIO helps improve performance.

Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
Signed-off-by: John G Johnson <john.g.johnson@oracle.com>
---
 New patch in v3

 hw/proxy/qemu-proxy.c         | 39 +++++++++++++++++++++++++++------------
 include/hw/proxy/qemu-proxy.h |  1 +
 include/io/mpqemu-link.h      |  7 +++++++
 io/mpqemu-link.c              |  2 ++
 qdev-monitor.c                |  1 +
 remote/remote-main.c          | 18 +++++++++++++-----
 6 files changed, 51 insertions(+), 17 deletions(-)

Comments

Stefan Hajnoczi Nov. 11, 2019, 4:21 p.m. UTC | #1
On Thu, Oct 24, 2019 at 05:09:13AM -0400, Jagannathan Raman wrote:
> Using a separate communication channel for MMIO helps improve performance.

Why?
Jag Raman Nov. 13, 2019, 4:14 p.m. UTC | #2
On 11/11/2019 11:21 AM, Stefan Hajnoczi wrote:
> On Thu, Oct 24, 2019 at 05:09:13AM -0400, Jagannathan Raman wrote:
>> Using a separate communication channel for MMIO helps improve performance.
> 
> Why?

Initiating an IO operation typically involves multiple MMIO accesses. In
some legacy devices, such as the LSI SCSI controller, completion of the
IO operation is also detected by polling MMIO registers. MMIO traffic can
therefore be heavy and have a measurable impact on performance.

Having a dedicated channel for MMIO ensures that MMIO messages don't have
to compete with other messages to the remote process, especially when
there are multiple devices emulated by a single remote process.
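
For illustration, the channel setup amounts to something like the sketch
below: one socketpair per concern, with the second reserved exclusively
for BAR traffic.  (open_channels() is a hypothetical helper invented for
this sketch; the real code is in remote_spawn() in the diff below.)

#include <stdio.h>
#include <sys/socket.h>

/*
 * Give each proxy device two AF_UNIX socketpairs: com[] carries the
 * general messages (device_add, device_del, sync, ...), while mmio[]
 * is reserved for BAR_READ/BAR_WRITE/MMIO_RETURN, so MMIO round trips
 * never queue behind unrelated traffic.
 */
static int open_channels(int com[2], int mmio[2])
{
    if (socketpair(AF_UNIX, SOCK_STREAM, 0, com) ||
        socketpair(AF_UNIX, SOCK_STREAM, 0, mmio)) {
        perror("socketpair");
        return -1;
    }
    return 0;
}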

Thanks!
--
Jag

Stefan Hajnoczi Nov. 21, 2019, 12:31 p.m. UTC | #3
On Wed, Nov 13, 2019 at 11:14:50AM -0500, Jag Raman wrote:
> On 11/11/2019 11:21 AM, Stefan Hajnoczi wrote:
> > On Thu, Oct 24, 2019 at 05:09:13AM -0400, Jagannathan Raman wrote:
> > > Using a separate communication channel for MMIO helps improve performance.
> > 
> > Why?
> 
> Initiating an IO operation typically involves multiple MMIO accesses.
> In some legacy devices, such as the LSI SCSI controller, completion of
> the IO operation is also detected by polling MMIO registers. MMIO
> traffic can therefore be heavy and have a measurable impact on
> performance.
> 
> Having a dedicated channel for MMIO ensures that MMIO messages don't
> have to compete with other messages to the remote process, especially
> when there are multiple devices emulated by a single remote process.

A vCPU doing a polling read on an MMIO register will cause a BAR_READ
message to be sent to the remote process.  The vCPU thread waits for the
response to this message.
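
(With this patch, that round trip looks roughly like the sketch below.
proxy_bar_read() is a hypothetical condensation of send_bar_access_msg()
from the diff; the bar_access addr/size field names are assumed from
earlier patches in the series.)

/* Sketch of the blocking MMIO read path; error handling omitted. */
static uint64_t proxy_bar_read(MPQemuLinkState *link, uint64_t addr,
                               unsigned size)
{
    MPQemuMsg msg = { 0 };
    MPQemuMsg ret;

    msg.cmd = BAR_READ;
    msg.data1.bar_access.addr = addr;         /* assumed field names */
    msg.data1.bar_access.size = size;
    msg.size = sizeof(msg.data1);

    mpqemu_msg_send(link, &msg, link->mmio);  /* vCPU thread sends... */
    mpqemu_msg_recv(link, &ret, link->mmio);  /* ...and blocks here for
                                                 the MMIO_RETURN reply */

    return ret.data1.mmio_ret.val;
}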

When there are multiple remote devices each has its own socket, so
communication with different remote processes does not interfere.

The only scenarios I can think of are:
1. Interference within a single device between vCPUs and/or the QEMU
   monitor.
2. A single process serving multiple devices that is implemented in a
   way such that different devices interfere with each other.

It sounds like you are saying the problem is #2, but this is still
unclear to me.  If the remote process can be implemented such that there
is no interference when each device has a dedicated MMIO socket, then
why can't it be implemented such that there is no interference when each
device's main socket is used (each device has its own!)?

Maybe I've missed the point.  It would be good if you could explain in
more detail.

Stefan

Patch

diff --git a/hw/proxy/qemu-proxy.c b/hw/proxy/qemu-proxy.c
index 691b991..74aecd3 100644
--- a/hw/proxy/qemu-proxy.c
+++ b/hw/proxy/qemu-proxy.c
@@ -201,20 +201,22 @@  static int make_argv(char *command_str, char **argv, int argc)
 int remote_spawn(PCIProxyDev *pdev, const char *command, Error **errp)
 {
     pid_t rpid;
-    int fd[2] = {-1, -1};
+    int fd[2], mmio[2];
     Error *local_error = NULL;
     char *argv[64];
     int argc = 0, _argc;
     char *sfd;
     char *exec_dir;
     int rc = -EINVAL;
+    struct timeval timeout = {.tv_sec = 10, .tv_usec = 0};
 
     if (pdev->managed) {
         /* Child is forked by external program (such as libvirt). */
         return rc;
     }
 
-    if (socketpair(AF_UNIX, SOCK_STREAM, 0, fd)) {
+    if (socketpair(AF_UNIX, SOCK_STREAM, 0, fd) ||
+        socketpair(AF_UNIX, SOCK_STREAM, 0, mmio)) {
         error_setg(errp, "Unable to create unix socket.");
         return rc;
     }
@@ -222,6 +224,8 @@  int remote_spawn(PCIProxyDev *pdev, const char *command, Error **errp)
     argc = add_argv(exec_dir, argv, argc);
     sfd = g_strdup_printf("%d", fd[1]);
     argc = add_argv(sfd, argv, argc);
+    sfd = g_strdup_printf("%d", mmio[1]);
+    argc = add_argv(sfd, argv, argc);
     _argc = argc;
     argc = make_argv((char *)command, argv, argc);
 
@@ -231,22 +235,32 @@  int remote_spawn(PCIProxyDev *pdev, const char *command, Error **errp)
     if (rpid == -1) {
         error_setg(errp, "Unable to spawn emulation program.");
         close(fd[0]);
+        close(mmio[0]);
         goto fail;
     }
 
     if (rpid == 0) {
         close(fd[0]);
+        close(mmio[0]);
         execvp(argv[0], (char *const *)argv);
         exit(1);
     }
     pdev->remote_pid = rpid;
     pdev->rsocket = fd[1];
     pdev->socket = fd[0];
+    pdev->mmio_sock = mmio[0];
+
+    if (setsockopt(mmio[0], SOL_SOCKET, SO_RCVTIMEO, (char *)&timeout,
+                   sizeof(timeout)) < 0) {
+        error_setg(errp, "Unable to set timeout for socket");
+        goto fail;
+    }
 
     rc = 0;
 
 fail:
     close(fd[1]);
+    close(mmio[1]);
 
     for (int i = 0; i < _argc; i++) {
         g_free(argv[i]);
@@ -443,6 +457,9 @@  static void init_proxy(PCIDevice *dev, char *command, bool need_spawn, Error **e
     mpqemu_init_channel(pdev->mpqemu_link, &pdev->mpqemu_link->com,
                         pdev->socket);
 
+    mpqemu_init_channel(pdev->mpqemu_link, &pdev->mpqemu_link->mmio,
+                        pdev->mmio_sock);
+
     configure_memory_sync(pdev->sync, pdev->mpqemu_link);
 }
 
@@ -503,8 +520,7 @@  static void send_bar_access_msg(PCIProxyDev *dev, MemoryRegion *mr,
                                 unsigned size, bool memory)
 {
     MPQemuLinkState *mpqemu_link = dev->mpqemu_link;
-    MPQemuMsg msg;
-    int wait;
+    MPQemuMsg msg, ret;
 
     memset(&msg, 0, sizeof(MPQemuMsg));
 
@@ -518,19 +534,18 @@  static void send_bar_access_msg(PCIProxyDev *dev, MemoryRegion *mr,
         msg.cmd = BAR_WRITE;
         msg.data1.bar_access.val = *val;
     } else {
-        wait = GET_REMOTE_WAIT;
-
         msg.cmd = BAR_READ;
-        msg.num_fds = 1;
-        msg.fds[0] = wait;
     }
 
-    mpqemu_msg_send(mpqemu_link, &msg, mpqemu_link->com);
+    mpqemu_msg_send(mpqemu_link, &msg, mpqemu_link->mmio);
 
-    if (!write) {
-        *val = wait_for_remote(wait);
-        PUT_REMOTE_WAIT(wait);
+    if (write) {
+        return;
     }
+
+    mpqemu_msg_recv(mpqemu_link, &ret, mpqemu_link->mmio);
+
+    *val = ret.data1.mmio_ret.val;
 }
 
 void proxy_default_bar_write(void *opaque, hwaddr addr, uint64_t val,
diff --git a/include/hw/proxy/qemu-proxy.h b/include/hw/proxy/qemu-proxy.h
index ac61a9b..5e858cc 100644
--- a/include/hw/proxy/qemu-proxy.h
+++ b/include/hw/proxy/qemu-proxy.h
@@ -71,6 +71,7 @@  struct PCIProxyDev {
     pid_t remote_pid;
     int rsocket;
     int socket;
+    int mmio_sock;
 
     char *rid;
 
diff --git a/include/io/mpqemu-link.h b/include/io/mpqemu-link.h
index 16a913b..4911eea 100644
--- a/include/io/mpqemu-link.h
+++ b/include/io/mpqemu-link.h
@@ -73,6 +73,7 @@  typedef enum {
     DEVICE_ADD,
     DEVICE_DEL,
     PROXY_PING,
+    MMIO_RETURN,
     MAX,
 } mpqemu_cmd_t;
 
@@ -107,6 +108,10 @@  typedef struct {
 } set_irqfd_msg_t;
 
 typedef struct {
+    uint64_t val;
+} mmio_ret_msg_t;
+
+typedef struct {
     mpqemu_cmd_t cmd;
     int bytestream;
     size_t size;
@@ -116,6 +121,7 @@  typedef struct {
         sync_sysmem_msg_t sync_sysmem;
         bar_access_msg_t bar_access;
         set_irqfd_msg_t set_irqfd;
+        mmio_ret_msg_t mmio_ret;
     } data1;
 
     int fds[REMOTE_MAX_FDS];
@@ -170,6 +176,7 @@  typedef struct MPQemuLinkState {
     GMainLoop *loop;
 
     MPQemuChannel *com;
+    MPQemuChannel *mmio;
 
     mpqemu_link_callback callback;
 } MPQemuLinkState;
diff --git a/io/mpqemu-link.c b/io/mpqemu-link.c
index 696aeb1..d91936e 100644
--- a/io/mpqemu-link.c
+++ b/io/mpqemu-link.c
@@ -77,6 +77,7 @@  void mpqemu_link_finalize(MPQemuLinkState *s)
     g_main_loop_quit(s->loop);
 
     mpqemu_destroy_channel(s->com);
+    mpqemu_destroy_channel(s->mmio);
 
     object_unref(OBJECT(s));
 }
@@ -344,6 +345,7 @@  void mpqemu_start_coms(MPQemuLinkState *s)
 {
 
     g_assert(g_source_attach(&s->com->gsrc, s->ctx));
+    g_assert(g_source_attach(&s->mmio->gsrc, s->ctx));
 
     g_main_loop_run(s->loop);
 }
diff --git a/qdev-monitor.c b/qdev-monitor.c
index e1d05e4..f38849e 100644
--- a/qdev-monitor.c
+++ b/qdev-monitor.c
@@ -711,6 +711,7 @@  DeviceState *qdev_proxy_add(const char *rid, const char *id, char *bus,
     if (old_pdev) {
         pdev->rsocket = old_pdev->rsocket;
         pdev->socket = old_pdev->socket;
+        pdev->mmio_sock = old_pdev->mmio_sock;
         pdev->remote_pid = old_pdev->remote_pid;
     } else {
         pdev->rsocket = managed ? rsocket : -1;
diff --git a/remote/remote-main.c b/remote/remote-main.c
index 27e4492..0a1326d 100644
--- a/remote/remote-main.c
+++ b/remote/remote-main.c
@@ -117,8 +117,8 @@  static void process_bar_write(MPQemuMsg *msg, Error **errp)
 static void process_bar_read(MPQemuMsg *msg, Error **errp)
 {
     bar_access_msg_t *bar_access = &msg->data1.bar_access;
+    MPQemuMsg ret = { 0 };
     AddressSpace *as;
-    int wait = msg->fds[0];
     MemTxResult res;
     uint64_t val = 0;
 
@@ -152,9 +152,10 @@  static void process_bar_read(MPQemuMsg *msg, Error **errp)
     }
 
 fail:
-    notify_proxy(wait, val);
-
-    PUT_REMOTE_WAIT(wait);
+    ret.cmd = MMIO_RETURN;
+    ret.data1.mmio_ret.val = val;
+    ret.size = sizeof(ret.data1);
+    mpqemu_msg_send(mpqemu_link, &ret, mpqemu_link->mmio);
 }
 
 static void process_device_add_msg(MPQemuMsg *msg)
@@ -497,7 +498,14 @@  int main(int argc, char *argv[])
 
     mpqemu_init_channel(mpqemu_link, &mpqemu_link->com, fd);
 
-    parse_cmdline(argc - 2, argv + 2, NULL);
+    fd = qemu_parse_fd(argv[2]);
+    if (fd == -1) {
+        printf("Failed to parse fd for remote process.\n");
+        return -EINVAL;
+    }
+    mpqemu_init_channel(mpqemu_link, &mpqemu_link->mmio, fd);
+
+    parse_cmdline(argc - 3, argv + 3, NULL);
 
     mpqemu_link_set_callback(mpqemu_link, process_msg);