diff mbox series

[RFC,server,05/11] vfio-user: run vfio-user context

Message ID 9ea4e77a39aaff3dfac3e463d3de76b0e4e1844f.1626722742.git.jag.raman@oracle.com
State New
Headers show
Series vfio-user server in QEMU | expand

Commit Message

Jag Raman July 19, 2021, 8 p.m. UTC
Set up a separate thread to run the vfio-user context. The thread acts as
the main loop for the device.

Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
Signed-off-by: John G Johnson <john.g.johnson@oracle.com>
Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
---
 hw/remote/vfio-user-obj.c | 44 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 44 insertions(+)

Comments

Thanos Makatos July 20, 2021, 2:17 p.m. UTC | #1
> -----Original Message-----
> From: Jagannathan Raman <jag.raman@oracle.com>
> Sent: 19 July 2021 21:00
> To: qemu-devel@nongnu.org
> Cc: stefanha@redhat.com; alex.williamson@redhat.com;
> elena.ufimtseva@oracle.com; John Levon <john.levon@nutanix.com>;
> john.g.johnson@oracle.com; Thanos Makatos
> <thanos.makatos@nutanix.com>; Swapnil Ingle
> <swapnil.ingle@nutanix.com>; jag.raman@oracle.com
> Subject: [PATCH RFC server 05/11] vfio-user: run vfio-user context
> 
> Setup a separate thread to run the vfio-user context. The thread acts as
> the main loop for the device.

In your "vfio-user: instantiate vfio-user context" patch you create the vfu context in blocking mode, so the only way to run device emulation is in a separate thread.
Were you going to create a separate thread anyway? You can run device emulation in polling mode, which avoids creating a separate thread and thus saves resources. Do you plan to do that in the future?

> 
> Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
> Signed-off-by: John G Johnson <john.g.johnson@oracle.com>
> Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
> ---
>  hw/remote/vfio-user-obj.c | 44
> ++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 44 insertions(+)
> 
> diff --git a/hw/remote/vfio-user-obj.c b/hw/remote/vfio-user-obj.c
> index e362709..6a2d0f5 100644
> --- a/hw/remote/vfio-user-obj.c
> +++ b/hw/remote/vfio-user-obj.c
> @@ -35,6 +35,7 @@
>  #include "trace.h"
>  #include "sysemu/runstate.h"
>  #include "qemu/notify.h"
> +#include "qemu/thread.h"
>  #include "qapi/error.h"
>  #include "sysemu/sysemu.h"
>  #include "hw/qdev-core.h"
> @@ -66,6 +67,8 @@ struct VfuObject {
>      vfu_ctx_t *vfu_ctx;
> 
>      PCIDevice *pci_dev;
> +
> +    QemuThread vfu_ctx_thread;
>  };
> 
>  static void vfu_object_set_socket(Object *obj, const char *str, Error **errp)
> @@ -90,6 +93,44 @@ static void vfu_object_set_devid(Object *obj, const
> char *str, Error **errp)
>      trace_vfu_prop("devid", str);
>  }
> 
> +static void *vfu_object_ctx_run(void *opaque)
> +{
> +    VfuObject *o = opaque;
> +    int ret;
> +
> +    ret = vfu_realize_ctx(o->vfu_ctx);
> +    if (ret < 0) {
> +        error_setg(&error_abort, "vfu: Failed to realize device %s- %s",
> +                   o->devid, strerror(errno));
> +        return NULL;
> +    }
> +
> +    ret = vfu_attach_ctx(o->vfu_ctx);
> +    if (ret < 0) {
> +        error_setg(&error_abort,
> +                   "vfu: Failed to attach device %s to context - %s",
> +                   o->devid, strerror(errno));
> +        return NULL;
> +    }
> +
> +    do {
> +        ret = vfu_run_ctx(o->vfu_ctx);
> +        if (ret < 0) {
> +            if (errno == EINTR) {
> +                ret = 0;
> +            } else if (errno == ENOTCONN) {
> +                object_unparent(OBJECT(o));
> +                break;
> +            } else {
> +                error_setg(&error_abort, "vfu: Failed to run device %s - %s",
> +                           o->devid, strerror(errno));
> +            }
> +        }
> +    } while (ret == 0);
> +
> +    return NULL;
> +}
> +
>  static void vfu_object_machine_done(Notifier *notifier, void *data)
>  {
>      VfuObject *o = container_of(notifier, VfuObject, machine_done);
> @@ -125,6 +166,9 @@ static void vfu_object_machine_done(Notifier
> *notifier, void *data)
>                     pci_get_word(o->pci_dev->config + PCI_DEVICE_ID),
>                     pci_get_word(o->pci_dev->config +
> PCI_SUBSYSTEM_VENDOR_ID),
>                     pci_get_word(o->pci_dev->config + PCI_SUBSYSTEM_ID));
> +
> +    qemu_thread_create(&o->vfu_ctx_thread, "VFU ctx runner",
> vfu_object_ctx_run,
> +                       o, QEMU_THREAD_JOINABLE);
>  }
> 
>  static void vfu_object_init(Object *obj)
> --
> 1.8.3.1
Jag Raman Aug. 13, 2021, 2:51 p.m. UTC | #2
> On Jul 20, 2021, at 10:17 AM, Thanos Makatos <thanos.makatos@nutanix.com> wrote:
> 
>> -----Original Message-----
>> From: Jagannathan Raman <jag.raman@oracle.com>
>> Sent: 19 July 2021 21:00
>> To: qemu-devel@nongnu.org
>> Cc: stefanha@redhat.com; alex.williamson@redhat.com;
>> elena.ufimtseva@oracle.com; John Levon <john.levon@nutanix.com>;
>> john.g.johnson@oracle.com; Thanos Makatos
>> <thanos.makatos@nutanix.com>; Swapnil Ingle
>> <swapnil.ingle@nutanix.com>; jag.raman@oracle.com
>> Subject: [PATCH RFC server 05/11] vfio-user: run vfio-user context
>> 
>> Setup a separate thread to run the vfio-user context. The thread acts as
>> the main loop for the device.
> 
> In your "vfio-user: instantiate vfio-user context" patch you create the vfu context in blocking-mode, so the only way to run device emulation is in a separate thread.
> Were you going to create a separate thread anyway? You can run device emulation in polling mode therefore you can avoid creating a separate thread, thus saving resources. Do plan to do that in the future?

Thanks for the information about the Blocking and Non-Blocking mode.

I’d like to explain why we are using a separate thread presently and
check with you if it’s possible to poll on multiple vfu contexts at the
same time (similar to select/poll for fds).

My understanding of how devices are executed in QEMU is as follows:
QEMU initializes the device instance, at which point the device registers
callbacks for BAR and config space accesses. The device is then
driven by these callbacks - whenever the vcpu thread tries
to access the BAR addresses or issues a config space access to the PCI
bus, the vcpu exits to QEMU, which handles these accesses. As such, the
device is driven by the vcpu thread. Since there are no vcpu threads in the
remote process, we created a separate thread as a replacement. As you
can see, this thread blocks on vfu_run_ctx(), which I believe polls
on the socket for messages from the client.

If there is a way to run multiple vfu contexts at the same time, that would
help conserve threads on the host CPU. For example, if there’s a
way to add vfu contexts to a list of contexts that expect messages from
the client, that could be a good idea. Alternatively, this QEMU server could
also implement a similar mechanism to group all non-blocking vfu
contexts into a single thread, instead of having a separate thread for
each context.

--
Jag

> 
>> 
>> Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
>> Signed-off-by: John G Johnson <john.g.johnson@oracle.com>
>> Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
>> ---
>> hw/remote/vfio-user-obj.c | 44
>> ++++++++++++++++++++++++++++++++++++++++++++
>> 1 file changed, 44 insertions(+)
>> 
>> diff --git a/hw/remote/vfio-user-obj.c b/hw/remote/vfio-user-obj.c
>> index e362709..6a2d0f5 100644
>> --- a/hw/remote/vfio-user-obj.c
>> +++ b/hw/remote/vfio-user-obj.c
>> @@ -35,6 +35,7 @@
>> #include "trace.h"
>> #include "sysemu/runstate.h"
>> #include "qemu/notify.h"
>> +#include "qemu/thread.h"
>> #include "qapi/error.h"
>> #include "sysemu/sysemu.h"
>> #include "hw/qdev-core.h"
>> @@ -66,6 +67,8 @@ struct VfuObject {
>>     vfu_ctx_t *vfu_ctx;
>> 
>>     PCIDevice *pci_dev;
>> +
>> +    QemuThread vfu_ctx_thread;
>> };
>> 
>> static void vfu_object_set_socket(Object *obj, const char *str, Error **errp)
>> @@ -90,6 +93,44 @@ static void vfu_object_set_devid(Object *obj, const
>> char *str, Error **errp)
>>     trace_vfu_prop("devid", str);
>> }
>> 
>> +static void *vfu_object_ctx_run(void *opaque)
>> +{
>> +    VfuObject *o = opaque;
>> +    int ret;
>> +
>> +    ret = vfu_realize_ctx(o->vfu_ctx);
>> +    if (ret < 0) {
>> +        error_setg(&error_abort, "vfu: Failed to realize device %s- %s",
>> +                   o->devid, strerror(errno));
>> +        return NULL;
>> +    }
>> +
>> +    ret = vfu_attach_ctx(o->vfu_ctx);
>> +    if (ret < 0) {
>> +        error_setg(&error_abort,
>> +                   "vfu: Failed to attach device %s to context - %s",
>> +                   o->devid, strerror(errno));
>> +        return NULL;
>> +    }
>> +
>> +    do {
>> +        ret = vfu_run_ctx(o->vfu_ctx);
>> +        if (ret < 0) {
>> +            if (errno == EINTR) {
>> +                ret = 0;
>> +            } else if (errno == ENOTCONN) {
>> +                object_unparent(OBJECT(o));
>> +                break;
>> +            } else {
>> +                error_setg(&error_abort, "vfu: Failed to run device %s - %s",
>> +                           o->devid, strerror(errno));
>> +            }
>> +        }
>> +    } while (ret == 0);
>> +
>> +    return NULL;
>> +}
>> +
>> static void vfu_object_machine_done(Notifier *notifier, void *data)
>> {
>>     VfuObject *o = container_of(notifier, VfuObject, machine_done);
>> @@ -125,6 +166,9 @@ static void vfu_object_machine_done(Notifier
>> *notifier, void *data)
>>                    pci_get_word(o->pci_dev->config + PCI_DEVICE_ID),
>>                    pci_get_word(o->pci_dev->config +
>> PCI_SUBSYSTEM_VENDOR_ID),
>>                    pci_get_word(o->pci_dev->config + PCI_SUBSYSTEM_ID));
>> +
>> +    qemu_thread_create(&o->vfu_ctx_thread, "VFU ctx runner",
>> vfu_object_ctx_run,
>> +                       o, QEMU_THREAD_JOINABLE);
>> }
>> 
>> static void vfu_object_init(Object *obj)
>> --
>> 1.8.3.1
>
John Levon Aug. 16, 2021, 12:52 p.m. UTC | #3
On Fri, Aug 13, 2021 at 02:51:53PM +0000, Jag Raman wrote:

> Thanks for the information about the Blocking and Non-Blocking mode.
> 
> I’d like to explain why we are using a separate thread presently and
> check with you if it’s possible to poll on multiple vfu contexts at the
> same time (similar to select/poll for fds).
> 
> Concerning my understanding on how devices are executed in QEMU,
> QEMU initializes the device instance - where the device registers
> callbacks for BAR and config space accesses. The device is then
> subsequently driven by these callbacks - whenever the vcpu thread tries
> to access the BAR addresses or places a config space access to the PCI
> bus, the vcpu exits to QEMU which handles these accesses. As such, the
> device is driven by the vcpu thread. Since there are no vcpu threads in the
> remote process, we created a separate thread as a replacement. As you
> can see already, this thread blocks on vfu_run_ctx() which I believe polls
> on the socket for messages from client.
> 
> If there is a way to run multiple vfu contexts at the same time, that would
> help with conserving threads on the host CPU. For example, if there’s a
> way to add vfu contexts to a list of contexts that expect messages from
> client, that could be a good idea. Alternatively, this QEMU server could
> also implement a similar mechanism to group all non-blocking vfu
> contexts to just a single thread, instead of having separate threads for
> each context.

You can use vfu_get_poll_fd() to retrieve the underlying socket fd (simplest
would be to do this after vfu_attach_ctx(), but that might depend), then poll on
the fd set, doing vfu_run_ctx() when the fd is ready. An async hangup on the
socket would show up as ENOTCONN, in which case you'd remove the fd from the
set.

Note that we're not completely async yet (e.g. the actual socket reads/writes are
synchronous). In practice that's not typically an issue, but it could be if you
wanted to support multiple VMs from a single server, etc.


regards
john
Jag Raman Aug. 16, 2021, 2:10 p.m. UTC | #4
> On Aug 16, 2021, at 8:52 AM, John Levon <john.levon@nutanix.com> wrote:
> 
> On Fri, Aug 13, 2021 at 02:51:53PM +0000, Jag Raman wrote:
> 
>> Thanks for the information about the Blocking and Non-Blocking mode.
>> 
>> I’d like to explain why we are using a separate thread presently and
>> check with you if it’s possible to poll on multiple vfu contexts at the
>> same time (similar to select/poll for fds).
>> 
>> Concerning my understanding on how devices are executed in QEMU,
>> QEMU initializes the device instance - where the device registers
>> callbacks for BAR and config space accesses. The device is then
>> subsequently driven by these callbacks - whenever the vcpu thread tries
>> to access the BAR addresses or places a config space access to the PCI
>> bus, the vcpu exits to QEMU which handles these accesses. As such, the
>> device is driven by the vcpu thread. Since there are no vcpu threads in the
>> remote process, we created a separate thread as a replacement. As you
>> can see already, this thread blocks on vfu_run_ctx() which I believe polls
>> on the socket for messages from client.
>> 
>> If there is a way to run multiple vfu contexts at the same time, that would
>> help with conserving threads on the host CPU. For example, if there’s a
>> way to add vfu contexts to a list of contexts that expect messages from
>> client, that could be a good idea. Alternatively, this QEMU server could
>> also implement a similar mechanism to group all non-blocking vfu
>> contexts to just a single thread, instead of having separate threads for
>> each context.
> 
> You can use vfu_get_poll_fd() to retrieve the underlying socket fd (simplest
> would be to do this after vfu_attach_ctx(), but that might depend), then poll on
> the fd set, doing vfu_run_ctx() when the fd is ready. An async hangup on the
> socket would show up as ENOTCONN, in which case you'd remove the fd from the
> set.

OK sounds good, will check this model out. Thank you!

--
Jag

> 
> Note that we're not completely async yet (e.g. the actual socket read/writes are
> synchronous). In practice that's not typically an issue but it could be if you
> wanted to support multiple VMs from a single server, etc.
> 
> 
> regards
> john
diff mbox series

Patch

diff --git a/hw/remote/vfio-user-obj.c b/hw/remote/vfio-user-obj.c
index e362709..6a2d0f5 100644
--- a/hw/remote/vfio-user-obj.c
+++ b/hw/remote/vfio-user-obj.c
@@ -35,6 +35,7 @@ 
 #include "trace.h"
 #include "sysemu/runstate.h"
 #include "qemu/notify.h"
+#include "qemu/thread.h"
 #include "qapi/error.h"
 #include "sysemu/sysemu.h"
 #include "hw/qdev-core.h"
@@ -66,6 +67,8 @@  struct VfuObject {
     vfu_ctx_t *vfu_ctx;
 
     PCIDevice *pci_dev;
+
+    QemuThread vfu_ctx_thread;
 };
 
 static void vfu_object_set_socket(Object *obj, const char *str, Error **errp)
@@ -90,6 +93,44 @@  static void vfu_object_set_devid(Object *obj, const char *str, Error **errp)
     trace_vfu_prop("devid", str);
 }
 
+static void *vfu_object_ctx_run(void *opaque)
+{
+    VfuObject *o = opaque;
+    int ret;
+
+    ret = vfu_realize_ctx(o->vfu_ctx);
+    if (ret < 0) {
+        error_setg(&error_abort, "vfu: Failed to realize device %s- %s",
+                   o->devid, strerror(errno));
+        return NULL;
+    }
+
+    ret = vfu_attach_ctx(o->vfu_ctx);
+    if (ret < 0) {
+        error_setg(&error_abort,
+                   "vfu: Failed to attach device %s to context - %s",
+                   o->devid, strerror(errno));
+        return NULL;
+    }
+
+    do {
+        ret = vfu_run_ctx(o->vfu_ctx);
+        if (ret < 0) {
+            if (errno == EINTR) {
+                ret = 0;
+            } else if (errno == ENOTCONN) {
+                object_unparent(OBJECT(o));
+                break;
+            } else {
+                error_setg(&error_abort, "vfu: Failed to run device %s - %s",
+                           o->devid, strerror(errno));
+            }
+        }
+    } while (ret == 0);
+
+    return NULL;
+}
+
 static void vfu_object_machine_done(Notifier *notifier, void *data)
 {
     VfuObject *o = container_of(notifier, VfuObject, machine_done);
@@ -125,6 +166,9 @@  static void vfu_object_machine_done(Notifier *notifier, void *data)
                    pci_get_word(o->pci_dev->config + PCI_DEVICE_ID),
                    pci_get_word(o->pci_dev->config + PCI_SUBSYSTEM_VENDOR_ID),
                    pci_get_word(o->pci_dev->config + PCI_SUBSYSTEM_ID));
+
+    qemu_thread_create(&o->vfu_ctx_thread, "VFU ctx runner", vfu_object_ctx_run,
+                       o, QEMU_THREAD_JOINABLE);
 }
 
 static void vfu_object_init(Object *obj)