diff mbox series

[v2,16/23] vfio-user: proxy container connect/disconnect

Message ID e1e14729756134109ab344a35aadfb863ddd1fef.1675228037.git.john.g.johnson@oracle.com
State New
Headers show
Series vfio-user client | expand

Commit Message

John Johnson Feb. 2, 2023, 5:55 a.m. UTC
Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
Signed-off-by: John G Johnson <john.g.johnson@oracle.com>
Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
---
 hw/vfio/user.h                |   6 +-
 include/hw/vfio/vfio-common.h |  10 +++
 hw/vfio/common.c              | 100 ++++++++++++++++-----------
 hw/vfio/user-pci.c            |  12 +++-
 hw/vfio/user.c                | 152 +++++++++++++++++++++++++++++++++++++++++-
 5 files changed, 237 insertions(+), 43 deletions(-)
diff mbox series

Patch

diff --git a/hw/vfio/user.h b/hw/vfio/user.h
index 3012a86..b89e5ca 100644
--- a/hw/vfio/user.h
+++ b/hw/vfio/user.h
@@ -91,9 +91,13 @@  void vfio_user_disconnect(VFIOUserProxy *proxy);
 void vfio_user_set_handler(VFIODevice *vbasedev,
                            void (*handler)(void *opaque, VFIOUserMsg *msg),
                            void *reqarg);
-int vfio_user_get_device(VFIODevice *vbasedev, Error **errp);
+int vfio_user_get_device(VFIOGroup *group, VFIODevice *vbasedev, Error **errp);
+VFIOGroup *vfio_user_get_group(VFIOUserProxy *proxy, AddressSpace *as,
+                               Error **errp);
+void vfio_user_put_group(VFIOGroup *group);
 int vfio_user_validate_version(VFIOUserProxy *proxy, Error **errp);
 
 extern VFIODeviceIO vfio_dev_io_sock;
+extern VFIOContainerIO vfio_cont_io_sock;
 
 #endif /* VFIO_USER_H */
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index 2c58d7d..b0c4453 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -94,6 +94,7 @@  typedef struct VFIOContainer {
     uint64_t max_dirty_bitmap_size;
     unsigned long pgsizes;
     unsigned int dma_max_mappings;
+    VFIOUserProxy *proxy;
     QLIST_HEAD(, VFIOGuestIOMMU) giommu_list;
     QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list;
     QLIST_HEAD(, VFIOGroup) group_list;
@@ -236,6 +237,7 @@  typedef struct VFIODisplay {
     } dmabuf;
 } VFIODisplay;
 
+int vfio_ram_block_discard_disable(VFIOContainer *container, bool state);
 void vfio_put_base_device(VFIODevice *vbasedev);
 void vfio_disable_irqindex(VFIODevice *vbasedev, int index);
 void vfio_unmask_single_irqindex(VFIODevice *vbasedev, int index);
@@ -244,6 +246,9 @@  void vfio_unmask_single_irq(VFIODevice *vbasedev, int index, int irq);
 void vfio_mask_single_irq(VFIODevice *vbasedev, int index, int irq);
 int vfio_set_irq_signaling(VFIODevice *vbasedev, int index, int subindex,
                            int action, int fd, Error **errp);
+void vfio_host_win_add(VFIOContainer *container, hwaddr min_iova,
+                       hwaddr max_iova, uint64_t iova_pgsizes);
+void vfio_listener_release(VFIOContainer *container);
 void vfio_region_write(void *opaque, hwaddr addr,
                            uint64_t data, unsigned size);
 uint64_t vfio_region_read(void *opaque,
@@ -256,11 +261,16 @@  void vfio_region_unmap(VFIORegion *region);
 void vfio_region_exit(VFIORegion *region);
 void vfio_region_finalize(VFIORegion *region);
 void vfio_reset_handler(void *opaque);
+VFIOAddressSpace *vfio_get_address_space(AddressSpace *as);
+void vfio_put_address_space(VFIOAddressSpace *space);
 VFIOGroup *vfio_get_group(int groupid, AddressSpace *as, Error **errp);
 void vfio_put_group(VFIOGroup *group);
 int vfio_get_device(VFIOGroup *group, const char *name,
                     VFIODevice *vbasedev, Error **errp);
 
+VFIOContainer *vfio_new_container(VFIOAddressSpace *space);
+void vfio_link_container(VFIOContainer *container, VFIOGroup *group);
+void vfio_unmap_container(VFIOContainer *container);
 void vfio_init_device(VFIODevice *vbasedev, VFIOGroup *group,
                       struct vfio_device_info *info);
 
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 0c1cb21..6f99907 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -164,7 +164,7 @@  static const char *index_to_str(VFIODevice *vbasedev, int index)
     }
 }
 
-static int vfio_ram_block_discard_disable(VFIOContainer *container, bool state)
+int vfio_ram_block_discard_disable(VFIOContainer *container, bool state)
 {
     switch (container->iommu_type) {
     case VFIO_TYPE1v2_IOMMU:
@@ -532,7 +532,7 @@  static int vfio_dma_map(VFIOContainer *container, hwaddr iova,
     return ret;
 }
 
-static void vfio_host_win_add(VFIOContainer *container,
+void vfio_host_win_add(VFIOContainer *container,
                               hwaddr min_iova, hwaddr max_iova,
                               uint64_t iova_pgsizes)
 {
@@ -1495,7 +1495,7 @@  static const MemoryListener vfio_memory_listener = {
     .log_sync = vfio_listener_log_sync,
 };
 
-static void vfio_listener_release(VFIOContainer *container)
+void vfio_listener_release(VFIOContainer *container)
 {
     memory_listener_unregister(&container->listener);
     if (container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU) {
@@ -1873,7 +1873,7 @@  static void vfio_kvm_device_del_group(VFIOGroup *group)
 #endif
 }
 
-static VFIOAddressSpace *vfio_get_address_space(AddressSpace *as)
+VFIOAddressSpace *vfio_get_address_space(AddressSpace *as)
 {
     VFIOAddressSpace *space;
 
@@ -1893,7 +1893,7 @@  static VFIOAddressSpace *vfio_get_address_space(AddressSpace *as)
     return space;
 }
 
-static void vfio_put_address_space(VFIOAddressSpace *space)
+void vfio_put_address_space(VFIOAddressSpace *space)
 {
     if (QLIST_EMPTY(&space->containers)) {
         QLIST_REMOVE(space, list);
@@ -2024,6 +2024,34 @@  static void vfio_get_iommu_info_migration(VFIOContainer *container,
     }
 }
 
+/* Allocate a zero-filled container for @space and init its list heads. */
+VFIOContainer *vfio_new_container(VFIOAddressSpace *space)
+{
+    VFIOContainer *c = g_malloc0(sizeof(*c));
+
+    QLIST_INIT(&c->group_list);
+    QLIST_INIT(&c->vrdl_list);
+    QLIST_INIT(&c->hostwin_list);
+    QLIST_INIT(&c->giommu_list);
+    c->error = NULL;
+    c->space = space;
+
+    return c;
+}
+
+/* Link @container into its address space and @group; register listener. */
+void vfio_link_container(VFIOContainer *container, VFIOGroup *group)
+{
+    QLIST_INSERT_HEAD(&container->group_list, group, container_next);
+    group->container = container;
+
+    QLIST_INSERT_HEAD(&container->space->containers, container, next);
+
+    /* registration comes last, after both list links are in place */
+    container->listener = vfio_memory_listener;
+    memory_listener_register(&container->listener, container->space->as);
+}
+
 static int vfio_connect_container(VFIOGroup *group, AddressSpace *as,
                                   Error **errp)
 {
@@ -2099,16 +2127,11 @@  static int vfio_connect_container(VFIOGroup *group, AddressSpace *as,
         goto close_fd_exit;
     }
 
-    container = g_malloc0(sizeof(*container));
-    container->space = space;
+    container = vfio_new_container(space);
     container->fd = fd;
-    container->error = NULL;
     container->dirty_pages_supported = false;
     container->dma_max_mappings = 0;
     container->io = &vfio_cont_io_ioctl;
-    QLIST_INIT(&container->giommu_list);
-    QLIST_INIT(&container->hostwin_list);
-    QLIST_INIT(&container->vrdl_list);
 
     ret = vfio_init_container(container, group->fd, errp);
     if (ret) {
@@ -2223,15 +2246,7 @@  static int vfio_connect_container(VFIOGroup *group, AddressSpace *as,
 
     vfio_kvm_device_add_group(group);
 
-    QLIST_INIT(&container->group_list);
-    QLIST_INSERT_HEAD(&space->containers, container, next);
-
-    group->container = container;
-    QLIST_INSERT_HEAD(&container->group_list, group, container_next);
-
-    container->listener = vfio_memory_listener;
-
-    memory_listener_register(&container->listener, container->space->as);
+    vfio_link_container(container, group);
 
     if (container->error) {
         ret = -1;
@@ -2264,9 +2279,31 @@  put_space_exit:
     return ret;
 }
 
+/* Unlink @container from its address space; free its giommus and hostwins. */
+void vfio_unmap_container(VFIOContainer *container)
+{
+    VFIOGuestIOMMU *gi, *gi_next;
+    VFIOHostDMAWindow *win, *win_next;
+
+    QLIST_REMOVE(container, next);
+
+    QLIST_FOREACH_SAFE(gi, &container->giommu_list, giommu_next, gi_next) {
+        MemoryRegion *mr = MEMORY_REGION(gi->iommu_mr);
+
+        memory_region_unregister_iommu_notifier(mr, &gi->n);
+        QLIST_REMOVE(gi, giommu_next);
+        g_free(gi);
+    }
+    QLIST_FOREACH_SAFE(win, &container->hostwin_list, hostwin_next, win_next) {
+        QLIST_REMOVE(win, hostwin_next);
+        g_free(win);
+    }
+}
+
 static void vfio_disconnect_container(VFIOGroup *group)
 {
     VFIOContainer *container = group->container;
+    VFIOAddressSpace *space = container->space;
 
     QLIST_REMOVE(group, container_next);
     group->container = NULL;
@@ -2286,24 +2323,7 @@  static void vfio_disconnect_container(VFIOGroup *group)
     }
 
     if (QLIST_EMPTY(&container->group_list)) {
-        VFIOAddressSpace *space = container->space;
-        VFIOGuestIOMMU *giommu, *tmp;
-        VFIOHostDMAWindow *hostwin, *next;
-
-        QLIST_REMOVE(container, next);
-
-        QLIST_FOREACH_SAFE(giommu, &container->giommu_list, giommu_next, tmp) {
-            memory_region_unregister_iommu_notifier(
-                    MEMORY_REGION(giommu->iommu_mr), &giommu->n);
-            QLIST_REMOVE(giommu, giommu_next);
-            g_free(giommu);
-        }
-
-        QLIST_FOREACH_SAFE(hostwin, &container->hostwin_list, hostwin_next,
-                           next) {
-            QLIST_REMOVE(hostwin, hostwin_next);
-            g_free(hostwin);
-        }
+        vfio_unmap_container(container);
 
         trace_vfio_disconnect_container(container->fd);
         close(container->fd);
@@ -2503,7 +2523,9 @@  void vfio_put_base_device(VFIODevice *vbasedev)
     QLIST_REMOVE(vbasedev, next);
     vbasedev->group = NULL;
     trace_vfio_put_base_device(vbasedev->fd);
-    close(vbasedev->fd);
+    if (vbasedev->fd != -1) {
+        close(vbasedev->fd);
+    }
 }
 
 int vfio_get_region_info(VFIODevice *vbasedev, int index,
diff --git a/hw/vfio/user-pci.c b/hw/vfio/user-pci.c
index bc1d01a..a0aa320 100644
--- a/hw/vfio/user-pci.c
+++ b/hw/vfio/user-pci.c
@@ -134,6 +134,7 @@  static void vfio_user_pci_realize(PCIDevice *pdev, Error **errp)
     VFIODevice *vbasedev = &vdev->vbasedev;
     SocketAddress addr;
     VFIOUserProxy *proxy;
+    VFIOGroup *group = NULL;
     int ret;
     Error *err = NULL;
 
@@ -180,8 +181,15 @@  static void vfio_user_pci_realize(PCIDevice *pdev, Error **errp)
     vbasedev->use_regfds = true;
     vbasedev->can_mask_irq = true;
 
-    ret = vfio_user_get_device(vbasedev, errp);
+    group = vfio_user_get_group(proxy, pci_device_iommu_address_space(pdev),
+                                errp);
+    if (!group) {
+        goto error;
+    }
+
+    ret = vfio_user_get_device(group, vbasedev, errp);
     if (ret) {
+        vfio_user_put_group(group);
         goto error;
     }
 
@@ -241,6 +249,7 @@  static void vfio_user_instance_finalize(Object *obj)
 {
     VFIOPCIDevice *vdev = VFIO_PCI_BASE(obj);
     VFIODevice *vbasedev = &vdev->vbasedev;
+    VFIOGroup *group = vbasedev->group;
 
     vfio_bars_finalize(vdev);
     g_free(vdev->emulated_config_bits);
@@ -251,6 +260,7 @@  static void vfio_user_instance_finalize(Object *obj)
     }
 
     vfio_put_device(vdev);
+    vfio_user_put_group(group);
 
     if (vbasedev->proxy != NULL) {
         vfio_user_disconnect(vbasedev->proxy);
diff --git a/hw/vfio/user.c b/hw/vfio/user.c
index d66dc1b..aebf44c 100644
--- a/hw/vfio/user.c
+++ b/hw/vfio/user.c
@@ -18,10 +18,14 @@ 
 #include "hw/hw.h"
 #include "hw/vfio/vfio-common.h"
 #include "hw/vfio/vfio.h"
+#include "exec/address-spaces.h"
+#include "exec/memory.h"
+#include "exec/ram_addr.h"
 #include "qemu/sockets.h"
 #include "io/channel.h"
 #include "io/channel-socket.h"
 #include "io/channel-util.h"
+#include "sysemu/reset.h"
 #include "sysemu/iothread.h"
 #include "qapi/qmp/qdict.h"
 #include "qapi/qmp/qjson.h"
@@ -847,7 +851,102 @@  void vfio_user_disconnect(VFIOUserProxy *proxy)
     g_free(proxy);
 }
 
-int vfio_user_get_device(VFIODevice *vbasedev, Error **errp)
+/*
+ * Set up a socket-backed container for @group in @as, mirroring
+ * vfio_connect_container() as much as possible.  Returns 0 on success,
+ * or non-zero with @errp set after undoing any partial setup.
+ */
+static int vfio_connect_proxy(VFIOUserProxy *proxy, VFIOGroup *group,
+                              AddressSpace *as, Error **errp)
+{
+    VFIOAddressSpace *space;
+    VFIOContainer *container;
+    int ret;
+
+    space = vfio_get_address_space(as);
+
+    container = vfio_new_container(space);
+    container->fd = -1;
+    container->io = &vfio_cont_io_sock;
+    container->proxy = proxy;
+
+    /*
+     * The proxy uses a SW IOMMU in lieu of the HW one
+     * used in the ioctl() version.  Masquerade as TYPE1
+     * for maximum compatibility
+     */
+    container->iommu_type = VFIO_TYPE1_IOMMU;
+
+    /*
+     * VFIO user allows the device server to map guest
+     * memory so it has the same issue with discards as
+     * a local IOMMU has.
+     */
+    ret = vfio_ram_block_discard_disable(container, true);
+    if (ret) {
+        error_setg_errno(errp, -ret, "Cannot set discarding of RAM broken");
+        goto free_container_exit;
+    }
+
+    vfio_host_win_add(container, 0, (hwaddr)-1, proxy->dma_pgsizes);
+    container->pgsizes = proxy->dma_pgsizes;
+    container->dma_max_mappings = proxy->max_dma;
+
+    /* setup bitmask now, but migration support won't be ready until v2 */
+    container->dirty_pages_supported = true;
+    container->max_dirty_bitmap_size = proxy->max_bitmap;
+    container->dirty_pgsizes = proxy->migr_pgsize;
+
+    vfio_link_container(container, group);
+
+    if (container->error) {
+        ret = -1;
+        error_propagate_prepend(errp, container->error,
+            "memory listener initialization failed: ");
+        goto listener_release_exit;
+    }
+
+    container->initialized = true;
+    return 0;
+
+listener_release_exit:
+    QLIST_REMOVE(group, container_next);
+    group->container = NULL;
+    vfio_listener_release(container);
+    /* unlinks from the space and frees the host window added above */
+    vfio_unmap_container(container);
+    vfio_ram_block_discard_disable(container, false);
+
+free_container_exit:
+    g_free(container);
+    vfio_put_address_space(space);
+
+    return ret;
+}
+
+/*
+ * Tear down the container behind @group.  This mirrors
+ * vfio_disconnect_container() as closely as it can, relying on each
+ * vfio-user device having exactly one group and one container.
+ */
+static void vfio_disconnect_proxy(VFIOGroup *group)
+{
+    VFIOContainer *cont = group->container;
+    /* saved up front: cont is freed before the space reference is dropped */
+    VFIOAddressSpace *cont_space = cont->space;
+
+    group->container = NULL;
+    QLIST_REMOVE(group, container_next);
+
+    /* unregister the listener, then drop the giommu/hostwin entries */
+    memory_listener_unregister(&cont->listener);
+    vfio_unmap_container(cont);
+
+    g_free(cont);
+    vfio_put_address_space(cont_space);
+}
+
+int vfio_user_get_device(VFIOGroup *group, VFIODevice *vbasedev, Error **errp)
 {
     struct vfio_device_info info = { .argsz = sizeof(info) };
     int ret;
@@ -866,11 +965,57 @@  int vfio_user_get_device(VFIODevice *vbasedev, Error **errp)
     }
 
     vbasedev->fd = -1;
-    vfio_init_device(vbasedev, NULL, &info);
+    vfio_init_device(vbasedev, group, &info);
 
     return 0;
 }
 
+/*
+ * Create the group for a vfio-user device and connect it to a new
+ * proxy-backed container.  Mirrors vfio_get_group(), except that each
+ * device gets its own group and container, unrelated to any host IOMMU
+ * groupings.  Returns NULL with @errp set if the connect fails.
+ */
+VFIOGroup *vfio_user_get_group(VFIOUserProxy *proxy, AddressSpace *as,
+                               Error **errp)
+{
+    VFIOGroup *group = g_malloc0(sizeof(*group));
+
+    group->fd = -1;
+    group->groupid = -1;
+    QLIST_INIT(&group->device_list);
+
+    if (vfio_connect_proxy(proxy, group, as, errp)) {
+        /* bail out here: a failed group must never reach the global list */
+        error_prepend(errp, "failed to connect proxy: ");
+        g_free(group);
+        return NULL;
+    }
+
+    if (QLIST_EMPTY(&vfio_group_list)) {
+        qemu_register_reset(vfio_reset_handler, NULL);
+    }
+    QLIST_INSERT_HEAD(&vfio_group_list, group, next);
+
+    return group;
+}
+
+/* Release @group once no device uses it; NULL and busy groups are ignored. */
+void vfio_user_put_group(VFIOGroup *group)
+{
+    if (group == NULL || !QLIST_EMPTY(&group->device_list)) {
+        return;
+    }
+    vfio_ram_block_discard_disable(group->container, false);
+    vfio_disconnect_proxy(group);
+
+    QLIST_REMOVE(group, next);
+    g_free(group);
+    if (QLIST_EMPTY(&vfio_group_list)) {
+        qemu_unregister_reset(vfio_reset_handler, NULL);
+    }
+}
+
 static void vfio_user_request_msg(VFIOUserHdr *hdr, uint16_t cmd,
                                   uint32_t size, uint32_t flags)
 {
@@ -1463,3 +1608,6 @@  VFIODeviceIO vfio_dev_io_sock = {
     .region_read = vfio_user_io_region_read,
     .region_write = vfio_user_io_region_write,
 };
+
+VFIOContainerIO vfio_cont_io_sock = { /* no container callbacks set here */
+};